]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osdc: Fix crash on OSD failure 35422/head
authorAdam C. Emerson <aemerson@redhat.com>
Fri, 5 Jun 2020 19:18:35 +0000 (15:18 -0400)
committerAdam C. Emerson <aemerson@redhat.com>
Fri, 5 Jun 2020 19:18:35 +0000 (15:18 -0400)
OSDSession::lock should not use lockdep, as we violate its rules in a
place that's safe. Revert it to std::mutex.

Thanks to Or Friedmann <ofriedma@redhat.com> for finding the problem.

Fixes: https://tracker.ceph.com/issues/45878
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
src/osdc/Objecter.cc
src/osdc/Objecter.h

index 6c260d57cecc2efef1c024d474c8a512d2329e11..79ed8c6185db9dc65ecaa5c53f32df43fe006d70 100644 (file)
@@ -1506,7 +1506,7 @@ int Objecter::pool_snap_list(int64_t poolid, vector<uint64_t> *snaps)
 }
 
 // sl may be unlocked.
-void Objecter::_check_op_pool_dne(Op *op, std::unique_lock<ceph::shared_mutex> *sl)
+void Objecter::_check_op_pool_dne(Op *op, std::unique_lock<std::shared_mutex> *sl)
 {
   // rwlock is locked unique
 
@@ -3031,8 +3031,9 @@ int Objecter::_recalc_linger_op_target(LingerOp *linger_op,
     if (linger_op->session != s) {
       // NB locking two sessions (s and linger_op->session) at the
       // same time here is only safe because we are the only one that
-      // takes two, and we are holding rwlock for write.  Disable
-      // lockdep because it doesn't know that.
+      // takes two, and we are holding rwlock for write.  We use
+      // std::shared_mutex in OSDSession because lockdep doesn't know
+      // that.
       unique_lock sl(s->lock);
       _session_linger_op_remove(linger_op->session, linger_op);
       _session_linger_op_assign(s, linger_op);
index 321055b041b5eb4ed8cf03247f725a41b3485dd6..18f17d53ac6e802f957a71fe022630f2e303cfd9 100644 (file)
@@ -2372,7 +2372,12 @@ public:
     std::map<uint64_t,OSDBackoff*> backoffs_by_id;
 
     int osd;
-    ceph::shared_mutex lock;
+    // NB locking two sessions at the same time is only safe because
+    // it is only done in _recalc_linger_op_target with s and
+    // linger_op->session, and it holds rwlock for write.  We disable
+    // lockdep (using std::sharedMutex) because lockdep doesn't know
+    // that.
+    std::shared_mutex lock;
 
     int incarnation;
     ConnectionRef con;
@@ -2380,9 +2385,7 @@ public:
     std::unique_ptr<std::mutex[]> completion_locks;
 
     OSDSession(CephContext *cct, int o) :
-      osd(o), lock(ceph::make_shared_mutex(
-                    fmt::format("OSDSession::lock #{}", o))),
-      incarnation(0), con(NULL),
+      osd(o), incarnation(0), con(NULL),
       num_locks(cct->_conf->objecter_completion_locks_per_session),
       completion_locks(new std::mutex[num_locks]) {}
 
@@ -2502,7 +2505,7 @@ public:
   }
 
 private:
-  void _check_op_pool_dne(Op *op, std::unique_lock<ceph::shared_mutex> *sl);
+  void _check_op_pool_dne(Op *op, std::unique_lock<std::shared_mutex> *sl);
   void _send_op_map_check(Op *op);
   void _op_cancel_map_check(Op *op);
   void _check_linger_pool_dne(LingerOp *op, bool *need_unregister);