]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: fix split vs finish_write race 24992/head
authorSage Weil <sage@redhat.com>
Mon, 13 Aug 2018 17:10:31 +0000 (12:10 -0500)
committerJonathan Brielmaier <jbrielmaier@suse.de>
Thu, 8 Nov 2018 10:14:54 +0000 (11:14 +0100)
In _tcx_finish(), we were looking at the right Cache for the collection,
and then calling finish_write with that Cache and taking the lock.  This
could race with a split_cache() such that after we got the lock the
collection was not on a different cache.  This would in turn lead to a
failed assertion later on in _rm_buffer when the sharedblob was trimmed.

Fixes: http://tracker.ceph.com/issues/24439
Signed-off-by: Sage Weil <sage@redhat.com>
(cherry picked from commit f21f1f14f2d2a465ba072118bd8e32271bf8906e)
Signed-off-by: Jonathan Brielmaier <jbrielmaier@suse.de>
src/os/bluestore/BlueStore.cc
src/os/bluestore/BlueStore.h

index b326d39bcd2ecaa9e3bf15ca3667abb5a0b5812c..9b5fcface4112bc8d0b45b4f2641ce0ab34da394 100644 (file)
@@ -1370,10 +1370,8 @@ void BlueStore::BufferSpace::read(
   cache->logger->inc(l_bluestore_buffer_miss_bytes, miss_bytes);
 }
 
-void BlueStore::BufferSpace::finish_write(Cache* cache, uint64_t seq)
+void BlueStore::BufferSpace::_finish_write(Cache* cache, uint64_t seq)
 {
-  std::lock_guard<std::recursive_mutex> l(cache->lock);
-
   auto i = writing.begin();
   while (i != writing.end()) {
     if (i->seq > seq) {
@@ -1648,6 +1646,23 @@ void BlueStore::SharedBlob::put_ref(uint64_t offset, uint32_t length,
   }
 }
 
+void BlueStore::SharedBlob::finish_write(uint64_t seq)
+{
+  while (true) {
+    Cache *cache = coll->cache;
+    std::lock_guard<std::recursive_mutex> l(cache->lock);
+    if (coll->cache != cache) {
+      ldout(coll->store->cct, 20) << __func__
+                                 << " raced with sb cache update, was " << cache
+                                 << ", now " << coll->cache << ", retrying"
+                                 << dendl;
+      continue;
+    }
+    bc._finish_write(cache, seq);
+    break;
+  }
+}
+
 // SharedBlobSet
 
 #undef dout_prefix
@@ -8398,7 +8413,7 @@ void BlueStore::_txc_finish(TransContext *txc)
   assert(txc->state == TransContext::STATE_FINISHING);
 
   for (auto& sb : txc->shared_blobs_written) {
-    sb->bc.finish_write(sb->get_cache(), txc->seq);
+    sb->finish_write(txc->seq);
   }
   txc->shared_blobs_written.clear();
 
index 6a14042df107d6f4c419eb609457fb1f9883b7ad..3c81ebc0b9c016df06f6acb9cba479390dabf203 100644 (file)
@@ -333,7 +333,7 @@ public:
       b->cache_private = _discard(cache, offset, bl.length());
       _add_buffer(cache, b, (flags & Buffer::FLAG_NOCACHE) ? 0 : 1, nullptr);
     }
-    void finish_write(Cache* cache, uint64_t seq);
+    void _finish_write(Cache* cache, uint64_t seq);
     void did_read(Cache* cache, uint32_t offset, bufferlist& bl) {
       std::lock_guard<std::recursive_mutex> l(cache->lock);
       Buffer *b = new Buffer(this, Buffer::STATE_CLEAN, 0, offset, bl);
@@ -410,6 +410,8 @@ public:
     void put_ref(uint64_t offset, uint32_t length,
                 PExtentVector *r, set<SharedBlob*> *maybe_unshared_blobs);
 
+    void finish_write(uint64_t seq);
+
     friend bool operator==(const SharedBlob &l, const SharedBlob &r) {
       return l.get_sbid() == r.get_sbid();
     }