os/bluestore: avoid race between split_cache and get/put pin/unpin

author Sage Weil <sage@redhat.com>

Wed, 15 Jan 2020 23:09:24 +0000 (17:09 -0600)

committer Sage Weil <sage@redhat.com>

Wed, 15 Jan 2020 23:14:08 +0000 (17:14 -0600)
author Sage Weil <sage@redhat.com>
Wed, 15 Jan 2020 23:09:24 +0000 (17:09 -0600)
committer Sage Weil <sage@redhat.com>
Wed, 15 Jan 2020 23:14:08 +0000 (17:14 -0600)
diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc

index fecba3ad5986039e88a88e1ee453b817100649c3..6dfe103e89ba96c37f35ae93a4425ed5fc2419cb 100644 (file)
--- a/src/os/bluestore/BlueStore.cc
+++ b/src/os/bluestore/BlueStore.cc
@@ -3731,13 +3731,24 @@ void BlueStore::Collection::split_cache(
        ldout(store->cct, 20) << __func__ << " moving " << o << " " << o->oid
                             << dendl;
  
-      onode_map.cache->_rm(p->second);
+      // move the onode to the new map before futzing with the cache
+      // shard, ensuring that nref is always >= 2, and no racing
+      // thread can trigger a pin or unpin (which does *not* behave
+      // well when we are clearing and resetting the 's' shard
+      // pointer!).
        p = onode_map.onode_map.erase(p);
-
-      o->c = dest;
-      dest->onode_map.cache->_add(o, 1);
        dest->onode_map.onode_map[o->oid] = o;
  
+      if (onode_map.cache != dest->onode_map.cache) {
+       // move onode to a different cache shard
+       onode_map.cache->_rm(o);
+       o->c = dest;
+       dest->onode_map.cache->_add(o, 1);
+      } else {
+       // the onode is in the same cache shard, making our move simpler.
+       o->c = dest;
+      }
+
        // move over shared blobs and buffers.  cover shared blobs from
        // both extent map and spanning blob map (the full extent map
        // may not be faulted in)
author	Sage Weil <sage@redhat.com>
	Wed, 15 Jan 2020 23:09:24 +0000 (17:09 -0600)
committer	Sage Weil <sage@redhat.com>
	Wed, 15 Jan 2020 23:14:08 +0000 (17:14 -0600)