From c1a67d9af5f387fb9590660bf42ec58d18f5a5f7 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 19 Oct 2021 16:28:55 -0500
Subject: [PATCH] os/bluestore: _txc_create inside of alloc_and_submit_lock

Create the transaction while holding atomic_alloc_and_submit_lock (the
SMR lock).  Otherwise, we may get a deadlock between the cleaner (C) and
a normal write op (W):

W                         C
_txc_create seq 1
                          lock alloc_and_submit
                          _txc_create seq 2
                          ...
                          unlock alloc_and_submit
lock alloc_and_submit
...
block on flush
                          _txc_finish_io, but blocked by seq 1
<deadlock>

The root issue here is that the txcs are misordered with respect to the
alloc_and_submit lock: W creates its txc (seq 1) first, but C takes the
lock first.

Fix by moving the _txc_create call inside the lock, so that txcs are
created in the same order in which the lock is taken.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 src/os/bluestore/BlueStore.cc | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc
index b4d53766889..02e51ed1f6f 100644
--- a/src/os/bluestore/BlueStore.cc
+++ b/src/os/bluestore/BlueStore.cc
@@ -13536,10 +13536,6 @@ int BlueStore::queue_transactions(
   OpSequencer *osr = c->osr.get();
   dout(10) << __func__ << " ch " << c << " " << c->cid << dendl;
 
-  // prepare
-  TransContext *txc = _txc_create(static_cast<Collection*>(ch.get()), osr,
-				  &on_commit, op);
-
   // With HM-SMR drives (and ZNS SSDs) we want the I/O allocation and I/O
   // submission to happen atomically because if I/O submission happens in a
   // different order than I/O allocation, we end up issuing non-sequential
@@ -13549,6 +13545,11 @@ int BlueStore::queue_transactions(
   if (bdev->is_smr()) {
     atomic_alloc_and_submit_lock.lock();
   }
+
+  // prepare
+  TransContext *txc = _txc_create(static_cast<Collection*>(ch.get()), osr,
+				  &on_commit, op);
+
   for (vector<Transaction>::iterator p = tls.begin(); p != tls.end(); ++p) {
     txc->bytes += (*p).get_num_bytes();
     _txc_add_transaction(txc, &(*p));
-- 
2.39.5