From: Matan Breizman Date: Tue, 11 Mar 2025 09:13:18 +0000 (+0000) Subject: Revert "os/bluestore: Modify _deferred_replay" X-Git-Tag: testing/wip-vshankar-testing-20250311.124427-debug~2^2~2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=e1c75071b4ec516da639457c6132b7742dde12b2;p=ceph-ci.git Revert "os/bluestore: Modify _deferred_replay" This reverts commit c10a794f72613ffdccdd64e24840dfce93174f30. Signed-off-by: Matan Breizman --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 9a85de72850..255373ddfc3 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -15119,7 +15119,7 @@ void BlueStore::_kv_sync_thread() auto sync_start = mono_clock::now(); #endif // submit synct synchronously (block and wait for it to commit) - int r = cct->_conf->bluestore_debug_omit_kv_commit ? + int r = db_was_opened_read_only || cct->_conf->bluestore_debug_omit_kv_commit ? 0 : db->submit_transaction_sync(synct); ceph_assert(r == 0); @@ -15518,14 +15518,19 @@ int BlueStore::_deferred_replay() } ); } + CollectionRef ch = _get_collection(coll_t::meta()); + bool fake_ch = false; + if (!ch) { + // hmm, replaying initial mkfs? + ch = static_cast(create_new_collection(coll_t::meta()).get()); + fake_ch = true; + } + OpSequencer *osr = static_cast(ch->osr.get()); if (tracepoint_debug_deferred_replay_start) tracepoint_debug_deferred_replay_start(); - IOContext ioctx(cct, nullptr); - KeyValueDB::Transaction t = db->get_transaction(); KeyValueDB::Iterator it = db->get_iterator(PREFIX_DEFERRED); for (it->lower_bound(string()); it->valid(); it->next(), ++count) { dout(20) << __func__ << " replay " << pretty_binary_string(it->key()) << dendl; - t->rmkey(PREFIX_DEFERRED, it->key()); bluestore_deferred_transaction_t *deferred_txn = new bluestore_deferred_transaction_t; bufferlist bl = it->value(); @@ -15542,29 +15547,22 @@ int BlueStore::_deferred_replay() bool has_some = _eliminate_outdated_deferred(deferred_txn, bluefs_extents); if (has_some) { if (tracepoint_debug_deferred_replay_track) tracepoint_debug_deferred_replay_track(*deferred_txn); - for (auto& op: deferred_txn->ops) { - for (auto& e : op.extents) { - bufferlist t; - op.data.splice(0, e.length, &t); - bdev->aio_write(e.offset, t, &ioctx, false); - } - } + TransContext *txc = _txc_create(ch.get(), osr, nullptr); + txc->deferred_txn = deferred_txn; + txc->set_state(TransContext::STATE_KV_DONE); + _txc_state_proc(txc); } else { delete deferred_txn; } } out: - bdev->aio_submit(&ioctx); - dout(20) << __func__ << "waiting to complete IO" << dendl; - ioctx.aio_wait(); - dout(20) << __func__ << "wait done" << dendl; - if (!db_was_opened_read_only) { - db->submit_transaction_sync(t); - dout(20) << __func__ << "removed L keys" << dendl; - } else { - dout(10) << __func__ << "DB read-pnly, skipped L keys removal" << dendl; - } + dout(20) << __func__ << " draining osr" << dendl; + _osr_register_zombie(osr); + _osr_drain_all(); if (tracepoint_debug_deferred_replay_end) tracepoint_debug_deferred_replay_end(); + if (fake_ch) { + new_coll_map.clear(); + } dout(10) << __func__ << " completed " << count << " events" << dendl; return r; }