From: Adam Kupczyk Date: Mon, 25 Oct 2021 11:30:26 +0000 (+0200) Subject: os/bluestore: Disable compaction when no-column-b is storing allocations to bluefs... X-Git-Tag: v17.1.0~576^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=867f0a249e136aa7caea7c6432229fb0f27f6e2c;p=ceph-ci.git os/bluestore: Disable compaction when no-column-b is storing allocations to bluefs file During BlueStore umount we store the current allocation state to disk, in the form of a bluefs file. If RocksDB was performing compaction during capture of allocator state, it could cause corruption. The solution is to delete db (stop RocksDB) before state capture. Fixes: https://tracker.ceph.com/issues/52399 Signed-off-by: Adam Kupczyk --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 4f59ebde508..16129c86c3d 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -6089,7 +6089,12 @@ out_fm: void BlueStore::_close_db_and_around(bool read_only) { - _close_db(read_only); + if (db) { + _close_db_leave_bluefs(); + } + if (bluefs) { + _close_bluefs(read_only); + } _close_fm(); _close_alloc(); _close_bdev(); @@ -6327,6 +6332,13 @@ void BlueStore::_close_db(bool cold_close) } } +void BlueStore::_close_db_leave_bluefs() +{ + ceph_assert(db); + delete db; + db = nullptr; +} + void BlueStore::_dump_alloc_on_failure() { auto dump_interval = @@ -7274,6 +7286,7 @@ int BlueStore::umount() dout(20) << __func__ << " closing" << dendl; } + _close_db_leave_bluefs(); // GBH - Vault the allocation state dout(5) << "NCB::BlueStore::umount->store_allocation_state_on_bluestore() " << dendl; if (was_mounted && fm->is_null_manager()) { @@ -17210,6 +17223,9 @@ const unsigned MAX_EXTENTS_IN_BUFFER = 4 * 1024; // 4K extents = 64KB of data //----------------------------------------------------------------------------------- int BlueStore::store_allocator(Allocator* src_allocator) { + // when storing allocations to file we must be sure there are no background
compactions + // the easiest way to achieve it is to make sure db is closed + ceph_assert(db == nullptr); utime_t start_time = ceph_clock_now(); int ret = 0; @@ -17989,6 +18005,7 @@ int BlueStore::read_allocation_from_drive_for_bluestore_tool(bool test_store_and } if (test_store_and_restore) { + _close_db_leave_bluefs(); dout(5) << "calling store_allocator(shared_alloc.a)" << dendl; store_allocator(shared_alloc.a); Allocator* alloc2 = create_bitmap_allocator(bdev_size); diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 29ff87d5ff1..517ec18a1e5 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -2403,6 +2403,7 @@ private: bool to_repair_db=false, bool read_only = false); void _close_db(bool read_only); + void _close_db_leave_bluefs(); int _open_fm(KeyValueDB::Transaction t, bool read_only, bool fm_restore = false); void _close_fm(); int _write_out_fm_meta(uint64_t target_size);