From: Igor Fedotov
Date: Fri, 5 Nov 2021 09:38:51 +0000 (+0300)
Subject: os/bluestore: do not select absent device in volume selector
X-Git-Tag: v16.2.7~24^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=edbdfd5002d4cf5895adbb9f87b7817b14272e19;p=ceph.git

os/bluestore: do not select absent device in volume selector

Fixes: https://tracker.ceph.com/issues/53139
Signed-off-by: Igor Fedotov
(cherry picked from commit 39132b0e460d2d667f02019335dced50d5db641d)
---
Review notes (not part of the commit message):
 * This patch was reflowed from a whitespace-collapsed page scrape.
   Indentation and blank lines are reconstructed, so context lines may
   need `git apply --ignore-whitespace` to match the tree.
 * Angle-bracketed text was lost in the scrape. `dynamic_cast<BlueStore*>`
   and `map<string,string>` are restored from context (verify against the
   tree); e-mail addresses are not restored.
 * inject_bluefs_file: `std::string s('0', new_size)` selected the
   (count, ch) constructor with count = '0' (48) and ch = char(new_size).
   The arguments are swapped here so the payload is new_size bytes of '0'.
 * The "Fixes:" URL scheme was corrected from "ttps" to "https".
 * Comment lines were added to the new code and the hunk line counts and
   offsets were adjusted to match. The post-image blob ids on the
   BlueStore.cc / BlueStore.h `index` lines still describe the original
   commit.

diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc
index 16b64636b891..37a000ac6ef6 100644
--- a/src/os/bluestore/BlueFS.cc
+++ b/src/os/bluestore/BlueFS.cc
@@ -3819,10 +3819,9 @@ void* OriginalVolumeSelector::get_hint_by_dir(std::string_view dirname) const {
     // match up with bluestore. the slow device is always the second
     // one (when a dedicated block.db device is present and used at
     // bdev 0). the wal device is always last.
-    if (boost::algorithm::ends_with(dirname, ".slow")) {
+    if (boost::algorithm::ends_with(dirname, ".slow") && slow_total) {
       res = BlueFS::BDEV_SLOW;
-    }
-    else if (boost::algorithm::ends_with(dirname, ".wal")) {
+    } else if (boost::algorithm::ends_with(dirname, ".wal") && wal_total) {
       res = BlueFS::BDEV_WAL;
     }
   }
diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc
index 04d67cabe479..77a12bd51009 100644
--- a/src/os/bluestore/BlueStore.cc
+++ b/src/os/bluestore/BlueStore.cc
@@ -9176,6 +9176,27 @@ void BlueStore::inject_zombie_spanning_blob(coll_t cid, ghobject_t oid,
   db->submit_transaction_sync(txn);
 }
 
+// Opens BlueFS file dir/name for writing, appends new_size '0'
+// characters, then fsyncs and closes the writer. Asserts that BlueFS
+// is present and that the open succeeds.
+void BlueStore::inject_bluefs_file(std::string_view dir, std::string_view name, size_t new_size)
+{
+  ceph_assert(bluefs);
+
+  BlueFS::FileWriter* p_handle = nullptr;
+  auto ret = bluefs->open_for_write(dir, name, &p_handle, false);
+  ceph_assert(ret == 0);
+
+  // std::string(count, ch): count first, fill character second.
+  std::string s(new_size, '0');
+  bufferlist bl;
+  bl.append(s);
+  p_handle->append(bl);
+
+  bluefs->fsync(p_handle);
+  bluefs->close_writer(p_handle);
+}
+
 void BlueStore::collect_metadata(map<string,string> *pm)
 {
   dout(10) << __func__ << dendl;
@@ -16534,8 +16555,14 @@ uint8_t RocksDBBlueFSVolumeSelector::select_prefer_bdev(void* h) {
 
 void RocksDBBlueFSVolumeSelector::get_paths(const std::string& base, paths& res) const
 {
-  res.emplace_back(base, l_totals[LEVEL_DB - LEVEL_FIRST]);
-  res.emplace_back(base + ".slow", l_totals[LEVEL_SLOW - LEVEL_FIRST]);
+  auto db_size = l_totals[LEVEL_DB - LEVEL_FIRST];
+  res.emplace_back(base, db_size);
+  auto slow_size = l_totals[LEVEL_SLOW - LEVEL_FIRST];
+  // a zero LEVEL_SLOW total falls back to the db size for the ".slow" path
+  if (slow_size == 0) {
+    slow_size = db_size;
+  }
+  res.emplace_back(base + ".slow", slow_size);
 }
 
 void* RocksDBBlueFSVolumeSelector::get_hint_by_dir(std::string_view dirname) const {
diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h
index 3783f12c928d..0a5c451e5602 100644
--- a/src/os/bluestore/BlueStore.h
+++ b/src/os/bluestore/BlueStore.h
@@ -3024,6 +3024,11 @@ public:
   // resets per_pool_omap | pgmeta_omap for onode
   void inject_legacy_omap(coll_t cid, ghobject_t oid);
 
+  // writes a BlueFS file dir/name filled with new_size '0' characters
+  void inject_bluefs_file(std::string_view dir,
+                          std::string_view name,
+                          size_t new_size);
+
   void compact() override {
     ceph_assert(db);
     db->compact();
diff --git a/src/test/objectstore/store_test.cc b/src/test/objectstore/store_test.cc
index 069d72101b5d..c3926a3bf4a4 100644
--- a/src/test/objectstore/store_test.cc
+++ b/src/test/objectstore/store_test.cc
@@ -9525,6 +9525,69 @@ TEST_P(StoreTestSpecificAUSize, OmapUpgradeTest) {
   }
 }
 
+TEST_P(StoreTestSpecificAUSize, BluefsWriteInSingleDiskEnvTest) {
+  if (string(GetParam()) != "bluestore")
+    return;
+
+  g_conf().apply_changes(nullptr);
+
+  StartDeferred(0x1000);
+
+  BlueStore* bstore = dynamic_cast<BlueStore*> (store.get());
+  ceph_assert(bstore);
+  bstore->inject_bluefs_file("db.slow", "store_test_injection_slow", 1 << 20ul);
+  bstore->inject_bluefs_file("db.wal", "store_test_injection_wal", 1 << 20ul);
+  bstore->inject_bluefs_file("db", "store_test_injection_wal", 1 << 20ul);
+
+  AdminSocket* admin_socket = g_ceph_context->get_admin_socket();
+  ceph_assert(admin_socket);
+
+  ceph::bufferlist in, out;
+  ostringstream err;
+  auto r = admin_socket->execute_command(
+    { "{\"prefix\": \"bluefs stats\"}" },
+    in, err, &out);
+  if (r != 0) {
+    cerr << "failure querying: " << cpp_strerror(r) << std::endl;
+  } else {
+    std::cout << std::string(out.c_str(), out.length()) << std::endl;
+  }
+}
+
+TEST_P(StoreTestSpecificAUSize, BluefsWriteInNoWalDiskEnvTest) {
+  if (string(GetParam()) != "bluestore")
+    return;
+
+  SetVal(g_conf(), "bluestore_block_db_path", "db");
+  SetVal(g_conf(), "bluestore_block_db_size", stringify(1ull << 31).c_str());
+  SetVal(g_conf(), "bluestore_block_db_create", "true");
+
+  g_conf().apply_changes(nullptr);
+
+  StartDeferred(0x1000);
+
+  BlueStore* bstore = dynamic_cast<BlueStore*> (store.get());
+  ceph_assert(bstore);
+  bstore->inject_bluefs_file("db.slow", "store_test_injection_slow", 1 << 20ul);
+  bstore->inject_bluefs_file("db.wal", "store_test_injection_wal", 1 << 20ul);
+  bstore->inject_bluefs_file("db", "store_test_injection_wal", 1 << 20ul);
+
+  AdminSocket* admin_socket = g_ceph_context->get_admin_socket();
+  ceph_assert(admin_socket);
+
+  ceph::bufferlist in, out;
+  ostringstream err;
+  auto r = admin_socket->execute_command(
+    { "{\"prefix\": \"bluefs stats\"}" },
+    in, err, &out);
+  if (r != 0) {
+    cerr << "failure querying: " << cpp_strerror(r) << std::endl;
+  }
+  else {
+    std::cout << std::string(out.c_str(), out.length()) << std::endl;
+  }
+}
+
 #endif // WITH_BLUESTORE
 
 int main(int argc, char **argv) {