From 90bf20764dbb7386b4f979a5bf641fc0fcbdf7fa Mon Sep 17 00:00:00 2001
From: Pere Diaz Bou
Date: Wed, 31 Jul 2024 12:17:30 +0200
Subject: [PATCH] os/bluestore: test bluefs wal v2

Signed-off-by: Pere Diaz Bou
(cherry picked from commit 4961dca7c7c8e8675ad82e2124b1a0c334e22b57)
---
Reviewer notes (below the "---" marker, so not part of the commit message):

 * Line breaks and indentation were re-created; the reviewed copy had been
   collapsed onto a few long lines and every "<...>" span had been stripped.
   Restored from context: the two scan loops in bl_eq() and the element type
   of chunk_sizes. With the bl_eq() loops restored, the original final hunk
   again counts 652 added lines, matching its "+894,658" header.
 * Assumptions to confirm against the tree: the three context includes of the
   first hunk are presumed to be <thread>, <stack> and <gtest/gtest.h>; the
   blank line in the trailing context is presumed to follow the TEST line.
   The e-mail addresses on the From:/Signed-off-by: lines were lost and are
   not restored here.
 * Changes relative to the original commit: the five tests that left the
   filesystem mounted now end with fs.umount(); the rollback test checks
   writer for nullptr before dereferencing it; the unused chunk_sizes vectors
   were dropped from the two read_2_partial tests; NULL became nullptr; short
   purpose comments were added. Hunk and diffstat counts were updated; the
   blob ids on the "index" line are stale.

 src/test/objectstore/test_bluefs.cc | 675 ++++++++++++++++++++++++++++
 1 file changed, 675 insertions(+)

diff --git a/src/test/objectstore/test_bluefs.cc b/src/test/objectstore/test_bluefs.cc
index 3e55d1392da2a..8e21e878923bb 100644
--- a/src/test/objectstore/test_bluefs.cc
+++ b/src/test/objectstore/test_bluefs.cc
@@ -11,6 +11,8 @@
 #include <thread>
 #include <stack>
 #include <gtest/gtest.h>
+#include "common/dout.h"
+#include "common/debug.h"
 #include "global/global_init.h"
 #include "common/ceph_argparse.h"
 #include "include/stringify.h"
@@ -18,6 +20,7 @@
 #include "common/errno.h"
 
 #include "os/bluestore/Allocator.h"
+#include "os/bluestore/bluefs_types.h"
 #include "os/bluestore/bluestore_common.h"
 #include "os/bluestore/BlueFS.h"
 
@@ -891,6 +894,678 @@ TEST(BlueFS, test_tracker_50965) {
   fs.umount();
 }
 
+// Returns true if both bufferlists hold the same bytes; otherwise prints a
+// diagnostic to cout and returns false (borrowed from store_test).
+static bool bl_eq(bufferlist& expected, bufferlist& actual) {
+  if (expected.contents_equal(actual))
+    return true;
+
+  unsigned first = 0;
+  if(expected.length() != actual.length()) {
+    cout << "--- buffer lengths mismatch " << std::hex
+         << "expected 0x" << expected.length() << " != actual 0x"
+         << actual.length() << std::dec << std::endl;
+  }
+  auto len = std::min(expected.length(), actual.length());
+  // advance 'first' past the leading bytes that match
+  while ( first<len && expected[first] == actual[first])
+    ++first;
+  unsigned last = len;
+  // pull 'last' back over bytes that match at the same index
+  while (last > 0 && expected[last-1] == actual[last-1])
+    --last;
+  if(len > 0) {
+    cout << "--- buffer mismatch between offset 0x" << std::hex << first
+         << " and 0x" << last << ", total 0x" << len << std::dec
+         << std::endl;
+    cout << "--- expected:\n";
+    expected.hexdump(cout);
+    cout << "--- actual:\n";
+    actual.hexdump(cout);
+  }
+  return false;
+}
+
+// Write 70000 bytes to db.wal/wal1.log, remount, and verify that the data
+// reads back unchanged.
+TEST(BlueFS, test_wal_write) {
+  uint64_t size_wal = 1048576 * 64;
+  TempBdev bdev_wal{size_wal};
+  uint64_t size_db = 1048576 * 128;
+  TempBdev bdev_db{size_db};
+  uint64_t size_slow = 1048576 * 256;
+  TempBdev bdev_slow{size_slow};
+
+  ConfSaver conf(g_ceph_context->_conf);
+  conf.SetVal("bluefs_min_flush_size", "65536");
+  conf.ApplyChanges();
+
+  BlueFS fs(g_ceph_context);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+  uuid_d fsid;
+  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+  string dir_db = "db.wal";
+  ASSERT_EQ(0, fs.mkdir(dir_db));
+
+  string wal_file = "wal1.log";
+  BlueFS::FileWriter *writer;
+  ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+  ASSERT_NE(nullptr, writer);
+
+  bufferlist bl1;
+  auto gen_debugable = [](size_t amount, bufferlist& bl) {
+    for (size_t i = 0; i < amount; i++) {
+      bl.append('a');
+    }
+  };
+  gen_debugable(70000, bl1);
+  fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+  fs.fsync(writer);
+
+  // WAL files don't update internal extents while writing to save memory, only on _replay
+  fs.umount();
+  fs.mount();
+
+  BlueFS::FileReader *reader;
+  ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+  bufferlist read_bl;
+  fs.read(reader, 0, 70000, &read_bl, nullptr);
+  ASSERT_TRUE(bl_eq(bl1, read_bl));
+  delete reader;
+  fs.umount();
+
+}
+
+// Append ten 70000-byte chunks to one file, remounting after every fsync and
+// verifying the chunk that was just written.
+TEST(BlueFS, test_wal_write_multiple) {
+  uint64_t size_wal = 1048576 * 64;
+  TempBdev bdev_wal{size_wal};
+  uint64_t size_db = 1048576 * 128;
+  TempBdev bdev_db{size_db};
+  uint64_t size_slow = 1048576 * 256;
+  TempBdev bdev_slow{size_slow};
+
+  ConfSaver conf(g_ceph_context->_conf);
+  conf.SetVal("bluefs_min_flush_size", "65536");
+  conf.ApplyChanges();
+
+  BlueFS fs(g_ceph_context);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+  uuid_d fsid;
+  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+  string dir_db = "db.wal";
+  ASSERT_EQ(0, fs.mkdir(dir_db));
+
+  string wal_file = "wal1.log";
+  BlueFS::FileWriter *writer;
+  ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+  ASSERT_NE(nullptr, writer);
+
+  auto gen_debugable = [](size_t amount, bufferlist& bl, char c) {
+    for (size_t i = 0; i < amount; i++) {
+      bl.append(c);
+    }
+  };
+  size_t buffer_size = 70000;
+  for (int i = 0; i < 10; i++) {
+    bufferlist bl1;
+    gen_debugable(buffer_size, bl1, 'a' + i);
+    fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+    fs.fsync(writer);
+
+    // WAL files don't update internal extents while writing to save memory
+    fs.umount();
+    fs.mount();
+
+    BlueFS::FileReader *reader;
+    ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+    bufferlist read_bl;
+    fs.read(reader, i * buffer_size, buffer_size, &read_bl, nullptr);
+    ASSERT_TRUE(bl_eq(bl1, read_bl));
+    delete reader;
+  }
+  fs.umount();
+}
+
+// Write ten fsynced 70000-byte chunks, remount, and check that the file
+// reports ten WAL flushes and the full size.
+TEST(BlueFS, test_wal_write_multiple_recover) {
+  uint64_t size_wal = 1048576 * 64;
+  TempBdev bdev_wal{size_wal};
+  uint64_t size_db = 1048576 * 128;
+  TempBdev bdev_db{size_db};
+  uint64_t size_slow = 1048576 * 256;
+  TempBdev bdev_slow{size_slow};
+
+  ConfSaver conf(g_ceph_context->_conf);
+  conf.SetVal("bluefs_min_flush_size", "65536");
+  conf.ApplyChanges();
+
+  BlueFS fs(g_ceph_context);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+  uuid_d fsid;
+  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+  string dir_db = "db.wal";
+  ASSERT_EQ(0, fs.mkdir(dir_db));
+
+  string wal_file = "wal1.log";
+  BlueFS::FileWriter *writer;
+  ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+  ASSERT_NE(nullptr, writer);
+
+  auto gen_debugable = [](size_t amount, bufferlist& bl, char c) {
+    for (size_t i = 0; i < amount; i++) {
+      bl.append(c);
+    }
+  };
+  size_t buffer_size = 70000;
+  uint64_t flush_count = 10;
+  for (int i = 0; i < 10; i++) {
+    bufferlist bl1;
+    gen_debugable(buffer_size, bl1, 'a' + i);
+    fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+    fs.fsync(writer);
+  }
+  fs.close_writer(writer);
+
+  fs.umount();
+  fs.mount();
+
+  uint64_t size = 0;
+
+  BlueFS::FileReader *reader;
+  ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+  fs.stat(dir_db, wal_file, &size, nullptr);
+  ASSERT_EQ(reader->file->wal_flushes.size(), flush_count);
+  ASSERT_EQ(size, 70000 * flush_count);
+  delete reader;
+  fs.umount();
+}
+
+// Append ten 8-byte chunks with a single fsync at the end, remount, and check
+// that they were recorded as one WAL flush whose contents read back intact.
+TEST(BlueFS, test_wal_write_multiple_recover_fsync_end) {
+  uint64_t size_wal = 1048576 * 64;
+  TempBdev bdev_wal{size_wal};
+  uint64_t size_db = 1048576 * 128;
+  TempBdev bdev_db{size_db};
+  uint64_t size_slow = 1048576 * 256;
+  TempBdev bdev_slow{size_slow};
+
+  ConfSaver conf(g_ceph_context->_conf);
+  conf.SetVal("bluefs_min_flush_size", "524288");
+  conf.ApplyChanges();
+
+  BlueFS fs(g_ceph_context);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+  uuid_d fsid;
+  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+  string dir_db = "db.wal";
+  ASSERT_EQ(0, fs.mkdir(dir_db));
+
+  string wal_file = "wal1.log";
+  BlueFS::FileWriter *writer;
+  ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+  ASSERT_NE(nullptr, writer);
+
+  auto gen_debugable = [](size_t amount, bufferlist& bl, char c) {
+    for (size_t i = 0; i < amount; i++) {
+      bl.append(c);
+    }
+  };
+  size_t buffer_size = 8;
+  uint64_t flush_count = 1;
+  for (int i = 0; i < 10; i++) {
+    bufferlist bl1;
+    gen_debugable(buffer_size, bl1, 'a' + i);
+    fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+  }
+  fs.fsync(writer);
+  fs.close_writer(writer);
+
+  fs.umount();
+  fs.mount();
+
+  uint64_t size = 0;
+
+  { // test size
+    BlueFS::FileReader *reader;
+    ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+    fs.stat(dir_db, wal_file, &size, nullptr);
+    ASSERT_EQ(reader->file->wal_flushes.size(), flush_count);
+    ASSERT_EQ(size, buffer_size * 10);
+    delete reader;
+  }
+
+  // test contents
+  for (int i = 0; i < 10; i++) {
+    bufferlist bl1;
+    gen_debugable(buffer_size, bl1, 'a' + i);
+
+    BlueFS::FileReader *reader;
+    ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+    bufferlist read_bl;
+    fs.read(reader, i * buffer_size, buffer_size, &read_bl, nullptr);
+    ASSERT_TRUE(bl_eq(bl1, read_bl));
+    delete reader;
+  }
+  fs.umount();
+}
+
+// Write two 65536-byte chunks, remount, and read one chunk's worth of data
+// starting half a chunk in, so the read straddles both chunks.
+TEST(BlueFS, test_wal_read_2_partial) {
+  uint64_t size_wal = 1048576 * 64;
+  TempBdev bdev_wal{size_wal};
+  uint64_t size_db = 1048576 * 128;
+  TempBdev bdev_db{size_db};
+  uint64_t size_slow = 1048576 * 256;
+  TempBdev bdev_slow{size_slow};
+
+  ConfSaver conf(g_ceph_context->_conf);
+  conf.SetVal("bluefs_min_flush_size", "65536");
+  conf.ApplyChanges();
+
+  BlueFS fs(g_ceph_context);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+  uuid_d fsid;
+  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+  string dir_db = "db.wal";
+  ASSERT_EQ(0, fs.mkdir(dir_db));
+
+
+  auto gen_debugable_alternating = [](size_t amount, bufferlist& bl, char c) {
+    for (size_t i = 0; i < amount; i++) {
+      bl.append(c + (rand() % 10));
+    }
+  };
+  uint64_t flush_count = 2;
+  uint64_t flush_size = 65536;
+  uint64_t total_size = flush_count * flush_size;
+
+  uint64_t chunk_size = flush_size;
+  bufferlist contents;
+  ASSERT_TRUE((total_size)%chunk_size == 0);
+
+  string wal_file("wal1.log");
+
+  BlueFS::FileWriter *writer;
+  ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+  ASSERT_NE(nullptr, writer);
+  // write the file chunk by chunk, mirroring the data into 'contents'
+  uint64_t remaining = total_size;
+  while(remaining > 0) {
+    bufferlist bl1;
+    gen_debugable_alternating(chunk_size, bl1, 'a'+ (remaining % 10));
+    fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+    contents.append(bl1);
+    remaining -= chunk_size;
+  }
+  fs.fsync(writer);
+  fs.close_writer(writer);
+
+  fs.umount();
+  fs.mount();
+
+  uint64_t size = 0;
+
+
+  { // test size
+    BlueFS::FileReader *reader;
+    ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+    fs.stat(dir_db, wal_file, &size, nullptr);
+    ASSERT_EQ(reader->file->wal_flushes.size(), flush_count);
+    ASSERT_EQ(size, total_size);
+    delete reader;
+  }
+
+  BlueFS::FileReader *reader;
+  ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+  bufferlist read_bl;
+  uint64_t offset = chunk_size / 2;
+  uint64_t length = chunk_size;
+  fs.read(reader, offset, length, &read_bl, nullptr);
+  bufferlist chunk_contents;
+  chunk_contents.substr_of(contents, offset, length);
+  ASSERT_TRUE(bl_eq(chunk_contents, read_bl));
+  delete reader;
+  fs.umount();
+}
+
+// Same scenario as test_wal_read_2_partial, but with fs.compact_log() called
+// before the remount.
+TEST(BlueFS, test_wal_read_2_partial_compact) {
+  uint64_t size_wal = 1048576 * 64;
+  TempBdev bdev_wal{size_wal};
+  uint64_t size_db = 1048576 * 128;
+  TempBdev bdev_db{size_db};
+  uint64_t size_slow = 1048576 * 256;
+  TempBdev bdev_slow{size_slow};
+
+  ConfSaver conf(g_ceph_context->_conf);
+  conf.SetVal("bluefs_min_flush_size", "65536");
+  conf.ApplyChanges();
+
+  BlueFS fs(g_ceph_context);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+  uuid_d fsid;
+  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+  string dir_db = "db.wal";
+  ASSERT_EQ(0, fs.mkdir(dir_db));
+
+
+  auto gen_debugable_alternating = [](size_t amount, bufferlist& bl, char c) {
+    for (size_t i = 0; i < amount; i++) {
+      bl.append(c + (rand() % 10));
+    }
+  };
+  uint64_t flush_count = 2;
+  uint64_t flush_size = 65536;
+  uint64_t total_size = flush_count * flush_size;
+
+  uint64_t chunk_size = flush_size;
+  bufferlist contents;
+  ASSERT_TRUE((total_size)%chunk_size == 0);
+
+  string wal_file("wal1.log");
+
+  BlueFS::FileWriter *writer;
+  ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+  ASSERT_NE(nullptr, writer);
+  // write the file chunk by chunk, mirroring the data into 'contents'
+  uint64_t remaining = total_size;
+  while(remaining > 0) {
+    bufferlist bl1;
+    gen_debugable_alternating(chunk_size, bl1, 'a'+ (remaining % 10));
+    fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+    contents.append(bl1);
+    remaining -= chunk_size;
+  }
+  fs.fsync(writer);
+  fs.close_writer(writer);
+
+  fs.compact_log();
+  fs.umount();
+  fs.mount();
+
+  uint64_t size = 0;
+
+
+  { // test size
+    BlueFS::FileReader *reader;
+    ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+    fs.stat(dir_db, wal_file, &size, nullptr);
+    ASSERT_EQ(reader->file->wal_flushes.size(), flush_count);
+    ASSERT_EQ(size, total_size);
+    delete reader;
+  }
+
+  BlueFS::FileReader *reader;
+  ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+  bufferlist read_bl;
+  uint64_t offset = chunk_size / 2;
+  uint64_t length = chunk_size;
+  fs.read(reader, offset, length, &read_bl, nullptr);
+  bufferlist chunk_contents;
+  chunk_contents.substr_of(contents, offset, length);
+  ASSERT_TRUE(bl_eq(chunk_contents, read_bl));
+  delete reader;
+  fs.umount();
+}
+
+// For each of two chunk sizes: write a 2 * 524288-byte file chunk by chunk,
+// remount, check size and flush count, then read every chunk back.
+TEST(BlueFS, test_wal_write_multiple_recover_partial_reads) {
+  // read from the middle of one flush and end somewhere in between other flush
+  uint64_t size_wal = 1048576 * 64;
+  TempBdev bdev_wal{size_wal};
+  uint64_t size_db = 1048576 * 128;
+  TempBdev bdev_db{size_db};
+  uint64_t size_slow = 1048576 * 256;
+  TempBdev bdev_slow{size_slow};
+
+  ConfSaver conf(g_ceph_context->_conf);
+  conf.SetVal("bluefs_min_flush_size", "524288");
+  conf.ApplyChanges();
+
+  BlueFS fs(g_ceph_context);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+  uuid_d fsid;
+  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+  string dir_db = "db.wal";
+  ASSERT_EQ(0, fs.mkdir(dir_db));
+
+
+  auto gen_debugable_alternating = [](size_t amount, bufferlist& bl, char c) {
+    for (size_t i = 0; i < amount; i++) {
+      bl.append(c + (i % 10));
+    }
+  };
+  uint64_t flush_count = 2;
+  uint64_t flush_size = 524288;
+  uint64_t total_size = flush_count * flush_size;
+
+  vector<uint64_t> chunk_sizes = {total_size / 32, total_size / 64};
+
+  uint64_t fileid = 0;
+  for (auto chunk_size : chunk_sizes) {
+    bufferlist contents;
+    fileid++;
+    ASSERT_TRUE((total_size)%chunk_size == 0);
+
+    string wal_file("wal");
+    wal_file += to_string(fileid);
+    wal_file += ".log";
+
+    BlueFS::FileWriter *writer;
+    ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+    ASSERT_NE(nullptr, writer);
+    // write the file chunk by chunk, mirroring the data into 'contents'
+    uint64_t remaining = total_size;
+    while(remaining > 0) {
+      bufferlist bl1;
+      gen_debugable_alternating(chunk_size, bl1, 'a'+ (remaining % 10));
+      fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+      contents.append(bl1);
+      remaining -= chunk_size;
+    }
+    fs.fsync(writer);
+    fs.close_writer(writer);
+
+    fs.umount();
+    fs.mount();
+
+    uint64_t size = 0;
+
+
+    { // test size
+      BlueFS::FileReader *reader;
+      ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+      fs.stat(dir_db, wal_file, &size, nullptr);
+      ASSERT_EQ(reader->file->wal_flushes.size(), flush_count);
+      ASSERT_EQ(size, total_size);
+      delete reader;
+    }
+
+    uint64_t read_chunks = size / chunk_size; // WAL default chunk size is 32k
+    for (uint64_t chunk = 0; chunk < read_chunks; chunk++) {
+      BlueFS::FileReader *reader;
+      ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+      bufferlist read_bl;
+      uint64_t offset = chunk * chunk_size;
+      uint64_t length = chunk_size;
+      fs.read(reader, offset, length, &read_bl, nullptr);
+      bufferlist chunk_contents;
+      chunk_contents.substr_of(contents, offset, length);
+      ASSERT_TRUE(bl_eq(chunk_contents, read_bl));
+      delete reader;
+    }
+
+  }
+  fs.umount();
+}
+
+
+// Write two fsynced 70000-byte appends, truncate to the first one (payload
+// plus envelope), remount, and verify that a single flush remains.
+TEST(BlueFS, test_wal_write_truncate) {
+  uint64_t size_wal = 1048576 * 64;
+  TempBdev bdev_wal{size_wal};
+  uint64_t size_db = 1048576 * 128;
+  TempBdev bdev_db{size_db};
+  uint64_t size_slow = 1048576 * 256;
+  TempBdev bdev_slow{size_slow};
+
+  ConfSaver conf(g_ceph_context->_conf);
+  conf.SetVal("bluefs_min_flush_size", "65536");
+  conf.ApplyChanges();
+
+  BlueFS fs(g_ceph_context);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+  uuid_d fsid;
+  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+  string dir_db = "db.wal";
+  ASSERT_EQ(0, fs.mkdir(dir_db));
+
+  string wal_file = "wal1.log";
+  BlueFS::FileWriter *writer;
+  ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+  ASSERT_NE(nullptr, writer);
+
+  bufferlist bl1;
+  auto gen_debugable = [](size_t amount, bufferlist& bl) {
+    for (size_t i = 0; i < amount; i++) {
+      bl.append('a');
+    }
+  };
+  gen_debugable(70000, bl1);
+  fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+  fs.fsync(writer);
+  fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+  fs.fsync(writer);
+  fs.truncate(writer, 70000+BlueFS::File::WALFlush::extra_envelope_size_on_front_and_tail());
+  fs.fsync(writer);
+
+  fs.umount();
+  fs.mount();
+
+  BlueFS::FileReader *reader;
+  ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+  ASSERT_EQ(reader->file->fnode.wal_limit, 70000+BlueFS::File::WALFlush::extra_envelope_size_on_front_and_tail());
+  ASSERT_EQ(reader->file->wal_flushes.size(), 1u);
+  bufferlist read_bl;
+  fs.read(reader, 0, 70000, &read_bl, nullptr);
+  ASSERT_TRUE(bl_eq(bl1, read_bl));
+  delete reader;
+  fs.umount();
+
+}
+
+TEST(BlueFS, test_wal_read_after_rollback_to_v1) {
+  // test whether we still read with v2 version even though new files will be v1
+  uint64_t size_wal = 1048576 * 64;
+  TempBdev bdev_wal{size_wal};
+  uint64_t size_db = 1048576 * 128;
+  TempBdev bdev_db{size_db};
+  uint64_t size_slow = 1048576 * 256;
+  TempBdev bdev_slow{size_slow};
+
+  ConfSaver conf(g_ceph_context->_conf);
+  conf.SetVal("bluefs_min_flush_size", "65536");
+  conf.SetVal("bluefs_wal_v2", "true");
+  conf.ApplyChanges();
+
+  BlueFS fs(g_ceph_context);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+  uuid_d fsid;
+  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+  string dir_db = "db.wal";
+  ASSERT_EQ(0, fs.mkdir(dir_db));
+
+  string wal_file = "wal1.log";
+  BlueFS::FileWriter *writer;
+  ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+  ASSERT_NE(nullptr, writer);
+  ASSERT_EQ(writer->file->fnode.type, WAL_V2);
+
+  bufferlist bl1;
+  auto gen_debugable = [](size_t amount, bufferlist& bl) {
+    for (size_t i = 0; i < amount; i++) {
+      bl.append('a');
+    }
+  };
+  gen_debugable(70000, bl1);
+  fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+  fs.fsync(writer);
+
+  g_ceph_context->_conf.set_val("bluefs_wal_v2", "false");
+  fs.umount();
+  fs.mount();
+
+  BlueFS::FileReader *reader;
+  ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+  bufferlist read_bl;
+  fs.read(reader, 0, 70000, &read_bl, nullptr);
+  ASSERT_TRUE(bl_eq(bl1, read_bl));
+  delete reader;
+
+  {
+    // open another file to ensure v1 is set correctly
+    string wal_file = "wal2.log";
+    BlueFS::FileWriter *writer;
+    ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+    ASSERT_NE(nullptr, writer);
+    ASSERT_EQ(writer->file->fnode.type, REGULAR);
+  }
+  fs.umount();
+}
+
 TEST(BlueFS, test_truncate_stable_53129) {
 
   ConfSaver conf(g_ceph_context->_conf);
-- 
2.39.5