#include <thread>
#include <stack>
#include <gtest/gtest.h>
+#include "common/dout.h"
+#include "common/debug.h"
#include "global/global_init.h"
#include "common/ceph_argparse.h"
#include "include/stringify.h"
#include "common/errno.h"
#include "os/bluestore/Allocator.h"
+#include "os/bluestore/bluefs_types.h"
#include "os/bluestore/bluestore_common.h"
#include "os/bluestore/BlueFS.h"
fs.umount();
}
+// borrowed from store_test
+static bool bl_eq(bufferlist& expected, bufferlist& actual) {
+ if (expected.contents_equal(actual))
+ return true;
+
+ unsigned first = 0;
+ if(expected.length() != actual.length()) {
+ cout << "--- buffer lengths mismatch " << std::hex
+ << "expected 0x" << expected.length() << " != actual 0x"
+ << actual.length() << std::dec << std::endl;
+ }
+ auto len = std::min(expected.length(), actual.length());
+ while ( first<len && expected[first] == actual[first])
+ ++first;
+ unsigned last = len;
+ while (last > 0 && expected[last-1] == actual[last-1])
+ --last;
+ if(len > 0) {
+ cout << "--- buffer mismatch between offset 0x" << std::hex << first
+ << " and 0x" << last << ", total 0x" << len << std::dec
+ << std::endl;
+ cout << "--- expected:\n";
+ expected.hexdump(cout);
+ cout << "--- actual:\n";
+ actual.hexdump(cout);
+ }
+ return false;
+}
+
+TEST(BlueFS, test_wal_write) {
+ uint64_t size_wal = 1048576 * 64;
+ TempBdev bdev_wal{size_wal};
+ uint64_t size_db = 1048576 * 128;
+ TempBdev bdev_db{size_db};
+ uint64_t size_slow = 1048576 * 256;
+ TempBdev bdev_slow{size_slow};
+
+ ConfSaver conf(g_ceph_context->_conf);
+ conf.SetVal("bluefs_min_flush_size", "65536");
+ conf.ApplyChanges();
+
+ BlueFS fs(g_ceph_context);
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false));
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false));
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+ uuid_d fsid;
+ ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+ ASSERT_EQ(0, fs.mount());
+ ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+ string dir_db = "db.wal";
+ ASSERT_EQ(0, fs.mkdir(dir_db));
+
+ string wal_file = "wal1.log";
+ BlueFS::FileWriter *writer;
+ ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+ ASSERT_NE(nullptr, writer);
+
+ bufferlist bl1;
+ auto gen_debugable = [](size_t amount, bufferlist& bl) {
+ for (size_t i = 0; i < amount; i++) {
+ bl.append('a');
+ }
+ };
+ gen_debugable(70000, bl1);
+ fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+ fs.fsync(writer);
+
+ // WAL files don't update internal extents while writing to save memory, only on _replay
+ fs.umount();
+ fs.mount();
+
+ BlueFS::FileReader *reader;
+ ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+ bufferlist read_bl;
+ fs.read(reader, 0, 70000, &read_bl, NULL);
+ ASSERT_TRUE(bl_eq(bl1, read_bl));
+ delete reader;
+ fs.umount();
+
+}
+
+TEST(BlueFS, test_wal_write_multiple) {
+ uint64_t size_wal = 1048576 * 64;
+ TempBdev bdev_wal{size_wal};
+ uint64_t size_db = 1048576 * 128;
+ TempBdev bdev_db{size_db};
+ uint64_t size_slow = 1048576 * 256;
+ TempBdev bdev_slow{size_slow};
+
+ ConfSaver conf(g_ceph_context->_conf);
+ conf.SetVal("bluefs_min_flush_size", "65536");
+ conf.ApplyChanges();
+
+ BlueFS fs(g_ceph_context);
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false));
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false));
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+ uuid_d fsid;
+ ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+ ASSERT_EQ(0, fs.mount());
+ ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+ string dir_db = "db.wal";
+ ASSERT_EQ(0, fs.mkdir(dir_db));
+
+ string wal_file = "wal1.log";
+ BlueFS::FileWriter *writer;
+ ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+ ASSERT_NE(nullptr, writer);
+
+ auto gen_debugable = [](size_t amount, bufferlist& bl, char c) {
+ for (size_t i = 0; i < amount; i++) {
+ bl.append(c);
+ }
+ };
+ size_t buffer_size = 70000;
+ for (int i = 0; i < 10; i++) {
+ bufferlist bl1;
+ gen_debugable(buffer_size, bl1, 'a' + i);
+ fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+ fs.fsync(writer);
+
+ // WAL files don't update internal extents while writing to save memory
+ fs.umount();
+ fs.mount();
+
+ BlueFS::FileReader *reader;
+ ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+ bufferlist read_bl;
+ fs.read(reader, i * buffer_size, buffer_size, &read_bl, NULL);
+ ASSERT_TRUE(bl_eq(bl1, read_bl));
+ delete reader;
+ }
+ fs.umount();
+}
+
+TEST(BlueFS, test_wal_write_multiple_recover) {
+ uint64_t size_wal = 1048576 * 64;
+ TempBdev bdev_wal{size_wal};
+ uint64_t size_db = 1048576 * 128;
+ TempBdev bdev_db{size_db};
+ uint64_t size_slow = 1048576 * 256;
+ TempBdev bdev_slow{size_slow};
+
+ ConfSaver conf(g_ceph_context->_conf);
+ conf.SetVal("bluefs_min_flush_size", "65536");
+ conf.ApplyChanges();
+
+ BlueFS fs(g_ceph_context);
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false));
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false));
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+ uuid_d fsid;
+ ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+ ASSERT_EQ(0, fs.mount());
+ ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+ string dir_db = "db.wal";
+ ASSERT_EQ(0, fs.mkdir(dir_db));
+
+ string wal_file = "wal1.log";
+ BlueFS::FileWriter *writer;
+ ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+ ASSERT_NE(nullptr, writer);
+
+ auto gen_debugable = [](size_t amount, bufferlist& bl, char c) {
+ for (size_t i = 0; i < amount; i++) {
+ bl.append(c);
+ }
+ };
+ size_t buffer_size = 70000;
+ uint64_t flush_count = 10;
+ for (int i = 0; i < 10; i++) {
+ bufferlist bl1;
+ gen_debugable(buffer_size, bl1, 'a' + i);
+ fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+ fs.fsync(writer);
+ }
+ fs.close_writer(writer);
+
+ fs.umount();
+ fs.mount();
+
+ uint64_t size = 0;
+
+ BlueFS::FileReader *reader;
+ ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+ fs.stat(dir_db, wal_file, &size, nullptr);
+ ASSERT_EQ(reader->file->wal_flushes.size(), flush_count);
+ ASSERT_EQ(size, 70000 * flush_count);
+ delete reader;
+}
+
+TEST(BlueFS, test_wal_write_multiple_recover_fsync_end) {
+ uint64_t size_wal = 1048576 * 64;
+ TempBdev bdev_wal{size_wal};
+ uint64_t size_db = 1048576 * 128;
+ TempBdev bdev_db{size_db};
+ uint64_t size_slow = 1048576 * 256;
+ TempBdev bdev_slow{size_slow};
+
+ ConfSaver conf(g_ceph_context->_conf);
+ conf.SetVal("bluefs_min_flush_size", "524288");
+ conf.ApplyChanges();
+
+ BlueFS fs(g_ceph_context);
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false));
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false));
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+ uuid_d fsid;
+ ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+ ASSERT_EQ(0, fs.mount());
+ ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+ string dir_db = "db.wal";
+ ASSERT_EQ(0, fs.mkdir(dir_db));
+
+ string wal_file = "wal1.log";
+ BlueFS::FileWriter *writer;
+ ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+ ASSERT_NE(nullptr, writer);
+
+ auto gen_debugable = [](size_t amount, bufferlist& bl, char c) {
+ for (size_t i = 0; i < amount; i++) {
+ bl.append(c);
+ }
+ };
+ size_t buffer_size = 8;
+ uint64_t flush_count = 1;
+ for (int i = 0; i < 10; i++) {
+ bufferlist bl1;
+ gen_debugable(buffer_size, bl1, 'a' + i);
+ fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+ }
+ fs.fsync(writer);
+ fs.close_writer(writer);
+
+ fs.umount();
+ fs.mount();
+
+ uint64_t size = 0;
+
+ { // test size
+ BlueFS::FileReader *reader;
+ ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+ fs.stat(dir_db, wal_file, &size, nullptr);
+ ASSERT_EQ(reader->file->wal_flushes.size(), flush_count);
+ ASSERT_EQ(size, buffer_size * 10);
+ delete reader;
+ }
+
+ // test contents
+ for (int i = 0; i < 10; i++) {
+ bufferlist bl1;
+ gen_debugable(buffer_size, bl1, 'a' + i);
+
+ BlueFS::FileReader *reader;
+ ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+ bufferlist read_bl;
+ fs.read(reader, i * buffer_size, buffer_size, &read_bl, NULL);
+ ASSERT_TRUE(bl_eq(bl1, read_bl));
+ delete reader;
+ }
+}
+
+TEST(BlueFS, test_wal_read_2_partial) {
+ uint64_t size_wal = 1048576 * 64;
+ TempBdev bdev_wal{size_wal};
+ uint64_t size_db = 1048576 * 128;
+ TempBdev bdev_db{size_db};
+ uint64_t size_slow = 1048576 * 256;
+ TempBdev bdev_slow{size_slow};
+
+ ConfSaver conf(g_ceph_context->_conf);
+ conf.SetVal("bluefs_min_flush_size", "65536");
+ conf.ApplyChanges();
+
+ BlueFS fs(g_ceph_context);
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false));
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false));
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+ uuid_d fsid;
+ ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+ ASSERT_EQ(0, fs.mount());
+ ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+ string dir_db = "db.wal";
+ ASSERT_EQ(0, fs.mkdir(dir_db));
+
+
+ auto gen_debugable_alternating = [](size_t amount, bufferlist& bl, char c) {
+ for (size_t i = 0; i < amount; i++) {
+ bl.append(c + (rand() % 10));
+ }
+ };
+ uint64_t flush_count = 2;
+ uint64_t flush_size = 65536;
+ uint64_t total_size = flush_count * flush_size;
+
+ vector<uint64_t> chunk_sizes = {total_size / 32, total_size / 64};
+
+ uint64_t chunk_size = flush_size;
+ bufferlist contents;
+ ASSERT_TRUE((total_size)%chunk_size == 0);
+
+ string wal_file("wal1.log");
+
+ BlueFS::FileWriter *writer;
+ ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+ ASSERT_NE(nullptr, writer);
+ //
+ uint64_t remaining = total_size;
+ while(remaining > 0) {
+ bufferlist bl1;
+ gen_debugable_alternating(chunk_size, bl1, 'a'+ (remaining % 10));
+ fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+ contents.append(bl1);
+ remaining -= chunk_size;
+ }
+ fs.fsync(writer);
+ fs.close_writer(writer);
+
+ fs.umount();
+ fs.mount();
+
+ uint64_t size = 0;
+
+
+ { // test size
+ BlueFS::FileReader *reader;
+ ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+ fs.stat(dir_db, wal_file, &size, nullptr);
+ ASSERT_EQ(reader->file->wal_flushes.size(), flush_count);
+ ASSERT_EQ(size, total_size);
+ delete reader;
+ }
+
+ BlueFS::FileReader *reader;
+ ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+ bufferlist read_bl;
+ uint64_t offset = chunk_size / 2;
+ uint64_t length = chunk_size;
+ fs.read(reader, offset, length, &read_bl, NULL);
+ bufferlist chunk_contents;
+ chunk_contents.substr_of(contents, offset, length);
+ ASSERT_TRUE(bl_eq(chunk_contents, read_bl));
+ delete reader;
+}
+
+TEST(BlueFS, test_wal_read_2_partial_compact) {
+ uint64_t size_wal = 1048576 * 64;
+ TempBdev bdev_wal{size_wal};
+ uint64_t size_db = 1048576 * 128;
+ TempBdev bdev_db{size_db};
+ uint64_t size_slow = 1048576 * 256;
+ TempBdev bdev_slow{size_slow};
+
+ ConfSaver conf(g_ceph_context->_conf);
+ conf.SetVal("bluefs_min_flush_size", "65536");
+ conf.ApplyChanges();
+
+ BlueFS fs(g_ceph_context);
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false));
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false));
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+ uuid_d fsid;
+ ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+ ASSERT_EQ(0, fs.mount());
+ ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+ string dir_db = "db.wal";
+ ASSERT_EQ(0, fs.mkdir(dir_db));
+
+
+ auto gen_debugable_alternating = [](size_t amount, bufferlist& bl, char c) {
+ for (size_t i = 0; i < amount; i++) {
+ bl.append(c + (rand() % 10));
+ }
+ };
+ uint64_t flush_count = 2;
+ uint64_t flush_size = 65536;
+ uint64_t total_size = flush_count * flush_size;
+
+ vector<uint64_t> chunk_sizes = {total_size / 32, total_size / 64};
+
+ uint64_t chunk_size = flush_size;
+ bufferlist contents;
+ ASSERT_TRUE((total_size)%chunk_size == 0);
+
+ string wal_file("wal1.log");
+
+ BlueFS::FileWriter *writer;
+ ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+ ASSERT_NE(nullptr, writer);
+ //
+ uint64_t remaining = total_size;
+ while(remaining > 0) {
+ bufferlist bl1;
+ gen_debugable_alternating(chunk_size, bl1, 'a'+ (remaining % 10));
+ fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+ contents.append(bl1);
+ remaining -= chunk_size;
+ }
+ fs.fsync(writer);
+ fs.close_writer(writer);
+
+ fs.compact_log();
+ fs.umount();
+ fs.mount();
+
+ uint64_t size = 0;
+
+
+ { // test size
+ BlueFS::FileReader *reader;
+ ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+ fs.stat(dir_db, wal_file, &size, nullptr);
+ ASSERT_EQ(reader->file->wal_flushes.size(), flush_count);
+ ASSERT_EQ(size, total_size);
+ delete reader;
+ }
+
+ BlueFS::FileReader *reader;
+ ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+ bufferlist read_bl;
+ uint64_t offset = chunk_size / 2;
+ uint64_t length = chunk_size;
+ fs.read(reader, offset, length, &read_bl, NULL);
+ bufferlist chunk_contents;
+ chunk_contents.substr_of(contents, offset, length);
+ ASSERT_TRUE(bl_eq(chunk_contents, read_bl));
+ delete reader;
+}
+
+TEST(BlueFS, test_wal_write_multiple_recover_partial_reads) {
+ // read from the middle of one flush and end somewhere in between other flush
+ uint64_t size_wal = 1048576 * 64;
+ TempBdev bdev_wal{size_wal};
+ uint64_t size_db = 1048576 * 128;
+ TempBdev bdev_db{size_db};
+ uint64_t size_slow = 1048576 * 256;
+ TempBdev bdev_slow{size_slow};
+
+ ConfSaver conf(g_ceph_context->_conf);
+ conf.SetVal("bluefs_min_flush_size", "524288");
+ conf.ApplyChanges();
+
+ BlueFS fs(g_ceph_context);
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false));
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false));
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+ uuid_d fsid;
+ ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+ ASSERT_EQ(0, fs.mount());
+ ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+ string dir_db = "db.wal";
+ ASSERT_EQ(0, fs.mkdir(dir_db));
+
+
+ auto gen_debugable_alternating = [](size_t amount, bufferlist& bl, char c) {
+ for (size_t i = 0; i < amount; i++) {
+ bl.append(c + (i % 10));
+ }
+ };
+ uint64_t flush_count = 2;
+ uint64_t flush_size = 524288;
+ uint64_t total_size = flush_count * flush_size;
+
+ vector<uint64_t> chunk_sizes = {total_size / 32, total_size / 64};
+
+ uint64_t fileid = 0;
+ for (auto chunk_size : chunk_sizes) {
+ bufferlist contents;
+ fileid++;
+ ASSERT_TRUE((total_size)%chunk_size == 0);
+
+ string wal_file("wal");
+ wal_file += to_string(fileid);
+ wal_file += ".log";
+
+ BlueFS::FileWriter *writer;
+ ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+ ASSERT_NE(nullptr, writer);
+ //
+ uint64_t remaining = total_size;
+ while(remaining > 0) {
+ bufferlist bl1;
+ gen_debugable_alternating(chunk_size, bl1, 'a'+ (remaining % 10));
+ fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+ contents.append(bl1);
+ remaining -= chunk_size;
+ }
+ fs.fsync(writer);
+ fs.close_writer(writer);
+
+ fs.umount();
+ fs.mount();
+
+ uint64_t size = 0;
+
+
+ { // test size
+ BlueFS::FileReader *reader;
+ ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+ fs.stat(dir_db, wal_file, &size, nullptr);
+ ASSERT_EQ(reader->file->wal_flushes.size(), flush_count);
+ ASSERT_EQ(size, total_size);
+ delete reader;
+ }
+
+ uint64_t read_chunks = size / chunk_size; // WAL default chunk size is 32k
+ for (uint64_t chunk = 0; chunk < read_chunks; chunk++) {
+ BlueFS::FileReader *reader;
+ ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+ bufferlist read_bl;
+ uint64_t offset = chunk * chunk_size;
+ uint64_t length = chunk_size;
+ fs.read(reader, offset, length, &read_bl, NULL);
+ bufferlist chunk_contents;
+ chunk_contents.substr_of(contents, offset, length);
+ ASSERT_TRUE(bl_eq(chunk_contents, read_bl));
+ delete reader;
+ }
+
+ }
+}
+
+
+TEST(BlueFS, test_wal_write_truncate) {
+ uint64_t size_wal = 1048576 * 64;
+ TempBdev bdev_wal{size_wal};
+ uint64_t size_db = 1048576 * 128;
+ TempBdev bdev_db{size_db};
+ uint64_t size_slow = 1048576 * 256;
+ TempBdev bdev_slow{size_slow};
+
+ ConfSaver conf(g_ceph_context->_conf);
+ conf.SetVal("bluefs_min_flush_size", "65536");
+ conf.ApplyChanges();
+
+ BlueFS fs(g_ceph_context);
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false));
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false));
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+ uuid_d fsid;
+ ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+ ASSERT_EQ(0, fs.mount());
+ ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+ string dir_db = "db.wal";
+ ASSERT_EQ(0, fs.mkdir(dir_db));
+
+ string wal_file = "wal1.log";
+ BlueFS::FileWriter *writer;
+ ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+ ASSERT_NE(nullptr, writer);
+
+ bufferlist bl1;
+ auto gen_debugable = [](size_t amount, bufferlist& bl) {
+ for (size_t i = 0; i < amount; i++) {
+ bl.append('a');
+ }
+ };
+ gen_debugable(70000, bl1);
+ fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+ fs.fsync(writer);
+ fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+ fs.fsync(writer);
+ fs.truncate(writer, 70000+BlueFS::File::WALFlush::extra_envelope_size_on_front_and_tail());
+ fs.fsync(writer);
+
+ fs.umount();
+ fs.mount();
+
+ BlueFS::FileReader *reader;
+ ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+ ASSERT_EQ(reader->file->fnode.wal_limit, 70000+BlueFS::File::WALFlush::extra_envelope_size_on_front_and_tail());
+ ASSERT_EQ(reader->file->wal_flushes.size(), 1);
+ bufferlist read_bl;
+ fs.read(reader, 0, 70000, &read_bl, NULL);
+ ASSERT_TRUE(bl_eq(bl1, read_bl));
+ delete reader;
+ fs.umount();
+
+}
+
+TEST(BlueFS, test_wal_read_after_rollback_to_v1) {
+ // test whether we still read with v2 version even though new files will be v1
+ uint64_t size_wal = 1048576 * 64;
+ TempBdev bdev_wal{size_wal};
+ uint64_t size_db = 1048576 * 128;
+ TempBdev bdev_db{size_db};
+ uint64_t size_slow = 1048576 * 256;
+ TempBdev bdev_slow{size_slow};
+
+ ConfSaver conf(g_ceph_context->_conf);
+ conf.SetVal("bluefs_min_flush_size", "65536");
+ conf.SetVal("bluefs_wal_v2", "true");
+ conf.ApplyChanges();
+
+ BlueFS fs(g_ceph_context);
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false));
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false));
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+ uuid_d fsid;
+ ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+ ASSERT_EQ(0, fs.mount());
+ ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+ string dir_db = "db.wal";
+ ASSERT_EQ(0, fs.mkdir(dir_db));
+
+ string wal_file = "wal1.log";
+ BlueFS::FileWriter *writer;
+ ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+ ASSERT_EQ(writer->file->fnode.type, WAL_V2);
+ ASSERT_NE(nullptr, writer);
+
+ bufferlist bl1;
+ auto gen_debugable = [](size_t amount, bufferlist& bl) {
+ for (size_t i = 0; i < amount; i++) {
+ bl.append('a');
+ }
+ };
+ gen_debugable(70000, bl1);
+ fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+ fs.fsync(writer);
+
+ g_ceph_context->_conf.set_val("bluefs_wal_v2", "false");
+ fs.umount();
+ fs.mount();
+
+ BlueFS::FileReader *reader;
+ ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+ bufferlist read_bl;
+ fs.read(reader, 0, 70000, &read_bl, NULL);
+ ASSERT_TRUE(bl_eq(bl1, read_bl));
+ delete reader;
+
+ {
+ // open another file to ensure v1 is set correctly
+ string wal_file = "wal2.log";
+ BlueFS::FileWriter *writer;
+ ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+ ASSERT_NE(nullptr, writer);
+ ASSERT_EQ(writer->file->fnode.type, REGULAR);
+ }
+ fs.umount();
+}
+
TEST(BlueFS, test_truncate_stable_53129) {
ConfSaver conf(g_ceph_context->_conf);