]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
os/bluestore: test bluefs wal v2
authorPere Diaz Bou <pere-altea@hotmail.com>
Wed, 31 Jul 2024 10:17:30 +0000 (12:17 +0200)
committerAdam Kupczyk <akupczyk@ibm.com>
Fri, 28 Mar 2025 16:06:02 +0000 (16:06 +0000)
Signed-off-by: Pere Diaz Bou <pere-altea@hotmail.com>
(cherry picked from commit 4961dca7c7c8e8675ad82e2124b1a0c334e22b57)

src/test/objectstore/test_bluefs.cc

index 3e55d1392da2adfd38a12d388a90740ab077d00e..8e21e878923bb321cc59aa0031210219fe2ea1f1 100644 (file)
@@ -11,6 +11,8 @@
 #include <thread>
 #include <stack>
 #include <gtest/gtest.h>
+#include "common/dout.h"
+#include "common/debug.h"
 #include "global/global_init.h"
 #include "common/ceph_argparse.h"
 #include "include/stringify.h"
@@ -18,6 +20,7 @@
 #include "common/errno.h"
 
 #include "os/bluestore/Allocator.h"
+#include "os/bluestore/bluefs_types.h"
 #include "os/bluestore/bluestore_common.h"
 #include "os/bluestore/BlueFS.h"
 
@@ -891,6 +894,658 @@ TEST(BlueFS, test_tracker_50965) {
   fs.umount();
 }
 
+// borrowed from store_test
+static bool bl_eq(bufferlist& expected, bufferlist& actual) {
+  if (expected.contents_equal(actual))
+    return true;
+
+  unsigned first = 0;
+  if(expected.length() != actual.length()) {
+    cout << "--- buffer lengths mismatch " << std::hex
+         << "expected 0x" << expected.length() << " != actual 0x"
+         << actual.length() << std::dec << std::endl;
+  }
+  auto len = std::min(expected.length(), actual.length());
+  while ( first<len && expected[first] == actual[first])
+    ++first;
+  unsigned last = len;
+  while (last > 0 && expected[last-1] == actual[last-1])
+    --last;
+  if(len > 0) {
+    cout << "--- buffer mismatch between offset 0x" << std::hex << first
+         << " and 0x" << last << ", total 0x" << len << std::dec
+         << std::endl;
+    cout << "--- expected:\n";
+    expected.hexdump(cout);
+    cout << "--- actual:\n";
+    actual.hexdump(cout);
+  }
+  return false;
+}
+
+TEST(BlueFS, test_wal_write) {
+  uint64_t size_wal = 1048576 * 64;
+  TempBdev bdev_wal{size_wal};
+  uint64_t size_db = 1048576 * 128;
+  TempBdev bdev_db{size_db};
+  uint64_t size_slow = 1048576 * 256;
+  TempBdev bdev_slow{size_slow};
+
+  ConfSaver conf(g_ceph_context->_conf);
+  conf.SetVal("bluefs_min_flush_size", "65536");
+  conf.ApplyChanges();
+
+  BlueFS fs(g_ceph_context);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL,  bdev_wal.path,  false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB,   bdev_db.path,   false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+  uuid_d fsid;
+  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+  string dir_db = "db.wal";
+  ASSERT_EQ(0, fs.mkdir(dir_db));
+
+  string wal_file = "wal1.log";
+  BlueFS::FileWriter *writer;
+  ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+  ASSERT_NE(nullptr, writer);
+
+  bufferlist bl1;
+  auto gen_debugable = [](size_t amount, bufferlist& bl) {
+    for (size_t i = 0; i < amount; i++) {
+      bl.append('a');
+    }
+  };
+  gen_debugable(70000, bl1);
+  fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+  fs.fsync(writer);
+
+  // WAL files don't update internal extents while writing to save memory, only on _replay
+  fs.umount();
+  fs.mount();
+
+  BlueFS::FileReader *reader;
+  ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+  bufferlist read_bl;
+  fs.read(reader, 0, 70000, &read_bl, NULL);
+  ASSERT_TRUE(bl_eq(bl1, read_bl));
+  delete reader;
+  fs.umount();
+
+}
+
+TEST(BlueFS, test_wal_write_multiple) {
+  uint64_t size_wal = 1048576 * 64;
+  TempBdev bdev_wal{size_wal};
+  uint64_t size_db = 1048576 * 128;
+  TempBdev bdev_db{size_db};
+  uint64_t size_slow = 1048576 * 256;
+  TempBdev bdev_slow{size_slow};
+
+  ConfSaver conf(g_ceph_context->_conf);
+  conf.SetVal("bluefs_min_flush_size", "65536");
+  conf.ApplyChanges();
+
+  BlueFS fs(g_ceph_context);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL,  bdev_wal.path,  false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB,   bdev_db.path,   false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+  uuid_d fsid;
+  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+  string dir_db = "db.wal";
+  ASSERT_EQ(0, fs.mkdir(dir_db));
+
+  string wal_file = "wal1.log";
+  BlueFS::FileWriter *writer;
+  ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+  ASSERT_NE(nullptr, writer);
+
+  auto gen_debugable = [](size_t amount, bufferlist& bl, char c) {
+    for (size_t i = 0; i < amount; i++) {
+      bl.append(c);
+    }
+  };
+  size_t buffer_size = 70000;
+  for (int i = 0; i < 10; i++) {
+    bufferlist bl1;
+    gen_debugable(buffer_size, bl1, 'a' + i);
+    fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+    fs.fsync(writer);
+
+    // WAL files don't update internal extents while writing to save memory
+    fs.umount();
+    fs.mount();
+
+    BlueFS::FileReader *reader;
+    ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+    bufferlist read_bl;
+    fs.read(reader, i * buffer_size, buffer_size, &read_bl, NULL);
+    ASSERT_TRUE(bl_eq(bl1, read_bl));
+    delete reader;
+  }
+  fs.umount();
+}
+
+TEST(BlueFS, test_wal_write_multiple_recover) {
+  uint64_t size_wal = 1048576 * 64;
+  TempBdev bdev_wal{size_wal};
+  uint64_t size_db = 1048576 * 128;
+  TempBdev bdev_db{size_db};
+  uint64_t size_slow = 1048576 * 256;
+  TempBdev bdev_slow{size_slow};
+
+  ConfSaver conf(g_ceph_context->_conf);
+  conf.SetVal("bluefs_min_flush_size", "65536");
+  conf.ApplyChanges();
+
+  BlueFS fs(g_ceph_context);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL,  bdev_wal.path,  false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB,   bdev_db.path,   false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+  uuid_d fsid;
+  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+  string dir_db = "db.wal";
+  ASSERT_EQ(0, fs.mkdir(dir_db));
+
+  string wal_file = "wal1.log";
+  BlueFS::FileWriter *writer;
+  ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+  ASSERT_NE(nullptr, writer);
+
+  auto gen_debugable = [](size_t amount, bufferlist& bl, char c) {
+    for (size_t i = 0; i < amount; i++) {
+      bl.append(c);
+    }
+  };
+  size_t buffer_size = 70000;
+  uint64_t flush_count = 10;
+  for (int i = 0; i < 10; i++) {
+    bufferlist bl1;
+    gen_debugable(buffer_size, bl1, 'a' + i);
+    fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+    fs.fsync(writer);
+  }
+  fs.close_writer(writer);
+
+  fs.umount();
+  fs.mount();
+
+  uint64_t size = 0;
+
+  BlueFS::FileReader *reader;
+  ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+  fs.stat(dir_db, wal_file, &size, nullptr);
+  ASSERT_EQ(reader->file->wal_flushes.size(), flush_count);
+  ASSERT_EQ(size, 70000  * flush_count);
+  delete reader;
+}
+
+TEST(BlueFS, test_wal_write_multiple_recover_fsync_end) {
+  uint64_t size_wal = 1048576 * 64;
+  TempBdev bdev_wal{size_wal};
+  uint64_t size_db = 1048576 * 128;
+  TempBdev bdev_db{size_db};
+  uint64_t size_slow = 1048576 * 256;
+  TempBdev bdev_slow{size_slow};
+
+  ConfSaver conf(g_ceph_context->_conf);
+  conf.SetVal("bluefs_min_flush_size", "524288");
+  conf.ApplyChanges();
+
+  BlueFS fs(g_ceph_context);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL,  bdev_wal.path,  false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB,   bdev_db.path,   false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+  uuid_d fsid;
+  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+  string dir_db = "db.wal";
+  ASSERT_EQ(0, fs.mkdir(dir_db));
+
+  string wal_file = "wal1.log";
+  BlueFS::FileWriter *writer;
+  ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+  ASSERT_NE(nullptr, writer);
+
+  auto gen_debugable = [](size_t amount, bufferlist& bl, char c) {
+    for (size_t i = 0; i < amount; i++) {
+      bl.append(c);
+    }
+  };
+  size_t buffer_size = 8;
+  uint64_t flush_count = 1;
+  for (int i = 0; i < 10; i++) {
+    bufferlist bl1;
+    gen_debugable(buffer_size, bl1, 'a' + i);
+    fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+  }
+  fs.fsync(writer);
+  fs.close_writer(writer);
+
+  fs.umount();
+  fs.mount();
+
+  uint64_t size = 0;
+
+  { // test size
+    BlueFS::FileReader *reader;
+    ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+    fs.stat(dir_db, wal_file, &size, nullptr);
+    ASSERT_EQ(reader->file->wal_flushes.size(), flush_count);
+    ASSERT_EQ(size, buffer_size * 10);
+    delete reader;
+  }
+
+  // test contents
+  for (int i = 0; i < 10; i++) {
+    bufferlist bl1;
+    gen_debugable(buffer_size, bl1, 'a' + i);
+
+    BlueFS::FileReader *reader;
+    ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+    bufferlist read_bl;
+    fs.read(reader, i * buffer_size, buffer_size, &read_bl, NULL);
+    ASSERT_TRUE(bl_eq(bl1, read_bl));
+    delete reader;
+  }
+}
+
+TEST(BlueFS, test_wal_read_2_partial) {
+  uint64_t size_wal = 1048576 * 64;
+  TempBdev bdev_wal{size_wal};
+  uint64_t size_db = 1048576 * 128;
+  TempBdev bdev_db{size_db};
+  uint64_t size_slow = 1048576 * 256;
+  TempBdev bdev_slow{size_slow};
+
+  ConfSaver conf(g_ceph_context->_conf);
+  conf.SetVal("bluefs_min_flush_size", "65536");
+  conf.ApplyChanges();
+
+  BlueFS fs(g_ceph_context);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL,  bdev_wal.path,  false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB,   bdev_db.path,   false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+  uuid_d fsid;
+  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+  string dir_db = "db.wal";
+  ASSERT_EQ(0, fs.mkdir(dir_db));
+
+
+  auto gen_debugable_alternating = [](size_t amount, bufferlist& bl, char c) {
+    for (size_t i = 0; i < amount; i++) {
+      bl.append(c + (rand() % 10));
+    }
+  };
+  uint64_t flush_count = 2;
+  uint64_t flush_size = 65536;
+  uint64_t total_size = flush_count * flush_size;
+
+  vector<uint64_t> chunk_sizes = {total_size / 32, total_size / 64};
+
+  uint64_t chunk_size = flush_size;
+  bufferlist contents;
+  ASSERT_TRUE((total_size)%chunk_size == 0);
+
+  string wal_file("wal1.log");
+
+  BlueFS::FileWriter *writer;
+  ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+  ASSERT_NE(nullptr, writer);
+  //
+  uint64_t remaining = total_size;
+  while(remaining > 0) {
+    bufferlist bl1;
+    gen_debugable_alternating(chunk_size, bl1, 'a'+ (remaining % 10));
+    fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+    contents.append(bl1);
+    remaining -= chunk_size;
+  }
+  fs.fsync(writer);
+  fs.close_writer(writer);
+
+  fs.umount();
+  fs.mount();
+
+  uint64_t size = 0;
+
+
+  { // test size
+    BlueFS::FileReader *reader;
+    ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+    fs.stat(dir_db, wal_file, &size, nullptr);
+    ASSERT_EQ(reader->file->wal_flushes.size(), flush_count);
+    ASSERT_EQ(size, total_size);
+    delete reader;
+  }
+
+  BlueFS::FileReader *reader;
+  ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+  bufferlist read_bl;
+  uint64_t offset = chunk_size / 2;
+  uint64_t length = chunk_size;
+  fs.read(reader, offset, length, &read_bl, NULL);
+  bufferlist chunk_contents;
+  chunk_contents.substr_of(contents, offset, length);
+  ASSERT_TRUE(bl_eq(chunk_contents, read_bl));
+  delete reader;
+}
+
+TEST(BlueFS, test_wal_read_2_partial_compact) {
+  uint64_t size_wal = 1048576 * 64;
+  TempBdev bdev_wal{size_wal};
+  uint64_t size_db = 1048576 * 128;
+  TempBdev bdev_db{size_db};
+  uint64_t size_slow = 1048576 * 256;
+  TempBdev bdev_slow{size_slow};
+
+  ConfSaver conf(g_ceph_context->_conf);
+  conf.SetVal("bluefs_min_flush_size", "65536");
+  conf.ApplyChanges();
+
+  BlueFS fs(g_ceph_context);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL,  bdev_wal.path,  false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB,   bdev_db.path,   false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+  uuid_d fsid;
+  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+  string dir_db = "db.wal";
+  ASSERT_EQ(0, fs.mkdir(dir_db));
+
+
+  auto gen_debugable_alternating = [](size_t amount, bufferlist& bl, char c) {
+    for (size_t i = 0; i < amount; i++) {
+      bl.append(c + (rand() % 10));
+    }
+  };
+  uint64_t flush_count = 2;
+  uint64_t flush_size = 65536;
+  uint64_t total_size = flush_count * flush_size;
+
+  vector<uint64_t> chunk_sizes = {total_size / 32, total_size / 64};
+
+  uint64_t chunk_size = flush_size;
+  bufferlist contents;
+  ASSERT_TRUE((total_size)%chunk_size == 0);
+
+  string wal_file("wal1.log");
+
+  BlueFS::FileWriter *writer;
+  ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+  ASSERT_NE(nullptr, writer);
+  //
+  uint64_t remaining = total_size;
+  while(remaining > 0) {
+    bufferlist bl1;
+    gen_debugable_alternating(chunk_size, bl1, 'a'+ (remaining % 10));
+    fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+    contents.append(bl1);
+    remaining -= chunk_size;
+  }
+  fs.fsync(writer);
+  fs.close_writer(writer);
+
+  fs.compact_log();
+  fs.umount();
+  fs.mount();
+
+  uint64_t size = 0;
+
+
+  { // test size
+    BlueFS::FileReader *reader;
+    ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+    fs.stat(dir_db, wal_file, &size, nullptr);
+    ASSERT_EQ(reader->file->wal_flushes.size(), flush_count);
+    ASSERT_EQ(size, total_size);
+    delete reader;
+  }
+
+  BlueFS::FileReader *reader;
+  ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+  bufferlist read_bl;
+  uint64_t offset = chunk_size / 2;
+  uint64_t length = chunk_size;
+  fs.read(reader, offset, length, &read_bl, NULL);
+  bufferlist chunk_contents;
+  chunk_contents.substr_of(contents, offset, length);
+  ASSERT_TRUE(bl_eq(chunk_contents, read_bl));
+  delete reader;
+}
+
+TEST(BlueFS, test_wal_write_multiple_recover_partial_reads) {
+  // read from the middle of one flush and end somewhere in between other flush
+  uint64_t size_wal = 1048576 * 64;
+  TempBdev bdev_wal{size_wal};
+  uint64_t size_db = 1048576 * 128;
+  TempBdev bdev_db{size_db};
+  uint64_t size_slow = 1048576 * 256;
+  TempBdev bdev_slow{size_slow};
+
+  ConfSaver conf(g_ceph_context->_conf);
+  conf.SetVal("bluefs_min_flush_size", "524288");
+  conf.ApplyChanges();
+
+  BlueFS fs(g_ceph_context);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL,  bdev_wal.path,  false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB,   bdev_db.path,   false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+  uuid_d fsid;
+  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+  string dir_db = "db.wal";
+  ASSERT_EQ(0, fs.mkdir(dir_db));
+
+
+  auto gen_debugable_alternating = [](size_t amount, bufferlist& bl, char c) {
+    for (size_t i = 0; i < amount; i++) {
+      bl.append(c + (i % 10));
+    }
+  };
+  uint64_t flush_count = 2;
+  uint64_t flush_size = 524288;
+  uint64_t total_size = flush_count * flush_size;
+
+  vector<uint64_t> chunk_sizes = {total_size / 32, total_size / 64};
+
+  uint64_t fileid = 0;
+  for (auto chunk_size : chunk_sizes) {
+    bufferlist contents;
+    fileid++;
+    ASSERT_TRUE((total_size)%chunk_size == 0);
+
+    string wal_file("wal");
+    wal_file += to_string(fileid);
+    wal_file += ".log";
+
+    BlueFS::FileWriter *writer;
+    ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+    ASSERT_NE(nullptr, writer);
+    //
+    uint64_t remaining = total_size;
+    while(remaining > 0) {
+      bufferlist bl1;
+      gen_debugable_alternating(chunk_size, bl1, 'a'+ (remaining % 10));
+      fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+      contents.append(bl1);
+      remaining -= chunk_size;
+    }
+    fs.fsync(writer);
+    fs.close_writer(writer);
+
+    fs.umount();
+    fs.mount();
+
+    uint64_t size = 0;
+
+
+    { // test size
+      BlueFS::FileReader *reader;
+      ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+      fs.stat(dir_db, wal_file, &size, nullptr);
+      ASSERT_EQ(reader->file->wal_flushes.size(), flush_count);
+      ASSERT_EQ(size, total_size);
+      delete reader;
+    }
+
+    uint64_t read_chunks =  size / chunk_size; // WAL default chunk size is 32k
+    for (uint64_t chunk = 0; chunk < read_chunks; chunk++) {
+      BlueFS::FileReader *reader;
+      ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+      bufferlist read_bl;
+      uint64_t offset = chunk * chunk_size;
+      uint64_t length = chunk_size;
+      fs.read(reader, offset, length, &read_bl, NULL);
+      bufferlist chunk_contents;
+      chunk_contents.substr_of(contents, offset, length);
+      ASSERT_TRUE(bl_eq(chunk_contents, read_bl));
+      delete reader;
+    }
+
+  }
+}
+
+
+TEST(BlueFS, test_wal_write_truncate) {
+  uint64_t size_wal = 1048576 * 64;
+  TempBdev bdev_wal{size_wal};
+  uint64_t size_db = 1048576 * 128;
+  TempBdev bdev_db{size_db};
+  uint64_t size_slow = 1048576 * 256;
+  TempBdev bdev_slow{size_slow};
+
+  ConfSaver conf(g_ceph_context->_conf);
+  conf.SetVal("bluefs_min_flush_size", "65536");
+  conf.ApplyChanges();
+
+  BlueFS fs(g_ceph_context);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL,  bdev_wal.path,  false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB,   bdev_db.path,   false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+  uuid_d fsid;
+  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+  string dir_db = "db.wal";
+  ASSERT_EQ(0, fs.mkdir(dir_db));
+
+  string wal_file = "wal1.log";
+  BlueFS::FileWriter *writer;
+  ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+  ASSERT_NE(nullptr, writer);
+
+  bufferlist bl1;
+  auto gen_debugable = [](size_t amount, bufferlist& bl) {
+    for (size_t i = 0; i < amount; i++) {
+      bl.append('a');
+    }
+  };
+  gen_debugable(70000, bl1);
+  fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+  fs.fsync(writer);
+  fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+  fs.fsync(writer);
+  fs.truncate(writer, 70000+BlueFS::File::WALFlush::extra_envelope_size_on_front_and_tail());
+  fs.fsync(writer);
+
+  fs.umount();
+  fs.mount();
+
+  BlueFS::FileReader *reader;
+  ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+  ASSERT_EQ(reader->file->fnode.wal_limit, 70000+BlueFS::File::WALFlush::extra_envelope_size_on_front_and_tail());
+  ASSERT_EQ(reader->file->wal_flushes.size(), 1);
+  bufferlist read_bl;
+  fs.read(reader, 0, 70000, &read_bl, NULL);
+  ASSERT_TRUE(bl_eq(bl1, read_bl));
+  delete reader;
+  fs.umount();
+
+}
+
+TEST(BlueFS, test_wal_read_after_rollback_to_v1) {
+  // test whether we still read with v2 version even though new files will be v1
+  uint64_t size_wal = 1048576 * 64;
+  TempBdev bdev_wal{size_wal};
+  uint64_t size_db = 1048576 * 128;
+  TempBdev bdev_db{size_db};
+  uint64_t size_slow = 1048576 * 256;
+  TempBdev bdev_slow{size_slow};
+
+  ConfSaver conf(g_ceph_context->_conf);
+  conf.SetVal("bluefs_min_flush_size", "65536");
+  conf.SetVal("bluefs_wal_v2", "true");
+  conf.ApplyChanges();
+
+  BlueFS fs(g_ceph_context);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL,  bdev_wal.path,  false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB,   bdev_db.path,   false));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false));
+  uuid_d fsid;
+  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+  string dir_db = "db.wal";
+  ASSERT_EQ(0, fs.mkdir(dir_db));
+
+  string wal_file = "wal1.log";
+  BlueFS::FileWriter *writer;
+  ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+  ASSERT_EQ(writer->file->fnode.type, WAL_V2);
+  ASSERT_NE(nullptr, writer);
+
+  bufferlist bl1;
+  auto gen_debugable = [](size_t amount, bufferlist& bl) {
+    for (size_t i = 0; i < amount; i++) {
+      bl.append('a');
+    }
+  };
+  gen_debugable(70000, bl1);
+  fs.append_try_flush(writer, bl1.c_str(), bl1.length());
+  fs.fsync(writer);
+
+  g_ceph_context->_conf.set_val("bluefs_wal_v2", "false");
+  fs.umount();
+  fs.mount();
+
+  BlueFS::FileReader *reader;
+  ASSERT_EQ(0, fs.open_for_read(dir_db, wal_file, &reader));
+  bufferlist read_bl;
+  fs.read(reader, 0, 70000, &read_bl, NULL);
+  ASSERT_TRUE(bl_eq(bl1, read_bl));
+  delete reader;
+
+  {
+    // open another file to ensure v1 is set correctly
+    string wal_file = "wal2.log";
+    BlueFS::FileWriter *writer;
+    ASSERT_EQ(0, fs.open_for_write(dir_db, wal_file, &writer, false));
+    ASSERT_NE(nullptr, writer);
+    ASSERT_EQ(writer->file->fnode.type, REGULAR);
+  }
+  fs.umount();
+}
+
 TEST(BlueFS, test_truncate_stable_53129) {
 
   ConfSaver conf(g_ceph_context->_conf);