From: Yuri Weinstein
Date: Mon, 9 Oct 2023 22:04:39 +0000 (-0700)
Subject: Merge pull request #48169 from ifed01/wip-ifed-fix-bluefs-truncate-qui
X-Git-Tag: v17.2.7~44
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=e6d5808216d9b22483b46e1b05efdefae326da1a;p=ceph.git

Merge pull request #48169 from ifed01/wip-ifed-fix-bluefs-truncate-qui

quincy: os/bluestore: fix no metadata update on truncate+fsync

Reviewed-by: Adam Kupczyk
---
e6d5808216d9b22483b46e1b05efdefae326da1a
diff --cc src/test/objectstore/test_bluefs.cc
index 5698651f4edf,f06308ecbb17..785e2d83d7c2
--- a/src/test/objectstore/test_bluefs.cc
+++ b/src/test/objectstore/test_bluefs.cc
@@@ -1302,45 -1302,66 +1302,104 @@@ TEST(BlueFS, test_concurrent_dir_link_a
      }
    }
  }
 +// Regression test for https://tracker.ceph.com/issues/55636.
 +//
 +// Writes to a file, unlinks it while the writer handle is still open, then
 +// fsyncs and closes that handle. After unmounting, the same BlueFS instance
 +// must mount again and pass maybe_verify_layout().
 +//
 +// NOTE(review): the return values of flush/unlink/fsync/close_writer are not
 +// checked here; only mkfs/mount/mkdir/open_for_write/maybe_verify_layout are
 +// asserted.
 +TEST(BlueFS, broken_unlink_fsync_seq) {
 +  uint64_t size = 1048576 * 128;  // 128 MiB, passed to TempBdev below
 +  TempBdev bdev{size};
 +  BlueFS fs(g_ceph_context);
 +  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
 +  uuid_d fsid;
 +  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
 +  ASSERT_EQ(0, fs.mount());
 +  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
 +  {
 +    /*
 +     * This reproduces a weird file op sequence (unlink+fsync) that Octopus
 +     * RocksDB might issue to BlueFS when recycle_log_file_num setting is 0
 +     * See https://tracker.ceph.com/issues/55636 for more details
 +     *
 +     */
 +    char buf[1048571]; // this is biggish, but intentionally not evenly aligned
 +    // Fill the buffer with a pattern; the unsigned index is narrowed to char.
 +    for (unsigned i = 0; i < sizeof(buf); ++i) {
 +      buf[i] = i;
 +    }
 +    BlueFS::FileWriter *h;
 +    ASSERT_EQ(0, fs.mkdir("dir"));
 +    ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
 +
 +    // The order below is the reproducer: the first chunk is appended and
 +    // explicitly flushed, the second chunk is appended with no flush call
 +    // before the unlink, and fsync is issued on the handle only after the
 +    // file has been unlinked.
 +    h->append(buf, sizeof(buf));
 +    fs.flush(h);
 +    h->append(buf, sizeof(buf));
 +    fs.unlink("dir", "file");
 +    fs.fsync(h);
 +    fs.close_writer(h);
 +  }
 +  fs.umount();
 +
 +  // Remount to check that the log can be replayed safely after the
 +  // unlink+fsync sequence above.
 +  ASSERT_EQ(0, fs.mount());
 +  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
 +  fs.umount();
 +}
+ // Regression test for https://tracker.ceph.com/issues/55307 (the commit
+ // subject describes it as "no metadata update on truncate+fsync").
+ //
+ // For each content size in `sizes`:
+ //   1. a first BlueFS instance runs mkfs on the shared device, mounts it,
+ //      writes and fsyncs a file, reads it back and compares the content,
+ //      then truncates the file to 0 and fsyncs again;
+ //   2. a second BlueFS instance mounts the same device (no mkfs) and
+ //      expects a read of that file to return 0 bytes.
+ //
+ // NOTE(review): the first instance leaves its scope without a umount()
+ // call. Presumably this is deliberate, so that the second mount sees only
+ // what fsync made durable -- confirm before "fixing" it.
+ TEST(BlueFS, truncate_fsync) {
+   uint64_t bdev_size = 128 * 1048576;  // 128 MiB, passed to TempBdev below
+   uint64_t block_size = 4096;          // only used to round up read_size
+   uint64_t reserved = 1048576;         // last argument of add_block_device
+   TempBdev bdev{bdev_size};
+   uuid_d fsid;
+   const char* DIR_NAME="dir";
+   const char* FILE_NAME="file1";
+
+   // Content sizes under test: below 1 KiB, 1 KiB, exactly block_size, and
+   // 1024 * block_size.
+   size_t sizes[] = {3, 1024, 4096, 1024 * 4096};
+   for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
+     const size_t content_size= sizes[i];
+     // Presumably p2roundup rounds content_size up to a multiple of
+     // block_size, so reads request at least the whole content -- TODO confirm.
+     const size_t read_size = p2roundup(content_size, size_t(block_size));
+     const std::string content(content_size, 'x');
+     {
+       // First instance: mkfs is repeated on every iteration over the same
+       // TempBdev.
+       BlueFS fs(g_ceph_context);
+       ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, reserved));
+       ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
+       ASSERT_EQ(0, fs.mount());
+       ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
+       {
+         // Create the file and persist `content` with fsync.
+         BlueFS::FileWriter *h;
+         // NOTE(review): the literal "dir" has the same value as DIR_NAME.
+         ASSERT_EQ(0, fs.mkdir("dir"));
+         ASSERT_EQ(0, fs.open_for_write(DIR_NAME, FILE_NAME, &h, false));
+         h->append(content.c_str(), content.length());
+         fs.fsync(h);
+         fs.close_writer(h);
+       }
+       {
+         // Read back: exactly content.length() bytes are expected even though
+         // read_size bytes are requested, and they must match `content`.
+         BlueFS::FileReader *h;
+         ASSERT_EQ(0, fs.open_for_read(DIR_NAME, FILE_NAME, &h));
+         bufferlist bl;
+         ASSERT_EQ(content.length(), fs.read(h, 0, read_size, &bl, NULL));
+         ASSERT_EQ(0, strncmp(content.c_str(), bl.c_str(), content.length()));
+         delete h;
+       }
+       {
+         // Reopen for write and truncate to zero, then fsync. The last
+         // open_for_write argument is true here (false at creation) --
+         // presumably "overwrite"; verify against BlueFS::open_for_write.
+         BlueFS::FileWriter *h;
+         ASSERT_EQ(0, fs.open_for_write(DIR_NAME, FILE_NAME, &h, true));
+         fs.truncate(h, 0);
+         fs.fsync(h);
+         fs.close_writer(h);
+       }
+     }
+     {
+       // This used to be broken, see https://tracker.ceph.com/issues/55307:
+       // a second instance mounts the device without mkfs and must read
+       // 0 bytes from the file that was truncated and fsynced above.
+       BlueFS fs(g_ceph_context);
+       ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, reserved));
+       ASSERT_EQ(0, fs.mount());
+       BlueFS::FileReader *h;
+       ASSERT_EQ(0, fs.open_for_read(DIR_NAME, FILE_NAME, &h));
+       bufferlist bl;
+       ASSERT_EQ(0, fs.read(h, 0, read_size, &bl, NULL));
+       delete h;
+       fs.umount();
+     }
+   }
+ }
+
  int main(int argc, char **argv) {
auto args = argv_to_vec(argc, argv); map defaults = {