From: Adam Kupczyk Date: Mon, 24 May 2021 12:27:05 +0000 (+0200) Subject: os/bluestore/bluefs: Add test that detects bluefs inconsistency X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=be7c0a8061c016555067af4ac6477872c08f8b31;p=ceph.git os/bluestore/bluefs: Add test that detects bluefs inconsistency Add a test that detects a possible scenario in which BlueFS ends up with a file that contains data that has never been written. This is done by tricking the replay log into accepting file metadata (size, allocations) while the actual data stored in these allocations is not yet synced to disk. Scenario: 1) write to file h1 on the SLOW device 2) flush h1 (and trigger the h1 mark to be added to the bluefs replay log) 3) write to file h2 4) fsync h2 (forces the replay log to be written) The result is: - the bluefs log now has a stable state of h1 - the SLOW device is not yet flushed (no fdatasync()) The test detects this condition and fails. Cherry-picked from: c591a6e14e2c956d268adcaa9aa3e9c8a1fdea2a Signed-off-by: Adam Kupczyk --- diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index d46f8faacb8d7..386a7be68d60a 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -3176,6 +3176,18 @@ void BlueFS::_close_writer(FileWriter *h) delete h; } +uint64_t BlueFS::debug_get_dirty_seq(FileWriter *h) +{ + std::lock_guard l(lock); + return h->file->dirty_seq; +} + +bool BlueFS::debug_get_is_dev_dirty(FileWriter *h, uint8_t dev) +{ + std::lock_guard l(lock); + return h->dirty_devs[dev]; +} + int BlueFS::open_for_read( const string& dirname, const string& filename, diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index 2115870f049c7..c55e8389de4c3 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -633,6 +633,8 @@ public: const PerfCounters* get_perf_counters() const { return logger; } + uint64_t debug_get_dirty_seq(FileWriter *h); + bool debug_get_is_dev_dirty(FileWriter *h, uint8_t dev); private: // Wrappers for 
BlockDevice::read(...) and BlockDevice::read_random(...) diff --git a/src/test/objectstore/test_bluefs.cc b/src/test/objectstore/test_bluefs.cc index e33f424d37add..28b722599474f 100644 --- a/src/test/objectstore/test_bluefs.cc +++ b/src/test/objectstore/test_bluefs.cc @@ -694,6 +694,74 @@ TEST(BlueFS, test_replay) { fs.umount(); } +TEST(BlueFS, test_tracker_50965) { + uint64_t size_wal = 1048576 * 64; + TempBdev bdev_wal{size_wal}; + uint64_t size_db = 1048576 * 128; + TempBdev bdev_db{size_db}; + uint64_t size_slow = 1048576 * 256; + TempBdev bdev_slow{size_slow}; + + g_ceph_context->_conf.set_val( + "bluefs_min_flush_size", + "65536"); + + BlueFS fs(g_ceph_context); + ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false, 0)); + fs.add_block_extent(BlueFS::BDEV_WAL, 0, size_wal); + ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false, 0)); + fs.add_block_extent(BlueFS::BDEV_DB, 0, size_db); + ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false, 0)); + fs.add_block_extent(BlueFS::BDEV_SLOW, 0, size_slow); + uuid_d fsid; + ASSERT_EQ(0, fs.mkfs(fsid)); + ASSERT_EQ(0, fs.mount()); + + string dir_slow = "dir.slow"; + ASSERT_EQ(0, fs.mkdir(dir_slow)); + string dir_db = "dir_db"; + ASSERT_EQ(0, fs.mkdir(dir_db)); + + string file_slow = "file"; + BlueFS::FileWriter *h_slow; + ASSERT_EQ(0, fs.open_for_write(dir_slow, file_slow, &h_slow, false)); + ASSERT_NE(nullptr, h_slow); + + string file_db = "file"; + BlueFS::FileWriter *h_db; + ASSERT_EQ(0, fs.open_for_write(dir_db, file_db, &h_db, false)); + ASSERT_NE(nullptr, h_db); + + bufferlist bl1; + std::unique_ptr buf1 = gen_buffer(70000); + bufferptr bp1 = buffer::claim_char(70000, buf1.get()); + bl1.push_back(bp1); + h_slow->append(bl1.c_str(), bl1.length()); + fs.flush(h_slow); + + uint64_t h_slow_dirty_seq_1 = fs.debug_get_dirty_seq(h_slow); + + bufferlist bl2; + std::unique_ptr buf2 = gen_buffer(1000); + bufferptr bp2 = buffer::claim_char(1000, buf2.get()); + 
bl2.push_back(bp2); + h_db->append(bl2.c_str(), bl2.length()); + fs.fsync(h_db); + + uint64_t h_slow_dirty_seq_2 = fs.debug_get_dirty_seq(h_slow); + bool h_slow_dev_dirty = fs.debug_get_is_dev_dirty(h_slow, BlueFS::BDEV_SLOW); + + //problem if allocations are stable in log but slow device is not flushed yet + ASSERT_FALSE(h_slow_dirty_seq_1 != 0 && + h_slow_dirty_seq_2 == 0 && + h_slow_dev_dirty == true); + + fs.close_writer(h_slow); + fs.close_writer(h_db); + + fs.umount(); +} + int main(int argc, char **argv) { vector args; argv_to_vec(argc, (const char **)argv, args);