From b1ce81eac7ff6d29e16ae9ed5dbbb02580dfc362 Mon Sep 17 00:00:00 2001 From: Adam Kupczyk Date: Wed, 11 Aug 2021 15:41:34 +0200 Subject: [PATCH] os/bluestore/bluefs: Cleanup allocation consistency check code When bluefs_log_replay_check_allocations is set _replay performs checks if allocations/deallocations of extents are valid and properly aligned. The changes include: - now deallocations are also checked for valid alignment (more sanity checks) - bluefs log read from super is checked once - before reading any transaction Signed-off-by: Adam Kupczyk --- src/os/bluestore/BlueFS.cc | 107 +++++++++++++------------------------ src/os/bluestore/BlueFS.h | 7 +-- 2 files changed, 40 insertions(+), 74 deletions(-) diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 3a653f425d043..df51d17a6eeed 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -967,29 +967,36 @@ int BlueFS::_open_super() return 0; } -int BlueFS::_check_new_allocations(const bluefs_fnode_t& fnode, - size_t dev_count, - boost::dynamic_bitset* used_blocks) +int BlueFS::_check_allocations(const bluefs_fnode_t& fnode, + boost::dynamic_bitset* used_blocks, + bool is_alloc, //true when allocating, false when deallocating + const char* op_name) { auto& fnode_extents = fnode.extents; for (auto e : fnode_extents) { auto id = e.bdev; bool fail = false; - ceph_assert(id < dev_count); + ceph_assert(id < MAX_BDEV); + if (int r = _verify_alloc_granularity(id, e.offset, e.length, + op_name); r < 0) { + return r; + } apply_for_bitset_range(e.offset, e.length, alloc_size[id], used_blocks[id], [&](uint64_t pos, boost::dynamic_bitset &bs) { - if (bs.test(pos)) { - fail = true; - } - bs.set(pos); + if (is_alloc == bs.test(pos)) { + fail = true; + } else { + bs.flip(pos); + } } ); if (fail) { - derr << __func__ << " invalid extent " << int(e.bdev) - << ": 0x" << std::hex << e.offset << "~" << e.length - << std::dec << ": duplicate reference, ino " << fnode.ino - << dendl; + derr << __func__ << " " << op_name << " invalid extent " << int(e.bdev) + << ": 0x" << std::hex << e.offset << "~" << e.length << std::dec + << (is_alloc == true ? + ": duplicate reference, ino " : ": double free, ino ") + << fnode.ino << dendl; return -EFAULT; } } @@ -1067,11 +1074,15 @@ int BlueFS::_replay(bool noop, bool to_stdout) used_blocks[i].resize(round_up_to(bdev[i]->get_size(), alloc_size[i]) / alloc_size[i]); } } + // check initial log layout + int r = _check_allocations(log_file->fnode, + used_blocks, true, "Log from super"); + if (r < 0) { + return r; + } } } - bool first_log_check = true; - while (true) { ceph_assert((log_reader->buf.pos & ~super.block_mask()) == 0); uint64_t pos = log_reader->buf.pos; @@ -1375,34 +1386,13 @@ int BlueFS::_replay(bool noop, bool to_stdout) } if (!noop) { FileRef f = _get_file(fnode.ino); - if (cct->_conf->bluefs_log_replay_check_allocations) { - // check initial log layout - if (first_log_check) { - first_log_check = false; - int r = _check_new_allocations(log_file->fnode, - MAX_BDEV, used_blocks); - if (r < 0) { - return r; - } - } - - auto& fnode_extents = f->fnode.extents; - for (auto e : fnode_extents) { - auto id = e.bdev; - if (int r = _verify_alloc_granularity(id, e.offset, e.length, - "OP_FILE_UPDATE"); r < 0) { - return r; - } - apply_for_bitset_range(e.offset, e.length, alloc_size[id], - used_blocks[id], - [&](uint64_t pos, boost::dynamic_bitset &bs) { - ceph_assert(bs.test(pos)); - bs.reset(pos); - } - ); + if (cct->_conf->bluefs_log_replay_check_allocations) { + int r = _check_allocations(f->fnode, + used_blocks, false, "OP_FILE_UPDATE"); + if (r < 0) { + return r; } } - if (fnode.ino != 1) { vselector->sub_usage(f->vselector_hint, f->fnode); } @@ -1415,8 +1405,8 @@ int BlueFS::_replay(bool noop, bool to_stdout) ino_last = fnode.ino; } if (cct->_conf->bluefs_log_replay_check_allocations) { - int r = _check_new_allocations(f->fnode, - MAX_BDEV, used_blocks); + int r = _check_allocations(f->fnode, + used_blocks, true, "OP_FILE_UPDATE"); if (r < 0) { return r; } @@ -1441,27 +1431,10 @@ int BlueFS::_replay(bool noop, bool to_stdout) ceph_assert(p != file_map.end()); vselector->sub_usage(p->second->vselector_hint, p->second->fnode); if (cct->_conf->bluefs_log_replay_check_allocations) { - auto& fnode_extents = p->second->fnode.extents; - for (auto e : fnode_extents) { - auto id = e.bdev; - bool fail = false; - - apply_for_bitset_range(e.offset, e.length, alloc_size[id], used_blocks[id], - [&](uint64_t pos, boost::dynamic_bitset &bs) { - if (!bs.test(pos)) { - fail = true; - } - bs.reset(pos); - } - ); - if (fail) { - derr << __func__ << " invalid extent " << int(id) - << ": 0x" << std::hex << e.offset << "~" << e.length - << std::dec - << ": not in use but is allocated for removed ino " << ino - << dendl; - return -EFAULT; - } + int r = _check_allocations(p->second->fnode, + used_blocks, false, "OP_FILE_REMOVE"); + if (r < 0) { + return r; } } file_map.erase(p); @@ -1485,14 +1458,6 @@ int BlueFS::_replay(bool noop, bool to_stdout) if (!noop) { vselector->add_usage(log_file->vselector_hint, log_file->fnode); } - if (!noop && first_log_check && - cct->_conf->bluefs_log_replay_check_allocations) { - int r = _check_new_allocations(log_file->fnode, - MAX_BDEV, used_blocks); - if (r < 0) { - return r; - } - } dout(10) << __func__ << " log file size was 0x" << std::hex << log_file->fnode.size << std::dec << dendl; diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index aefe083c993fb..988c7e37f9e98 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -447,9 +447,10 @@ private: int _open_super(); int _write_super(int dev); - int _check_new_allocations(const bluefs_fnode_t& fnode, - size_t dev_count, - boost::dynamic_bitset* used_blocks); + int _check_allocations(const bluefs_fnode_t& fnode, + boost::dynamic_bitset* used_blocks, + bool is_alloc, //true when allocating, false when deallocating + const char* op_name); int _verify_alloc_granularity( __u8 id, uint64_t offset, uint64_t length, const char *op); -- 2.39.5