From: Adam Kupczyk Date: Wed, 11 Aug 2021 13:41:34 +0000 (+0200) Subject: os/bluestore/bluefs: Cleanup allocation consistency check code X-Git-Tag: v16.2.11~62^2~4 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=f0ef4c93e220fd37131245b7ab5d66834109a7b0;p=ceph.git os/bluestore/bluefs: Cleanup allocation consistency check code When bluefs_log_replay_check_allocations is set _replay performs checks if allocations/deallocations of extents are valid and properly aligned. The changes include: - now deallocations are also checked for valid alignment (more sanity checks) - bluefs log read from super is checked once - before reading any transaction Signed-off-by: Adam Kupczyk --- diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 5fd53fa6913a5..2d0dc36ee22ae 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -967,29 +967,36 @@ int BlueFS::_open_super() return 0; } -int BlueFS::_check_new_allocations(const bluefs_fnode_t& fnode, - size_t dev_count, - boost::dynamic_bitset* used_blocks) +int BlueFS::_check_allocations(const bluefs_fnode_t& fnode, + boost::dynamic_bitset* used_blocks, + bool is_alloc, //true when allocating, false when deallocating + const char* op_name) { auto& fnode_extents = fnode.extents; for (auto e : fnode_extents) { auto id = e.bdev; bool fail = false; - ceph_assert(id < dev_count); + ceph_assert(id < MAX_BDEV); + if (int r = _verify_alloc_granularity(id, e.offset, e.length, + op_name); r < 0) { + return r; + } apply_for_bitset_range(e.offset, e.length, alloc_size[id], used_blocks[id], [&](uint64_t pos, boost::dynamic_bitset &bs) { - if (bs.test(pos)) { - fail = true; - } - bs.set(pos); + if (is_alloc == bs.test(pos)) { + fail = true; + } else { + bs.flip(pos); + } } ); if (fail) { - derr << __func__ << " invalid extent " << int(e.bdev) - << ": 0x" << std::hex << e.offset << "~" << e.length - << std::dec << ": duplicate reference, ino " << fnode.ino - << dendl; + derr << __func__ << " " << op_name << " invalid extent " << int(e.bdev) + << ": 0x" << std::hex << e.offset << "~" << e.length << std::dec + << (is_alloc == true ? + ": duplicate reference, ino " : ": double free, ino ") + << fnode.ino << dendl; return -EFAULT; } } @@ -1067,11 +1074,15 @@ int BlueFS::_replay(bool noop, bool to_stdout) used_blocks[i].resize(round_up_to(bdev[i]->get_size(), alloc_size[i]) / alloc_size[i]); } } + // check initial log layout + int r = _check_allocations(log_file->fnode, + used_blocks, true, "Log from super"); + if (r < 0) { + return r; + } } } - bool first_log_check = true; - while (true) { ceph_assert((log_reader->buf.pos & ~super.block_mask()) == 0); uint64_t pos = log_reader->buf.pos; @@ -1384,34 +1395,13 @@ int BlueFS::_replay(bool noop, bool to_stdout) } if (!noop) { FileRef f = _get_file(fnode.ino); - if (cct->_conf->bluefs_log_replay_check_allocations) { - // check initial log layout - if (first_log_check) { - first_log_check = false; - int r = _check_new_allocations(log_file->fnode, - MAX_BDEV, used_blocks); - if (r < 0) { - return r; - } - } - - auto& fnode_extents = f->fnode.extents; - for (auto e : fnode_extents) { - auto id = e.bdev; - if (int r = _verify_alloc_granularity(id, e.offset, e.length, - "OP_FILE_UPDATE"); r < 0) { - return r; - } - apply_for_bitset_range(e.offset, e.length, alloc_size[id], - used_blocks[id], - [&](uint64_t pos, boost::dynamic_bitset &bs) { - ceph_assert(bs.test(pos)); - bs.reset(pos); - } - ); + if (cct->_conf->bluefs_log_replay_check_allocations) { + int r = _check_allocations(f->fnode, + used_blocks, false, "OP_FILE_UPDATE"); + if (r < 0) { + return r; } } - if (fnode.ino != 1) { vselector->sub_usage(f->vselector_hint, f->fnode); } @@ -1424,8 +1414,8 @@ int BlueFS::_replay(bool noop, bool to_stdout) ino_last = fnode.ino; } if (cct->_conf->bluefs_log_replay_check_allocations) { - int r = _check_new_allocations(f->fnode, - MAX_BDEV, used_blocks); + int r = _check_allocations(f->fnode, + used_blocks, true, "OP_FILE_UPDATE"); if (r < 0) { return r; } @@ -1453,27 +1443,10 @@ int BlueFS::_replay(bool noop, bool to_stdout) ceph_assert(p != file_map.end()); vselector->sub_usage(p->second->vselector_hint, p->second->fnode); if (cct->_conf->bluefs_log_replay_check_allocations) { - auto& fnode_extents = p->second->fnode.extents; - for (auto e : fnode_extents) { - auto id = e.bdev; - bool fail = false; - - apply_for_bitset_range(e.offset, e.length, alloc_size[id], used_blocks[id], - [&](uint64_t pos, boost::dynamic_bitset &bs) { - if (!bs.test(pos)) { - fail = true; - } - bs.reset(pos); - } - ); - if (fail) { - derr << __func__ << " invalid extent " << int(id) - << ": 0x" << std::hex << e.offset << "~" << e.length - << std::dec - << ": not in use but is allocated for removed ino " << ino - << dendl; - return -EFAULT; - } + int r = _check_allocations(p->second->fnode, + used_blocks, false, "OP_FILE_REMOVE"); + if (r < 0) { + return r; } } file_map.erase(p); @@ -1497,14 +1470,6 @@ int BlueFS::_replay(bool noop, bool to_stdout) if (!noop) { vselector->add_usage(log_file->vselector_hint, log_file->fnode); } - if (!noop && first_log_check && - cct->_conf->bluefs_log_replay_check_allocations) { - int r = _check_new_allocations(log_file->fnode, - MAX_BDEV, used_blocks); - if (r < 0) { - return r; - } - } dout(10) << __func__ << " log file size was 0x" << std::hex << log_file->fnode.size << std::dec << dendl; diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index aefe083c993fb..988c7e37f9e98 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -447,9 +447,10 @@ private: int _open_super(); int _write_super(int dev); - int _check_new_allocations(const bluefs_fnode_t& fnode, - size_t dev_count, - boost::dynamic_bitset* used_blocks); + int _check_allocations(const bluefs_fnode_t& fnode, + boost::dynamic_bitset* used_blocks, + bool is_alloc, //true when allocating, false when deallocating + const char* op_name); int _verify_alloc_granularity( __u8 id, uint64_t offset, uint64_t length, const char *op);