From: Sage Weil
Date: Sat, 1 Feb 2020 17:46:18 +0000 (-0600)
Subject: os/bluestore/BlueFS: verify that fnodes respect the bluefs alloc_size
X-Git-Tag: v15.1.1~469^2~2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=2232828dc3cbb7a8d1522999e13c56dad74da0cb;p=ceph.git

os/bluestore/BlueFS: verify that fnodes respect the bluefs alloc_size

If the files aren't respecting the configured allocator size, we will
leak (granular) space when they are deleted. And this shouldn't happen.

Signed-off-by: Sage Weil
---

diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc
index 56663666221e..e22629019912 100644
--- a/src/os/bluestore/BlueFS.cc
+++ b/src/os/bluestore/BlueFS.cc
@@ -855,6 +855,37 @@ int BlueFS::_adjust_granularity(
   return 0;
 }
 
+int BlueFS::_verify_alloc_granularity(
+  __u8 id, uint64_t offset, uint64_t length, const char *op)
+{
+  if ((offset & (alloc_size[id] - 1)) ||
+      (length & (alloc_size[id] - 1))) {
+    derr << __func__ << " " << op << " of " << (int)id
+         << ":0x" << std::hex << offset << "~" << length << std::dec
+         << " does not align to alloc_size 0x"
+         << std::hex << alloc_size[id] << std::dec << dendl;
+    // be helpful
+    auto need = alloc_size[id];
+    while (need && ((offset & (need - 1)) ||
+                    (length & (need - 1)))) {
+      need >>= 1;
+    }
+    if (need) {
+      const char *which;
+      if (id == BDEV_SLOW ||
+          (id == BDEV_DB && !bdev[BDEV_SLOW])) {
+        which = "bluefs_shared_alloc_size";
+      } else {
+        which = "bluefs_alloc_size";
+      }
+      derr << "work-around by setting " << which << " = " << need
+           << " for this OSD" << dendl;
+    }
+    return -EFAULT;
+  }
+  return 0;
+}
+
 int BlueFS::_replay(bool noop, bool to_stdout)
 {
   dout(10) << __func__ << (noop ? " NO-OP" : "") << dendl;
@@ -1287,7 +1318,6 @@ int BlueFS::_replay(bool noop, bool to_stdout)
             std::cout << " 0x" << std::hex << pos << std::dec
                       << ": op_file_update " << " " << fnode << std::endl;
           }
-
           if (!noop) {
             FileRef f = _get_file(fnode.ino);
             if (cct->_conf->bluefs_log_replay_check_allocations) {
@@ -1304,7 +1334,12 @@ int BlueFS::_replay(bool noop, bool to_stdout)
               auto& fnode_extents = f->fnode.extents;
               for (auto e : fnode_extents) {
                 auto id = e.bdev;
-                apply_for_bitset_range(e.offset, e.length, alloc_size[id], used_blocks[id],
+                if (int r = _verify_alloc_granularity(id, e.offset, e.length,
+                                                      "OP_FILE_UPDATE"); r < 0) {
+                  return r;
+                }
+                apply_for_bitset_range(e.offset, e.length, alloc_size[id],
+                                       used_blocks[id],
                   [&](uint64_t pos, boost::dynamic_bitset &bs) {
                     ceph_assert(bs.test(pos));
                     bs.reset(pos);
diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h
index 733bc55e3ff5..8066c314d26a 100644
--- a/src/os/bluestore/BlueFS.h
+++ b/src/os/bluestore/BlueFS.h
@@ -412,6 +412,9 @@ private:
     size_t dev_count,
     boost::dynamic_bitset* owned_blocks,
     boost::dynamic_bitset* used_blocks);
+  int _verify_alloc_granularity(
+    __u8 id, uint64_t offset, uint64_t length,
+    const char *op);
   int _adjust_granularity(
     __u8 id, uint64_t *offset, uint64_t *length, const char *op);
   int _replay(bool noop, bool to_stdout = false); ///< replay journal