]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore/BlueFS: verify that fnodes respect the bluefs alloc_size
author Sage Weil <sage@redhat.com>
Sat, 1 Feb 2020 17:46:18 +0000 (11:46 -0600)
committer Sage Weil <sage@redhat.com>
Fri, 7 Feb 2020 21:56:10 +0000 (15:56 -0600)
If the files aren't respecting the configured allocator size, we will
leak (granular) space when they are deleted.

And this shouldn't happen.

Signed-off-by: Sage Weil <sage@redhat.com>
src/os/bluestore/BlueFS.cc
src/os/bluestore/BlueFS.h

index 56663666221e67ff9492a4d4d193757f5e42c414..e2262901991261a8de8659027afad896d8f827c4 100644 (file)
@@ -855,6 +855,37 @@ int BlueFS::_adjust_granularity(
   return 0;
 }
 
+int BlueFS::_verify_alloc_granularity(
+  __u8 id, uint64_t offset, uint64_t length, const char *op)
+{
+  if ((offset & (alloc_size[id] - 1)) ||
+      (length & (alloc_size[id] - 1))) {
+    derr << __func__ << " " << op << " of " << (int)id
+        << ":0x" << std::hex << offset << "~" << length << std::dec
+        << " does not align to alloc_size 0x"
+        << std::hex << alloc_size[id] << std::dec << dendl;
+    // be helpful: halve alloc_size until both offset and length align to it, then suggest that value as a work-around
+    auto need = alloc_size[id];
+    while (need && ((offset & (need - 1)) ||
+                   (length & (need - 1)))) {
+      need >>= 1;
+    }
+    if (need) {
+      const char *which;
+      if (id == BDEV_SLOW ||
+         (id == BDEV_DB && !bdev[BDEV_SLOW])) {
+       which = "bluefs_shared_alloc_size";
+      } else {
+       which = "bluefs_alloc_size";
+      }
+      derr << "work-around by setting " << which << " = " << need
+          << " for this OSD" << dendl;
+    }
+    return -EFAULT;
+  }
+  return 0;
+}
+
 int BlueFS::_replay(bool noop, bool to_stdout)
 {
   dout(10) << __func__ << (noop ? " NO-OP" : "") << dendl;
@@ -1287,7 +1318,6 @@ int BlueFS::_replay(bool noop, bool to_stdout)
             std::cout << " 0x" << std::hex << pos << std::dec
                       << ":  op_file_update " << " " << fnode << std::endl;
           }
-
           if (!noop) {
            FileRef f = _get_file(fnode.ino);
             if (cct->_conf->bluefs_log_replay_check_allocations) {
@@ -1304,7 +1334,12 @@ int BlueFS::_replay(bool noop, bool to_stdout)
               auto& fnode_extents = f->fnode.extents;
               for (auto e : fnode_extents) {
                 auto id = e.bdev;
-                apply_for_bitset_range(e.offset, e.length, alloc_size[id], used_blocks[id],
+               if (int r = _verify_alloc_granularity(id, e.offset, e.length,
+                                                     "OP_FILE_UPDATE"); r < 0) {
+                 return r;
+               }
+                apply_for_bitset_range(e.offset, e.length, alloc_size[id],
+                                      used_blocks[id],
                   [&](uint64_t pos, boost::dynamic_bitset<uint64_t> &bs) {
                     ceph_assert(bs.test(pos));
                     bs.reset(pos);
index 733bc55e3ff5c6dfea8b172bd4ea6aa82c7700c0..8066c314d26a1c962ef2b209ef6ed64ddeaae8f5 100644 (file)
@@ -412,6 +412,9 @@ private:
     size_t dev_count,
     boost::dynamic_bitset<uint64_t>* owned_blocks,
     boost::dynamic_bitset<uint64_t>* used_blocks);
+  int _verify_alloc_granularity(
+    __u8 id, uint64_t offset, uint64_t length,
+    const char *op);
   int _adjust_granularity(
     __u8 id, uint64_t *offset, uint64_t *length, const char *op);
   int _replay(bool noop, bool to_stdout = false); ///< replay journal