]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: get rid of obsolete stuff in bluefs.
authorIgor Fedotov <ifedotov@suse.com>
Fri, 21 Aug 2020 09:09:43 +0000 (12:09 +0300)
committerIgor Fedotov <ifedotov@suse.com>
Thu, 3 Sep 2020 09:44:24 +0000 (12:44 +0300)
This primarily touches legacy BlueFS code intended to maintain main
device space gifting/reclaiming, which is obsolete due to using a single
main device allocator shared among BlueStore and BlueFS.

Fixes: https://tracker.ceph.com/issues/46886
Signed-off-by: Igor Fedotov <ifedotov@suse.com>
src/os/bluestore/BlueFS.cc
src/os/bluestore/BlueFS.h
src/os/bluestore/BlueStore.cc
src/os/bluestore/BlueStore.h
src/os/bluestore/bluefs_types.cc
src/os/bluestore/bluefs_types.h
src/os/bluestore/bluestore_tool.cc
src/test/objectstore/test_bluefs.cc

index d3ad01da1554213d2894ff665b60e4855cd68667..648a23ca052aabca98de5ddac27d7a2a8d093288 100644 (file)
@@ -120,10 +120,8 @@ private:
          f->dump_string("device", bluefs->get_device_name(dev));
          ceph_assert(bluefs->alloc[dev]);
           auto total = bluefs->get_total(dev);
-          auto free = bluefs->alloc[dev]->get_free();
-          auto used = bluefs->alloc[dev] == bluefs->shared_bdev_alloc ?
-            bluefs->shared_bdev_used.load() :
-            total - free;
+          auto free = bluefs->get_free(dev);
+          auto used = bluefs->get_used(dev);
 
           f->dump_int("total", total);
           f->dump_int("free", free);
@@ -172,14 +170,16 @@ private:
   }
 };
 
-BlueFS::BlueFS(CephContext* cct)
+BlueFS::BlueFS(CephContext* cct,
+               bluefs_shared_alloc_context_t* _shared_alloc)
   : cct(cct),
     bdev(MAX_BDEV),
     ioc(MAX_BDEV),
-    block_all(MAX_BDEV),
+    block_reserved(MAX_BDEV),
     alloc(MAX_BDEV),
     alloc_size(MAX_BDEV, 0),
-    pending_release(MAX_BDEV)
+    pending_release(MAX_BDEV),
+    shared_alloc(_shared_alloc)
 {
   discard_cb[BDEV_WAL] = wal_discard_cb;
   discard_cb[BDEV_DB] = db_discard_cb;
@@ -302,35 +302,30 @@ void BlueFS::_update_logger_stats()
   logger->set(l_bluefs_log_bytes, log_writer->file->fnode.size);
 
   if (alloc[BDEV_WAL]) {
-    logger->set(l_bluefs_wal_total_bytes, block_all[BDEV_WAL].size());
-    logger->set(l_bluefs_wal_used_bytes,
-               block_all[BDEV_WAL].size() - alloc[BDEV_WAL]->get_free());
+    logger->set(l_bluefs_wal_total_bytes, _get_total(BDEV_WAL));
+    logger->set(l_bluefs_wal_used_bytes, _get_used(BDEV_WAL));
   }
   if (alloc[BDEV_DB]) {
-    logger->set(l_bluefs_db_total_bytes, block_all[BDEV_DB].size());
-    uint64_t used = alloc[BDEV_DB] == shared_bdev_alloc ?
-      shared_bdev_used.load() :
-      block_all[BDEV_DB].size() - alloc[BDEV_DB]->get_free();
-    logger->set(l_bluefs_db_used_bytes, used);
+    logger->set(l_bluefs_db_total_bytes, _get_total(BDEV_DB));
+    logger->set(l_bluefs_db_used_bytes, _get_used(BDEV_DB));
   }
   if (alloc[BDEV_SLOW]) {
-    logger->set(l_bluefs_slow_total_bytes, block_all[BDEV_SLOW].size());
-    uint64_t used = alloc[BDEV_SLOW] == shared_bdev_alloc ?
-      shared_bdev_used.load() :
-      block_all[BDEV_SLOW].size() - alloc[BDEV_SLOW]->get_free();
-    logger->set(l_bluefs_slow_used_bytes, used);
+    logger->set(l_bluefs_slow_total_bytes, _get_total(BDEV_SLOW));
+    logger->set(l_bluefs_slow_used_bytes, _get_used(BDEV_SLOW));
   }
 }
 
 int BlueFS::add_block_device(unsigned id, const string& path, bool trim,
-                             bool shared_with_bluestore,
-                            Allocator* _shared_bdev_alloc)
+                             uint64_t reserved,
+                             bool shared_with_bluestore)
 {
-  dout(10) << __func__ << " bdev " << id << " path " << path << dendl;
+  dout(10) << __func__ << " bdev " << id << " path " << path << " "
+           << reserved << dendl;
   ceph_assert(id < bdev.size());
   ceph_assert(bdev[id] == NULL);
   BlockDevice *b = BlockDevice::create(cct, path, NULL, NULL,
                                       discard_cb[id], static_cast<void*>(this));
+  block_reserved[id] = reserved;
   if (shared_with_bluestore) {
     b->set_no_exclusive_lock();
   }
@@ -347,10 +342,10 @@ int BlueFS::add_block_device(unsigned id, const string& path, bool trim,
          << " size " << byte_u_t(b->get_size()) << dendl;
   bdev[id] = b;
   ioc[id] = new IOContext(cct, NULL);
-  if (_shared_bdev_alloc) {
-    ceph_assert(shared_bdev_alloc == nullptr);
-    alloc[id] = shared_bdev_alloc = _shared_bdev_alloc;
-    need_shared_alloc_init = true;
+  if (shared_with_bluestore) {
+    ceph_assert(shared_alloc); // to be set in ctor before
+    alloc[id] = shared_alloc->a;
+    shared_alloc_id = id;
   }
   return 0;
 }
@@ -362,47 +357,20 @@ bool BlueFS::bdev_support_label(unsigned id)
   return bdev[id]->supported_bdev_label();
 }
 
-uint64_t BlueFS::get_block_device_size(unsigned id)
+uint64_t BlueFS::get_block_device_size(unsigned id) const
 {
   if (id < bdev.size() && bdev[id])
     return bdev[id]->get_size();
   return 0;
 }
 
-void BlueFS::_add_block_extent(bool create, unsigned id, uint64_t offset,
-                              uint64_t length, bool skip)
-{
-  dout(1) << __func__ << " bdev " << id
-          << " create " << create
-         << " 0x" << std::hex << offset << "~" << length << std::dec
-         << " skip " << skip
-         << dendl;
-
-  ceph_assert(id < bdev.size());
-  ceph_assert(bdev[id]);
-  ceph_assert(bdev[id]->get_size() >= offset + length);
-  block_all[id].insert(offset, length);
-
-  if (!create) {
-    ceph_assert(id < alloc.size());
-    ceph_assert(alloc[id]);
-    if (!skip)
-      log_t.op_alloc_add(id, offset, length);
-    if (alloc[id] != shared_bdev_alloc) {
-      alloc[id]->init_add_free(offset, length);
-    }
-  }
-
-  dout(10) << __func__ << " done" << dendl;
-}
-
 void BlueFS::handle_discard(unsigned id, interval_set<uint64_t>& to_release)
 {
   dout(10) << __func__ << " bdev " << id << dendl;
   ceph_assert(alloc[id]);
   alloc[id]->release(to_release);
-  if (alloc[id] == shared_bdev_alloc) {
-    shared_bdev_used -= to_release.size();
+  if (is_shared_alloc(id)) {
+    shared_alloc->bluefs_used -= to_release.size();
   }
 }
 
@@ -411,36 +379,44 @@ uint64_t BlueFS::get_used()
   std::lock_guard l(lock);
   uint64_t used = 0;
   for (unsigned id = 0; id < MAX_BDEV; ++id) {
-    if (alloc[id]) {
-      if (alloc[id] != shared_bdev_alloc) {
-        used += block_all[id].size() - alloc[id]->get_free();
-      } else {
-        used += shared_bdev_used;
-      }
-    }
+    used += _get_used(id);
   }
   return used;
 }
 
-uint64_t BlueFS::get_used(unsigned id)
+uint64_t BlueFS::_get_used(unsigned id) const
 {
-  ceph_assert(id < alloc.size());
-  ceph_assert(alloc[id]);
-  std::lock_guard l(lock);
   uint64_t used = 0;
-  if (alloc[id] != shared_bdev_alloc) {
-    used = block_all[id].size() - alloc[id]->get_free();
+  if (!alloc[id])
+     return 0;
+
+  if (is_shared_alloc(id)) {
+    used = shared_alloc->bluefs_used;
   } else {
-    used += shared_bdev_used;
+    used = _get_total(id) - alloc[id]->get_free();
   }
   return used;
 }
 
+uint64_t BlueFS::get_used(unsigned id)
+{
+  ceph_assert(id < alloc.size());
+  ceph_assert(alloc[id]);
+  std::lock_guard l(lock);
+  return _get_used(id);
+}
+
+uint64_t BlueFS::_get_total(unsigned id) const
+{
+  ceph_assert(id < bdev.size());
+  ceph_assert(id < block_reserved.size());
+  return get_block_device_size(id) - block_reserved[id];
+}
+
 uint64_t BlueFS::get_total(unsigned id)
 {
   std::lock_guard l(lock);
-  ceph_assert(id < block_all.size());
-  return block_all[id].size();
+  return _get_total(id);
 }
 
 uint64_t BlueFS::get_free(unsigned id)
@@ -463,57 +439,28 @@ void BlueFS::dump_block_extents(ostream& out)
     if (!bdev[i]) {
       continue;
     }
-    auto owned = get_total(i);
+    auto total = get_total(i);
     auto free = get_free(i);
 
-    out << i << " : device size 0x" << std::hex << bdev[i]->get_size()
-        << " : own 0x" << block_all[i]
-        << " = 0x" << owned
-        << " : using 0x" << owned - free
-       << std::dec << "(" << byte_u_t(owned - free) << ")";
+    out << i << " : device size 0x" << std::hex << total
+        << " : using 0x" << total - free
+       << std::dec << "(" << byte_u_t(total - free) << ")";
     out << "\n";
   }
 }
 
-void BlueFS::get_usage(vector<pair<uint64_t,uint64_t>> *usage)
-{
-  std::lock_guard l(lock);
-  usage->resize(bdev.size());
-  for (unsigned id = 0; id < bdev.size(); ++id) {
-    if (!bdev[id]) {
-      (*usage)[id] = make_pair(0, 0);
-      continue;
-    }
-    (*usage)[id].first = alloc[id]->get_free();
-    (*usage)[id].second = block_all[id].size();
-    uint64_t used =
-      (block_all[id].size() - (*usage)[id].first) * 100 / block_all[id].size();
-    dout(10) << __func__ << " bdev " << id
-            << " free " << (*usage)[id].first
-            << " (" << byte_u_t((*usage)[id].first) << ")"
-            << " / " << (*usage)[id].second
-            << " (" << byte_u_t((*usage)[id].second) << ")"
-            << ", used " << used << "%"
-            << dendl;
-  }
-}
-
 int BlueFS::get_block_extents(unsigned id, interval_set<uint64_t> *extents)
 {
   std::lock_guard l(lock);
   dout(10) << __func__ << " bdev " << id << dendl;
-  if (id >= block_all.size())
+  if (id >= alloc.size())
     return -EINVAL;
-  if (alloc[id] && alloc[id] == shared_bdev_alloc) {
-    for (auto& p : file_map) {
-      for (auto& q : p.second->fnode.extents) {
-        if (alloc[q.bdev] == shared_bdev_alloc) {
-          extents->insert(q.offset, q.length);
-        }
+  for (auto& p : file_map) {
+    for (auto& q : p.second->fnode.extents) {
+      if (q.bdev == id && alloc[q.bdev] == shared_alloc->a) {
+        extents->insert(q.offset, q.length);
       }
     }
-  } else {
-    *extents = block_all[id];
   }
   return 0;
 }
@@ -557,17 +504,6 @@ int BlueFS::mkfs(uuid_d osd_uuid, const bluefs_layout_t& layout)
 
   // initial txn
   log_t.op_init();
-  for (unsigned bdev = 0; bdev < MAX_BDEV; ++bdev) {
-    interval_set<uint64_t>& p = block_all[bdev];
-    if (p.empty())
-      continue;
-    for (interval_set<uint64_t>::iterator q = p.begin(); q != p.end(); ++q) {
-      dout(20) << __func__ << " op_alloc_add " << bdev << " 0x"
-               << std::hex << q.get_start() << "~" << q.get_len() << std::dec
-               << dendl;
-      log_t.op_alloc_add(bdev, q.get_start(), q.get_len());
-    }
-  }
   _flush_and_sync_log(l);
 
   // write supers
@@ -580,11 +516,13 @@ int BlueFS::mkfs(uuid_d osd_uuid, const bluefs_layout_t& layout)
   super = bluefs_super_t();
   _close_writer(log_writer);
   log_writer = NULL;
-  block_all.clear();
   vselector.reset(nullptr);
   _stop_alloc();
   _shutdown_logger();
-  need_shared_alloc_init = false;
+  if (shared_alloc) {
+    ceph_assert(shared_alloc->need_init);
+    shared_alloc->need_init = false;
+  }
 
   dout(10) << __func__ << " success" << dendl;
   return 0;
@@ -593,7 +531,6 @@ int BlueFS::mkfs(uuid_d osd_uuid, const bluefs_layout_t& layout)
 void BlueFS::_init_alloc()
 {
   dout(20) << __func__ << dendl;
-  block_unused_too_granular.resize(MAX_BDEV);
 
   if (bdev[BDEV_WAL]) {
     alloc_size[BDEV_WAL] = cct->_conf->bluefs_alloc_size;
@@ -618,11 +555,10 @@ void BlueFS::_init_alloc()
     }
     ceph_assert(bdev[id]->get_size());
     ceph_assert(alloc_size[id]);
-    if (alloc[id]) {
+    if (is_shared_alloc(id)) {
       dout(1) << __func__ << " shared, id " << id
         << " alloc_size 0x" << std::hex << alloc_size[id]
         << " size 0x" << bdev[id]->get_size() << std::dec << dendl;
-      shared_bdev_used = 0;
     } else {
       std::string name = "bluefs-";
       const char* devnames[] = { "wal","db","slow" };
@@ -636,10 +572,9 @@ void BlueFS::_init_alloc()
       alloc[id] = Allocator::create(cct, cct->_conf->bluefs_allocator,
                                    bdev[id]->get_size(),
                                    alloc_size[id], name);
-      interval_set<uint64_t>& p = block_all[id];
-      for (interval_set<uint64_t>::iterator q = p.begin(); q != p.end(); ++q) {
-        alloc[id]->init_add_free(q.get_start(), q.get_len());
-      }
+      alloc[id]->init_add_free(
+        block_reserved[id],
+        _get_total(id) - block_reserved[id]);
     }
   }
 }
@@ -653,19 +588,19 @@ void BlueFS::_stop_alloc()
   }
 
   for (size_t i = 0; i < alloc.size(); ++i) {
-    if (alloc[i] && alloc[i] != shared_bdev_alloc) {
+    if (alloc[i] && !is_shared_alloc(i)) {
       alloc[i]->shutdown();
       delete alloc[i];
       alloc[i] = nullptr;
     }
   }
-  block_unused_too_granular.clear();
 }
 
 int BlueFS::mount()
 {
   dout(1) << __func__ << dendl;
 
+  bool shared_alloc_ready = shared_alloc && shared_alloc->a;
   int r = _open_super();
   if (r < 0) {
     derr << __func__ << " failed to open super: " << cpp_strerror(r) << dendl;
@@ -681,8 +616,6 @@ int BlueFS::mount()
         get_block_device_size(BlueFS::BDEV_SLOW) * 95 / 100));
   }
 
-  block_all.clear();
-  block_all.resize(MAX_BDEV);
   _init_alloc();
   _init_logger();
 
@@ -697,18 +630,25 @@ int BlueFS::mount()
   for (auto& p : file_map) {
     dout(30) << __func__ << " noting alloc for " << p.second->fnode << dendl;
     for (auto& q : p.second->fnode.extents) {
-      if (alloc[q.bdev] == shared_bdev_alloc) {
-        if (need_shared_alloc_init) {
+      if (is_shared_alloc(q.bdev)) {
+        // we might have still uninitialized shared_alloc at this point
+        // just bypass initialization then
+        if (shared_alloc_ready && shared_alloc->need_init) {
+          ceph_assert(shared_alloc->a);
           alloc[q.bdev]->init_rm_free(q.offset, q.length);
-          shared_bdev_used += q.length;
+          shared_alloc->bluefs_used += q.length;
         }
       } else {
         alloc[q.bdev]->init_rm_free(q.offset, q.length);
       }
     }
   }
-  need_shared_alloc_init = false;
-  dout(1) << __func__ << " shared_bdev_used = " << shared_bdev_used << dendl;
+  if (shared_alloc_ready) {
+    shared_alloc->need_init = false;
+  }
+  dout(1) << __func__ << " shared_bdev_used = "
+          << (shared_alloc_ready ? (int64_t)shared_alloc->bluefs_used : -1)
+          << dendl;
 
   // set up the log for future writes
   log_writer = _create_writer(_get_file(1));
@@ -872,7 +812,6 @@ int BlueFS::_open_super()
 
 int BlueFS::_check_new_allocations(const bluefs_fnode_t& fnode,
   size_t dev_count,
-  boost::dynamic_bitset<uint64_t>* owned_blocks,
   boost::dynamic_bitset<uint64_t>* used_blocks)
 {
   auto& fnode_extents = fnode.extents;
@@ -880,21 +819,6 @@ int BlueFS::_check_new_allocations(const bluefs_fnode_t& fnode,
     auto id = e.bdev;
     bool fail = false;
     ceph_assert(id < dev_count);
-    apply_for_bitset_range(e.offset, e.length, alloc_size[id], owned_blocks[id],
-      [&](uint64_t pos, boost::dynamic_bitset<uint64_t> &bs) {
-        if (!bs.test(pos)) {
-          fail = true;
-        }
-      }
-    );
-    if (fail) {
-      derr << __func__ << " invalid extent " << int(id)
-        << ": 0x" << std::hex << e.offset << "~" << e.length
-        << std::dec
-        << ": wasn't given but allocated for ino " << fnode.ino
-        << dendl;
-      return -EFAULT;
-    }
 
     apply_for_bitset_range(e.offset, e.length, alloc_size[id], used_blocks[id],
       [&](uint64_t pos, boost::dynamic_bitset<uint64_t> &bs) {
@@ -915,51 +839,6 @@ int BlueFS::_check_new_allocations(const bluefs_fnode_t& fnode,
   return 0;
 }
 
-int BlueFS::_adjust_granularity(
-  __u8 id, uint64_t *offset, uint64_t *length, bool alloc)
-{
-  const char *op = alloc ? "op_alloc_add" : "op_alloc_rm";
-  auto oldo = *offset;
-  auto oldl = *length;
-  if (*offset & (alloc_size[id] - 1)) {
-    *offset &= ~(alloc_size[id] - 1);
-    *offset += alloc_size[id];
-    if (*length > *offset - oldo) {
-      if (alloc) {
-       block_unused_too_granular[id].insert(oldo, *offset - oldo);
-      } else {
-       block_unused_too_granular[id].erase(oldo, *offset - oldo);
-      }
-      *length -= (*offset - oldo);
-    } else {
-      if (alloc) {
-       block_unused_too_granular[id].insert(oldo, *length);
-      } else {
-       block_unused_too_granular[id].erase(oldo, *length);
-      }
-      *length = 0;
-    }
-  }
-  if (*length & (alloc_size[id] - 1)) {
-    *length &= ~(alloc_size[id] - 1);
-    if (alloc) {
-      block_unused_too_granular[id].insert(
-       *offset + *length,
-       oldo + oldl - *offset - *length);
-    } else {
-      block_unused_too_granular[id].erase(
-       *offset + *length,
-       oldo + oldl - *offset - *length);
-    }
-  }
-  if (oldo != *offset || oldl != *length) {
-    dout(10) << __func__ << " " << op << " "
-            << (int)id << ":" << std::hex << oldo << "~" << oldl
-            << " -> " << (int)id << ":" << *offset << "~" << *length << dendl;
-  }
-  return 0;
-}
-
 int BlueFS::_verify_alloc_granularity(
   __u8 id, uint64_t offset, uint64_t length, const char *op)
 {
@@ -1000,11 +879,6 @@ int BlueFS::_replay(bool noop, bool to_stdout)
   FileRef log_file;
   log_file = _get_file(1);
 
-  // sanity check
-  for (auto& a : block_unused_too_granular) {
-    ceph_assert(a.empty());
-  }
-
   log_file->fnode = super.log_fnode;
   if (!noop) {
     log_file->vselector_hint =
@@ -1028,14 +902,12 @@ int BlueFS::_replay(bool noop, bool to_stdout)
   bool seen_recs = false;
 
   boost::dynamic_bitset<uint64_t> used_blocks[MAX_BDEV];
-  boost::dynamic_bitset<uint64_t> owned_blocks[MAX_BDEV];
 
   if (!noop) {
     if (cct->_conf->bluefs_log_replay_check_allocations) {
       for (size_t i = 0; i < MAX_BDEV; ++i) {
        if (alloc_size[i] != 0 && bdev[i] != nullptr) {
          used_blocks[i].resize(round_up_to(bdev[i]->get_size(), alloc_size[i]) / alloc_size[i]);
-         owned_blocks[i].resize(round_up_to(bdev[i]->get_size(), alloc_size[i]) / alloc_size[i]);
        }
       }
     }
@@ -1211,123 +1083,26 @@ int BlueFS::_replay(bool noop, bool to_stdout)
        break;
 
       case bluefs_transaction_t::OP_ALLOC_ADD:
+       // LEGACY, do nothing but read params
         {
-         __u8 id;
-         uint64_t offset, length;
-         decode(id, p);
-         decode(offset, p);
-         decode(length, p);
-         dout(20) << __func__ << " 0x" << std::hex << pos << std::dec
-                   << ":  op_alloc_add " << " " << (int)id
-                   << ":0x" << std::hex << offset << "~" << length << std::dec
-                   << dendl;
-          if (unlikely(to_stdout)) {
-            std::cout << " 0x" << std::hex << pos << std::dec
-                      << ":  op_alloc_add " << " " << (int)id
-                      << ":0x" << std::hex << offset << "~" << length << std::dec
-                      << std::endl;
-          }
-         if (!noop) {
-           block_all[id].insert(offset, length);
-           _adjust_granularity(id, &offset, &length, true);
-           if (length &&
-               alloc[id] != shared_bdev_alloc) {
-             alloc[id]->init_add_free(offset, length);
-           }
-
-            if (cct->_conf->bluefs_log_replay_check_allocations) {
-              bool fail = false;
-              apply_for_bitset_range(offset, length, alloc_size[id], owned_blocks[id],
-                [&](uint64_t pos, boost::dynamic_bitset<uint64_t> &bs) {
-                  if (bs.test(pos)) {
-                    fail = true;
-                  } else {
-                    bs.set(pos);
-                  }
-                }
-              );
-              if (fail) {
-                derr << __func__ << " invalid extent " << (int)id
-                  << ": 0x" << std::hex << offset << "~" << length
-                  << std::dec << ": already given" << dendl;
-                return -EFAULT;
-              }
-              apply_for_bitset_range(offset, length, alloc_size[id], used_blocks[id],
-                [&](uint64_t pos, boost::dynamic_bitset<uint64_t> &bs) {
-                  if (bs.test(pos)) {
-                    fail = true;
-                  }
-                }
-              );
-              if (fail) {
-                derr << __func__ << " invalid extent " << int(id)
-                  << ": 0x" << std::hex << offset << "~" << length
-                  << std::dec << ": already in use" << dendl;
-                return -EFAULT;
-              }
-            }
-         }
-       }
+          __u8 id;
+          uint64_t offset, length;
+          decode(id, p);
+          decode(offset, p);
+          decode(length, p);
+        }
        break;
 
       case bluefs_transaction_t::OP_ALLOC_RM:
+       // LEGACY, do nothing but read params
         {
-         __u8 id;
-         uint64_t offset, length;
-         decode(id, p);
-         decode(offset, p);
-         decode(length, p);
-         dout(20) << __func__ << " 0x" << std::hex << pos << std::dec
-                   << ":  op_alloc_rm " << " " << (int)id
-                   << ":0x" << std::hex << offset << "~" << length << std::dec
-                   << dendl;
-          if (unlikely(to_stdout)) {
-            std::cout << " 0x" << std::hex << pos << std::dec
-                      << ":  op_alloc_rm " << " " << (int)id
-                      << ":0x" << std::hex << offset << "~" << length << std::dec
-                      << std::endl;
-          }
-         if (!noop) {
-           block_all[id].erase(offset, length);
-           _adjust_granularity(id, &offset, &length, false);
-           if (length && alloc[id] != shared_bdev_alloc) {
-             alloc[id]->init_rm_free(offset, length);
-           }
-            if (cct->_conf->bluefs_log_replay_check_allocations) {
-              bool fail = false;
-              apply_for_bitset_range(offset, length, alloc_size[id], owned_blocks[id],
-                [&](uint64_t pos, boost::dynamic_bitset<uint64_t> &bs) {
-                  if (!bs.test(pos)) {
-                    fail = true;
-                  } else {
-                    bs.reset(pos);
-                  }
-                }
-              );
-              if (fail) {
-                derr << __func__ << " invalid extent " << int(id)
-                  << ": 0x" << std::hex << offset << "~" << length
-                  << std::dec << ": wasn't given" << dendl;
-                return -EFAULT;
-              }
-
-              apply_for_bitset_range(offset, length, alloc_size[id], used_blocks[id],
-                [&](uint64_t pos, boost::dynamic_bitset<uint64_t> &bs) {
-                  if (bs.test(pos)) {
-                    fail = true;
-                  }
-                }
-              );
-              if (fail) {
-                derr << __func__ << " invalid extent " << (int)id
-                   << ": 0x" << std::hex << offset << "~" << length
-                  << std::dec << ": still in use" << dendl;
-                return -EFAULT;
-              }
-            }
-          }
-       }
-       break;
+          __u8 id;
+          uint64_t offset, length;
+          decode(id, p);
+          decode(offset, p);
+          decode(length, p);
+        }
+        break;
 
       case bluefs_transaction_t::OP_DIR_LINK:
         {
@@ -1448,7 +1223,7 @@ int BlueFS::_replay(bool noop, bool to_stdout)
               if (first_log_check) {
                 first_log_check = false;
                 int r = _check_new_allocations(log_file->fnode,
-                  MAX_BDEV, owned_blocks, used_blocks);
+                  MAX_BDEV, used_blocks);
                 if (r < 0) {
                   return r;
                 }
@@ -1484,7 +1259,7 @@ int BlueFS::_replay(bool noop, bool to_stdout)
            }
             if (cct->_conf->bluefs_log_replay_check_allocations) {
               int r = _check_new_allocations(f->fnode,
-                MAX_BDEV, owned_blocks, used_blocks);
+                MAX_BDEV, used_blocks);
               if (r < 0) {
                 return r;
               }
@@ -1513,21 +1288,6 @@ int BlueFS::_replay(bool noop, bool to_stdout)
               for (auto e : fnode_extents) {
                 auto id = e.bdev;
                 bool fail = false;
-                apply_for_bitset_range(e.offset, e.length, alloc_size[id], owned_blocks[id],
-                  [&](uint64_t pos, boost::dynamic_bitset<uint64_t> &bs) {
-                    if (!bs.test(pos)) {
-                      fail = true;
-                    }
-                  }
-                );
-                if (fail) {
-                  derr << __func__ << " invalid extent " << int(id)
-                    << ": 0x" << std::hex << e.offset << "~" << e.length
-                    << std::dec
-                    << ": wasn't given but is allocated for removed ino " << ino
-                    << dendl;
-                  return -EFAULT;
-                }
 
                 apply_for_bitset_range(e.offset, e.length, alloc_size[id], used_blocks[id],
                   [&](uint64_t pos, boost::dynamic_bitset<uint64_t> &bs) {
@@ -1571,7 +1331,7 @@ int BlueFS::_replay(bool noop, bool to_stdout)
   if (!noop && first_log_check &&
         cct->_conf->bluefs_log_replay_check_allocations) {
     int r = _check_new_allocations(log_file->fnode,
-      MAX_BDEV, owned_blocks, used_blocks);
+      MAX_BDEV, used_blocks);
     if (r < 0) {
       return r;
     }
@@ -1598,10 +1358,6 @@ int BlueFS::_replay(bool noop, bool to_stdout)
     }
   }
 
-  for (unsigned id = 0; id < block_unused_too_granular.size(); ++id) {
-    dout(10) << __func__ << " block_unused_too_granular " << id << ": "
-            << block_unused_too_granular[id] << dendl;
-  }
   dout(10) << __func__ << " done" << dendl;
   return 0;
 }
@@ -1713,8 +1469,8 @@ int BlueFS::device_migrate_to_existing(
        PExtentVector to_release;
        to_release.emplace_back(old_ext.offset, old_ext.length);
        alloc[old_ext.bdev]->release(to_release);
-        if (alloc[old_ext.bdev] == shared_bdev_alloc) {
-          shared_bdev_used -= to_release.size();
+        if (is_shared_alloc(old_ext.bdev)) {
+          shared_alloc->bluefs_used -= to_release.size();
         }
       }
 
@@ -1854,8 +1610,8 @@ int BlueFS::device_migrate_to_new(
        PExtentVector to_release;
        to_release.emplace_back(old_ext.offset, old_ext.length);
        alloc[old_ext.bdev]->release(to_release);
-        if (alloc[old_ext.bdev] == shared_bdev_alloc) {
-          shared_bdev_used -= to_release.size();
+        if (is_shared_alloc(old_ext.bdev)) {
+          shared_alloc->bluefs_used -= to_release.size();
         }
       }
 
@@ -2172,8 +1928,6 @@ uint64_t BlueFS::_estimate_log_size()
   int avg_file_size = 12;
   uint64_t size = 4096 * 2;
   size += file_map.size() * (1 + sizeof(bluefs_fnode_t));
-  for (auto& p : block_all)
-    size += p.num_intervals() * (1 + 1 + sizeof(uint64_t) * 2);
   size += dir_map.size() + (1 + avg_dir_size);
   size += file_map.size() * (1 + avg_dir_size + avg_file_size);
   return round_up_to(size, super.block_size);
@@ -2217,38 +1971,6 @@ void BlueFS::_compact_log_dump_metadata(bluefs_transaction_t *t,
   dout(20) << __func__ << " op_init" << dendl;
 
   t->op_init();
-  for (unsigned bdev = 0; bdev < MAX_BDEV; ++bdev) {
-    interval_set<uint64_t>& p = block_all[bdev];
-    for (interval_set<uint64_t>::iterator q = p.begin(); q != p.end(); ++q) {
-      auto bdev_new = bdev;
-      if ((flags & REMOVE_WAL) && bdev == BDEV_WAL) {
-       continue;
-      }
-      if ((flags & REMOVE_DB) && bdev == BDEV_DB) {
-       continue;
-      }
-      if ((flags & RENAME_SLOW2DB) && bdev == BDEV_SLOW) {
-       bdev_new = BDEV_DB;
-      }
-      if ((flags & RENAME_DB2SLOW) && bdev == BDEV_DB) {
-       bdev_new = BDEV_SLOW;
-      }
-      if (bdev == BDEV_NEWDB) {
-       // REMOVE_DB xor RENAME_DB
-       ceph_assert(!(flags & REMOVE_DB) != !(flags & RENAME_DB2SLOW));
-       ceph_assert(!(flags & RENAME_SLOW2DB));
-       bdev_new = BDEV_DB;
-      }
-      if (bdev == BDEV_NEWWAL) {
-       ceph_assert(flags & REMOVE_WAL);
-       bdev_new = BDEV_WAL;
-      }
-      dout(20) << __func__ << " op_alloc_add " << bdev_new << " 0x"
-               << std::hex << q.get_start() << "~" << q.get_len() << std::dec
-               << dendl;
-      t->op_alloc_add(bdev_new, q.get_start(), q.get_len());
-    }
-  }
   for (auto& [ino, file_ref] : file_map) {
     if (ino == 1)
       continue;
@@ -2733,8 +2455,8 @@ int BlueFS::_flush_and_sync_log(std::unique_lock<ceph::mutex>& l,
        }
       }
       alloc[i]->release(to_release[i]);
-      if (alloc[i] == shared_bdev_alloc) {
-        shared_bdev_used -= to_release[i].size();
+      if (is_shared_alloc(i)) {
+        shared_alloc->bluefs_used -= to_release[i].size();
       }
     }
   }
@@ -3105,7 +2827,7 @@ void BlueFS::flush_bdev()
   for (unsigned i = 0; i < MAX_BDEV; i++) {
     // alloc space from BDEV_SLOW is unexpected.
     // So most cases we don't alloc from BDEV_SLOW and so avoiding flush not-used device.
-    if (bdev[i] && ((i != BDEV_SLOW) || (block_all[i].size() - alloc[i]->get_free()))) {
+    if (bdev[i] && (i != BDEV_SLOW || _get_used(i))) {
       bdev[i]->flush();
     }
   }
@@ -3146,8 +2868,8 @@ int BlueFS::_allocate_without_fallback(uint8_t id, uint64_t len,
       alloc[id]->dump();
     return -ENOSPC;
   }
-  if (alloc[id] == shared_bdev_alloc) {
-    shared_bdev_used += alloc_len;
+  if (is_shared_alloc(id)) {
+    shared_alloc->bluefs_used += alloc_len;
   }
 
   return 0;
@@ -3192,14 +2914,13 @@ int BlueFS::_allocate(uint8_t id, uint64_t len,
            << std::dec << dendl;
     return -ENOSPC;
   } else {
-    uint64_t total_allocated =
-      block_all[id].size() - alloc[id]->get_free();
-    if (max_bytes[id] < total_allocated) {
-      logger->set(max_bytes_pcounters[id], total_allocated);
-      max_bytes[id] = total_allocated;
+    uint64_t used = _get_used(id);
+    if (max_bytes[id] < used) {
+      logger->set(max_bytes_pcounters[id], used);
+      max_bytes[id] = used;
     }
-    if (alloc[id] == shared_bdev_alloc) {
-      shared_bdev_used += alloc_len;
+    if (is_shared_alloc(id)) {
+      shared_alloc->bluefs_used += alloc_len;
     }
   }
 
@@ -3834,16 +3555,6 @@ int BlueFS::do_replay_recovery_read(FileReader *log_reader,
   return 0;
 }
 
-void BlueFS::debug_inject_duplicate_gift(unsigned id,
-  uint64_t offset,
-  uint64_t len)
-{
-  dout(0) << __func__ << dendl;
-  if (id < alloc.size() && alloc[id]) {
-    alloc[id]->init_add_free(offset, len);
-  }
-}
-
 // ===============================================
 // OriginalVolumeSelector
 
index c8e2063ce79a0e835edc48e1f12b7e424c36e0e0..b14b94b911d7cf1c2fcd3365b94dac648a66d495 100644 (file)
@@ -70,7 +70,22 @@ public:
   virtual void get_paths(const std::string& base, paths& res) const = 0;
   virtual void dump(std::ostream& sout) = 0;
 };
-class BlueFS;
+
+struct bluefs_shared_alloc_context_t {
+  bool need_init = false;
+  Allocator* a = nullptr;
+
+  std::atomic<uint64_t> bluefs_used = 0;
+
+  void set(Allocator* _a) {
+    a = _a;
+    need_init = true;
+    bluefs_used = 0;
+  }
+  void reset() {
+    a = nullptr;
+  }
+};
 
 class BlueFS {
 public:
@@ -300,19 +315,20 @@ private:
    */
   std::vector<BlockDevice*> bdev;                  ///< block devices we can use
   std::vector<IOContext*> ioc;                     ///< IOContexts for bdevs
-  std::vector<interval_set<uint64_t> > block_all;  ///< extents in bdev we own
+  std::vector<uint64_t> block_reserved;            ///< starting reserve extent per device
   std::vector<Allocator*> alloc;                   ///< allocators for bdevs
   std::vector<uint64_t> alloc_size;                ///< alloc size for each device
   std::vector<interval_set<uint64_t>> pending_release; ///< extents to release
-  std::vector<interval_set<uint64_t>> block_unused_too_granular;
+  //std::vector<interval_set<uint64_t>> block_unused_too_granular;
 
   BlockDevice::aio_callback_t discard_cb[3]; //discard callbacks for each dev
 
   std::unique_ptr<BlueFSVolumeSelector> vselector;
-  bool need_shared_alloc_init = false;
-  Allocator* shared_bdev_alloc = nullptr;
-  std::atomic<uint64_t> shared_bdev_used = 0;
-
+  bluefs_shared_alloc_context_t* shared_alloc = nullptr;
+  unsigned shared_alloc_id = unsigned(-1);
+  /// true when device index @p id equals shared_alloc_id, which starts out
+  /// as unsigned(-1); where it gets assigned is not visible in this hunk
+  inline bool is_shared_alloc(unsigned id) const {
+    return id == shared_alloc_id;
+  }
 
   class SocketHook;
   SocketHook* asok_hook = nullptr;
@@ -326,6 +342,10 @@ private:
 
   void _pad_bl(ceph::buffer::list& bl);  ///< pad ceph::buffer::list to block size w/ zeros
 
+  uint64_t _get_used(unsigned id) const;
+  uint64_t _get_total(unsigned id) const;
+
+
   FileRef _get_file(uint64_t ino);
   void _drop_link(FileRef f);
 
@@ -399,13 +419,10 @@ private:
   int _write_super(int dev);
   int _check_new_allocations(const bluefs_fnode_t& fnode,
     size_t dev_count,
-    boost::dynamic_bitset<uint64_t>* owned_blocks,
     boost::dynamic_bitset<uint64_t>* used_blocks);
   int _verify_alloc_granularity(
     __u8 id, uint64_t offset, uint64_t length,
     const char *op);
-  int _adjust_granularity(
-    __u8 id, uint64_t *offset, uint64_t *length, bool alloc);
   int _replay(bool noop, bool to_stdout = false); ///< replay journal
 
   FileWriter *_create_writer(FileRef f);
@@ -420,11 +437,8 @@ private:
     return 4096;
   }
 
-  void _add_block_extent(bool create, unsigned bdev, uint64_t offset,
-                        uint64_t len, bool skip=false);
-
 public:
-  BlueFS(CephContext* cct);
+  BlueFS(CephContext* cct, bluefs_shared_alloc_context_t* _shared_alloc);
   ~BlueFS();
 
   // the super is always stored on bdev 0
@@ -458,7 +472,6 @@ public:
   uint64_t get_total(unsigned id);
   uint64_t get_free(unsigned id);
   uint64_t get_used(unsigned id);
-  void get_usage(std::vector<pair<uint64_t,uint64_t>> *usage); // [<free,total> ...]
   void dump_perf_counters(ceph::Formatter *f);
 
   void dump_block_extents(std::ostream& out);
@@ -519,19 +532,10 @@ public:
   }
 
   int add_block_device(unsigned bdev, const std::string& path, bool trim,
-                      bool shared_with_bluestore = false,
-                       Allocator* shared_bdev_alloc = nullptr);
+                       uint64_t reserved,
+                      bool shared_with_bluestore = false);
   bool bdev_support_label(unsigned id);
-  uint64_t get_block_device_size(unsigned bdev);
-
-  /// gift more block space
-  void add_block_extent(bool create, unsigned bdev, uint64_t offset, uint64_t len,
-                        bool skip=false) {
-    std::unique_lock l(lock);
-    _add_block_extent(create, bdev, offset, len, skip);
-    int r = _flush_and_sync_log(l);
-    ceph_assert(r == 0);
-  }
+  uint64_t get_block_device_size(unsigned bdev) const;
 
   // handler for discard event
   void handle_discard(unsigned dev, interval_set<uint64_t>& to_release);
@@ -590,7 +594,6 @@ public:
                              bufferlist* bl);
 
   /// test purpose methods
-  void debug_inject_duplicate_gift(unsigned bdev, uint64_t offset, uint64_t len);
   const PerfCounters* get_perf_counters() const {
     return logger;
   }
index 331559da0bab63a6edb7fcaa3411faa89acdee11..f66da6a5556ff25d11a03e03893a1e28b5429760 100644 (file)
@@ -4147,8 +4147,8 @@ static void discard_cb(void *priv, void *priv2)
+// Pass the extents in to_release to release() on the shared allocator.
+// The allocator must be present at this point; that is asserted.
 void BlueStore::handle_discard(interval_set<uint64_t>& to_release)
 {
   dout(10) << __func__ << dendl;
-  ceph_assert(alloc);
-  alloc->release(to_release);
+  ceph_assert(shared_alloc.a);
+  shared_alloc.a->release(to_release);
 }
 
 BlueStore::BlueStore(CephContext *cct, const string& path)
@@ -4916,7 +4916,6 @@ int BlueStore::_open_bdev(bool create)
 void BlueStore::_validate_bdev()
 {
   ceph_assert(bdev);
-  ceph_assert(min_alloc_size); // _get_odisk_reserved depends on that
   uint64_t dev_size = bdev->get_size();
   ceph_assert(dev_size > _get_ondisk_reserved());
 }
@@ -5058,7 +5057,7 @@ int BlueStore::_write_out_fm_meta(uint64_t target_size)
 
 int BlueStore::_open_alloc()
 {
-  ceph_assert(alloc == NULL);
+  ceph_assert(shared_alloc.a == NULL);
   ceph_assert(bdev->get_size());
 
   uint64_t alloc_size = min_alloc_size;
@@ -5069,11 +5068,11 @@ int BlueStore::_open_alloc()
     alloc_size = _zoned_piggyback_device_parameters_onto(alloc_size);
   }
 
-  alloc = Allocator::create(cct, cct->_conf->bluestore_allocator,
+  shared_alloc.set(Allocator::create(cct, cct->_conf->bluestore_allocator,
                             bdev->get_size(),
-                            alloc_size, "block");
+                            alloc_size, "block"));
 
-  if (!alloc) {
+  if (!shared_alloc.a) {
     lderr(cct) << __func__ << " Allocator::unknown alloc type "
                << cct->_conf->bluestore_allocator
                << dendl;
@@ -5081,7 +5080,7 @@ int BlueStore::_open_alloc()
   }
 
   if (bdev->is_smr()) {
-    alloc->set_zone_states(fm->get_zone_states(db));
+    shared_alloc.a->set_zone_states(fm->get_zone_states(db));
   }
 
   uint64_t num = 0, bytes = 0;
@@ -5091,7 +5090,7 @@ int BlueStore::_open_alloc()
   fm->enumerate_reset();
   uint64_t offset, length;
   while (fm->enumerate_next(db, &offset, &length)) {
-    alloc->init_add_free(offset, length);
+    shared_alloc.a->init_add_free(offset, length);
     ++num;
     bytes += length;
   }
@@ -5099,7 +5098,7 @@ int BlueStore::_open_alloc()
 
   dout(1) << __func__ << " loaded " << byte_u_t(bytes)
     << " in " << num << " extents"
-    << " available " << byte_u_t(alloc->get_free())
+    << " available " << byte_u_t(shared_alloc.a->get_free())
     << dendl;
 
   return 0;
@@ -5110,10 +5109,10 @@ void BlueStore::_close_alloc()
   ceph_assert(bdev);
   bdev->discard_drain();
 
-  ceph_assert(alloc);
-  alloc->shutdown();
-  delete alloc;
-  alloc = NULL;
+  ceph_assert(shared_alloc.a);
+  shared_alloc.a->shutdown();
+  delete shared_alloc.a;
+  shared_alloc.reset();
 }
 
 int BlueStore::_open_fsid(bool create)
@@ -5275,7 +5274,7 @@ bool BlueStore::test_mount_in_use()
 int BlueStore::_minimal_open_bluefs(bool create)
 {
   int r;
-  bluefs = new BlueFS(cct);
+  bluefs = new BlueFS(cct, &shared_alloc);
 
   string bfn;
   struct stat st;
@@ -5284,7 +5283,8 @@ int BlueStore::_minimal_open_bluefs(bool create)
   if (::stat(bfn.c_str(), &st) == 0) {
     r = bluefs->add_block_device(
       BlueFS::BDEV_DB, bfn,
-      create && cct->_conf->bdev_enable_discard);
+      create && cct->_conf->bdev_enable_discard,
+      SUPER_RESERVED);
     if (r < 0) {
       derr << __func__ << " add block device(" << bfn << ") returned: "
             << cpp_strerror(r) << dendl;
@@ -5303,13 +5303,6 @@ int BlueStore::_minimal_open_bluefs(bool create)
         goto free_bluefs;
       }
     }
-    if (create) {
-      bluefs->add_block_extent(
-        create,
-       BlueFS::BDEV_DB,
-       SUPER_RESERVED,
-       bluefs->get_block_device_size(BlueFS::BDEV_DB) - SUPER_RESERVED);
-    }
     bluefs_layout.shared_bdev = BlueFS::BDEV_SLOW;
     bluefs_layout.dedicated_db = true;
   } else {
@@ -5328,27 +5321,19 @@ int BlueStore::_minimal_open_bluefs(bool create)
   bfn = path + "/block";
   // never trim here
   r = bluefs->add_block_device(bluefs_layout.shared_bdev, bfn, false,
-                               true,
-                              alloc);
+                               0, // no need to provide valid 'reserved' for shared dev
+                               true);
   if (r < 0) {
     derr << __func__ << " add block device(" << bfn << ") returned: "
          << cpp_strerror(r) << dendl;
     goto free_bluefs;
   }
-  if (create) {
-    auto reserved = _get_ondisk_reserved();
-
-    bluefs->add_block_extent(
-      create,
-      bluefs_layout.shared_bdev,
-      reserved,
-      p2align(bdev->get_size(), min_alloc_size) - reserved);
-  }
 
   bfn = path + "/block.wal";
   if (::stat(bfn.c_str(), &st) == 0) {
     r = bluefs->add_block_device(BlueFS::BDEV_WAL, bfn,
-                                create && cct->_conf->bdev_enable_discard);
+                                create && cct->_conf->bdev_enable_discard,
+                                 BDEV_LABEL_BLOCK_SIZE);
     if (r < 0) {
       derr << __func__ << " add block device(" << bfn << ") returned: "
            << cpp_strerror(r) << dendl;
@@ -5367,13 +5352,6 @@ int BlueStore::_minimal_open_bluefs(bool create)
       }
     }
 
-    if (create) {
-      bluefs->add_block_extent(
-        create,
-        BlueFS::BDEV_WAL, BDEV_LABEL_BLOCK_SIZE,
-         bluefs->get_block_device_size(BlueFS::BDEV_WAL) -
-         BDEV_LABEL_BLOCK_SIZE);
-    }
     bluefs_layout.dedicated_wal = true;
   } else {
     r = 0;
@@ -5393,7 +5371,7 @@ free_bluefs:
   return r;
 }
 
-int BlueStore::_open_bluefs(bool create)
+int BlueStore::_open_bluefs(bool create, bool read_only)
 {
   int r = _minimal_open_bluefs(create);
   if (r < 0) {
@@ -5487,6 +5465,7 @@ int BlueStore::_open_db_and_around(bool read_only)
   if (do_bluefs) {
     // open in read-only first to read FM list and init allocator
     // as they might be needed for some BlueFS procedures
+
     r = _open_db(false, false, true);
     if (r < 0)
       return r;
@@ -5662,7 +5641,7 @@ int BlueStore::_prepare_db_environment(bool create, bool read_only,
       return -EINVAL;
     }
 
-    r = _open_bluefs(create);
+    r = _open_bluefs(create, read_only);
     if (r < 0) {
       return r;
     }
@@ -5833,7 +5812,7 @@ void BlueStore::_dump_alloc_on_failure()
     cct->_conf->bluestore_bluefs_alloc_failure_dump_interval;
   if (dump_interval > 0 &&
     next_dump_on_bluefs_alloc_failure <= ceph_clock_now()) {
-    alloc->dump();
+    shared_alloc.a->dump();
     next_dump_on_bluefs_alloc_failure = ceph_clock_now();
     next_dump_on_bluefs_alloc_failure += dump_interval;
   }
@@ -6180,15 +6159,15 @@ int BlueStore::mkfs()
       return r;
     alloc_size = _zoned_piggyback_device_parameters_onto(alloc_size);
   }
-  alloc = Allocator::create(cct, cct->_conf->bluestore_allocator,
+  shared_alloc.set(Allocator::create(cct, cct->_conf->bluestore_allocator,
     bdev->get_size(),
-    alloc_size, "block");
-  if (!alloc) {
+    alloc_size, "block"));
+  if (!shared_alloc.a) {
     r = -EINVAL;
     goto out_close_bdev;
   }
   reserved = _get_ondisk_reserved();
-  alloc->init_add_free(reserved,
+  shared_alloc.a->init_add_free(reserved,
     p2align(bdev->get_size(), min_alloc_size) - reserved);
 
   r = _open_db(true);
@@ -6243,8 +6222,8 @@ int BlueStore::mkfs()
  out_close_db:
   _close_db(false);
  out_close_bdev:
-  delete alloc;
-  alloc = nullptr;
+  delete shared_alloc.a;
+  shared_alloc.reset();
   _close_bdev();
  out_close_fsid:
   _close_fsid();
@@ -6318,7 +6297,6 @@ int BlueStore::add_new_bluefs_device(int id, const string& dev_path)
 
   r = _mount_for_bluefs();
 
-  int reserved = 0;
   if (id == BlueFS::BDEV_NEWWAL) {
     string p = path + "/block.wal";
     r = _setup_block_symlink_or_file("block.wal", dev_path,
@@ -6327,7 +6305,8 @@ int BlueStore::add_new_bluefs_device(int id, const string& dev_path)
     ceph_assert(r == 0);
 
     r = bluefs->add_block_device(BlueFS::BDEV_NEWWAL, p,
-                                cct->_conf->bdev_enable_discard);
+                                cct->_conf->bdev_enable_discard,
+                                 BDEV_LABEL_BLOCK_SIZE);
     ceph_assert(r == 0);
 
     if (bluefs->bdev_support_label(BlueFS::BDEV_NEWWAL)) {
@@ -6339,7 +6318,6 @@ int BlueStore::add_new_bluefs_device(int id, const string& dev_path)
       ceph_assert(r == 0);
     }
 
-    reserved = BDEV_LABEL_BLOCK_SIZE;
     bluefs_layout.dedicated_wal = true;
   } else if (id == BlueFS::BDEV_NEWDB) {
     string p = path + "/block.db";
@@ -6349,7 +6327,8 @@ int BlueStore::add_new_bluefs_device(int id, const string& dev_path)
     ceph_assert(r == 0);
 
     r = bluefs->add_block_device(BlueFS::BDEV_NEWDB, p,
-                                cct->_conf->bdev_enable_discard);
+                                cct->_conf->bdev_enable_discard,
+                                 SUPER_RESERVED);
     ceph_assert(r == 0);
 
     if (bluefs->bdev_support_label(BlueFS::BDEV_NEWDB)) {
@@ -6360,7 +6339,6 @@ int BlueStore::add_new_bluefs_device(int id, const string& dev_path)
        true);
       ceph_assert(r == 0);
     }
-    reserved = SUPER_RESERVED;
     bluefs_layout.shared_bdev = BlueFS::BDEV_SLOW;
     bluefs_layout.dedicated_db = true;
   }
@@ -6368,12 +6346,6 @@ int BlueStore::add_new_bluefs_device(int id, const string& dev_path)
   bluefs->umount();
   bluefs->mount();
 
-  bluefs->add_block_extent(
-    false,
-    id,
-    reserved,
-    bluefs->get_block_device_size(id) - reserved, true);
-
   r = bluefs->prepare_new_device(id, bluefs_layout);
   ceph_assert(r == 0);
 
@@ -6459,7 +6431,6 @@ int BlueStore::migrate_to_new_bluefs_device(const set<int>& devs_source,
 
   r = _mount_for_bluefs();
 
-  int reserved = 0;
   string link_db;
   string link_wal;
   if (devs_source.count(BlueFS::BDEV_DB) &&
@@ -6481,7 +6452,8 @@ int BlueStore::migrate_to_new_bluefs_device(const set<int>& devs_source,
     bluefs_layout.dedicated_wal = true;
 
     r = bluefs->add_block_device(BlueFS::BDEV_NEWWAL, dev_path,
-                                cct->_conf->bdev_enable_discard);
+                                cct->_conf->bdev_enable_discard,
+                                 BDEV_LABEL_BLOCK_SIZE);
     ceph_assert(r == 0);
 
     if (bluefs->bdev_support_label(BlueFS::BDEV_NEWWAL)) {
@@ -6492,7 +6464,6 @@ int BlueStore::migrate_to_new_bluefs_device(const set<int>& devs_source,
        true);
       ceph_assert(r == 0);
     }
-    reserved = BDEV_LABEL_BLOCK_SIZE;
   } else if (id == BlueFS::BDEV_NEWDB) {
     target_name = "block.db";
     target_size = cct->_conf->bluestore_block_db_size;
@@ -6500,7 +6471,8 @@ int BlueStore::migrate_to_new_bluefs_device(const set<int>& devs_source,
     bluefs_layout.dedicated_db = true;
 
     r = bluefs->add_block_device(BlueFS::BDEV_NEWDB, dev_path,
-                                cct->_conf->bdev_enable_discard);
+                                cct->_conf->bdev_enable_discard,
+                                 SUPER_RESERVED);
     ceph_assert(r == 0);
 
     if (bluefs->bdev_support_label(BlueFS::BDEV_NEWDB)) {
@@ -6511,16 +6483,11 @@ int BlueStore::migrate_to_new_bluefs_device(const set<int>& devs_source,
        true);
       ceph_assert(r == 0);
     }
-    reserved = SUPER_RESERVED;
   }
 
   bluefs->umount();
   bluefs->mount();
 
-  bluefs->add_block_extent(
-    false,
-    id, reserved, bluefs->get_block_device_size(id) - reserved);
-
   r = bluefs->device_migrate_to_new(cct, devs_source, id, bluefs_layout);
 
   if (r < 0) {
@@ -6606,28 +6573,20 @@ int BlueStore::expand_devices(ostream& out)
       continue;
     }
 
-    interval_set<uint64_t> before;
-    bluefs->get_block_extents(devid, &before);
-    ceph_assert(!before.empty());
-    uint64_t end = before.range_end();
-    if (end < size) {
-      out << devid
-         <<" : expanding " << " from 0x" << std::hex
-         << end << " to 0x" << size << std::dec << std::endl;
-      bluefs->add_block_extent(false, devid, end, size-end);
-      string p = get_device_path(devid);
-      const char* path = p.c_str();
-      if (path == nullptr) {
-       derr << devid
-             <<": can't find device path " << dendl;
-       continue;
-      }
-      if (bluefs->bdev_support_label(devid)) {
-        if (_set_bdev_label_size(p, size) >= 0) {
-          out << devid
-            << " : size label updated to " << size
-            << std::endl;
-        }
+    // 'size' follows a "0x" prefix, so the stream has to be switched to hex
+    // for the digits to match it; std::dec restores decimal output afterwards.
+    out << devid
+       <<" : expanding " << " to 0x" << std::hex << size
+       << std::dec << std::endl;
+    string p = get_device_path(devid);
+    const char* path = p.c_str();
+    if (path == nullptr) {
+      derr << devid
+           <<": can't find device path " << dendl;
+      continue;
+    }
+    if (bluefs->bdev_support_label(devid)) {
+      if (_set_bdev_label_size(p, size) >= 0) {
+        out << devid
+          << " : size label updated to " << size
+          << std::endl;
       }
     }
   }
@@ -6637,8 +6596,6 @@ int BlueStore::expand_devices(ostream& out)
     out << bluefs_layout.shared_bdev
       << " : expanding " << " from 0x" << std::hex
       << size0 << " to 0x" << size << std::dec << std::endl;
-    bluefs->add_block_extent(false,
-      bluefs_layout.shared_bdev, size0, size - size0);
     _write_out_fm_meta(size);
     if (bdev->supported_bdev_label()) {
       if (_set_bdev_label_size(path, size) >= 0) {
@@ -8282,17 +8239,18 @@ int BlueStore::_fsck_on_open(BlueStore::FSCKDepth depth, bool repair)
              continue;
            }
            PExtentVector exts;
-           int64_t alloc_len = alloc->allocate(e->length, min_alloc_size,
-                                               0, 0, &exts);
+           int64_t alloc_len =
+              shared_alloc.a->allocate(e->length, min_alloc_size,
+                                      0, 0, &exts);
            if (alloc_len < 0 || alloc_len < (int64_t)e->length) {
              derr << __func__
                   << " failed to allocate 0x" << std::hex << e->length
                   << " allocated 0x " << (alloc_len < 0 ? 0 : alloc_len)
                   << " min_alloc_size 0x" << min_alloc_size
-                  << " available 0x " << alloc->get_free()
+                  << " available 0x " << shared_alloc.a->get_free()
                   << std::dec << dendl;
              if (alloc_len > 0) {
-               alloc->release(exts);
+                shared_alloc.a->release(exts);
              }
              bypass_rest = true;
              break;
@@ -8372,7 +8330,7 @@ int BlueStore::_fsck_on_open(BlueStore::FSCKDepth depth, bool repair)
                 << "~" << it.get_len() << std::dec << dendl;
        fm->release(it.get_start(), it.get_len(), txn);
       }
-      alloc->release(to_release);
+      shared_alloc.a->release(to_release);
       to_release.clear();
     } // if (it) {
   } //if (repair && repairer.preprocess_misreference()) {
@@ -8634,7 +8592,7 @@ void BlueStore::inject_leaked(uint64_t len)
   txn = db->get_transaction();
 
   PExtentVector exts;
-  int64_t alloc_len = alloc->allocate(len, min_alloc_size,
+  int64_t alloc_len = shared_alloc.a->allocate(len, min_alloc_size,
                                           min_alloc_size * 256, 0, &exts);
   ceph_assert(alloc_len >= (int64_t)len);
   for (auto& p : exts) {
@@ -8912,7 +8870,7 @@ void BlueStore::_get_statfs_overall(struct store_statfs_t *buf)
     db->estimate_prefix_size(PREFIX_OMAP, string()) +
     db->estimate_prefix_size(PREFIX_PERPOOL_OMAP, string());
 
-  uint64_t bfree = alloc->get_free();
+  uint64_t bfree = shared_alloc.a->get_free();
 
   if (bluefs) {
     buf->internally_reserved = 0;
@@ -10607,6 +10565,7 @@ ObjectMap::ObjectMapIterator BlueStore::get_omap_iterator(
 // write helpers
 
+// Returns max(SUPER_RESERVED, min_alloc_size) passed through round_up_to()
+// with min_alloc_size as the unit. mkfs() uses the result as the offset at
+// which allocator-managed free space begins on the main device.
+// min_alloc_size must already be non-zero; that is asserted here.
 uint64_t BlueStore::_get_ondisk_reserved() const {
+  ceph_assert(min_alloc_size);
   return round_up_to(
     std::max<uint64_t>(SUPER_RESERVED, min_alloc_size), min_alloc_size);
 }
@@ -11416,7 +11375,7 @@ void BlueStore::_txc_release_alloc(TransContext *txc)
     }
     dout(10) << __func__ << "(sync) " << txc << " " << std::hex
              << txc->released << std::dec << dendl;
-    alloc->release(txc->released);
+    shared_alloc.a->release(txc->released);
   }
 
 out:
@@ -11923,7 +11882,7 @@ void BlueStore::_kv_finalize_thread()
       _reap_collections();
 
       logger->set(l_bluestore_fragmentation,
-         (uint64_t)(alloc->get_fragmentation() * 1000));
+         (uint64_t)(shared_alloc.a->get_fragmentation() * 1000));
 
       log_latency("kv_final",
        l_bluestore_kv_final_lat,
@@ -13579,17 +13538,17 @@ int BlueStore::_do_alloc_write(
   PExtentVector prealloc;
   prealloc.reserve(2 * wctx->writes.size());;
   int64_t prealloc_left = 0;
-  prealloc_left = alloc->allocate(
+  prealloc_left = shared_alloc.a->allocate(
     need, min_alloc_size, need,
     0, &prealloc);
   if (prealloc_left < 0 || prealloc_left < (int64_t)need) {
     derr << __func__ << " failed to allocate 0x" << std::hex << need
          << " allocated 0x " << (prealloc_left < 0 ? 0 : prealloc_left)
          << " min_alloc_size 0x" << min_alloc_size
-         << " available 0x " << alloc->get_free()
+         << " available 0x " << shared_alloc.a->get_free()
          << std::dec << dendl;
     if (prealloc.size()) {
-      alloc->release(prealloc);
+      shared_alloc.a->release(prealloc);
     }
     return -ENOSPC;
   }
index edd6c015fbde525cf6326657547844a2d6702a7b..2f0f80160fd52761733d62c96d36c2857ab72c40 100644 (file)
@@ -2051,7 +2051,9 @@ private:
   BlockDevice *bdev = nullptr;
   std::string freelist_type;
   FreelistManager *fm = nullptr;
-  Allocator *alloc = nullptr;
+
+  bluefs_shared_alloc_context_t shared_alloc;
+
   uuid_d fsid;
   int path_fd = -1;  ///< open handle to $path
   int fsid_fd = -1;  ///< open handle (locked) to $path/fsid
@@ -2353,7 +2355,7 @@ private:
 
   int _minimal_open_bluefs(bool create);
   void _minimal_close_bluefs();
-  int _open_bluefs(bool create);
+  int _open_bluefs(bool create, bool read_only);
   void _close_bluefs(bool cold_close);
 
   // Limited (u)mount intended for BlueFS operations only
index 8b1a1d5cb397d63234551abb27aacb2315245be0..4a2a8152c2d7b650bbfc0c1f66ce96f7cfab06f8 100644 (file)
@@ -228,8 +228,6 @@ void bluefs_transaction_t::generate_test_instances(
   ls.push_back(new bluefs_transaction_t);
   ls.push_back(new bluefs_transaction_t);
   ls.back()->op_init();
-  ls.back()->op_alloc_add(0, 0, 123123211);
-  ls.back()->op_alloc_rm(1, 0, 123);
   ls.back()->op_dir_create("dir");
   ls.back()->op_dir_create("dir2");
   bluefs_fnode_t fnode;
index 42bc6ebae6edcc3302573c4ac949a05df6d29848..eea4845349e81caaef470c397ba350140a45e6f9 100644 (file)
@@ -185,8 +185,8 @@ struct bluefs_transaction_t {
   typedef enum {
     OP_NONE = 0,
     OP_INIT,        ///< initial (empty) file system marker
-    OP_ALLOC_ADD,   ///< add extent to available block storage (extent)
-    OP_ALLOC_RM,    ///< remove extent from available block storage (extent)
+    OP_ALLOC_ADD,   ///< OBSOLETE: add extent to available block storage (extent)
+    OP_ALLOC_RM,    ///< OBSOLETE: remove extent from available block storage (extent)
     OP_DIR_LINK,    ///< (re)set a dir entry (dirname, filename, ino)
     OP_DIR_UNLINK,  ///< remove a dir entry (dirname, filename)
     OP_DIR_CREATE,  ///< create a dir (dirname)
@@ -214,20 +214,6 @@ struct bluefs_transaction_t {
     using ceph::encode;
     encode((__u8)OP_INIT, op_bl);
   }
-  void op_alloc_add(uint8_t id, uint64_t offset, uint64_t length) {
-    using ceph::encode;
-    encode((__u8)OP_ALLOC_ADD, op_bl);
-    encode(id, op_bl);
-    encode(offset, op_bl);
-    encode(length, op_bl);
-  }
-  void op_alloc_rm(uint8_t id, uint64_t offset, uint64_t length) {
-    using ceph::encode;
-    encode((__u8)OP_ALLOC_RM, op_bl);
-    encode(id, op_bl);
-    encode(offset, op_bl);
-    encode(length, op_bl);
-  }
   void op_dir_create(const std::string& dir) {
     using ceph::encode;
     encode((__u8)OP_DIR_CREATE, op_bl);
@@ -284,5 +270,4 @@ struct bluefs_transaction_t {
 WRITE_CLASS_ENCODER(bluefs_transaction_t)
 
 std::ostream& operator<<(std::ostream& out, const bluefs_transaction_t& t);
-
 #endif
index 326658e446a941aecbcc3438a26cd157f205b4f4..40900e38559fff403a53e6c8cd5bd9120bc761ef 100644 (file)
@@ -161,7 +161,7 @@ void add_devices(
       cout << " -> " << target_path;
     }
     cout << std::endl;
-    int r = fs->add_block_device(e.second, e.first, false);
+    int r = fs->add_block_device(e.second, e.first, false, 0); // 'reserved' is fake
     if (r < 0) {
       cerr << "unable to open " << e.first << ": " << cpp_strerror(r) << std::endl;
       exit(EXIT_FAILURE);
@@ -169,13 +169,15 @@ void add_devices(
   }
 }
 
-BlueFS *open_bluefs(
+BlueFS *open_bluefs_readonly(
   CephContext *cct,
   const string& path,
   const vector<string>& devs)
 {
   validate_path(cct, path, true);
-  BlueFS *fs = new BlueFS(cct);
+  // We provide no shared allocator, which prevents BlueFS from operating in R/W mode.
+  // Read-only mode isn't strictly enforced, though.
+  BlueFS *fs = new BlueFS(cct, nullptr);
 
   add_devices(fs, cct, devs);
 
@@ -194,7 +196,9 @@ void log_dump(
   const vector<string>& devs)
 {
   validate_path(cct, path, true);
-  BlueFS *fs = new BlueFS(cct);
+  // We provide no shared allocator, which prevents BlueFS from operating in R/W mode.
+  // Read-only mode isn't strictly enforced, though.
+  BlueFS *fs = new BlueFS(cct, nullptr);
 
   add_devices(fs, cct, devs);
   int r = fs->log_dump();
@@ -595,7 +599,7 @@ int main(int argc, char **argv)
     }
   }
   else if (action == "bluefs-export") {
-    BlueFS *fs = open_bluefs(cct.get(), path, devs);
+    BlueFS *fs = open_bluefs_readonly(cct.get(), path, devs);
 
     vector<string> dirs;
     int r = fs->readdir("", &dirs);
index 427af756cd6b8cc39c5d028afc10b54271a78567..add3f86c62c1033d6d2212c9952fa4188f0867de 100644 (file)
@@ -89,18 +89,16 @@ TEST(BlueFS, mkfs) {
   uint64_t size = 1048576 * 128;
   TempBdev bdev{size};
   uuid_d fsid;
-  BlueFS fs(g_ceph_context);
-  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
-  fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576);
+  BlueFS fs(g_ceph_context, nullptr);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
   ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
 }
 
 TEST(BlueFS, mkfs_mount) {
   uint64_t size = 1048576 * 128;
   TempBdev bdev{size};
-  BlueFS fs(g_ceph_context);
-  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
-  fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576);
+  BlueFS fs(g_ceph_context, nullptr);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
   uuid_d fsid;
   ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
   ASSERT_EQ(0, fs.mount());
@@ -110,74 +108,11 @@ TEST(BlueFS, mkfs_mount) {
   fs.umount();
 }
 
-TEST(BlueFS, mkfs_mount_duplicate_gift) {
-  uint64_t size = 1048576 * 128;
-  TempBdev bdev{ size };
-  bluefs_extent_t dup_ext;
-  {
-    BlueFS fs(g_ceph_context);
-    ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
-    fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576);
-    uuid_d fsid;
-    ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
-    ASSERT_EQ(0, fs.mount());
-
-    {
-      BlueFS::FileWriter *h;
-      ASSERT_EQ(0, fs.mkdir("dir"));
-      ASSERT_EQ(0, fs.open_for_write("dir", "file1", &h, false));
-      h->append("foo", 3);
-      h->append("bar", 3);
-      h->append("baz", 3);
-      fs.fsync(h);
-      ceph_assert(h->file->fnode.extents.size() > 0);
-      dup_ext = h->file->fnode.extents[0];
-      ceph_assert(dup_ext.bdev == BlueFS::BDEV_DB);
-      fs.close_writer(h);
-    }
-
-    fs.umount();
-  }
-
-  {
-    BlueFS fs(g_ceph_context);
-    ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
-    ASSERT_EQ(0, fs.mount());
-    // free allocation presumably allocated for file1 
-    std::cout << "duplicate extent: " << std::hex
-      << dup_ext.offset << "~" << dup_ext.length
-      << std::dec << std::endl;
-    fs.debug_inject_duplicate_gift(BlueFS::BDEV_DB, dup_ext.offset, dup_ext.length);
-    {
-      // overwrite file1 with file2 
-      BlueFS::FileWriter *h;
-      ASSERT_EQ(0, fs.open_for_write("dir", "file2", &h, false));
-      h->append("foo", 3);
-      h->append("bar", 3);
-      h->append("baz", 3);
-      fs.fsync(h);
-      fs.close_writer(h);
-    }
-    fs.umount();
-  }
-
-  g_ceph_context->_conf.set_val_or_die("bluefs_log_replay_check_allocations", "true");
-  g_ceph_context->_conf.apply_changes(nullptr);
-
-  {
-    // this should fail
-    BlueFS fs(g_ceph_context);
-    ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
-    ASSERT_NE(0, fs.mount());
-  }
-}
-
 TEST(BlueFS, write_read) {
   uint64_t size = 1048576 * 128;
   TempBdev bdev{size};
-  BlueFS fs(g_ceph_context);
-  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
-  fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576);
+  BlueFS fs(g_ceph_context, nullptr);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
   uuid_d fsid;
   ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
   ASSERT_EQ(0, fs.mount());
@@ -206,9 +141,8 @@ TEST(BlueFS, write_read) {
 TEST(BlueFS, small_appends) {
   uint64_t size = 1048576 * 128;
   TempBdev bdev{size};
-  BlueFS fs(g_ceph_context);
-  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
-  fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576);
+  BlueFS fs(g_ceph_context, nullptr);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
   uuid_d fsid;
   ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
   ASSERT_EQ(0, fs.mount());
@@ -239,14 +173,13 @@ TEST(BlueFS, very_large_write) {
   // we'll write a ~5G file, so allocate more than that for the whole fs
   uint64_t size = 1048576 * 1024 * 8ull;
   TempBdev bdev{size};
-  BlueFS fs(g_ceph_context);
+  BlueFS fs(g_ceph_context, nullptr);
 
   bool old = g_ceph_context->_conf.get_val<bool>("bluefs_buffered_io");
   g_ceph_context->_conf.set_val("bluefs_buffered_io", "false");
   uint64_t total_written = 0;
 
-  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
-  fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
   uuid_d fsid;
   ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
   ASSERT_EQ(0, fs.mount());
@@ -430,9 +363,8 @@ TEST(BlueFS, test_flush_1) {
     "65536");
   g_ceph_context->_conf.apply_changes(nullptr);
 
-  BlueFS fs(g_ceph_context);
-  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
-  fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576);
+  BlueFS fs(g_ceph_context, nullptr);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
   uuid_d fsid;
   ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
   ASSERT_EQ(0, fs.mount());
@@ -465,9 +397,8 @@ TEST(BlueFS, test_flush_2) {
     "65536");
   g_ceph_context->_conf.apply_changes(nullptr);
 
-  BlueFS fs(g_ceph_context);
-  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
-  fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576);
+  BlueFS fs(g_ceph_context, nullptr);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
   uuid_d fsid;
   ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
   ASSERT_EQ(0, fs.mount());
@@ -493,9 +424,8 @@ TEST(BlueFS, test_flush_3) {
     "65536");
   g_ceph_context->_conf.apply_changes(nullptr);
 
-  BlueFS fs(g_ceph_context);
-  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
-  fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576);
+  BlueFS fs(g_ceph_context, nullptr);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
   uuid_d fsid;
   ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
   ASSERT_EQ(0, fs.mount());
@@ -527,9 +457,8 @@ TEST(BlueFS, test_simple_compaction_sync) {
   uint64_t size = 1048576 * 128;
   TempBdev bdev{size};
 
-  BlueFS fs(g_ceph_context);
-  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
-  fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576);
+  BlueFS fs(g_ceph_context, nullptr);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
   uuid_d fsid;
   ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
   ASSERT_EQ(0, fs.mount());
@@ -580,9 +509,8 @@ TEST(BlueFS, test_simple_compaction_async) {
   uint64_t size = 1048576 * 128;
   TempBdev bdev{size};
 
-  BlueFS fs(g_ceph_context);
-  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
-  fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576);
+  BlueFS fs(g_ceph_context, nullptr);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
   uuid_d fsid;
   ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
   ASSERT_EQ(0, fs.mount());
@@ -636,9 +564,8 @@ TEST(BlueFS, test_compaction_sync) {
     "bluefs_compact_log_sync",
     "true");
 
-  BlueFS fs(g_ceph_context);
-  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
-  fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576);
+  BlueFS fs(g_ceph_context, nullptr);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
   uuid_d fsid;
   ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
   ASSERT_EQ(0, fs.mount());
@@ -674,9 +601,8 @@ TEST(BlueFS, test_compaction_async) {
     "bluefs_compact_log_sync",
     "false");
 
-  BlueFS fs(g_ceph_context);
-  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
-  fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576);
+  BlueFS fs(g_ceph_context, nullptr);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
   uuid_d fsid;
   ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
   ASSERT_EQ(0, fs.mount());
@@ -712,9 +638,8 @@ TEST(BlueFS, test_replay) {
     "bluefs_compact_log_sync",
     "false");
 
-  BlueFS fs(g_ceph_context);
-  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
-  fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576);
+  BlueFS fs(g_ceph_context, nullptr);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
   uuid_d fsid;
   ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
   ASSERT_EQ(0, fs.mount());
@@ -758,9 +683,8 @@ TEST(BlueFS, test_replay_growth) {
   conf.SetVal("bluefs_sync_write", "true");
   conf.ApplyChanges();
 
-  BlueFS fs(g_ceph_context);
-  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
-  fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576);
+  BlueFS fs(g_ceph_context, nullptr);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
   uuid_d fsid;
   ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
   ASSERT_EQ(0, fs.mount());