From 1c4825dbd298f81217ff2c65f5bc75ea8aa841de Mon Sep 17 00:00:00 2001 From: Igor Fedotov Date: Thu, 3 Oct 2019 20:38:13 +0300 Subject: [PATCH] os/bluestore: share main device allocator with BlueFS. Signed-off-by: Igor Fedotov --- src/os/bluestore/BlueFS.cc | 66 ++++++++++++++++++++++------------- src/os/bluestore/BlueFS.h | 4 ++- src/os/bluestore/BlueStore.cc | 42 ++++++++++++++-------- 3 files changed, 71 insertions(+), 41 deletions(-) diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 0e9d443cbcd11..2d3377020e915 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -168,12 +168,16 @@ BlueFS::BlueFS(CephContext* cct) : cct(cct), bdev(MAX_BDEV), ioc(MAX_BDEV), - block_all(MAX_BDEV) + block_all(MAX_BDEV), + alloc(MAX_BDEV), + alloc_size(MAX_BDEV, 0), + pending_release(MAX_BDEV) { discard_cb[BDEV_WAL] = wal_discard_cb; discard_cb[BDEV_DB] = db_discard_cb; discard_cb[BDEV_SLOW] = slow_discard_cb; asok_hook = SocketHook::create(this); + } BlueFS::~BlueFS() @@ -307,7 +311,8 @@ void BlueFS::_update_logger_stats() } int BlueFS::add_block_device(unsigned id, const string& path, bool trim, - bool shared_with_bluestore) + bool shared_with_bluestore, + Allocator* _shared_bdev_alloc) { dout(10) << __func__ << " bdev " << id << " path " << path << dendl; ceph_assert(id < bdev.size()); @@ -330,6 +335,10 @@ int BlueFS::add_block_device(unsigned id, const string& path, bool trim, << " size " << byte_u_t(b->get_size()) << dendl; bdev[id] = b; ioc[id] = new IOContext(cct, NULL); + if (_shared_bdev_alloc) { + ceph_assert(shared_bdev_alloc == nullptr); + alloc[id] = shared_bdev_alloc = _shared_bdev_alloc; + } return 0; } @@ -360,10 +369,9 @@ void BlueFS::_add_block_extent(unsigned id, uint64_t offset, uint64_t length, ceph_assert(bdev[id]->get_size() >= offset + length); block_all[id].insert(offset, length); - if (id < alloc.size() && alloc[id]) { + if (id < alloc.size() && alloc[id] && alloc[id] != shared_bdev_alloc) { if (!skip) log_t.op_alloc_add(id, offset, length); - alloc[id]->init_add_free(offset, length); } @@ -535,9 +543,6 @@ int BlueFS::mkfs(uuid_d osd_uuid, const bluefs_layout_t& layout) void BlueFS::_init_alloc() { dout(20) << __func__ << dendl; - alloc.resize(MAX_BDEV); - alloc_size.resize(MAX_BDEV, 0); - pending_release.resize(MAX_BDEV); block_unused_too_granular.resize(MAX_BDEV); if (bdev[BDEV_WAL]) { @@ -562,22 +567,28 @@ void BlueFS::_init_alloc() continue; } ceph_assert(bdev[id]->get_size()); - std::string name = "bluefs-"; - const char* devnames[] = {"wal","db","slow"}; - if (id <= BDEV_SLOW) - name += devnames[id]; - else - name += to_string(uintptr_t(this)); ceph_assert(alloc_size[id]); - dout(1) << __func__ << " id " << id - << " alloc_size 0x" << std::hex << alloc_size[id] - << " size 0x" << bdev[id]->get_size() << std::dec << dendl; - alloc[id] = Allocator::create(cct, cct->_conf->bluefs_allocator, - bdev[id]->get_size(), - alloc_size[id], name); - interval_set& p = block_all[id]; - for (interval_set::iterator q = p.begin(); q != p.end(); ++q) { - alloc[id]->init_add_free(q.get_start(), q.get_len()); + if (alloc[id]) { + dout(1) << __func__ << " shared, id " << id + << " alloc_size 0x" << std::hex << alloc_size[id] + << " size 0x" << bdev[id]->get_size() << std::dec << dendl; + } else { + std::string name = "bluefs-"; + const char* devnames[] = { "wal","db","slow" }; + if (id <= BDEV_SLOW) + name += devnames[id]; + else + name += to_string(uintptr_t(this)); + dout(1) << __func__ << " new, id " << id + << " alloc_size 0x" << std::hex << alloc_size[id] + << " size 0x" << bdev[id]->get_size() << std::dec << dendl; + alloc[id] = Allocator::create(cct, cct->_conf->bluefs_allocator, + bdev[id]->get_size(), + alloc_size[id], name); + interval_set& p = block_all[id]; + for (interval_set::iterator q = p.begin(); q != p.end(); ++q) { + alloc[id]->init_add_free(q.get_start(), q.get_len()); + } } } } @@ -591,12 +602,16 @@ void BlueFS::_stop_alloc() } for (auto p : alloc) { - if (p != nullptr) { + if (p != nullptr && p != shared_bdev_alloc) { p->shutdown(); delete p; } } - alloc.clear(); + for (size_t i = 0; i < alloc.size(); ++i) { + if (alloc[i] != shared_bdev_alloc) { + alloc[i] = nullptr; + } + } block_unused_too_granular.clear(); } @@ -1159,7 +1174,8 @@ int BlueFS::_replay(bool noop, bool to_stdout) if (!noop) { block_all[id].insert(offset, length); _adjust_granularity(id, &offset, &length, true); - if (length) { + if (length && + alloc[id] != shared_bdev_alloc) { alloc[id]->init_add_free(offset, length); } diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index fe12df9fba3f8..0b5aeaa0cd033 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -309,6 +309,7 @@ private: BlockDevice::aio_callback_t discard_cb[3]; //discard callbacks for each dev std::unique_ptr vselector; + Allocator* shared_bdev_alloc = nullptr; class SocketHook; SocketHook* asok_hook = nullptr; @@ -514,7 +515,8 @@ public: } int add_block_device(unsigned bdev, const std::string& path, bool trim, - bool shared_with_bluestore=false); + bool shared_with_bluestore = false, + Allocator* shared_bdev_alloc = nullptr); bool bdev_support_label(unsigned id); uint64_t get_block_device_size(unsigned bdev); diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index b014f68b2dc21..ae63ada0ce544 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -5357,13 +5357,22 @@ int BlueStore::_minimal_open_bluefs(bool create) bfn = path + "/block"; // never trim here r = bluefs->add_block_device(bluefs_layout.shared_bdev, bfn, false, - true /* shared with bluestore */); + true, + alloc); if (r < 0) { derr << __func__ << " add block device(" << bfn << ") returned: " << cpp_strerror(r) << dendl; goto free_bluefs; } - + if (create) { + auto reserved = _get_ondisk_reserved(); + + bluefs->add_block_extent( + bluefs_layout.shared_bdev, + reserved, + p2align(bdev->get_size(), min_alloc_size) - reserved); + } + bfn = path + "/block.wal"; if (::stat(bfn.c_str(), &st) == 0) { r = bluefs->add_block_device(BlueFS::BDEV_WAL, bfn, @@ -6067,7 +6076,7 @@ int BlueStore::mkfs() dout(1) << __func__ << " path " << path << dendl; int r; uuid_d old_fsid; - + uint64_t reserved; if (cct->_conf->osd_max_object_size > OBJECT_MAX_SIZE) { derr << __func__ << " osd_max_object_size " << cct->_conf->osd_max_object_size << " > bluestore max " @@ -6189,6 +6198,17 @@ int BlueStore::mkfs() goto out_close_bdev; } + alloc = Allocator::create(cct, cct->_conf->bluestore_allocator, + bdev->get_size(), + min_alloc_size, "block"); + if (!alloc) { + r = -EINVAL; + goto out_close_bdev; + } + reserved = _get_ondisk_reserved(); + alloc->init_add_free(reserved, + p2align(bdev->get_size(), min_alloc_size) - reserved); + r = _open_db(true); if (r < 0) goto out_close_bdev; @@ -6241,6 +6261,8 @@ int BlueStore::mkfs() out_close_db: _close_db(false); out_close_bdev: + delete alloc; + alloc = nullptr; _close_bdev(); out_close_fsid: _close_fsid(); @@ -8878,24 +8900,14 @@ void BlueStore::_get_statfs_overall(struct store_statfs_t *buf) uint64_t bfree = alloc->get_free(); if (bluefs) { - int64_t bluefs_total = bluefs->get_total(bluefs_layout.shared_bdev); - int64_t bluefs_free = bluefs->get_free(bluefs_layout.shared_bdev); - // part of our shared device is "free" according to BlueFS, but we - // can't touch bluestore_bluefs_min of it. - int64_t shared_available = std::min( - bluefs_free, - int64_t(bluefs_total - cct->_conf->bluestore_bluefs_min)); - buf->internally_reserved = bluefs_total - shared_available; - if (shared_available > 0) { - bfree += shared_available; - } + buf->internally_reserved = 0; // include dedicated db, too, if that isn't the shared device. if (bluefs_layout.shared_bdev != BlueFS::BDEV_DB) { buf->total += bluefs->get_total(BlueFS::BDEV_DB); } // call any non-omap bluefs space "internal metadata" buf->internal_metadata = - std::max(bluefs->get_used(), (uint64_t)cct->_conf->bluestore_bluefs_min) + bluefs->get_used() - buf->omap_allocated; } -- 2.39.5