From: Sage Weil Date: Mon, 19 Aug 2019 13:46:09 +0000 (-0500) Subject: os/bluestore/BlueFS: apply shared_alloc_size to shared device X-Git-Tag: v12.2.13~146^2~4 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=90b5d4952d8f8fa77f5ac99c423a715d23675c67;p=ceph.git os/bluestore/BlueFS: apply shared_alloc_size to shared device Keep an alloc_size vector so that we have this value handy at all times. Allow bluestore to fetch this value directly instead of looking at the bluefs_* config options since this encapsulates things a bit better, and also isn't vulnerable to the config setting changing at runtime. Signed-off-by: Sage Weil (cherry picked from commit e8b5a458c306dfe166b09239fc9f57c04fa47ead) # Conflicts: # src/os/bluestore/BlueFS.cc - missing assert(got != 0) in luminous - ROUND_UP_TO vs round_up_to - missing fallback allocations - no BDEV_NEW* # src/os/bluestore/BlueFS.h - adjacent declarations # src/os/bluestore/BlueStore.cc - P2ROUNDUP vs p2roundup --- diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index c357f44c144e..cc58f6de609b 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -269,8 +269,8 @@ int BlueFS::reclaim_blocks(unsigned id, uint64_t want, assert(id < alloc.size()); assert(alloc[id]); - int64_t got = alloc[id]->allocate(want, cct->_conf->bluefs_alloc_size, 0, - extents); + int64_t got = alloc[id]->allocate(want, alloc_size[id], 0, extents); + ceph_assert(got != 0); if (got < 0) { derr << __func__ << " failed to allocate space to return to bluestore" << dendl; @@ -460,7 +460,19 @@ void BlueFS::_init_alloc() { dout(20) << __func__ << dendl; alloc.resize(MAX_BDEV); + alloc_size.resize(MAX_BDEV, 0); pending_release.resize(MAX_BDEV); + + if (bdev[BDEV_WAL]) { + alloc_size[BDEV_WAL] = cct->_conf->bluefs_alloc_size; + } + if (bdev[BDEV_SLOW]) { + alloc_size[BDEV_DB] = cct->_conf->bluefs_alloc_size; + alloc_size[BDEV_SLOW] = cct->_conf->bluefs_shared_alloc_size; + } else { + alloc_size[BDEV_DB] = cct->_conf->bluefs_shared_alloc_size; + } + for (unsigned id = 0; id < bdev.size(); ++id) { if (!bdev[id]) { continue; @@ -473,9 +485,13 @@ void BlueFS::_init_alloc() name += devnames[id]; else name += to_string(uintptr_t(this)); + ceph_assert(alloc_size[id]); + dout(20) << __func__ << " id " << id + << " alloc_size 0x" << std::hex << alloc_size[id] + << " size 0x" << bdev[id]->get_size() << std::dec << dendl; alloc[id] = Allocator::create(cct, cct->_conf->bluefs_allocator, bdev[id]->get_size(), - cct->_conf->bluefs_alloc_size, name); + alloc_size[id], name); interval_set& p = block_all[id]; for (interval_set::iterator q = p.begin(); q != p.end(); ++q) { alloc[id]->init_add_free(q.get_start(), q.get_len()); @@ -1339,9 +1355,13 @@ void BlueFS::_compact_log_async(std::unique_lock& l) log_t.clear(); _compact_log_dump_metadata(&t); + uint64_t max_alloc_size = std::max(alloc_size[BDEV_WAL], + std::max(alloc_size[BDEV_DB], + alloc_size[BDEV_SLOW])); + // conservative estimate for final encoded size new_log_jump_to = ROUND_UP_TO(t.op_bl.length() + super.block_size * 2, - cct->_conf->bluefs_alloc_size); + max_alloc_size); t.op_jump(log_seq, new_log_jump_to); bufferlist bl; @@ -1946,28 +1966,27 @@ int BlueFS::_allocate(uint8_t id, uint64_t len, { dout(10) << __func__ << " len 0x" << std::hex << len << std::dec << " from " << (int)id << dendl; - assert(id < alloc.size()); - uint64_t min_alloc_size = cct->_conf->bluefs_alloc_size; - - uint64_t left = ROUND_UP_TO(len, min_alloc_size); + ceph_assert(id < alloc.size()); int64_t alloc_len = 0; PExtentVector extents; - if (alloc[id]) { uint64_t hint = 0; if (!node->extents.empty() && node->extents.back().bdev == id) { hint = node->extents.back().end(); } extents.reserve(4); // 4 should be (more than) enough for most allocations - alloc_len = alloc[id]->allocate(left, min_alloc_size, hint, &extents); + alloc_len = alloc[id]->allocate(ROUND_UP_TO(len, alloc_size[id]), + alloc_size[id], hint, &extents); } - if (alloc_len < 0 || alloc_len < (int64_t)left) { + if (!alloc[id] || + alloc_len < 0 || + alloc_len < (int64_t)ROUND_UP_TO(len, alloc_size[id])) { if (alloc_len > 0) { alloc[id]->release(extents); } if (id != BDEV_SLOW) { if (bdev[id]) { - dout(1) << __func__ << " failed to allocate 0x" << std::hex << left + dout(1) << __func__ << " failed to allocate 0x" << std::hex << len << " on bdev " << (int)id << ", free 0x" << alloc[id]->get_free() << "; fallback to bdev " << (int)id + 1 diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index e708aa5c990a..7f56e105b5f1 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -266,6 +266,7 @@ private: vector > block_all; ///< extents in bdev we own vector block_total; ///< sum of block_all vector alloc; ///< allocators for bdevs + vector alloc_size; ///< alloc size for each device vector> pending_release; ///< extents to release BlueFSDeviceExpander* slow_dev_expander = nullptr; @@ -354,6 +355,9 @@ public: void umount(); void collect_metadata(map *pm); + uint64_t get_alloc_size(int id) { + return alloc_size[id]; + } int fsck(); uint64_t get_used(); diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index b06c5d629b3d..7cd2ca7fbbde 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -5380,7 +5380,8 @@ int BlueStore::_balance_bluefs_freespace(PExtentVector *extents) // reclaim from bluefs? if (reclaim) { // round up to alloc size - reclaim = P2ROUNDUP(reclaim, cct->_conf->bluefs_alloc_size); + uint64_t alloc_size = bluefs->get_alloc_size(bluefs_shared_bdev); + reclaim = P2ROUNDUP(reclaim, alloc_size); // hard cap to fit into 32 bits reclaim = MIN(reclaim, 1ull<<31);