From: Sage Weil Date: Wed, 7 Aug 2019 17:41:33 +0000 (-0500) Subject: os/bluestore/BlueFS: apply shared_alloc_size to shared device X-Git-Tag: v14.2.5~12^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=17e6bbe0e79cb32ef786f8a0b2464f15e7fbc056;p=ceph.git os/bluestore/BlueFS: apply shared_alloc_size to shared device Keep an alloc_size vector so that we have this value handy at all times. Allow bluestore to fetch this value directly instead of looking at the bluefs_* config options since this encapsulates things a bit better, and also isn't vulnerable to the config setting changing at runtime. Signed-off-by: Sage Weil (cherry picked from commit e8b5a458c306dfe166b09239fc9f57c04fa47ead) --- diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index b70862badc23..b30e012d45e6 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -332,8 +332,7 @@ int BlueFS::reclaim_blocks(unsigned id, uint64_t want, ceph_assert(id < alloc.size()); ceph_assert(alloc[id]); - int64_t got = alloc[id]->allocate(want, cct->_conf->bluefs_alloc_size, 0, - extents); + int64_t got = alloc[id]->allocate(want, alloc_size[id], 0, extents); ceph_assert(got != 0); if (got < 0) { derr << __func__ << " failed to allocate space to return to bluestore" @@ -510,7 +509,26 @@ void BlueFS::_init_alloc() { dout(20) << __func__ << dendl; alloc.resize(MAX_BDEV); + alloc_size.resize(MAX_BDEV, 0); pending_release.resize(MAX_BDEV); + + if (bdev[BDEV_WAL]) { + alloc_size[BDEV_WAL] = cct->_conf->bluefs_alloc_size; + } + if (bdev[BDEV_SLOW]) { + alloc_size[BDEV_DB] = cct->_conf->bluefs_alloc_size; + alloc_size[BDEV_SLOW] = cct->_conf->bluefs_shared_alloc_size; + } else { + alloc_size[BDEV_DB] = cct->_conf->bluefs_shared_alloc_size; + } + // new wal and db devices are never shared + if (bdev[BDEV_NEWWAL]) { + alloc_size[BDEV_NEWWAL] = cct->_conf->bluefs_alloc_size; + } + if (bdev[BDEV_NEWDB]) { + alloc_size[BDEV_NEWDB] = cct->_conf->bluefs_alloc_size; + } + for (unsigned id = 0; id < bdev.size(); ++id) { if (!bdev[id]) { continue; @@ -522,9 +540,13 @@ void BlueFS::_init_alloc() name += devnames[id]; else name += to_string(uintptr_t(this)); + ceph_assert(alloc_size[id]); + dout(20) << __func__ << " id " << id + << " alloc_size 0x" << std::hex << alloc_size[id] + << " size 0x" << bdev[id]->get_size() << std::dec << dendl; alloc[id] = Allocator::create(cct, cct->_conf->bluefs_allocator, bdev[id]->get_size(), - cct->_conf->bluefs_alloc_size, name); + alloc_size[id], name); interval_set& p = block_all[id]; for (interval_set::iterator q = p.begin(); q != p.end(); ++q) { alloc[id]->init_add_free(q.get_start(), q.get_len()); @@ -1935,9 +1957,13 @@ void BlueFS::_compact_log_async(std::unique_lock& l) log_t.clear(); _compact_log_dump_metadata(&t, 0); + uint64_t max_alloc_size = std::max(alloc_size[BDEV_WAL], + std::max(alloc_size[BDEV_DB], + alloc_size[BDEV_SLOW])); + // conservative estimate for final encoded size new_log_jump_to = round_up_to(t.op_bl.length() + super.block_size * 2, - cct->_conf->bluefs_alloc_size); + max_alloc_size); t.op_jump(log_seq, new_log_jump_to); // allocate @@ -2569,8 +2595,8 @@ int BlueFS::_expand_slow_device(uint64_t need, PExtentVector& extents) { int r = -ENOSPC; if (slow_dev_expander) { - auto min_alloc_size = cct->_conf->bluefs_alloc_size; int id = _get_slow_device_id(); + auto min_alloc_size = alloc_size[id]; ceph_assert(id <= (int)alloc.size() && alloc[id]); auto min_need = round_up_to(need, min_alloc_size); need = std::max(need, @@ -2592,14 +2618,12 @@ int BlueFS::_allocate_without_fallback(uint8_t id, uint64_t len, dout(10) << __func__ << " len 0x" << std::hex << len << std::dec << " from " << (int)id << dendl; assert(id < alloc.size()); - uint64_t min_alloc_size = cct->_conf->bluefs_alloc_size; - - uint64_t left = round_up_to(len, min_alloc_size); - if (!alloc[id]) { return -ENOENT; } extents->reserve(4); // 4 should be (more than) enough for most allocations + uint64_t min_alloc_size = alloc_size[id]; + uint64_t left = round_up_to(len, min_alloc_size); int64_t alloc_len = alloc[id]->allocate(left, min_alloc_size, 0, extents); if (alloc_len < 0 || alloc_len < (int64_t)left) { if (alloc_len > 0) { @@ -2626,27 +2650,26 @@ int BlueFS::_allocate(uint8_t id, uint64_t len, dout(10) << __func__ << " len 0x" << std::hex << len << std::dec << " from " << (int)id << dendl; ceph_assert(id < alloc.size()); - uint64_t min_alloc_size = cct->_conf->bluefs_alloc_size; - - uint64_t left = round_up_to(len, min_alloc_size); int64_t alloc_len = 0; PExtentVector extents; - uint64_t hint = 0; if (alloc[id]) { if (!node->extents.empty() && node->extents.back().bdev == id) { hint = node->extents.back().end(); } extents.reserve(4); // 4 should be (more than) enough for most allocations - alloc_len = alloc[id]->allocate(left, min_alloc_size, hint, &extents); + alloc_len = alloc[id]->allocate(round_up_to(len, alloc_size[id]), + alloc_size[id], hint, &extents); } - if (alloc_len < 0 || alloc_len < (int64_t)left) { + if (!alloc[id] || + alloc_len < 0 || + alloc_len < (int64_t)round_up_to(len, alloc_size[id])) { if (alloc_len > 0) { alloc[id]->release(extents); } if (id != BDEV_SLOW) { if (bdev[id]) { - dout(1) << __func__ << " failed to allocate 0x" << std::hex << left + dout(1) << __func__ << " failed to allocate 0x" << std::hex << len << " on bdev " << (int)id << ", free 0x" << alloc[id]->get_free() << "; fallback to bdev " << (int)id + 1 @@ -2654,13 +2677,13 @@ int BlueFS::_allocate(uint8_t id, uint64_t len, } return _allocate(id + 1, len, node); } - dout(1) << __func__ << " unable to allocate 0x" << std::hex << left + dout(1) << __func__ << " unable to allocate 0x" << std::hex << len << " on bdev " << (int)id << ", free 0x" << (alloc[id] ? alloc[id]->get_free() : (uint64_t)-1) << "; fallback to slow device expander " << std::dec << dendl; extents.clear(); - if (_expand_slow_device(left, extents) == 0) { + if (_expand_slow_device(len, extents) == 0) { id = _get_slow_device_id(); for (auto& e : extents) { _add_block_extent(id, e.offset, e.length); @@ -2669,19 +2692,20 @@ int BlueFS::_allocate(uint8_t id, uint64_t len, auto* last_alloc = alloc[id]; ceph_assert(last_alloc); // try again - alloc_len = last_alloc->allocate(left, min_alloc_size, hint, &extents); - if (alloc_len < 0 || alloc_len < (int64_t)left) { + alloc_len = last_alloc->allocate(round_up_to(len, alloc_size[id]), + alloc_size[id], hint, &extents); + if (alloc_len < 0 || alloc_len < (int64_t)len) { if (alloc_len > 0) { last_alloc->release(extents); } - derr << __func__ << " failed to allocate 0x" << std::hex << left + derr << __func__ << " failed to allocate 0x" << std::hex << len << " on bdev " << (int)id << ", free 0x" << last_alloc->get_free() << std::dec << dendl; return -ENOSPC; } } else { derr << __func__ << " failed to expand slow device to fit +0x" - << std::hex << left << std::dec + << std::hex << len << std::dec << dendl; return -ENOSPC; } diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index a75d2559e531..fb89c213d2ad 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -306,6 +306,7 @@ private: vector ioc; ///< IOContexts for bdevs vector > block_all; ///< extents in bdev we own vector alloc; ///< allocators for bdevs + vector alloc_size; ///< alloc size for each device vector> pending_release; ///< extents to release BlockDevice::aio_callback_t discard_cb[3]; //discard callbacks for each dev @@ -423,6 +424,9 @@ public: void collect_metadata(map *pm, unsigned skip_bdev_id); void get_devices(set *ls); + uint64_t get_alloc_size(int id) { + return alloc_size[id]; + } int fsck(); int device_migrate_to_new( diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 477efefa9ec2..30fea6012414 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -5002,20 +5002,20 @@ int BlueStore::_minimal_open_bluefs(bool create) bdev->get_size() * (cct->_conf->bluestore_bluefs_min_ratio + cct->_conf->bluestore_bluefs_gift_ratio); initial = std::max(initial, cct->_conf->bluestore_bluefs_min); - if (cct->_conf->bluefs_alloc_size % min_alloc_size) { - derr << __func__ << " bluefs_alloc_size 0x" << std::hex - << cct->_conf->bluefs_alloc_size << " is not a multiple of " + uint64_t alloc_size = cct->_conf->bluefs_shared_alloc_size; + if (alloc_size % min_alloc_size) { + derr << __func__ << " bluefs_shared_alloc_size 0x" << std::hex + << alloc_size << " is not a multiple of " << "min_alloc_size 0x" << min_alloc_size << std::dec << dendl; r = -EINVAL; goto free_bluefs; } // align to bluefs's alloc_size - initial = p2roundup(initial, cct->_conf->bluefs_alloc_size); + initial = p2roundup(initial, alloc_size); // put bluefs in the middle of the device in case it is an HDD - uint64_t start = p2align((bdev->get_size() - initial) / 2, - cct->_conf->bluefs_alloc_size); + uint64_t start = p2align((bdev->get_size() - initial) / 2, alloc_size); //avoiding superblock overwrite - start = std::max(cct->_conf->bluefs_alloc_size, start); + start = std::max(alloc_size, start); ceph_assert(start >=_get_ondisk_reserved()); bluefs->add_block_extent(bluefs_shared_bdev, start, initial); @@ -5461,8 +5461,9 @@ int BlueStore::allocate_bluefs_freespace( ceph_assert(min_size <= size); if (size) { // round up to alloc size - min_size = p2roundup(min_size, cct->_conf->bluefs_alloc_size); - size = p2roundup(size, cct->_conf->bluefs_alloc_size); + uint64_t alloc_size = bluefs->get_alloc_size(bluefs_shared_bdev); + min_size = p2roundup(min_size, alloc_size); + size = p2roundup(size, alloc_size); PExtentVector extents_local; PExtentVector* extents = extents_out ? extents_out : &extents_local; @@ -5477,8 +5478,7 @@ int BlueStore::allocate_bluefs_freespace( dout(10) << __func__ << " gifting " << gift << " (" << byte_u_t(gift) << ")" << dendl; - alloc_len = alloc->allocate(gift, cct->_conf->bluefs_alloc_size, - 0, 0, extents); + alloc_len = alloc->allocate(gift, alloc_size, 0, 0, extents); if (alloc_len > 0) { allocated += alloc_len; size -= alloc_len; @@ -5490,7 +5490,7 @@ int BlueStore::allocate_bluefs_freespace( << " failed to allocate on 0x" << std::hex << gift << " min_size 0x" << min_size << " > allocated total 0x" << allocated - << " bluefs_alloc_size 0x" << cct->_conf->bluefs_alloc_size + << " bluefs_shared_alloc_size 0x" << alloc_size << " allocated 0x" << (alloc_len < 0 ? 0 : alloc_len) << " available 0x " << alloc->get_free() << std::dec << dendl; @@ -5631,7 +5631,8 @@ int BlueStore::_balance_bluefs_freespace() // reclaim from bluefs? if (delta < 0) { // round up to alloc size - auto reclaim = p2roundup(uint64_t(-delta), cct->_conf->bluefs_alloc_size); + uint64_t alloc_size = bluefs->get_alloc_size(bluefs_shared_bdev); + auto reclaim = p2roundup(uint64_t(-delta), alloc_size); // hard cap to fit into 32 bits reclaim = std::min(reclaim, 1ull << 31);