From 165a56b2dee03ec2c0f5df0004ddf2b1175671ab Mon Sep 17 00:00:00 2001 From: Igor Fedotov Date: Mon, 11 Mar 2019 19:13:19 +0300 Subject: [PATCH] os/bluestore be more tolerant to lack of space for bluefs. 'gift' space is just advisory for allocation, part of it actually requested from BlueFS is mandatory only. Hence do not fail when unable to allocate the whole space. Fixes: https://tracker.ceph.com/issues/38760 Signed-off-by: Igor Fedotov (cherry picked from commit dbc1a78787baacd7bbc98ff8bbb72e609def2ad6) --- src/os/bluestore/BlueFS.cc | 3 +- src/os/bluestore/BlueFS.h | 5 ++- src/os/bluestore/BlueStore.cc | 49 +++++++++++++++++++----------- src/os/bluestore/BlueStore.h | 12 ++++++-- src/test/objectstore/store_test.cc | 6 ++-- 5 files changed, 50 insertions(+), 25 deletions(-) diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index dbc7d35893013..d9c102d159ac5 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -2366,6 +2366,7 @@ int BlueFS::_expand_slow_device(uint64_t need, PExtentVector& extents) auto min_alloc_size = cct->_conf->bluefs_alloc_size; int id = _get_slow_device_id(); ceph_assert(id <= (int)alloc.size() && alloc[id]); + auto min_need = round_up_to(need, min_alloc_size); need = std::max(need, slow_dev_expander->get_recommended_expansion_delta( alloc[id]->get_free(), block_all[id].size())); @@ -2374,7 +2375,7 @@ int BlueFS::_expand_slow_device(uint64_t need, PExtentVector& extents) dout(10) << __func__ << " expanding slow device by 0x" << std::hex << need << std::dec << dendl; - r = slow_dev_expander->allocate_freespace(need, extents); + r = slow_dev_expander->allocate_freespace(min_need, need, extents); } return r; } diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index c0438232fc541..2eae38fc15fe5 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -48,7 +48,10 @@ protected: public: virtual uint64_t get_recommended_expansion_delta(uint64_t bluefs_free, uint64_t bluefs_total) = 0; - virtual int allocate_freespace(uint64_t size, PExtentVector& extents) = 0; + virtual int allocate_freespace( + uint64_t min_size, + uint64_t size, + PExtentVector& extents) = 0; }; class BlueFS { diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index da7d05f5c192c..8358ecbaa9dab 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -5554,10 +5554,15 @@ void BlueStore::_dump_alloc_on_failure() } -int BlueStore::allocate_bluefs_freespace(uint64_t size, PExtentVector* extents_out) +int BlueStore::allocate_bluefs_freespace( + uint64_t min_size, + uint64_t size, + PExtentVector* extents_out) { + ceph_assert(min_size <= size); if (size) { // round up to alloc size + min_size = p2roundup(min_size, cct->_conf->bluefs_alloc_size); size = p2roundup(size, cct->_conf->bluefs_alloc_size); PExtentVector extents_local; @@ -5565,32 +5570,39 @@ int BlueStore::allocate_bluefs_freespace(uint64_t size, PExtentVector* extents_o uint64_t gift; + uint64_t allocated = 0; + int64_t alloc_len; do { // hard cap to fit into 32 bits gift = std::min(size, 1ull << 31); dout(10) << __func__ << " gifting " << gift << " (" << byte_u_t(gift) << ")" << dendl; - int64_t alloc_len = alloc->allocate(gift, cct->_conf->bluefs_alloc_size, - 0, 0, extents); + alloc_len = alloc->allocate(gift, cct->_conf->bluefs_alloc_size, + 0, 0, extents); + if (alloc_len) { + allocated += alloc_len; + size -= alloc_len; + } - if (alloc_len < (int64_t)gift) { + if (alloc_len < (int64_t)gift && (min_size > allocated)) { derr << __func__ - << " failed to allocate on 0x" << std::hex << gift - << " bluefs_alloc_size 0x" << cct->_conf->bluefs_alloc_size - << " allocated 0x" << alloc_len - << " available 0x " << alloc->get_free() - << std::dec << dendl; - - alloc->dump(); - alloc->release(*extents); - extents->clear(); + << " failed to allocate on 0x" << std::hex << gift + << " min_size 0x" << min_size + << " > allocated total 0x" << allocated + << " bluefs_alloc_size 0x" << cct->_conf->bluefs_alloc_size + << " allocated 0x" << alloc_len + << " available 0x " << alloc->get_free() + << std::dec << dendl; + + alloc->dump(); + alloc->release(*extents); + extents->clear(); return -ENOSPC; } - size -= gift; - } while (size); + } while (size && alloc_len > 0); for (auto& e : *extents) { - dout(1) << __func__ << " gifting " << e << " to bluefs" << dendl; + dout(5) << __func__ << " gifting " << e << " to bluefs" << dendl; bluefs_extents.insert(e.offset, e.length); ++out_of_sync_fm; // apply to bluefs if not requested from outside @@ -6240,7 +6252,10 @@ int BlueStore::migrate_to_existing_bluefs_device(const set& devs_source, dout(1) << __func__ << " Allocating more space at slow device for BlueFS: +" << used_space - target_free << " bytes" << dendl; - r = allocate_bluefs_freespace(used_space - target_free, nullptr); + r = allocate_bluefs_freespace( + used_space - target_free, + used_space - target_free, + nullptr); umount(); if (r != 0) { diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index f734a4550fd86..f93fcabac0f7d 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -2661,7 +2661,10 @@ public: Either automatically applies allocated extents to underlying BlueFS (extents == nullptr) or just return them (non-null extents) provided */ - int allocate_bluefs_freespace(uint64_t size, PExtentVector* extents); + int allocate_bluefs_freespace( + uint64_t min_size, + uint64_t size, + PExtentVector* extents); void log_latency_fn(int idx, const ceph::timespan& lat, @@ -2991,8 +2994,11 @@ private: auto delta = _get_bluefs_size_delta(bluefs_free, bluefs_total); return delta > 0 ? delta : 0; } - int allocate_freespace(uint64_t size, PExtentVector& extents) override { - return allocate_bluefs_freespace(size, &extents); + int allocate_freespace( + uint64_t min_size, + uint64_t size, + PExtentVector& extents) override { + return allocate_bluefs_freespace(min_size, size, &extents); }; }; diff --git a/src/test/objectstore/store_test.cc b/src/test/objectstore/store_test.cc index 60450ed794119..7e1d442f4ebb6 100644 --- a/src/test/objectstore/store_test.cc +++ b/src/test/objectstore/store_test.cc @@ -7636,11 +7636,11 @@ TEST_P(StoreTest, allocateBlueFSTest) { uint64_t to_alloc = g_conf().get_val("bluefs_alloc_size"); - int r = bstore->allocate_bluefs_freespace(to_alloc, nullptr); + int r = bstore->allocate_bluefs_freespace(to_alloc, to_alloc, nullptr); ASSERT_EQ(r, 0); - r = bstore->allocate_bluefs_freespace(statfs.total, nullptr); + r = bstore->allocate_bluefs_freespace(statfs.total, statfs.total, nullptr); ASSERT_EQ(r, -ENOSPC); - r = bstore->allocate_bluefs_freespace(to_alloc * 16, nullptr); + r = bstore->allocate_bluefs_freespace(to_alloc * 16, to_alloc * 16, nullptr); ASSERT_EQ(r, 0); store->umount(); ASSERT_EQ(store->fsck(false), 0); // do fsck explicitly -- 2.39.5