#endif
#include "common/debug.h"
#include "common/admin_socket.h"
+
#define dout_subsys ceph_subsys_bluestore
using TOPNSPC::common::cmd_getval;
} else if (type == "btree") {
return new BtreeAllocator(cct, size, block_size, name);
} else if (type == "hybrid") {
- return new HybridAllocator(cct, size, block_size,
+ return new HybridAvlAllocator(cct, size, block_size,
cct->_conf.get_val<uint64_t>("bluestore_hybrid_alloc_mem_cap"),
name);
#ifdef HAVE_LIBZBD
ceph_assert(size != 0);
ceph_assert(size <= num_free);
+ //FIXME minor: technically this is wrong since find() should return end()
+ // if an exactly matching offset isn't found, which might be the case when
+ // we're trying to remove a subchunk from the middle of an existing chunk.
+ // But it looks like avl containers tolerate this and return the chunk
+ // preceding the 'start' offset.
auto rs = range_tree.find(range_t{start, end}, range_tree.key_comp());
/* Make sure we completely overlap with someone */
if (rs == range_tree.end() ||
uint64_t max_mem,
std::string_view name) :
Allocator(name, device_size, block_size),
+ cct(cct),
range_size_alloc_threshold(
cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_bf_threshold")),
range_size_alloc_free_pct(
cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_bf_free_pct")),
max_search_count(
cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_ff_max_search_count")),
max_search_bytes(
cct->_conf.get_val<Option::size_t>("bluestore_avl_alloc_ff_max_search_bytes")),
- range_count_cap(max_mem / sizeof(range_seg_t)),
- cct(cct)
+ range_count_cap(max_mem / sizeof(range_seg_t))
{
ldout(cct, 10) << __func__ << " 0x" << std::hex << get_capacity() << "/"
<< get_block_size() << std::dec << dendl;
// i.e. (range_count_cap > 0)
ceph_assert(false);
}
+ // to be overridden by Hybrid wrapper
+ virtual uint64_t _get_spilled_over() const {
+ return 0;
+ }
+ virtual uint64_t _spillover_allocate(uint64_t want,
+ uint64_t unit,
+ uint64_t max_alloc_size,
+ int64_t hint,
+ PExtentVector* extents) {
+ // this should be overridden when range count cap is present,
+ // i.e. (range_count_cap > 0)
+ ceph_assert(false);
+ return 0;
+ }
+
protected:
// called when extent to be released/marked free
virtual void _add_to_tree(uint64_t start, uint64_t size);
#include "HybridAllocator.h"
-#include <bit>
-#include <limits>
-
-#include "common/config_proxy.h"
-#include "common/debug.h"
-
-#define dout_context cct
+#define dout_context (T::get_context())
#define dout_subsys ceph_subsys_bluestore
#undef dout_prefix
-#define dout_prefix *_dout << "HybridAllocator "
-
-
-int64_t HybridAllocator::allocate(
- uint64_t want,
- uint64_t unit,
- uint64_t max_alloc_size,
- int64_t hint,
- PExtentVector* extents)
-{
- ldout(cct, 10) << __func__ << std::hex
- << " 0x" << want
- << "/" << unit
- << "," << max_alloc_size
- << "," << hint
- << std::dec << dendl;
- ceph_assert(std::has_single_bit(unit));
- ceph_assert(want % unit == 0);
-
- if (max_alloc_size == 0) {
- max_alloc_size = want;
- }
- if (constexpr auto cap = std::numeric_limits<decltype(bluestore_pextent_t::length)>::max();
- max_alloc_size >= cap) {
- max_alloc_size = p2align(uint64_t(cap), (uint64_t)get_block_size());
- }
-
- int64_t res;
-
- typedef
- std::function<int64_t(uint64_t, uint64_t, uint64_t, int64_t, PExtentVector*)>
- alloc_fn;
- alloc_fn priA = [&](uint64_t _want,
- uint64_t _unit,
- uint64_t _max_alloc_size,
- int64_t _hint,
- PExtentVector* _extents) {
- return _allocate(_want, _unit, _max_alloc_size, _hint, _extents);
- };
- alloc_fn secA = [&](uint64_t _want,
- uint64_t _unit,
- uint64_t _max_alloc_size,
- int64_t _hint,
- PExtentVector* _extents) {
- return bmap_alloc ?
- bmap_alloc->allocate(_want, _unit, _max_alloc_size, _hint, _extents) :
- 0;
- };
-
- std::lock_guard l(lock);
- // try bitmap first to avoid unneeded contiguous extents split if
- // desired amount is less than shortes range in AVL
- if (bmap_alloc && bmap_alloc->get_free() &&
- want < _lowest_size_available()) {
- std::swap(priA, secA);
- }
-
- {
- auto orig_size = extents->size();
- res = priA(want, unit, max_alloc_size, hint, extents);
- if (res < 0) {
- // allocator shouldn't return new extents on error
- ceph_assert(orig_size == extents->size());
- res = 0;
- }
- }
- if ((uint64_t)res < want) {
- auto orig_size = extents->size();
- auto res2 = secA(want - res, unit, max_alloc_size, hint, extents);
- if (res2 > 0) {
- res += res2;
- } else {
- ceph_assert(orig_size == extents->size());
- }
- }
- return res ? res : -ENOSPC;
-}
-
-void HybridAllocator::release(const interval_set<uint64_t>& release_set) {
- std::lock_guard l(lock);
- // this will attempt to put free ranges into AvlAllocator first and
- // fallback to bitmap one via _try_insert_range call
- _release(release_set);
-}
-
-uint64_t HybridAllocator::get_free()
-{
- std::lock_guard l(lock);
- return (bmap_alloc ? bmap_alloc->get_free() : 0) + _get_free();
-}
-
-double HybridAllocator::get_fragmentation()
-{
- std::lock_guard l(lock);
- auto f = AvlAllocator::_get_fragmentation();
- auto bmap_free = bmap_alloc ? bmap_alloc->get_free() : 0;
- if (bmap_free) {
- auto _free = _get_free() + bmap_free;
- auto bf = bmap_alloc->get_fragmentation();
-
- f = f * _get_free() / _free + bf * bmap_free / _free;
- }
- return f;
-}
-
-void HybridAllocator::dump()
-{
- std::lock_guard l(lock);
- AvlAllocator::_dump();
- if (bmap_alloc) {
- bmap_alloc->dump();
- }
- ldout(cct, 0) << __func__
- << " avl_free: " << _get_free()
- << " bmap_free: " << (bmap_alloc ? bmap_alloc->get_free() : 0)
- << dendl;
-}
-
-void HybridAllocator::foreach(
- std::function<void(uint64_t offset, uint64_t length)> notify)
-{
- std::lock_guard l(lock);
- AvlAllocator::_foreach(notify);
- if (bmap_alloc) {
- bmap_alloc->foreach(notify);
- }
-}
+#define dout_prefix *_dout << (std::string(this->get_type()) + "::").c_str()
-void HybridAllocator::init_rm_free(uint64_t offset, uint64_t length)
+/*
+ * class HybridAvlAllocator
+ */
+const char* HybridAvlAllocator::get_type() const
{
- if (!length)
- return;
- std::lock_guard l(lock);
- ldout(cct, 10) << __func__ << std::hex
- << " offset 0x" << offset
- << " length 0x" << length
- << std::dec << dendl;
- _try_remove_from_tree(offset, length,
- [&](uint64_t o, uint64_t l, bool found) {
- if (!found) {
- if (bmap_alloc) {
- bmap_alloc->init_rm_free(o, l);
- } else {
- lderr(cct) << "init_rm_free lambda " << std::hex
- << "Uexpected extent: "
- << " 0x" << o << "~" << l
- << std::dec << dendl;
- ceph_assert(false);
- }
- }
- });
+ return "hybrid";
}
-void HybridAllocator::shutdown()
-{
- std::lock_guard l(lock);
- _shutdown();
- if (bmap_alloc) {
- bmap_alloc->shutdown();
- delete bmap_alloc;
- bmap_alloc = nullptr;
- }
-}
-
-void HybridAllocator::_spillover_range(uint64_t start, uint64_t end)
-{
- auto size = end - start;
- dout(20) << __func__
- << std::hex << " "
- << start << "~" << size
- << std::dec
- << dendl;
- ceph_assert(size);
- if (!bmap_alloc) {
- dout(1) << __func__
- << " constructing fallback allocator"
- << dendl;
- bmap_alloc = new BitmapAllocator(cct,
- get_capacity(),
- get_block_size(),
- get_name() + ".fallback");
- }
- bmap_alloc->init_add_free(start, size);
-}
-
-void HybridAllocator::_add_to_tree(uint64_t start, uint64_t size)
-{
- if (bmap_alloc) {
- uint64_t head = bmap_alloc->claim_free_to_left(start);
- uint64_t tail = bmap_alloc->claim_free_to_right(start + size);
- ceph_assert(head <= start);
- start -= head;
- size += head + tail;
- }
- AvlAllocator::_add_to_tree(start, size);
-}
+#include "HybridAllocator_impl.h"
#include "AvlAllocator.h"
#include "BitmapAllocator.h"
-class HybridAllocator : public AvlAllocator {
+template <typename PrimaryAllocator>
+class HybridAllocatorBase : public PrimaryAllocator {
BitmapAllocator* bmap_alloc = nullptr;
public:
- HybridAllocator(CephContext* cct, int64_t device_size, int64_t _block_size,
- uint64_t max_mem,
- std::string_view name) :
- AvlAllocator(cct, device_size, _block_size, max_mem, name) {
- }
- const char* get_type() const override
- {
- return "hybrid";
+ HybridAllocatorBase(CephContext* cct, int64_t device_size, int64_t _block_size,
+ uint64_t max_mem,
+ std::string_view name) :
+ PrimaryAllocator(cct, device_size, _block_size, max_mem, name) {
}
int64_t allocate(
uint64_t want,
uint64_t unit,
uint64_t max_alloc_size,
int64_t hint,
PExtentVector *extents) override;
- void release(const interval_set<uint64_t>& release_set) override;
- uint64_t get_free() override;
- double get_fragmentation() override;
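+ // the inherited release() suffices: free ranges go back to the primary
+ // allocator first and fall back to the bitmap one via _try_insert_range()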
+ using PrimaryAllocator::release;
+ uint64_t get_free() override {
+ std::lock_guard l(PrimaryAllocator::get_lock());
+ return (bmap_alloc ? bmap_alloc->get_free() : 0) +
+ PrimaryAllocator::_get_free();
+ }
+
+ double get_fragmentation() override {
+ std::lock_guard l(PrimaryAllocator::get_lock());
+ auto f = PrimaryAllocator::_get_fragmentation();
+ auto bmap_free = bmap_alloc ? bmap_alloc->get_free() : 0;
+ if (bmap_free) {
+ auto _free = PrimaryAllocator::_get_free() + bmap_free;
+ auto bf = bmap_alloc->get_fragmentation();
+
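+ // weight each allocator's fragmentation by its share of the total free space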
+ f = f * PrimaryAllocator::_get_free() / _free + bf * bmap_free / _free;
+ }
+ return f;
+ }
void dump() override;
+
void foreach(
- std::function<void(uint64_t offset, uint64_t length)> notify) override;
+ std::function<void(uint64_t, uint64_t)> notify) override {
+ std::lock_guard l(PrimaryAllocator::get_lock());
+ PrimaryAllocator::_foreach(notify);
+ if (bmap_alloc) {
+ bmap_alloc->foreach(notify);
+ }
+ }
void init_rm_free(uint64_t offset, uint64_t length) override;
- void shutdown() override;
+ void shutdown() override {
+ std::lock_guard l(PrimaryAllocator::get_lock());
+ PrimaryAllocator::_shutdown();
+ if (bmap_alloc) {
+ bmap_alloc->shutdown();
+ delete bmap_alloc;
+ bmap_alloc = nullptr;
+ }
+ }
protected:
// intended primarily for UT
private:
void _spillover_range(uint64_t start, uint64_t end) override;
+ uint64_t _spillover_allocate(uint64_t want,
+ uint64_t unit,
+ uint64_t max_alloc_size,
+ int64_t hint,
+ PExtentVector* extents) override;
+
+ // Allocates up to 'want' bytes from the primary or secondary allocator.
+ // Returns:
+ //  0 if an error occurred or nothing has been allocated;
+ //    the 'extents' vector remains unmodified.
+ //  the amount of allocated bytes (<= want) otherwise;
+ //    new extents are appended to 'extents', existing ones are preserved.
+ uint64_t _allocate_or_rollback(bool primary,
+ uint64_t want,
+ uint64_t unit,
+ uint64_t max_alloc_size,
+ int64_t hint,
+ PExtentVector* extents);
+
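+ // whatever the fallback bitmap allocator holds is exactly the amount
+ // spilled over from the primary allocator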
+ uint64_t _get_spilled_over() const override {
+ return bmap_alloc ? bmap_alloc->get_free() : 0;
+ }
// called when extent to be released/marked free
- void _add_to_tree(uint64_t start, uint64_t size) override;
+ void _add_to_tree(uint64_t start, uint64_t size) override {
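+ // claim free space adjacent to the released range from the bitmap
+ // allocator (if any), so contiguous chunks migrate back into the tree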
+ if (bmap_alloc) {
+ uint64_t head = bmap_alloc->claim_free_to_left(start);
+ uint64_t tail = bmap_alloc->claim_free_to_right(start + size);
+ ceph_assert(head <= start);
+ start -= head;
+ size += head + tail;
+ }
+ PrimaryAllocator::_add_to_tree(start, size);
+ }
+};
+
+class HybridAvlAllocator : public HybridAllocatorBase<AvlAllocator> {
+public:
+ HybridAvlAllocator(CephContext* cct, int64_t device_size, int64_t _block_size,
+ uint64_t max_mem,
+ std::string_view name) :
+ HybridAllocatorBase<AvlAllocator>(cct,
+ device_size, _block_size, max_mem, name) {
+ }
+ const char* get_type() const override;
};
+
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include "HybridAllocator.h"
+
+#define dout_context (T::get_context())
+#define dout_subsys ceph_subsys_bluestore
+#undef dout_prefix
+#define dout_prefix *_dout << (std::string(this->get_type()) + "::")
+
+template <typename T>
+int64_t HybridAllocatorBase<T>::allocate(
+ uint64_t want,
+ uint64_t unit,
+ uint64_t max_alloc_size,
+ int64_t hint,
+ PExtentVector* extents)
+{
+ dout(10) << __func__ << std::hex
+ << " 0x" << want
+ << "/" << unit
+ << "," << max_alloc_size
+ << "," << hint
+ << std::dec << dendl;
+ ceph_assert(std::has_single_bit(unit));
+ ceph_assert(want % unit == 0);
+
+ if (max_alloc_size == 0) {
+ max_alloc_size = want;
+ }
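+ // clamp max_alloc_size so a single extent length still fits into
+ // bluestore_pextent_t::length (32 bits), keeping it block-aligned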
+ if (constexpr auto cap = std::numeric_limits<uint32_t>::max();
+ max_alloc_size >= cap) {
+ max_alloc_size = p2align(uint64_t(cap), (uint64_t)T::get_block_size());
+ }
+
+ std::lock_guard l(T::get_lock());
+
+ // try bitmap first to avoid unneeded splitting of contiguous extents
+ // if the desired amount is less than the shortest range in the AVL tree
+ bool primary_first = !(bmap_alloc && bmap_alloc->get_free() &&
+ want < T::_lowest_size_available());
+
+ int64_t res = _allocate_or_rollback(primary_first,
+ want, unit, max_alloc_size, hint, extents);
+ ceph_assert(res >= 0);
+ if ((uint64_t)res < want) {
+ auto orig_size = extents->size();
+ int64_t res2 = 0;
+ // try alternate allocator
+ if (!primary_first) {
+ res2 = T::_allocate(want - res, unit, max_alloc_size, hint, extents);
+ } else if (bmap_alloc) {
+ res2 =
+ bmap_alloc->allocate(want - res, unit, max_alloc_size, hint, extents);
+ }
+ if (res2 >= 0) {
+ res += res2;
+ } else {
+ // allocator shouldn't return new extents on error
+ ceph_assert(orig_size == extents->size());
+ }
+ }
+ return res ? res : -ENOSPC;
+}
+
+template <typename T>
+void HybridAllocatorBase<T>::dump()
+{
+ std::lock_guard l(T::get_lock());
+ T::_dump();
+ if (bmap_alloc) {
+ bmap_alloc->dump();
+ }
+ dout(0) << __func__
+ << " avl_free: " << T::_get_free()
+ << " bmap_free: " << (bmap_alloc ? bmap_alloc->get_free() : 0)
+ << dendl;
+}
+
+template <typename T>
+void HybridAllocatorBase<T>::init_rm_free(uint64_t offset, uint64_t length)
+{
+ if (!length)
+ return;
+ std::lock_guard l(T::get_lock());
+ dout(10) << __func__ << std::hex
+ << " offset 0x" << offset
+ << " length 0x" << length
+ << std::dec << dendl;
+ T::_try_remove_from_tree(offset, length,
+ [&](uint64_t o, uint64_t l, bool found) {
+ if (!found) {
+ if (bmap_alloc) {
+ bmap_alloc->init_rm_free(o, l);
+ } else {
+ derr << __func__ << " lambda " << std::hex
+ << "Unexpected extent: 0x" << o << "~" << l
+ << std::dec << dendl;
+ ceph_assert(false);
+ }
+ }
+ });
+}
+
+template <typename T>
+void HybridAllocatorBase<T>::_spillover_range(uint64_t start, uint64_t end)
+{
+ auto size = end - start;
+ dout(20) << __func__
+ << std::hex << " "
+ << start << "~" << size
+ << std::dec
+ << dendl;
+ ceph_assert(size);
+ if (!bmap_alloc) {
+ dout(1) << __func__
+ << " constructing fallback allocator"
+ << dendl;
+ bmap_alloc = new BitmapAllocator(T::get_context(),
+ T::get_capacity(),
+ T::get_block_size(),
+ T::get_name() + ".fallback");
+ }
+ bmap_alloc->init_add_free(start, size);
+}
+
+template <typename T>
+uint64_t HybridAllocatorBase<T>::_spillover_allocate(uint64_t want,
+ uint64_t unit,
+ uint64_t max_alloc_size,
+ int64_t hint,
+ PExtentVector* extents)
+{
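+ // spillover requests bypass the primary allocator and are served
+ // by the secondary (bitmap) one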
+ return _allocate_or_rollback(false,
+ want,
+ unit,
+ max_alloc_size,
+ hint,
+ extents);
+}
+
+template <typename PrimaryAllocator>
+uint64_t HybridAllocatorBase<PrimaryAllocator>::_allocate_or_rollback(
+ bool primary,
+ uint64_t want,
+ uint64_t unit,
+ uint64_t max_alloc_size,
+ int64_t hint,
+ PExtentVector* extents)
+{
+ int64_t res = 0;
+ ceph_assert(extents);
+ // preserve original 'extents' vector state
+ auto orig_size = extents->size();
+ if (primary) {
+ res = PrimaryAllocator::_allocate(want, unit, max_alloc_size, hint, extents);
+ } else if (bmap_alloc) {
+ res = bmap_alloc->allocate(want, unit, max_alloc_size, hint, extents);
+ }
+ if (res < 0) {
+ // got a failure, release already allocated
+ PExtentVector local_extents;
+ PExtentVector* e = extents;
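+ // roll back only the extents appended by this call; entries below
+ // orig_size belong to the caller and must survive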
+ if (orig_size) {
+ local_extents.insert(
+ local_extents.end(), extents->begin() + orig_size, extents->end());
+ e = &local_extents;
+ }
+
+ if (e->size()) {
+ if (primary) {
+ PrimaryAllocator::_release(*e);
+ } else if (bmap_alloc) {
+ bmap_alloc->release(*e);
+ }
+ }
+ extents->resize(orig_size);
+ res = 0;
+ }
+ return (uint64_t)res;
+}
#include "os/bluestore/HybridAllocator.h"
-class TestHybridAllocator : public HybridAllocator {
+class TestHybridAllocator : public HybridAvlAllocator {
public:
TestHybridAllocator(CephContext* cct,
int64_t device_size,
int64_t _block_size,
uint64_t max_entries,
const std::string& name) :
- HybridAllocator(cct, device_size, _block_size,
+ HybridAvlAllocator(cct, device_size, _block_size,
max_entries,
name) {
}