os/bluestore/FreelistManager.cc
os/bluestore/KernelDevice.cc
os/bluestore/StupidAllocator.cc
+ os/bluestore/BitMapAllocator.cc
+ os/bluestore/BitAllocator.cc
os/fs/FS.cc
${libos_xfs_srcs})
if(${HAVE_LIBFUSE})
os/bluestore/ExtentFreelistManager.cc \
os/bluestore/FreelistManager.cc \
os/bluestore/KernelDevice.cc \
+ os/bluestore/BitMapAllocator.cc \
+ os/bluestore/BitAllocator.cc \
os/bluestore/StupidAllocator.cc
endif
os/bluestore/KernelDevice.h \
os/bluestore/ExtentFreelistManager.h \
os/bluestore/FreelistManager.h \
+ os/bluestore/BitMapAllocator.h \
+ os/bluestore/BitAllocator.h \
os/bluestore/StupidAllocator.h
endif
#include "Allocator.h"
#include "StupidAllocator.h"
+#include "BitMapAllocator.h"
#include "common/debug.h"
#define dout_subsys ceph_subsys_bluestore
Allocator *Allocator::create(string type, int64_t size)
{
- if (type == "stupid")
+ if (type == "stupid") {
return new StupidAllocator;
+ } else if (type == "bitmap") {
+ return new BitMapAllocator(size);
+ }
derr << "Allocator::" << __func__ << " unknown alloc type " << type << dendl;
return NULL;
}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Bitmap based in-memory allocator.
+ * Author: Ramesh Chander, Ramesh.Chander@sandisk.com
+ */
+
+#include "BitAllocator.h"
+#include <assert.h>
+
+#define debug_assert assert
+#define MIN(x, y) ((x) > (y) ? (y) : (x))
+
+/*
+ * BmapEntityList functions.
+ */
+BmapEntityListIter::BmapEntityListIter(BmapEntityList *list)
+{
+ m_list = list;
+ m_start_idx = list->get_marker();
+ m_cur_idx = m_start_idx;
+ m_wrap = false;
+}
+
+BmapEntityListIter::BmapEntityListIter(BmapEntityList *list, bool wrap)
+{
+ m_list = list;
+ m_start_idx = list->get_marker();
+ m_cur_idx = m_start_idx;
+ m_wrap = wrap;
+}
+
+BmapEntityListIter::BmapEntityListIter(BmapEntityList *list, int64_t start_idx)
+{
+ m_list = list;
+ m_wrap = false;
+ m_start_idx = start_idx;
+ m_cur_idx = m_start_idx;
+ m_wrapped = false;
+}
+
+BmapEntityListIter::BmapEntityListIter(BmapEntityList *list, int64_t start_idx, bool wrap)
+{
+ m_list = list;
+ m_wrap = wrap;
+ m_start_idx = start_idx;
+ m_cur_idx = m_start_idx;
+ m_wrapped = false;
+}
+
+BmapEntity * BmapEntityListIter::next()
+{
+ int64_t cur_idx = m_cur_idx;
+
+ if (m_wrapped &&
+ cur_idx == m_start_idx) {
+ /*
+ * End of wrap cycle
+ */
+ return NULL;
+ }
+ m_cur_idx++;
+
+  if (m_cur_idx == m_list->size() &&
+      m_wrap) {
+ m_cur_idx %= m_list->size();
+ m_wrapped = true;
+ }
+
+ if (cur_idx == m_list->size()) {
+ /*
+ * End of list
+ */
+ return NULL;
+ }
+ debug_assert(cur_idx < m_list->size());
+ return m_list->get_nth_item(cur_idx);
+}
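+
+/*
+ * Illustrative walk (example values, not from the source): with a list
+ * of size 5 and start_idx 3, a wrapping iterator yields items
+ * 3, 4, 0, 1, 2 and then NULL once it is back at its starting index.
+ */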
+
+int64_t BmapEntityListIter::index()
+{
+ return m_cur_idx;
+}
+
+void BmapEntityListIter::decr_idx()
+{
+ m_cur_idx--;
+ debug_assert(m_cur_idx > 0);
+}
+
+/*
+ * Bitmap Entry functions.
+ */
+BmapEntry::BmapEntry(bool full)
+{
+ if (full) {
+ m_bits = BmapEntry::full_bmask();
+ } else {
+ m_bits = BmapEntry::empty_bmask();
+ }
+}
+
+BmapEntry::~BmapEntry()
+{
+
+}
+
+bmap_t BmapEntry::full_bmask() {
+ return (bmap_t) -1;
+}
+
+int64_t BmapEntry::size() {
+ return (sizeof(bmap_t) * 8);
+}
+
+bmap_t BmapEntry::empty_bmask() {
+ return (bmap_t) 0;
+}
+
+bmap_t BmapEntry::align_mask(int x)
+{
+ return ((x) >= BmapEntry::size()? (bmap_t) -1 : (~(((bmap_t) -1) >> (x))));
+}
+
+bmap_t BmapEntry::bit_mask(int bit)
+{
+ return ((bmap_t) 0x1 << ((BmapEntry::size() - 1) - bit));
+}
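+
+/*
+ * Mask layout sketch (assuming a 64-bit bmap_t; bmap_t is typedef'd to
+ * unsigned long, so its width is platform dependent). Bit 0 is the most
+ * significant bit:
+ *   bit_mask(0)   == 0x8000000000000000
+ *   bit_mask(63)  == 0x0000000000000001
+ *   align_mask(4) == 0xf000000000000000
+ */
+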
+bool BmapEntry::check_bit(int bit)
+{
+ return (m_bits & bit_mask(bit));
+}
+
+bmap_t BmapEntry::atomic_fetch()
+{
+ return std::atomic_load(&m_bits);
+}
+
+bool BmapEntry::is_allocated(int64_t start_bit, int64_t num_bits)
+{
+ for (int i = start_bit; i < num_bits + start_bit; i++) {
+ if (!check_bit(i)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+void BmapEntry::clear_bit(int bit)
+{
+ bmap_t bmask = bit_mask(bit);
+ (void) std::atomic_fetch_and(&m_bits, ~(bmask));
+}
+
+void BmapEntry::clear_bits(int offset, int num_bits)
+{
+ if (num_bits == 0) {
+ return;
+ }
+ bmap_t bmask = BmapEntry::align_mask(num_bits) >> offset;
+ (void) std::atomic_fetch_and(&m_bits, ~(bmask));
+}
+
+void BmapEntry::set_bits(int offset, int num_bits)
+{
+ for (int i = 0; i < num_bits; i++) {
+ bmap_t bmask = bit_mask(i + offset);
+ (void) std::atomic_fetch_or(&m_bits, bmask);
+ }
+}
+
+/*
+ * Allocate a bit if it was free.
+ * Returns true if the bit was free (and is now set).
+ */
+bool BmapEntry::check_n_set_bit(int bit)
+{
+ bmap_t bmask = bit_mask(bit);
+  return !(std::atomic_fetch_or(&m_bits, bmask) & bmask);
+}
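+
+/*
+ * Note: std::atomic_fetch_or returns the value m_bits held *before* the
+ * OR, so the bit was free (and now belongs to the caller) iff bmask was
+ * clear in that prior value. Exactly one concurrent caller can win a
+ * given bit.
+ */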
+
+/*
+ * Find N contiguous free bits in the bitmap, starting from an offset,
+ * and mark them allocated as they are found.
+ *
+ * Returns the number of contiguous bits found.
+ */
+int BmapEntry::find_n_cont_bits(int start_offset, int64_t num_bits)
+{
+ int count = 0;
+ int i = 0;
+
+ if (num_bits == 0) {
+ return 0;
+ }
+
+ if (start_offset >= BmapEntry::size()) {
+ return 0;
+ }
+
+ for (i = start_offset; i < BmapEntry::size() && count < num_bits; i++) {
+ if (!check_n_set_bit(i)) {
+ break;
+ }
+ count++;
+ }
+
+ return count;
+}
+
+/*
+ * Find up to N free bits, searching from a given offset.
+ *
+ * Returns the number of bits found; *free_bit is set to the first
+ * free bit and *end_idx to the index just past the last bit scanned.
+ */
+int BmapEntry::find_n_free_bits(int start_idx, int64_t max_bits,
+ int *free_bit, int *end_idx)
+{
+ int i = 0;
+ int count = 0;
+
+ *free_bit = 0;
+ debug_assert(max_bits > 0);
+
+ /*
+ * Find free bit aligned to bit_align return the bit_num in free_bit.
+ */
+ if (atomic_fetch() == BmapEntry::full_bmask()) {
+ /*
+ * All bits full, return fail.
+ */
+ *end_idx = BmapEntry::size();
+ return 0;
+ }
+
+ /*
+ * Do a serial scan on bitmap.
+ */
+ for (i = start_idx; i < BmapEntry::size(); i++) {
+ if (check_n_set_bit(i)) {
+ /*
+ * Found first free bit
+ */
+ *free_bit = i;
+ count++;
+ break;
+ }
+ }
+ count += find_n_cont_bits(i + 1, max_bits - 1);
+
+ (*end_idx) = i + count;
+ return count;
+}
+
+/*
+ * Find the first run of contiguous free bits, starting from bit_offset,
+ * that either satisfies the request or touches the right edge of the
+ * bitmap.
+ *
+ * Returns the number of bits allocated; *start_offset is set to the
+ * first allocated bit and *scanned to the number of bits examined.
+ */
+int
+BmapEntry::find_first_set_bits(int64_t required_blocks,
+ int bit_offset, int *start_offset,
+ int64_t *scanned)
+{
+ int allocated = 0;
+ int conti = 0;
+ int end_idx = 0;
+
+ *scanned = 0;
+
+ while (bit_offset < BmapEntry::size()) {
+ conti = find_n_free_bits(bit_offset, required_blocks,
+ start_offset, &end_idx);
+
+ *scanned += end_idx - bit_offset;
+ /*
+ * Either end of bitmap or got required.
+ */
+ if (conti == required_blocks ||
+ (conti + *start_offset == BmapEntry::size())) {
+ allocated += conti;
+ break;
+ }
+
+ /*
+ * Did not get expected, search from next index again.
+ */
+ clear_bits(*start_offset, conti);
+ allocated = 0;
+
+ bit_offset = end_idx;
+ }
+
+ return allocated;
+}
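+
+/*
+ * Worked example (illustrative, bits shown MSB first): with
+ * required_blocks = 3 and an entry holding 1 1 0 1 0 0 0 ..., the first
+ * pass claims bit 2, cannot extend past the set bit 3, and rolls back;
+ * the second pass claims bits 4-6. Result: return value 3,
+ * *start_offset = 4, *scanned = 7.
+ */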
+
+/*
+ * Find up to N free bits in the bitmap; they need not be contiguous.
+ */
+int BmapEntry::find_any_free_bits(int start_offset, int64_t num_blocks,
+ int64_t *allocated_blocks, int64_t block_offset,
+ int64_t *scanned)
+{
+ int allocated = 0;
+ int required = num_blocks;
+ int i = 0;
+
+ *scanned = 0;
+
+ if (atomic_fetch() == BmapEntry::full_bmask()) {
+ return 0;
+ }
+
+ /*
+ * Do a serial scan on bitmap.
+ */
+ for (i = start_offset; i < BmapEntry::size() &&
+ allocated < required; i++) {
+ if (check_n_set_bit(i)) {
+ allocated_blocks[allocated] = i + block_offset;
+ allocated++;
+ }
+ }
+
+ *scanned = i - start_offset;
+ return allocated;
+}
+
+/*
+ * Bitmap List related functions.
+ */
+int64_t BmapList::incr_marker(int64_t add)
+{
+ return std::atomic_fetch_add(&m_marker, add);
+}
+
+void BmapList::set_marker(int64_t val)
+{
+ atomic_store(&m_marker, val);
+}
+
+int64_t BmapList::get_marker()
+{
+ return std::atomic_load(&m_marker);
+}
+
+/*
+ * Zone related functions.
+ */
+void BitMapZone::init(int64_t zone_num, int64_t total_blocks, bool def)
+{
+ m_zone_num = zone_num;
+ m_total_blocks = total_blocks;
+
+ m_used_blocks = def? total_blocks: 0;
+
+ int64_t num_bmaps = total_blocks / BmapEntry::size();
+ debug_assert(!(total_blocks % BmapEntry::size()));
+
+ BmapEntity **bmaps = new BmapEntity *[num_bmaps];
+ for (int i = 0; i < num_bmaps; i++) {
+ bmaps[i] = new BmapEntry(def);
+ }
+
+ BmapEntityList *list = new BmapList(bmaps, num_bmaps, 0);
+
+ m_bmap_list = list;
+ m_state = ZONE_ACTIVE;
+}
+
+BitMapZone::BitMapZone(int64_t zone_num, int64_t total_blocks)
+{
+ init(zone_num, total_blocks, false);
+}
+
+BitMapZone::BitMapZone(int64_t zone_num, int64_t total_blocks, bool def)
+{
+ init(zone_num, total_blocks, def);
+}
+
+BitMapZone::~BitMapZone()
+{
+ lock_zone(true);
+
+ m_state = ZONE_FREE;
+ BmapEntityList *list = m_bmap_list;
+
+ for (int64_t i = 0; i < list->size(); i++) {
+ BmapEntry *bmap = (BmapEntry *) list->get_nth_item(i);
+ delete bmap;
+ }
+
+ delete [] list->get_item_list();
+ delete list;
+
+ unlock_zone();
+}
+
+/*
+ * Check whether a previous search advanced the zone marker to the end;
+ * if so, reset the marker and report the zone as exhausted.
+ */
+bool BitMapZone::is_exhausted()
+{
+ if (m_bmap_list->get_marker() >=
+ m_total_blocks) {
+ m_bmap_list->set_marker(0);
+ return true;
+ }
+ return false;
+}
+
+void BitMapZone::reset_marker()
+{
+ m_bmap_list->set_marker(0);
+}
+
+bool BitMapZone::is_allocated(int64_t start_block, int64_t num_blocks)
+{
+ BmapEntry *bmap = NULL;
+ int bit = 0;
+ int64_t falling_in_bmap = 0;
+
+ while (num_blocks) {
+ bit = start_block % BmapEntry::size();
+ bmap = (BmapEntry *) m_bmap_list->get_nth_item(start_block /
+ BmapEntry::size());
+ falling_in_bmap = MIN(num_blocks, BmapEntry::size() - bit);
+
+ if (!bmap->is_allocated(bit, falling_in_bmap)) {
+ return false;
+ }
+
+ start_block += falling_in_bmap;
+ num_blocks -= falling_in_bmap;
+ }
+
+ return true;
+}
+
+/*
+ * Allocate N contiguous bits in a zone, starting from the
+ * position of the supplied iterator.
+ */
+int64_t BitMapZone::alloc_cont_bits(int64_t num_blocks,
+ BmapEntityListIter *iter,
+ int64_t *scanned)
+{
+ BmapEntry *bmap = NULL;
+ int64_t required = num_blocks;
+ while ((bmap = (BmapEntry *) iter->next())) {
+ int64_t found = 0;
+ int64_t max_expected = MIN(required, BmapEntry::size());
+ found = bmap->find_n_cont_bits(0, max_expected);
+
+ required -= found;
+
+ if (found < max_expected) {
+ break;
+ }
+ }
+
+ /*
+ * scanned == allocated
+ */
+ *scanned = num_blocks - required;
+ return (num_blocks - required);
+}
+
+void BitMapZone::set_blocks_used(int64_t start_block, int64_t num_blocks)
+{
+ BmapEntry *bmap = NULL;
+ int bit = 0;
+ int64_t falling_in_bmap = 0;
+ int64_t blks = num_blocks;
+
+ while (blks) {
+ bit = start_block % BmapEntry::size();
+ bmap = (BmapEntry *) m_bmap_list->get_nth_item(start_block /
+ BmapEntry::size());
+ falling_in_bmap = MIN(blks, BmapEntry::size() - bit);
+
+ bmap->set_bits(bit, falling_in_bmap);
+
+ start_block += falling_in_bmap;
+ blks -= falling_in_bmap;
+ }
+ add_used_blocks(num_blocks);
+}
+
+void BitMapZone::free_blocks_int(int64_t start_block, int64_t num_blocks)
+{
+ BmapEntry *bmap = NULL;
+ int bit = 0;
+ int64_t falling_in_bmap = 0;
+
+
+ while (num_blocks) {
+ bit = start_block % BmapEntry::size();
+ bmap = (BmapEntry *) m_bmap_list->get_nth_item(start_block /
+ BmapEntry::size());
+ falling_in_bmap = MIN(num_blocks, BmapEntry::size() - bit);
+
+ bmap->clear_bits(bit, falling_in_bmap);
+
+ start_block += falling_in_bmap;
+ num_blocks -= falling_in_bmap;
+ }
+
+}
+
+int64_t BitMapZone::get_index()
+{
+ return m_zone_num;
+}
+
+bool BitMapZone::lock_zone(bool wait)
+{
+ if (wait) {
+ m_lock.lock();
+ return true;
+ }
+
+ if (m_lock.try_lock()) {
+ return true;
+ } else {
+ return false;
+ }
+}
+
+void BitMapZone::unlock_zone()
+{
+ m_lock.unlock();
+}
+
+int64_t BitMapZone::get_used_blocks()
+{
+ return std::atomic_load(&m_used_blocks);
+}
+
+int64_t BitMapZone::size() {
+ return m_total_blocks;
+}
+
+int64_t BitMapZone::add_used_blocks(int64_t blks)
+{
+ return std::atomic_fetch_add(&m_used_blocks, blks) + blks;
+}
+
+int64_t BitMapZone::sub_used_blocks(int64_t blks)
+{
+ int64_t used_blks = std::atomic_fetch_sub(&m_used_blocks, blks) - blks;
+ debug_assert(used_blks >= 0);
+ return used_blks;
+}
+
+/*
+ * Try to find N contiguous blocks in a zone.
+ * Anything less than N counts as failure.
+ *
+ * The caller must hold the zone's exclusive lock.
+ */
+int64_t BitMapZone::alloc_blocks(int64_t num_blocks, int64_t *start_block)
+{
+ int64_t bmap_idx = 0;
+ int bit_idx = 0;
+ int64_t marker = m_bmap_list->get_marker();
+ int64_t marker_add = 0;
+ BmapEntry *bmap = NULL;
+ int64_t allocated = 0;
+ int64_t zone_block_offset = get_index() * size();
+
+ bmap_idx = marker / BmapEntry::size();
+ bit_idx = marker % BmapEntry::size();
+
+ BmapEntityListIter iter = BmapEntityListIter(
+ m_bmap_list, bmap_idx);
+
+ while ((bmap = (BmapEntry *) iter.next())) {
+ int64_t scanned = 0;
+ int start_offset = -1;
+
+ allocated = bmap->find_first_set_bits(num_blocks,
+ bit_idx, &start_offset, &scanned);
+
+ marker_add += scanned;
+ bit_idx = 0;
+
+ if (allocated > 0) {
+ (*start_block) = start_offset +
+ (iter.index() - 1) * bmap->size() +
+ m_zone_num * size();
+
+ allocated += alloc_cont_bits(num_blocks - allocated,
+ &iter, &scanned);
+ marker_add += scanned;
+ /*
+       * Step the iterator back one entry so that, if this allocation
+       * falls short, the next attempt resumes from the last bitmap.
+ */
+ iter.decr_idx();
+ bit_idx = scanned % BmapEntry::size();
+ }
+
+ if (allocated < num_blocks) {
+ free_blocks_int((*start_block - zone_block_offset), allocated);
+ allocated = 0;
+ } else {
+ /*
+ * Got required.
+ */
+ break;
+ }
+ }
+
+ m_bmap_list->incr_marker(marker_add);
+ add_used_blocks(allocated);
+
+ return allocated;
+}
+
+void BitMapZone::free_blocks(int64_t start_block, int64_t num_blocks)
+{
+ free_blocks_int(start_block, num_blocks);
+ debug_assert(get_used_blocks() > 0);
+ sub_used_blocks(num_blocks);
+}
+
+/*
+ * Allocate N blocks; they need not be contiguous.
+ */
+int64_t BitMapZone::alloc_blocks_dis(int64_t num_blocks, int64_t *alloc_blocks)
+{
+ int64_t bmap_idx = 0;
+ int bit = 0;
+ int64_t marker_add = 0;
+ BmapEntry *bmap = NULL;
+ int64_t allocated = 0;
+ int64_t blk_off = 0;
+
+ bmap_idx = m_bmap_list->get_marker() / BmapEntry::size();
+ bit = m_bmap_list->get_marker() % BmapEntry::size();
+
+ BmapEntityListIter iter = BmapEntityListIter(
+ m_bmap_list, bmap_idx);
+ while ((bmap = (BmapEntry *) iter.next())) {
+ int64_t scanned = 0;
+ blk_off = (iter.index() - 1) * BmapEntry::size() +
+ m_zone_num * size();
+ allocated += bmap->find_any_free_bits(bit, num_blocks - allocated,
+ &alloc_blocks[allocated], blk_off, &scanned);
+
+ marker_add += scanned;
+ }
+
+ m_bmap_list->incr_marker(marker_add);
+ add_used_blocks(allocated);
+
+ return allocated;
+}
+
+/*
+ * Zone List related functions
+ */
+
+ZoneList::ZoneList(BmapEntity **list, int64_t len) :
+ BmapEntityList(list, len)
+{
+ m_marker = 0;
+ return;
+}
+
+ZoneList::ZoneList(BmapEntity **list, int64_t len, int64_t marker) :
+ BmapEntityList(list, len)
+{
+ m_marker = marker;
+ return;
+}
+
+int64_t ZoneList::incr_marker(int64_t add) {
+ std::lock_guard<std::mutex> l (m_marker_mutex);
+ m_marker += add;
+ m_marker %= size();
+
+ return m_marker;
+}
+
+int64_t ZoneList::get_marker()
+{
+ std::lock_guard<std::mutex> l (m_marker_mutex);
+ return m_marker;
+}
+
+void ZoneList::set_marker(int64_t val)
+{
+ std::lock_guard<std::mutex> l (m_marker_mutex);
+ m_marker = val;
+}
+
+/*
+ * Main allocator functions.
+ */
+BitAllocator::BitAllocator(int64_t total_blocks, int64_t zone_size_block, bmap_alloc_mode_t mode)
+{
+ init(total_blocks, zone_size_block, mode, false);
+}
+
+BitAllocator::BitAllocator(int64_t total_blocks, int64_t zone_size_block,
+ bmap_alloc_mode_t mode, bool def)
+{
+ init(total_blocks, zone_size_block, mode, def);
+}
+
+void BitAllocator::init(int64_t total_blocks, int64_t zone_size_block,
+ bmap_alloc_mode_t mode, bool def)
+{
+ int64_t total_zones = 0;
+
+ if (mode != SERIAL && mode != CONCURRENT) {
+ debug_assert(0);
+ }
+
+ if (total_blocks <= 0) {
+ debug_assert(0);
+ }
+
+ if (zone_size_block == 0 ||
+ zone_size_block < BmapEntry::size()) {
+ debug_assert(0);
+ }
+
+ zone_size_block = (zone_size_block / BmapEntry::size()) *
+ BmapEntry::size();
+
+ if (total_blocks < zone_size_block) {
+ debug_assert(0);
+ }
+
+ total_blocks = (total_blocks / zone_size_block) * zone_size_block;
+ total_zones = total_blocks / zone_size_block;
+
+ debug_assert(total_blocks > 0);
+ debug_assert(total_zones > 0);
+
+ pthread_rwlock_init(&m_alloc_slow_lock, NULL);
+
+ m_total_blocks = total_blocks;
+ m_total_zones = total_zones;
+ m_zone_size_blocks = zone_size_block;
+ m_alloc_mode = mode;
+ m_allocated_blocks = def? total_blocks: 0;
+ m_reserved_blocks = 0;
+
+ BmapEntity **zonesp = new BmapEntity* [total_zones];
+ for (int i = 0; i < total_zones; i++) {
+ zonesp[i] = new BitMapZone(i, zone_size_block, def);
+ }
+
+ BmapEntityList *list = new ZoneList(zonesp, total_zones, 0);
+
+ m_zone_list = list;
+
+ m_state = ALLOC_ACTIVE;
+}
+
+BitAllocator::~BitAllocator()
+{
+ alloc_lock(true);
+
+ m_state = ALLOC_DESTROY;
+ BmapEntityList *list = m_zone_list;
+
+ for (int64_t i = 0; i < list->size(); i++) {
+ BitMapZone *zone = (BitMapZone *) list->get_nth_item(i);
+ delete zone;
+ }
+
+ delete [] list->get_item_list();
+ delete list;
+
+ alloc_unlock();
+ pthread_rwlock_destroy(&m_alloc_slow_lock);
+}
+
+void
+BitAllocator::shutdown()
+{
+ alloc_lock(true);
+ serial_lock();
+}
+
+int64_t BitAllocator::sub_used_blocks(int64_t num_blocks)
+{
+ int64_t used_blks =
+ std::atomic_fetch_sub(&m_allocated_blocks, num_blocks);
+ debug_assert(used_blks > 0);
+ return used_blks;
+}
+
+int64_t BitAllocator::add_used_blocks(int64_t num_blocks)
+{
+ return std::atomic_fetch_add(&m_allocated_blocks, num_blocks) + num_blocks;
+}
+
+int64_t BitAllocator::get_used_blocks()
+{
+ return std::atomic_load(&m_allocated_blocks);
+}
+
+int64_t BitAllocator::get_reserved_blocks()
+{
+ return m_reserved_blocks;
+}
+
+int64_t BitAllocator::size()
+{
+ return m_total_blocks;
+}
+
+bool BitAllocator::reserve_blocks(int64_t num)
+{
+ bool res = false;
+ std::lock_guard<std::mutex> l(m_res_blocks_lock);
+ if (add_used_blocks(num) <= size()) {
+ res = true;
+ m_reserved_blocks += num;
+ } else {
+ sub_used_blocks(num);
+ res = false;
+ }
+
+ debug_assert(m_allocated_blocks <= size());
+ return res;
+}
+
+void BitAllocator::unreserve(int64_t needed, int64_t allocated)
+{
+ std::lock_guard<std::mutex> l(m_res_blocks_lock);
+ sub_used_blocks(needed - allocated);
+ m_reserved_blocks -= needed;
+ debug_assert(m_allocated_blocks >= 0);
+}
+
+void BitAllocator::unreserve_blocks(int64_t unused)
+{
+ unreserve(unused, 0);
+}
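+
+/*
+ * Reservation accounting sketch: reserve_blocks(N) bumps the used-block
+ * count by N up front; after the allocation attempt, unreserve(N, got)
+ * returns the N - got blocks that were reserved but never allocated, so
+ * the used count ends up higher by exactly the allocated amount.
+ */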
+
+void BitAllocator::serial_lock()
+{
+ if (m_alloc_mode == SERIAL) {
+ m_serial_mutex.lock();
+ }
+}
+
+void BitAllocator::serial_unlock()
+{
+ if (m_alloc_mode == SERIAL) {
+ m_serial_mutex.unlock();
+ }
+}
+
+void BitAllocator::alloc_lock(bool write) {
+ if (write) {
+ pthread_rwlock_wrlock(&m_alloc_slow_lock);
+ } else {
+ pthread_rwlock_rdlock(&m_alloc_slow_lock);
+ }
+}
+
+void BitAllocator::alloc_unlock()
+{
+ pthread_rwlock_unlock(&m_alloc_slow_lock);
+}
+
+bool BitAllocator::zone_free_to_alloc(BmapEntity *item,
+ int64_t required, bool wait)
+{
+ BitMapZone *zone = (BitMapZone *) item;
+ if (!zone->lock_zone(wait)) {
+ return false;
+ }
+
+ if (zone->is_exhausted()) {
+ if (zone->get_index() ==
+ m_zone_list->get_marker()) {
+ m_zone_list->incr_marker(1);
+ }
+ zone->unlock_zone();
+ return false;
+ }
+
+ if (zone->get_used_blocks() + required >
+ zone->size()) {
+ zone->unlock_zone();
+ return false;
+ }
+
+ return true;
+}
+
+bool BitAllocator::is_allocated(int64_t start_block, int64_t num_blocks)
+{
+ BitMapZone *zone = NULL;
+ int64_t zone_block_offset = 0;
+ int64_t falling_in_zone = 0;
+
+ debug_assert(start_block >= 0 &&
+ (start_block + num_blocks <= size()));
+
+ if (num_blocks == 0) {
+ return true;
+ }
+
+ assert(start_block >= 0);
+
+ while (num_blocks) {
+ zone = (BitMapZone *) m_zone_list->get_nth_item(
+ start_block / m_zone_size_blocks);
+
+ zone_block_offset = start_block % m_zone_size_blocks;
+ falling_in_zone = MIN(m_zone_size_blocks - zone_block_offset,
+ num_blocks);
+ if (!zone->is_allocated(zone_block_offset, falling_in_zone)) {
+ return false;
+ }
+ start_block += falling_in_zone;
+ num_blocks -= falling_in_zone;
+ }
+ return true;
+}
+
+bool BitAllocator::is_allocated(int64_t *alloc_blocks, int64_t num_blocks)
+{
+  for (int64_t i = 0; i < num_blocks; i++) {
+    if (!is_allocated(alloc_blocks[i], 1)) {
+      return false;
+    }
+  }
+
+ return true;
+}
+
+/*
+ * Allocate N contiguous blocks.
+ */
+int64_t BitAllocator::alloc_blocks_int(int64_t num_blocks,
+ int64_t *start_block, bool wait)
+{
+
+ int64_t zone_mark = m_zone_list->get_marker();
+ BitMapZone *zone = NULL;
+ int64_t allocated = 0;
+
+ BmapEntityListIter iter = BmapEntityListIter(
+ m_zone_list, zone_mark, true);
+
+ while ((zone = (BitMapZone *) iter.next())) {
+
+ if (!zone_free_to_alloc(zone,
+ num_blocks - allocated, wait)) {
+ continue;
+ }
+
+ allocated = zone->alloc_blocks(num_blocks, start_block);
+
+ zone->unlock_zone();
+ if (allocated == num_blocks) {
+ break;
+ }
+
+ zone->free_blocks(*start_block, allocated);
+ allocated = 0;
+ }
+
+ return allocated;
+}
+
+bool BitAllocator::check_input_dis(int64_t num_blocks)
+{
+ if (num_blocks == 0) {
+ return false;
+ }
+
+ if (num_blocks > size()) {
+ return false;
+ }
+ return true;
+}
+
+bool BitAllocator::check_input(int64_t num_blocks)
+{
+ if (num_blocks == 0) {
+ return false;
+ }
+
+ if (num_blocks > m_zone_size_blocks) {
+ return false;
+ }
+ return true;
+}
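+
+/*
+ * Note: a contiguous request larger than one zone is rejected up front
+ * by check_input, since alloc_blocks_int satisfies a contiguous run
+ * entirely within a single zone. Discontiguous requests
+ * (check_input_dis) are bounded only by the allocator's total size.
+ */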
+
+/*
+ * Allocate blocks against an existing reservation (the caller has
+ * already called reserve_blocks for at least num_blocks).
+ */
+int64_t BitAllocator::alloc_blocks_res(int64_t num_blocks, int64_t *start_block)
+{
+ int scans = 2;
+ int64_t allocated = 0;
+
+ if (!check_input(num_blocks)) {
+ return 0;
+ }
+
+ alloc_lock(false);
+ serial_lock();
+
+ while (scans && !allocated) {
+ allocated = alloc_blocks_int(num_blocks, start_block, false);
+ scans --;
+ }
+
+ if (!allocated) {
+ /*
+     * Nothing found in two non-blocking scans;
+     * fall back to a blocking scan.
+ */
+ allocated = alloc_blocks_int(num_blocks, start_block, true);
+ }
+
+ debug_assert(is_allocated(*start_block, allocated));
+ unreserve(num_blocks, allocated);
+
+ serial_unlock();
+ alloc_unlock();
+
+ return allocated;
+}
+
+int64_t BitAllocator::alloc_blocks(int64_t num_blocks, int64_t *start_block)
+{
+ int scans = 2;
+ int64_t allocated = 0;
+
+ if (!check_input(num_blocks)) {
+ return 0;
+ }
+
+ alloc_lock(false);
+ serial_lock();
+
+ if (!reserve_blocks(num_blocks)) {
+ goto exit;
+ }
+
+ while (scans && !allocated) {
+ allocated = alloc_blocks_int(num_blocks, start_block, false);
+ scans --;
+ }
+
+ if (!allocated) {
+ /*
+     * Nothing found in two non-blocking scans;
+     * fall back to a blocking scan.
+ */
+ allocated = alloc_blocks_int(num_blocks, start_block, true);
+ }
+
+ unreserve(num_blocks, allocated);
+ debug_assert((m_allocated_blocks <= m_total_blocks));
+ debug_assert(is_allocated(*start_block, allocated));
+
+exit:
+ serial_unlock();
+ alloc_unlock();
+
+ return allocated;
+}
+
+void BitAllocator::free_blocks_int(int64_t start_block, int64_t num_blocks)
+{
+ BitMapZone *zone = NULL;
+ int64_t zone_block_offset = 0;
+ int64_t falling_in_zone = 0;
+
+ debug_assert(start_block >= 0 &&
+ (start_block + num_blocks) <= size());
+
+ if (num_blocks == 0) {
+ return;
+ }
+
+ assert(start_block >= 0);
+
+ while (num_blocks) {
+ zone = (BitMapZone *) m_zone_list->get_nth_item(
+ start_block / m_zone_size_blocks);
+
+ zone_block_offset = start_block % m_zone_size_blocks;
+
+ falling_in_zone = MIN(m_zone_size_blocks - zone_block_offset,
+ num_blocks);
+ zone->free_blocks(zone_block_offset, falling_in_zone);
+ start_block += falling_in_zone;
+ num_blocks -= falling_in_zone;
+ }
+
+}
+
+void BitAllocator::free_blocks(int64_t start_block, int64_t num_blocks)
+{
+ alloc_lock(false);
+ debug_assert(is_allocated(start_block, num_blocks));
+
+ free_blocks_int(start_block, num_blocks);
+ (void) sub_used_blocks(num_blocks);
+
+ alloc_unlock();
+}
+
+void BitAllocator::set_blocks_used(int64_t start_block, int64_t num_blocks)
+{
+ BitMapZone *zone = NULL;
+ int64_t zone_block_offset = 0;
+ int64_t falling_in_zone = 0;
+ int64_t blks = num_blocks;
+ int64_t start_blk = start_block;
+
+ if (num_blocks == 0) {
+ return;
+ }
+
+ alloc_lock(false);
+ serial_lock();
+
+ assert(start_block >= 0);
+
+ while (blks) {
+ zone = (BitMapZone *) m_zone_list->get_nth_item(
+ start_blk / m_zone_size_blocks);
+
+ zone_block_offset = start_blk % m_zone_size_blocks;
+ falling_in_zone = MIN(m_zone_size_blocks - zone_block_offset,
+ blks);
+ zone->set_blocks_used(zone_block_offset, falling_in_zone);
+ start_blk += falling_in_zone;
+ blks -= falling_in_zone;
+ }
+
+ add_used_blocks(num_blocks);
+ debug_assert(is_allocated(start_block, num_blocks));
+
+ serial_unlock();
+ alloc_unlock();
+}
+
+int64_t BitAllocator::alloc_blocks_dis_int(int64_t num_blocks,
+ int64_t *block_list, bool lock)
+{
+ int64_t zone_mark = m_zone_list->get_marker();
+ BitMapZone *zone = NULL;
+ int64_t allocated = 0;
+
+ alloc_lock(false);
+ serial_lock();
+
+ BmapEntityListIter iter = BmapEntityListIter(
+ m_zone_list, zone_mark, true);
+
+ while ((zone = (BitMapZone *) iter.next())) {
+
+ if (!zone_free_to_alloc(zone, 1, lock)) {
+ continue;
+ }
+
+ allocated += zone->alloc_blocks_dis(num_blocks, &block_list[allocated]);
+ zone->unlock_zone();
+ if (allocated == num_blocks) {
+ break;
+ }
+ }
+
+  serial_unlock();
+  alloc_unlock();
+
+ return allocated;
+}
+
+/*
+ * Allocate N blocks; the result may be discontiguous.
+ */
+int64_t BitAllocator::alloc_blocks_dis(int64_t num_blocks, int64_t *block_list)
+{
+ int scans = 2;
+ int64_t allocated = 0;
+
+ if (!check_input_dis(num_blocks)) {
+ return 0;
+ }
+
+ if (!reserve_blocks(num_blocks)) {
+ return 0;
+ }
+
+ while (scans && allocated < num_blocks) {
+ allocated += alloc_blocks_dis_int(num_blocks, &block_list[allocated], false);
+ scans --;
+ }
+
+ if (allocated < num_blocks) {
+ /*
+     * Nothing (or not enough) found in two non-blocking scans;
+     * fall back to a blocking scan to claim whatever is
+     * still available.
+ */
+ allocated += alloc_blocks_dis_int(num_blocks, &block_list[allocated], true);
+ }
+
+ unreserve(num_blocks, allocated);
+ debug_assert(is_allocated(block_list, allocated));
+
+ return allocated;
+}
+
+void BitAllocator::free_blocks_dis(int64_t num_blocks, int64_t *block_list)
+{
+ alloc_lock(false);
+
+ for (int64_t i = 0; i < num_blocks; i++) {
+ free_blocks_int(block_list[i], 1);
+ }
+
+ debug_assert(get_used_blocks() > 0);
+ sub_used_blocks(num_blocks);
+ alloc_unlock();
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Bitmap based in-memory allocator.
+ * Author: Ramesh Chander, Ramesh.Chander@sandisk.com
+ */
+
+#ifndef CEPH_OS_BLUESTORE_BITALLOCATOR_H
+#define CEPH_OS_BLUESTORE_BITALLOCATOR_H
+
+#include <stdint.h>
+#include <pthread.h>
+#include <mutex>
+#include <atomic>
+
+class BmapEntity {
+public:
+ static int64_t size();
+ virtual int64_t get_index() { return -1; }
+ virtual bool is_allocated(int64_t start, int64_t num) = 0;
+
+ virtual ~BmapEntity() { }
+};
+
+
+class BmapEntityList {
+ BmapEntity **m_items;
+ int64_t m_num_items;
+
+public:
+
+ BmapEntityList(BmapEntity **list, int64_t len) {
+ m_items = list;
+ m_num_items = len;
+ }
+
+ virtual ~BmapEntityList() { }
+
+ BmapEntity *get_nth_item(int64_t idx) {
+ return m_items[idx];
+ }
+
+ BmapEntity** get_item_list() {
+ return m_items;
+ }
+
+ virtual int64_t incr_marker(int64_t add) = 0;
+ virtual int64_t get_marker() = 0;
+ virtual void set_marker(int64_t val) = 0;
+
+ int64_t size() {
+ return m_num_items;
+ }
+};
+
+
+class BmapEntityListIter {
+ BmapEntityList *m_list;
+ int64_t m_start_idx;
+ int64_t m_cur_idx;
+ bool m_wrap;
+ bool m_wrapped;
+public:
+
+ BmapEntityListIter(BmapEntityList *list);
+
+ BmapEntityListIter(BmapEntityList *list, bool wrap);
+
+ BmapEntityListIter(BmapEntityList *list, int64_t start_idx);
+
+ BmapEntityListIter(BmapEntityList *list, int64_t start_idx, bool wrap);
+
+ BmapEntity *next();
+ int64_t index();
+ void decr_idx();
+};
+
+typedef unsigned long bmap_t;
+
+class BmapEntry: public BmapEntity {
+
+private:
+ std::atomic<bmap_t> m_bits;
+public:
+ static bmap_t full_bmask();
+ static int64_t size();
+ static bmap_t empty_bmask();
+ static bmap_t align_mask(int x);
+ bmap_t bit_mask(int bit_num);
+ bmap_t atomic_fetch();
+ BmapEntry(bool val);
+
+ void clear_bit(int bit);
+ void clear_bits(int offset, int num_bits);
+ void set_bits(int offset, int num_bits);
+ bool check_n_set_bit(int bit);
+ bool check_bit(int bit);
+ bool is_allocated(int64_t start_bit, int64_t num_bits);
+
+ int find_n_cont_bits(int start_offset, int64_t num_bits);
+ int find_n_free_bits(int start_idx, int64_t max_bits,
+ int *free_bit, int *end_idx);
+ int find_first_set_bits(int64_t required_blocks, int bit_offset,
+ int *start_offset, int64_t *scanned);
+
+ int find_any_free_bits(int start_offset, int64_t num_blocks,
+ int64_t *alloc_list, int64_t block_offset,
+ int64_t *scanned);
+
+ virtual ~BmapEntry();
+
+};
+
+class BmapList: public BmapEntityList {
+
+ std::atomic<int64_t> m_marker;
+public:
+ BmapList(BmapEntity **bmaps, int len, int64_t marker):
+ BmapEntityList(bmaps, len) {
+ m_marker = marker;
+ }
+
+ int64_t incr_marker(int64_t add);
+ void set_marker(int64_t val);
+ int64_t get_marker();
+};
+
+
+class BitMapZone: public BmapEntity {
+
+private:
+ int64_t m_total_blocks;
+ int64_t m_zone_num;
+ std::atomic<int64_t> m_used_blocks;
+
+ BmapEntityList *m_bmap_list;
+
+ enum {ZONE_FREE = 0, ZONE_ACTIVE = 1} m_state;
+ std::mutex m_lock;
+
+  int64_t alloc_cont_bits(int64_t num_blocks, BmapEntityListIter *iter, int64_t *scanned);
+ void free_blocks_int(int64_t start_block, int64_t num_blocks);
+ void init(int64_t zone_num, int64_t total_blocks, bool def);
+
+public:
+ BitMapZone(int64_t zone_num, int64_t total_blocks);
+ BitMapZone(int64_t zone_num, int64_t total_blocks, bool def);
+ bool lock_zone(bool wait);
+ void unlock_zone();
+
+ int64_t get_used_blocks();
+
+ int64_t add_used_blocks(int64_t blks);
+ int64_t sub_used_blocks(int64_t blks);
+
+ int64_t alloc_blocks(int64_t num_blocks, int64_t *start_block);
+ void set_blocks_used(int64_t start_block, int64_t num_blocks);
+ void free_blocks(int64_t start_block, int64_t num_blocks);
+
+ int64_t alloc_blocks_dis(int64_t num_blocks, int64_t *allocated_blocks);
+
+ bool is_exhausted();
+  bool is_allocated(int64_t start_block, int64_t num_blocks);
+ int64_t get_index();
+ int64_t size();
+ void reset_marker();
+
+ virtual ~BitMapZone();
+};
+
+typedef enum bmap_alloc_mode {
+ SERIAL = 1,
+ CONCURRENT = 2,
+} bmap_alloc_mode_t;
+
+
+class ZoneList : public BmapEntityList {
+
+private:
+ int64_t m_marker;
+ std::mutex m_marker_mutex;
+
+public:
+
+ ZoneList(BmapEntity **list, int64_t len);
+ ZoneList(BmapEntity **list, int64_t len, int64_t marker);
+ int64_t incr_marker(int64_t add);
+ int64_t get_marker();
+ void set_marker(int64_t val);
+
+};
+
+class BitAllocator {
+private:
+ bmap_alloc_mode_t m_alloc_mode;
+ int64_t m_zone_size_blocks;
+ int64_t m_total_zones;
+
+ int64_t m_total_blocks;
+ std::atomic<int64_t> m_allocated_blocks;
+ int64_t m_reserved_blocks;
+ std::mutex m_res_blocks_lock;
+ BmapEntityList *m_zone_list;
+
+ enum {ALLOC_DESTROY = 0, ALLOC_ACTIVE = 1} m_state;
+
+ std::mutex m_serial_mutex;
+ pthread_rwlock_t m_alloc_slow_lock;
+
+ bool is_allocated(int64_t start_block, int64_t num_blocks);
+ bool is_allocated(int64_t *blocks, int64_t num_blocks);
+
+ int64_t alloc_blocks_int(int64_t num_blocks, int64_t *start_block, bool lock);
+ int64_t alloc_blocks_dis_int(int64_t num_blocks, int64_t *block_list, bool lock);
+ void free_blocks_int(int64_t start_block, int64_t num_blocks);
+ bool zone_free_to_alloc(BmapEntity *zone, int64_t required, bool lock);
+
+ void serial_lock();
+ void serial_unlock();
+
+ void alloc_lock(bool write);
+ void alloc_unlock();
+ bool check_input(int64_t num_blocks);
+ bool check_input_dis(int64_t num_blocks);
+ void unreserve(int64_t needed, int64_t allocated);
+ void init(int64_t total_blocks, int64_t zone_size_block, bmap_alloc_mode_t mode, bool def);
+
+public:
+
+ BitAllocator(int64_t total_blocks, int64_t zone_size_block, bmap_alloc_mode_t mode);
+ BitAllocator(int64_t total_blocks, int64_t zone_size_block, bmap_alloc_mode_t mode, bool def);
+ ~BitAllocator();
+ void shutdown();
+ int64_t alloc_blocks(int64_t num_blocks, int64_t *start_block);
+ int64_t alloc_blocks_res(int64_t num_blocks, int64_t *start_block);
+ void free_blocks(int64_t start_block, int64_t num_blocks);
+ void set_blocks_used(int64_t start_block, int64_t num_blocks);
+ int64_t sub_used_blocks(int64_t num_blocks);
+ int64_t add_used_blocks(int64_t num_blocks);
+ int64_t get_used_blocks();
+ int64_t get_reserved_blocks();
+ int64_t size();
+ bool reserve_blocks(int64_t blocks);
+ void unreserve_blocks(int64_t blocks);
+
+ int64_t alloc_blocks_dis(int64_t num_blocks, int64_t *block_list);
+ void free_blocks_dis(int64_t num_blocks, int64_t *block_list);
+};
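+
+/*
+ * Usage sketch (illustrative values, not taken from the tree):
+ *
+ *   BitAllocator alloc(1024 * 1024, 1024, CONCURRENT);
+ *   int64_t start = 0;
+ *   int64_t got = alloc.alloc_blocks(16, &start);  // contiguous run
+ *   if (got == 16) {
+ *     // ... use blocks [start, start + got) ...
+ *     alloc.free_blocks(start, got);
+ *   }
+ */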
+
+#endif // CEPH_OS_BLUESTORE_BITALLOCATOR_H
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Bitmap based in-memory allocator.
+ * Author: Ramesh Chander, Ramesh.Chander@sandisk.com
+ *
+ * TODO list:
+ * 1. Make the committing and uncommitting lists concurrent.
+ */
+
+#include "BitAllocator.h"
+
+#include "BitMapAllocator.h"
+#include "bluestore_types.h"
+#include "BlueStore.h"
+
+#define dout_subsys ceph_subsys_bluestore
+#undef dout_prefix
+#define dout_prefix *_dout << "bitmapalloc:"
+
+#define NEXT_MULTIPLE(x, m) (!(x) ? 0: (((((x) - 1) / (m)) + 1) * (m)))
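+// e.g. NEXT_MULTIPLE(0, 4) == 0, NEXT_MULTIPLE(5, 4) == 8,
+// NEXT_MULTIPLE(8, 4) == 8: round x up to the next multiple of m.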
+
+BitMapAllocator::BitMapAllocator(int64_t device_size)
+ : m_num_uncommitted(0),
+ m_num_committing(0)
+{
+ int64_t block_size = g_conf->bluestore_min_alloc_size;
+ int64_t zone_size_blks = 1024; // Change it later
+
+ m_block_size = block_size;
+ m_bit_alloc = new BitAllocator(device_size / block_size,
+ zone_size_blks, CONCURRENT, true);
+ assert(m_bit_alloc);
+ if (!m_bit_alloc) {
+    dout(10) << __func__ << " Unable to initialize BitAllocator" << dendl;
+ }
+ dout(10) << __func__ <<" instance "<< (uint64_t) this <<
+ " size " << device_size << dendl;
+}
+
+BitMapAllocator::~BitMapAllocator()
+{
+ delete m_bit_alloc;
+}
+
+void BitMapAllocator::insert_free(uint64_t off, uint64_t len)
+{
+ dout(20) << __func__ <<" instance "<< (uint64_t) this <<
+ " offset " << off << " len "<< len << dendl;
+
+ assert(!(off % m_block_size));
+ assert(!(len % m_block_size));
+
+ m_bit_alloc->free_blocks(off / m_block_size,
+ len / m_block_size);
+}
+
+int BitMapAllocator::reserve(uint64_t need)
+{
+ int nblks = need / m_block_size; // apply floor
+ assert(!(need % m_block_size));
+ dout(10) << __func__ <<" instance "<< (uint64_t) this <<
+ " num_used " << m_bit_alloc->get_used_blocks() <<
+ " total " << m_bit_alloc->size() << dendl;
+
+  if (!m_bit_alloc->reserve_blocks(nblks)) {
+    return -ENOSPC;
+ }
+ return 0;
+}
+
+void BitMapAllocator::unreserve(uint64_t unused)
+{
+ int nblks = unused / m_block_size;
+ assert(!(unused % m_block_size));
+
+ dout(10) << __func__ <<" instance "<< (uint64_t) this <<
+ " unused " << nblks <<
+ " num used " << m_bit_alloc->get_used_blocks() <<
+ " total " << m_bit_alloc->size() << dendl;
+
+ m_bit_alloc->unreserve_blocks(nblks);
+}
+
+int BitMapAllocator::allocate(
+ uint64_t want_size, uint64_t alloc_unit, int64_t hint,
+ uint64_t *offset, uint32_t *length)
+{
+
+ assert(!(alloc_unit % m_block_size));
+ int64_t len = NEXT_MULTIPLE(want_size, m_block_size);
+ int64_t nblks = len / m_block_size;
+
+ assert(alloc_unit);
+
+ int64_t start_blk = 0;
+ int64_t count = 0;
+
+ dout(10) << __func__ <<" instance "<< (uint64_t) this
+ << " want_size " << want_size
+ << " alloc_unit " << alloc_unit
+ << " hint " << hint
+ << dendl;
+
+ *offset = 0;
+ *length = 0;
+
+ count = m_bit_alloc->alloc_blocks_res(nblks, &start_blk);
+ if (count == 0) {
+    return -ENOSPC;
+ }
+ *offset = start_blk * m_block_size;
+ *length = count * m_block_size;
+
+ dout(20) << __func__ <<" instance "<< (uint64_t) this
+ << " offset " << *offset << " length " << *length << dendl;
+
+ return 0;
+}
+
+int BitMapAllocator::release(
+ uint64_t offset, uint64_t length)
+{
+ std::lock_guard<std::mutex> l(m_lock);
+ dout(10) << __func__ << " " << offset << "~" << length << dendl;
+ m_uncommitted.insert(offset, length);
+ m_num_uncommitted += length;
+ return 0;
+}
+
+uint64_t BitMapAllocator::get_free()
+{
+  return (m_bit_alloc->size() - m_bit_alloc->get_used_blocks()) *
+    m_block_size;
+}
+
+void BitMapAllocator::dump(ostream& out)
+{
+ std::lock_guard<std::mutex> l(m_lock);
+ dout(30) << __func__ <<" instance "<< (uint64_t) this
+ << " committing: " << m_committing.num_intervals()
+ << " extents" << dendl;
+
+ for (auto p = m_committing.begin();
+ p != m_committing.end(); ++p) {
+ dout(30) << __func__ <<" instance "<< (uint64_t) this
+ << " " << p.get_start() << "~" << p.get_len() << dendl;
+ }
+ dout(30) << __func__ <<" instance "<< (uint64_t) this
+ << " uncommitted: " << m_uncommitted.num_intervals()
+ << " extents" << dendl;
+
+ for (auto p = m_uncommitted.begin();
+ p != m_uncommitted.end(); ++p) {
+ dout(30) << __func__ << " " << p.get_start() << "~" << p.get_len() << dendl;
+ }
+}
+
+void BitMapAllocator::init_add_free(uint64_t offset, uint64_t length)
+{
+ dout(10) << __func__ <<" instance "<< (uint64_t) this <<
+ " offset " << offset << " length " << length << dendl;
+
+ insert_free(NEXT_MULTIPLE(offset, m_block_size),
+ (length / m_block_size) * m_block_size);
+}
+
+void BitMapAllocator::init_rm_free(uint64_t offset, uint64_t length)
+{
+ dout(10) << __func__ <<" instance "<< (uint64_t) this <<
+ " offset " << offset << " length " << length << dendl;
+
+ assert(!(offset % m_block_size));
+  assert(!(length % m_block_size));
+
+ int64_t first_blk = offset / m_block_size;
+ int64_t count = length / m_block_size;
+
+ m_bit_alloc->set_blocks_used(first_blk, count);
+}
+
+
+void BitMapAllocator::shutdown()
+{
+ dout(10) << __func__ <<" instance "<< (uint64_t) this << dendl;
+ m_bit_alloc->shutdown();
+ //delete m_bit_alloc; //Fix this
+}
+
+void BitMapAllocator::commit_start()
+{
+ std::lock_guard<std::mutex> l(m_lock);
+
+ dout(10) << __func__ <<" instance "<< (uint64_t) this
+ << " releasing " << m_num_uncommitted
+ << " in extents " << m_uncommitted.num_intervals() << dendl;
+ assert(m_committing.empty());
+ m_committing.swap(m_uncommitted);
+ m_num_committing = m_num_uncommitted;
+ m_num_uncommitted = 0;
+}
+
+void BitMapAllocator::commit_finish()
+{
+ std::lock_guard<std::mutex> l(m_lock);
+ dout(10) << __func__ <<" instance "<< (uint64_t) this
+ << " released " << m_num_committing
+ << " in extents " << m_committing.num_intervals() << dendl;
+ for (auto p = m_committing.begin();
+ p != m_committing.end();
+ ++p) {
+ insert_free(p.get_start(), p.get_len());
+ }
+ m_committing.clear();
+ m_num_committing = 0;
+}
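+
+/*
+ * Release lifecycle recap (matching the code above): release() queues
+ * extents in m_uncommitted; commit_start() swaps them into m_committing;
+ * commit_finish() hands them back to the bit allocator via insert_free().
+ * Freed space becomes allocatable again only after commit_finish().
+ */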
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_OS_BLUESTORE_BITMAPALLOCATOR_H
+#define CEPH_OS_BLUESTORE_BITMAPALLOCATOR_H
+
+#include <mutex>
+
+#include "Allocator.h"
+#include "BitAllocator.h"
+#include "include/btree_interval_set.h"
+
+class BitMapAllocator : public Allocator {
+ std::mutex m_lock;
+
+ int64_t m_num_uncommitted;
+ int64_t m_num_committing;
+ int64_t m_block_size;
+ int64_t m_num_reserved;
+
+ btree_interval_set<uint64_t> m_uncommitted; ///< released but not yet usable
+ btree_interval_set<uint64_t> m_committing; ///< released but not yet usable
+ BitAllocator *m_bit_alloc; // Bit allocator instance
+
+ void insert_free(uint64_t offset, uint64_t len);
+
+public:
+ BitMapAllocator();
+ BitMapAllocator(int64_t device_size);
+ ~BitMapAllocator();
+
+ int reserve(uint64_t need);
+ void unreserve(uint64_t unused);
+
+ int allocate(
+ uint64_t want_size, uint64_t alloc_unit, int64_t hint,
+ uint64_t *offset, uint32_t *length);
+
+ int release(
+ uint64_t offset, uint64_t length);
+
+ void commit_start();
+ void commit_finish();
+
+ uint64_t get_free();
+
+ void dump(std::ostream& out);
+
+ void init_add_free(uint64_t offset, uint64_t length);
+ void init_rm_free(uint64_t offset, uint64_t length);
+
+ void shutdown();
+};
+
+#endif
unittest_bluefs_CXXFLAGS = $(UNITTEST_CXXFLAGS)
check_TESTPROGRAMS += unittest_bluefs
+unittest_bit_alloc_SOURCES = test/objectstore/BitAllocator_test.cc
+unittest_bit_alloc_LDADD = $(LIBOS) $(UNITTEST_LDADD) $(CEPH_GLOBAL)
+unittest_bit_alloc_CXXFLAGS = $(UNITTEST_CXXFLAGS)
+check_TESTPROGRAMS += unittest_bit_alloc
+
unittest_bluestore_types_SOURCES = test/objectstore/test_bluestore_types.cc
unittest_bluestore_types_LDADD = $(LIBOS) $(UNITTEST_LDADD) $(CEPH_GLOBAL)
unittest_bluestore_types_CXXFLAGS = $(UNITTEST_CXXFLAGS)
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Bitmap based in-memory allocator unit test cases.
+ * Author: Ramesh Chander, Ramesh.Chander@sandisk.com
+ */
+
+#include "os/bluestore/BitAllocator.h"
+#include <stdio.h>
+#include <assert.h>
+#define debug_assert assert
+#define NUM_THREADS 16
+#define MAX_BLOCKS (1024 * 1024 * 4)
+
+void test_bmap_iter()
+{
+ int num_items = 5;
+ int off = 2;
+ class BmapEntityTmp : public BmapEntity {
+ int64_t m_num;
+ int64_t m_len;
+ public:
+ BmapEntityTmp(int num) {
+ m_num = num;
+ m_len = num;
+ }
+
+ int64_t get_index() {
+ return m_num;
+ }
+ bool is_allocated(int64_t s, int64_t num)
+ {
+ return true;
+ }
+ };
+
+ class BmapEntityListTmp: public BmapEntityList {
+ private:
+ int64_t m_marker;
+ public:
+ BmapEntityListTmp(BmapEntity **bmaps, int len, int64_t marker):
+ BmapEntityList(bmaps, len) {
+ m_marker = marker;
+ }
+
+ int64_t incr_marker(int64_t add)
+ {
+ return m_marker++;
+ }
+ void set_marker(int64_t val)
+ {
+ m_marker = val;
+ }
+ int64_t get_marker()
+ {
+ return m_marker;
+ }
+
+ };
+
+ int i = 0;
+ BmapEntityTmp *obj = NULL;
+
+ BmapEntity **bmap = new BmapEntity*[num_items];
+ for (i = 0; i < num_items; i++) {
+ bmap[i] = new BmapEntityTmp(i);
+ }
+
+ BmapEntityList *list = new BmapEntityListTmp(bmap, num_items, 0);
+ BmapEntityListIter *iter = new BmapEntityListIter(list, off, false);
+
+ i = off;
+ int count = 0;
+ int64_t last_idx = off;
+ while ((obj = (BmapEntityTmp*) iter->next())) {
+ debug_assert(obj->get_index() == last_idx);
+ debug_assert(obj->get_index() == i);
+ debug_assert(obj == bmap[i]);
+ last_idx = iter->index();
+ i++;
+ count++;
+ }
+ debug_assert(i == num_items);
+ debug_assert(count == num_items - off);
+
+ delete iter;
+
+ iter = new BmapEntityListIter(list, off, true);
+
+ i = off;
+ last_idx = off;
+ count = 0;
+ while ((obj = (BmapEntityTmp*) iter->next())) {
+ debug_assert(obj->get_index() == last_idx);
+ debug_assert(obj->get_index() == i);
+ debug_assert(obj == bmap[i]);
+ last_idx = iter->index();
+
+
+ i = (i + 1) % num_items;
+ count++;
+ }
+ debug_assert(i == off);
+ debug_assert(count == num_items);
+
+
+ num_items = 4;
+ off = num_items - 1;
+
+ //list = new BmapEntityList(bmap, num_items);
+ list = new BmapEntityListTmp(bmap, num_items, 0);
+
+ iter = new BmapEntityListIter(list, off, true);
+ i = off;
+ last_idx = off;
+ count = 0;
+ while ((obj = (BmapEntityTmp*) iter->next())) {
+ debug_assert(obj->get_index() == last_idx);
+ debug_assert(obj->get_index() == i);
+ debug_assert(obj == bmap[i]);
+ last_idx = iter->index();
+ i = (i + 1) % num_items;
+ count++;
+ }
+ debug_assert(i == off);
+ debug_assert(count == num_items);
+}
+
+void test_bmap_entry()
+{
+ int i = 0;
+ int start = 0;
+ int64_t scanned = 0;
+ int64_t allocated = 0;
+ int size = BmapEntry::size();
+
+ BmapEntry *bmap = new BmapEntry(true);
+
+ // Clear bits one by one and check they are cleared
+ for (i = 0; i < size; i++) {
+ bmap->clear_bit(i);
+ debug_assert(!bmap->check_bit(i));
+ }
+
+ // Set all bits again using set_bits
+ bmap->set_bits(0, size);
+
+ // clear 4 bits at a time and then check allocated
+ for (i = 0; i < size/4; i++) {
+ bmap->clear_bits(i * 4, 4);
+ debug_assert(!bmap->is_allocated(i * 4, 4));
+ }
+
+ // set all bits again
+ bmap->set_bits(0, size);
+
+ // clear alternate bits, check and set those bits
+ for (i = 0; i < size/2; i++) {
+ bmap->clear_bit(i * 2 + 1);
+ debug_assert(!bmap->check_bit(i * 2 + 1));
+ debug_assert(bmap->check_n_set_bit(i * 2 + 1));
+ }
+
+  // Free runs of increasing length and verify find_n_cont_bits
+  // reclaims exactly that many bits
+ for (i = 0; i < size / 4; i++) {
+ bmap->clear_bits(i * 2 + 1, i + 1);
+ debug_assert(!bmap->check_bit(i * 2 + 1));
+ debug_assert(bmap->find_n_cont_bits(i * 2 + 1, i + 1) ==
+ i + 1);
+ }
+
+  // Free runs of increasing length and verify they read as unallocated
+ for (i = 0; i < size / 4; i++) {
+ bmap->clear_bits(i * 2 + 1, i + 1);
+ debug_assert(!bmap->is_allocated(i * 2 + 1, i + 1));
+ }
+
+ for (i = 0; i < size / 4; i++) {
+ bmap->clear_bits(i * 2 + 1, i + 1);
+ allocated = bmap->find_first_set_bits(i + 1, 0, &start, &scanned);
+
+ debug_assert(allocated == i + 1);
+ debug_assert(scanned == ((i * 2 + 1) + (i + 1)));
+ debug_assert(start == i * 2 + 1);
+ bmap->set_bits(0, BmapEntry::size());
+
+ }
+
+  // Free a few runs, including one at the end of the bitmap, and
+  // allocate the run at the end
+ bmap->clear_bits(0, 4);
+ bmap->clear_bits(BmapEntry::size() - 12, 5);
+ bmap->clear_bits(BmapEntry::size() - 6, 6);
+ allocated = bmap->find_first_set_bits(6, 0, &start, &scanned);
+
+ debug_assert(allocated == 6);
+ debug_assert(scanned == BmapEntry::size() - 6 + 6);
+ debug_assert(start == BmapEntry::size() - 6);
+ debug_assert(bmap->is_allocated(start, 6));
+
+ delete bmap;
+ bmap = new BmapEntry(false);
+ bmap->set_bits(4, BmapEntry::size() - 4);
+ debug_assert(bmap->is_allocated(4, BmapEntry::size() - 4));
+ debug_assert(!bmap->is_allocated(0, 4));
+ bmap->set_bits(0, 4);
+ debug_assert(bmap->is_allocated(0, BmapEntry::size()));
+
+
+}
+
+void test_zone_alloc()
+{
+ int total_blocks = 1024;
+ int64_t blks = 1;
+ int64_t last_blk = -1;
+ int64_t start_blk = 0;
+ int64_t allocated = 0;
+
+ BitMapZone *zone = new BitMapZone(0, total_blocks);
+
+  // Allocate all blocks and verify they are handed out in order.
+ for (int i = 0; i < total_blocks; i++) {
+ allocated = zone->alloc_blocks(blks, &start_blk);
+ debug_assert(last_blk + 1 == start_blk);
+ debug_assert(allocated == blks);
+ last_blk = start_blk;
+ }
+ debug_assert(zone->get_used_blocks() == total_blocks);
+
+ for (int i = 0; i < total_blocks; i++) {
+ debug_assert(zone->get_used_blocks() == total_blocks - i);
+ zone->free_blocks(i, blks);
+ }
+
+ blks = 2;
+ last_blk = -2;
+ debug_assert(zone->is_exhausted());
+ for (int i = 0; i < total_blocks/2; i++) {
+ allocated = zone->alloc_blocks(blks, &start_blk);
+ debug_assert(last_blk + 2 == start_blk);
+ last_blk = start_blk;
+ }
+
+  // Free runs that straddle bitmap-entry boundaries and reallocate them
+ blks = 3;
+ debug_assert(zone->is_exhausted());
+ zone->free_blocks(BmapEntry::size() - blks, blks);
+ zone->free_blocks(BmapEntry::size(), blks);
+
+ allocated = zone->alloc_blocks(blks * 2, &start_blk);
+ debug_assert(BmapEntry::size() - blks == start_blk);
+ debug_assert(allocated == blks * 2);
+
+ blks = 4;
+ zone->free_blocks(BmapEntry::size() * 2 - blks, 2 * blks);
+ allocated = zone->alloc_blocks(2 * blks, &start_blk);
+ debug_assert(BmapEntry::size() * 2 - blks == start_blk);
+ debug_assert(allocated == blks * 2);
+
+ zone->reset_marker();
+ blks = BmapEntry::size() * 2;
+ zone->free_blocks(BmapEntry::size() * 6 - blks, blks);
+ allocated = zone->alloc_blocks(blks, &start_blk);
+ debug_assert(BmapEntry::size() * 6 - blks == start_blk);
+
+  // Free runs of length 1, 2, ... at successive bitmap-entry offsets
+  // and then allocate each run back in order.
+ zone->reset_marker();
+ blks = BmapEntry::size() * 2;
+ int num_bits = 1;
+ for (int i = 0; i < zone->size() / BmapEntry::size() -1; i++) {
+
+ zone->free_blocks(i * BmapEntry::size(), num_bits);
+ num_bits++;
+ }
+
+ zone->reset_marker();
+ num_bits = 1;
+ int64_t start_block = 0;
+ for (int i = 0; i < zone->size() / BmapEntry::size() -1; i++) {
+ allocated = zone->alloc_blocks(num_bits, &start_block);
+ debug_assert(num_bits == allocated);
+ debug_assert(start_block == i * BmapEntry::size());
+ num_bits++;
+ }
+
+ start_block = 0;
+ num_bits = 1;
+ for (int i = 0; i < zone->size() / BmapEntry::size() -1; i++) {
+ zone->free_blocks(i * BmapEntry::size(), num_bits);
+ num_bits++;
+ }
+
+ delete zone;
+  // non-contiguous allocation test
+ zone = new BitMapZone(0, total_blocks);
+ int64_t blocks[1024] = {0};
+ for (int i = 0; i < zone->size(); i++) {
+ allocated = zone->alloc_blocks(1, &start_block);
+ debug_assert(allocated == 1);
+ }
+ for (int i = 0; i < zone->size(); i += 2) {
+ zone->free_blocks(i, 1);
+ }
+
+ zone->reset_marker();
+ allocated = zone->alloc_blocks_dis(zone->size() / 2, blocks);
+ debug_assert(allocated == zone->size() / 2);
+}
+
+void test_bitmap_alloc() {
+ int64_t total_blocks = 1024 * 4;
+ int64_t zone_size = 1024;
+ int64_t allocated = 0;
+ int64_t start_block = 0;
+
+ BitAllocator *alloc = new BitAllocator(total_blocks, zone_size, CONCURRENT);
+
+ for (int64_t iter = 0; iter < 4; iter++) {
+ for (int64_t i = 0; i < total_blocks; i++) {
+ allocated = alloc->alloc_blocks(1, &start_block);
+ debug_assert(allocated == 1);
+ debug_assert(start_block == i);
+ }
+
+ for (int64_t i = 0; i < total_blocks; i++) {
+ alloc->free_blocks(i, 1);
+ }
+ }
+
+ for (int64_t iter = 0; iter < 4; iter++) {
+ for (int64_t i = 0; i < total_blocks / zone_size; i++) {
+ allocated = alloc->alloc_blocks(zone_size, &start_block);
+ debug_assert(allocated == zone_size);
+ debug_assert(start_block == i * zone_size);
+ }
+
+ for (int64_t i = 0; i < total_blocks / zone_size; i++) {
+ alloc->free_blocks(i * zone_size, zone_size);
+ }
+ }
+
+ allocated = alloc->alloc_blocks(1, &start_block);
+ debug_assert(allocated == 1);
+
+ allocated = alloc->alloc_blocks(zone_size - 1, &start_block);
+ debug_assert(allocated == zone_size - 1);
+ debug_assert(start_block == 1);
+
+ allocated = alloc->alloc_blocks(1, &start_block);
+ debug_assert(allocated == 1);
+
+ allocated = alloc->alloc_blocks(zone_size, &start_block);
+ debug_assert(allocated == zone_size);
+ debug_assert(start_block == zone_size * 2);
+
+  // Discontiguous block allocations
+ delete alloc;
+ alloc = new BitAllocator(total_blocks, zone_size, CONCURRENT);
+
+ int64_t blocks[2048] = {0};
+ for (int64_t i = 0; i < alloc->size(); i++) {
+ allocated = alloc->alloc_blocks(1, &start_block);
+ debug_assert(allocated == 1);
+ }
+ for (int i = 0; i < alloc->size(); i += 2) {
+ alloc->free_blocks(i, 1);
+ }
+
+ allocated = alloc->alloc_blocks_dis(alloc->size()/2, blocks);
+ debug_assert(allocated == alloc->size() / 2);
+
+ allocated = alloc->alloc_blocks_dis(1, blocks);
+ debug_assert(allocated == 0);
+
+ alloc->free_blocks(alloc->size() / 2, 1);
+ allocated = alloc->alloc_blocks_dis(1, blocks);
+
+ debug_assert(allocated == 1);
+ debug_assert(blocks[0] == alloc->size()/2);
+
+ delete alloc;
+ alloc = new BitAllocator(1024, zone_size, CONCURRENT, true);
+
+ alloc->free_blocks(1, 1023);
+ alloc->alloc_blocks(16, &start_block);
+}
+
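+/*
+ * Sanity check: blocks handed out by the allocator's cursor-based scan
+ * should be monotonically increasing, with at most one wrap-around.
+ */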
+void
+verify_blocks(int64_t num_blocks, int64_t *blocks)
+{
+ int64_t i = 0;
+ int wraps = 0;
+ for (i = 0; i < num_blocks - 1; i++) {
+ if (blocks[i] > blocks[i + 1]) {
+ wraps++;
+ debug_assert(wraps <= 1);
+ }
+ }
+}
+
+__thread int my_tid;
+__thread int64_t allocated_blocks[MAX_BLOCKS];
+
+void
+do_work(BitAllocator *alloc)
+{
+ int num_iters = 10;
+ int64_t alloced = 0;
+ int64_t start_block = -1;
+ uint64_t alloc_unit = 1;
+ int64_t num_blocks = alloc->size() / NUM_THREADS;
+ int total_alloced = 0;
+
+ while (num_iters--) {
+ printf("Allocating in tid %d.\n", my_tid);
+ for (int i = 0; i < num_blocks; i++) {
+ alloced = alloc->alloc_blocks(1, &start_block);
+ debug_assert(alloced == 1);
+ total_alloced++;
+ allocated_blocks[i] = start_block;
+ }
+
+ printf("Freeing in tid %d %d blocks.\n", my_tid, total_alloced);
+ for (int i = 0; i < num_blocks; i++) {
+ alloc->free_blocks(allocated_blocks[i], 1);
+ }
+
+ total_alloced = 0;
+ printf("Done tid %d iter %d.\n", my_tid, num_iters);
+ }
+}
+
+int tid = 0;
+void *
+worker(void *args)
+{
+ my_tid = __sync_fetch_and_add(&tid, 1);
+ BitAllocator *alloc = (BitAllocator *) args;
+  printf("Starting thread %d.\n", my_tid);
+ do_work(alloc);
+ return NULL;
+}
+
+void test_bmap_alloc_concurrent()
+{
+
+ // Create an allocator
+ int64_t total_blocks = MAX_BLOCKS;
+ int64_t zone_size = 1024;
+ pthread_t pthreads[NUM_THREADS] = {0};
+
+ debug_assert(total_blocks <= MAX_BLOCKS);
+
+ BitAllocator *alloc = new BitAllocator(total_blocks, zone_size, CONCURRENT);
+
+  // Create N threads; each allocates max_blks / NUM_THREADS blocks and frees them
+ printf("Spawning %d threads for parallel test.....\n", NUM_THREADS);
+
+ for (int j = 0; j < NUM_THREADS; j++) {
+ if (pthread_create(&pthreads[j], NULL, worker, alloc)) {
+ printf("Unable to create worker thread.\n");
+      exit(1);
+ }
+ }
+
+ for (int j = 0; j < NUM_THREADS; j++) {
+ pthread_join(pthreads[j], NULL);
+ }
+
+  // TODO: make sure threads always get blocks; repeat with both
+  // contiguous and discontiguous allocations; do multithreaded
+  // allocation and check that allocations are unique.
+
+}
+
+int main()
+{
+ test_bmap_entry();
+ test_bmap_iter();
+ test_zone_alloc();
+ test_bitmap_alloc();
+ test_bmap_alloc_concurrent();
+
+
+ printf("All tests done : SUCCESS.\n");
+}
add_ceph_unittest(unittest_rocksdb_option ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/unittest_rocksdb_option)
target_link_libraries(unittest_rocksdb_option global os ${BLKID_LIBRARIES})
+# unittest_bit_alloc
+add_executable(unittest_bit_alloc EXCLUDE_FROM_ALL
+ BitAllocator_test.cc
+ )
+add_ceph_unittest(unittest_bit_alloc ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/unittest_bit_alloc)
+target_link_libraries(unittest_bit_alloc os global)
+
# unittest_bluefs
add_executable(unittest_bluefs EXCLUDE_FROM_ALL
test_bluefs.cc