From 81acdd1a077c6ef9042e912a9fc7ff106f6a54c1 Mon Sep 17 00:00:00 2001 From: Ramesh Chander Date: Tue, 10 May 2016 02:32:24 -0700 Subject: [PATCH] os/bluestore: bitmap allocator Signed-off-by: Ramesh Chander --- src/CMakeLists.txt | 2 + src/os/Makefile.am | 4 + src/os/bluestore/Allocator.cc | 6 +- src/os/bluestore/BitAllocator.cc | 1236 +++++++++++++++++++++ src/os/bluestore/BitAllocator.h | 250 +++++ src/os/bluestore/BitMapAllocator.cc | 217 ++++ src/os/bluestore/BitMapAllocator.h | 55 + src/test/Makefile-server.am | 5 + src/test/objectstore/BitAllocator_test.cc | 493 ++++++++ src/test/objectstore/CMakeLists.txt | 7 + 10 files changed, 2274 insertions(+), 1 deletion(-) create mode 100644 src/os/bluestore/BitAllocator.cc create mode 100644 src/os/bluestore/BitAllocator.h create mode 100644 src/os/bluestore/BitMapAllocator.cc create mode 100644 src/os/bluestore/BitMapAllocator.h create mode 100644 src/test/objectstore/BitAllocator_test.cc diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index fc872f1a489..7833502fa5e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -683,6 +683,8 @@ set(libos_srcs os/bluestore/FreelistManager.cc os/bluestore/KernelDevice.cc os/bluestore/StupidAllocator.cc + os/bluestore/BitMapAllocator.cc + os/bluestore/BitAllocator.cc os/fs/FS.cc ${libos_xfs_srcs}) if(${HAVE_LIBFUSE}) diff --git a/src/os/Makefile.am b/src/os/Makefile.am index dc0601bcb78..98186e271cc 100644 --- a/src/os/Makefile.am +++ b/src/os/Makefile.am @@ -46,6 +46,8 @@ libos_a_SOURCES += \ os/bluestore/ExtentFreelistManager.cc \ os/bluestore/FreelistManager.cc \ os/bluestore/KernelDevice.cc \ + os/bluestore/BitMapAllocator.cc \ + os/bluestore/BitAllocator.cc \ os/bluestore/StupidAllocator.cc endif @@ -116,6 +118,8 @@ noinst_HEADERS += \ os/bluestore/KernelDevice.h \ os/bluestore/ExtentFreelistManager.h \ os/bluestore/FreelistManager.h \ + os/bluestore/BitMapAllocator.h \ + os/bluestore/BitAllocator.h \ os/bluestore/StupidAllocator.h endif diff --git a/src/os/bluestore/Allocator.cc b/src/os/bluestore/Allocator.cc index 79c51c43635..034fca660e0 100644 --- a/src/os/bluestore/Allocator.cc +++ b/src/os/bluestore/Allocator.cc @@ -3,14 +3,18 @@ #include "Allocator.h" #include "StupidAllocator.h" +#include "BitMapAllocator.h" #include "common/debug.h" #define dout_subsys ceph_subsys_bluestore Allocator *Allocator::create(string type, int64_t size) { - if (type == "stupid") + if (type == "stupid") { return new StupidAllocator; + } else if (type == "bitmap") { + return new BitMapAllocator(size); + } derr << "Allocator::" << __func__ << " unknown alloc type " << type << dendl; return NULL; } diff --git a/src/os/bluestore/BitAllocator.cc b/src/os/bluestore/BitAllocator.cc new file mode 100644 index 00000000000..f88bae7468c --- /dev/null +++ b/src/os/bluestore/BitAllocator.cc @@ -0,0 +1,1236 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Bitmap based in-memory allocator. + * Author: Ramesh Chander, Ramesh.Chander@sandisk.com + */ + +#include "BitAllocator.h" +#include + +#define debug_assert assert +#define MIN(x, y) ((x) > (y) ? (y) : (x)) + +/* + * BmapEntityList functions. 
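+ *
+ * A BmapEntityList is a fixed-size array of BmapEntity objects plus a
+ * marker recording where the last scan stopped. BmapEntityListIter
+ * walks the list from a start index, optionally wrapping back to the
+ * beginning; roughly:
+ *
+ *   BmapEntityListIter iter(list, start_idx, true); // wrap = true
+ *   while (BmapEntity *e = iter.next()) {
+ *     // visits every item once, ending back at start_idx
+ *   }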
+ */ +BmapEntityListIter::BmapEntityListIter(BmapEntityList *list) +{ + m_list = list; + m_start_idx = list->get_marker(); + m_cur_idx = m_start_idx; + m_wrap = false; +} + +BmapEntityListIter::BmapEntityListIter(BmapEntityList *list, bool wrap) +{ + m_list = list; + m_start_idx = list->get_marker(); + m_cur_idx = m_start_idx; + m_wrap = wrap; +} + +BmapEntityListIter::BmapEntityListIter(BmapEntityList *list, int64_t start_idx) +{ + m_list = list; + m_wrap = false; + m_start_idx = start_idx; + m_cur_idx = m_start_idx; + m_wrapped = false; +} + +BmapEntityListIter::BmapEntityListIter(BmapEntityList *list, int64_t start_idx, bool wrap) +{ + m_list = list; + m_wrap = wrap; + m_start_idx = start_idx; + m_cur_idx = m_start_idx; + m_wrapped = false; +} + +BmapEntity * BmapEntityListIter::next() +{ + int64_t cur_idx = m_cur_idx; + + if (m_wrapped && + cur_idx == m_start_idx) { + /* + * End of wrap cycle + */ + return NULL; + } + m_cur_idx++; + + if (m_cur_idx == m_list->size() + &&m_wrap) { + m_cur_idx %= m_list->size(); + m_wrapped = true; + } + + if (cur_idx == m_list->size()) { + /* + * End of list + */ + return NULL; + } + debug_assert(cur_idx < m_list->size()); + return m_list->get_nth_item(cur_idx); +} + +int64_t BmapEntityListIter::index() +{ + return m_cur_idx; +} + +void BmapEntityListIter::decr_idx() +{ + m_cur_idx--; + debug_assert(m_cur_idx > 0); +} + +/* + * Bitmap Entry functions. + */ +BmapEntry::BmapEntry(bool full) +{ + if (full) { + m_bits = BmapEntry::full_bmask(); + } else { + m_bits = BmapEntry::empty_bmask(); + } +} + +BmapEntry::~BmapEntry() +{ + +} + +bmap_t BmapEntry::full_bmask() { + return (bmap_t) -1; +} + +int64_t BmapEntry::size() { + return (sizeof(bmap_t) * 8); +} + +bmap_t BmapEntry::empty_bmask() { + return (bmap_t) 0; +} + +bmap_t BmapEntry::align_mask(int x) +{ + return ((x) >= BmapEntry::size()? (bmap_t) -1 : (~(((bmap_t) -1) >> (x)))); +} + +bmap_t BmapEntry::bit_mask(int bit) +{ + return ((bmap_t) 0x1 << ((BmapEntry::size() - 1) - bit)); +} +bool BmapEntry::check_bit(int bit) +{ + return (m_bits & bit_mask(bit)); +} + +bmap_t BmapEntry::atomic_fetch() +{ + return std::atomic_load(&m_bits); +} + +bool BmapEntry::is_allocated(int64_t start_bit, int64_t num_bits) +{ + for (int i = start_bit; i < num_bits + start_bit; i++) { + if (!check_bit(i)) { + return false; + } + } + return true; +} + +void BmapEntry::clear_bit(int bit) +{ + bmap_t bmask = bit_mask(bit); + (void) std::atomic_fetch_and(&m_bits, ~(bmask)); +} + +void BmapEntry::clear_bits(int offset, int num_bits) +{ + if (num_bits == 0) { + return; + } + bmap_t bmask = BmapEntry::align_mask(num_bits) >> offset; + (void) std::atomic_fetch_and(&m_bits, ~(bmask)); +} + +void BmapEntry::set_bits(int offset, int num_bits) +{ + for (int i = 0; i < num_bits; i++) { + bmap_t bmask = bit_mask(i + offset); + (void) std::atomic_fetch_or(&m_bits, bmask); + } +} + +/* + * Allocate a bit if it was free. + * Retruns true if it was free. + */ +bool BmapEntry::check_n_set_bit(int bit) +{ + bmap_t bmask = bit_mask(bit); + return !(atomic_fetch_or(&m_bits, bmask) & bmask); +} + +/* + * Find N cont free bits in BitMap starting from an offset. + * + * Returns number of continuous bits found. 
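+ *
+ * The scan allocates as it goes: each free bit is claimed with an
+ * atomic check-and-set, and the walk stops at the first bit that is
+ * already taken. E.g. if bits 2 and 3 are free but bit 4 is set, a
+ * call with start_offset 2 and num_bits 4 claims bits 2-3 and returns 2.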
+ */ +int BmapEntry::find_n_cont_bits(int start_offset, int64_t num_bits) +{ + int count = 0; + int i = 0; + + if (num_bits == 0) { + return 0; + } + + if (start_offset >= BmapEntry::size()) { + return 0; + } + + for (i = start_offset; i < BmapEntry::size() && count < num_bits; i++) { + if (!check_n_set_bit(i)) { + break; + } + count++; + } + + return count; +} + +/* + * Find N free bits starting search from an given offset. + * + * Returns number of bits found, start bit and end of + * index next to bit where our search ended + 1. + */ +int BmapEntry::find_n_free_bits(int start_idx, int64_t max_bits, + int *free_bit, int *end_idx) +{ + int i = 0; + int count = 0; + + *free_bit = 0; + debug_assert(max_bits > 0); + + /* + * Find free bit aligned to bit_align return the bit_num in free_bit. + */ + if (atomic_fetch() == BmapEntry::full_bmask()) { + /* + * All bits full, return fail. + */ + *end_idx = BmapEntry::size(); + return 0; + } + + /* + * Do a serial scan on bitmap. + */ + for (i = start_idx; i < BmapEntry::size(); i++) { + if (check_n_set_bit(i)) { + /* + * Found first free bit + */ + *free_bit = i; + count++; + break; + } + } + count += find_n_cont_bits(i + 1, max_bits - 1); + + (*end_idx) = i + count; + return count; +} + +/* + * Find first series of contiguous bits free in bitmap starting + * from start offset that either + * satisfy our need or are touching right edge of bitmap. + * + * Returns allocated bits, start bit of allocated, number of bits + * scanned from start offset. + */ +int +BmapEntry::find_first_set_bits(int64_t required_blocks, + int bit_offset, int *start_offset, + int64_t *scanned) +{ + int allocated = 0; + int conti = 0; + int end_idx = 0; + + *scanned = 0; + + while (bit_offset < BmapEntry::size()) { + conti = find_n_free_bits(bit_offset, required_blocks, + start_offset, &end_idx); + + *scanned += end_idx - bit_offset; + /* + * Either end of bitmap or got required. + */ + if (conti == required_blocks || + (conti + *start_offset == BmapEntry::size())) { + allocated += conti; + break; + } + + /* + * Did not get expected, search from next index again. + */ + clear_bits(*start_offset, conti); + allocated = 0; + + bit_offset = end_idx; + } + + return allocated; +} + +/* + * Find N number of free bits in bitmap. Need not be contiguous. + */ +int BmapEntry::find_any_free_bits(int start_offset, int64_t num_blocks, + int64_t *allocated_blocks, int64_t block_offset, + int64_t *scanned) +{ + int allocated = 0; + int required = num_blocks; + int i = 0; + + *scanned = 0; + + if (atomic_fetch() == BmapEntry::full_bmask()) { + return 0; + } + + /* + * Do a serial scan on bitmap. + */ + for (i = start_offset; i < BmapEntry::size() && + allocated < required; i++) { + if (check_n_set_bit(i)) { + allocated_blocks[allocated] = i + block_offset; + allocated++; + } + } + + *scanned = i - start_offset; + return allocated; +} + +/* + * Bitmap List related functions. + */ +int64_t BmapList::incr_marker(int64_t add) +{ + return std::atomic_fetch_add(&m_marker, add); +} + +void BmapList::set_marker(int64_t val) +{ + atomic_store(&m_marker, val); +} + +int64_t BmapList::get_marker() +{ + return std::atomic_load(&m_marker); +} + +/* + * Zone related functions. + */ +void BitMapZone::init(int64_t zone_num, int64_t total_blocks, bool def) +{ + m_zone_num = zone_num; + m_total_blocks = total_blocks; + + m_used_blocks = def? 
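+      /* def == true: the zone starts fully allocated */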
total_blocks: 0; + + int64_t num_bmaps = total_blocks / BmapEntry::size(); + debug_assert(!(total_blocks % BmapEntry::size())); + + BmapEntity **bmaps = new BmapEntity *[num_bmaps]; + for (int i = 0; i < num_bmaps; i++) { + bmaps[i] = new BmapEntry(def); + } + + BmapEntityList *list = new BmapList(bmaps, num_bmaps, 0); + + m_bmap_list = list; + m_state = ZONE_ACTIVE; +} + +BitMapZone::BitMapZone(int64_t zone_num, int64_t total_blocks) +{ + init(zone_num, total_blocks, false); +} + +BitMapZone::BitMapZone(int64_t zone_num, int64_t total_blocks, bool def) +{ + init(zone_num, total_blocks, def); +} + +BitMapZone::~BitMapZone() +{ + lock_zone(true); + + m_state = ZONE_FREE; + BmapEntityList *list = m_bmap_list; + + for (int64_t i = 0; i < list->size(); i++) { + BmapEntry *bmap = (BmapEntry *) list->get_nth_item(i); + delete bmap; + } + + delete [] list->get_item_list(); + delete list; + + unlock_zone(); +} + +/* + * Check if some search took zone marker to end. + */ +bool BitMapZone::is_exhausted() +{ + if (m_bmap_list->get_marker() >= + m_total_blocks) { + m_bmap_list->set_marker(0); + return true; + } + return false; +} + +void BitMapZone::reset_marker() +{ + m_bmap_list->set_marker(0); +} + +bool BitMapZone::is_allocated(int64_t start_block, int64_t num_blocks) +{ + BmapEntry *bmap = NULL; + int bit = 0; + int64_t falling_in_bmap = 0; + + while (num_blocks) { + bit = start_block % BmapEntry::size(); + bmap = (BmapEntry *) m_bmap_list->get_nth_item(start_block / + BmapEntry::size()); + falling_in_bmap = MIN(num_blocks, BmapEntry::size() - bit); + + if (!bmap->is_allocated(bit, falling_in_bmap)) { + return false; + } + + start_block += falling_in_bmap; + num_blocks -= falling_in_bmap; + } + + return true; +} + +/* + * Allocate N continuous bits in a zone starting from + * marker provided in iter. 
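+ *
+ * Every bitmap after the first must be consumed from bit 0, so a run
+ * that comes up short of max_expected ends the allocation; the bits
+ * already claimed stay set and the caller rolls them back on failure.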
+ */ +int64_t BitMapZone::alloc_cont_bits(int64_t num_blocks, + BmapEntityListIter *iter, + int64_t *scanned) +{ + BmapEntry *bmap = NULL; + int64_t required = num_blocks; + while ((bmap = (BmapEntry *) iter->next())) { + int64_t found = 0; + int64_t max_expected = MIN(required, BmapEntry::size()); + found = bmap->find_n_cont_bits(0, max_expected); + + required -= found; + + if (found < max_expected) { + break; + } + } + + /* + * scanned == allocated + */ + *scanned = num_blocks - required; + return (num_blocks - required); +} + +void BitMapZone::set_blocks_used(int64_t start_block, int64_t num_blocks) +{ + BmapEntry *bmap = NULL; + int bit = 0; + int64_t falling_in_bmap = 0; + int64_t blks = num_blocks; + + while (blks) { + bit = start_block % BmapEntry::size(); + bmap = (BmapEntry *) m_bmap_list->get_nth_item(start_block / + BmapEntry::size()); + falling_in_bmap = MIN(blks, BmapEntry::size() - bit); + + bmap->set_bits(bit, falling_in_bmap); + + start_block += falling_in_bmap; + blks -= falling_in_bmap; + } + add_used_blocks(num_blocks); +} + +void BitMapZone::free_blocks_int(int64_t start_block, int64_t num_blocks) +{ + BmapEntry *bmap = NULL; + int bit = 0; + int64_t falling_in_bmap = 0; + + + while (num_blocks) { + bit = start_block % BmapEntry::size(); + bmap = (BmapEntry *) m_bmap_list->get_nth_item(start_block / + BmapEntry::size()); + falling_in_bmap = MIN(num_blocks, BmapEntry::size() - bit); + + bmap->clear_bits(bit, falling_in_bmap); + + start_block += falling_in_bmap; + num_blocks -= falling_in_bmap; + } + +} + +int64_t BitMapZone::get_index() +{ + return m_zone_num; +} + +bool BitMapZone::lock_zone(bool wait) +{ + if (wait) { + m_lock.lock(); + return true; + } + + if (m_lock.try_lock()) { + return true; + } else { + return false; + } +} + +void BitMapZone::unlock_zone() +{ + m_lock.unlock(); +} + +int64_t BitMapZone::get_used_blocks() +{ + return std::atomic_load(&m_used_blocks); +} + +int64_t BitMapZone::size() { + return m_total_blocks; +} + +int64_t BitMapZone::add_used_blocks(int64_t blks) +{ + return std::atomic_fetch_add(&m_used_blocks, blks) + blks; +} + +int64_t BitMapZone::sub_used_blocks(int64_t blks) +{ + int64_t used_blks = std::atomic_fetch_sub(&m_used_blocks, blks) - blks; + debug_assert(used_blks >= 0); + return used_blks; +} + +/* + * Try find N contiguous blocks in a Zone. + * Nothing less than N is good and considered failure. + * + * Caller must take exclusive lock on Zone. + */ +int64_t BitMapZone::alloc_blocks(int64_t num_blocks, int64_t *start_block) +{ + int64_t bmap_idx = 0; + int bit_idx = 0; + int64_t marker = m_bmap_list->get_marker(); + int64_t marker_add = 0; + BmapEntry *bmap = NULL; + int64_t allocated = 0; + int64_t zone_block_offset = get_index() * size(); + + bmap_idx = marker / BmapEntry::size(); + bit_idx = marker % BmapEntry::size(); + + BmapEntityListIter iter = BmapEntityListIter( + m_bmap_list, bmap_idx); + + while ((bmap = (BmapEntry *) iter.next())) { + int64_t scanned = 0; + int start_offset = -1; + + allocated = bmap->find_first_set_bits(num_blocks, + bit_idx, &start_offset, &scanned); + + marker_add += scanned; + bit_idx = 0; + + if (allocated > 0) { + (*start_block) = start_offset + + (iter.index() - 1) * bmap->size() + + m_zone_num * size(); + + allocated += alloc_cont_bits(num_blocks - allocated, + &iter, &scanned); + marker_add += scanned; + /* + * Iter need to go one step back for case when allocation + * is not enough and start from last bitmap again. 
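+ * After a partial run the next probe has to resume inside the bitmap
+ * where this run ended rather than at the following bitmap, so the
+ * iterator index is rewound and bit_idx is recomputed from 'scanned'.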
+ */ + iter.decr_idx(); + bit_idx = scanned % BmapEntry::size(); + } + + if (allocated < num_blocks) { + free_blocks_int((*start_block - zone_block_offset), allocated); + allocated = 0; + } else { + /* + * Got required. + */ + break; + } + } + + m_bmap_list->incr_marker(marker_add); + add_used_blocks(allocated); + + return allocated; +} + +void BitMapZone::free_blocks(int64_t start_block, int64_t num_blocks) +{ + free_blocks_int(start_block, num_blocks); + debug_assert(get_used_blocks() > 0); + sub_used_blocks(num_blocks); +} + +/* + * Allocate N blocks, dis-contiguous are fine + */ +int64_t BitMapZone::alloc_blocks_dis(int64_t num_blocks, int64_t *alloc_blocks) +{ + int64_t bmap_idx = 0; + int bit = 0; + int64_t marker_add = 0; + BmapEntry *bmap = NULL; + int64_t allocated = 0; + int64_t blk_off = 0; + + bmap_idx = m_bmap_list->get_marker() / BmapEntry::size(); + bit = m_bmap_list->get_marker() % BmapEntry::size(); + + BmapEntityListIter iter = BmapEntityListIter( + m_bmap_list, bmap_idx); + while ((bmap = (BmapEntry *) iter.next())) { + int64_t scanned = 0; + blk_off = (iter.index() - 1) * BmapEntry::size() + + m_zone_num * size(); + allocated += bmap->find_any_free_bits(bit, num_blocks - allocated, + &alloc_blocks[allocated], blk_off, &scanned); + + marker_add += scanned; + } + + m_bmap_list->incr_marker(marker_add); + add_used_blocks(allocated); + + return allocated; +} + +/* + * Zone List related functions + */ + +ZoneList::ZoneList(BmapEntity **list, int64_t len) : + BmapEntityList(list, len) +{ + m_marker = 0; + return; +} + +ZoneList::ZoneList(BmapEntity **list, int64_t len, int64_t marker) : + BmapEntityList(list, len) +{ + m_marker = marker; + return; +} + +int64_t ZoneList::incr_marker(int64_t add) { + std::lock_guard l (m_marker_mutex); + m_marker += add; + m_marker %= size(); + + return m_marker; +} + +int64_t ZoneList::get_marker() +{ + std::lock_guard l (m_marker_mutex); + return m_marker; +} + +void ZoneList::set_marker(int64_t val) +{ + std::lock_guard l (m_marker_mutex); + m_marker = val; +} + +/* + * Main allocator functions. + */ +BitAllocator::BitAllocator(int64_t total_blocks, int64_t zone_size_block, bmap_alloc_mode_t mode) +{ + init(total_blocks, zone_size_block, mode, false); +} + +BitAllocator::BitAllocator(int64_t total_blocks, int64_t zone_size_block, + bmap_alloc_mode_t mode, bool def) +{ + init(total_blocks, zone_size_block, mode, def); +} + +void BitAllocator::init(int64_t total_blocks, int64_t zone_size_block, + bmap_alloc_mode_t mode, bool def) +{ + int64_t total_zones = 0; + + if (mode != SERIAL && mode != CONCURRENT) { + debug_assert(0); + } + + if (total_blocks <= 0) { + debug_assert(0); + } + + if (zone_size_block == 0 || + zone_size_block < BmapEntry::size()) { + debug_assert(0); + } + + zone_size_block = (zone_size_block / BmapEntry::size()) * + BmapEntry::size(); + + if (total_blocks < zone_size_block) { + debug_assert(0); + } + + total_blocks = (total_blocks / zone_size_block) * zone_size_block; + total_zones = total_blocks / zone_size_block; + + debug_assert(total_blocks > 0); + debug_assert(total_zones > 0); + + pthread_rwlock_init(&m_alloc_slow_lock, NULL); + + m_total_blocks = total_blocks; + m_total_zones = total_zones; + m_zone_size_blocks = zone_size_block; + m_alloc_mode = mode; + m_allocated_blocks = def? 
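+    /* def == true: bring the allocator up with every block marked used */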
total_blocks: 0; + m_reserved_blocks = 0; + + BmapEntity **zonesp = new BmapEntity* [total_zones]; + for (int i = 0; i < total_zones; i++) { + zonesp[i] = new BitMapZone(i, zone_size_block, def); + } + + BmapEntityList *list = new ZoneList(zonesp, total_zones, 0); + + m_zone_list = list; + + m_state = ALLOC_ACTIVE; +} + +BitAllocator::~BitAllocator() +{ + alloc_lock(true); + + m_state = ALLOC_DESTROY; + BmapEntityList *list = m_zone_list; + + for (int64_t i = 0; i < list->size(); i++) { + BitMapZone *zone = (BitMapZone *) list->get_nth_item(i); + delete zone; + } + + delete [] list->get_item_list(); + delete list; + + alloc_unlock(); + pthread_rwlock_destroy(&m_alloc_slow_lock); +} + +void +BitAllocator::shutdown() +{ + alloc_lock(true); + serial_lock(); +} + +int64_t BitAllocator::sub_used_blocks(int64_t num_blocks) +{ + int64_t used_blks = + std::atomic_fetch_sub(&m_allocated_blocks, num_blocks); + debug_assert(used_blks > 0); + return used_blks; +} + +int64_t BitAllocator::add_used_blocks(int64_t num_blocks) +{ + return std::atomic_fetch_add(&m_allocated_blocks, num_blocks) + num_blocks; +} + +int64_t BitAllocator::get_used_blocks() +{ + return std::atomic_load(&m_allocated_blocks); +} + +int64_t BitAllocator::get_reserved_blocks() +{ + return m_reserved_blocks; +} + +int64_t BitAllocator::size() +{ + return m_total_blocks; +} + +bool BitAllocator::reserve_blocks(int64_t num) +{ + bool res = false; + std::lock_guard l(m_res_blocks_lock); + if (add_used_blocks(num) <= size()) { + res = true; + m_reserved_blocks += num; + } else { + sub_used_blocks(num); + res = false; + } + + debug_assert(m_allocated_blocks <= size()); + return res; +} + +void BitAllocator::unreserve(int64_t needed, int64_t allocated) +{ + std::lock_guard l(m_res_blocks_lock); + sub_used_blocks(needed - allocated); + m_reserved_blocks -= needed; + debug_assert(m_allocated_blocks >= 0); +} + +void BitAllocator::unreserve_blocks(int64_t unused) +{ + unreserve(unused, 0); +} + +void BitAllocator::serial_lock() +{ + if (m_alloc_mode == SERIAL) { + m_serial_mutex.lock(); + } +} + +void BitAllocator::serial_unlock() +{ + if (m_alloc_mode == SERIAL) { + m_serial_mutex.unlock(); + } +} + +void BitAllocator::alloc_lock(bool write) { + if (write) { + pthread_rwlock_wrlock(&m_alloc_slow_lock); + } else { + pthread_rwlock_rdlock(&m_alloc_slow_lock); + } +} + +void BitAllocator::alloc_unlock() +{ + pthread_rwlock_unlock(&m_alloc_slow_lock); +} + +bool BitAllocator::zone_free_to_alloc(BmapEntity *item, + int64_t required, bool wait) +{ + BitMapZone *zone = (BitMapZone *) item; + if (!zone->lock_zone(wait)) { + return false; + } + + if (zone->is_exhausted()) { + if (zone->get_index() == + m_zone_list->get_marker()) { + m_zone_list->incr_marker(1); + } + zone->unlock_zone(); + return false; + } + + if (zone->get_used_blocks() + required > + zone->size()) { + zone->unlock_zone(); + return false; + } + + return true; +} + +bool BitAllocator::is_allocated(int64_t start_block, int64_t num_blocks) +{ + BitMapZone *zone = NULL; + int64_t zone_block_offset = 0; + int64_t falling_in_zone = 0; + + debug_assert(start_block >= 0 && + (start_block + num_blocks <= size())); + + if (num_blocks == 0) { + return true; + } + + assert(start_block >= 0); + + while (num_blocks) { + zone = (BitMapZone *) m_zone_list->get_nth_item( + start_block / m_zone_size_blocks); + + zone_block_offset = start_block % m_zone_size_blocks; + falling_in_zone = MIN(m_zone_size_blocks - zone_block_offset, + num_blocks); + if (!zone->is_allocated(zone_block_offset, 
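+          /* portion of the requested range that lies inside this zone */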
falling_in_zone)) {
+      return false;
+    }
+    start_block += falling_in_zone;
+    num_blocks -= falling_in_zone;
+  }
+  return true;
+}
+
+bool BitAllocator::is_allocated(int64_t *alloc_blocks, int64_t num_blocks)
+{
+  for (int64_t i = 0; i < num_blocks; i++) {
+    if (!is_allocated(alloc_blocks[i], 1)) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/*
+ * Allocate N contiguous blocks.
+ */
+int64_t BitAllocator::alloc_blocks_int(int64_t num_blocks,
+        int64_t *start_block, bool wait)
+{
+
+  int64_t zone_mark = m_zone_list->get_marker();
+  BitMapZone *zone = NULL;
+  int64_t allocated = 0;
+
+  BmapEntityListIter iter = BmapEntityListIter(
+        m_zone_list, zone_mark, true);
+
+  while ((zone = (BitMapZone *) iter.next())) {
+
+    if (!zone_free_to_alloc(zone,
+        num_blocks - allocated, wait)) {
+      continue;
+    }
+
+    allocated = zone->alloc_blocks(num_blocks, start_block);
+
+    zone->unlock_zone();
+    if (allocated == num_blocks) {
+      break;
+    }
+
+    zone->free_blocks(*start_block, allocated);
+    allocated = 0;
+  }
+
+  return allocated;
+}
+
+bool BitAllocator::check_input_dis(int64_t num_blocks)
+{
+  if (num_blocks == 0) {
+    return false;
+  }
+
+  if (num_blocks > size()) {
+    return false;
+  }
+  return true;
+}
+
+bool BitAllocator::check_input(int64_t num_blocks)
+{
+  if (num_blocks == 0) {
+    return false;
+  }
+
+  if (num_blocks > m_zone_size_blocks) {
+    return false;
+  }
+  return true;
+}
+
+/*
+ * Interface to allocate blocks after reserve.
+ */
+int64_t BitAllocator::alloc_blocks_res(int64_t num_blocks, int64_t *start_block)
+{
+  int scans = 2;
+  int64_t allocated = 0;
+
+  if (!check_input(num_blocks)) {
+    return 0;
+  }
+
+  alloc_lock(false);
+  serial_lock();
+
+  while (scans && !allocated) {
+    allocated = alloc_blocks_int(num_blocks, start_block, false);
+    scans--;
+  }
+
+  if (!allocated) {
+    /*
+     * Could not find anything in two scans.
+     * Go in serial manner.
+     */
+    allocated = alloc_blocks_int(num_blocks, start_block, true);
+  }
+
+  debug_assert(is_allocated(*start_block, allocated));
+  unreserve(num_blocks, allocated);
+
+  serial_unlock();
+  alloc_unlock();
+
+  return allocated;
+}
+
+int64_t BitAllocator::alloc_blocks(int64_t num_blocks, int64_t *start_block)
+{
+  int scans = 2;
+  int64_t allocated = 0;
+
+  if (!check_input(num_blocks)) {
+    return 0;
+  }
+
+  alloc_lock(false);
+  serial_lock();
+
+  if (!reserve_blocks(num_blocks)) {
+    goto exit;
+  }
+
+  while (scans && !allocated) {
+    allocated = alloc_blocks_int(num_blocks, start_block, false);
+    scans--;
+  }
+
+  if (!allocated) {
+    /*
+     * Could not find anything in two scans.
+     * Go in serial manner.
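+     * I.e. retry with wait == true so the scan blocks on each zone's
+     * lock instead of skipping zones that are busy.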
+ */ + allocated = alloc_blocks_int(num_blocks, start_block, true); + } + + unreserve(num_blocks, allocated); + debug_assert((m_allocated_blocks <= m_total_blocks)); + debug_assert(is_allocated(*start_block, allocated)); + +exit: + serial_unlock(); + alloc_unlock(); + + return allocated; +} + +void BitAllocator::free_blocks_int(int64_t start_block, int64_t num_blocks) +{ + BitMapZone *zone = NULL; + int64_t zone_block_offset = 0; + int64_t falling_in_zone = 0; + + debug_assert(start_block >= 0 && + (start_block + num_blocks) <= size()); + + if (num_blocks == 0) { + return; + } + + assert(start_block >= 0); + + while (num_blocks) { + zone = (BitMapZone *) m_zone_list->get_nth_item( + start_block / m_zone_size_blocks); + + zone_block_offset = start_block % m_zone_size_blocks; + + falling_in_zone = MIN(m_zone_size_blocks - zone_block_offset, + num_blocks); + zone->free_blocks(zone_block_offset, falling_in_zone); + start_block += falling_in_zone; + num_blocks -= falling_in_zone; + } + +} + +void BitAllocator::free_blocks(int64_t start_block, int64_t num_blocks) +{ + alloc_lock(false); + debug_assert(is_allocated(start_block, num_blocks)); + + free_blocks_int(start_block, num_blocks); + (void) sub_used_blocks(num_blocks); + + alloc_unlock(); +} + +void BitAllocator::set_blocks_used(int64_t start_block, int64_t num_blocks) +{ + BitMapZone *zone = NULL; + int64_t zone_block_offset = 0; + int64_t falling_in_zone = 0; + int64_t blks = num_blocks; + int64_t start_blk = start_block; + + if (num_blocks == 0) { + return; + } + + alloc_lock(false); + serial_lock(); + + assert(start_block >= 0); + + while (blks) { + zone = (BitMapZone *) m_zone_list->get_nth_item( + start_blk / m_zone_size_blocks); + + zone_block_offset = start_blk % m_zone_size_blocks; + falling_in_zone = MIN(m_zone_size_blocks - zone_block_offset, + blks); + zone->set_blocks_used(zone_block_offset, falling_in_zone); + start_blk += falling_in_zone; + blks -= falling_in_zone; + } + + add_used_blocks(num_blocks); + debug_assert(is_allocated(start_block, num_blocks)); + + serial_unlock(); + alloc_unlock(); +} + +int64_t BitAllocator::alloc_blocks_dis_int(int64_t num_blocks, + int64_t *block_list, bool lock) +{ + int64_t zone_mark = m_zone_list->get_marker(); + BitMapZone *zone = NULL; + int64_t allocated = 0; + + alloc_lock(false); + serial_lock(); + + BmapEntityListIter iter = BmapEntityListIter( + m_zone_list, zone_mark, true); + + while ((zone = (BitMapZone *) iter.next())) { + + if (!zone_free_to_alloc(zone, 1, lock)) { + continue; + } + + allocated += zone->alloc_blocks_dis(num_blocks, &block_list[allocated]); + zone->unlock_zone(); + if (allocated == num_blocks) { + break; + } + } + + alloc_unlock(); + serial_unlock(); + + return allocated; +} + +/* + * Allocate N dis-contiguous blocks. + */ +int64_t BitAllocator::alloc_blocks_dis(int64_t num_blocks, int64_t *block_list) +{ + int scans = 2; + int64_t allocated = 0; + + if (!check_input_dis(num_blocks)) { + return 0; + } + + if (!reserve_blocks(num_blocks)) { + return 0; + } + + while (scans && allocated < num_blocks) { + allocated += alloc_blocks_dis_int(num_blocks, &block_list[allocated], false); + scans --; + } + + if (allocated < num_blocks) { + /* + * Could not find anything in two scans. + * Go in serial manner to get something for sure + * if available. 
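+     * Dis-contiguous allocation keeps whatever it finds; the shortfall
+     * against the reservation is handed back via unreserve() below.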
+ */ + allocated += alloc_blocks_dis_int(num_blocks, &block_list[allocated], true); + } + + unreserve(num_blocks, allocated); + debug_assert(is_allocated(block_list, allocated)); + + return allocated; +} + +void BitAllocator::free_blocks_dis(int64_t num_blocks, int64_t *block_list) +{ + alloc_lock(false); + + for (int64_t i = 0; i < num_blocks; i++) { + free_blocks_int(block_list[i], 1); + } + + debug_assert(get_used_blocks() > 0); + sub_used_blocks(num_blocks); + alloc_unlock(); +} diff --git a/src/os/bluestore/BitAllocator.h b/src/os/bluestore/BitAllocator.h new file mode 100644 index 00000000000..ae674cdb51a --- /dev/null +++ b/src/os/bluestore/BitAllocator.h @@ -0,0 +1,250 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Bitmap based in memory allocator. + * Author: Ramesh Chander, Ramesh.Chander@sandisk.com + */ + +#ifndef CEPH_OS_BLUESTORE_BITALLOCATOR_H +#define CEPH_OS_BLUESTORE_BITALLOCATOR_H + +#include +#include +#include +#include + +class BmapEntity { +public: + static int64_t size(); + virtual int64_t get_index() { return -1; } + virtual bool is_allocated(int64_t start, int64_t num) = 0; + + virtual ~BmapEntity() { } +}; + + +class BmapEntityList { + BmapEntity **m_items; + int64_t m_num_items; + +public: + + BmapEntityList(BmapEntity **list, int64_t len) { + m_items = list; + m_num_items = len; + } + + virtual ~BmapEntityList() { } + + BmapEntity *get_nth_item(int64_t idx) { + return m_items[idx]; + } + + BmapEntity** get_item_list() { + return m_items; + } + + virtual int64_t incr_marker(int64_t add) = 0; + virtual int64_t get_marker() = 0; + virtual void set_marker(int64_t val) = 0; + + int64_t size() { + return m_num_items; + } +}; + + +class BmapEntityListIter { + BmapEntityList *m_list; + int64_t m_start_idx; + int64_t m_cur_idx; + bool m_wrap; + bool m_wrapped; +public: + + BmapEntityListIter(BmapEntityList *list); + + BmapEntityListIter(BmapEntityList *list, bool wrap); + + BmapEntityListIter(BmapEntityList *list, int64_t start_idx); + + BmapEntityListIter(BmapEntityList *list, int64_t start_idx, bool wrap); + + BmapEntity *next(); + int64_t index(); + void decr_idx(); +}; + +typedef unsigned long bmap_t; + +class BmapEntry: public BmapEntity { + +private: + std::atomic m_bits; +public: + static bmap_t full_bmask(); + static int64_t size(); + static bmap_t empty_bmask(); + static bmap_t align_mask(int x); + bmap_t bit_mask(int bit_num); + bmap_t atomic_fetch(); + BmapEntry(bool val); + + void clear_bit(int bit); + void clear_bits(int offset, int num_bits); + void set_bits(int offset, int num_bits); + bool check_n_set_bit(int bit); + bool check_bit(int bit); + bool is_allocated(int64_t start_bit, int64_t num_bits); + + int find_n_cont_bits(int start_offset, int64_t num_bits); + int find_n_free_bits(int start_idx, int64_t max_bits, + int *free_bit, int *end_idx); + int find_first_set_bits(int64_t required_blocks, int bit_offset, + int *start_offset, int64_t *scanned); + + int find_any_free_bits(int start_offset, int64_t num_blocks, + int64_t *alloc_list, int64_t block_offset, + int64_t *scanned); + + virtual ~BmapEntry(); + +}; + +class BmapList: public BmapEntityList { + + std::atomic m_marker; +public: + BmapList(BmapEntity **bmaps, int len, int64_t marker): + BmapEntityList(bmaps, len) { + m_marker = marker; + } + + int64_t incr_marker(int64_t add); + void set_marker(int64_t val); + int64_t get_marker(); +}; + + +class BitMapZone: public BmapEntity { + +private: + int64_t m_total_blocks; + int64_t 
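+  /* index of this zone in the allocator's zone list */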
m_zone_num; + std::atomic m_used_blocks; + + BmapEntityList *m_bmap_list; + + enum {ZONE_FREE = 0, ZONE_ACTIVE = 1} m_state; + std::mutex m_lock; + + int64_t alloc_cont_bits(int64_t num_blocks, BmapEntityListIter *iter, int64_t *bmap_out_idx); + void free_blocks_int(int64_t start_block, int64_t num_blocks); + void init(int64_t zone_num, int64_t total_blocks, bool def); + +public: + BitMapZone(int64_t zone_num, int64_t total_blocks); + BitMapZone(int64_t zone_num, int64_t total_blocks, bool def); + bool lock_zone(bool wait); + void unlock_zone(); + + int64_t get_used_blocks(); + + int64_t add_used_blocks(int64_t blks); + int64_t sub_used_blocks(int64_t blks); + + int64_t alloc_blocks(int64_t num_blocks, int64_t *start_block); + void set_blocks_used(int64_t start_block, int64_t num_blocks); + void free_blocks(int64_t start_block, int64_t num_blocks); + + int64_t alloc_blocks_dis(int64_t num_blocks, int64_t *allocated_blocks); + + bool is_exhausted(); + bool is_allocated(int64_t stat_block, int64_t num_blocks); + int64_t get_index(); + int64_t size(); + void reset_marker(); + + virtual ~BitMapZone(); +}; + +typedef enum bmap_alloc_mode { + SERIAL = 1, + CONCURRENT = 2, +} bmap_alloc_mode_t; + + +class ZoneList : public BmapEntityList { + +private: + int64_t m_marker; + std::mutex m_marker_mutex; + +public: + + ZoneList(BmapEntity **list, int64_t len); + ZoneList(BmapEntity **list, int64_t len, int64_t marker); + int64_t incr_marker(int64_t add); + int64_t get_marker(); + void set_marker(int64_t val); + +}; + +class BitAllocator { +private: + bmap_alloc_mode_t m_alloc_mode; + int64_t m_zone_size_blocks; + int64_t m_total_zones; + + int64_t m_total_blocks; + std::atomic m_allocated_blocks; + int64_t m_reserved_blocks; + std::mutex m_res_blocks_lock; + BmapEntityList *m_zone_list; + + enum {ALLOC_DESTROY = 0, ALLOC_ACTIVE = 1} m_state; + + std::mutex m_serial_mutex; + pthread_rwlock_t m_alloc_slow_lock; + + bool is_allocated(int64_t start_block, int64_t num_blocks); + bool is_allocated(int64_t *blocks, int64_t num_blocks); + + int64_t alloc_blocks_int(int64_t num_blocks, int64_t *start_block, bool lock); + int64_t alloc_blocks_dis_int(int64_t num_blocks, int64_t *block_list, bool lock); + void free_blocks_int(int64_t start_block, int64_t num_blocks); + bool zone_free_to_alloc(BmapEntity *zone, int64_t required, bool lock); + + void serial_lock(); + void serial_unlock(); + + void alloc_lock(bool write); + void alloc_unlock(); + bool check_input(int64_t num_blocks); + bool check_input_dis(int64_t num_blocks); + void unreserve(int64_t needed, int64_t allocated); + void init(int64_t total_blocks, int64_t zone_size_block, bmap_alloc_mode_t mode, bool def); + +public: + + BitAllocator(int64_t total_blocks, int64_t zone_size_block, bmap_alloc_mode_t mode); + BitAllocator(int64_t total_blocks, int64_t zone_size_block, bmap_alloc_mode_t mode, bool def); + ~BitAllocator(); + void shutdown(); + int64_t alloc_blocks(int64_t num_blocks, int64_t *start_block); + int64_t alloc_blocks_res(int64_t num_blocks, int64_t *start_block); + void free_blocks(int64_t start_block, int64_t num_blocks); + void set_blocks_used(int64_t start_block, int64_t num_blocks); + int64_t sub_used_blocks(int64_t num_blocks); + int64_t add_used_blocks(int64_t num_blocks); + int64_t get_used_blocks(); + int64_t get_reserved_blocks(); + int64_t size(); + bool reserve_blocks(int64_t blocks); + void unreserve_blocks(int64_t blocks); + + int64_t alloc_blocks_dis(int64_t num_blocks, int64_t *block_list); + void free_blocks_dis(int64_t 
num_blocks, int64_t *block_list);
+};
+
+#endif // End of file
diff --git a/src/os/bluestore/BitMapAllocator.cc b/src/os/bluestore/BitMapAllocator.cc
new file mode 100644
index 00000000000..636405cef6a
--- /dev/null
+++ b/src/os/bluestore/BitMapAllocator.cc
@@ -0,0 +1,217 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Bitmap based in-memory allocator.
+ * Author: Ramesh Chander, Ramesh.Chander@sandisk.com
+ *
+ * TBD list:
+ * 1. Make committing and uncommitting lists concurrent.
+ */
+
+#include "BitAllocator.h"
+
+#include "BitMapAllocator.h"
+#include "bluestore_types.h"
+#include "BlueStore.h"
+
+#define dout_subsys ceph_subsys_bluestore
+#undef dout_prefix
+#define dout_prefix *_dout << "bitmapalloc:"
+
+#define NEXT_MULTIPLE(x, m) (!(x) ? 0: (((((x) - 1) / (m)) + 1) * (m)))
+
+BitMapAllocator::BitMapAllocator(int64_t device_size)
+  : m_num_uncommitted(0),
+    m_num_committing(0)
+{
+  int64_t block_size = g_conf->bluestore_min_alloc_size;
+  int64_t zone_size_blks = 1024; // Change it later
+
+  m_block_size = block_size;
+  m_bit_alloc = new BitAllocator(device_size / block_size,
+        zone_size_blks, CONCURRENT, true);
+  assert(m_bit_alloc);
+  if (!m_bit_alloc) {
+    dout(10) << __func__ << " Unable to initialize Bit Allocator" << dendl;
+  }
+  dout(10) << __func__ << " instance " << (uint64_t) this <<
+    " size " << device_size << dendl;
+}
+
+BitMapAllocator::~BitMapAllocator()
+{
+  delete m_bit_alloc;
+}
+
+void BitMapAllocator::insert_free(uint64_t off, uint64_t len)
+{
+  dout(20) << __func__ << " instance " << (uint64_t) this <<
+    " offset " << off << " len " << len << dendl;
+
+  assert(!(off % m_block_size));
+  assert(!(len % m_block_size));
+
+  m_bit_alloc->free_blocks(off / m_block_size,
+             len / m_block_size);
+}
+
+int BitMapAllocator::reserve(uint64_t need)
+{
+  int nblks = need / m_block_size; // apply floor
+  assert(!(need % m_block_size));
+  dout(10) << __func__ << " instance " << (uint64_t) this <<
+    " num_used " << m_bit_alloc->get_used_blocks() <<
+    " total " << m_bit_alloc->size() << dendl;
+
+  if (!m_bit_alloc->reserve_blocks(nblks)) {
+    return -ENOSPC;
+  }
+  return 0;
+}
+
+void BitMapAllocator::unreserve(uint64_t unused)
+{
+  int nblks = unused / m_block_size;
+  assert(!(unused % m_block_size));
+
+  dout(10) << __func__ << " instance " << (uint64_t) this <<
+    " unused " << nblks <<
+    " num used " << m_bit_alloc->get_used_blocks() <<
+    " total " << m_bit_alloc->size() << dendl;
+
+  m_bit_alloc->unreserve_blocks(nblks);
+}
+
+int BitMapAllocator::allocate(
+  uint64_t want_size, uint64_t alloc_unit, int64_t hint,
+  uint64_t *offset, uint32_t *length)
+{
+
+  assert(!(alloc_unit % m_block_size));
+  int64_t len = NEXT_MULTIPLE(want_size, m_block_size);
+  int64_t nblks = len / m_block_size;
+
+  assert(alloc_unit);
+
+  int64_t start_blk = 0;
+  int64_t count = 0;
+
+  dout(10) << __func__ << " instance " << (uint64_t) this
+    << " want_size " << want_size
+    << " alloc_unit " << alloc_unit
+    << " hint " << hint
+    << dendl;
+
+  *offset = 0;
+  *length = 0;
+
+  count = m_bit_alloc->alloc_blocks_res(nblks, &start_blk);
+  if (count == 0) {
+    return -ENOSPC;
+  }
+  *offset = start_blk * m_block_size;
+  *length = count * m_block_size;
+
+  dout(20) << __func__ << " instance " << (uint64_t) this
+    << " offset " << *offset << " length " << *length << dendl;
+
+  return 0;
+}
+
+int BitMapAllocator::release(
+  uint64_t offset, uint64_t length)
+{
+  std::lock_guard<std::mutex> l(m_lock);
+  dout(10) << __func__ << " " << offset << "~" << length << dendl;
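+  // Freed extents are only staged here; they become allocatable again
+  // once commit_finish() feeds the committing list back into the bitmap.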
+  m_uncommitted.insert(offset, length);
+  m_num_uncommitted += length;
+  return 0;
+}
+
+uint64_t BitMapAllocator::get_free()
+{
+  return ((
+    m_bit_alloc->size() - m_bit_alloc->get_used_blocks()) *
+    m_block_size);
+}
+
+void BitMapAllocator::dump(ostream& out)
+{
+  std::lock_guard<std::mutex> l(m_lock);
+  dout(30) << __func__ << " instance " << (uint64_t) this
+    << " committing: " << m_committing.num_intervals()
+    << " extents" << dendl;
+
+  for (auto p = m_committing.begin();
+       p != m_committing.end(); ++p) {
+    dout(30) << __func__ << " instance " << (uint64_t) this
+      << " " << p.get_start() << "~" << p.get_len() << dendl;
+  }
+  dout(30) << __func__ << " instance " << (uint64_t) this
+    << " uncommitted: " << m_uncommitted.num_intervals()
+    << " extents" << dendl;
+
+  for (auto p = m_uncommitted.begin();
+       p != m_uncommitted.end(); ++p) {
+    dout(30) << __func__ << " " << p.get_start() << "~" << p.get_len() << dendl;
+  }
+}
+
+void BitMapAllocator::init_add_free(uint64_t offset, uint64_t length)
+{
+  dout(10) << __func__ << " instance " << (uint64_t) this <<
+    " offset " << offset << " length " << length << dendl;
+
+  insert_free(NEXT_MULTIPLE(offset, m_block_size),
+    (length / m_block_size) * m_block_size);
+}
+
+void BitMapAllocator::init_rm_free(uint64_t offset, uint64_t length)
+{
+  dout(10) << __func__ << " instance " << (uint64_t) this <<
+    " offset " << offset << " length " << length << dendl;
+
+  assert(!(offset % m_block_size));
+  assert(!(length % m_block_size));
+
+  int64_t first_blk = offset / m_block_size;
+  int64_t count = length / m_block_size;
+
+  m_bit_alloc->set_blocks_used(first_blk, count);
+}
+
+void BitMapAllocator::shutdown()
+{
+  dout(10) << __func__ << " instance " << (uint64_t) this << dendl;
+  m_bit_alloc->shutdown();
+  //delete m_bit_alloc; //Fix this
+}
+
+void BitMapAllocator::commit_start()
+{
+  std::lock_guard<std::mutex> l(m_lock);
+
+  dout(10) << __func__ << " instance " << (uint64_t) this
+    << " releasing " << m_num_uncommitted
+    << " in extents " << m_uncommitted.num_intervals() << dendl;
+  assert(m_committing.empty());
+  m_committing.swap(m_uncommitted);
+  m_num_committing = m_num_uncommitted;
+  m_num_uncommitted = 0;
+}
+
+void BitMapAllocator::commit_finish()
+{
+  std::lock_guard<std::mutex> l(m_lock);
+  dout(10) << __func__ << " instance " << (uint64_t) this
+    << " released " << m_num_committing
+    << " in extents " << m_committing.num_intervals() << dendl;
+  for (auto p = m_committing.begin();
+       p != m_committing.end();
+       ++p) {
+    insert_free(p.get_start(), p.get_len());
+  }
+  m_committing.clear();
+  m_num_committing = 0;
+}
diff --git a/src/os/bluestore/BitMapAllocator.h b/src/os/bluestore/BitMapAllocator.h
new file mode 100644
index 00000000000..e5cd32b1645
--- /dev/null
+++ b/src/os/bluestore/BitMapAllocator.h
@@ -0,0 +1,55 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_OS_BLUESTORE_BITMAPALLOCATOR_H
+#define CEPH_OS_BLUESTORE_BITMAPALLOCATOR_H
+
+#include <mutex>
+
+#include "Allocator.h"
+#include "BitAllocator.h"
+#include "include/btree_interval_set.h"
+
+class BitMapAllocator : public Allocator {
+  std::mutex m_lock;
+
+  int64_t m_num_uncommitted;
+  int64_t m_num_committing;
+  int64_t m_block_size;
+  int64_t m_num_reserved;
+
+  btree_interval_set<uint64_t,uint64_t> m_uncommitted; ///< released but not yet usable
+  btree_interval_set<uint64_t,uint64_t> m_committing;  ///< released but not yet usable
+  BitAllocator *m_bit_alloc; // Bit allocator instance
+
+  void insert_free(uint64_t offset, uint64_t len);
+
+public:
+  BitMapAllocator();
+  BitMapAllocator(int64_t device_size);
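+  // device_size is in bytes; the constructor sizes the bitmap in
+  // blocks of g_conf->bluestore_min_alloc_size.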
+  ~BitMapAllocator();
+
+  int reserve(uint64_t need);
+  void unreserve(uint64_t unused);
+
+  int allocate(
+    uint64_t want_size, uint64_t alloc_unit, int64_t hint,
+    uint64_t *offset, uint32_t *length);
+
+  int release(
+    uint64_t offset, uint64_t length);
+
+  void commit_start();
+  void commit_finish();
+
+  uint64_t get_free();
+
+  void dump(std::ostream& out);
+
+  void init_add_free(uint64_t offset, uint64_t length);
+  void init_rm_free(uint64_t offset, uint64_t length);
+
+  void shutdown();
+};
+
+#endif
diff --git a/src/test/Makefile-server.am b/src/test/Makefile-server.am
index fbb42e49b23..72e1b2237ee 100644
--- a/src/test/Makefile-server.am
+++ b/src/test/Makefile-server.am
@@ -66,6 +66,11 @@ unittest_bluefs_LDADD = $(LIBOS) $(UNITTEST_LDADD) $(CEPH_GLOBAL)
 unittest_bluefs_CXXFLAGS = $(UNITTEST_CXXFLAGS)
 check_TESTPROGRAMS += unittest_bluefs
 
+unittest_bit_alloc_SOURCES = test/objectstore/BitAllocator_test.cc
+unittest_bit_alloc_LDADD = $(LIBOS) $(UNITTEST_LDADD) $(CEPH_GLOBAL)
+unittest_bit_alloc_CXXFLAGS = $(UNITTEST_CXXFLAGS)
+check_TESTPROGRAMS += unittest_bit_alloc
+
 unittest_bluestore_types_SOURCES = test/objectstore/test_bluestore_types.cc
 unittest_bluestore_types_LDADD = $(LIBOS) $(UNITTEST_LDADD) $(CEPH_GLOBAL)
 unittest_bluestore_types_CXXFLAGS = $(UNITTEST_CXXFLAGS)
diff --git a/src/test/objectstore/BitAllocator_test.cc b/src/test/objectstore/BitAllocator_test.cc
new file mode 100644
index 00000000000..890e92e35ed
--- /dev/null
+++ b/src/test/objectstore/BitAllocator_test.cc
@@ -0,0 +1,493 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Bitmap based in-memory allocator unit test cases.
+ * Author: Ramesh Chander, Ramesh.Chander@sandisk.com
+ */
+
+#include "os/bluestore/BitAllocator.h"
+#include <stdio.h>
+#include <assert.h>
+#define debug_assert assert
+#define NUM_THREADS 16
+#define MAX_BLOCKS (1024 * 1024 * 4)
+
+void test_bmap_iter()
+{
+  int num_items = 5;
+  int off = 2;
+  class BmapEntityTmp : public BmapEntity {
+      int64_t m_num;
+      int64_t m_len;
+    public:
+      BmapEntityTmp(int num) {
+        m_num = num;
+        m_len = num;
+      }
+
+      int64_t get_index() {
+        return m_num;
+      }
+      bool is_allocated(int64_t s, int64_t num)
+      {
+        return true;
+      }
+  };
+
+  class BmapEntityListTmp: public BmapEntityList {
+    private:
+      int64_t m_marker;
+    public:
+      BmapEntityListTmp(BmapEntity **bmaps, int len, int64_t marker):
+        BmapEntityList(bmaps, len) {
+        m_marker = marker;
+      }
+
+      int64_t incr_marker(int64_t add)
+      {
+        return m_marker++;
+      }
+      void set_marker(int64_t val)
+      {
+        m_marker = val;
+      }
+      int64_t get_marker()
+      {
+        return m_marker;
+      }
+
+  };
+
+  int i = 0;
+  BmapEntityTmp *obj = NULL;
+
+  BmapEntity **bmap = new BmapEntity*[num_items];
+  for (i = 0; i < num_items; i++) {
+    bmap[i] = new BmapEntityTmp(i);
+  }
+
+  BmapEntityList *list = new BmapEntityListTmp(bmap, num_items, 0);
+  BmapEntityListIter *iter = new BmapEntityListIter(list, off, false);
+
+  i = off;
+  int count = 0;
+  int64_t last_idx = off;
+  while ((obj = (BmapEntityTmp*) iter->next())) {
+    debug_assert(obj->get_index() == last_idx);
+    debug_assert(obj->get_index() == i);
+    debug_assert(obj == bmap[i]);
+    last_idx = iter->index();
+    i++;
+    count++;
+  }
+  debug_assert(i == num_items);
+  debug_assert(count == num_items - off);
+
+  delete iter;
+
+  iter = new BmapEntityListIter(list, off, true);
+
+  i = off;
+  last_idx = off;
+  count = 0;
+  while ((obj = (BmapEntityTmp*) iter->next())) {
+    debug_assert(obj->get_index() == last_idx);
+    debug_assert(obj->get_index() == i);
+ 
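+    // iter->index() names the slot next() will return on the following
+    // call (modulo wrap), which is what last_idx tracks across iterations.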
debug_assert(obj == bmap[i]); + last_idx = iter->index(); + + + i = (i + 1) % num_items; + count++; + } + debug_assert(i == off); + debug_assert(count == num_items); + + + num_items = 4; + off = num_items - 1; + + //list = new BmapEntityList(bmap, num_items); + list = new BmapEntityListTmp(bmap, num_items, 0); + + iter = new BmapEntityListIter(list, off, true); + i = off; + last_idx = off; + count = 0; + while ((obj = (BmapEntityTmp*) iter->next())) { + debug_assert(obj->get_index() == last_idx); + debug_assert(obj->get_index() == i); + debug_assert(obj == bmap[i]); + last_idx = iter->index(); + i = (i + 1) % num_items; + count++; + } + debug_assert(i == off); + debug_assert(count == num_items); +} + +void test_bmap_entry() +{ + int i = 0; + int start = 0; + int64_t scanned = 0; + int64_t allocated = 0; + int size = BmapEntry::size(); + + BmapEntry *bmap = new BmapEntry(true); + + // Clear bits one by one and check they are cleared + for (i = 0; i < size; i++) { + bmap->clear_bit(i); + debug_assert(!bmap->check_bit(i)); + } + + // Set all bits again using set_bits + bmap->set_bits(0, size); + + // clear 4 bits at a time and then check allocated + for (i = 0; i < size/4; i++) { + bmap->clear_bits(i * 4, 4); + debug_assert(!bmap->is_allocated(i * 4, 4)); + } + + // set all bits again + bmap->set_bits(0, size); + + // clear alternate bits, check and set those bits + for (i = 0; i < size/2; i++) { + bmap->clear_bit(i * 2 + 1); + debug_assert(!bmap->check_bit(i * 2 + 1)); + debug_assert(bmap->check_n_set_bit(i * 2 + 1)); + } + + // free 1, 2 and size bits at a time and try to find n cont bits + for (i = 0; i < size / 4; i++) { + bmap->clear_bits(i * 2 + 1, i + 1); + debug_assert(!bmap->check_bit(i * 2 + 1)); + debug_assert(bmap->find_n_cont_bits(i * 2 + 1, i + 1) == + i + 1); + } + + // free 1, 2 and size bits at a time and try to find any cont bits + for (i = 0; i < size / 4; i++) { + bmap->clear_bits(i * 2 + 1, i + 1); + debug_assert(!bmap->is_allocated(i * 2 + 1, i + 1)); + } + + for (i = 0; i < size / 4; i++) { + bmap->clear_bits(i * 2 + 1, i + 1); + allocated = bmap->find_first_set_bits(i + 1, 0, &start, &scanned); + + debug_assert(allocated == i + 1); + debug_assert(scanned == ((i * 2 + 1) + (i + 1))); + debug_assert(start == i * 2 + 1); + bmap->set_bits(0, BmapEntry::size()); + + } + + // Find few bits at end of bitmap and find those + bmap->clear_bits(0, 4); + bmap->clear_bits(BmapEntry::size() - 12, 5); + bmap->clear_bits(BmapEntry::size() - 6, 6); + allocated = bmap->find_first_set_bits(6, 0, &start, &scanned); + + debug_assert(allocated == 6); + debug_assert(scanned == BmapEntry::size() - 6 + 6); + debug_assert(start == BmapEntry::size() - 6); + debug_assert(bmap->is_allocated(start, 6)); + + delete bmap; + bmap = new BmapEntry(false); + bmap->set_bits(4, BmapEntry::size() - 4); + debug_assert(bmap->is_allocated(4, BmapEntry::size() - 4)); + debug_assert(!bmap->is_allocated(0, 4)); + bmap->set_bits(0, 4); + debug_assert(bmap->is_allocated(0, BmapEntry::size())); + + +} + +void test_zone_alloc() +{ + int total_blocks = 1024; + int64_t blks = 1; + int64_t last_blk = -1; + int64_t start_blk = 0; + int64_t allocated = 0; + + BitMapZone *zone = new BitMapZone(0, total_blocks); + + // Allocate all blocks and see that it is allocating in order. 
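+  // (zone number 0, so returned block numbers equal zone-relative offsets)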
+ for (int i = 0; i < total_blocks; i++) { + allocated = zone->alloc_blocks(blks, &start_blk); + debug_assert(last_blk + 1 == start_blk); + debug_assert(allocated == blks); + last_blk = start_blk; + } + debug_assert(zone->get_used_blocks() == total_blocks); + + for (int i = 0; i < total_blocks; i++) { + debug_assert(zone->get_used_blocks() == total_blocks - i); + zone->free_blocks(i, blks); + } + + blks = 2; + last_blk = -2; + debug_assert(zone->is_exhausted()); + for (int i = 0; i < total_blocks/2; i++) { + allocated = zone->alloc_blocks(blks, &start_blk); + debug_assert(last_blk + 2 == start_blk); + last_blk = start_blk; + } + + // Free different boundaries and allocate those + blks = 3; + debug_assert(zone->is_exhausted()); + zone->free_blocks(BmapEntry::size() - blks, blks); + zone->free_blocks(BmapEntry::size(), blks); + + allocated = zone->alloc_blocks(blks * 2, &start_blk); + debug_assert(BmapEntry::size() - blks == start_blk); + debug_assert(allocated == blks * 2); + + blks = 4; + zone->free_blocks(BmapEntry::size() * 2 - blks, 2 * blks); + allocated = zone->alloc_blocks(2 * blks, &start_blk); + debug_assert(BmapEntry::size() * 2 - blks == start_blk); + debug_assert(allocated == blks * 2); + + zone->reset_marker(); + blks = BmapEntry::size() * 2; + zone->free_blocks(BmapEntry::size() * 6 - blks, blks); + allocated = zone->alloc_blocks(blks, &start_blk); + debug_assert(BmapEntry::size() * 6 - blks == start_blk); + + // free blocks at distance 1, 2 up to 63 and allocate all of them + // together using disc alloc. + zone->reset_marker(); + blks = BmapEntry::size() * 2; + int num_bits = 1; + for (int i = 0; i < zone->size() / BmapEntry::size() -1; i++) { + + zone->free_blocks(i * BmapEntry::size(), num_bits); + num_bits++; + } + + zone->reset_marker(); + num_bits = 1; + int64_t start_block = 0; + for (int i = 0; i < zone->size() / BmapEntry::size() -1; i++) { + allocated = zone->alloc_blocks(num_bits, &start_block); + debug_assert(num_bits == allocated); + debug_assert(start_block == i * BmapEntry::size()); + num_bits++; + } + + start_block = 0; + num_bits = 1; + for (int i = 0; i < zone->size() / BmapEntry::size() -1; i++) { + zone->free_blocks(i * BmapEntry::size(), num_bits); + num_bits++; + } + + delete zone; + // non-conti allocations test + zone = new BitMapZone(0, total_blocks); + int64_t blocks[1024] = {0}; + for (int i = 0; i < zone->size(); i++) { + allocated = zone->alloc_blocks(1, &start_block); + debug_assert(allocated == 1); + } + for (int i = 0; i < zone->size(); i += 2) { + zone->free_blocks(i, 1); + } + + zone->reset_marker(); + allocated = zone->alloc_blocks_dis(zone->size() / 2, blocks); + debug_assert(allocated == zone->size() / 2); +} + +void test_bitmap_alloc() { + int64_t total_blocks = 1024 * 4; + int64_t zone_size = 1024; + int64_t allocated = 0; + int64_t start_block = 0; + + BitAllocator *alloc = new BitAllocator(total_blocks, zone_size, CONCURRENT); + + for (int64_t iter = 0; iter < 4; iter++) { + for (int64_t i = 0; i < total_blocks; i++) { + allocated = alloc->alloc_blocks(1, &start_block); + debug_assert(allocated == 1); + debug_assert(start_block == i); + } + + for (int64_t i = 0; i < total_blocks; i++) { + alloc->free_blocks(i, 1); + } + } + + for (int64_t iter = 0; iter < 4; iter++) { + for (int64_t i = 0; i < total_blocks / zone_size; i++) { + allocated = alloc->alloc_blocks(zone_size, &start_block); + debug_assert(allocated == zone_size); + debug_assert(start_block == i * zone_size); + } + + for (int64_t i = 0; i < total_blocks / zone_size; i++) { + 
alloc->free_blocks(i * zone_size, zone_size); + } + } + + allocated = alloc->alloc_blocks(1, &start_block); + debug_assert(allocated == 1); + + allocated = alloc->alloc_blocks(zone_size - 1, &start_block); + debug_assert(allocated == zone_size - 1); + debug_assert(start_block == 1); + + allocated = alloc->alloc_blocks(1, &start_block); + debug_assert(allocated == 1); + + allocated = alloc->alloc_blocks(zone_size, &start_block); + debug_assert(allocated == zone_size); + debug_assert(start_block == zone_size * 2); + + // Dis contiguous blocks allocations + delete alloc; + alloc = new BitAllocator(total_blocks, zone_size, CONCURRENT); + + int64_t blocks[2048] = {0}; + for (int64_t i = 0; i < alloc->size(); i++) { + allocated = alloc->alloc_blocks(1, &start_block); + debug_assert(allocated == 1); + } + for (int i = 0; i < alloc->size(); i += 2) { + alloc->free_blocks(i, 1); + } + + allocated = alloc->alloc_blocks_dis(alloc->size()/2, blocks); + debug_assert(allocated == alloc->size() / 2); + + allocated = alloc->alloc_blocks_dis(1, blocks); + debug_assert(allocated == 0); + + alloc->free_blocks(alloc->size() / 2, 1); + allocated = alloc->alloc_blocks_dis(1, blocks); + + debug_assert(allocated == 1); + debug_assert(blocks[0] == alloc->size()/2); + + delete alloc; + alloc = new BitAllocator(1024, zone_size, CONCURRENT, true); + + alloc->free_blocks(1, 1023); + alloc->alloc_blocks(16, &start_block); +} + +void +verify_blocks(int64_t num_blocks, int64_t *blocks) +{ + int64_t i = 0; + int wraps = 0; + for (i = 0; i < num_blocks - 1; i++) { + if (blocks[i] > blocks[i + 1]) { + wraps++; + debug_assert(wraps <= 1); + } + } +} + +__thread int my_tid; +__thread int64_t allocated_blocks[MAX_BLOCKS]; + +void +do_work(BitAllocator *alloc) +{ + int num_iters = 10; + int64_t alloced = 0; + int64_t start_block = -1; + uint64_t alloc_unit = 1; + int64_t num_blocks = alloc->size() / NUM_THREADS; + int total_alloced = 0; + + while (num_iters--) { + printf("Allocating in tid %d.\n", my_tid); + for (int i = 0; i < num_blocks; i++) { + alloced = alloc->alloc_blocks(1, &start_block); + debug_assert(alloced == 1); + total_alloced++; + allocated_blocks[i] = start_block; + } + + printf("Freeing in tid %d %d blocks.\n", my_tid, total_alloced); + for (int i = 0; i < num_blocks; i++) { + alloc->free_blocks(allocated_blocks[i], 1); + } + + total_alloced = 0; + printf("Done tid %d iter %d.\n", my_tid, num_iters); + } +} + +int tid = 0; +void * +worker(void *args) +{ + my_tid = __sync_fetch_and_add(&tid, 1); + BitAllocator *alloc = (BitAllocator *) args; + printf("Starting thread %d", my_tid); + do_work(alloc); + return NULL; +} + +void test_bmap_alloc_concurrent() +{ + + // Create an allocator + int64_t total_blocks = MAX_BLOCKS; + int64_t zone_size = 1024; + int64_t allocated = 0; + int64_t start_block = 0; + pthread_t pthreads[NUM_THREADS] = {0}; + + debug_assert(total_blocks <= MAX_BLOCKS); + + BitAllocator *alloc = new BitAllocator(total_blocks, zone_size, CONCURRENT); + + // Create N threads and each thread allocates at max + printf("Spawning %d threads for parallel test.....\n", NUM_THREADS); + + for (int j = 0; j < NUM_THREADS; j++) { + if (pthread_create(&pthreads[j], NULL, worker, alloc)) { + printf("Unable to create worker thread.\n"); + exit(0); + } + } + + for (int j = 0; j < NUM_THREADS; j++) { + pthread_join(pthreads[j], NULL); + } + + // max_blks / num threads and free those. 
Make sure threads
+  // always get blocks.
+
+  // Do this with dis-contiguous and contiguous allocations.
+
+  // Do multithreaded allocation and check that allocations are unique.
+}
+
+int main()
+{
+  test_bmap_entry();
+  test_bmap_iter();
+  test_zone_alloc();
+  test_bitmap_alloc();
+  test_bmap_alloc_concurrent();
+
+  printf("All tests done : SUCCESS.\n");
+}
diff --git a/src/test/objectstore/CMakeLists.txt b/src/test/objectstore/CMakeLists.txt
index aae937eaf02..54cf3ee1dc3 100644
--- a/src/test/objectstore/CMakeLists.txt
+++ b/src/test/objectstore/CMakeLists.txt
@@ -94,6 +94,13 @@ add_executable(unittest_rocksdb_option EXCLUDE_FROM_ALL
 add_ceph_unittest(unittest_rocksdb_option ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/unittest_rocksdb_option)
 target_link_libraries(unittest_rocksdb_option global os ${BLKID_LIBRARIES})
 
+# unittest_bit_alloc
+add_executable(unittest_bit_alloc EXCLUDE_FROM_ALL
+  BitAllocator_test.cc
+  )
+add_ceph_unittest(unittest_bit_alloc ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/unittest_bit_alloc)
+target_link_libraries(unittest_bit_alloc os global)
+
 # unittest_bluefs
 add_executable(unittest_bluefs EXCLUDE_FROM_ALL
   test_bluefs.cc
-- 
2.47.3