Allocator::Allocator(std::string_view name,
int64_t _capacity,
int64_t _block_size)
- : device_size(_capacity), block_size(_block_size)
+ : device_size(_capacity),
+ block_size(_block_size)
{
asok_hook = new SocketHook(this, name);
}
return asok_hook->name;
}
-Allocator *Allocator::create(CephContext* cct, std::string_view type,
- int64_t size, int64_t block_size, std::string_view name)
+Allocator *Allocator::create(
+ CephContext* cct,
+ std::string_view type,
+ int64_t size,
+ int64_t block_size,
+ int64_t zone_size,
+ int64_t first_sequential_zone,
+ std::string_view name)
{
Allocator* alloc = nullptr;
if (type == "stupid") {
name);
#ifdef HAVE_LIBZBD
} else if (type == "zoned") {
- return new ZonedAllocator(cct, size, block_size, name);
+ return new ZonedAllocator(cct, size, block_size, zone_size, first_sequential_zone,
+ name);
#endif
}
if (alloc == nullptr) {
virtual double get_fragmentation_score();
virtual void shutdown() = 0;
- static Allocator *create(CephContext* cct, std::string_view type, int64_t size,
- int64_t block_size, const std::string_view name = "");
+ /// Create an allocator of the requested @p type.
+ ///
+ /// @param cct                   context handed to the concrete allocator
+ /// @param type                  allocator type name (e.g. "stupid", "zoned")
+ /// @param size                  capacity handed to the allocator
+ /// @param block_size            allocation unit handed to the allocator
+ /// @param zone_size             passed through to the "zoned" allocator
+ /// @param first_sequential_zone passed through to the "zoned" allocator
+ /// @param name                  allocator name
+ /// @return the new allocator; callers check the result for nullptr
+ static Allocator *create(
+ CephContext* cct,
+ std::string_view type,
+ int64_t size,
+ int64_t block_size,
+ int64_t zone_size = 0,
+ int64_t first_sequential_zone = 0,
+ const std::string_view name = ""
+ );
const std::string& get_name() const;
}
int BitmapFreelistManager::create(uint64_t new_size, uint64_t granularity,
+ uint64_t zone_size, uint64_t first_sequential_zone,
KeyValueDB::Transaction txn)
{
bytes_per_block = granularity;
static void setup_merge_operator(KeyValueDB *db, std::string prefix);
int create(uint64_t size, uint64_t granularity,
+ uint64_t zone_size, uint64_t first_sequential_zone,
KeyValueDB::Transaction txn) override;
int init(KeyValueDB *kvdb, bool db_in_read_only,
<< std::dec << dendl;
alloc[id] = Allocator::create(cct, cct->_conf->bluefs_allocator,
bdev[id]->get_size(),
- alloc_size[id], name);
+ alloc_size[id],
+ 0, 0,
+ name);
alloc[id]->init_add_free(
block_reserved[id],
_get_total(id));
<< dendl;
return -EINVAL;
}
- alloc_size = _zoned_piggyback_device_parameters_onto(alloc_size);
} else
#endif
if (freelist_type == "zoned") {
return -EINVAL;
}
- fm->create(bdev->get_size(), alloc_size, t);
+ fm->create(bdev->get_size(), alloc_size,
+ zone_size, first_sequential_zone,
+ t);
+
+ // allocate superblock reserved space. note that we do not mark
+ // bluefs space as allocated in the freelist; we instead rely on
+ // bluefs doing that itself.
auto reserved = _get_ondisk_reserved();
if (fm_restore) {
// we need to allocate the full space in restore case
<< "Please set to 0." << dendl;
return -EINVAL;
}
-
- alloc_size = _zoned_piggyback_device_parameters_onto(alloc_size);
}
#endif
- shared_alloc.set(Allocator::create(cct, allocator_type,
- bdev->get_size(),
- alloc_size, "block"));
+ shared_alloc.set(
+ Allocator::create(
+ cct, allocator_type,
+ bdev->get_size(),
+ alloc_size,
+ zone_size,
+ first_sequential_zone,
+ "block"));
if (!shared_alloc.a) {
lderr(cct) << __func__ << " failed to create " << allocator_type << " allocator"
#ifdef HAVE_LIBZBD
if (bdev->is_smr()) {
freelist_type = "zoned";
+ zone_size = bdev->get_zone_size();
+ first_sequential_zone = bdev->get_conventional_region_size() / zone_size;
} else
#endif
{
bl.append(stringify(OMAP_PER_PG));
t->set(PREFIX_SUPER, "per_pool_omap", bl);
}
+
+#ifdef HAVE_LIBZBD
+ if (bdev->is_smr()) {
+ {
+ bufferlist bl;
+ encode((uint64_t)zone_size, bl);
+ t->set(PREFIX_SUPER, "zone_size", bl);
+ }
+ {
+ bufferlist bl;
+ encode((uint64_t)first_sequential_zone, bl);
+ t->set(PREFIX_SUPER, "first_sequential_zone", bl);
+ }
+ }
+#endif
+
ondisk_format = latest_ondisk_format;
_prepare_ondisk_format_super(t);
db->submit_transaction_sync(t);
<< std::dec << dendl;
}
+ // smr fields
+ {
+ bufferlist bl;
+ int r = db->get(PREFIX_SUPER, "zone_size", &bl);
+ if (r >= 0) {
+ auto p = bl.cbegin();
+ decode(zone_size, p);
+ dout(1) << __func__ << " zone_size 0x" << std::hex << zone_size << std::dec << dendl;
+ }
+ }
+ {
+ bufferlist bl;
+ int r = db->get(PREFIX_SUPER, "first_sequential_zone", &bl);
+ if (r >= 0) {
+ auto p = bl.cbegin();
+ decode(first_sequential_zone, p);
+ dout(1) << __func__ << " first_sequential_zone 0x" << std::hex
+ << first_sequential_zone << std::dec << dendl;
+ }
+ }
+
_set_per_pool_omap();
_open_statfs();
return zone_key + object_key;
}
-// For now, to avoid interface changes we piggyback zone_size (in MiB) and the
-// first sequential zone number onto min_alloc_size and pass it to functions
-// Allocator::create and FreelistManager::create.
-uint64_t BlueStore::_zoned_piggyback_device_parameters_onto(uint64_t min_alloc_size) {
- uint64_t zone_size = bdev->get_zone_size();
- uint64_t zone_size_mb = zone_size / (1024 * 1024);
- uint64_t first_seq_zone = bdev->get_conventional_region_size() / zone_size;
- min_alloc_size |= (zone_size_mb << 32);
- min_alloc_size |= (first_seq_zone << 48);
- return min_alloc_size;
-}
-
#endif
void BlueStore::_txc_finalize_kv(TransContext *txc, KeyValueDB::Transaction t)
Allocator* BlueStore::create_bitmap_allocator(uint64_t bdev_size) {
// create allocator
uint64_t alloc_size = min_alloc_size;
- Allocator* alloc = Allocator::create(cct, "bitmap", bdev_size, alloc_size, "recovery");
+ Allocator* alloc = Allocator::create(cct, "bitmap", bdev_size, alloc_size,
+ zone_size, first_sequential_zone,
+ "recovery");
if (alloc) {
return alloc;
} else {
std::numeric_limits<decltype(min_alloc_size)>::digits,
"not enough bits for min_alloc_size");
+ // smr-only
+ uint64_t zone_size = 0; ///< size of each SMR zone, in bytes
+ uint64_t first_sequential_zone = 0; ///< index of the first SMR zone that is sequential-only
+
enum {
// Please preserve the order since it's DB persistent
OMAP_BULK = 0,
#ifdef HAVE_LIBZBD
// Functions related to zoned storage.
- uint64_t _zoned_piggyback_device_parameters_onto(uint64_t min_alloc_size);
void _zoned_update_cleaning_metadata(TransContext *txc);
std::string _zoned_key(uint64_t offset, const ghobject_t *oid);
#endif
static void setup_merge_operators(KeyValueDB *db, const std::string &type);
virtual int create(uint64_t size, uint64_t granularity,
+ uint64_t zone_size, uint64_t first_sequential_zone,
KeyValueDB::Transaction txn) = 0;
virtual int init(KeyValueDB *kvdb, bool db_in_read_only,
ZonedAllocator::ZonedAllocator(CephContext* cct,
int64_t size,
int64_t blk_size,
+ int64_t _zone_size,
+ int64_t _first_sequential_zone,
std::string_view name)
- : Allocator(name, size, blk_size & 0x00000000ffffffff),
+ : Allocator(name, size, blk_size),
cct(cct),
num_free(0),
size(size),
- // To avoid interface changes, we piggyback zone size and the first
- // sequential zone number onto the first 32 bits of 64-bit |blk_size|.
- // The last 32 bits of |blk_size| is holding the actual block size.
- block_size((blk_size & 0x00000000ffffffff)),
- zone_size(((blk_size & 0x0000ffff00000000) >> 32) * 1024 * 1024),
- first_seq_zone_num((blk_size >> 48) & 0xffff),
+ block_size(blk_size),
+ zone_size(_zone_size),
+ first_seq_zone_num(_first_sequential_zone),
starting_zone_num(first_seq_zone_num),
num_zones(size / zone_size),
num_zones_to_clean(0) {
public:
ZonedAllocator(CephContext* cct, int64_t size, int64_t block_size,
- std::string_view name);
+ int64_t _zone_size,
+ int64_t _first_sequential_zone,
+ std::string_view name);
~ZonedAllocator() override;
const char *get_type() const override {
int ZonedFreelistManager::create(
uint64_t new_size,
uint64_t granularity,
+ uint64_t new_zone_size,
+ uint64_t first_sequential_zone,
KeyValueDB::Transaction txn) {
- // To avoid interface changes, we piggyback zone size and the first sequential
- // zone number onto the first 32 bits of 64-bit |granularity|. The last 32
- // bits of |granularity| is holding the actual allocation granularity, which
- // is bytes_per_block.
size = new_size;
- bytes_per_block = granularity & 0x00000000ffffffff;
- zone_size = ((granularity & 0x0000ffff00000000) >> 32) * 1024 * 1024;
+ bytes_per_block = granularity;
+ zone_size = new_zone_size;
num_zones = size / zone_size;
- starting_zone_num = (granularity & 0xffff000000000000) >> 48;
+ starting_zone_num = first_sequential_zone;
enumerate_zone_num = ~0UL;
ceph_assert(size % zone_size == 0);
int create(uint64_t size,
uint64_t granularity,
+ uint64_t zone_size,
+ uint64_t first_sequential_zone,
KeyValueDB::Transaction txn) override;
int init(KeyValueDB *kvdb,
void init_alloc(int64_t size, uint64_t min_alloc_size) {
std::cout << "Creating alloc type " << string(GetParam()) << " \n";
alloc.reset(Allocator::create(g_ceph_context, GetParam(), size,
- min_alloc_size));
+ min_alloc_size,
+ 256*1048576, 100*256*1048576ull));
}
void init_close() {
unique_ptr<Allocator> alloc;
alloc.reset(Allocator::create(g_ceph_context, alloc_type,
- capacity, alloc_unit, alloc_name));
+ capacity, alloc_unit, 0, 0, alloc_name));
auto it = o->find_first();
while (!it.end()) {