: cct(cct),
bdev(MAX_BDEV),
ioc(MAX_BDEV),
- block_reserved(MAX_BDEV),
alloc(MAX_BDEV),
- alloc_size(MAX_BDEV, 0)
+ alloc_size(MAX_BDEV, 0),
+ locked_alloc(MAX_BDEV)
{
dirty.pending_release.resize(MAX_BDEV);
discard_cb[BDEV_WAL] = wal_discard_cb;
int BlueFS::add_block_device(unsigned id, const string& path, bool trim,
bluefs_shared_alloc_context_t* _shared_alloc)
{
- uint64_t reserved;
string dev_name;
switch(id) {
case BDEV_WAL:
case BDEV_NEWWAL:
- reserved = BDEV_LABEL_BLOCK_SIZE;
dev_name = "wal";
break;
case BDEV_DB:
case BDEV_NEWDB:
- reserved = SUPER_RESERVED;
dev_name = "db";
break;
case BDEV_SLOW:
- reserved = 0;
dev_name = "slow";
break;
default:
ceph_assert(false);
}
dout(10) << __func__ << " bdev " << id << " path " << path << " "
- << " reserved " << reserved << dendl;
+ << dendl;
ceph_assert(id < bdev.size());
ceph_assert(bdev[id] == NULL);
BlockDevice *b = BlockDevice::create(cct, path, NULL, NULL,
discard_cb[id], static_cast<void*>(this), dev_name.c_str());
- block_reserved[id] = reserved;
if (_shared_alloc) {
b->set_no_exclusive_lock();
}
return alloc[id]->get_free();
}
+uint64_t BlueFS::_get_minimal_reserved(unsigned id) const
+{
+ uint64_t reserved = 0;
+ switch(id) {
+ case BDEV_WAL:
+ case BDEV_NEWWAL:
+ reserved = BDEV_LABEL_BLOCK_SIZE;
+ break;
+ case BDEV_DB:
+ case BDEV_NEWDB:
+ reserved = SUPER_RESERVED;
+ break;
+ case BDEV_SLOW:
+ reserved = 0;
+ break;
+ default:
+ ceph_assert(false);
+ }
+ return reserved;
+}
+
+uint64_t BlueFS::get_full_reserved(unsigned id)
+{
+ if (!is_shared_alloc(id)) {
+ return locked_alloc[id].length + _get_minimal_reserved(id);
+ }
+ return 0;
+}
+
void BlueFS::dump_perf_counters(Formatter *f)
{
f->open_object_section("bluefs_perf_counters");
}
_init_logger();
- _init_alloc();
super.version = 0;
super.block_size = bdev[BDEV_DB]->get_block_size();
super.osd_uuid = osd_uuid;
super.uuid.generate_random();
- dout(1) << __func__ << " uuid " << super.uuid << dendl;
+
+ _init_alloc();
// init log
FileRef log_file = ceph::make_ref<File>();
super.log_fnode = log_file->fnode;
super.memorized_layout = layout;
_write_super(BDEV_DB);
+ dout(1) << __func__ << " super " << super << dendl;
_flush_bdev();
// clean up
continue;
}
ceph_assert(bdev[id]->get_size());
+ locked_alloc[id] = bluefs_extent_t();
+
if (is_shared_alloc(id)) {
dout(1) << __func__ << " shared, id " << id << std::hex
<< ", capacity 0x" << bdev[id]->get_size()
name += devnames[id];
else
name += to_string(uintptr_t(this));
- string alloc_type = cct->_conf->bluefs_allocator;
+ auto reserved = _get_minimal_reserved(id);
+ uint64_t locked_offs = 0;
+ {
+ // Try to lock tailing space at device if allocator controlled space
+ // isn't aligned with recommended alloc unit.
+ // Final decision whether locked tail to be maintained is made after
+ // BlueFS replay depending on existing allocations.
+ uint64_t size0 = _get_total(id);
+ uint64_t size = size0 - reserved;
+ size = p2align(size, alloc_size[id]) + reserved;
+ if (size < size0) {
+ locked_offs = size;
+ locked_alloc[id] = bluefs_extent_t(id, locked_offs, uint32_t(size0 - size));
+ }
+ }
+ string alloc_type = cct->_conf->bluefs_allocator;
dout(1) << __func__ << " new, id " << id << std::hex
<< ", allocator name " << name
<< ", allocator type " << alloc_type
<< ", capacity 0x" << bdev[id]->get_size()
- << ", reserved 0x" << block_reserved[id]
- << ", block size 0x" << alloc_size[id]
+ << ", reserved 0x" << reserved
+ << ", locked 0x" << locked_alloc[id].offset
+ << "~" << locked_alloc[id].length
+ << ", block size 0x" << bdev[id]->get_block_size()
+ << ", alloc unit 0x" << alloc_size[id]
<< std::dec << dendl;
alloc[id] = Allocator::create(cct, alloc_type,
bdev[id]->get_size(),
- alloc_size[id],
+ bdev[id]->get_block_size(),
name);
- auto reserved = block_reserved[id];
- alloc[id]->init_add_free(reserved, _get_total(id) - reserved);
+ uint64_t free_len = locked_offs ? locked_offs : _get_total(id) - reserved;
+ alloc[id]->init_add_free(reserved, free_len);
}
}
}
derr << __func__ << " failed to open super: " << cpp_strerror(r) << dendl;
goto out;
}
+ dout(5) << __func__ << " super: " << super << dendl;
// set volume selector if not provided before/outside
if (vselector == nullptr) {
shared_alloc->bluefs_used += q.length;
alloc[q.bdev]->init_rm_free(q.offset, q.length);
} else if (!is_shared) {
+ if (locked_alloc[q.bdev].length) {
+ auto locked_offs = locked_alloc[q.bdev].offset;
+ if (q.offset + q.length > locked_offs) {
+ // we already have allocated extents in locked range,
+ // do not enforce this lock then.
+ bluefs_extent_t dummy;
+ std::swap(locked_alloc[q.bdev], dummy);
+ alloc[q.bdev]->init_add_free(dummy.offset, dummy.length);
+ dout(1) << __func__ << std::hex
+ << " unlocked at " << q.bdev
+ << " 0x" << dummy.offset << "~" << dummy.length
+ << std::dec << dendl;
+ }
+ }
alloc[q.bdev]->init_rm_free(q.offset, q.length);
}
}
bool seen_recs = false;
boost::dynamic_bitset<uint64_t> used_blocks[MAX_BDEV];
+ bool check_allocations = cct->_conf->bluefs_log_replay_check_allocations;
if (!noop) {
- if (cct->_conf->bluefs_log_replay_check_allocations) {
+ if (check_allocations) {
for (size_t i = 0; i < MAX_BDEV; ++i) {
if (bdev[i] != nullptr) {
// let's use minimal allocation unit we can have
}
if (!noop) {
FileRef f = _get_file(fnode.ino);
- if (cct->_conf->bluefs_log_replay_check_allocations) {
+ if (check_allocations) {
int r = _check_allocations(f->fnode,
used_blocks, false, "OP_FILE_UPDATE");
if (r < 0) {
if (fnode.ino > ino_last) {
ino_last = fnode.ino;
}
- if (cct->_conf->bluefs_log_replay_check_allocations) {
+ if (check_allocations) {
int r = _check_allocations(f->fnode,
used_blocks, true, "OP_FILE_UPDATE");
if (r < 0) {
// be leanient, if there is no extents just produce error message
ceph_assert(delta.offset == fnode.allocated || delta.extents.empty());
}
- if (cct->_conf->bluefs_log_replay_check_allocations) {
+ if (check_allocations) {
int r = _check_allocations(fnode,
used_blocks, false, "OP_FILE_UPDATE_INC");
if (r < 0) {
if (fnode.ino > ino_last) {
ino_last = fnode.ino;
}
- if (cct->_conf->bluefs_log_replay_check_allocations) {
+ if (check_allocations) {
int r = _check_allocations(f->fnode,
used_blocks, true, "OP_FILE_UPDATE_INC");
if (r < 0) {
auto p = nodes.file_map.find(ino);
ceph_assert(p != nodes.file_map.end());
vselector->sub_usage(p->second->vselector_hint, p->second->fnode);
- if (cct->_conf->bluefs_log_replay_check_allocations) {
+ if (check_allocations) {
int r = _check_allocations(p->second->fnode,
used_blocks, false, "OP_FILE_REMOVE");
if (r < 0) {