of alloc units it tracks.
This also fixes out-of-range access for fsck's used_blocks bitmap that
might happen when checking stores created prior to v12.2.2
Fixes http://tracker.ceph.com/issues/22535
Signed-off-by: Igor Fedotov <ifedotov@suse.com>
{
}
-int BitmapFreelistManager::create(uint64_t new_size, uint64_t min_alloc_size,
+int BitmapFreelistManager::create(uint64_t new_size, uint64_t granularity,
KeyValueDB::Transaction txn)
{
- bytes_per_block = std::max(cct->_conf->bdev_block_size,
- (int64_t)min_alloc_size);
+ bytes_per_block = granularity;
assert(ISP2(bytes_per_block));
size = P2ALIGN(new_size, bytes_per_block);
blocks_per_key = cct->_conf->bluestore_freelist_blocks_per_key;
<< enumerate_offset << " bit 0x" << enumerate_bl_pos
<< " offset 0x" << end << std::dec
<< dendl;
+ end = std::min(get_alloc_units() * bytes_per_block, end);
*length = end - *offset;
- assert((*offset + *length) <= size);
dout(10) << __func__ << std::hex << " 0x" << *offset << "~" << *length
<< std::dec << dendl;
return true;
}
}
- end = size;
- if (enumerate_offset < end) {
+ if (enumerate_offset < size) {
+ end = get_alloc_units() * bytes_per_block;
*length = end - *offset;
dout(10) << __func__ << std::hex << " 0x" << *offset << "~" << *length
<< std::dec << dendl;
- enumerate_offset = end;
+ enumerate_offset = size;
enumerate_bl_pos = blocks_per_key;
- assert((*offset + *length) <= size);
return true;
}
static void setup_merge_operator(KeyValueDB *db, string prefix);
- int create(uint64_t size, uint64_t min_alloc_size,
+ int create(uint64_t size, uint64_t granularity,
KeyValueDB::Transaction txn) override;
int init() override;
void release(
uint64_t offset, uint64_t length,
KeyValueDB::Transaction txn) override;
+
+ inline uint64_t get_alloc_units() const override {
+ return size / bytes_per_block;
+ }
+ inline uint64_t get_alloc_size() const override {
+ return bytes_per_block;
+ }
+
};
#endif
bl.append(freelist_type);
t->set(PREFIX_SUPER, "freelist_type", bl);
}
- fm->create(bdev->get_size(), min_alloc_size, t);
+ // being able to allocate in units less than bdev block size
+ // seems to be a bad idea.
+ assert( cct->_conf->bdev_block_size <= (int64_t)min_alloc_size);
+ fm->create(bdev->get_size(), (int64_t)min_alloc_size, t);
// allocate superblock reserved space. note that we do not mark
// bluefs space as allocated in the freelist; we instead rely on
const PExtentVector& extents,
bool compressed,
mempool_dynamic_bitset &used_blocks,
+ uint64_t granularity,
store_statfs_t& expected_statfs)
{
dout(30) << __func__ << " oid " << oid << " extents " << extents << dendl;
}
bool already = false;
apply(
- e.offset, e.length, min_alloc_size, used_blocks,
+ e.offset, e.length, granularity, used_blocks,
[&](uint64_t pos, mempool_dynamic_bitset &bs) {
if (bs.test(pos))
already = true;
if (r < 0)
goto out_scan;
- used_blocks.resize(bdev->get_size() / min_alloc_size);
+ used_blocks.resize(fm->get_alloc_units());
apply(
- 0, MAX(min_alloc_size, SUPER_RESERVED), min_alloc_size, used_blocks,
+ 0, MAX(min_alloc_size, SUPER_RESERVED), fm->get_alloc_size(), used_blocks,
[&](uint64_t pos, mempool_dynamic_bitset &bs) {
bs.set(pos);
}
if (bluefs) {
for (auto e = bluefs_extents.begin(); e != bluefs_extents.end(); ++e) {
apply(
- e.get_start(), e.get_len(), min_alloc_size, used_blocks,
+ e.get_start(), e.get_len(), fm->get_alloc_size(), used_blocks,
[&](uint64_t pos, mempool_dynamic_bitset &bs) {
bs.set(pos);
}
errors += _fsck_check_extents(oid, blob.get_extents(),
blob.is_compressed(),
used_blocks,
+ fm->get_alloc_size(),
expected_statfs);
}
}
errors += _fsck_check_extents(p->second.oids.front(),
extents,
p->second.compressed,
- used_blocks, expected_statfs);
+ used_blocks,
+ fm->get_alloc_size(),
+ expected_statfs);
sb_info.erase(p);
}
}
<< " released 0x" << std::hex << wt.released << std::dec << dendl;
for (auto e = wt.released.begin(); e != wt.released.end(); ++e) {
apply(
- e.get_start(), e.get_len(), min_alloc_size, used_blocks,
+ e.get_start(), e.get_len(), fm->get_alloc_size(), used_blocks,
[&](uint64_t pos, mempool_dynamic_bitset &bs) {
bs.set(pos);
}
// know they are allocated.
for (auto e = bluefs_extents.begin(); e != bluefs_extents.end(); ++e) {
apply(
- e.get_start(), e.get_len(), min_alloc_size, used_blocks,
+ e.get_start(), e.get_len(), fm->get_alloc_size(), used_blocks,
[&](uint64_t pos, mempool_dynamic_bitset &bs) {
bs.reset(pos);
}
while (fm->enumerate_next(&offset, &length)) {
bool intersects = false;
apply(
- offset, length, min_alloc_size, used_blocks,
+ offset, length, fm->get_alloc_size(), used_blocks,
[&](uint64_t pos, mempool_dynamic_bitset &bs) {
if (bs.test(pos)) {
intersects = true;
size_t next = used_blocks.find_next(cur);
if (next != cur + 1) {
derr << "fsck error: leaked extent 0x" << std::hex
- << ((uint64_t)start * min_alloc_size) << "~"
- << ((cur + 1 - start) * min_alloc_size) << std::dec
+ << ((uint64_t)start * fm->get_alloc_size()) << "~"
+ << ((cur + 1 - start) * fm->get_alloc_size()) << std::dec
<< dendl;
start = next;
break;
const PExtentVector& extents,
bool compressed,
mempool_dynamic_bitset &used_blocks,
+ uint64_t granularity,
store_statfs_t& expected_statfs);
void _buffer_cache_write(
static void setup_merge_operators(KeyValueDB *db);
- virtual int create(uint64_t size, uint64_t min_alloc_size,
+ virtual int create(uint64_t size, uint64_t granularity,
KeyValueDB::Transaction txn) = 0;
virtual int init() = 0;
virtual void release(
uint64_t offset, uint64_t length,
KeyValueDB::Transaction txn) = 0;
+
+ virtual uint64_t get_alloc_units() const = 0;
+ virtual uint64_t get_alloc_size() const = 0;
+
};
#define dout_context cct
#define dout_subsys ceph_subsys_bluestore
#undef dout_prefix
-#define dout_prefix *_dout << "stupidalloc "
+#define dout_prefix *_dout << "stupidalloc 0x" << this << " "
StupidAllocator::StupidAllocator(CephContext* cct)
: cct(cct), num_free(0),
}
#endif
+TEST_P(StoreTestSpecificAUSize, fsckOnUnalignedDevice) {
+ if (string(GetParam()) != "bluestore")
+ return;
+
+ g_conf->set_val("bluestore_block_size", stringify(0x280005000)); //10 Gb + 4K
+ g_conf->set_val("bluestore_fsck_on_mount", "false");
+ g_conf->set_val("bluestore_fsck_on_umount", "false");
+ StartDeferred(0x4000);
+ store->umount();
+ ASSERT_EQ(store->fsck(false), 0); // do fsck explicitly
+ store->mount();
+
+ g_conf->set_val("bluestore_fsck_on_mount", "true");
+ g_conf->set_val("bluestore_fsck_on_umount", "true");
+ g_conf->set_val("bluestore_block_size", stringify(0x280000000)); // 10 Gb
+ g_conf->apply_changes(NULL);
+}
+
+TEST_P(StoreTestSpecificAUSize, fsckOnUnalignedDevice2) {
+ if (string(GetParam()) != "bluestore")
+ return;
+
+ g_conf->set_val("bluestore_block_size", stringify(0x280005000)); //10 Gb + 20K
+ g_conf->set_val("bluestore_fsck_on_mount", "false");
+ g_conf->set_val("bluestore_fsck_on_umount", "false");
+ StartDeferred(0x1000);
+ store->umount();
+ ASSERT_EQ(store->fsck(false), 0); // do fsck explicitly
+ store->mount();
+
+ g_conf->set_val("bluestore_block_size", stringify(0x280000000)); // 10 Gb
+ g_conf->set_val("bluestore_fsck_on_mount", "true");
+ g_conf->set_val("bluestore_fsck_on_umount", "true");
+ g_conf->apply_changes(NULL);
+}
+
int main(int argc, char **argv) {
vector<const char*> args;
argv_to_vec(argc, (const char **)argv, args);