From f64c236f278732b0fa211cd9e93c4f9d5a77a356 Mon Sep 17 00:00:00 2001 From: Igor Fedotov Date: Fri, 29 Dec 2017 20:59:16 +0300 Subject: [PATCH] os/bluestore: refactor FreeListManager to get clearer view on the number of alloc units it tracks. This also fixes out-of-range access for fsck's used_blocks bitmap that might happen when checking stores created prior to v12.2.2 Fixes http://tracker.ceph.com/issues/22535 Signed-off-by: Igor Fedotov --- src/os/bluestore/BitmapFreelistManager.cc | 14 ++++----- src/os/bluestore/BitmapFreelistManager.h | 10 ++++++- src/os/bluestore/BlueStore.cc | 29 +++++++++++------- src/os/bluestore/BlueStore.h | 1 + src/os/bluestore/FreelistManager.h | 6 +++- src/os/bluestore/StupidAllocator.cc | 2 +- src/test/objectstore/store_test.cc | 36 +++++++++++++++++++++++ 7 files changed, 76 insertions(+), 22 deletions(-) diff --git a/src/os/bluestore/BitmapFreelistManager.cc b/src/os/bluestore/BitmapFreelistManager.cc index ad6717be50d..4a0e719ebe5 100644 --- a/src/os/bluestore/BitmapFreelistManager.cc +++ b/src/os/bluestore/BitmapFreelistManager.cc @@ -58,11 +58,10 @@ BitmapFreelistManager::BitmapFreelistManager(CephContext* cct, { } -int BitmapFreelistManager::create(uint64_t new_size, uint64_t min_alloc_size, +int BitmapFreelistManager::create(uint64_t new_size, uint64_t granularity, KeyValueDB::Transaction txn) { - bytes_per_block = std::max(cct->_conf->bdev_block_size, - (int64_t)min_alloc_size); + bytes_per_block = granularity; assert(ISP2(bytes_per_block)); size = P2ALIGN(new_size, bytes_per_block); blocks_per_key = cct->_conf->bluestore_freelist_blocks_per_key; @@ -291,8 +290,8 @@ bool BitmapFreelistManager::enumerate_next(uint64_t *offset, uint64_t *length) << enumerate_offset << " bit 0x" << enumerate_bl_pos << " offset 0x" << end << std::dec << dendl; + end = std::min(get_alloc_units() * bytes_per_block, end); *length = end - *offset; - assert((*offset + *length) <= size); dout(10) << __func__ << std::hex << " 0x" << *offset << "~" << *length << std::dec << dendl; return true; @@ -312,14 +311,13 @@ bool BitmapFreelistManager::enumerate_next(uint64_t *offset, uint64_t *length) } } - end = size; - if (enumerate_offset < end) { + if (enumerate_offset < size) { + end = get_alloc_units() * bytes_per_block; *length = end - *offset; dout(10) << __func__ << std::hex << " 0x" << *offset << "~" << *length << std::dec << dendl; - enumerate_offset = end; + enumerate_offset = size; enumerate_bl_pos = blocks_per_key; - assert((*offset + *length) <= size); return true; } diff --git a/src/os/bluestore/BitmapFreelistManager.h b/src/os/bluestore/BitmapFreelistManager.h index cb10c63d98a..65e9f5d925f 100644 --- a/src/os/bluestore/BitmapFreelistManager.h +++ b/src/os/bluestore/BitmapFreelistManager.h @@ -51,7 +51,7 @@ public: static void setup_merge_operator(KeyValueDB *db, string prefix); - int create(uint64_t size, uint64_t min_alloc_size, + int create(uint64_t size, uint64_t granularity, KeyValueDB::Transaction txn) override; int init() override; @@ -68,6 +68,14 @@ public: void release( uint64_t offset, uint64_t length, KeyValueDB::Transaction txn) override; + + inline uint64_t get_alloc_units() const override { + return size / bytes_per_block; + } + inline uint64_t get_alloc_size() const override { + return bytes_per_block; + } + }; #endif diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 9fef634068e..6329c2f1612 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -4241,7 +4241,10 @@ int BlueStore::_open_fm(bool create) bl.append(freelist_type); t->set(PREFIX_SUPER, "freelist_type", bl); } - fm->create(bdev->get_size(), min_alloc_size, t); + // being able to allocate in units less than bdev block size + // seems to be a bad idea. + assert( cct->_conf->bdev_block_size <= (int64_t)min_alloc_size); + fm->create(bdev->get_size(), (int64_t)min_alloc_size, t); // allocate superblock reserved space. note that we do not mark // bluefs space as allocated in the freelist; we instead rely on @@ -5575,6 +5578,7 @@ int BlueStore::_fsck_check_extents( const PExtentVector& extents, bool compressed, mempool_dynamic_bitset &used_blocks, + uint64_t granularity, store_statfs_t& expected_statfs) { dout(30) << __func__ << " oid " << oid << " extents " << extents << dendl; @@ -5588,7 +5592,7 @@ int BlueStore::_fsck_check_extents( } bool already = false; apply( - e.offset, e.length, min_alloc_size, used_blocks, + e.offset, e.length, granularity, used_blocks, [&](uint64_t pos, mempool_dynamic_bitset &bs) { if (bs.test(pos)) already = true; @@ -5694,9 +5698,9 @@ int BlueStore::_fsck(bool deep, bool repair) if (r < 0) goto out_scan; - used_blocks.resize(bdev->get_size() / min_alloc_size); + used_blocks.resize(fm->get_alloc_units()); apply( - 0, MAX(min_alloc_size, SUPER_RESERVED), min_alloc_size, used_blocks, + 0, MAX(min_alloc_size, SUPER_RESERVED), fm->get_alloc_size(), used_blocks, [&](uint64_t pos, mempool_dynamic_bitset &bs) { bs.set(pos); } @@ -5705,7 +5709,7 @@ int BlueStore::_fsck(bool deep, bool repair) if (bluefs) { for (auto e = bluefs_extents.begin(); e != bluefs_extents.end(); ++e) { apply( - e.get_start(), e.get_len(), min_alloc_size, used_blocks, + e.get_start(), e.get_len(), fm->get_alloc_size(), used_blocks, [&](uint64_t pos, mempool_dynamic_bitset &bs) { bs.set(pos); } @@ -5993,6 +5997,7 @@ int BlueStore::_fsck(bool deep, bool repair) errors += _fsck_check_extents(oid, blob.get_extents(), blob.is_compressed(), used_blocks, + fm->get_alloc_size(), expected_statfs); } } @@ -6057,7 +6062,9 @@ int BlueStore::_fsck(bool deep, bool repair) errors += _fsck_check_extents(p->second.oids.front(), extents, p->second.compressed, - used_blocks, expected_statfs); + used_blocks, + fm->get_alloc_size(), + expected_statfs); sb_info.erase(p); } } @@ -6119,7 +6126,7 @@ int BlueStore::_fsck(bool deep, bool repair) << " released 0x" << std::hex << wt.released << std::dec << dendl; for (auto e = wt.released.begin(); e != wt.released.end(); ++e) { apply( - e.get_start(), e.get_len(), min_alloc_size, used_blocks, + e.get_start(), e.get_len(), fm->get_alloc_size(), used_blocks, [&](uint64_t pos, mempool_dynamic_bitset &bs) { bs.set(pos); } @@ -6134,7 +6141,7 @@ int BlueStore::_fsck(bool deep, bool repair) // know they are allocated. for (auto e = bluefs_extents.begin(); e != bluefs_extents.end(); ++e) { apply( - e.get_start(), e.get_len(), min_alloc_size, used_blocks, + e.get_start(), e.get_len(), fm->get_alloc_size(), used_blocks, [&](uint64_t pos, mempool_dynamic_bitset &bs) { bs.reset(pos); } @@ -6145,7 +6152,7 @@ int BlueStore::_fsck(bool deep, bool repair) while (fm->enumerate_next(&offset, &length)) { bool intersects = false; apply( - offset, length, min_alloc_size, used_blocks, + offset, length, fm->get_alloc_size(), used_blocks, [&](uint64_t pos, mempool_dynamic_bitset &bs) { if (bs.test(pos)) { intersects = true; @@ -6186,8 +6193,8 @@ int BlueStore::_fsck(bool deep, bool repair) size_t next = used_blocks.find_next(cur); if (next != cur + 1) { derr << "fsck error: leaked extent 0x" << std::hex - << ((uint64_t)start * min_alloc_size) << "~" - << ((cur + 1 - start) * min_alloc_size) << std::dec + << ((uint64_t)start * fm->get_alloc_size()) << "~" + << ((cur + 1 - start) * fm->get_alloc_size()) << std::dec << dendl; start = next; break; diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index e33fe062ea9..0642deb619f 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -2068,6 +2068,7 @@ private: const PExtentVector& extents, bool compressed, mempool_dynamic_bitset &used_blocks, + uint64_t granularity, store_statfs_t& expected_statfs); void _buffer_cache_write( diff --git a/src/os/bluestore/FreelistManager.h b/src/os/bluestore/FreelistManager.h index 8f7aacbf2e1..6603062ef16 100644 --- a/src/os/bluestore/FreelistManager.h +++ b/src/os/bluestore/FreelistManager.h @@ -24,7 +24,7 @@ public: static void setup_merge_operators(KeyValueDB *db); - virtual int create(uint64_t size, uint64_t min_alloc_size, + virtual int create(uint64_t size, uint64_t granularity, KeyValueDB::Transaction txn) = 0; virtual int init() = 0; @@ -41,6 +41,10 @@ public: virtual void release( uint64_t offset, uint64_t length, KeyValueDB::Transaction txn) = 0; + + virtual uint64_t get_alloc_units() const = 0; + virtual uint64_t get_alloc_size() const = 0; + }; diff --git a/src/os/bluestore/StupidAllocator.cc b/src/os/bluestore/StupidAllocator.cc index fee4f0b31d9..ab2e562be3b 100644 --- a/src/os/bluestore/StupidAllocator.cc +++ b/src/os/bluestore/StupidAllocator.cc @@ -8,7 +8,7 @@ #define dout_context cct #define dout_subsys ceph_subsys_bluestore #undef dout_prefix -#define dout_prefix *_dout << "stupidalloc " +#define dout_prefix *_dout << "stupidalloc 0x" << this << " " StupidAllocator::StupidAllocator(CephContext* cct) : cct(cct), num_free(0), diff --git a/src/test/objectstore/store_test.cc b/src/test/objectstore/store_test.cc index fb07b695274..1a59c22abe0 100644 --- a/src/test/objectstore/store_test.cc +++ b/src/test/objectstore/store_test.cc @@ -6732,6 +6732,42 @@ TEST_P(StoreTestSpecificAUSize, garbageCollection) { } #endif +TEST_P(StoreTestSpecificAUSize, fsckOnUnalignedDevice) { + if (string(GetParam()) != "bluestore") + return; + + g_conf->set_val("bluestore_block_size", stringify(0x280005000)); //10 Gb + 4K + g_conf->set_val("bluestore_fsck_on_mount", "false"); + g_conf->set_val("bluestore_fsck_on_umount", "false"); + StartDeferred(0x4000); + store->umount(); + ASSERT_EQ(store->fsck(false), 0); // do fsck explicitly + store->mount(); + + g_conf->set_val("bluestore_fsck_on_mount", "true"); + g_conf->set_val("bluestore_fsck_on_umount", "true"); + g_conf->set_val("bluestore_block_size", stringify(0x280000000)); // 10 Gb + g_conf->apply_changes(NULL); +} + +TEST_P(StoreTestSpecificAUSize, fsckOnUnalignedDevice2) { + if (string(GetParam()) != "bluestore") + return; + + g_conf->set_val("bluestore_block_size", stringify(0x280005000)); //10 Gb + 20K + g_conf->set_val("bluestore_fsck_on_mount", "false"); + g_conf->set_val("bluestore_fsck_on_umount", "false"); + StartDeferred(0x1000); + store->umount(); + ASSERT_EQ(store->fsck(false), 0); // do fsck explicitly + store->mount(); + + g_conf->set_val("bluestore_block_size", stringify(0x280000000)); // 10 Gb + g_conf->set_val("bluestore_fsck_on_mount", "true"); + g_conf->set_val("bluestore_fsck_on_umount", "true"); + g_conf->apply_changes(NULL); +} + int main(int argc, char **argv) { vector args; argv_to_vec(argc, (const char **)argv, args); -- 2.39.5