From: Igor Fedotov Date: Thu, 10 Oct 2019 11:25:57 +0000 (+0300) Subject: os/bluestore: fix bluefs migrate/expand to match single allocator. X-Git-Tag: wip-pdonnell-testing-20200918.022351~473^2~4 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=9a8f1ae49254b35704e564784ffa3bbd99e443a9;p=ceph-ci.git os/bluestore: fix bluefs migrate/expand to match single allocator. Signed-off-by: Igor Fedotov --- diff --git a/qa/standalone/osd/osd-bluefs-volume-ops.sh b/qa/standalone/osd/osd-bluefs-volume-ops.sh index 1c9c5cf2a99..86da6af5c32 100755 --- a/qa/standalone/osd/osd-bluefs-volume-ops.sh +++ b/qa/standalone/osd/osd-bluefs-volume-ops.sh @@ -16,8 +16,6 @@ function run() { CEPH_ARGS+="--bluestore_block_db_create=true " CEPH_ARGS+="--bluestore_block_db_size=1073741824 " CEPH_ARGS+="--bluestore_block_wal_size=536870912 " - CEPH_ARGS+="--bluestore_bluefs_min=536870912 " - CEPH_ARGS+="--bluestore_bluefs_min_free=536870912 " CEPH_ARGS+="--bluestore_block_wal_create=true " CEPH_ARGS+="--bluestore_fsck_on_mount=true " local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index eff7ae62c19..201e8c0115b 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -368,10 +368,11 @@ uint64_t BlueFS::get_block_device_size(unsigned id) return 0; } -void BlueFS::_add_block_extent(unsigned id, uint64_t offset, uint64_t length, - bool skip) +void BlueFS::_add_block_extent(bool create, unsigned id, uint64_t offset, + uint64_t length, bool skip) { dout(1) << __func__ << " bdev " << id + << " create " << create << " 0x" << std::hex << offset << "~" << length << std::dec << " skip " << skip << dendl; @@ -381,10 +382,14 @@ void BlueFS::_add_block_extent(unsigned id, uint64_t offset, uint64_t length, ceph_assert(bdev[id]->get_size() >= offset + length); block_all[id].insert(offset, length); - if (id < alloc.size() && alloc[id] && alloc[id] != shared_bdev_alloc) { + if (!create) { 
+ ceph_assert(id < alloc.size()); + ceph_assert(alloc[id]); if (!skip) log_t.op_alloc_add(id, offset, length); - alloc[id]->init_add_free(offset, length); + if (alloc[id] != shared_bdev_alloc) { + alloc[id]->init_add_free(offset, length); + } } dout(10) << __func__ << " done" << dendl; @@ -406,12 +411,30 @@ uint64_t BlueFS::get_used() uint64_t used = 0; for (unsigned id = 0; id < MAX_BDEV; ++id) { if (alloc[id]) { - used += block_all[id].size() - alloc[id]->get_free(); + if (alloc[id] != shared_bdev_alloc) { + used += block_all[id].size() - alloc[id]->get_free(); + } else { + used += shared_bdev_used; + } } } return used; } +uint64_t BlueFS::get_used(unsigned id) +{ + ceph_assert(id < alloc.size()); + ceph_assert(alloc[id]); + std::lock_guard l(lock); + uint64_t used = 0; + if (alloc[id] != shared_bdev_alloc) { + used = block_all[id].size() - alloc[id]->get_free(); + } else { + used += shared_bdev_used; + } + return used; +} + uint64_t BlueFS::get_total(unsigned id) { std::lock_guard l(lock); @@ -673,7 +696,6 @@ int BlueFS::mount() } // init freelist - dout(1) << __func__ << " shared_bdev_used = " << shared_bdev_used << dendl; for (auto& p : file_map) { dout(30) << __func__ << " noting alloc for " << p.second->fnode << dendl; for (auto& q : p.second->fnode.extents) { diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index 7c4717ef463..0a2fe38c5b0 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -418,8 +418,8 @@ private: return 4096; } - void _add_block_extent(unsigned bdev, uint64_t offset, uint64_t len, - bool skip=false); + void _add_block_extent(bool create, unsigned bdev, uint64_t offset, + uint64_t len, bool skip=false); public: BlueFS(CephContext* cct); @@ -455,7 +455,8 @@ public: uint64_t get_used(); uint64_t get_total(unsigned id); uint64_t get_free(unsigned id); - void get_usage(std::vector<std::pair<uint64_t,uint64_t>> *usage); // [<free,total> ...] + uint64_t get_used(unsigned id); + void get_usage(std::vector<std::pair<uint64_t,uint64_t>> *usage); // [<free,total> ...] 
void dump_perf_counters(ceph::Formatter *f); void dump_block_extents(std::ostream& out); @@ -522,10 +523,10 @@ public: uint64_t get_block_device_size(unsigned bdev); /// gift more block space - void add_block_extent(unsigned bdev, uint64_t offset, uint64_t len, + void add_block_extent(bool create, unsigned bdev, uint64_t offset, uint64_t len, bool skip=false) { std::unique_lock l(lock); - _add_block_extent(bdev, offset, len, skip); + _add_block_extent(create, bdev, offset, len, skip); int r = _flush_and_sync_log(l); ceph_assert(r == 0); } diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 2d64de0f3d8..f7c5273dde7 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -5326,6 +5326,7 @@ int BlueStore::_minimal_open_bluefs(bool create) } if (create) { bluefs->add_block_extent( + create, BlueFS::BDEV_DB, SUPER_RESERVED, bluefs->get_block_device_size(BlueFS::BDEV_DB) - SUPER_RESERVED); @@ -5359,6 +5360,7 @@ int BlueStore::_minimal_open_bluefs(bool create) auto reserved = _get_ondisk_reserved(); bluefs->add_block_extent( + create, bluefs_layout.shared_bdev, reserved, p2align(bdev->get_size(), min_alloc_size) - reserved); @@ -5388,6 +5390,7 @@ int BlueStore::_minimal_open_bluefs(bool create) if (create) { bluefs->add_block_extent( + create, BlueFS::BDEV_WAL, BDEV_LABEL_BLOCK_SIZE, bluefs->get_block_device_size(BlueFS::BDEV_WAL) - BDEV_LABEL_BLOCK_SIZE); @@ -6295,14 +6298,20 @@ int BlueStore::_mount_for_bluefs() ceph_assert(r == 0); r = _lock_fsid(); ceph_assert(r == 0); - r = _open_bluefs(false); + + r = _open_bdev(false); + ceph_assert(r == 0); + + r = _open_db_and_around(true); ceph_assert(r == 0); + return r; } void BlueStore::_umount_for_bluefs() { - _close_bluefs(false); + _close_db_and_around(true); + _close_bdev(); _close_fsid(); _close_path(); } @@ -6373,6 +6382,7 @@ int BlueStore::add_new_bluefs_device(int id, const string& dev_path) bluefs->mount(); bluefs->add_block_extent( + false, id, reserved, 
bluefs->get_block_device_size(id) - reserved, true); @@ -6407,7 +6417,7 @@ int BlueStore::migrate_to_existing_bluefs_device(const set<int>& devs_source, uint64_t used_space = 0; for(auto src_id : devs_source) { - used_space += bluefs->get_total(src_id) - bluefs->get_free(src_id); + used_space += bluefs->get_used(src_id); } uint64_t target_free = bluefs->get_free(id); if (target_free < used_space) { @@ -6415,7 +6425,8 @@ int BlueStore::migrate_to_existing_bluefs_device(const set<int>& devs_source, << " can't migrate, free space at target: " << target_free << " is less than required space: " << used_space << dendl; - return -ENOSPC; + r = -ENOSPC; + goto shutdown; } if (devs_source.count(BlueFS::BDEV_DB)) { bluefs_layout.shared_bdev = BlueFS::BDEV_DB; @@ -6520,6 +6531,7 @@ int BlueStore::migrate_to_new_bluefs_device(const set<int>& devs_source, bluefs->mount(); bluefs->add_block_extent( + false, id, reserved, bluefs->get_block_device_size(id) - reserved); r = bluefs->device_migrate_to_new(cct, devs_source, id, bluefs_layout); @@ -6615,7 +6627,7 @@ int BlueStore::expand_devices(ostream& out) out << devid <<" : expanding " << " from 0x" << std::hex << end << " to 0x" << size << std::dec << std::endl; - bluefs->add_block_extent(devid, end, size-end); + bluefs->add_block_extent(false, devid, end, size-end); string p = get_device_path(devid); const char* path = p.c_str(); if (path == nullptr) { @@ -6638,6 +6650,8 @@ int BlueStore::expand_devices(ostream& out) out << bluefs_layout.shared_bdev << " : expanding " << " from 0x" << std::hex << size0 << " to 0x" << size << std::dec << std::endl; + bluefs->add_block_extent(false, + bluefs_layout.shared_bdev, size0, size - size0); _write_out_fm_meta(size); if (bdev->supported_bdev_label()) { if (_set_bdev_label_size(path, size) >= 0) { diff --git a/src/test/objectstore/test_bluefs.cc b/src/test/objectstore/test_bluefs.cc index 6ca1dd4a49c..427af756cd6 100644 --- a/src/test/objectstore/test_bluefs.cc +++ b/src/test/objectstore/test_bluefs.cc @@ 
-91,7 +91,7 @@ TEST(BlueFS, mkfs) { uuid_d fsid; BlueFS fs(g_ceph_context); ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false)); - fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576); + fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576); ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); } @@ -100,7 +100,7 @@ TEST(BlueFS, mkfs_mount) { TempBdev bdev{size}; BlueFS fs(g_ceph_context); ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false)); - fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576); + fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576); uuid_d fsid; ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); ASSERT_EQ(0, fs.mount()); @@ -117,7 +117,7 @@ TEST(BlueFS, mkfs_mount_duplicate_gift) { { BlueFS fs(g_ceph_context); ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false)); - fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576); + fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576); uuid_d fsid; ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); ASSERT_EQ(0, fs.mount()); @@ -177,7 +177,7 @@ TEST(BlueFS, write_read) { TempBdev bdev{size}; BlueFS fs(g_ceph_context); ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false)); - fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576); + fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576); uuid_d fsid; ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); ASSERT_EQ(0, fs.mount()); @@ -208,7 +208,7 @@ TEST(BlueFS, small_appends) { TempBdev bdev{size}; BlueFS fs(g_ceph_context); ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false)); - fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576); + fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576); uuid_d fsid; ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); ASSERT_EQ(0, fs.mount()); @@ -246,7 +246,7 @@ TEST(BlueFS, 
very_large_write) { uint64_t total_written = 0; ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false)); - fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576); + fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576); uuid_d fsid; ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); ASSERT_EQ(0, fs.mount()); @@ -432,7 +432,7 @@ TEST(BlueFS, test_flush_1) { BlueFS fs(g_ceph_context); ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false)); - fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576); + fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576); uuid_d fsid; ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); ASSERT_EQ(0, fs.mount()); @@ -467,7 +467,7 @@ TEST(BlueFS, test_flush_2) { BlueFS fs(g_ceph_context); ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false)); - fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576); + fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576); uuid_d fsid; ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); ASSERT_EQ(0, fs.mount()); @@ -495,7 +495,7 @@ TEST(BlueFS, test_flush_3) { BlueFS fs(g_ceph_context); ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false)); - fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576); + fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576); uuid_d fsid; ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); ASSERT_EQ(0, fs.mount()); @@ -529,7 +529,7 @@ TEST(BlueFS, test_simple_compaction_sync) { BlueFS fs(g_ceph_context); ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false)); - fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576); + fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576); uuid_d fsid; ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); ASSERT_EQ(0, fs.mount()); @@ -582,7 +582,7 @@ TEST(BlueFS, test_simple_compaction_async) { BlueFS 
fs(g_ceph_context); ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false)); - fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576); + fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576); uuid_d fsid; ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); ASSERT_EQ(0, fs.mount()); @@ -638,7 +638,7 @@ TEST(BlueFS, test_compaction_sync) { BlueFS fs(g_ceph_context); ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false)); - fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576); + fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576); uuid_d fsid; ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); ASSERT_EQ(0, fs.mount()); @@ -676,7 +676,7 @@ TEST(BlueFS, test_compaction_async) { BlueFS fs(g_ceph_context); ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false)); - fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576); + fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576); uuid_d fsid; ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); ASSERT_EQ(0, fs.mount()); @@ -714,7 +714,7 @@ TEST(BlueFS, test_replay) { BlueFS fs(g_ceph_context); ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false)); - fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576); + fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576); uuid_d fsid; ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); ASSERT_EQ(0, fs.mount()); @@ -760,7 +760,7 @@ TEST(BlueFS, test_replay_growth) { BlueFS fs(g_ceph_context); ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false)); - fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576); + fs.add_block_extent(true, BlueFS::BDEV_DB, 1048576, size - 1048576); uuid_d fsid; ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); ASSERT_EQ(0, fs.mount());