From dc23cf4474e2a7effc784377361ec615cded8df1 Mon Sep 17 00:00:00 2001 From: Igor Fedotov Date: Tue, 18 Feb 2025 20:20:53 +0300 Subject: [PATCH] tool/ceph-bluestore-tool: Make bluefs-bdev-expand command output nicer. Here is a sample output: inferring bluefs devices from bluestore path 0 : device size 0x4049c000(1.0 GiB) : using 0x1801000(24 MiB) 1 : device size 0x140000000(5 GiB) : using 0x1502000(21 MiB) 2 : device size 0x8c0000000(35 GiB) : using 0x40014000(1.0 GiB) Expanding DB/WAL... 0 : nothing to do, skipped 1 : Expanding to 0x140000000(5 GiB) 1 : size updated to 0x140000000(5 GiB) 2 : Expanding to 0x8c0000000(35 GiB) 2 : size updated to 0x8c0000000(35 GiB) Fixes: https://tracker.ceph.com/issues/67966 Signed-off-by: Igor Fedotov (cherry picked from commit ac7789139e3d4ba3bfd69ddcd4fe504c35b42bc3) --- src/os/bluestore/BlueFS.cc | 9 ++- src/os/bluestore/BlueStore.cc | 145 +++++++++++++++++++++------------- src/os/bluestore/BlueStore.h | 1 - 3 files changed, 94 insertions(+), 61 deletions(-) diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index c232524c00071..b9a57cb714c51 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -621,16 +621,17 @@ void BlueFS::dump_perf_counters(Formatter *f) void BlueFS::dump_block_extents(ostream& out) { for (unsigned i = 0; i < MAX_BDEV; ++i) { - if (!bdev[i]) { + if (!bdev[i] || !alloc[i]) { continue; } - auto total = get_total(i); + auto total = get_total(i) + block_reserved[i]; auto free = get_free(i); out << i << " : device size 0x" << std::hex << total + << "(" << byte_u_t(total) << ")" << " : using 0x" << total - free - << std::dec << "(" << byte_u_t(total - free) << ")"; - out << "\n"; + << "(" << byte_u_t(total - free) << ")" + << std::dec << std::endl; } } diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 04772d15624b1..5d2a05f7793cf 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -8914,30 +8914,6 @@ string 
BlueStore::get_device_path(unsigned id) return res; } -int BlueStore::_set_bdev_label_size(unsigned id, uint64_t size) -{ - ceph_assert(bluefs); - BlockDevice* my_bdev = bluefs->get_block_device(id); - int r = -1; - if (my_bdev != nullptr) { - string my_path = get_device_path(id); - bluestore_bdev_label_t label; - r = _read_bdev_label(cct, my_bdev, my_path, &label); - if (r < 0) { - derr << "unable to read label for " << my_path << ": " - << cpp_strerror(r) << dendl; - } else { - label.size = size; - r = _write_bdev_label(cct, my_bdev, my_path, label); - if (r < 0) { - derr << "unable to write label for " << my_path << ": " - << cpp_strerror(r) << dendl; - } - } - } - return r; -} - int BlueStore::expand_devices(ostream& out) { // let's open in read-only mode first to be able to recover @@ -8948,38 +8924,85 @@ int BlueStore::expand_devices(ostream& out) ceph_assert(r == 0); bluefs->dump_block_extents(out); out << "Expanding DB/WAL..." << std::endl; + // updating dedicated devices first for (auto devid : { BlueFS::BDEV_WAL, BlueFS::BDEV_DB}) { - if (devid == bluefs_layout.shared_bdev ) { + if (devid == bluefs_layout.shared_bdev) { continue; } - uint64_t size = bluefs->get_block_device_size(devid); + auto my_bdev = bluefs->get_block_device(devid); + uint64_t size = my_bdev ? 
my_bdev->get_size() : 0; if (size == 0) { // no bdev continue; } - out << devid - <<" : expanding " << " to 0x" << size << std::dec << std::endl; - if (bluefs->bdev_support_label(devid)) { - if (_set_bdev_label_size(devid, size) >= 0) { - out << devid - << " : size label updated to " << size - << std::endl; + if (my_bdev->supported_bdev_label()) { + string my_path = get_device_path(devid); + bluestore_bdev_label_t my_label; + int r = _read_bdev_label(cct, my_bdev, my_path, &my_label); + if (r < 0) { + derr << "unable to read label for " << my_path << ": " + << cpp_strerror(r) << dendl; + continue; + } else { + if (size == my_label.size) { + // no need to expand + out << devid + << " : nothing to do, skipped" + << std::endl; + continue; + } else if (size < my_label.size) { + // something weird in bdev label + out << devid + <<" : ERROR: bdev label is above device size, skipped" + << std::endl; + continue; + } else { + my_label.size = size; + out << devid + << " : Expanding to 0x" << std::hex << size + << std::dec << "(" << byte_u_t(size) << ")" + << std::endl; + r = _write_bdev_label(cct, my_bdev, my_path, my_label); + if (r < 0) { + derr << "unable to write label for " << my_path << ": " + << cpp_strerror(r) << dendl; + } else { + out << devid + << " : size updated to 0x" << std::hex << size + << std::dec << "(" << byte_u_t(size) << ")" + << std::endl; + } + } } } } + // now proceed with a shared device uint64_t size0 = fm->get_size(); uint64_t size = bdev->get_size(); - if (size0 < size) { - out << bluefs_layout.shared_bdev - << " : expanding " << " from 0x" << std::hex - << size0 << " to 0x" << size << std::dec << std::endl; - _write_out_fm_meta(size); - if (bdev->supported_bdev_label()) { - out << bluefs_layout.shared_bdev - << " : size label updated to " << size - << std::endl; + auto devid = bluefs_layout.shared_bdev; + auto aligned_size = p2align(size, min_alloc_size); + if (aligned_size == size0) { + // no need to expand + out << devid + << " : nothing to 
do, skipped" + << std::endl; + } else if (aligned_size < size0) { + // something weird: previously recorded size is above the device size + out << devid + << " : ERROR: previous device size is above the current one, skipped" + << std::endl; + } else { + auto my_path = get_device_path(devid); + out << devid + <<" : Expanding to 0x" << std::hex << size + << std::dec << "(" << byte_u_t(size) << ")" + << std::endl; + r = _write_out_fm_meta(size); + if (r != 0) { + derr << "unable to write out fm meta for " << my_path << ": " + << cpp_strerror(r) << dendl; + } else if (bdev->supported_bdev_label()) { bdev_label.size = size; - uint64_t lsize = std::max(BDEV_LABEL_BLOCK_SIZE, min_alloc_size); for (uint64_t loc : bdev_label_positions) { if ((loc >= size0) && (loc + lsize <= size)) { @@ -8989,22 +9012,32 @@ int BlueStore::expand_devices(ostream& out) } } } - _write_bdev_label(cct, bdev, - get_device_path(bluefs_layout.shared_bdev), - bdev_label, bdev_label_valid_locations); + r = _write_bdev_label(cct, bdev, my_path, + bdev_label, bdev_label_valid_locations); + if (r != 0) { + derr << "unable to write label(s) for " << my_path << ": " + << cpp_strerror(r) << dendl; + } } - _close_db_and_around(); + if (r == 0) { + out << devid + << " : size updated to 0x" << std::hex << size + << std::dec << "(" << byte_u_t(size) << ")" + << std::endl; + _close_db_and_around(); - // - // Mount in read/write to sync expansion changes - // and make sure everything is all right. - // - before_expansion_bdev_size = size0; //preserve orignal size to permit following - // _db_open_and_around() do some post-init stuff - // on opened allocator + // + // Mount in read/write to sync expansion changes + // and make sure everything is all right. + // + before_expansion_bdev_size = size0; // preserve original size to permit + // following _db_open_and_around() + // do some post-init stuff on opened + // allocator. 
- r = _open_db_and_around(false); - ceph_assert(r == 0); + r = _open_db_and_around(false); + ceph_assert(r == 0); + } } _close_db_and_around(); return r; diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index a574b080c43e6..4b2ef759c90d3 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -2809,7 +2809,6 @@ private: std::vector* out_valid_positions = nullptr, bool* out_is_multi = nullptr, int64_t* out_epoch = nullptr); - int _set_bdev_label_size(unsigned id, uint64_t size); void _main_bdev_label_try_reserve(); void _main_bdev_label_remove(Allocator* alloc); -- 2.39.5