From d07c10dfc02e4cdeda288bf39b8060b10da5bbf9 Mon Sep 17 00:00:00 2001
From: Igor Fedotov
Date: Fri, 23 Nov 2018 14:39:20 +0300
Subject: [PATCH] os/bluestore: add main device expand capability.

One can do that via ceph-bluestore-tool's bluefs-bdev-expand command

Signed-off-by: Igor Fedotov
---
 qa/standalone/osd/osd-bluefs-volume-ops.sh |  20 ++-
 src/os/bluestore/BitmapFreelistManager.cc  |  56 +++++++++
 src/os/bluestore/BitmapFreelistManager.h   |   6 +
 src/os/bluestore/BlueStore.cc              | 129 +++++++++++++++++++++
 src/os/bluestore/BlueStore.h               |   4 +
 src/os/bluestore/FreelistManager.h         |   7 ++
 src/os/bluestore/bluestore_tool.cc         |  42 ++-----
 7 files changed, 227 insertions(+), 37 deletions(-)

diff --git a/qa/standalone/osd/osd-bluefs-volume-ops.sh b/qa/standalone/osd/osd-bluefs-volume-ops.sh
index 8a6a3acee8d..550708963bc 100755
--- a/qa/standalone/osd/osd-bluefs-volume-ops.sh
+++ b/qa/standalone/osd/osd-bluefs-volume-ops.sh
@@ -12,7 +12,7 @@ function run() {
     export CEPH_ARGS
     CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
     CEPH_ARGS+="--mon-host=$CEPH_MON "
-    CEPH_ARGS+="--bluestore_block_size=4294967296 "
+    CEPH_ARGS+="--bluestore_block_size=2147483648 "
     CEPH_ARGS+="--bluestore_block_db_create=true "
     CEPH_ARGS+="--bluestore_block_db_size=1073741824 "
     CEPH_ARGS+="--bluestore_block_wal_size=536870912 "
@@ -66,8 +66,26 @@ function TEST_bluestore() {
     while kill $osd_pid3; do sleep 1 ; done
     ceph osd down 3
 
+    # expand slow devices
+    ceph-bluestore-tool --path $dir/0 fsck || return 1
+    ceph-bluestore-tool --path $dir/1 fsck || return 1
+    ceph-bluestore-tool --path $dir/2 fsck || return 1
+    ceph-bluestore-tool --path $dir/3 fsck || return 1
+
+    truncate $dir/0/block -s 4294967296 || return 1 # 4GB
+    ceph-bluestore-tool --path $dir/0 bluefs-bdev-expand || return 1
+    truncate $dir/1/block -s 4311744512 || return 1 # 4GB + 16MB
+    ceph-bluestore-tool --path $dir/1 bluefs-bdev-expand || return 1
+    truncate $dir/2/block -s 4295099392 || return 1 # 4GB + 129KB
+    ceph-bluestore-tool --path $dir/2 bluefs-bdev-expand || return 1
+    truncate $dir/3/block -s 4293918720 || return 1 # 4GB - 1MB
+    ceph-bluestore-tool --path $dir/3 bluefs-bdev-expand || return 1
+
     # slow, DB, WAL -> slow, DB
     ceph-bluestore-tool --path $dir/0 fsck || return 1
+    ceph-bluestore-tool --path $dir/1 fsck || return 1
+    ceph-bluestore-tool --path $dir/2 fsck || return 1
+    ceph-bluestore-tool --path $dir/3 fsck || return 1
 
     ceph-bluestore-tool --path $dir/0 bluefs-bdev-sizes
 
diff --git a/src/os/bluestore/BitmapFreelistManager.cc b/src/os/bluestore/BitmapFreelistManager.cc
index fac0e44d856..85f5f0261c1 100644
--- a/src/os/bluestore/BitmapFreelistManager.cc
+++ b/src/os/bluestore/BitmapFreelistManager.cc
@@ -106,6 +106,62 @@ int BitmapFreelistManager::create(uint64_t new_size, uint64_t granularity,
   return 0;
 }
 
+// Grow the freelist so that it describes a device of new_size bytes.
+//
+// The bitmap always covers a whole number of keys, and the blocks between
+// the end of the device and the end of the last key are kept marked as
+// allocated.  That padding is cleared at the old end of the device and set
+// again at the new one.  The updated "blocks" and "size" metadata are
+// queued on txn; nothing is submitted here.
+//
+// Always returns 0.
+int BitmapFreelistManager::expand(uint64_t new_size, KeyValueDB::Transaction txn)
+{
+  ceph_assert(new_size > size);
+  ceph_assert(isp2(bytes_per_block));
+
+  uint64_t blocks0 = size / bytes_per_block;
+  if (blocks0 / blocks_per_key * blocks_per_key != blocks0) {
+    // round the old block count itself up to a key boundary
+    blocks0 = (blocks0 / blocks_per_key + 1) * blocks_per_key;
+    dout(10) << __func__ << " rounding blocks up from 0x" << std::hex << size
+             << " to 0x" << (blocks0 * bytes_per_block)
+             << " (0x" << blocks0 << " blocks)" << std::dec << dendl;
+    // reset past-eof blocks to unallocated
+    _xor(size, blocks0 * bytes_per_block - size, txn);
+  }
+
+  size = p2align(new_size, bytes_per_block);
+  blocks = size / bytes_per_block;
+
+  if (blocks / blocks_per_key * blocks_per_key != blocks) {
+    blocks = (blocks / blocks_per_key + 1) * blocks_per_key;
+    dout(10) << __func__ << " rounding blocks up from 0x" << std::hex << size
+             << " to 0x" << (blocks * bytes_per_block)
+             << " (0x" << blocks << " blocks)" << std::dec << dendl;
+    // set past-eof blocks as allocated
+    _xor(size, blocks * bytes_per_block - size, txn);
+  }
+
+  dout(10) << __func__
+           << " size 0x" << std::hex << size
+           << " bytes_per_block 0x" << bytes_per_block
+           << " blocks 0x" << blocks
+           << " blocks_per_key 0x" << blocks_per_key
+           << std::dec << dendl;
+  {
+    bufferlist bl;
+    encode(blocks, bl);
+    txn->set(meta_prefix, "blocks", bl);
+  }
+  {
+    bufferlist bl;
+    encode(size, bl);
+    txn->set(meta_prefix, "size", bl);
+  }
+  return 0;
+}
+
 int BitmapFreelistManager::init()
 {
   dout(1) << __func__ << dendl;
diff --git a/src/os/bluestore/BitmapFreelistManager.h b/src/os/bluestore/BitmapFreelistManager.h
index ed80b70a027..ce04a21ea31 100644
--- a/src/os/bluestore/BitmapFreelistManager.h
+++ b/src/os/bluestore/BitmapFreelistManager.h
@@ -55,6 +55,9 @@ public:
   int create(uint64_t size, uint64_t granularity,
              KeyValueDB::Transaction txn) override;
 
+  int expand(uint64_t new_size,
+             KeyValueDB::Transaction txn) override;
+
   int init() override;
   void shutdown() override;
 
@@ -70,6 +73,9 @@ public:
     uint64_t offset, uint64_t length,
     KeyValueDB::Transaction txn) override;
 
+  inline uint64_t get_size() const override {
+    return size;
+  }
   inline uint64_t get_alloc_units() const override {
     return size / bytes_per_block;
   }
diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc
index e0de190a1dd..d67280ad2ae 100644
--- a/src/os/bluestore/BlueStore.cc
+++ b/src/os/bluestore/BlueStore.cc
@@ -6098,6 +6098,135 @@ shutdown:
   return r;
 }
 
+// Return the path of the block device file backing BlueFS device id
+// (block.wal, block.db or block); empty if id is not a known device.
+string BlueStore::get_device_path(unsigned id)
+{
+  string res;
+  if (id < BlueFS::MAX_BDEV) {
+    switch (id) {
+    case BlueFS::BDEV_WAL:
+      res = path + "/block.wal";
+      break;
+    case BlueFS::BDEV_DB:
+      if (id == bluefs_shared_bdev) {
+        res = path + "/block";
+      } else {
+        res = path + "/block.db";
+      }
+      break;
+    case BlueFS::BDEV_SLOW:
+      res = path + "/block";
+      break;
+    }
+  }
+  return res;
+}
+
+// Grow BlueStore onto block devices that were enlarged underneath it.
+//
+// Dedicated WAL/DB devices get their new tail handed over to BlueFS, the
+// main device gets its freelist extended; in both cases the size stored
+// in the bdev label is refreshed.  Progress is reported to out.
+//
+// The store is mounted for the duration of the call.  Returns 0 on
+// success, or a negative error code if mounting or updating the main
+// device failed.  WAL/DB label problems are only logged.
+int BlueStore::expand_devices(ostream& out)
+{
+  int r = _mount(false);
+  if (r < 0) {
+    return r;
+  }
+  bluefs->dump_block_extents(out);
+  out << "Expanding..." << std::endl;
+  for (auto devid : { BlueFS::BDEV_WAL, BlueFS::BDEV_DB}) {
+    if (devid == bluefs_shared_bdev ) {
+      continue;
+    }
+    interval_set<uint64_t> before;
+    bluefs->get_block_extents(devid, &before);
+    if (before.empty()) {
+      // nothing of this device is owned by BlueFS (e.g. it is absent)
+      continue;
+    }
+    uint64_t end = before.range_end();
+    uint64_t size = bluefs->get_block_device_size(devid);
+    if (end < size) {
+      out << devid
+          <<" : expanding " << " from 0x" << std::hex
+          << end << " to 0x" << size << std::dec << std::endl;
+      bluefs->add_block_extent(devid, end, size-end);
+      string p = get_device_path(devid);
+      if (p.empty()) {
+        derr << devid
+             <<": can't find device path " << dendl;
+        continue;
+      }
+      const char* path = p.c_str();
+      bluestore_bdev_label_t label;
+      // best effort: this result is local and never becomes the return value
+      int label_r = _read_bdev_label(cct, path, &label);
+      if (label_r < 0) {
+        derr << "unable to read label for " << path << ": "
+             << cpp_strerror(label_r) << dendl;
+        continue;
+      }
+      label.size = size;
+      label_r = _write_bdev_label(cct, path, label);
+      if (label_r < 0) {
+        derr << "unable to write label for " << path << ": "
+             << cpp_strerror(label_r) << dendl;
+        continue;
+      }
+      out << devid
+          <<" : size label updated to " << size
+          << std::endl;
+    }
+  }
+  uint64_t size0 = fm->get_size();
+  uint64_t size = bdev->get_size();
+  if (size0 < size) {
+    out << bluefs_shared_bdev
+        <<" : expanding " << " from 0x" << std::hex
+        << size0 << " to 0x" << size << std::dec << std::endl;
+    KeyValueDB::Transaction txn = db->get_transaction();
+    // assign the function-level r (no shadowing) so that a main device
+    // failure is what the caller gets back
+    r = fm->expand(size, txn);
+    ceph_assert(r == 0);
+    r = db->submit_transaction_sync(txn);
+    if (r < 0) {
+      derr << "unable to commit freelist expansion: "
+           << cpp_strerror(r) << dendl;
+    } else {
+      // always reference to slow device here
+      string p = get_device_path(BlueFS::BDEV_SLOW);
+      ceph_assert(!p.empty());
+      const char* path = p.c_str();
+      bluestore_bdev_label_t label;
+      r = _read_bdev_label(cct, path, &label);
+      if (r < 0) {
+        derr << "unable to read label for " << path << ": "
+             << cpp_strerror(r) << dendl;
+      } else {
+        label.size = size;
+        r = _write_bdev_label(cct, path, label);
+        if (r < 0) {
+          derr << "unable to write label for " << path << ": "
+               << cpp_strerror(r) << dendl;
+        } else {
+          out << bluefs_shared_bdev
+              <<" : size label updated to " << size
+              << std::endl;
+        }
+      }
+    }
+  }
+  umount();
+  return r;
+}
+
 void BlueStore::set_cache_shards(unsigned num)
 {
   dout(10) << __func__ << " " << num << dendl;
diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h
index 05324916456..ee4b2327b57 100644
--- a/src/os/bluestore/BlueStore.h
+++ b/src/os/bluestore/BlueStore.h
@@ -2398,6 +2398,10 @@ public:
   int migrate_to_new_bluefs_device(const set<int>& devs_source,
     int id,
     const string& path);
+  /// pick up extra space on block devices that have been enlarged
+  int expand_devices(ostream& out);
+  /// path of the block device file behind BlueFS device id ("" if unknown)
+  string get_device_path(unsigned id);
 
 public:
   int statfs(struct store_statfs_t *buf) override;
diff --git a/src/os/bluestore/FreelistManager.h b/src/os/bluestore/FreelistManager.h
index 6603062ef16..a263fb0bc25 100644
--- a/src/os/bluestore/FreelistManager.h
+++ b/src/os/bluestore/FreelistManager.h
@@ -27,6 +27,11 @@ public:
   virtual int create(uint64_t size, uint64_t granularity,
                      KeyValueDB::Transaction txn) = 0;
 
+  /// grow the managed space to new_size bytes; metadata updates are
+  /// queued on txn
+  virtual int expand(uint64_t new_size,
+                     KeyValueDB::Transaction txn) = 0;
+
   virtual int init() = 0;
   virtual void shutdown() = 0;
 
@@ -42,6 +47,8 @@ public:
     uint64_t offset, uint64_t length,
     KeyValueDB::Transaction txn) = 0;
 
+  /// size of the managed space, in bytes
+  virtual uint64_t get_size() const = 0;
   virtual uint64_t get_alloc_units() const = 0;
   virtual uint64_t get_alloc_size() const = 0;
 
diff --git a/src/os/bluestore/bluestore_tool.cc b/src/os/bluestore/bluestore_tool.cc
index 14b4a3624c0..03a7f147f99 100644
--- a/src/os/bluestore/bluestore_tool.cc
+++ b/src/os/bluestore/bluestore_tool.cc
@@ -527,43 +527,13 @@ int main(int argc, char **argv)
     delete fs;
   }
   else if (action == "bluefs-bdev-expand") {
-    BlueFS *fs = open_bluefs(cct.get(), path, devs);
-    cout << "start:" << std::endl;
-    fs->dump_block_extents(cout);
-    for (int devid : { BlueFS::BDEV_WAL, BlueFS::BDEV_DB }) {
-      interval_set<uint64_t> before;
-      fs->get_block_extents(devid, &before);
-      if (before.empty()) continue;
-      uint64_t end = before.range_end();
-      uint64_t size = fs->get_block_device_size(devid);
-      if (end < size) {
-        cout << "expanding dev " << devid << " from 0x" << std::hex
-             << end << " to 0x" << size << std::dec << std::endl;
-        fs->add_block_extent(devid, end, size-end);
-        const char* path = find_device_path(devid, cct.get(), devs);
-        if (path == nullptr) {
-          cerr << "Can't find device path for dev " << devid << std::endl;
-          continue;
-        }
-        bluestore_bdev_label_t label;
-        int r = BlueStore::_read_bdev_label(cct.get(), path, &label);
-        if (r < 0) {
-          cerr << "unable to read label for " << path << ": "
-               << cpp_strerror(r) << std::endl;
-          continue;
-        }
-        label.size = size;
-        r = BlueStore::_write_bdev_label(cct.get(), path, label);
-        if (r < 0) {
-          cerr << "unable to write label for " << path << ": "
-               << cpp_strerror(r) << std::endl;
-          continue;
-        }
-        cout << "dev " << devid << " size label updated to "
-             << size << std::endl;
-      }
+    BlueStore bluestore(cct.get(), path);
+    int r = bluestore.expand_devices(cout);
+    if (r < 0) {
+      cerr << "failed to expand bluestore devices: "
+           << cpp_strerror(r) << std::endl;
+      exit(EXIT_FAILURE);
     }
-    delete fs;
   }
   else if (action == "bluefs-export") {
     BlueFS *fs = open_bluefs(cct.get(), path, devs);
-- 
2.39.5