From: Adam Kupczyk
Date: Mon, 5 Aug 2019 13:15:07 +0000 (+0200)
Subject: BlueStore/allocator: Add command to inspect how much BlueStore's block can go to...
X-Git-Tag: v12.2.13~157^2~3
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=a2fe69d03ce1c4d7bad2ef100c20a09ced5d8a2a;p=ceph.git

BlueStore/allocator: Add command to inspect how much BlueStore's block can go to BlueFS.

Moved reduced BlueFSDeviceExpander interface.

Signed-off-by: Adam Kupczyk
(cherry picked from commit 16a9dac1678613b45fab9576b8bc4368a58c7434)

 Conflicts:
        src/os/bluestore/BlueFS.cc - trivial resolution
        src/os/bluestore/BlueFS.h
        src/os/bluestore/BlueStore.cc
        src/os/bluestore/BlueStore.h
---

diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc
index 07dcb1b451967..c3ea01389911c 100644
--- a/src/os/bluestore/BlueFS.cc
+++ b/src/os/bluestore/BlueFS.cc
@@ -10,6 +10,7 @@
 #include "BlockDevice.h"
 #include "Allocator.h"
 #include "include/assert.h"
+#include "common/admin_socket.h"
 
 #define dout_context cct
 #define dout_subsys ceph_subsys_bluefs
@@ -25,6 +26,100 @@
 MEMPOOL_DEFINE_OBJECT_FACTORY(BlueFS::FileReader, bluefs_file_reader, bluefs);
 MEMPOOL_DEFINE_OBJECT_FACTORY(BlueFS::FileLock, bluefs_file_lock, bluefs);
 
+/// Admin socket hook that serves the "bluestore bluefs available" command.
+///
+/// The reply lists the free space of every BlueFS device that is present and
+/// the amount of space the slow-device expander reports as transferable.
+class BlueFS::SocketHook : public AdminSocketHook {
+  BlueFS* bluefs;
+public:
+  /// Creates a hook and registers its command with the admin socket.
+  /// @return the new hook, or nullptr when there is no admin socket or the
+  ///         registration fails
+  static BlueFS::SocketHook* create(BlueFS* bluefs)
+  {
+    BlueFS::SocketHook* hook = nullptr;
+    AdminSocket* admin_socket = bluefs->cct->get_admin_socket();
+    if (admin_socket) {
+      hook = new BlueFS::SocketHook(bluefs);
+      int r = admin_socket->register_command("bluestore bluefs available",
+                                           "bluestore bluefs available "
+                                           "name=alloc_size,type=CephInt,req=false",
+                                           hook,
+                                           "Report available space for bluefs. "
+                                           "If alloc_size set, make simulation.");
+      if (r != 0) {
+        ldout(bluefs->cct, 1) << __func__ << " cannot register SocketHook" << dendl;
+        delete hook;
+        hook = nullptr;
+      }
+    }
+    return hook;
+  }
+
+  /// Unregisters the command; the unregistration is asserted to succeed.
+  ~SocketHook() {
+    AdminSocket* admin_socket = bluefs->cct->get_admin_socket();
+    int r = admin_socket->unregister_command("bluestore bluefs available");
+    ceph_assert(r == 0);
+  }
+private:
+  explicit SocketHook(BlueFS* bluefs) :
+    bluefs(bluefs) {}
+  /// Executes one admin socket command.
+  /// @param command command to run; only "bluestore bluefs available" is known
+  /// @param cmdmap  command arguments; the optional "alloc_size" is read from it
+  /// @param format  output format passed to Formatter::create()
+  /// @param out     receives the report, or the error message on failure
+  /// @return false for an unknown command or an invalid alloc_size, else true
+  bool call(std::string command, cmdmap_t& cmdmap,
+            std::string format, bufferlist& out) override {
+    stringstream ss;
+    bool r = true;
+    if (command == "bluestore bluefs available") {
+      int64_t alloc_size = 0;
+      cmd_getval(bluefs->cct, cmdmap, "alloc_size", alloc_size);
+      // alloc_size is an allocation unit: it must be 0 (meaning "use the
+      // configured bluefs_alloc_size") or a positive power of two.  Fail the
+      // command for anything else rather than report numbers derived from it.
+      // The sign is tested first so that alloc_size - 1 cannot overflow.
+      if (alloc_size < 0 || (alloc_size & (alloc_size - 1)) != 0) {
+        ss << "Invalid allocation size:'" << alloc_size << "'" << std::endl;
+        out.append(ss);
+        return false;
+      }
+      if (alloc_size == 0)
+        alloc_size = bluefs->cct->_conf->bluefs_alloc_size;
+      Formatter *f = Formatter::create(format, "json-pretty", "json-pretty");
+      f->open_object_section("bluefs_available_space");
+      // One "dev" section per device that is actually present.
+      for (unsigned dev = BDEV_WAL; dev <= BDEV_SLOW; dev++) {
+        if (bluefs->bdev[dev]) {
+          f->open_object_section("dev");
+          f->dump_string("device", bluefs->get_device_name(dev));
+          ceph_assert(bluefs->alloc[dev]);
+          f->dump_int("free", bluefs->alloc[dev]->get_free());
+          f->close_section();
+        }
+      }
+      // Without an expander nothing can be transferred, so 0 is reported.
+      size_t extra_space = 0;
+      if (bluefs->slow_dev_expander) {
+        extra_space = bluefs->slow_dev_expander->available_freespace(alloc_size);
+      }
+      f->dump_int("available_from_bluestore", extra_space);
+      f->close_section();
+      f->flush(ss);
+      delete f;
+    } else {
+      ss << "Invalid command" << std::endl;
+      r = false;
+    }
+    out.append(ss);
+    return r;
+  }
+};
+
 BlueFS::BlueFS(CephContext* cct)
   : cct(cct),
     bdev(MAX_BDEV),
@@ -32,10 +127,12 @@ BlueFS::BlueFS(CephContext* cct)
     block_all(MAX_BDEV),
     block_total(MAX_BDEV, 0)
 {
+  asok_hook = SocketHook::create(this);
 }
 
 BlueFS::~BlueFS()
 {
+  delete asok_hook;
   for (auto p : ioc) {
     if (p)
       p->aio_wait();
@@ -1859,6 +1956,13 @@
void BlueFS::flush_bdev()
   }
 }
 
+const char* BlueFS::get_device_name(unsigned id)
+{
+  // The table size is checked as well as MAX_BDEV, which is defined elsewhere.
+  static const char* const names[] = {"BDEV_WAL", "BDEV_DB", "BDEV_SLOW", "BDEV_NEWWAL", "BDEV_NEWDB"};
+  return (id < MAX_BDEV && id < sizeof(names) / sizeof(names[0])) ? names[id] : "BDEV_INV";
+}
+
 int BlueFS::_allocate(uint8_t id, uint64_t len,
                       bluefs_fnode_t* node)
 {
diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h
index 6b3c57b1a8d17..e708aa5c990a0 100644
--- a/src/os/bluestore/BlueFS.h
+++ b/src/os/bluestore/BlueFS.h
@@ -38,6 +38,21 @@ enum {
   l_bluefs_last,
 };
 
+/// Interface through which BlueFS asks how much space could be handed to it.
+/// The destructor is protected and non-virtual: not deletable via this type.
+class BlueFSDeviceExpander {
+protected:
+  ~BlueFSDeviceExpander() {}
+public:
+  /** Reports the amount of space that can be transferred to BlueFS.
+   * This is the current state when alloc_size equals the allocation size
+   * BlueFS currently uses, and a simulation when alloc_size is different.
+   * @param alloc_size allocation unit size to check
+   * @return amount of space that can be transferred
+   */
+  virtual size_t available_freespace(uint64_t alloc_size) = 0;
+};
+
 class BlueFS {
 public:
   CephContext* cct;
@@ -255,6 +270,11 @@ private:
   vector alloc; ///< allocators for bdevs
   vector> pending_release; ///< extents to release
 
+  BlueFSDeviceExpander* slow_dev_expander = nullptr; ///< set by set_slow_device_expander()
+
+  class SocketHook;
+  SocketHook* asok_hook = nullptr; ///< admin socket hook, created in the constructor
+
   void _init_logger();
   void _shutdown_logger();
   void _update_logger_stats();
@@ -267,6 +287,10 @@ private:
   FileRef _get_file(uint64_t ino);
   void _drop_link(FileRef f);
 
+  /// BDEV_SLOW if that device is set, otherwise BDEV_DB.
+  int _get_slow_device_id() { return bdev[BDEV_SLOW] ? BDEV_SLOW : BDEV_DB; }
+  /// Symbolic name of a device id, "BDEV_INV" if the id is out of range.
+  const char* get_device_name(unsigned id);
   int _allocate(uint8_t bdev, uint64_t len,
                 bluefs_fnode_t* node);
   int _flush_range(FileWriter *h, uint64_t offset, uint64_t length);
@@ -394,6 +418,10 @@ public:
   /// sync any uncommitted state to disk
   void sync_metadata();
 
+  /// Sets the expander queried for space transferable to BlueFS; may be null.
+  void set_slow_device_expander(BlueFSDeviceExpander* a) {
+    slow_dev_expander = a;
+  }
   int add_block_device(unsigned bdev, const string& path);
   bool bdev_support_label(unsigned id);
   uint64_t get_block_device_size(unsigned bdev);
diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc
index 9c75341a944a9..b2e9c7cd32603 100644
--- a/src/os/bluestore/BlueStore.cc
+++ b/src/os/bluestore/BlueStore.cc
@@ -4947,6 +4947,7 @@ int BlueStore::_open_db(bool create)
       return -EINVAL;
     }
     bluefs = new BlueFS(cct);
+    bluefs->set_slow_device_expander(this);
 
     string bfn;
     struct stat st;
@@ -5243,6 +5244,29 @@ int BlueStore::_reconcile_bluefs_freespace()
   return 0;
 }
 
+/// Sums, over the extents that alloc->dump() passes to the callback, the
+/// space made of whole alloc_size units starting on an alloc_size boundary.
+size_t BlueStore::available_freespace(uint64_t alloc_size) {
+  // A zero unit cannot hold anything; this also keeps the modulo defined.
+  if (alloc_size == 0)
+    return 0;
+  size_t total = 0;
+  auto iterated_allocation = [&](size_t off, size_t len) {
+    // Modulo rather than a bit mask, so the result is also right when
+    // alloc_size is not a power of two.  Skip to the next boundary first...
+    size_t offset_in_block = off % alloc_size;
+    size_t dist_to_alignment =
+      offset_in_block == 0 ? 0 : alloc_size - offset_in_block;
+    if (dist_to_alignment >= len)
+      return;
+    len -= dist_to_alignment;
+    // ...then drop the partial unit at the tail.
+    total += len - len % alloc_size;
+  };
+  alloc->dump(iterated_allocation);
+  return total;
+}
+
 void BlueStore::_dump_alloc_on_rebalance_failure()
 {
   auto dump_interval =
diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h
index c3b9cd1f858d4..69362ac69ccbd 100644
--- a/src/os/bluestore/BlueStore.h
+++ b/src/os/bluestore/BlueStore.h
@@ -41,6 +41,7 @@
 
 #include "bluestore_types.h"
 #include "BlockDevice.h"
+#include "BlueFS.h"
 #include "common/EventTrace.h"
 
 class Allocator;
@@ -124,6 +125,7 @@ enum {
 };
 
 class BlueStore : public ObjectStore,
+                  public BlueFSDeviceExpander,
                   public md_config_obs_t {
   // -----------------------------------------------------
   // types
@@ -2855,6 +2857,9 @@ private:
                         CollectionRef& c,
                         CollectionRef& d,
                         unsigned bits, int rem);
+private:
+  /// BlueFSDeviceExpander: space that could be handed over to BlueFS.
+  size_t available_freespace(uint64_t alloc_size) override;
 };
 
 inline ostream& operator<<(ostream& out, const BlueStore::OpSequencer& s) {