From 59cbb4f2bbd28ba7eb0ea4d9f93177313923d38e Mon Sep 17 00:00:00 2001
From: Igor Fedotov
Date: Wed, 15 Apr 2020 16:43:21 +0300
Subject: [PATCH] os/bluestore: show file count for 'bluefs-stats'

Signed-off-by: Igor Fedotov
(cherry picked from commit 974906c2b48dee61faf2cffcb6a7f36b8b75fe59)

 Conflicts:
	src/os/bluestore/BlueFS.h - trivial
---
 src/os/bluestore/BlueFS.cc    | 13 +++++++++----
 src/os/bluestore/BlueFS.h     |  8 ++++----
 src/os/bluestore/BlueStore.cc | 16 +++++++++++-----
 src/os/bluestore/BlueStore.h  | 17 +++++++++++++----
 4 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc
index 1f87d9ed8dbc..e90b5694b1df 100644
--- a/src/os/bluestore/BlueFS.cc
+++ b/src/os/bluestore/BlueFS.cc
@@ -514,7 +514,7 @@ int BlueFS::mkfs(uuid_d osd_uuid, const bluefs_layout_t& layout)
   // init log
   FileRef log_file = ceph::make_ref<File>();
   log_file->fnode.ino = 1;
-  log_file->vselector_hint = vselector->get_hint_by_device(BDEV_WAL);
+  log_file->vselector_hint = vselector->get_hint_for_log();
   int r = _allocate(
     vselector->select_prefer_bdev(log_file->vselector_hint),
     cct->_conf->bluefs_max_log_runway,
@@ -962,7 +962,7 @@ int BlueFS::_replay(bool noop, bool to_stdout)
   if (!noop) {
     log_file->fnode = super.log_fnode;
     log_file->vselector_hint =
-      vselector->get_hint_by_device(BDEV_WAL);
+      vselector->get_hint_for_log();
   } else {
     // do not use fnode from superblock in 'noop' mode - log_file's one should
     // be fine and up-to-date
@@ -3222,6 +3222,7 @@ int BlueFS::open_for_write(
 
   FileRef file;
   bool create = false;
+  bool truncate = false;
   map<string,FileRef>::iterator q = dir->file_map.find(filename);
   if (q == dir->file_map.end()) {
     if (overwrite) {
@@ -3252,6 +3253,7 @@ int BlueFS::open_for_write(
     for (auto& p : file->fnode.extents) {
       pending_release[p.bdev].insert(p.offset, p.length);
     }
+    truncate = true;
     file->fnode.clear_extents();
   }
 
@@ -3260,6 +3262,9 @@ int BlueFS::open_for_write(
 
   file->fnode.mtime = ceph_clock_now();
   file->vselector_hint = vselector->get_hint_by_dir(dirname);
+  if (create || truncate) {
+    vselector->add_usage(file->vselector_hint, file->fnode); // update file count
+  }
 
   dout(20) << __func__ << " mapping " << dirname << "/" << filename
            << " vsel_hint " << file->vselector_hint
@@ -3590,8 +3595,8 @@ void BlueFS::debug_inject_duplicate_gift(unsigned id,
 // ===============================================
 // OriginalVolumeSelector
 
-void* OriginalVolumeSelector::get_hint_by_device(uint8_t dev) const {
-  return reinterpret_cast<void*>(dev);
+void* OriginalVolumeSelector::get_hint_for_log() const {
+  return reinterpret_cast<void*>(BlueFS::BDEV_WAL);
 }
 void* OriginalVolumeSelector::get_hint_by_dir(const string& dirname) const {
   uint8_t res = BlueFS::BDEV_DB;
diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h
index fa727715ca6a..4ad026960109 100644
--- a/src/os/bluestore/BlueFS.h
+++ b/src/os/bluestore/BlueFS.h
@@ -80,8 +80,8 @@ public:
 
   virtual ~BlueFSVolumeSelector() {
   }
-  virtual void* get_hint_by_device(uint8_t dev) const = 0;
-  virtual void* get_hint_by_dir(const string& dirname) const = 0;
+  virtual void* get_hint_for_log() const = 0;
+  virtual void* get_hint_by_dir(const std::string& dirname) const = 0;
 
   virtual void add_usage(void* file_hint, const bluefs_fnode_t& fnode) = 0;
   virtual void sub_usage(void* file_hint, const bluefs_fnode_t& fnode) = 0;
@@ -610,8 +610,8 @@ public:
     uint64_t _slow_total)
     : wal_total(_wal_total), db_total(_db_total), slow_total(_slow_total) {}
 
-  void* get_hint_by_device(uint8_t dev) const override;
-  void* get_hint_by_dir(const string& dirname) const override;
+  void* get_hint_for_log() const override;
+  void* get_hint_by_dir(const std::string& dirname) const override;
 
   void add_usage(void* hint, const bluefs_fnode_t& fnode) override {
     // do nothing
diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc
index 66c437fc8616..da0f2150c8db 100644
--- a/src/os/bluestore/BlueStore.cc
+++ b/src/os/bluestore/BlueStore.cc
@@ -15776,6 +15776,7 @@ uint8_t RocksDBBlueFSVolumeSelector::select_prefer_bdev(void* h) {
       // - observed maximums on DB dev for DB/WAL/UNSORTED data
       // - observed maximum spillovers
       uint64_t max_db_use = 0; // max db usage we potentially observed
+      max_db_use += per_level_per_dev_max.at(BlueFS::BDEV_DB, LEVEL_LOG - LEVEL_FIRST);
       max_db_use += per_level_per_dev_max.at(BlueFS::BDEV_DB, LEVEL_WAL - LEVEL_FIRST);
       max_db_use += per_level_per_dev_max.at(BlueFS::BDEV_DB, LEVEL_DB - LEVEL_FIRST);
       // this could go to db hence using it in the estimation
@@ -15792,6 +15793,7 @@ uint8_t RocksDBBlueFSVolumeSelector::select_prefer_bdev(void* h) {
       }
     }
     break;
+  case LEVEL_LOG:
   case LEVEL_WAL:
     res = BlueFS::BDEV_WAL;
     break;
@@ -15834,14 +15836,15 @@ void RocksDBBlueFSVolumeSelector::dump(ostream& sout) {
     << ", slow_total:" << l_totals[LEVEL_SLOW - LEVEL_FIRST]
     << ", db_avail:" << db_avail4slow << std::endl
     << "Usage matrix:" << std::endl;
-  constexpr std::array<const char*, 7> names{ {
+  constexpr std::array<const char*, 8> names{ {
     "DEV/LEV",
     "WAL",
     "DB",
     "SLOW",
     "*",
     "*",
-    "REAL"
+    "REAL",
+    "FILES",
   } };
   const size_t width = 12;
   for (size_t i = 0; i < names.size(); ++i) {
@@ -15854,6 +15857,8 @@ void RocksDBBlueFSVolumeSelector::dump(ostream& sout) {
     sout.setf(std::ios::left, std::ios::adjustfield);
     sout.width(width);
     switch (l + LEVEL_FIRST) {
+    case LEVEL_LOG:
+      sout << "LOG"; break;
     case LEVEL_WAL:
       sout << "WAL"; break;
     case LEVEL_DB:
@@ -15863,15 +15868,14 @@ void RocksDBBlueFSVolumeSelector::dump(ostream& sout) {
     case LEVEL_MAX:
       sout << "TOTALS"; break;
     }
-    for (size_t d = 0; d < max_x - 1; d++) {
+    for (size_t d = 0; d < max_x; d++) {
       sout.setf(std::ios::left, std::ios::adjustfield);
       sout.width(width);
       sout << stringify(byte_u_t(per_level_per_dev_usage.at(d, l)));
     }
     sout.setf(std::ios::left, std::ios::adjustfield);
     sout.width(width);
-    sout << stringify(byte_u_t(per_level_per_dev_usage.at(max_x - 1, l)))
-         << std::endl;
+    sout << stringify(per_level_files[l]) << std::endl;
   }
   ceph_assert(max_x == per_level_per_dev_max.get_max_x());
   ceph_assert(max_y == per_level_per_dev_max.get_max_y());
@@ -15880,6 +15884,8 @@ void RocksDBBlueFSVolumeSelector::dump(ostream& sout) {
     sout.setf(std::ios::left, std::ios::adjustfield);
     sout.width(width);
     switch (l + LEVEL_FIRST) {
+    case LEVEL_LOG:
+      sout << "LOG"; break;
     case LEVEL_WAL:
       sout << "WAL"; break;
     case LEVEL_DB:
diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h
index 55787e1b2986..20a34ce24d45 100644
--- a/src/os/bluestore/BlueStore.h
+++ b/src/os/bluestore/BlueStore.h
@@ -3582,7 +3582,8 @@ class RocksDBBlueFSVolumeSelector : public BlueFSVolumeSelector
   enum {
     // use 0/nullptr as unset indication
     LEVEL_FIRST = 1,
-    LEVEL_WAL = LEVEL_FIRST,
+    LEVEL_LOG = LEVEL_FIRST, // BlueFS log
+    LEVEL_WAL,
     LEVEL_DB,
     LEVEL_SLOW,
     LEVEL_MAX
@@ -3592,6 +3593,8 @@ class RocksDBBlueFSVolumeSelector : public BlueFSVolumeSelector
   typedef matrix_2d<uint64_t, BlueFS::MAX_BDEV + 1, LEVEL_MAX - LEVEL_FIRST + 1> per_level_per_dev_usage_t;
 
   per_level_per_dev_usage_t per_level_per_dev_usage;
+  // file count per level, add +1 to keep total file count
+  uint64_t per_level_files[LEVEL_MAX - LEVEL_FIRST + 1] = { 0 };
 
   // Note: maximum per-device totals below might be smaller than corresponding
   // perf counters by up to a single alloc unit (1M) due to superblock extent.
@@ -3617,6 +3620,7 @@ public:
     uint64_t reserved,
     bool new_pol)
   {
+    l_totals[LEVEL_LOG - LEVEL_FIRST] = 0; // not used at the moment
     l_totals[LEVEL_WAL - LEVEL_FIRST] = _wal_total;
     l_totals[LEVEL_DB - LEVEL_FIRST] = _db_total;
     l_totals[LEVEL_SLOW - LEVEL_FIRST] = _slow_total;
@@ -3651,9 +3655,8 @@ public:
     }
   }
 
-  void* get_hint_by_device(uint8_t dev) const override {
-    ceph_assert(dev == BlueFS::BDEV_WAL); // others aren't used atm
-    return reinterpret_cast<void*>(LEVEL_WAL);
+  void* get_hint_for_log() const override {
+    return reinterpret_cast<void*>(LEVEL_LOG);
   }
   void* get_hint_by_dir(const string& dirname) const override;
 
@@ -3687,6 +3690,8 @@ public:
         max = cur;
       }
     }
+    ++per_level_files[pos];
+    ++per_level_files[LEVEL_MAX - LEVEL_FIRST];
   }
   void sub_usage(void* hint, const bluefs_fnode_t& fnode) override {
     if (hint == nullptr)
@@ -3706,6 +3711,10 @@ public:
     auto& cur = per_level_per_dev_usage.at(BlueFS::MAX_BDEV, pos);
     ceph_assert(cur >= fnode.size);
     cur -= fnode.size;
+    ceph_assert(per_level_files[pos] > 0);
+    --per_level_files[pos];
+    ceph_assert(per_level_files[LEVEL_MAX - LEVEL_FIRST] > 0);
+    --per_level_files[LEVEL_MAX - LEVEL_FIRST];
   }
   void add_usage(void* hint, uint64_t fsize) override {
     if (hint == nullptr)
-- 
2.47.3