git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: show file count for 'bluefs-stats' 35748/head
author: Igor Fedotov <ifedotov@suse.com>
Wed, 15 Apr 2020 13:43:21 +0000 (16:43 +0300)
committer: Igor Fedotov <ifedotov@suse.com>
Wed, 8 Jul 2020 19:43:48 +0000 (22:43 +0300)
Signed-off-by: Igor Fedotov <ifedotov@suse.com>
(cherry picked from commit 974906c2b48dee61faf2cffcb6a7f36b8b75fe59)

 Conflicts:
src/os/bluestore/BlueFS.h - trivial

src/os/bluestore/BlueFS.cc
src/os/bluestore/BlueFS.h
src/os/bluestore/BlueStore.cc
src/os/bluestore/BlueStore.h

index 1f87d9ed8dbcfd9e86d65089e4c0cb7cdfcae40a..e90b5694b1df88ec95c6c8d20aeac9659fd876d2 100644 (file)
@@ -514,7 +514,7 @@ int BlueFS::mkfs(uuid_d osd_uuid, const bluefs_layout_t& layout)
   // init log
   FileRef log_file = ceph::make_ref<File>();
   log_file->fnode.ino = 1;
-  log_file->vselector_hint = vselector->get_hint_by_device(BDEV_WAL);
+  log_file->vselector_hint = vselector->get_hint_for_log();
   int r = _allocate(
     vselector->select_prefer_bdev(log_file->vselector_hint),
     cct->_conf->bluefs_max_log_runway,
@@ -962,7 +962,7 @@ int BlueFS::_replay(bool noop, bool to_stdout)
   if (!noop) {
     log_file->fnode = super.log_fnode;
     log_file->vselector_hint =
-      vselector->get_hint_by_device(BDEV_WAL);
+      vselector->get_hint_for_log();
   } else {
     // do not use fnode from superblock in 'noop' mode - log_file's one should
     // be fine and up-to-date
@@ -3222,6 +3222,7 @@ int BlueFS::open_for_write(
 
   FileRef file;
   bool create = false;
+  bool truncate = false;
   map<string,FileRef>::iterator q = dir->file_map.find(filename);
   if (q == dir->file_map.end()) {
     if (overwrite) {
@@ -3252,6 +3253,7 @@ int BlueFS::open_for_write(
       for (auto& p : file->fnode.extents) {
        pending_release[p.bdev].insert(p.offset, p.length);
       }
+      truncate = true;
 
       file->fnode.clear_extents();
     }
@@ -3260,6 +3262,9 @@ int BlueFS::open_for_write(
 
   file->fnode.mtime = ceph_clock_now();
   file->vselector_hint = vselector->get_hint_by_dir(dirname);
+  if (create || truncate) {
+    vselector->add_usage(file->vselector_hint, file->fnode); // update file count
+  }
 
   dout(20) << __func__ << " mapping " << dirname << "/" << filename
           << " vsel_hint " << file->vselector_hint
@@ -3590,8 +3595,8 @@ void BlueFS::debug_inject_duplicate_gift(unsigned id,
 // ===============================================
 // OriginalVolumeSelector
 
-void* OriginalVolumeSelector::get_hint_by_device(uint8_t dev) const {
-  return reinterpret_cast<void*>(dev);
+void* OriginalVolumeSelector::get_hint_for_log() const {
+  return reinterpret_cast<void*>(BlueFS::BDEV_WAL);
 }
 void* OriginalVolumeSelector::get_hint_by_dir(const string& dirname) const {
   uint8_t res = BlueFS::BDEV_DB;
index fa727715ca6a15111dbe9acf852ea378261bdb23..4ad026960109b4f81ea091c0d992d20924446cd1 100644 (file)
@@ -80,8 +80,8 @@ public:
 
   virtual ~BlueFSVolumeSelector() {
   }
-  virtual void* get_hint_by_device(uint8_t dev) const = 0;
-  virtual void* get_hint_by_dir(const string& dirname) const = 0;
+  virtual void* get_hint_for_log() const = 0;
+  virtual void* get_hint_by_dir(const std::string& dirname) const = 0;
 
   virtual void add_usage(void* file_hint, const bluefs_fnode_t& fnode) = 0;
   virtual void sub_usage(void* file_hint, const bluefs_fnode_t& fnode) = 0;
@@ -610,8 +610,8 @@ public:
     uint64_t _slow_total)
     : wal_total(_wal_total), db_total(_db_total), slow_total(_slow_total) {}
 
-  void* get_hint_by_device(uint8_t dev) const override;
-  void* get_hint_by_dir(const string& dirname) const override;
+  void* get_hint_for_log() const override;
+  void* get_hint_by_dir(const std::string& dirname) const override;
 
   void add_usage(void* hint, const bluefs_fnode_t& fnode) override {
     // do nothing
index 66c437fc8616d7029fb1911e853790f6323fcd39..da0f2150c8dbd367dbe35581f2e32709cad3894a 100644 (file)
@@ -15776,6 +15776,7 @@ uint8_t RocksDBBlueFSVolumeSelector::select_prefer_bdev(void* h) {
       // - observed maximums on DB dev for DB/WAL/UNSORTED data
       // - observed maximum spillovers
       uint64_t max_db_use = 0; // max db usage we potentially observed
+      max_db_use += per_level_per_dev_max.at(BlueFS::BDEV_DB, LEVEL_LOG - LEVEL_FIRST);
       max_db_use += per_level_per_dev_max.at(BlueFS::BDEV_DB, LEVEL_WAL - LEVEL_FIRST);
       max_db_use += per_level_per_dev_max.at(BlueFS::BDEV_DB, LEVEL_DB - LEVEL_FIRST);
       // this could go to db hence using it in the estimation
@@ -15792,6 +15793,7 @@ uint8_t RocksDBBlueFSVolumeSelector::select_prefer_bdev(void* h) {
       }
     }
     break;
+  case LEVEL_LOG:
   case LEVEL_WAL:
     res = BlueFS::BDEV_WAL;
     break;
@@ -15834,14 +15836,15 @@ void RocksDBBlueFSVolumeSelector::dump(ostream& sout) {
     << ", slow_total:" << l_totals[LEVEL_SLOW - LEVEL_FIRST]
     << ", db_avail:" << db_avail4slow << std::endl
     << "Usage matrix:" << std::endl;
-  constexpr std::array<const char*, 7> names{ {
+  constexpr std::array<const char*, 8> names{ {
     "DEV/LEV",
     "WAL",
     "DB",
     "SLOW",
     "*",
     "*",
-    "REAL"
+    "REAL",
+    "FILES",
   } };
   const size_t width = 12;
   for (size_t i = 0; i < names.size(); ++i) {
@@ -15854,6 +15857,8 @@ void RocksDBBlueFSVolumeSelector::dump(ostream& sout) {
     sout.setf(std::ios::left, std::ios::adjustfield);
     sout.width(width);
     switch (l + LEVEL_FIRST) {
+    case LEVEL_LOG:
+      sout << "LOG"; break;
     case LEVEL_WAL:
       sout << "WAL"; break;
     case LEVEL_DB:
@@ -15863,15 +15868,14 @@ void RocksDBBlueFSVolumeSelector::dump(ostream& sout) {
     case LEVEL_MAX:
       sout << "TOTALS"; break;
     }
-    for (size_t d = 0; d < max_x - 1; d++) {
+    for (size_t d = 0; d < max_x; d++) {
       sout.setf(std::ios::left, std::ios::adjustfield);
       sout.width(width);
       sout << stringify(byte_u_t(per_level_per_dev_usage.at(d, l)));
     }
     sout.setf(std::ios::left, std::ios::adjustfield);
     sout.width(width);
-    sout << stringify(byte_u_t(per_level_per_dev_usage.at(max_x - 1, l)))
-         << std::endl;
+    sout << stringify(per_level_files[l]) << std::endl;
   }
   ceph_assert(max_x == per_level_per_dev_max.get_max_x());
   ceph_assert(max_y == per_level_per_dev_max.get_max_y());
@@ -15880,6 +15884,8 @@ void RocksDBBlueFSVolumeSelector::dump(ostream& sout) {
     sout.setf(std::ios::left, std::ios::adjustfield);
     sout.width(width);
     switch (l + LEVEL_FIRST) {
+    case LEVEL_LOG:
+      sout << "LOG"; break;
     case LEVEL_WAL:
       sout << "WAL"; break;
     case LEVEL_DB:
index 55787e1b2986bd61dcc129b3205ba38e690cb190..20a34ce24d45b7059b948dbd57329cd77f807d62 100644 (file)
@@ -3582,7 +3582,8 @@ class RocksDBBlueFSVolumeSelector : public BlueFSVolumeSelector
   enum {
     // use 0/nullptr as unset indication
     LEVEL_FIRST = 1,
-    LEVEL_WAL = LEVEL_FIRST,
+    LEVEL_LOG = LEVEL_FIRST, // BlueFS log
+    LEVEL_WAL,
     LEVEL_DB,
     LEVEL_SLOW,
     LEVEL_MAX
@@ -3592,6 +3593,8 @@ class RocksDBBlueFSVolumeSelector : public BlueFSVolumeSelector
   typedef matrix_2d<uint64_t, BlueFS::MAX_BDEV + 1, LEVEL_MAX - LEVEL_FIRST + 1> per_level_per_dev_usage_t;
 
   per_level_per_dev_usage_t per_level_per_dev_usage;
+  // file count per level, add +1 to keep total file count
+  uint64_t per_level_files[LEVEL_MAX - LEVEL_FIRST + 1] = { 0 };
 
   // Note: maximum per-device totals below might be smaller than corresponding
   // perf counters by up to a single alloc unit (1M) due to superblock extent.
@@ -3617,6 +3620,7 @@ public:
     uint64_t reserved,
     bool new_pol)
   {
+    l_totals[LEVEL_LOG - LEVEL_FIRST] = 0; // not used at the moment
     l_totals[LEVEL_WAL - LEVEL_FIRST] = _wal_total;
     l_totals[LEVEL_DB - LEVEL_FIRST] = _db_total;
     l_totals[LEVEL_SLOW - LEVEL_FIRST] = _slow_total;
@@ -3651,9 +3655,8 @@ public:
     }
   }
 
-  void* get_hint_by_device(uint8_t dev) const override {
-    ceph_assert(dev == BlueFS::BDEV_WAL); // others aren't used atm
-    return  reinterpret_cast<void*>(LEVEL_WAL);
+  void* get_hint_for_log() const override {
+    return  reinterpret_cast<void*>(LEVEL_LOG);
   }
   void* get_hint_by_dir(const string& dirname) const override;
 
@@ -3687,6 +3690,8 @@ public:
         max = cur;
       }
     }
+    ++per_level_files[pos];
+    ++per_level_files[LEVEL_MAX - LEVEL_FIRST];
   }
   void sub_usage(void* hint, const bluefs_fnode_t& fnode) override {
     if (hint == nullptr)
@@ -3706,6 +3711,10 @@ public:
     auto& cur = per_level_per_dev_usage.at(BlueFS::MAX_BDEV, pos);
     ceph_assert(cur >= fnode.size);
     cur -= fnode.size;
+    ceph_assert(per_level_files[pos] > 0);
+    --per_level_files[pos];
+    ceph_assert(per_level_files[LEVEL_MAX - LEVEL_FIRST] > 0);
+    --per_level_files[LEVEL_MAX - LEVEL_FIRST];
   }
   void add_usage(void* hint, uint64_t fsize) override {
     if (hint == nullptr)