git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: show file count for 'bluefs-stats' 34569/head
author: Igor Fedotov <ifedotov@suse.com>
Wed, 15 Apr 2020 13:43:21 +0000 (16:43 +0300)
committer: Igor Fedotov <ifedotov@suse.com>
Tue, 21 Apr 2020 09:02:35 +0000 (12:02 +0300)
Signed-off-by: Igor Fedotov <ifedotov@suse.com>
src/os/bluestore/BlueFS.cc
src/os/bluestore/BlueFS.h
src/os/bluestore/BlueStore.cc
src/os/bluestore/BlueStore.h

index b2875649b86df09cc315da17ff7dce67c7143cdf..b8de1012000eaae0a2c539cd3b087332d384790a 100644 (file)
@@ -532,7 +532,7 @@ int BlueFS::mkfs(uuid_d osd_uuid, const bluefs_layout_t& layout)
   // init log
   FileRef log_file = ceph::make_ref<File>();
   log_file->fnode.ino = 1;
-  log_file->vselector_hint = vselector->get_hint_by_device(BDEV_WAL);
+  log_file->vselector_hint = vselector->get_hint_for_log();
   int r = _allocate(
     vselector->select_prefer_bdev(log_file->vselector_hint),
     cct->_conf->bluefs_max_log_runway,
@@ -980,7 +980,7 @@ int BlueFS::_replay(bool noop, bool to_stdout)
   if (!noop) {
     log_file->fnode = super.log_fnode;
     log_file->vselector_hint =
-      vselector->get_hint_by_device(BDEV_WAL);
+      vselector->get_hint_for_log();
   } else {
     // do not use fnode from superblock in 'noop' mode - log_file's one should
     // be fine and up-to-date
@@ -3239,6 +3239,7 @@ int BlueFS::open_for_write(
 
   FileRef file;
   bool create = false;
+  bool truncate = false;
   map<string,FileRef>::iterator q = dir->file_map.find(filename);
   if (q == dir->file_map.end()) {
     if (overwrite) {
@@ -3269,6 +3270,7 @@ int BlueFS::open_for_write(
       for (auto& p : file->fnode.extents) {
        pending_release[p.bdev].insert(p.offset, p.length);
       }
+      truncate = true;
 
       file->fnode.clear_extents();
     }
@@ -3277,6 +3279,9 @@ int BlueFS::open_for_write(
 
   file->fnode.mtime = ceph_clock_now();
   file->vselector_hint = vselector->get_hint_by_dir(dirname);
+  if (create || truncate) {
+    vselector->add_usage(file->vselector_hint, file->fnode); // update file count
+  }
 
   dout(20) << __func__ << " mapping " << dirname << "/" << filename
           << " vsel_hint " << file->vselector_hint
@@ -3607,8 +3612,8 @@ void BlueFS::debug_inject_duplicate_gift(unsigned id,
 // ===============================================
 // OriginalVolumeSelector
 
-void* OriginalVolumeSelector::get_hint_by_device(uint8_t dev) const {
-  return reinterpret_cast<void*>(dev);
+void* OriginalVolumeSelector::get_hint_for_log() const {
+  return reinterpret_cast<void*>(BlueFS::BDEV_WAL);
 }
 void* OriginalVolumeSelector::get_hint_by_dir(const string& dirname) const {
   uint8_t res = BlueFS::BDEV_DB;
index 96ac5407f7f9048bd275103ecafb364d76005efe..8f5e1a9d3c22e11ff820dab737a99ef6aece3fa7 100644 (file)
@@ -80,7 +80,7 @@ public:
 
   virtual ~BlueFSVolumeSelector() {
   }
-  virtual void* get_hint_by_device(uint8_t dev) const = 0;
+  virtual void* get_hint_for_log() const = 0;
   virtual void* get_hint_by_dir(const std::string& dirname) const = 0;
 
   virtual void add_usage(void* file_hint, const bluefs_fnode_t& fnode) = 0;
@@ -615,7 +615,7 @@ public:
     uint64_t _slow_total)
     : wal_total(_wal_total), db_total(_db_total), slow_total(_slow_total) {}
 
-  void* get_hint_by_device(uint8_t dev) const override;
+  void* get_hint_for_log() const override;
   void* get_hint_by_dir(const std::string& dirname) const override;
 
   void add_usage(void* hint, const bluefs_fnode_t& fnode) override {
index de18468d474526ca3855ecabb398ba3aa57b42fc..e2714741d5a6096f47348a0ec78198d23dfa4154 100644 (file)
@@ -16021,6 +16021,7 @@ uint8_t RocksDBBlueFSVolumeSelector::select_prefer_bdev(void* h) {
       // - observed maximums on DB dev for DB/WAL/UNSORTED data
       // - observed maximum spillovers
       uint64_t max_db_use = 0; // max db usage we potentially observed
+      max_db_use += per_level_per_dev_max.at(BlueFS::BDEV_DB, LEVEL_LOG - LEVEL_FIRST);
       max_db_use += per_level_per_dev_max.at(BlueFS::BDEV_DB, LEVEL_WAL - LEVEL_FIRST);
       max_db_use += per_level_per_dev_max.at(BlueFS::BDEV_DB, LEVEL_DB - LEVEL_FIRST);
       // this could go to db hence using it in the estimation
@@ -16037,6 +16038,7 @@ uint8_t RocksDBBlueFSVolumeSelector::select_prefer_bdev(void* h) {
       }
     }
     break;
+  case LEVEL_LOG:
   case LEVEL_WAL:
     res = BlueFS::BDEV_WAL;
     break;
@@ -16079,14 +16081,15 @@ void RocksDBBlueFSVolumeSelector::dump(ostream& sout) {
     << ", slow_total:" << l_totals[LEVEL_SLOW - LEVEL_FIRST]
     << ", db_avail:" << db_avail4slow << std::endl
     << "Usage matrix:" << std::endl;
-  constexpr std::array<const char*, 7> names{ {
+  constexpr std::array<const char*, 8> names{ {
     "DEV/LEV",
     "WAL",
     "DB",
     "SLOW",
     "*",
     "*",
-    "REAL"
+    "REAL",
+    "FILES",
   } };
   const size_t width = 12;
   for (size_t i = 0; i < names.size(); ++i) {
@@ -16099,6 +16102,8 @@ void RocksDBBlueFSVolumeSelector::dump(ostream& sout) {
     sout.setf(std::ios::left, std::ios::adjustfield);
     sout.width(width);
     switch (l + LEVEL_FIRST) {
+    case LEVEL_LOG:
+      sout << "LOG"; break;
     case LEVEL_WAL:
       sout << "WAL"; break;
     case LEVEL_DB:
@@ -16108,15 +16113,14 @@ void RocksDBBlueFSVolumeSelector::dump(ostream& sout) {
     case LEVEL_MAX:
       sout << "TOTALS"; break;
     }
-    for (size_t d = 0; d < max_x - 1; d++) {
+    for (size_t d = 0; d < max_x; d++) {
       sout.setf(std::ios::left, std::ios::adjustfield);
       sout.width(width);
       sout << stringify(byte_u_t(per_level_per_dev_usage.at(d, l)));
     }
     sout.setf(std::ios::left, std::ios::adjustfield);
     sout.width(width);
-    sout << stringify(byte_u_t(per_level_per_dev_usage.at(max_x - 1, l)))
-         << std::endl;
+    sout << stringify(per_level_files[l]) << std::endl;
   }
   ceph_assert(max_x == per_level_per_dev_max.get_max_x());
   ceph_assert(max_y == per_level_per_dev_max.get_max_y());
@@ -16125,6 +16129,8 @@ void RocksDBBlueFSVolumeSelector::dump(ostream& sout) {
     sout.setf(std::ios::left, std::ios::adjustfield);
     sout.width(width);
     switch (l + LEVEL_FIRST) {
+    case LEVEL_LOG:
+      sout << "LOG"; break;
     case LEVEL_WAL:
       sout << "WAL"; break;
     case LEVEL_DB:
index c9e831c7a4b173540a0985089cafa117610e5842..0b032277d47f892b9b43e0cea6a659c2c2b9389a 100644 (file)
@@ -3612,7 +3612,8 @@ class RocksDBBlueFSVolumeSelector : public BlueFSVolumeSelector
   enum {
     // use 0/nullptr as unset indication
     LEVEL_FIRST = 1,
-    LEVEL_WAL = LEVEL_FIRST,
+    LEVEL_LOG = LEVEL_FIRST, // BlueFS log
+    LEVEL_WAL,
     LEVEL_DB,
     LEVEL_SLOW,
     LEVEL_MAX
@@ -3622,6 +3623,8 @@ class RocksDBBlueFSVolumeSelector : public BlueFSVolumeSelector
   typedef matrix_2d<uint64_t, BlueFS::MAX_BDEV + 1, LEVEL_MAX - LEVEL_FIRST + 1> per_level_per_dev_usage_t;
 
   per_level_per_dev_usage_t per_level_per_dev_usage;
+  // file count per level, add +1 to keep total file count
+  uint64_t per_level_files[LEVEL_MAX - LEVEL_FIRST + 1] = { 0 };
 
   // Note: maximum per-device totals below might be smaller than corresponding
   // perf counters by up to a single alloc unit (1M) due to superblock extent.
@@ -3647,6 +3650,7 @@ public:
     uint64_t reserved,
     bool new_pol)
   {
+    l_totals[LEVEL_LOG - LEVEL_FIRST] = 0; // not used at the moment
     l_totals[LEVEL_WAL - LEVEL_FIRST] = _wal_total;
     l_totals[LEVEL_DB - LEVEL_FIRST] = _db_total;
     l_totals[LEVEL_SLOW - LEVEL_FIRST] = _slow_total;
@@ -3681,9 +3685,8 @@ public:
     }
   }
 
-  void* get_hint_by_device(uint8_t dev) const override {
-    ceph_assert(dev == BlueFS::BDEV_WAL); // others aren't used atm
-    return  reinterpret_cast<void*>(LEVEL_WAL);
+  void* get_hint_for_log() const override {
+    return  reinterpret_cast<void*>(LEVEL_LOG);
   }
   void* get_hint_by_dir(const std::string& dirname) const override;
 
@@ -3717,6 +3720,8 @@ public:
         max = cur;
       }
     }
+    ++per_level_files[pos];
+    ++per_level_files[LEVEL_MAX - LEVEL_FIRST];
   }
   void sub_usage(void* hint, const bluefs_fnode_t& fnode) override {
     if (hint == nullptr)
@@ -3736,6 +3741,10 @@ public:
     auto& cur = per_level_per_dev_usage.at(BlueFS::MAX_BDEV, pos);
     ceph_assert(cur >= fnode.size);
     cur -= fnode.size;
+    ceph_assert(per_level_files[pos] > 0);
+    --per_level_files[pos];
+    ceph_assert(per_level_files[LEVEL_MAX - LEVEL_FIRST] > 0);
+    --per_level_files[LEVEL_MAX - LEVEL_FIRST];
   }
   void add_usage(void* hint, uint64_t fsize) override {
     if (hint == nullptr)