git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore/bluefs: use map to track dirty files
author xie xingguo <xie.xingguo@zte.com.cn>
Tue, 30 Aug 2016 07:37:46 +0000 (15:37 +0800)
committer xie xingguo <xie.xingguo@zte.com.cn>
Wed, 31 Aug 2016 01:24:54 +0000 (09:24 +0800)
Dirty files are now kept in a map keyed by dirty_seq, and files that
share the same dirty_seq are grouped into a list. As a result, we can
clean up dirty files much faster during _flush_and_sync_log().

Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
src/os/bluestore/BlueFS.cc
src/os/bluestore/BlueFS.h

index f2e358a023393814e04202b8e6115f947db46ff7..2b4cec5dec3daac134c4b6fc25b0d855e03b925d 100644 (file)
@@ -802,8 +802,10 @@ void BlueFS::_drop_link(FileRef file)
     file->deleted = true;
     if (file->dirty_seq) {
       assert(file->dirty_seq > log_seq_stable);
+      assert(dirty_files.count(file->dirty_seq));
+      auto it = dirty_files[file->dirty_seq].iterator_to(*file);
+      dirty_files[file->dirty_seq].erase(it);
       file->dirty_seq = 0;
-      dirty_files.erase(dirty_files.iterator_to(*file));
     }
   }
 }
@@ -1224,7 +1226,11 @@ void BlueFS::_compact_log_async(std::unique_lock<std::mutex>& l)
 
   // delete the new log, remove from the dirty files list
   _close_writer(new_log_writer);
-  dirty_files.erase(dirty_files.iterator_to(*new_log));
+  if (new_log->dirty_seq) {
+    assert(dirty_files.count(new_log->dirty_seq));
+    auto it = dirty_files[new_log->dirty_seq].iterator_to(*new_log);
+    dirty_files[new_log->dirty_seq].erase(it);
+  }
   new_log_writer = nullptr;
   new_log = nullptr;
   log_cond.notify_all();
@@ -1327,21 +1333,30 @@ int BlueFS::_flush_and_sync_log(std::unique_lock<std::mutex>& l,
   if (seq > log_seq_stable) {
     log_seq_stable = seq;
     dout(20) << __func__ << " log_seq_stable " << log_seq_stable << dendl;
-    dirty_file_list_t::iterator p = dirty_files.begin();
+
+    auto p = dirty_files.begin();
     while (p != dirty_files.end()) {
-      File *file = &(*p);
-      assert(file->dirty_seq > 0);
-      if (file->dirty_seq <= log_seq_stable) {
-         dout(20) << __func__ << " cleaned file " << file->fnode << dendl;
-         file->dirty_seq = 0;
-         dirty_files.erase(p++);
-      } else {
-        ++p;
+      if (p->first > log_seq_stable) {
+        dout(20) << __func__ << " done cleaning up dirty files" << dendl;
+        break;
+      }
+
+      auto l = p->second.begin();
+      while (l != p->second.end()) {
+        File *file = &*l;
+        assert(file->dirty_seq > 0);
+        assert(file->dirty_seq <= log_seq_stable);
+        dout(20) << __func__ << " cleaned file " << file->fnode << dendl;
+        file->dirty_seq = 0;
+        p->second.erase(l++);
       }
+
+      assert(p->second.empty());
+      dirty_files.erase(p++);
     }
   } else {
     dout(20) << __func__ << " log_seq_stable " << log_seq_stable
-             << " already > out seq " << seq
+             << " already >= out seq " << seq
              << ", we lost a race against another log flush, done" << dendl;
   }
   _update_logger_stats();
@@ -1401,14 +1416,25 @@ int BlueFS::_flush_range(FileWriter *h, uint64_t offset, uint64_t length)
     h->file->fnode.mtime = ceph_clock_now(NULL);
     log_t.op_file_update(h->file->fnode);
     if (h->file->dirty_seq == 0) {
-      dirty_files.push_back(*h->file);
+      h->file->dirty_seq = log_seq + 1;
+      dirty_files[h->file->dirty_seq].push_back(*h->file);
       dout(20) << __func__ << " dirty_seq = " << log_seq + 1
               << " (was clean)" << dendl;
     } else {
-      dout(20) << __func__ << " dirty_seq = " << log_seq + 1
-              << " (was " << h->file->dirty_seq << ")" << dendl;
+      if (h->file->dirty_seq != log_seq + 1) {
+        // need re-dirty, erase from list first
+        assert(dirty_files.count(h->file->dirty_seq));
+        auto it = dirty_files[h->file->dirty_seq].iterator_to(*h->file);
+        dirty_files[h->file->dirty_seq].erase(it);
+        h->file->dirty_seq = log_seq + 1;
+        dirty_files[h->file->dirty_seq].push_back(*h->file);
+        dout(20) << __func__ << " dirty_seq = " << log_seq + 1
+                 << " (was " << h->file->dirty_seq << ")" << dendl;
+      } else {
+        dout(20) << __func__ << " dirty_seq = " << log_seq + 1
+                 << " (unchanged, do nothing) " << dendl;
+      }
     }
-    h->file->dirty_seq = log_seq + 1;
   }
   dout(20) << __func__ << " file now " << h->file->fnode << dendl;
 
index 40abbffd53014f1dd5bdc9b2b091380fc5e88ade..f49de277c152c822117b0d24dc704908d742b008 100644 (file)
@@ -196,7 +196,9 @@ private:
   // cache
   map<string, DirRef> dir_map;                    ///< dirname -> Dir
   ceph::unordered_map<uint64_t,FileRef> file_map; ///< ino -> File
-  dirty_file_list_t dirty_files;                  ///< list of dirty files
+
+  // map of dirty files, files of same dirty_seq are grouped into list.
+  map<uint64_t, dirty_file_list_t> dirty_files;
 
   bluefs_super_t super;        ///< latest superblock (as last written)
   uint64_t ino_last = 0;       ///< last assigned ino (this one is in use)