git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore/bluefs: Fix improper vselector tracking in _flush_special()
author Adam Kupczyk <akupczyk@redhat.com>
Wed, 9 Feb 2022 15:19:56 +0000 (16:19 +0100)
committer Adam Kupczyk <akupczyk@redhat.com>
Mon, 14 Feb 2022 15:59:03 +0000 (16:59 +0100)
Moves vselector size tracking outside _flush_special().
The function _compact_log_async...() updated sizes twice.
The problem could not be solved by making the second size modification just an update,
as that could disrupt the vselector consistency check (_vselector_check()).
The vselector consistency tracking relies on the fact that either log.lock or nodes.lock
is held when the check is performed, which is not true for _compact_log_async...().

Now _flush_special does not update vselector sizes by itself but leaves the update to
the caller.

Fixes: https://tracker.ceph.com/issues/54248
Signed-off-by: Adam Kupczyk <akupczyk@redhat.com>
src/os/bluestore/BlueFS.cc
src/os/bluestore/BlueFS.h

index e74cbc6308501a725c4d3ae8901e87db6bf9c877..3b7d329f79d9796dae230e208370442a81870487 100644 (file)
@@ -2495,8 +2495,8 @@ void BlueFS::_rewrite_log_and_layout_sync_LNF_LD(bool allocate_with_fallback,
 
   log.writer = _create_writer(log_file);
   log.writer->append(bl);
-  r = _flush_special(log.writer);
-  ceph_assert(r == 0);
+  uint64_t new_data = _flush_special(log.writer);
+  vselector->add_usage(log_file->vselector_hint, new_data);
 #ifdef HAVE_LIBAIO
   if (!cct->_conf->bluefs_sync_write) {
     list<aio_t> completed_ios;
@@ -2648,8 +2648,7 @@ void BlueFS::_compact_log_async_LD_LNF_D() //also locks FW for new_writer
 
   new_log_writer->append(bl);
   // 3. flush
-  r = _flush_special(new_log_writer);
-  ceph_assert(r == 0);
+  _flush_special(new_log_writer);
 
   // 4. wait
   _flush_bdev(new_log_writer);
@@ -2863,8 +2862,8 @@ void BlueFS::_flush_and_sync_log_core(int64_t runway)
   log.t.clear();
   log.t.seq = log.seq_live;
 
-  int r = _flush_special(log.writer);
-  ceph_assert(r == 0);
+  uint64_t new_data = _flush_special(log.writer);
+  vselector->add_usage(log.writer->file->vselector_hint, new_data);
 }
 
 // Clears dirty.files up to (including) seq_stable.
@@ -3341,18 +3340,19 @@ int BlueFS::_flush_F(FileWriter *h, bool force, bool *flushed)
 // we do not need to dirty the log file (or it's compacting
 // replacement) when the file size changes because replay is
 // smart enough to discover it on its own.
-int BlueFS::_flush_special(FileWriter *h)
+uint64_t BlueFS::_flush_special(FileWriter *h)
 {
   ceph_assert(h->file->fnode.ino <= 1);
   uint64_t length = h->get_buffer_length();
   uint64_t offset = h->pos;
+  uint64_t new_data = 0;
   ceph_assert(length + offset <= h->file->fnode.get_allocated());
   if (h->file->fnode.size < offset + length) {
-    vselector->sub_usage(h->file->vselector_hint, h->file->fnode.size);
+    new_data = offset + length - h->file->fnode.size;
     h->file->fnode.size = offset + length;
-    vselector->add_usage(h->file->vselector_hint, h->file->fnode.size);
   }
-  return _flush_data(h, offset, length, false);
+  _flush_data(h, offset, length, false);
+  return new_data;
 }
 
 int BlueFS::truncate(FileWriter *h, uint64_t offset)/*_WF_L*/
index 20dfbb5c4cfc13e6f40ff4afed6475b1dcf5f9ef..4307507d13b95f7a88c3aca096ca9c3e469c4f64 100644 (file)
@@ -427,7 +427,7 @@ private:
   int _flush_range_F(FileWriter *h, uint64_t offset, uint64_t length);
   int _flush_data(FileWriter *h, uint64_t offset, uint64_t length, bool buffered);
   int _flush_F(FileWriter *h, bool force, bool *flushed = nullptr);
-  int _flush_special(FileWriter *h);
+  uint64_t _flush_special(FileWriter *h);
   int _fsync(FileWriter *h);
 
 #ifdef HAVE_LIBAIO