]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: dissect the buffer management from _flush_range().
authorRadoslaw Zarzynski <rzarzyns@redhat.com>
Wed, 12 Aug 2020 19:16:58 +0000 (21:16 +0200)
committerRadoslaw Zarzynski <rzarzyns@redhat.com>
Fri, 21 Aug 2020 16:55:57 +0000 (18:55 +0200)
This commit has also privatized `buffer` and `tail_block`
of the `FileWriter`, and therefore introduced a public
getter for obtaining the `buffer` size. This is intended
to help reviewers (by ensuring there is no unexpected user)
and other readers in the future.

Signed-off-by: Radoslaw Zarzynski <rzarzyns@redhat.com>
src/os/bluestore/BlueFS.cc
src/os/bluestore/BlueFS.h
src/os/bluestore/BlueRocksEnv.cc

index bc991d484f98abeae524e484b6894c5f3f2cb61a..e10641a55f255a9c55400b1d09e91ab79087986d 100644 (file)
@@ -2745,6 +2745,52 @@ int BlueFS::_flush_and_sync_log(std::unique_lock<ceph::mutex>& l,
   return 0;
 }
 
+ceph::bufferlist BlueFS::FileWriter::flush_buffer(
+  CephContext* const cct,
+  const bool partial,
+  const unsigned length,
+  const bluefs_super_t& super)
+{
+  ceph::bufferlist bl;
+  if (partial) {
+    bl.claim_append_piecewise(tail_block);
+  }
+  if (length == bl.length() + buffer.length()) {
+    /* in case of inital allocation and need to zero, limited flush is unacceptable */
+    bl.claim_append_piecewise(buffer);
+  } else {
+    bufferlist t;
+    buffer.splice(0, length, &t);
+    bl.claim_append_piecewise(t);
+    dout(20) << " leaving 0x" << std::hex << buffer.length() << std::dec
+             << " unflushed" << dendl;
+  }
+  if (const unsigned tail = bl.length() & ~super.block_mask(); tail) {
+    dout(20) << __func__ << " caching tail of 0x"
+             << std::hex << tail
+             << " and padding block with 0x" << (super.block_size - tail)
+             << " buffer.length() " << buffer.length()
+             << std::dec << dendl;
+    tail_block.substr_of(bl, bl.length() - tail, tail);
+    //Because class page_aligned_appender, bl.append_zero will create a new ptr.
+    //This make rebuild for direct-io. We split ptr into two parts which make only rebuild size < 4k.
+    if (bl.get_num_buffers() == 1) {
+      bufferptr buf = bl.buffers().front();
+      if (length > super.block_size && buf.is_aligned(super.block_size) && !buf.is_n_align_sized(super.block_size)) {
+       bl.clear();
+       bufferptr tmp(buf, length - tail, tail);
+       buf.set_length(length - tail);
+       bl.append(buf);
+       bl.append(tmp);
+      }
+    }
+    bl.append_zero(super.block_size - tail);
+  } else {
+    tail_block.clear();
+  }
+  return bl;
+}
+
 int BlueFS::_flush_range(FileWriter *h, uint64_t offset, uint64_t length)
 {
   dout(10) << __func__ << " " << h << " pos 0x" << std::hex << h->pos
@@ -2836,12 +2882,9 @@ int BlueFS::_flush_range(FileWriter *h, uint64_t offset, uint64_t length)
            << std::hex << x_off << std::dec << dendl;
 
   unsigned partial = x_off & ~super.block_mask();
-  bufferlist bl;
   if (partial) {
     dout(20) << __func__ << " using partial tail 0x"
              << std::hex << partial << std::dec << dendl;
-    ceph_assert(h->tail_block.length() == partial);
-    bl.claim_append_piecewise(h->tail_block);
     x_off -= partial;
     offset -= partial;
     length += partial;
@@ -2852,45 +2895,11 @@ int BlueFS::_flush_range(FileWriter *h, uint64_t offset, uint64_t length)
       }
     }
   }
-  if (length == partial + h->buffer.length()) {
-    /* in case of inital allocation and need to zero, limited flush is unacceptable */
-    bl.claim_append_piecewise(h->buffer);
-  } else {
-    bufferlist t;
-    h->buffer.splice(0, length, &t);
-    bl.claim_append_piecewise(t);
-    dout(20) << " leaving 0x" << std::hex << h->buffer.length() << std::dec
-             << " unflushed" << dendl;
-  }
-  ceph_assert(bl.length() == length);
 
+  auto bl = h->flush_buffer(cct, partial, length, super);
+  ceph_assert(bl.length() >= length);
   h->pos = offset + length;
-
-  unsigned tail = bl.length() & ~super.block_mask();
-  if (tail) {
-    dout(20) << __func__ << " caching tail of 0x"
-             << std::hex << tail
-             << " and padding block with 0x" << (super.block_size - tail)
-             << std::dec << dendl;
-    h->tail_block.substr_of(bl, bl.length() - tail, tail);
-    //Because class page_aligned_appender, bl.append_zero will create a new ptr.
-    //This make rebuild for direct-io. We split ptr into two parts which make only rebuild size < 4k.
-    if (bl.get_num_buffers() == 1) {
-      bufferptr buf = bl.buffers().front();
-      if (length > super.block_size && buf.is_aligned(super.block_size) && !buf.is_n_align_sized(super.block_size)) {
-       bl.clear();
-       bufferptr tmp(buf, length - tail, tail);
-       buf.set_length(length - tail);
-       bl.append(buf);
-       bl.append(tmp);
-      }
-    }
-    bl.append_zero(super.block_size - tail);
-    length += super.block_size - tail;
-  } else {
-    h->tail_block.clear();
-  }
-  ceph_assert(bl.length() == length);
+  length = bl.length();
 
   switch (h->writer_type) {
   case WRITER_WAL:
@@ -2985,7 +2994,7 @@ int BlueFS::_flush(FileWriter *h, bool force, std::unique_lock<ceph::mutex>& l)
 int BlueFS::_flush(FileWriter *h, bool force, bool *flushed)
 {
   h->buffer_appender.flush();
-  uint64_t length = h->buffer.length();
+  uint64_t length = h->get_buffer_length();
   uint64_t offset = h->pos;
   if (flushed) {
     *flushed = false;
@@ -3029,15 +3038,12 @@ int BlueFS::_truncate(FileWriter *h, uint64_t offset)
 
   // truncate off unflushed data?
   if (h->pos < offset &&
-      h->pos + h->buffer.length() > offset) {
-    bufferlist t;
+      h->pos + h->get_buffer_length() > offset) {
     dout(20) << __func__ << " tossing out last " << offset - h->pos
             << " unflushed bytes" << dendl;
-    t.substr_of(h->buffer, 0, offset - h->pos);
-    h->buffer.swap(t);
     ceph_abort_msg("actually this shouldn't happen");
   }
-  if (h->buffer.length()) {
+  if (h->get_buffer_length()) {
     int r = _flush(h, true);
     if (r < 0)
       return r;
@@ -3383,7 +3389,7 @@ BlueFS::FileWriter *BlueFS::_create_writer(FileRef f)
 void BlueFS::_close_writer(FileWriter *h)
 {
   dout(10) << __func__ << " " << h << " type " << h->writer_type << dendl;
-  h->buffer.reassign_to_mempool(mempool::mempool_bluefs_file_writer);
+  //h->buffer.reassign_to_mempool(mempool::mempool_bluefs_file_writer);
   for (unsigned i=0; i<MAX_BDEV; ++i) {
     if (bdev[i]) {
       if (h->iocv[i]) {
index c8e2063ce79a0e835edc48e1f12b7e424c36e0e0..ac471907910158025c50f28418a3b0fedc28c147 100644 (file)
@@ -148,9 +148,20 @@ public:
 
     FileRef file;
     uint64_t pos = 0;       ///< start offset for buffer
+  private:
     ceph::buffer::list buffer;      ///< new data to write (at end of file)
     ceph::buffer::list tail_block;  ///< existing partial block at end of file, if any
+  public:
+    unsigned get_buffer_length() const {
+      return buffer.length();
+    }
+    ceph::bufferlist flush_buffer(
+      CephContext* cct,
+      const bool partial,
+      const unsigned length,
+      const bluefs_super_t& super);
     ceph::buffer::list::page_aligned_appender buffer_appender;  //< for const char* only
+  public:
     int writer_type = 0;    ///< WRITER_*
     int write_hint = WRITE_LIFE_NOT_SET;
 
@@ -160,8 +171,8 @@ public:
 
     FileWriter(FileRef f)
       : file(std::move(f)),
-       buffer_appender(buffer.get_page_aligned_appender(
-                         g_conf()->bluefs_alloc_size / CEPH_PAGE_SIZE)) {
+       buffer_appender(buffer.get_page_aligned_appender(
+                         g_conf()->bluefs_alloc_size / CEPH_PAGE_SIZE)) {
       ++file->num_writers;
       iocv.fill(nullptr);
       dirty_devs.fill(false);
@@ -543,7 +554,7 @@ public:
   }
   void try_flush(FileWriter *h) {
     h->buffer_appender.flush();
-    if (h->buffer.length() >= cct->_conf->bluefs_min_flush_size) {
+    if (h->get_buffer_length() >= cct->_conf->bluefs_min_flush_size) {
       flush(h, true);
     }
   }
index f4d68ea10259daa6b26ffd9d2510edfb7d73aafd..00df87a1450d0eb9609077655102f0537f3a0310 100644 (file)
@@ -250,7 +250,7 @@ class BlueRocksWritableFile : public rocksdb::WritableFile {
    * Get the size of valid data in the file.
    */
   uint64_t GetFileSize() override {
-    return h->file->fnode.size + h->buffer.length();;
+    return h->file->fnode.size + h->get_buffer_length();;
   }
 
   // For documentation, refer to RandomAccessFile::GetUniqueId()