]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
common/bl: new incarnation of page_aligned_appender.
authorRadoslaw Zarzynski <rzarzyns@redhat.com>
Tue, 18 Aug 2020 16:28:20 +0000 (18:28 +0200)
committerRadoslaw Zarzynski <rzarzyns@redhat.com>
Fri, 21 Aug 2020 16:55:57 +0000 (18:55 +0200)
This commit reworks the implementation of `page_aligned_appender`.
The goals are:
  * reuse the `bufferlist::_carriage` mechanism instead of
    mimicking / duplicating it with a `buffer::ptr` instance.
  * Eliminate the need for manual `flush()`. It's requires
    extra effort from code readers and is error-prone.
  * Enable the `encode()` machinery to put the marshalled
    content _on the same memory block_ where other things
    appended via `page_aligned_appender` reside on.

The commit introduces a performance regression in `FileWriter`
of BlueFS which is caused by an old misbehaviour in `splice()`
and other methods of `bufferlist` that unncecessarily thrashes
`_carriage`. They will be fixed in follow-ups.

Signed-off-by: Radoslaw Zarzynski <rzarzyns@redhat.com>
src/common/buffer.cc
src/include/buffer.h
src/os/bluestore/BlueFS.cc
src/test/bufferlist.cc

index 8d842290f9cff0ef94c0adec68e2e0fa3c91b777..c9ddc73c28aec103af69108f70da067746240b57 100644 (file)
@@ -2218,6 +2218,15 @@ MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_static, buffer_raw_static,
                              buffer_meta);
 
 
+void ceph::buffer::list::page_aligned_appender::_refill(size_t len) {
+  const size_t alloc = \
+    std::max((size_t)min_alloc, (len + CEPH_PAGE_SIZE - 1) & CEPH_PAGE_MASK);
+  auto new_back = \
+    ptr_node::create(buffer::create_page_aligned(alloc));
+  new_back->set_length(0);   // unused, so far.
+  bl.push_back(std::move(new_back));
+}
+
 namespace ceph::buffer {
 inline namespace v15_2_0 {
 
index a07282ecee2dcef2d92f24c3547b9bc4e780f44e..2ca686828d2fb3d29fb6d5836d111c4e5e81c393 100644 (file)
@@ -844,39 +844,26 @@ struct error_code;
                  "contiguous_filler should be no costlier than pointer");
 
     class page_aligned_appender {
-      bufferlist *pbl;
+      bufferlistbl;
       unsigned min_alloc;
-      ptr buffer;
-      char *pos, *end;
 
       page_aligned_appender(list *l, unsigned min_pages)
-       : pbl(l),
-         min_alloc(min_pages * CEPH_PAGE_SIZE),
-         pos(nullptr), end(nullptr) {}
+       : bl(*l),
+         min_alloc(min_pages * CEPH_PAGE_SIZE) {
+      }
+
+      void _refill(size_t len);
 
       template <class Func>
       void _append_common(size_t len, Func&& impl_f) {
-       while (len > 0) {
-         if (!pos) {
-           size_t alloc = (len + CEPH_PAGE_SIZE - 1) & CEPH_PAGE_MASK;
-           if (alloc < min_alloc) {
-             alloc = min_alloc;
-           }
-           buffer = create_page_aligned(alloc);
-           pos = buffer.c_str();
-           end = buffer.end_c_str();
-         }
-         size_t l = len;
-         if (l > (size_t)(end - pos)) {
-           l = end - pos;
-         }
-         impl_f(l, pos);
-         pos += l;
-         len -= l;
-         if (pos == end) {
-           pbl->append(buffer, 0, buffer.length());
-           pos = end = nullptr;
-         }
+       const auto free_in_last = bl.get_append_buffer_unused_tail_length();
+       const auto first_round = std::min(len, free_in_last);
+       if (first_round) {
+         impl_f(first_round);
+       }
+       if (const auto second_round = len - first_round; second_round) {
+         _refill(second_round);
+         impl_f(second_round);
        }
       }
 
@@ -888,26 +875,25 @@ struct error_code;
       }
 
       void flush() {
-       if (pos && pos != buffer.c_str()) {
-         size_t len = pos - buffer.c_str();
-         pbl->append(buffer, 0, len);
-         buffer.set_length(buffer.length() - len);
-         buffer.set_offset(buffer.offset() + len);
-       }
+       // nop
+      }
+
+      void append(const bufferlist& l) {
+       bl.append(l);
+       bl.obtain_contiguous_space(0);
       }
 
       void append(const char* buf, size_t entire_len) {
-        _append_common(entire_len, [buf] (const size_t chunk_len,
-                                          char* const dst) mutable {
-         memcpy(dst, buf, chunk_len);
+        _append_common(entire_len,
+                       [buf, this] (const size_t chunk_len) mutable {
+         bl.append(buf, chunk_len);
          buf += chunk_len;
        });
       }
 
       void append_zero(size_t entire_len) {
-       _append_common(entire_len, [] (const size_t chunk_len,
-                                      char* const dst) {
-         memset(dst, '\0', chunk_len);
+       _append_common(entire_len, [this] (const size_t chunk_len) {
+         bl.append_zero(chunk_len);
        });
       }
 
@@ -936,6 +922,12 @@ struct error_code;
     static ptr always_empty_bptr;
     ptr_node& refill_append_space(const unsigned len);
 
+    // for page_aligned_appender; never ever expose this publicly!
+    // carriage / append_buffer is just an implementation's detail.
+    ptr& get_append_buffer() {
+      return *_carriage;
+    }
+
   public:
     // cons/des
     list()
@@ -1061,8 +1053,6 @@ struct error_code;
     void push_back(ptr_node&) = delete;
     void push_back(ptr_node&&) = delete;
     void push_back(std::unique_ptr<ptr_node, ptr_node::disposer> bp) {
-      if (bp->length() == 0)
-       return;
       _carriage = bp.get();
       _len += bp->length();
       _num += 1;
index 9c9a3bd7b19bb100c7892b9dd00bac3c7b25e4cc..15b0c10e8b885ff8893b4be2beb0010d88340c02 100644 (file)
@@ -2777,6 +2777,8 @@ ceph::bufferlist BlueFS::FileWriter::flush_buffer(
     // Otherwise a costly rebuild could happen in e.g. `KernelDevice`.
     buffer_appender.append_zero(padding_len);
     buffer_appender.flush();
+    // FIXME: `splice` is currently broken at it always thrashes
+    // the `_carriage`. This will be fixed in a separated patch.
     buffer.splice(buffer.length() - padding_len, padding_len, &bl);
     // Deep copy the tail here. This allows to avoid costlier copy on
     // bufferlist rebuild in e.g. `KernelDevice` and minimizes number
index 6703ec52adadce49f9ba5631551936e12e1a97cc..4d00d576a14f394ecca9655886c6da64a0d7245b 100644 (file)
@@ -1681,6 +1681,16 @@ TEST(BufferList, page_aligned_appender) {
     cout << bl << std::endl;
     ASSERT_EQ(6u, bl.get_num_buffers());
   }
+
+  // Verify that `page_aligned_appender` does respect the carrying
+  // `_carriage` over multiple allocations. Although `append_zero()`
+  // is used here, this affects other members of the append family.
+  // This part would be crucial for e.g. `encode()`.
+  {
+    bl.append_zero(42);
+    cout << bl << std::endl;
+    ASSERT_EQ(6u, bl.get_num_buffers());
+  }
 }
 
 TEST(BufferList, rebuild_aligned_size_and_memory) {