From: Radoslaw Zarzynski Date: Tue, 18 Aug 2020 16:28:20 +0000 (+0200) Subject: common/bl: new incarnation of page_aligned_appender. X-Git-Tag: v16.1.0~780^2~5 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=cc4cd3f5e7c46bde6ac476c47c4a6cab323f6ed6;p=ceph.git common/bl: new incarnation of page_aligned_appender. This commit reworks the implementation of `page_aligned_appender`. The goals are: * reuse the `bufferlist::_carriage` mechanism instead of mimicking / duplicating it with a `buffer::ptr` instance. * Eliminate the need for manual `flush()`. It's requires extra effort from code readers and is error-prone. * Enable the `encode()` machinery to put the marshalled content _on the same memory block_ where other things appended via `page_aligned_appender` reside on. The commit introduces a performance regression in `FileWriter` of BlueFS which is caused by an old misbehaviour in `splice()` and other methods of `bufferlist` that unncecessarily thrashes `_carriage`. They will be fixed in follow-ups. Signed-off-by: Radoslaw Zarzynski --- diff --git a/src/common/buffer.cc b/src/common/buffer.cc index 8d842290f9c..c9ddc73c28a 100644 --- a/src/common/buffer.cc +++ b/src/common/buffer.cc @@ -2218,6 +2218,15 @@ MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_static, buffer_raw_static, buffer_meta); +void ceph::buffer::list::page_aligned_appender::_refill(size_t len) { + const size_t alloc = \ + std::max((size_t)min_alloc, (len + CEPH_PAGE_SIZE - 1) & CEPH_PAGE_MASK); + auto new_back = \ + ptr_node::create(buffer::create_page_aligned(alloc)); + new_back->set_length(0); // unused, so far. + bl.push_back(std::move(new_back)); +} + namespace ceph::buffer { inline namespace v15_2_0 { diff --git a/src/include/buffer.h b/src/include/buffer.h index a07282ecee2..2ca686828d2 100644 --- a/src/include/buffer.h +++ b/src/include/buffer.h @@ -844,39 +844,26 @@ struct error_code; "contiguous_filler should be no costlier than pointer"); class page_aligned_appender { - bufferlist *pbl; + bufferlist& bl; unsigned min_alloc; - ptr buffer; - char *pos, *end; page_aligned_appender(list *l, unsigned min_pages) - : pbl(l), - min_alloc(min_pages * CEPH_PAGE_SIZE), - pos(nullptr), end(nullptr) {} + : bl(*l), + min_alloc(min_pages * CEPH_PAGE_SIZE) { + } + + void _refill(size_t len); template void _append_common(size_t len, Func&& impl_f) { - while (len > 0) { - if (!pos) { - size_t alloc = (len + CEPH_PAGE_SIZE - 1) & CEPH_PAGE_MASK; - if (alloc < min_alloc) { - alloc = min_alloc; - } - buffer = create_page_aligned(alloc); - pos = buffer.c_str(); - end = buffer.end_c_str(); - } - size_t l = len; - if (l > (size_t)(end - pos)) { - l = end - pos; - } - impl_f(l, pos); - pos += l; - len -= l; - if (pos == end) { - pbl->append(buffer, 0, buffer.length()); - pos = end = nullptr; - } + const auto free_in_last = bl.get_append_buffer_unused_tail_length(); + const auto first_round = std::min(len, free_in_last); + if (first_round) { + impl_f(first_round); + } + if (const auto second_round = len - first_round; second_round) { + _refill(second_round); + impl_f(second_round); } } @@ -888,26 +875,25 @@ struct error_code; } void flush() { - if (pos && pos != buffer.c_str()) { - size_t len = pos - buffer.c_str(); - pbl->append(buffer, 0, len); - buffer.set_length(buffer.length() - len); - buffer.set_offset(buffer.offset() + len); - } + // nop + } + + void append(const bufferlist& l) { + bl.append(l); + bl.obtain_contiguous_space(0); } void append(const char* buf, size_t entire_len) { - _append_common(entire_len, [buf] (const size_t chunk_len, - char* const dst) mutable { - memcpy(dst, buf, chunk_len); + _append_common(entire_len, + [buf, this] (const size_t chunk_len) mutable { + bl.append(buf, chunk_len); buf += chunk_len; }); } void append_zero(size_t entire_len) { - _append_common(entire_len, [] (const size_t chunk_len, - char* const dst) { - memset(dst, '\0', chunk_len); + _append_common(entire_len, [this] (const size_t chunk_len) { + bl.append_zero(chunk_len); }); } @@ -936,6 +922,12 @@ struct error_code; static ptr always_empty_bptr; ptr_node& refill_append_space(const unsigned len); + // for page_aligned_appender; never ever expose this publicly! + // carriage / append_buffer is just an implementation's detail. + ptr& get_append_buffer() { + return *_carriage; + } + public: // cons/des list() @@ -1061,8 +1053,6 @@ struct error_code; void push_back(ptr_node&) = delete; void push_back(ptr_node&&) = delete; void push_back(std::unique_ptr bp) { - if (bp->length() == 0) - return; _carriage = bp.get(); _len += bp->length(); _num += 1; diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 9c9a3bd7b19..15b0c10e8b8 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -2777,6 +2777,8 @@ ceph::bufferlist BlueFS::FileWriter::flush_buffer( // Otherwise a costly rebuild could happen in e.g. `KernelDevice`. buffer_appender.append_zero(padding_len); buffer_appender.flush(); + // FIXME: `splice` is currently broken at it always thrashes + // the `_carriage`. This will be fixed in a separated patch. buffer.splice(buffer.length() - padding_len, padding_len, &bl); // Deep copy the tail here. This allows to avoid costlier copy on // bufferlist rebuild in e.g. `KernelDevice` and minimizes number diff --git a/src/test/bufferlist.cc b/src/test/bufferlist.cc index 6703ec52ada..4d00d576a14 100644 --- a/src/test/bufferlist.cc +++ b/src/test/bufferlist.cc @@ -1681,6 +1681,16 @@ TEST(BufferList, page_aligned_appender) { cout << bl << std::endl; ASSERT_EQ(6u, bl.get_num_buffers()); } + + // Verify that `page_aligned_appender` does respect the carrying + // `_carriage` over multiple allocations. Although `append_zero()` + // is used here, this affects other members of the append family. + // This part would be crucial for e.g. `encode()`. + { + bl.append_zero(42); + cout << bl << std::endl; + ASSERT_EQ(6u, bl.get_num_buffers()); + } } TEST(BufferList, rebuild_aligned_size_and_memory) {