From: Mark Nelson Date: Mon, 10 Aug 2020 19:18:19 +0000 (+0000) Subject: common/buffer: Implement dynamic alen in refill_append_space X-Git-Tag: v17.1.0~2582^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f2fddccd4a6e6ce564d857b18851c8a24817625e;p=ceph.git common/buffer: Implement dynamic alen in refill_append_space Signed-off-by: Mark Nelson --- diff --git a/src/common/buffer.cc b/src/common/buffer.cc index ee2bdc1e9e28..81cf2f257f26 100644 --- a/src/common/buffer.cc +++ b/src/common/buffer.cc @@ -48,6 +48,10 @@ using namespace ceph; #define CEPH_BUFFER_ALLOC_UNIT 4096u #define CEPH_BUFFER_APPEND_SIZE (CEPH_BUFFER_ALLOC_UNIT - sizeof(raw_combined)) +// 256K is the maximum "small" object size in tcmalloc above which allocations come from +// the central heap. For now let's keep this below that threshold. +#define CEPH_BUFFER_ALLOC_UNIT_MAX std::size_t { 256*1024 } + #ifdef BUFFER_DEBUG static ceph::spinlock debug_lock; # define bdout { std::lock_guard lg(debug_lock); std::cout @@ -1324,8 +1328,14 @@ static ceph::spinlock debug_lock; // make a new buffer. fill out a complete page, factoring in the // raw_combined overhead. size_t need = round_up_to(len, sizeof(size_t)) + sizeof(raw_combined); - size_t alen = round_up_to(need, CEPH_BUFFER_ALLOC_UNIT) - - sizeof(raw_combined); + size_t alen = round_up_to(need, CEPH_BUFFER_ALLOC_UNIT); + if (_carriage == &_buffers.back()) { + size_t nlen = round_up_to(_carriage->raw_length(), CEPH_BUFFER_ALLOC_UNIT) * 2; + nlen = std::min(nlen, CEPH_BUFFER_ALLOC_UNIT_MAX); + alen = std::max(alen, nlen); + } + alen -= sizeof(raw_combined); + auto new_back = \ ptr_node::create(raw_combined::create(alen, 0, get_mempool())); new_back->set_length(0); // unused, so far. 
diff --git a/src/test/bufferlist.cc b/src/test/bufferlist.cc index 5cb8df89ea05..b0cb983f7dff 100644 --- a/src/test/bufferlist.cc +++ b/src/test/bufferlist.cc @@ -1349,6 +1349,22 @@ TEST(BufferList, BenchAlloc) { bench_bufferlist_alloc(4, 100000, 16); } +/* + * append_bench tests now have multiple variants: + * + * Version 1 tests allocate a single bufferlist during loop iteration. + * Ultimately very little memory is utilized since the bufferlist immediately + * drops out of scope. This was the original variant of these tests but showed + * unexpected performance characteristics that appear to be tied to tcmalloc + * and/or kernel behavior depending on the bufferlist size and step size. + * + * Version 2 tests allocate a configurable number of bufferlists that are + * replaced round-robin during loop iteration. Version 2 tests are designed + * to better mimic performance when multiple bufferlists are in memory at the + * same time. During testing this showed more consistent and seemingly + * accurate behavior across bufferlist and step sizes. 
+ */ + TEST(BufferList, append_bench_with_size_hint) { std::array src = { 0, }; @@ -1370,12 +1386,39 @@ TEST(BufferList, append_bench_with_size_hint) { } } -TEST(BufferList, append_bench) { +TEST(BufferList, append_bench_with_size_hint2) { std::array src = { 0, }; + constexpr size_t rounds = 4000; + constexpr int conc_bl = 400; + std::vector bls(conc_bl); + for (int i = 0; i < conc_bl; i++) { + bls[i] = new ceph::bufferlist; + } for (size_t step = 4; step <= 16384; step *= 4) { const utime_t start = ceph_clock_now(); + for (size_t r = 0; r < rounds; ++r) { + delete bls[r % conc_bl]; + bls[r % conc_bl] = new ceph::bufferlist(std::size(src)); + for (auto iter = std::begin(src); + iter != std::end(src); + iter = std::next(iter, step)) { + bls[r % conc_bl]->append(&*iter, step); + } + } + cout << rounds << " fills of buffer len " << src.size() + << " with " << step << " byte appends in " + << (ceph_clock_now() - start) << std::endl; + } + for (int i = 0; i < conc_bl; i++) { + delete bls[i]; + } +} +TEST(BufferList, append_bench) { + std::array src = { 0, }; + for (size_t step = 4; step <= 16384; step *= 4) { + const utime_t start = ceph_clock_now(); constexpr size_t rounds = 4000; for (size_t r = 0; r < rounds; ++r) { ceph::bufferlist bl; @@ -1391,6 +1434,80 @@ TEST(BufferList, append_bench) { } } +TEST(BufferList, append_bench2) { + std::array src = { 0, }; + constexpr size_t rounds = 4000; + constexpr int conc_bl = 400; + std::vector bls(conc_bl); + + for (int i = 0; i < conc_bl; i++) { + bls[i] = new ceph::bufferlist; + } + for (size_t step = 4; step <= 16384; step *= 4) { + const utime_t start = ceph_clock_now(); + for (size_t r = 0; r < rounds; ++r) { + delete bls[r % conc_bl]; + bls[r % conc_bl] = new ceph::bufferlist; + for (auto iter = std::begin(src); + iter != std::end(src); + iter = std::next(iter, step)) { + bls[r % conc_bl]->append(&*iter, step); + } + } + cout << rounds << " fills of buffer len " << src.size() + << " with " << step << " byte appends in " 
+ << (ceph_clock_now() - start) << std::endl; + } + for (int i = 0; i < conc_bl; i++) { + delete bls[i]; + } +} + +TEST(BufferList, append_hole_bench) { + constexpr size_t targeted_bl_size = 1048576; + + for (size_t step = 512; step <= 65536; step *= 2) { + const utime_t start = ceph_clock_now(); + constexpr size_t rounds = 80000; + for (size_t r = 0; r < rounds; ++r) { + ceph::bufferlist bl; + while (bl.length() < targeted_bl_size) { + bl.append_hole(step); + } + } + cout << rounds << " fills of buffer len " << targeted_bl_size + << " with " << step << " byte long append_hole in " + << (ceph_clock_now() - start) << std::endl; + } +} + +TEST(BufferList, append_hole_bench2) { + constexpr size_t targeted_bl_size = 1048576; + constexpr size_t rounds = 80000; + constexpr int conc_bl = 400; + std::vector bls(conc_bl); + + for (int i = 0; i < conc_bl; i++) { + bls[i] = new ceph::bufferlist; + } + for (size_t step = 512; step <= 65536; step *= 2) { + const utime_t start = ceph_clock_now(); + for (size_t r = 0; r < rounds; ++r) { + delete bls[r % conc_bl]; + bls[r % conc_bl] = new ceph::bufferlist; + while (bls[r % conc_bl]->length() < targeted_bl_size) { + bls[r % conc_bl]->append_hole(step); + } + } + cout << rounds << " fills of buffer len " << targeted_bl_size + << " with " << step << " byte long append_hole in " + << (ceph_clock_now() - start) << std::endl; + } + for (int i = 0; i < conc_bl; i++) { + delete bls[i]; + } +} + TEST(BufferList, operator_assign_rvalue) { bufferlist from; {