#define CEPH_BUFFER_ALLOC_UNIT 4096u
#define CEPH_BUFFER_APPEND_SIZE (CEPH_BUFFER_ALLOC_UNIT - sizeof(raw_combined))
+// 256K is the maximum "small" object size in tcmalloc above which allocations come from
+// the central heap. For now let's keep this from exceeding that threshold.
+#define CEPH_BUFFER_ALLOC_UNIT_MAX std::size_t { 256*1024 }
+
#ifdef BUFFER_DEBUG
static ceph::spinlock debug_lock;
# define bdout { std::lock_guard<ceph::spinlock> lg(debug_lock); std::cout
// make a new buffer. fill out a complete page, factoring in the
// raw_combined overhead.
size_t need = round_up_to(len, sizeof(size_t)) + sizeof(raw_combined);
- size_t alen = round_up_to(need, CEPH_BUFFER_ALLOC_UNIT) -
- sizeof(raw_combined);
+ size_t alen = round_up_to(need, CEPH_BUFFER_ALLOC_UNIT);
+ if (_carriage == &_buffers.back()) {
+ size_t nlen = round_up_to(_carriage->raw_length(), CEPH_BUFFER_ALLOC_UNIT) * 2;
+ nlen = std::min(nlen, CEPH_BUFFER_ALLOC_UNIT_MAX);
+ alen = std::max(alen, nlen);
+ }
+ alen -= sizeof(raw_combined);
+
auto new_back = \
ptr_node::create(raw_combined::create(alen, 0, get_mempool()));
new_back->set_length(0); // unused, so far.
bench_bufferlist_alloc(4, 100000, 16);
}
+/*
+ * append_bench tests now have multiple variants:
+ *
+ * Version 1 tests allocate a single bufferlist during loop iteration.
+ * Ultimately very little memory is utilized since the bufferlist immediately
+ * drops out of scope. This was the original variant of these tests but showed
+ * unexpected performance characteristics that appear to be tied to tcmalloc
+ * and/or kernel behavior depending on the bufferlist size and step size.
+ *
+ * Version 2 tests allocate a configurable number of bufferlists that are
+ * replaced round-robin during loop iteration. Version 2 tests are designed
+ * to better mimic performance when multiple bufferlists are in memory at the
+ * same time. During testing this showed more consistent and seemingly
+ * accurate behavior across bufferlist and step sizes.
+ */
+
TEST(BufferList, append_bench_with_size_hint) {
std::array<char, 1048576> src = { 0, };
}
}
-TEST(BufferList, append_bench) {
+TEST(BufferList, append_bench_with_size_hint2) {  // append benchmark: many bufferlists stay allocated and are replaced round-robin
std::array<char, 1048576> src = { 0, };
+ constexpr size_t rounds = 4000;  // number of bufferlist fills timed per step size
+ constexpr int conc_bl = 400;  // number of bufferlists kept allocated at the same time
+ std::vector<ceph::bufferlist*> bls(conc_bl);
+ for (int i = 0; i < conc_bl; i++) {
+ bls[i] = new ceph::bufferlist;  // fill every slot up front so the first delete in the timed loop has an object to free
+ }
for (size_t step = 4; step <= 16384; step *= 4) {
const utime_t start = ceph_clock_now();
+ for (size_t r = 0; r < rounds; ++r) {
+ delete bls[r % conc_bl];  // free the list currently in this slot; slots are reused in round-robin order
+ bls[r % conc_bl] = new ceph::bufferlist(std::size(src));  // passes src's size to the constructor -- presumably the "size hint" of the test name; confirm against the bufferlist API
+ for (auto iter = std::begin(src);
+ iter != std::end(src);
+ iter = std::next(iter, step)) {  // every step used here (4 .. 16384, powers of 4) divides 1048576, so iter lands exactly on end
+ bls[r % conc_bl]->append(&*iter, step);  // copy the next step bytes of src into the list
+ }
+ }
+ cout << rounds << " fills of buffer len " << src.size()
+ << " with " << step << " byte appends in "
+ << (ceph_clock_now() - start) << std::endl;  // elapsed time covers the delete/new of each list as well as the appends
+ }
+ for (int i = 0; i < conc_bl; i++) {
+ delete bls[i];  // release the lists still held after the last step
+ }
+}
+TEST(BufferList, append_bench) {
+ std::array<char, 1048576> src = { 0, };
+ for (size_t step = 4; step <= 16384; step *= 4) {
+ const utime_t start = ceph_clock_now();
constexpr size_t rounds = 4000;
for (size_t r = 0; r < rounds; ++r) {
ceph::bufferlist bl;
}
}
+TEST(BufferList, append_bench2) {  // append benchmark: many default-constructed bufferlists stay allocated and are replaced round-robin
+ std::array<char, 1048576> src = { 0, };  // 1 MiB zero-filled source copied into each list
+ constexpr size_t rounds = 4000;  // number of bufferlist fills timed per step size
+ constexpr int conc_bl = 400;  // number of bufferlists kept allocated at the same time
+ std::vector<ceph::bufferlist*> bls(conc_bl);
+
+ for (int i = 0; i < conc_bl; i++) {
+ bls[i] = new ceph::bufferlist;  // fill every slot up front so the first delete in the timed loop has an object to free
+ }
+ for (size_t step = 4; step <= 16384; step *= 4) {  // append sizes 4, 16, ..., 16384 bytes
+ const utime_t start = ceph_clock_now();  // timing restarts for each step size
+ for (size_t r = 0; r < rounds; ++r) {
+ delete bls[r % conc_bl];  // free the list currently in this slot; slots are reused in round-robin order
+ bls[r % conc_bl] = new ceph::bufferlist;  // default-constructed: no size argument is passed
+ for (auto iter = std::begin(src);
+ iter != std::end(src);
+ iter = std::next(iter, step)) {  // every step used here divides 1048576, so iter lands exactly on end
+ bls[r % conc_bl]->append(&*iter, step);  // copy the next step bytes of src into the list
+ }
+ }
+ cout << rounds << " fills of buffer len " << src.size()
+ << " with " << step << " byte appends in "
+ << (ceph_clock_now() - start) << std::endl;  // elapsed time covers the delete/new of each list as well as the appends
+ }
+ for (int i = 0; i < conc_bl; i++) {
+ delete bls[i];  // release the lists still held after the last step
+ }
+}
+
+TEST(BufferList, append_hole_bench) {  // append_hole benchmark: one bufferlist per round, destroyed at the end of that round
+ constexpr size_t targeted_bl_size = 1048576;  // each list is grown until length() reaches at least this many bytes
+
+ for (size_t step = 512; step <= 65536; step *= 2) {  // hole sizes 512, 1024, ..., 65536 bytes
+ const utime_t start = ceph_clock_now();  // timing restarts for each step size
+ constexpr size_t rounds = 80000;  // number of bufferlist fills timed per step size
+ for (size_t r = 0; r < rounds; ++r) {
+ ceph::bufferlist bl;  // fresh list every round; it goes out of scope when the round ends
+ while (bl.length() < targeted_bl_size) {
+ bl.append_hole(step);  // presumably extends the list by step bytes without copying data in -- confirm against the bufferlist API
+ }
+ }
+ cout << rounds << " fills of buffer len " << targeted_bl_size
+ << " with " << step << " byte long append_hole in "
+ << (ceph_clock_now() - start) << std::endl;  // elapsed time covers construction and destruction of each list
+ }
+}
+
+TEST(BufferList, append_hole_bench2) {  // append_hole benchmark: many bufferlists stay allocated and are replaced round-robin
+ constexpr size_t targeted_bl_size = 1048576;  // each list is grown until length() reaches at least this many bytes
+ constexpr size_t rounds = 80000;  // number of bufferlist fills timed per step size
+ constexpr int conc_bl = 400;  // number of bufferlists kept allocated at the same time
+ std::vector<ceph::bufferlist*> bls(conc_bl);
+
+ for (int i = 0; i < conc_bl; i++) {
+ bls[i] = new ceph::bufferlist;  // fill every slot up front so the first delete in the timed loop has an object to free
+ }
+ for (size_t step = 512; step <= 65536; step *= 2) {  // hole sizes 512, 1024, ..., 65536 bytes
+ const utime_t start = ceph_clock_now();  // timing restarts for each step size
+ for (size_t r = 0; r < rounds; ++r) {
+ delete bls[r % conc_bl];  // free the list currently in this slot; slots are reused in round-robin order
+ bls[r % conc_bl] = new ceph::bufferlist;
+ while (bls[r % conc_bl]->length() < targeted_bl_size) {
+ bls[r % conc_bl]->append_hole(step);  // presumably extends the list by step bytes without copying data in -- confirm against the bufferlist API
+ }
+ }
+ cout << rounds << " fills of buffer len " << targeted_bl_size
+ << " with " << step << " byte long append_hole in "
+ << (ceph_clock_now() - start) << std::endl;  // elapsed time covers the delete/new of each list as well as the append_hole calls
+ }
+ for (int i = 0; i < conc_bl; i++) {
+ delete bls[i];  // release the lists still held after the last step
+ }
+}
+
TEST(BufferList, operator_assign_rvalue) {
bufferlist from;
{