From: yite.gu Date: Tue, 29 Aug 2023 15:27:23 +0000 (+0800) Subject: os/bluestore: add perfcount for bluestore/bluefs allocator X-Git-Tag: v18.2.5~327^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=49e65656c49a656b008f3dcbe2b0aba712db3d1f;p=ceph.git os/bluestore: add perfcount for bluestore/bluefs allocator Allocator performance is the performance limiting factor on performance storage. This performance count can help us intuitively observe the reasons for changes in bluestore performance. Signed-off-by: Yite Gu (cherry picked from commit 37dc16f232b3e10c571f4fdb0452669b1940ba2e) --- diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 1f1ced92b4a0..d44dbe846b1a 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -288,9 +288,9 @@ void BlueFS::_init_logger() "mxwb", PerfCountersBuilder::PRIO_INTERESTING, unit_t(UNIT_BYTES)); - b.add_u64_counter(l_bluefs_main_alloc_unit, "alloc_unit_main", + b.add_u64_counter(l_bluefs_slow_alloc_unit, "alloc_unit_slow", "Allocation unit size (in bytes) for primary/shared device", - "aumb", + "ausb", PerfCountersBuilder::PRIO_CRITICAL, unit_t(UNIT_BYTES)); b.add_u64_counter(l_bluefs_db_alloc_unit, "alloc_unit_db", @@ -406,6 +406,30 @@ void BlueFS::_init_logger() "How many times bluefs read found page with all 0s"); b.add_u64(l_bluefs_read_zeros_errors, "read_zeros_errors", "How many times bluefs read found transient page with all 0s"); + b.add_time_avg(l_bluefs_wal_alloc_lat, "wal_alloc_lat", + "Average bluefs wal allocate latency", + "bwal", + PerfCountersBuilder::PRIO_USEFUL); + b.add_time_avg(l_bluefs_db_alloc_lat, "db_alloc_lat", + "Average bluefs db allocate latency", + "bdal", + PerfCountersBuilder::PRIO_USEFUL); + b.add_time_avg(l_bluefs_slow_alloc_lat, "slow_alloc_lat", + "Average allocation latency for primary/shared device", + "bsal", + PerfCountersBuilder::PRIO_USEFUL); + b.add_time(l_bluefs_wal_alloc_max_lat, "alloc_wal_max_lat", + "Max allocation latency for wal device", + "awxt", + PerfCountersBuilder::PRIO_INTERESTING); + b.add_time(l_bluefs_db_alloc_max_lat, "alloc_db_max_lat", + "Max allocation latency for db device", + "adxt", + PerfCountersBuilder::PRIO_INTERESTING); + b.add_time(l_bluefs_slow_alloc_max_lat, "alloc_slow_max_lat", + "Max allocation latency for primary/shared device", + "asxt", + PerfCountersBuilder::PRIO_INTERESTING); logger = b.create_perf_counters(); cct->get_perfcounters_collection()->add(logger); @@ -675,7 +699,7 @@ void BlueFS::_init_alloc() alloc_size[BDEV_SLOW] = 0; } logger->set(l_bluefs_db_alloc_unit, alloc_size[BDEV_DB]); - logger->set(l_bluefs_main_alloc_unit, alloc_size[BDEV_SLOW]); + logger->set(l_bluefs_slow_alloc_unit, alloc_size[BDEV_SLOW]); // new wal and db devices are never shared if (bdev[BDEV_NEWWAL]) { alloc_size[BDEV_NEWWAL] = cct->_conf->bluefs_alloc_size; @@ -3762,6 +3786,33 @@ const char* BlueFS::get_device_name(unsigned id) return names[id]; } +void BlueFS::_update_allocate_stats(uint8_t id, const ceph::timespan& d) +{ + switch(id) { + case BDEV_SLOW: + logger->tinc(l_bluefs_slow_alloc_lat, d); + if (d > max_alloc_lat[id]) { + logger->tset(l_bluefs_slow_alloc_max_lat, utime_t(d)); + max_alloc_lat[id] = d; + } + break; + case BDEV_DB: + logger->tinc(l_bluefs_db_alloc_lat, d); + if (d > max_alloc_lat[id]) { + logger->tset(l_bluefs_db_alloc_max_lat, utime_t(d)); + max_alloc_lat[id] = d; + } + break; + case BDEV_WAL: + logger->tinc(l_bluefs_wal_alloc_lat, d); + if (d > max_alloc_lat[id]) { + logger->tset(l_bluefs_wal_alloc_max_lat, utime_t(d)); + max_alloc_lat[id] = d; + } + break; + } +} + int BlueFS::_allocate(uint8_t id, uint64_t len, uint64_t alloc_unit, bluefs_fnode_t* node, @@ -3807,7 +3858,9 @@ int BlueFS::_allocate(uint8_t id, uint64_t len, } ++alloc_attempts; extents.reserve(4); // 4 should be (more than) enough for most allocations + auto t0 = mono_clock::now(); alloc_len = alloc[id]->allocate(need, alloc_unit, hint, &extents); + _update_allocate_stats(id, mono_clock::now() - t0); } if (alloc_len < 0 || alloc_len < need) { if (alloc[id]) { diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index c0c2dfdf0ad2..673978e15399 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -43,7 +43,7 @@ enum { l_bluefs_max_bytes_wal, l_bluefs_max_bytes_db, l_bluefs_max_bytes_slow, - l_bluefs_main_alloc_unit, + l_bluefs_slow_alloc_unit, l_bluefs_db_alloc_unit, l_bluefs_wal_alloc_unit, l_bluefs_read_random_count, @@ -73,6 +73,12 @@ enum { l_bluefs_alloc_shared_size_fallbacks, l_bluefs_read_zeros_candidate, l_bluefs_read_zeros_errors, + l_bluefs_wal_alloc_lat, + l_bluefs_db_alloc_lat, + l_bluefs_slow_alloc_lat, + l_bluefs_wal_alloc_max_lat, + l_bluefs_db_alloc_max_lat, + l_bluefs_slow_alloc_max_lat, l_bluefs_last, }; @@ -434,6 +440,8 @@ private: l_bluefs_max_bytes_db, }; + ceph::timespan max_alloc_lat[MAX_BDEV] = {ceph::make_timespan(0)}; + // cache struct { ceph::mutex lock = ceph::make_mutex("BlueFS::nodes.lock"); @@ -522,6 +530,7 @@ private: const char* get_device_name(unsigned id); typedef std::function update_fn_t; + void _update_allocate_stats(uint8_t id, const ceph::timespan& d); int _allocate(uint8_t bdev, uint64_t len, uint64_t alloc_unit, bluefs_fnode_t* node, diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 189b8df1e7e0..831bd050f287 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -5342,6 +5342,10 @@ void BlueStore::_init_logger() l_bluestore_allocate_hist, "allocate_histogram", alloc_hist_x_axis_config, alloc_hist_y_axis_config, "Histogram of requested block allocations vs. given ones"); + b.add_time_avg(l_bluestore_allocator_lat, "allocator_lat", + "Average bluestore allocator latency", + "bsal", + PerfCountersBuilder::PRIO_USEFUL); logger = b.create_perf_counters(); cct->get_perfcounters_collection()->add(logger); @@ -15644,9 +15648,14 @@ int BlueStore::_do_alloc_write( PExtentVector prealloc; prealloc.reserve(2 * wctx->writes.size()); int64_t prealloc_left = 0; + auto start = mono_clock::now(); prealloc_left = alloc->allocate( need, min_alloc_size, need, 0, &prealloc); + log_latency("allocator@_do_alloc_write", + l_bluestore_allocator_lat, + mono_clock::now() - start, + cct->_conf->bluestore_log_op_age); if (prealloc_left < 0 || prealloc_left < (int64_t)need) { derr << __func__ << " failed to allocate 0x" << std::hex << need << " allocated 0x " << (prealloc_left < 0 ? 0 : prealloc_left) diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 8f1dbbf08c96..0d0dece5daeb 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -213,6 +213,7 @@ enum { // allocation stats //**************************************** l_bluestore_allocate_hist, + l_bluestore_allocator_lat, //**************************************** l_bluestore_last };