From 6bcba24896df1d7fbcebb1d3f724c925d55b0c59 Mon Sep 17 00:00:00 2001 From: Adam Kupczyk Date: Fri, 20 Dec 2024 20:21:14 +0000 Subject: [PATCH] os/bluestore: Add health warning for bluestore fragmentation Changed "bluestore/fragmentation_micros" from a quick but imprecise score to a slower but more representative score. Introduced config "bluestore_warn_on_free_fragmentation" that controls when the free space fragmentation score becomes a health warning. Currently, calculation of the fragmentation score might be non-instant for severely fragmented disks. It might induce stalls to write IO. Config value "bluestore_fragmentation_check_period" controls the score calculation period. In the future, the costly score calculation will be replaced with a method that continuously updates the score. Signed-off-by: Adam Kupczyk (cherry picked from commit 72263dbb7984828844c87bdb3b3ff475a68b1ac6) --- src/common/options/global.yaml.in | 23 +++++++++++++++++++++++ src/os/bluestore/BlueStore.cc | 28 +++++++++++++++++++++++----- src/os/bluestore/BlueStore.h | 2 ++ 3 files changed, 48 insertions(+), 5 deletions(-) diff --git a/src/common/options/global.yaml.in b/src/common/options/global.yaml.in index 76a4f4d9551ad..d8c230d0e24c9 100644 --- a/src/common/options/global.yaml.in +++ b/src/common/options/global.yaml.in @@ -5361,6 +5361,29 @@ options: desc: Enable health indication when spurious read errors are observed by OSD default: true with_legacy: true +- name: bluestore_warn_on_free_fragmentation + type: float + level: basic + desc: Level at which disk free fragmentation causes health warning. Set "1" to disable. + This is same value as admin command "bluestore allocator score block". + default: 0.8 + with_legacy: false + flags: + - runtime + see_also: + - bluestore_fragmentation_check_period +- name: bluestore_fragmentation_check_period + type: uint + level: basic + desc: The period to perform bluestore free fragmentation check. + Checking fragmentation is usually almost immediate. 
For highly fragmented storage, + it can take several milliseconds. It can cause a stall to a write operation. + default: 3600 + with_legacy: false + flags: + - runtime + see_also: + - bluestore_warn_on_free_fragmentation - name: bluestore_slow_ops_warn_lifetime type: uint level: advanced diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index b143937e9453b..568da10cc1c67 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -5234,7 +5234,7 @@ void BlueStore::Collection::split_cache( // MempoolThread #undef dout_prefix -#define dout_prefix *_dout << "bluestore.MempoolThread(" << this << ") " +#define dout_prefix *_dout << "bluestore.MempoolThread " #undef dout_context #define dout_context store->cct @@ -5361,6 +5361,23 @@ void *BlueStore::MempoolThread::entry() interval_stats_trim = false; store->refresh_perf_counters(); + uint64_t period = store->cct->_conf.get_val("bluestore_fragmentation_check_period"); + if (period != 0 && store->alloc) { + auto now = mono_clock::now(); + timespan elapsed = now - last_fragmentation_check; + if (elapsed > make_timespan(period)) { + last_fragmentation_check = now; + double score; + score = store->alloc->get_fragmentation_score(); + store->logger->set(l_bluestore_fragmentation, score * 1e6); + now = mono_clock::now(); + elapsed = now - last_fragmentation_check; + auto seconds = elapsed.count() * 1e-9; + dout(0) << std::fixed << std::setprecision(6) + << "fragmentation_score=" << score << " took=" << seconds << "s" << dendl; + } + } + auto wait = ceph::make_timespan( store->cct->_conf->bluestore_cache_trim_interval); cond.wait_for(l, wait); @@ -14790,10 +14807,6 @@ void BlueStore::_kv_finalize_thread() // this is as good a place as any ... 
_reap_collections(); - - logger->set(l_bluestore_fragmentation, - (uint64_t)(alloc->get_fragmentation() * 1000)); - log_latency("kv_final", l_bluestore_kv_final_lat, mono_clock::now() - start, @@ -18700,6 +18713,11 @@ void BlueStore::_log_alerts(osd_alert_list_t& alerts) "BLUESTORE_NO_COMPRESSION", s0); } + if (logger->get(l_bluestore_fragmentation) > + cct->_conf.get_val("bluestore_warn_on_free_fragmentation") * 1e6) { + alerts.emplace("BLUESTORE_FREE_FRAGMENTATION", + fmt::format("{0:.6f}", logger->get(l_bluestore_fragmentation) * 1e-6)); + } } void BlueStore::_collect_allocation_stats(uint64_t need, uint32_t alloc_size, diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index f7a41f85775ae..b8d4c35be078b 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -2688,6 +2688,8 @@ private: private: void _update_cache_settings(); void _resize_shards(bool interval_stats); + + mono_clock::time_point last_fragmentation_check; } mempool_thread; #ifdef WITH_BLKIN -- 2.39.5