From: Jaya Prakash Date: Tue, 9 Dec 2025 16:50:19 +0000 (+0000) Subject: os/bluestore: separate slow scrub ops from normal slow ops X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=4125ad1c65172f0e57668b8145bbfb18816f84f5;p=ceph.git os/bluestore: separate slow scrub ops from normal slow ops This Commit introduces separate slow-op tracking paths: 1. Normal IO slow op queue 2. Scrub IO slow op queue This enables the OSD to record slow scrub-originated operations distinctly, without mixing them with client IO latency. Fixes: https://tracker.ceph.com/issues/73990 Signed-off-by: Jaya Prakash --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 6ce95c827cd5..2261e447485c 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -12643,10 +12643,18 @@ int BlueStore::read( dout(10) << __func__ << " " << cid << " " << oid << " 0x" << std::hex << offset << "~" << length << std::dec << " = " << r << dendl; - log_latency(__func__, - l_bluestore_read_lat, - mono_clock::now() - start, - cct->_conf->bluestore_log_op_age); + + if (op_flags & CEPH_OSD_OP_FLAG_SCRUB) { + log_latency_scrub(__func__, + l_bluestore_read_lat, + mono_clock::now() - start, + cct->_conf->bluestore_log_scrub_op_age); + } else { + log_latency(__func__, + l_bluestore_read_lat, + mono_clock::now() - start, + cct->_conf->bluestore_log_op_age); + } return r; } @@ -12998,13 +13006,23 @@ int BlueStore::_do_read( return -EIO; } } - log_latency_fn(__func__, - l_bluestore_read_wait_aio_lat, - mono_clock::now() - start, - cct->_conf->bluestore_log_op_age, - [&](auto lat) { return ", num_ios = " + stringify(num_ios); }, - l_bluestore_slow_read_wait_aio_count - ); + if (op_flags & CEPH_OSD_OP_FLAG_SCRUB) { + log_latency_fn_scrub(__func__, + l_bluestore_read_wait_aio_lat, + mono_clock::now() - start, + cct->_conf->bluestore_log_scrub_op_age, + [&](auto lat) { return ", num_ios = " + stringify(num_ios); }, + l_bluestore_slow_read_wait_aio_count + ); + } else { + log_latency_fn(__func__, + l_bluestore_read_wait_aio_lat, + mono_clock::now() - start, + cct->_conf->bluestore_log_op_age, + [&](auto lat) { return ", num_ios = " + stringify(num_ios); }, + l_bluestore_slow_read_wait_aio_count + ); + } bool csum_error = false; r = _generate_read_result_bl(o, offset, length, ready_regions, @@ -13299,10 +13317,17 @@ int BlueStore::readv( dout(10) << __func__ << " " << cid << " " << oid << " fiemap " << m << std::dec << " = " << r << dendl; - log_latency(__func__, - l_bluestore_read_lat, - mono_clock::now() - start, - cct->_conf->bluestore_log_op_age); + if (op_flags & CEPH_OSD_OP_FLAG_SCRUB) { + log_latency_scrub(__func__, + l_bluestore_read_lat, + mono_clock::now() - start, + cct->_conf->bluestore_log_scrub_op_age); + } else { + log_latency(__func__, + l_bluestore_read_lat, + mono_clock::now() - start, + cct->_conf->bluestore_log_op_age); + } return r; } @@ -13376,13 +13401,23 @@ int BlueStore::_do_readv( return -EIO; } } - log_latency_fn(__func__, - l_bluestore_read_wait_aio_lat, - mono_clock::now() - start, - cct->_conf->bluestore_log_op_age, - [&](auto lat) { return ", num_ios = " + stringify(num_ios); }, - l_bluestore_slow_read_wait_aio_count - ); + if (op_flags & CEPH_OSD_OP_FLAG_SCRUB) { + log_latency_fn_scrub(__func__, + l_bluestore_read_wait_aio_lat, + mono_clock::now() - start, + cct->_conf->bluestore_log_scrub_op_age, + [&](auto lat) { return ", num_ios = " + stringify(num_ios); }, + l_bluestore_slow_read_wait_aio_count + ); + } else { + log_latency_fn(__func__, + l_bluestore_read_wait_aio_lat, + mono_clock::now() - start, + cct->_conf->bluestore_log_op_age, + [&](auto lat) { return ", num_ios = " + stringify(num_ios); }, + l_bluestore_slow_read_wait_aio_count + ); + } ceph_assert(raw_results.size() == (size_t)m.num_intervals()); i = 0; @@ -18832,15 +18867,23 @@ int BlueStore::_merge_collection( return r; } -size_t BlueStore::_trim_slow_op_event_queue(mono_clock::time_point cur_time) { +std::pair BlueStore::_trim_slow_op_event_queue( + mono_clock::time_point cur_time) +{ ceph_assert(ceph_mutex_is_locked(qlock)); auto warn_duration = std::chrono::seconds(cct->_conf->bluestore_slow_ops_warn_lifetime); while (!slow_op_event_queue.empty() && - ((slow_op_event_queue.front() < cur_time - warn_duration) || - (slow_op_event_queue.size() > cct->_conf->bluestore_slow_ops_warn_threshold))) { + ((slow_op_event_queue.front().first < cur_time - warn_duration) || + (slow_op_event_count > cct->_conf->bluestore_slow_ops_warn_threshold) || + (slow_scrub_op_event_count > cct->_conf->bluestore_slow_scrub_ops_warn_threshold))) { + if (!slow_op_event_queue.front().second) { + slow_op_event_count--; + } else { + slow_scrub_op_event_count--; + } slow_op_event_queue.pop(); } - return slow_op_event_queue.size(); + return {slow_op_event_count, slow_scrub_op_event_count}; } void BlueStore::_add_slow_op_event() { @@ -18849,7 +18892,19 @@ void BlueStore::_add_slow_op_event() { } std::lock_guard lock(qlock); auto cur_time = mono_clock::now(); - slow_op_event_queue.push(cur_time); + slow_op_event_queue.push({cur_time, false}); + slow_op_event_count++; + _trim_slow_op_event_queue(cur_time); +} + +void BlueStore::_add_slow_scrub_op_event() { + if (!cct->_conf->bluestore_slow_scrub_ops_warn_threshold) { + return; + } + std::lock_guard lock(qlock); + auto cur_time = mono_clock::now(); + slow_op_event_queue.push({cur_time, true}); + slow_scrub_op_event_count++; _trim_slow_op_event_queue(cur_time); } @@ -18875,6 +18930,28 @@ void BlueStore::log_latency( } } +void BlueStore::log_latency_scrub( + const char* name, + int idx, + const ceph::timespan& l, + double lat_threshold, + const char* info, + int idx2) +{ + logger->tinc_with_max(idx, l); + if (lat_threshold > 0.0 && + l >= make_timespan(lat_threshold)) { + dout(0) << __func__ << " slow operation observed in scrub for " << name + << ", latency = " << l + << info + << dendl; + _add_slow_scrub_op_event(); + if (idx2 > l_bluestore_first && idx2 < l_bluestore_last) { + logger->inc(idx2); + } + } +} + void BlueStore::log_latency_fn( const char* name, int idx, @@ -18897,6 +18974,28 @@ void BlueStore::log_latency_fn( } } +void BlueStore::log_latency_fn_scrub( + const char* name, + int idx, + const ceph::timespan& l, + double lat_threshold, + std::function fn, + int idx2) +{ + logger->tinc_with_max(idx, l); + if (lat_threshold > 0.0 && + l >= make_timespan(lat_threshold)) { + dout(0) << __func__ << " slow operation observed in scrub for " << name + << ", latency = " << l + << fn(l) + << dendl; + _add_slow_scrub_op_event(); + if (idx2 > l_bluestore_first && idx2 < l_bluestore_last) { + logger->inc(idx2); + } + } +} + #if defined(WITH_LTTNG) void BlueStore::BlueStoreThrottle::emit_initial_tracepoint( KeyValueDB &db, @@ -19310,8 +19409,9 @@ void BlueStore::_log_alerts(osd_alert_list_t& alerts) spillover_alert.clear(); } if (cct->_conf->bluestore_slow_ops_warn_threshold) { - size_t qsize = _trim_slow_op_event_queue(mono_clock::now()); - if (qsize >= cct->_conf->bluestore_slow_ops_warn_threshold) { + auto [qsize, scrub_qsize] = _trim_slow_op_event_queue(mono_clock::now()); + if (qsize >= cct->_conf->bluestore_slow_ops_warn_threshold || + scrub_qsize >= cct->_conf->bluestore_slow_scrub_ops_warn_threshold) { ostringstream ss; ss << "observed slow operation indications in BlueStore"; alerts.emplace("BLUESTORE_SLOW_OP_ALERT", ss.str()); diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 0f4fb21d00f9..650a78f91ed4 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -3577,6 +3577,13 @@ public: const char* info = "", int idx2 = l_bluestore_first); + inline void log_latency_scrub(const char* name, + int idx, + const ceph::timespan& l, + double lat_threshold, + const char* info = "", + int idx2 = l_bluestore_first); + inline void log_latency_fn(const char* name, int idx, const ceph::timespan& lat, @@ -3584,6 +3591,13 @@ public: std::function fn, int idx2 = l_bluestore_first); + inline void log_latency_fn_scrub(const char* name, + int idx, + const ceph::timespan& lat, + double lat_threshold, + std::function fn, + int idx2 = l_bluestore_first); + private: bool _debug_data_eio(const ghobject_t& o) { if (!cct->_conf->bluestore_debug_inject_read_err) { @@ -3616,10 +3630,13 @@ private: std::string no_per_pg_omap_alert; std::string disk_size_mismatch_alert; std::string spurious_read_errors_alert; - std::queue slow_op_event_queue; + std::queue > slow_op_event_queue; + size_t slow_op_event_count = 0; + size_t slow_scrub_op_event_count = 0; - size_t _trim_slow_op_event_queue(ceph::mono_clock::time_point cur_time); + std::pair _trim_slow_op_event_queue(ceph::mono_clock::time_point cur_time); void _add_slow_op_event(); + void _add_slow_scrub_op_event(); void _log_alerts(osd_alert_list_t& alerts); bool _set_compression_alert(bool cmode, const char* s) { std::lock_guard l(qlock);