dout(10) << __func__ << " " << cid << " " << oid
<< " 0x" << std::hex << offset << "~" << length << std::dec
<< " = " << r << dendl;
- log_latency(__func__,
- l_bluestore_read_lat,
- mono_clock::now() - start,
- cct->_conf->bluestore_log_op_age);
+
+ if (op_flags & CEPH_OSD_OP_FLAG_SCRUB) {
+ log_latency_scrub(__func__,
+ l_bluestore_read_lat,
+ mono_clock::now() - start,
+ cct->_conf->bluestore_log_scrub_op_age);
+ } else {
+ log_latency(__func__,
+ l_bluestore_read_lat,
+ mono_clock::now() - start,
+ cct->_conf->bluestore_log_op_age);
+ }
return r;
}
return -EIO;
}
}
- log_latency_fn(__func__,
- l_bluestore_read_wait_aio_lat,
- mono_clock::now() - start,
- cct->_conf->bluestore_log_op_age,
- [&](auto lat) { return ", num_ios = " + stringify(num_ios); },
- l_bluestore_slow_read_wait_aio_count
- );
+ if (op_flags & CEPH_OSD_OP_FLAG_SCRUB) {
+ log_latency_fn_scrub(__func__,
+ l_bluestore_read_wait_aio_lat,
+ mono_clock::now() - start,
+ cct->_conf->bluestore_log_scrub_op_age,
+ [&](auto lat) { return ", num_ios = " + stringify(num_ios); },
+ l_bluestore_slow_read_wait_aio_count
+ );
+ } else {
+ log_latency_fn(__func__,
+ l_bluestore_read_wait_aio_lat,
+ mono_clock::now() - start,
+ cct->_conf->bluestore_log_op_age,
+ [&](auto lat) { return ", num_ios = " + stringify(num_ios); },
+ l_bluestore_slow_read_wait_aio_count
+ );
+ }
bool csum_error = false;
r = _generate_read_result_bl(o, offset, length, ready_regions,
dout(10) << __func__ << " " << cid << " " << oid
<< " fiemap " << m << std::dec
<< " = " << r << dendl;
- log_latency(__func__,
- l_bluestore_read_lat,
- mono_clock::now() - start,
- cct->_conf->bluestore_log_op_age);
+ if (op_flags & CEPH_OSD_OP_FLAG_SCRUB) {
+ log_latency_scrub(__func__,
+ l_bluestore_read_lat,
+ mono_clock::now() - start,
+ cct->_conf->bluestore_log_scrub_op_age);
+ } else {
+ log_latency(__func__,
+ l_bluestore_read_lat,
+ mono_clock::now() - start,
+ cct->_conf->bluestore_log_op_age);
+ }
return r;
}
return -EIO;
}
}
- log_latency_fn(__func__,
- l_bluestore_read_wait_aio_lat,
- mono_clock::now() - start,
- cct->_conf->bluestore_log_op_age,
- [&](auto lat) { return ", num_ios = " + stringify(num_ios); },
- l_bluestore_slow_read_wait_aio_count
- );
+ if (op_flags & CEPH_OSD_OP_FLAG_SCRUB) {
+ log_latency_fn_scrub(__func__,
+ l_bluestore_read_wait_aio_lat,
+ mono_clock::now() - start,
+ cct->_conf->bluestore_log_scrub_op_age,
+ [&](auto lat) { return ", num_ios = " + stringify(num_ios); },
+ l_bluestore_slow_read_wait_aio_count
+ );
+ } else {
+ log_latency_fn(__func__,
+ l_bluestore_read_wait_aio_lat,
+ mono_clock::now() - start,
+ cct->_conf->bluestore_log_op_age,
+ [&](auto lat) { return ", num_ios = " + stringify(num_ios); },
+ l_bluestore_slow_read_wait_aio_count
+ );
+ }
ceph_assert(raw_results.size() == (size_t)m.num_intervals());
i = 0;
return r;
}
-size_t BlueStore::_trim_slow_op_event_queue(mono_clock::time_point cur_time) {
+std::pair<size_t, size_t> BlueStore::_trim_slow_op_event_queue(
+ mono_clock::time_point cur_time)
+{
ceph_assert(ceph_mutex_is_locked(qlock));
auto warn_duration = std::chrono::seconds(cct->_conf->bluestore_slow_ops_warn_lifetime);
while (!slow_op_event_queue.empty() &&
- ((slow_op_event_queue.front() < cur_time - warn_duration) ||
- (slow_op_event_queue.size() > cct->_conf->bluestore_slow_ops_warn_threshold))) {
+ ((slow_op_event_queue.front().first < cur_time - warn_duration) ||
+ (slow_op_event_count > cct->_conf->bluestore_slow_ops_warn_threshold) ||
+ (slow_scrub_op_event_count > cct->_conf->bluestore_slow_scrub_ops_warn_threshold))) {
+ if (!slow_op_event_queue.front().second) {
+ slow_op_event_count--;
+ } else {
+ slow_scrub_op_event_count--;
+ }
slow_op_event_queue.pop();
}
- return slow_op_event_queue.size();
+ return {slow_op_event_count, slow_scrub_op_event_count};
}
void BlueStore::_add_slow_op_event() {
}
std::lock_guard lock(qlock);
auto cur_time = mono_clock::now();
- slow_op_event_queue.push(cur_time);
+ slow_op_event_queue.push({cur_time, false});
+ slow_op_event_count++;
+ _trim_slow_op_event_queue(cur_time);
+}
+
+void BlueStore::_add_slow_scrub_op_event() {
+ if (!cct->_conf->bluestore_slow_scrub_ops_warn_threshold) {
+ return;
+ }
+ std::lock_guard lock(qlock);
+ auto cur_time = mono_clock::now();
+ slow_op_event_queue.push({cur_time, true});
+ slow_scrub_op_event_count++;
_trim_slow_op_event_queue(cur_time);
}
}
}
+void BlueStore::log_latency_scrub(
+ const char* name,
+ int idx,
+ const ceph::timespan& l,
+ double lat_threshold,
+ const char* info,
+ int idx2)
+{
+ logger->tinc_with_max(idx, l);
+ if (lat_threshold > 0.0 &&
+ l >= make_timespan(lat_threshold)) {
+ dout(0) << __func__ << " slow operation observed in scrub for " << name
+ << ", latency = " << l
+ << info
+ << dendl;
+ _add_slow_scrub_op_event();
+ if (idx2 > l_bluestore_first && idx2 < l_bluestore_last) {
+ logger->inc(idx2);
+ }
+ }
+}
+
void BlueStore::log_latency_fn(
const char* name,
int idx,
}
}
+void BlueStore::log_latency_fn_scrub(
+ const char* name,
+ int idx,
+ const ceph::timespan& l,
+ double lat_threshold,
+ std::function<string (const ceph::timespan& lat)> fn,
+ int idx2)
+{
+ logger->tinc_with_max(idx, l);
+ if (lat_threshold > 0.0 &&
+ l >= make_timespan(lat_threshold)) {
+ dout(0) << __func__ << " slow operation observed in scrub for " << name
+ << ", latency = " << l
+ << fn(l)
+ << dendl;
+ _add_slow_scrub_op_event();
+ if (idx2 > l_bluestore_first && idx2 < l_bluestore_last) {
+ logger->inc(idx2);
+ }
+ }
+}
+
#if defined(WITH_LTTNG)
void BlueStore::BlueStoreThrottle::emit_initial_tracepoint(
KeyValueDB &db,
spillover_alert.clear();
}
if (cct->_conf->bluestore_slow_ops_warn_threshold) {
- size_t qsize = _trim_slow_op_event_queue(mono_clock::now());
- if (qsize >= cct->_conf->bluestore_slow_ops_warn_threshold) {
+ auto [qsize, scrub_qsize] = _trim_slow_op_event_queue(mono_clock::now());
+ if (qsize >= cct->_conf->bluestore_slow_ops_warn_threshold ||
+ scrub_qsize >= cct->_conf->bluestore_slow_scrub_ops_warn_threshold) {
ostringstream ss;
ss << "observed slow operation indications in BlueStore";
alerts.emplace("BLUESTORE_SLOW_OP_ALERT", ss.str());
const char* info = "",
int idx2 = l_bluestore_first);
+ inline void log_latency_scrub(const char* name,
+ int idx,
+ const ceph::timespan& l,
+ double lat_threshold,
+ const char* info = "",
+ int idx2 = l_bluestore_first);
+
inline void log_latency_fn(const char* name,
int idx,
const ceph::timespan& lat,
std::function<std::string (const ceph::timespan& lat)> fn,
int idx2 = l_bluestore_first);
+ inline void log_latency_fn_scrub(const char* name,
+ int idx,
+ const ceph::timespan& lat,
+ double lat_threshold,
+ std::function<std::string (const ceph::timespan& lat)> fn,
+ int idx2 = l_bluestore_first);
+
private:
bool _debug_data_eio(const ghobject_t& o) {
if (!cct->_conf->bluestore_debug_inject_read_err) {
std::string no_per_pg_omap_alert;
std::string disk_size_mismatch_alert;
std::string spurious_read_errors_alert;
- std::queue <ceph::mono_clock::time_point> slow_op_event_queue;
+ std::queue <std::pair<ceph::mono_clock::time_point, bool>> slow_op_event_queue;
+ size_t slow_op_event_count = 0;
+ size_t slow_scrub_op_event_count = 0;
- size_t _trim_slow_op_event_queue(ceph::mono_clock::time_point cur_time);
+ std::pair<size_t, size_t> _trim_slow_op_event_queue(ceph::mono_clock::time_point cur_time);
void _add_slow_op_event();
+ void _add_slow_scrub_op_event();
void _log_alerts(osd_alert_list_t& alerts);
bool _set_compression_alert(bool cmode, const char* s) {
std::lock_guard l(qlock);