#define dout_prefix *_dout << "bdev "
using std::string;
+using ceph::mono_clock;
blk_access_mode_t buffermode(bool buffered)
}
return ret;
}
+
+/**
+ * Drop stalled-read events that no longer count towards the alert.
+ *
+ * Events are popped from the front of the queue while the oldest one is more
+ * than bdev_stalled_read_warn_lifetime seconds older than cur_time, or while
+ * the queue holds more than bdev_stalled_read_warn_threshold entries.
+ * stalled_read_event_queue_lock is held for the duration of the call.
+ *
+ * @param cur_time reference time against which the age of events is measured
+ * @return number of events left in the queue after trimming
+ */
+size_t BlockDevice::trim_stalled_read_event_queue(mono_clock::time_point cur_time) {
+  std::lock_guard lock(stalled_read_event_queue_lock);
+  const auto warn_duration = std::chrono::seconds(cct->_conf->bdev_stalled_read_warn_lifetime);
+  // Measure each event's age instead of computing `cur_time - warn_duration`.
+  // NOTE(review): if mono_clock uses an unsigned representation (confirm in
+  // ceph_time.h) and the clock reading is smaller than the lifetime, that
+  // subtraction wraps around and every queued event would look expired.
+  // The `event_time < cur_time` guard keeps the age computation from wrapping
+  // when an event was recorded after cur_time was sampled.
+  const auto expired = [&](const mono_clock::time_point& event_time) {
+    return event_time < cur_time && (cur_time - event_time) > warn_duration;
+  };
+  while (!stalled_read_event_queue.empty() &&
+         (expired(stalled_read_event_queue.front()) ||
+          (stalled_read_event_queue.size() > cct->_conf->bdev_stalled_read_warn_threshold))) {
+    stalled_read_event_queue.pop();
+  }
+  return stalled_read_event_queue.size();
+}
+
+/**
+ * Record one stalled-read occurrence, timestamped with the current
+ * monotonic time, and then trim the event queue.
+ *
+ * Nothing is recorded when bdev_stalled_read_warn_threshold is 0.
+ */
+void BlockDevice::add_stalled_read_event() {
+  if (cct->_conf->bdev_stalled_read_warn_threshold != 0) {
+    const auto now = mono_clock::now();
+    {
+      // Release the lock before trimming: the trim helper acquires it itself.
+      std::lock_guard guard(stalled_read_event_queue_lock);
+      stalled_read_event_queue.push(now);
+    }
+    trim_stalled_read_event_queue(now);
+  }
+}
+
+/**
+ * Add a stalled-read alert for this device to `alerts` when enough stalled
+ * reads are still queued after trimming.
+ *
+ * The alert key is `device_name` followed by "_DEVICE_STALLED_READ_ALERT".
+ * Nothing is reported when bdev_stalled_read_warn_threshold is 0.
+ *
+ * @param alerts      alert collection to add to
+ * @param device_name label used in both the alert key and the message text
+ */
+void BlockDevice::collect_alerts(osd_alert_list_t& alerts, const std::string& device_name) {
+  if (!cct->_conf->bdev_stalled_read_warn_threshold) {
+    return;
+  }
+  const size_t pending = trim_stalled_read_event_queue(mono_clock::now());
+  if (pending < cct->_conf->bdev_stalled_read_warn_threshold) {
+    return;
+  }
+  std::ostringstream msg;
+  msg << "observed stalled read indications in " << device_name << " device";
+  alerts.emplace(device_name + "_DEVICE_STALLED_READ_ALERT", msg.str());
+}
+
#include <set>
#include <string>
#include <vector>
+#include <queue>
#include "acconfig.h"
#include "common/ceph_mutex.h"
#include "include/common_fwd.h"
+#include "osd/osd_types.h"
#if defined(HAVE_LIBAIO) || defined(HAVE_POSIXAIO)
#include "aio/aio.h"
public:
CephContext* cct;
typedef void (*aio_callback_t)(void *handle, void *aio);
+ void collect_alerts(osd_alert_list_t& alerts, const std::string& device_name);
+
private:
ceph::mutex ioc_reap_lock = ceph::make_mutex("BlockDevice::ioc_reap_lock");
std::vector<IOContext*> ioc_reap_queue;
pmem,
#endif
};
+ std::queue <ceph::mono_clock::time_point> stalled_read_event_queue;
+ ceph::mutex stalled_read_event_queue_lock = ceph::make_mutex("BlockDevice::stalled_read_event_queue_lock");
+ size_t trim_stalled_read_event_queue(mono_clock::time_point cur_time);
static block_device_t detect_device_type(const std::string& path);
static block_device_t device_type_from_name(const std::string& blk_dev_name);
static BlockDevice *create_with_type(block_device_t device_type,
CephContext* cct, const std::string& path, aio_callback_t cb,
void *cbpriv, aio_callback_t d_cb, void *d_cbpriv);
-
protected:
uint64_t size = 0;
uint64_t block_size = 0;
// of the drive. The zones 524-52155 are sequential zones.
uint64_t conventional_region_size = 0;
uint64_t zone_size = 0;
+ void add_stalled_read_event();
public:
aio_callback_t aio_callback;
<< " since " << start1 << ", timeout is "
<< age
<< "s" << dendl;
+ add_stalled_read_event();
}
if (r < 0) {
if (ioc->allow_eio && is_expected_ioerr(-errno)) {
<< " since " << start1 << ", timeout is "
<< age
<< "s" << dendl;
+ add_stalled_read_event();
}
if (r < 0) {
<< " (buffered) since " << start1 << ", timeout is "
<< age
<< "s" << dendl;
+ add_stalled_read_event();
}
} else {
//direct and aligned read
<< " (direct) since " << start1 << ", timeout is "
<< age
<< "s" << dendl;
+ add_stalled_read_event();
}
if (r < 0) {
r = -errno;
desc: Enable health indication when spurious read errors are observed by OSD
default: true
with_legacy: true
+- name: bluestore_slow_ops_warn_lifetime
+ type: uint
+ level: advanced
+ desc: The time window, in seconds, over which slow operations are counted. A slow ops warning is raised when the number of occurrences within the last `bluestore_slow_ops_warn_lifetime` seconds reaches `bluestore_slow_ops_warn_threshold`.
+ default: 86400
+ with_legacy: true
+- name: bluestore_slow_ops_warn_threshold
+ type: uint
+ level: advanced
+ desc: The number of slow operations, observed within the last `bluestore_slow_ops_warn_lifetime` seconds, at which a slow ops warning is raised. Setting it to 0 disables the warning.
+ default: 1
+ with_legacy: true
- name: bluestore_fsck_error_on_no_per_pool_omap
type: bool
level: advanced
- spdk
- pmem
- hm_smr
+- name: bdev_stalled_read_warn_lifetime
+ type: uint
+ level: advanced
+ desc: The time window, in seconds, over which stalled reads are counted. A stalled read warning is raised when the number of occurrences within the last `bdev_stalled_read_warn_lifetime` seconds reaches `bdev_stalled_read_warn_threshold`.
+ default: 86400
+ with_legacy: true
+- name: bdev_stalled_read_warn_threshold
+ type: uint
+ level: advanced
+ desc: The number of stalled reads, observed within the last `bdev_stalled_read_warn_lifetime` seconds, at which a stalled read warning is raised. Setting it to 0 disables the warning.
+ default: 1
+ with_legacy: true
- name: bluestore_cleaner_sleep_interval
type: float
level: advanced
summary += " reporting legacy (not per-pool) BlueStore omap usage stats";
} else if (asum.first == "BLUESTORE_SPURIOUS_READ_ERRORS") {
summary += " have spurious read errors";
+ } else if (asum.first == "BLUESTORE_SLOW_OP_ALERT") {
+ summary += " experiencing slow operations in BlueStore";
+ } else if (asum.first == "BLOCK_DEVICE_STALLED_READ_ALERT") {
+ summary += " experiencing stalled read in block device of BlueStore";
+ } else if (asum.first == "WAL_DEVICE_STALLED_READ_ALERT") {
+ summary += " experiencing stalled read in wal device of BlueFS";
+ } else if (asum.first == "DB_DEVICE_STALLED_READ_ALERT") {
+ summary += " experiencing stalled read in db device of BlueFS";
}
auto& d = checks->add(asum.first, HEALTH_WARN, summary, asum.second.first);
return h->dirty_devs[dev];
}
+/**
+ * Forward alert collection to the DB and WAL block devices, in that order,
+ * skipping any slot that holds no device.
+ *
+ * @param alerts alert collection passed through to each device
+ */
+void BlueFS::collect_alerts(osd_alert_list_t& alerts) {
+  if (auto& db_dev = bdev[BDEV_DB]; db_dev) {
+    db_dev->collect_alerts(alerts, "DB");
+  }
+  if (auto& wal_dev = bdev[BDEV_WAL]; wal_dev) {
+    wal_dev->collect_alerts(alerts, "WAL");
+  }
+}
+
int BlueFS::open_for_read(
std::string_view dirname,
std::string_view filename,
WRITER_WAL,
WRITER_SST,
};
+ void collect_alerts(osd_alert_list_t& alerts);
struct File : public RefCountedObject {
MEMPOOL_CLASS_HELPERS();
FileRef file;
explicit FileLock(FileRef f) : file(std::move(f)) {}
};
-
private:
PerfCounters *logger = nullptr;
int BlueStore::_verify_csum(OnodeRef& o,
const bluestore_blob_t* blob, uint64_t blob_xoffset,
const bufferlist& bl,
- uint64_t logical_offset) const
+ uint64_t logical_offset)
{
int bad;
uint64_t bad_csum;
return r;
}
+/**
+ * Drop slow-op events that no longer count towards the alert.
+ *
+ * Events are popped from the front of the queue while the oldest one is more
+ * than bluestore_slow_ops_warn_lifetime seconds older than cur_time, or while
+ * the queue holds more than bluestore_slow_ops_warn_threshold entries.
+ * The caller must already hold qlock (asserted below).
+ *
+ * @param cur_time reference time against which the age of events is measured
+ * @return number of events left in the queue after trimming
+ */
+size_t BlueStore::_trim_slow_op_event_queue(mono_clock::time_point cur_time) {
+  ceph_assert(ceph_mutex_is_locked(qlock));
+  const auto warn_duration = std::chrono::seconds(cct->_conf->bluestore_slow_ops_warn_lifetime);
+  // Measure each event's age instead of computing `cur_time - warn_duration`.
+  // NOTE(review): if mono_clock uses an unsigned representation (confirm in
+  // ceph_time.h) and the clock reading is smaller than the lifetime, that
+  // subtraction wraps around and every queued event would look expired.
+  // The `event_time < cur_time` guard keeps the age computation from wrapping.
+  const auto expired = [&](const mono_clock::time_point& event_time) {
+    return event_time < cur_time && (cur_time - event_time) > warn_duration;
+  };
+  while (!slow_op_event_queue.empty() &&
+         (expired(slow_op_event_queue.front()) ||
+          (slow_op_event_queue.size() > cct->_conf->bluestore_slow_ops_warn_threshold))) {
+    slow_op_event_queue.pop();
+  }
+  return slow_op_event_queue.size();
+}
+
+/**
+ * Record one slow-operation occurrence, timestamped with the current
+ * monotonic time, and trim the event queue, all under qlock.
+ *
+ * Nothing is recorded when bluestore_slow_ops_warn_threshold is 0.
+ */
+void BlueStore::_add_slow_op_event() {
+  if (cct->_conf->bluestore_slow_ops_warn_threshold != 0) {
+    // The timestamp is sampled after the lock is acquired.
+    std::lock_guard guard(qlock);
+    const auto now = mono_clock::now();
+    slow_op_event_queue.push(now);
+    _trim_slow_op_event_queue(now);
+  }
+}
+
void BlueStore::log_latency(
const char* name,
int idx,
const ceph::timespan& l,
double lat_threshold,
- const char* info) const
+ const char* info)
{
logger->tinc(idx, l);
if (lat_threshold > 0.0 &&
<< ", latency = " << l
<< info
<< dendl;
+ _add_slow_op_event();
}
}
int idx,
const ceph::timespan& l,
double lat_threshold,
- std::function<string (const ceph::timespan& lat)> fn) const
+ std::function<string (const ceph::timespan& lat)> fn)
{
logger->tinc(idx, l);
if (lat_threshold > 0.0 &&
<< ", latency = " << l
<< fn(l)
<< dendl;
+ _add_slow_op_event();
}
}
} else if (!spillover_alert.empty()){
spillover_alert.clear();
}
-
+ if (cct->_conf->bluestore_slow_ops_warn_threshold) {
+ size_t qsize = _trim_slow_op_event_queue(mono_clock::now());
+ if (qsize >= cct->_conf->bluestore_slow_ops_warn_threshold) {
+ ostringstream ss;
+ ss << "observed slow operation indications in BlueStore";
+ alerts.emplace("BLUESTORE_SLOW_OP_ALERT", ss.str());
+ }
+ }
+ bdev->collect_alerts(alerts, "BLOCK");
+ bluefs->collect_alerts(alerts);
if (!spurious_read_errors_alert.empty() &&
cct->_conf->bluestore_warn_on_spurious_read_errors) {
alerts.emplace(
#include <chrono>
#include <ratio>
#include <mutex>
+#include <queue>
#include <condition_variable>
#include <boost/intrusive/list.hpp>
int idx,
const ceph::timespan& lat,
double lat_threshold,
- const char* info = "") const;
+ const char* info = "");
inline void log_latency_fn(const char* name,
int idx,
const ceph::timespan& lat,
double lat_threshold,
- std::function<std::string (const ceph::timespan& lat)> fn) const;
+ std::function<std::string (const ceph::timespan& lat)> fn);
private:
bool _debug_data_eio(const ghobject_t& o) {
std::string no_per_pg_omap_alert;
std::string disk_size_mismatch_alert;
std::string spurious_read_errors_alert;
+ std::queue <ceph::mono_clock::time_point> slow_op_event_queue;
+ size_t _trim_slow_op_event_queue(ceph::mono_clock::time_point cur_time);
+ void _add_slow_op_event();
void _log_alerts(osd_alert_list_t& alerts);
bool _set_compression_alert(bool cmode, const char* s) {
std::lock_guard l(qlock);
const bluestore_blob_t* blob,
uint64_t blob_xoffset,
const ceph::buffer::list& bl,
- uint64_t logical_offset) const;
+ uint64_t logical_offset);
int _decompress(ceph::buffer::list& source, ceph::buffer::list* result);