scan_extents, scan_inodes, and other state-changing operations.
Related Tracker: https://tracker.ceph.com/issues/63191
+* OSD: A health warning is reported when BlueFS usage exceeds the
+ configured ratio (``bluestore_bluefs_warn_ratio``, default 0.06) of the
+ main OSD data device size. This warning is informational and can be
+ muted with:
+ ``ceph health mute BLUESTORE_BLUEFS_OVERSIZED``
+
>=20.0.0
* RADOS: The lead Monitor and stretch mode status are now displayed by `ceph status`.
flags:
- runtime
with_legacy: true
+- name: bluestore_bluefs_warn_ratio
+ type: float
+ level: basic
+ desc: The ratio of BlueFS usage to main device size above which a health
+ warning is raised. Set to "1" to disable.
+ default: 0.06
+ with_legacy: false
+ flags:
+ - runtime
# rocksdb options that will be used for omap(if omap_backend is rocksdb)
- name: filestore_rocksdb_options
type: str
summary += " experiencing stalled read in block device of BlueStore";
} else if (asum.first == "WAL_DEVICE_STALLED_READ_ALERT") {
summary += " experiencing stalled read in wal device of BlueFS";
+ } else if (asum.first == "BLUESTORE_BLUEFS_OVERSIZED") {
+ summary += " have BlueFS usage exceeding configured ratio of main device size";
} else if (asum.first == "DB_DEVICE_STALLED_READ_ALERT") {
summary += " experiencing stalled read in db device of BlueFS";
} else if (asum.first.find("_DISCARD_QUEUE") != std::string::npos) {
// Public accessor for the usage figure of one BlueFS device slot.
//
// id: index of the device slot; must be a valid index into `alloc`
//     (enforced by assertion below).
// Returns whatever _get_used(id) reports for that slot.
uint64_t BlueFS::get_used(unsigned id)
{
// Reject out-of-range slot indices before touching per-device state.
ceph_assert(id < alloc.size());
// NOTE(review): the next line carries a diff removal marker ('-'), i.e. the
// patch drops the requirement that an allocator exists for this slot.
// Presumably _get_used() copes with an absent allocator -- confirm before
// relying on this for devices that may not be configured.
- ceph_assert(alloc[id]);
return _get_used(id);
}
} else if (!spillover_alert.empty()){
spillover_alert.clear();
}
+ // CHECK: BlueFS usage relative to main device size
+ if (bluefs) {
+ uint64_t db_used = bluefs->get_used(BlueFS::BDEV_DB);
+ uint64_t wal_used = bluefs->get_used(BlueFS::BDEV_WAL);
+ uint64_t slow_used = bluefs->get_used(BlueFS::BDEV_SLOW);
+ uint64_t main_size = bdev->get_size();
+
+ if (main_size > 0) {
+ uint64_t total_bluefs_usage = db_used + wal_used + slow_used;
+ double ratio = static_cast<double>(total_bluefs_usage) /
+ static_cast<double>(main_size);
+ double warn_ratio =
+ cct->_conf.get_val<double>("bluestore_bluefs_warn_ratio");
+
+ if (ratio > warn_ratio) {
+ ostringstream ss;
+ ss << "BlueFS usage (" << byte_u_t(total_bluefs_usage)
+ << ") exceeds " << std::fixed << std::setprecision(4)
+ << (warn_ratio * 100.0) << "% of main device ("
+ << byte_u_t(main_size) << ", "
+ << std::fixed << std::setprecision(2)
+ << ratio * 100.0 << "%)";
+ alerts.emplace("BLUESTORE_BLUEFS_OVERSIZED", ss.str());
+ }
+ }
+ }
if (cct->_conf->bluestore_slow_ops_warn_threshold) {
size_t qsize = _trim_slow_op_event_queue(mono_clock::now());
if (qsize >= cct->_conf->bluestore_slow_ops_warn_threshold) {