From a3ab32e7651073a64a3c5b24ee5ddc36c7bcf8b2 Mon Sep 17 00:00:00 2001 From: Igor Fedotov Date: Fri, 12 Apr 2019 17:04:07 +0300 Subject: [PATCH] os/bluestore: alert on fm/bdev size mismatch Relates to: https://tracker.ceph.com/issues/39151 Signed-off-by: Igor Fedotov --- src/mon/PGMap.cc | 2 ++ src/os/bluestore/BlueStore.cc | 18 ++++++++++++++++++ src/os/bluestore/BlueStore.h | 5 +++++ 3 files changed, 25 insertions(+) diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index 28e1dfc5419..aa24d7f9746 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -3005,6 +3005,8 @@ void PGMap::get_health_checks( summary = "BlueStore compression broken"; } else if (asum.first == "BLUESTORE_LEGACY_STATFS") { summary = "Legacy BlueStore stats reporting detected"; + } else if (asum.first == "BLUESTORE_DISK_SIZE_MISMATCH") { + summary = "BlueStore has dangerous mismatch between block device and free list sizes"; } summary += " on "; summary += stringify(asum.second.first); diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index c0296e7e3c9..9eb2cb0fed6 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -4822,6 +4822,19 @@ int BlueStore::_open_fm(KeyValueDB::Transaction t) fm = NULL; return r; } + // If the space size tracked by the free list manager is higher than the + // actual device size, one can hit an out-of-space allocation, which will + // result in data loss and/or assertions. + // Probably the user altered the device size somehow. + // The only fix for now is to redeploy the OSD. 
+ if (fm->get_size() >= bdev->get_size() + min_alloc_size) { + ostringstream ss; + ss << "slow device size mismatch detected, " + << " fm size(" << fm->get_size() + << ") > slow device size(" << bdev->get_size() + << "), Please stop using this OSD as it might cause data loss."; + _set_disk_size_mismatch_alert(ss.str()); + } return 0; } @@ -13809,6 +13822,11 @@ void BlueStore::_log_alerts(osd_alert_list_t& alerts) { std::lock_guard l(qlock); + if (!disk_size_mismatch_alert.empty()) { + alerts.emplace( + "BLUESTORE_DISK_SIZE_MISMATCH", + disk_size_mismatch_alert); + } if (!legacy_statfs_alert.empty()) { alerts.emplace( "BLUESTORE_LEGACY_STATFS", diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 4ebe61f6f9c..5949d3d42a5 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -2698,6 +2698,7 @@ private: set failed_compressors; string spillover_alert; string legacy_statfs_alert; + string disk_size_mismatch_alert; void _log_alerts(osd_alert_list_t& alerts); bool _set_compression_alert(bool cmode, const char* s) { @@ -2725,6 +2726,10 @@ private: } void _check_legacy_statfs_alert(); + void _set_disk_size_mismatch_alert(const string& s) { + std::lock_guard l(qlock); + disk_size_mismatch_alert = s; + } private: -- 2.39.5