From: Igor Fedotov Date: Fri, 12 Apr 2019 14:04:07 +0000 (+0300) Subject: os/bluestore: alert on fm/bdev size mismatch X-Git-Tag: v14.2.2~113^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=13f6f71566d67766a6a20cee027158e739efa6d2;p=ceph.git os/bluestore: alert on fm/bdev size mismatch Relates to: https://tracker.ceph.com/issues/39151 Signed-off-by: Igor Fedotov (cherry picked from commit a3ab32e7651073a64a3c5b24ee5ddc36c7bcf8b2) --- diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index e6e4829edf9e..d53bc6fe721a 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -2996,6 +2996,8 @@ void PGMap::get_health_checks( summary = "BlueStore compression broken"; } else if (asum.first == "BLUESTORE_LEGACY_STATFS") { summary = "Legacy BlueStore stats reporting detected"; + } else if (asum.first == "BLUESTORE_DISK_SIZE_MISMATCH") { + summary = "BlueStore has dangerous mismatch between block device and free list sizes"; } summary += " on "; summary += stringify(asum.second.first); diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index bef723f03874..5d699f3ea937 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -4822,6 +4822,19 @@ int BlueStore::_open_fm(KeyValueDB::Transaction t) fm = NULL; return r; } + // If the space size tracked by the free list manager is higher than the + // actual dev size, one can hit out-of-space allocation, which will result + // in data loss and/or assertions. + // The user has probably altered the device size somehow. + // The only fix for now is to redeploy the OSD. 
+ if (fm->get_size() >= bdev->get_size() + min_alloc_size) { + ostringstream ss; + ss << "slow device size mismatch detected, " + << " fm size(" << fm->get_size() + << ") > slow device size(" << bdev->get_size() + << "), Please stop using this OSD as it might cause data loss."; + _set_disk_size_mismatch_alert(ss.str()); + } return 0; } @@ -13803,6 +13816,11 @@ void BlueStore::_log_alerts(osd_alert_list_t& alerts) { std::lock_guard l(qlock); + if (!disk_size_mismatch_alert.empty()) { + alerts.emplace( + "BLUESTORE_DISK_SIZE_MISMATCH", + disk_size_mismatch_alert); + } if (!legacy_statfs_alert.empty()) { alerts.emplace( "BLUESTORE_LEGACY_STATFS", diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index a66765c65b98..155d7e07b1b8 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -2698,6 +2698,7 @@ private: set failed_compressors; string spillover_alert; string legacy_statfs_alert; + string disk_size_mismatch_alert; void _log_alerts(osd_alert_list_t& alerts); bool _set_compression_alert(bool cmode, const char* s) { @@ -2725,6 +2726,10 @@ private: } void _check_legacy_statfs_alert(); + void _set_disk_size_mismatch_alert(const string& s) { + std::lock_guard l(qlock); + disk_size_mismatch_alert = s; + } private: