git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: raise health alerts when spurious read errors observed
author Igor Fedotov <ifedotov@suse.com>
Tue, 4 Feb 2020 16:06:04 +0000 (19:06 +0300)
committer Igor Fedotov <ifedotov@suse.com>
Tue, 14 Apr 2020 09:08:27 +0000 (12:08 +0300)
Signed-off-by: Igor Fedotov <ifedotov@suse.com>
src/common/legacy_config_opts.h
src/common/options.cc
src/mon/PGMap.cc
src/os/bluestore/BlueStore.cc
src/os/bluestore/BlueStore.h

index bfbf4f293ffe40808e3714482a2386b84ebfb21d..7792af27c7298b0a4a236c8154d357ecfc456f5d 100644 (file)
@@ -1051,6 +1051,7 @@ OPTION(bluestore_debug_inject_csum_err_probability, OPT_FLOAT)
 OPTION(bluestore_fsck_error_on_no_per_pool_stats, OPT_BOOL)
 OPTION(bluestore_warn_on_bluefs_spillover, OPT_BOOL)
 OPTION(bluestore_warn_on_legacy_statfs, OPT_BOOL)
+OPTION(bluestore_warn_on_spurious_read_errors, OPT_BOOL)
 OPTION(bluestore_fsck_error_on_no_per_pool_omap, OPT_BOOL)
 OPTION(bluestore_warn_on_no_per_pool_omap, OPT_BOOL)
 OPTION(bluestore_log_op_age, OPT_DOUBLE)
index 0d490269fb81dd899e4a485182240bf879ba1a8d..300ef3e26b6e83da270cbaf12f82ad62fa469c2a 100644 (file)
@@ -4603,6 +4603,10 @@ std::vector<Option> get_global_options() {
     .set_default(true)
     .set_description("Enable health indication on lack of per-pool statfs reporting from bluestore"),
 
+    Option("bluestore_warn_on_spurious_read_errors", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description("Enable health indication when spurious read errors are observed by OSD"),
+
     Option("bluestore_fsck_error_on_no_per_pool_omap", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
     .set_default(false)
     .set_description("Make fsck error (instead of warn) when objects without per-pool omap are found"),
index 1cfb85d11e142f876869670d7cc3124cc553c265..b018bb0d7482ae91cb83af7f4172ec8a7691d58f 100644 (file)
@@ -3234,7 +3234,10 @@ void PGMap::get_health_checks(
        summary += " have dangerous mismatch between BlueStore block device and free list sizes";
       } else if (asum.first == "BLUESTORE_NO_PER_POOL_OMAP") {
        summary += " reporting legacy (not per-pool) BlueStore omap usage stats";
+      } else if (asum.first == "BLUESTORE_SPURIOUS_READ_ERRORS") {
+        summary += " have spurious read errors";
       }
+
       auto& d = checks->add(asum.first, HEALTH_WARN, summary, asum.second.first);
       for (auto& s : asum.second.second) {
         d.detail.push_back(s);
index 8343e3ddfd82a2c808a5b61ee85f7669d3289d5d..8cfa97113be1c5dba66305fe9f7246eedc418c30 100644 (file)
@@ -9890,6 +9890,9 @@ int BlueStore::_do_read(
     logger->inc(l_bluestore_reads_with_retries);
     dout(5) << __func__ << " read at 0x" << std::hex << offset << "~" << length
             << " failed " << std::dec << retry_count << " times before succeeding" << dendl;
+    stringstream s;
+    s << " reads with retries: " << logger->get(l_bluestore_reads_with_retries);
+    _set_spurious_read_errors_alert(s.str());
   }
   return r;
 }
@@ -15471,6 +15474,11 @@ void BlueStore::_log_alerts(osd_alert_list_t& alerts)
 {
   std::lock_guard l(qlock);
 
+  if (!spurious_read_errors_alert.empty()) {
+    alerts.emplace(
+      "BLUESTORE_SPURIOUS_READ_ERRORS",
+      spurious_read_errors_alert);
+  }
   if (!disk_size_mismatch_alert.empty()) {
     alerts.emplace(
       "BLUESTORE_DISK_SIZE_MISMATCH",
index b680774ba35c35328e02c554555e0945a1963e61..624a99dce0aac41686cf87535a44716a14281593 100644 (file)
@@ -2921,6 +2921,7 @@ private:
   std::string legacy_statfs_alert;
   std::string no_per_pool_omap_alert;
   std::string disk_size_mismatch_alert;
+  std::string spurious_read_errors_alert;
 
   void _log_alerts(osd_alert_list_t& alerts);
   bool _set_compression_alert(bool cmode, const char* s) {
@@ -2953,6 +2954,10 @@ private:
     std::lock_guard l(qlock);
     disk_size_mismatch_alert = s;
   }
+  void _set_spurious_read_errors_alert(const string& s) {  // Records s as the spurious-read-errors alert text; _log_alerts reports it as BLUESTORE_SPURIOUS_READ_ERRORS when non-empty.
+    std::lock_guard l(qlock);  // hold qlock for the rest of the body; _log_alerts reads the alert strings under the same lock
+    spurious_read_errors_alert = s;  // replaces any previously stored alert text
+  }
 
 private: