mon: Improvements to slow heartbeat health messages

author David Zafman <dzafman@redhat.com>

Thu, 19 Dec 2019 05:20:25 +0000 (21:20 -0800)

committer David Zafman <dzafman@redhat.com>

Tue, 14 Jan 2020 18:06:44 +0000 (18:06 +0000)
author David Zafman <dzafman@redhat.com>
Thu, 19 Dec 2019 05:20:25 +0000 (21:20 -0800)
committer David Zafman <dzafman@redhat.com>
Tue, 14 Jan 2020 18:06:44 +0000 (18:06 +0000)
diff --git a/doc/rados/operations/monitoring.rst b/doc/rados/operations/monitoring.rst

index 0dd248b4290270121197e0a3e78a481d9a63fa71..bc8a35cbb8866bcbf1693ddfc2cced96fa8aad84 100644 (file)
--- a/doc/rados/operations/monitoring.rst
+++ b/doc/rados/operations/monitoring.rst
@@ -171,18 +171,18 @@ By default we will warn about ping times which exceed 1 second (1000 millisecond
  
  ::
  
-    HEALTH_WARN Long heartbeat ping times on back interface seen, longest is 1118.001 msec
+    HEALTH_WARN Slow OSD heartbeats on back (longest 1118.001ms)
  
  The health detail will add the combination of OSDs are seeing the delays and by how much.  There is a limit of 10
  detail line items.
  
  ::
  
-    [WRN] OSD_SLOW_PING_TIME_BACK: Long heartbeat ping times on back interface seen, longest is 1118.001 msec
-        Slow heartbeat ping on back interface from osd.0 to osd.1 1118.001 msec
-        Slow heartbeat ping on back interface from osd.0 to osd.2 1030.123 msec
-        Slow heartbeat ping on back interface from osd.2 to osd.1 1015.321 msec
-        Slow heartbeat ping on back interface from osd.1 to osd.0 1010.456 msec
+    [WRN] OSD_SLOW_PING_TIME_BACK: Slow OSD heartbeats on back (longest 1118.001ms)
+        Slow OSD heartbeats on back from osd.0 [dc1,rack1] to osd.1 [dc1,rack1] 1118.001 msec possibly improving
+        Slow OSD heartbeats on back from osd.0 [dc1,rack1] to osd.2 [dc1,rack2] 1030.123 msec
+        Slow OSD heartbeats on back from osd.2 [dc1,rack2] to osd.1 [dc1,rack1] 1015.321 msec
+        Slow OSD heartbeats on back from osd.1 [dc1,rack1] to osd.0 [dc1,rack1] 1010.456 msec
  
  To see even more detail and a complete dump of network performance information the ``dump_osd_network`` command can be used.  Typically, this would be
  sent to a mgr, but it can be limited to a particular OSD's interactions by issuing it to any OSD.  The current threshold which defaults to 1 second
diff --git a/qa/standalone/misc/network-ping.sh b/qa/standalone/misc/network-ping.sh

index b2b299d63ee81e28c9edff11e3fbcaf4eb556591..4745108c5babb98f1c2e435573e58a0096d3d5bf 100755 (executable)
--- a/qa/standalone/misc/network-ping.sh
+++ b/qa/standalone/misc/network-ping.sh
@@ -119,8 +119,30 @@ function TEST_network_ping_test2() {
      run_osd $dir 2 || return 1
  
      sleep 5
-
-    create_pool foo 16
+    ceph osd crush add-bucket dc1 datacenter
+    ceph osd crush add-bucket dc2 datacenter
+    ceph osd crush add-bucket dc3 datacenter
+    ceph osd crush add-bucket rack1 rack
+    ceph osd crush add-bucket rack2 rack
+    ceph osd crush add-bucket rack3 rack
+    ceph osd crush add-bucket host1 host
+    ceph osd crush add-bucket host2 host
+    ceph osd crush add-bucket host3 host
+    ceph osd crush move dc1 root=default
+    ceph osd crush move dc2 root=default
+    ceph osd crush move dc3 root=default
+    ceph osd crush move rack1 datacenter=dc1
+    ceph osd crush move rack2 datacenter=dc2
+    ceph osd crush move rack3 datacenter=dc3
+    ceph osd crush move host1 rack=rack1
+    ceph osd crush move host2 rack=rack2
+    ceph osd crush move host3 rack=rack3
+    ceph osd crush set osd.0 1.0 host=host1
+    ceph osd crush set osd.1 1.0 host=host2
+    ceph osd crush set osd.2 1.0 host=host3
+    ceph osd crush rule create-simple myrule default host firstn
+
+    create_pool foo 16 16 replicated myrule
  
      # write some objects
      timeout 20 rados bench -p foo 10 write -b 4096 --no-cleanup || return 1
@@ -130,11 +152,13 @@ function TEST_network_ping_test2() {
      flush_pg_stats
  
      ceph health | tee $dir/health
-    grep -q "Long heartbeat" $dir/health || return 1
+    grep -q "Slow OSD heartbeats" $dir/health || return 1
  
      ceph health detail | tee $dir/health
      grep -q "OSD_SLOW_PING_TIME_BACK" $dir/health || return 1
      grep -q "OSD_SLOW_PING_TIME_FRONT" $dir/health || return 1
+    grep -q "Slow OSD heartbeats on front from osd[.][0-2] [[]dc[1-3],rack[1-3][]] \
+to osd[.][0-2] [[]dc[1-3],rack[1-3][]]" $dir/health || return 1
      rm -f $dir/health
  }
  
diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc

index 722e61f5d74b91075777dba8171f98958db444bb..7d74817bad0445683ecea155b58bf4ea0726d54b 100644 (file)
--- a/src/mon/PGMap.cc
+++ b/src/mon/PGMap.cc
@@ -2363,6 +2363,23 @@ void PGMap::dump_pool_stats_and_io_rate(int64_t poolid, const OSDMap &osd_map,
    }
  }
  
+// Get crush parentage for an osd (skip root)
+set<std::string> PGMap::osd_parentage(const OSDMap& osdmap, int id) const
+{
+  set<std::string> reporters_by_subtree;
+  auto reporter_subtree_level = g_conf().get_val<string>("mon_osd_reporter_subtree_level");
+
+  auto loc = osdmap.crush->get_full_location(id);
+  for (auto& [parent_bucket_type, parent_id] : loc) {
+    // Should we show the root?  Might not be too informative like "default"
+    if (parent_bucket_type != "root" &&
+        parent_bucket_type != reporter_subtree_level) {
+      reporters_by_subtree.insert(parent_id);
+    }
+  }
+  return reporters_by_subtree;
+}
+
  void PGMap::get_health_checks(
    CephContext *cct,
    const OSDMap& osdmap,
@@ -2833,9 +2850,11 @@ void PGMap::get_health_checks(
          break;
        }
        max_detail--;
-      ss << "Slow heartbeat ping on back interface from osd." << sback.from
+      ss << "Slow OSD heartbeats on back from osd." << sback.from
+        << " [" << osd_parentage(osdmap, sback.from) << "]"
           << (osdmap.is_down(sback.from) ? " (down)" : "")
          << " to osd." << sback.to
+        << " [" << osd_parentage(osdmap, sback.to) << "]"
           << (osdmap.is_down(sback.to) ? " (down)" : "")
          << " " << fixed_u_to_string(sback.pingtime, 3) << " msec"
          << (sback.improving ? " possibly improving" : "");
@@ -2850,9 +2869,12 @@ void PGMap::get_health_checks(
          break;
        }
        max_detail--;
-      ss << "Slow heartbeat ping on front interface from osd." << sfront.from
+      // Get crush parentage for each osd
+      ss << "Slow OSD heartbeats on front from osd." << sfront.from
+        << " [" << osd_parentage(osdmap, sfront.from) << "]"
           << (osdmap.is_down(sfront.from) ? " (down)" : "")
           << " to osd." << sfront.to
+        << " [" << osd_parentage(osdmap, sfront.to) << "]"
           << (osdmap.is_down(sfront.to) ? " (down)" : "")
          << " " << fixed_u_to_string(sfront.pingtime, 3) << " msec"
          << (sfront.improving ? " possibly improving" : "");
@@ -2860,16 +2882,16 @@ void PGMap::get_health_checks(
      }
      if (detail_back.size() != 0) {
        ostringstream ss;
-      ss << "Long heartbeat ping times on back interface seen, longest is "
-        << fixed_u_to_string(back_sorted.rbegin()->pingtime, 3) << " msec";
+      ss << "Slow OSD heartbeats on back (longest "
+        << fixed_u_to_string(back_sorted.rbegin()->pingtime, 3) << "ms)";
        auto& d = checks->add("OSD_SLOW_PING_TIME_BACK", HEALTH_WARN, ss.str(),
                       back_sorted.size());
        d.detail.swap(detail_back);
      }
      if (detail_front.size() != 0) {
        ostringstream ss;
-      ss << "Long heartbeat ping times on front interface seen, longest is "
-        << fixed_u_to_string(front_sorted.rbegin()->pingtime, 3) << " msec";
+      ss << "Slow OSD heartbeats on front (longest "
+        << fixed_u_to_string(front_sorted.rbegin()->pingtime, 3) << "ms)";
        auto& d = checks->add("OSD_SLOW_PING_TIME_FRONT", HEALTH_WARN, ss.str(),
                       front_sorted.size());
        d.detail.swap(detail_front);
diff --git a/src/mon/PGMap.h b/src/mon/PGMap.h

index 20c1cac2777a40726a823c9879fbc396d6dc3e58..c05268c3ad8c358fed60b08331a864efc9f2278c 100644 (file)
--- a/src/mon/PGMap.h
+++ b/src/mon/PGMap.h
@@ -491,6 +491,7 @@ public:
    void get_filtered_pg_stats(uint64_t state, int64_t poolid, int64_t osdid,
                               bool primary, std::set<pg_t>& pgs) const;
  
+  set<std::string> osd_parentage(const OSDMap& osdmap, int id) const;
    void get_health_checks(
      CephContext *cct,
      const OSDMap& osdmap,
author	David Zafman <dzafman@redhat.com>
	Thu, 19 Dec 2019 05:20:25 +0000 (21:20 -0800)
committer	David Zafman <dzafman@redhat.com>
	Tue, 14 Jan 2020 18:06:44 +0000 (18:06 +0000)
doc/rados/operations/monitoring.rst		patch \| blob \| history
qa/standalone/misc/network-ping.sh		patch \| blob \| history
src/mon/PGMap.cc		patch \| blob \| history
src/mon/PGMap.h		patch \| blob \| history