For example, rbd sends a watch ping to the header object
every 5 seconds to keep its watch alive. If the primary OSD is unable
to receive the watch ping for the header object due to an rbd
network interruption, this means that rbd's I/O is already
hung. This way, we can quickly detect disconnected rbds on the OSD
and reflect them in metrics.
Signed-off-by: Yite Gu <yitegu0@gmail.com>
oi.watchers.erase(make_pair(watch->get_cookie(),
watch->get_entity()));
+ osd->logger->inc(l_osd_watch_timeouts);
+ dout(3) << __func__ << " watcher " << watch->get_peer_addr()
+ << " object " << obc->obs.oi.soid << dendl;
+
list<watch_disconnect_t> watch_disconnects = {
watch_disconnect_t(watch->get_cookie(), watch->get_entity(), true)
};
osd_plb.add_u64_counter_histogram(
l_osd_scrub_reservation_dur_hist, "scrub_resrv_repnum_vs_duration",
rsrv_hist_x_axis_config, rsrv_hist_y_axis_config, "Histogram of scrub replicas reservation duration");
+ osd_plb.add_u64_counter(
+ l_osd_watch_timeouts, "watch_timeouts",
+ "Number of watches that timed out or were blocklisted",
+ NULL, PerfCountersBuilder::PRIO_USEFUL);
return osd_plb.create_perf_counters();
}
// are labeled, and histograms do not fully support labels.
l_osd_scrub_reservation_dur_hist,
+ l_osd_watch_timeouts,
+
l_osd_last,
};