osd_plb.add_u64_counter(l_osd_scrub_rppool_read_cnt, "scrub_replicated_read_cnt", "scrub replicated pool read calls count");
osd_plb.add_u64_counter(l_osd_scrub_rppool_read_bytes, "scrub_replicated_read_bytes", "scrub replicated pool read bytes read");
// scrub I/O performed for EC pools
- osd_plb.add_u64_counter(l_osd_scrub_ec_getattr_cnt, "scrub_ec_getattr_cnt", "scrub ec getattr calls count");
- osd_plb.add_u64_counter(l_osd_scrub_ec_stats_cnt, "scrub_ec_stats_cnt", "scrub ec stats calls count");
- osd_plb.add_u64_counter(l_osd_scrub_ec_read_cnt, "scrub_ec_read_cnt", "scrub ec read calls count");
- osd_plb.add_u64_counter(l_osd_scrub_ec_read_bytes, "scrub_ec_read_bytes", "scrub ec read bytes read");
+ osd_plb.add_u64_counter(l_osd_scrub_ec_getattr_cnt, "scrub_ec_getattr_cnt", "scrub EC getattr calls count");
+ osd_plb.add_u64_counter(l_osd_scrub_ec_stats_cnt, "scrub_ec_stats_cnt", "scrub EC stats calls count");
+ osd_plb.add_u64_counter(l_osd_scrub_ec_read_cnt, "scrub_ec_read_cnt", "scrub EC read calls count");
+ osd_plb.add_u64_counter(l_osd_scrub_ec_read_bytes, "scrub_ec_read_bytes", "scrub EC read bytes read");
- // scrub (no EC vs. replicated differentiation)
// scrub - replicated pools
- osd_plb.add_u64_counter(l_osd_scrub_rppool_started, "num_scrubs_started_replicated", "replicated scrubs attempted count");
- osd_plb.add_u64_counter(l_osd_scrub_rppool_active_started, "num_scrubs_past_reservation_replicated", "replicated scrubs count");
- osd_plb.add_u64_counter(l_osd_scrub_rppool_successful, "successful_scrubs_replicated", "successful replicated scrubs count");
- osd_plb.add_time_avg(l_osd_scrub_rppool_successful_elapsed, "successful_scrubs_replicated_elapsed", "time to complete a successful replicated scrub");
- osd_plb.add_u64_counter(l_osd_scrub_rppool_failed, "failed_scrubs_replicated", "failed replicated scrubs count");
- osd_plb.add_time_avg(l_osd_scrub_rppool_failed_elapsed, "failed_scrubs_replicated_elapsed", "time to scrub failure replicated");
+ osd_plb.add_u64_counter(
+ l_osd_scrub_rppool_started,
+ "num_scrubs_started_replicated",
+ "replicated scrubs attempted count");
+ osd_plb.add_u64_counter(
+ l_osd_scrub_rppool_active_started,
+ "num_scrubs_past_reservation_replicated",
+ "replicated scrubs count");
+ osd_plb.add_u64_counter(
+ l_osd_scrub_rppool_successful,
+ "successful_scrubs_replicated",
+ "successful replicated scrubs count");
+ osd_plb.add_time_avg(
+ l_osd_scrub_rppool_successful_elapsed,
+ "successful_scrubs_replicated_elapsed",
+ "time to complete a successful replicated scrub");
+ osd_plb.add_u64_counter(
+ l_osd_scrub_rppool_failed, "failed_scrubs_replicated",
+ "failed replicated scrubs count");
+ osd_plb.add_time_avg(
+ l_osd_scrub_rppool_failed_elapsed,
+ "failed_scrubs_replicated_elapsed",
+ "time to scrub failure replicated");
+
+ // the replica reservation process - replicated pool
+ osd_plb.add_u64_counter(
+ l_osd_scrub_rppool_reserv_success,
+ "scrub_replicated_scrub_reservations_completed",
+ "successfully completed reservation processes");
+ osd_plb.add_time_avg(
+ l_osd_scrub_rppool_reserv_successful_elapsed,
+ "scrub_replicated_successful_reservations_elapsed",
+ "time to scrub reservation completion");
+ osd_plb.add_u64_counter(
+ l_osd_scrub_rppool_reserv_aborted,
+ "scrub_replicated_reservation_process_aborted",
+ "scrub replicated pool reservation was aborted");
+ osd_plb.add_u64_counter(
+ l_osd_scrub_rppool_reserv_rejected,
+ "scrub_replicated_reservation_process_failure",
+ "scrub replicated pool reservation failed due to replica denial");
+ osd_plb.add_u64_counter(
+ l_osd_scrub_rppool_reserv_skipped,
+ "scrub_replicated_reservation_process_skipped",
+ "scrub replicated pool reservation skipped for high priority scrub");
+ osd_plb.add_time_avg(
+ l_osd_scrub_rppool_reserv_failed_elapsed,
+ "scrub_replicated_failed_reservations_elapsed",
+ "scrub replicated pool time for scrub reservation to fail");
+ osd_plb.add_u64(
+ l_osd_scrub_rppool_reserv_secondaries_num,
+ "scrub_replicated_replicas_in_reservation",
+ "scrub replicated pool number of replicas to reserve");
// scrub - EC
- osd_plb.add_u64_counter(l_osd_scrub_ec_started, "num_scrubs_started_ec", "scrubs attempted count ec");
- osd_plb.add_u64_counter(l_osd_scrub_ec_active_started, "num_scrubs_past_reservation_ec", "scrubs count ec");
- osd_plb.add_u64_counter(l_osd_scrub_ec_successful, "successful_scrubs_ec", "successful scrubs count ec");
- osd_plb.add_time_avg(l_osd_scrub_ec_successful_elapsed, "successful_scrubs_ec_elapsed", "time to complete a successful ec scrub");
- osd_plb.add_u64_counter(l_osd_scrub_ec_failed, "failed_scrubs_ec", "failed scrubs count ec");
- osd_plb.add_time_avg(l_osd_scrub_ec_failed_elapsed, "failed_scrubs_ec_elapsed", "time to scrub failure ec");
+ osd_plb.add_u64_counter(
+ l_osd_scrub_ec_started, "num_scrubs_started_ec",
+ "EC scrubs attempted count");
+ osd_plb.add_u64_counter(
+ l_osd_scrub_ec_active_started, "num_scrubs_past_reservation_ec",
+ "EC scrubs count");
+ osd_plb.add_u64_counter(
+ l_osd_scrub_ec_successful, "successful_scrubs_ec",
+ "successful EC scrubs count");
+ osd_plb.add_time_avg(
+ l_osd_scrub_ec_successful_elapsed, "successful_scrubs_ec_elapsed",
+ "time to complete a successful EC scrub");
+ osd_plb.add_u64_counter(
+ l_osd_scrub_ec_failed, "failed_scrubs_ec", "failed scrubs count EC");
+ osd_plb.add_time_avg(
+ l_osd_scrub_ec_failed_elapsed, "failed_scrubs_ec_elapsed",
+ "time to scrub failure ec");
+
+ // the replica reservation process - EC
+ osd_plb.add_u64_counter(
+ l_osd_scrub_ec_reserv_success, "scrub_ec_reservations_completed",
+ "successfully completed reservation processes EC");
+ osd_plb.add_time_avg(
+ l_osd_scrub_ec_reserv_successful_elapsed,
+ "scrub_ec_successful_reservations_elapsed",
+ "time to EC scrub reservation completion");
+ osd_plb.add_u64_counter(
+ l_osd_scrub_ec_reserv_aborted, "scrub_ec_reservation_process_aborted",
+ "scrub reservation was aborted EC");
+ osd_plb.add_u64_counter(
+ l_osd_scrub_ec_reserv_rejected, "scrub_ec_reservation_process_failure",
+ "scrub reservation failed due to replica denial EC");
+ osd_plb.add_u64_counter(
+ l_osd_scrub_ec_reserv_skipped, "scrub_ec_reservation_process_skipped",
+ "scrub reservation skipped for high priority scrub EC");
+ osd_plb.add_time_avg(
+ l_osd_scrub_ec_reserv_failed_elapsed,
+ "scrub_ec_failed_reservations_elapsed",
+ "time for scrub reservation to fail EC");
+ osd_plb.add_u64(
+ l_osd_scrub_ec_reserv_secondaries_num, "scrub_ec_replicas_in_reservation",
+ "number of replicas to reserve EC");
return osd_plb.create_perf_counters();
}
scrub_perf.add_u64_counter(scrbcnt_blocked, "locked_object", "waiting on locked object events");
scrub_perf.add_u64_counter(scrbcnt_write_blocked, "write_blocked_by_scrub", "write blocked by scrub");
- // the replica reservation process
- scrub_perf.add_u64_counter(scrbcnt_resrv_success, "scrub_reservations_completed", "successfully completed reservation processes");
- scrub_perf.add_time_avg(scrbcnt_resrv_successful_elapsed, "successful_reservations_elapsed", "time to scrub reservation completion");
- scrub_perf.add_u64_counter(scrbcnt_resrv_aborted, "reservation_process_aborted", "scrub reservation was aborted");
- scrub_perf.add_u64_counter(scrbcnt_resrv_rejected, "reservation_process_failure", "scrub reservation failed due to replica denial");
- scrub_perf.add_u64_counter(scrbcnt_resrv_skipped, "reservation_process_skipped", "scrub reservation skipped for high priority scrub");
- scrub_perf.add_time_avg(scrbcnt_resrv_failed_elapsed, "failed_reservations_elapsed", "time for scrub reservation to fail");
- scrub_perf.add_u64(scrbcnt_resrv_replicas_num, "replicas_in_reservation", "number of replicas in reservation");
return scrub_perf.create_perf_counters();
}
l_osd_scrub_rppool_failed, ///< failed scrubs count
l_osd_scrub_rppool_failed_elapsed, ///< time from start to failure
- // scrub - EC
+ // ---- scrub reservation process - replicated pools
+
+ /// successful replicas reservation count
+ l_osd_scrub_rppool_reserv_success,
+ /// time to complete a successful replicas reservation
+ l_osd_scrub_rppool_reserv_successful_elapsed,
+ /// failed attempt to reserve replicas due to an abort
+ l_osd_scrub_rppool_reserv_aborted,
+ /// reservation failed due to a 'rejected' response
+ l_osd_scrub_rppool_reserv_rejected,
+ /// reservation skipped for high-priority scrubs
+ l_osd_scrub_rppool_reserv_skipped,
+ /// time for a replicas reservation process to fail
+ l_osd_scrub_rppool_reserv_failed_elapsed,
+ /// number of replicas to reserve
+ l_osd_scrub_rppool_reserv_secondaries_num,
+
+
+ // ---- scrub - EC
l_osd_scrub_ec_started, ///< scrubs that got started
l_osd_scrub_ec_active_started, ///< scrubs that got past secondaries reservation
l_osd_scrub_ec_successful, ///< successful scrubs count
l_osd_scrub_ec_failed, ///< failed scrubs count
l_osd_scrub_ec_failed_elapsed, ///< time from start to failure
+ // ---- scrub reservation process - EC
+
+ /// successful replicas reservation count
+ l_osd_scrub_ec_reserv_success,
+ /// time to complete a successful replicas reservation
+ l_osd_scrub_ec_reserv_successful_elapsed,
+ /// failed attempt to reserve replicas due to an abort
+ l_osd_scrub_ec_reserv_aborted,
+ /// reservation failed due to a 'rejected' response
+ l_osd_scrub_ec_reserv_rejected,
+ /// reservation skipped for high-priority scrubs
+ l_osd_scrub_ec_reserv_skipped,
+ /// time for a replicas reservation process to fail
+ l_osd_scrub_ec_reserv_failed_elapsed,
+ /// number of replicas to reserve
+ l_osd_scrub_ec_reserv_secondaries_num,
+
l_osd_last,
};
/// # write blocked by the scrub
scrbcnt_write_blocked,
- // -- replicas reservation
- /// # successfully completed reservation steps
- scrbcnt_resrv_success,
- /// time to complete a successful replicas reservation
- scrbcnt_resrv_successful_elapsed,
- /// # failed attempt to reserve replicas due to an abort
- scrbcnt_resrv_aborted,
- /// # reservation failed due to a 'rejected' response
- scrbcnt_resrv_rejected,
- /// # reservation skipped for high-priority scrubs
- scrbcnt_resrv_skipped,
- /// time for a replicas reservation process to fail
- scrbcnt_resrv_failed_elapsed,
- /// # number of replicas
- scrbcnt_resrv_replicas_num,
-
scrbcnt_last,
};
.successful_cnt = l_osd_scrub_rppool_successful,
.successful_elapsed = l_osd_scrub_rppool_successful_elapsed,
.failed_cnt = l_osd_scrub_rppool_failed,
- .failed_elapsed = l_osd_scrub_rppool_failed_elapsed
+ .failed_elapsed = l_osd_scrub_rppool_failed_elapsed,
+ // replica-reservation-related:
+ .rsv_successful_cnt = l_osd_scrub_rppool_reserv_success,
+ .rsv_successful_elapsed = l_osd_scrub_rppool_reserv_successful_elapsed,
+ .rsv_aborted_cnt = l_osd_scrub_rppool_reserv_aborted,
+ .rsv_rejected_cnt = l_osd_scrub_rppool_reserv_rejected,
+ .rsv_skipped_cnt = l_osd_scrub_rppool_reserv_skipped,
+ .rsv_failed_elapsed = l_osd_scrub_rppool_reserv_failed_elapsed,
+ .rsv_secondaries_num = l_osd_scrub_rppool_reserv_secondaries_num
};
/// The EC flavour of a ScrubCounterSet: every member holds one of the
/// l_osd_scrub_ec_* perf-counter indices.
/// NOTE(review): presumably selected when the scrubbed PG belongs to an
/// EC pool - confirm at the point of use.
static inline constexpr ScrubCounterSet io_counters_ec{
.successful_cnt = l_osd_scrub_ec_successful,
.successful_elapsed = l_osd_scrub_ec_successful_elapsed,
.failed_cnt = l_osd_scrub_ec_failed,
- .failed_elapsed = l_osd_scrub_ec_failed_elapsed
+ .failed_elapsed = l_osd_scrub_ec_failed_elapsed,
+ // replica-reservation counters, EC flavour (l_osd_scrub_ec_reserv_*):
+ .rsv_successful_cnt = l_osd_scrub_ec_reserv_success,
+ .rsv_successful_elapsed = l_osd_scrub_ec_reserv_successful_elapsed,
+ .rsv_aborted_cnt = l_osd_scrub_ec_reserv_aborted,
+ .rsv_rejected_cnt = l_osd_scrub_ec_reserv_rejected,
+ .rsv_skipped_cnt = l_osd_scrub_ec_reserv_skipped,
+ .rsv_failed_elapsed = l_osd_scrub_ec_reserv_failed_elapsed,
+ .rsv_secondaries_num = l_osd_scrub_ec_reserv_secondaries_num
};
} // namespace Scrub
ReplicaReservations::ReplicaReservations(
ScrubMachineListener& scrbr,
reservation_nonce_t& nonce,
- PerfCounters& pc)
+ const ScrubCounterSet& pc)
: m_scrubber{scrbr}
, m_pg{m_scrubber.get_pg()}
, m_pgid{m_scrubber.get_spgid().pgid}
, m_osds{m_pg->get_pg_osd(ScrubberPasskey())}
, m_last_request_sent_nonce{nonce}
- , m_perf_set{pc}
+ , m_perf_indices{pc}
{
// the acting set is sorted by pg_shard_t. The reservations are to be issued
// in this order, so that the OSDs will receive the requests in a consistent
[whoami = m_pg->pg_whoami](const pg_shard_t& shard) {
return shard != whoami;
});
- m_perf_set.set(scrbcnt_resrv_replicas_num, m_sorted_secondaries.size());
+ m_osds->logger->set(
+ m_perf_indices.rsv_secondaries_num, m_sorted_secondaries.size());
m_next_to_request = m_sorted_secondaries.cbegin();
if (m_scrubber.is_reservation_required()) {
// for high-priority scrubs (i.e. - user-initiated), no reservations are
// needed. Note: not perf-counted as either success or failure.
dout(10) << "high-priority scrub - no reservations needed" << dendl;
- m_perf_set.inc(scrbcnt_resrv_skipped);
+ m_osds->logger->inc(m_perf_indices.rsv_skipped_cnt);
}
}
{
ceph_assert(m_process_started_at.has_value());
auto logged_duration = ScrubClock::now() - m_process_started_at.value();
- m_perf_set.tinc(scrbcnt_resrv_successful_elapsed, logged_duration);
- m_perf_set.inc(scrbcnt_resrv_success);
+ m_osds->logger->tinc(m_perf_indices.rsv_successful_elapsed, logged_duration);
+ m_osds->logger->inc(m_perf_indices.rsv_successful_cnt);
m_osds->logger->hinc(
l_osd_scrub_reservation_dur_hist, std::ssize(m_sorted_secondaries),
logged_duration.count());
return;
}
auto logged_duration = ScrubClock::now() - m_process_started_at.value();
- m_perf_set.tinc(scrbcnt_resrv_failed_elapsed, logged_duration);
+ m_osds->logger->tinc(m_perf_indices.rsv_failed_elapsed, logged_duration);
m_process_started_at.reset();
// note: not counted into l_osd_scrub_reservation_dur_hist
- m_perf_set.inc(failure_cause_counter);
+ m_osds->logger->inc(failure_cause_counter);
}
/// Destructor: calls release_all(), then reports the reservation process as
/// 'aborted' through log_failure_and_duration(), passing the 'aborted'
/// counter index.
/// NOTE(review): log_failure_and_duration() is only partly visible here;
/// presumably it does nothing if an outcome was already logged - confirm.
ReplicaReservations::~ReplicaReservations()
{
release_all();
- log_failure_and_duration(scrbcnt_resrv_aborted);
+ log_failure_and_duration(m_perf_indices.rsv_aborted_cnt);
}
bool ReplicaReservations::is_reservation_response_relevant(
return false;
}
- log_failure_and_duration(scrbcnt_resrv_rejected);
+ log_failure_and_duration(m_perf_indices.rsv_rejected_cnt);
// we should never see a rejection carrying a valid
// reservation nonce - arriving while we have no pending requests