OPTION(mon_osd_prime_pg_temp_max_time, OPT_FLOAT) // max time to spend priming
OPTION(mon_osd_prime_pg_temp_max_estimate, OPT_FLOAT) // max estimate of pg total before we do all pgs in parallel
OPTION(mon_osd_pool_ec_fast_read, OPT_BOOL) // whether to turn on fast read on the pool or not
-OPTION(mon_stat_smooth_intervals, OPT_INT) // smooth stats over last N PGMap maps
OPTION(mon_election_timeout, OPT_FLOAT) // on election proposer, max waiting time for all ACKs
OPTION(mon_lease, OPT_FLOAT) // lease interval
OPTION(mon_lease_renew_interval_factor, OPT_FLOAT) // on leader, to renew the lease
OPTION(mon_clock_drift_warn_backoff, OPT_FLOAT) // exponential backoff for clock drift warnings
OPTION(mon_timecheck_interval, OPT_FLOAT) // on leader, timecheck (clock drift check) interval (seconds)
OPTION(mon_timecheck_skew_interval, OPT_FLOAT) // on leader, timecheck (clock drift check) interval when in presence of a skew (seconds)
-OPTION(mon_pg_stuck_threshold, OPT_INT) // number of seconds after which pgs can be considered stuck inactive, unclean, etc (see doc/control.rst under dump_stuck for more info)
OPTION(mon_pg_min_inactive, OPT_U64) // the number of PGs which have to be inactive longer than 'mon_pg_stuck_threshold' before health goes into ERR. 0 means disabled, never go into ERR.
-OPTION(mon_pg_warn_max_object_skew, OPT_FLOAT) // max skew few average in objects per pg
-OPTION(mon_pg_warn_min_objects, OPT_INT) // do not warn below this object #
-OPTION(mon_pg_warn_min_pool_objects, OPT_INT) // do not warn on pools below this object #
OPTION(mon_pg_check_down_all_threshold, OPT_FLOAT) // threshold of down osds after which we check all pgs
OPTION(mon_cache_target_full_warn_ratio, OPT_FLOAT) // position between pool cache_target_full and max where we start warning
OPTION(mon_osd_full_ratio, OPT_FLOAT) // what % full makes an OSD "full"
OPTION(mon_health_to_clog_interval, OPT_INT)
OPTION(mon_health_to_clog_tick_interval, OPT_DOUBLE)
OPTION(mon_health_preluminous_compat, OPT_BOOL)
-OPTION(mon_health_max_detail, OPT_INT) // max detailed pgs to report in health detail
OPTION(mon_data_avail_crit, OPT_INT)
OPTION(mon_data_avail_warn, OPT_INT)
OPTION(mon_data_size_warn, OPT_U64) // issue a warning when the monitor's data store goes over 15GB (in bytes)
OPTION(mon_client_hunt_interval_backoff, OPT_DOUBLE) // each time we reconnect to a monitor, double our timeout
OPTION(mon_client_hunt_interval_max_multiple, OPT_DOUBLE) // up to a max of 10*default (30 seconds); see the sketch after this block
OPTION(mon_client_max_log_entries_per_message, OPT_INT)
-OPTION(mon_pool_quota_warn_threshold, OPT_INT) // percent of quota at which to issue warnings
-OPTION(mon_pool_quota_crit_threshold, OPT_INT) // percent of quota at which to issue errors
OPTION(client_cache_size, OPT_INT)
OPTION(client_cache_mid, OPT_FLOAT)
OPTION(client_use_random_mds, OPT_BOOL)
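// A minimal standalone sketch (not the actual MonClient code) of how the two
// hunt options above are assumed to interact: the reconnect timeout doubles
// on every failed attempt and is clamped at max_multiple times the base
// interval. The 3.0s base is inferred from the "10*default (30 seconds)"
// comment; all values here are illustrative.
#include <algorithm>
#include <iostream>

int main() {
  const double base = 3.0;           // assumed mon_client_hunt_interval default
  const double backoff = 2.0;        // mon_client_hunt_interval_backoff
  const double max_multiple = 10.0;  // mon_client_hunt_interval_max_multiple

  double interval = base;
  for (int attempt = 1; attempt <= 6; ++attempt) {
    std::cout << "attempt " << attempt << ": wait " << interval << "s\n";
    // double the timeout, but never beyond 10*default (30s here)
    interval = std::min(interval * backoff, base * max_multiple);
  }
}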
.set_default(false)
.set_description(""),
- Option("mon_stat_smooth_intervals", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+ Option("mon_stat_smooth_intervals", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(6)
- .set_description(""),
+ .set_min(1)
+ .add_service("mgr")
+ .set_description("number of PGMaps stats over which we calc the average read/write throughput of the whole cluster"),
Option("mon_election_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(5)
Option("mon_pg_stuck_threshold", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(60)
- .set_description(""),
+ .set_description("number of seconds after which pgs can be considered stuck inactive, unclean, etc")
+ .set_long_description("see doc/control.rst under dump_stuck for more info")
+ .add_service("mgr"),
Option("mon_pg_min_inactive", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(1)
Option("mon_pg_warn_max_object_skew", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(10.0)
- .set_description(""),
+ .set_description("max skew few average in objects per pg")
+ .add_service("mgr"),
Option("mon_pg_warn_min_objects", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(10000)
- .set_description(""),
+ .set_description("do not warn below this object #")
+ .add_service("mgr"),
Option("mon_pg_warn_min_pool_objects", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(1000)
- .set_description(""),
+ .set_description("do not warn on pools below this object #")
+ .add_service("mgr"),
Option("mon_pg_check_down_all_threshold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(.5)
- .set_description(""),
+ .set_description("threshold of down osds after which we check all pgs")
+ .add_service("mgr"),
Option("mon_cache_target_full_warn_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(.66)
.set_default(true)
.set_description("Warn about the health JSON format change in preluminous JSON fields"),
- Option("mon_health_max_detail", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+ Option("mon_health_max_detail", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(50)
- .set_description(""),
+ .set_description("max detailed pgs to report in health detail"),
Option("mon_health_log_update_period", Option::TYPE_INT, Option::LEVEL_DEV)
.set_default(5)
Option("mon_pool_quota_warn_threshold", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(0)
- .set_description(""),
+ .set_description("percent of quota at which to issue warnings")
+ .add_service("mgr"),
Option("mon_pool_quota_crit_threshold", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(0)
- .set_description(""),
+ .set_description("percent of quota at which to issue errors")
+ .add_service("mgr"),
Option("crush_location", Option::TYPE_STR, Option::LEVEL_ADVANCED)
.set_default("")
stamp_delta += delta_t;
pg_sum_delta.stats.add(d.stats);
- if (pg_sum_deltas.size() > (unsigned)MAX(1, cct ? cct->_conf->mon_stat_smooth_intervals : 1)) {
+ auto smooth_intervals =
+ cct ? cct->_conf->get_val<uint64_t>("mon_stat_smooth_intervals") : 1;
+ if (pg_sum_deltas.size() > smooth_intervals) {
pg_sum_delta.stats.sub(pg_sum_deltas.front().first.stats);
stamp_delta -= pg_sum_deltas.front().second;
pg_sum_deltas.pop_front();
* average it out.
*/
result_pool_delta->stats.add(d.stats);
- size_t s = MAX(1, cct ? cct->_conf->mon_stat_smooth_intervals : 1);
+ size_t s = cct ? cct->_conf->get_val<uint64_t>("mon_stat_smooth_intervals") : 1;
if (delta_avg_list->size() > s) {
result_pool_delta->stats.sub(delta_avg_list->front().first.stats);
*result_ts_delta -= delta_avg_list->front().second;
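// Both hunks above implement the same sliding-window smoothing: each new
// delta is added to a running sum and, once more than
// mon_stat_smooth_intervals entries have accumulated, the oldest entry is
// subtracted and dropped. A self-contained sketch of that bookkeeping
// (sample values are hypothetical):
#include <deque>
#include <initializer_list>
#include <iostream>

int main() {
  const size_t smooth_intervals = 6;  // default of mon_stat_smooth_intervals
  std::deque<double> deltas;          // stands in for pg_sum_deltas
  double running_sum = 0;             // stands in for pg_sum_delta

  for (double sample : {10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0}) {
    deltas.push_back(sample);
    running_sum += sample;
    if (deltas.size() > smooth_intervals) {  // same test as the code above
      running_sum -= deltas.front();         // subtract the oldest delta
      deltas.pop_front();
    }
    std::cout << "smoothed avg: " << running_sum / deltas.size() << "\n";
  }
}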
health_check_map_t *checks) const
{
utime_t now = ceph_clock_now();
- const unsigned max = cct->_conf->mon_health_max_detail;
+ const auto max = cct->_conf->get_val<uint64_t>("mon_health_max_detail");
const auto& pools = osdmap.get_pools();
typedef enum pg_consequence_t {
}
}
- utime_t cutoff = now - utime_t(cct->_conf->mon_pg_stuck_threshold, 0);
+ utime_t cutoff = now - utime_t(cct->_conf->get_val<int64_t>("mon_pg_stuck_threshold"), 0);
// Loop over all PGs, if there are any possibly-unhealthy states in there
if (!possible_responses.empty()) {
for (const auto& i : pg_stat) {
// MANY_OBJECTS_PER_PG
if (!pg_stat.empty()) {
list<string> pgp_detail, many_detail;
+ const auto mon_pg_warn_min_objects =
+ cct->_conf->get_val<int64_t>("mon_pg_warn_min_objects");
+ const auto mon_pg_warn_min_pool_objects =
+ cct->_conf->get_val<int64_t>("mon_pg_warn_min_pool_objects");
+ const auto mon_pg_warn_max_object_skew =
+ cct->_conf->get_val<double>("mon_pg_warn_max_object_skew");
for (auto p = pg_pool_sum.begin();
p != pg_pool_sum.end();
++p) {
}
int average_objects_per_pg = pg_sum.stats.sum.num_objects / pg_stat.size();
if (average_objects_per_pg > 0 &&
- pg_sum.stats.sum.num_objects >= cct->_conf->mon_pg_warn_min_objects &&
- p->second.stats.sum.num_objects >=
- cct->_conf->mon_pg_warn_min_pool_objects) {
+ pg_sum.stats.sum.num_objects >= mon_pg_warn_min_objects &&
+ p->second.stats.sum.num_objects >= mon_pg_warn_min_pool_objects) {
int objects_per_pg = p->second.stats.sum.num_objects / pi->get_pg_num();
float ratio = (float)objects_per_pg / (float)average_objects_per_pg;
- if (cct->_conf->mon_pg_warn_max_object_skew > 0 &&
- ratio > cct->_conf->mon_pg_warn_max_object_skew) {
+ if (mon_pg_warn_max_object_skew > 0 &&
+ ratio > mon_pg_warn_max_object_skew) {
ostringstream ss;
ss << "pool " << name << " objects per pg ("
<< objects_per_pg << ") is more than " << ratio
// POOL_FULL
// POOL_NEAR_FULL
{
- float warn_threshold = (float)g_conf->mon_pool_quota_warn_threshold/100;
- float crit_threshold = (float)g_conf->mon_pool_quota_crit_threshold/100;
+ float warn_threshold = (float)g_conf->get_val<int64_t>("mon_pool_quota_warn_threshold")/100;
+ float crit_threshold = (float)g_conf->get_val<int64_t>("mon_pool_quota_crit_threshold")/100;
list<string> full_detail, nearfull_detail;
unsigned full_pools = 0, nearfull_pools = 0;
for (auto it : pools) {
stuckop_vec.push_back("unclean");
int64_t threshold;
cmd_getval(g_ceph_context, cmdmap, "threshold", threshold,
- int64_t(g_conf->mon_pg_stuck_threshold));
+ g_conf->get_val<int64_t>("mon_pg_stuck_threshold"));
r = pg_map.dump_stuck_pg_stats(ds, f, (int)threshold, stuckop_vec);
odata->append(ds);
// if a large number of osds changed state, just iterate over the whole
// pg map.
if (need_check_down_pg_osds.size() > (unsigned)osdmap.get_num_osds() *
- g_conf->mon_pg_check_down_all_threshold) {
+ g_conf->get_val<double>("mon_pg_check_down_all_threshold")) {
check_all = true;
}
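// Worked example of the threshold above, with assumed numbers: with the
// default mon_pg_check_down_all_threshold of .5 and 100 OSDs in the map,
// more than 50 OSDs changing state in one pass makes check_all true, trading
// per-OSD lookups for a single iteration over the whole pg map.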