From 9f0bc152ab2224e6e9fe2799dbd83337262d5119 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 23 Feb 2017 15:55:35 -0500 Subject: [PATCH] osd: restructure and simplify internal fullness checks First, eliminate the useless nearfull failsafe--all it did was generate a log message, which we can do based on the OSDMap states. Add some new helpers. Unify the cluster nearfull/full vs failsafe states so that failsafe is a "really" full state that is more severe than full, so we have NONE, NEARFULL, FULL, FAILSAFE. Pull the full/nearfull ratios out of the OSDMap (remember that we require luminous mons, so these will be initialized). Signed-off-by: Sage Weil --- src/common/config_opts.h | 1 - src/osd/OSD.cc | 88 +++++++++++++++++++++++++++------------- src/osd/OSD.h | 19 ++++++--- 3 files changed, 73 insertions(+), 35 deletions(-) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 0cbec2006d3bc..ef4dc9ebceba9 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -875,7 +875,6 @@ OPTION(osd_op_history_size, OPT_U32, 20) // Max number of completed ops to tr OPTION(osd_op_history_duration, OPT_U32, 600) // Oldest completed op to track OPTION(osd_target_transaction_size, OPT_INT, 30) // to adjust various transactions that batch smaller items OPTION(osd_failsafe_full_ratio, OPT_FLOAT, .97) // what % full makes an OSD "full" (failsafe) -OPTION(osd_failsafe_nearfull_ratio, OPT_FLOAT, .90) // what % full makes an OSD near full (failsafe) OPTION(osd_fast_fail_on_connection_refused, OPT_BOOL, true) // immediately mark OSDs as down once they refuse to accept connections OPTION(osd_pg_object_context_cache_count, OPT_INT, 64) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index d1df4799b88a3..5c2cf7975024d 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -278,7 +278,6 @@ OSDService::OSDService(OSD *osd) : stat_lock("OSDService::stat_lock"), full_status_lock("OSDService::full_status_lock"), cur_state(NONE), - last_msg(0), cur_ratio(0), 
epoch_lock("OSDService::epoch_lock"), boot_epoch(0), up_epoch(0), bind_epoch(0), @@ -707,19 +706,9 @@ float OSDService::get_failsafe_full_ratio() return full_ratio; } -float OSDService::get_failsafe_nearfull_ratio() -{ - float nearfull_ratio = cct->_conf->osd_failsafe_nearfull_ratio; - if (nearfull_ratio > 1.0) nearfull_ratio /= 100.0; - return nearfull_ratio; -} - -void OSDService::check_nearfull_warning(const osd_stat_t &osd_stat) +void OSDService::check_full_status(const osd_stat_t &osd_stat) { Mutex::Locker l(full_status_lock); - enum s_names new_state; - - time_t now = ceph_clock_gettime(); // We base ratio on kb_avail rather than kb_used because they can // differ significantly e.g. on btrfs volumes with a large number of @@ -728,39 +717,80 @@ void OSDService::check_nearfull_warning(const osd_stat_t &osd_stat) // much space is available to use than how much we've already used. float ratio = ((float)(osd_stat.kb - osd_stat.kb_avail)) / ((float)osd_stat.kb); - float nearfull_ratio = get_failsafe_nearfull_ratio(); - float full_ratio = get_failsafe_full_ratio(); cur_ratio = ratio; - if (full_ratio > 0 && ratio > full_ratio) { - new_state = FULL; - } else if (nearfull_ratio > 0 && ratio > nearfull_ratio) { - new_state = NEAR; - } else { + // The OSDMap ratios take precedence. So if the failsafe is .95 and + // the admin sets the cluster full to .96, the failsafe moves up to .96 + // too. (Not that having failsafe == full is ideal, but it's better than + // dropping writes before the cluster appears full.) 
+ OSDMapRef osdmap = get_osdmap(); + if (!osdmap || osdmap->get_epoch() == 0) { cur_state = NONE; return; } + float nearfull_ratio = osdmap->get_nearfull_ratio(); + float full_ratio = std::max(osdmap->get_full_ratio(), nearfull_ratio); + float failsafe_ratio = std::max(get_failsafe_full_ratio(), full_ratio); + if (full_ratio <= 0 || + nearfull_ratio <= 0) { + derr << __func__ << " full_ratio or nearfull_ratio is <= 0" << dendl; + cur_state = NONE; + return; + } + + enum s_names new_state; + if (ratio > failsafe_ratio) { + new_state = FAILSAFE; + } else if (ratio > full_ratio) { + new_state = FULL; + } else if (ratio > nearfull_ratio) { + new_state = NEARFULL; + } else { + new_state = NONE; + } + dout(20) << __func__ << " cur ratio " << ratio + << ". nearfull_ratio " << nearfull_ratio + << ", full_ratio " << full_ratio + << ", failsafe_ratio " << failsafe_ratio + << ", new state " << get_full_state_name(new_state) + << dendl; + + // warn if (cur_state != new_state) { + dout(10) << __func__ << " " << get_full_state_name(cur_state) + << " -> " << get_full_state_name(new_state) << dendl; + if (new_state == FAILSAFE) { + clog->error() << "failsafe engaged, dropping updates, now " + << (int)roundf(ratio * 100) << "% full"; + } else if (cur_state == FAILSAFE) { + clog->error() << "failsafe disengaged, no longer dropping updates, now " + << (int)roundf(ratio * 100) << "% full"; + } cur_state = new_state; - } else if (now - last_msg < cct->_conf->osd_op_complaint_time) { - return; } - last_msg = now; - if (cur_state == FULL) - clog->error() << "OSD full dropping all updates " << (int)roundf(ratio * 100) << "% full"; - else - clog->warn() << "OSD near full (" << (int)roundf(ratio * 100) << "%)"; } bool OSDService::check_failsafe_full() { Mutex::Locker l(full_status_lock); - if (cur_state == FULL) + if (cur_state == FAILSAFE) return true; return false; } +bool OSDService::is_nearfull() +{ + Mutex::Locker l(full_status_lock); + return cur_state == NEARFULL; +} + +bool 
OSDService::is_full() +{ + Mutex::Locker l(full_status_lock); + return cur_state >= FULL; +} + bool OSDService::too_full_for_backfill(double *_ratio, double *_max_ratio) { Mutex::Locker l(full_status_lock); @@ -801,9 +831,9 @@ void OSDService::update_osd_stat(vector& hb_peers) osd->logger->set(l_osd_stat_bytes_used, used); osd->logger->set(l_osd_stat_bytes_avail, avail); - check_nearfull_warning(osd_stat); - dout(20) << "update_osd_stat " << osd_stat << dendl; + + check_full_status(osd_stat); } void OSDService::send_message_osd_cluster(int peer, Message *m, epoch_t from_epoch) diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 6e16d633cbdd2..e305d7ef5e41e 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -1136,14 +1136,23 @@ public: // -- OSD Full Status -- private: Mutex full_status_lock; - enum s_names { NONE, NEAR, FULL } cur_state; - time_t last_msg; - double cur_ratio; + enum s_names { NONE, NEARFULL, FULL, FAILSAFE } cur_state; // ascending + const char *get_full_state_name(s_names s) { + switch (s) { + case NONE: return "none"; + case NEARFULL: return "nearfull"; + case FULL: return "full"; + case FAILSAFE: return "failsafe"; + default: return "???"; + } + } + double cur_ratio; ///< current utilization float get_failsafe_full_ratio(); - float get_failsafe_nearfull_ratio(); - void check_nearfull_warning(const osd_stat_t &stat); + void check_full_status(const osd_stat_t &stat); public: bool check_failsafe_full(); + bool is_nearfull(); + bool is_full(); bool too_full_for_backfill(double *ratio, double *max_ratio); // -- epochs -- -- 2.39.5