From: David Zafman Date: Fri, 31 Mar 2017 21:13:14 +0000 (-0700) Subject: osd: Revamp injectfull op to support all full states X-Git-Tag: v12.0.2~51^2~11 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=1e2fde1012fe9df1ce114256cd72d9da902a01ab;p=ceph.git osd: Revamp injectfull op to support all full states Use check_* for injectable full checks Use is_* to just test simple cur_state Signed-off-by: David Zafman --- diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 1368c9d654e..644c9b959c8 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -221,7 +221,6 @@ OSDService::OSDService(OSD *osd) : whoami(osd->whoami), store(osd->store), log_client(osd->log_client), clog(osd->clog), pg_recovery_stats(osd->pg_recovery_stats), - injectfull(0), cluster_messenger(osd->cluster_messenger), client_messenger(osd->client_messenger), logger(osd->logger), @@ -757,11 +756,14 @@ void OSDService::check_full_status(const osd_stat_t &osd_stat) nearfull_ratio = failsafe_ratio; } - enum s_names new_state; - // If testing with injectfull, let's keep determined state as FAILSAFE - if (ratio > failsafe_ratio) { + string inject; + s_names new_state; + if (injectfull_state > NONE && injectfull) { + new_state = injectfull_state; + inject = "(Injected)"; + } else if (ratio > failsafe_ratio) { new_state = FAILSAFE; - } else if (ratio > full_ratio || injectfull) { + } else if (ratio > full_ratio) { new_state = FULL; } else if (ratio > backfillfull_ratio) { new_state = BACKFILLFULL; @@ -776,6 +778,7 @@ void OSDService::check_full_status(const osd_stat_t &osd_stat) << ", full_ratio " << full_ratio << ", failsafe_ratio " << failsafe_ratio << ", new state " << get_full_state_name(new_state) + << " " << inject << dendl; // warn @@ -816,48 +819,73 @@ bool OSDService::need_fullness_update() return want != cur; } -bool OSDService::check_failsafe_full() +bool OSDService::_check_full(s_names type, ostream &ss) const { Mutex::Locker l(full_status_lock); - if (injectfull) { - // injectfull is either a count of the number of times to return full + if (injectfull && injectfull_state >= type) { + // injectfull is either a count of the number of times to return failsafe full // or if -1 then always return full if (injectfull > 0) --injectfull; - dout(5) << __func__ << " Injected full OSD (" << (injectfull < 0 ? "set" : std::to_string(injectfull)) << ")" << dendl; + ss << "Injected " << get_full_state_name(type) << " OSD (" + << (injectfull < 0 ? "set" : std::to_string(injectfull)) << ")"; return true; } + ss << "current usage is " << cur_ratio; + return cur_state >= type; +} + +bool OSDService::check_failsafe_full(ostream &ss) const +{ + return _check_full(FAILSAFE, ss); +} + +bool OSDService::check_full(ostream &ss) const +{ + return _check_full(FULL, ss); +} + +bool OSDService::check_backfill_full(ostream &ss) const +{ + return _check_full(BACKFILLFULL, ss); +} + +bool OSDService::check_nearfull(ostream &ss) const +{ + return _check_full(NEARFULL, ss); +} + +bool OSDService::is_failsafe_full() const +{ + Mutex::Locker l(full_status_lock); return cur_state == FAILSAFE; } -bool OSDService::is_nearfull() +bool OSDService::is_full() const { Mutex::Locker l(full_status_lock); - return cur_state >= NEARFULL; + return cur_state >= FULL; } -bool OSDService::is_backfillfull() +bool OSDService::is_backfillfull() const { Mutex::Locker l(full_status_lock); return cur_state >= BACKFILLFULL; } -bool OSDService::is_full() +bool OSDService::is_nearfull() const { Mutex::Locker l(full_status_lock); - return cur_state >= FULL; + return cur_state >= NEARFULL; } -bool OSDService::too_full_for_backfill(ostream &ss) +void OSDService::set_injectfull(s_names type, int64_t count) { Mutex::Locker l(full_status_lock); - if (cur_state >= BACKFILLFULL) { - ss << "current usage is " << cur_ratio << ", which is greater than max allowed ratio"; - return true; - } - return false; + injectfull_state = type; + injectfull = count; } void OSDService::update_osd_stat(vector& hb_peers) @@ -2660,6 +2688,7 @@ void OSD::final_init() r = admin_socket->register_command( "injectfull", "injectfull " \ + "name=type,type=CephString,req=false " \ "name=count,type=CephInt,req=false ", test_ops_hook, "Inject a full disk (optional count times)"); @@ -4887,7 +4916,21 @@ void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store, return; } if (command == "injectfull") { - cmd_getval(service->cct, cmdmap, "count", service->injectfull, (int64_t)-1); + int64_t count; + string type; + OSDService::s_names state; + cmd_getval(service->cct, cmdmap, "type", type, string("full")); + cmd_getval(service->cct, cmdmap, "count", count, (int64_t)-1); + if (type == "none" || count == 0) { + type = "none"; + count = 0; + } + state = service->get_full_state(type); + if (state == OSDService::s_names::INVALID) { + ss << "Invalid type use (none, nearfull, backfillfull, full, failsafe)"; + return; + } + service->set_injectfull(state, count); return; } ss << "Internal error - command=" << command; diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 19a1ac7a2df..45f76415f49 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -461,7 +461,6 @@ public: LogClient &log_client; LogChannelRef clog; PGRecoveryStats &pg_recovery_stats; - int64_t injectfull; private: Messenger *&cluster_messenger; Messenger *&client_messenger; @@ -1138,9 +1137,10 @@ public: // -- OSD Full Status -- private: - Mutex full_status_lock; - enum s_names { NONE, NEARFULL, BACKFILLFULL, FULL, FAILSAFE } cur_state; // ascending - const char *get_full_state_name(s_names s) { + friend TestOpsSocketHook; + mutable Mutex full_status_lock; + enum s_names { INVALID = -1, NONE, NEARFULL, BACKFILLFULL, FULL, FAILSAFE } cur_state; // ascending + const char *get_full_state_name(s_names s) const { switch (s) { case NONE: return "none"; case NEARFULL: return "nearfull"; @@ -1150,16 +1150,37 @@ private: default: return "???"; } } + s_names get_full_state(string type) const { + if (type == "none") + return NONE; + else if (type == "failsafe") + return FAILSAFE; + else if (type == "full") + return FULL; + else if (type == "backfillfull") + return BACKFILLFULL; + else if (type == "nearfull") + return NEARFULL; + else + return INVALID; + } double cur_ratio; ///< current utilization + mutable int64_t injectfull = 0; + s_names injectfull_state = NONE; float get_failsafe_full_ratio(); void check_full_status(const osd_stat_t &stat); + bool _check_full(s_names type, ostream &ss) const; public: - bool check_failsafe_full(); - bool is_nearfull(); - bool is_backfillfull(); - bool is_full(); - bool too_full_for_backfill(ostream &ss); + bool check_failsafe_full(ostream &ss) const; + bool check_full(ostream &ss) const; + bool check_backfill_full(ostream &ss) const; + bool check_nearfull(ostream &ss) const; + bool is_failsafe_full() const; + bool is_full() const; + bool is_backfillfull() const; + bool is_nearfull() const; bool need_fullness_update(); ///< osdmap state needs update + void set_injectfull(s_names type, int64_t count); // -- epochs -- diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 7cee9141820..d8d9c26dd14 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -6561,8 +6561,8 @@ PG::RecoveryState::RepNotRecovering::react(const RequestBackfillPrio &evt) ldout(pg->cct, 10) << "backfill reservation rejected: failure injection" << dendl; post_event(RemoteReservationRejected()); - } else if (pg->osd->too_full_for_backfill(ss) && - !pg->cct->_conf->osd_debug_skip_full_check_in_backfill_reservation) { + } else if (!pg->cct->_conf->osd_debug_skip_full_check_in_backfill_reservation && + pg->osd->check_backfill_full(ss)) { ldout(pg->cct, 10) << "backfill reservation rejected: " << ss.str() << dendl; post_event(RemoteReservationRejected()); @@ -6597,8 +6597,8 @@ PG::RecoveryState::RepWaitBackfillReserved::react(const RemoteBackfillReserved & pg->osd->remote_reserver.cancel_reservation(pg->info.pgid); post_event(RemoteReservationRejected()); return discard_event(); - } else if (pg->osd->too_full_for_backfill(ss) && - !pg->cct->_conf->osd_debug_skip_full_check_in_backfill_reservation) { + } else if (!pg->cct->_conf->osd_debug_skip_full_check_in_backfill_reservation && + pg->osd->check_backfill_full(ss)) { ldout(pg->cct, 10) << "backfill reservation rejected after reservation: " << ss.str() << dendl; pg->osd->remote_reserver.cancel_reservation(pg->info.pgid); diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 553ffca191e..5a73ca5e982 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -1891,8 +1891,10 @@ void PrimaryLogPG::do_op(OpRequestRef& op) // mds should have stopped writing before this point. // We can't allow OSD to become non-startable even if mds // could be writing as part of file removals. - if (write_ordered && osd->check_failsafe_full()) { + ostringstream ss; + if (write_ordered && osd->check_failsafe_full(ss)) { dout(10) << __func__ << " fail-safe full check failed, dropping request" + << ss.str() << dendl; return; } @@ -3332,7 +3334,7 @@ void PrimaryLogPG::do_scan( case MOSDPGScan::OP_SCAN_GET_DIGEST: { ostringstream ss; - if (osd->too_full_for_backfill(ss)) { + if (osd->check_backfill_full(ss)) { dout(1) << __func__ << ": Canceling backfill, " << ss.str() << dendl; queue_peering_event( CephPeeringEvtRef(