From b7d31e5f5952c631dd4172bcb825e77a13fc60bc Mon Sep 17 00:00:00 2001 From: David Zafman Date: Mon, 21 Apr 2014 23:52:04 -0700 Subject: [PATCH] osd, common: If agent_work() finds no objs to work on delay 5 (default) secs Add config osd_agent_delay_time of 5 seconds Honor delay by ignoring agent_choose_mode() calls Add tier_delay to logger Treat restart after delay like we were previously idle Fixes: #8113 Backport: firefly Signed-off-by: David Zafman --- src/common/config_opts.h | 1 + src/osd/OSD.cc | 30 +++++++++++++++- src/osd/OSD.h | 3 ++ src/osd/PG.h | 4 ++- src/osd/ReplicatedPG.cc | 76 +++++++++++++++++++++++++++++++++++----- src/osd/ReplicatedPG.h | 6 ++-- src/osd/TierAgentState.h | 13 +++++-- 7 files changed, 117 insertions(+), 16 deletions(-) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index a065a772f9b13..4c9e423c911b8 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -399,6 +399,7 @@ OPTION(osd_backfill_retry_interval, OPT_DOUBLE, 10.0) OPTION(osd_agent_max_ops, OPT_INT, 4) OPTION(osd_agent_min_evict_effort, OPT_FLOAT, .1) OPTION(osd_agent_quantize_effort, OPT_FLOAT, .1) +OPTION(osd_agent_delay_time, OPT_FLOAT, 5.0) // decay atime and hist histograms after how many objects go by OPTION(osd_agent_hist_halflife, OPT_INT, 1000) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 2d6379055b23d..615a94ba9e037 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -201,6 +201,8 @@ OSDService::OSDService(OSD *osd) : agent_active(true), agent_thread(this), agent_stop_flag(false), + agent_timer_lock("OSD::agent_timer_lock"), + agent_timer(osd->client_messenger->cct, agent_timer_lock), objecter_lock("OSD::objecter_lock"), objecter_timer(osd->client_messenger->cct, objecter_lock), objecter(new Objecter(osd->client_messenger->cct, osd->objecter_messenger, osd->monc, &objecter_osdmap, @@ -435,6 +437,10 @@ void OSDService::shutdown() Mutex::Locker l(backfill_request_lock); backfill_request_timer.shutdown(); } + { + Mutex::Locker 
l(agent_timer_lock); + agent_timer.shutdown(); + } osdmap = OSDMapRef(); next_osdmap = OSDMapRef(); } @@ -451,6 +457,7 @@ void OSDService::init() objecter->init_locked(); } watch_timer.init(); + agent_timer.init(); agent_thread.create(); } @@ -466,6 +473,15 @@ void OSDService::activate_map() agent_lock.Unlock(); } +class AgentTimeoutCB : public Context { + PGRef pg; +public: + AgentTimeoutCB(PGRef _pg) : pg(_pg) {} + void finish(int) { + pg->agent_choose_mode_restart(); + } +}; + void OSDService::agent_entry() { dout(10) << __func__ << " start" << dendl; @@ -501,7 +517,18 @@ void OSDService::agent_entry() PGRef pg = *agent_queue_pos; int max = g_conf->osd_agent_max_ops - agent_ops; agent_lock.Unlock(); - pg->agent_work(max); + if (!pg->agent_work(max)) { + dout(10) << __func__ << " " << *pg + << " no agent_work, delay for " << g_conf->osd_agent_delay_time + << " seconds" << dendl; + + osd->logger->inc(l_osd_tier_delay); + // Queue a timer to call agent_choose_mode for this pg after osd_agent_delay_time seconds (default 5) + agent_timer_lock.Lock(); + Context *cb = new AgentTimeoutCB(pg); + agent_timer.add_event_after(g_conf->osd_agent_delay_time, cb); + agent_timer_lock.Unlock(); + } agent_lock.Lock(); } agent_lock.Unlock(); @@ -1478,6 +1505,7 @@ void OSD::create_logger() osd_plb.add_u64_counter(l_osd_tier_whiteout, "tier_whiteout"); osd_plb.add_u64_counter(l_osd_tier_dirty, "tier_dirty"); osd_plb.add_u64_counter(l_osd_tier_clean, "tier_clean"); + osd_plb.add_u64_counter(l_osd_tier_delay, "tier_delay"); osd_plb.add_u64_counter(l_osd_agent_wake, "agent_wake"); osd_plb.add_u64_counter(l_osd_agent_skip, "agent_skip"); diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 6b3c89d9ce5d5..ce8b74c6328c5 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -133,6 +133,7 @@ enum { l_osd_tier_whiteout, l_osd_tier_dirty, l_osd_tier_clean, + l_osd_tier_delay, l_osd_agent_wake, l_osd_agent_skip, @@ -466,6 +467,8 @@ public: } } agent_thread; bool agent_stop_flag; + Mutex agent_timer_lock; + SafeTimer agent_timer; void 
agent_entry(); void agent_stop(); diff --git a/src/osd/PG.h b/src/osd/PG.h index fa5bccd1d20ff..035a9675e08ff 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -2120,9 +2120,11 @@ public: virtual void check_blacklisted_watchers() = 0; virtual void get_watchers(std::list&) = 0; - virtual void agent_work(int max) = 0; + virtual bool agent_work(int max) = 0; virtual void agent_stop() = 0; + virtual void agent_delay() = 0; virtual void agent_clear() = 0; + virtual void agent_choose_mode_restart() = 0; }; ostream& operator<<(ostream& out, const PG& pg); diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 44bb805cf7685..6cc5e560a6d9a 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -10924,6 +10924,7 @@ void ReplicatedPG::agent_setup() agent_state->position.hash = pool.info.get_random_pg_position( info.pgid.pgid, rand()); + agent_state->start = agent_state->position; dout(10) << __func__ << " allocated new state, position " << agent_state->position << dendl; @@ -10944,13 +10945,14 @@ void ReplicatedPG::agent_clear() agent_state.reset(NULL); } -void ReplicatedPG::agent_work(int start_max) +// Return false if no objects operated on since start of object hash space +bool ReplicatedPG::agent_work(int start_max) { lock(); if (!agent_state) { dout(10) << __func__ << " no agent state, stopping" << dendl; unlock(); - return; + return true; } assert(!deleting); @@ -10958,7 +10960,7 @@ void ReplicatedPG::agent_work(int start_max) if (agent_state->is_idle()) { dout(10) << __func__ << " idle, stopping" << dendl; unlock(); - return; + return true; } osd->logger->inc(l_osd_agent_wake); @@ -11063,13 +11065,42 @@ void ReplicatedPG::agent_work(int start_max) agent_state->temp_hist.decay(); } + // Total objects operated on so far + int total_started = agent_state->started + started; + bool need_delay = false; + + dout(20) << __func__ << " start pos " << agent_state->position + << " next start pos " << next + << " started " << total_started << dendl; + + 
// See if we've made a full pass over the object hash space + // This might check at most ls_max objects a second time to notice that + // we've checked every object at least once. + if (agent_state->position < agent_state->start && next >= agent_state->start) { + dout(20) << __func__ << " wrap around " << agent_state->start << dendl; + if (total_started == 0) + need_delay = true; + else + total_started = 0; + agent_state->start = next; + } + agent_state->started = total_started; + + // See if we are starting from beginning if (next.is_max()) agent_state->position = hobject_t(); else agent_state->position = next; - dout(20) << __func__ << " final position " << agent_state->position << dendl; + + if (need_delay) { + assert(agent_state->delaying == false); + agent_delay(); + unlock(); + return false; + } agent_choose_mode(); unlock(); + return true; } void ReplicatedPG::agent_load_hit_sets() @@ -11280,8 +11311,35 @@ void ReplicatedPG::agent_stop() } } -void ReplicatedPG::agent_choose_mode() +void ReplicatedPG::agent_delay() { + dout(20) << __func__ << dendl; + if (agent_state && !agent_state->is_idle()) { + assert(agent_state->delaying == false); + agent_state->delaying = true; + osd->agent_disable_pg(this, agent_state->evict_effort); + } +} + +void ReplicatedPG::agent_choose_mode_restart() +{ + dout(20) << __func__ << dendl; + lock(); + if (agent_state && agent_state->delaying) { + agent_state->delaying = false; + agent_choose_mode(true); + } + unlock(); +} + +void ReplicatedPG::agent_choose_mode(bool restart) +{ + // Let delay play out + if (agent_state->delaying) { + dout(20) << __func__ << this << " delaying, ignored" << dendl; + return; + } + uint64_t divisor = pool.info.get_pg_num_divisor(info.pgid.pgid); uint64_t num_user_objects = info.stats.stats.sum.num_objects; @@ -11355,7 +11413,7 @@ void ReplicatedPG::agent_choose_mode() TierAgentState::flush_mode_t flush_mode = TierAgentState::FLUSH_MODE_IDLE; uint64_t flush_target = 
pool.info.cache_target_dirty_ratio_micro; uint64_t flush_slop = (float)flush_target * g_conf->osd_agent_slop; - if (agent_state->flush_mode == TierAgentState::FLUSH_MODE_IDLE) + if (restart || agent_state->flush_mode == TierAgentState::FLUSH_MODE_IDLE) flush_target += flush_slop; else flush_target -= MIN(flush_target, flush_slop); @@ -11372,7 +11430,7 @@ void ReplicatedPG::agent_choose_mode() unsigned evict_effort = 0; uint64_t evict_target = pool.info.cache_target_full_ratio_micro; uint64_t evict_slop = (float)evict_target * g_conf->osd_agent_slop; - if (agent_state->evict_mode == TierAgentState::EVICT_MODE_IDLE) + if (restart || agent_state->evict_mode == TierAgentState::EVICT_MODE_IDLE) evict_target += evict_slop; else evict_target -= MIN(evict_target, evict_slop); @@ -11436,11 +11494,11 @@ void ReplicatedPG::agent_choose_mode() // (including flush). This is probably fine (they should be // correlated) but it is not precisely correct. if (agent_state->is_idle()) { - if (!old_idle) { + if (!restart && !old_idle) { osd->agent_disable_pg(this, old_effort); } } else { - if (old_idle) { + if (restart || old_idle) { osd->agent_enable_pg(this, agent_state->evict_effort); } else if (old_effort != agent_state->evict_effort) { osd->agent_adjust_pg(this, old_effort, agent_state->evict_effort); diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index 38bdfbe34ad00..2bc8dd2c0f69f 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -805,7 +805,7 @@ protected: friend class C_HitSetFlushing; void agent_setup(); ///< initialize agent state - void agent_work(int max); ///< entry point to do some agent work + bool agent_work(int max); ///< entry point to do some agent work bool agent_maybe_flush(ObjectContextRef& obc); ///< maybe flush bool agent_maybe_evict(ObjectContextRef& obc); ///< maybe evict @@ -821,11 +821,13 @@ protected: /// stop the agent void agent_stop(); + void agent_delay(); /// clear agent state void agent_clear(); - void 
agent_choose_mode(); ///< choose (new) agent mode(s) + void agent_choose_mode(bool restart = false); ///< choose (new) agent mode(s) + void agent_choose_mode_restart(); /// true if we can send an ondisk/commit for v bool already_complete(eversion_t v) { diff --git a/src/osd/TierAgentState.h b/src/osd/TierAgentState.h index b5f7910f20993..e9c22b245fc10 100644 --- a/src/osd/TierAgentState.h +++ b/src/osd/TierAgentState.h @@ -17,6 +17,10 @@ struct TierAgentState { /// current position iterating across pool hobject_t position; + /// Count of objects agent_work has operated on since the "start" position of object hash space + int started; + hobject_t start; + bool delaying; /// histogram of ages we've encountered pow2_hist_t atime_hist; @@ -66,7 +70,9 @@ struct TierAgentState { unsigned evict_effort; TierAgentState() - : hist_age(0), + : started(0), + delaying(false), + hist_age(0), flush_mode(FLUSH_MODE_IDLE), evict_mode(EVICT_MODE_IDLE), evict_effort(0) @@ -75,8 +81,9 @@ struct TierAgentState { /// false if we have any work to do bool is_idle() const { return - flush_mode == FLUSH_MODE_IDLE && - evict_mode == EVICT_MODE_IDLE; + delaying || + (flush_mode == FLUSH_MODE_IDLE && + evict_mode == EVICT_MODE_IDLE); } /// add archived HitSet -- 2.39.5