From: Sage Weil Date: Fri, 18 Mar 2016 16:50:35 +0000 (-0400) Subject: osd/PG: indicate in pg query output whether ignore_history_les would help X-Git-Tag: v10.1.1~111^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=65858fea23a5e2d2249a8950ce9bef4738961110;p=ceph.git osd/PG: indicate in pg query output whether ignore_history_les would help Put a hint in the PG query output if setting osd_find_best_info_ignore_history_les = true would allow peering to proceed. This is difficult to diagnose even for a developer (you need to read the peer info *very* carefully) but is easy to hint at. Signed-off-by: Sage Weil --- diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 682f41569ba3..b2cad0b76861 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -970,8 +970,9 @@ PG::Scrubber::~Scrubber() {} * 3) Prefer current primary */ map::const_iterator PG::find_best_info( - const map &infos) const + const map &infos, bool *history_les_bound) const { + assert(history_les_bound); /* See doc/dev/osd_internals/last_epoch_started.rst before attempting * to make changes to this process. Also, make sure to update it * when you find bugs! */ @@ -982,6 +983,7 @@ map::const_iterator PG::find_best_info( ++i) { if (!cct->_conf->osd_find_best_info_ignore_history_les && max_last_epoch_started_found < i->second.history.last_epoch_started) { + *history_les_bound = true; max_last_epoch_started_found = i->second.history.last_epoch_started; } if (!i->second.is_incomplete() && @@ -1287,7 +1289,7 @@ void PG::calc_replicated_acting( * calculate the desired acting, and request a change with the monitor * if it differs from the current acting. 
*/ -bool PG::choose_acting(pg_shard_t &auth_log_shard_id) +bool PG::choose_acting(pg_shard_t &auth_log_shard_id, bool *history_les_bound) { map all_info(peer_info.begin(), peer_info.end()); all_info[pg_whoami] = info; @@ -1299,7 +1301,7 @@ bool PG::choose_acting(pg_shard_t &auth_log_shard_id) } map::const_iterator auth_log_shard = - find_best_info(all_info); + find_best_info(all_info, history_les_bound); if (auth_log_shard == all_info.end()) { if (up != acting) { @@ -1328,7 +1330,8 @@ bool PG::choose_acting(pg_shard_t &auth_log_shard_id) complete_infos.insert(*i); } map::const_iterator i = find_best_info( - complete_infos); + complete_infos, + history_les_bound); if (i != complete_infos.end()) { auth_log_shard = all_info.find(i->first); } @@ -5902,7 +5905,8 @@ void PG::RecoveryState::Primary::exit() /*---------Peering--------*/ PG::RecoveryState::Peering::Peering(my_context ctx) : my_base(ctx), - NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Peering") + NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Peering"), + history_les_bound(false) { context< RecoveryMachine >().log_enter(state_name); @@ -5975,6 +5979,14 @@ boost::statechart::result PG::RecoveryState::Peering::react(const QueryState& q) } q.f->close_section(); + if (history_les_bound) { + q.f->open_array_section("peering_blocked_by_detail"); + q.f->open_object_section("item"); + q.f->dump_string("detail","peering_blocked_by_history_les_bound"); + q.f->close_section(); + q.f->close_section(); + } + q.f->close_section(); return forward_event(); } @@ -6510,7 +6522,9 @@ PG::RecoveryState::Recovered::Recovered(my_context ctx) pg->state_clear(PG_STATE_DEGRADED); // adjust acting set? (e.g. because backfill completed...) 
- if (pg->acting != pg->up && !pg->choose_acting(auth_log_shard)) + bool history_les_bound = false; + if (pg->acting != pg->up && !pg->choose_acting(auth_log_shard, + &history_les_bound)) assert(pg->want_acting.size()); if (context< Active >().all_replicas_activated) @@ -7299,7 +7313,8 @@ PG::RecoveryState::GetLog::GetLog(my_context ctx) PG *pg = context< RecoveryMachine >().pg; // adjust acting? - if (!pg->choose_acting(auth_log_shard)) { + if (!pg->choose_acting(auth_log_shard, + &context< Peering >().history_les_bound)) { if (!pg->want_acting.empty()) { post_event(NeedActingChange()); } else { diff --git a/src/osd/PG.h b/src/osd/PG.h index 87b0d996028d..16d58bd9ebc0 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1041,7 +1041,8 @@ public: void trim_write_ahead(); map::const_iterator find_best_info( - const map &infos) const; + const map &infos, + bool *history_les_bound) const; static void calc_ec_acting( map::const_iterator auth_log_shard, unsigned size, @@ -1070,7 +1071,8 @@ public: set *acting_backfill, pg_shard_t *want_primary, ostream &ss); - bool choose_acting(pg_shard_t &auth_log_shard); + bool choose_acting(pg_shard_t &auth_log_shard, + bool *history_les_bound); void build_might_have_unfound(); void replay_queued_ops(); void activate( @@ -1732,6 +1734,7 @@ public: struct Peering : boost::statechart::state< Peering, Primary, GetInfo >, NamedState { std::unique_ptr< PriorSet > prior_set; + bool history_les_bound; //< need osd_find_best_info_ignore_history_les explicit Peering(my_context ctx); void exit();