From bbd9f0f0b2277c6ba94bd78f828f90563547c360 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 20 Feb 2017 14:26:42 -0500 Subject: [PATCH] osdc/Objecter: _calc_target on all ops so that we notice splits We need to make sure we update the mapping and get an accurate actual_pgid value by recalcuating the mapping on every map change. Otherwise, we may not notice a split (and subsequent actual_pgid change) and resend the same op with a stale spg_t. To fix this, - _calc_target on need_resend - update target regardless of current con Signed-off-by: Sage Weil --- src/osdc/Objecter.cc | 51 ++++++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc index aac9ce6e1cc..69a141329a3 100644 --- a/src/osdc/Objecter.cc +++ b/src/osdc/Objecter.cc @@ -1208,11 +1208,11 @@ void Objecter::handle_osd_map(MOSDMap *m) cluster_full = cluster_full || _osdmap_full_flag(); update_pool_full_map(pool_full_map); + + // check all outstanding requests on every epoch _scan_requests(homeless_session, skipped_map, cluster_full, &pool_full_map, need_resend, need_resend_linger, need_resend_command, sul); - - // osd addr changes? for (map::iterator p = osd_sessions.begin(); p != osd_sessions.end(); ) { OSDSession *s = p->second; @@ -1220,6 +1220,7 @@ void Objecter::handle_osd_map(MOSDMap *m) &pool_full_map, need_resend, need_resend_linger, need_resend_command, sul); ++p; + // osd down or addr change? if (!osdmap->is_up(s->osd) || (s->con && s->con->get_peer_addr() != osdmap->get_inst(s->osd).addr)) { @@ -1255,6 +1256,23 @@ void Objecter::handle_osd_map(MOSDMap *m) } } + // make sure need_resend targets reflect latest map + for (auto p = need_resend.begin(); p != need_resend.end(); ) { + Op *op = p->second; + if (op->target.epoch < osdmap->get_epoch()) { + ldout(cct, 10) << __func__ << " checking op " << p->first << dendl; + int r = _calc_target(&op->target, nullptr); + if (r == RECALC_OP_TARGET_POOL_DNE) { + p = need_resend.erase(p); + _check_op_pool_dne(op, nullptr); + } else { + ++p; + } + } else { + ++p; + } + } + bool pauserd = osdmap->test_flag(CEPH_OSDMAP_PAUSERD); bool pausewr = osdmap->test_flag(CEPH_OSDMAP_PAUSEWR) || _osdmap_full_flag() || _osdmap_has_pool_full(); @@ -2756,20 +2774,23 @@ int Objecter::_calc_target(op_target_t *t, Connection *con, bool any_change) force_resend = true; } - bool need_resend = false; - - bool paused = target_should_be_paused(t); - if (!paused && paused != t->paused) { + bool unpaused = false; + if (t->paused && !target_should_be_paused(t)) { t->paused = false; - need_resend = true; + unpaused = true; } - if (t->pgid != pgid || + + bool legacy_change = + t->pgid != pgid || is_pg_changed( t->acting_primary, t->acting, acting_primary, acting, - t->used_replica || any_change) || - (con && con->has_features(CEPH_FEATUREMASK_RESEND_ON_SPLIT) && - prev_pgid.is_split(t->pg_num, pg_num, nullptr)) || - force_resend) { + t->used_replica || any_change); + bool split = false; + if (t->pg_num) { + split = prev_pgid.is_split(t->pg_num, pg_num, nullptr); + } + + if (legacy_change || split || force_resend) { t->pgid = pgid; t->acting = acting; t->acting_primary = acting_primary; @@ -2829,9 +2850,11 @@ int Objecter::_calc_target(op_target_t *t, Connection *con, bool any_change) } t->osd = osd; } - need_resend = true; } - if (need_resend) { + if (legacy_change || unpaused || force_resend) { + return RECALC_OP_TARGET_NEED_RESEND; + } + if (split && con && con->has_features(CEPH_FEATUREMASK_RESEND_ON_SPLIT)) { return RECALC_OP_TARGET_NEED_RESEND; } return RECALC_OP_TARGET_NO_ACTION; -- 2.39.5