]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osdc/Objecter: _calc_target on all ops so that we notice splits
authorSage Weil <sage@redhat.com>
Mon, 20 Feb 2017 19:26:42 +0000 (14:26 -0500)
committerSage Weil <sage@redhat.com>
Thu, 23 Feb 2017 15:29:42 +0000 (10:29 -0500)
We need to make sure we update the mapping and get an accurate actual_pgid
value by recalcuating the mapping on every map change.  Otherwise, we may
not notice a split (and subsequent actual_pgid change) and resend the same
op with a stale spg_t.  To fix this,

- _calc_target on need_resend
- update target regardless of current con

Signed-off-by: Sage Weil <sage@redhat.com>
src/osdc/Objecter.cc

index aac9ce6e1cc1b913c6b6c33ff4a63e9f1d67f6ba..69a141329a3d334fad0a2ef6e0a27e9eab32f9d0 100644 (file)
@@ -1208,11 +1208,11 @@ void Objecter::handle_osd_map(MOSDMap *m)
 
        cluster_full = cluster_full || _osdmap_full_flag();
        update_pool_full_map(pool_full_map);
+
+       // check all outstanding requests on every epoch
        _scan_requests(homeless_session, skipped_map, cluster_full,
                       &pool_full_map, need_resend,
                       need_resend_linger, need_resend_command, sul);
-
-       // osd addr changes?
        for (map<int,OSDSession*>::iterator p = osd_sessions.begin();
             p != osd_sessions.end(); ) {
          OSDSession *s = p->second;
@@ -1220,6 +1220,7 @@ void Objecter::handle_osd_map(MOSDMap *m)
                         &pool_full_map, need_resend,
                         need_resend_linger, need_resend_command, sul);
          ++p;
+         // osd down or addr change?
          if (!osdmap->is_up(s->osd) ||
              (s->con &&
               s->con->get_peer_addr() != osdmap->get_inst(s->osd).addr)) {
@@ -1255,6 +1256,23 @@ void Objecter::handle_osd_map(MOSDMap *m)
     }
   }
 
+  // make sure need_resend targets reflect latest map
+  for (auto p = need_resend.begin(); p != need_resend.end(); ) {
+    Op *op = p->second;
+    if (op->target.epoch < osdmap->get_epoch()) {
+      ldout(cct, 10) << __func__ << "  checking op " << p->first << dendl;
+      int r = _calc_target(&op->target, nullptr);
+      if (r == RECALC_OP_TARGET_POOL_DNE) {
+       p = need_resend.erase(p);
+       _check_op_pool_dne(op, nullptr);
+      } else {
+       ++p;
+      }
+    } else {
+      ++p;
+    }
+  }
+
   bool pauserd = osdmap->test_flag(CEPH_OSDMAP_PAUSERD);
   bool pausewr = osdmap->test_flag(CEPH_OSDMAP_PAUSEWR) || _osdmap_full_flag()
     || _osdmap_has_pool_full();
@@ -2756,20 +2774,23 @@ int Objecter::_calc_target(op_target_t *t, Connection *con, bool any_change)
     force_resend = true;
   }
 
-  bool need_resend = false;
-
-  bool paused = target_should_be_paused(t);
-  if (!paused && paused != t->paused) {
+  bool unpaused = false;
+  if (t->paused && !target_should_be_paused(t)) {
     t->paused = false;
-    need_resend = true;
+    unpaused = true;
   }
-  if (t->pgid != pgid ||
+
+  bool legacy_change =
+    t->pgid != pgid ||
       is_pg_changed(
        t->acting_primary, t->acting, acting_primary, acting,
-       t->used_replica || any_change) ||
-      (con && con->has_features(CEPH_FEATUREMASK_RESEND_ON_SPLIT) &&
-       prev_pgid.is_split(t->pg_num, pg_num, nullptr)) ||
-      force_resend) {
+       t->used_replica || any_change);
+  bool split = false;
+  if (t->pg_num) {
+    split = prev_pgid.is_split(t->pg_num, pg_num, nullptr);
+  }
+
+  if (legacy_change || split || force_resend) {
     t->pgid = pgid;
     t->acting = acting;
     t->acting_primary = acting_primary;
@@ -2829,9 +2850,11 @@ int Objecter::_calc_target(op_target_t *t, Connection *con, bool any_change)
       }
       t->osd = osd;
     }
-    need_resend = true;
   }
-  if (need_resend) {
+  if (legacy_change || unpaused || force_resend) {
+    return RECALC_OP_TARGET_NEED_RESEND;
+  }
+  if (split && con && con->has_features(CEPH_FEATUREMASK_RESEND_ON_SPLIT)) {
     return RECALC_OP_TARGET_NEED_RESEND;
   }
   return RECALC_OP_TARGET_NO_ACTION;