git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
Objecter: don't attempt to read from non-primary on EC pools 35444/head
author: Ilya Dryomov <idryomov@gmail.com>
Thu, 28 May 2020 10:24:20 +0000 (12:24 +0200)
committer: Nathan Cutler <ncutler@suse.com>
Sat, 6 Jun 2020 08:31:02 +0000 (10:31 +0200)
With BALANCE_READS or LOCALIZE_READS set on a read from an EC pool,
the client will hang if a non-primary OSD is picked, because that OSD
will most likely drop the op (or start waiting for peering that won't
actually happen).

Refactor the code so that the replica read conditions don't need to
be repeated.  Apart from the missing replicated-pool check, the acting
set size was checked only in the LOCALIZE_READS case.

Fixes: https://tracker.ceph.com/issues/45793
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
(cherry picked from commit aabdd4eed4c67531680a6f1f00c4a3cfe372b65c)

src/osdc/Objecter.cc

index 9a63f64f13be69423577eb8a9daf1a066e7652a5..f16e4855093f2cc7aaae6e19f5c4e776b615ed62 100644 (file)
@@ -2927,20 +2927,19 @@ int Objecter::_calc_target(op_target_t *t, Connection *con, bool any_change)
                   << " acting " << acting
                   << " primary " << acting_primary << dendl;
     t->used_replica = false;
-    if (acting_primary == -1) {
-      t->osd = -1;
-    } else {
+    if ((t->flags & (CEPH_OSD_FLAG_BALANCE_READS |
+                     CEPH_OSD_FLAG_LOCALIZE_READS)) &&
+        !is_write && pi->is_replicated() && acting.size() > 1) {
       int osd;
-      bool read = is_read && !is_write;
-      if (read && (t->flags & CEPH_OSD_FLAG_BALANCE_READS)) {
+      ceph_assert(is_read && acting[0] == acting_primary);
+      if (t->flags & CEPH_OSD_FLAG_BALANCE_READS) {
        int p = rand() % acting.size();
        if (p)
          t->used_replica = true;
        osd = acting[p];
        ldout(cct, 10) << " chose random osd." << osd << " of " << acting
                       << dendl;
-      } else if (read && (t->flags & CEPH_OSD_FLAG_LOCALIZE_READS) &&
-                acting.size() > 1) {
+      } else {
        // look for a local replica.  prefer the primary if the
        // distance is the same.
        int best = -1;
@@ -2963,10 +2962,10 @@ int Objecter::_calc_target(op_target_t *t, Connection *con, bool any_change)
        }
        ceph_assert(best >= 0);
        osd = acting[best];
-      } else {
-       osd = acting_primary;
       }
       t->osd = osd;
+    } else {
+      t->osd = acting_primary;
     }
   }
   if (legacy_change || unpaused || force_resend) {