From: Ilya Dryomov Date: Thu, 28 May 2020 10:24:20 +0000 (+0200) Subject: Objecter: don't attempt to read from non-primary on EC pools X-Git-Tag: v16.1.0~2148^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=aabdd4eed4c67531680a6f1f00c4a3cfe372b65c;p=ceph.git Objecter: don't attempt to read from non-primary on EC pools With BALANCE_READS or LOCALIZE_READS set, the client will hang if a non-primary OSD is picked because the OSD will most likely drop the op (or start waiting for peering that won't actually happen). Refactor the code so that the replica read conditions don't need to be repeated. Apart from the missing replicated pool check, the acting set size was checked only in the LOCALIZE_READS case. Fixes: https://tracker.ceph.com/issues/45793 Signed-off-by: Ilya Dryomov --- diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc index c59807ee4e389..6c260d57cecc2 100644 --- a/src/osdc/Objecter.cc +++ b/src/osdc/Objecter.cc @@ -2853,20 +2853,19 @@ int Objecter::_calc_target(op_target_t *t, Connection *con, bool any_change) << " acting " << acting << " primary " << acting_primary << dendl; t->used_replica = false; - if (acting_primary == -1) { - t->osd = -1; - } else { + if ((t->flags & (CEPH_OSD_FLAG_BALANCE_READS | + CEPH_OSD_FLAG_LOCALIZE_READS)) && + !is_write && pi->is_replicated() && acting.size() > 1) { int osd; - bool read = is_read && !is_write; - if (read && (t->flags & CEPH_OSD_FLAG_BALANCE_READS)) { + ceph_assert(is_read && acting[0] == acting_primary); + if (t->flags & CEPH_OSD_FLAG_BALANCE_READS) { int p = rand() % acting.size(); if (p) t->used_replica = true; osd = acting[p]; ldout(cct, 10) << " chose random osd." << osd << " of " << acting << dendl; - } else if (read && (t->flags & CEPH_OSD_FLAG_LOCALIZE_READS) && - acting.size() > 1) { + } else { // look for a local replica. prefer the primary if the // distance is the same. 
int best = -1; @@ -2889,10 +2888,10 @@ int Objecter::_calc_target(op_target_t *t, Connection *con, bool any_change) } ceph_assert(best >= 0); osd = acting[best]; - } else { - osd = acting_primary; } t->osd = osd; + } else { + t->osd = acting_primary; } } if (legacy_change || unpaused || force_resend) {