From: Sage Weil Date: Mon, 20 Dec 2010 23:44:40 +0000 (-0800) Subject: objecter: send read to random replica if BALANCE_READS flag is set X-Git-Tag: v0.25~231^2~16^2~7 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=a28dbc93bfb2d377e0415f79248df8ab725006d9;p=ceph.git objecter: send read to random replica if BALANCE_READS flag is set The flag is just an existing one; we may want to rename. This assumes the OSD will discard any request sent prior to any change in the PG acting set. We need to make sure the OSD behavior matches. Signed-off-by: Sage Weil --- diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc index 05bc18c63972..c3a792186402 100644 --- a/src/osdc/Objecter.cc +++ b/src/osdc/Objecter.cc @@ -532,7 +532,7 @@ tid_t Objecter::op_submit(Op *op, OSDSession *s) return op->tid; } -bool Objecter::is_pg_changed(vector& o, vector& n) +bool Objecter::is_pg_changed(vector& o, vector& n, bool any_change) { if (o.empty() && n.empty()) return false; // both still empty @@ -540,6 +540,8 @@ bool Objecter::is_pg_changed(vector& o, vector& n) return true; // was empty, now not, or vice versa if (o[0] != n[0]) return true; // primary changed + if (any_change && o != n) + return true; return false; // same primary (tho replicas may have changed) } @@ -551,12 +553,27 @@ bool Objecter::recalc_op_target(Op *op) pgid = osdmap->object_locator_to_pg(op->oid, op->oloc); osdmap->pg_to_acting_osds(pgid, acting); - if (op->pgid != pgid || is_pg_changed(op->acting, acting)) { + if (op->pgid != pgid || is_pg_changed(op->acting, acting, op->used_replica)) { op->pgid = pgid; op->acting = acting; - dout(10) << "recalc_op_target tid " << op->tid << " pgid " << pgid << " acting " << acting << dendl; + dout(10) << "recalc_op_target tid " << op->tid + << " pgid " << pgid << " acting " << acting << dendl; - OSDSession *s = op->acting.size() ? get_session(op->acting[0]) : NULL; + OSDSession *s = NULL; + op->used_replica = false; + if (acting.size()) { + int osd; + bool read = (op->flags & CEPH_OSD_FLAG_READ) && (op->flags & CEPH_OSD_FLAG_WRITE) == 0; + if (read && (op->flags & CEPH_OSD_FLAG_BALANCE_READS)) { + int p = rand() % acting.size(); + if (p) + op->used_replica = true; + osd = acting[p]; + dout(10) << " chose random osd" << osd << " of " << acting << dendl; + } else + osd = acting[0]; + s = get_session(osd); + } if (op->session != s) { if (!op->session) diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h index 6cfff72918a0..37a23fb8fdb6 100644 --- a/src/osdc/Objecter.h +++ b/src/osdc/Objecter.h @@ -275,6 +275,7 @@ public: pg_t pgid; vector acting; + bool used_replica; Connection *con; // for rx buffer only @@ -303,7 +304,7 @@ public: int f, Context *ac, Context *co, eversion_t *ov) : session(NULL), session_item(this), incarnation(0), oid(o), oloc(ol), - con(NULL), + used_replica(false), con(NULL), snapid(CEPH_NOSNAP), outbl(0), flags(f), priority(0), onack(ac), oncommit(co), tid(0), attempts(0), paused(false), objver(ov) { @@ -501,7 +502,7 @@ public: map > > waiting_for_map; void send_op(Op *op); - bool is_pg_changed(vector& a, vector& b); + bool is_pg_changed(vector& a, vector& b, bool any_change=false); bool recalc_op_target(Op *op); bool recalc_linger_op_target(LingerOp *op);