From: Sage Weil Date: Tue, 8 Apr 2014 16:00:11 +0000 (-0700) Subject: osd: drop previous interval ops even if primary happens to be the same X-Git-Tag: v0.80-rc1~71^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=5d6116199eb4010ca40df8cc4f1ab3ead0ac761a;p=ceph.git osd: drop previous interval ops even if primary happens to be the same If we have two consecutive intervals with the same primary, the client will not resend the op and the same_primary_since epoch will not change, and all is well. If, however, we have 3 intervals, and the primary changes away and then back to a particular OSD, the OSD will currently still process the old request (assuming the timing works out) because it is currently the primary. This is unnecessary because the client will resend the request. It may even introduce a hard-to-hit ordering problem since whether or not the OSD processes the message becomes dependent on how many subsequent maps it has consumed when the request is processed. Instead, simplify the minor tangle of helpers by making a single simple check that discards requests from before same_primary_since. We can then avoid using the same_for_*() helpers and drop the check from handle_misdireted_op(), which is also nice because the name is now accurate (it *only* deals with ops that are in fact misdirected, not just slow to arrive). Signed-off-by: Sage Weil --- diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index baa98481651a..f08a43b6e5dc 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -7273,10 +7273,7 @@ void OSDService::handle_misdirected_op(PG *pg, OpRequestRef op) MOSDOp *m = static_cast(op->get_req()); assert(m->get_header().type == CEPH_MSG_OSD_OP); - if (m->get_map_epoch() < pg->info.history.same_primary_since) { - dout(7) << *pg << " changed after " << m->get_map_epoch() << ", dropping" << dendl; - return; - } + assert(m->get_map_epoch() >= pg->info.history.same_primary_since); if (pg->is_ec_pg()) { /** diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 4102b5156c84..73c0f1d83b56 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -4911,20 +4911,25 @@ bool PG::can_discard_op(OpRequestRef op) dout(20) << " discard " << *m << dendl; return true; } + + if (m->get_map_epoch() < info.history.same_primary_since) { + dout(7) << " changed after " << m->get_map_epoch() + << ", dropping " << *m << dendl; + return true; + } + if ((m->get_flags() & (CEPH_OSD_FLAG_BALANCE_READS | CEPH_OSD_FLAG_LOCALIZE_READS)) && op->may_read() && !(op->may_write() || op->may_cache())) { // balanced reads; any replica will do - if (!((is_primary() || is_replica()) && - same_for_read_since(m->get_map_epoch()))) { + if (!(is_primary() || is_replica())) { osd->handle_misdirected_op(this, op); return true; } } else { // normal case; must be primary - if (!(is_primary() && - same_for_modify_since(m->get_map_epoch()))) { + if (!is_primary()) { osd->handle_misdirected_op(this, op); return true; }