From: Sage Weil Date: Tue, 24 Sep 2013 18:22:19 +0000 (-0700) Subject: osd/ReplicatedPG: respect RWORDERED rados flag X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=503f865d6432bead72aac0ffba0539d807f078c4;p=ceph.git osd/ReplicatedPG: respect RWORDERED rados flag If this flag is set, we need to order reads as writes. In particular, this means that reads will wait for degraded object recovery even if there is a local copy. They will subsequently be ordered after a preceding write that is waiting for the same thing. Signed-off-by: Sage Weil (cherry picked from commit 9322305c80e995e1c4a964edff0fc094329d951b) --- diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 443e98d3d80..8beec85eebe 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -661,13 +661,21 @@ void ReplicatedPG::do_op(OpRequestRef op) return do_pg_op(op); } - dout(10) << "do_op " << *m << (op->may_write() ? " may_write" : "") << dendl; + // order this op as a write? + bool write_ordered = op->may_write() || (m->get_flags() & CEPH_OSD_FLAG_RWORDERED); + + dout(10) << "do_op " << *m + << (op->may_write() ? " may_write" : "") + << (op->may_read() ? " may_read" : "") + << " -> " << (write_ordered ? "write-ordered" : "read-ordered") + << dendl; hobject_t head(m->get_oid(), m->get_object_locator().key, CEPH_NOSNAP, m->get_pg().ps(), info.pgid.pool(), m->get_object_locator().nspace); - if (op->may_write() && scrubber.write_blocked_by_scrub(head)) { + + if (write_ordered && scrubber.write_blocked_by_scrub(head)) { dout(20) << __func__ << ": waiting for scrub" << dendl; waiting_for_active.push_back(op); op->mark_delayed("waiting for scrub"); @@ -681,7 +689,7 @@ void ReplicatedPG::do_op(OpRequestRef op) } // degraded object? 
- if (op->may_write() && is_degraded_object(head)) { + if (write_ordered && is_degraded_object(head)) { wait_for_degraded_object(head, op); return; } @@ -701,7 +709,7 @@ void ReplicatedPG::do_op(OpRequestRef op) } // degraded object? - if (op->may_write() && is_degraded_object(snapdir)) { + if (write_ordered && is_degraded_object(snapdir)) { wait_for_degraded_object(snapdir, op); return; }