From 0b81ff68c04b8e9c1c66720d4160311998528ca6 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 22 Oct 2013 19:56:55 -0700 Subject: [PATCH] osd/ReplicatedPG: skip promote for DELETE If an op starts with DELETE there is no need to promote the old content from the base tier. Note that this only works if the FAILOK flag is set. Otherwise, we need to know whether the object existed or not to return either 0 or -ENOENT. Signed-off-by: Sage Weil --- src/osd/ReplicatedPG.cc | 16 ++++++++++++++++ src/osd/ReplicatedPG.h | 5 +++++ 2 files changed, 21 insertions(+) diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 6e338c2bf7395..64d9c1dcef136 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -1231,6 +1231,8 @@ bool ReplicatedPG::maybe_handle_cache(OpRequestRef op, ObjectContextRef obc, case pg_pool_t::CACHEMODE_WRITEBACK: if (obc.get() && obc->obs.exists) { // we have the object already return false; + } else if (can_skip_promote(op, obc)) { + return false; } else { // try and promote! promote_object(op, obc); return true; @@ -1259,6 +1261,20 @@ bool ReplicatedPG::maybe_handle_cache(OpRequestRef op, ObjectContextRef obc, return false; } +bool ReplicatedPG::can_skip_promote(OpRequestRef op, ObjectContextRef obc) +{ + MOSDOp *m = static_cast<MOSDOp*>(op->get_req()); + if (m->ops.empty()) + return false; + // if we get a delete with FAILOK we can skip promote. without + // FAILOK we still need to promote (or do something smarter) to + // determine whether to return ENOENT or 0. 
+ if (m->ops[0].op.op == CEPH_OSD_OP_DELETE && + (m->ops[0].op.flags & CEPH_OSD_OP_FLAG_FAILOK)) + return true; + return false; +} + void ReplicatedPG::do_cache_redirect(OpRequestRef op, ObjectContextRef obc) { MOSDOp *m = static_cast<MOSDOp*>(op->get_req()); diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index ef1a385ce469a..06797d0c6cae7 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -818,6 +818,11 @@ protected: */ void promote_object(OpRequestRef op, ObjectContextRef obc); + /** + * Check if the op is such that we can skip promote (e.g., DELETE) + */ + bool can_skip_promote(OpRequestRef op, ObjectContextRef obc); + int prepare_transaction(OpContext *ctx); // pg on-disk content -- 2.39.5