From 8dec2b27356d49daaaab6c90bf32f49c1c0b66be Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 13 Dec 2013 13:35:25 -0800 Subject: [PATCH] librados, osd: add flags to COPY_FROM If we initiate a COPY_FROM as part of a FLUSH operation, we will need to set a flag so that the read-side of the copy can join the existing in-progress operation without taking additional locks. Similarly, we need to pass flags from the client indicating whether we should ignore overlay or cache logic while performing the copy. These are used by the promote and flush logic. Note that none of these flags are exposed through librados (at least not at this time). Signed-off-by: Sage Weil --- src/include/rados.h | 8 ++++++++ src/include/rados/librados.hpp | 3 ++- src/librados/librados.cc | 3 ++- src/osd/ReplicatedPG.cc | 27 +++++++++++++++++++++------ src/osd/ReplicatedPG.h | 14 +++++++++----- src/osd/osd_types.cc | 1 + src/osdc/Objecter.h | 4 +++- 7 files changed, 46 insertions(+), 14 deletions(-) diff --git a/src/include/rados.h b/src/include/rados.h index 082118ca9e5cd..2b1a8d3002713 100644 --- a/src/include/rados.h +++ b/src/include/rados.h @@ -349,6 +349,7 @@ enum { CEPH_OSD_FLAG_IGNORE_CACHE = 0x8000, /* ignore cache logic */ CEPH_OSD_FLAG_SKIPRWLOCKS = 0x10000, /* skip rw locks */ CEPH_OSD_FLAG_IGNORE_OVERLAY =0x20000, /* ignore pool overlay */ + CEPH_OSD_FLAG_FLUSH = 0x40000, /* this is part of flush */ }; enum { @@ -375,6 +376,12 @@ enum { CEPH_OSD_CMPXATTR_MODE_U64 = 2 }; +enum { + CEPH_OSD_COPY_FROM_FLAG_FLUSH = 1, /* part of a flush operation */ + CEPH_OSD_COPY_FROM_FLAG_IGNORE_OVERLAY = 2, /* ignore pool overlay */ + CEPH_OSD_COPY_FROM_FLAG_IGNORE_CACHE = 4, /* ignore osd cache logic */ +}; + /* * an individual object operation. 
each may be accompanied by some data * payload @@ -426,6 +433,7 @@ struct ceph_osd_op { struct { __le64 snapid; __le64 src_version; + __u8 flags; } __attribute__ ((packed)) copy_from; struct { struct ceph_timespec stamp; diff --git a/src/include/rados/librados.hpp b/src/include/rados/librados.hpp index 388af64976249..2ce85310a0254 100644 --- a/src/include/rados/librados.hpp +++ b/src/include/rados/librados.hpp @@ -300,7 +300,8 @@ namespace librados * @param src_ioctx ioctx for the source object * @param version current version of the source object */ - void copy_from(const std::string& src, const IoCtx& src_ioctx, uint64_t src_version); + void copy_from(const std::string& src, const IoCtx& src_ioctx, + uint64_t src_version); /** * undirty an object diff --git a/src/librados/librados.cc b/src/librados/librados.cc index c0bdebf8527ec..dd199abd0ec8e 100644 --- a/src/librados/librados.cc +++ b/src/librados/librados.cc @@ -393,7 +393,8 @@ void librados::ObjectWriteOperation::copy_from(const std::string& src, uint64_t src_version) { ::ObjectOperation *o = (::ObjectOperation *)impl; - o->copy_from(object_t(src), src_ioctx.io_ctx_impl->snap_seq, src_ioctx.io_ctx_impl->oloc, src_version); + o->copy_from(object_t(src), src_ioctx.io_ctx_impl->snap_seq, + src_ioctx.io_ctx_impl->oloc, src_version, 0); } void librados::ObjectWriteOperation::undirty() diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 39046d082861e..d79aaf5e6674d 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -1325,7 +1325,10 @@ dout(10) << __func__ << " " << obc->obs.oi.soid << dendl; PromoteCallback *cb = new PromoteCallback(op, obc, temp_target, this); object_locator_t oloc(m->get_object_locator()); oloc.pool = pool.info.tier_of; - start_copy(cb, obc, obc->obs.oi.soid, oloc, 0, temp_target); + start_copy(cb, obc, obc->obs.oi.soid, oloc, 0, + CEPH_OSD_COPY_FROM_FLAG_IGNORE_OVERLAY | + CEPH_OSD_COPY_FROM_FLAG_IGNORE_CACHE, + temp_target); assert(obc->is_blocked()); 
wait_for_blocked_object(obc->obs.oi.soid, op); @@ -3791,7 +3794,8 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector& ops) CopyFromCallback *cb = new CopyFromCallback(ctx, temp_target); ctx->copy_cb = cb; start_copy(cb, ctx->obc, src, src_oloc, src_version, - temp_target); + op.copy_from.flags, + temp_target); result = -EINPROGRESS; } else { // finish @@ -4507,12 +4511,14 @@ int ReplicatedPG::fill_in_copy_get(bufferlist::iterator& bp, OSDOp& osd_op, } void ReplicatedPG::start_copy(CopyCallback *cb, ObjectContextRef obc, - hobject_t src, object_locator_t oloc, version_t version, - const hobject_t& temp_dest_oid) + hobject_t src, object_locator_t oloc, + version_t version, unsigned flags, + const hobject_t& temp_dest_oid) { const hobject_t& dest = obc->obs.oi.soid; dout(10) << __func__ << " " << dest << " from " << src << " " << oloc << " v" << version + << " flags " << flags << dendl; // cancel a previous in-progress copy? @@ -4523,7 +4529,7 @@ void ReplicatedPG::start_copy(CopyCallback *cb, ObjectContextRef obc, cancel_copy(cop, false); } - CopyOpRef cop(new CopyOp(cb, obc, src, oloc, version, temp_dest_oid)); + CopyOpRef cop(new CopyOp(cb, obc, src, oloc, version, flags, temp_dest_oid)); copy_ops[dest] = cop; ++obc->copyfrom_readside; @@ -4549,10 +4555,19 @@ void ReplicatedPG::_copy_some(ObjectContextRef obc, CopyOpRef cop) C_Copyfrom *fin = new C_Copyfrom(this, obc->obs.oi.soid, get_last_peering_reset()); + + unsigned flags = 0; + if (cop->flags & CEPH_OSD_COPY_FROM_FLAG_FLUSH) + flags |= CEPH_OSD_FLAG_FLUSH; + if (cop->flags & CEPH_OSD_COPY_FROM_FLAG_IGNORE_CACHE) + flags |= CEPH_OSD_FLAG_IGNORE_CACHE; + if (cop->flags & CEPH_OSD_COPY_FROM_FLAG_IGNORE_OVERLAY) + flags |= CEPH_OSD_FLAG_IGNORE_OVERLAY; + osd->objecter_lock.Lock(); tid_t tid = osd->objecter->read(cop->src.oid, cop->oloc, op, cop->src.snap, NULL, - CEPH_OSD_FLAG_IGNORE_OVERLAY, + flags, new C_OnFinisher(fin, &osd->objecter_finisher), // discover the object version if we don't know it yet diff 
--git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index dff369edf8ddb..f751e05b3f3e4 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -121,6 +121,7 @@ public: ObjectContextRef obc; hobject_t src; object_locator_t oloc; + unsigned flags; CopyResults *results; @@ -136,9 +137,12 @@ public: hobject_t temp_oid; object_copy_cursor_t temp_cursor; - CopyOp(CopyCallback *cb_, ObjectContextRef _obc, hobject_t s, object_locator_t l, - version_t v, const hobject_t& dest) - : cb(cb_), obc(_obc), src(s), oloc(l), + CopyOp(CopyCallback *cb_, ObjectContextRef _obc, hobject_t s, + object_locator_t l, + version_t v, + unsigned f, + const hobject_t& dest) + : cb(cb_), obc(_obc), src(s), oloc(l), flags(f), results(NULL), objecter_tid(0), rval(-1), @@ -983,8 +987,8 @@ protected: * @param temp_dest_oid: the temporary object to use for large objects */ void start_copy(CopyCallback *cb, ObjectContextRef obc, hobject_t src, - object_locator_t oloc, version_t version, - const hobject_t& temp_dest_oid); + object_locator_t oloc, version_t version, unsigned flags, + const hobject_t& temp_dest_oid); void process_copy_chunk(hobject_t oid, tid_t tid, int r); void _write_copy_chunk(CopyOpRef cop, ObjectStore::Transaction *t); void _copy_some(ObjectContextRef obc, CopyOpRef cop); diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 6b1228253827d..def1e3f7055aa 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -38,6 +38,7 @@ const char *ceph_osd_flag_name(unsigned flag) case CEPH_OSD_FLAG_IGNORE_CACHE: return "ignore_cache"; case CEPH_OSD_FLAG_SKIPRWLOCKS: return "skiprwlocks"; case CEPH_OSD_FLAG_IGNORE_OVERLAY: return "ignore_overlay"; + case CEPH_OSD_FLAG_FLUSH: return "flush"; default: return "???"; } } diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h index 704e4127a4bc6..301811e779369 100644 --- a/src/osdc/Objecter.h +++ b/src/osdc/Objecter.h @@ -860,10 +860,12 @@ struct ObjectOperation { osd_op.op.snap.snapid = snapid; } - void 
copy_from(object_t src, snapid_t snapid, object_locator_t src_oloc, version_t src_version) { + void copy_from(object_t src, snapid_t snapid, object_locator_t src_oloc, + version_t src_version, unsigned flags) { OSDOp& osd_op = add_op(CEPH_OSD_OP_COPY_FROM); osd_op.op.copy_from.snapid = snapid; osd_op.op.copy_from.src_version = src_version; + osd_op.op.copy_from.flags = flags; ::encode(src, osd_op.indata); ::encode(src_oloc, osd_op.indata); } -- 2.39.5