From 6f27f6089238d2a20e7a0f13066eddfc31192dc8 Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Fri, 20 Jan 2017 16:17:49 -0800 Subject: [PATCH] rgw: assume obj write is a first write if it fails and the object already exists then retry. This improves first obj write performance at the expense of overwrites. Fixes: http://tracker.ceph.com/issues/18622 Signed-off-by: Yehuda Sadeh --- src/rgw/rgw_rados.cc | 48 +++++++++++++++++++++++++++++++++++--------- src/rgw/rgw_rados.h | 9 ++++++--- 2 files changed, 44 insertions(+), 13 deletions(-) diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index 7c9f6b881f3..ef1c95b7c1d 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -6288,8 +6288,8 @@ int RGWRados::swift_versioning_restore(RGWObjectCtx& obj_ctx, * exclusive: create object exclusively * Returns: 0 on success, -ERR# otherwise. */ -int RGWRados::Object::Write::write_meta(uint64_t size, uint64_t accounted_size, - map& attrs) +int RGWRados::Object::Write::_do_write_meta(uint64_t size, uint64_t accounted_size, + map& attrs, bool assume_noent) { rgw_bucket bucket; rgw_rados_ref ref; @@ -6298,7 +6298,7 @@ int RGWRados::Object::Write::write_meta(uint64_t size, uint64_t accounted_size, ObjectWriteOperation op; RGWObjState *state; - int r = target->get_state(&state, false); + int r = target->get_state(&state, false, assume_noent); if (r < 0) return r; @@ -6421,6 +6421,10 @@ int RGWRados::Object::Write::write_meta(uint64_t size, uint64_t accounted_size, if (r < 0) { /* we can expect to get -ECANCELED if object was replaced under, or -ENOENT if was removed, or -EEXIST if it did not exist before and now it does */ + if (r == -EEXIST && assume_noent) { + target->invalidate_state(); + return r; + } goto done_cancel; } @@ -6486,7 +6490,10 @@ done_cancel: * should treat it as a success */ if (meta.if_match == NULL && meta.if_nomatch == NULL) { - if (r == -ECANCELED || r == -ENOENT || r == -EEXIST) { + if (r == -ECANCELED || r == -ENOENT || + (r == -EEXIST && 
!assume_noent)) /* if assume_noent, we want to send back error so that + * we'd be called again with assume_noent == false + */ { r = 0; } } else { @@ -6516,6 +6523,23 @@ done_cancel: return r; } +int RGWRados::Object::Write::write_meta(uint64_t size, uint64_t accounted_size, + map& attrs) +{ + bool assume_noent = (meta.if_match == NULL && meta.if_nomatch == NULL); + int r; + if (assume_noent) { + r = _do_write_meta(size, accounted_size, attrs, assume_noent); + if (r == -EEXIST) { + assume_noent = false; + } + } + if (!assume_noent) { + r = _do_write_meta(size, accounted_size, attrs, assume_noent); + } + return r; +} + /** Write/overwrite a system object. */ int RGWRados::put_system_obj_impl(rgw_obj& obj, uint64_t size, real_time *mtime, map& attrs, int flags, @@ -8503,7 +8527,7 @@ int RGWRados::get_system_obj_state(RGWObjectCtx *rctx, rgw_obj& obj, RGWObjState return ret; } -int RGWRados::get_obj_state_impl(RGWObjectCtx *rctx, rgw_obj& obj, RGWObjState **state, bool follow_olh) +int RGWRados::get_obj_state_impl(RGWObjectCtx *rctx, rgw_obj& obj, RGWObjState **state, bool follow_olh, bool assume_noent) { bool need_follow_olh = follow_olh && !obj.have_instance(); @@ -8519,7 +8543,11 @@ int RGWRados::get_obj_state_impl(RGWObjectCtx *rctx, rgw_obj& obj, RGWObjState * s->obj = obj; - int r = RGWRados::raw_obj_stat(obj, &s->size, &s->mtime, &s->epoch, &s->attrset, (s->prefetch_data ? &s->data : NULL), NULL); + int r = -ENOENT; + + if (!assume_noent) { + r = RGWRados::raw_obj_stat(obj, &s->size, &s->mtime, &s->epoch, &s->attrset, (s->prefetch_data ? 
&s->data : NULL), NULL); + } if (r == -ENOENT) { s->exists = false; s->has_attrs = true; @@ -8645,12 +8673,12 @@ int RGWRados::get_obj_state_impl(RGWObjectCtx *rctx, rgw_obj& obj, RGWObjState * return 0; } -int RGWRados::get_obj_state(RGWObjectCtx *rctx, rgw_obj& obj, RGWObjState **state, bool follow_olh) +int RGWRados::get_obj_state(RGWObjectCtx *rctx, rgw_obj& obj, RGWObjState **state, bool follow_olh, bool assume_noent) { int ret; do { - ret = get_obj_state_impl(rctx, obj, state, follow_olh); + ret = get_obj_state_impl(rctx, obj, state, follow_olh, assume_noent); } while (ret == -EAGAIN); return ret; @@ -8806,9 +8834,9 @@ int RGWRados::append_atomic_test(RGWObjectCtx *rctx, rgw_obj& obj, return 0; } -int RGWRados::Object::get_state(RGWObjState **pstate, bool follow_olh) +int RGWRados::Object::get_state(RGWObjState **pstate, bool follow_olh, bool assume_noent) { - return store->get_obj_state(&ctx, obj, pstate, follow_olh); + return store->get_obj_state(&ctx, obj, pstate, follow_olh, assume_noent); } void RGWRados::Object::invalidate_state() diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h index 8cea3ad63e0..81929390751 100644 --- a/src/rgw/rgw_rados.h +++ b/src/rgw/rgw_rados.h @@ -1966,7 +1966,7 @@ class RGWRados int get_olh_target_state(RGWObjectCtx& rctx, rgw_obj& obj, RGWObjState *olh_state, RGWObjState **target_state); int get_system_obj_state_impl(RGWObjectCtx *rctx, rgw_obj& obj, RGWObjState **state, RGWObjVersionTracker *objv_tracker); - int get_obj_state_impl(RGWObjectCtx *rctx, rgw_obj& obj, RGWObjState **state, bool follow_olh); + int get_obj_state_impl(RGWObjectCtx *rctx, rgw_obj& obj, RGWObjState **state, bool follow_olh, bool assume_noent = false); int append_atomic_test(RGWObjectCtx *rctx, rgw_obj& obj, librados::ObjectOperation& op, RGWObjState **state); @@ -2354,7 +2354,7 @@ public: bool bs_initialized; protected: - int get_state(RGWObjState **pstate, bool follow_olh); + int get_state(RGWObjState **pstate, bool follow_olh, bool assume_noent 
= false); void invalidate_state(); int prepare_atomic_modification(librados::ObjectWriteOperation& op, bool reset_obj, const string *ptag, @@ -2459,6 +2459,9 @@ public: explicit Write(RGWRados::Object *_target) : target(_target) {} + int _do_write_meta(uint64_t size, uint64_t accounted_size, + map& attrs, + bool assume_noent); int write_meta(uint64_t size, uint64_t accounted_size, map& attrs); int write_data(const char *data, uint64_t ofs, uint64_t len, bool exclusive); @@ -2859,7 +2862,7 @@ public: map* rmattrs); int get_system_obj_state(RGWObjectCtx *rctx, rgw_obj& obj, RGWObjState **state, RGWObjVersionTracker *objv_tracker); - int get_obj_state(RGWObjectCtx *rctx, rgw_obj& obj, RGWObjState **state, bool follow_olh); + int get_obj_state(RGWObjectCtx *rctx, rgw_obj& obj, RGWObjState **state, bool follow_olh, bool assume_noent = false); int get_obj_state(RGWObjectCtx *rctx, rgw_obj& obj, RGWObjState **state) { return get_obj_state(rctx, obj, state, true); } -- 2.39.5