From 3c9d67b9480cd5bf0839f6e728476c8afd5d13e6 Mon Sep 17 00:00:00 2001 From: Casey Bodley Date: Wed, 25 Sep 2024 18:38:08 -0400 Subject: [PATCH] rgw/rados: rgw_rados_operate() takes version_t* instead of calling ioctx.get_last_version() after a rados operation, callers now pass version_t* as an output parameter. in the null_yield case, that version is assigned to ioctx.get_last_version() as normal. in the async case, we get the version out of librados::async_operate()'s return value Fixes: https://tracker.ceph.com/issues/63935 Signed-off-by: Casey Bodley (cherry picked from commit 7769bdb9438d48afc8500c16167f82c8f549a914) Conflicts: src/rgw/driver/rados/rgw_rados.cc src/rgw/driver/rados/rgw_tools.cc src/rgw/driver/rados/rgw_tools.h src/rgw/services/svc_sys_obj_core.cc RGWSI_RADOS removal not backported boost::asio::spawn() not backported maybe_warn_about_blocking() not backported --- src/rgw/driver/rados/rgw_rados.cc | 45 +++++++++++++--------------- src/rgw/driver/rados/rgw_rados.h | 2 +- src/rgw/driver/rados/rgw_tools.cc | 31 ++++++++++++++----- src/rgw/driver/rados/rgw_tools.h | 4 +-- src/rgw/rgw_aio.cc | 4 +-- src/rgw/services/svc_sys_obj_core.cc | 7 +++-- 6 files changed, 53 insertions(+), 40 deletions(-) diff --git a/src/rgw/driver/rados/rgw_rados.cc b/src/rgw/driver/rados/rgw_rados.cc index 9260a1edc06..410d6c0cf05 100644 --- a/src/rgw/driver/rados/rgw_rados.cc +++ b/src/rgw/driver/rados/rgw_rados.cc @@ -3192,7 +3192,7 @@ int RGWRados::Object::Write::_do_write_meta(const DoutPrefixProvider *dpp, auto& ioctx = ref.pool.ioctx(); tracepoint(rgw_rados, operate_enter, req_id.c_str()); - r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, null_yield); + r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, y, 0, &epoch); tracepoint(rgw_rados, operate_exit, req_id.c_str()); if (r < 0) { /* we can expect to get -ECANCELED if object was replaced under, or -ENOENT if was removed, or -EEXIST if it did not exist @@ -3204,7 +3204,6 @@ int RGWRados::Object::Write::_do_write_meta(const DoutPrefixProvider *dpp, goto done_cancel; } - epoch = ioctx.get_last_version(); poolid = ioctx.get_id(); r = target->complete_atomic_modification(dpp); @@ -5520,7 +5519,8 @@ int RGWRados::Object::Delete::delete_obj(optional_yield y, const DoutPrefixProvi store->remove_rgw_head_obj(op); auto& ioctx = ref.pool.ioctx(); - r = rgw_rados_operate(dpp, ioctx, ref.obj.oid, &op, y); + version_t epoch = 0; + r = rgw_rados_operate(dpp, ioctx, ref.obj.oid, &op, y, 0, &epoch); /* raced with another operation, object state is indeterminate */ const bool need_invalidate = (r == -ECANCELED); @@ -5532,7 +5532,7 @@ int RGWRados::Object::Delete::delete_obj(optional_yield y, const DoutPrefixProvi tombstone_entry entry{*state}; obj_tombstone_cache->add(obj, entry); } - r = index_op.complete_del(dpp, poolid, ioctx.get_last_version(), state->mtime, params.remove_objs, y, log_op); + r = index_op.complete_del(dpp, poolid, epoch, state->mtime, params.remove_objs, y, log_op); int ret = target->complete_atomic_modification(dpp); if (ret < 0) { @@ -6234,7 +6234,8 @@ int RGWRados::set_attrs(const DoutPrefixProvider *dpp, RGWObjectCtx* rctx, RGWBu struct timespec mtime_ts = real_clock::to_timespec(mtime); op.mtime2(&mtime_ts); auto& ioctx = ref.pool.ioctx(); - r = rgw_rados_operate(dpp, ioctx, ref.obj.oid, &op, null_yield); + version_t epoch = 0; + r = rgw_rados_operate(dpp, ioctx, ref.obj.oid, &op, y, 0, &epoch); if (state) { if (r >= 0) { bufferlist acl_bl; @@ -6253,7 +6254,6 @@ int RGWRados::set_attrs(const DoutPrefixProvider *dpp, RGWObjectCtx* rctx, RGWBu if (iter = attrs.find(RGW_ATTR_STORAGE_CLASS); iter != attrs.end()) { storage_class = rgw_bl_str(iter->second); } - uint64_t epoch = ioctx.get_last_version(); int64_t poolid = ioctx.get_id(); r = index_op.complete(dpp, poolid, epoch, state->size, state->accounted_size, mtime, etag, content_type, storage_class, &acl_bl, @@ -8180,12 +8180,7 @@ int RGWRados::raw_obj_stat(const DoutPrefixProvider *dpp, op.read(0, cct->_conf->rgw_max_chunk_size, first_chunk, NULL); } bufferlist outbl; - r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, &outbl, y); - - if (epoch) { - *epoch = ref.pool.ioctx().get_last_version(); - } - + r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, &outbl, y, 0, epoch); if (r < 0) return r; @@ -9073,6 +9068,12 @@ int RGWRados::cls_bucket_list_ordered(const DoutPrefixProvider *dpp, num_entries << " total entries" << dendl; auto& ioctx = index_pool.ioctx(); + + // XXX: check_disk_state() relies on ioctx.get_last_version() but that + // returns 0 because CLSRGWIssueBucketList doesn't make any synchonous calls + rgw_bucket_entry_ver index_ver; + index_ver.pool = ioctx.get_id(); + std::map shard_list_results; cls_rgw_obj_key start_after_key(start_after.name, start_after.instance); r = CLSRGWIssueBucketList(ioctx, start_after_key, prefix, delimiter, @@ -9196,12 +9197,10 @@ int RGWRados::cls_bucket_list_ordered(const DoutPrefixProvider *dpp, /* there are uncommitted ops. We need to check the current * state, and if the tags are old we need to do clean-up as * well. */ - librados::IoCtx sub_ctx; - sub_ctx.dup(ioctx); ldout_bitx(bitx, dpp, 20) << "INFO: " << __func__ << " calling check_disk_state bucket=" << bucket_info.bucket << " entry=" << dirent.key << dendl_bitx; - r = check_disk_state(dpp, sub_ctx, bucket_info, dirent, dirent, + r = check_disk_state(dpp, bucket_info, index_ver, dirent, dirent, updates[tracker.oid_name], y); if (r < 0 && r != -ENOENT) { ldpp_dout(dpp, 0) << __func__ << @@ -9420,6 +9419,9 @@ int RGWRados::cls_bucket_list_unordered(const DoutPrefixProvider *dpp, } } + rgw_bucket_entry_ver index_ver; + index_ver.pool = ioctx.get_id(); + uint32_t count = 0u; std::map updates; rgw_obj_index_key last_added_entry; @@ -9434,7 +9436,7 @@ int RGWRados::cls_bucket_list_unordered(const DoutPrefixProvider *dpp, cls_rgw_bucket_list_op(op, marker, prefix, empty_delimiter, num_entries, list_versions, &result); - r = rgw_rados_operate(dpp, ioctx, oid, &op, nullptr, null_yield); + r = rgw_rados_operate(dpp, ioctx, oid, &op, nullptr, y, 0, &index_ver.epoch); if (r < 0) { ldpp_dout(dpp, 0) << "ERROR: " << __func__ << ": error in rgw_rados_operate (bucket list op), r=" << r << dendl; @@ -9451,12 +9453,10 @@ int RGWRados::cls_bucket_list_unordered(const DoutPrefixProvider *dpp, force_check) { /* there are uncommitted ops. We need to check the current state, * and if the tags are old we need to do cleanup as well. */ - librados::IoCtx sub_ctx; - sub_ctx.dup(ioctx); ldout_bitx(bitx, dpp, 20) << "INFO: " << __func__ << ": calling check_disk_state bucket=" << bucket_info.bucket << " entry=" << dirent.key << dendl_bitx; - r = check_disk_state(dpp, sub_ctx, bucket_info, dirent, dirent, updates[oid], y); + r = check_disk_state(dpp, bucket_info, index_ver, dirent, dirent, updates[oid], y); if (r < 0 && r != -ENOENT) { ldpp_dout(dpp, 0) << "ERROR: " << __func__ << ": error in check_disk_state, r=" << r << dendl; @@ -9687,8 +9687,8 @@ int RGWRados::remove_objs_from_index(const DoutPrefixProvider *dpp, } int RGWRados::check_disk_state(const DoutPrefixProvider *dpp, - librados::IoCtx io_ctx, RGWBucketInfo& bucket_info, + const rgw_bucket_entry_ver& index_ver, rgw_bucket_dir_entry& list_state, rgw_bucket_dir_entry& object, bufferlist& suggested_updates, @@ -9717,8 +9717,6 @@ int RGWRados::check_disk_state(const DoutPrefixProvider *dpp, ldpp_dout(dpp, 0) << "WARNING: generated locator (" << loc << ") is different from listed locator (" << list_state.locator << ")" << dendl; } - io_ctx.locator_set_key(list_state.locator); - RGWObjState *astate = NULL; RGWObjManifest *manifest = nullptr; RGWObjectCtx rctx(this->driver); @@ -9739,8 +9737,7 @@ int RGWRados::check_disk_state(const DoutPrefixProvider *dpp, } // encode a suggested removal of that key - list_state.ver.epoch = io_ctx.get_last_version(); - list_state.ver.pool = io_ctx.get_id(); + list_state.ver = index_ver; ldout_bitx(bitx, dpp, 10) << "INFO: " << __func__ << ": encoding remove of " << list_state.key << " on suggested_updates" << dendl_bitx; cls_rgw_encode_suggestion(CEPH_RGW_REMOVE | suggest_flag, list_state, suggested_updates); return -ENOENT; diff --git a/src/rgw/driver/rados/rgw_rados.h b/src/rgw/driver/rados/rgw_rados.h index d77dd5c5f4d..c314704cfe6 100644 --- a/src/rgw/driver/rados/rgw_rados.h +++ b/src/rgw/driver/rados/rgw_rados.h @@ -1577,8 +1577,8 @@ public: * will encode that info as a suggested update.) */ int check_disk_state(const DoutPrefixProvider *dpp, - librados::IoCtx io_ctx, RGWBucketInfo& bucket_info, + const rgw_bucket_entry_ver& index_ver, rgw_bucket_dir_entry& list_state, rgw_bucket_dir_entry& object, bufferlist& suggested_updates, diff --git a/src/rgw/driver/rados/rgw_tools.cc b/src/rgw/driver/rados/rgw_tools.cc index bc58c71ea56..c5c564ceed9 100644 --- a/src/rgw/driver/rados/rgw_tools.cc +++ b/src/rgw/driver/rados/rgw_tools.cc @@ -178,7 +178,7 @@ int rgw_delete_system_obj(const DoutPrefixProvider *dpp, int rgw_rados_operate(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, const std::string& oid, librados::ObjectReadOperation *op, bufferlist* pbl, - optional_yield y, int flags) + optional_yield y, int flags, version_t* pver) { // given a yield_context, call async_operate() to yield the coroutine instead // of blocking @@ -186,11 +186,14 @@ int rgw_rados_operate(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, con auto& context = y.get_io_context(); auto& yield = y.get_yield_context(); boost::system::error_code ec; - auto bl = librados::async_operate( + auto [ver, bl] = librados::async_operate( context, ioctx, oid, op, flags, yield[ec]); if (pbl) { *pbl = std::move(bl); } + if (pver) { + *pver = ver; + } return -ec.value(); } // work on asio threads should be asynchronous, so warn when they block @@ -200,18 +203,26 @@ int rgw_rados_operate(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, con ldpp_dout(dpp, 20) << "BACKTRACE: " << __func__ << ": " << ClibBackTrace(0) << dendl; #endif } - return ioctx.operate(oid, op, nullptr, flags); + int r = ioctx.operate(oid, op, nullptr, flags); + if (pver) { + *pver = ioctx.get_last_version(); + } + return r; } int rgw_rados_operate(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, const std::string& oid, librados::ObjectWriteOperation *op, optional_yield y, - int flags) + int flags, version_t* pver) { if (y) { auto& context = y.get_io_context(); auto& yield = y.get_yield_context(); boost::system::error_code ec; - librados::async_operate(context, ioctx, oid, op, flags, yield[ec]); + version_t ver = librados::async_operate(context, ioctx, oid, op, flags, + yield[ec]); + if (pver) { + *pver = ver; + } return -ec.value(); } if (is_asio_thread) { @@ -220,7 +231,11 @@ int rgw_rados_operate(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, con ldpp_dout(dpp, 20) << "BACKTRACE: " << __func__ << ": " << ClibBackTrace(0) << dendl; #endif } - return ioctx.operate(oid, op, flags); + int r = ioctx.operate(oid, op, flags); + if (pver) { + *pver = ioctx.get_last_version(); + } + return r; } int rgw_rados_notify(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, const std::string& oid, @@ -231,8 +246,8 @@ int rgw_rados_notify(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, cons auto& context = y.get_io_context(); auto& yield = y.get_yield_context(); boost::system::error_code ec; - auto reply = librados::async_notify(context, ioctx, oid, - bl, timeout_ms, yield[ec]); + auto [ver, reply] = librados::async_notify(context, ioctx, oid, + bl, timeout_ms, yield[ec]); if (pbl) { *pbl = std::move(reply); } diff --git a/src/rgw/driver/rados/rgw_tools.h b/src/rgw/driver/rados/rgw_tools.h index 66600856d16..9eedbab7608 100644 --- a/src/rgw/driver/rados/rgw_tools.h +++ b/src/rgw/driver/rados/rgw_tools.h @@ -96,10 +96,10 @@ extern thread_local bool is_asio_thread; /// perform the rados operation, using the yield context when given int rgw_rados_operate(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, const std::string& oid, librados::ObjectReadOperation *op, bufferlist* pbl, - optional_yield y, int flags = 0); + optional_yield y, int flags = 0, version_t* pver = nullptr); int rgw_rados_operate(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, const std::string& oid, librados::ObjectWriteOperation *op, optional_yield y, - int flags = 0); + int flags = 0, version_t* pver = nullptr); int rgw_rados_notify(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, const std::string& oid, bufferlist& bl, uint64_t timeout_ms, bufferlist* pbl, optional_yield y); diff --git a/src/rgw/rgw_aio.cc b/src/rgw/rgw_aio.cc index 02e3411858a..29c74998c6d 100644 --- a/src/rgw/rgw_aio.cc +++ b/src/rgw/rgw_aio.cc @@ -73,12 +73,12 @@ struct Handler { librados::IoCtx ctx; AioResult& r; // write callback - void operator()(boost::system::error_code ec) const { + void operator()(boost::system::error_code ec, version_t) const { r.result = -ec.value(); throttle->put(r); } // read callback - void operator()(boost::system::error_code ec, bufferlist bl) const { + void operator()(boost::system::error_code ec, version_t, bufferlist bl) const { r.result = -ec.value(); r.data = std::move(bl); throttle->put(r); diff --git a/src/rgw/services/svc_sys_obj_core.cc b/src/rgw/services/svc_sys_obj_core.cc index 30308969131..eb1169afbe9 100644 --- a/src/rgw/services/svc_sys_obj_core.cc +++ b/src/rgw/services/svc_sys_obj_core.cc @@ -178,15 +178,16 @@ int RGWSI_SysObj_Core::read(const DoutPrefixProvider *dpp, ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; return r; } - r = rados_obj.operate(dpp, &op, nullptr, y); + + auto& ref = rados_obj.get_ref(); + version_t op_ver = 0; + r = rgw_rados_operate(dpp, ref.pool.ioctx(), obj.oid, &op, nullptr, y, 0, &op_ver); if (r < 0) { ldpp_dout(dpp, 20) << "rados_obj.operate() r=" << r << " bl.length=" << bl->length() << dendl; return r; } ldpp_dout(dpp, 20) << "rados_obj.operate() r=" << r << " bl.length=" << bl->length() << dendl; - uint64_t op_ver = rados_obj.get_last_version(); - if (read_state.last_ver > 0 && read_state.last_ver != op_ver) { ldpp_dout(dpp, 5) << "raced with an object write, abort" << dendl; -- 2.39.5