From: Alex Ainscow Date: Fri, 7 Nov 2025 10:44:56 +0000 (+0000) Subject: rados: Add API to disable version querying with reads in librados X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=1fb6cb24f3f85f2ea916e51abf126aed212ebb5b;p=ceph.git rados: Add API to disable version querying with reads in librados librados will always request a "user version". Until EC direct reads are implemented, this is a cheap operation and so librados always requests the user version, even if the client does not need it. With EC direct reads, requesting the user version requires an extra op to the primary in some scenarios. The non-primary OSDs do not contain an up-to-date user version. NEORADOS already allows for such optimisations, due to how the API is organised. librados is not heavily used by ceph-maintained clients, but this API will still be useful for testing of EC direct reads, since the test clients will use librados, due to its simpler nature and performance not being critical in the tests. 
Signed-off-by: Alex Ainscow --- diff --git a/src/include/rados/librados.hpp b/src/include/rados/librados.hpp index 96ed711def0..0a6ac40dbc1 100644 --- a/src/include/rados/librados.hpp +++ b/src/include/rados/librados.hpp @@ -1393,6 +1393,8 @@ inline namespace v14_2_0 { int application_metadata_list(const std::string& app_name, std::map *values); + void set_no_version_on_read(bool b); + private: /* You can only get IoCtx instances from Rados */ IoCtx(IoCtxImpl *io_ctx_impl_); diff --git a/src/librados/IoCtxImpl.cc b/src/librados/IoCtxImpl.cc index 410f18820ac..b78fa4e70da 100644 --- a/src/librados/IoCtxImpl.cc +++ b/src/librados/IoCtxImpl.cc @@ -687,6 +687,21 @@ int librados::IoCtxImpl::operate(const object_t& oid, ::ObjectOperation *o, return r; } +version_t *librados::IoCtxImpl::get_objver_for_read(version_t *objver_p) const { + if (!no_version_on_read) { + return objver_p; + } + + if (objver_p) { + *objver_p = std::numeric_limits::max(); + } + return nullptr; +} + +void librados::IoCtxImpl::set_no_version_on_read(bool _val) { + no_version_on_read = _val; +} + int librados::IoCtxImpl::operate_read(const object_t& oid, ::ObjectOperation *o, bufferlist *pbl, @@ -711,7 +726,7 @@ int librados::IoCtxImpl::operate_read(const object_t& oid, *o, snap_seq, pbl, flags | extra_op_flags, flags_mask, - onack, &ver); + onack, get_objver_for_read(&ver)); objecter->op_submit(objecter_op); { @@ -752,7 +767,7 @@ int librados::IoCtxImpl::aio_operate_read(const object_t &oid, Objecter::Op *objecter_op = objecter->prepare_read_op( oid, oloc, *o, snap_seq, pbl, flags | extra_op_flags, -1, - oncomplete, &c->objver, nullptr, 0, &trace); + oncomplete, get_objver_for_read(&c->objver), nullptr, 0, &trace); objecter->op_submit(objecter_op, &c->tid); trace.event("rados operate read submitted"); @@ -821,7 +836,7 @@ int librados::IoCtxImpl::aio_read(const object_t oid, AioCompletionImpl *c, Objecter::Op *o = objecter->prepare_read_op( oid, oloc, off, len, snapid, pbl, extra_op_flags, - 
oncomplete, &c->objver, nullptr, 0, &trace); + oncomplete, get_objver_for_read(&c->objver), nullptr, 0, &trace); objecter->op_submit(o, &c->tid); return 0; } @@ -854,7 +869,7 @@ int librados::IoCtxImpl::aio_read(const object_t oid, AioCompletionImpl *c, Objecter::Op *o = objecter->prepare_read_op( oid, oloc, off, len, snapid, &c->bl, extra_op_flags, - oncomplete, &c->objver, nullptr, 0, &trace); + oncomplete, get_objver_for_read(&c->objver), nullptr, 0, &trace); objecter->op_submit(o, &c->tid); return 0; } @@ -894,7 +909,7 @@ int librados::IoCtxImpl::aio_sparse_read(const object_t oid, Objecter::Op *o = objecter->prepare_read_op( oid, oloc, onack->m_ops, snapid, NULL, extra_op_flags, -1, - onack, &c->objver); + onack, get_objver_for_read(&c->objver)); objecter->op_submit(o, &c->tid); return 0; } @@ -914,7 +929,7 @@ int librados::IoCtxImpl::aio_cmpext(const object_t& oid, Objecter::Op *o = objecter->prepare_cmpext_op( oid, oloc, off, cmp_bl, snap_seq, extra_op_flags, - onack, &c->objver); + onack, get_objver_for_read(&c->objver)); objecter->op_submit(o, &c->tid); return 0; @@ -942,7 +957,8 @@ int librados::IoCtxImpl::aio_cmpext(const object_t& oid, onack->m_ops.cmpext(off, cmp_len, cmp_buf, NULL); Objecter::Op *o = objecter->prepare_read_op( - oid, oloc, onack->m_ops, snap_seq, NULL, extra_op_flags, -1, onack, &c->objver); + oid, oloc, onack->m_ops, snap_seq, NULL, extra_op_flags, -1, onack, + get_objver_for_read(&c->objver)); objecter->op_submit(o, &c->tid); return 0; } @@ -1113,7 +1129,7 @@ int librados::IoCtxImpl::aio_stat(const object_t& oid, AioCompletionImpl *c, Objecter::Op *o = objecter->prepare_stat_op( oid, oloc, snap_seq, psize, &onack->mtime, extra_op_flags, - onack, &c->objver); + onack, get_objver_for_read(&c->objver)); objecter->op_submit(o, &c->tid); return 0; } @@ -1127,7 +1143,7 @@ int librados::IoCtxImpl::aio_stat2(const object_t& oid, AioCompletionImpl *c, Objecter::Op *o = objecter->prepare_stat_op( oid, oloc, snap_seq, psize, &onack->mtime, 
extra_op_flags, - onack, &c->objver); + onack, get_objver_for_read(&c->objver)); objecter->op_submit(o, &c->tid); return 0; } diff --git a/src/librados/IoCtxImpl.h b/src/librados/IoCtxImpl.h index 183dab976fc..cc554727f6b 100644 --- a/src/librados/IoCtxImpl.h +++ b/src/librados/IoCtxImpl.h @@ -40,6 +40,7 @@ struct librados::IoCtxImpl { uint64_t assert_ver = 0; version_t last_objver = 0; uint32_t notify_timeout = 30; + bool no_version_on_read = false; object_locator_t oloc; int extra_op_flags = 0; int objclass_flags_mask = -1; @@ -297,6 +298,9 @@ struct librados::IoCtxImpl { int application_metadata_list(const std::string& app_name, std::map *values); + void set_no_version_on_read(bool _val); + private: + version_t *get_objver_for_read(version_t *objver_p) const; }; #endif diff --git a/src/librados/librados_cxx.cc b/src/librados/librados_cxx.cc index 223e8d4606c..483c6a8f753 100644 --- a/src/librados/librados_cxx.cc +++ b/src/librados/librados_cxx.cc @@ -3197,3 +3197,8 @@ int librados::IoCtx::application_metadata_list(const std::string& app_name, { return io_ctx_impl->application_metadata_list(app_name, values); } + +void librados::IoCtx::set_no_version_on_read(bool value) +{ + io_ctx_impl->set_no_version_on_read(value); +} diff --git a/src/test/librados/misc_cxx.cc b/src/test/librados/misc_cxx.cc index 71e36430b51..aa0db24a5dc 100644 --- a/src/test/librados/misc_cxx.cc +++ b/src/test/librados/misc_cxx.cc @@ -924,3 +924,39 @@ TEST_F(LibRadosMiscPP, Conf) { ASSERT_EQ(0, cluster.conf_get(option, actual)); ASSERT_EQ(expected, actual); } + +TEST_F(LibRadosMiscPP, NoVer) { + bufferlist bl; + bl.append("ceph"); + ObjectWriteOperation write, write2; + ObjectReadOperation read, read2; + + write.write_full(bl); + ASSERT_EQ(0, ioctx.operate("foo", &write)); + uint64_t version = ioctx.get_last_version(); + + ioctx.set_no_version_on_read(true); + bl.append("moreceph"); + write2.write_full(bl); + ASSERT_EQ(0, ioctx.operate("foo", &write2)); + + // Write versioning should still 
work. + ASSERT_EQ(++version, ioctx.get_last_version()); + + // Asserting the version should still work. + read.assert_version(version); + read.read(0, bl.length(), NULL, NULL); + ASSERT_EQ(0, ioctx.operate("foo", &read, &bl)); + + // However, version read should be invalid. + ASSERT_EQ(std::numeric_limits::max(), ioctx.get_last_version()); + + // Re-enable versioning and check we can re-establish the version on a read. + ioctx.set_no_version_on_read(false); + read2.read(0, bl.length(), NULL, NULL); + ASSERT_EQ(0, ioctx.operate("foo", &read2, &bl)); + ASSERT_EQ(0, memcmp(bl.c_str(), "ceph", 4)); + + // last version should now have been corrected. + ASSERT_EQ(version, ioctx.get_last_version()); +} \ No newline at end of file