From: Adam C. Emerson Date: Tue, 28 Apr 2020 22:26:52 +0000 (-0400) Subject: osdc: Asiofact the Objecter X-Git-Tag: wip-pdonnell-testing-20200918.022351~1203^2~8 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=e3a050403751f6e1283381a09b6553a29e29f835;p=ceph-ci.git osdc: Asiofact the Objecter Thanks to Casey Bodley for watch/notify fixes and Patrick Donnelly for MDS fix. Signed-off-by: Adam C. Emerson --- diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2537e5110c6..afedbfdd9da 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -377,6 +377,7 @@ set(ceph_common_deps Boost::program_options Boost::date_time Boost::iostreams + fmt::fmt StdFilesystem::filesystem fmt::fmt ${BLKID_LIBRARIES} diff --git a/src/ceph_fuse.cc b/src/ceph_fuse.cc index ab4db60920c..3692b5c6ff6 100644 --- a/src/ceph_fuse.cc +++ b/src/ceph_fuse.cc @@ -254,7 +254,7 @@ int main(int argc, const char **argv, const char *envp[]) { messenger->set_policy(entity_name_t::TYPE_MDS, Messenger::Policy::lossless_client(0)); - client = new StandaloneClient(messenger, mc); + client = new StandaloneClient(messenger, mc, icp); if (filer_flags) { client->set_filer_flags(filer_flags); } diff --git a/src/ceph_syn.cc b/src/ceph_syn.cc index e3ca1328702..165ea42308a 100644 --- a/src/ceph_syn.cc +++ b/src/ceph_syn.cc @@ -68,7 +68,7 @@ int main(int argc, const char **argv, char *envp[]) messengers[i]->bind(g_conf()->public_addr); mclients[i] = new MonClient(g_ceph_context, poolctx); mclients[i]->build_initial_monmap(); - auto client = new StandaloneClient(messengers[i], mclients[i]); + auto client = new StandaloneClient(messengers[i], mclients[i], poolctx); client->set_filer_flags(syn_filer_flags); SyntheticClient *syn = new SyntheticClient(client); clients.push_back(client); diff --git a/src/client/Client.cc b/src/client/Client.cc index 8bd78b4046b..cd0c87a22cc 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -29,6 +29,8 @@ #include #include +#include "common/async/waiter.h" + #if defined(__FreeBSD__) #define XATTR_CREATE 0x1 #define XATTR_REPLACE 0x2 @@ -11704,9 +11706,8 @@ void Client::_setxattr_maybe_wait_for_osdmap(const char *name, const void *value }); if (r == -ENOENT) { - C_SaferCond ctx; - objecter->wait_for_latest_osdmap(&ctx); - ctx.wait(); + bs::error_code ec; + objecter->wait_for_latest_osdmap(ca::use_blocked[ec]); } } } @@ -14278,7 +14279,7 @@ int Client::check_pool_perm(Inode *in, int need) C_SaferCond rd_cond; ObjectOperation rd_op; - rd_op.stat(NULL, (ceph::real_time*)nullptr, NULL); + rd_op.stat(nullptr, nullptr, nullptr); objecter->mutate(oid, OSDMap::file_to_object_locator(in->layout), rd_op, nullsnapc, ceph::real_clock::now(), 0, &rd_cond); @@ -14465,7 +14466,7 @@ void Client::set_session_timeout(unsigned timeout) int Client::start_reclaim(const std::string& uuid, unsigned flags, const std::string& fs_name) { - std::lock_guard l(client_lock); + std::unique_lock l(client_lock); if (!initialized) return -ENOTCONN; @@ -14541,13 +14542,14 @@ int Client::start_reclaim(const std::string& uuid, unsigned flags, // use blacklist to check if target session was killed // (config option mds_session_blacklist_on_evict needs to be true) - C_SaferCond cond; - if (!objecter->wait_for_map(reclaim_osd_epoch, &cond)) { - ldout(cct, 10) << __func__ << ": waiting for OSD epoch " << reclaim_osd_epoch << dendl; - client_lock.unlock(); - cond.wait(); - client_lock.lock(); - } + ldout(cct, 10) << __func__ << ": waiting for OSD epoch " << reclaim_osd_epoch << dendl; + bs::error_code ec; + 
l.unlock(); + objecter->wait_for_map(reclaim_osd_epoch, ca::use_blocked[ec]); + l.lock(); + + if (ec) + return ceph::from_error_code(ec); bool blacklisted = objecter->with_osdmap( [this](const OSDMap &osd_map) -> bool { @@ -14671,8 +14673,9 @@ mds_rank_t Client::_get_random_up_mds() const } -StandaloneClient::StandaloneClient(Messenger *m, MonClient *mc) - : Client(m, mc, new Objecter(m->cct, m, mc, nullptr, 0, 0)) +StandaloneClient::StandaloneClient(Messenger *m, MonClient *mc, + boost::asio::io_context& ictx) + : Client(m, mc, new Objecter(m->cct, m, mc, ictx, 0, 0)) { monclient->set_messenger(m); objecter->set_client_incarnation(0); diff --git a/src/client/Client.h b/src/client/Client.h index 3a5c6741d00..398b4bc89f1 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -1305,7 +1305,7 @@ private: class StandaloneClient : public Client { public: - StandaloneClient(Messenger *m, MonClient *mc); + StandaloneClient(Messenger *m, MonClient *mc, boost::asio::io_context& ictx); ~StandaloneClient() override; diff --git a/src/libcephfs.cc b/src/libcephfs.cc index 623b6bb51ab..3d3aa22fe8f 100644 --- a/src/libcephfs.cc +++ b/src/libcephfs.cc @@ -106,7 +106,7 @@ public: //at last the client ret = -CEPHFS_ERROR_NEW_CLIENT; //defined in libcephfs.h; - client = new StandaloneClient(messenger, monclient); + client = new StandaloneClient(messenger, monclient, icp); if (!client) goto fail; diff --git a/src/librados/AioCompletionImpl.h b/src/librados/AioCompletionImpl.h index d3a674e8ee7..6f7e1b62886 100644 --- a/src/librados/AioCompletionImpl.h +++ b/src/librados/AioCompletionImpl.h @@ -126,14 +126,14 @@ struct librados::AioCompletionImpl { }; namespace librados { -struct C_AioComplete : public Context { +struct CB_AioComplete { AioCompletionImpl *c; - explicit C_AioComplete(AioCompletionImpl *cc) : c(cc) { + explicit CB_AioComplete(AioCompletionImpl *cc) : c(cc) { c->_get(); } - void finish(int r) override { + void operator()() { rados_callback_t cb_complete = c->callback_complete; void *cb_complete_arg = c->callback_complete_arg; if (cb_complete) @@ -160,14 +160,27 @@ struct C_AioComplete : public Context { * flush where we only want to wait for things to be safe, * but allow users to specify any of the callbacks. 
*/ -struct C_AioCompleteAndSafe : public Context { +struct CB_AioCompleteAndSafe { AioCompletionImpl *c; - explicit C_AioCompleteAndSafe(AioCompletionImpl *cc) : c(cc) { + + explicit CB_AioCompleteAndSafe(AioCompletionImpl *cc) : c(cc) { c->get(); } - void finish(int r) override { + CB_AioCompleteAndSafe(const CB_AioCompleteAndSafe&) = delete; + CB_AioCompleteAndSafe& operator =(const CB_AioCompleteAndSafe&) = delete; + CB_AioCompleteAndSafe(CB_AioCompleteAndSafe&& rhs) { + c = rhs.c; + rhs.c = nullptr; + } + CB_AioCompleteAndSafe& operator =(CB_AioCompleteAndSafe&& rhs) { + c = rhs.c; + rhs.c = nullptr; + return *this; + } + + void operator()(int r = 0) { c->lock.lock(); c->rval = r; c->complete = true; @@ -190,7 +203,6 @@ struct C_AioCompleteAndSafe : public Context { c->put_unlock(); } }; - } #endif diff --git a/src/librados/IoCtxImpl.cc b/src/librados/IoCtxImpl.cc index 900628bbdc4..f384c49766f 100644 --- a/src/librados/IoCtxImpl.cc +++ b/src/librados/IoCtxImpl.cc @@ -28,34 +28,39 @@ #undef dout_prefix #define dout_prefix *_dout << "librados: " +namespace bs = boost::system; +namespace ca = ceph::async; +namespace cb = ceph::buffer; + namespace librados { namespace { -struct C_notify_Finish : public Context { +struct CB_notify_Finish { CephContext *cct; Context *ctx; Objecter *objecter; Objecter::LingerOp *linger_op; - bufferlist reply_bl; bufferlist *preply_bl; char **preply_buf; size_t *preply_buf_len; - C_notify_Finish(CephContext *_cct, Context *_ctx, Objecter *_objecter, - Objecter::LingerOp *_linger_op, bufferlist *_preply_bl, - char **_preply_buf, size_t *_preply_buf_len) + CB_notify_Finish(CephContext *_cct, Context *_ctx, Objecter *_objecter, + Objecter::LingerOp *_linger_op, bufferlist *_preply_bl, + char **_preply_buf, size_t *_preply_buf_len) : cct(_cct), ctx(_ctx), objecter(_objecter), linger_op(_linger_op), preply_bl(_preply_bl), preply_buf(_preply_buf), - preply_buf_len(_preply_buf_len) - { - linger_op->on_notify_finish = this; - linger_op->notify_result_bl = &reply_bl; - } + preply_buf_len(_preply_buf_len) {} - void finish(int r) override - { + + // move-only + CB_notify_Finish(const CB_notify_Finish&) = delete; + CB_notify_Finish& operator =(const CB_notify_Finish&) = delete; + CB_notify_Finish(CB_notify_Finish&&) = default; + CB_notify_Finish& operator =(CB_notify_Finish&&) = default; + + void operator()(bs::error_code ec, bufferlist&& reply_bl) { ldout(cct, 10) << __func__ << " completed notify (linger op " - << linger_op << "), r = " << r << dendl; + << linger_op << "), ec = " << ec << dendl; // pass result back to user // NOTE: we do this regardless of what error code we return @@ -72,21 +77,20 @@ struct C_notify_Finish : public Context { if (preply_bl) preply_bl->claim(reply_bl); - ctx->complete(r); + ctx->complete(ceph::from_error_code(ec)); } }; -struct C_aio_linger_cancel : public Context { +struct CB_aio_linger_cancel { Objecter *objecter; Objecter::LingerOp *linger_op; - C_aio_linger_cancel(Objecter *_objecter, Objecter::LingerOp *_linger_op) + CB_aio_linger_cancel(Objecter *_objecter, Objecter::LingerOp *_linger_op) : objecter(_objecter), linger_op(_linger_op) { } - void finish(int r) override - { + void operator()() { objecter->linger_cancel(linger_op); } }; @@ -104,8 +108,9 @@ struct C_aio_linger_Complete : public Context { void finish(int r) override { if (cancel || r < 0) - c->io->client->finisher.queue(new C_aio_linger_cancel(c->io->objecter, - linger_op)); + boost::asio::defer(c->io->client->finish_strand, + CB_aio_linger_cancel(c->io->objecter, + 
linger_op)); c->lock.lock(); c->rval = r; @@ -114,7 +119,7 @@ struct C_aio_linger_Complete : public Context { if (c->callback_complete || c->callback_safe) { - c->io->client->finisher.queue(new C_AioComplete(c)); + boost::asio::defer(c->io->client->finish_strand, CB_AioComplete(c)); } c->put_unlock(); } @@ -161,12 +166,11 @@ struct C_aio_notify_Complete : public C_aio_linger_Complete { struct C_aio_notify_Ack : public Context { CephContext *cct; - C_notify_Finish *onfinish; C_aio_notify_Complete *oncomplete; - C_aio_notify_Ack(CephContext *_cct, C_notify_Finish *_onfinish, + C_aio_notify_Ack(CephContext *_cct, C_aio_notify_Complete *_oncomplete) - : cct(_cct), onfinish(_onfinish), oncomplete(_oncomplete) + : cct(_cct), oncomplete(_oncomplete) { } @@ -195,7 +199,7 @@ struct C_aio_selfmanaged_snap_op_Complete : public Context { c->cond.notify_all(); if (c->callback_complete || c->callback_safe) { - client->finisher.queue(new librados::C_AioComplete(c)); + boost::asio::defer(client->finish_strand, librados::CB_AioComplete(c)); } c->put_unlock(); } @@ -305,7 +309,7 @@ void librados::IoCtxImpl::complete_aio_write(AioCompletionImpl *c) ldout(client->cct, 20) << " waking waiters on seq " << waiters->first << dendl; for (std::list::iterator it = waiters->second.begin(); it != waiters->second.end(); ++it) { - client->finisher.queue(new C_AioCompleteAndSafe(*it)); + boost::asio::defer(client->finish_strand, CB_AioCompleteAndSafe(*it)); (*it)->put(); } aio_write_waiters.erase(waiters++); @@ -325,7 +329,7 @@ void librados::IoCtxImpl::flush_aio_writes_async(AioCompletionImpl *c) if (aio_write_list.empty()) { ldout(client->cct, 20) << "flush_aio_writes_async no writes. (tid " << seq << ")" << dendl; - client->finisher.queue(new C_AioCompleteAndSafe(c)); + boost::asio::defer(client->finish_strand, CB_AioCompleteAndSafe(c)); } else { ldout(client->cct, 20) << "flush_aio_writes_async " << aio_write_list.size() << " writes in flight; waiting on tid " << seq << dendl; @@ -362,14 +366,10 @@ int librados::IoCtxImpl::snap_create(const char *snapName) ceph::condition_variable cond; bool done; Context *onfinish = new C_SafeCond(mylock, cond, &done, &reply); - reply = objecter->create_pool_snap(poolid, sName, onfinish); + objecter->create_pool_snap(poolid, sName, onfinish); - if (reply < 0) { - delete onfinish; - } else { - std::unique_lock l{mylock}; - cond.wait(l, [&done] { return done; }); - } + std::unique_lock l{mylock}; + cond.wait(l, [&done] { return done; }); return reply; } @@ -382,18 +382,14 @@ int librados::IoCtxImpl::selfmanaged_snap_create(uint64_t *psnapid) bool done; Context *onfinish = new C_SafeCond(mylock, cond, &done, &reply); snapid_t snapid; - reply = objecter->allocate_selfmanaged_snap(poolid, &snapid, onfinish); + objecter->allocate_selfmanaged_snap(poolid, &snapid, onfinish); - if (reply < 0) { - delete onfinish; - } else { - { - std::unique_lock l{mylock}; - cond.wait(l, [&done] { return done; }); - } - if (reply == 0) - *psnapid = snapid; + { + std::unique_lock l{mylock}; + cond.wait(l, [&done] { return done; }); } + if (reply == 0) + *psnapid = snapid; return reply; } @@ -402,11 +398,8 @@ void librados::IoCtxImpl::aio_selfmanaged_snap_create(uint64_t *snapid, { C_aio_selfmanaged_snap_create_Complete *onfinish = new C_aio_selfmanaged_snap_create_Complete(client, c, snapid); - int r = objecter->allocate_selfmanaged_snap(poolid, &onfinish->snapid, - onfinish); - if (r < 0) { - onfinish->complete(r); - } + objecter->allocate_selfmanaged_snap(poolid, &onfinish->snapid, + onfinish); } int 
librados::IoCtxImpl::snap_remove(const char *snapName) @@ -418,14 +411,9 @@ int librados::IoCtxImpl::snap_remove(const char *snapName) ceph::condition_variable cond; bool done; Context *onfinish = new C_SafeCond(mylock, cond, &done, &reply); - reply = objecter->delete_pool_snap(poolid, sName, onfinish); - - if (reply < 0) { - delete onfinish; - } else { - unique_lock l{mylock}; - cond.wait(l, [&done] { return done; }); - } + objecter->delete_pool_snap(poolid, sName, onfinish); + unique_lock l{mylock}; + cond.wait(l, [&done] { return done; }); return reply; } @@ -1155,7 +1143,7 @@ struct AioGetxattrsData { AioGetxattrsData(librados::AioCompletionImpl *c, map* attrset, librados::RadosClient *_client) : user_completion(c), user_attrset(attrset), client(_client) {} - struct librados::C_AioCompleteAndSafe user_completion; + struct librados::CB_AioCompleteAndSafe user_completion; map result_attrset; map* user_attrset; librados::RadosClient *client; @@ -1174,7 +1162,7 @@ static void aio_getxattrs_complete(rados_completion_t c, void *arg) { (*cdata->user_attrset)[p->first] = p->second; } } - cdata->user_completion.finish(rc); + cdata->user_completion(rc); ((librados::AioCompletionImpl*)c)->put(); delete cdata; } @@ -1477,7 +1465,7 @@ int librados::IoCtxImpl::stat(const object_t& oid, uint64_t *psize, time_t *pmti ::ObjectOperation rd; prepare_assert_ops(&rd); - rd.stat(psize, &mtime, NULL); + rd.stat(psize, &mtime, nullptr); int r = operate_read(oid, &rd, NULL); if (r >= 0 && pmtime) { @@ -1497,7 +1485,7 @@ int librados::IoCtxImpl::stat2(const object_t& oid, uint64_t *psize, struct time ::ObjectOperation rd; prepare_assert_ops(&rd); - rd.stat(psize, &mtime, NULL); + rd.stat(psize, &mtime, nullptr); int r = operate_read(oid, &rd, NULL); if (r < 0) { return r; @@ -1568,31 +1556,25 @@ void librados::IoCtxImpl::set_sync_op_version(version_t ver) last_objver = ver; } -struct WatchInfo : public Objecter::WatchContext { - librados::IoCtxImpl *ioctx; +namespace librados { +void intrusive_ptr_add_ref(IoCtxImpl *p) { p->get(); } +void intrusive_ptr_release(IoCtxImpl *p) { p->put(); } +} + +struct WatchInfo { + boost::intrusive_ptr ioctx; object_t oid; librados::WatchCtx *ctx; librados::WatchCtx2 *ctx2; - bool internal = false; WatchInfo(librados::IoCtxImpl *io, object_t o, - librados::WatchCtx *c, librados::WatchCtx2 *c2, - bool inter) - : ioctx(io), oid(o), ctx(c), ctx2(c2), internal(inter) { - ioctx->get(); - } - ~WatchInfo() override { - ioctx->put(); - if (internal) { - delete ctx; - delete ctx2; - } - } + librados::WatchCtx *c, librados::WatchCtx2 *c2) + : ioctx(io), oid(o), ctx(c), ctx2(c2) {} void handle_notify(uint64_t notify_id, uint64_t cookie, uint64_t notifier_id, - bufferlist& bl) override { + bufferlist& bl) { ldout(ioctx->client->cct, 10) << __func__ << " " << notify_id << " cookie " << cookie << " notifier_id " << notifier_id @@ -1609,13 +1591,35 @@ struct WatchInfo : public Objecter::WatchContext { ioctx->notify_ack(oid, notify_id, cookie, empty); } } - void handle_error(uint64_t cookie, int err) override { + void handle_error(uint64_t cookie, int err) { ldout(ioctx->client->cct, 10) << __func__ << " cookie " << cookie << " err " << err << dendl; if (ctx2) ctx2->handle_error(cookie, err); } + + void operator()(bs::error_code ec, + uint64_t notify_id, + uint64_t cookie, + uint64_t notifier_id, + bufferlist&& bl) { + if (ec) { + handle_error(cookie, ceph::from_error_code(ec)); + } else { + handle_notify(notify_id, cookie, notifier_id, bl); + } + } +}; + +// internal WatchInfo that owns the 
context memory +struct InternalWatchInfo : public WatchInfo { + std::unique_ptr ctx; + std::unique_ptr ctx2; + + InternalWatchInfo(librados::IoCtxImpl *io, object_t o, + librados::WatchCtx *c, librados::WatchCtx2 *c2) + : WatchInfo(io, o, c, c2), ctx(c), ctx2(c2) {} }; int librados::IoCtxImpl::watch(const object_t& oid, uint64_t *handle, @@ -1638,9 +1642,11 @@ int librados::IoCtxImpl::watch(const object_t& oid, uint64_t *handle, Objecter::LingerOp *linger_op = objecter->linger_register(oid, oloc, 0); *handle = linger_op->get_cookie(); - linger_op->watch_context = new WatchInfo(this, - oid, ctx, ctx2, internal); - + if (internal) { + linger_op->handle = InternalWatchInfo(this, oid, ctx, ctx2); + } else { + linger_op->handle = WatchInfo(this, oid, ctx, ctx2); + } prepare_assert_ops(&wr); wr.watch(*handle, CEPH_OSD_WATCH_OP_WATCH, timeout); bufferlist bl; @@ -1684,7 +1690,11 @@ int librados::IoCtxImpl::aio_watch(const object_t& oid, ::ObjectOperation wr; *handle = linger_op->get_cookie(); - linger_op->watch_context = new WatchInfo(this, oid, ctx, ctx2, internal); + if (internal) { + linger_op->handle = InternalWatchInfo(this, oid, ctx, ctx2); + } else { + linger_op->handle = WatchInfo(this, oid, ctx, ctx2); + } prepare_assert_ops(&wr); wr.watch(*handle, CEPH_OSD_WATCH_OP_WATCH, timeout); @@ -1712,8 +1722,13 @@ int librados::IoCtxImpl::notify_ack( int librados::IoCtxImpl::watch_check(uint64_t cookie) { - Objecter::LingerOp *linger_op = reinterpret_cast(cookie); - return objecter->linger_check(linger_op); + auto linger_op = reinterpret_cast(cookie); + auto r = objecter->linger_check(linger_op); + if (r) + return 1 + std::chrono::duration_cast< + std::chrono::milliseconds>(*r).count(); + else + return ceph::from_error_code(r.error()); } int librados::IoCtxImpl::unwatch(uint64_t cookie) @@ -1758,11 +1773,12 @@ int librados::IoCtxImpl::notify(const object_t& oid, bufferlist& bl, Objecter::LingerOp *linger_op = objecter->linger_register(oid, oloc, 0); C_SaferCond notify_finish_cond; - Context *notify_finish = new C_notify_Finish(client->cct, ¬ify_finish_cond, - objecter, linger_op, preply_bl, - preply_buf, preply_buf_len); - (void) notify_finish; - + linger_op->on_notify_finish = + Objecter::LingerOp::OpComp::create( + objecter->service.get_executor(), + CB_notify_Finish(client->cct, ¬ify_finish_cond, + objecter, linger_op, preply_bl, + preply_buf, preply_buf_len)); uint32_t timeout = notify_timeout; if (timeout_ms) timeout = timeout_ms / 1000; @@ -1812,11 +1828,14 @@ int librados::IoCtxImpl::aio_notify(const object_t& oid, AioCompletionImpl *c, c->io = this; C_aio_notify_Complete *oncomplete = new C_aio_notify_Complete(c, linger_op); - C_notify_Finish *onnotify = new C_notify_Finish(client->cct, oncomplete, - objecter, linger_op, - preply_bl, preply_buf, - preply_buf_len); - Context *onack = new C_aio_notify_Ack(client->cct, onnotify, oncomplete); + linger_op->on_notify_finish = + Objecter::LingerOp::OpComp::create( + objecter->service.get_executor(), + CB_notify_Finish(client->cct, oncomplete, + objecter, linger_op, + preply_bl, preply_buf, + preply_buf_len)); + Context *onack = new C_aio_notify_Ack(client->cct, oncomplete); uint32_t timeout = notify_timeout; if (timeout_ms) @@ -1900,7 +1919,7 @@ void librados::IoCtxImpl::C_aio_stat_Ack::finish(int r) } if (c->callback_complete) { - c->io->client->finisher.queue(new C_AioComplete(c)); + boost::asio::defer(c->io->client->finish_strand, CB_AioComplete(c)); } c->put_unlock(); @@ -1928,7 +1947,7 @@ void 
librados::IoCtxImpl::C_aio_stat2_Ack::finish(int r) } if (c->callback_complete) { - c->io->client->finisher.queue(new C_AioComplete(c)); + boost::asio::defer(c->io->client->finish_strand, CB_AioComplete(c)); } c->put_unlock(); @@ -1964,7 +1983,7 @@ void librados::IoCtxImpl::C_aio_Complete::finish(int r) if (c->callback_complete || c->callback_safe) { - c->io->client->finisher.queue(new C_AioComplete(c)); + boost::asio::defer(c->io->client->finish_strand, CB_AioComplete(c)); } if (c->aio_write_seq) { @@ -2028,6 +2047,7 @@ int librados::IoCtxImpl::application_enable(const std::string& app_name, r = c->get_return_value(); c->release(); + c->put(); if (r < 0) { return r; } @@ -2043,7 +2063,10 @@ void librados::IoCtxImpl::application_enable_async(const std::string& app_name, // preserved until Luminous is configured as minimim version. if (!client->get_required_monitor_features().contains_all( ceph::features::mon::FEATURE_LUMINOUS)) { - client->finisher.queue(new C_PoolAsync_Safe(c), -EOPNOTSUPP); + boost::asio::defer(client->finish_strand, + [cb = CB_PoolAsync_Safe(c)]() mutable { + cb(-EOPNOTSUPP); + }); return; } @@ -2061,7 +2084,7 @@ void librados::IoCtxImpl::application_enable_async(const std::string& app_name, cmds.push_back(cmd.str()); bufferlist inbl; client->mon_command_async(cmds, inbl, nullptr, nullptr, - new C_PoolAsync_Safe(c)); + make_lambda_context(CB_PoolAsync_Safe(c))); } int librados::IoCtxImpl::application_list(std::set *app_names) diff --git a/src/librados/ListObjectImpl.h b/src/librados/ListObjectImpl.h index 95c2e21a4ec..7396c12108d 100644 --- a/src/librados/ListObjectImpl.h +++ b/src/librados/ListObjectImpl.h @@ -11,13 +11,15 @@ * Foundation. See file COPYING. * */ -#include #ifndef CEPH_LIBRADOS_LISTOBJECTIMPL_H #define CEPH_LIBRADOS_LISTOBJECTIMPL_H +#include #include +#include "include/cmp.h" + namespace librados { struct ListObjectImpl { std::string nspace; diff --git a/src/librados/PoolAsyncCompletionImpl.h b/src/librados/PoolAsyncCompletionImpl.h index b52d7fada20..73420fe359c 100644 --- a/src/librados/PoolAsyncCompletionImpl.h +++ b/src/librados/PoolAsyncCompletionImpl.h @@ -16,7 +16,9 @@ #define CEPH_LIBRADOS_POOLASYNCCOMPLETIONIMPL_H #include "common/ceph_mutex.h" -#include "include/Context.h" + +#include + #include "include/rados/librados.h" #include "include/rados/librados.hpp" @@ -29,67 +31,68 @@ namespace librados { bool released = false; bool done = false; - rados_callback_t callback = 0; - void *callback_arg = nullptr;; + rados_callback_t callback = nullptr; + void *callback_arg = nullptr; PoolAsyncCompletionImpl() = default; int set_callback(void *cb_arg, rados_callback_t cb) { - std::scoped_lock l{lock}; + std::scoped_lock l(lock); callback = cb; callback_arg = cb_arg; return 0; } int wait() { - std::unique_lock l{lock}; - cond.wait(l, [this] { return done;}); + std::unique_lock l(lock); + while (!done) + cond.wait(l); return 0; } int is_complete() { - std::scoped_lock l{lock}; + std::scoped_lock l(lock); return done; } int get_return_value() { - std::scoped_lock l{lock}; + std::scoped_lock l(lock); return rval; } void get() { - std::scoped_lock l{lock}; + std::scoped_lock l(lock); ceph_assert(ref > 0); ref++; } void release() { - lock.lock(); + std::scoped_lock l(lock); ceph_assert(!released); released = true; - put_unlock(); } void put() { - lock.lock(); - put_unlock(); - } - void put_unlock() { - ceph_assert(ref > 0); + std::unique_lock l(lock); int n = --ref; - lock.unlock(); + l.unlock(); if (!n) delete this; } }; - class C_PoolAsync_Safe : public 
Context { - PoolAsyncCompletionImpl *c; + inline void intrusive_ptr_add_ref(PoolAsyncCompletionImpl* p) { + p->get(); + } + inline void intrusive_ptr_release(PoolAsyncCompletionImpl* p) { + p->put(); + } + + class CB_PoolAsync_Safe { + boost::intrusive_ptr p; public: - explicit C_PoolAsync_Safe(PoolAsyncCompletionImpl *_c) : c(_c) { - c->get(); - } - ~C_PoolAsync_Safe() override { - c->put(); - } - - void finish(int r) override { - c->lock.lock(); + explicit CB_PoolAsync_Safe(boost::intrusive_ptr p) + : p(p) {} + ~CB_PoolAsync_Safe() = default; + + void operator()(int r) { + auto c(std::move(p)); + std::unique_lock l(c->lock); c->rval = r; c->done = true; c->cond.notify_all(); @@ -97,12 +100,10 @@ namespace librados { if (c->callback) { rados_callback_t cb = c->callback; void *cb_arg = c->callback_arg; - c->lock.unlock(); - cb(c, cb_arg); - c->lock.lock(); + l.unlock(); + cb(c.get(), cb_arg); + l.lock(); } - - c->lock.unlock(); } }; } diff --git a/src/librados/RadosClient.cc b/src/librados/RadosClient.cc index 3a96cc07cee..58524e46fe0 100644 --- a/src/librados/RadosClient.cc +++ b/src/librados/RadosClient.cc @@ -57,25 +57,13 @@ #undef dout_prefix #define dout_prefix *_dout << "librados: " +namespace bc = boost::container; namespace bs = boost::system; namespace ca = ceph::async; +namespace cb = ceph::buffer; librados::RadosClient::RadosClient(CephContext *cct_) - : Dispatcher(cct_->get()), - cct_deleter{cct_, [](CephContext *p) {p->put();}}, - conf(cct_->_conf), - state(DISCONNECTED), - monclient(cct_, poolctx), - mgrclient(cct_, nullptr, &monclient.monmap), - messenger(NULL), - instance_id(0), - objecter(NULL), - timer(cct, lock), - refcnt(1), - log_last_version(0), log_cb(NULL), log_cb2(NULL), log_cb_arg(NULL), - finisher(cct, "radosclient", "fn-radosclient") -{ -} + : Dispatcher(cct_->get()) {} int64_t librados::RadosClient::lookup_pool(const char *name) { @@ -267,9 +255,9 @@ int librados::RadosClient::connect() ldout(cct, 1) << "starting objecter" << dendl; objecter = new (std::nothrow) Objecter(cct, messenger, &monclient, - &finisher, - cct->_conf->rados_mon_op_timeout, - cct->_conf->rados_osd_op_timeout); + poolctx, + cct->_conf->rados_mon_op_timeout, + cct->_conf->rados_osd_op_timeout); if (!objecter) goto out; objecter->set_balanced_budget(); @@ -324,10 +312,6 @@ int librados::RadosClient::connect() objecter->start(); lock.lock(); - timer.init(); - - finisher.start(); - state = CONNECTED; instance_id = monclient.get_global_id(); @@ -370,12 +354,9 @@ void librados::RadosClient::shutdown() // make sure watch callbacks are flushed watch_flush(); } - finisher.wait_for_empty(); - finisher.stop(); } state = DISCONNECTED; instance_id = 0; - timer.shutdown(); // will drop+retake lock l.unlock(); if (need_objecter) { objecter->shutdown(); @@ -387,42 +368,49 @@ void librados::RadosClient::shutdown() messenger->shutdown(); messenger->wait(); } + poolctx.stop(); ldout(cct, 1) << "shutdown" << dendl; - poolctx.finish(); } int librados::RadosClient::watch_flush() { ldout(cct, 10) << __func__ << " enter" << dendl; - ceph::mutex mylock = ceph::make_mutex("RadosClient::watch_flush::mylock"); - ceph::condition_variable cond; - bool done; - objecter->linger_callback_flush(new C_SafeCond(mylock, cond, &done)); + objecter->linger_callback_flush(ca::use_blocked); - std::unique_lock l{mylock}; - cond.wait(l, [&done] { return done; }); ldout(cct, 10) << __func__ << " exit" << dendl; return 0; } -struct C_aio_watch_flush_Complete : public Context { +struct CB_aio_watch_flush_Complete { 
librados::RadosClient *client; librados::AioCompletionImpl *c; - C_aio_watch_flush_Complete(librados::RadosClient *_client, librados::AioCompletionImpl *_c) + CB_aio_watch_flush_Complete(librados::RadosClient *_client, librados::AioCompletionImpl *_c) : client(_client), c(_c) { c->get(); } - void finish(int r) override { + CB_aio_watch_flush_Complete(const CB_aio_watch_flush_Complete&) = delete; + CB_aio_watch_flush_Complete operator =(const CB_aio_watch_flush_Complete&) = delete; + CB_aio_watch_flush_Complete(CB_aio_watch_flush_Complete&& rhs) { + client = rhs.client; + c = rhs.c; + } + CB_aio_watch_flush_Complete& operator =(CB_aio_watch_flush_Complete&& rhs) { + client = rhs.client; + c = rhs.c; + return *this; + } + + void operator()() { c->lock.lock(); - c->rval = r; + c->rval = 0; c->complete = true; c->cond.notify_all(); if (c->callback_complete || c->callback_safe) { - client->finisher.queue(new librados::C_AioComplete(c)); + boost::asio::defer(client->finish_strand, librados::CB_AioComplete(c)); } c->put_unlock(); } @@ -431,8 +419,7 @@ struct C_aio_watch_flush_Complete : public Context { int librados::RadosClient::async_watch_flush(AioCompletionImpl *c) { ldout(cct, 10) << __func__ << " enter" << dendl; - Context *oncomplete = new C_aio_watch_flush_Complete(this, c); - objecter->linger_callback_flush(oncomplete); + objecter->linger_callback_flush(CB_aio_watch_flush_Complete(this, c)); ldout(cct, 10) << __func__ << " exit" << dendl; return 0; } @@ -609,15 +596,9 @@ int librados::RadosClient::wait_for_osdmap() int librados::RadosClient::wait_for_latest_osdmap() { - ceph::mutex mylock = ceph::make_mutex("RadosClient::wait_for_latest_osdmap"); - ceph::condition_variable cond; - bool done; - - objecter->wait_for_latest_osdmap(new C_SafeCond(mylock, cond, &done)); - - std::unique_lock l{mylock}; - cond.wait(l, [&done] {return done;}); - return 0; + bs::error_code ec; + objecter->wait_for_latest_osdmap(ca::use_blocked[ec]); + return ceph::from_error_code(ec); } int librados::RadosClient::pool_list(std::list >& v) @@ -635,20 +616,22 @@ int librados::RadosClient::pool_list(std::list >& v) int librados::RadosClient::get_pool_stats(std::list& pools, map *result, - bool *per_pool) + bool *pper_pool) { - ceph::mutex mylock = ceph::make_mutex("RadosClient::get_pool_stats::mylock"); - ceph::condition_variable cond; - bool done; - int ret = 0; + bs::error_code ec; - objecter->get_pool_stats(pools, result, per_pool, - new C_SafeCond(mylock, cond, &done, - &ret)); + std::vector v(pools.begin(), pools.end()); - unique_lock l{mylock}; - cond.wait(l, [&done] { return done;}); - return ret; + auto [res, per_pool] = objecter->get_pool_stats(v, ca::use_blocked[ec]); + if (ec) + return ceph::from_error_code(ec); + + if (per_pool) + *pper_pool = per_pool; + if (result) + result->insert(res.begin(), res.end()); + + return 0; } bool librados::RadosClient::get_pool_is_selfmanaged_snaps_mode( @@ -710,14 +693,10 @@ int librados::RadosClient::pool_create(string& name, ceph::condition_variable cond; bool done; Context *onfinish = new C_SafeCond(mylock, cond, &done, &reply); - reply = objecter->create_pool(name, onfinish, crush_rule); + objecter->create_pool(name, onfinish, crush_rule); - if (reply < 0) { - delete onfinish; - } else { - std::unique_lock l{mylock}; - cond.wait(l, [&done] { return done; }); - } + std::unique_lock l{mylock}; + cond.wait(l, [&done] { return done; }); return reply; } @@ -729,11 +708,8 @@ int librados::RadosClient::pool_create_async(string& name, if (r < 0) return r; - Context *onfinish 
= new C_PoolAsync_Safe(c); - r = objecter->create_pool(name, onfinish, crush_rule); - if (r < 0) { - delete onfinish; - } + Context *onfinish = make_lambda_context(CB_PoolAsync_Safe(c)); + objecter->create_pool(name, onfinish, crush_rule); return r; } @@ -772,14 +748,10 @@ int librados::RadosClient::pool_delete(const char *name) bool done; int ret; Context *onfinish = new C_SafeCond(mylock, cond, &done, &ret); - ret = objecter->delete_pool(name, onfinish); + objecter->delete_pool(name, onfinish); - if (ret < 0) { - delete onfinish; - } else { - std::unique_lock l{mylock}; - cond.wait(l, [&done] { return done;}); - } + std::unique_lock l{mylock}; + cond.wait(l, [&done] { return done;}); return ret; } @@ -789,11 +761,8 @@ int librados::RadosClient::pool_delete_async(const char *name, PoolAsyncCompleti if (r < 0) return r; - Context *onfinish = new C_PoolAsync_Safe(c); - r = objecter->delete_pool(name, onfinish); - if (r < 0) { - delete onfinish; - } + Context *onfinish = make_lambda_context(CB_PoolAsync_Safe(c)); + objecter->delete_pool(name, onfinish); return r; } @@ -952,44 +921,36 @@ int librados::RadosClient::osd_command(int osd, vector& cmd, const bufferlist& inbl, bufferlist *poutbl, string *prs) { - ceph::mutex mylock = ceph::make_mutex("RadosClient::osd_command::mylock"); - ceph::condition_variable cond; - bool done; - int ret; ceph_tid_t tid; if (osd < 0) return -EINVAL; - { - std::lock_guard l{mylock}; - // XXX do anything with tid? - objecter->osd_command(osd, cmd, inbl, &tid, poutbl, prs, - new C_SafeCond(mylock, cond, &done, &ret)); - } - std::unique_lock l{mylock}; - cond.wait(l, [&done] { return done;}); - return ret; + + // XXX do anything with tid? + bs::error_code ec; + auto [s, bl] = objecter->osd_command(osd, std::move(cmd), cb::list(inbl), + &tid, ca::use_blocked[ec]); + if (poutbl) + *poutbl = std::move(bl); + if (prs) + *prs = std::move(s); + return ceph::from_error_code(ec); } int librados::RadosClient::pg_command(pg_t pgid, vector& cmd, const bufferlist& inbl, bufferlist *poutbl, string *prs) { - ceph::mutex mylock = ceph::make_mutex("RadosClient::pg_command::mylock"); - ceph::condition_variable cond; - bool done; - int ret; ceph_tid_t tid; - - { - std::lock_guard l{lock}; - objecter->pg_command(pgid, cmd, inbl, &tid, poutbl, prs, - new C_SafeCond(mylock, cond, &done, &ret)); - } - std::unique_lock l{mylock}; - cond.wait(l, [&done] { return done;}); - return ret; + bs::error_code ec; + auto [s, bl] = objecter->pg_command(pgid, std::move(cmd), inbl, &tid, + ca::use_blocked[ec]); + if (poutbl) + *poutbl = std::move(bl); + if (prs) + *prs = std::move(s); + return ceph::from_error_code(ec); } int librados::RadosClient::monitor_log(const string& level, diff --git a/src/librados/RadosClient.h b/src/librados/RadosClient.h index 5ad083ead05..7475fc678d1 100644 --- a/src/librados/RadosClient.h +++ b/src/librados/RadosClient.h @@ -23,7 +23,6 @@ #include "common/async/context_pool.h" #include "common/config_fwd.h" #include "common/Cond.h" -#include "common/Timer.h" #include "common/ceph_mutex.h" #include "common/ceph_time.h" #include "common/config_obs.h" @@ -44,11 +43,14 @@ class AioCompletionImpl; class librados::RadosClient : public Dispatcher, public md_config_obs_t { +public: + using Dispatcher::cct; +private: std::unique_ptr > cct_deleter; + std::function > cct_deleter{ + cct, [](CephContext *p) {p->put();}}; public: - using Dispatcher::cct; const ConfigProxy& conf{cct->_conf}; ceph::async::io_context_pool poolctx; private: @@ -56,13 +58,13 @@ private: DISCONNECTED, 
CONNECTING, CONNECTED, - } state; + } state{DISCONNECTED}; - MonClient monclient; - MgrClient mgrclient; - Messenger *messenger; + MonClient monclient{cct, poolctx}; + MgrClient mgrclient{cct, nullptr, &monclient.monmap}; + Messenger *messenger{nullptr}; - uint64_t instance_id; + uint64_t instance_id{0}; bool _dispatch(Message *m); bool ms_dispatch(Message *m) override; @@ -72,17 +74,16 @@ private: void ms_handle_remote_reset(Connection *con) override; bool ms_handle_refused(Connection *con) override; - Objecter *objecter; + Objecter *objecter{nullptr}; ceph::mutex lock = ceph::make_mutex("librados::RadosClient::lock"); ceph::condition_variable cond; - SafeTimer timer; - int refcnt; + int refcnt{1}; - version_t log_last_version; - rados_log_callback_t log_cb; - rados_log_callback2_t log_cb2; - void *log_cb_arg; + version_t log_last_version{0}; + rados_log_callback_t log_cb{nullptr}; + rados_log_callback2_t log_cb2{nullptr}; + void *log_cb_arg{nullptr}; string log_watch; bool service_daemon = false; @@ -92,9 +93,9 @@ private: int wait_for_osdmap(); public: - Finisher finisher; + boost::asio::io_context::strand finish_strand{poolctx.get_io_context()}; - explicit RadosClient(CephContext *cct_); + explicit RadosClient(CephContext *cct); ~RadosClient() override; int ping_monitor(std::string mon_id, std::string *result); int connect(); diff --git a/src/librados/librados_c.cc b/src/librados/librados_c.cc index decaeeaccce..701c2964c37 100644 --- a/src/librados/librados_c.cc +++ b/src/librados/librados_c.cc @@ -10,6 +10,7 @@ #include "common/common_init.h" #include "common/TracepointProvider.h" #include "common/hobject.h" +#include "common/async/waiter.h" #include "include/rados/librados.h" #include "include/types.h" #include @@ -2018,40 +2019,37 @@ extern "C" int _rados_object_list(rados_ioctx_t io, // FIPS zeroization audit 20191116: this memset is not security related. memset(result_items, 0, sizeof(rados_object_list_item) * result_item_count); - std::list result; - hobject_t next_hash; - bufferlist filter_bl; if (filter_buf != nullptr) { filter_bl.append(filter_buf, filter_buf_len); } - C_SaferCond cond; - ctx->objecter->enumerate_objects( + ceph::async::waiter, + hobject_t> w; + ctx->objecter->enumerate_objects( ctx->poolid, ctx->oloc.nspace, *((hobject_t*)start), *((hobject_t*)finish), result_item_count, filter_bl, - &result, - &next_hash, - &cond); + w); hobject_t *next_hobj = (hobject_t*)(*next); ceph_assert(next_hobj); - int r = cond.wait(); - if (r < 0) { + auto [ec, result, next_hash] = w.wait(); + + if (ec) { *next_hobj = hobject_t::get_max(); - return r; + return ceph::from_error_code(ec); } ceph_assert(result.size() <= result_item_count); // Don't overflow! 
int k = 0; - for (std::list::iterator i = result.begin(); - i != result.end(); ++i) { + for (auto i = result.begin(); i != result.end(); ++i) { rados_object_list_item &item = result_items[k++]; do_out_buffer(i->oid, &item.oid, &item.oid_length); do_out_buffer(i->nspace, &item.nspace, &item.nspace_length); @@ -2528,7 +2526,7 @@ struct AioGetxattrData { bufferlist bl; char* user_buf; size_t len; - struct librados::C_AioCompleteAndSafe user_completion; + struct librados::CB_AioCompleteAndSafe user_completion; }; static void rados_aio_getxattr_complete(rados_completion_t c, void *arg) { @@ -2543,7 +2541,7 @@ static void rados_aio_getxattr_complete(rados_completion_t c, void *arg) { rc = cdata->bl.length(); } } - cdata->user_completion.finish(rc); + cdata->user_completion(rc); reinterpret_cast(c)->put(); delete cdata; } @@ -2583,7 +2581,7 @@ struct AioGetxattrsData { } librados::RadosXattrsIter *it; rados_xattrs_iter_t *iter; - struct librados::C_AioCompleteAndSafe user_completion; + struct librados::CB_AioCompleteAndSafe user_completion; }; } @@ -2591,12 +2589,12 @@ static void rados_aio_getxattrs_complete(rados_completion_t c, void *arg) { AioGetxattrsData *cdata = reinterpret_cast(arg); int rc = _rados_aio_get_return_value(c); if (rc) { - cdata->user_completion.finish(rc); + cdata->user_completion(rc); } else { cdata->it->i = cdata->it->attrset.begin(); *cdata->iter = cdata->it; cdata->it = 0; - cdata->user_completion.finish(0); + cdata->user_completion(0); } reinterpret_cast(c)->put(); delete cdata; @@ -3168,7 +3166,7 @@ LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_assert_version); extern "C" void _rados_write_op_assert_exists(rados_write_op_t write_op) { tracepoint(librados, rados_write_op_assert_exists_enter, write_op); - ((::ObjectOperation *)write_op)->stat(NULL, (ceph::real_time *)NULL, NULL); + ((::ObjectOperation *)write_op)->stat(nullptr, nullptr, nullptr); tracepoint(librados, rados_write_op_assert_exists_exit); } LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_assert_exists); @@ -3593,7 +3591,7 @@ LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_assert_version); extern "C" void _rados_read_op_assert_exists(rados_read_op_t read_op) { tracepoint(librados, rados_read_op_assert_exists_enter, read_op); - ((::ObjectOperation *)read_op)->stat(NULL, (ceph::real_time *)NULL, NULL); + ((::ObjectOperation *)read_op)->stat(nullptr, nullptr, nullptr); tracepoint(librados, rados_read_op_assert_exists_exit); } LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_assert_exists); @@ -3819,7 +3817,7 @@ extern "C" void _rados_read_op_getxattrs(rados_read_op_t read_op, tracepoint(librados, rados_read_op_getxattrs_enter, read_op, prval); librados::RadosXattrsIter *xattrs_iter = new librados::RadosXattrsIter; ((::ObjectOperation *)read_op)->getxattrs(&xattrs_iter->attrset, prval); - ((::ObjectOperation *)read_op)->add_handler(new C_XattrsIter(xattrs_iter)); + ((::ObjectOperation *)read_op)->set_handler(new C_XattrsIter(xattrs_iter)); *iter = xattrs_iter; tracepoint(librados, rados_read_op_getxattrs_exit, *iter); } @@ -3843,7 +3841,7 @@ extern "C" void _rados_read_op_omap_get_vals(rados_read_op_t read_op, &omap_iter->values, nullptr, prval); - ((::ObjectOperation *)read_op)->add_handler(new C_OmapIter(omap_iter)); + ((::ObjectOperation *)read_op)->set_handler(new C_OmapIter(omap_iter)); *iter = omap_iter; tracepoint(librados, rados_read_op_omap_get_vals_exit, *iter); } @@ -3868,7 +3866,7 @@ extern "C" void _rados_read_op_omap_get_vals2(rados_read_op_t read_op, &omap_iter->values, (bool*)pmore, prval); - ((::ObjectOperation 
*)read_op)->add_handler(new C_OmapIter(omap_iter)); + ((::ObjectOperation *)read_op)->set_handler(new C_OmapIter(omap_iter)); *iter = omap_iter; tracepoint(librados, rados_read_op_omap_get_vals_exit, *iter); } @@ -3900,7 +3898,7 @@ extern "C" void _rados_read_op_omap_get_keys(rados_read_op_t read_op, ((::ObjectOperation *)read_op)->omap_get_keys( start_after ? start_after : "", max_return, &ctx->keys, nullptr, prval); - ((::ObjectOperation *)read_op)->add_handler(ctx); + ((::ObjectOperation *)read_op)->set_handler(ctx); *iter = omap_iter; tracepoint(librados, rados_read_op_omap_get_keys_exit, *iter); } @@ -3920,7 +3918,7 @@ extern "C" void _rados_read_op_omap_get_keys2(rados_read_op_t read_op, start_after ? start_after : "", max_return, &ctx->keys, (bool*)pmore, prval); - ((::ObjectOperation *)read_op)->add_handler(ctx); + ((::ObjectOperation *)read_op)->set_handler(ctx); *iter = omap_iter; tracepoint(librados, rados_read_op_omap_get_keys_exit, *iter); } @@ -3935,7 +3933,7 @@ static void internal_rados_read_op_omap_get_vals_by_keys(rados_read_op_t read_op ((::ObjectOperation *)read_op)->omap_get_vals_by_keys(to_get, &omap_iter->values, prval); - ((::ObjectOperation *)read_op)->add_handler(new C_OmapIter(omap_iter)); + ((::ObjectOperation *)read_op)->set_handler(new C_OmapIter(omap_iter)); *iter = omap_iter; } diff --git a/src/librados/librados_cxx.cc b/src/librados/librados_cxx.cc index 3702350984a..bc399ea82e5 100644 --- a/src/librados/librados_cxx.cc +++ b/src/librados/librados_cxx.cc @@ -21,6 +21,7 @@ #include "common/common_init.h" #include "common/TracepointProvider.h" #include "common/hobject.h" +#include "common/async/waiter.h" #include "include/rados/librados.h" #include "include/rados/librados.hpp" #include "include/types.h" @@ -155,10 +156,11 @@ void librados::ObjectOperation::assert_exists() { ceph_assert(impl); ::ObjectOperation *o = &impl->o; - o->stat(NULL, (ceph::real_time*) NULL, NULL); + o->stat(nullptr, nullptr, nullptr); } -void librados::ObjectOperation::exec(const char *cls, const char *method, bufferlist& inbl) +void librados::ObjectOperation::exec(const char *cls, const char *method, + bufferlist& inbl) { ceph_assert(impl); ::ObjectOperation *o = &impl->o; @@ -2008,7 +2010,7 @@ struct AioGetxattrDataPP { AioGetxattrDataPP(librados::AioCompletionImpl *c, bufferlist *_bl) : bl(_bl), completion(c) {} bufferlist *bl; - struct librados::C_AioCompleteAndSafe completion; + struct librados::CB_AioCompleteAndSafe completion; }; static void rados_aio_getxattr_completepp(rados_completion_t c, void *arg) { @@ -2017,7 +2019,7 @@ static void rados_aio_getxattr_completepp(rados_completion_t c, void *arg) { if (rc >= 0) { rc = cdata->bl->length(); } - cdata->completion.finish(rc); + cdata->completion(rc); delete cdata; } @@ -3040,29 +3042,27 @@ int librados::IoCtx::object_list(const ObjectCursor &start, ceph_assert(next != nullptr); result->clear(); - C_SaferCond cond; - hobject_t next_hash; - std::list obj_result; - io_ctx_impl->objecter->enumerate_objects( + ceph::async::waiter, + hobject_t> w; + io_ctx_impl->objecter->enumerate_objects( io_ctx_impl->poolid, io_ctx_impl->oloc.nspace, *((hobject_t*)start.c_cursor), *((hobject_t*)finish.c_cursor), result_item_count, filter, - &obj_result, - &next_hash, - &cond); + w); - int r = cond.wait(); - if (r < 0) { + auto [ec, obj_result, next_hash] = w.wait(); + if (ec) { next->set((rados_object_list_cursor)(new hobject_t(hobject_t::get_max()))); - return r; + return ceph::from_error_code(ec); } next->set((rados_object_list_cursor)(new 
hobject_t(next_hash))); - for (std::list::iterator i = obj_result.begin(); + for (auto i = obj_result.begin(); i != obj_result.end(); ++i) { ObjectItem oi; oi.oid = i->oid; diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 47b6d5ec648..08ebe2190cf 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -2212,7 +2212,7 @@ void CDir::_omap_commit(int op_prio) // don't create new dirfrag blindly if (!is_new() && !state_test(CDir::STATE_FRAGMENTING)) - op.stat(NULL, (ceph::real_time*) NULL, NULL); + op.stat(nullptr, nullptr, nullptr); if (!to_set.empty()) op.omap_set(to_set); @@ -2250,7 +2250,7 @@ void CDir::_omap_commit(int op_prio) // don't create new dirfrag blindly if (!is_new() && !state_test(CDir::STATE_FRAGMENTING)) - op.stat(NULL, (ceph::real_time*)NULL, NULL); + op.stat(nullptr, nullptr, nullptr); /* * save the header at the last moment.. If we were to send it off before other diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index cc858b0028b..c5c6359df3e 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -16,6 +16,7 @@ #include "common/debug.h" #include "common/errno.h" +#include "common/async/blocked_completion.h" #include "messages/MClientRequestForward.h" #include "messages/MMDSLoadTargets.h" @@ -488,7 +489,7 @@ MDSRank::MDSRank( boost::asio::io_context& ioc) : cct(msgr->cct), mds_lock(mds_lock_), clog(clog_), timer(timer_), mdsmap(mdsmap_), - objecter(new Objecter(g_ceph_context, msgr, monc_, nullptr, 0, 0)), + objecter(new Objecter(g_ceph_context, msgr, monc_, ioc, 0, 0)), damage_table(whoami_), sessionmap(this), op_tracker(g_ceph_context, g_conf()->mds_enable_op_tracker, g_conf()->osd_num_op_tracker_shard), @@ -1684,7 +1685,6 @@ void MDSRank::calc_recovery_set() dout(1) << " recovery set is " << rs << dendl; } - void MDSRank::replay_start() { dout(1) << "replay_start" << dendl; @@ -1695,17 +1695,20 @@ void MDSRank::replay_start() calc_recovery_set(); // Check if we need to wait for a newer OSD map before starting - Context *fin = new C_IO_Wrapper(this, new C_MDS_BootStart(this, MDS_BOOT_INITIAL)); - bool const ready = objecter->wait_for_map( - mdsmap->get_last_failure_osd_epoch(), - fin); + bool const ready = objecter->with_osdmap( + [this](const OSDMap& o) { + return o.get_epoch() >= mdsmap->get_last_failure_osd_epoch(); + }); if (ready) { - delete fin; boot_start(); } else { dout(1) << " waiting for osdmap " << mdsmap->get_last_failure_osd_epoch() << " (which blacklists prior instance)" << dendl; + Context *fin = new C_IO_Wrapper(this, new C_MDS_BootStart(this, MDS_BOOT_INITIAL)); + objecter->wait_for_map( + mdsmap->get_last_failure_osd_epoch(), + lambdafy(fin)); } } @@ -1757,10 +1760,11 @@ void MDSRank::standby_replay_restart() /* We are transitioning out of standby: wait for OSD map update before making final pass */ dout(1) << "standby_replay_restart (final takeover pass)" << dendl; - Context *fin = new C_IO_Wrapper(this, new C_MDS_StandbyReplayRestart(this)); - bool ready = objecter->wait_for_map(mdsmap->get_last_failure_osd_epoch(), fin); + bool ready = objecter->with_osdmap( + [this](const OSDMap& o) { + return o.get_epoch() >= mdsmap->get_last_failure_osd_epoch(); + }); if (ready) { - delete fin; mdlog->get_journaler()->reread_head_and_probe( new C_MDS_StandbyReplayRestartFinish( this, @@ -1771,8 +1775,11 @@ void MDSRank::standby_replay_restart() dout(1) << " opening open_file_table (async)" << dendl; mdcache->open_file_table.load(nullptr); } else { + auto fin = new C_IO_Wrapper(this, new C_MDS_StandbyReplayRestart(this)); dout(1) << " waiting for osdmap 
" << mdsmap->get_last_failure_osd_epoch() - << " (which blacklists prior instance)" << dendl; + << " (which blacklists prior instance)" << dendl; + objecter->wait_for_map(mdsmap->get_last_failure_osd_epoch(), + lambdafy(fin)); } } } @@ -2495,12 +2502,9 @@ void MDSRankDispatcher::handle_asok_command( std::lock_guard l(mds_lock); set_osd_epoch_barrier(target_epoch); } - C_SaferCond cond; - bool already_got = objecter->wait_for_map(target_epoch, &cond); - if (!already_got) { - dout(4) << __func__ << ": waiting for OSD epoch " << target_epoch << dendl; - cond.wait(); - } + boost::system::error_code ec; + dout(4) << __func__ << ": possibly waiting for OSD epoch " << target_epoch << dendl; + objecter->wait_for_map(target_epoch, ceph::async::use_blocked[ec]); } else if (command == "session ls" || command == "client ls") { std::lock_guard l(mds_lock); @@ -3433,7 +3437,7 @@ bool MDSRank::evict_client(int64_t session_id, Context *on_blacklist_done = new LambdaContext([this, fn](int r) { objecter->wait_for_latest_osdmap( - new C_OnFinisher( + lambdafy((new C_OnFinisher( new LambdaContext([this, fn](int r) { std::lock_guard l(mds_lock); auto epoch = objecter->with_osdmap([](const OSDMap &o){ @@ -3444,7 +3448,7 @@ bool MDSRank::evict_client(int64_t session_id, fn(); }), finisher) - ); + ))); }); dout(4) << "Sending mon blacklist command: " << cmd[0] << dendl; diff --git a/src/mds/PurgeQueue.h b/src/mds/PurgeQueue.h index 4ccc2ac1042..2bbcfeb4959 100644 --- a/src/mds/PurgeQueue.h +++ b/src/mds/PurgeQueue.h @@ -16,6 +16,7 @@ #define PURGE_QUEUE_H_ #include "include/compact_set.h" +#include "common/Finisher.h" #include "mds/MDSMap.h" #include "osdc/Journaler.h" diff --git a/src/mds/Server.cc b/src/mds/Server.cc index a32babaa283..a5c22c09532 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -5497,29 +5497,18 @@ int Server::check_layout_vxattr(MDRequestRef& mdr, if (req_epoch > epoch) { // well, our map is older. consult mds. - Context *fin = new C_IO_Wrapper(mds, new C_MDS_RetryRequest(mdcache, mdr)); - - if (!mds->objecter->wait_for_map(req_epoch, fin)) - return r; // wait, fin will retry this request later - - delete fin; - - // now we have at least as new a map as the client, try again. - mds->objecter->with_osdmap([&](const OSDMap& osdmap) { - r = parse_layout_vxattr(name, value, osdmap, layout); - epoch = osdmap.get_epoch(); - }); - - ceph_assert(epoch >= req_epoch); // otherwise wait_for_map() told a lie + auto fin = new C_IO_Wrapper(mds, new C_MDS_RetryRequest(mdcache, mdr)); + mds->objecter->wait_for_map(req_epoch, lambdafy(fin)); + return r; } else if (req_epoch == 0 && !mdr->waited_for_osdmap) { // For compatibility with client w/ old code, we still need get the // latest map. One day if COMPACT_VERSION of MClientRequest >=3, // we can remove those code. 
mdr->waited_for_osdmap = true; - mds->objecter->wait_for_latest_osdmap(new C_IO_Wrapper( - mds, new C_MDS_RetryRequest(mdcache, mdr))); + mds->objecter->wait_for_latest_osdmap(std::ref(*new C_IO_Wrapper( + mds, new C_MDS_RetryRequest(mdcache, mdr)))); return r; } } diff --git a/src/messages/MGetPoolStats.h b/src/messages/MGetPoolStats.h index 51bc134a21f..6b64e4feee2 100644 --- a/src/messages/MGetPoolStats.h +++ b/src/messages/MGetPoolStats.h @@ -21,10 +21,10 @@ class MGetPoolStats : public PaxosServiceMessage { public: uuid_d fsid; - std::list pools; + std::vector pools; MGetPoolStats() : PaxosServiceMessage{MSG_GETPOOLSTATS, 0} {} - MGetPoolStats(const uuid_d& f, ceph_tid_t t, std::list& ls, version_t l) : + MGetPoolStats(const uuid_d& f, ceph_tid_t t, std::vector& ls, version_t l) : PaxosServiceMessage{MSG_GETPOOLSTATS, l}, fsid(f), pools(ls) { set_tid(t); diff --git a/src/messages/MGetPoolStatsReply.h b/src/messages/MGetPoolStatsReply.h index ff474d3d5db..063b6f7cb28 100644 --- a/src/messages/MGetPoolStatsReply.h +++ b/src/messages/MGetPoolStatsReply.h @@ -22,7 +22,7 @@ class MGetPoolStatsReply : public PaxosServiceMessage { public: uuid_d fsid; - std::map pool_stats; + boost::container::flat_map pool_stats; bool per_pool = false; MGetPoolStatsReply() : PaxosServiceMessage{MSG_GETPOOLSTATSREPLY, 0, diff --git a/src/mgr/BaseMgrModule.cc b/src/mgr/BaseMgrModule.cc index 3ac66e9a9e7..391d632cc75 100644 --- a/src/mgr/BaseMgrModule.cc +++ b/src/mgr/BaseMgrModule.cc @@ -155,10 +155,9 @@ ceph_send_command(BaseMgrModule *self, PyObject *args) // can wait for those. auto c = new LambdaContext([command_c, self](int command_r){ self->py_modules->get_objecter().wait_for_latest_osdmap( - new LambdaContext([command_c, command_r](int wait_r){ - command_c->complete(command_r); - }) - ); + [command_c, command_r](boost::system::error_code) { + command_c->complete(command_r); + }); }); self->py_modules->get_monc().start_mon_command( @@ -186,9 +185,12 @@ ceph_send_command(BaseMgrModule *self, PyObject *args) {cmd_json}, {}, &tid, - &command_c->outbl, - &command_c->outs, - new C_OnFinisher(command_c, &self->py_modules->cmd_finisher)); + [command_c, f = &self->py_modules->cmd_finisher] + (boost::system::error_code ec, std::string s, ceph::buffer::list bl) { + command_c->outs = std::move(s); + command_c->outbl = std::move(bl); + f->queue(command_c); + }); } else if (std::string(type) == "mds") { int r = self->py_modules->get_client().mds_command( name, @@ -221,9 +223,12 @@ ceph_send_command(BaseMgrModule *self, PyObject *args) {cmd_json}, {}, &tid, - &command_c->outbl, - &command_c->outs, - new C_OnFinisher(command_c, &self->py_modules->cmd_finisher)); + [command_c, f = &self->py_modules->cmd_finisher] + (boost::system::error_code ec, std::string s, ceph::buffer::list bl) { + command_c->outs = std::move(s); + command_c->outbl = std::move(bl); + f->queue(command_c); + }); PyEval_RestoreThread(tstate); return nullptr; } else { diff --git a/src/mgr/MgrStandby.cc b/src/mgr/MgrStandby.cc index 7e30eac6181..79374518865 100644 --- a/src/mgr/MgrStandby.cc +++ b/src/mgr/MgrStandby.cc @@ -46,7 +46,7 @@ MgrStandby::MgrStandby(int argc, const char **argv) : "mgr", Messenger::get_pid_nonce(), 0)), - objecter{g_ceph_context, client_messenger.get(), &monc, NULL, 0, 0}, + objecter{g_ceph_context, client_messenger.get(), &monc, poolctx, 0, 0}, client{client_messenger.get(), &monc, &objecter}, mgrc(g_ceph_context, client_messenger.get(), &monc.monmap), log_client(g_ceph_context, client_messenger.get(), &monc.monmap, 
LogClient::NO_FLAGS), diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 39cf8d19bea..b410b82b57b 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -51,6 +51,7 @@ #include "common/ceph_releases.h" #include "common/ceph_time.h" #include "common/version.h" +#include "common/async/blocked_completion.h" #include "common/pick_address.h" #include "common/blkdev.h" #include "common/numa.h" @@ -277,7 +278,7 @@ OSDService::OSDService(OSD *osd, ceph::async::io_context_pool& poolctx) : poolctx(poolctx), objecter(make_unique(osd->client_messenger->cct, osd->objecter_messenger, - osd->monc, nullptr, 0, 0)), + osd->monc, poolctx, 0, 0)), m_objecter_finishers(cct->_conf->osd_objecter_finishers), watch_timer(osd->client_messenger->cct, watch_lock), next_notif_id(0), @@ -9914,9 +9915,8 @@ void OSD::get_latest_osdmap() { dout(10) << __func__ << " -- start" << dendl; - C_SaferCond cond; - service.objecter->wait_for_latest_osdmap(&cond); - cond.wait(); + boost::system::error_code ec; + service.objecter->wait_for_latest_osdmap(ceph::async::use_blocked[ec]); dout(10) << __func__ << " -- finish" << dendl; } diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h index 634e385c41b..f53c5a64610 100644 --- a/src/osd/OSDMap.h +++ b/src/osd/OSDMap.h @@ -571,7 +571,7 @@ private: mempool::osdmap::map pools; mempool::osdmap::map pool_name; mempool::osdmap::map> erasure_code_profiles; - mempool::osdmap::map name_pool; + mempool::osdmap::map> name_pool; std::shared_ptr< mempool::osdmap::vector > osd_uuid; mempool::osdmap::vector osd_xinfo; @@ -1299,7 +1299,7 @@ public: return new_purged_snaps; } - int64_t lookup_pg_pool_name(const std::string& name) const { + int64_t lookup_pg_pool_name(std::string_view name) const { auto p = name_pool.find(name); if (p == name_pool.end()) return -ENOENT; diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 416f5cafd76..3c235ecc682 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -1689,7 +1689,7 @@ bool pg_pool_t::is_removed_snap(snapid_t s) const return removed_snaps.contains(s); } -snapid_t pg_pool_t::snap_exists(const char *s) const +snapid_t pg_pool_t::snap_exists(std::string_view s) const { for (auto p = snaps.cbegin(); p != snaps.cend(); ++p) if (p->second.name == s) diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index c74f119745b..3169e63c369 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -256,10 +256,10 @@ namespace std { // does it go in. 
struct object_locator_t { // You specify either the hash or the key -- not both - int64_t pool; ///< pool id - std::string key; ///< key std::string (if non-empty) + std::int64_t pool; ///< pool id + std::string key; ///< key string (if non-empty) std::string nspace; ///< namespace - int64_t hash; ///< hash position (if >= 0) + std::int64_t hash; ///< hash position (if >= 0) explicit object_locator_t() : pool(-1), hash(-1) {} @@ -267,11 +267,11 @@ struct object_locator_t { : pool(po), hash(-1) {} explicit object_locator_t(int64_t po, int64_t ps) : pool(po), hash(ps) {} - explicit object_locator_t(int64_t po, std::string ns) + explicit object_locator_t(int64_t po, std::string_view ns) : pool(po), nspace(ns), hash(-1) {} - explicit object_locator_t(int64_t po, std::string ns, int64_t ps) + explicit object_locator_t(int64_t po, std::string_view ns, int64_t ps) : pool(po), nspace(ns), hash(ps) {} - explicit object_locator_t(int64_t po, std::string ns, std::string s) + explicit object_locator_t(int64_t po, std::string_view ns, std::string_view s) : pool(po), key(s), nspace(ns), hash(-1) {} explicit object_locator_t(const hobject_t& soid) : pool(soid.pool), key(soid.get_key()), nspace(soid.nspace), hash(-1) {} @@ -1699,7 +1699,7 @@ public: bool is_unmanaged_snaps_mode() const; bool is_removed_snap(snapid_t s) const; - snapid_t snap_exists(const char *s) const; + snapid_t snap_exists(std::string_view s) const; void add_snap(const char *n, utime_t stamp); uint64_t add_unmanaged_snap(bool preoctopus_compat); void remove_snap(snapid_t s); @@ -4964,20 +4964,24 @@ using pg_missing_t = pg_missing_set; using pg_missing_tracker_t = pg_missing_set; + + /** * pg list objects response format * */ -struct pg_nls_response_t { + +template +struct pg_nls_response_template { collection_list_handle_t handle; - std::list entries; + std::vector entries; void encode(ceph::buffer::list& bl) const { ENCODE_START(1, 1, bl); encode(handle, bl); __u32 n = (__u32)entries.size(); encode(n, bl); - for (std::list::const_iterator i = entries.begin(); i != entries.end(); ++i) { + for (auto i = entries.begin(); i != entries.end(); ++i) { encode(i->nspace, bl); encode(i->oid, bl); encode(i->locator, bl); @@ -4991,7 +4995,7 @@ struct pg_nls_response_t { decode(n, bl); entries.clear(); while (n--) { - librados::ListObjectImpl i; + T i; decode(i.nspace, bl); decode(i.oid, bl); decode(i.locator, bl); @@ -5002,7 +5006,7 @@ struct pg_nls_response_t { void dump(ceph::Formatter *f) const { f->dump_stream("handle") << handle; f->open_array_section("entries"); - for (std::list::const_iterator p = entries.begin(); p != entries.end(); ++p) { + for (auto p = entries.begin(); p != entries.end(); ++p) { f->open_object_section("object"); f->dump_string("namespace", p->nspace); f->dump_string("object", p->oid); @@ -5011,19 +5015,19 @@ struct pg_nls_response_t { } f->close_section(); } - static void generate_test_instances(std::list& o) { - o.push_back(new pg_nls_response_t); - o.push_back(new pg_nls_response_t); + static void generate_test_instances(std::list*>& o) { + o.push_back(new pg_nls_response_template); + o.push_back(new pg_nls_response_template); o.back()->handle = hobject_t(object_t("hi"), "key", 1, 2, -1, ""); o.back()->entries.push_back(librados::ListObjectImpl("", "one", "")); o.back()->entries.push_back(librados::ListObjectImpl("", "two", "twokey")); o.back()->entries.push_back(librados::ListObjectImpl("", "three", "")); - o.push_back(new pg_nls_response_t); + o.push_back(new pg_nls_response_template); o.back()->handle = 
hobject_t(object_t("hi"), "key", 3, 4, -1, ""); o.back()->entries.push_back(librados::ListObjectImpl("n1", "n1one", "")); o.back()->entries.push_back(librados::ListObjectImpl("n1", "n1two", "n1twokey")); o.back()->entries.push_back(librados::ListObjectImpl("n1", "n1three", "")); - o.push_back(new pg_nls_response_t); + o.push_back(new pg_nls_response_template); o.back()->handle = hobject_t(object_t("hi"), "key", 5, 6, -1, ""); o.back()->entries.push_back(librados::ListObjectImpl("", "one", "")); o.back()->entries.push_back(librados::ListObjectImpl("", "two", "twokey")); @@ -5034,6 +5038,8 @@ struct pg_nls_response_t { } }; +using pg_nls_response_t = pg_nls_response_template; + WRITE_CLASS_ENCODER(pg_nls_response_t) // For backwards compatibility with older OSD requests diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc index 2a8577f05bd..79426643400 100644 --- a/src/osdc/Objecter.cc +++ b/src/osdc/Objecter.cc @@ -12,13 +12,16 @@ * */ +#include #include #include "Objecter.h" #include "osd/OSDMap.h" +#include "osd/error_code.h" #include "Filer.h" #include "mon/MonClient.h" +#include "mon/error_code.h" #include "msg/Messenger.h" #include "msg/Message.h" @@ -52,6 +55,9 @@ #include "include/str_list.h" #include "common/errno.h" #include "common/EventTrace.h" +#include "common/async/waiter.h" +#include "error_code.h" + using std::list; using std::make_pair; @@ -82,6 +88,11 @@ using ceph::shunique_lock; using ceph::acquire_shared; using ceph::acquire_unique; +namespace bc = boost::container; +namespace bs = boost::system; +namespace ca = ceph::async; +namespace cb = ceph::buffer; + #define dout_subsys ceph_subsys_objecter #undef dout_prefix #define dout_prefix *_dout << messenger->get_myname() << ".objecter " @@ -165,6 +176,11 @@ enum { l_osdc_last, }; +namespace { +inline bs::error_code osdcode(int r) { + return (r < 0) ? bs::error_code(-r, osd_category()) : bs::error_code(); +} +} // config obs ---------------------------- @@ -180,55 +196,19 @@ public: int call(std::string_view command, const cmdmap_t& cmdmap, Formatter *f, std::ostream& ss, - ceph::buffer::list& out) override; + cb::list& out) override; }; -/** - * This is a more limited form of C_Contexts, but that requires - * a ceph_context which we don't have here. 
- */ -class ObjectOperation::C_TwoContexts : public Context { - Context *first; - Context *second; -public: - C_TwoContexts(Context *first, Context *second) : - first(first), second(second) {} - void finish(int r) override { - first->complete(r); - second->complete(r); - first = NULL; - second = NULL; - } - - ~C_TwoContexts() override { - delete first; - delete second; - } -}; - -void ObjectOperation::add_handler(Context *extra) { - size_t last = out_handler.size() - 1; - Context *orig = out_handler[last]; - if (orig) { - Context *wrapper = new C_TwoContexts(orig, extra); - out_handler[last] = wrapper; - } else { - out_handler[last] = extra; - } -} - -Objecter::OSDSession::unique_completion_lock Objecter::OSDSession::get_lock( - object_t& oid) +std::unique_lock Objecter::OSDSession::get_lock(object_t& oid) { if (oid.name.empty()) - return unique_completion_lock(); + return {}; static constexpr uint32_t HASH_PRIME = 1021; uint32_t h = ceph_str_hash_linux(oid.name.c_str(), oid.name.size()) % HASH_PRIME; - return unique_completion_lock(completion_locks[h % num_locks], - std::defer_lock); + return {completion_locks[h % num_locks], std::defer_lock}; } const char** Objecter::get_tracked_conf_keys() const @@ -385,7 +365,7 @@ void Objecter::init() } m_request_state_hook = new RequestStateHook(this); - AdminSocket* admin_socket = cct->get_admin_socket(); + auto admin_socket = cct->get_admin_socket(); int ret = admin_socket->register_command("objecter_requests", m_request_state_hook, "show in-progress osd requests"); @@ -432,57 +412,54 @@ void Objecter::shutdown() cct->_conf.remove_observer(this); wl.lock(); - map::iterator p; while (!osd_sessions.empty()) { - p = osd_sessions.begin(); + auto p = osd_sessions.begin(); close_session(p->second); } while(!check_latest_map_lingers.empty()) { - map::iterator i = check_latest_map_lingers.begin(); + auto i = check_latest_map_lingers.begin(); i->second->put(); check_latest_map_lingers.erase(i->first); } while(!check_latest_map_ops.empty()) { - map::iterator i = check_latest_map_ops.begin(); + auto i = check_latest_map_ops.begin(); i->second->put(); check_latest_map_ops.erase(i->first); } while(!check_latest_map_commands.empty()) { - map::iterator i - = check_latest_map_commands.begin(); + auto i = check_latest_map_commands.begin(); i->second->put(); check_latest_map_commands.erase(i->first); } while(!poolstat_ops.empty()) { - map::iterator i = poolstat_ops.begin(); + auto i = poolstat_ops.begin(); delete i->second; poolstat_ops.erase(i->first); } while(!statfs_ops.empty()) { - map::iterator i = statfs_ops.begin(); + auto i = statfs_ops.begin(); delete i->second; statfs_ops.erase(i->first); } while(!pool_ops.empty()) { - map::iterator i = pool_ops.begin(); + auto i = pool_ops.begin(); delete i->second; pool_ops.erase(i->first); } ldout(cct, 20) << __func__ << " clearing up homeless session..." 
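OSDSession::get_lock() now returns a plain std::unique_lock chosen from a small array of shard locks keyed by a hash of the object name, deferred so the caller decides when to take it. A self-contained sketch of that scheme, with std::hash standing in for ceph_str_hash_linux and the shard count invented:

#include <array>
#include <cstddef>
#include <functional>
#include <mutex>
#include <string>

class Session {
  static constexpr std::size_t HASH_PRIME = 1021;
  std::array<std::mutex, 32> completion_locks;
public:
  // Empty oid -> a unique_lock owning nothing; otherwise a deferred lock on
  // the shard the name hashes to.
  std::unique_lock<std::mutex> get_lock(const std::string& oid) {
    if (oid.empty())
      return {};
    auto h = std::hash<std::string>{}(oid) % HASH_PRIME;
    return {completion_locks[h % completion_locks.size()], std::defer_lock};
  }
};

int main() {
  Session s;
  auto l = s.get_lock("rbd_data.1234");
  if (l.mutex())
    l.lock();   // safe: defer_lock meant nothing was locked at construction
}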
<< dendl; while(!homeless_session->linger_ops.empty()) { - std::map::iterator i - = homeless_session->linger_ops.begin(); + auto i = homeless_session->linger_ops.begin(); ldout(cct, 10) << " linger_op " << i->first << dendl; LingerOp *lop = i->second; { - OSDSession::unique_lock swl(homeless_session->lock); + std::unique_lock swl(homeless_session->lock); _session_linger_op_remove(homeless_session, lop); } linger_ops.erase(lop->linger_id); @@ -491,23 +468,22 @@ void Objecter::shutdown() } while(!homeless_session->ops.empty()) { - std::map::iterator i = homeless_session->ops.begin(); + auto i = homeless_session->ops.begin(); ldout(cct, 10) << " op " << i->first << dendl; - Op *op = i->second; + auto op = i->second; { - OSDSession::unique_lock swl(homeless_session->lock); + std::unique_lock swl(homeless_session->lock); _session_op_remove(homeless_session, op); } op->put(); } while(!homeless_session->command_ops.empty()) { - std::map::iterator i - = homeless_session->command_ops.begin(); + auto i = homeless_session->command_ops.begin(); ldout(cct, 10) << " command_op " << i->first << dendl; - CommandOp *cop = i->second; + auto cop = i->second; { - OSDSession::unique_lock swl(homeless_session->lock); + std::unique_lock swl(homeless_session->lock); _session_command_op_remove(homeless_session, cop); } cop->put(); @@ -533,7 +509,7 @@ void Objecter::shutdown() // This is safe because we guarantee no concurrent calls to // shutdown() with the ::initialized check at start. if (m_request_state_hook) { - AdminSocket* admin_socket = cct->get_admin_socket(); + auto admin_socket = cct->get_admin_socket(); admin_socket->unregister_commands(m_request_state_hook); delete m_request_state_hook; m_request_state_hook = NULL; @@ -541,14 +517,14 @@ void Objecter::shutdown() } void Objecter::_send_linger(LingerOp *info, - shunique_lock& sul) + ceph::shunique_lock& sul) { ceph_assert(sul.owns_lock() && sul.mutex() == &rwlock); vector opv; - Context *oncommit = NULL; - LingerOp::shared_lock watchl(info->watch_lock); - ceph::buffer::list *poutbl = NULL; + fu2::unique_function oncommit; + std::shared_lock watchl(info->watch_lock); + cb::list *poutbl = nullptr; if (info->registered && info->is_watch) { ldout(cct, 15) << "send_linger " << info->linger_id << " reconnect" << dendl; @@ -557,22 +533,26 @@ void Objecter::_send_linger(LingerOp *info, opv.back().op.watch.cookie = info->get_cookie(); opv.back().op.watch.op = CEPH_OSD_WATCH_OP_RECONNECT; opv.back().op.watch.gen = ++info->register_gen; - oncommit = new C_Linger_Reconnect(this, info); + oncommit = CB_Linger_Reconnect(this, info); } else { ldout(cct, 15) << "send_linger " << info->linger_id << " register" << dendl; opv = info->ops; - C_Linger_Commit *c = new C_Linger_Commit(this, info); + // TODO Augment ca::Completion with an equivalent of + // target so we can handle these cases better. 
+ auto c = std::make_unique(this, info); if (!info->is_watch) { info->notify_id = 0; poutbl = &c->outbl; } - oncommit = c; + oncommit = [c = std::move(c)](bs::error_code ec) mutable { + std::move(*c)(ec); + }; } watchl.unlock(); - Op *o = new Op(info->target.base_oid, info->target.base_oloc, - opv, info->target.flags | CEPH_OSD_FLAG_READ, - oncommit, info->pobjver); + auto o = new Op(info->target.base_oid, info->target.base_oloc, + std::move(opv), info->target.flags | CEPH_OSD_FLAG_READ, + std::move(oncommit), info->pobjver); o->outbl = poutbl; o->snapid = info->snap; o->snapc = info->snapc; @@ -587,9 +567,9 @@ void Objecter::_send_linger(LingerOp *info, if (info->register_tid) { // repeat send. cancel old registration op, if any. - OSDSession::unique_lock sl(info->session->lock); + std::unique_lock sl(info->session->lock); if (info->session->ops.count(info->register_tid)) { - Op *o = info->session->ops[info->register_tid]; + auto o = info->session->ops[info->register_tid]; _op_cancel_map_check(o); _cancel_linger_op(o); } @@ -601,17 +581,20 @@ void Objecter::_send_linger(LingerOp *info, logger->inc(l_osdc_linger_send); } -void Objecter::_linger_commit(LingerOp *info, int r, ceph::buffer::list& outbl) +void Objecter::_linger_commit(LingerOp *info, bs::error_code ec, + cb::list& outbl) { - LingerOp::unique_lock wl(info->watch_lock); + std::unique_lock wl(info->watch_lock); ldout(cct, 10) << "_linger_commit " << info->linger_id << dendl; if (info->on_reg_commit) { - info->on_reg_commit->complete(r); - info->on_reg_commit = NULL; + info->on_reg_commit->defer(std::move(info->on_reg_commit), + ec, cb::list{}); + info->on_reg_commit.reset(); } - if (r < 0 && info->on_notify_finish) { - info->on_notify_finish->complete(r); - info->on_notify_finish = nullptr; + if (ec && info->on_notify_finish) { + info->on_notify_finish->defer(std::move(info->on_notify_finish), + ec, cb::list{}); + info->on_notify_finish.reset(); } // only tell the user the first time we do this @@ -626,55 +609,55 @@ void Objecter::_linger_commit(LingerOp *info, int r, ceph::buffer::list& outbl) ldout(cct, 10) << "_linger_commit notify_id=" << info->notify_id << dendl; } - catch (ceph::buffer::error& e) { + catch (cb::error& e) { } } } -struct C_DoWatchError : public Context { +class CB_DoWatchError { Objecter *objecter; - Objecter::LingerOp *info; - int err; - C_DoWatchError(Objecter *o, Objecter::LingerOp *i, int r) - : objecter(o), info(i), err(r) { - info->get(); + boost::intrusive_ptr info; + bs::error_code ec; +public: + CB_DoWatchError(Objecter *o, Objecter::LingerOp *i, + bs::error_code ec) + : objecter(o), info(i), ec(ec) { info->_queued_async(); } - void finish(int r) override { - Objecter::unique_lock wl(objecter->rwlock); + void operator()() { + std::unique_lock wl(objecter->rwlock); bool canceled = info->canceled; wl.unlock(); if (!canceled) { - info->watch_context->handle_error(info->get_cookie(), err); + info->handle(ec, 0, info->get_cookie(), 0, {}); } info->finished_async(); - info->put(); } }; -int Objecter::_normalize_watch_error(int r) +bs::error_code Objecter::_normalize_watch_error(bs::error_code ec) { // translate ENOENT -> ENOTCONN so that a delete->disconnection // notification and a failure to reconnect because we raced with // the delete appear the same to the user. 
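The new _normalize_watch_error() compares against bs::errc::no_such_file_or_directory, i.e. by error *condition*, so an ENOENT-equivalent from any category is caught; it is then rewritten to ENOTCONN so a delete-triggered disconnect and a failed reconnect race look identical to watchers. A sketch using the generic category in place of osd_category():

#include <boost/system/error_code.hpp>
#include <cerrno>
#include <iostream>

namespace bs = boost::system;

bs::error_code normalize_watch_error(bs::error_code ec) {
  // Condition-based comparison: matches ENOENT regardless of which
  // category produced the code.
  if (ec == bs::errc::no_such_file_or_directory)
    ec = bs::error_code(ENOTCONN, bs::generic_category());
  return ec;
}

int main() {
  auto ec = normalize_watch_error({ENOENT, bs::generic_category()});
  std::cout << ec.message() << "\n";  // "Transport endpoint is not connected"
}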
- if (r == -ENOENT) - r = -ENOTCONN; - return r; + if (ec == bs::errc::no_such_file_or_directory) + ec = bs::error_code(ENOTCONN, osd_category()); + return ec; } -void Objecter::_linger_reconnect(LingerOp *info, int r) +void Objecter::_linger_reconnect(LingerOp *info, bs::error_code ec) { - ldout(cct, 10) << __func__ << " " << info->linger_id << " = " << r + ldout(cct, 10) << __func__ << " " << info->linger_id << " = " << ec << " (last_error " << info->last_error << ")" << dendl; - if (r < 0) { - LingerOp::unique_lock wl(info->watch_lock); + if (ec) { + std::unique_lock wl(info->watch_lock); if (!info->last_error) { - r = _normalize_watch_error(r); - info->last_error = r; - if (info->watch_context) { - finisher->queue(new C_DoWatchError(this, info, r)); + ec = _normalize_watch_error(ec); + info->last_error = ec; + if (info->handle) { + boost::asio::defer(finish_strand, CB_DoWatchError(this, info, ec)); } } wl.unlock(); @@ -705,10 +688,11 @@ void Objecter::_send_linger_ping(LingerOp *info) opv[0].op.watch.cookie = info->get_cookie(); opv[0].op.watch.op = CEPH_OSD_WATCH_OP_PING; opv[0].op.watch.gen = info->register_gen; - C_Linger_Ping *onack = new C_Linger_Ping(this, info); + Op *o = new Op(info->target.base_oid, info->target.base_oloc, - opv, info->target.flags | CEPH_OSD_FLAG_READ, - onack, NULL, NULL); + std::move(opv), info->target.flags | CEPH_OSD_FLAG_READ, + CB_Linger_Ping(this, info, now), + nullptr, nullptr); o->target = info->target; o->should_resend = false; _send_op_account(o); @@ -717,26 +701,25 @@ void Objecter::_send_linger_ping(LingerOp *info) _send_op(o); info->ping_tid = o->tid; - onack->sent = now; logger->inc(l_osdc_linger_ping); } -void Objecter::_linger_ping(LingerOp *info, int r, ceph::coarse_mono_time sent, +void Objecter::_linger_ping(LingerOp *info, bs::error_code ec, ceph::coarse_mono_time sent, uint32_t register_gen) { - LingerOp::unique_lock l(info->watch_lock); + std::unique_lock l(info->watch_lock); ldout(cct, 10) << __func__ << " " << info->linger_id - << " sent " << sent << " gen " << register_gen << " = " << r + << " sent " << sent << " gen " << register_gen << " = " << ec << " (last_error " << info->last_error << " register_gen " << info->register_gen << ")" << dendl; if (info->register_gen == register_gen) { - if (r == 0) { + if (!ec) { info->watch_valid_thru = sent; - } else if (r < 0 && !info->last_error) { - r = _normalize_watch_error(r); - info->last_error = r; - if (info->watch_context) { - finisher->queue(new C_DoWatchError(this, info, r)); + } else if (ec && !info->last_error) { + ec = _normalize_watch_error(ec); + info->last_error = ec; + if (info->handle) { + boost::asio::defer(finish_strand, CB_DoWatchError(this, info, ec)); } } } else { @@ -744,9 +727,10 @@ void Objecter::_linger_ping(LingerOp *info, int r, ceph::coarse_mono_time sent, } } -int Objecter::linger_check(LingerOp *info) +tl::expected Objecter::linger_check(LingerOp *info) { - LingerOp::shared_lock l(info->watch_lock); + std::shared_lock l(info->watch_lock); ceph::coarse_mono_time stamp = info->watch_valid_thru; if (!info->watch_pending_async.empty()) @@ -757,10 +741,9 @@ int Objecter::linger_check(LingerOp *info) << " err " << info->last_error << " age " << age << dendl; if (info->last_error) - return info->last_error; + return tl::unexpected(info->last_error); // return a safe upper bound (we are truncating to ms) - return - 1 + std::chrono::duration_cast(age).count(); + return age; } void Objecter::linger_cancel(LingerOp *info) @@ -776,7 +759,7 @@ void 
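linger_check() now returns tl::expected<..., error_code> instead of encoding "age or negative errno" in a single integer, which is why the old millisecond-truncation hack ("return a safe upper bound") disappears. A compilable sketch of the new shape, with the success type simplified to std::chrono::milliseconds:

#include <chrono>
#include <iostream>
#include <boost/system/error_code.hpp>
#include <tl/expected.hpp>   // TartanLlama expected, vendored by Ceph

namespace bs = boost::system;
using namespace std::chrono;

// Either the age of the last confirmed ping, or the stored watch error.
tl::expected<milliseconds, bs::error_code>
linger_check(bs::error_code last_error, steady_clock::duration age) {
  if (last_error)
    return tl::unexpected(last_error);
  return duration_cast<milliseconds>(age);
}

int main() {
  auto r = linger_check({}, seconds(3));
  if (r)
    std::cout << "watch valid, age " << r->count() << "ms\n";
  else
    std::cout << "watch error: " << r.error().message() << "\n";
}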
Objecter::_linger_cancel(LingerOp *info) ldout(cct, 20) << __func__ << " linger_id=" << info->linger_id << dendl; if (!info->canceled) { OSDSession *s = info->session; - OSDSession::unique_lock sl(s->lock); + std::unique_lock sl(s->lock); _session_linger_op_remove(s, info); sl.unlock(); @@ -797,18 +780,15 @@ Objecter::LingerOp *Objecter::linger_register(const object_t& oid, const object_locator_t& oloc, int flags) { - LingerOp *info = new LingerOp(this); + unique_lock l(rwlock); + // Acquire linger ID + auto info = new LingerOp(this, ++max_linger_id); info->target.base_oid = oid; info->target.base_oloc = oloc; if (info->target.base_oloc.key == oid) info->target.base_oloc.key.clear(); info->target.flags = flags; info->watch_valid_thru = ceph::coarse_mono_clock::now(); - - unique_lock l(rwlock); - - // Acquire linger ID - info->linger_id = ++max_linger_id; ldout(cct, 10) << __func__ << " info " << info << " linger_id " << info->linger_id << " cookie " << info->get_cookie() @@ -825,8 +805,8 @@ ceph_tid_t Objecter::linger_watch(LingerOp *info, ObjectOperation& op, const SnapContext& snapc, real_time mtime, - ceph::buffer::list& inbl, - Context *oncommit, + cb::list& inbl, + decltype(info->on_reg_commit)&& oncommit, version_t *objver) { info->is_watch = true; @@ -835,9 +815,8 @@ ceph_tid_t Objecter::linger_watch(LingerOp *info, info->target.flags |= CEPH_OSD_FLAG_WRITE; info->ops = op.ops; info->inbl = inbl; - info->poutbl = NULL; info->pobjver = objver; - info->on_reg_commit = oncommit; + info->on_reg_commit = std::move(oncommit); info->ctx_budget = take_linger_budget(info); @@ -845,34 +824,34 @@ ceph_tid_t Objecter::linger_watch(LingerOp *info, _linger_submit(info, sul); logger->inc(l_osdc_linger_active); + op.clear(); return info->linger_id; } ceph_tid_t Objecter::linger_notify(LingerOp *info, ObjectOperation& op, - snapid_t snap, ceph::buffer::list& inbl, - ceph::buffer::list *poutbl, - Context *onfinish, + snapid_t snap, cb::list& inbl, + decltype(LingerOp::on_reg_commit)&& onfinish, version_t *objver) { info->snap = snap; info->target.flags |= CEPH_OSD_FLAG_READ; info->ops = op.ops; info->inbl = inbl; - info->poutbl = poutbl; info->pobjver = objver; - info->on_reg_commit = onfinish; - + info->on_reg_commit = std::move(onfinish); info->ctx_budget = take_linger_budget(info); - + shunique_lock sul(rwlock, ceph::acquire_unique); _linger_submit(info, sul); logger->inc(l_osdc_linger_active); + op.clear(); return info->linger_id; } -void Objecter::_linger_submit(LingerOp *info, shunique_lock& sul) +void Objecter::_linger_submit(LingerOp *info, + ceph::shunique_lock& sul) { ceph_assert(sul.owns_lock() && sul.mutex() == &rwlock); ceph_assert(info->linger_id); @@ -885,7 +864,7 @@ void Objecter::_linger_submit(LingerOp *info, shunique_lock& sul) // Create LingerOp<->OSDSession relation int r = _get_session(info->target.osd, &s, sul); ceph_assert(r == 0); - OSDSession::unique_lock sl(s->lock); + unique_lock sl(s->lock); _session_linger_op_assign(s, info); sl.unlock(); put_session(s); @@ -893,18 +872,16 @@ void Objecter::_linger_submit(LingerOp *info, shunique_lock& sul) _send_linger(info, sul); } -struct C_DoWatchNotify : public Context { +struct CB_DoWatchNotify { Objecter *objecter; - Objecter::LingerOp *info; - MWatchNotify *msg; - C_DoWatchNotify(Objecter *o, Objecter::LingerOp *i, MWatchNotify *m) + boost::intrusive_ptr info; + boost::intrusive_ptr msg; + CB_DoWatchNotify(Objecter *o, Objecter::LingerOp *i, MWatchNotify *m) : objecter(o), info(i), msg(m) { - info->get(); info->_queued_async(); - 
msg->get(); } - void finish(int r) override { - objecter->_do_watch_notify(info, msg); + void operator()() { + objecter->_do_watch_notify(std::move(info), std::move(msg)); } }; @@ -920,12 +897,13 @@ void Objecter::handle_watch_notify(MWatchNotify *m) ldout(cct, 7) << __func__ << " cookie " << m->cookie << " dne" << dendl; return; } - LingerOp::unique_lock wl(info->watch_lock); + std::unique_lock wl(info->watch_lock); if (m->opcode == CEPH_WATCH_EVENT_DISCONNECT) { if (!info->last_error) { - info->last_error = -ENOTCONN; - if (info->watch_context) { - finisher->queue(new C_DoWatchError(this, info, -ENOTCONN)); + info->last_error = bs::error_code(ENOTCONN, osd_category()); + if (info->handle) { + boost::asio::defer(finish_strand, CB_DoWatchError(this, info, + info->last_error)); } } } else if (!info->is_watch) { @@ -937,19 +915,21 @@ void Objecter::handle_watch_notify(MWatchNotify *m) ldout(cct, 10) << __func__ << " reply notify " << m->notify_id << " != " << info->notify_id << ", ignoring" << dendl; } else if (info->on_notify_finish) { - info->notify_result_bl->claim(m->get_data()); - info->on_notify_finish->complete(m->return_code); + info->on_notify_finish->defer( + std::move(info->on_notify_finish), + osdcode(m->return_code), std::move(m->get_data())); // if we race with reconnect we might get a second notify; only // notify the caller once! - info->on_notify_finish = NULL; + info->on_notify_finish = nullptr; } } else { - finisher->queue(new C_DoWatchNotify(this, info, m)); + boost::asio::defer(finish_strand, CB_DoWatchNotify(this, info, m)); } } -void Objecter::_do_watch_notify(LingerOp *info, MWatchNotify *m) +void Objecter::_do_watch_notify(boost::intrusive_ptr info, + boost::intrusive_ptr m) { ldout(cct, 10) << __func__ << " " << *m << dendl; @@ -963,22 +943,19 @@ void Objecter::_do_watch_notify(LingerOp *info, MWatchNotify *m) // notify completion? 
ceph_assert(info->is_watch); - ceph_assert(info->watch_context); + ceph_assert(info->handle); ceph_assert(m->opcode != CEPH_WATCH_EVENT_DISCONNECT); l.unlock(); switch (m->opcode) { case CEPH_WATCH_EVENT_NOTIFY: - info->watch_context->handle_notify(m->notify_id, m->cookie, - m->notifier_gid, m->bl); + info->handle({}, m->notify_id, m->cookie, m->notifier_gid, std::move(m->bl)); break; } out: info->finished_async(); - info->put(); - m->put(); } bool Objecter::ms_dispatch(Message *m) @@ -1037,18 +1014,18 @@ void Objecter::_scan_requests( map& need_resend, list& need_resend_linger, map& need_resend_command, - shunique_lock& sul) + ceph::shunique_lock& sul) { ceph_assert(sul.owns_lock() && sul.mutex() == &rwlock); list unregister_lingers; - OSDSession::unique_lock sl(s->lock); + std::unique_lock sl(s->lock); // check for changed linger mappings (_before_ regular ops) - map::iterator lp = s->linger_ops.begin(); + auto lp = s->linger_ops.begin(); while (lp != s->linger_ops.end()) { - LingerOp *op = lp->second; + auto op = lp->second; ceph_assert(op->session == s); // check_linger_pool_dne() may touch linger_ops; prevent iterator // invalidation @@ -1081,7 +1058,7 @@ void Objecter::_scan_requests( } // check for changed request mappings - map::iterator p = s->ops.begin(); + auto p = s->ops.begin(); while (p != s->ops.end()) { Op *op = p->second; ++p; // check_op_pool_dne() may touch ops; prevent iterator invalidation @@ -1110,9 +1087,9 @@ void Objecter::_scan_requests( } // commands - map::iterator cp = s->command_ops.begin(); + auto cp = s->command_ops.begin(); while (cp != s->command_ops.end()) { - CommandOp *c = cp->second; + auto c = cp->second; ++cp; ldout(cct, 10) << " checking command " << c->tid << dendl; bool force_resend_writes = cluster_full; @@ -1141,7 +1118,7 @@ void Objecter::_scan_requests( sl.unlock(); - for (list::iterator iter = unregister_lingers.begin(); + for (auto iter = unregister_lingers.begin(); iter != unregister_lingers.end(); ++iter) { _linger_cancel(*iter); @@ -1151,7 +1128,7 @@ void Objecter::_scan_requests( void Objecter::handle_osd_map(MOSDMap *m) { - shunique_lock sul(rwlock, acquire_unique); + ceph::shunique_lock sul(rwlock, acquire_unique); if (!initialized) return; @@ -1168,8 +1145,7 @@ void Objecter::handle_osd_map(MOSDMap *m) bool was_pausewr = osdmap->test_flag(CEPH_OSDMAP_PAUSEWR) || cluster_full || _osdmap_has_pool_full(); map pool_full_map; - for (map::const_iterator it - = osdmap->get_pools().begin(); + for (auto it = osdmap->get_pools().begin(); it != osdmap->get_pools().end(); ++it) pool_full_map[it->first] = _osdmap_pool_full(it->second); @@ -1242,9 +1218,9 @@ void Objecter::handle_osd_map(MOSDMap *m) _scan_requests(homeless_session, skipped_map, cluster_full, &pool_full_map, need_resend, need_resend_linger, need_resend_command, sul); - for (map::iterator p = osd_sessions.begin(); + for (auto p = osd_sessions.begin(); p != osd_sessions.end(); ) { - OSDSession *s = p->second; + auto s = p->second; _scan_requests(s, skipped_map, cluster_full, &pool_full_map, need_resend, need_resend_linger, need_resend_command, sul); @@ -1263,7 +1239,7 @@ void Objecter::handle_osd_map(MOSDMap *m) } else { // first map. we want the full thing. 
if (m->maps.count(m->get_last())) { - for (map::iterator p = osd_sessions.begin(); + for (auto p = osd_sessions.begin(); p != osd_sessions.end(); ++p) { OSDSession *s = p->second; _scan_requests(s, false, false, NULL, need_resend, @@ -1314,10 +1290,10 @@ void Objecter::handle_osd_map(MOSDMap *m) } // resend requests - for (map::iterator p = need_resend.begin(); + for (auto p = need_resend.begin(); p != need_resend.end(); ++p) { - Op *op = p->second; - OSDSession *s = op->session; + auto op = p->second; + auto s = op->session; bool mapped_session = false; if (!s) { int r = _map_session(&op->target, &s, sul); @@ -1326,7 +1302,7 @@ void Objecter::handle_osd_map(MOSDMap *m) } else { get_session(s); } - OSDSession::unique_lock sl(s->lock); + std::unique_lock sl(s->lock); if (mapped_session) { _session_op_assign(s, op); } @@ -1342,7 +1318,7 @@ void Objecter::handle_osd_map(MOSDMap *m) sl.unlock(); put_session(s); } - for (list::iterator p = need_resend_linger.begin(); + for (auto p = need_resend_linger.begin(); p != need_resend_linger.end(); ++p) { LingerOp *op = *p; ceph_assert(op->session); @@ -1351,9 +1327,9 @@ void Objecter::handle_osd_map(MOSDMap *m) _send_linger(op, sul); } } - for (map::iterator p = need_resend_command.begin(); + for (auto p = need_resend_command.begin(); p != need_resend_command.end(); ++p) { - CommandOp *c = p->second; + auto c = p->second; if (c->target.osd >= 0) { _assign_command_session(c, sul); if (c->session && !c->session->is_homeless()) { @@ -1365,14 +1341,12 @@ void Objecter::handle_osd_map(MOSDMap *m) _dump_active(); // finish any Contexts that were waiting on a map update - map > >::iterator p = - waiting_for_map.begin(); + auto p = waiting_for_map.begin(); while (p != waiting_for_map.end() && p->first <= osdmap->get_epoch()) { //go through the list and call the onfinish methods - for (list >::iterator i = p->second.begin(); - i != p->second.end(); ++i) { - i->first->complete(i->second); + for (auto& [c, ec] : p->second) { + ca::post(std::move(c), ec); } waiting_for_map.erase(p++); } @@ -1438,21 +1412,20 @@ void Objecter::emit_blacklist_events(const OSDMap &old_osd_map, // op pool check -void Objecter::CB_Op_Map_Latest::operator()(boost::system::error_code e, +void Objecter::CB_Op_Map_Latest::operator()(bs::error_code e, version_t latest, version_t) { - if (e == boost::system::errc::resource_unavailable_try_again || - e == boost::system::errc::operation_canceled) + if (e == bs::errc::resource_unavailable_try_again || + e == bs::errc::operation_canceled) return; lgeneric_subdout(objecter->cct, objecter, 10) << "op_map_latest r=" << e << " tid=" << tid << " latest " << latest << dendl; - Objecter::unique_lock wl(objecter->rwlock); + unique_lock wl(objecter->rwlock); - map::iterator iter = - objecter->check_latest_map_ops.find(tid); + auto iter = objecter->check_latest_map_ops.find(tid); if (iter == objecter->check_latest_map_ops.end()) { lgeneric_subdout(objecter->cct, objecter, 10) << "op_map_latest op "<< tid << " not found" << dendl; @@ -1468,7 +1441,7 @@ void Objecter::CB_Op_Map_Latest::operator()(boost::system::error_code e, if (op->map_dne_bound == 0) op->map_dne_bound = latest; - OSDSession::unique_lock sl(op->session->lock, defer_lock); + unique_lock sl(op->session->lock, defer_lock); objecter->_check_op_pool_dne(op, &sl); op->put(); @@ -1522,7 +1495,7 @@ int Objecter::pool_snap_list(int64_t poolid, vector *snaps) const pg_pool_t *pi = osdmap->get_pg_pool(poolid); if (!pi) return -ENOENT; - for (map::const_iterator p = pi->snaps.begin(); + for (auto p = 
pi->snaps.begin(); p != pi->snaps.end(); ++p) { snaps->push_back(p->first); @@ -1531,7 +1504,7 @@ int Objecter::pool_snap_list(int64_t poolid, vector *snaps) } // sl may be unlocked. -void Objecter::_check_op_pool_dne(Op *op, unique_lock *sl) +void Objecter::_check_op_pool_dne(Op *op, std::unique_lock *sl) { // rwlock is locked unique @@ -1554,9 +1527,9 @@ void Objecter::_check_op_pool_dne(Op *op, unique_lock *sl) ldout(cct, 10) << "check_op_pool_dne tid " << op->tid << " concluding pool " << op->target.base_pgid.pool() << " dne" << dendl; - if (op->onfinish) { + if (op->has_completion()) { num_in_flight--; - op->onfinish->complete(-ENOENT); + op->complete(osdc_errc::pool_dne, -ENOENT); } OSDSession *s = op->session; @@ -1594,8 +1567,7 @@ void Objecter::_send_op_map_check(Op *op) void Objecter::_op_cancel_map_check(Op *op) { // rwlock is locked unique - map::iterator iter = - check_latest_map_ops.find(op->tid); + auto iter = check_latest_map_ops.find(op->tid); if (iter != check_latest_map_ops.end()) { Op *op = iter->second; op->put(); @@ -1605,25 +1577,24 @@ void Objecter::_op_cancel_map_check(Op *op) // linger pool check -void Objecter::CB_Linger_Map_Latest::operator()(boost::system::error_code e, +void Objecter::CB_Linger_Map_Latest::operator()(bs::error_code e, version_t latest, version_t) { - if (e == boost::system::errc::resource_unavailable_try_again || - e == boost::system::errc::operation_canceled) { + if (e == bs::errc::resource_unavailable_try_again || + e == bs::errc::operation_canceled) { // ignore callback; we will retry in resend_mon_ops() return; } unique_lock wl(objecter->rwlock); - map::iterator iter = - objecter->check_latest_map_lingers.find(linger_id); + auto iter = objecter->check_latest_map_lingers.find(linger_id); if (iter == objecter->check_latest_map_lingers.end()) { return; } - LingerOp *op = iter->second; + auto op = iter->second; objecter->check_latest_map_lingers.erase(iter); if (op->map_dne_bound == 0) @@ -1658,13 +1629,15 @@ void Objecter::_check_linger_pool_dne(LingerOp *op, bool *need_unregister) } if (op->map_dne_bound > 0) { if (osdmap->get_epoch() >= op->map_dne_bound) { - LingerOp::unique_lock wl{op->watch_lock}; + std::unique_lock wl{op->watch_lock}; if (op->on_reg_commit) { - op->on_reg_commit->complete(-ENOENT); + op->on_reg_commit->defer(std::move(op->on_reg_commit), + osdc_errc::pool_dne, cb::list{}); op->on_reg_commit = nullptr; } if (op->on_notify_finish) { - op->on_notify_finish->complete(-ENOENT); + op->on_notify_finish->defer(std::move(op->on_notify_finish), + osdc_errc::pool_dne, cb::list{}); op->on_notify_finish = nullptr; } *need_unregister = true; @@ -1688,8 +1661,7 @@ void Objecter::_linger_cancel_map_check(LingerOp *op) { // rwlock is locked unique - map::iterator iter = - check_latest_map_lingers.find(op->linger_id); + auto iter = check_latest_map_lingers.find(op->linger_id); if (iter != check_latest_map_lingers.end()) { LingerOp *op = iter->second; op->put(); @@ -1699,30 +1671,29 @@ void Objecter::_linger_cancel_map_check(LingerOp *op) // command pool check -void Objecter::CB_Command_Map_Latest::operator()(boost::system::error_code e, +void Objecter::CB_Command_Map_Latest::operator()(bs::error_code e, version_t latest, version_t) { - if (e == boost::system::errc::resource_unavailable_try_again || - e == boost::system::errc::operation_canceled) { + if (e == bs::errc::resource_unavailable_try_again || + e == bs::errc::operation_canceled) { // ignore callback; we will retry in resend_mon_ops() return; } unique_lock wl(objecter->rwlock); - 
map::iterator iter = - objecter->check_latest_map_commands.find(tid); + auto iter = objecter->check_latest_map_commands.find(tid); if (iter == objecter->check_latest_map_commands.end()) { return; } - CommandOp *c = iter->second; + auto c = iter->second; objecter->check_latest_map_commands.erase(iter); if (c->map_dne_bound == 0) c->map_dne_bound = latest; - OSDSession::unique_lock sul(c->session->lock); + unique_lock sul(c->session->lock); objecter->_check_command_map_dne(c); sul.unlock(); @@ -1740,7 +1711,8 @@ void Objecter::_check_command_map_dne(CommandOp *c) << dendl; if (c->map_dne_bound > 0) { if (osdmap->get_epoch() >= c->map_dne_bound) { - _finish_command(c, c->map_check_error, c->map_check_error_str); + _finish_command(c, osdcode(c->map_check_error), + std::move(c->map_check_error_str), {}); } } else { _send_command_map_check(c); @@ -1764,10 +1736,9 @@ void Objecter::_command_cancel_map_check(CommandOp *c) { // rwlock is locked uniqe - map::iterator iter = - check_latest_map_commands.find(c->tid); + auto iter = check_latest_map_commands.find(c->tid); if (iter != check_latest_map_commands.end()) { - CommandOp *c = iter->second; + auto c = iter->second; c->put(); check_latest_map_commands.erase(iter); } @@ -1780,7 +1751,8 @@ void Objecter::_command_cancel_map_check(CommandOp *c) * @returns 0 on success, or -EAGAIN if the lock context requires * promotion to write. */ -int Objecter::_get_session(int osd, OSDSession **session, shunique_lock& sul) +int Objecter::_get_session(int osd, OSDSession **session, + shunique_lock& sul) { ceph_assert(sul && sul.mutex() == &rwlock); @@ -1791,9 +1763,9 @@ int Objecter::_get_session(int osd, OSDSession **session, shunique_lock& sul) return 0; } - map::iterator p = osd_sessions.find(osd); + auto p = osd_sessions.find(osd); if (p != osd_sessions.end()) { - OSDSession *s = p->second; + auto s = p->second; s->get(); *session = s; ldout(cct, 20) << __func__ << " s=" << s << " osd=" << osd << " " @@ -1803,7 +1775,7 @@ int Objecter::_get_session(int osd, OSDSession **session, shunique_lock& sul) if (!sul.owns_lock()) { return -EAGAIN; } - OSDSession *s = new OSDSession(cct, osd); + auto s = new OSDSession(cct, osd); osd_sessions[osd] = s; s->con = messenger->connect_to_osd(osdmap->get_addrs(osd)); s->con->set_priv(RefCountedPtr{s}); @@ -1865,28 +1837,28 @@ void Objecter::close_session(OSDSession *s) s->con->mark_down(); logger->inc(l_osdc_osd_session_close); } - OSDSession::unique_lock sl(s->lock); + unique_lock sl(s->lock); std::list homeless_lingers; std::list homeless_commands; std::list homeless_ops; while (!s->linger_ops.empty()) { - std::map::iterator i = s->linger_ops.begin(); + auto i = s->linger_ops.begin(); ldout(cct, 10) << " linger_op " << i->first << dendl; homeless_lingers.push_back(i->second); _session_linger_op_remove(s, i->second); } while (!s->ops.empty()) { - std::map::iterator i = s->ops.begin(); + auto i = s->ops.begin(); ldout(cct, 10) << " op " << i->first << dendl; homeless_ops.push_back(i->second); _session_op_remove(s, i->second); } while (!s->command_ops.empty()) { - std::map::iterator i = s->command_ops.begin(); + auto i = s->command_ops.begin(); ldout(cct, 10) << " command_op " << i->first << dendl; homeless_commands.push_back(i->second); _session_command_op_remove(s, i->second); @@ -1898,16 +1870,16 @@ void Objecter::close_session(OSDSession *s) // Assign any leftover ops to the homeless session { - OSDSession::unique_lock hsl(homeless_session->lock); - for (std::list::iterator i = homeless_lingers.begin(); + unique_lock 
hsl(homeless_session->lock); + for (auto i = homeless_lingers.begin(); i != homeless_lingers.end(); ++i) { _session_linger_op_assign(homeless_session, *i); } - for (std::list::iterator i = homeless_ops.begin(); + for (auto i = homeless_ops.begin(); i != homeless_ops.end(); ++i) { _session_op_assign(homeless_session, *i); } - for (std::list::iterator i = homeless_commands.begin(); + for (auto i = homeless_commands.begin(); i != homeless_commands.end(); ++i) { _session_command_op_assign(homeless_session, *i); } @@ -1924,53 +1896,29 @@ void Objecter::wait_for_osd_map(epoch_t e) return; } - // Leave this since it goes with C_SafeCond - ceph::mutex lock = ceph::make_mutex(""); - ceph::condition_variable cond; - bool done; - std::unique_lock mlock{lock}; - C_SafeCond *context = new C_SafeCond(lock, cond, &done, NULL); - waiting_for_map[e].push_back(pair(context, 0)); + ca::waiter w; + waiting_for_map[e].emplace_back(OpCompletion::create( + service.get_executor(), + w.ref()), + bs::error_code{}); l.unlock(); - cond.wait(mlock, [&done] { return done; }); + w.wait(); } -struct CB_Objecter_GetVersion { - Objecter *objecter; - Context *fin; - CB_Objecter_GetVersion(Objecter *o, Context *c) : objecter(o), fin(c) {} - void operator()(boost::system::error_code e, version_t newest, version_t oldest) { - if (!e) { - objecter->get_latest_version(oldest, newest, fin); - } else if (e == boost::system::errc::resource_unavailable_try_again) { - // try again as instructed - objecter->wait_for_latest_osdmap(fin); - } else { - // it doesn't return any other error codes! - ceph_abort(); - } - } -}; - -void Objecter::wait_for_latest_osdmap(Context *fin) -{ - ldout(cct, 10) << __func__ << dendl; - monc->get_version("osdmap", CB_Objecter_GetVersion(this, fin)); -} - -void Objecter::get_latest_version(epoch_t oldest, epoch_t newest, Context *fin) +void Objecter::_get_latest_version(epoch_t oldest, epoch_t newest, + std::unique_ptr fin, + std::unique_lock&& l) { - unique_lock wl(rwlock); + ceph_assert(fin); if (osdmap->get_epoch() >= newest) { ldout(cct, 10) << __func__ << " latest " << newest << ", have it" << dendl; - wl.unlock(); - if (fin) - fin->complete(0); - return; + l.unlock(); + ca::defer(std::move(fin), bs::error_code{}); + } else { + ldout(cct, 10) << __func__ << " latest " << newest << ", waiting" << dendl; + _wait_for_new_map(std::move(fin), newest, bs::error_code{}); + l.unlock(); } - - ldout(cct, 10) << __func__ << " latest " << newest << ", waiting" << dendl; - _wait_for_new_map(fin, newest, 0); } void Objecter::maybe_request_map() @@ -1999,10 +1947,11 @@ void Objecter::_maybe_request_map() } } -void Objecter::_wait_for_new_map(Context *c, epoch_t epoch, int err) +void Objecter::_wait_for_new_map(std::unique_ptr c, epoch_t epoch, + bs::error_code ec) { // rwlock is locked unique - waiting_for_map[epoch].push_back(pair(c, err)); + waiting_for_map[epoch].emplace_back(std::move(c), ec); _maybe_request_map(); } @@ -2027,16 +1976,6 @@ bool Objecter::have_map(const epoch_t epoch) } } -bool Objecter::wait_for_map(epoch_t epoch, Context *c, int err) -{ - unique_lock wl(rwlock); - if (osdmap->get_epoch() >= epoch) { - return true; - } - _wait_for_new_map(c, epoch, err); - return false; -} - void Objecter::_kick_requests(OSDSession *session, map& lresend) { @@ -2048,8 +1987,7 @@ void Objecter::_kick_requests(OSDSession *session, // resend ops map resend; // resend in tid order - for (map::iterator p = session->ops.begin(); - p != session->ops.end();) { + for (auto p = session->ops.begin(); p != 
session->ops.end();) { Op *op = p->second; ++p; if (op->should_resend) { @@ -2069,7 +2007,7 @@ void Objecter::_kick_requests(OSDSession *session, // resend lingers logger->inc(l_osdc_linger_resend, session->linger_ops.size()); - for (map::iterator j = session->linger_ops.begin(); + for (auto j = session->linger_ops.begin(); j != session->linger_ops.end(); ++j) { LingerOp *op = j->second; op->get(); @@ -2080,7 +2018,7 @@ void Objecter::_kick_requests(OSDSession *session, // resend commands logger->inc(l_osdc_command_resend, session->command_ops.size()); map cresend; // resend in order - for (map::iterator k = session->command_ops.begin(); + for (auto k = session->command_ops.begin(); k != session->command_ops.end(); ++k) { cresend[k->first] = k->second; } @@ -2091,7 +2029,7 @@ void Objecter::_kick_requests(OSDSession *session, } void Objecter::_linger_ops_resend(map& lresend, - unique_lock& ul) + unique_lock& ul) { ceph_assert(ul.owns_lock()); shunique_lock sul(std::move(ul)); @@ -2138,15 +2076,13 @@ void Objecter::tick() unsigned laggy_ops = 0; - for (map::iterator siter = osd_sessions.begin(); + for (auto siter = osd_sessions.begin(); siter != osd_sessions.end(); ++siter) { - OSDSession *s = siter->second; - OSDSession::lock_guard l(s->lock); + auto s = siter->second; + scoped_lock l(s->lock); bool found = false; - for (map::iterator p = s->ops.begin(); - p != s->ops.end(); - ++p) { - Op *op = p->second; + for (auto p = s->ops.begin(); p != s->ops.end(); ++p) { + auto op = p->second; ceph_assert(op->session); if (op->stamp < cutoff) { ldout(cct, 2) << " tid " << p->first << " on osd." << op->session->osd @@ -2155,11 +2091,11 @@ void Objecter::tick() ++laggy_ops; } } - for (map::iterator p = s->linger_ops.begin(); + for (auto p = s->linger_ops.begin(); p != s->linger_ops.end(); ++p) { - LingerOp *op = p->second; - LingerOp::unique_lock wl(op->watch_lock); + auto op = p->second; + std::unique_lock wl(op->watch_lock); ceph_assert(op->session); ldout(cct, 10) << " pinging osd that serves lingering tid " << p->first << " (osd." << op->session->osd << ")" << dendl; @@ -2167,10 +2103,10 @@ void Objecter::tick() if (op->is_watch && op->registered && !op->last_error) _send_linger_ping(op); } - for (map::iterator p = s->command_ops.begin(); + for (auto p = s->command_ops.begin(); p != s->command_ops.end(); ++p) { - CommandOp *op = p->second; + auto op = p->second; ceph_assert(op->session); ldout(cct, 10) << " pinging osd that serves command tid " << p->first << " (osd." 
<< op->session->osd << ")" << dendl; @@ -2189,9 +2125,7 @@ void Objecter::tick() if (!toping.empty()) { // send a ping to these osds, to ensure we detect any session resets // (osd reply message policy is lossy) - for (set::const_iterator i = toping.begin(); - i != toping.end(); - ++i) { + for (auto i = toping.begin(); i != toping.end(); ++i) { (*i)->con->send_message(new MPing); } } @@ -2209,41 +2143,34 @@ void Objecter::resend_mon_ops() ldout(cct, 10) << "resend_mon_ops" << dendl; - for (map::iterator p = poolstat_ops.begin(); - p != poolstat_ops.end(); - ++p) { + for (auto p = poolstat_ops.begin(); p != poolstat_ops.end(); ++p) { _poolstat_submit(p->second); logger->inc(l_osdc_poolstat_resend); } - for (map::iterator p = statfs_ops.begin(); - p != statfs_ops.end(); - ++p) { + for (auto p = statfs_ops.begin(); p != statfs_ops.end(); ++p) { _fs_stats_submit(p->second); logger->inc(l_osdc_statfs_resend); } - for (map::iterator p = pool_ops.begin(); - p != pool_ops.end(); - ++p) { + for (auto p = pool_ops.begin(); p != pool_ops.end(); ++p) { _pool_op_submit(p->second); logger->inc(l_osdc_poolop_resend); } - for (map::iterator p = check_latest_map_ops.begin(); + for (auto p = check_latest_map_ops.begin(); p != check_latest_map_ops.end(); ++p) { monc->get_version("osdmap", CB_Op_Map_Latest(this, p->second->tid)); } - for (map::iterator p = check_latest_map_lingers.begin(); + for (auto p = check_latest_map_lingers.begin(); p != check_latest_map_lingers.end(); ++p) { monc->get_version("osdmap", CB_Linger_Map_Latest(this, p->second->linger_id)); } - for (map::iterator p - = check_latest_map_commands.begin(); + for (auto p = check_latest_map_commands.begin(); p != check_latest_map_commands.end(); ++p) { monc->get_version("osdmap", CB_Command_Map_Latest(this, p->second->tid)); @@ -2262,7 +2189,8 @@ void Objecter::op_submit(Op *op, ceph_tid_t *ptid, int *ctx_budget) _op_submit_with_budget(op, rl, ptid, ctx_budget); } -void Objecter::_op_submit_with_budget(Op *op, shunique_lock& sul, +void Objecter::_op_submit_with_budget(Op *op, + shunique_lock& sul, ceph_tid_t *ptid, int *ctx_budget) { @@ -2300,7 +2228,7 @@ void Objecter::_send_op_account(Op *op) inflight_ops++; // add to gather set(s) - if (op->onfinish) { + if (op->has_completion()) { num_in_flight++; } else { ldout(cct, 20) << " note: not requesting reply" << dendl; @@ -2320,7 +2248,7 @@ void Objecter::_send_op_account(Op *op) if (op->target.flags & CEPH_OSD_FLAG_PGOP) logger->inc(l_osdc_op_pg); - for (vector::iterator p = op->ops.begin(); p != op->ops.end(); ++p) { + for (auto p = op->ops.begin(); p != op->ops.end(); ++p) { int code = l_osdc_osdop_other; switch (p->op.op) { case CEPH_OSD_OP_STAT: code = l_osdc_osdop_stat; break; @@ -2365,7 +2293,7 @@ void Objecter::_send_op_account(Op *op) } } -void Objecter::_op_submit(Op *op, shunique_lock& sul, ceph_tid_t *ptid) +void Objecter::_op_submit(Op *op, shunique_lock& sul, ceph_tid_t *ptid) { // rwlock is locked @@ -2430,7 +2358,7 @@ void Objecter::_op_submit(Op *op, shunique_lock& sul, ceph_tid_t *ptid) _maybe_request_map(); } - OSDSession::unique_lock sl(s->lock); + unique_lock sl(s->lock); if (op->tid == 0) op->tid = ++last_tid; @@ -2466,9 +2394,9 @@ int Objecter::op_cancel(OSDSession *s, ceph_tid_t tid, int r) { ceph_assert(initialized); - OSDSession::unique_lock sl(s->lock); + unique_lock sl(s->lock); - map::iterator p = s->ops.find(tid); + auto p = s->ops.find(tid); if (p == s->ops.end()) { ldout(cct, 10) << __func__ << " tid " << tid << " dne in session " << s->osd << dendl; @@ -2486,10 
+2414,9 @@ int Objecter::op_cancel(OSDSession *s, ceph_tid_t tid, int r) ldout(cct, 10) << __func__ << " tid " << tid << " in session " << s->osd << dendl; Op *op = p->second; - if (op->onfinish) { + if (op->has_completion()) { num_in_flight--; - op->onfinish->complete(r); - op->onfinish = NULL; + op->complete(osdcode(r), r); } _op_cancel_map_check(op); _finish_op(op, r); @@ -2527,10 +2454,10 @@ int Objecter::_op_cancel(ceph_tid_t tid, int r) start: - for (map::iterator siter = osd_sessions.begin(); + for (auto siter = osd_sessions.begin(); siter != osd_sessions.end(); ++siter) { OSDSession *s = siter->second; - OSDSession::shared_lock sl(s->lock); + shared_lock sl(s->lock); if (s->ops.find(tid) != s->ops.end()) { sl.unlock(); ret = op_cancel(s, tid, r); @@ -2546,7 +2473,7 @@ start: << " not found in live sessions" << dendl; // Handle case where the op is in homeless session - OSDSession::shared_lock sl(homeless_session->lock); + shared_lock sl(homeless_session->lock); if (homeless_session->ops.find(tid) != homeless_session->ops.end()) { sl.unlock(); ret = op_cancel(homeless_session, tid, r); @@ -2574,11 +2501,11 @@ epoch_t Objecter::op_cancel_writes(int r, int64_t pool) std::vector to_cancel; bool found = false; - for (map::iterator siter = osd_sessions.begin(); + for (auto siter = osd_sessions.begin(); siter != osd_sessions.end(); ++siter) { OSDSession *s = siter->second; - OSDSession::shared_lock sl(s->lock); - for (map::iterator op_i = s->ops.begin(); + shared_lock sl(s->lock); + for (auto op_i = s->ops.begin(); op_i != s->ops.end(); ++op_i) { if (op_i->second->target.flags & CEPH_OSD_FLAG_WRITE && (pool == -1 || op_i->second->target.target_oloc.pool == pool)) { @@ -2587,9 +2514,7 @@ epoch_t Objecter::op_cancel_writes(int r, int64_t pool) } sl.unlock(); - for (std::vector::iterator titer = to_cancel.begin(); - titer != to_cancel.end(); - ++titer) { + for (auto titer = to_cancel.begin(); titer != to_cancel.end(); ++titer) { int cancel_result = op_cancel(s, *titer, r); // We hold rwlock across search and cancellation, so cancels // should always succeed @@ -2675,8 +2600,7 @@ bool Objecter::_osdmap_pool_full(const int64_t pool_id) const bool Objecter::_osdmap_has_pool_full() const { - for (map::const_iterator it - = osdmap->get_pools().begin(); + for (auto it = osdmap->get_pools().begin(); it != osdmap->get_pools().end(); ++it) { if (_osdmap_pool_full(it->second)) return true; @@ -2981,7 +2905,7 @@ int Objecter::_calc_target(op_target_t *t, Connection *con, bool any_change) } int Objecter::_map_session(op_target_t *target, OSDSession **s, - shunique_lock& sul) + shunique_lock& sul) { _calc_target(target, nullptr); return _get_session(target->osd, s, sul); @@ -3088,7 +3012,7 @@ void Objecter::_session_command_op_assign(OSDSession *to, CommandOp *op) } int Objecter::_recalc_linger_op_target(LingerOp *linger_op, - shunique_lock& sul) + shunique_lock& sul) { // rwlock is locked unique @@ -3107,7 +3031,7 @@ int Objecter::_recalc_linger_op_target(LingerOp *linger_op, // same time here is only safe because we are the only one that // takes two, and we are holding rwlock for write. Disable // lockdep because it doesn't know that. 
- OSDSession::unique_lock sl(s->lock); + unique_lock sl(s->lock); _session_linger_op_remove(linger_op->session, linger_op); _session_linger_op_assign(s, linger_op); } @@ -3123,8 +3047,8 @@ void Objecter::_cancel_linger_op(Op *op) ldout(cct, 15) << "cancel_op " << op->tid << dendl; ceph_assert(!op->should_resend); - if (op->onfinish) { - delete op->onfinish; + if (op->has_completion()) { + op->onfinish = nullptr; num_in_flight--; } @@ -3176,10 +3100,10 @@ MOSDOp *Objecter::_prepare_osd_op(Op *op) op->stamp = ceph::coarse_mono_clock::now(); hobject_t hobj = op->target.get_hobj(); - MOSDOp *m = new MOSDOp(client_inc, op->tid, - hobj, op->target.actual_pgid, - osdmap->get_epoch(), - flags, op->features); + auto m = new MOSDOp(client_inc, op->tid, + hobj, op->target.actual_pgid, + osdmap->get_epoch(), + flags, op->features); m->set_snapid(op->snapid); m->set_snap_seq(op->snapc.seq); @@ -3306,7 +3230,7 @@ int Objecter::calc_op_budget(const vector& ops) } void Objecter::_throttle_op(Op *op, - shunique_lock& sul, + shunique_lock& sul, int op_budget) { ceph_assert(sul && sul.mutex() == &rwlock); @@ -3360,7 +3284,7 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m) return; } - OSDSession::unique_lock sl(s->lock); + unique_lock sl(s->lock); map::iterator iter = s->ops.find(tid); if (iter == s->ops.end()) { @@ -3386,7 +3310,7 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m) if (retry_writes_after_first_reply && op->attempts == 1 && (op->target.flags & CEPH_OSD_FLAG_WRITE)) { ldout(cct, 7) << "retrying write after first reply: " << tid << dendl; - if (op->onfinish) { + if (op->has_completion()) { num_in_flight--; } _session_op_remove(s, op); @@ -3414,13 +3338,13 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m) // have, but that is better than doing callbacks out of order. 
} - Context *onfinish = 0; + decltype(op->onfinish) onfinish; int rc = m->get_result(); if (m->is_redirect_reply()) { ldout(cct, 5) << " got redirect reply; redirecting" << dendl; - if (op->onfinish) + if (op->has_completion()) num_in_flight--; _session_op_remove(s, op); sl.unlock(); @@ -3440,7 +3364,7 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m) if (rc == -EAGAIN) { ldout(cct, 7) << " got -EAGAIN, resubmitting" << dendl; - if (op->onfinish) + if (op->has_completion()) num_in_flight--; _session_op_remove(s, op); sl.unlock(); @@ -3478,7 +3402,7 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m) ldout(cct,10) << __func__ << " copying resulting " << bl.length() << " into existing ceph::buffer of length " << op->outbl->length() << dendl; - ceph::buffer::list t; + cb::list t; t.claim(*op->outbl); t.invalidate_crc(); // we're overwriting the raw buffers via c_str() bl.begin().copy(bl.length(), t.c_str()); @@ -3498,15 +3422,20 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m) << " != request ops " << op->ops << " from " << m->get_source_inst() << dendl; - vector::iterator pb = op->out_bl.begin(); - vector::iterator pr = op->out_rval.begin(); - vector::iterator ph = op->out_handler.begin(); + ceph_assert(op->ops.size() == op->out_bl.size()); + ceph_assert(op->ops.size() == op->out_rval.size()); + ceph_assert(op->ops.size() == op->out_ec.size()); + ceph_assert(op->ops.size() == op->out_handler.size()); + auto pb = op->out_bl.begin(); + auto pr = op->out_rval.begin(); + auto pe = op->out_ec.begin(); + auto ph = op->out_handler.begin(); ceph_assert(op->out_bl.size() == op->out_rval.size()); ceph_assert(op->out_bl.size() == op->out_handler.size()); - vector::iterator p = out_ops.begin(); + auto p = out_ops.begin(); for (unsigned i = 0; p != out_ops.end() && pb != op->out_bl.end(); - ++i, ++p, ++pb, ++pr, ++ph) { + ++i, ++p, ++pb, ++pr, ++pe, ++ph) { ldout(cct, 10) << " op " << i << " rval " << p->rval << " len " << p->outdata.length() << dendl; if (*pb) @@ -3515,20 +3444,24 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m) // can change it if e.g. decoding fails if (*pr) **pr = ceph_to_hostos_errno(p->rval); + if (*pe) + **pe = p->rval < 0 ? bs::error_code(-p->rval, osd_category()) : + bs::error_code(); if (*ph) { - ldout(cct, 10) << " op " << i << " handler " << *ph << dendl; - (*ph)->complete(ceph_to_hostos_errno(p->rval)); - *ph = NULL; + std::move((*ph))(p->rval < 0 ? + bs::error_code(-p->rval, osd_category()) : + bs::error_code(), + p->rval, p->outdata); } } // NOTE: we assume that since we only request ONDISK ever we will // only ever get back one (type of) ack ever. - if (op->onfinish) { + if (op->has_completion()) { num_in_flight--; - onfinish = op->onfinish; - op->onfinish = NULL; + onfinish = std::move(op->onfinish); + op->onfinish = nullptr; } logger->inc(l_osdc_op_reply); @@ -3547,8 +3480,8 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m) sl.unlock(); // do callbacks - if (onfinish) { - onfinish->complete(rc); + if (Op::has_completion(onfinish)) { + Op::complete(std::move(onfinish), osdcode(rc), rc); } if (completion_lock.mutex()) { completion_lock.unlock(); @@ -3577,7 +3510,7 @@ void Objecter::handle_osd_backoff(MOSDBackoff *m) get_session(s); - OSDSession::unique_lock sl(s->lock); + unique_lock sl(s->lock); switch (m->op) { case CEPH_OSD_BACKOFF_OP_BLOCK: @@ -3592,10 +3525,9 @@ void Objecter::handle_osd_backoff(MOSDBackoff *m) // ack with original backoff's epoch so that the osd can discard this if // there was a pg split. 
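handle_osd_op_reply() now walks four parallel vectors (out_bl, out_rval, out_ec, out_handler) and invokes each surviving handler exactly once with (error_code, rval, data), replacing the old Context::complete(int). A simplified model of that dispatch, with std::function and std::string standing in for fu2::unique_function and ceph::buffer::list:

#include <boost/system/error_code.hpp>
#include <cstddef>
#include <functional>
#include <iostream>
#include <string>
#include <vector>

namespace bs = boost::system;

using OpHandler = std::function<void(bs::error_code, int, std::string&)>;

int main() {
  // Per-op results as they might arrive in a reply.
  std::vector<int> rvals{0, -2 /* -ENOENT */};
  std::vector<std::string> outdata{"payload", ""};
  std::vector<OpHandler> handlers(2,
    [](bs::error_code ec, int rval, std::string& data) {
      std::cout << "rval=" << rval << " ec=" << ec.message()
                << " len=" << data.size() << "\n";
    });

  for (std::size_t i = 0; i < handlers.size(); ++i) {
    auto ec = rvals[i] < 0
      ? bs::error_code(-rvals[i], bs::generic_category())
      : bs::error_code();
    if (handlers[i])
      std::move(handlers[i])(ec, rvals[i], outdata[i]);  // one-shot dispatch
  }
}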
- Message *r = new MOSDBackoff(m->pgid, - m->map_epoch, - CEPH_OSD_BACKOFF_OP_ACK_BLOCK, - m->id, m->begin, m->end); + auto r = new MOSDBackoff(m->pgid, m->map_epoch, + CEPH_OSD_BACKOFF_OP_ACK_BLOCK, + m->id, m->begin, m->end); // this priority must match the MOSDOps from _prepare_osd_op r->set_priority(cct->_conf->osd_client_op_priority); con->send_message(r); @@ -3758,7 +3690,7 @@ void Objecter::list_nobjects(NListContext *list_context, Context *onfinish) op.pg_nls(list_context->max_entries, list_context->filter, list_context->pos, osdmap->get_epoch()); list_context->bl.clear(); - C_NList *onack = new C_NList(list_context, onfinish, this); + auto onack = new C_NList(list_context, onfinish, this); object_locator_t oloc(list_context->pool_id, list_context->nspace); // note current_pg in case we don't have (or lose) SORTBITWISE @@ -3780,7 +3712,7 @@ void Objecter::_nlist_reply(NListContext *list_context, int r, decode(response, iter); if (!iter.end()) { // we do this as legacy. - ceph::buffer::list legacy_extra_info; + cb::list legacy_extra_info; decode(legacy_extra_info, iter); } @@ -3809,7 +3741,9 @@ void Objecter::_nlist_reply(NListContext *list_context, int r, << ", handle " << response.handle << ", tentative new pos " << list_context->pos << dendl; if (response_size) { - list_context->list.splice(list_context->list.end(), response.entries); + std::move(response.entries.begin(), response.entries.end(), + std::back_inserter(list_context->list)); + response.entries.clear(); } if (list_context->list.size() >= list_context->max_entries) { @@ -3838,176 +3772,177 @@ void Objecter::put_nlist_context_budget(NListContext *list_context) // snapshots -int Objecter::create_pool_snap(int64_t pool, string& snap_name, - Context *onfinish) +void Objecter::create_pool_snap(int64_t pool, std::string_view snap_name, + decltype(PoolOp::onfinish)&& onfinish) { unique_lock wl(rwlock); ldout(cct, 10) << "create_pool_snap; pool: " << pool << "; snap: " << snap_name << dendl; const pg_pool_t *p = osdmap->get_pg_pool(pool); - if (!p) - return -EINVAL; - if (p->snap_exists(snap_name.c_str())) - return -EEXIST; + if (!p) { + onfinish->defer(std::move(onfinish), osdc_errc::pool_dne, cb::list{}); + return; + } + if (p->snap_exists(snap_name)) { + onfinish->defer(std::move(onfinish), osdc_errc::snapshot_exists, + cb::list{}); + return; + } - PoolOp *op = new PoolOp; - if (!op) - return -ENOMEM; + auto op = new PoolOp; op->tid = ++last_tid; op->pool = pool; op->name = snap_name; - op->onfinish = onfinish; + op->onfinish = std::move(onfinish); op->pool_op = POOL_OP_CREATE_SNAP; pool_ops[op->tid] = op; pool_op_submit(op); - - return 0; } -struct C_SelfmanagedSnap : public Context { - ceph::buffer::list bl; - snapid_t *psnapid; - Context *fin; - C_SelfmanagedSnap(snapid_t *ps, Context *f) : psnapid(ps), fin(f) {} - void finish(int r) override { - if (r == 0) { +struct CB_SelfmanagedSnap { + std::unique_ptr> fin; + CB_SelfmanagedSnap(decltype(fin)&& fin) + : fin(std::move(fin)) {} + void operator()(bs::error_code ec, const cb::list& bl) { + snapid_t snapid = 0; + if (!ec) { try { - auto p = bl.cbegin(); - decode(*psnapid, p); - } catch (ceph::buffer::error&) { - r = -EIO; + auto p = bl.cbegin(); + decode(snapid, p); + } catch (const cb::error& e) { + ec = e.code(); } } - fin->complete(r); + fin->defer(std::move(fin), ec, snapid); } }; -int Objecter::allocate_selfmanaged_snap(int64_t pool, snapid_t *psnapid, - Context *onfinish) +void Objecter::allocate_selfmanaged_snap( + int64_t pool, + std::unique_ptr> onfinish) { 
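Because pg_nls_response_t entries changed from std::list to std::vector earlier in this patch, _nlist_reply() replaces list::splice with std::move into a back_inserter followed by clear() of the moved-from source. The idiom in isolation:

#include <algorithm>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> list_entries{"a", "b"};
  std::vector<std::string> response_entries{"c", "d", "e"};

  // Move-append: elements are moved, not copied; the source keeps
  // valid-but-unspecified values, hence the clear() afterwards.
  std::move(response_entries.begin(), response_entries.end(),
            std::back_inserter(list_entries));
  response_entries.clear();

  for (auto& e : list_entries)
    std::cout << e << " ";   // a b c d e
}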
unique_lock wl(rwlock); ldout(cct, 10) << "allocate_selfmanaged_snap; pool: " << pool << dendl; - PoolOp *op = new PoolOp; - if (!op) return -ENOMEM; + auto op = new PoolOp; op->tid = ++last_tid; op->pool = pool; - C_SelfmanagedSnap *fin = new C_SelfmanagedSnap(psnapid, onfinish); - op->onfinish = fin; - op->blp = &fin->bl; + op->onfinish = PoolOp::OpComp::create( + service.get_executor(), + CB_SelfmanagedSnap(std::move(onfinish))); op->pool_op = POOL_OP_CREATE_UNMANAGED_SNAP; pool_ops[op->tid] = op; pool_op_submit(op); - return 0; } -int Objecter::delete_pool_snap(int64_t pool, string& snap_name, - Context *onfinish) +void Objecter::delete_pool_snap( + int64_t pool, std::string_view snap_name, + decltype(PoolOp::onfinish)&& onfinish) { unique_lock wl(rwlock); ldout(cct, 10) << "delete_pool_snap; pool: " << pool << "; snap: " << snap_name << dendl; const pg_pool_t *p = osdmap->get_pg_pool(pool); - if (!p) - return -EINVAL; - if (!p->snap_exists(snap_name.c_str())) - return -ENOENT; + if (!p) { + onfinish->defer(std::move(onfinish), osdc_errc::pool_dne, cb::list{}); + return; + } - PoolOp *op = new PoolOp; - if (!op) - return -ENOMEM; + if (!p->snap_exists(snap_name)) { + onfinish->defer(std::move(onfinish), osdc_errc::snapshot_dne, cb::list{}); + return; + } + + auto op = new PoolOp; op->tid = ++last_tid; op->pool = pool; op->name = snap_name; - op->onfinish = onfinish; + op->onfinish = std::move(onfinish); op->pool_op = POOL_OP_DELETE_SNAP; pool_ops[op->tid] = op; pool_op_submit(op); - - return 0; } -int Objecter::delete_selfmanaged_snap(int64_t pool, snapid_t snap, - Context *onfinish) +void Objecter::delete_selfmanaged_snap(int64_t pool, snapid_t snap, + decltype(PoolOp::onfinish)&& onfinish) { unique_lock wl(rwlock); ldout(cct, 10) << "delete_selfmanaged_snap; pool: " << pool << "; snap: " << snap << dendl; - PoolOp *op = new PoolOp; - if (!op) return -ENOMEM; + auto op = new PoolOp; op->tid = ++last_tid; op->pool = pool; - op->onfinish = onfinish; + op->onfinish = std::move(onfinish); op->pool_op = POOL_OP_DELETE_UNMANAGED_SNAP; op->snapid = snap; pool_ops[op->tid] = op; pool_op_submit(op); - - return 0; } -int Objecter::create_pool(string& name, Context *onfinish, - int crush_rule) +void Objecter::create_pool(std::string_view name, + decltype(PoolOp::onfinish)&& onfinish, + int crush_rule) { unique_lock wl(rwlock); ldout(cct, 10) << "create_pool name=" << name << dendl; - if (osdmap->lookup_pg_pool_name(name) >= 0) - return -EEXIST; + if (osdmap->lookup_pg_pool_name(name) >= 0) { + onfinish->defer(std::move(onfinish), osdc_errc::pool_exists, cb::list{}); + return; + } - PoolOp *op = new PoolOp; - if (!op) - return -ENOMEM; + auto op = new PoolOp; op->tid = ++last_tid; op->pool = 0; op->name = name; - op->onfinish = onfinish; + op->onfinish = std::move(onfinish); op->pool_op = POOL_OP_CREATE; pool_ops[op->tid] = op; op->crush_rule = crush_rule; pool_op_submit(op); - - return 0; } -int Objecter::delete_pool(int64_t pool, Context *onfinish) +void Objecter::delete_pool(int64_t pool, + decltype(PoolOp::onfinish)&& onfinish) { unique_lock wl(rwlock); ldout(cct, 10) << "delete_pool " << pool << dendl; if (!osdmap->have_pg_pool(pool)) - return -ENOENT; - - _do_delete_pool(pool, onfinish); - return 0; + onfinish->defer(std::move(onfinish), osdc_errc::pool_dne, cb::list{}); + else + _do_delete_pool(pool, std::move(onfinish)); } -int Objecter::delete_pool(const string &pool_name, Context *onfinish) +void Objecter::delete_pool(std::string_view pool_name, + decltype(PoolOp::onfinish)&& onfinish) 
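CB_SelfmanagedSnap above converts a failed decode into an error_code delivered on the completion, instead of letting the buffer exception escape. A sketch of that shape, with std::stoul standing in for decode(), an invented EIO fallback where the patch takes e.code() from the cb::error, and the completion simplified to a std::function:

#include <boost/system/error_code.hpp>
#include <cerrno>
#include <functional>
#include <iostream>
#include <string>

namespace bs = boost::system;

// Try to decode the snap id; on failure, hand the error to the completion.
void on_snap_created(bs::error_code ec, const std::string& payload,
                     std::function<void(bs::error_code, unsigned long)> fin) {
  unsigned long snapid = 0;
  if (!ec) {
    try {
      snapid = std::stoul(payload);
    } catch (const std::exception&) {
      ec = bs::error_code(EIO, bs::generic_category());
    }
  }
  fin(ec, snapid);
}

int main() {
  auto print = [](bs::error_code ec, unsigned long id) {
    if (ec)
      std::cout << "decode failed: " << ec.message() << "\n";
    else
      std::cout << "snapid " << id << "\n";
  };
  on_snap_created({}, "42", print);
  on_snap_created({}, "junk", print);
}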
{ unique_lock wl(rwlock); ldout(cct, 10) << "delete_pool " << pool_name << dendl; int64_t pool = osdmap->lookup_pg_pool_name(pool_name); if (pool < 0) - return pool; - - _do_delete_pool(pool, onfinish); - return 0; + // This only returns one error: -ENOENT. + onfinish->defer(std::move(onfinish), osdc_errc::pool_dne, cb::list{}); + else + _do_delete_pool(pool, std::move(onfinish)); } -void Objecter::_do_delete_pool(int64_t pool, Context *onfinish) +void Objecter::_do_delete_pool(int64_t pool, + decltype(PoolOp::onfinish)&& onfinish) + { - PoolOp *op = new PoolOp; + auto op = new PoolOp; op->tid = ++last_tid; op->pool = pool; op->name = "delete"; - op->onfinish = onfinish; + op->onfinish = std::move(onfinish); op->pool_op = POOL_OP_DELETE; pool_ops[op->tid] = op; pool_op_submit(op); @@ -4029,9 +3964,9 @@ void Objecter::_pool_op_submit(PoolOp *op) // rwlock is locked unique ldout(cct, 10) << "pool_op_submit " << op->tid << dendl; - MPoolOp *m = new MPoolOp(monc->get_fsid(), op->tid, op->pool, - op->name, op->pool_op, - last_seen_osdmap_version); + auto m = new MPoolOp(monc->get_fsid(), op->tid, op->pool, + op->name, op->pool_op, + last_seen_osdmap_version); if (op->snapid) m->snapid = op->snapid; if (op->crush_rule) m->crush_rule = op->crush_rule; monc->send_mon_message(m); @@ -4042,13 +3977,15 @@ void Objecter::_pool_op_submit(PoolOp *op) /** * Handle a reply to a PoolOp message. Check that we sent the message - * and give the caller responsibility for the returned ceph::buffer::list. + * and give the caller responsibility for the returned cb::list. * Then either call the finisher or stash the PoolOp, depending on if we * have a new enough map. * Lastly, clean up the message and PoolOp. */ void Objecter::handle_pool_op_reply(MPoolOpReply *m) { + int rc = m->replyCode; + auto ec = rc < 0 ? bs::error_code(-rc, mon_category()) : bs::error_code(); FUNCTRACE(cct); shunique_lock sul(rwlock, acquire_shared); if (!initialized) { @@ -4059,13 +3996,13 @@ void Objecter::handle_pool_op_reply(MPoolOpReply *m) ldout(cct, 10) << "handle_pool_op_reply " << *m << dendl; ceph_tid_t tid = m->get_tid(); - map::iterator iter = pool_ops.find(tid); + auto iter = pool_ops.find(tid); if (iter != pool_ops.end()) { PoolOp *op = iter->second; ldout(cct, 10) << "have request " << tid << " at " << op << " Op: " << ceph_pool_op_name(op->pool_op) << dendl; - if (op->blp) - op->blp->claim(m->response_data); + cb::list bl; + bl.claim(m->response_data); if (m->version > last_seen_osdmap_version) last_seen_osdmap_version = m->version; if (osdmap->get_epoch() < m->epoch) { @@ -4079,19 +4016,27 @@ void Objecter::handle_pool_op_reply(MPoolOpReply *m) if (osdmap->get_epoch() < m->epoch) { ldout(cct, 20) << "waiting for client to reach epoch " << m->epoch << " before calling back" << dendl; - _wait_for_new_map(op->onfinish, m->epoch, m->replyCode); + _wait_for_new_map(OpCompletion::create( + service.get_executor(), + [o = std::move(op->onfinish), + bl = std::move(bl)]( + bs::error_code ec) mutable { + o->defer(std::move(o), ec, bl); + }), + m->epoch, + ec); } else { // map epoch changed, probably because a MOSDMap message // sneaked in. Do caller-specified callback now or else // we lose it forever. 
ceph_assert(op->onfinish); - op->onfinish->complete(m->replyCode); + op->onfinish->defer(std::move(op->onfinish), ec, std::move(bl)); } } else { ceph_assert(op->onfinish); - op->onfinish->complete(m->replyCode); + op->onfinish->defer(std::move(op->onfinish), ec, std::move(bl)); } - op->onfinish = NULL; + op->onfinish = nullptr; if (!sul.owns_lock()) { sul.unlock(); sul.lock(); @@ -4118,7 +4063,7 @@ int Objecter::pool_op_cancel(ceph_tid_t tid, int r) unique_lock wl(rwlock); - map::iterator it = pool_ops.find(tid); + auto it = pool_ops.find(tid); if (it == pool_ops.end()) { ldout(cct, 10) << __func__ << " tid " << tid << " dne" << dendl; return -ENOENT; @@ -4128,7 +4073,7 @@ int Objecter::pool_op_cancel(ceph_tid_t tid, int r) PoolOp *op = it->second; if (op->onfinish) - op->onfinish->complete(r); + op->onfinish->defer(std::move(op->onfinish), osdcode(r), cb::list{}); _finish_pool_op(op, r); return 0; @@ -4149,19 +4094,16 @@ void Objecter::_finish_pool_op(PoolOp *op, int r) // pool stats -void Objecter::get_pool_stats(list& pools, - map *result, - bool *per_pool, - Context *onfinish) +void Objecter::get_pool_stats( + const std::vector& pools, + decltype(PoolStatOp::onfinish)&& onfinish) { ldout(cct, 10) << "get_pool_stats " << pools << dendl; - PoolStatOp *op = new PoolStatOp; + auto op = new PoolStatOp; op->tid = ++last_tid; op->pools = pools; - op->pool_stats = result; - op->per_pool = per_pool; - op->onfinish = onfinish; + op->onfinish = std::move(onfinish); if (mon_timeout > timespan(0)) { op->ontimeout = timer.add_event(mon_timeout, [this, op]() { @@ -4202,16 +4144,15 @@ void Objecter::handle_get_pool_stats_reply(MGetPoolStatsReply *m) return; } - map::iterator iter = poolstat_ops.find(tid); + auto iter = poolstat_ops.find(tid); if (iter != poolstat_ops.end()) { PoolStatOp *op = poolstat_ops[tid]; ldout(cct, 10) << "have request " << tid << " at " << op << dendl; - *op->pool_stats = m->pool_stats; - *op->per_pool = m->per_pool; if (m->version > last_seen_pgmap_version) { last_seen_pgmap_version = m->version; } - op->onfinish->complete(0); + op->onfinish->defer(std::move(op->onfinish), bs::error_code{}, + std::move(m->pool_stats), m->per_pool); _finish_pool_stat_op(op, 0); } else { ldout(cct, 10) << "unknown request " << tid << dendl; @@ -4226,7 +4167,7 @@ int Objecter::pool_stat_op_cancel(ceph_tid_t tid, int r) unique_lock wl(rwlock); - map::iterator it = poolstat_ops.find(tid); + auto it = poolstat_ops.find(tid); if (it == poolstat_ops.end()) { ldout(cct, 10) << __func__ << " tid " << tid << " dne" << dendl; return -ENOENT; @@ -4234,9 +4175,10 @@ int Objecter::pool_stat_op_cancel(ceph_tid_t tid, int r) ldout(cct, 10) << __func__ << " tid " << tid << dendl; - PoolStatOp *op = it->second; + auto op = it->second; if (op->onfinish) - op->onfinish->complete(r); + op->onfinish->defer(std::move(op->onfinish), osdcode(r), + bc::flat_map{}, false); _finish_pool_stat_op(op, r); return 0; } @@ -4254,18 +4196,16 @@ void Objecter::_finish_pool_stat_op(PoolStatOp *op, int r) delete op; } -void Objecter::get_fs_stats(ceph_statfs& result, - boost::optional data_pool, - Context *onfinish) +void Objecter::get_fs_stats(boost::optional poolid, + decltype(StatfsOp::onfinish)&& onfinish) { ldout(cct, 10) << "get_fs_stats" << dendl; unique_lock l(rwlock); - StatfsOp *op = new StatfsOp; + auto op = new StatfsOp; op->tid = ++last_tid; - op->stats = &result; - op->data_pool = data_pool; - op->onfinish = onfinish; + op->data_pool = poolid; + op->onfinish = std::move(onfinish); if (mon_timeout > timespan(0)) { 
op->ontimeout = timer.add_event(mon_timeout, [this, op]() { @@ -4308,10 +4248,9 @@ void Objecter::handle_fs_stats_reply(MStatfsReply *m) if (statfs_ops.count(tid)) { StatfsOp *op = statfs_ops[tid]; ldout(cct, 10) << "have request " << tid << " at " << op << dendl; - *(op->stats) = m->h.st; if (m->h.version > last_seen_pgmap_version) last_seen_pgmap_version = m->h.version; - op->onfinish->complete(0); + op->onfinish->defer(std::move(op->onfinish), bs::error_code{}, m->h.st); _finish_statfs_op(op, 0); } else { ldout(cct, 10) << "unknown request " << tid << dendl; @@ -4326,7 +4265,7 @@ int Objecter::statfs_op_cancel(ceph_tid_t tid, int r) unique_lock wl(rwlock); - map::iterator it = statfs_ops.find(tid); + auto it = statfs_ops.find(tid); if (it == statfs_ops.end()) { ldout(cct, 10) << __func__ << " tid " << tid << " dne" << dendl; return -ENOENT; @@ -4334,9 +4273,9 @@ int Objecter::statfs_op_cancel(ceph_tid_t tid, int r) ldout(cct, 10) << __func__ << " tid " << tid << dendl; - StatfsOp *op = it->second; + auto op = it->second; if (op->onfinish) - op->onfinish->complete(r); + op->onfinish->defer(std::move(op->onfinish), osdcode(r), ceph_statfs{}); _finish_statfs_op(op, r); return 0; } @@ -4357,16 +4296,16 @@ void Objecter::_finish_statfs_op(StatfsOp *op, int r) // scatter/gather void Objecter::_sg_read_finish(vector& extents, - vector& resultbl, - ceph::buffer::list *bl, Context *onfinish) + vector& resultbl, + cb::list *bl, Context *onfinish) { // all done ldout(cct, 15) << "_sg_read_finish" << dendl; if (extents.size() > 1) { Striper::StripedReadResult r; - vector::iterator bit = resultbl.begin(); - for (vector::iterator eit = extents.begin(); + auto bit = resultbl.begin(); + for (auto eit = extents.begin(); eit != extents.end(); ++eit, ++bit) { r.add_partial_result(cct, *bit, eit->buffer_extents); @@ -4418,7 +4357,7 @@ bool Objecter::ms_handle_reset(Connection *con) return false; } map lresend; - OSDSession::unique_lock sl(session->lock); + unique_lock sl(session->lock); _reopen_session(session); _kick_requests(session, lresend); sl.unlock(); @@ -4466,9 +4405,7 @@ void Objecter::op_target_t::dump(Formatter *f) const void Objecter::_dump_active(OSDSession *s) { - for (map::iterator p = s->ops.begin(); - p != s->ops.end(); - ++p) { + for (auto p = s->ops.begin(); p != s->ops.end(); ++p) { Op *op = p->second; ldout(cct, 20) << op->tid << "\t" << op->target.pgid << "\tosd." << (op->session ? op->session->osd : -1) @@ -4481,10 +4418,10 @@ void Objecter::_dump_active() { ldout(cct, 20) << "dump_active .. 
" << num_homeless_ops << " homeless" << dendl; - for (map::iterator siter = osd_sessions.begin(); + for (auto siter = osd_sessions.begin(); siter != osd_sessions.end(); ++siter) { - OSDSession *s = siter->second; - OSDSession::shared_lock sl(s->lock); + auto s = siter->second; + shared_lock sl(s->lock); _dump_active(s); sl.unlock(); } @@ -4513,9 +4450,7 @@ void Objecter::dump_requests(Formatter *fmt) void Objecter::_dump_ops(const OSDSession *s, Formatter *fmt) { - for (map::const_iterator p = s->ops.begin(); - p != s->ops.end(); - ++p) { + for (auto p = s->ops.begin(); p != s->ops.end(); ++p) { Op *op = p->second; auto age = std::chrono::duration(ceph::coarse_mono_clock::now() - op->stamp); fmt->open_object_section("op"); @@ -4529,9 +4464,7 @@ void Objecter::_dump_ops(const OSDSession *s, Formatter *fmt) fmt->dump_stream("mtime") << op->mtime; fmt->open_array_section("osd_ops"); - for (vector::const_iterator it = op->ops.begin(); - it != op->ops.end(); - ++it) { + for (auto it = op->ops.begin(); it != op->ops.end(); ++it) { fmt->dump_stream("osd_op") << *it; } fmt->close_section(); // osd_ops array @@ -4544,10 +4477,10 @@ void Objecter::dump_ops(Formatter *fmt) { // Read-lock on Objecter held fmt->open_array_section("ops"); - for (map::const_iterator siter = osd_sessions.begin(); + for (auto siter = osd_sessions.begin(); siter != osd_sessions.end(); ++siter) { OSDSession *s = siter->second; - OSDSession::shared_lock sl(s->lock); + shared_lock sl(s->lock); _dump_ops(s, fmt); sl.unlock(); } @@ -4557,10 +4490,8 @@ void Objecter::dump_ops(Formatter *fmt) void Objecter::_dump_linger_ops(const OSDSession *s, Formatter *fmt) { - for (map::const_iterator p = s->linger_ops.begin(); - p != s->linger_ops.end(); - ++p) { - LingerOp *op = p->second; + for (auto p = s->linger_ops.begin(); p != s->linger_ops.end(); ++p) { + auto op = p->second; fmt->open_object_section("linger_op"); fmt->dump_unsigned("linger_id", op->linger_id); op->target.dump(fmt); @@ -4574,10 +4505,10 @@ void Objecter::dump_linger_ops(Formatter *fmt) { // We have a read-lock on the objecter fmt->open_array_section("linger_ops"); - for (map::const_iterator siter = osd_sessions.begin(); + for (auto siter = osd_sessions.begin(); siter != osd_sessions.end(); ++siter) { - OSDSession *s = siter->second; - OSDSession::shared_lock sl(s->lock); + auto s = siter->second; + shared_lock sl(s->lock); _dump_linger_ops(s, fmt); sl.unlock(); } @@ -4587,16 +4518,13 @@ void Objecter::dump_linger_ops(Formatter *fmt) void Objecter::_dump_command_ops(const OSDSession *s, Formatter *fmt) { - for (map::const_iterator p = s->command_ops.begin(); - p != s->command_ops.end(); - ++p) { - CommandOp *op = p->second; + for (auto p = s->command_ops.begin(); p != s->command_ops.end(); ++p) { + auto op = p->second; fmt->open_object_section("command_op"); fmt->dump_unsigned("command_id", op->tid); fmt->dump_int("osd", op->session ? 
op->session->osd : -1); fmt->open_array_section("command"); - for (vector::const_iterator q = op->cmd.begin(); - q != op->cmd.end(); ++q) + for (auto q = op->cmd.begin(); q != op->cmd.end(); ++q) fmt->dump_string("word", *q); fmt->close_section(); if (op->target_osd >= 0) @@ -4611,10 +4539,10 @@ void Objecter::dump_command_ops(Formatter *fmt) { // We have a read-lock on the Objecter here fmt->open_array_section("command_ops"); - for (map::const_iterator siter = osd_sessions.begin(); + for (auto siter = osd_sessions.begin(); siter != osd_sessions.end(); ++siter) { - OSDSession *s = siter->second; - OSDSession::shared_lock sl(s->lock); + auto s = siter->second; + shared_lock sl(s->lock); _dump_command_ops(s, fmt); sl.unlock(); } @@ -4625,10 +4553,8 @@ void Objecter::dump_command_ops(Formatter *fmt) void Objecter::dump_pool_ops(Formatter *fmt) const { fmt->open_array_section("pool_ops"); - for (map::const_iterator p = pool_ops.begin(); - p != pool_ops.end(); - ++p) { - PoolOp *op = p->second; + for (auto p = pool_ops.begin(); p != pool_ops.end(); ++p) { + auto op = p->second; fmt->open_object_section("pool_op"); fmt->dump_unsigned("tid", op->tid); fmt->dump_int("pool", op->pool); @@ -4645,7 +4571,7 @@ void Objecter::dump_pool_ops(Formatter *fmt) const void Objecter::dump_pool_stat_ops(Formatter *fmt) const { fmt->open_array_section("pool_stat_ops"); - for (map::const_iterator p = poolstat_ops.begin(); + for (auto p = poolstat_ops.begin(); p != poolstat_ops.end(); ++p) { PoolStatOp *op = p->second; @@ -4654,10 +4580,8 @@ void Objecter::dump_pool_stat_ops(Formatter *fmt) const fmt->dump_stream("last_sent") << op->last_submit; fmt->open_array_section("pools"); - for (list::const_iterator it = op->pools.begin(); - it != op->pools.end(); - ++it) { - fmt->dump_string("pool", *it); + for (const auto& it : op->pools) { + fmt->dump_string("pool", it); } fmt->close_section(); // pools array @@ -4669,10 +4593,8 @@ void Objecter::dump_pool_stat_ops(Formatter *fmt) const void Objecter::dump_statfs_ops(Formatter *fmt) const { fmt->open_array_section("statfs_ops"); - for (map::const_iterator p = statfs_ops.begin(); - p != statfs_ops.end(); - ++p) { - StatfsOp *op = p->second; + for (auto p = statfs_ops.begin(); p != statfs_ops.end(); ++p) { + auto op = p->second; fmt->open_object_section("statfs_op"); fmt->dump_unsigned("tid", op->tid); fmt->dump_stream("last_sent") << op->last_submit; @@ -4690,7 +4612,7 @@ int Objecter::RequestStateHook::call(std::string_view command, const cmdmap_t& cmdmap, Formatter *f, std::ostream& ss, - ceph::buffer::list& out) + cb::list& out) { shared_lock rl(m_objecter->rwlock); m_objecter->dump_requests(f); @@ -4712,7 +4634,7 @@ void Objecter::blacklist_self(bool set) ss << messenger->get_myaddrs().front().get_legacy_str(); cmd.push_back("\"addr\":\"" + ss.str() + "\""); - MMonCommand *m = new MMonCommand(monc->get_fsid()); + auto m = new MMonCommand(monc->get_fsid()); m->cmd = cmd; monc->send_mon_message(m); @@ -4737,8 +4659,8 @@ void Objecter::handle_command_reply(MCommandReply *m) return; } - OSDSession::shared_lock sl(s->lock); - map::iterator p = s->command_ops.find(m->get_tid()); + shared_lock sl(s->lock); + auto p = s->command_ops.find(m->get_tid()); if (p == s->command_ops.end()) { ldout(cct, 10) << "handle_command_reply tid " << m->get_tid() << " not found" << dendl; @@ -4758,6 +4680,7 @@ void Objecter::handle_command_reply(MCommandReply *m) sl.unlock(); return; } + if (m->r == -EAGAIN) { ldout(cct,10) << __func__ << " tid " << m->get_tid() << " got EAGAIN, requesting map 
and resending" << dendl; @@ -4770,14 +4693,12 @@ void Objecter::handle_command_reply(MCommandReply *m) return; } - if (c->poutbl) { - c->poutbl->claim(m->get_data()); - } - sl.unlock(); - OSDSession::unique_lock sul(s->lock); - _finish_command(c, m->r, m->rs); + unique_lock sul(s->lock); + _finish_command(c, m->r < 0 ? bs::error_code(-m->r, osd_category()) : + bs::error_code(), std::move(m->rs), + std::move(m->get_data())); sul.unlock(); m->put(); @@ -4792,7 +4713,7 @@ void Objecter::submit_command(CommandOp *c, ceph_tid_t *ptid) c->tid = tid; { - OSDSession::unique_lock hs_wl(homeless_session->lock); + unique_lock hs_wl(homeless_session->lock); _session_command_op_assign(homeless_session, c); } @@ -4801,8 +4722,9 @@ void Objecter::submit_command(CommandOp *c, ceph_tid_t *ptid) if (osd_timeout > timespan(0)) { c->ontimeout = timer.add_event(osd_timeout, [this, c, tid]() { - command_op_cancel(c->session, tid, - -ETIMEDOUT); }); + command_op_cancel( + c->session, tid, + osdc_errc::timed_out); }); } if (!c->session->is_homeless()) { @@ -4812,12 +4734,14 @@ void Objecter::submit_command(CommandOp *c, ceph_tid_t *ptid) } if (c->map_check_error) _send_command_map_check(c); - *ptid = tid; + if (ptid) + *ptid = tid; logger->inc(l_osdc_command_active); } -int Objecter::_calc_command_target(CommandOp *c, shunique_lock& sul) +int Objecter::_calc_command_target(CommandOp *c, + shunique_lock& sul) { ceph_assert(sul.owns_lock() && sul.mutex() == &rwlock); @@ -4873,7 +4797,7 @@ int Objecter::_calc_command_target(CommandOp *c, shunique_lock& sul) } void Objecter::_assign_command_session(CommandOp *c, - shunique_lock& sul) + shunique_lock& sul) { ceph_assert(sul.owns_lock() && sul.mutex() == &rwlock); @@ -4884,11 +4808,11 @@ void Objecter::_assign_command_session(CommandOp *c, if (c->session != s) { if (c->session) { OSDSession *cs = c->session; - OSDSession::unique_lock csl(cs->lock); + unique_lock csl(cs->lock); _session_command_op_remove(c->session, c); csl.unlock(); } - OSDSession::unique_lock sl(s->lock); + unique_lock sl(s->lock); _session_command_op_assign(s, c); } @@ -4900,7 +4824,7 @@ void Objecter::_send_command(CommandOp *c) ldout(cct, 10) << "_send_command " << c->tid << dendl; ceph_assert(c->session); ceph_assert(c->session->con); - MCommand *m = new MCommand(monc->monmap.fsid); + auto m = new MCommand(monc->monmap.fsid); m->cmd = c->cmd; m->set_data(c->inbl); m->set_tid(c->tid); @@ -4908,13 +4832,14 @@ void Objecter::_send_command(CommandOp *c) logger->inc(l_osdc_command_send); } -int Objecter::command_op_cancel(OSDSession *s, ceph_tid_t tid, int r) +int Objecter::command_op_cancel(OSDSession *s, ceph_tid_t tid, + bs::error_code ec) { ceph_assert(initialized); unique_lock wl(rwlock); - map::iterator it = s->command_ops.find(tid); + auto it = s->command_ops.find(tid); if (it == s->command_ops.end()) { ldout(cct, 10) << __func__ << " tid " << tid << " dne" << dendl; return -ENOENT; @@ -4924,25 +4849,25 @@ int Objecter::command_op_cancel(OSDSession *s, ceph_tid_t tid, int r) CommandOp *op = it->second; _command_cancel_map_check(op); - OSDSession::unique_lock sl(op->session->lock); - _finish_command(op, r, ""); + unique_lock sl(op->session->lock); + _finish_command(op, ec, {}, {}); sl.unlock(); return 0; } -void Objecter::_finish_command(CommandOp *c, int r, string rs) +void Objecter::_finish_command(CommandOp *c, bs::error_code ec, + string&& rs, cb::list&& bl) { // rwlock is locked unique // session lock is locked - ldout(cct, 10) << "_finish_command " << c->tid << " = " << r << " " + 
ldout(cct, 10) << "_finish_command " << c->tid << " = " << ec << " " << rs << dendl; - if (c->prs) - *c->prs = rs; + if (c->onfinish) - c->onfinish->complete(r); + c->onfinish->defer(std::move(c->onfinish), ec, std::move(rs), std::move(bl)); - if (c->ontimeout && r != -ETIMEDOUT) + if (c->ontimeout && ec != bs::errc::timed_out) timer.cancel_event(c->ontimeout); _session_command_op_remove(c->session, c); @@ -4961,20 +4886,14 @@ Objecter::OSDSession::~OSDSession() ceph_assert(command_ops.empty()); } -Objecter::Objecter(CephContext *cct_, Messenger *m, MonClient *mc, - Finisher *fin, +Objecter::Objecter(CephContext *cct, + Messenger *m, MonClient *mc, + boost::asio::io_context& service, double mon_timeout, double osd_timeout) : - Dispatcher(cct_), messenger(m), monc(mc), finisher(fin), - trace_endpoint("0.0.0.0", 0, "Objecter"), - osdmap{std::make_unique()}, - homeless_session(new OSDSession(cct, -1)), + Dispatcher(cct), messenger(m), monc(mc), service(service), mon_timeout(ceph::make_timespan(mon_timeout)), - osd_timeout(ceph::make_timespan(osd_timeout)), - op_throttle_bytes(cct, "objecter_bytes", - cct->_conf->objecter_inflight_op_bytes), - op_throttle_ops(cct, "objecter_ops", cct->_conf->objecter_inflight_ops), - retry_writes_after_first_reply(cct->_conf->objecter_retry_writes_after_first_reply) + osd_timeout(ceph::make_timespan(osd_timeout)) {} Objecter::~Objecter() @@ -5029,60 +4948,82 @@ hobject_t Objecter::enumerate_objects_end() return hobject_t::get_max(); } -struct C_EnumerateReply : public Context { - ceph::buffer::list bl; - - Objecter *objecter; - hobject_t *next; - std::list *result; +template +struct EnumerationContext { + Objecter* objecter; const hobject_t end; - const int64_t pool_id; - Context *on_finish; + const cb::list filter; + uint32_t max; + const object_locator_t oloc; + std::vector ls; +private: + fu2::unique_function, + hobject_t) &&> on_finish; +public: + epoch_t epoch = 0; + int budget = -1; + + EnumerationContext(Objecter* objecter, + hobject_t end, cb::list filter, + uint32_t max, object_locator_t oloc, + decltype(on_finish) on_finish) + : objecter(objecter), end(std::move(end)), filter(std::move(filter)), + max(max), oloc(std::move(oloc)), on_finish(std::move(on_finish)) {} + + void operator()(bs::error_code ec, + std::vector v, + hobject_t h) && { + if (budget >= 0) { + objecter->put_op_budget_bytes(budget); + budget = -1; + } + + std::move(on_finish)(ec, std::move(v), std::move(h)); + } +}; + +template +struct CB_EnumerateReply { + cb::list bl; - epoch_t epoch; - int budget; + Objecter* objecter; + std::unique_ptr> ctx; - C_EnumerateReply(Objecter *objecter_, hobject_t *next_, - std::list *result_, - const hobject_t end_, const int64_t pool_id_, Context *on_finish_) : - objecter(objecter_), next(next_), result(result_), - end(end_), pool_id(pool_id_), on_finish(on_finish_), - epoch(0), budget(-1) - {} + CB_EnumerateReply(Objecter* objecter, + std::unique_ptr>&& ctx) : + objecter(objecter), ctx(std::move(ctx)) {} - void finish(int r) override { - objecter->_enumerate_reply( - bl, r, end, pool_id, budget, epoch, result, next, on_finish); + void operator()(bs::error_code ec) { + objecter->_enumerate_reply(std::move(bl), ec, std::move(ctx)); } }; +template void Objecter::enumerate_objects( - int64_t pool_id, - const std::string &ns, - const hobject_t &start, - const hobject_t &end, - const uint32_t max, - const ceph::buffer::list &filter_bl, - std::list *result, - hobject_t *next, - Context *on_finish) -{ - ceph_assert(result); - + int64_t pool_id, + 
std::string_view ns, + hobject_t start, + hobject_t end, + const uint32_t max, + const cb::list& filter_bl, + fu2::unique_function, + hobject_t) &&> on_finish) { if (!end.is_max() && start > end) { lderr(cct) << __func__ << ": start " << start << " > end " << end << dendl; - on_finish->complete(-EINVAL); + std::move(on_finish)(osdc_errc::precondition_violated, {}, {}); return; } if (max < 1) { lderr(cct) << __func__ << ": result size may not be zero" << dendl; - on_finish->complete(-EINVAL); + std::move(on_finish)(osdc_errc::precondition_violated, {}, {}); return; } if (start.is_max()) { - on_finish->complete(0); + std::move(on_finish)({}, {}, {}); return; } @@ -5091,92 +5032,128 @@ void Objecter::enumerate_objects( if (!osdmap->test_flag(CEPH_OSDMAP_SORTBITWISE)) { rl.unlock(); lderr(cct) << __func__ << ": SORTBITWISE cluster flag not set" << dendl; - on_finish->complete(-EOPNOTSUPP); + std::move(on_finish)(osdc_errc::not_supported, {}, {}); return; } - const pg_pool_t *p = osdmap->get_pg_pool(pool_id); + const pg_pool_t* p = osdmap->get_pg_pool(pool_id); if (!p) { lderr(cct) << __func__ << ": pool " << pool_id << " DNE in osd epoch " << osdmap->get_epoch() << dendl; rl.unlock(); - on_finish->complete(-ENOENT); + std::move(on_finish)(osdc_errc::pool_dne, {}, {}); return; } else { rl.unlock(); } - ldout(cct, 20) << __func__ << ": start=" << start << " end=" << end << dendl; - - // Stash completion state - C_EnumerateReply *on_ack = new C_EnumerateReply( - this, next, result, end, pool_id, on_finish); - + _issue_enumerate(start, + std::make_unique>( + this, std::move(end), filter_bl, + max, object_locator_t{pool_id, ns}, + std::move(on_finish))); +} + +template +void Objecter::enumerate_objects( + int64_t pool_id, + std::string_view ns, + hobject_t start, + hobject_t end, + const uint32_t max, + const cb::list& filter_bl, + fu2::unique_function, + hobject_t) &&> on_finish); + +template +void Objecter::enumerate_objects( + int64_t pool_id, + std::string_view ns, + hobject_t start, + hobject_t end, + const uint32_t max, + const cb::list& filter_bl, + fu2::unique_function, + hobject_t) &&> on_finish); + + + +template +void Objecter::_issue_enumerate(hobject_t start, + std::unique_ptr> ctx) { ObjectOperation op; - op.pg_nls(max, filter_bl, start, 0); + auto c = ctx.get(); + op.pg_nls(c->max, c->filter, start, osdmap->get_epoch()); + auto on_ack = std::make_unique>(this, std::move(ctx)); + // I hate having to do this. Try to find a cleaner way + // later. + auto epoch = &c->epoch; + auto budget = &c->budget; + auto pbl = &on_ack->bl; // Issue. 
See you later in _enumerate_reply - object_locator_t oloc(pool_id, ns); - pg_read(start.get_hash(), oloc, op, - &on_ack->bl, 0, on_ack, &on_ack->epoch, &on_ack->budget); -} - + pg_read(start.get_hash(), + c->oloc, op, pbl, 0, + Op::OpComp::create(service.get_executor(), + [c = std::move(on_ack)] + (bs::error_code ec) mutable { + (*c)(ec); + }), epoch, budget); +} + +template +void Objecter::_issue_enumerate( + hobject_t start, + std::unique_ptr> ctx); +template +void Objecter::_issue_enumerate( + hobject_t start, std::unique_ptr> ctx); + +template void Objecter::_enumerate_reply( - ceph::buffer::list &bl, - int r, - const hobject_t &end, - const int64_t pool_id, - int budget, - epoch_t reply_epoch, - std::list *result, - hobject_t *next, - Context *on_finish) -{ - if (budget >= 0) { - put_op_budget_bytes(budget); - } - - if (r < 0) { - ldout(cct, 4) << __func__ << ": remote error " << r << dendl; - on_finish->complete(r); + cb::list&& bl, + bs::error_code ec, + std::unique_ptr>&& ctx) +{ + if (ec) { + std::move(*ctx)(ec, {}, {}); return; } - ceph_assert(next != NULL); - // Decode the results auto iter = bl.cbegin(); - pg_nls_response_t response; + pg_nls_response_template response; + + try { + response.decode(iter); + if (!iter.end()) { + // extra_info isn't used anywhere. We do this solely to preserve + // backward compatibility + cb::list legacy_extra_info; + decode(legacy_extra_info, iter); + } + } catch (const bs::system_error& e) { + std::move(*ctx)(e.code(), {}, {}); + return; + } - decode(response, iter); - if (!iter.end()) { - // extra_info isn't used anywhere. We do this solely to preserve - // backward compatibility - ceph::buffer::list legacy_extra_info; - decode(legacy_extra_info, iter); + shared_lock rl(rwlock); + auto pool = osdmap->get_pg_pool(ctx->oloc.get_pool()); + rl.unlock(); + if (!pool) { + // pool is gone, drop any results which are now meaningless. + std::move(*ctx)(osdc_errc::pool_dne, {}, {}); + return; } - ldout(cct, 10) << __func__ << ": got " << response.entries.size() - << " handle " << response.handle - << " reply_epoch " << reply_epoch << dendl; - ldout(cct, 20) << __func__ << ": response.entries.size " - << response.entries.size() << ", response.entries " - << response.entries << dendl; - if (response.handle <= end) { - *next = response.handle; + hobject_t next; + if ((response.handle <= ctx->end)) { + next = response.handle; } else { - ldout(cct, 10) << __func__ << ": adjusted next down to end " << end - << dendl; - *next = end; + next = ctx->end; // drop anything after 'end' - shared_lock rl(rwlock); - const pg_pool_t *pool = osdmap->get_pg_pool(pool_id); - if (!pool) { - // pool is gone, drop any results which are now meaningless. - rl.unlock(); - on_finish->complete(-ENOENT); - return; - } while (!response.entries.empty()) { uint32_t hash = response.entries.back().locator.empty() ? 
pool->hash_key(response.entries.back().oid, @@ -5187,34 +5164,61 @@ void Objecter::_enumerate_reply( response.entries.back().locator, CEPH_NOSNAP, hash, - pool_id, + ctx->oloc.get_pool(), response.entries.back().nspace); - if (last < end) + if (last < ctx->end) break; - ldout(cct, 20) << __func__ << " dropping item " << last - << " >= end " << end << dendl; response.entries.pop_back(); } - rl.unlock(); } - if (!response.entries.empty()) { - result->merge(response.entries); + + if (response.entries.size() <= ctx->max) { + ctx->max -= response.entries.size(); + std::move(response.entries.begin(), response.entries.end(), + std::back_inserter(ctx->ls)); + } else { + auto i = response.entries.begin(); + while (ctx->max > 0) { + ctx->ls.push_back(std::move(*i)); + --(ctx->max); + ++i; + } + uint32_t hash = + i->locator.empty() ? + pool->hash_key(i->oid, i->nspace) : + pool->hash_key(i->locator, i->nspace); + + next = hobject_t{i->oid, i->locator, + CEPH_NOSNAP, + hash, + ctx->oloc.get_pool(), + i->nspace}; } - // release the listing context's budget once all - // OPs (in the session) are finished -#if 0 - put_nlist_context_budget(list_context); -#endif - on_finish->complete(r); - return; + if (next == ctx->end || ctx->max == 0) { + std::move(*ctx)(ec, std::move(ctx->ls), std::move(next)); + } else { + _issue_enumerate(next, std::move(ctx)); + } } +template +void Objecter::_enumerate_reply( + cb::list&& bl, + bs::error_code ec, + std::unique_ptr>&& ctx); + +template +void Objecter::_enumerate_reply( + cb::list&& bl, + bs::error_code ec, + std::unique_ptr>&& ctx); + namespace { using namespace librados; template - void do_decode(std::vector& items, std::vector& bls) + void do_decode(std::vector& items, std::vector& bls) { for (auto bl : bls) { auto p = bl.cbegin(); @@ -5225,19 +5229,19 @@ namespace { } struct C_ObjectOperation_scrub_ls : public Context { - ceph::buffer::list bl; - uint32_t *interval; + cb::list bl; + uint32_t* interval; std::vector *objects = nullptr; std::vector *snapsets = nullptr; - int *rval; + int* rval; - C_ObjectOperation_scrub_ls(uint32_t *interval, - std::vector *objects, - int *rval) + C_ObjectOperation_scrub_ls(uint32_t* interval, + std::vector* objects, + int* rval) : interval(interval), objects(objects), rval(rval) {} - C_ObjectOperation_scrub_ls(uint32_t *interval, - std::vector *snapsets, - int *rval) + C_ObjectOperation_scrub_ls(uint32_t* interval, + std::vector* snapsets, + int* rval) : interval(interval), snapsets(snapsets), rval(rval) {} void finish(int r) override { if (r < 0 && r != -EAGAIN) { @@ -5251,7 +5255,7 @@ namespace { try { decode(); - } catch (ceph::buffer::error&) { + } catch (cb::error&) { if (rval) *rval = -EIO; } @@ -5271,19 +5275,19 @@ namespace { }; template - void do_scrub_ls(::ObjectOperation *op, + void do_scrub_ls(::ObjectOperation* op, const scrub_ls_arg_t& arg, std::vector *items, - uint32_t *interval, - int *rval) + uint32_t* interval, + int* rval) { OSDOp& osd_op = op->add_op(CEPH_OSD_OP_SCRUBLS); op->flags |= CEPH_OSD_FLAG_PGOP; ceph_assert(interval); arg.encode(osd_op.indata); unsigned p = op->ops.size() - 1; - auto *h = new C_ObjectOperation_scrub_ls{interval, items, rval}; - op->out_handler[p] = h; + auto h = new C_ObjectOperation_scrub_ls{interval, items, rval}; + op->set_handler(h); op->out_bl[p] = &h->bl; op->out_rval[p] = rval; } @@ -5291,9 +5295,9 @@ namespace { void ::ObjectOperation::scrub_ls(const librados::object_id_t& start_after, uint64_t max_to_get, - std::vector *objects, - uint32_t *interval, - int *rval) + 
std::vector* objects, + uint32_t* interval, + int* rval) { scrub_ls_arg_t arg = {*interval, 0, start_after, max_to_get}; do_scrub_ls(this, arg, objects, interval, rval); diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h index 5adc4a77b47..c7d132a5087 100644 --- a/src/osdc/Objecter.h +++ b/src/osdc/Objecter.h @@ -21,25 +21,37 @@ #include #include #include +#include +#include #include +#include -#include +#include +#include + +#include "include/buffer.h" #include "include/ceph_assert.h" +#include "include/ceph_fs.h" #include "include/common_fwd.h" -#include "include/buffer.h" +#include "include/expected.hpp" #include "include/types.h" #include "include/rados/rados_types.hpp" +#include "include/function2.hpp" +#include "include/RADOS/RADOS_Decodable.hpp" #include "common/admin_socket.h" +#include "common/async/completion.h" #include "common/ceph_time.h" +#include "common/ceph_mutex.h" #include "common/ceph_timer.h" #include "common/config_obs.h" #include "common/shunique_lock.h" #include "common/zipkin_trace.h" -#include "common/Finisher.h" #include "common/Throttle.h" +#include "mon/MonClient.h" + #include "messages/MOSDOp.h" #include "msg/Dispatcher.h" @@ -49,7 +61,6 @@ class Context; class Messenger; class MonClient; class Message; -class Finisher; class MPoolOpReply; @@ -57,54 +68,95 @@ class MGetPoolStatsReply; class MStatfsReply; class MCommandReply; class MWatchNotify; - +template +struct EnumerationContext; +template +struct CB_EnumerateReply; // ----------------------------------------- struct ObjectOperation { std::vector ops; - int flags; - int priority; + int flags = 0; + int priority = 0; std::vector out_bl; - std::vector out_handler; + std::vector> out_handler; std::vector out_rval; + std::vector out_ec; - ObjectOperation() : flags(0), priority(0) {} - ~ObjectOperation() { - while (!out_handler.empty()) { - delete out_handler.back(); - out_handler.pop_back(); - } - } + ObjectOperation() = default; + ObjectOperation(const ObjectOperation&) = delete; + ObjectOperation& operator =(const ObjectOperation&) = delete; + ObjectOperation(ObjectOperation&&) = default; + ObjectOperation& operator =(ObjectOperation&&) = default; + ~ObjectOperation() = default; - size_t size() { + size_t size() const { return ops.size(); } + void clear() { + ops.clear(); + flags = 0; + priority = 0; + out_bl.clear(); + out_handler.clear(); + out_rval.clear(); + out_ec.clear(); + } + void set_last_op_flags(int flags) { ceph_assert(!ops.empty()); ops.rbegin()->op.flags = flags; } - class C_TwoContexts; - /** - * Add a callback to run when this operation completes, - * after any other callbacks for it. - */ - void add_handler(Context *extra); + + void set_handler(fu2::unique_function f) { + if (f) { + if (out_handler.back()) { + // This happens seldom enough that we may as well keep folding + // functions together when we get another one rather than + // using a container. 
+ out_handler.back() = + [f = std::move(f), + g = std::move(std::move(out_handler.back()))] + (boost::system::error_code ec, int r, + const ceph::buffer::list& bl) mutable { + std::move(g)(ec, r, bl); + std::move(f)(ec, r, bl); + }; + } else { + out_handler.back() = std::move(f); + } + } + ceph_assert(ops.size() == out_handler.size()); + } + + void set_handler(Context *c) { + if (c) + set_handler([c = std::unique_ptr(c)](boost::system::error_code, + int r, + const ceph::buffer::list&) mutable { + c.release()->complete(r); + }); + + } OSDOp& add_op(int op) { - int s = ops.size(); - ops.resize(s+1); - ops[s].op.op = op; - out_bl.resize(s+1); - out_bl[s] = NULL; - out_handler.resize(s+1); - out_handler[s] = NULL; - out_rval.resize(s+1); - out_rval[s] = NULL; - return ops[s]; + ops.emplace_back(); + ops.back().op.op = op; + out_bl.push_back(nullptr); + ceph_assert(ops.size() == out_bl.size()); + out_handler.emplace_back(); + ceph_assert(ops.size() == out_handler.size()); + out_rval.push_back(nullptr); + ceph_assert(ops.size() == out_rval.size()); + out_ec.push_back(nullptr); + ceph_assert(ops.size() == out_ec.size()); + return ops.back(); } void add_data(int op, uint64_t off, uint64_t len, ceph::buffer::list& bl) { OSDOp& osd_op = add_op(op); @@ -139,21 +191,77 @@ struct ObjectOperation { osd_op.indata.append(name, osd_op.op.xattr.name_len); osd_op.indata.append(data); } - void add_call(int op, const char *cname, const char *method, - ceph::buffer::list &indata, + void add_xattr(int op, std::string_view name, const ceph::buffer::list& data) { + OSDOp& osd_op = add_op(op); + osd_op.op.xattr.name_len = name.size(); + osd_op.op.xattr.value_len = data.length(); + osd_op.indata.append(name.data(), osd_op.op.xattr.name_len); + osd_op.indata.append(data); + } + void add_xattr_cmp(int op, std::string_view name, uint8_t cmp_op, + uint8_t cmp_mode, const ceph::buffer::list& data) { + OSDOp& osd_op = add_op(op); + osd_op.op.xattr.name_len = name.size(); + osd_op.op.xattr.value_len = data.length(); + osd_op.op.xattr.cmp_op = cmp_op; + osd_op.op.xattr.cmp_mode = cmp_mode; + if (!name.empty()) + osd_op.indata.append(name.data(), osd_op.op.xattr.name_len); + osd_op.indata.append(data); + } + void add_call(int op, std::string_view cname, std::string_view method, + const ceph::buffer::list &indata, ceph::buffer::list *outbl, Context *ctx, int *prval) { OSDOp& osd_op = add_op(op); unsigned p = ops.size() - 1; - out_handler[p] = ctx; + set_handler(ctx); out_bl[p] = outbl; out_rval[p] = prval; - osd_op.op.cls.class_len = strlen(cname); - osd_op.op.cls.method_len = strlen(method); + osd_op.op.cls.class_len = cname.size(); + osd_op.op.cls.method_len = method.size(); osd_op.op.cls.indata_len = indata.length(); - osd_op.indata.append(cname, osd_op.op.cls.class_len); - osd_op.indata.append(method, osd_op.op.cls.method_len); + osd_op.indata.append(cname.data(), osd_op.op.cls.class_len); + osd_op.indata.append(method.data(), osd_op.op.cls.method_len); + osd_op.indata.append(indata); + } + void add_call(int op, std::string_view cname, std::string_view method, + const ceph::buffer::list &indata, + fu2::unique_function f) { + OSDOp& osd_op = add_op(op); + + set_handler([f = std::move(f)](boost::system::error_code ec, + int, + const ceph::buffer::list& bl) mutable { + std::move(f)(ec, bl); + }); + + osd_op.op.cls.class_len = cname.size(); + osd_op.op.cls.method_len = method.size(); + osd_op.op.cls.indata_len = indata.length(); + osd_op.indata.append(cname.data(), osd_op.op.cls.class_len); + 
osd_op.indata.append(method.data(), osd_op.op.cls.method_len); + osd_op.indata.append(indata); + } + void add_call(int op, std::string_view cname, std::string_view method, + const ceph::buffer::list &indata, + fu2::unique_function f) { + OSDOp& osd_op = add_op(op); + + set_handler([f = std::move(f)](boost::system::error_code ec, + int r, + const ceph::buffer::list& bl) mutable { + std::move(f)(ec, r, bl); + }); + + osd_op.op.cls.class_len = cname.size(); + osd_op.op.cls.method_len = method.size(); + osd_op.op.cls.indata_len = indata.length(); + osd_op.indata.append(cname.data(), osd_op.op.cls.class_len); + osd_op.indata.append(method.data(), osd_op.op.cls.method_len); osd_op.indata.append(indata); } void add_pgls(int op, uint64_t count, collection_list_handle_t cookie, @@ -225,17 +333,18 @@ struct ObjectOperation { o.op.flags = (excl ? CEPH_OSD_OP_FLAG_EXCL : 0); } - struct C_ObjectOperation_stat : public Context { + struct CB_ObjectOperation_stat { ceph::buffer::list bl; uint64_t *psize; ceph::real_time *pmtime; time_t *ptime; struct timespec *pts; int *prval; - C_ObjectOperation_stat(uint64_t *ps, ceph::real_time *pm, time_t *pt, struct timespec *_pts, - int *prval) - : psize(ps), pmtime(pm), ptime(pt), pts(_pts), prval(prval) {} - void finish(int r) override { + boost::system::error_code* pec; + CB_ObjectOperation_stat(uint64_t *ps, ceph::real_time *pm, time_t *pt, struct timespec *_pts, + int *prval, boost::system::error_code* pec) + : psize(ps), pmtime(pm), ptime(pt), pts(_pts), prval(prval), pec(pec) {} + void operator()(boost::system::error_code ec, int r, const ceph::buffer::list& bl) { using ceph::decode; if (r >= 0) { auto p = bl.cbegin(); @@ -252,58 +361,91 @@ struct ObjectOperation { *ptime = ceph::real_clock::to_time_t(mtime); if (pts) *pts = ceph::real_clock::to_timespec(mtime); - } catch (ceph::buffer::error& e) { + } catch (const ceph::buffer::error& e) { if (prval) *prval = -EIO; + if (pec) + *pec = e.code(); } } } }; void stat(uint64_t *psize, ceph::real_time *pmtime, int *prval) { add_op(CEPH_OSD_OP_STAT); - unsigned p = ops.size() - 1; - C_ObjectOperation_stat *h = new C_ObjectOperation_stat(psize, pmtime, NULL, NULL, - prval); - out_bl[p] = &h->bl; - out_handler[p] = h; - out_rval[p] = prval; + set_handler(CB_ObjectOperation_stat(psize, pmtime, nullptr, nullptr, prval, + nullptr)); + out_rval.back() = prval; + } + void stat(uint64_t *psize, ceph::real_time *pmtime, + boost::system::error_code* ec) { + add_op(CEPH_OSD_OP_STAT); + set_handler(CB_ObjectOperation_stat(psize, pmtime, nullptr, nullptr, + nullptr, ec)); + out_ec.back() = ec; } void stat(uint64_t *psize, time_t *ptime, int *prval) { add_op(CEPH_OSD_OP_STAT); - unsigned p = ops.size() - 1; - C_ObjectOperation_stat *h = new C_ObjectOperation_stat(psize, NULL, ptime, NULL, - prval); - out_bl[p] = &h->bl; - out_handler[p] = h; - out_rval[p] = prval; + set_handler(CB_ObjectOperation_stat(psize, nullptr, ptime, nullptr, prval, + nullptr)); + out_rval.back() = prval; } void stat(uint64_t *psize, struct timespec *pts, int *prval) { add_op(CEPH_OSD_OP_STAT); - unsigned p = ops.size() - 1; - C_ObjectOperation_stat *h = new C_ObjectOperation_stat(psize, NULL, NULL, pts, - prval); - out_bl[p] = &h->bl; - out_handler[p] = h; - out_rval[p] = prval; + set_handler(CB_ObjectOperation_stat(psize, nullptr, nullptr, pts, prval, nullptr)); + out_rval.back() = prval; + } + void stat(uint64_t *psize, ceph::real_time *pmtime, nullptr_t) { + add_op(CEPH_OSD_OP_STAT); + set_handler(CB_ObjectOperation_stat(psize, pmtime, nullptr, 
nullptr, nullptr, + nullptr)); + } + void stat(uint64_t *psize, time_t *ptime, nullptr_t) { + add_op(CEPH_OSD_OP_STAT); + set_handler(CB_ObjectOperation_stat(psize, nullptr, ptime, nullptr, nullptr, + nullptr)); } + void stat(uint64_t *psize, struct timespec *pts, nullptr_t) { + add_op(CEPH_OSD_OP_STAT); + set_handler(CB_ObjectOperation_stat(psize, nullptr, nullptr, pts, nullptr, + nullptr)); + } + void stat(uint64_t *psize, nullptr_t, nullptr_t) { + add_op(CEPH_OSD_OP_STAT); + set_handler(CB_ObjectOperation_stat(psize, nullptr, nullptr, nullptr, + nullptr, nullptr)); + } + // object cmpext - struct C_ObjectOperation_cmpext : public Context { - int *prval; - explicit C_ObjectOperation_cmpext(int *prval) + struct CB_ObjectOperation_cmpext { + int* prval = nullptr; + boost::system::error_code* ec = nullptr; + std::size_t* s = nullptr; + explicit CB_ObjectOperation_cmpext(int *prval) : prval(prval) {} + CB_ObjectOperation_cmpext(boost::system::error_code* ec, std::size_t* s) + : ec(ec), s(s) {} - void finish(int r) { + void operator()(boost::system::error_code ec, int r, const ceph::buffer::list&) { if (prval) *prval = r; + if (this->ec) + *this->ec = ec; + if (s) + *s = static_cast(-(MAX_ERRNO - r)); } }; void cmpext(uint64_t off, ceph::buffer::list& cmp_bl, int *prval) { add_data(CEPH_OSD_OP_CMPEXT, off, cmp_bl.length(), cmp_bl); - unsigned p = ops.size() - 1; - C_ObjectOperation_cmpext *h = new C_ObjectOperation_cmpext(prval); - out_handler[p] = h; - out_rval[p] = prval; + set_handler(CB_ObjectOperation_cmpext(prval)); + out_rval.back() = prval; + } + + void cmpext(uint64_t off, ceph::buffer::list&& cmp_bl, boost::system::error_code* ec, + std::size_t* s) { + add_data(CEPH_OSD_OP_CMPEXT, off, cmp_bl.length(), cmp_bl); + set_handler(CB_ObjectOperation_cmpext(ec, s)); + out_ec.back() = ec; } // Used by C API @@ -311,10 +453,8 @@ struct ObjectOperation { ceph::buffer::list cmp_bl; cmp_bl.append(cmp_buf, cmp_len); add_data(CEPH_OSD_OP_CMPEXT, off, cmp_len, cmp_bl); - unsigned p = ops.size() - 1; - C_ObjectOperation_cmpext *h = new C_ObjectOperation_cmpext(prval); - out_handler[p] = h; - out_rval[p] = prval; + set_handler(CB_ObjectOperation_cmpext(prval)); + out_rval.back() = prval; } void read(uint64_t off, uint64_t len, ceph::buffer::list *pbl, int *prval, @@ -324,20 +464,29 @@ struct ObjectOperation { unsigned p = ops.size() - 1; out_bl[p] = pbl; out_rval[p] = prval; - out_handler[p] = ctx; + set_handler(ctx); } - struct C_ObjectOperation_sparse_read : public Context { + void read(uint64_t off, uint64_t len, boost::system::error_code* ec, + ceph::buffer::list* pbl) { ceph::buffer::list bl; - ceph::buffer::list *data_bl; - std::map *extents; - int *prval; - C_ObjectOperation_sparse_read(ceph::buffer::list *data_bl, - std::map *extents, - int *prval) - : data_bl(data_bl), extents(extents), prval(prval) {} - void finish(int r) override { - using ceph::decode; + add_data(CEPH_OSD_OP_READ, off, len, bl); + out_ec.back() = ec; + out_bl.back() = pbl; + } + + template + struct CB_ObjectOperation_sparse_read { + ceph::buffer::list* data_bl; + Ex* extents; + int* prval; + boost::system::error_code* pec; + CB_ObjectOperation_sparse_read(ceph::buffer::list* data_bl, + Ex* extents, + int* prval, + boost::system::error_code* pec) + : data_bl(data_bl), extents(extents), prval(prval), pec(pec) {} + void operator()(boost::system::error_code ec, int r, const ceph::buffer::list& bl) { auto iter = bl.cbegin(); if (r >= 0) { // NOTE: it's possible the sub-op has not been executed but the result @@ -347,26 
+496,35 @@ struct ObjectOperation { try { decode(*extents, iter); decode(*data_bl, iter); - } catch (ceph::buffer::error& e) { + } catch (const ceph::buffer::error& e) { if (prval) *prval = -EIO; + if (pec) + *pec = e.code(); } } else if (prval) { *prval = -EIO; - } + if (pec) + *pec = buffer::errc::end_of_buffer; + } } } }; - void sparse_read(uint64_t off, uint64_t len, std::map *m, - ceph::buffer::list *data_bl, int *prval) { + void sparse_read(uint64_t off, uint64_t len, std::map* m, + ceph::buffer::list* data_bl, int* prval) { ceph::buffer::list bl; add_data(CEPH_OSD_OP_SPARSE_READ, off, len, bl); - unsigned p = ops.size() - 1; - C_ObjectOperation_sparse_read *h = - new C_ObjectOperation_sparse_read(data_bl, m, prval); - out_bl[p] = &h->bl; - out_handler[p] = h; - out_rval[p] = prval; + set_handler(CB_ObjectOperation_sparse_read(data_bl, m, prval, nullptr)); + out_rval.back() = prval; + } + void sparse_read(uint64_t off, uint64_t len, + boost::system::error_code* ec, + std::vector>* m, + ceph::buffer::list* data_bl) { + ceph::buffer::list bl; + add_data(CEPH_OSD_OP_SPARSE_READ, off, len, bl); + set_handler(CB_ObjectOperation_sparse_read(data_bl, m, nullptr, ec)); + out_ec.back() = ec; } void write(uint64_t off, ceph::buffer::list& bl, uint64_t truncate_size, @@ -422,7 +580,7 @@ struct ObjectOperation { unsigned p = ops.size() - 1; out_bl[p] = pbl; out_rval[p] = prval; - out_handler[p] = ctx; + set_handler(ctx); } // object attrs @@ -433,28 +591,37 @@ struct ObjectOperation { out_bl[p] = pbl; out_rval[p] = prval; } - struct C_ObjectOperation_decodevals : public Context { - uint64_t max_entries; + void getxattr(std::string_view name, boost::system::error_code* ec, + buffer::list *pbl) { ceph::buffer::list bl; - std::map *pattrs; - bool *ptruncated; - int *prval; - C_ObjectOperation_decodevals(uint64_t m, std::map *pa, - bool *pt, int *pr) - : max_entries(m), pattrs(pa), ptruncated(pt), prval(pr) { + add_xattr(CEPH_OSD_OP_GETXATTR, name, bl); + out_bl.back() = pbl; + out_ec.back() = ec; + } + + template + struct CB_ObjectOperation_decodevals { + uint64_t max_entries; + Vals* pattrs; + bool* ptruncated; + int* prval; + boost::system::error_code* pec; + CB_ObjectOperation_decodevals(uint64_t m, Vals* pa, + bool *pt, int *pr, + boost::system::error_code* pec) + : max_entries(m), pattrs(pa), ptruncated(pt), prval(pr), pec(pec) { if (ptruncated) { *ptruncated = false; } } - void finish(int r) override { - using ceph::decode; + void operator()(boost::system::error_code ec, int r, const ceph::buffer::list& bl) { if (r >= 0) { auto p = bl.cbegin(); try { if (pattrs) decode(*pattrs, p); if (ptruncated) { - std::map ignore; + Vals ignore; if (!pattrs) { decode(ignore, p); pattrs = &ignore; @@ -462,34 +629,36 @@ struct ObjectOperation { if (!p.end()) { decode(*ptruncated, p); } else { - // the OSD did not provide this. since old OSDs do not + // The OSD did not provide this. 
Since old OSDs do not // enfoce omap result limits either, we can infer it from // the size of the result *ptruncated = (pattrs->size() == max_entries); } } - } - catch (ceph::buffer::error& e) { + } catch (const ceph::buffer::error& e) { if (prval) *prval = -EIO; + if (pec) + *pec = e.code(); } } } }; - struct C_ObjectOperation_decodekeys : public Context { + template + struct CB_ObjectOperation_decodekeys { uint64_t max_entries; - ceph::buffer::list bl; - std::set *pattrs; + Keys* pattrs; bool *ptruncated; int *prval; - C_ObjectOperation_decodekeys(uint64_t m, std::set *pa, bool *pt, - int *pr) - : max_entries(m), pattrs(pa), ptruncated(pt), prval(pr) { + boost::system::error_code* pec; + CB_ObjectOperation_decodekeys(uint64_t m, Keys* pa, bool *pt, + int *pr, boost::system::error_code* pec) + : max_entries(m), pattrs(pa), ptruncated(pt), prval(pr), pec(pec) { if (ptruncated) { *ptruncated = false; } } - void finish(int r) override { + void operator()(boost::system::error_code ec, int r, const ceph::buffer::list& bl) { if (r >= 0) { using ceph::decode; auto p = bl.cbegin(); @@ -497,7 +666,7 @@ struct ObjectOperation { if (pattrs) decode(*pattrs, p); if (ptruncated) { - std::set ignore; + Keys ignore; if (!pattrs) { decode(ignore, p); pattrs = &ignore; @@ -511,22 +680,24 @@ struct ObjectOperation { *ptruncated = (pattrs->size() == max_entries); } } - } - catch (ceph::buffer::error& e) { + } catch (const ceph::buffer::error& e) { if (prval) *prval = -EIO; + if (pec) + *pec = e.code(); } } } }; - struct C_ObjectOperation_decodewatchers : public Context { - ceph::buffer::list bl; - std::list *pwatchers; - int *prval; - C_ObjectOperation_decodewatchers(std::list *pw, int *pr) - : pwatchers(pw), prval(pr) {} - void finish(int r) override { - using ceph::decode; + struct CB_ObjectOperation_decodewatchers { + std::list* pwatchers; + int* prval; + boost::system::error_code* pec; + CB_ObjectOperation_decodewatchers(std::list* pw, int* pr, + boost::system::error_code* pec) + : pwatchers(pw), prval(pr), pec(pec) {} + void operator()(boost::system::error_code ec, int r, + const ceph::buffer::list& bl) { if (r >= 0) { auto p = bl.cbegin(); try { @@ -544,21 +715,62 @@ struct ObjectOperation { pwatchers->push_back(std::move(ow)); } } + } catch (const ceph::buffer::error& e) { + if (prval) + *prval = -EIO; + if (pec) + *pec = e.code(); } - catch (ceph::buffer::error& e) { + } + } + }; + + struct CB_ObjectOperation_decodewatchersneo { + std::vector* pwatchers; + int* prval; + boost::system::error_code* pec; + CB_ObjectOperation_decodewatchersneo(std::vector* pw, + int* pr, + boost::system::error_code* pec) + : pwatchers(pw), prval(pr), pec(pec) {} + void operator()(boost::system::error_code ec, int r, + const ceph::buffer::list& bl) { + if (r >= 0) { + auto p = bl.cbegin(); + try { + obj_list_watch_response_t resp; + decode(resp, p); + if (pwatchers) { + for (const auto& watch_item : resp.entries) { + RADOS::ObjWatcher ow; + ow.addr = watch_item.addr.get_legacy_str(); + ow.watcher_id = watch_item.name.num(); + ow.cookie = watch_item.cookie; + ow.timeout_seconds = watch_item.timeout_seconds; + pwatchers->push_back(std::move(ow)); + } + } + } catch (const ceph::buffer::error& e) { if (prval) *prval = -EIO; + if (pec) + *pec = e.code(); } } } }; - struct C_ObjectOperation_decodesnaps : public Context { - ceph::buffer::list bl; + + + struct CB_ObjectOperation_decodesnaps { librados::snap_set_t *psnaps; + RADOS::SnapSet *neosnaps; int *prval; - C_ObjectOperation_decodesnaps(librados::snap_set_t *ps, int *pr) - 
: psnaps(ps), prval(pr) {} - void finish(int r) override { + boost::system::error_code* pec; + CB_ObjectOperation_decodesnaps(librados::snap_set_t* ps, + RADOS::SnapSet* ns, int* pr, + boost::system::error_code* pec) + : psnaps(ps), neosnaps(ns), prval(pr), pec(pec) {} + void operator()(boost::system::error_code ec, int r, const ceph::buffer::list& bl) { if (r >= 0) { using ceph::decode; auto p = bl.cbegin(); @@ -567,7 +779,9 @@ struct ObjectOperation { decode(resp, p); if (psnaps) { psnaps->clones.clear(); - for (auto ci = resp.clones.begin(); ci != resp.clones.end(); ++ci) { + for (auto ci = resp.clones.begin(); + ci != resp.clones.end(); + ++ci) { librados::clone_info_t clone; clone.cloneid = ci->cloneid; @@ -581,9 +795,27 @@ struct ObjectOperation { } psnaps->seq = resp.seq; } - } catch (ceph::buffer::error& e) { + + if (neosnaps) { + neosnaps->clones.clear(); + for (auto&& c : resp.clones) { + RADOS::CloneInfo clone; + + clone.cloneid = std::move(c.cloneid); + clone.snaps.reserve(c.snaps.size()); + std::move(c.snaps.begin(), c.snaps.end(), + clone.snaps.end()); + clone.overlap = c.overlap; + clone.size = c.size; + neosnaps->clones.push_back(std::move(clone)); + } + neosnaps->seq = resp.seq; + } + } catch (const ceph::buffer::error& e) { if (prval) *prval = -EIO; + if (pec) + *pec = e.code(); } } } @@ -591,17 +823,23 @@ struct ObjectOperation { void getxattrs(std::map *pattrs, int *prval) { add_op(CEPH_OSD_OP_GETXATTRS); if (pattrs || prval) { - unsigned p = ops.size() - 1; - C_ObjectOperation_decodevals *h - = new C_ObjectOperation_decodevals(0, pattrs, nullptr, prval); - out_handler[p] = h; - out_bl[p] = &h->bl; - out_rval[p] = prval; + set_handler(CB_ObjectOperation_decodevals(0, pattrs, nullptr, prval, + nullptr)); + out_rval.back() = prval; } } + void getxattrs(boost::system::error_code* ec, + boost::container::flat_map *pattrs) { + add_op(CEPH_OSD_OP_GETXATTRS); + set_handler(CB_ObjectOperation_decodevals(0, pattrs, nullptr, nullptr, ec)); + out_ec.back() = ec; + } void setxattr(const char *name, const ceph::buffer::list& bl) { add_xattr(CEPH_OSD_OP_SETXATTR, name, bl); } + void setxattr(std::string_view name, const ceph::buffer::list& bl) { + add_xattr(CEPH_OSD_OP_SETXATTR, name, bl); + } void setxattr(const char *name, const std::string& s) { ceph::buffer::list bl; bl.append(s); @@ -611,11 +849,19 @@ struct ObjectOperation { const ceph::buffer::list& bl) { add_xattr_cmp(CEPH_OSD_OP_CMPXATTR, name, cmp_op, cmp_mode, bl); } + void cmpxattr(std::string_view name, uint8_t cmp_op, uint8_t cmp_mode, + const ceph::buffer::list& bl) { + add_xattr_cmp(CEPH_OSD_OP_CMPXATTR, name, cmp_op, cmp_mode, bl); + } void rmxattr(const char *name) { ceph::buffer::list bl; add_xattr(CEPH_OSD_OP_RMXATTR, name, bl); } - void setxattrs(std::map& attrs) { + void rmxattr(std::string_view name) { + ceph::buffer::list bl; + add_xattr(CEPH_OSD_OP_RMXATTR, name, bl); + } + void setxattrs(map& attrs) { using ceph::encode; ceph::buffer::list bl; encode(attrs, bl); @@ -648,14 +894,28 @@ struct ObjectOperation { op.op.extent.length = bl.length(); op.indata.claim_append(bl); if (prval || ptruncated || out_set) { - unsigned p = ops.size() - 1; - C_ObjectOperation_decodekeys *h = - new C_ObjectOperation_decodekeys(max_to_get, out_set, ptruncated, prval); - out_handler[p] = h; - out_bl[p] = &h->bl; - out_rval[p] = prval; + set_handler(CB_ObjectOperation_decodekeys(max_to_get, out_set, ptruncated, prval, + nullptr)); + out_rval.back() = prval; } } + void omap_get_keys(std::optional start_after, + uint64_t max_to_get, 
+ boost::system::error_code* ec, + boost::container::flat_set *out_set, + bool *ptruncated) { + OSDOp& op = add_op(CEPH_OSD_OP_OMAPGETKEYS); + ceph::buffer::list bl; + encode(start_after ? *start_after : std::string_view{}, bl); + encode(max_to_get, bl); + op.op.extent.offset = 0; + op.op.extent.length = bl.length(); + op.indata.claim_append(bl); + set_handler( + CB_ObjectOperation_decodekeys(max_to_get, out_set, ptruncated, nullptr, + ec)); + out_ec.back() = ec; + } void omap_get_vals(const std::string &start_after, const std::string &filter_prefix, @@ -673,19 +933,34 @@ struct ObjectOperation { op.op.extent.length = bl.length(); op.indata.claim_append(bl); if (prval || out_set || ptruncated) { - unsigned p = ops.size() - 1; - C_ObjectOperation_decodevals *h = - new C_ObjectOperation_decodevals(max_to_get, out_set, ptruncated, prval); - out_handler[p] = h; - out_bl[p] = &h->bl; - out_rval[p] = prval; + set_handler(CB_ObjectOperation_decodevals(max_to_get, out_set, ptruncated, + prval, nullptr)); + out_rval.back() = prval; } } + void omap_get_vals(std::optional start_after, + std::optional filter_prefix, + uint64_t max_to_get, + boost::system::error_code* ec, + boost::container::flat_map *out_set, + bool *ptruncated) { + OSDOp &op = add_op(CEPH_OSD_OP_OMAPGETVALS); + ceph::buffer::list bl; + encode(start_after ? *start_after : std::string_view{}, bl); + encode(max_to_get, bl); + encode(filter_prefix ? *start_after : std::string_view{}, bl); + op.op.extent.offset = 0; + op.op.extent.length = bl.length(); + op.indata.claim_append(bl); + set_handler(CB_ObjectOperation_decodevals(max_to_get, out_set, ptruncated, + nullptr, ec)); + out_ec.back() = ec; + } + void omap_get_vals_by_keys(const std::set &to_get, - std::map *out_set, - int *prval) { - using ceph::encode; + std::map *out_set, + int *prval) { OSDOp &op = add_op(CEPH_OSD_OP_OMAPGETVALSBYKEYS); ceph::buffer::list bl; encode(to_get, bl); @@ -693,16 +968,28 @@ struct ObjectOperation { op.op.extent.length = bl.length(); op.indata.claim_append(bl); if (prval || out_set) { - unsigned p = ops.size() - 1; - C_ObjectOperation_decodevals *h = - new C_ObjectOperation_decodevals(0, out_set, nullptr, prval); - out_handler[p] = h; - out_bl[p] = &h->bl; - out_rval[p] = prval; + set_handler(CB_ObjectOperation_decodevals(0, out_set, nullptr, prval, + nullptr)); + out_rval.back() = prval; } } - void omap_cmp(const std::map > &assertions, + void omap_get_vals_by_keys( + const boost::container::flat_set& to_get, + boost::system::error_code* ec, + boost::container::flat_map *out_set) { + OSDOp &op = add_op(CEPH_OSD_OP_OMAPGETVALSBYKEYS); + ceph::buffer::list bl; + encode(to_get, bl); + op.op.extent.offset = 0; + op.op.extent.length = bl.length(); + op.indata.claim_append(bl); + set_handler(CB_ObjectOperation_decodevals(0, out_set, nullptr, nullptr, + ec)); + out_ec.back() = ec; + } + + void omap_cmp(const std::map > &assertions, int *prval) { using ceph::encode; OSDOp &op = add_op(CEPH_OSD_OP_OMAP_CMP); @@ -717,6 +1004,18 @@ struct ObjectOperation { } } + void omap_cmp(const boost::container::flat_map< + std::string, pair>& assertions, + boost::system::error_code *ec) { + OSDOp &op = add_op(CEPH_OSD_OP_OMAP_CMP); + ceph::buffer::list bl; + encode(assertions, bl); + op.op.extent.offset = 0; + op.op.extent.length = bl.length(); + op.indata.claim_append(bl); + out_ec.back() = ec; + } + struct C_ObjectOperation_copyget : public Context { ceph::buffer::list bl; object_copy_cursor_t *cursor; @@ -805,7 +1104,7 @@ struct ObjectOperation { if (out_truncate_size) 
*out_truncate_size = copy_reply.truncate_size; *cursor = copy_reply.cursor; - } catch (ceph::buffer::error& e) { + } catch (const ceph::buffer::error& e) { if (prval) *prval = -EIO; } @@ -846,7 +1145,7 @@ struct ObjectOperation { out_reqid_return_codes, truncate_seq, truncate_size, prval); out_bl[p] = &h->bl; - out_handler[p] = h; + set_handler(h); } void undirty() { @@ -869,7 +1168,7 @@ struct ObjectOperation { decode(isdirty, p); if (pisdirty) *pisdirty = isdirty; - } catch (ceph::buffer::error& e) { + } catch (const ceph::buffer::error& e) { if (prval) *prval = -EIO; } @@ -883,7 +1182,7 @@ struct ObjectOperation { C_ObjectOperation_isdirty *h = new C_ObjectOperation_isdirty(pisdirty, prval); out_bl[p] = &h->bl; - out_handler[p] = h; + set_handler(h); } struct C_ObjectOperation_hit_set_ls : public Context { @@ -918,7 +1217,7 @@ struct ObjectOperation { } if (putls) putls->swap(ls); - } catch (ceph::buffer::error& e) { + } catch (const ceph::buffer::error& e) { r = -EIO; } if (prval) @@ -943,7 +1242,7 @@ struct ObjectOperation { C_ObjectOperation_hit_set_ls *h = new C_ObjectOperation_hit_set_ls(pls, NULL, prval); out_bl[p] = &h->bl; - out_handler[p] = h; + set_handler(h); } void hit_set_ls(std::list > *pls, int *prval) { @@ -953,7 +1252,7 @@ struct ObjectOperation { C_ObjectOperation_hit_set_ls *h = new C_ObjectOperation_hit_set_ls(NULL, pls, prval); out_bl[p] = &h->bl; - out_handler[p] = h; + set_handler(h); } /** @@ -981,8 +1280,19 @@ struct ObjectOperation { out_rval[p] = prval; } - void omap_set(const std::map& map) { - using ceph::encode; + void omap_get_header(boost::system::error_code* ec, ceph::buffer::list *bl) { + add_op(CEPH_OSD_OP_OMAPGETHEADER); + out_bl.back() = bl; + out_ec.back() = ec; + } + + void omap_set(const map &map) { + ceph::buffer::list bl; + encode(map, bl); + add_data(CEPH_OSD_OP_OMAPSETVALS, 0, bl.length(), bl); + } + + void omap_set(const boost::container::flat_map& map) { ceph::buffer::list bl; encode(map, bl); add_data(CEPH_OSD_OP_OMAPSETVALS, 0, bl.length(), bl); @@ -1002,6 +1312,11 @@ struct ObjectOperation { encode(to_remove, bl); add_data(CEPH_OSD_OP_OMAPRMKEYS, 0, bl.length(), bl); } + void omap_rm_keys(const boost::container::flat_set& to_remove) { + ceph::buffer::list bl; + encode(to_remove, bl); + add_data(CEPH_OSD_OP_OMAPRMKEYS, 0, bl.length(), bl); + } void omap_rm_range(std::string_view key_begin, std::string_view key_end) { ceph::buffer::list bl; @@ -1021,6 +1336,30 @@ struct ObjectOperation { add_call(CEPH_OSD_OP_CALL, cname, method, indata, outdata, ctx, prval); } + void call(std::string_view cname, std::string_view method, + const ceph::buffer::list& indata, boost::system::error_code* ec) { + add_call(CEPH_OSD_OP_CALL, cname, method, indata, NULL, NULL, NULL); + out_ec.back() = ec; + } + + void call(std::string_view cname, std::string_view method, const ceph::buffer::list& indata, + boost::system::error_code* ec, ceph::buffer::list *outdata) { + add_call(CEPH_OSD_OP_CALL, cname, method, indata, outdata, nullptr, nullptr); + out_ec.back() = ec; + } + void call(std::string_view cname, std::string_view method, + const ceph::buffer::list& indata, + fu2::unique_function f) { + add_call(CEPH_OSD_OP_CALL, cname, method, indata, std::move(f)); + } + void call(std::string_view cname, std::string_view method, + const ceph::buffer::list& indata, + fu2::unique_function f) { + add_call(CEPH_OSD_OP_CALL, cname, method, indata, std::move(f)); + } + // watch/notify void watch(uint64_t cookie, __u8 op, uint32_t timeout = 0) { OSDOp& osd_op = 
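// A sketch of the new move-only callback flavor of call() added above,
// assuming the elided fu2::unique_function signature is
// void(boost::system::error_code, const ceph::buffer::list&); the class
// and method names here are placeholders:
//
//   ObjectOperation op;
//   ceph::buffer::list in;
//   op.call("myclass", "mymethod", in,
//           [](boost::system::error_code ec, const ceph::buffer::list& out) {
//             if (!ec) {
//               // decode the cls-specific reply from 'out'
//             }
//           });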
add_op(CEPH_OSD_OP_WATCH);
@@ -1053,26 +1392,36 @@ struct ObjectOperation {
 
   void list_watchers(std::list<obj_watch_t> *out,
 		     int *prval) {
-    (void)add_op(CEPH_OSD_OP_LIST_WATCHERS);
+    add_op(CEPH_OSD_OP_LIST_WATCHERS);
     if (prval || out) {
-      unsigned p = ops.size() - 1;
-      C_ObjectOperation_decodewatchers *h =
-	new C_ObjectOperation_decodewatchers(out, prval);
-      out_handler[p] = h;
-      out_bl[p] = &h->bl;
-      out_rval[p] = prval;
+      set_handler(CB_ObjectOperation_decodewatchers(out, prval, nullptr));
+      out_rval.back() = prval;
     }
   }
+  void list_watchers(std::vector<RADOS::ObjWatcher>* out,
+		     boost::system::error_code* ec) {
+    add_op(CEPH_OSD_OP_LIST_WATCHERS);
+    set_handler(CB_ObjectOperation_decodewatchersneo(out, nullptr, ec));
+    out_ec.back() = ec;
+  }
 
-  void list_snaps(librados::snap_set_t *out, int *prval) {
-    (void)add_op(CEPH_OSD_OP_LIST_SNAPS);
-    if (prval || out) {
-      unsigned p = ops.size() - 1;
-      C_ObjectOperation_decodesnaps *h =
-	new C_ObjectOperation_decodesnaps(out, prval);
-      out_handler[p] = h;
-      out_bl[p] = &h->bl;
-      out_rval[p] = prval;
+  void list_snaps(librados::snap_set_t *out, int *prval,
+		  boost::system::error_code* ec = nullptr) {
+    add_op(CEPH_OSD_OP_LIST_SNAPS);
+    if (prval || out || ec) {
+      set_handler(CB_ObjectOperation_decodesnaps(out, nullptr, prval, ec));
+      out_rval.back() = prval;
+      out_ec.back() = ec;
+    }
+  }
+
+  void list_snaps(RADOS::SnapSet *out, int *prval,
+		  boost::system::error_code* ec = nullptr) {
+    add_op(CEPH_OSD_OP_LIST_SNAPS);
+    if (prval || out || ec) {
+      set_handler(CB_ObjectOperation_decodesnaps(nullptr, out, prval, ec));
+      out_rval.back() = prval;
+      out_ec.back() = ec;
     }
   }
 
@@ -1218,10 +1567,11 @@ struct ObjectOperation {
     out_bl.resize(sops.size());
     out_handler.resize(sops.size());
     out_rval.resize(sops.size());
+    out_ec.resize(sops.size());
     for (uint32_t i = 0; i < sops.size(); i++) {
       out_bl[i] = &sops[i].outdata;
-      out_handler[i] = NULL;
       out_rval[i] = &sops[i].rval;
+      out_ec[i] = nullptr;
     }
   }
 
@@ -1237,12 +1587,27 @@ struct ObjectOperation {
   }
 };
 
+inline std::ostream& operator <<(std::ostream& m, const ObjectOperation& oo) {
+  auto i = oo.ops.cbegin();
+  m << '[';
+  while (i != oo.ops.cend()) {
+    if (i != oo.ops.cbegin())
+      m << ' ';
+    m << *i;
+    ++i;
+  }
+  m << ']';
+  return m;
+}
 
-// ----------------
+// ----------------
 
 class Objecter : public md_config_obs_t, public Dispatcher {
 public:
+  using OpSignature = void(boost::system::error_code);
+  using OpCompletion = ceph::async::Completion<OpSignature>;
+
   // config observer bits
   const char** get_tracked_conf_keys() const override;
   void handle_conf_change(const ConfigProxy& conf,
@@ -1251,10 +1616,13 @@ public:
 public:
   Messenger *messenger;
   MonClient *monc;
-  Finisher *finisher;
-  ZTracer::Endpoint trace_endpoint;
+  boost::asio::io_context& service;
+  // Guarantees sequenced, one-at-a-time execution, and apparently
+  // people sometimes depend on this.
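The strand declared just below replaces the old Finisher thread for completion
dispatch. A standalone sketch (plain Boost.Asio, nothing Objecter-specific) of
the ordering guarantee being relied on:

  #include <boost/asio/io_context.hpp>
  #include <boost/asio/io_context_strand.hpp>
  #include <boost/asio/post.hpp>
  #include <iostream>
  #include <thread>
  #include <vector>

  int main() {
    boost::asio::io_context ioc;
    boost::asio::io_context::strand strand{ioc};
    int last = -1; // unsynchronized on purpose: the strand serializes access

    // Handlers posted through one strand never run concurrently and run
    // in posting order, even with several threads driving the io_context.
    for (int i = 0; i < 100; ++i)
      boost::asio::post(strand, [i, &last] { last = i; });

    std::vector<std::thread> workers;
    for (int i = 0; i < 4; ++i)
      workers.emplace_back([&ioc] { ioc.run(); });
    for (auto& w : workers)
      w.join();

    std::cout << last << std::endl; // always 99
  }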
+  boost::asio::io_context::strand finish_strand{service};
+  ZTracer::Endpoint trace_endpoint{"0.0.0.0", 0, "Objecter"};
 private:
-  std::unique_ptr<OSDMap> osdmap;
+  std::unique_ptr<OSDMap> osdmap{std::make_unique<OSDMap>()};
 public:
   using Dispatcher::cct;
   std::multimap<std::string, std::string> crush_location;
@@ -1289,7 +1657,8 @@ private:
       : epoch(epoch), up(up), up_primary(up_primary),
 	acting(acting), acting_primary(acting_primary) {}
   };
-  std::shared_mutex pg_mapping_lock;
+  ceph::shared_mutex pg_mapping_lock =
+    ceph::make_shared_mutex("Objecter::pg_mapping_lock");
   // pool -> pg mapping
   std::map<int64_t, std::vector<pg_mapping_t>> pg_mappings;
@@ -1344,14 +1713,11 @@ private:
   version_t last_seen_osdmap_version = 0;
   version_t last_seen_pgmap_version = 0;
 
-  mutable std::shared_mutex rwlock;
-  using lock_guard = std::lock_guard<decltype(rwlock)>;
-  using unique_lock = std::unique_lock<decltype(rwlock)>;
-  using shared_lock = boost::shared_lock<decltype(rwlock)>;
-  using shunique_lock = ceph::shunique_lock<decltype(rwlock)>;
+  mutable ceph::shared_mutex rwlock =
+    ceph::make_shared_mutex("Objecter::rwlock");
   ceph::timer<ceph::coarse_mono_clock> timer;
 
-  PerfCounters *logger = nullptr;
+  PerfCounters* logger = nullptr;
 
   uint64_t tick_event = 0;
 
@@ -1447,92 +1813,198 @@ public:
     void dump(ceph::Formatter *f) const;
   };
 
+  std::unique_ptr<ceph::async::Completion<void(boost::system::error_code)>>
+  OpContextVert(Context* c) {
+    if (c)
+      return ceph::async::Completion<void(boost::system::error_code)>::create(
+	service.get_executor(),
+	[c = std::unique_ptr<Context>(c)]
+	(boost::system::error_code e) mutable {
+	  c.release()->complete(e);
+	});
+    else
+      return nullptr;
+  }
+
+  template<typename T>
+  std::unique_ptr<ceph::async::Completion<void(boost::system::error_code, T)>>
+  OpContextVert(Context* c, T* p) {
+
+    if (c || p)
+      return
+	ceph::async::Completion<void(boost::system::error_code, T)>::create(
+	  service.get_executor(),
+	  [c = std::unique_ptr<Context>(c), p]
+	  (boost::system::error_code e, T r) mutable {
+	    if (p)
+	      *p = std::move(r);
+	    if (c)
+	      c.release()->complete(ceph::from_error_code(e));
+	  });
+    else
+      return nullptr;
+  }
+
+  template<typename T>
+  std::unique_ptr<ceph::async::Completion<void(boost::system::error_code, T)>>
+  OpContextVert(Context* c, T& p) {
+    if (c)
+      return ceph::async::Completion<
	void(boost::system::error_code, T)>::create(
+	  service.get_executor(),
+	  [c = std::unique_ptr<Context>(c), &p]
+	  (boost::system::error_code e, T r) mutable {
+	    p = std::move(r);
+	    if (c)
+	      c.release()->complete(ceph::from_error_code(e));
+	  });
    else
      return nullptr;
+  }
+
   struct Op : public RefCountedObject {
-    OSDSession *session;
-    int incarnation;
+    OSDSession *session = nullptr;
+    int incarnation = 0;
     op_target_t target;
-    ConnectionRef con;  // for rx buffer only
-    uint64_t features;  // explicitly specified op features
+    ConnectionRef con = nullptr;  // for rx buffer only
+    uint64_t features = CEPH_FEATURES_SUPPORTED_DEFAULT; // explicitly specified op features
 
     std::vector<OSDOp> ops;
 
-    snapid_t snapid;
+    snapid_t snapid = CEPH_NOSNAP;
     SnapContext snapc;
     ceph::real_time mtime;
 
-    ceph::buffer::list *outbl;
+    ceph::buffer::list *outbl = nullptr;
     std::vector<ceph::buffer::list*> out_bl;
-    std::vector<Context*> out_handler;
+    std::vector<fu2::unique_function<void(boost::system::error_code, int,
					  const ceph::buffer::list& bl) &&>> out_handler;
     std::vector<int*> out_rval;
+    std::vector<boost::system::error_code*> out_ec;
+
+    int priority = 0;
+    using OpSig = void(boost::system::error_code);
+    using OpComp = ceph::async::Completion<OpSig>;
+    // Due to an irregularity of cmpxattr, we actually need the 'int'
+    // value for onfinish for legacy librados users. As such just
+    // preserve the Context* in this one case. That way we can have
+    // our callers just pass in a unique_ptr<OpComp> and not deal with
+    // our signature in Objecter being different than the exposed
+    // signature in RADOS.
+    //
+    // Add a function for the linger case, where we want better
+    // semantics than Context, but still need to be under the completion_lock.
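// A toy model of the dispatch this variant enables (std::function stands
// in for Context*/fu2::unique_function; compiles standalone):
//
//   #include <functional>
//   #include <system_error>
//   #include <variant>
//
//   using LegacyCtx = std::function<void(int)>;         // Context*-style
//   using EcFn = std::function<void(std::error_code)>;  // new-style
//
//   void complete(std::variant<LegacyCtx, EcFn>&& fin,
//                 std::error_code ec, int r) {
//     std::visit([&](auto&& cb) {
//       using T = std::decay_t<decltype(cb)>;
//       if constexpr (std::is_same_v<T, LegacyCtx>)
//         cb(r);    // legacy int-result path
//       else
//         cb(ec);   // error_code path
//     }, std::move(fin));
//   }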
+ std::variant, fu2::unique_function, + Context*> onfinish; + uint64_t ontimeout = 0; - int priority; - Context *onfinish; - uint64_t ontimeout; - - ceph_tid_t tid; - int attempts; + ceph_tid_t tid = 0; + int attempts = 0; version_t *objver; - epoch_t *reply_epoch; + epoch_t *reply_epoch = nullptr; ceph::coarse_mono_time stamp; - epoch_t map_dne_bound; + epoch_t map_dne_bound = 0; - int budget; + int budget = -1; /// true if we should resend this message on failure - bool should_resend; + bool should_resend = true; /// true if the throttle budget is get/put on a series of OPs, /// instead of per OP basis, when this flag is set, the budget is /// acquired before sending the very first OP of the series and /// released upon receiving the last OP reply. - bool ctx_budgeted; + bool ctx_budgeted = false; int *data_offset; osd_reqid_t reqid; // explicitly setting reqid ZTracer::Trace trace; - Op(const object_t& o, const object_locator_t& ol, std::vector& op, - int f, Context *fin, version_t *ov, int *offset = NULL, + static bool has_completion(decltype(onfinish)& f) { + return std::visit([](auto&& arg) { return bool(arg);}, f); + } + bool has_completion() { + return has_completion(onfinish); + } + + static void complete(decltype(onfinish)&& f, boost::system::error_code ec, + int r) { + std::visit([ec, r](auto&& arg) { + if constexpr (std::is_same_v, + Context*>) { + arg->complete(r); + } else if constexpr (std::is_same_v, + fu2::unique_function>) { + std::move(arg)(ec); + } else { + arg->defer(std::move(arg), ec); + } + }, std::move(f)); + } + void complete(boost::system::error_code ec, int r) { + complete(std::move(onfinish), ec, r); + } + + Op(const object_t& o, const object_locator_t& ol, vector&& _ops, + int f, std::unique_ptr&& fin, + version_t *ov, int *offset = nullptr, ZTracer::Trace *parent_trace = nullptr) : - session(NULL), incarnation(0), target(o, ol, f), - con(NULL), - features(CEPH_FEATURES_SUPPORTED_DEFAULT), - snapid(CEPH_NOSNAP), - outbl(NULL), - priority(0), + ops(std::move(_ops)), + out_bl(ops.size(), nullptr), + out_handler(ops.size()), + out_rval(ops.size(), nullptr), + out_ec(ops.size(), nullptr), + onfinish(std::move(fin)), + objver(ov), + data_offset(offset) { + if (target.base_oloc.key == o) + target.base_oloc.key.clear(); + if (parent_trace && parent_trace->valid()) { + trace.init("op", nullptr, parent_trace); + trace.event("start"); + } + } + Op(const object_t& o, const object_locator_t& ol, vector&& _ops, + int f, Context* fin, version_t *ov, int *offset = nullptr, + ZTracer::Trace *parent_trace = nullptr) : + target(o, ol, f), + ops(std::move(_ops)), + out_bl(ops.size(), nullptr), + out_handler(ops.size()), + out_rval(ops.size(), nullptr), + out_ec(ops.size(), nullptr), onfinish(fin), - ontimeout(0), - tid(0), - attempts(0), objver(ov), - reply_epoch(NULL), - map_dne_bound(0), - budget(-1), - should_resend(true), - ctx_budgeted(false), data_offset(offset) { - ops.swap(op); - - /* initialize out_* to match op std::vector */ - out_bl.resize(ops.size()); - out_rval.resize(ops.size()); - out_handler.resize(ops.size()); - for (unsigned i = 0; i < ops.size(); i++) { - out_bl[i] = NULL; - out_handler[i] = NULL; - out_rval[i] = NULL; + if (target.base_oloc.key == o) + target.base_oloc.key.clear(); + if (parent_trace && parent_trace->valid()) { + trace.init("op", nullptr, parent_trace); + trace.event("start"); } + } + Op(const object_t& o, const object_locator_t& ol, vector&& _ops, + int f, fu2::unique_function&& fin, version_t *ov, int *offset = nullptr, + ZTracer::Trace 
*parent_trace = nullptr) : + target(o, ol, f), + ops(std::move(_ops)), + out_bl(ops.size(), nullptr), + out_handler(ops.size()), + out_rval(ops.size(), nullptr), + out_ec(ops.size(), nullptr), + onfinish(std::move(fin)), + objver(ov), + data_offset(offset) { if (target.base_oloc.key == o) target.base_oloc.key.clear(); - if (parent_trace && parent_trace->valid()) { trace.init("op", nullptr, parent_trace); trace.event("start"); @@ -1545,10 +2017,6 @@ public: private: ~Op() override { - while (!out_handler.empty()) { - delete out_handler.back(); - out_handler.pop_back(); - } trace.event("finish"); } }; @@ -1662,40 +2130,43 @@ public: struct PoolStatOp { ceph_tid_t tid; - std::list pools; - - std::map *pool_stats; - bool *per_pool; - Context *onfinish; - uint64_t ontimeout; - + std::vector pools; + using OpSig = void(boost::system::error_code, + boost::container::flat_map, + bool); + using OpComp = ceph::async::Completion; + std::unique_ptr onfinish; + std::uint64_t ontimeout; ceph::coarse_mono_time last_submit; }; struct StatfsOp { ceph_tid_t tid; - struct ceph_statfs *stats; boost::optional data_pool; - Context *onfinish; + using OpSig = void(boost::system::error_code, + const struct ceph_statfs); + using OpComp = ceph::async::Completion; + + std::unique_ptr onfinish; uint64_t ontimeout; ceph::coarse_mono_time last_submit; }; struct PoolOp { - ceph_tid_t tid; - int64_t pool; + ceph_tid_t tid = 0; + int64_t pool = 0; std::string name; - Context *onfinish; - uint64_t ontimeout; - int pool_op; - int16_t crush_rule; - snapid_t snapid; - ceph::buffer::list *blp; - + using OpSig = void(boost::system::error_code, ceph::buffer::list); + using OpComp = ceph::async::Completion; + std::unique_ptr onfinish; + uint64_t ontimeout = 0; + int pool_op = 0; + int16_t crush_rule = 0; + snapid_t snapid = 0; ceph::coarse_mono_time last_submit; - PoolOp() : tid(0), pool(0), onfinish(NULL), ontimeout(0), pool_op(0), - crush_rule(0), snapid(0), blp(NULL) {} + + PoolOp() {} }; // -- osd commands -- @@ -1704,8 +2175,6 @@ public: ceph_tid_t tid = 0; std::vector cmd; ceph::buffer::list inbl; - ceph::buffer::list *poutbl = nullptr; - std::string *prs = nullptr; // target_osd == -1 means target_pg is valid const int target_osd = -1; @@ -1717,108 +2186,91 @@ public: int map_check_error = 0; // error to return if std::map check fails const char *map_check_error_str = nullptr; - Context *onfinish = nullptr; + using OpSig = void(boost::system::error_code, std::string, + ceph::buffer::list); + using OpComp = ceph::async::Completion; + std::unique_ptr onfinish; + uint64_t ontimeout = 0; ceph::coarse_mono_time last_submit; CommandOp( int target_osd, - const std::vector &cmd, - ceph::buffer::list inbl, - ceph::buffer::list *poutbl, - std::string *prs, - Context *onfinish) - : cmd(cmd), - inbl(inbl), - poutbl(poutbl), - prs(prs), + std::vector&& cmd, + ceph::buffer::list&& inbl, + decltype(onfinish)&& onfinish) + : cmd(std::move(cmd)), + inbl(std::move(inbl)), target_osd(target_osd), - onfinish(onfinish) {} + onfinish(std::move(onfinish)) {} CommandOp( pg_t pgid, - const std::vector &cmd, - ceph::buffer::list inbl, - ceph::buffer::list *poutbl, - std::string *prs, - Context *onfinish) - : cmd(cmd), - inbl(inbl), - poutbl(poutbl), - prs(prs), + std::vector&& cmd, + ceph::buffer::list&& inbl, + decltype(onfinish)&& onfinish) + : cmd(std::move(cmd)), + inbl(std::move(inbl)), target_pg(pgid), target(pgid), - onfinish(onfinish) {} - + onfinish(std::move(onfinish)) {} }; void submit_command(CommandOp *c, ceph_tid_t *ptid); - int 
_calc_command_target(CommandOp *c, shunique_lock &sul); - void _assign_command_session(CommandOp *c, shunique_lock &sul); + int _calc_command_target(CommandOp *c, + ceph::shunique_lock &sul); + void _assign_command_session(CommandOp *c, + ceph::shunique_lock &sul); void _send_command(CommandOp *c); - int command_op_cancel(OSDSession *s, ceph_tid_t tid, int r); - void _finish_command(CommandOp *c, int r, std::string rs); + int command_op_cancel(OSDSession *s, ceph_tid_t tid, + boost::system::error_code ec); + void _finish_command(CommandOp *c, boost::system::error_code ec, + std::string&& rs, ceph::buffer::list&& bl); void handle_command_reply(MCommandReply *m); - // -- lingering ops -- - struct WatchContext { - // this simply mirrors librados WatchCtx2 - virtual void handle_notify(uint64_t notify_id, - uint64_t cookie, - uint64_t notifier_id, - ceph::buffer::list& bl) = 0; - virtual void handle_error(uint64_t cookie, int err) = 0; - virtual ~WatchContext() {} - }; - struct LingerOp : public RefCountedObject { - uint64_t linger_id; - - op_target_t target; - - snapid_t snap; + Objecter *objecter; + uint64_t linger_id{0}; + op_target_t target{object_t(), object_locator_t(), 0}; + snapid_t snap{CEPH_NOSNAP}; SnapContext snapc; ceph::real_time mtime; std::vector ops; ceph::buffer::list inbl; - ceph::buffer::list *poutbl; - version_t *pobjver; + version_t *pobjver{nullptr}; - bool is_watch; + bool is_watch{false}; ceph::coarse_mono_time watch_valid_thru; ///< send time for last acked ping - int last_error; ///< error from last failed ping|reconnect, if any - std::shared_mutex watch_lock; - using lock_guard = std::unique_lock; - using unique_lock = std::unique_lock; - using shared_lock = boost::shared_lock; - using shunique_lock = ceph::shunique_lock; + boost::system::error_code last_error; ///< error from last failed ping|reconnect, if any + ceph::shared_mutex watch_lock; // queue of pending async operations, with the timestamp of // when they were queued. 
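// The CB_Linger_* callbacks further down hold the LingerOp via
// boost::intrusive_ptr instead of pairing manual get()/put() calls. A
// self-contained sketch of that pattern (toy refcounted type, standing
// in for the real RefCountedObject):
//
//   #include <boost/intrusive_ptr.hpp>
//   #include <atomic>
//
//   struct Refd { std::atomic<int> refs{0}; };
//   inline void intrusive_ptr_add_ref(Refd* p) { ++p->refs; }
//   inline void intrusive_ptr_release(Refd* p) { if (--p->refs == 0) delete p; }
//
//   struct Callback {
//     boost::intrusive_ptr<Refd> info;         // pins the object
//     explicit Callback(Refd* r) : info(r) {}  // takes a ref here...
//     void operator()() { /* *info is safe to use */ }
//   };                                         // ...drops it in ~Callback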
    std::list<ceph::coarse_mono_time> watch_pending_async;
 
-    uint32_t register_gen;
-    bool registered;
-    bool canceled;
-    Context *on_reg_commit;
-
-    // we trigger these from an async finisher
-    Context *on_notify_finish;
-    ceph::buffer::list *notify_result_bl;
-    uint64_t notify_id;
-
-    WatchContext *watch_context;
-
-    OSDSession *session;
-
-    Objecter *objecter;
-    int ctx_budget;
-    ceph_tid_t register_tid;
-    ceph_tid_t ping_tid;
-    epoch_t map_dne_bound;
+    uint32_t register_gen{0};
+    bool registered{false};
+    bool canceled{false};
+    using OpSig = void(boost::system::error_code, ceph::buffer::list);
+    using OpComp = ceph::async::Completion<OpSig>;
+    std::unique_ptr<OpComp> on_reg_commit;
+    std::unique_ptr<OpComp> on_notify_finish;
+    uint64_t notify_id{0};
+
+    fu2::unique_function<void(boost::system::error_code,
			      uint64_t notify_id,
			      uint64_t cookie,
			      uint64_t notifier_id,
			      ceph::buffer::list&& bl) &&> handle;
+    OSDSession *session{nullptr};
+
+    int ctx_budget{-1};
+    ceph_tid_t register_tid{0};
+    ceph_tid_t ping_tid{0};
+    epoch_t map_dne_bound{0};
 
     void _queued_async() {
       // watch_lock must be locked unique
@@ -1830,81 +2282,53 @@ public:
       watch_pending_async.pop_front();
     }
 
-    explicit LingerOp(Objecter *o) : linger_id(0),
-				     target(object_t(), object_locator_t(), 0),
-				     snap(CEPH_NOSNAP), poutbl(NULL), pobjver(NULL),
-				     is_watch(false), last_error(0),
-				     register_gen(0),
-				     registered(false),
-				     canceled(false),
-				     on_reg_commit(NULL),
-				     on_notify_finish(NULL),
-				     notify_result_bl(NULL),
-				     notify_id(0),
-				     watch_context(NULL),
-				     session(NULL),
-				     objecter(o),
-				     ctx_budget(-1),
-				     register_tid(0),
-				     ping_tid(0),
-				     map_dne_bound(0) {}
-
-    const LingerOp &operator=(const LingerOp& r) = delete;
+    explicit LingerOp(Objecter *o, uint64_t linger_id)
+      : objecter(o), linger_id(linger_id),
+	watch_lock(ceph::make_shared_mutex(
+		     fmt::format("LingerOp::watch_lock #{}", linger_id))) {}
+
+    const LingerOp& operator=(const LingerOp& r) = delete;
     LingerOp(const LingerOp& o) = delete;
 
     uint64_t get_cookie() {
      return reinterpret_cast<uint64_t>(this);
    }
-
-  private:
-    ~LingerOp() override {
-      delete watch_context;
-    }
   };
 
-  struct C_Linger_Commit : public Context {
+  struct CB_Linger_Commit {
    Objecter *objecter;
-    LingerOp *info;
+    boost::intrusive_ptr<LingerOp> info;
    ceph::buffer::list outbl;  // used for notify only
-    C_Linger_Commit(Objecter *o, LingerOp *l) : objecter(o), info(l) {
-      info->get();
-    }
-    ~C_Linger_Commit() override {
-      info->put();
-    }
-    void finish(int r) override {
-      objecter->_linger_commit(info, r, outbl);
+    CB_Linger_Commit(Objecter *o, LingerOp *l) : objecter(o), info(l) {}
+    ~CB_Linger_Commit() = default;
+
+    void operator()(boost::system::error_code ec) {
+      objecter->_linger_commit(info.get(), ec, outbl);
    }
  };
 
-  struct C_Linger_Reconnect : public Context {
+  struct CB_Linger_Reconnect {
    Objecter *objecter;
-    LingerOp *info;
-    C_Linger_Reconnect(Objecter *o, LingerOp *l) : objecter(o), info(l) {
-      info->get();
-    }
-    ~C_Linger_Reconnect() override {
-      info->put();
-    }
-    void finish(int r) override {
-      objecter->_linger_reconnect(info, r);
+    boost::intrusive_ptr<LingerOp> info;
+    CB_Linger_Reconnect(Objecter *o, LingerOp *l) : objecter(o), info(l) {}
+    ~CB_Linger_Reconnect() = default;
+
+    void operator()(boost::system::error_code ec) {
+      objecter->_linger_reconnect(info.get(), ec);
+      info.reset();
    }
  };
 
-  struct C_Linger_Ping : public Context {
+  struct CB_Linger_Ping {
    Objecter *objecter;
-    LingerOp *info;
+    boost::intrusive_ptr<LingerOp> info;
    ceph::coarse_mono_time sent;
    uint32_t register_gen;
-    C_Linger_Ping(Objecter *o, LingerOp *l)
-      : objecter(o), info(l), register_gen(info->register_gen) {
-      info->get();
-    }
-    ~C_Linger_Ping() override {
-      info->put();
-    }
-    void finish(int r) override {
objecter->_linger_ping(info, r, sent, register_gen); + CB_Linger_Ping(Objecter *o, LingerOp *l, ceph::coarse_mono_time s) + : objecter(o), info(l), sent(s), register_gen(info->register_gen) {} + void operator()(boost::system::error_code ec) { + objecter->_linger_ping(info.get(), ec, sent, register_gen); + info.reset(); } }; @@ -1923,12 +2347,6 @@ public: }; struct OSDSession : public RefCountedObject { - std::shared_mutex lock; - using lock_guard = std::lock_guard; - using unique_lock = std::unique_lock; - using shared_lock = boost::shared_lock; - using shunique_lock = ceph::shunique_lock; - // pending ops std::map ops; std::map linger_ops; @@ -1939,16 +2357,17 @@ public: std::map backoffs_by_id; int osd; + ceph::shared_mutex lock; + int incarnation; ConnectionRef con; int num_locks; std::unique_ptr completion_locks; - using unique_completion_lock = std::unique_lock< - decltype(completion_locks)::element_type>; - OSDSession(CephContext *cct, int o) : - osd(o), incarnation(0), con(NULL), + osd(o), lock(ceph::make_shared_mutex( + fmt::format("OSDSession::lock #{}", o))), + incarnation(0), con(NULL), num_locks(cct->_conf->objecter_completion_locks_per_session), completion_locks(new std::mutex[num_locks]) {} @@ -1956,7 +2375,7 @@ public: bool is_homeless() { return (osd == -1); } - unique_completion_lock get_lock(object_t& oid); + std::unique_lock get_lock(object_t& oid); }; std::map osd_sessions; @@ -1987,7 +2406,8 @@ public: std::map pool_ops; std::atomic num_homeless_ops{0}; - OSDSession *homeless_session; + OSDSession* homeless_session = new OSDSession(cct, -1); + // ops waiting for an osdmap with a new pool or confirmation that // the pool does not exist (may be expanded to other uses later) @@ -1995,7 +2415,9 @@ public: std::map check_latest_map_ops; std::map check_latest_map_commands; - std::map > > waiting_for_map; + std::map, + boost::system::error_code>>> waiting_for_map; ceph::timespan mon_timeout; ceph::timespan osd_timeout; @@ -2028,7 +2450,7 @@ public: int _calc_target(op_target_t *t, Connection *con, bool any_change = false); int _map_session(op_target_t *op, OSDSession **s, - shunique_lock& lc); + ceph::shunique_lock& lc); void _session_op_assign(OSDSession *s, Op *op); void _session_op_remove(OSDSession *s, Op *op); @@ -2037,28 +2459,35 @@ public: void _session_command_op_assign(OSDSession *to, CommandOp *op); void _session_command_op_remove(OSDSession *from, CommandOp *op); - int _assign_op_target_session(Op *op, shunique_lock& lc, + int _assign_op_target_session(Op *op, ceph::shunique_lock& lc, bool src_session_locked, bool dst_session_locked); - int _recalc_linger_op_target(LingerOp *op, shunique_lock& lc); - - void _linger_submit(LingerOp *info, shunique_lock& sul); - void _send_linger(LingerOp *info, shunique_lock& sul); - void _linger_commit(LingerOp *info, int r, ceph::buffer::list& outbl); - void _linger_reconnect(LingerOp *info, int r); + int _recalc_linger_op_target(LingerOp *op, + ceph::shunique_lock& lc); + + void _linger_submit(LingerOp *info, + ceph::shunique_lock& sul); + void _send_linger(LingerOp *info, + ceph::shunique_lock& sul); + void _linger_commit(LingerOp *info, boost::system::error_code ec, + ceph::buffer::list& outbl); + void _linger_reconnect(LingerOp *info, boost::system::error_code ec); void _send_linger_ping(LingerOp *info); - void _linger_ping(LingerOp *info, int r, ceph::coarse_mono_time sent, - uint32_t register_gen); - int _normalize_watch_error(int r); + void _linger_ping(LingerOp *info, boost::system::error_code ec, + ceph::coarse_mono_time 
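// linger_callback_flush() below and the wait_for_* templates all follow
// the Boost.Asio completion-token idiom: one initiating function can
// drive a plain callback, a future, or ca::use_blocked alike. A
// standalone sketch of the pattern (async_answer is a made-up name):
//
//   #include <boost/asio/async_result.hpp>
//   #include <boost/asio/io_context.hpp>
//   #include <boost/asio/post.hpp>
//   #include <boost/asio/use_future.hpp>
//
//   template<typename CompletionToken>
//   auto async_answer(boost::asio::io_context& ioc, CompletionToken&& token) {
//     boost::asio::async_completion<CompletionToken, void(int)> init(token);
//     boost::asio::post(ioc, [h = std::move(init.completion_handler)]() mutable {
//       std::move(h)(42);
//     });
//     return init.result.get();
//   }
//
//   // async_answer(ioc, [](int v) { /* ... */ });           // callback flavor
//   // auto f = async_answer(ioc, boost::asio::use_future);  // future flavor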
sent, uint32_t register_gen); + boost::system::error_code _normalize_watch_error(boost::system::error_code ec); - friend class C_DoWatchError; + friend class CB_DoWatchError; public: - void linger_callback_flush(Context *ctx) { - finisher->queue(ctx); + template + auto linger_callback_flush(CT&& ct) { + boost::asio::async_completion init(ct); + boost::asio::defer(finish_strand, std::move(init.completion_handler)); + return init.result.get(); } private: - void _check_op_pool_dne(Op *op, unique_lock *sl); + void _check_op_pool_dne(Op *op, std::unique_lock *sl); void _send_op_map_check(Op *op); void _op_cancel_map_check(Op *op); void _check_linger_pool_dne(LingerOp *op, bool *need_unregister); @@ -2069,9 +2498,11 @@ private: void _command_cancel_map_check(CommandOp *op); void _kick_requests(OSDSession *session, std::map& lresend); - void _linger_ops_resend(std::map& lresend, unique_lock& ul); + void _linger_ops_resend(std::map& lresend, + std::unique_lock& ul); - int _get_session(int osd, OSDSession **session, shunique_lock& sul); + int _get_session(int osd, OSDSession **session, + ceph::shunique_lock& sul); void put_session(OSDSession *s); void get_session(OSDSession *s); void _reopen_session(OSDSession *session); @@ -2089,8 +2520,9 @@ private: * If throttle_op needs to throttle it will unlock client_lock. */ int calc_op_budget(const std::vector& ops); - void _throttle_op(Op *op, shunique_lock& sul, int op_size = 0); - int _take_op_budget(Op *op, shunique_lock& sul) { + void _throttle_op(Op *op, ceph::shunique_lock& sul, + int op_size = 0); + int _take_op_budget(Op *op, ceph::shunique_lock& sul) { ceph_assert(sul && sul.mutex() == &rwlock); int op_budget = calc_op_budget(op->ops); if (keep_balanced_budget) { @@ -2103,18 +2535,21 @@ private: return op_budget; } int take_linger_budget(LingerOp *info); - friend struct WatchContext; // to invoke put_up_budget_bytes void put_op_budget_bytes(int op_budget) { ceph_assert(op_budget >= 0); op_throttle_bytes.put(op_budget); op_throttle_ops.put(1); } void put_nlist_context_budget(NListContext *list_context); - Throttle op_throttle_bytes, op_throttle_ops; - + Throttle op_throttle_bytes{cct, "objecter_bytes", + static_cast( + cct->_conf->objecter_inflight_op_bytes)}; + Throttle op_throttle_ops{cct, "objecter_ops", + static_cast( + cct->_conf->objecter_inflight_ops)}; public: - Objecter(CephContext *cct_, Messenger *m, MonClient *mc, - Finisher *fin, + Objecter(CephContext *cct, Messenger *m, MonClient *mc, + boost::asio::io_context& service, double mon_timeout, double osd_timeout); ~Objecter() override; @@ -2166,7 +2601,7 @@ private: std::map& need_resend, std::list& need_resend_linger, std::map& need_resend_command, - shunique_lock& sul); + ceph::shunique_lock& sul); int64_t get_object_hash_position(int64_t pool, const std::string& key, const std::string& ns); @@ -2200,6 +2635,27 @@ private: void handle_osd_map(class MOSDMap *m); void wait_for_osd_map(epoch_t e=0); + template + auto wait_for_osd_map(CompletionToken&& token) { + boost::asio::async_completion init(token); + unique_lock l(rwlock); + if (osdmap->get_epoch()) { + l.unlock(); + boost::asio::dispatch(std::move(init.completion_handler)); + } else { + waiting_for_map[0].emplace_back( + OpCompletion::create( + service.get_executor(), + [c = std::move(init.completion_handler)] + (boost::system::error_code) mutable { + std::move(c)(); + }), boost::system::error_code{}); + l.unlock(); + } + return init.result.get(); + } + + /** * Get std::list of entities blacklisted since this was last called, * 
and reset the std::list. @@ -2224,15 +2680,17 @@ private: const OSDMap &new_osd_map); // low-level - void _op_submit(Op *op, shunique_lock& lc, ceph_tid_t *ptid); - void _op_submit_with_budget(Op *op, shunique_lock& lc, + void _op_submit(Op *op, ceph::shunique_lock& lc, + ceph_tid_t *ptid); + void _op_submit_with_budget(Op *op, + ceph::shunique_lock& lc, ceph_tid_t *ptid, int *ctx_budget = NULL); // public interface public: void op_submit(Op *op, ceph_tid_t *ptid = NULL, int *ctx_budget = NULL); bool is_active() { - shared_lock l(rwlock); + std::shared_lock l(rwlock); return !((!inflight_ops) && linger_ops.empty() && poolstat_ops.empty() && statfs_ops.empty()); } @@ -2258,11 +2716,87 @@ public: void set_client_incarnation(int inc) { client_inc = inc; } bool have_map(epoch_t epoch); - /// wait for epoch; true if we already have it - bool wait_for_map(epoch_t epoch, Context *c, int err=0); - void _wait_for_new_map(Context *c, epoch_t epoch, int err=0); - void wait_for_latest_osdmap(Context *fin); - void get_latest_version(epoch_t oldest, epoch_t neweset, Context *fin); + + struct CB_Objecter_GetVersion { + Objecter *objecter; + std::unique_ptr fin; + + CB_Objecter_GetVersion(Objecter *o, std::unique_ptr c) + : objecter(o), fin(std::move(c)) {} + void operator()(boost::system::error_code ec, version_t newest, + version_t oldest) { + if (ec == boost::system::errc::resource_unavailable_try_again) { + // try again as instructed + objecter->wait_for_latest_osdmap(std::move(fin)); + } else if (ec) { + ceph::async::post(std::move(fin), ec); + } else { + auto l = std::unique_lock(objecter->rwlock); + objecter->_get_latest_version(oldest, newest, std::move(fin), + std::move(l)); + } + } + }; + + template + typename boost::asio::async_result::return_type + wait_for_map(epoch_t epoch, CompletionToken&& token) { + boost::asio::async_completion init(token); + + if (osdmap->get_epoch() >= epoch) { + boost::asio::post(service, + ceph::async::bind_handler( + std::move(init.completion_handler), + boost::system::error_code())); + } else { + monc->get_version("osdmap", + CB_Objecter_GetVersion( + this, + OpCompletion::create(service.get_executor(), + std::move(init.completion_handler)))); + } + return init.result.get(); + } + + void _wait_for_new_map(std::unique_ptr, epoch_t epoch, + boost::system::error_code = {}); + + template + typename boost::asio::async_result::return_type + wait_for_latest_osdmap(CompletionToken&& token) { + boost::asio::async_completion init(token); + + monc->get_version("osdmap", + CB_Objecter_GetVersion( + this, + OpCompletion::create(service.get_executor(), + std::move(init.completion_handler)))); + return init.result.get(); + } + + void wait_for_latest_osdmap(std::unique_ptr c) { + monc->get_version("osdmap", + CB_Objecter_GetVersion(this, std::move(c))); + } + + template + auto get_latest_version(epoch_t oldest, epoch_t newest, + CompletionToken&& token) { + boost::asio::async_completion init(token); + { + std::unique_lock wl(rwlock); + _get_latest_version(oldest, newest, + OpCompletion::create( + service.get_executor(), + std::move(init.completion_handler)), + std::move(wl)); + } + return init.result.get(); + } + + void _get_latest_version(epoch_t oldest, epoch_t neweset, + std::unique_ptr fin, + std::unique_lock&& ul); /** Get the current set of global op flags */ int get_global_op_flags() const { return global_op_flags; } @@ -2294,32 +2828,52 @@ public: epoch_t op_cancel_writes(int r, int64_t pool=-1); // commands - void osd_command(int osd, const std::vector& cmd, - const 
ceph::buffer::list& inbl, ceph_tid_t *ptid, - ceph::buffer::list *poutbl, std::string *prs, Context *onfinish) { + void osd_command(int osd, std::vector cmd, + ceph::buffer::list inbl, ceph_tid_t *ptid, + decltype(CommandOp::onfinish)&& onfinish) { ceph_assert(osd >= 0); - CommandOp *c = new CommandOp( + auto c = new CommandOp( osd, - cmd, - inbl, - poutbl, - prs, - onfinish); + std::move(cmd), + std::move(inbl), + std::move(onfinish)); submit_command(c, ptid); } - void pg_command(pg_t pgid, const std::vector& cmd, - const ceph::buffer::list& inbl, ceph_tid_t *ptid, - ceph::buffer::list *poutbl, std::string *prs, Context *onfinish) { - CommandOp *c = new CommandOp( + template + auto osd_command(int osd, std::vector cmd, + ceph::buffer::list inbl, ceph_tid_t *ptid, + CompletionToken&& token) { + boost::asio::async_completion init(token); + osd_command(osd, std::move(cmd), std::move(inbl), ptid, + CommandOp::OpComp::create(service.get_executor(), + std::move(init.completion_handler))); + return init.result.get(); + } + + void pg_command(pg_t pgid, std::vector cmd, + ceph::buffer::list inbl, ceph_tid_t *ptid, + decltype(CommandOp::onfinish)&& onfinish) { + auto *c = new CommandOp( pgid, - cmd, - inbl, - poutbl, - prs, - onfinish); + std::move(cmd), + std::move(inbl), + std::move(onfinish)); submit_command(c, ptid); } + template + auto pg_command(pg_t pgid, std::vector cmd, + ceph::buffer::list inbl, ceph_tid_t *ptid, + CompletionToken&& token) { + boost::asio::async_completion init(token); + pg_command(pgid, std::move(cmd), std::move(inbl), ptid, + CommandOp::OpComp::create(service.get_executor(), + std::move(init.completion_handler))); + return init.result.get(); + } + // mid-level helpers Op *prepare_mutate_op( const object_t& oid, const object_locator_t& oloc, @@ -2328,15 +2882,18 @@ public: Context *oncommit, version_t *objver = NULL, osd_reqid_t reqid = osd_reqid_t(), ZTracer::Trace *parent_trace = nullptr) { - Op *o = new Op(oid, oloc, op.ops, flags | global_op_flags | - CEPH_OSD_FLAG_WRITE, oncommit, objver, nullptr, parent_trace); + Op *o = new Op(oid, oloc, std::move(op.ops), flags | global_op_flags | + CEPH_OSD_FLAG_WRITE, oncommit, objver, + nullptr, parent_trace); o->priority = op.priority; o->mtime = mtime; o->snapc = snapc; o->out_rval.swap(op.out_rval); o->out_bl.swap(op.out_bl); o->out_handler.swap(op.out_handler); + o->out_ec.swap(op.out_ec); o->reqid = reqid; + op.clear(); return o; } ceph_tid_t mutate( @@ -2351,6 +2908,27 @@ public: op_submit(o, &tid); return tid; } + + void mutate(const object_t& oid, const object_locator_t& oloc, + ObjectOperation&& op, const SnapContext& snapc, + ceph::real_time mtime, int flags, + std::unique_ptr&& oncommit, + version_t *objver = NULL, osd_reqid_t reqid = osd_reqid_t()) { + Op *o = new Op(oid, oloc, std::move(op.ops), flags | global_op_flags | + CEPH_OSD_FLAG_WRITE, std::move(oncommit), objver, + nullptr); + o->priority = op.priority; + o->mtime = mtime; + o->snapc = snapc; + o->out_bl.swap(op.out_bl); + o->out_handler.swap(op.out_handler); + o->out_rval.swap(op.out_rval); + o->out_ec.swap(op.out_ec); + o->reqid = reqid; + op.clear(); + op_submit(o); + } + Op *prepare_read_op( const object_t& oid, const object_locator_t& oloc, ObjectOperation& op, @@ -2359,8 +2937,9 @@ public: int *data_offset = NULL, uint64_t features = 0, ZTracer::Trace *parent_trace = nullptr) { - Op *o = new Op(oid, oloc, op.ops, flags | global_op_flags | - CEPH_OSD_FLAG_READ, onack, objver, data_offset, parent_trace); + Op *o = new Op(oid, oloc, std::move(op.ops), 
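// Note the ownership transfer above: prepare_mutate_op() now moves
// op.ops and the out_* arrays into the Op and then calls op.clear(), so
// the ObjectOperation is an empty shell afterwards. Hypothetical caller
// (oid, oloc, snapc, and oncommit are assumed to exist):
//
//   ObjectOperation op;
//   op.create(false);
//   Op* o = objecter->prepare_mutate_op(oid, oloc, op, snapc,
//                                       ceph::real_clock::now(), 0,
//                                       oncommit);
//   // 'op' is now empty; 'o' owns ops/out_bl/out_handler/out_rval/out_ec.
//   objecter->op_submit(o);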
flags | global_op_flags | + CEPH_OSD_FLAG_READ, onack, objver, + data_offset, parent_trace); o->priority = op.priority; o->snapid = snapid; o->outbl = pbl; @@ -2369,6 +2948,8 @@ public: o->out_bl.swap(op.out_bl); o->out_handler.swap(op.out_handler); o->out_rval.swap(op.out_rval); + o->out_ec.swap(op.out_ec); + op.clear(); return o; } ceph_tid_t read( @@ -2386,13 +2967,38 @@ public: op_submit(o, &tid); return tid; } + + void read(const object_t& oid, const object_locator_t& oloc, + ObjectOperation&& op, snapid_t snapid, ceph::buffer::list *pbl, + int flags, std::unique_ptr&& onack, + version_t *objver = nullptr, int *data_offset = nullptr, + uint64_t features = 0) { + Op *o = new Op(oid, oloc, std::move(op.ops), flags | global_op_flags | + CEPH_OSD_FLAG_READ, std::move(onack), objver, + data_offset); + o->priority = op.priority; + o->snapid = snapid; + o->outbl = pbl; + if (!o->outbl && op.size() == 1 && op.out_bl[0]->length()) + o->outbl = op.out_bl[0]; + o->out_bl.swap(op.out_bl); + o->out_handler.swap(op.out_handler); + o->out_rval.swap(op.out_rval); + o->out_ec.swap(op.out_ec); + if (features) + o->features = features; + op.clear(); + op_submit(o); + } + + Op *prepare_pg_read_op( uint32_t hash, object_locator_t oloc, ObjectOperation& op, ceph::buffer::list *pbl, int flags, Context *onack, epoch_t *reply_epoch, int *ctx_budget) { Op *o = new Op(object_t(), oloc, - op.ops, + std::move(op.ops), flags | global_op_flags | CEPH_OSD_FLAG_READ | CEPH_OSD_FLAG_IGNORE_OVERLAY, onack, NULL); @@ -2404,11 +3010,13 @@ public: o->out_bl.swap(op.out_bl); o->out_handler.swap(op.out_handler); o->out_rval.swap(op.out_rval); + o->out_ec.swap(op.out_ec); o->reply_epoch = reply_epoch; if (ctx_budget) { // budget is tracked by listing context o->ctx_budgeted = true; } + op.clear(); return o; } ceph_tid_t pg_read( @@ -2423,6 +3031,35 @@ public: return tid; } + ceph_tid_t pg_read( + uint32_t hash, object_locator_t oloc, + ObjectOperation& op, ceph::buffer::list *pbl, int flags, + std::unique_ptr&& onack, epoch_t *reply_epoch, int *ctx_budget) { + ceph_tid_t tid; + Op *o = new Op(object_t(), oloc, + std::move(op.ops), + flags | global_op_flags | CEPH_OSD_FLAG_READ | + CEPH_OSD_FLAG_IGNORE_OVERLAY, + std::move(onack), nullptr); + o->target.precalc_pgid = true; + o->target.base_pgid = pg_t(hash, oloc.pool); + o->priority = op.priority; + o->snapid = CEPH_NOSNAP; + o->outbl = pbl; + o->out_bl.swap(op.out_bl); + o->out_handler.swap(op.out_handler); + o->out_rval.swap(op.out_rval); + o->out_ec.swap(op.out_ec); + o->reply_epoch = reply_epoch; + if (ctx_budget) { + // budget is tracked by listing context + o->ctx_budgeted = true; + } + op_submit(o, &tid, ctx_budget); + op.clear(); + return tid; + } + // caller owns a ref LingerOp *linger_register(const object_t& oid, const object_locator_t& oloc, int flags); @@ -2430,19 +3067,39 @@ public: ObjectOperation& op, const SnapContext& snapc, ceph::real_time mtime, ceph::buffer::list& inbl, - Context *onfinish, + decltype(info->on_reg_commit)&& oncommit, version_t *objver); + ceph_tid_t linger_watch(LingerOp *info, + ObjectOperation& op, + const SnapContext& snapc, ceph::real_time mtime, + ceph::buffer::list& inbl, + Context* onfinish, + version_t *objver) { + return linger_watch(info, op, snapc, mtime, inbl, + OpContextVert(onfinish, nullptr), objver); + } ceph_tid_t linger_notify(LingerOp *info, ObjectOperation& op, snapid_t snap, ceph::buffer::list& inbl, - ceph::buffer::list *poutbl, - Context *onack, + decltype(LingerOp::on_reg_commit)&& onfinish, version_t *objver); - 
int linger_check(LingerOp *info); + ceph_tid_t linger_notify(LingerOp *info, + ObjectOperation& op, + snapid_t snap, ceph::buffer::list& inbl, + ceph::buffer::list *poutbl, + Context* onack, + version_t *objver) { + return linger_notify(info, op, snap, inbl, + OpContextVert(onack, poutbl), + objver); + } + tl::expected linger_check(LingerOp *info); void linger_cancel(LingerOp *info); // releases a reference void _linger_cancel(LingerOp *info); - void _do_watch_notify(LingerOp *info, MWatchNotify *m); + void _do_watch_notify(boost::intrusive_ptr info, + boost::intrusive_ptr m); /** * set up initial ops in the op std::vector, and allocate a final op slot. @@ -2481,7 +3138,7 @@ public: int i = init_ops(ops, 1, extra_ops); ops[i].op.op = CEPH_OSD_OP_STAT; C_Stat *fin = new C_Stat(psize, pmtime, onfinish); - Op *o = new Op(oid, oloc, ops, flags | global_op_flags | + Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags | CEPH_OSD_FLAG_READ, fin, objver); o->snapid = snap; o->outbl = &fin->bl; @@ -2513,8 +3170,9 @@ public: ops[i].op.extent.truncate_size = 0; ops[i].op.extent.truncate_seq = 0; ops[i].op.flags = op_flags; - Op *o = new Op(oid, oloc, ops, flags | global_op_flags | - CEPH_OSD_FLAG_READ, onfinish, objver, nullptr, parent_trace); + Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags | + CEPH_OSD_FLAG_READ, onfinish, objver, + nullptr, parent_trace); o->snapid = snap; o->outbl = pbl; return o; @@ -2545,7 +3203,7 @@ public: ops[i].op.extent.truncate_seq = 0; ops[i].indata = cmp_bl; ops[i].op.flags = op_flags; - Op *o = new Op(oid, oloc, ops, flags | global_op_flags | + Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags | CEPH_OSD_FLAG_READ, onfinish, objver); o->snapid = snap; return o; @@ -2577,7 +3235,7 @@ public: ops[i].op.extent.truncate_size = trunc_size; ops[i].op.extent.truncate_seq = trunc_seq; ops[i].op.flags = op_flags; - Op *o = new Op(oid, oloc, ops, flags | global_op_flags | + Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags | CEPH_OSD_FLAG_READ, onfinish, objver); o->snapid = snap; o->outbl = pbl; @@ -2596,7 +3254,7 @@ public: ops[i].op.extent.length = len; ops[i].op.extent.truncate_size = 0; ops[i].op.extent.truncate_seq = 0; - Op *o = new Op(oid, oloc, ops, flags | global_op_flags | + Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags | CEPH_OSD_FLAG_READ, onfinish, objver); o->snapid = snap; o->outbl = pbl; @@ -2615,7 +3273,7 @@ public: ops[i].op.xattr.value_len = 0; if (name) ops[i].indata.append(name, ops[i].op.xattr.name_len); - Op *o = new Op(oid, oloc, ops, flags | global_op_flags | + Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags | CEPH_OSD_FLAG_READ, onfinish, objver); o->snapid = snap; o->outbl = pbl; @@ -2632,7 +3290,7 @@ public: int i = init_ops(ops, 1, extra_ops); ops[i].op.op = CEPH_OSD_OP_GETXATTRS; C_GetAttrs *fin = new C_GetAttrs(attrset, onfinish); - Op *o = new Op(oid, oloc, ops, flags | global_op_flags | + Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags | CEPH_OSD_FLAG_READ, fin, objver); o->snapid = snap; o->outbl = &fin->bl; @@ -2656,7 +3314,7 @@ public: const SnapContext& snapc, int flags, Context *oncommit, version_t *objver = NULL) { - Op *o = new Op(oid, oloc, ops, flags | global_op_flags | + Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags | CEPH_OSD_FLAG_WRITE, oncommit, objver); o->mtime = mtime; o->snapc = snapc; @@ -2680,8 +3338,8 @@ public: ops[i].op.extent.truncate_seq = 0; ops[i].indata = bl; ops[i].op.flags = 
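// linger_check() above now reports through tl::expected rather than a
// plain int. Sketch of a caller, assuming the elided type is
// tl::expected<ceph::timespan, boost::system::error_code>:
//
//   auto age = objecter->linger_check(info);
//   if (!age)
//     return ceph::from_error_code(age.error()); // watch is broken
//   // *age: time since the watch was last confirmed valid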
op_flags; - Op *o = new Op(oid, oloc, ops, flags | global_op_flags | - CEPH_OSD_FLAG_WRITE, oncommit, objver, + Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags | + CEPH_OSD_FLAG_WRITE, std::move(oncommit), objver, nullptr, parent_trace); o->mtime = mtime; o->snapc = snapc; @@ -2714,7 +3372,7 @@ public: ops[i].op.extent.truncate_size = 0; ops[i].op.extent.truncate_seq = 0; ops[i].indata = bl; - Op *o = new Op(oid, oloc, ops, flags | global_op_flags | + Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags | CEPH_OSD_FLAG_WRITE, oncommit, objver); o->mtime = mtime; o->snapc = snapc; @@ -2749,7 +3407,7 @@ public: ops[i].op.extent.truncate_seq = trunc_seq; ops[i].indata = bl; ops[i].op.flags = op_flags; - Op *o = new Op(oid, oloc, ops, flags | global_op_flags | + Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags | CEPH_OSD_FLAG_WRITE, oncommit, objver); o->mtime = mtime; o->snapc = snapc; @@ -2770,7 +3428,7 @@ public: ops[i].op.extent.length = bl.length(); ops[i].indata = bl; ops[i].op.flags = op_flags; - Op *o = new Op(oid, oloc, ops, flags | global_op_flags | + Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags | CEPH_OSD_FLAG_WRITE, oncommit, objver); o->mtime = mtime; o->snapc = snapc; @@ -2804,7 +3462,7 @@ public: ops[i].op.writesame.data_length = bl.length(); ops[i].indata = bl; ops[i].op.flags = op_flags; - Op *o = new Op(oid, oloc, ops, flags | global_op_flags | + Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags | CEPH_OSD_FLAG_WRITE, oncommit, objver); o->mtime = mtime; o->snapc = snapc; @@ -2837,7 +3495,7 @@ public: ops[i].op.extent.offset = trunc_size; ops[i].op.extent.truncate_size = trunc_size; ops[i].op.extent.truncate_seq = trunc_seq; - Op *o = new Op(oid, oloc, ops, flags | global_op_flags | + Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags | CEPH_OSD_FLAG_WRITE, oncommit, objver); o->mtime = mtime; o->snapc = snapc; @@ -2854,7 +3512,7 @@ public: ops[i].op.op = CEPH_OSD_OP_ZERO; ops[i].op.extent.offset = off; ops[i].op.extent.length = len; - Op *o = new Op(oid, oloc, ops, flags | global_op_flags | + Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags | CEPH_OSD_FLAG_WRITE, oncommit, objver); o->mtime = mtime; o->snapc = snapc; @@ -2871,7 +3529,7 @@ public: int i = init_ops(ops, 1, extra_ops); ops[i].op.op = CEPH_OSD_OP_ROLLBACK; ops[i].op.snap.snapid = snapid; - Op *o = new Op(oid, oloc, ops, CEPH_OSD_FLAG_WRITE, oncommit, objver); + Op *o = new Op(oid, oloc, std::move(ops), CEPH_OSD_FLAG_WRITE, oncommit, objver); o->mtime = mtime; o->snapc = snapc; ceph_tid_t tid; @@ -2887,7 +3545,7 @@ public: int i = init_ops(ops, 1, extra_ops); ops[i].op.op = CEPH_OSD_OP_CREATE; ops[i].op.flags = create_flags; - Op *o = new Op(oid, oloc, ops, global_flags | global_op_flags | + Op *o = new Op(oid, oloc, std::move(ops), global_flags | global_op_flags | CEPH_OSD_FLAG_WRITE, oncommit, objver); o->mtime = mtime; o->snapc = snapc; @@ -2903,7 +3561,7 @@ public: std::vector ops; int i = init_ops(ops, 1, extra_ops); ops[i].op.op = CEPH_OSD_OP_DELETE; - Op *o = new Op(oid, oloc, ops, flags | global_op_flags | + Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags | CEPH_OSD_FLAG_WRITE, oncommit, objver); o->mtime = mtime; o->snapc = snapc; @@ -2934,8 +3592,9 @@ public: if (name) ops[i].indata.append(name, ops[i].op.xattr.name_len); ops[i].indata.append(bl); - Op *o = new Op(oid, oloc, ops, flags | global_op_flags | - CEPH_OSD_FLAG_WRITE, oncommit, objver); + Op *o = new Op(oid, 
oloc, std::move(ops), flags | global_op_flags | + CEPH_OSD_FLAG_WRITE, oncommit, + objver); o->mtime = mtime; o->snapc = snapc; ceph_tid_t tid; @@ -2954,7 +3613,7 @@ public: ops[i].op.xattr.value_len = 0; if (name) ops[i].indata.append(name, ops[i].op.xattr.name_len); - Op *o = new Op(oid, oloc, ops, flags | global_op_flags | + Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags | CEPH_OSD_FLAG_WRITE, oncommit, objver); o->mtime = mtime; o->snapc = snapc; @@ -2970,29 +3629,30 @@ public: hobject_t enumerate_objects_begin(); hobject_t enumerate_objects_end(); - //hobject_t enumerate_objects_begin(int n, int m); + + template + friend struct EnumerationContext; + template + friend struct CB_EnumerateReply; + template void enumerate_objects( int64_t pool_id, - const std::string &ns, - const hobject_t &start, - const hobject_t &end, + std::string_view ns, + hobject_t start, + hobject_t end, const uint32_t max, - const ceph::buffer::list &filter_bl, - std::list *result, - hobject_t *next, - Context *on_finish); - + const ceph::buffer::list& filter_bl, + fu2::unique_function, + hobject_t) &&> on_finish); + template + void _issue_enumerate(hobject_t start, + std::unique_ptr>); + template void _enumerate_reply( - ceph::buffer::list &bl, - int r, - const hobject_t &end, - const int64_t pool_id, - int budget, - epoch_t reply_epoch, - std::list *result, - hobject_t *next, - Context *on_finish); - friend class C_EnumerateReply; + ceph::buffer::list&& bl, + boost::system::error_code ec, + std::unique_ptr>&& ectx); // ------------------------- // pool ops @@ -3000,18 +3660,66 @@ private: void pool_op_submit(PoolOp *op); void _pool_op_submit(PoolOp *op); void _finish_pool_op(PoolOp *op, int r); - void _do_delete_pool(int64_t pool, Context *onfinish); -public: - int create_pool_snap(int64_t pool, std::string& snapName, Context *onfinish); - int allocate_selfmanaged_snap(int64_t pool, snapid_t *psnapid, - Context *onfinish); - int delete_pool_snap(int64_t pool, std::string& snapName, Context *onfinish); - int delete_selfmanaged_snap(int64_t pool, snapid_t snap, Context *onfinish); + void _do_delete_pool(int64_t pool, + decltype(PoolOp::onfinish)&& onfinish); - int create_pool(std::string& name, Context *onfinish, - int crush_rule=-1); - int delete_pool(int64_t pool, Context *onfinish); - int delete_pool(const std::string& name, Context *onfinish); +public: + void create_pool_snap(int64_t pool, std::string_view snapName, + decltype(PoolOp::onfinish)&& onfinish); + void create_pool_snap(int64_t pool, std::string_view snapName, + Context* c) { + create_pool_snap(pool, snapName, + OpContextVert(c, nullptr)); + } + void allocate_selfmanaged_snap(int64_t pool, + std::unique_ptr> onfinish); + void allocate_selfmanaged_snap(int64_t pool, snapid_t* psnapid, + Context* c) { + allocate_selfmanaged_snap(pool, + OpContextVert(c, psnapid)); + } + void delete_pool_snap(int64_t pool, std::string_view snapName, + decltype(PoolOp::onfinish)&& onfinish); + void delete_pool_snap(int64_t pool, std::string_view snapName, + Context* c) { + delete_pool_snap(pool, snapName, + OpContextVert(c, nullptr)); + } + + void delete_selfmanaged_snap(int64_t pool, snapid_t snap, + decltype(PoolOp::onfinish)&& onfinish); + void delete_selfmanaged_snap(int64_t pool, snapid_t snap, + Context* c) { + delete_selfmanaged_snap(pool, snap, + OpContextVert(c, nullptr)); + } + + + void create_pool(std::string_view name, + decltype(PoolOp::onfinish)&& onfinish, + int crush_rule=-1); + void create_pool(std::string_view name, Context 
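// Each pool op now has two faces: an error_code flavor taking a
// ceph::async::Completion, and a thin Context* shim built with
// OpContextVert(). A hypothetical caller of each flavor (pool_id and the
// handler body are placeholders):
//
//   // legacy path: a Context, completed with an int
//   objecter->delete_pool(pool_id, new C_SaferCond);
//
//   // new path: a Completion carrying boost::system::error_code
//   objecter->delete_pool(
//     pool_id,
//     Objecter::PoolOp::OpComp::create(
//       objecter->service.get_executor(),
//       [](boost::system::error_code ec, ceph::buffer::list) {
//         // handle ec
//       }));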
*onfinish, + int crush_rule=-1) { + create_pool(name, + OpContextVert(onfinish, nullptr), + crush_rule); + } + void delete_pool(int64_t pool, + decltype(PoolOp::onfinish)&& onfinish); + void delete_pool(int64_t pool, + Context* onfinish) { + delete_pool(pool, OpContextVert(onfinish, nullptr)); + } + + void delete_pool(std::string_view name, + decltype(PoolOp::onfinish)&& onfinish); + + void delete_pool(std::string_view name, + Context* onfinish) { + delete_pool(name, OpContextVert(onfinish, nullptr)); + } void handle_pool_op_reply(MPoolOpReply *m); int pool_op_cancel(ceph_tid_t tid, int r); @@ -3022,10 +3730,19 @@ private: void _poolstat_submit(PoolStatOp *op); public: void handle_get_pool_stats_reply(MGetPoolStatsReply *m); - void get_pool_stats(std::list& pools, - std::map *result, - bool *per_pool, - Context *onfinish); + void get_pool_stats(const std::vector& pools, + decltype(PoolStatOp::onfinish)&& onfinish); + template + auto get_pool_stats(const std::vector& pools, + CompletionToken&& token) { + boost::asio::async_completion init(token); + get_pool_stats(pools, + PoolStatOp::OpComp::create( + service.get_executor(), + std::move(init.completion_handler))); + return init.result.get(); + } int pool_stat_op_cancel(ceph_tid_t tid, int r); void _finish_pool_stat_op(PoolStatOp *op, int r); @@ -3035,8 +3752,21 @@ private: void _fs_stats_submit(StatfsOp *op); public: void handle_fs_stats_reply(MStatfsReply *m); + void get_fs_stats(boost::optional poolid, + decltype(StatfsOp::onfinish)&& onfinish); + template + auto get_fs_stats(boost::optional poolid, + CompletionToken&& token) { + boost::asio::async_completion init(token); + get_fs_stats(poolid, + StatfsOp::OpComp::create(service.get_executor(), + std::move(init.completion_handler))); + return init.result.get(); + } void get_fs_stats(struct ceph_statfs& result, boost::optional poolid, - Context *onfinish); + Context *onfinish) { + get_fs_stats(poolid, OpContextVert(onfinish, result)); + } int statfs_op_cancel(ceph_tid_t tid, int r); void _finish_statfs_op(StatfsOp *op, int r); @@ -3139,7 +3869,9 @@ public: private: epoch_t epoch_barrier = 0; - bool retry_writes_after_first_reply; + bool retry_writes_after_first_reply = + cct->_conf->objecter_retry_writes_after_first_reply; + public: void set_epoch_barrier(epoch_t epoch); diff --git a/src/test/librados/CMakeLists.txt b/src/test/librados/CMakeLists.txt index 14962e84d27..135794da9f7 100644 --- a/src/test/librados/CMakeLists.txt +++ b/src/test/librados/CMakeLists.txt @@ -190,10 +190,13 @@ target_link_libraries(unittest_librados_config librados ${BLKID_LIBRARIES} ${GSSAPI_LIBRARIES} ${OPENLDAP_LIBRARIES}) -add_executable(ceph_test_rados_completion_speed - completion_speed.cc) -target_link_libraries(ceph_test_rados_completion_speed - librados ${UNITTEST_LIBS} radostest-cxx) +# Removing this test. We can't shove it into Finisher as it's not a +# Context any more, and wrapping it to adapt it would be less fair. 
+ +#add_executable(ceph_test_rados_completion_speed +# completion_speed.cc) +#target_link_libraries(ceph_test_rados_completion_speed +# librados ${UNITTEST_LIBS} radostest-cxx) add_executable(ceph_test_rados_op_speed op_speed.cc) diff --git a/src/test/mon/test_mon_workloadgen.cc b/src/test/mon/test_mon_workloadgen.cc index d62b0d7104a..613a18f247e 100644 --- a/src/test/mon/test_mon_workloadgen.cc +++ b/src/test/mon/test_mon_workloadgen.cc @@ -263,7 +263,7 @@ class ClientStub : public TestStub dout(10) << "ClientStub::" << __func__ << " starting messenger at " << messenger->get_myaddrs() << dendl; - objecter.reset(new Objecter(cct, messenger.get(), &monc, NULL, 0, 0)); + objecter.reset(new Objecter(cct, messenger.get(), &monc, poolctx, 0, 0)); ceph_assert(objecter.get() != NULL); objecter->set_balanced_budget(); diff --git a/src/tools/cephfs/MDSUtility.cc b/src/tools/cephfs/MDSUtility.cc index 3e867aa735f..cc4de9ff1b8 100644 --- a/src/tools/cephfs/MDSUtility.cc +++ b/src/tools/cephfs/MDSUtility.cc @@ -28,7 +28,7 @@ MDSUtility::MDSUtility() : monc = new MonClient(g_ceph_context, poolctx); messenger = Messenger::create_client_messenger(g_ceph_context, "mds"); fsmap = new FSMap(); - objecter = new Objecter(g_ceph_context, messenger, monc, NULL, 0, 0); + objecter = new Objecter(g_ceph_context, messenger, monc, poolctx, 0, 0); }
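For call sites, the visible change is the constructor: where a Finisher* (or
nullptr) was passed before, a boost::asio::io_context reference is now
required, and its threads drive completions. A sketch of the minimal wiring,
mirroring the MDSUtility and workloadgen hunks above (poolctx and the thread
count are illustrative):

  #include <boost/asio/executor_work_guard.hpp>
  #include <boost/asio/io_context.hpp>
  #include <thread>
  #include <vector>

  boost::asio::io_context poolctx;
  auto guard = boost::asio::make_work_guard(poolctx); // keeps run() alive;
                                                      // guard.reset() to stop
  std::vector<std::thread> threads;
  for (int i = 0; i < 2; ++i)
    threads.emplace_back([&poolctx] { poolctx.run(); });

  // ...then, exactly as in the hunks above:
  // objecter = new Objecter(g_ceph_context, messenger, monc, poolctx, 0, 0);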