git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osdc: Asiofy the Objecter
authorAdam C. Emerson <aemerson@redhat.com>
Tue, 28 Apr 2020 22:26:52 +0000 (18:26 -0400)
committerAdam C. Emerson <aemerson@redhat.com>
Fri, 15 May 2020 14:55:10 +0000 (10:55 -0400)
Thanks to Casey Bodley <cbodley@users.noreply.github.com> for
watch/notify fixes and Patrick Donnelly <pdonnell@redhat.com> for MDS
fix.

Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
31 files changed:
src/CMakeLists.txt
src/ceph_fuse.cc
src/ceph_syn.cc
src/client/Client.cc
src/client/Client.h
src/libcephfs.cc
src/librados/AioCompletionImpl.h
src/librados/IoCtxImpl.cc
src/librados/ListObjectImpl.h
src/librados/PoolAsyncCompletionImpl.h
src/librados/RadosClient.cc
src/librados/RadosClient.h
src/librados/librados_c.cc
src/librados/librados_cxx.cc
src/mds/CDir.cc
src/mds/MDSRank.cc
src/mds/PurgeQueue.h
src/mds/Server.cc
src/messages/MGetPoolStats.h
src/messages/MGetPoolStatsReply.h
src/mgr/BaseMgrModule.cc
src/mgr/MgrStandby.cc
src/osd/OSD.cc
src/osd/OSDMap.h
src/osd/osd_types.cc
src/osd/osd_types.h
src/osdc/Objecter.cc
src/osdc/Objecter.h
src/test/librados/CMakeLists.txt
src/test/mon/test_mon_workloadgen.cc
src/tools/cephfs/MDSUtility.cc

index 2537e5110c621231653b468020bd93bcaa97d0a4..afedbfdd9da2f7d751a42cd502e9e99f29954d86 100644 (file)
@@ -377,6 +377,7 @@ set(ceph_common_deps
   Boost::program_options
   Boost::date_time
   Boost::iostreams
+  fmt::fmt
   StdFilesystem::filesystem
   fmt::fmt
   ${BLKID_LIBRARIES}
index ab4db60920c0263a931145757bfc1226af198704..3692b5c6ff6f6aa4152fc9d801e221a3dba6c227 100644 (file)
@@ -254,7 +254,7 @@ int main(int argc, const char **argv, const char *envp[]) {
     messenger->set_policy(entity_name_t::TYPE_MDS,
                          Messenger::Policy::lossless_client(0));
 
-    client = new StandaloneClient(messenger, mc);
+    client = new StandaloneClient(messenger, mc, icp);
     if (filer_flags) {
       client->set_filer_flags(filer_flags);
     }
index e3ca1328702060ca905acf12c494fb288bf69240..165ea42308ac6db2e7faf97a63e382654c8803ab 100644 (file)
@@ -68,7 +68,7 @@ int main(int argc, const char **argv, char *envp[])
     messengers[i]->bind(g_conf()->public_addr);
     mclients[i] = new MonClient(g_ceph_context, poolctx);
     mclients[i]->build_initial_monmap();
-    auto client = new StandaloneClient(messengers[i], mclients[i]);
+    auto client = new StandaloneClient(messengers[i], mclients[i], poolctx);
     client->set_filer_flags(syn_filer_flags);
     SyntheticClient *syn = new SyntheticClient(client);
     clients.push_back(client);
index 8bd78b4046bed492c355d8a27868b501901931f0..cd0c87a22cc40f7aeb64c12fcb6a8484175a7e39 100644 (file)
@@ -29,6 +29,8 @@
 #include <boost/lexical_cast.hpp>
 #include <boost/fusion/include/std_pair.hpp>
 
+#include "common/async/waiter.h"
+
 #if defined(__FreeBSD__)
 #define XATTR_CREATE    0x1
 #define XATTR_REPLACE   0x2
@@ -11704,9 +11706,8 @@ void Client::_setxattr_maybe_wait_for_osdmap(const char *name, const void *value
     });
 
     if (r == -ENOENT) {
-      C_SaferCond ctx;
-      objecter->wait_for_latest_osdmap(&ctx);
-      ctx.wait();
+      bs::error_code ec;
+      objecter->wait_for_latest_osdmap(ca::use_blocked[ec]);
     }
   }
 }
@@ -14278,7 +14279,7 @@ int Client::check_pool_perm(Inode *in, int need)
 
     C_SaferCond rd_cond;
     ObjectOperation rd_op;
-    rd_op.stat(NULL, (ceph::real_time*)nullptr, NULL);
+    rd_op.stat(nullptr, nullptr, nullptr);
 
     objecter->mutate(oid, OSDMap::file_to_object_locator(in->layout), rd_op,
                     nullsnapc, ceph::real_clock::now(), 0, &rd_cond);
@@ -14465,7 +14466,7 @@ void Client::set_session_timeout(unsigned timeout)
 int Client::start_reclaim(const std::string& uuid, unsigned flags,
                          const std::string& fs_name)
 {
-  std::lock_guard l(client_lock);
+  std::unique_lock l(client_lock);
   if (!initialized)
     return -ENOTCONN;
 
@@ -14541,13 +14542,14 @@ int Client::start_reclaim(const std::string& uuid, unsigned flags,
 
   // use blacklist to check if target session was killed
   // (config option mds_session_blacklist_on_evict needs to be true)
-  C_SaferCond cond;
-  if (!objecter->wait_for_map(reclaim_osd_epoch, &cond)) {
-    ldout(cct, 10) << __func__ << ": waiting for OSD epoch " << reclaim_osd_epoch << dendl;
-    client_lock.unlock();
-    cond.wait();
-    client_lock.lock();
-  }
+  ldout(cct, 10) << __func__ << ": waiting for OSD epoch " << reclaim_osd_epoch << dendl;
+  bs::error_code ec;
+  l.unlock();
+  objecter->wait_for_map(reclaim_osd_epoch, ca::use_blocked[ec]);
+  l.lock();
+
+  if (ec)
+    return ceph::from_error_code(ec);
 
   bool blacklisted = objecter->with_osdmap(
       [this](const OSDMap &osd_map) -> bool {
@@ -14671,8 +14673,9 @@ mds_rank_t Client::_get_random_up_mds() const
 }
 
 
-StandaloneClient::StandaloneClient(Messenger *m, MonClient *mc)
-  : Client(m, mc, new Objecter(m->cct, m, mc, nullptr, 0, 0))
+StandaloneClient::StandaloneClient(Messenger *m, MonClient *mc,
+                                  boost::asio::io_context& ictx)
+  : Client(m, mc, new Objecter(m->cct, m, mc, ictx, 0, 0))
 {
   monclient->set_messenger(m);
   objecter->set_client_incarnation(0);
index 3a5c6741d003a4a10b6cafe7bbf7a731961ed299..398b4bc89f1da2eb4ce5aeda9817fa708f849f38 100644 (file)
@@ -1305,7 +1305,7 @@ private:
 class StandaloneClient : public Client
 {
 public:
-  StandaloneClient(Messenger *m, MonClient *mc);
+  StandaloneClient(Messenger *m, MonClient *mc, boost::asio::io_context& ictx);
 
   ~StandaloneClient() override;
 
index 623b6bb51ab7353a7366c3c512e32cff8bb99934..3d3aa22fe8f9f442e290b6202a91da393cd18c99 100644 (file)
@@ -106,7 +106,7 @@ public:
 
     //at last the client
     ret = -CEPHFS_ERROR_NEW_CLIENT; //defined in libcephfs.h;
-    client = new StandaloneClient(messenger, monclient);
+    client = new StandaloneClient(messenger, monclient, icp);
     if (!client)
       goto fail;
 
index d3a674e8ee7d7e3ebf226070672bd5568fddce60..6f7e1b6288618958edbf1815624564da72fd44f0 100644 (file)
@@ -126,14 +126,14 @@ struct librados::AioCompletionImpl {
 };
 
 namespace librados {
-struct C_AioComplete : public Context {
+struct CB_AioComplete {
   AioCompletionImpl *c;
 
-  explicit C_AioComplete(AioCompletionImpl *cc) : c(cc) {
+  explicit CB_AioComplete(AioCompletionImpl *cc) : c(cc) {
     c->_get();
   }
 
-  void finish(int r) override {
+  void operator()() {
     rados_callback_t cb_complete = c->callback_complete;
     void *cb_complete_arg = c->callback_complete_arg;
     if (cb_complete)
@@ -160,14 +160,27 @@ struct C_AioComplete : public Context {
   * flush where we only want to wait for things to be safe,
   * but allow users to specify any of the callbacks.
   */
-struct C_AioCompleteAndSafe : public Context {
+struct CB_AioCompleteAndSafe {
   AioCompletionImpl *c;
 
-  explicit C_AioCompleteAndSafe(AioCompletionImpl *cc) : c(cc) {
+
+  explicit CB_AioCompleteAndSafe(AioCompletionImpl *cc) : c(cc) {
     c->get();
   }
 
-  void finish(int r) override {
+  CB_AioCompleteAndSafe(const CB_AioCompleteAndSafe&) = delete;
+  CB_AioCompleteAndSafe& operator =(const CB_AioCompleteAndSafe&) = delete;
+  CB_AioCompleteAndSafe(CB_AioCompleteAndSafe&& rhs) {
+    c = rhs.c;
+    rhs.c = nullptr;
+  }
+  CB_AioCompleteAndSafe& operator =(CB_AioCompleteAndSafe&& rhs) {
+    c = rhs.c;
+    rhs.c = nullptr;
+    return *this;
+  }
+
+  void operator()(int r = 0) {
     c->lock.lock();
     c->rval = r;
     c->complete = true;
@@ -190,7 +203,6 @@ struct C_AioCompleteAndSafe : public Context {
     c->put_unlock();
   }
 };
-
 }
 
 #endif
index 900628bbdc4fb8d6bf5e335144b8747e578c5302..f384c49766fc5d6409ff470c969c62f6cfc3e0ad 100644 (file)
 #undef dout_prefix
 #define dout_prefix *_dout << "librados: "
 
+namespace bs = boost::system;
+namespace ca = ceph::async;
+namespace cb = ceph::buffer;
+
 namespace librados {
 namespace {
 
-struct C_notify_Finish : public Context {
+struct CB_notify_Finish {
   CephContext *cct;
   Context *ctx;
   Objecter *objecter;
   Objecter::LingerOp *linger_op;
-  bufferlist reply_bl;
   bufferlist *preply_bl;
   char **preply_buf;
   size_t *preply_buf_len;
 
-  C_notify_Finish(CephContext *_cct, Context *_ctx, Objecter *_objecter,
-                  Objecter::LingerOp *_linger_op, bufferlist *_preply_bl,
-                  char **_preply_buf, size_t *_preply_buf_len)
+  CB_notify_Finish(CephContext *_cct, Context *_ctx, Objecter *_objecter,
+                  Objecter::LingerOp *_linger_op, bufferlist *_preply_bl,
+                  char **_preply_buf, size_t *_preply_buf_len)
     : cct(_cct), ctx(_ctx), objecter(_objecter), linger_op(_linger_op),
       preply_bl(_preply_bl), preply_buf(_preply_buf),
-      preply_buf_len(_preply_buf_len)
-  {
-    linger_op->on_notify_finish = this;
-    linger_op->notify_result_bl = &reply_bl;
-  }
+      preply_buf_len(_preply_buf_len) {}
 
-  void finish(int r) override
-  {
+
+  // move-only
+  CB_notify_Finish(const CB_notify_Finish&) = delete;
+  CB_notify_Finish& operator =(const CB_notify_Finish&) = delete;
+  CB_notify_Finish(CB_notify_Finish&&) = default;
+  CB_notify_Finish& operator =(CB_notify_Finish&&) = default;
+
+  void operator()(bs::error_code ec, bufferlist&& reply_bl) {
     ldout(cct, 10) << __func__ << " completed notify (linger op "
-                   << linger_op << "), r = " << r << dendl;
+                   << linger_op << "), ec = " << ec << dendl;
 
     // pass result back to user
     // NOTE: we do this regardless of what error code we return
@@ -72,21 +77,20 @@ struct C_notify_Finish : public Context {
     if (preply_bl)
       preply_bl->claim(reply_bl);
 
-    ctx->complete(r);
+    ctx->complete(ceph::from_error_code(ec));
   }
 };
 
-struct C_aio_linger_cancel : public Context {
+struct CB_aio_linger_cancel {
   Objecter *objecter;
   Objecter::LingerOp *linger_op;
 
-  C_aio_linger_cancel(Objecter *_objecter, Objecter::LingerOp *_linger_op)
+  CB_aio_linger_cancel(Objecter *_objecter, Objecter::LingerOp *_linger_op)
     : objecter(_objecter), linger_op(_linger_op)
   {
   }
 
-  void finish(int r) override
-  {
+  void operator()() {
     objecter->linger_cancel(linger_op);
   }
 };
@@ -104,8 +108,9 @@ struct C_aio_linger_Complete : public Context {
 
   void finish(int r) override {
     if (cancel || r < 0)
-      c->io->client->finisher.queue(new C_aio_linger_cancel(c->io->objecter,
-                                                            linger_op));
+      boost::asio::defer(c->io->client->finish_strand,
+                        CB_aio_linger_cancel(c->io->objecter,
+                                             linger_op));
 
     c->lock.lock();
     c->rval = r;
@@ -114,7 +119,7 @@ struct C_aio_linger_Complete : public Context {
 
     if (c->callback_complete ||
        c->callback_safe) {
-      c->io->client->finisher.queue(new C_AioComplete(c));
+      boost::asio::defer(c->io->client->finish_strand, CB_AioComplete(c));
     }
     c->put_unlock();
   }
@@ -161,12 +166,11 @@ struct C_aio_notify_Complete : public C_aio_linger_Complete {
 
 struct C_aio_notify_Ack : public Context {
   CephContext *cct;
-  C_notify_Finish *onfinish;
   C_aio_notify_Complete *oncomplete;
 
-  C_aio_notify_Ack(CephContext *_cct, C_notify_Finish *_onfinish,
+  C_aio_notify_Ack(CephContext *_cct,
                    C_aio_notify_Complete *_oncomplete)
-    : cct(_cct), onfinish(_onfinish), oncomplete(_oncomplete)
+    : cct(_cct), oncomplete(_oncomplete)
   {
   }
 
@@ -195,7 +199,7 @@ struct C_aio_selfmanaged_snap_op_Complete : public Context {
     c->cond.notify_all();
 
     if (c->callback_complete || c->callback_safe) {
-      client->finisher.queue(new librados::C_AioComplete(c));
+      boost::asio::defer(client->finish_strand, librados::CB_AioComplete(c));
     }
     c->put_unlock();
   }
@@ -305,7 +309,7 @@ void librados::IoCtxImpl::complete_aio_write(AioCompletionImpl *c)
     ldout(client->cct, 20) << " waking waiters on seq " << waiters->first << dendl;
     for (std::list<AioCompletionImpl*>::iterator it = waiters->second.begin();
         it != waiters->second.end(); ++it) {
-      client->finisher.queue(new C_AioCompleteAndSafe(*it));
+      boost::asio::defer(client->finish_strand, CB_AioCompleteAndSafe(*it));
       (*it)->put();
     }
     aio_write_waiters.erase(waiters++);
@@ -325,7 +329,7 @@ void librados::IoCtxImpl::flush_aio_writes_async(AioCompletionImpl *c)
   if (aio_write_list.empty()) {
     ldout(client->cct, 20) << "flush_aio_writes_async no writes. (tid "
                           << seq << ")" << dendl;
-    client->finisher.queue(new C_AioCompleteAndSafe(c));
+    boost::asio::defer(client->finish_strand, CB_AioCompleteAndSafe(c));
   } else {
     ldout(client->cct, 20) << "flush_aio_writes_async " << aio_write_list.size()
                           << " writes in flight; waiting on tid " << seq << dendl;
@@ -362,14 +366,10 @@ int librados::IoCtxImpl::snap_create(const char *snapName)
   ceph::condition_variable cond;
   bool done;
   Context *onfinish = new C_SafeCond(mylock, cond, &done, &reply);
-  reply = objecter->create_pool_snap(poolid, sName, onfinish);
+  objecter->create_pool_snap(poolid, sName, onfinish);
 
-  if (reply < 0) {
-    delete onfinish;
-  } else {
-    std::unique_lock l{mylock};
-    cond.wait(l, [&done] { return done; });
-  }
+  std::unique_lock l{mylock};
+  cond.wait(l, [&done] { return done; });
   return reply;
 }
 
@@ -382,18 +382,14 @@ int librados::IoCtxImpl::selfmanaged_snap_create(uint64_t *psnapid)
   bool done;
   Context *onfinish = new C_SafeCond(mylock, cond, &done, &reply);
   snapid_t snapid;
-  reply = objecter->allocate_selfmanaged_snap(poolid, &snapid, onfinish);
+  objecter->allocate_selfmanaged_snap(poolid, &snapid, onfinish);
 
-  if (reply < 0) {
-    delete onfinish;
-  } else {
-    {
-      std::unique_lock l{mylock};
-      cond.wait(l, [&done] { return done; });
-    }
-    if (reply == 0)
-      *psnapid = snapid;
+  {
+    std::unique_lock l{mylock};
+    cond.wait(l, [&done] { return done; });
   }
+  if (reply == 0)
+    *psnapid = snapid;
   return reply;
 }
 
@@ -402,11 +398,8 @@ void librados::IoCtxImpl::aio_selfmanaged_snap_create(uint64_t *snapid,
 {
   C_aio_selfmanaged_snap_create_Complete *onfinish =
     new C_aio_selfmanaged_snap_create_Complete(client, c, snapid);
-  int r = objecter->allocate_selfmanaged_snap(poolid, &onfinish->snapid,
-                                              onfinish);
-  if (r < 0) {
-    onfinish->complete(r);
-  }
+  objecter->allocate_selfmanaged_snap(poolid, &onfinish->snapid,
+                                     onfinish);
 }
 
 int librados::IoCtxImpl::snap_remove(const char *snapName)
@@ -418,14 +411,9 @@ int librados::IoCtxImpl::snap_remove(const char *snapName)
   ceph::condition_variable cond;
   bool done;
   Context *onfinish = new C_SafeCond(mylock, cond, &done, &reply);
-  reply = objecter->delete_pool_snap(poolid, sName, onfinish);
-
-  if (reply < 0) {
-    delete onfinish; 
-  } else {
-    unique_lock l{mylock};
-    cond.wait(l, [&done] { return done; });
-  }
+  objecter->delete_pool_snap(poolid, sName, onfinish);
+  unique_lock l{mylock};
+  cond.wait(l, [&done] { return done; });
   return reply;
 }
 
@@ -1155,7 +1143,7 @@ struct AioGetxattrsData {
   AioGetxattrsData(librados::AioCompletionImpl *c, map<string, bufferlist>* attrset,
                   librados::RadosClient *_client) :
     user_completion(c), user_attrset(attrset), client(_client) {}
-  struct librados::C_AioCompleteAndSafe user_completion;
+  struct librados::CB_AioCompleteAndSafe user_completion;
   map<string, bufferlist> result_attrset;
   map<std::string, bufferlist>* user_attrset;
   librados::RadosClient *client;
@@ -1174,7 +1162,7 @@ static void aio_getxattrs_complete(rados_completion_t c, void *arg) {
       (*cdata->user_attrset)[p->first] = p->second;
     }
   }
-  cdata->user_completion.finish(rc);
+  cdata->user_completion(rc);
   ((librados::AioCompletionImpl*)c)->put();
   delete cdata;
 }
@@ -1477,7 +1465,7 @@ int librados::IoCtxImpl::stat(const object_t& oid, uint64_t *psize, time_t *pmti
 
   ::ObjectOperation rd;
   prepare_assert_ops(&rd);
-  rd.stat(psize, &mtime, NULL);
+  rd.stat(psize, &mtime, nullptr);
   int r = operate_read(oid, &rd, NULL);
 
   if (r >= 0 && pmtime) {
@@ -1497,7 +1485,7 @@ int librados::IoCtxImpl::stat2(const object_t& oid, uint64_t *psize, struct time
 
   ::ObjectOperation rd;
   prepare_assert_ops(&rd);
-  rd.stat(psize, &mtime, NULL);
+  rd.stat(psize, &mtime, nullptr);
   int r = operate_read(oid, &rd, NULL);
   if (r < 0) {
     return r;
@@ -1568,31 +1556,25 @@ void librados::IoCtxImpl::set_sync_op_version(version_t ver)
   last_objver = ver;
 }
 
-struct WatchInfo : public Objecter::WatchContext {
-  librados::IoCtxImpl *ioctx;
+namespace librados {
+void intrusive_ptr_add_ref(IoCtxImpl *p) { p->get(); }
+void intrusive_ptr_release(IoCtxImpl *p) { p->put(); }
+}
+
+struct WatchInfo {
+  boost::intrusive_ptr<librados::IoCtxImpl> ioctx;
   object_t oid;
   librados::WatchCtx *ctx;
   librados::WatchCtx2 *ctx2;
-  bool internal = false;
 
   WatchInfo(librados::IoCtxImpl *io, object_t o,
-           librados::WatchCtx *c, librados::WatchCtx2 *c2,
-            bool inter)
-    : ioctx(io), oid(o), ctx(c), ctx2(c2), internal(inter) {
-    ioctx->get();
-  }
-  ~WatchInfo() override {
-    ioctx->put();
-    if (internal) {
-      delete ctx;
-      delete ctx2;
-    }
-  }
+           librados::WatchCtx *c, librados::WatchCtx2 *c2)
+    : ioctx(io), oid(o), ctx(c), ctx2(c2) {}
 
   void handle_notify(uint64_t notify_id,
                     uint64_t cookie,
                     uint64_t notifier_id,
-                    bufferlist& bl) override {
+                    bufferlist& bl) {
     ldout(ioctx->client->cct, 10) << __func__ << " " << notify_id
                                  << " cookie " << cookie
                                  << " notifier_id " << notifier_id
@@ -1609,13 +1591,35 @@ struct WatchInfo : public Objecter::WatchContext {
       ioctx->notify_ack(oid, notify_id, cookie, empty);
     }
   }
-  void handle_error(uint64_t cookie, int err) override {
+  void handle_error(uint64_t cookie, int err) {
     ldout(ioctx->client->cct, 10) << __func__ << " cookie " << cookie
                                  << " err " << err
                                  << dendl;
     if (ctx2)
       ctx2->handle_error(cookie, err);
   }
+
+  void operator()(bs::error_code ec,
+                 uint64_t notify_id,
+                 uint64_t cookie,
+                 uint64_t notifier_id,
+                 bufferlist&& bl) {
+    if (ec) {
+      handle_error(cookie, ceph::from_error_code(ec));
+    } else {
+      handle_notify(notify_id, cookie, notifier_id, bl);
+    }
+  }
+};
+
+// internal WatchInfo that owns the context memory
+struct InternalWatchInfo : public WatchInfo {
+  std::unique_ptr<librados::WatchCtx> ctx;
+  std::unique_ptr<librados::WatchCtx2> ctx2;
+
+  InternalWatchInfo(librados::IoCtxImpl *io, object_t o,
+                    librados::WatchCtx *c, librados::WatchCtx2 *c2)
+    : WatchInfo(io, o, c, c2), ctx(c), ctx2(c2) {}
 };
 
 int librados::IoCtxImpl::watch(const object_t& oid, uint64_t *handle,
@@ -1638,9 +1642,11 @@ int librados::IoCtxImpl::watch(const object_t& oid, uint64_t *handle,
 
   Objecter::LingerOp *linger_op = objecter->linger_register(oid, oloc, 0);
   *handle = linger_op->get_cookie();
-  linger_op->watch_context = new WatchInfo(this,
-                                          oid, ctx, ctx2, internal);
-
+  if (internal) {
+    linger_op->handle = InternalWatchInfo(this, oid, ctx, ctx2);
+  } else {
+    linger_op->handle = WatchInfo(this, oid, ctx, ctx2);
+  }
   prepare_assert_ops(&wr);
   wr.watch(*handle, CEPH_OSD_WATCH_OP_WATCH, timeout);
   bufferlist bl;
@@ -1684,7 +1690,11 @@ int librados::IoCtxImpl::aio_watch(const object_t& oid,
 
   ::ObjectOperation wr;
   *handle = linger_op->get_cookie();
-  linger_op->watch_context = new WatchInfo(this, oid, ctx, ctx2, internal);
+  if (internal) {
+    linger_op->handle = InternalWatchInfo(this, oid, ctx, ctx2);
+  } else {
+    linger_op->handle = WatchInfo(this, oid, ctx, ctx2);
+  }
 
   prepare_assert_ops(&wr);
   wr.watch(*handle, CEPH_OSD_WATCH_OP_WATCH, timeout);
@@ -1712,8 +1722,13 @@ int librados::IoCtxImpl::notify_ack(
 
 int librados::IoCtxImpl::watch_check(uint64_t cookie)
 {
-  Objecter::LingerOp *linger_op = reinterpret_cast<Objecter::LingerOp*>(cookie);
-  return objecter->linger_check(linger_op);
+  auto linger_op = reinterpret_cast<Objecter::LingerOp*>(cookie);
+  auto r = objecter->linger_check(linger_op);
+  if (r)
+    return 1 + std::chrono::duration_cast<
+      std::chrono::milliseconds>(*r).count();
+  else
+    return ceph::from_error_code(r.error());
 }
 
 int librados::IoCtxImpl::unwatch(uint64_t cookie)
@@ -1758,11 +1773,12 @@ int librados::IoCtxImpl::notify(const object_t& oid, bufferlist& bl,
   Objecter::LingerOp *linger_op = objecter->linger_register(oid, oloc, 0);
 
   C_SaferCond notify_finish_cond;
-  Context *notify_finish = new C_notify_Finish(client->cct, &notify_finish_cond,
-                                               objecter, linger_op, preply_bl,
-                                               preply_buf, preply_buf_len);
-  (void) notify_finish;
-
+  linger_op->on_notify_finish =
+    Objecter::LingerOp::OpComp::create(
+      objecter->service.get_executor(),
+      CB_notify_Finish(client->cct, &notify_finish_cond,
+                       objecter, linger_op, preply_bl,
+                       preply_buf, preply_buf_len));
   uint32_t timeout = notify_timeout;
   if (timeout_ms)
     timeout = timeout_ms / 1000;
@@ -1812,11 +1828,14 @@ int librados::IoCtxImpl::aio_notify(const object_t& oid, AioCompletionImpl *c,
   c->io = this;
 
   C_aio_notify_Complete *oncomplete = new C_aio_notify_Complete(c, linger_op);
-  C_notify_Finish *onnotify = new C_notify_Finish(client->cct, oncomplete,
-                                                  objecter, linger_op,
-                                                  preply_bl, preply_buf,
-                                                  preply_buf_len);
-  Context *onack = new C_aio_notify_Ack(client->cct, onnotify, oncomplete);
+  linger_op->on_notify_finish =
+    Objecter::LingerOp::OpComp::create(
+      objecter->service.get_executor(),
+      CB_notify_Finish(client->cct, oncomplete,
+                       objecter, linger_op,
+                       preply_bl, preply_buf,
+                       preply_buf_len));
+  Context *onack = new C_aio_notify_Ack(client->cct, oncomplete);
 
   uint32_t timeout = notify_timeout;
   if (timeout_ms)
@@ -1900,7 +1919,7 @@ void librados::IoCtxImpl::C_aio_stat_Ack::finish(int r)
   }
 
   if (c->callback_complete) {
-    c->io->client->finisher.queue(new C_AioComplete(c));
+    boost::asio::defer(c->io->client->finish_strand, CB_AioComplete(c));
   }
 
   c->put_unlock();
@@ -1928,7 +1947,7 @@ void librados::IoCtxImpl::C_aio_stat2_Ack::finish(int r)
   }
 
   if (c->callback_complete) {
-    c->io->client->finisher.queue(new C_AioComplete(c));
+    boost::asio::defer(c->io->client->finish_strand, CB_AioComplete(c));
   }
 
   c->put_unlock();
@@ -1964,7 +1983,7 @@ void librados::IoCtxImpl::C_aio_Complete::finish(int r)
 
   if (c->callback_complete ||
       c->callback_safe) {
-    c->io->client->finisher.queue(new C_AioComplete(c));
+    boost::asio::defer(c->io->client->finish_strand, CB_AioComplete(c));
   }
 
   if (c->aio_write_seq) {
@@ -2028,6 +2047,7 @@ int librados::IoCtxImpl::application_enable(const std::string& app_name,
 
   r = c->get_return_value();
   c->release();
+  c->put();
   if (r < 0) {
     return r;
   }
@@ -2043,7 +2063,10 @@ void librados::IoCtxImpl::application_enable_async(const std::string& app_name,
   // preserved until Luminous is configured as minimim version.
   if (!client->get_required_monitor_features().contains_all(
         ceph::features::mon::FEATURE_LUMINOUS)) {
-    client->finisher.queue(new C_PoolAsync_Safe(c), -EOPNOTSUPP);
+    boost::asio::defer(client->finish_strand,
+                      [cb = CB_PoolAsync_Safe(c)]() mutable {
+                        cb(-EOPNOTSUPP);
+                      });
     return;
   }
 
@@ -2061,7 +2084,7 @@ void librados::IoCtxImpl::application_enable_async(const std::string& app_name,
   cmds.push_back(cmd.str());
   bufferlist inbl;
   client->mon_command_async(cmds, inbl, nullptr, nullptr,
-                            new C_PoolAsync_Safe(c));
+                            make_lambda_context(CB_PoolAsync_Safe(c)));
 }
 
 int librados::IoCtxImpl::application_list(std::set<std::string> *app_names)
index 95c2e21a4ec57c1f2481f725d709a9e2f30dd595..7396c12108d0c9c0961ac63d419e20273b14b228 100644 (file)
  * Foundation.  See file COPYING.
  *
  */
-#include <string>
 
 #ifndef CEPH_LIBRADOS_LISTOBJECTIMPL_H
 #define CEPH_LIBRADOS_LISTOBJECTIMPL_H
 
+#include <string>
 #include <include/rados/librados.hpp>
 
+#include "include/cmp.h"
+
 namespace librados {
 struct ListObjectImpl {
   std::string nspace;
index b52d7fada201cb7d4ba385abf1ecd5113c8e42f8..73420fe359c45ba2c4f9b715c01eed34c92d0a64 100644 (file)
@@ -16,7 +16,9 @@
 #define CEPH_LIBRADOS_POOLASYNCCOMPLETIONIMPL_H
 
 #include "common/ceph_mutex.h"
-#include "include/Context.h"
+
+#include <boost/intrusive_ptr.hpp>
+
 #include "include/rados/librados.h"
 #include "include/rados/librados.hpp"
 
@@ -29,67 +31,68 @@ namespace librados {
     bool released = false;
     bool done = false;
 
-    rados_callback_t callback = 0;
-    void *callback_arg = nullptr;;
+    rados_callback_t callback = nullptr;
+    void *callback_arg = nullptr;
 
     PoolAsyncCompletionImpl() = default;
 
     int set_callback(void *cb_arg, rados_callback_t cb) {
-      std::scoped_lock l{lock};
+      std::scoped_lock l(lock);
       callback = cb;
       callback_arg = cb_arg;
       return 0;
     }
     int wait() {
-      std::unique_lock l{lock};
-      cond.wait(l, [this] { return done;});
+      std::unique_lock l(lock);
+      while (!done)
+       cond.wait(l);
       return 0;
     }
     int is_complete() {
-      std::scoped_lock l{lock};
+      std::scoped_lock l(lock);
       return done;
     }
     int get_return_value() {
-      std::scoped_lock l{lock};
+      std::scoped_lock l(lock);
       return rval;
     }
     void get() {
-      std::scoped_lock l{lock};
+      std::scoped_lock l(lock);
       ceph_assert(ref > 0);
       ref++;
     }
     void release() {
-      lock.lock();
+      std::scoped_lock l(lock);
       ceph_assert(!released);
       released = true;
-      put_unlock();
     }
     void put() {
-      lock.lock();
-      put_unlock();
-    }
-    void put_unlock() {
-      ceph_assert(ref > 0);
+      std::unique_lock l(lock);
       int n = --ref;
-      lock.unlock();
+      l.unlock();
       if (!n)
        delete this;
     }
   };
 
-  class C_PoolAsync_Safe : public Context {
-    PoolAsyncCompletionImpl *c;
+  inline void intrusive_ptr_add_ref(PoolAsyncCompletionImpl* p) {
+    p->get();
+  }
+  inline void intrusive_ptr_release(PoolAsyncCompletionImpl* p) {
+    p->put();
+  }
+
+  class CB_PoolAsync_Safe {
+    boost::intrusive_ptr<PoolAsyncCompletionImpl> p;
 
   public:
-    explicit C_PoolAsync_Safe(PoolAsyncCompletionImpl *_c) : c(_c) {
-      c->get();
-    }
-    ~C_PoolAsync_Safe() override {
-      c->put();
-    }
-  
-    void finish(int r) override {
-      c->lock.lock();
+    explicit CB_PoolAsync_Safe(boost::intrusive_ptr<PoolAsyncCompletionImpl> p)
+      : p(p) {}
+    ~CB_PoolAsync_Safe() = default;
+
+    void operator()(int r) {
+      auto c(std::move(p));
+      std::unique_lock l(c->lock);
       c->rval = r;
       c->done = true;
       c->cond.notify_all();
@@ -97,12 +100,10 @@ namespace librados {
       if (c->callback) {
        rados_callback_t cb = c->callback;
        void *cb_arg = c->callback_arg;
-       c->lock.unlock();
-       cb(c, cb_arg);
-       c->lock.lock();
+       l.unlock();
+       cb(c.get(), cb_arg);
+       l.lock();
       }
-
-      c->lock.unlock();
     }
   };
 }
index 3a96cc07cee4aec085db7213261e5b0dfd54927c..58524e46fe01159e3c7ae6aafa9af558d8bd2e4b 100644 (file)
 #undef dout_prefix
 #define dout_prefix *_dout << "librados: "
 
+namespace bc = boost::container;
 namespace bs = boost::system;
 namespace ca = ceph::async;
+namespace cb = ceph::buffer;
 
 librados::RadosClient::RadosClient(CephContext *cct_)
-  : Dispatcher(cct_->get()),
-    cct_deleter{cct_, [](CephContext *p) {p->put();}},
-    conf(cct_->_conf),
-    state(DISCONNECTED),
-    monclient(cct_, poolctx),
-    mgrclient(cct_, nullptr, &monclient.monmap),
-    messenger(NULL),
-    instance_id(0),
-    objecter(NULL),
-    timer(cct, lock),
-    refcnt(1),
-    log_last_version(0), log_cb(NULL), log_cb2(NULL), log_cb_arg(NULL),
-    finisher(cct, "radosclient", "fn-radosclient")
-{
-}
+  : Dispatcher(cct_->get()) {}
 
 int64_t librados::RadosClient::lookup_pool(const char *name)
 {
@@ -267,9 +255,9 @@ int librados::RadosClient::connect()
   ldout(cct, 1) << "starting objecter" << dendl;
 
   objecter = new (std::nothrow) Objecter(cct, messenger, &monclient,
-                         &finisher,
-                         cct->_conf->rados_mon_op_timeout,
-                         cct->_conf->rados_osd_op_timeout);
+                                        poolctx,
+                                        cct->_conf->rados_mon_op_timeout,
+                                        cct->_conf->rados_osd_op_timeout);
   if (!objecter)
     goto out;
   objecter->set_balanced_budget();
@@ -324,10 +312,6 @@ int librados::RadosClient::connect()
   objecter->start();
   lock.lock();
 
-  timer.init();
-
-  finisher.start();
-
   state = CONNECTED;
   instance_id = monclient.get_global_id();
 
@@ -370,12 +354,9 @@ void librados::RadosClient::shutdown()
       // make sure watch callbacks are flushed
       watch_flush();
     }
-    finisher.wait_for_empty();
-    finisher.stop();
   }
   state = DISCONNECTED;
   instance_id = 0;
-  timer.shutdown();   // will drop+retake lock
   l.unlock();
   if (need_objecter) {
     objecter->shutdown();
@@ -387,42 +368,49 @@ void librados::RadosClient::shutdown()
     messenger->shutdown();
     messenger->wait();
   }
+  poolctx.stop();
   ldout(cct, 1) << "shutdown" << dendl;
-  poolctx.finish();
 }
 
 int librados::RadosClient::watch_flush()
 {
   ldout(cct, 10) << __func__ << " enter" << dendl;
-  ceph::mutex mylock = ceph::make_mutex("RadosClient::watch_flush::mylock");
-  ceph::condition_variable cond;
-  bool done;
-  objecter->linger_callback_flush(new C_SafeCond(mylock, cond, &done));
+  objecter->linger_callback_flush(ca::use_blocked);
 
-  std::unique_lock l{mylock};
-  cond.wait(l, [&done] { return done; });
   ldout(cct, 10) << __func__ << " exit" << dendl;
   return 0;
 }
 
-struct C_aio_watch_flush_Complete : public Context {
+struct CB_aio_watch_flush_Complete {
   librados::RadosClient *client;
   librados::AioCompletionImpl *c;
 
-  C_aio_watch_flush_Complete(librados::RadosClient *_client, librados::AioCompletionImpl *_c)
+  CB_aio_watch_flush_Complete(librados::RadosClient *_client, librados::AioCompletionImpl *_c)
     : client(_client), c(_c) {
     c->get();
   }
 
-  void finish(int r) override {
+  CB_aio_watch_flush_Complete(const CB_aio_watch_flush_Complete&) = delete;
+  CB_aio_watch_flush_Complete operator =(const CB_aio_watch_flush_Complete&) = delete;
+  CB_aio_watch_flush_Complete(CB_aio_watch_flush_Complete&& rhs) {
+    client = rhs.client;
+    c = rhs.c;
+  }
+  CB_aio_watch_flush_Complete& operator =(CB_aio_watch_flush_Complete&& rhs) {
+    client = rhs.client;
+    c = rhs.c;
+    return *this;
+  }
+
+  void operator()() {
     c->lock.lock();
-    c->rval = r;
+    c->rval = 0;
     c->complete = true;
     c->cond.notify_all();
 
     if (c->callback_complete ||
        c->callback_safe) {
-      client->finisher.queue(new librados::C_AioComplete(c));
+      boost::asio::defer(client->finish_strand, librados::CB_AioComplete(c));
     }
     c->put_unlock();
   }
@@ -431,8 +419,7 @@ struct C_aio_watch_flush_Complete : public Context {
 int librados::RadosClient::async_watch_flush(AioCompletionImpl *c)
 {
   ldout(cct, 10) << __func__ << " enter" << dendl;
-  Context *oncomplete = new C_aio_watch_flush_Complete(this, c);
-  objecter->linger_callback_flush(oncomplete);
+  objecter->linger_callback_flush(CB_aio_watch_flush_Complete(this, c));
   ldout(cct, 10) << __func__ << " exit" << dendl;
   return 0;
 }
@@ -609,15 +596,9 @@ int librados::RadosClient::wait_for_osdmap()
 
 int librados::RadosClient::wait_for_latest_osdmap()
 {
-  ceph::mutex mylock = ceph::make_mutex("RadosClient::wait_for_latest_osdmap");
-  ceph::condition_variable cond;
-  bool done;
-
-  objecter->wait_for_latest_osdmap(new C_SafeCond(mylock, cond, &done));
-
-  std::unique_lock l{mylock};
-  cond.wait(l, [&done] {return done;});
-  return 0;
+  bs::error_code ec;
+  objecter->wait_for_latest_osdmap(ca::use_blocked[ec]);
+  return ceph::from_error_code(ec);
 }
 
 int librados::RadosClient::pool_list(std::list<std::pair<int64_t, string> >& v)
@@ -635,20 +616,22 @@ int librados::RadosClient::pool_list(std::list<std::pair<int64_t, string> >& v)
 
 int librados::RadosClient::get_pool_stats(std::list<string>& pools,
                                          map<string,::pool_stat_t> *result,
-                                         bool *per_pool)
+                                         bool *pper_pool)
 {
-  ceph::mutex mylock = ceph::make_mutex("RadosClient::get_pool_stats::mylock");
-  ceph::condition_variable cond;
-  bool done;
-  int ret = 0;
+  bs::error_code ec;
 
-  objecter->get_pool_stats(pools, result, per_pool,
-                          new C_SafeCond(mylock, cond, &done,
-                                         &ret));
+  std::vector<std::string> v(pools.begin(), pools.end());
 
-  unique_lock l{mylock};
-  cond.wait(l, [&done] { return done;});
-  return ret;
+  auto [res, per_pool] = objecter->get_pool_stats(v, ca::use_blocked[ec]);
+  if (ec)
+    return ceph::from_error_code(ec);
+
+  if (per_pool)
+    *pper_pool = per_pool;
+  if (result)
+    result->insert(res.begin(), res.end());
+
+  return 0;
 }
 
 bool librados::RadosClient::get_pool_is_selfmanaged_snaps_mode(
@@ -710,14 +693,10 @@ int librados::RadosClient::pool_create(string& name,
   ceph::condition_variable cond;
   bool done;
   Context *onfinish = new C_SafeCond(mylock, cond, &done, &reply);
-  reply = objecter->create_pool(name, onfinish, crush_rule);
+  objecter->create_pool(name, onfinish, crush_rule);
 
-  if (reply < 0) {
-    delete onfinish;
-  } else {
-    std::unique_lock l{mylock};
-    cond.wait(l, [&done] { return done; });
-  }
+  std::unique_lock l{mylock};
+  cond.wait(l, [&done] { return done; });
   return reply;
 }
 
@@ -729,11 +708,8 @@ int librados::RadosClient::pool_create_async(string& name,
   if (r < 0)
     return r;
 
-  Context *onfinish = new C_PoolAsync_Safe(c);
-  r = objecter->create_pool(name, onfinish, crush_rule);
-  if (r < 0) {
-    delete onfinish;
-  }
+  Context *onfinish = make_lambda_context(CB_PoolAsync_Safe(c));
+  objecter->create_pool(name, onfinish, crush_rule);
   return r;
 }
 
@@ -772,14 +748,10 @@ int librados::RadosClient::pool_delete(const char *name)
   bool done;
   int ret;
   Context *onfinish = new C_SafeCond(mylock, cond, &done, &ret);
-  ret = objecter->delete_pool(name, onfinish);
+  objecter->delete_pool(name, onfinish);
 
-  if (ret < 0) {
-    delete onfinish;
-  } else {
-    std::unique_lock l{mylock};
-    cond.wait(l, [&done] { return done;});
-  }
+  std::unique_lock l{mylock};
+  cond.wait(l, [&done] { return done;});
   return ret;
 }
 
@@ -789,11 +761,8 @@ int librados::RadosClient::pool_delete_async(const char *name, PoolAsyncCompleti
   if (r < 0)
     return r;
 
-  Context *onfinish = new C_PoolAsync_Safe(c);
-  r = objecter->delete_pool(name, onfinish);
-  if (r < 0) {
-    delete onfinish;
-  }
+  Context *onfinish = make_lambda_context(CB_PoolAsync_Safe(c));
+  objecter->delete_pool(name, onfinish);
   return r;
 }
 
@@ -952,44 +921,36 @@ int librados::RadosClient::osd_command(int osd, vector<string>& cmd,
                                       const bufferlist& inbl,
                                       bufferlist *poutbl, string *prs)
 {
-  ceph::mutex mylock = ceph::make_mutex("RadosClient::osd_command::mylock");
-  ceph::condition_variable cond;
-  bool done;
-  int ret;
   ceph_tid_t tid;
 
   if (osd < 0)
     return -EINVAL;
 
-  {
-    std::lock_guard l{mylock};
-    // XXX do anything with tid?
-    objecter->osd_command(osd, cmd, inbl, &tid, poutbl, prs,
-                         new C_SafeCond(mylock, cond, &done, &ret));
-  }
-  std::unique_lock l{mylock};
-  cond.wait(l, [&done] { return done;});
-  return ret;
+
+  // XXX do anything with tid?
+  bs::error_code ec;
+  auto [s, bl] = objecter->osd_command(osd, std::move(cmd), cb::list(inbl),
+                                      &tid, ca::use_blocked[ec]);
+  if (poutbl)
+    *poutbl = std::move(bl);
+  if (prs)
+    *prs = std::move(s);
+  return ceph::from_error_code(ec);
 }
 
 int librados::RadosClient::pg_command(pg_t pgid, vector<string>& cmd,
                                      const bufferlist& inbl,
                                      bufferlist *poutbl, string *prs)
 {
-  ceph::mutex mylock = ceph::make_mutex("RadosClient::pg_command::mylock");
-  ceph::condition_variable cond;
-  bool done;
-  int ret;
   ceph_tid_t tid;
-
-  {
-    std::lock_guard l{lock};
-    objecter->pg_command(pgid, cmd, inbl, &tid, poutbl, prs,
-                        new C_SafeCond(mylock, cond, &done, &ret));
-  }
-  std::unique_lock l{mylock};
-  cond.wait(l, [&done] { return done;});
-  return ret;
+  bs::error_code ec;
+  auto [s, bl] = objecter->pg_command(pgid, std::move(cmd), inbl, &tid,
+                                     ca::use_blocked[ec]);
+  if (poutbl)
+    *poutbl = std::move(bl);
+  if (prs)
+    *prs = std::move(s);
+  return ceph::from_error_code(ec);
 }
 
 int librados::RadosClient::monitor_log(const string& level,
index 5ad083ead05eb864c776c9378bfd5eb516d343ec..7475fc678d12863da79e8421068233b34f538f9f 100644 (file)
@@ -23,7 +23,6 @@
 #include "common/async/context_pool.h"
 #include "common/config_fwd.h"
 #include "common/Cond.h"
-#include "common/Timer.h"
 #include "common/ceph_mutex.h"
 #include "common/ceph_time.h"
 #include "common/config_obs.h"
@@ -44,11 +43,14 @@ class AioCompletionImpl;
 class librados::RadosClient : public Dispatcher,
                              public md_config_obs_t
 {
+public:
+  using Dispatcher::cct;
+private:
   std::unique_ptr<CephContext,
-                 std::function<void(CephContext*)> > cct_deleter;
+                 std::function<void(CephContext*)> > cct_deleter{
+    cct, [](CephContext *p) {p->put();}};
 
 public:
-  using Dispatcher::cct;
   const ConfigProxy& conf{cct->_conf};
   ceph::async::io_context_pool poolctx;
 private:
@@ -56,13 +58,13 @@ private:
     DISCONNECTED,
     CONNECTING,
     CONNECTED,
-  } state;
+  } state{DISCONNECTED};
 
-  MonClient monclient;
-  MgrClient mgrclient;
-  Messenger *messenger;
+  MonClient monclient{cct, poolctx};
+  MgrClient mgrclient{cct, nullptr, &monclient.monmap};
+  Messenger *messenger{nullptr};
 
-  uint64_t instance_id;
+  uint64_t instance_id{0};
 
   bool _dispatch(Message *m);
   bool ms_dispatch(Message *m) override;
@@ -72,17 +74,16 @@ private:
   void ms_handle_remote_reset(Connection *con) override;
   bool ms_handle_refused(Connection *con) override;
 
-  Objecter *objecter;
+  Objecter *objecter{nullptr};
 
   ceph::mutex lock = ceph::make_mutex("librados::RadosClient::lock");
   ceph::condition_variable cond;
-  SafeTimer timer;
-  int refcnt;
+  int refcnt{1};
 
-  version_t log_last_version;
-  rados_log_callback_t log_cb;
-  rados_log_callback2_t log_cb2;
-  void *log_cb_arg;
+  version_t log_last_version{0};
+  rados_log_callback_t log_cb{nullptr};
+  rados_log_callback2_t log_cb2{nullptr};
+  void *log_cb_arg{nullptr};
   string log_watch;
 
   bool service_daemon = false;
@@ -92,9 +93,9 @@ private:
   int wait_for_osdmap();
 
 public:
-  Finisher finisher;
+  boost::asio::io_context::strand finish_strand{poolctx.get_io_context()};
 
-  explicit RadosClient(CephContext *cct_);
+  explicit RadosClient(CephContext *cct);
   ~RadosClient() override;
   int ping_monitor(std::string mon_id, std::string *result);
   int connect();
index decaeeacccecd50fc1c559a583811c5c35cd279b..701c2964c37b49ca977e32f2e706e35dd3d6c93d 100644 (file)
@@ -10,6 +10,7 @@
 #include "common/common_init.h"
 #include "common/TracepointProvider.h"
 #include "common/hobject.h"
+#include "common/async/waiter.h"
 #include "include/rados/librados.h"
 #include "include/types.h"
 #include <include/stringify.h>
@@ -2018,40 +2019,37 @@ extern "C" int _rados_object_list(rados_ioctx_t io,
   // FIPS zeroization audit 20191116: this memset is not security related.
   memset(result_items, 0, sizeof(rados_object_list_item) * result_item_count);
 
-  std::list<librados::ListObjectImpl> result;
-  hobject_t next_hash;
-
   bufferlist filter_bl;
   if (filter_buf != nullptr) {
     filter_bl.append(filter_buf, filter_buf_len);
   }
 
-  C_SaferCond cond;
-  ctx->objecter->enumerate_objects(
+  ceph::async::waiter<boost::system::error_code,
+                     std::vector<librados::ListObjectImpl>,
+                     hobject_t> w;
+  ctx->objecter->enumerate_objects<librados::ListObjectImpl>(
       ctx->poolid,
       ctx->oloc.nspace,
       *((hobject_t*)start),
       *((hobject_t*)finish),
       result_item_count,
       filter_bl,
-      &result,
-      &next_hash,
-      &cond);
+      w);
 
   hobject_t *next_hobj = (hobject_t*)(*next);
   ceph_assert(next_hobj);
 
-  int r = cond.wait();
-  if (r < 0) {
+  auto [ec, result, next_hash] = w.wait();
+
+  if (ec) {
     *next_hobj = hobject_t::get_max();
-    return r;
+    return ceph::from_error_code(ec);
   }
 
   ceph_assert(result.size() <= result_item_count);  // Don't overflow!
 
   int k = 0;
-  for (std::list<librados::ListObjectImpl>::iterator i = result.begin();
-       i != result.end(); ++i) {
+  for (auto i = result.begin(); i != result.end(); ++i) {
     rados_object_list_item &item = result_items[k++];
     do_out_buffer(i->oid, &item.oid, &item.oid_length);
     do_out_buffer(i->nspace, &item.nspace, &item.nspace_length);
@@ -2528,7 +2526,7 @@ struct AioGetxattrData {
   bufferlist bl;
   char* user_buf;
   size_t len;
-  struct librados::C_AioCompleteAndSafe user_completion;
+  struct librados::CB_AioCompleteAndSafe user_completion;
 };
 
 static void rados_aio_getxattr_complete(rados_completion_t c, void *arg) {
@@ -2543,7 +2541,7 @@ static void rados_aio_getxattr_complete(rados_completion_t c, void *arg) {
       rc = cdata->bl.length();
     }
   }
-  cdata->user_completion.finish(rc);
+  cdata->user_completion(rc);
   reinterpret_cast<librados::AioCompletionImpl*>(c)->put();
   delete cdata;
 }
@@ -2583,7 +2581,7 @@ struct AioGetxattrsData {
   }
   librados::RadosXattrsIter *it;
   rados_xattrs_iter_t *iter;
-  struct librados::C_AioCompleteAndSafe user_completion;
+  struct librados::CB_AioCompleteAndSafe user_completion;
 };
 }
 
@@ -2591,12 +2589,12 @@ static void rados_aio_getxattrs_complete(rados_completion_t c, void *arg) {
   AioGetxattrsData *cdata = reinterpret_cast<AioGetxattrsData*>(arg);
   int rc = _rados_aio_get_return_value(c);
   if (rc) {
-    cdata->user_completion.finish(rc);
+    cdata->user_completion(rc);
   } else {
     cdata->it->i = cdata->it->attrset.begin();
     *cdata->iter = cdata->it;
     cdata->it = 0;
-    cdata->user_completion.finish(0);
+    cdata->user_completion(0);
   }
   reinterpret_cast<librados::AioCompletionImpl*>(c)->put();
   delete cdata;
@@ -3168,7 +3166,7 @@ LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_assert_version);
 extern "C" void _rados_write_op_assert_exists(rados_write_op_t write_op)
 {
   tracepoint(librados, rados_write_op_assert_exists_enter, write_op);
-  ((::ObjectOperation *)write_op)->stat(NULL, (ceph::real_time *)NULL, NULL);
+  ((::ObjectOperation *)write_op)->stat(nullptr, nullptr, nullptr);
   tracepoint(librados, rados_write_op_assert_exists_exit);
 }
 LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_assert_exists);
@@ -3593,7 +3591,7 @@ LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_assert_version);
 extern "C" void _rados_read_op_assert_exists(rados_read_op_t read_op)
 {
   tracepoint(librados, rados_read_op_assert_exists_enter, read_op);
-  ((::ObjectOperation *)read_op)->stat(NULL, (ceph::real_time *)NULL, NULL);
+  ((::ObjectOperation *)read_op)->stat(nullptr, nullptr, nullptr);
   tracepoint(librados, rados_read_op_assert_exists_exit);
 }
 LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_assert_exists);
@@ -3819,7 +3817,7 @@ extern "C" void _rados_read_op_getxattrs(rados_read_op_t read_op,
   tracepoint(librados, rados_read_op_getxattrs_enter, read_op, prval);
   librados::RadosXattrsIter *xattrs_iter = new librados::RadosXattrsIter;
   ((::ObjectOperation *)read_op)->getxattrs(&xattrs_iter->attrset, prval);
-  ((::ObjectOperation *)read_op)->add_handler(new C_XattrsIter(xattrs_iter));
+  ((::ObjectOperation *)read_op)->set_handler(new C_XattrsIter(xattrs_iter));
   *iter = xattrs_iter;
   tracepoint(librados, rados_read_op_getxattrs_exit, *iter);
 }
@@ -3843,7 +3841,7 @@ extern "C" void _rados_read_op_omap_get_vals(rados_read_op_t read_op,
     &omap_iter->values,
     nullptr,
     prval);
-  ((::ObjectOperation *)read_op)->add_handler(new C_OmapIter(omap_iter));
+  ((::ObjectOperation *)read_op)->set_handler(new C_OmapIter(omap_iter));
   *iter = omap_iter;
   tracepoint(librados, rados_read_op_omap_get_vals_exit, *iter);
 }
@@ -3868,7 +3866,7 @@ extern "C" void _rados_read_op_omap_get_vals2(rados_read_op_t read_op,
     &omap_iter->values,
     (bool*)pmore,
     prval);
-  ((::ObjectOperation *)read_op)->add_handler(new C_OmapIter(omap_iter));
+  ((::ObjectOperation *)read_op)->set_handler(new C_OmapIter(omap_iter));
   *iter = omap_iter;
   tracepoint(librados, rados_read_op_omap_get_vals_exit, *iter);
 }
@@ -3900,7 +3898,7 @@ extern "C" void _rados_read_op_omap_get_keys(rados_read_op_t read_op,
   ((::ObjectOperation *)read_op)->omap_get_keys(
     start_after ? start_after : "",
     max_return, &ctx->keys, nullptr, prval);
-  ((::ObjectOperation *)read_op)->add_handler(ctx);
+  ((::ObjectOperation *)read_op)->set_handler(ctx);
   *iter = omap_iter;
   tracepoint(librados, rados_read_op_omap_get_keys_exit, *iter);
 }
@@ -3920,7 +3918,7 @@ extern "C" void _rados_read_op_omap_get_keys2(rados_read_op_t read_op,
     start_after ? start_after : "",
     max_return, &ctx->keys,
     (bool*)pmore, prval);
-  ((::ObjectOperation *)read_op)->add_handler(ctx);
+  ((::ObjectOperation *)read_op)->set_handler(ctx);
   *iter = omap_iter;
   tracepoint(librados, rados_read_op_omap_get_keys_exit, *iter);
 }
@@ -3935,7 +3933,7 @@ static void internal_rados_read_op_omap_get_vals_by_keys(rados_read_op_t read_op
   ((::ObjectOperation *)read_op)->omap_get_vals_by_keys(to_get,
                                                         &omap_iter->values,
                                                         prval);
-  ((::ObjectOperation *)read_op)->add_handler(new C_OmapIter(omap_iter));
+  ((::ObjectOperation *)read_op)->set_handler(new C_OmapIter(omap_iter));
   *iter = omap_iter;
 }
 
index 3702350984ae3a9bd2090cb3c1863bbb0454814e..bc399ea82e51ab48432eadd3f90f2c78095d952d 100644 (file)
@@ -21,6 +21,7 @@
 #include "common/common_init.h"
 #include "common/TracepointProvider.h"
 #include "common/hobject.h"
+#include "common/async/waiter.h"
 #include "include/rados/librados.h"
 #include "include/rados/librados.hpp"
 #include "include/types.h"
@@ -155,10 +156,11 @@ void librados::ObjectOperation::assert_exists()
 {
   ceph_assert(impl);
   ::ObjectOperation *o = &impl->o;
-  o->stat(NULL, (ceph::real_time*) NULL, NULL);
+  o->stat(nullptr, nullptr, nullptr);
 }
 
-void librados::ObjectOperation::exec(const char *cls, const char *method, bufferlist& inbl)
+void librados::ObjectOperation::exec(const char *cls, const char *method,
+                                    bufferlist& inbl)
 {
   ceph_assert(impl);
   ::ObjectOperation *o = &impl->o;
@@ -2008,7 +2010,7 @@ struct AioGetxattrDataPP {
   AioGetxattrDataPP(librados::AioCompletionImpl *c, bufferlist *_bl) :
     bl(_bl), completion(c) {}
   bufferlist *bl;
-  struct librados::C_AioCompleteAndSafe completion;
+  struct librados::CB_AioCompleteAndSafe completion;
 };
 
 static void rados_aio_getxattr_completepp(rados_completion_t c, void *arg) {
@@ -2017,7 +2019,7 @@ static void rados_aio_getxattr_completepp(rados_completion_t c, void *arg) {
   if (rc >= 0) {
     rc = cdata->bl->length();
   }
-  cdata->completion.finish(rc);
+  cdata->completion(rc);
   delete cdata;
 }
 
@@ -3040,29 +3042,27 @@ int librados::IoCtx::object_list(const ObjectCursor &start,
   ceph_assert(next != nullptr);
   result->clear();
 
-  C_SaferCond cond;
-  hobject_t next_hash;
-  std::list<librados::ListObjectImpl> obj_result;
-  io_ctx_impl->objecter->enumerate_objects(
+  ceph::async::waiter<boost::system::error_code,
+                     std::vector<librados::ListObjectImpl>,
+                     hobject_t>  w;
+  io_ctx_impl->objecter->enumerate_objects<librados::ListObjectImpl>(
       io_ctx_impl->poolid,
       io_ctx_impl->oloc.nspace,
       *((hobject_t*)start.c_cursor),
       *((hobject_t*)finish.c_cursor),
       result_item_count,
       filter,
-      &obj_result,
-      &next_hash,
-      &cond);
+      w);
 
-  int r = cond.wait();
-  if (r < 0) {
+  auto [ec, obj_result, next_hash] = w.wait();
+  if (ec) {
     next->set((rados_object_list_cursor)(new hobject_t(hobject_t::get_max())));
-    return r;
+    return ceph::from_error_code(ec);
   }
 
   next->set((rados_object_list_cursor)(new hobject_t(next_hash)));
 
-  for (std::list<librados::ListObjectImpl>::iterator i = obj_result.begin();
+  for (auto i = obj_result.begin();
        i != obj_result.end(); ++i) {
     ObjectItem oi;
     oi.oid = i->oid;
index 47b6d5ec6484f52c7b4b226f17561fc16f2ce9f7..08ebe2190cffa9e7e6e1bd43ca47f2c53ccf1dff 100644 (file)
@@ -2212,7 +2212,7 @@ void CDir::_omap_commit(int op_prio)
 
       // don't create new dirfrag blindly
       if (!is_new() && !state_test(CDir::STATE_FRAGMENTING))
-       op.stat(NULL, (ceph::real_time*) NULL, NULL);
+       op.stat(nullptr, nullptr, nullptr);
 
       if (!to_set.empty())
        op.omap_set(to_set);
@@ -2250,7 +2250,7 @@ void CDir::_omap_commit(int op_prio)
 
   // don't create new dirfrag blindly
   if (!is_new() && !state_test(CDir::STATE_FRAGMENTING))
-    op.stat(NULL, (ceph::real_time*)NULL, NULL);
+    op.stat(nullptr, nullptr, nullptr);
 
   /*
    * save the header at the last moment.. If we were to send it off before other
index cc858b0028ba135a07e0c972d42de4508398cfd7..c5c6359df3ec86cb85b9a9ed69313526d1193694 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "common/debug.h"
 #include "common/errno.h"
+#include "common/async/blocked_completion.h"
 
 #include "messages/MClientRequestForward.h"
 #include "messages/MMDSLoadTargets.h"
@@ -488,7 +489,7 @@ MDSRank::MDSRank(
     boost::asio::io_context& ioc) :
     cct(msgr->cct), mds_lock(mds_lock_), clog(clog_),
     timer(timer_), mdsmap(mdsmap_),
-    objecter(new Objecter(g_ceph_context, msgr, monc_, nullptr, 0, 0)),
+    objecter(new Objecter(g_ceph_context, msgr, monc_, ioc, 0, 0)),
     damage_table(whoami_), sessionmap(this),
     op_tracker(g_ceph_context, g_conf()->mds_enable_op_tracker,
                g_conf()->osd_num_op_tracker_shard),
@@ -1684,7 +1685,6 @@ void MDSRank::calc_recovery_set()
   dout(1) << " recovery set is " << rs << dendl;
 }
 
-
 void MDSRank::replay_start()
 {
   dout(1) << "replay_start" << dendl;
@@ -1695,17 +1695,20 @@ void MDSRank::replay_start()
   calc_recovery_set();
 
   // Check if we need to wait for a newer OSD map before starting
-  Context *fin = new C_IO_Wrapper(this, new C_MDS_BootStart(this, MDS_BOOT_INITIAL));
-  bool const ready = objecter->wait_for_map(
-      mdsmap->get_last_failure_osd_epoch(),
-      fin);
+  bool const ready = objecter->with_osdmap(
+    [this](const OSDMap& o) {
+      return o.get_epoch() >= mdsmap->get_last_failure_osd_epoch();
+    });
 
   if (ready) {
-    delete fin;
     boot_start();
   } else {
     dout(1) << " waiting for osdmap " << mdsmap->get_last_failure_osd_epoch()
            << " (which blacklists prior instance)" << dendl;
+    Context *fin = new C_IO_Wrapper(this, new C_MDS_BootStart(this, MDS_BOOT_INITIAL));
+    objecter->wait_for_map(
+      mdsmap->get_last_failure_osd_epoch(),
+      lambdafy(fin));
   }
 }
 
@@ -1757,10 +1760,11 @@ void MDSRank::standby_replay_restart()
     /* We are transitioning out of standby: wait for OSD map update
        before making final pass */
     dout(1) << "standby_replay_restart (final takeover pass)" << dendl;
-    Context *fin = new C_IO_Wrapper(this, new C_MDS_StandbyReplayRestart(this));
-    bool ready = objecter->wait_for_map(mdsmap->get_last_failure_osd_epoch(), fin);
+    bool ready = objecter->with_osdmap(
+      [this](const OSDMap& o) {
+       return o.get_epoch() >= mdsmap->get_last_failure_osd_epoch();
+      });
     if (ready) {
-      delete fin;
       mdlog->get_journaler()->reread_head_and_probe(
         new C_MDS_StandbyReplayRestartFinish(
           this,
@@ -1771,8 +1775,11 @@ void MDSRank::standby_replay_restart()
       dout(1) << " opening open_file_table (async)" << dendl;
       mdcache->open_file_table.load(nullptr);
     } else {
+      auto fin = new C_IO_Wrapper(this, new C_MDS_StandbyReplayRestart(this));
       dout(1) << " waiting for osdmap " << mdsmap->get_last_failure_osd_epoch()
-              << " (which blacklists prior instance)" << dendl;
+             << " (which blacklists prior instance)" << dendl;
+      objecter->wait_for_map(mdsmap->get_last_failure_osd_epoch(),
+                            lambdafy(fin));
     }
   }
 }
@@ -2495,12 +2502,9 @@ void MDSRankDispatcher::handle_asok_command(
       std::lock_guard l(mds_lock);
       set_osd_epoch_barrier(target_epoch);
     }
-    C_SaferCond cond;
-    bool already_got = objecter->wait_for_map(target_epoch, &cond);
-    if (!already_got) {
-      dout(4) << __func__ << ": waiting for OSD epoch " << target_epoch << dendl;
-      cond.wait();
-    }
+    boost::system::error_code ec;
+    dout(4) << __func__ << ": possibly waiting for OSD epoch " << target_epoch << dendl;
+    objecter->wait_for_map(target_epoch, ceph::async::use_blocked[ec]);
   } else if (command == "session ls" ||
             command == "client ls") {
     std::lock_guard l(mds_lock);
@@ -3433,7 +3437,7 @@ bool MDSRank::evict_client(int64_t session_id,
 
     Context *on_blacklist_done = new LambdaContext([this, fn](int r) {
       objecter->wait_for_latest_osdmap(
-       new C_OnFinisher(
+      lambdafy((new C_OnFinisher(
          new LambdaContext([this, fn](int r) {
               std::lock_guard l(mds_lock);
               auto epoch = objecter->with_osdmap([](const OSDMap &o){
@@ -3444,7 +3448,7 @@ bool MDSRank::evict_client(int64_t session_id,
 
               fn();
             }), finisher)
-       );
+      )));
     });
 
     dout(4) << "Sending mon blacklist command: " << cmd[0] << dendl;
index 4ccc2ac10421b34d1b442b57df888b8954341d8c..2bbcfeb49598689b09c71e23ba71cf2ed9061c0e 100644 (file)
@@ -16,6 +16,7 @@
 #define PURGE_QUEUE_H_
 
 #include "include/compact_set.h"
+#include "common/Finisher.h"
 #include "mds/MDSMap.h"
 #include "osdc/Journaler.h"
 
index a32babaa28359bdddaecc4fdfaeb8bff42254c89..a5c22c095321b3dc80dbc25e2b9221e6b33cce98 100644 (file)
@@ -5497,29 +5497,18 @@ int Server::check_layout_vxattr(MDRequestRef& mdr,
     if (req_epoch > epoch) {
 
       // well, our map is older. consult mds.
-      Context *fin = new C_IO_Wrapper(mds, new C_MDS_RetryRequest(mdcache, mdr));
-
-      if (!mds->objecter->wait_for_map(req_epoch, fin))
-        return r; // wait, fin will retry this request later
-
-      delete fin;
-
-      // now we have at least as new a map as the client, try again.
-      mds->objecter->with_osdmap([&](const OSDMap& osdmap) {
-          r = parse_layout_vxattr(name, value, osdmap, layout);
-          epoch = osdmap.get_epoch();
-        });
-
-      ceph_assert(epoch >= req_epoch); // otherwise wait_for_map() told a lie
+      auto fin = new C_IO_Wrapper(mds, new C_MDS_RetryRequest(mdcache, mdr));
 
+      mds->objecter->wait_for_map(req_epoch, lambdafy(fin));
+      return r;
     } else if (req_epoch == 0 && !mdr->waited_for_osdmap) {
 
       // For compatibility with client w/ old code, we still need get the
       // latest map. One day if COMPACT_VERSION of MClientRequest >=3,
       // we can remove those code.
       mdr->waited_for_osdmap = true;
-      mds->objecter->wait_for_latest_osdmap(new C_IO_Wrapper(
-                             mds, new C_MDS_RetryRequest(mdcache, mdr)));
+      mds->objecter->wait_for_latest_osdmap(std::ref(*new C_IO_Wrapper(
+        mds, new C_MDS_RetryRequest(mdcache, mdr))));
       return r;
     }
   }
index 51bc134a21f7704bbd9fd2361fc5f86c3ddc870a..6b64e4feee2a38c0e878f76d59668490bf4a3552 100644 (file)
 class MGetPoolStats : public PaxosServiceMessage {
 public:
   uuid_d fsid;
-  std::list<std::string> pools;
+  std::vector<std::string> pools;
 
   MGetPoolStats() : PaxosServiceMessage{MSG_GETPOOLSTATS, 0} {}
-  MGetPoolStats(const uuid_d& f, ceph_tid_t t, std::list<std::string>& ls, version_t l) :
+  MGetPoolStats(const uuid_d& f, ceph_tid_t t, std::vector<std::string>& ls, version_t l) :
     PaxosServiceMessage{MSG_GETPOOLSTATS, l},
     fsid(f), pools(ls) {
     set_tid(t);
index ff474d3d5db55083d533427591c401f82bae6bfc..063b6f7cb289904f6c2dc4de7d0888912ef07ee4 100644 (file)
@@ -22,7 +22,7 @@ class MGetPoolStatsReply : public PaxosServiceMessage {
 
 public:
   uuid_d fsid;
-  std::map<std::string,pool_stat_t> pool_stats;
+  boost::container::flat_map<std::string, pool_stat_t> pool_stats;
   bool per_pool = false;
 
   MGetPoolStatsReply() : PaxosServiceMessage{MSG_GETPOOLSTATSREPLY, 0,
index 3ac66e9a9e77526a73cc6a7109e33111b9f86c06..391d632cc75565ed13ef1744a175f0a5115b1ffb 100644 (file)
@@ -155,10 +155,9 @@ ceph_send_command(BaseMgrModule *self, PyObject *args)
     // can wait for those.
     auto c = new LambdaContext([command_c, self](int command_r){
       self->py_modules->get_objecter().wait_for_latest_osdmap(
-          new LambdaContext([command_c, command_r](int wait_r){
-            command_c->complete(command_r);
-          })
-      );
+       [command_c, command_r](boost::system::error_code) {
+         command_c->complete(command_r);
+       });
     });
 
     self->py_modules->get_monc().start_mon_command(
@@ -186,9 +185,12 @@ ceph_send_command(BaseMgrModule *self, PyObject *args)
         {cmd_json},
         {},
         &tid,
-        &command_c->outbl,
-        &command_c->outs,
-        new C_OnFinisher(command_c, &self->py_modules->cmd_finisher));
+       [command_c, f = &self->py_modules->cmd_finisher]
+       (boost::system::error_code ec, std::string s, ceph::buffer::list bl) {
+         command_c->outs = std::move(s);
+         command_c->outbl = std::move(bl);
+         f->queue(command_c);
+       });
   } else if (std::string(type) == "mds") {
     int r = self->py_modules->get_client().mds_command(
         name,
@@ -221,9 +223,12 @@ ceph_send_command(BaseMgrModule *self, PyObject *args)
         {cmd_json},
         {},
         &tid,
-        &command_c->outbl,
-        &command_c->outs,
-        new C_OnFinisher(command_c, &self->py_modules->cmd_finisher));
+       [command_c, f = &self->py_modules->cmd_finisher]
+       (boost::system::error_code ec, std::string s, ceph::buffer::list bl) {
+         command_c->outs = std::move(s);
+         command_c->outbl = std::move(bl);
+         f->queue(command_c);
+       });
     PyEval_RestoreThread(tstate);
     return nullptr;
   } else {
index 7e30eac618168cbacbad54a124665d4acc5fc251..79374518865709bb39692fe7c2d560bb0f9bc65b 100644 (file)
@@ -46,7 +46,7 @@ MgrStandby::MgrStandby(int argc, const char **argv) :
                     "mgr",
                     Messenger::get_pid_nonce(),
                     0)),
-  objecter{g_ceph_context, client_messenger.get(), &monc, NULL, 0, 0},
+  objecter{g_ceph_context, client_messenger.get(), &monc, poolctx, 0, 0},
   client{client_messenger.get(), &monc, &objecter},
   mgrc(g_ceph_context, client_messenger.get(), &monc.monmap),
   log_client(g_ceph_context, client_messenger.get(), &monc.monmap, LogClient::NO_FLAGS),
index 39cf8d19beafd59f2737b8fba437731c01422aee..b410b82b57b6c5c891b0fafd3923e0e35bf02b0e 100644 (file)
@@ -51,6 +51,7 @@
 #include "common/ceph_releases.h"
 #include "common/ceph_time.h"
 #include "common/version.h"
+#include "common/async/blocked_completion.h"
 #include "common/pick_address.h"
 #include "common/blkdev.h"
 #include "common/numa.h"
@@ -277,7 +278,7 @@ OSDService::OSDService(OSD *osd, ceph::async::io_context_pool& poolctx) :
   poolctx(poolctx),
   objecter(make_unique<Objecter>(osd->client_messenger->cct,
                                 osd->objecter_messenger,
-                                osd->monc, nullptr, 0, 0)),
+                                osd->monc, poolctx, 0, 0)),
   m_objecter_finishers(cct->_conf->osd_objecter_finishers),
   watch_timer(osd->client_messenger->cct, watch_lock),
   next_notif_id(0),
@@ -9914,9 +9915,8 @@ void OSD::get_latest_osdmap()
 {
   dout(10) << __func__ << " -- start" << dendl;
 
-  C_SaferCond cond;
-  service.objecter->wait_for_latest_osdmap(&cond);
-  cond.wait();
+  boost::system::error_code ec;
+  service.objecter->wait_for_latest_osdmap(ceph::async::use_blocked[ec]);
 
   dout(10) << __func__ << " -- finish" << dendl;
 }
index 634e385c41bae0d3631a9d5b8b35495533127bea..f53c5a646101e402a2a7954db42ce7f1d66ba4d1 100644 (file)
@@ -571,7 +571,7 @@ private:
   mempool::osdmap::map<int64_t,pg_pool_t> pools;
   mempool::osdmap::map<int64_t,std::string> pool_name;
   mempool::osdmap::map<std::string, std::map<std::string,std::string>> erasure_code_profiles;
-  mempool::osdmap::map<std::string,int64_t> name_pool;
+  mempool::osdmap::map<std::string,int64_t, std::less<>> name_pool;
 
   std::shared_ptr< mempool::osdmap::vector<uuid_d> > osd_uuid;
   mempool::osdmap::vector<osd_xinfo_t> osd_xinfo;
@@ -1299,7 +1299,7 @@ public:
     return new_purged_snaps;
   }
 
-  int64_t lookup_pg_pool_name(const std::string& name) const {
+  int64_t lookup_pg_pool_name(std::string_view name) const {
     auto p = name_pool.find(name);
     if (p == name_pool.end())
       return -ENOENT;
index 416f5cafd762aa81bccc3660c048023fe37aceea..3c235ecc682dc03026523b4e48688ef6bfaf58d9 100644 (file)
@@ -1689,7 +1689,7 @@ bool pg_pool_t::is_removed_snap(snapid_t s) const
     return removed_snaps.contains(s);
 }
 
-snapid_t pg_pool_t::snap_exists(const char *s) const
+snapid_t pg_pool_t::snap_exists(std::string_view s) const
 {
   for (auto p = snaps.cbegin(); p != snaps.cend(); ++p)
     if (p->second.name == s)
index c74f119745ba2256ee71231ee33de3c5d8d830ba..3169e63c369eefd204a1a9828dbc93236feba96d 100644 (file)
@@ -256,10 +256,10 @@ namespace std {
 // does it go in.
 struct object_locator_t {
   // You specify either the hash or the key -- not both
-  int64_t pool;     ///< pool id
-  std::string key;       ///< key std::string (if non-empty)
+  std::int64_t pool;     ///< pool id
+  std::string key;       ///< key string (if non-empty)
   std::string nspace;    ///< namespace
-  int64_t hash;     ///< hash position (if >= 0)
+  std::int64_t hash;     ///< hash position (if >= 0)
 
   explicit object_locator_t()
     : pool(-1), hash(-1) {}
@@ -267,11 +267,11 @@ struct object_locator_t {
     : pool(po), hash(-1)  {}
   explicit object_locator_t(int64_t po, int64_t ps)
     : pool(po), hash(ps)  {}
-  explicit object_locator_t(int64_t po, std::string ns)
+  explicit object_locator_t(int64_t po, std::string_view ns)
     : pool(po), nspace(ns), hash(-1) {}
-  explicit object_locator_t(int64_t po, std::string ns, int64_t ps)
+  explicit object_locator_t(int64_t po, std::string_view ns, int64_t ps)
     : pool(po), nspace(ns), hash(ps) {}
-  explicit object_locator_t(int64_t po, std::string ns, std::string s)
+  explicit object_locator_t(int64_t po, std::string_view ns, std::string_view s)
     : pool(po), key(s), nspace(ns), hash(-1) {}
   explicit object_locator_t(const hobject_t& soid)
     : pool(soid.pool), key(soid.get_key()), nspace(soid.nspace), hash(-1) {}
@@ -1699,7 +1699,7 @@ public:
   bool is_unmanaged_snaps_mode() const;
   bool is_removed_snap(snapid_t s) const;
 
-  snapid_t snap_exists(const char *s) const;
+  snapid_t snap_exists(std::string_view s) const;
   void add_snap(const char *n, utime_t stamp);
   uint64_t add_unmanaged_snap(bool preoctopus_compat);
   void remove_snap(snapid_t s);
@@ -4964,20 +4964,24 @@ using pg_missing_t = pg_missing_set<false>;
 using pg_missing_tracker_t = pg_missing_set<true>;
 
 
+
+
 /**
  * pg list objects response format
  *
  */
-struct pg_nls_response_t {
+
+template<typename T>
+struct pg_nls_response_template {
   collection_list_handle_t handle;
-  std::list<librados::ListObjectImpl> entries;
+  std::vector<T> entries;
 
   void encode(ceph::buffer::list& bl) const {
     ENCODE_START(1, 1, bl);
     encode(handle, bl);
     __u32 n = (__u32)entries.size();
     encode(n, bl);
-    for (std::list<librados::ListObjectImpl>::const_iterator i = entries.begin(); i != entries.end(); ++i) {
+    for (auto i = entries.begin(); i != entries.end(); ++i) {
       encode(i->nspace, bl);
       encode(i->oid, bl);
       encode(i->locator, bl);
@@ -4991,7 +4995,7 @@ struct pg_nls_response_t {
     decode(n, bl);
     entries.clear();
     while (n--) {
-      librados::ListObjectImpl i;
+      T i;
       decode(i.nspace, bl);
       decode(i.oid, bl);
       decode(i.locator, bl);
@@ -5002,7 +5006,7 @@ struct pg_nls_response_t {
   void dump(ceph::Formatter *f) const {
     f->dump_stream("handle") << handle;
     f->open_array_section("entries");
-    for (std::list<librados::ListObjectImpl>::const_iterator p = entries.begin(); p != entries.end(); ++p) {
+    for (auto p = entries.begin(); p != entries.end(); ++p) {
       f->open_object_section("object");
       f->dump_string("namespace", p->nspace);
       f->dump_string("object", p->oid);
@@ -5011,19 +5015,19 @@ struct pg_nls_response_t {
     }
     f->close_section();
   }
-  static void generate_test_instances(std::list<pg_nls_response_t*>& o) {
-    o.push_back(new pg_nls_response_t);
-    o.push_back(new pg_nls_response_t);
+  static void generate_test_instances(std::list<pg_nls_response_template<T>*>& o) {
+    o.push_back(new pg_nls_response_template<T>);
+    o.push_back(new pg_nls_response_template<T>);
     o.back()->handle = hobject_t(object_t("hi"), "key", 1, 2, -1, "");
     o.back()->entries.push_back(librados::ListObjectImpl("", "one", ""));
     o.back()->entries.push_back(librados::ListObjectImpl("", "two", "twokey"));
     o.back()->entries.push_back(librados::ListObjectImpl("", "three", ""));
-    o.push_back(new pg_nls_response_t);
+    o.push_back(new pg_nls_response_template<T>);
     o.back()->handle = hobject_t(object_t("hi"), "key", 3, 4, -1, "");
     o.back()->entries.push_back(librados::ListObjectImpl("n1", "n1one", ""));
     o.back()->entries.push_back(librados::ListObjectImpl("n1", "n1two", "n1twokey"));
     o.back()->entries.push_back(librados::ListObjectImpl("n1", "n1three", ""));
-    o.push_back(new pg_nls_response_t);
+    o.push_back(new pg_nls_response_template<T>);
     o.back()->handle = hobject_t(object_t("hi"), "key", 5, 6, -1, "");
     o.back()->entries.push_back(librados::ListObjectImpl("", "one", ""));
     o.back()->entries.push_back(librados::ListObjectImpl("", "two", "twokey"));
@@ -5034,6 +5038,8 @@ struct pg_nls_response_t {
   }
 };
 
+using pg_nls_response_t = pg_nls_response_template<librados::ListObjectImpl>;
+
 WRITE_CLASS_ENCODER(pg_nls_response_t)
 
 // For backwards compatibility with older OSD requests
index 2a8577f05bde2022065a932383bb38ed405adebb..79426643400fd39c82256977d72130688ddff445 100644 (file)
  *
  */
 
+#include <algorithm>
 #include <cerrno>
 
 #include "Objecter.h"
 #include "osd/OSDMap.h"
+#include "osd/error_code.h"
 #include "Filer.h"
 
 #include "mon/MonClient.h"
+#include "mon/error_code.h"
 
 #include "msg/Messenger.h"
 #include "msg/Message.h"
@@ -52,6 +55,9 @@
 #include "include/str_list.h"
 #include "common/errno.h"
 #include "common/EventTrace.h"
+#include "common/async/waiter.h"
+#include "error_code.h"
+
 
 using std::list;
 using std::make_pair;
@@ -82,6 +88,11 @@ using ceph::shunique_lock;
 using ceph::acquire_shared;
 using ceph::acquire_unique;
 
+namespace bc = boost::container;
+namespace bs = boost::system;
+namespace ca = ceph::async;
+namespace cb = ceph::buffer;
+
 #define dout_subsys ceph_subsys_objecter
 #undef dout_prefix
 #define dout_prefix *_dout << messenger->get_myname() << ".objecter "
@@ -165,6 +176,11 @@ enum {
   l_osdc_last,
 };
 
+namespace {
+inline bs::error_code osdcode(int r) {
+  return (r < 0) ? bs::error_code(-r, osd_category()) : bs::error_code();
+}
+}
 
 // config obs ----------------------------
 
@@ -180,55 +196,19 @@ public:
   int call(std::string_view command, const cmdmap_t& cmdmap,
           Formatter *f,
           std::ostream& ss,
-          ceph::buffer::list& out) override;
+          cb::list& out) override;
 };
 
-/**
- * This is a more limited form of C_Contexts, but that requires
- * a ceph_context which we don't have here.
- */
-class ObjectOperation::C_TwoContexts : public Context {
-  Context *first;
-  Context *second;
-public:
-  C_TwoContexts(Context *first, Context *second) :
-    first(first), second(second) {}
-  void finish(int r) override {
-    first->complete(r);
-    second->complete(r);
-    first = NULL;
-    second = NULL;
-  }
-
-  ~C_TwoContexts() override {
-    delete first;
-    delete second;
-  }
-};
-
-void ObjectOperation::add_handler(Context *extra) {
-  size_t last = out_handler.size() - 1;
-  Context *orig = out_handler[last];
-  if (orig) {
-    Context *wrapper = new C_TwoContexts(orig, extra);
-    out_handler[last] = wrapper;
-  } else {
-    out_handler[last] = extra;
-  }
-}
-
-Objecter::OSDSession::unique_completion_lock Objecter::OSDSession::get_lock(
-  object_t& oid)
+std::unique_lock<std::mutex> Objecter::OSDSession::get_lock(object_t& oid)
 {
   if (oid.name.empty())
-    return unique_completion_lock();
+    return {};
 
   static constexpr uint32_t HASH_PRIME = 1021;
   uint32_t h = ceph_str_hash_linux(oid.name.c_str(), oid.name.size())
     % HASH_PRIME;
 
-  return unique_completion_lock(completion_locks[h % num_locks],
-                               std::defer_lock);
+  return {completion_locks[h % num_locks], std::defer_lock};
 }
 
 const char** Objecter::get_tracked_conf_keys() const
@@ -385,7 +365,7 @@ void Objecter::init()
   }
 
   m_request_state_hook = new RequestStateHook(this);
-  AdminSocket* admin_socket = cct->get_admin_socket();
+  auto admin_socket = cct->get_admin_socket();
   int ret = admin_socket->register_command("objecter_requests",
                                           m_request_state_hook,
                                           "show in-progress osd requests");
@@ -432,57 +412,54 @@ void Objecter::shutdown()
   cct->_conf.remove_observer(this);
   wl.lock();
 
-  map<int,OSDSession*>::iterator p;
   while (!osd_sessions.empty()) {
-    p = osd_sessions.begin();
+    auto p = osd_sessions.begin();
     close_session(p->second);
   }
 
   while(!check_latest_map_lingers.empty()) {
-    map<uint64_t, LingerOp*>::iterator i = check_latest_map_lingers.begin();
+    auto i = check_latest_map_lingers.begin();
     i->second->put();
     check_latest_map_lingers.erase(i->first);
   }
 
   while(!check_latest_map_ops.empty()) {
-    map<ceph_tid_t, Op*>::iterator i = check_latest_map_ops.begin();
+    auto i = check_latest_map_ops.begin();
     i->second->put();
     check_latest_map_ops.erase(i->first);
   }
 
   while(!check_latest_map_commands.empty()) {
-    map<ceph_tid_t, CommandOp*>::iterator i
-      = check_latest_map_commands.begin();
+    auto i = check_latest_map_commands.begin();
     i->second->put();
     check_latest_map_commands.erase(i->first);
   }
 
   while(!poolstat_ops.empty()) {
-    map<ceph_tid_t,PoolStatOp*>::iterator i = poolstat_ops.begin();
+    auto i = poolstat_ops.begin();
     delete i->second;
     poolstat_ops.erase(i->first);
   }
 
   while(!statfs_ops.empty()) {
-    map<ceph_tid_t, StatfsOp*>::iterator i = statfs_ops.begin();
+    auto i = statfs_ops.begin();
     delete i->second;
     statfs_ops.erase(i->first);
   }
 
   while(!pool_ops.empty()) {
-    map<ceph_tid_t, PoolOp*>::iterator i = pool_ops.begin();
+    auto i = pool_ops.begin();
     delete i->second;
     pool_ops.erase(i->first);
   }
 
   ldout(cct, 20) << __func__ << " clearing up homeless session..." << dendl;
   while(!homeless_session->linger_ops.empty()) {
-    std::map<uint64_t, LingerOp*>::iterator i
-      = homeless_session->linger_ops.begin();
+    auto i = homeless_session->linger_ops.begin();
     ldout(cct, 10) << " linger_op " << i->first << dendl;
     LingerOp *lop = i->second;
     {
-      OSDSession::unique_lock swl(homeless_session->lock);
+      std::unique_lock swl(homeless_session->lock);
       _session_linger_op_remove(homeless_session, lop);
     }
     linger_ops.erase(lop->linger_id);
@@ -491,23 +468,22 @@ void Objecter::shutdown()
   }
 
   while(!homeless_session->ops.empty()) {
-    std::map<ceph_tid_t, Op*>::iterator i = homeless_session->ops.begin();
+    auto i = homeless_session->ops.begin();
     ldout(cct, 10) << " op " << i->first << dendl;
-    Op *op = i->second;
+    auto op = i->second;
     {
-      OSDSession::unique_lock swl(homeless_session->lock);
+      std::unique_lock swl(homeless_session->lock);
       _session_op_remove(homeless_session, op);
     }
     op->put();
   }
 
   while(!homeless_session->command_ops.empty()) {
-    std::map<ceph_tid_t, CommandOp*>::iterator i
-      = homeless_session->command_ops.begin();
+    auto i = homeless_session->command_ops.begin();
     ldout(cct, 10) << " command_op " << i->first << dendl;
-    CommandOp *cop = i->second;
+    auto cop = i->second;
     {
-      OSDSession::unique_lock swl(homeless_session->lock);
+      std::unique_lock swl(homeless_session->lock);
       _session_command_op_remove(homeless_session, cop);
     }
     cop->put();
@@ -533,7 +509,7 @@ void Objecter::shutdown()
   // This is safe because we guarantee no concurrent calls to
   // shutdown() with the ::initialized check at start.
   if (m_request_state_hook) {
-    AdminSocket* admin_socket = cct->get_admin_socket();
+    auto admin_socket = cct->get_admin_socket();
     admin_socket->unregister_commands(m_request_state_hook);
     delete m_request_state_hook;
     m_request_state_hook = NULL;
@@ -541,14 +517,14 @@ void Objecter::shutdown()
 }
 
 void Objecter::_send_linger(LingerOp *info,
-                           shunique_lock& sul)
+                           ceph::shunique_lock<ceph::shared_mutex>& sul)
 {
   ceph_assert(sul.owns_lock() && sul.mutex() == &rwlock);
 
   vector<OSDOp> opv;
-  Context *oncommit = NULL;
-  LingerOp::shared_lock watchl(info->watch_lock);
-  ceph::buffer::list *poutbl = NULL;
+  fu2::unique_function<Op::OpSig> oncommit;
+  std::shared_lock watchl(info->watch_lock);
+  cb::list *poutbl = nullptr;
   if (info->registered && info->is_watch) {
     ldout(cct, 15) << "send_linger " << info->linger_id << " reconnect"
                   << dendl;
@@ -557,22 +533,26 @@ void Objecter::_send_linger(LingerOp *info,
     opv.back().op.watch.cookie = info->get_cookie();
     opv.back().op.watch.op = CEPH_OSD_WATCH_OP_RECONNECT;
     opv.back().op.watch.gen = ++info->register_gen;
-    oncommit = new C_Linger_Reconnect(this, info);
+    oncommit = CB_Linger_Reconnect(this, info);
   } else {
     ldout(cct, 15) << "send_linger " << info->linger_id << " register"
                   << dendl;
     opv = info->ops;
-    C_Linger_Commit *c = new C_Linger_Commit(this, info);
+    // TODO Augment ca::Completion with an equivalent of
+    // target so we can handle these cases better.
+    auto c = std::make_unique<CB_Linger_Commit>(this, info);
     if (!info->is_watch) {
       info->notify_id = 0;
       poutbl = &c->outbl;
     }
-    oncommit = c;
+    oncommit = [c = std::move(c)](bs::error_code ec) mutable {
+                std::move(*c)(ec);
+              };
   }
   watchl.unlock();
-  Op *o = new Op(info->target.base_oid, info->target.base_oloc,
-                opv, info->target.flags | CEPH_OSD_FLAG_READ,
-                oncommit, info->pobjver);
+  auto o = new Op(info->target.base_oid, info->target.base_oloc,
+                 std::move(opv), info->target.flags | CEPH_OSD_FLAG_READ,
+                 std::move(oncommit), info->pobjver);
   o->outbl = poutbl;
   o->snapid = info->snap;
   o->snapc = info->snapc;
@@ -587,9 +567,9 @@ void Objecter::_send_linger(LingerOp *info,
 
   if (info->register_tid) {
     // repeat send.  cancel old registration op, if any.
-    OSDSession::unique_lock sl(info->session->lock);
+    std::unique_lock sl(info->session->lock);
     if (info->session->ops.count(info->register_tid)) {
-      Op *o = info->session->ops[info->register_tid];
+      auto o = info->session->ops[info->register_tid];
       _op_cancel_map_check(o);
       _cancel_linger_op(o);
     }
@@ -601,17 +581,20 @@ void Objecter::_send_linger(LingerOp *info,
   logger->inc(l_osdc_linger_send);
 }
 
-void Objecter::_linger_commit(LingerOp *info, int r, ceph::buffer::list& outbl)
+void Objecter::_linger_commit(LingerOp *info, bs::error_code ec,
+                             cb::list& outbl)
 {
-  LingerOp::unique_lock wl(info->watch_lock);
+  std::unique_lock wl(info->watch_lock);
   ldout(cct, 10) << "_linger_commit " << info->linger_id << dendl;
   if (info->on_reg_commit) {
-    info->on_reg_commit->complete(r);
-    info->on_reg_commit = NULL;
+    info->on_reg_commit->defer(std::move(info->on_reg_commit),
+                              ec, cb::list{});
+    info->on_reg_commit.reset();
   }
-  if (r < 0 && info->on_notify_finish) {
-    info->on_notify_finish->complete(r);
-    info->on_notify_finish = nullptr;
+  if (ec && info->on_notify_finish) {
+    info->on_notify_finish->defer(std::move(info->on_notify_finish),
+                                 ec, cb::list{});
+    info->on_notify_finish.reset();
   }
 
   // only tell the user the first time we do this
@@ -626,55 +609,55 @@ void Objecter::_linger_commit(LingerOp *info, int r, ceph::buffer::list& outbl)
       ldout(cct, 10) << "_linger_commit  notify_id=" << info->notify_id
                     << dendl;
     }
-    catch (ceph::buffer::error& e) {
+    catch (cb::error& e) {
     }
   }
 }
 
-struct C_DoWatchError : public Context {
+class CB_DoWatchError {
   Objecter *objecter;
-  Objecter::LingerOp *info;
-  int err;
-  C_DoWatchError(Objecter *o, Objecter::LingerOp *i, int r)
-    : objecter(o), info(i), err(r) {
-    info->get();
+  boost::intrusive_ptr<Objecter::LingerOp> info;
+  bs::error_code ec;
+public:
+  CB_DoWatchError(Objecter *o, Objecter::LingerOp *i,
+                 bs::error_code ec)
+    : objecter(o), info(i), ec(ec) {
     info->_queued_async();
   }
-  void finish(int r) override {
-    Objecter::unique_lock wl(objecter->rwlock);
+  void operator()() {
+    std::unique_lock wl(objecter->rwlock);
     bool canceled = info->canceled;
     wl.unlock();
 
     if (!canceled) {
-      info->watch_context->handle_error(info->get_cookie(), err);
+      info->handle(ec, 0, info->get_cookie(), 0, {});
     }
 
     info->finished_async();
-    info->put();
   }
 };
 
-int Objecter::_normalize_watch_error(int r)
+bs::error_code Objecter::_normalize_watch_error(bs::error_code ec)
 {
   // translate ENOENT -> ENOTCONN so that a delete->disconnection
   // notification and a failure to reconnect because we raced with
   // the delete appear the same to the user.
-  if (r == -ENOENT)
-    r = -ENOTCONN;
-  return r;
+  if (ec == bs::errc::no_such_file_or_directory)
+    ec = bs::error_code(ENOTCONN, osd_category());
+  return ec;
 }
 
-void Objecter::_linger_reconnect(LingerOp *info, int r)
+void Objecter::_linger_reconnect(LingerOp *info, bs::error_code ec)
 {
-  ldout(cct, 10) << __func__ << " " << info->linger_id << " = " << r
+  ldout(cct, 10) << __func__ << " " << info->linger_id << " = " << ec 
                 << " (last_error " << info->last_error << ")" << dendl;
-  if (r < 0) {
-    LingerOp::unique_lock wl(info->watch_lock);
+  if (ec) {
+    std::unique_lock wl(info->watch_lock);
     if (!info->last_error) {
-      r = _normalize_watch_error(r);
-      info->last_error = r;
-      if (info->watch_context) {
-       finisher->queue(new C_DoWatchError(this, info, r));
+      ec = _normalize_watch_error(ec);
+      info->last_error = ec;
+      if (info->handle) {
+       boost::asio::defer(finish_strand, CB_DoWatchError(this, info, ec));
       }
     }
     wl.unlock();
@@ -705,10 +688,11 @@ void Objecter::_send_linger_ping(LingerOp *info)
   opv[0].op.watch.cookie = info->get_cookie();
   opv[0].op.watch.op = CEPH_OSD_WATCH_OP_PING;
   opv[0].op.watch.gen = info->register_gen;
-  C_Linger_Ping *onack = new C_Linger_Ping(this, info);
+
   Op *o = new Op(info->target.base_oid, info->target.base_oloc,
-                opv, info->target.flags | CEPH_OSD_FLAG_READ,
-                onack, NULL, NULL);
+                std::move(opv), info->target.flags | CEPH_OSD_FLAG_READ,
+                CB_Linger_Ping(this, info, now),
+                nullptr, nullptr);
   o->target = info->target;
   o->should_resend = false;
   _send_op_account(o);
@@ -717,26 +701,25 @@ void Objecter::_send_linger_ping(LingerOp *info)
   _send_op(o);
   info->ping_tid = o->tid;
 
-  onack->sent = now;
   logger->inc(l_osdc_linger_ping);
 }
 
-void Objecter::_linger_ping(LingerOp *info, int r, ceph::coarse_mono_time sent,
+void Objecter::_linger_ping(LingerOp *info, bs::error_code ec, ceph::coarse_mono_time sent,
                            uint32_t register_gen)
 {
-  LingerOp::unique_lock l(info->watch_lock);
+  std::unique_lock l(info->watch_lock);
   ldout(cct, 10) << __func__ << " " << info->linger_id
-                << " sent " << sent << " gen " << register_gen << " = " << r
+                << " sent " << sent << " gen " << register_gen << " = " << ec
                 << " (last_error " << info->last_error
                 << " register_gen " << info->register_gen << ")" << dendl;
   if (info->register_gen == register_gen) {
-    if (r == 0) {
+    if (!ec) {
       info->watch_valid_thru = sent;
-    } else if (r < 0 && !info->last_error) {
-      r = _normalize_watch_error(r);
-      info->last_error = r;
-      if (info->watch_context) {
-       finisher->queue(new C_DoWatchError(this, info, r));
+    } else if (ec && !info->last_error) {
+      ec = _normalize_watch_error(ec);
+      info->last_error = ec;
+      if (info->handle) {
+       boost::asio::defer(finish_strand, CB_DoWatchError(this, info, ec));
       }
     }
   } else {
@@ -744,9 +727,10 @@ void Objecter::_linger_ping(LingerOp *info, int r, ceph::coarse_mono_time sent,
   }
 }
 
-int Objecter::linger_check(LingerOp *info)
+tl::expected<ceph::timespan,
+            bs::error_code> Objecter::linger_check(LingerOp *info)
 {
-  LingerOp::shared_lock l(info->watch_lock);
+  std::shared_lock l(info->watch_lock);
 
   ceph::coarse_mono_time stamp = info->watch_valid_thru;
   if (!info->watch_pending_async.empty())
@@ -757,10 +741,9 @@ int Objecter::linger_check(LingerOp *info)
                 << " err " << info->last_error
                 << " age " << age << dendl;
   if (info->last_error)
-    return info->last_error;
+    return tl::unexpected(info->last_error);
   // return a safe upper bound (we are truncating to ms)
-  return
-    1 + std::chrono::duration_cast<std::chrono::milliseconds>(age).count();
+  return age;
 }
 
 void Objecter::linger_cancel(LingerOp *info)
@@ -776,7 +759,7 @@ void Objecter::_linger_cancel(LingerOp *info)
   ldout(cct, 20) << __func__ << " linger_id=" << info->linger_id << dendl;
   if (!info->canceled) {
     OSDSession *s = info->session;
-    OSDSession::unique_lock sl(s->lock);
+    std::unique_lock sl(s->lock);
     _session_linger_op_remove(s, info);
     sl.unlock();
 
@@ -797,18 +780,15 @@ Objecter::LingerOp *Objecter::linger_register(const object_t& oid,
                                              const object_locator_t& oloc,
                                              int flags)
 {
-  LingerOp *info = new LingerOp(this);
+  unique_lock l(rwlock);
+  // Acquire linger ID
+  auto info = new LingerOp(this, ++max_linger_id);
   info->target.base_oid = oid;
   info->target.base_oloc = oloc;
   if (info->target.base_oloc.key == oid)
     info->target.base_oloc.key.clear();
   info->target.flags = flags;
   info->watch_valid_thru = ceph::coarse_mono_clock::now();
-
-  unique_lock l(rwlock);
-
-  // Acquire linger ID
-  info->linger_id = ++max_linger_id;
   ldout(cct, 10) << __func__ << " info " << info
                 << " linger_id " << info->linger_id
                 << " cookie " << info->get_cookie()
@@ -825,8 +805,8 @@ ceph_tid_t Objecter::linger_watch(LingerOp *info,
                                  ObjectOperation& op,
                                  const SnapContext& snapc,
                                  real_time mtime,
-                                 ceph::buffer::list& inbl,
-                                 Context *oncommit,
+                                 cb::list& inbl,
+                                 decltype(info->on_reg_commit)&& oncommit,
                                  version_t *objver)
 {
   info->is_watch = true;
@@ -835,9 +815,8 @@ ceph_tid_t Objecter::linger_watch(LingerOp *info,
   info->target.flags |= CEPH_OSD_FLAG_WRITE;
   info->ops = op.ops;
   info->inbl = inbl;
-  info->poutbl = NULL;
   info->pobjver = objver;
-  info->on_reg_commit = oncommit;
+  info->on_reg_commit = std::move(oncommit);
 
   info->ctx_budget = take_linger_budget(info);
 
@@ -845,34 +824,34 @@ ceph_tid_t Objecter::linger_watch(LingerOp *info,
   _linger_submit(info, sul);
   logger->inc(l_osdc_linger_active);
 
+  op.clear();
   return info->linger_id;
 }
 
 ceph_tid_t Objecter::linger_notify(LingerOp *info,
                                   ObjectOperation& op,
-                                  snapid_t snap, ceph::buffer::list& inbl,
-                                  ceph::buffer::list *poutbl,
-                                  Context *onfinish,
+                                  snapid_t snap, cb::list& inbl,
+                                  decltype(LingerOp::on_reg_commit)&& onfinish,
                                   version_t *objver)
 {
   info->snap = snap;
   info->target.flags |= CEPH_OSD_FLAG_READ;
   info->ops = op.ops;
   info->inbl = inbl;
-  info->poutbl = poutbl;
   info->pobjver = objver;
-  info->on_reg_commit = onfinish;
-
+  info->on_reg_commit = std::move(onfinish);
   info->ctx_budget = take_linger_budget(info);
-  
+
   shunique_lock sul(rwlock, ceph::acquire_unique);
   _linger_submit(info, sul);
   logger->inc(l_osdc_linger_active);
 
+  op.clear();
   return info->linger_id;
 }
 
-void Objecter::_linger_submit(LingerOp *info, shunique_lock& sul)
+void Objecter::_linger_submit(LingerOp *info,
+                             ceph::shunique_lock<ceph::shared_mutex>& sul)
 {
   ceph_assert(sul.owns_lock() && sul.mutex() == &rwlock);
   ceph_assert(info->linger_id);
@@ -885,7 +864,7 @@ void Objecter::_linger_submit(LingerOp *info, shunique_lock& sul)
   // Create LingerOp<->OSDSession relation
   int r = _get_session(info->target.osd, &s, sul);
   ceph_assert(r == 0);
-  OSDSession::unique_lock sl(s->lock);
+  unique_lock sl(s->lock);
   _session_linger_op_assign(s, info);
   sl.unlock();
   put_session(s);
@@ -893,18 +872,16 @@ void Objecter::_linger_submit(LingerOp *info, shunique_lock& sul)
   _send_linger(info, sul);
 }
 
-struct C_DoWatchNotify : public Context {
+struct CB_DoWatchNotify {
   Objecter *objecter;
-  Objecter::LingerOp *info;
-  MWatchNotify *msg;
-  C_DoWatchNotify(Objecter *o, Objecter::LingerOp *i, MWatchNotify *m)
+  boost::intrusive_ptr<Objecter::LingerOp> info;
+  boost::intrusive_ptr<MWatchNotify> msg;
+  CB_DoWatchNotify(Objecter *o, Objecter::LingerOp *i, MWatchNotify *m)
     : objecter(o), info(i), msg(m) {
-    info->get();
     info->_queued_async();
-    msg->get();
   }
-  void finish(int r) override {
-    objecter->_do_watch_notify(info, msg);
+  void operator()() {
+    objecter->_do_watch_notify(std::move(info), std::move(msg));
   }
 };
 
@@ -920,12 +897,13 @@ void Objecter::handle_watch_notify(MWatchNotify *m)
     ldout(cct, 7) << __func__ << " cookie " << m->cookie << " dne" << dendl;
     return;
   }
-  LingerOp::unique_lock wl(info->watch_lock);
+  std::unique_lock wl(info->watch_lock);
   if (m->opcode == CEPH_WATCH_EVENT_DISCONNECT) {
     if (!info->last_error) {
-      info->last_error = -ENOTCONN;
-      if (info->watch_context) {
-       finisher->queue(new C_DoWatchError(this, info, -ENOTCONN));
+      info->last_error = bs::error_code(ENOTCONN, osd_category());
+      if (info->handle) {
+       boost::asio::defer(finish_strand, CB_DoWatchError(this, info,
+                                                         info->last_error));
       }
     }
   } else if (!info->is_watch) {
@@ -937,19 +915,21 @@ void Objecter::handle_watch_notify(MWatchNotify *m)
       ldout(cct, 10) << __func__ << " reply notify " << m->notify_id
                     << " != " << info->notify_id << ", ignoring" << dendl;
     } else if (info->on_notify_finish) {
-      info->notify_result_bl->claim(m->get_data());
-      info->on_notify_finish->complete(m->return_code);
+      info->on_notify_finish->defer(
+       std::move(info->on_notify_finish),
+       osdcode(m->return_code), std::move(m->get_data()));
 
       // if we race with reconnect we might get a second notify; only
       // notify the caller once!
-      info->on_notify_finish = NULL;
+      info->on_notify_finish = nullptr;
     }
   } else {
-    finisher->queue(new C_DoWatchNotify(this, info, m));
+    boost::asio::defer(finish_strand, CB_DoWatchNotify(this, info, m));
   }
 }
 
-void Objecter::_do_watch_notify(LingerOp *info, MWatchNotify *m)
+void Objecter::_do_watch_notify(boost::intrusive_ptr<LingerOp> info,
+                                boost::intrusive_ptr<MWatchNotify> m)
 {
   ldout(cct, 10) << __func__ << " " << *m << dendl;
 
@@ -963,22 +943,19 @@ void Objecter::_do_watch_notify(LingerOp *info, MWatchNotify *m)
 
   // notify completion?
   ceph_assert(info->is_watch);
-  ceph_assert(info->watch_context);
+  ceph_assert(info->handle);
   ceph_assert(m->opcode != CEPH_WATCH_EVENT_DISCONNECT);
 
   l.unlock();
 
   switch (m->opcode) {
   case CEPH_WATCH_EVENT_NOTIFY:
-    info->watch_context->handle_notify(m->notify_id, m->cookie,
-                                      m->notifier_gid, m->bl);
+    info->handle({}, m->notify_id, m->cookie, m->notifier_gid, std::move(m->bl));
     break;
   }
 
  out:
   info->finished_async();
-  info->put();
-  m->put();
 }
 
 bool Objecter::ms_dispatch(Message *m)
@@ -1037,18 +1014,18 @@ void Objecter::_scan_requests(
   map<ceph_tid_t, Op*>& need_resend,
   list<LingerOp*>& need_resend_linger,
   map<ceph_tid_t, CommandOp*>& need_resend_command,
-  shunique_lock& sul)
+  ceph::shunique_lock<ceph::shared_mutex>& sul)
 {
   ceph_assert(sul.owns_lock() && sul.mutex() == &rwlock);
 
   list<LingerOp*> unregister_lingers;
 
-  OSDSession::unique_lock sl(s->lock);
+  std::unique_lock sl(s->lock);
 
   // check for changed linger mappings (_before_ regular ops)
-  map<ceph_tid_t,LingerOp*>::iterator lp = s->linger_ops.begin();
+  auto lp = s->linger_ops.begin();
   while (lp != s->linger_ops.end()) {
-    LingerOp *op = lp->second;
+    auto op = lp->second;
     ceph_assert(op->session == s);
     // check_linger_pool_dne() may touch linger_ops; prevent iterator
     // invalidation
@@ -1081,7 +1058,7 @@ void Objecter::_scan_requests(
   }
 
   // check for changed request mappings
-  map<ceph_tid_t,Op*>::iterator p = s->ops.begin();
+  auto p = s->ops.begin();
   while (p != s->ops.end()) {
     Op *op = p->second;
     ++p;   // check_op_pool_dne() may touch ops; prevent iterator invalidation
@@ -1110,9 +1087,9 @@ void Objecter::_scan_requests(
   }
 
   // commands
-  map<ceph_tid_t,CommandOp*>::iterator cp = s->command_ops.begin();
+  auto cp = s->command_ops.begin();
   while (cp != s->command_ops.end()) {
-    CommandOp *c = cp->second;
+    auto c = cp->second;
     ++cp;
     ldout(cct, 10) << " checking command " << c->tid << dendl;
     bool force_resend_writes = cluster_full;
@@ -1141,7 +1118,7 @@ void Objecter::_scan_requests(
 
   sl.unlock();
 
-  for (list<LingerOp*>::iterator iter = unregister_lingers.begin();
+  for (auto iter = unregister_lingers.begin();
        iter != unregister_lingers.end();
        ++iter) {
     _linger_cancel(*iter);
@@ -1151,7 +1128,7 @@ void Objecter::_scan_requests(
 
 void Objecter::handle_osd_map(MOSDMap *m)
 {
-  shunique_lock sul(rwlock, acquire_unique);
+  ceph::shunique_lock sul(rwlock, acquire_unique);
   if (!initialized)
     return;
 
@@ -1168,8 +1145,7 @@ void Objecter::handle_osd_map(MOSDMap *m)
   bool was_pausewr = osdmap->test_flag(CEPH_OSDMAP_PAUSEWR) || cluster_full ||
     _osdmap_has_pool_full();
   map<int64_t, bool> pool_full_map;
-  for (map<int64_t, pg_pool_t>::const_iterator it
-        = osdmap->get_pools().begin();
+  for (auto it = osdmap->get_pools().begin();
        it != osdmap->get_pools().end(); ++it)
     pool_full_map[it->first] = _osdmap_pool_full(it->second);
 
@@ -1242,9 +1218,9 @@ void Objecter::handle_osd_map(MOSDMap *m)
        _scan_requests(homeless_session, skipped_map, cluster_full,
                       &pool_full_map, need_resend,
                       need_resend_linger, need_resend_command, sul);
-       for (map<int,OSDSession*>::iterator p = osd_sessions.begin();
+       for (auto p = osd_sessions.begin();
             p != osd_sessions.end(); ) {
-         OSDSession *s = p->second;
+         auto s = p->second;
          _scan_requests(s, skipped_map, cluster_full,
                         &pool_full_map, need_resend,
                         need_resend_linger, need_resend_command, sul);
@@ -1263,7 +1239,7 @@ void Objecter::handle_osd_map(MOSDMap *m)
     } else {
       // first map.  we want the full thing.
       if (m->maps.count(m->get_last())) {
-       for (map<int,OSDSession*>::iterator p = osd_sessions.begin();
+       for (auto p = osd_sessions.begin();
             p != osd_sessions.end(); ++p) {
          OSDSession *s = p->second;
          _scan_requests(s, false, false, NULL, need_resend,
@@ -1314,10 +1290,10 @@ void Objecter::handle_osd_map(MOSDMap *m)
   }
 
   // resend requests
-  for (map<ceph_tid_t, Op*>::iterator p = need_resend.begin();
+  for (auto p = need_resend.begin();
        p != need_resend.end(); ++p) {
-    Op *op = p->second;
-    OSDSession *s = op->session;
+    auto op = p->second;
+    auto s = op->session;
     bool mapped_session = false;
     if (!s) {
       int r = _map_session(&op->target, &s, sul);
@@ -1326,7 +1302,7 @@ void Objecter::handle_osd_map(MOSDMap *m)
     } else {
       get_session(s);
     }
-    OSDSession::unique_lock sl(s->lock);
+    std::unique_lock sl(s->lock);
     if (mapped_session) {
       _session_op_assign(s, op);
     }
@@ -1342,7 +1318,7 @@ void Objecter::handle_osd_map(MOSDMap *m)
     sl.unlock();
     put_session(s);
   }
-  for (list<LingerOp*>::iterator p = need_resend_linger.begin();
+  for (auto p = need_resend_linger.begin();
        p != need_resend_linger.end(); ++p) {
     LingerOp *op = *p;
     ceph_assert(op->session);
@@ -1351,9 +1327,9 @@ void Objecter::handle_osd_map(MOSDMap *m)
       _send_linger(op, sul);
     }
   }
-  for (map<ceph_tid_t,CommandOp*>::iterator p = need_resend_command.begin();
+  for (auto p = need_resend_command.begin();
        p != need_resend_command.end(); ++p) {
-    CommandOp *c = p->second;
+    auto c = p->second;
     if (c->target.osd >= 0) {
       _assign_command_session(c, sul);
       if (c->session && !c->session->is_homeless()) {
@@ -1365,14 +1341,12 @@ void Objecter::handle_osd_map(MOSDMap *m)
   _dump_active();
 
   // finish any Contexts that were waiting on a map update
-  map<epoch_t,list< pair< Context*, int > > >::iterator p =
-    waiting_for_map.begin();
+  auto p = waiting_for_map.begin();
   while (p != waiting_for_map.end() &&
         p->first <= osdmap->get_epoch()) {
     //go through the list and call the onfinish methods
-    for (list<pair<Context*, int> >::iterator i = p->second.begin();
-        i != p->second.end(); ++i) {
-      i->first->complete(i->second);
+    for (auto& [c, ec] : p->second) {
+      ca::post(std::move(c), ec);
     }
     waiting_for_map.erase(p++);
   }
@@ -1438,21 +1412,20 @@ void Objecter::emit_blacklist_events(const OSDMap &old_osd_map,
 
 // op pool check
 
-void Objecter::CB_Op_Map_Latest::operator()(boost::system::error_code e,
+void Objecter::CB_Op_Map_Latest::operator()(bs::error_code e,
                                            version_t latest, version_t)
 {
-  if (e == boost::system::errc::resource_unavailable_try_again ||
-      e == boost::system::errc::operation_canceled)
+  if (e == bs::errc::resource_unavailable_try_again ||
+      e == bs::errc::operation_canceled)
     return;
 
   lgeneric_subdout(objecter->cct, objecter, 10)
     << "op_map_latest r=" << e << " tid=" << tid
     << " latest " << latest << dendl;
 
-  Objecter::unique_lock wl(objecter->rwlock);
+  unique_lock wl(objecter->rwlock);
 
-  map<ceph_tid_t, Op*>::iterator iter =
-    objecter->check_latest_map_ops.find(tid);
+  auto iter = objecter->check_latest_map_ops.find(tid);
   if (iter == objecter->check_latest_map_ops.end()) {
     lgeneric_subdout(objecter->cct, objecter, 10)
       << "op_map_latest op "<< tid << " not found" << dendl;
@@ -1468,7 +1441,7 @@ void Objecter::CB_Op_Map_Latest::operator()(boost::system::error_code e,
   if (op->map_dne_bound == 0)
     op->map_dne_bound = latest;
 
-  OSDSession::unique_lock sl(op->session->lock, defer_lock);
+  unique_lock sl(op->session->lock, defer_lock);
   objecter->_check_op_pool_dne(op, &sl);
 
   op->put();
@@ -1522,7 +1495,7 @@ int Objecter::pool_snap_list(int64_t poolid, vector<uint64_t> *snaps)
   const pg_pool_t *pi = osdmap->get_pg_pool(poolid);
   if (!pi)
     return -ENOENT;
-  for (map<snapid_t,pool_snap_info_t>::const_iterator p = pi->snaps.begin();
+  for (auto p = pi->snaps.begin();
        p != pi->snaps.end();
        ++p) {
     snaps->push_back(p->first);
@@ -1531,7 +1504,7 @@ int Objecter::pool_snap_list(int64_t poolid, vector<uint64_t> *snaps)
 }
 
 // sl may be unlocked.
-void Objecter::_check_op_pool_dne(Op *op, unique_lock *sl)
+void Objecter::_check_op_pool_dne(Op *op, std::unique_lock<ceph::shared_mutex> *sl)
 {
   // rwlock is locked unique
 
@@ -1554,9 +1527,9 @@ void Objecter::_check_op_pool_dne(Op *op, unique_lock *sl)
       ldout(cct, 10) << "check_op_pool_dne tid " << op->tid
                     << " concluding pool " << op->target.base_pgid.pool()
                     << " dne" << dendl;
-      if (op->onfinish) {
+      if (op->has_completion()) {
        num_in_flight--;
-       op->onfinish->complete(-ENOENT);
+       op->complete(osdc_errc::pool_dne, -ENOENT);
       }
 
       OSDSession *s = op->session;
@@ -1594,8 +1567,7 @@ void Objecter::_send_op_map_check(Op *op)
 void Objecter::_op_cancel_map_check(Op *op)
 {
   // rwlock is locked unique
-  map<ceph_tid_t, Op*>::iterator iter =
-    check_latest_map_ops.find(op->tid);
+  auto iter = check_latest_map_ops.find(op->tid);
   if (iter != check_latest_map_ops.end()) {
     Op *op = iter->second;
     op->put();
@@ -1605,25 +1577,24 @@ void Objecter::_op_cancel_map_check(Op *op)
 
 // linger pool check
 
-void Objecter::CB_Linger_Map_Latest::operator()(boost::system::error_code e,
+void Objecter::CB_Linger_Map_Latest::operator()(bs::error_code e,
                                                version_t latest,
                                                version_t)
 {
-  if (e == boost::system::errc::resource_unavailable_try_again ||
-      e == boost::system::errc::operation_canceled) {
+  if (e == bs::errc::resource_unavailable_try_again ||
+      e == bs::errc::operation_canceled) {
     // ignore callback; we will retry in resend_mon_ops()
     return;
   }
 
   unique_lock wl(objecter->rwlock);
 
-  map<uint64_t, LingerOp*>::iterator iter =
-    objecter->check_latest_map_lingers.find(linger_id);
+  auto iter = objecter->check_latest_map_lingers.find(linger_id);
   if (iter == objecter->check_latest_map_lingers.end()) {
     return;
   }
 
-  LingerOp *op = iter->second;
+  auto op = iter->second;
   objecter->check_latest_map_lingers.erase(iter);
 
   if (op->map_dne_bound == 0)
@@ -1658,13 +1629,15 @@ void Objecter::_check_linger_pool_dne(LingerOp *op, bool *need_unregister)
   }
   if (op->map_dne_bound > 0) {
     if (osdmap->get_epoch() >= op->map_dne_bound) {
-      LingerOp::unique_lock wl{op->watch_lock};
+      std::unique_lock wl{op->watch_lock};
       if (op->on_reg_commit) {
-       op->on_reg_commit->complete(-ENOENT);
+       op->on_reg_commit->defer(std::move(op->on_reg_commit),
+                                osdc_errc::pool_dne, cb::list{});
        op->on_reg_commit = nullptr;
       }
       if (op->on_notify_finish) {
-        op->on_notify_finish->complete(-ENOENT);
+       op->on_notify_finish->defer(std::move(op->on_notify_finish),
+                                   osdc_errc::pool_dne, cb::list{});
         op->on_notify_finish = nullptr;
       }
       *need_unregister = true;
@@ -1688,8 +1661,7 @@ void Objecter::_linger_cancel_map_check(LingerOp *op)
 {
   // rwlock is locked unique
 
-  map<uint64_t, LingerOp*>::iterator iter =
-    check_latest_map_lingers.find(op->linger_id);
+  auto iter = check_latest_map_lingers.find(op->linger_id);
   if (iter != check_latest_map_lingers.end()) {
     LingerOp *op = iter->second;
     op->put();
@@ -1699,30 +1671,29 @@ void Objecter::_linger_cancel_map_check(LingerOp *op)
 
 // command pool check
 
-void Objecter::CB_Command_Map_Latest::operator()(boost::system::error_code e,
+void Objecter::CB_Command_Map_Latest::operator()(bs::error_code e,
                                                 version_t latest, version_t)
 {
-  if (e == boost::system::errc::resource_unavailable_try_again ||
-      e == boost::system::errc::operation_canceled) {
+  if (e == bs::errc::resource_unavailable_try_again ||
+      e == bs::errc::operation_canceled) {
     // ignore callback; we will retry in resend_mon_ops()
     return;
   }
 
   unique_lock wl(objecter->rwlock);
 
-  map<uint64_t, CommandOp*>::iterator iter =
-    objecter->check_latest_map_commands.find(tid);
+  auto iter = objecter->check_latest_map_commands.find(tid);
   if (iter == objecter->check_latest_map_commands.end()) {
     return;
   }
 
-  CommandOp *c = iter->second;
+  auto c = iter->second;
   objecter->check_latest_map_commands.erase(iter);
 
   if (c->map_dne_bound == 0)
     c->map_dne_bound = latest;
 
-  OSDSession::unique_lock sul(c->session->lock);
+  unique_lock sul(c->session->lock);
   objecter->_check_command_map_dne(c);
   sul.unlock();
 
@@ -1740,7 +1711,8 @@ void Objecter::_check_command_map_dne(CommandOp *c)
                 << dendl;
   if (c->map_dne_bound > 0) {
     if (osdmap->get_epoch() >= c->map_dne_bound) {
-      _finish_command(c, c->map_check_error, c->map_check_error_str);
+      _finish_command(c, osdcode(c->map_check_error),
+                     std::move(c->map_check_error_str), {});
     }
   } else {
     _send_command_map_check(c);
@@ -1764,10 +1736,9 @@ void Objecter::_command_cancel_map_check(CommandOp *c)
 {
   // rwlock is locked uniqe
 
-  map<uint64_t, CommandOp*>::iterator iter =
-    check_latest_map_commands.find(c->tid);
+  auto iter = check_latest_map_commands.find(c->tid);
   if (iter != check_latest_map_commands.end()) {
-    CommandOp *c = iter->second;
+    auto c = iter->second;
     c->put();
     check_latest_map_commands.erase(iter);
   }
@@ -1780,7 +1751,8 @@ void Objecter::_command_cancel_map_check(CommandOp *c)
  * @returns 0 on success, or -EAGAIN if the lock context requires
  * promotion to write.
  */
-int Objecter::_get_session(int osd, OSDSession **session, shunique_lock& sul)
+int Objecter::_get_session(int osd, OSDSession **session,
+                          shunique_lock<ceph::shared_mutex>& sul)
 {
   ceph_assert(sul && sul.mutex() == &rwlock);
 
@@ -1791,9 +1763,9 @@ int Objecter::_get_session(int osd, OSDSession **session, shunique_lock& sul)
     return 0;
   }
 
-  map<int,OSDSession*>::iterator p = osd_sessions.find(osd);
+  auto p = osd_sessions.find(osd);
   if (p != osd_sessions.end()) {
-    OSDSession *s = p->second;
+    auto s = p->second;
     s->get();
     *session = s;
     ldout(cct, 20) << __func__ << " s=" << s << " osd=" << osd << " "
@@ -1803,7 +1775,7 @@ int Objecter::_get_session(int osd, OSDSession **session, shunique_lock& sul)
   if (!sul.owns_lock()) {
     return -EAGAIN;
   }
-  OSDSession *s = new OSDSession(cct, osd);
+  auto s = new OSDSession(cct, osd);
   osd_sessions[osd] = s;
   s->con = messenger->connect_to_osd(osdmap->get_addrs(osd));
   s->con->set_priv(RefCountedPtr{s});
@@ -1865,28 +1837,28 @@ void Objecter::close_session(OSDSession *s)
     s->con->mark_down();
     logger->inc(l_osdc_osd_session_close);
   }
-  OSDSession::unique_lock sl(s->lock);
+  unique_lock sl(s->lock);
 
   std::list<LingerOp*> homeless_lingers;
   std::list<CommandOp*> homeless_commands;
   std::list<Op*> homeless_ops;
 
   while (!s->linger_ops.empty()) {
-    std::map<uint64_t, LingerOp*>::iterator i = s->linger_ops.begin();
+    auto i = s->linger_ops.begin();
     ldout(cct, 10) << " linger_op " << i->first << dendl;
     homeless_lingers.push_back(i->second);
     _session_linger_op_remove(s, i->second);
   }
 
   while (!s->ops.empty()) {
-    std::map<ceph_tid_t, Op*>::iterator i = s->ops.begin();
+    auto i = s->ops.begin();
     ldout(cct, 10) << " op " << i->first << dendl;
     homeless_ops.push_back(i->second);
     _session_op_remove(s, i->second);
   }
 
   while (!s->command_ops.empty()) {
-    std::map<ceph_tid_t, CommandOp*>::iterator i = s->command_ops.begin();
+    auto i = s->command_ops.begin();
     ldout(cct, 10) << " command_op " << i->first << dendl;
     homeless_commands.push_back(i->second);
     _session_command_op_remove(s, i->second);
@@ -1898,16 +1870,16 @@ void Objecter::close_session(OSDSession *s)
 
   // Assign any leftover ops to the homeless session
   {
-    OSDSession::unique_lock hsl(homeless_session->lock);
-    for (std::list<LingerOp*>::iterator i = homeless_lingers.begin();
+    unique_lock hsl(homeless_session->lock);
+    for (auto i = homeless_lingers.begin();
         i != homeless_lingers.end(); ++i) {
       _session_linger_op_assign(homeless_session, *i);
     }
-    for (std::list<Op*>::iterator i = homeless_ops.begin();
+    for (auto i = homeless_ops.begin();
         i != homeless_ops.end(); ++i) {
       _session_op_assign(homeless_session, *i);
     }
-    for (std::list<CommandOp*>::iterator i = homeless_commands.begin();
+    for (auto i = homeless_commands.begin();
         i != homeless_commands.end(); ++i) {
       _session_command_op_assign(homeless_session, *i);
     }
@@ -1924,53 +1896,29 @@ void Objecter::wait_for_osd_map(epoch_t e)
     return;
   }
 
-  // Leave this since it goes with C_SafeCond
-  ceph::mutex lock = ceph::make_mutex("");
-  ceph::condition_variable cond;
-  bool done;
-  std::unique_lock mlock{lock};
-  C_SafeCond *context = new C_SafeCond(lock, cond, &done, NULL);
-  waiting_for_map[e].push_back(pair<Context*, int>(context, 0));
+  ca::waiter<bs::error_code> w;
+  waiting_for_map[e].emplace_back(OpCompletion::create(
+                                   service.get_executor(),
+                                   w.ref()),
+                                 bs::error_code{});
   l.unlock();
-  cond.wait(mlock, [&done] { return done; });
+  w.wait();
 }
 
-struct CB_Objecter_GetVersion {
-  Objecter *objecter;
-  Context *fin;
-  CB_Objecter_GetVersion(Objecter *o, Context *c) : objecter(o), fin(c) {}
-  void operator()(boost::system::error_code e, version_t newest, version_t oldest) {
-    if (!e) {
-      objecter->get_latest_version(oldest, newest, fin);
-    } else if (e == boost::system::errc::resource_unavailable_try_again) {
-      // try again as instructed
-      objecter->wait_for_latest_osdmap(fin);
-    } else {
-      // it doesn't return any other error codes!
-      ceph_abort();
-    }
-  }
-};
-
-void Objecter::wait_for_latest_osdmap(Context *fin)
-{
-  ldout(cct, 10) << __func__ << dendl;
-  monc->get_version("osdmap", CB_Objecter_GetVersion(this, fin));
-}
-
-void Objecter::get_latest_version(epoch_t oldest, epoch_t newest, Context *fin)
+void Objecter::_get_latest_version(epoch_t oldest, epoch_t newest,
+                                  std::unique_ptr<OpCompletion> fin,
+                                  std::unique_lock<ceph::shared_mutex>&& l)
 {
-  unique_lock wl(rwlock);
+  ceph_assert(fin);
   if (osdmap->get_epoch() >= newest) {
     ldout(cct, 10) << __func__ << " latest " << newest << ", have it" << dendl;
-    wl.unlock();
-    if (fin)
-      fin->complete(0);
-    return;
+    l.unlock();
+    ca::defer(std::move(fin), bs::error_code{});
+  } else {
+    ldout(cct, 10) << __func__ << " latest " << newest << ", waiting" << dendl;
+    _wait_for_new_map(std::move(fin), newest, bs::error_code{});
+    l.unlock();
   }
-
-  ldout(cct, 10) << __func__ << " latest " << newest << ", waiting" << dendl;
-  _wait_for_new_map(fin, newest, 0);
 }
 
 void Objecter::maybe_request_map()
@@ -1999,10 +1947,11 @@ void Objecter::_maybe_request_map()
   }
 }
 
-void Objecter::_wait_for_new_map(Context *c, epoch_t epoch, int err)
+void Objecter::_wait_for_new_map(std::unique_ptr<OpCompletion> c, epoch_t epoch,
+                                bs::error_code ec)
 {
   // rwlock is locked unique
-  waiting_for_map[epoch].push_back(pair<Context *, int>(c, err));
+  waiting_for_map[epoch].emplace_back(std::move(c), ec);
   _maybe_request_map();
 }
 
@@ -2027,16 +1976,6 @@ bool Objecter::have_map(const epoch_t epoch)
   }
 }
 
-bool Objecter::wait_for_map(epoch_t epoch, Context *c, int err)
-{
-  unique_lock wl(rwlock);
-  if (osdmap->get_epoch() >= epoch) {
-    return true;
-  }
-  _wait_for_new_map(c, epoch, err);
-  return false;
-}
-
 void Objecter::_kick_requests(OSDSession *session,
                              map<uint64_t, LingerOp *>& lresend)
 {
@@ -2048,8 +1987,7 @@ void Objecter::_kick_requests(OSDSession *session,
 
   // resend ops
   map<ceph_tid_t,Op*> resend;  // resend in tid order
-  for (map<ceph_tid_t, Op*>::iterator p = session->ops.begin();
-       p != session->ops.end();) {
+  for (auto p = session->ops.begin(); p != session->ops.end();) {
     Op *op = p->second;
     ++p;
     if (op->should_resend) {
@@ -2069,7 +2007,7 @@ void Objecter::_kick_requests(OSDSession *session,
 
   // resend lingers
   logger->inc(l_osdc_linger_resend, session->linger_ops.size());
-  for (map<ceph_tid_t, LingerOp*>::iterator j = session->linger_ops.begin();
+  for (auto j = session->linger_ops.begin();
        j != session->linger_ops.end(); ++j) {
     LingerOp *op = j->second;
     op->get();
@@ -2080,7 +2018,7 @@ void Objecter::_kick_requests(OSDSession *session,
   // resend commands
   logger->inc(l_osdc_command_resend, session->command_ops.size());
   map<uint64_t,CommandOp*> cresend;  // resend in order
-  for (map<ceph_tid_t, CommandOp*>::iterator k = session->command_ops.begin();
+  for (auto k = session->command_ops.begin();
        k != session->command_ops.end(); ++k) {
     cresend[k->first] = k->second;
   }
@@ -2091,7 +2029,7 @@ void Objecter::_kick_requests(OSDSession *session,
 }
 
 void Objecter::_linger_ops_resend(map<uint64_t, LingerOp *>& lresend,
-                                 unique_lock& ul)
+                                 unique_lock<ceph::shared_mutex>& ul)
 {
   ceph_assert(ul.owns_lock());
   shunique_lock sul(std::move(ul));
@@ -2138,15 +2076,13 @@ void Objecter::tick()
 
   unsigned laggy_ops = 0;
 
-  for (map<int,OSDSession*>::iterator siter = osd_sessions.begin();
+  for (auto siter = osd_sessions.begin();
        siter != osd_sessions.end(); ++siter) {
-    OSDSession *s = siter->second;
-    OSDSession::lock_guard l(s->lock);
+    auto s = siter->second;
+    scoped_lock l(s->lock);
     bool found = false;
-    for (map<ceph_tid_t,Op*>::iterator p = s->ops.begin();
-       p != s->ops.end();
-       ++p) {
-      Op *op = p->second;
+    for (auto p = s->ops.begin(); p != s->ops.end(); ++p) {
+      auto op = p->second;
       ceph_assert(op->session);
       if (op->stamp < cutoff) {
        ldout(cct, 2) << " tid " << p->first << " on osd." << op->session->osd
@@ -2155,11 +2091,11 @@ void Objecter::tick()
        ++laggy_ops;
       }
     }
-    for (map<uint64_t,LingerOp*>::iterator p = s->linger_ops.begin();
+    for (auto p = s->linger_ops.begin();
        p != s->linger_ops.end();
        ++p) {
-      LingerOp *op = p->second;
-      LingerOp::unique_lock wl(op->watch_lock);
+      auto op = p->second;
+      std::unique_lock wl(op->watch_lock);
       ceph_assert(op->session);
       ldout(cct, 10) << " pinging osd that serves lingering tid " << p->first
                     << " (osd." << op->session->osd << ")" << dendl;
@@ -2167,10 +2103,10 @@ void Objecter::tick()
       if (op->is_watch && op->registered && !op->last_error)
        _send_linger_ping(op);
     }
-    for (map<uint64_t,CommandOp*>::iterator p = s->command_ops.begin();
+    for (auto p = s->command_ops.begin();
        p != s->command_ops.end();
        ++p) {
-      CommandOp *op = p->second;
+      auto op = p->second;
       ceph_assert(op->session);
       ldout(cct, 10) << " pinging osd that serves command tid " << p->first
                     << " (osd." << op->session->osd << ")" << dendl;
@@ -2189,9 +2125,7 @@ void Objecter::tick()
   if (!toping.empty()) {
     // send a ping to these osds, to ensure we detect any session resets
     // (osd reply message policy is lossy)
-    for (set<OSDSession*>::const_iterator i = toping.begin();
-        i != toping.end();
-        ++i) {
+    for (auto i = toping.begin(); i != toping.end(); ++i) {
       (*i)->con->send_message(new MPing);
     }
   }
@@ -2209,41 +2143,34 @@ void Objecter::resend_mon_ops()
 
   ldout(cct, 10) << "resend_mon_ops" << dendl;
 
-  for (map<ceph_tid_t,PoolStatOp*>::iterator p = poolstat_ops.begin();
-       p != poolstat_ops.end();
-       ++p) {
+  for (auto p = poolstat_ops.begin(); p != poolstat_ops.end(); ++p) {
     _poolstat_submit(p->second);
     logger->inc(l_osdc_poolstat_resend);
   }
 
-  for (map<ceph_tid_t,StatfsOp*>::iterator p = statfs_ops.begin();
-       p != statfs_ops.end();
-       ++p) {
+  for (auto p = statfs_ops.begin(); p != statfs_ops.end(); ++p) {
     _fs_stats_submit(p->second);
     logger->inc(l_osdc_statfs_resend);
   }
 
-  for (map<ceph_tid_t,PoolOp*>::iterator p = pool_ops.begin();
-       p != pool_ops.end();
-       ++p) {
+  for (auto p = pool_ops.begin(); p != pool_ops.end(); ++p) {
     _pool_op_submit(p->second);
     logger->inc(l_osdc_poolop_resend);
   }
 
-  for (map<ceph_tid_t, Op*>::iterator p = check_latest_map_ops.begin();
+  for (auto p = check_latest_map_ops.begin();
        p != check_latest_map_ops.end();
        ++p) {
     monc->get_version("osdmap", CB_Op_Map_Latest(this, p->second->tid));
   }
 
-  for (map<uint64_t, LingerOp*>::iterator p = check_latest_map_lingers.begin();
+  for (auto p = check_latest_map_lingers.begin();
        p != check_latest_map_lingers.end();
        ++p) {
     monc->get_version("osdmap", CB_Linger_Map_Latest(this, p->second->linger_id));
   }
 
-  for (map<uint64_t, CommandOp*>::iterator p
-        = check_latest_map_commands.begin();
+  for (auto p = check_latest_map_commands.begin();
        p != check_latest_map_commands.end();
        ++p) {
     monc->get_version("osdmap", CB_Command_Map_Latest(this, p->second->tid));
@@ -2262,7 +2189,8 @@ void Objecter::op_submit(Op *op, ceph_tid_t *ptid, int *ctx_budget)
   _op_submit_with_budget(op, rl, ptid, ctx_budget);
 }
 
-void Objecter::_op_submit_with_budget(Op *op, shunique_lock& sul,
+void Objecter::_op_submit_with_budget(Op *op,
+                                     shunique_lock<ceph::shared_mutex>& sul,
                                      ceph_tid_t *ptid,
                                      int *ctx_budget)
 {
@@ -2300,7 +2228,7 @@ void Objecter::_send_op_account(Op *op)
   inflight_ops++;
 
   // add to gather set(s)
-  if (op->onfinish) {
+  if (op->has_completion()) {
     num_in_flight++;
   } else {
     ldout(cct, 20) << " note: not requesting reply" << dendl;
@@ -2320,7 +2248,7 @@ void Objecter::_send_op_account(Op *op)
   if (op->target.flags & CEPH_OSD_FLAG_PGOP)
     logger->inc(l_osdc_op_pg);
 
-  for (vector<OSDOp>::iterator p = op->ops.begin(); p != op->ops.end(); ++p) {
+  for (auto p = op->ops.begin(); p != op->ops.end(); ++p) {
     int code = l_osdc_osdop_other;
     switch (p->op.op) {
     case CEPH_OSD_OP_STAT: code = l_osdc_osdop_stat; break;
@@ -2365,7 +2293,7 @@ void Objecter::_send_op_account(Op *op)
   }
 }
 
-void Objecter::_op_submit(Op *op, shunique_lock& sul, ceph_tid_t *ptid)
+void Objecter::_op_submit(Op *op, shunique_lock<ceph::shared_mutex>& sul, ceph_tid_t *ptid)
 {
   // rwlock is locked
 
@@ -2430,7 +2358,7 @@ void Objecter::_op_submit(Op *op, shunique_lock& sul, ceph_tid_t *ptid)
     _maybe_request_map();
   }
 
-  OSDSession::unique_lock sl(s->lock);
+  unique_lock sl(s->lock);
   if (op->tid == 0)
     op->tid = ++last_tid;
 
@@ -2466,9 +2394,9 @@ int Objecter::op_cancel(OSDSession *s, ceph_tid_t tid, int r)
 {
   ceph_assert(initialized);
 
-  OSDSession::unique_lock sl(s->lock);
+  unique_lock sl(s->lock);
 
-  map<ceph_tid_t, Op*>::iterator p = s->ops.find(tid);
+  auto p = s->ops.find(tid);
   if (p == s->ops.end()) {
     ldout(cct, 10) << __func__ << " tid " << tid << " dne in session "
                   << s->osd << dendl;
@@ -2486,10 +2414,9 @@ int Objecter::op_cancel(OSDSession *s, ceph_tid_t tid, int r)
   ldout(cct, 10) << __func__ << " tid " << tid << " in session " << s->osd
                 << dendl;
   Op *op = p->second;
-  if (op->onfinish) {
+  if (op->has_completion()) {
     num_in_flight--;
-    op->onfinish->complete(r);
-    op->onfinish = NULL;
+    op->complete(osdcode(r), r);
   }
   _op_cancel_map_check(op);
   _finish_op(op, r);
@@ -2527,10 +2454,10 @@ int Objecter::_op_cancel(ceph_tid_t tid, int r)
 
 start:
 
-  for (map<int, OSDSession *>::iterator siter = osd_sessions.begin();
+  for (auto siter = osd_sessions.begin();
        siter != osd_sessions.end(); ++siter) {
     OSDSession *s = siter->second;
-    OSDSession::shared_lock sl(s->lock);
+    shared_lock sl(s->lock);
     if (s->ops.find(tid) != s->ops.end()) {
       sl.unlock();
       ret = op_cancel(s, tid, r);
@@ -2546,7 +2473,7 @@ start:
                << " not found in live sessions" << dendl;
 
   // Handle case where the op is in homeless session
-  OSDSession::shared_lock sl(homeless_session->lock);
+  shared_lock sl(homeless_session->lock);
   if (homeless_session->ops.find(tid) != homeless_session->ops.end()) {
     sl.unlock();
     ret = op_cancel(homeless_session, tid, r);
@@ -2574,11 +2501,11 @@ epoch_t Objecter::op_cancel_writes(int r, int64_t pool)
   std::vector<ceph_tid_t> to_cancel;
   bool found = false;
 
-  for (map<int, OSDSession *>::iterator siter = osd_sessions.begin();
+  for (auto siter = osd_sessions.begin();
        siter != osd_sessions.end(); ++siter) {
     OSDSession *s = siter->second;
-    OSDSession::shared_lock sl(s->lock);
-    for (map<ceph_tid_t, Op*>::iterator op_i = s->ops.begin();
+    shared_lock sl(s->lock);
+    for (auto op_i = s->ops.begin();
         op_i != s->ops.end(); ++op_i) {
       if (op_i->second->target.flags & CEPH_OSD_FLAG_WRITE
          && (pool == -1 || op_i->second->target.target_oloc.pool == pool)) {
@@ -2587,9 +2514,7 @@ epoch_t Objecter::op_cancel_writes(int r, int64_t pool)
     }
     sl.unlock();
 
-    for (std::vector<ceph_tid_t>::iterator titer = to_cancel.begin();
-        titer != to_cancel.end();
-        ++titer) {
+    for (auto titer = to_cancel.begin(); titer != to_cancel.end(); ++titer) {
       int cancel_result = op_cancel(s, *titer, r);
       // We hold rwlock across search and cancellation, so cancels
       // should always succeed
@@ -2675,8 +2600,7 @@ bool Objecter::_osdmap_pool_full(const int64_t pool_id) const
 
 bool Objecter::_osdmap_has_pool_full() const
 {
-  for (map<int64_t, pg_pool_t>::const_iterator it
-        = osdmap->get_pools().begin();
+  for (auto it = osdmap->get_pools().begin();
        it != osdmap->get_pools().end(); ++it) {
     if (_osdmap_pool_full(it->second))
       return true;
@@ -2981,7 +2905,7 @@ int Objecter::_calc_target(op_target_t *t, Connection *con, bool any_change)
 }
 
 int Objecter::_map_session(op_target_t *target, OSDSession **s,
-                          shunique_lock& sul)
+                          shunique_lock<ceph::shared_mutex>& sul)
 {
   _calc_target(target, nullptr);
   return _get_session(target->osd, s, sul);
@@ -3088,7 +3012,7 @@ void Objecter::_session_command_op_assign(OSDSession *to, CommandOp *op)
 }
 
 int Objecter::_recalc_linger_op_target(LingerOp *linger_op,
-                                      shunique_lock& sul)
+                                      shunique_lock<ceph::shared_mutex>& sul)
 {
   // rwlock is locked unique
 
@@ -3107,7 +3031,7 @@ int Objecter::_recalc_linger_op_target(LingerOp *linger_op,
       // same time here is only safe because we are the only one that
       // takes two, and we are holding rwlock for write.  Disable
       // lockdep because it doesn't know that.
-      OSDSession::unique_lock sl(s->lock);
+      unique_lock sl(s->lock);
       _session_linger_op_remove(linger_op->session, linger_op);
       _session_linger_op_assign(s, linger_op);
     }
@@ -3123,8 +3047,8 @@ void Objecter::_cancel_linger_op(Op *op)
   ldout(cct, 15) << "cancel_op " << op->tid << dendl;
 
   ceph_assert(!op->should_resend);
-  if (op->onfinish) {
-    delete op->onfinish;
+  if (op->has_completion()) {
+    op->onfinish = nullptr;
     num_in_flight--;
   }
 
@@ -3176,10 +3100,10 @@ MOSDOp *Objecter::_prepare_osd_op(Op *op)
   op->stamp = ceph::coarse_mono_clock::now();
 
   hobject_t hobj = op->target.get_hobj();
-  MOSDOp *m = new MOSDOp(client_inc, op->tid,
-                        hobj, op->target.actual_pgid,
-                        osdmap->get_epoch(),
-                        flags, op->features);
+  auto m = new MOSDOp(client_inc, op->tid,
+                     hobj, op->target.actual_pgid,
+                     osdmap->get_epoch(),
+                     flags, op->features);
 
   m->set_snapid(op->snapid);
   m->set_snap_seq(op->snapc.seq);
@@ -3306,7 +3230,7 @@ int Objecter::calc_op_budget(const vector<OSDOp>& ops)
 }
 
 void Objecter::_throttle_op(Op *op,
-                           shunique_lock& sul,
+                           shunique_lock<ceph::shared_mutex>& sul,
                            int op_budget)
 {
   ceph_assert(sul && sul.mutex() == &rwlock);
@@ -3360,7 +3284,7 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m)
     return;
   }
 
-  OSDSession::unique_lock sl(s->lock);
+  unique_lock sl(s->lock);
 
   map<ceph_tid_t, Op *>::iterator iter = s->ops.find(tid);
   if (iter == s->ops.end()) {
@@ -3386,7 +3310,7 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m)
   if (retry_writes_after_first_reply && op->attempts == 1 &&
       (op->target.flags & CEPH_OSD_FLAG_WRITE)) {
     ldout(cct, 7) << "retrying write after first reply: " << tid << dendl;
-    if (op->onfinish) {
+    if (op->has_completion()) {
       num_in_flight--;
     }
     _session_op_remove(s, op);
@@ -3414,13 +3338,13 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m)
     // have, but that is better than doing callbacks out of order.
   }
 
-  Context *onfinish = 0;
+  decltype(op->onfinish) onfinish;
 
   int rc = m->get_result();
 
   if (m->is_redirect_reply()) {
     ldout(cct, 5) << " got redirect reply; redirecting" << dendl;
-    if (op->onfinish)
+    if (op->has_completion())
       num_in_flight--;
     _session_op_remove(s, op);
     sl.unlock();
@@ -3440,7 +3364,7 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m)
 
   if (rc == -EAGAIN) {
     ldout(cct, 7) << " got -EAGAIN, resubmitting" << dendl;
-    if (op->onfinish)
+    if (op->has_completion())
       num_in_flight--;
     _session_op_remove(s, op);
     sl.unlock();
@@ -3478,7 +3402,7 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m)
       ldout(cct,10) << __func__ << " copying resulting " << bl.length()
                    << " into existing ceph::buffer of length " << op->outbl->length()
                    << dendl;
-      ceph::buffer::list t;
+      cb::list t;
       t.claim(*op->outbl);
       t.invalidate_crc();  // we're overwriting the raw buffers via c_str()
       bl.begin().copy(bl.length(), t.c_str());
@@ -3498,15 +3422,20 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m)
                  << " != request ops " << op->ops
                  << " from " << m->get_source_inst() << dendl;
 
-  vector<ceph::buffer::list*>::iterator pb = op->out_bl.begin();
-  vector<int*>::iterator pr = op->out_rval.begin();
-  vector<Context*>::iterator ph = op->out_handler.begin();
+  ceph_assert(op->ops.size() == op->out_bl.size());
+  ceph_assert(op->ops.size() == op->out_rval.size());
+  ceph_assert(op->ops.size() == op->out_ec.size());
+  ceph_assert(op->ops.size() == op->out_handler.size());
+  auto pb = op->out_bl.begin();
+  auto pr = op->out_rval.begin();
+  auto pe = op->out_ec.begin();
+  auto ph = op->out_handler.begin();
   ceph_assert(op->out_bl.size() == op->out_rval.size());
   ceph_assert(op->out_bl.size() == op->out_handler.size());
-  vector<OSDOp>::iterator p = out_ops.begin();
+  auto p = out_ops.begin();
   for (unsigned i = 0;
        p != out_ops.end() && pb != op->out_bl.end();
-       ++i, ++p, ++pb, ++pr, ++ph) {
+       ++i, ++p, ++pb, ++pr, ++pe, ++ph) {
     ldout(cct, 10) << " op " << i << " rval " << p->rval
                   << " len " << p->outdata.length() << dendl;
     if (*pb)
@@ -3515,20 +3444,24 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m)
     // can change it if e.g. decoding fails
     if (*pr)
       **pr = ceph_to_hostos_errno(p->rval);
+    if (*pe)
+      **pe = p->rval < 0 ? bs::error_code(-p->rval, osd_category()) :
+       bs::error_code();
     if (*ph) {
-      ldout(cct, 10) << " op " << i << " handler " << *ph << dendl;
-      (*ph)->complete(ceph_to_hostos_errno(p->rval));
-      *ph = NULL;
+      std::move((*ph))(p->rval < 0 ?
+                      bs::error_code(-p->rval, osd_category()) :
+                      bs::error_code(),
+                      p->rval, p->outdata);
     }
   }
 
   // NOTE: we assume that since we only request ONDISK ever we will
   // only ever get back one (type of) ack ever.
 
-  if (op->onfinish) {
+  if (op->has_completion()) {
     num_in_flight--;
-    onfinish = op->onfinish;
-    op->onfinish = NULL;
+    onfinish = std::move(op->onfinish);
+    op->onfinish = nullptr;
   }
   logger->inc(l_osdc_op_reply);
 
@@ -3547,8 +3480,8 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m)
   sl.unlock();
 
   // do callbacks
-  if (onfinish) {
-    onfinish->complete(rc);
+  if (Op::has_completion(onfinish)) {
+    Op::complete(std::move(onfinish), osdcode(rc), rc);
   }
   if (completion_lock.mutex()) {
     completion_lock.unlock();
@@ -3577,7 +3510,7 @@ void Objecter::handle_osd_backoff(MOSDBackoff *m)
 
   get_session(s);
 
-  OSDSession::unique_lock sl(s->lock);
+  unique_lock sl(s->lock);
 
   switch (m->op) {
   case CEPH_OSD_BACKOFF_OP_BLOCK:
@@ -3592,10 +3525,9 @@ void Objecter::handle_osd_backoff(MOSDBackoff *m)
 
       // ack with original backoff's epoch so that the osd can discard this if
       // there was a pg split.
-      Message *r = new MOSDBackoff(m->pgid,
-                                  m->map_epoch,
-                                  CEPH_OSD_BACKOFF_OP_ACK_BLOCK,
-                                  m->id, m->begin, m->end);
+      auto r = new MOSDBackoff(m->pgid, m->map_epoch,
+                              CEPH_OSD_BACKOFF_OP_ACK_BLOCK,
+                              m->id, m->begin, m->end);
       // this priority must match the MOSDOps from _prepare_osd_op
       r->set_priority(cct->_conf->osd_client_op_priority);
       con->send_message(r);
@@ -3758,7 +3690,7 @@ void Objecter::list_nobjects(NListContext *list_context, Context *onfinish)
   op.pg_nls(list_context->max_entries, list_context->filter,
            list_context->pos, osdmap->get_epoch());
   list_context->bl.clear();
-  C_NList *onack = new C_NList(list_context, onfinish, this);
+  auto onack = new C_NList(list_context, onfinish, this);
   object_locator_t oloc(list_context->pool_id, list_context->nspace);
 
   // note current_pg in case we don't have (or lose) SORTBITWISE
@@ -3780,7 +3712,7 @@ void Objecter::_nlist_reply(NListContext *list_context, int r,
   decode(response, iter);
   if (!iter.end()) {
     // we do this as legacy.
-    ceph::buffer::list legacy_extra_info;
+    cb::list legacy_extra_info;
     decode(legacy_extra_info, iter);
   }
 
@@ -3809,7 +3741,9 @@ void Objecter::_nlist_reply(NListContext *list_context, int r,
                 << ", handle " << response.handle
                 << ", tentative new pos " << list_context->pos << dendl;
   if (response_size) {
-    list_context->list.splice(list_context->list.end(), response.entries);
+    std::move(response.entries.begin(), response.entries.end(),
+             std::back_inserter(list_context->list));
+    response.entries.clear();
   }
 
   if (list_context->list.size() >= list_context->max_entries) {
@@ -3838,176 +3772,177 @@ void Objecter::put_nlist_context_budget(NListContext *list_context)
 
 // snapshots
 
-int Objecter::create_pool_snap(int64_t pool, string& snap_name,
-                              Context *onfinish)
+void Objecter::create_pool_snap(int64_t pool, std::string_view snap_name,
+                               decltype(PoolOp::onfinish)&& onfinish)
 {
   unique_lock wl(rwlock);
   ldout(cct, 10) << "create_pool_snap; pool: " << pool << "; snap: "
                 << snap_name << dendl;
 
   const pg_pool_t *p = osdmap->get_pg_pool(pool);
-  if (!p)
-    return -EINVAL;
-  if (p->snap_exists(snap_name.c_str()))
-    return -EEXIST;
+  if (!p) {
+    onfinish->defer(std::move(onfinish), osdc_errc::pool_dne, cb::list{});
+    return;
+  }
+  if (p->snap_exists(snap_name)) {
+    onfinish->defer(std::move(onfinish), osdc_errc::snapshot_exists,
+                   cb::list{});
+    return;
+  }
 
-  PoolOp *op = new PoolOp;
-  if (!op)
-    return -ENOMEM;
+  auto op = new PoolOp;
   op->tid = ++last_tid;
   op->pool = pool;
   op->name = snap_name;
-  op->onfinish = onfinish;
+  op->onfinish = std::move(onfinish);
   op->pool_op = POOL_OP_CREATE_SNAP;
   pool_ops[op->tid] = op;
 
   pool_op_submit(op);
-
-  return 0;
 }
 
-struct C_SelfmanagedSnap : public Context {
-  ceph::buffer::list bl;
-  snapid_t *psnapid;
-  Context *fin;
-  C_SelfmanagedSnap(snapid_t *ps, Context *f) : psnapid(ps), fin(f) {}
-  void finish(int r) override {
-    if (r == 0) {
+struct CB_SelfmanagedSnap {
+  std::unique_ptr<ca::Completion<void(bs::error_code, snapid_t)>> fin;
+  CB_SelfmanagedSnap(decltype(fin)&& fin)
+    : fin(std::move(fin)) {}
+  void operator()(bs::error_code ec, const cb::list& bl) {
+    snapid_t snapid = 0;
+    if (!ec) {
       try {
-        auto p = bl.cbegin();
-        decode(*psnapid, p);
-      } catch (ceph::buffer::error&) {
-        r = -EIO;
+       auto p = bl.cbegin();
+       decode(snapid, p);
+      } catch (const cb::error& e) {
+        ec = e.code();
       }
     }
-    fin->complete(r);
+    fin->defer(std::move(fin), ec, snapid);
   }
 };
 
-int Objecter::allocate_selfmanaged_snap(int64_t pool, snapid_t *psnapid,
-                                       Context *onfinish)
+void Objecter::allocate_selfmanaged_snap(
+  int64_t pool,
+  std::unique_ptr<ca::Completion<void(bs::error_code, snapid_t)>> onfinish)
 {
   unique_lock wl(rwlock);
   ldout(cct, 10) << "allocate_selfmanaged_snap; pool: " << pool << dendl;
-  PoolOp *op = new PoolOp;
-  if (!op) return -ENOMEM;
+  auto op = new PoolOp;
   op->tid = ++last_tid;
   op->pool = pool;
-  C_SelfmanagedSnap *fin = new C_SelfmanagedSnap(psnapid, onfinish);
-  op->onfinish = fin;
-  op->blp = &fin->bl;
+  op->onfinish = PoolOp::OpComp::create(
+    service.get_executor(),
+    CB_SelfmanagedSnap(std::move(onfinish)));
   op->pool_op = POOL_OP_CREATE_UNMANAGED_SNAP;
   pool_ops[op->tid] = op;
 
   pool_op_submit(op);
-  return 0;
 }
 
-int Objecter::delete_pool_snap(int64_t pool, string& snap_name,
-                              Context *onfinish)
+void Objecter::delete_pool_snap(
+  int64_t pool, std::string_view snap_name,
+  decltype(PoolOp::onfinish)&& onfinish)
 {
   unique_lock wl(rwlock);
   ldout(cct, 10) << "delete_pool_snap; pool: " << pool << "; snap: "
                 << snap_name << dendl;
 
   const pg_pool_t *p = osdmap->get_pg_pool(pool);
-  if (!p)
-    return -EINVAL;
-  if (!p->snap_exists(snap_name.c_str()))
-    return -ENOENT;
+  if (!p) {
+    onfinish->defer(std::move(onfinish), osdc_errc::pool_dne, cb::list{});
+    return;
+  }
 
-  PoolOp *op = new PoolOp;
-  if (!op)
-    return -ENOMEM;
+  if (!p->snap_exists(snap_name)) {
+    onfinish->defer(std::move(onfinish), osdc_errc::snapshot_dne, cb::list{});
+    return;
+  }
+
+  auto op = new PoolOp;
   op->tid = ++last_tid;
   op->pool = pool;
   op->name = snap_name;
-  op->onfinish = onfinish;
+  op->onfinish = std::move(onfinish);
   op->pool_op = POOL_OP_DELETE_SNAP;
   pool_ops[op->tid] = op;
 
   pool_op_submit(op);
-
-  return 0;
 }
 
-int Objecter::delete_selfmanaged_snap(int64_t pool, snapid_t snap,
-                                     Context *onfinish)
+void Objecter::delete_selfmanaged_snap(int64_t pool, snapid_t snap,
+                                      decltype(PoolOp::onfinish)&& onfinish)
 {
   unique_lock wl(rwlock);
   ldout(cct, 10) << "delete_selfmanaged_snap; pool: " << pool << "; snap: "
                 << snap << dendl;
-  PoolOp *op = new PoolOp;
-  if (!op) return -ENOMEM;
+  auto op = new PoolOp;
   op->tid = ++last_tid;
   op->pool = pool;
-  op->onfinish = onfinish;
+  op->onfinish = std::move(onfinish);
   op->pool_op = POOL_OP_DELETE_UNMANAGED_SNAP;
   op->snapid = snap;
   pool_ops[op->tid] = op;
 
   pool_op_submit(op);
-
-  return 0;
 }
 
-int Objecter::create_pool(string& name, Context *onfinish,
-                         int crush_rule)
+void Objecter::create_pool(std::string_view name,
+                          decltype(PoolOp::onfinish)&& onfinish,
+                          int crush_rule)
 {
   unique_lock wl(rwlock);
   ldout(cct, 10) << "create_pool name=" << name << dendl;
 
-  if (osdmap->lookup_pg_pool_name(name) >= 0)
-    return -EEXIST;
+  if (osdmap->lookup_pg_pool_name(name) >= 0) {
+    onfinish->defer(std::move(onfinish), osdc_errc::pool_exists, cb::list{});
+    return;
+  }
 
-  PoolOp *op = new PoolOp;
-  if (!op)
-    return -ENOMEM;
+  auto op = new PoolOp;
   op->tid = ++last_tid;
   op->pool = 0;
   op->name = name;
-  op->onfinish = onfinish;
+  op->onfinish = std::move(onfinish);
   op->pool_op = POOL_OP_CREATE;
   pool_ops[op->tid] = op;
   op->crush_rule = crush_rule;
 
   pool_op_submit(op);
-
-  return 0;
 }
 
-int Objecter::delete_pool(int64_t pool, Context *onfinish)
+void Objecter::delete_pool(int64_t pool,
+                          decltype(PoolOp::onfinish)&& onfinish)
 {
   unique_lock wl(rwlock);
   ldout(cct, 10) << "delete_pool " << pool << dendl;
 
   if (!osdmap->have_pg_pool(pool))
-    return -ENOENT;
-
-  _do_delete_pool(pool, onfinish);
-  return 0;
+    onfinish->defer(std::move(onfinish), osdc_errc::pool_dne, cb::list{});
+  else
+    _do_delete_pool(pool, std::move(onfinish));
 }
 
-int Objecter::delete_pool(const string &pool_name, Context *onfinish)
+void Objecter::delete_pool(std::string_view pool_name,
+                          decltype(PoolOp::onfinish)&& onfinish)
 {
   unique_lock wl(rwlock);
   ldout(cct, 10) << "delete_pool " << pool_name << dendl;
 
   int64_t pool = osdmap->lookup_pg_pool_name(pool_name);
   if (pool < 0)
-    return pool;
-
-  _do_delete_pool(pool, onfinish);
-  return 0;
+    // This only returns one error: -ENOENT.
+    onfinish->defer(std::move(onfinish), osdc_errc::pool_dne, cb::list{});
+  else
+    _do_delete_pool(pool, std::move(onfinish));
 }
 
-void Objecter::_do_delete_pool(int64_t pool, Context *onfinish)
+void Objecter::_do_delete_pool(int64_t pool,
+                              decltype(PoolOp::onfinish)&& onfinish)
+
 {
-  PoolOp *op = new PoolOp;
+  auto op = new PoolOp;
   op->tid = ++last_tid;
   op->pool = pool;
   op->name = "delete";
-  op->onfinish = onfinish;
+  op->onfinish = std::move(onfinish);
   op->pool_op = POOL_OP_DELETE;
   pool_ops[op->tid] = op;
   pool_op_submit(op);
@@ -4029,9 +3964,9 @@ void Objecter::_pool_op_submit(PoolOp *op)
   // rwlock is locked unique
 
   ldout(cct, 10) << "pool_op_submit " << op->tid << dendl;
-  MPoolOp *m = new MPoolOp(monc->get_fsid(), op->tid, op->pool,
-                          op->name, op->pool_op,
-                          last_seen_osdmap_version);
+  auto m = new MPoolOp(monc->get_fsid(), op->tid, op->pool,
+                      op->name, op->pool_op,
+                      last_seen_osdmap_version);
   if (op->snapid) m->snapid = op->snapid;
   if (op->crush_rule) m->crush_rule = op->crush_rule;
   monc->send_mon_message(m);
@@ -4042,13 +3977,15 @@ void Objecter::_pool_op_submit(PoolOp *op)
 
 /**
  * Handle a reply to a PoolOp message. Check that we sent the message
- * and give the caller responsibility for the returned ceph::buffer::list.
+ * and give the caller responsibility for the returned cb::list.
  * Then either call the finisher or stash the PoolOp, depending on if we
  * have a new enough map.
  * Lastly, clean up the message and PoolOp.
  */
 void Objecter::handle_pool_op_reply(MPoolOpReply *m)
 {
+  int rc = m->replyCode;
+  auto ec = rc < 0 ? bs::error_code(-rc, mon_category()) : bs::error_code();
   FUNCTRACE(cct);
   shunique_lock sul(rwlock, acquire_shared);
   if (!initialized) {
@@ -4059,13 +3996,13 @@ void Objecter::handle_pool_op_reply(MPoolOpReply *m)
 
   ldout(cct, 10) << "handle_pool_op_reply " << *m << dendl;
   ceph_tid_t tid = m->get_tid();
-  map<ceph_tid_t, PoolOp *>::iterator iter = pool_ops.find(tid);
+  auto iter = pool_ops.find(tid);
   if (iter != pool_ops.end()) {
     PoolOp *op = iter->second;
     ldout(cct, 10) << "have request " << tid << " at " << op << " Op: "
                   << ceph_pool_op_name(op->pool_op) << dendl;
-    if (op->blp)
-      op->blp->claim(m->response_data);
+    cb::list bl;
+    bl.claim(m->response_data);
     if (m->version > last_seen_osdmap_version)
       last_seen_osdmap_version = m->version;
     if (osdmap->get_epoch() < m->epoch) {
@@ -4079,19 +4016,27 @@ void Objecter::handle_pool_op_reply(MPoolOpReply *m)
       if (osdmap->get_epoch() < m->epoch) {
        ldout(cct, 20) << "waiting for client to reach epoch " << m->epoch
                       << " before calling back" << dendl;
-       _wait_for_new_map(op->onfinish, m->epoch, m->replyCode);
+       _wait_for_new_map(OpCompletion::create(
+                           service.get_executor(),
+                           [o = std::move(op->onfinish),
+                            bl = std::move(bl)](
+                             bs::error_code ec) mutable {
+                             o->defer(std::move(o), ec, bl);
+                           }),
+                         m->epoch,
+                         ec);
       } else {
        // map epoch changed, probably because a MOSDMap message
        // sneaked in. Do caller-specified callback now or else
        // we lose it forever.
        ceph_assert(op->onfinish);
-       op->onfinish->complete(m->replyCode);
+       op->onfinish->defer(std::move(op->onfinish), ec, std::move(bl));
       }
     } else {
       ceph_assert(op->onfinish);
-      op->onfinish->complete(m->replyCode);
+      op->onfinish->defer(std::move(op->onfinish), ec, std::move(bl));
     }
-    op->onfinish = NULL;
+    op->onfinish = nullptr;
     if (!sul.owns_lock()) {
       sul.unlock();
       sul.lock();
@@ -4118,7 +4063,7 @@ int Objecter::pool_op_cancel(ceph_tid_t tid, int r)
 
   unique_lock wl(rwlock);
 
-  map<ceph_tid_t, PoolOp*>::iterator it = pool_ops.find(tid);
+  auto it = pool_ops.find(tid);
   if (it == pool_ops.end()) {
     ldout(cct, 10) << __func__ << " tid " << tid << " dne" << dendl;
     return -ENOENT;
@@ -4128,7 +4073,7 @@ int Objecter::pool_op_cancel(ceph_tid_t tid, int r)
 
   PoolOp *op = it->second;
   if (op->onfinish)
-    op->onfinish->complete(r);
+    op->onfinish->defer(std::move(op->onfinish), osdcode(r), cb::list{});
 
   _finish_pool_op(op, r);
   return 0;
@@ -4149,19 +4094,16 @@ void Objecter::_finish_pool_op(PoolOp *op, int r)
 
 // pool stats
 
-void Objecter::get_pool_stats(list<string>& pools,
-                             map<string,pool_stat_t> *result,
-                             bool *per_pool,
-                             Context *onfinish)
+void Objecter::get_pool_stats(
+  const std::vector<std::string>& pools,
+  decltype(PoolStatOp::onfinish)&& onfinish)
 {
   ldout(cct, 10) << "get_pool_stats " << pools << dendl;
 
-  PoolStatOp *op = new PoolStatOp;
+  auto op = new PoolStatOp;
   op->tid = ++last_tid;
   op->pools = pools;
-  op->pool_stats = result;
-  op->per_pool = per_pool;
-  op->onfinish = onfinish;
+  op->onfinish = std::move(onfinish);
   if (mon_timeout > timespan(0)) {
     op->ontimeout = timer.add_event(mon_timeout,
                                    [this, op]() {
@@ -4202,16 +4144,15 @@ void Objecter::handle_get_pool_stats_reply(MGetPoolStatsReply *m)
     return;
   }
 
-  map<ceph_tid_t, PoolStatOp *>::iterator iter = poolstat_ops.find(tid);
+  auto iter = poolstat_ops.find(tid);
   if (iter != poolstat_ops.end()) {
     PoolStatOp *op = poolstat_ops[tid];
     ldout(cct, 10) << "have request " << tid << " at " << op << dendl;
-    *op->pool_stats = m->pool_stats;
-    *op->per_pool = m->per_pool;
     if (m->version > last_seen_pgmap_version) {
       last_seen_pgmap_version = m->version;
     }
-    op->onfinish->complete(0);
+    op->onfinish->defer(std::move(op->onfinish), bs::error_code{},
+                       std::move(m->pool_stats), m->per_pool);
     _finish_pool_stat_op(op, 0);
   } else {
     ldout(cct, 10) << "unknown request " << tid << dendl;
@@ -4226,7 +4167,7 @@ int Objecter::pool_stat_op_cancel(ceph_tid_t tid, int r)
 
   unique_lock wl(rwlock);
 
-  map<ceph_tid_t, PoolStatOp*>::iterator it = poolstat_ops.find(tid);
+  auto it = poolstat_ops.find(tid);
   if (it == poolstat_ops.end()) {
     ldout(cct, 10) << __func__ << " tid " << tid << " dne" << dendl;
     return -ENOENT;
@@ -4234,9 +4175,10 @@ int Objecter::pool_stat_op_cancel(ceph_tid_t tid, int r)
 
   ldout(cct, 10) << __func__ << " tid " << tid << dendl;
 
-  PoolStatOp *op = it->second;
+  auto op = it->second;
   if (op->onfinish)
-    op->onfinish->complete(r);
+    op->onfinish->defer(std::move(op->onfinish), osdcode(r),
+                       bc::flat_map<std::string, pool_stat_t>{}, false);
   _finish_pool_stat_op(op, r);
   return 0;
 }
@@ -4254,18 +4196,16 @@ void Objecter::_finish_pool_stat_op(PoolStatOp *op, int r)
   delete op;
 }
 
-void Objecter::get_fs_stats(ceph_statfs& result,
-                           boost::optional<int64_t> data_pool,
-                           Context *onfinish)
+void Objecter::get_fs_stats(boost::optional<int64_t> poolid,
+                           decltype(StatfsOp::onfinish)&& onfinish)
 {
   ldout(cct, 10) << "get_fs_stats" << dendl;
   unique_lock l(rwlock);
 
-  StatfsOp *op = new StatfsOp;
+  auto op = new StatfsOp;
   op->tid = ++last_tid;
-  op->stats = &result;
-  op->data_pool = data_pool;
-  op->onfinish = onfinish;
+  op->data_pool = poolid;
+  op->onfinish = std::move(onfinish);
   if (mon_timeout > timespan(0)) {
     op->ontimeout = timer.add_event(mon_timeout,
                                    [this, op]() {
@@ -4308,10 +4248,9 @@ void Objecter::handle_fs_stats_reply(MStatfsReply *m)
   if (statfs_ops.count(tid)) {
     StatfsOp *op = statfs_ops[tid];
     ldout(cct, 10) << "have request " << tid << " at " << op << dendl;
-    *(op->stats) = m->h.st;
     if (m->h.version > last_seen_pgmap_version)
       last_seen_pgmap_version = m->h.version;
-    op->onfinish->complete(0);
+    op->onfinish->defer(std::move(op->onfinish), bs::error_code{}, m->h.st);
     _finish_statfs_op(op, 0);
   } else {
     ldout(cct, 10) << "unknown request " << tid << dendl;
@@ -4326,7 +4265,7 @@ int Objecter::statfs_op_cancel(ceph_tid_t tid, int r)
 
   unique_lock wl(rwlock);
 
-  map<ceph_tid_t, StatfsOp*>::iterator it = statfs_ops.find(tid);
+  auto it = statfs_ops.find(tid);
   if (it == statfs_ops.end()) {
     ldout(cct, 10) << __func__ << " tid " << tid << " dne" << dendl;
     return -ENOENT;
@@ -4334,9 +4273,9 @@ int Objecter::statfs_op_cancel(ceph_tid_t tid, int r)
 
   ldout(cct, 10) << __func__ << " tid " << tid << dendl;
 
-  StatfsOp *op = it->second;
+  auto op = it->second;
   if (op->onfinish)
-    op->onfinish->complete(r);
+    op->onfinish->defer(std::move(op->onfinish), osdcode(r), ceph_statfs{});
   _finish_statfs_op(op, r);
   return 0;
 }
@@ -4357,16 +4296,16 @@ void Objecter::_finish_statfs_op(StatfsOp *op, int r)
 // scatter/gather
 
 void Objecter::_sg_read_finish(vector<ObjectExtent>& extents,
-                              vector<ceph::buffer::list>& resultbl,
-                              ceph::buffer::list *bl, Context *onfinish)
+                              vector<cb::list>& resultbl,
+                              cb::list *bl, Context *onfinish)
 {
   // all done
   ldout(cct, 15) << "_sg_read_finish" << dendl;
 
   if (extents.size() > 1) {
     Striper::StripedReadResult r;
-    vector<ceph::buffer::list>::iterator bit = resultbl.begin();
-    for (vector<ObjectExtent>::iterator eit = extents.begin();
+    auto bit = resultbl.begin();
+    for (auto eit = extents.begin();
         eit != extents.end();
         ++eit, ++bit) {
       r.add_partial_result(cct, *bit, eit->buffer_extents);
@@ -4418,7 +4357,7 @@ bool Objecter::ms_handle_reset(Connection *con)
        return false;
       }
       map<uint64_t, LingerOp *> lresend;
-      OSDSession::unique_lock sl(session->lock);
+      unique_lock sl(session->lock);
       _reopen_session(session);
       _kick_requests(session, lresend);
       sl.unlock();
@@ -4466,9 +4405,7 @@ void Objecter::op_target_t::dump(Formatter *f) const
 
 void Objecter::_dump_active(OSDSession *s)
 {
-  for (map<ceph_tid_t,Op*>::iterator p = s->ops.begin();
-       p != s->ops.end();
-       ++p) {
+  for (auto p = s->ops.begin(); p != s->ops.end(); ++p) {
     Op *op = p->second;
     ldout(cct, 20) << op->tid << "\t" << op->target.pgid
                   << "\tosd." << (op->session ? op->session->osd : -1)
@@ -4481,10 +4418,10 @@ void Objecter::_dump_active()
 {
   ldout(cct, 20) << "dump_active .. " << num_homeless_ops << " homeless"
                 << dendl;
-  for (map<int, OSDSession *>::iterator siter = osd_sessions.begin();
+  for (auto siter = osd_sessions.begin();
        siter != osd_sessions.end(); ++siter) {
-    OSDSession *s = siter->second;
-    OSDSession::shared_lock sl(s->lock);
+    auto s = siter->second;
+    shared_lock sl(s->lock);
     _dump_active(s);
     sl.unlock();
   }
@@ -4513,9 +4450,7 @@ void Objecter::dump_requests(Formatter *fmt)
 
 void Objecter::_dump_ops(const OSDSession *s, Formatter *fmt)
 {
-  for (map<ceph_tid_t,Op*>::const_iterator p = s->ops.begin();
-       p != s->ops.end();
-       ++p) {
+  for (auto p = s->ops.begin(); p != s->ops.end(); ++p) {
     Op *op = p->second;
     auto age = std::chrono::duration<double>(ceph::coarse_mono_clock::now() - op->stamp);
     fmt->open_object_section("op");
@@ -4529,9 +4464,7 @@ void Objecter::_dump_ops(const OSDSession *s, Formatter *fmt)
     fmt->dump_stream("mtime") << op->mtime;
 
     fmt->open_array_section("osd_ops");
-    for (vector<OSDOp>::const_iterator it = op->ops.begin();
-        it != op->ops.end();
-        ++it) {
+    for (auto it = op->ops.begin(); it != op->ops.end(); ++it) {
       fmt->dump_stream("osd_op") << *it;
     }
     fmt->close_section(); // osd_ops array
@@ -4544,10 +4477,10 @@ void Objecter::dump_ops(Formatter *fmt)
 {
   // Read-lock on Objecter held
   fmt->open_array_section("ops");
-  for (map<int, OSDSession *>::const_iterator siter = osd_sessions.begin();
+  for (auto siter = osd_sessions.begin();
        siter != osd_sessions.end(); ++siter) {
     OSDSession *s = siter->second;
-    OSDSession::shared_lock sl(s->lock);
+    shared_lock sl(s->lock);
     _dump_ops(s, fmt);
     sl.unlock();
   }
@@ -4557,10 +4490,8 @@ void Objecter::dump_ops(Formatter *fmt)
 
 void Objecter::_dump_linger_ops(const OSDSession *s, Formatter *fmt)
 {
-  for (map<uint64_t, LingerOp*>::const_iterator p = s->linger_ops.begin();
-       p != s->linger_ops.end();
-       ++p) {
-    LingerOp *op = p->second;
+  for (auto p = s->linger_ops.begin(); p != s->linger_ops.end(); ++p) {
+    auto op = p->second;
     fmt->open_object_section("linger_op");
     fmt->dump_unsigned("linger_id", op->linger_id);
     op->target.dump(fmt);
@@ -4574,10 +4505,10 @@ void Objecter::dump_linger_ops(Formatter *fmt)
 {
   // We have a read-lock on the objecter
   fmt->open_array_section("linger_ops");
-  for (map<int, OSDSession *>::const_iterator siter = osd_sessions.begin();
+  for (auto siter = osd_sessions.begin();
        siter != osd_sessions.end(); ++siter) {
-    OSDSession *s = siter->second;
-    OSDSession::shared_lock sl(s->lock);
+    auto s = siter->second;
+    shared_lock sl(s->lock);
     _dump_linger_ops(s, fmt);
     sl.unlock();
   }
@@ -4587,16 +4518,13 @@ void Objecter::dump_linger_ops(Formatter *fmt)
 
 void Objecter::_dump_command_ops(const OSDSession *s, Formatter *fmt)
 {
-  for (map<uint64_t, CommandOp*>::const_iterator p = s->command_ops.begin();
-       p != s->command_ops.end();
-       ++p) {
-    CommandOp *op = p->second;
+  for (auto p = s->command_ops.begin(); p != s->command_ops.end(); ++p) {
+    auto op = p->second;
     fmt->open_object_section("command_op");
     fmt->dump_unsigned("command_id", op->tid);
     fmt->dump_int("osd", op->session ? op->session->osd : -1);
     fmt->open_array_section("command");
-    for (vector<string>::const_iterator q = op->cmd.begin();
-        q != op->cmd.end(); ++q)
+    for (auto q = op->cmd.begin(); q != op->cmd.end(); ++q)
       fmt->dump_string("word", *q);
     fmt->close_section();
     if (op->target_osd >= 0)
@@ -4611,10 +4539,10 @@ void Objecter::dump_command_ops(Formatter *fmt)
 {
   // We have a read-lock on the Objecter here
   fmt->open_array_section("command_ops");
-  for (map<int, OSDSession *>::const_iterator siter = osd_sessions.begin();
+  for (auto siter = osd_sessions.begin();
        siter != osd_sessions.end(); ++siter) {
-    OSDSession *s = siter->second;
-    OSDSession::shared_lock sl(s->lock);
+    auto s = siter->second;
+    shared_lock sl(s->lock);
     _dump_command_ops(s, fmt);
     sl.unlock();
   }
@@ -4625,10 +4553,8 @@ void Objecter::dump_command_ops(Formatter *fmt)
 void Objecter::dump_pool_ops(Formatter *fmt) const
 {
   fmt->open_array_section("pool_ops");
-  for (map<ceph_tid_t, PoolOp*>::const_iterator p = pool_ops.begin();
-       p != pool_ops.end();
-       ++p) {
-    PoolOp *op = p->second;
+  for (auto p = pool_ops.begin(); p != pool_ops.end(); ++p) {
+    auto op = p->second;
     fmt->open_object_section("pool_op");
     fmt->dump_unsigned("tid", op->tid);
     fmt->dump_int("pool", op->pool);
@@ -4645,7 +4571,7 @@ void Objecter::dump_pool_ops(Formatter *fmt) const
 void Objecter::dump_pool_stat_ops(Formatter *fmt) const
 {
   fmt->open_array_section("pool_stat_ops");
-  for (map<ceph_tid_t, PoolStatOp*>::const_iterator p = poolstat_ops.begin();
+  for (auto p = poolstat_ops.begin();
        p != poolstat_ops.end();
        ++p) {
     PoolStatOp *op = p->second;
@@ -4654,10 +4580,8 @@ void Objecter::dump_pool_stat_ops(Formatter *fmt) const
     fmt->dump_stream("last_sent") << op->last_submit;
 
     fmt->open_array_section("pools");
-    for (list<string>::const_iterator it = op->pools.begin();
-        it != op->pools.end();
-        ++it) {
-      fmt->dump_string("pool", *it);
+    for (const auto& it : op->pools) {
+      fmt->dump_string("pool", it);
     }
     fmt->close_section(); // pools array
 
@@ -4669,10 +4593,8 @@ void Objecter::dump_pool_stat_ops(Formatter *fmt) const
 void Objecter::dump_statfs_ops(Formatter *fmt) const
 {
   fmt->open_array_section("statfs_ops");
-  for (map<ceph_tid_t, StatfsOp*>::const_iterator p = statfs_ops.begin();
-       p != statfs_ops.end();
-       ++p) {
-    StatfsOp *op = p->second;
+  for (auto p = statfs_ops.begin(); p != statfs_ops.end(); ++p) {
+    auto op = p->second;
     fmt->open_object_section("statfs_op");
     fmt->dump_unsigned("tid", op->tid);
     fmt->dump_stream("last_sent") << op->last_submit;
@@ -4690,7 +4612,7 @@ int Objecter::RequestStateHook::call(std::string_view command,
                                     const cmdmap_t& cmdmap,
                                     Formatter *f,
                                     std::ostream& ss,
-                                    ceph::buffer::list& out)
+                                    cb::list& out)
 {
   shared_lock rl(m_objecter->rwlock);
   m_objecter->dump_requests(f);
@@ -4712,7 +4634,7 @@ void Objecter::blacklist_self(bool set)
   ss << messenger->get_myaddrs().front().get_legacy_str();
   cmd.push_back("\"addr\":\"" + ss.str() + "\"");
 
-  MMonCommand *m = new MMonCommand(monc->get_fsid());
+  auto m = new MMonCommand(monc->get_fsid());
   m->cmd = cmd;
 
   monc->send_mon_message(m);
@@ -4737,8 +4659,8 @@ void Objecter::handle_command_reply(MCommandReply *m)
     return;
   }
 
-  OSDSession::shared_lock sl(s->lock);
-  map<ceph_tid_t,CommandOp*>::iterator p = s->command_ops.find(m->get_tid());
+  shared_lock sl(s->lock);
+  auto p = s->command_ops.find(m->get_tid());
   if (p == s->command_ops.end()) {
     ldout(cct, 10) << "handle_command_reply tid " << m->get_tid()
                   << " not found" << dendl;
@@ -4758,6 +4680,7 @@ void Objecter::handle_command_reply(MCommandReply *m)
     sl.unlock();
     return;
   }
+
   if (m->r == -EAGAIN) {
     ldout(cct,10) << __func__ << " tid " << m->get_tid()
                  << " got EAGAIN, requesting map and resending" << dendl;
@@ -4770,14 +4693,12 @@ void Objecter::handle_command_reply(MCommandReply *m)
     return;
   }
 
-  if (c->poutbl) {
-    c->poutbl->claim(m->get_data());
-  }
-
   sl.unlock();
 
-  OSDSession::unique_lock sul(s->lock);
-  _finish_command(c, m->r, m->rs);
+  unique_lock sul(s->lock);
+  _finish_command(c, m->r < 0 ? bs::error_code(-m->r, osd_category()) :
+                 bs::error_code(), std::move(m->rs),
+                 std::move(m->get_data()));
   sul.unlock();
 
   m->put();
@@ -4792,7 +4713,7 @@ void Objecter::submit_command(CommandOp *c, ceph_tid_t *ptid)
   c->tid = tid;
 
   {
-    OSDSession::unique_lock hs_wl(homeless_session->lock);
+    unique_lock hs_wl(homeless_session->lock);
     _session_command_op_assign(homeless_session, c);
   }
 
@@ -4801,8 +4722,9 @@ void Objecter::submit_command(CommandOp *c, ceph_tid_t *ptid)
   if (osd_timeout > timespan(0)) {
     c->ontimeout = timer.add_event(osd_timeout,
                                   [this, c, tid]() {
-                                    command_op_cancel(c->session, tid,
-                                                      -ETIMEDOUT); });
+                                    command_op_cancel(
+                                      c->session, tid,
+                                      osdc_errc::timed_out); });
   }
 
   if (!c->session->is_homeless()) {
@@ -4812,12 +4734,14 @@ void Objecter::submit_command(CommandOp *c, ceph_tid_t *ptid)
   }
   if (c->map_check_error)
     _send_command_map_check(c);
-  *ptid = tid;
+  if (ptid)
+    *ptid = tid;
 
   logger->inc(l_osdc_command_active);
 }
 
-int Objecter::_calc_command_target(CommandOp *c, shunique_lock& sul)
+int Objecter::_calc_command_target(CommandOp *c,
+                                  shunique_lock<ceph::shared_mutex>& sul)
 {
   ceph_assert(sul.owns_lock() && sul.mutex() == &rwlock);
 
@@ -4873,7 +4797,7 @@ int Objecter::_calc_command_target(CommandOp *c, shunique_lock& sul)
 }
 
 void Objecter::_assign_command_session(CommandOp *c,
-                                      shunique_lock& sul)
+                                      shunique_lock<ceph::shared_mutex>& sul)
 {
   ceph_assert(sul.owns_lock() && sul.mutex() == &rwlock);
 
@@ -4884,11 +4808,11 @@ void Objecter::_assign_command_session(CommandOp *c,
   if (c->session != s) {
     if (c->session) {
       OSDSession *cs = c->session;
-      OSDSession::unique_lock csl(cs->lock);
+      unique_lock csl(cs->lock);
       _session_command_op_remove(c->session, c);
       csl.unlock();
     }
-    OSDSession::unique_lock sl(s->lock);
+    unique_lock sl(s->lock);
     _session_command_op_assign(s, c);
   }
 
@@ -4900,7 +4824,7 @@ void Objecter::_send_command(CommandOp *c)
   ldout(cct, 10) << "_send_command " << c->tid << dendl;
   ceph_assert(c->session);
   ceph_assert(c->session->con);
-  MCommand *m = new MCommand(monc->monmap.fsid);
+  auto m = new MCommand(monc->monmap.fsid);
   m->cmd = c->cmd;
   m->set_data(c->inbl);
   m->set_tid(c->tid);
@@ -4908,13 +4832,14 @@ void Objecter::_send_command(CommandOp *c)
   logger->inc(l_osdc_command_send);
 }
 
-int Objecter::command_op_cancel(OSDSession *s, ceph_tid_t tid, int r)
+int Objecter::command_op_cancel(OSDSession *s, ceph_tid_t tid,
+                               bs::error_code ec)
 {
   ceph_assert(initialized);
 
   unique_lock wl(rwlock);
 
-  map<ceph_tid_t, CommandOp*>::iterator it = s->command_ops.find(tid);
+  auto it = s->command_ops.find(tid);
   if (it == s->command_ops.end()) {
     ldout(cct, 10) << __func__ << " tid " << tid << " dne" << dendl;
     return -ENOENT;
@@ -4924,25 +4849,25 @@ int Objecter::command_op_cancel(OSDSession *s, ceph_tid_t tid, int r)
 
   CommandOp *op = it->second;
   _command_cancel_map_check(op);
-  OSDSession::unique_lock sl(op->session->lock);
-  _finish_command(op, r, "");
+  unique_lock sl(op->session->lock);
+  _finish_command(op, ec, {}, {});
   sl.unlock();
   return 0;
 }
 
-void Objecter::_finish_command(CommandOp *c, int r, string rs)
+void Objecter::_finish_command(CommandOp *c, bs::error_code ec,
+                              string&& rs, cb::list&& bl)
 {
   // rwlock is locked unique
   // session lock is locked
 
-  ldout(cct, 10) << "_finish_command " << c->tid << " = " << r << " "
+  ldout(cct, 10) << "_finish_command " << c->tid << " = " << ec << " "
                 << rs << dendl;
-  if (c->prs)
-    *c->prs = rs;
+
   if (c->onfinish)
-    c->onfinish->complete(r);
+    c->onfinish->defer(std::move(c->onfinish), ec, std::move(rs), std::move(bl));
 
-  if (c->ontimeout && r != -ETIMEDOUT)
+  if (c->ontimeout && ec != bs::errc::timed_out)
     timer.cancel_event(c->ontimeout);
 
   _session_command_op_remove(c->session, c);
@@ -4961,20 +4886,14 @@ Objecter::OSDSession::~OSDSession()
   ceph_assert(command_ops.empty());
 }
 
-Objecter::Objecter(CephContext *cct_, Messenger *m, MonClient *mc,
-                  Finisher *fin,
+Objecter::Objecter(CephContext *cct,
+                  Messenger *m, MonClient *mc,
+                  boost::asio::io_context& service,
                   double mon_timeout,
                   double osd_timeout) :
-  Dispatcher(cct_), messenger(m), monc(mc), finisher(fin),
-  trace_endpoint("0.0.0.0", 0, "Objecter"),
-  osdmap{std::make_unique<OSDMap>()},
-  homeless_session(new OSDSession(cct, -1)),
+  Dispatcher(cct), messenger(m), monc(mc), service(service),
   mon_timeout(ceph::make_timespan(mon_timeout)),
-  osd_timeout(ceph::make_timespan(osd_timeout)),
-  op_throttle_bytes(cct, "objecter_bytes",
-                   cct->_conf->objecter_inflight_op_bytes),
-  op_throttle_ops(cct, "objecter_ops", cct->_conf->objecter_inflight_ops),
-  retry_writes_after_first_reply(cct->_conf->objecter_retry_writes_after_first_reply)
+  osd_timeout(ceph::make_timespan(osd_timeout))
 {}
 
 Objecter::~Objecter()
@@ -5029,60 +4948,82 @@ hobject_t Objecter::enumerate_objects_end()
   return hobject_t::get_max();
 }
 
-struct C_EnumerateReply : public Context {
-  ceph::buffer::list bl;
-
-  Objecter *objecter;
-  hobject_t *next;
-  std::list<librados::ListObjectImpl> *result;
+template<typename T>
+struct EnumerationContext {
+  Objecter* objecter;
   const hobject_t end;
-  const int64_t pool_id;
-  Context *on_finish;
+  const cb::list filter;
+  uint32_t max;
+  const object_locator_t oloc;
+  std::vector<T> ls;
+private:
+  fu2::unique_function<void(bs::error_code,
+                           std::vector<T>,
+                           hobject_t) &&> on_finish;
+public:
+  epoch_t epoch = 0;
+  int budget = -1;
+
+  EnumerationContext(Objecter* objecter,
+                    hobject_t end, cb::list filter,
+                    uint32_t max, object_locator_t oloc,
+                    decltype(on_finish) on_finish)
+    : objecter(objecter), end(std::move(end)), filter(std::move(filter)),
+      max(max), oloc(std::move(oloc)), on_finish(std::move(on_finish)) {}
+
+  void operator()(bs::error_code ec,
+                 std::vector<T> v,
+                 hobject_t h) && {
+    if (budget >= 0) {
+      objecter->put_op_budget_bytes(budget);
+      budget = -1;
+    }
+
+    std::move(on_finish)(ec, std::move(v), std::move(h));
+  }
+};
+
+template<typename T>
+struct CB_EnumerateReply {
+  cb::list bl;
 
-  epoch_t epoch;
-  int budget;
+  Objecter* objecter;
+  std::unique_ptr<EnumerationContext<T>> ctx;
 
-  C_EnumerateReply(Objecter *objecter_, hobject_t *next_,
-      std::list<librados::ListObjectImpl> *result_,
-      const hobject_t end_, const int64_t pool_id_, Context *on_finish_) :
-    objecter(objecter_), next(next_), result(result_),
-    end(end_), pool_id(pool_id_), on_finish(on_finish_),
-    epoch(0), budget(-1)
-  {}
+  CB_EnumerateReply(Objecter* objecter,
+                   std::unique_ptr<EnumerationContext<T>>&& ctx) :
+    objecter(objecter), ctx(std::move(ctx)) {}
 
-  void finish(int r) override {
-    objecter->_enumerate_reply(
-      bl, r, end, pool_id, budget, epoch, result, next, on_finish);
+  void operator()(bs::error_code ec) {
+    objecter->_enumerate_reply(std::move(bl), ec, std::move(ctx));
   }
 };
 
+template<typename T>
 void Objecter::enumerate_objects(
-    int64_t pool_id,
-    const std::string &ns,
-    const hobject_t &start,
-    const hobject_t &end,
-    const uint32_t max,
-    const ceph::buffer::list &filter_bl,
-    std::list<librados::ListObjectImpl> *result, 
-    hobject_t *next,
-    Context *on_finish)
-{
-  ceph_assert(result);
-
+  int64_t pool_id,
+  std::string_view ns,
+  hobject_t start,
+  hobject_t end,
+  const uint32_t max,
+  const cb::list& filter_bl,
+  fu2::unique_function<void(bs::error_code,
+                           std::vector<T>,
+                           hobject_t) &&> on_finish) {
   if (!end.is_max() && start > end) {
     lderr(cct) << __func__ << ": start " << start << " > end " << end << dendl;
-    on_finish->complete(-EINVAL);
+    std::move(on_finish)(osdc_errc::precondition_violated, {}, {});
     return;
   }
 
   if (max < 1) {
     lderr(cct) << __func__ << ": result size may not be zero" << dendl;
-    on_finish->complete(-EINVAL);
+    std::move(on_finish)(osdc_errc::precondition_violated, {}, {});
     return;
   }
 
   if (start.is_max()) {
-    on_finish->complete(0);
+    std::move(on_finish)({}, {}, {});
     return;
   }
 
@@ -5091,92 +5032,128 @@ void Objecter::enumerate_objects(
   if (!osdmap->test_flag(CEPH_OSDMAP_SORTBITWISE)) {
     rl.unlock();
     lderr(cct) << __func__ << ": SORTBITWISE cluster flag not set" << dendl;
-    on_finish->complete(-EOPNOTSUPP);
+    std::move(on_finish)(osdc_errc::not_supported, {}, {});
     return;
   }
-  const pg_pool_t *p = osdmap->get_pg_pool(pool_id);
+  const pg_pool_t* p = osdmap->get_pg_pool(pool_id);
   if (!p) {
     lderr(cct) << __func__ << ": pool " << pool_id << " DNE in osd epoch "
               << osdmap->get_epoch() << dendl;
     rl.unlock();
-    on_finish->complete(-ENOENT);
+    std::move(on_finish)(osdc_errc::pool_dne, {}, {});
     return;
   } else {
     rl.unlock();
   }
 
-  ldout(cct, 20) << __func__ << ": start=" << start << " end=" << end << dendl;
-
-  // Stash completion state
-  C_EnumerateReply *on_ack = new C_EnumerateReply(
-      this, next, result, end, pool_id, on_finish);
-
+  _issue_enumerate(start,
+                  std::make_unique<EnumerationContext<T>>(
+                    this, std::move(end), filter_bl,
+                    max, object_locator_t{pool_id, ns},
+                    std::move(on_finish)));
+}
+
+template
+void Objecter::enumerate_objects<librados::ListObjectImpl>(
+  int64_t pool_id,
+  std::string_view ns,
+  hobject_t start,
+  hobject_t end,
+  const uint32_t max,
+  const cb::list& filter_bl,
+  fu2::unique_function<void(bs::error_code,
+                           std::vector<librados::ListObjectImpl>,
+                           hobject_t) &&> on_finish);
+
+template
+void Objecter::enumerate_objects<RADOS::Entry>(
+  int64_t pool_id,
+  std::string_view ns,
+  hobject_t start,
+  hobject_t end,
+  const uint32_t max,
+  const cb::list& filter_bl,
+  fu2::unique_function<void(bs::error_code,
+                           std::vector<RADOS::Entry>,
+                           hobject_t) &&> on_finish);
+
+
+
+template<typename T>
+void Objecter::_issue_enumerate(hobject_t start,
+                               std::unique_ptr<EnumerationContext<T>> ctx) {
   ObjectOperation op;
-  op.pg_nls(max, filter_bl, start, 0);
+  auto c = ctx.get();
+  op.pg_nls(c->max, c->filter, start, osdmap->get_epoch());
+  auto on_ack = std::make_unique<CB_EnumerateReply<T>>(this, std::move(ctx));
+  // I hate having to do this. Try to find a cleaner way
+  // later.
+  auto epoch = &c->epoch;
+  auto budget = &c->budget;
+  auto pbl = &on_ack->bl;
 
   // Issue.  See you later in _enumerate_reply
-  object_locator_t oloc(pool_id, ns);
-  pg_read(start.get_hash(), oloc, op,
-         &on_ack->bl, 0, on_ack, &on_ack->epoch, &on_ack->budget);
-}
-
+  pg_read(start.get_hash(),
+         c->oloc, op, pbl, 0,
+         Op::OpComp::create(service.get_executor(),
+                            [c = std::move(on_ack)]
+                            (bs::error_code ec) mutable {
+                              (*c)(ec);
+                            }), epoch, budget);
+}
+
+template
+void Objecter::_issue_enumerate<librados::ListObjectImpl>(
+  hobject_t start,
+  std::unique_ptr<EnumerationContext<librados::ListObjectImpl>> ctx);
+template
+void Objecter::_issue_enumerate<RADOS::Entry>(
+  hobject_t start, std::unique_ptr<EnumerationContext<RADOS::Entry>> ctx);
+
+template<typename T>
 void Objecter::_enumerate_reply(
-    ceph::buffer::list &bl,
-    int r,
-    const hobject_t &end,
-    const int64_t pool_id,
-    int budget,
-    epoch_t reply_epoch,
-    std::list<librados::ListObjectImpl> *result,
-    hobject_t *next,
-    Context *on_finish)
-{
-  if (budget >= 0) {
-    put_op_budget_bytes(budget);
-  }
-
-  if (r < 0) {
-    ldout(cct, 4) << __func__ << ": remote error " << r << dendl;
-    on_finish->complete(r);
+  cb::list&& bl,
+  bs::error_code ec,
+  std::unique_ptr<EnumerationContext<T>>&& ctx)
+{
+  if (ec) {
+    std::move(*ctx)(ec, {}, {});
     return;
   }
 
-  ceph_assert(next != NULL);
-
   // Decode the results
   auto iter = bl.cbegin();
-  pg_nls_response_t response;
+  pg_nls_response_template<T> response;
+
+  try {
+    response.decode(iter);
+    if (!iter.end()) {
+      // extra_info isn't used anywhere. We do this solely to preserve
+      // backward compatibility
+      cb::list legacy_extra_info;
+      decode(legacy_extra_info, iter);
+    }
+  } catch (const bs::system_error& e) {
+    std::move(*ctx)(e.code(), {}, {});
+    return;
+  }
 
-  decode(response, iter);
-  if (!iter.end()) {
-    // extra_info isn't used anywhere. We do this solely to preserve
-    // backward compatibility
-    ceph::buffer::list legacy_extra_info;
-    decode(legacy_extra_info, iter);
+  shared_lock rl(rwlock);
+  auto pool = osdmap->get_pg_pool(ctx->oloc.get_pool());
+  rl.unlock();
+  if (!pool) {
+    // pool is gone, drop any results which are now meaningless.
+    std::move(*ctx)(osdc_errc::pool_dne, {}, {});
+    return;
   }
 
-  ldout(cct, 10) << __func__ << ": got " << response.entries.size()
-                << " handle " << response.handle
-                << " reply_epoch " << reply_epoch << dendl;
-  ldout(cct, 20) << __func__ << ": response.entries.size "
-                << response.entries.size() << ", response.entries "
-                << response.entries << dendl;
-  if (response.handle <= end) {
-    *next = response.handle;
+  hobject_t next;
+  if ((response.handle <= ctx->end)) {
+    next = response.handle;
   } else {
-    ldout(cct, 10) << __func__ << ": adjusted next down to end " << end
-                  << dendl;
-    *next = end;
+    next = ctx->end;
 
     // drop anything after 'end'
-    shared_lock rl(rwlock);
-    const pg_pool_t *pool = osdmap->get_pg_pool(pool_id);
-    if (!pool) {
-      // pool is gone, drop any results which are now meaningless.
-      rl.unlock();
-      on_finish->complete(-ENOENT);
-      return;
-    }
     while (!response.entries.empty()) {
       uint32_t hash = response.entries.back().locator.empty() ?
        pool->hash_key(response.entries.back().oid,
@@ -5187,34 +5164,61 @@ void Objecter::_enumerate_reply(
                     response.entries.back().locator,
                     CEPH_NOSNAP,
                     hash,
-                    pool_id,
+                    ctx->oloc.get_pool(),
                     response.entries.back().nspace);
-      if (last < end)
+      if (last < ctx->end)
        break;
-      ldout(cct, 20) << __func__ << " dropping item " << last
-                    << " >= end " << end << dendl;
       response.entries.pop_back();
     }
-    rl.unlock();
   }
-  if (!response.entries.empty()) {
-    result->merge(response.entries);
+
+  if (response.entries.size() <= ctx->max) {
+    ctx->max -= response.entries.size();
+    std::move(response.entries.begin(), response.entries.end(),
+             std::back_inserter(ctx->ls));
+  } else {
+    auto i = response.entries.begin();
+    while (ctx->max > 0) {
+      ctx->ls.push_back(std::move(*i));
+      --(ctx->max);
+      ++i;
+    }
+    uint32_t hash =
+      i->locator.empty() ?
+      pool->hash_key(i->oid, i->nspace) :
+      pool->hash_key(i->locator, i->nspace);
+
+    next = hobject_t{i->oid, i->locator,
+                    CEPH_NOSNAP,
+                    hash,
+                    ctx->oloc.get_pool(),
+                    i->nspace};
   }
 
-  // release the listing context's budget once all
-  // OPs (in the session) are finished
-#if 0
-  put_nlist_context_budget(list_context);
-#endif
-  on_finish->complete(r);
-  return;
+  if (next == ctx->end || ctx->max == 0) {
+    std::move(*ctx)(ec, std::move(ctx->ls), std::move(next));
+  } else {
+    _issue_enumerate(next, std::move(ctx));
+  }
 }
 
+template
+void Objecter::_enumerate_reply<librados::ListObjectImpl>(
+  cb::list&& bl,
+  bs::error_code ec,
+  std::unique_ptr<EnumerationContext<librados::ListObjectImpl>>&& ctx);
+
+template
+void Objecter::_enumerate_reply<RADOS::Entry>(
+  cb::list&& bl,
+  bs::error_code ec,
+  std::unique_ptr<EnumerationContext<RADOS::Entry>>&& ctx);
+
 namespace {
   using namespace librados;
 
   template <typename T>
-  void do_decode(std::vector<T>& items, std::vector<ceph::buffer::list>& bls)
+  void do_decode(std::vector<T>& items, std::vector<cb::list>& bls)
   {
     for (auto bl : bls) {
       auto p = bl.cbegin();
@@ -5225,19 +5229,19 @@ namespace {
   }
 
   struct C_ObjectOperation_scrub_ls : public Context {
-    ceph::buffer::list bl;
-    uint32_t *interval;
+    cb::list bl;
+    uint32_t* interval;
     std::vector<inconsistent_obj_t> *objects = nullptr;
     std::vector<inconsistent_snapset_t> *snapsets = nullptr;
-    int *rval;
+    int* rval;
 
-    C_ObjectOperation_scrub_ls(uint32_t *interval,
-                              std::vector<inconsistent_obj_t> *objects,
-                              int *rval)
+    C_ObjectOperation_scrub_ls(uint32_t* interval,
+                              std::vector<inconsistent_obj_t>* objects,
+                              int* rval)
       : interval(interval), objects(objects), rval(rval) {}
-    C_ObjectOperation_scrub_ls(uint32_t *interval,
-                              std::vector<inconsistent_snapset_t> *snapsets,
-                              int *rval)
+    C_ObjectOperation_scrub_ls(uint32_t* interval,
+                              std::vector<inconsistent_snapset_t>* snapsets,
+                              int* rval)
       : interval(interval), snapsets(snapsets), rval(rval) {}
     void finish(int r) override {
       if (r < 0 && r != -EAGAIN) {
@@ -5251,7 +5255,7 @@ namespace {
 
       try {
        decode();
-      } catch (ceph::buffer::error&) {
+      } catch (cb::error&) {
        if (rval)
          *rval = -EIO;
       }
@@ -5271,19 +5275,19 @@ namespace {
   };
 
   template <typename T>
-  void do_scrub_ls(::ObjectOperation *op,
+  void do_scrub_ls(::ObjectOperation* op,
                   const scrub_ls_arg_t& arg,
                   std::vector<T> *items,
-                  uint32_t *interval,
-                  int *rval)
+                  uint32_t* interval,
+                  int* rval)
   {
     OSDOp& osd_op = op->add_op(CEPH_OSD_OP_SCRUBLS);
     op->flags |= CEPH_OSD_FLAG_PGOP;
     ceph_assert(interval);
     arg.encode(osd_op.indata);
     unsigned p = op->ops.size() - 1;
-    auto *h = new C_ObjectOperation_scrub_ls{interval, items, rval};
-    op->out_handler[p] = h;
+    auto h = new C_ObjectOperation_scrub_ls{interval, items, rval};
+    op->set_handler(h);
     op->out_bl[p] = &h->bl;
     op->out_rval[p] = rval;
   }
@@ -5291,9 +5295,9 @@ namespace {
 
 void ::ObjectOperation::scrub_ls(const librados::object_id_t& start_after,
                                 uint64_t max_to_get,
-                                std::vector<librados::inconsistent_obj_t> *objects,
-                                uint32_t *interval,
-                                int *rval)
+                                std::vector<librados::inconsistent_obj_t>* objects,
+                                uint32_t* interval,
+                                int* rval)
 {
   scrub_ls_arg_t arg = {*interval, 0, start_after, max_to_get};
   do_scrub_ls(this, arg, objects, interval, rval);
index 5adc4a77b470195dd9b855189e6fa0a344575c89..c7d132a50879ac4faea15a7afae1ed413b0da6dc 100644 (file)
 #include <mutex>
 #include <memory>
 #include <sstream>
+#include <string>
+#include <string_view>
 #include <type_traits>
+#include <variant>
 
-#include <boost/thread/shared_mutex.hpp>
+#include <boost/asio.hpp>
 
+#include <fmt/format.h>
+
+#include "include/buffer.h"
 #include "include/ceph_assert.h"
+#include "include/ceph_fs.h"
 #include "include/common_fwd.h"
-#include "include/buffer.h"
+#include "include/expected.hpp"
 #include "include/types.h"
 #include "include/rados/rados_types.hpp"
+#include "include/function2.hpp"
+#include "include/RADOS/RADOS_Decodable.hpp"
 
 #include "common/admin_socket.h"
+#include "common/async/completion.h"
 #include "common/ceph_time.h"
+#include "common/ceph_mutex.h"
 #include "common/ceph_timer.h"
 #include "common/config_obs.h"
 #include "common/shunique_lock.h"
 #include "common/zipkin_trace.h"
-#include "common/Finisher.h"
 #include "common/Throttle.h"
 
+#include "mon/MonClient.h"
+
 #include "messages/MOSDOp.h"
 #include "msg/Dispatcher.h"
 
@@ -49,7 +61,6 @@ class Context;
 class Messenger;
 class MonClient;
 class Message;
-class Finisher;
 
 class MPoolOpReply;
 
@@ -57,54 +68,95 @@ class MGetPoolStatsReply;
 class MStatfsReply;
 class MCommandReply;
 class MWatchNotify;
-
+template<typename T>
+struct EnumerationContext;
+template<typename t>
+struct CB_EnumerateReply;
 
 // -----------------------------------------
 
 struct ObjectOperation {
   std::vector<OSDOp> ops;
-  int flags;
-  int priority;
+  int flags = 0;
+  int priority = 0;
 
   std::vector<ceph::buffer::list*> out_bl;
-  std::vector<Context*> out_handler;
+  std::vector<fu2::unique_function<void(boost::system::error_code, int,
+                                       const ceph::buffer::list& bl) &&>> out_handler;
   std::vector<int*> out_rval;
+  std::vector<boost::system::error_code*> out_ec;
 
-  ObjectOperation() : flags(0), priority(0) {}
-  ~ObjectOperation() {
-    while (!out_handler.empty()) {
-      delete out_handler.back();
-      out_handler.pop_back();
-    }
-  }
+  ObjectOperation() = default;
+  ObjectOperation(const ObjectOperation&) = delete;
+  ObjectOperation& operator =(const ObjectOperation&) = delete;
+  ObjectOperation(ObjectOperation&&) = default;
+  ObjectOperation& operator =(ObjectOperation&&) = default;
+  ~ObjectOperation() = default;
 
-  size_t size() {
+  size_t size() const {
     return ops.size();
   }
 
+  void clear() {
+    ops.clear();
+    flags = 0;
+    priority = 0;
+    out_bl.clear();
+    out_handler.clear();
+    out_rval.clear();
+    out_ec.clear();
+  }
+
   void set_last_op_flags(int flags) {
     ceph_assert(!ops.empty());
     ops.rbegin()->op.flags = flags;
   }
 
-  class C_TwoContexts;
-  /**
-   * Add a callback to run when this operation completes,
-   * after any other callbacks for it.
-   */
-  void add_handler(Context *extra);
+
+  void set_handler(fu2::unique_function<void(boost::system::error_code, int,
+                                            const ceph::buffer::list&) &&> f) {
+    if (f) {
+      if (out_handler.back()) {
+       // This happens seldom enough that we may as well keep folding
+       // functions together when we get another one rather than
+       // using a container.
+       out_handler.back() =
+         [f = std::move(f),
+          g = std::move(std::move(out_handler.back()))]
+         (boost::system::error_code ec, int r,
+          const ceph::buffer::list& bl) mutable {
+           std::move(g)(ec, r, bl);
+           std::move(f)(ec, r, bl);
+         };
+      } else {
+       out_handler.back() = std::move(f);
+      }
+    }
+    ceph_assert(ops.size() == out_handler.size());
+  }
+
+  void set_handler(Context *c) {
+    if (c)
+      set_handler([c = std::unique_ptr<Context>(c)](boost::system::error_code,
+                                                   int r,
+                                                   const ceph::buffer::list&) mutable {
+                   c.release()->complete(r);
+                 });
+
+  }
 
   OSDOp& add_op(int op) {
-    int s = ops.size();
-    ops.resize(s+1);
-    ops[s].op.op = op;
-    out_bl.resize(s+1);
-    out_bl[s] = NULL;
-    out_handler.resize(s+1);
-    out_handler[s] = NULL;
-    out_rval.resize(s+1);
-    out_rval[s] = NULL;
-    return ops[s];
+    ops.emplace_back();
+    ops.back().op.op = op;
+    out_bl.push_back(nullptr);
+    ceph_assert(ops.size() == out_bl.size());
+    out_handler.emplace_back();
+    ceph_assert(ops.size() == out_handler.size());
+    out_rval.push_back(nullptr);
+    ceph_assert(ops.size() == out_rval.size());
+    out_ec.push_back(nullptr);
+    ceph_assert(ops.size() == out_ec.size());
+    return ops.back();
   }
   void add_data(int op, uint64_t off, uint64_t len, ceph::buffer::list& bl) {
     OSDOp& osd_op = add_op(op);
@@ -139,21 +191,77 @@ struct ObjectOperation {
       osd_op.indata.append(name, osd_op.op.xattr.name_len);
     osd_op.indata.append(data);
   }
-  void add_call(int op, const char *cname, const char *method,
-               ceph::buffer::list &indata,
+  void add_xattr(int op, std::string_view name, const ceph::buffer::list& data) {
+    OSDOp& osd_op = add_op(op);
+    osd_op.op.xattr.name_len = name.size();
+    osd_op.op.xattr.value_len = data.length();
+    osd_op.indata.append(name.data(), osd_op.op.xattr.name_len);
+    osd_op.indata.append(data);
+  }
+  void add_xattr_cmp(int op, std::string_view name, uint8_t cmp_op,
+                    uint8_t cmp_mode, const ceph::buffer::list& data) {
+    OSDOp& osd_op = add_op(op);
+    osd_op.op.xattr.name_len = name.size();
+    osd_op.op.xattr.value_len = data.length();
+    osd_op.op.xattr.cmp_op = cmp_op;
+    osd_op.op.xattr.cmp_mode = cmp_mode;
+    if (!name.empty())
+      osd_op.indata.append(name.data(), osd_op.op.xattr.name_len);
+    osd_op.indata.append(data);
+  }
+  void add_call(int op, std::string_view cname, std::string_view method,
+               const ceph::buffer::list &indata,
                ceph::buffer::list *outbl, Context *ctx, int *prval) {
     OSDOp& osd_op = add_op(op);
 
     unsigned p = ops.size() - 1;
-    out_handler[p] = ctx;
+    set_handler(ctx);
     out_bl[p] = outbl;
     out_rval[p] = prval;
 
-    osd_op.op.cls.class_len = strlen(cname);
-    osd_op.op.cls.method_len = strlen(method);
+    osd_op.op.cls.class_len = cname.size();
+    osd_op.op.cls.method_len = method.size();
     osd_op.op.cls.indata_len = indata.length();
-    osd_op.indata.append(cname, osd_op.op.cls.class_len);
-    osd_op.indata.append(method, osd_op.op.cls.method_len);
+    osd_op.indata.append(cname.data(), osd_op.op.cls.class_len);
+    osd_op.indata.append(method.data(), osd_op.op.cls.method_len);
+    osd_op.indata.append(indata);
+  }
+  void add_call(int op, std::string_view cname, std::string_view method,
+               const ceph::buffer::list &indata,
+               fu2::unique_function<void(boost::system::error_code,
+                                         const ceph::buffer::list&) &&> f) {
+    OSDOp& osd_op = add_op(op);
+
+    set_handler([f = std::move(f)](boost::system::error_code ec,
+                                  int,
+                                  const ceph::buffer::list& bl) mutable {
+                 std::move(f)(ec, bl);
+               });
+
+    osd_op.op.cls.class_len = cname.size();
+    osd_op.op.cls.method_len = method.size();
+    osd_op.op.cls.indata_len = indata.length();
+    osd_op.indata.append(cname.data(), osd_op.op.cls.class_len);
+    osd_op.indata.append(method.data(), osd_op.op.cls.method_len);
+    osd_op.indata.append(indata);
+  }
+  void add_call(int op, std::string_view cname, std::string_view method,
+               const ceph::buffer::list &indata,
+               fu2::unique_function<void(boost::system::error_code, int,
+                                         const ceph::buffer::list&) &&> f) {
+    OSDOp& osd_op = add_op(op);
+
+    set_handler([f = std::move(f)](boost::system::error_code ec,
+                                  int r,
+                                  const ceph::buffer::list& bl) mutable {
+                 std::move(f)(ec, r, bl);
+               });
+
+    osd_op.op.cls.class_len = cname.size();
+    osd_op.op.cls.method_len = method.size();
+    osd_op.op.cls.indata_len = indata.length();
+    osd_op.indata.append(cname.data(), osd_op.op.cls.class_len);
+    osd_op.indata.append(method.data(), osd_op.op.cls.method_len);
     osd_op.indata.append(indata);
   }
   void add_pgls(int op, uint64_t count, collection_list_handle_t cookie,
@@ -225,17 +333,18 @@ struct ObjectOperation {
     o.op.flags = (excl ? CEPH_OSD_OP_FLAG_EXCL : 0);
   }
 
-  struct C_ObjectOperation_stat : public Context {
+  struct CB_ObjectOperation_stat {
     ceph::buffer::list bl;
     uint64_t *psize;
     ceph::real_time *pmtime;
     time_t *ptime;
     struct timespec *pts;
     int *prval;
-    C_ObjectOperation_stat(uint64_t *ps, ceph::real_time *pm, time_t *pt, struct timespec *_pts,
-                          int *prval)
-      : psize(ps), pmtime(pm), ptime(pt), pts(_pts), prval(prval) {}
-    void finish(int r) override {
+    boost::system::error_code* pec;
+    CB_ObjectOperation_stat(uint64_t *ps, ceph::real_time *pm, time_t *pt, struct timespec *_pts,
+                           int *prval, boost::system::error_code* pec)
+      : psize(ps), pmtime(pm), ptime(pt), pts(_pts), prval(prval), pec(pec) {}
+    void operator()(boost::system::error_code ec, int r, const ceph::buffer::list& bl) {
       using ceph::decode;
       if (r >= 0) {
        auto p = bl.cbegin();
@@ -252,58 +361,91 @@ struct ObjectOperation {
            *ptime = ceph::real_clock::to_time_t(mtime);
          if (pts)
            *pts = ceph::real_clock::to_timespec(mtime);
-       } catch (ceph::buffer::error& e) {
+       } catch (const ceph::buffer::error& e) {
          if (prval)
            *prval = -EIO;
+         if (pec)
+           *pec = e.code();
        }
       }
     }
   };
   void stat(uint64_t *psize, ceph::real_time *pmtime, int *prval) {
     add_op(CEPH_OSD_OP_STAT);
-    unsigned p = ops.size() - 1;
-    C_ObjectOperation_stat *h = new C_ObjectOperation_stat(psize, pmtime, NULL, NULL,
-                                                          prval);
-    out_bl[p] = &h->bl;
-    out_handler[p] = h;
-    out_rval[p] = prval;
+    set_handler(CB_ObjectOperation_stat(psize, pmtime, nullptr, nullptr, prval,
+                                       nullptr));
+    out_rval.back() = prval;
+  }
+  void stat(uint64_t *psize, ceph::real_time *pmtime,
+           boost::system::error_code* ec) {
+    add_op(CEPH_OSD_OP_STAT);
+    set_handler(CB_ObjectOperation_stat(psize, pmtime, nullptr, nullptr,
+                                       nullptr, ec));
+    out_ec.back() = ec;
   }
   void stat(uint64_t *psize, time_t *ptime, int *prval) {
     add_op(CEPH_OSD_OP_STAT);
-    unsigned p = ops.size() - 1;
-    C_ObjectOperation_stat *h = new C_ObjectOperation_stat(psize, NULL, ptime, NULL,
-                                                          prval);
-    out_bl[p] = &h->bl;
-    out_handler[p] = h;
-    out_rval[p] = prval;
+    set_handler(CB_ObjectOperation_stat(psize, nullptr, ptime, nullptr, prval,
+                                       nullptr));
+    out_rval.back() = prval;
   }
   void stat(uint64_t *psize, struct timespec *pts, int *prval) {
     add_op(CEPH_OSD_OP_STAT);
-    unsigned p = ops.size() - 1;
-    C_ObjectOperation_stat *h = new C_ObjectOperation_stat(psize, NULL, NULL, pts,
-                                                          prval);
-    out_bl[p] = &h->bl;
-    out_handler[p] = h;
-    out_rval[p] = prval;
+    set_handler(CB_ObjectOperation_stat(psize, nullptr, nullptr, pts, prval, nullptr));
+    out_rval.back() = prval;
+  }
+  void stat(uint64_t *psize, ceph::real_time *pmtime, nullptr_t) {
+    add_op(CEPH_OSD_OP_STAT);
+    set_handler(CB_ObjectOperation_stat(psize, pmtime, nullptr, nullptr, nullptr,
+                                       nullptr));
+  }
+  void stat(uint64_t *psize, time_t *ptime, nullptr_t) {
+    add_op(CEPH_OSD_OP_STAT);
+    set_handler(CB_ObjectOperation_stat(psize, nullptr, ptime, nullptr, nullptr,
+                                       nullptr));
   }
+  void stat(uint64_t *psize, struct timespec *pts, nullptr_t) {
+    add_op(CEPH_OSD_OP_STAT);
+    set_handler(CB_ObjectOperation_stat(psize, nullptr, nullptr, pts, nullptr,
+                                       nullptr));
+  }
+  void stat(uint64_t *psize, nullptr_t, nullptr_t) {
+    add_op(CEPH_OSD_OP_STAT);
+    set_handler(CB_ObjectOperation_stat(psize, nullptr, nullptr, nullptr,
+                                       nullptr, nullptr));
+  }
+
   // object cmpext
-  struct C_ObjectOperation_cmpext : public Context {
-    int *prval;
-    explicit C_ObjectOperation_cmpext(int *prval)
+  struct CB_ObjectOperation_cmpext {
+    int* prval = nullptr;
+    boost::system::error_code* ec = nullptr;
+    std::size_t* s = nullptr;
+    explicit CB_ObjectOperation_cmpext(int *prval)
       : prval(prval) {}
+    CB_ObjectOperation_cmpext(boost::system::error_code* ec, std::size_t* s)
+      : ec(ec), s(s) {}
 
-    void finish(int r) {
+    void operator()(boost::system::error_code ec, int r, const ceph::buffer::list&) {
       if (prval)
         *prval = r;
+      if (this->ec)
+       *this->ec = ec;
+      if (s)
+       *s = static_cast<std::size_t>(-(MAX_ERRNO - r));
     }
   };
 
   void cmpext(uint64_t off, ceph::buffer::list& cmp_bl, int *prval) {
     add_data(CEPH_OSD_OP_CMPEXT, off, cmp_bl.length(), cmp_bl);
-    unsigned p = ops.size() - 1;
-    C_ObjectOperation_cmpext *h = new C_ObjectOperation_cmpext(prval);
-    out_handler[p] = h;
-    out_rval[p] = prval;
+    set_handler(CB_ObjectOperation_cmpext(prval));
+    out_rval.back() = prval;
+  }
+
+  void cmpext(uint64_t off, ceph::buffer::list&& cmp_bl, boost::system::error_code* ec,
+             std::size_t* s) {
+    add_data(CEPH_OSD_OP_CMPEXT, off, cmp_bl.length(), cmp_bl);
+    set_handler(CB_ObjectOperation_cmpext(ec, s));
+    out_ec.back() = ec;
   }
 
   // Used by C API
@@ -311,10 +453,8 @@ struct ObjectOperation {
     ceph::buffer::list cmp_bl;
     cmp_bl.append(cmp_buf, cmp_len);
     add_data(CEPH_OSD_OP_CMPEXT, off, cmp_len, cmp_bl);
-    unsigned p = ops.size() - 1;
-    C_ObjectOperation_cmpext *h = new C_ObjectOperation_cmpext(prval);
-    out_handler[p] = h;
-    out_rval[p] = prval;
+    set_handler(CB_ObjectOperation_cmpext(prval));
+    out_rval.back() = prval;
   }
 
   void read(uint64_t off, uint64_t len, ceph::buffer::list *pbl, int *prval,
@@ -324,20 +464,29 @@ struct ObjectOperation {
     unsigned p = ops.size() - 1;
     out_bl[p] = pbl;
     out_rval[p] = prval;
-    out_handler[p] = ctx;
+    set_handler(ctx);
   }
 
-  struct C_ObjectOperation_sparse_read : public Context {
+  void read(uint64_t off, uint64_t len, boost::system::error_code* ec,
+           ceph::buffer::list* pbl) {
     ceph::buffer::list bl;
-    ceph::buffer::list *data_bl;
-    std::map<uint64_t, uint64_t> *extents;
-    int *prval;
-    C_ObjectOperation_sparse_read(ceph::buffer::list *data_bl,
-                                 std::map<uint64_t, uint64_t> *extents,
-                                 int *prval)
-      : data_bl(data_bl), extents(extents), prval(prval) {}
-    void finish(int r) override {
-      using ceph::decode;
+    add_data(CEPH_OSD_OP_READ, off, len, bl);
+    out_ec.back() = ec;
+    out_bl.back() = pbl;
+  }
+
+  template<typename Ex>
+  struct CB_ObjectOperation_sparse_read {
+    ceph::buffer::list* data_bl;
+    Ex* extents;
+    int* prval;
+    boost::system::error_code* pec;
+    CB_ObjectOperation_sparse_read(ceph::buffer::list* data_bl,
+                                  Ex* extents,
+                                  int* prval,
+                                  boost::system::error_code* pec)
+      : data_bl(data_bl), extents(extents), prval(prval), pec(pec) {}
+    void operator()(boost::system::error_code ec, int r, const ceph::buffer::list& bl) {
       auto iter = bl.cbegin();
       if (r >= 0) {
         // NOTE: it's possible the sub-op has not been executed but the result
@@ -347,26 +496,35 @@ struct ObjectOperation {
          try {
            decode(*extents, iter);
            decode(*data_bl, iter);
-         } catch (ceph::buffer::error& e) {
+         } catch (const ceph::buffer::error& e) {
            if (prval)
               *prval = -EIO;
+           if (pec)
+             *pec = e.code();
          }
         } else if (prval) {
           *prval = -EIO;
-        }
+         if (pec)
+           *pec = buffer::errc::end_of_buffer;
+       }
       }
     }
   };
-  void sparse_read(uint64_t off, uint64_t len, std::map<uint64_t,uint64_t> *m,
-                  ceph::buffer::list *data_bl, int *prval) {
+  void sparse_read(uint64_t off, uint64_t len, std::map<uint64_t, uint64_t>* m,
+                  ceph::buffer::list* data_bl, int* prval) {
     ceph::buffer::list bl;
     add_data(CEPH_OSD_OP_SPARSE_READ, off, len, bl);
-    unsigned p = ops.size() - 1;
-    C_ObjectOperation_sparse_read *h =
-      new C_ObjectOperation_sparse_read(data_bl, m, prval);
-    out_bl[p] = &h->bl;
-    out_handler[p] = h;
-    out_rval[p] = prval;
+    set_handler(CB_ObjectOperation_sparse_read(data_bl, m, prval, nullptr));
+    out_rval.back() = prval;
+  }
+  void sparse_read(uint64_t off, uint64_t len,
+                  boost::system::error_code* ec,
+                  std::vector<std::pair<uint64_t, uint64_t>>* m,
+                  ceph::buffer::list* data_bl) {
+    ceph::buffer::list bl;
+    add_data(CEPH_OSD_OP_SPARSE_READ, off, len, bl);
+    set_handler(CB_ObjectOperation_sparse_read(data_bl, m, nullptr, ec));
+    out_ec.back() = ec;
   }
   void write(uint64_t off, ceph::buffer::list& bl,
             uint64_t truncate_size,
@@ -422,7 +580,7 @@ struct ObjectOperation {
     unsigned p = ops.size() - 1;
     out_bl[p] = pbl;
     out_rval[p] = prval;
-    out_handler[p] = ctx;
+    set_handler(ctx);
   }
 
   // object attrs
@@ -433,28 +591,37 @@ struct ObjectOperation {
     out_bl[p] = pbl;
     out_rval[p] = prval;
   }
-  struct C_ObjectOperation_decodevals : public Context {
-    uint64_t max_entries;
+  void getxattr(std::string_view name, boost::system::error_code* ec,
+               buffer::list *pbl) {
     ceph::buffer::list bl;
-    std::map<std::string,ceph::buffer::list> *pattrs;
-    bool *ptruncated;
-    int *prval;
-    C_ObjectOperation_decodevals(uint64_t m, std::map<std::string,ceph::buffer::list> *pa,
-                                bool *pt, int *pr)
-      : max_entries(m), pattrs(pa), ptruncated(pt), prval(pr) {
+    add_xattr(CEPH_OSD_OP_GETXATTR, name, bl);
+    out_bl.back() = pbl;
+    out_ec.back() = ec;
+  }
+
+  template<typename Vals>
+  struct CB_ObjectOperation_decodevals {
+    uint64_t max_entries;
+    Vals* pattrs;
+    bool* ptruncated;
+    int* prval;
+    boost::system::error_code* pec;
+    CB_ObjectOperation_decodevals(uint64_t m, Vals* pa,
+                                 bool *pt, int *pr,
+                                 boost::system::error_code* pec)
+      : max_entries(m), pattrs(pa), ptruncated(pt), prval(pr), pec(pec) {
       if (ptruncated) {
        *ptruncated = false;
       }
     }
-    void finish(int r) override {
-      using ceph::decode;
+    void operator()(boost::system::error_code ec, int r, const ceph::buffer::list& bl) {
       if (r >= 0) {
        auto p = bl.cbegin();
        try {
          if (pattrs)
            decode(*pattrs, p);
          if (ptruncated) {
-           std::map<std::string,ceph::buffer::list> ignore;
+           Vals ignore;
            if (!pattrs) {
              decode(ignore, p);
              pattrs = &ignore;
@@ -462,34 +629,36 @@ struct ObjectOperation {
            if (!p.end()) {
              decode(*ptruncated, p);
            } else {
-             // the OSD did not provide this.  since old OSDs do not
+             // The OSD did not provide this.  Since old OSDs do not
              // enfoce omap result limits either, we can infer it from
              // the size of the result
              *ptruncated = (pattrs->size() == max_entries);
            }
          }
-       }
-       catch (ceph::buffer::error& e) {
+       } catch (const ceph::buffer::error& e) {
          if (prval)
            *prval = -EIO;
+         if (pec)
+           *pec = e.code();
        }
       }
     }
   };
-  struct C_ObjectOperation_decodekeys : public Context {
+  template<typename Keys>
+  struct CB_ObjectOperation_decodekeys {
     uint64_t max_entries;
-    ceph::buffer::list bl;
-    std::set<std::string> *pattrs;
+    Keys* pattrs;
     bool *ptruncated;
     int *prval;
-    C_ObjectOperation_decodekeys(uint64_t m, std::set<std::string> *pa, bool *pt,
-                                int *pr)
-      : max_entries(m), pattrs(pa), ptruncated(pt), prval(pr) {
+    boost::system::error_code* pec;
+    CB_ObjectOperation_decodekeys(uint64_t m, Keys* pa, bool *pt,
+                                 int *pr, boost::system::error_code* pec)
+      : max_entries(m), pattrs(pa), ptruncated(pt), prval(pr), pec(pec) {
       if (ptruncated) {
        *ptruncated = false;
       }
     }
-    void finish(int r) override {
+    void operator()(boost::system::error_code ec, int r, const ceph::buffer::list& bl) {
       if (r >= 0) {
        using ceph::decode;
        auto p = bl.cbegin();
@@ -497,7 +666,7 @@ struct ObjectOperation {
          if (pattrs)
            decode(*pattrs, p);
          if (ptruncated) {
-           std::set<std::string> ignore;
+           Keys ignore;
            if (!pattrs) {
              decode(ignore, p);
              pattrs = &ignore;
@@ -511,22 +680,24 @@ struct ObjectOperation {
              *ptruncated = (pattrs->size() == max_entries);
            }
          }
-       }
-       catch (ceph::buffer::error& e) {
+       } catch (const ceph::buffer::error& e) {
          if (prval)
            *prval = -EIO;
+         if (pec)
+           *pec = e.code();
        }
       }
     }
   };
-  struct C_ObjectOperation_decodewatchers : public Context {
-    ceph::buffer::list bl;
-    std::list<obj_watch_t> *pwatchers;
-    int *prval;
-    C_ObjectOperation_decodewatchers(std::list<obj_watch_t> *pw, int *pr)
-      : pwatchers(pw), prval(pr) {}
-    void finish(int r) override {
-      using ceph::decode;
+  struct CB_ObjectOperation_decodewatchers {
+    std::list<obj_watch_t>* pwatchers;
+    int* prval;
+    boost::system::error_code* pec;
+    CB_ObjectOperation_decodewatchers(std::list<obj_watch_t>* pw, int* pr,
+                                     boost::system::error_code* pec)
+      : pwatchers(pw), prval(pr), pec(pec) {}
+    void operator()(boost::system::error_code ec, int r,
+                   const ceph::buffer::list& bl) {
       if (r >= 0) {
        auto p = bl.cbegin();
        try {
@@ -544,21 +715,62 @@ struct ObjectOperation {
              pwatchers->push_back(std::move(ow));
            }
          }
+       } catch (const ceph::buffer::error& e) {
+         if (prval)
+           *prval = -EIO;
+         if (pec)
+           *pec = e.code();
        }
-       catch (ceph::buffer::error& e) {
+      }
+    }
+  };
+
+  // Completion handler for CEPH_OSD_OP_LIST_WATCHERS (neorados flavor):
+  // decodes an obj_list_watch_response_t from the reply buffer and
+  // appends each entry to *pwatchers as a RADOS::ObjWatcher.  On decode
+  // failure sets *prval to -EIO and stores the buffer error in *pec;
+  // either output pointer may be null, in which case it is skipped.
+  struct CB_ObjectOperation_decodewatchersneo {
+    std::vector<RADOS::ObjWatcher>* pwatchers;
+    int* prval;
+    boost::system::error_code* pec;
+    CB_ObjectOperation_decodewatchersneo(std::vector<RADOS::ObjWatcher>* pw,
+                                        int* pr,
+                                        boost::system::error_code* pec)
+      : pwatchers(pw), prval(pr), pec(pec) {}
+    void operator()(boost::system::error_code ec, int r,
+                   const ceph::buffer::list& bl) {
+      // Only attempt to decode on success; on error the payload is
+      // not an obj_list_watch_response_t.
+      if (r >= 0) {
+       auto p = bl.cbegin();
+       try {
+         obj_list_watch_response_t resp;
+         decode(resp, p);
+         if (pwatchers) {
+           for (const auto& watch_item : resp.entries) {
+             RADOS::ObjWatcher ow;
+             ow.addr = watch_item.addr.get_legacy_str();
+             ow.watcher_id = watch_item.name.num();
+             ow.cookie = watch_item.cookie;
+             ow.timeout_seconds = watch_item.timeout_seconds;
+             pwatchers->push_back(std::move(ow));
+           }
+         }
+       } catch (const ceph::buffer::error& e) {
          if (prval)
            *prval = -EIO;
+         if (pec)
+           *pec = e.code();
        }
       }
     }
   };
-  struct C_ObjectOperation_decodesnaps : public Context {
-    ceph::buffer::list bl;
+
+
+  struct CB_ObjectOperation_decodesnaps {
     librados::snap_set_t *psnaps;
+    RADOS::SnapSet *neosnaps;
     int *prval;
-    C_ObjectOperation_decodesnaps(librados::snap_set_t *ps, int *pr)
-      : psnaps(ps), prval(pr) {}
-    void finish(int r) override {
+    boost::system::error_code* pec;
+    CB_ObjectOperation_decodesnaps(librados::snap_set_t* ps,
+                                  RADOS::SnapSet* ns, int* pr,
+                                  boost::system::error_code* pec)
+      : psnaps(ps), neosnaps(ns), prval(pr), pec(pec) {}
+    void operator()(boost::system::error_code ec, int r, const ceph::buffer::list& bl) {
       if (r >= 0) {
        using ceph::decode;
        auto p = bl.cbegin();
@@ -567,7 +779,9 @@ struct ObjectOperation {
          decode(resp, p);
          if (psnaps) {
            psnaps->clones.clear();
-           for (auto ci = resp.clones.begin(); ci != resp.clones.end(); ++ci) {
+           for (auto ci = resp.clones.begin();
+                ci != resp.clones.end();
+                ++ci) {
              librados::clone_info_t clone;
 
              clone.cloneid = ci->cloneid;
@@ -581,9 +795,27 @@ struct ObjectOperation {
            }
            psnaps->seq = resp.seq;
          }
-       } catch (ceph::buffer::error& e) {
+
+         if (neosnaps) {
+           neosnaps->clones.clear();
+           for (auto&& c : resp.clones) {
+             RADOS::CloneInfo clone;
+
+             clone.cloneid = std::move(c.cloneid);
+             clone.snaps.reserve(c.snaps.size());
+             std::move(c.snaps.begin(), c.snaps.end(),
+                       std::back_inserter(clone.snaps));
+             clone.overlap = c.overlap;
+             clone.size = c.size;
+             neosnaps->clones.push_back(std::move(clone));
+           }
+           neosnaps->seq = resp.seq;
+         }
+       } catch (const ceph::buffer::error& e) {
          if (prval)
            *prval = -EIO;
+         if (pec)
+           *pec = e.code();
        }
       }
     }
@@ -591,17 +823,23 @@ struct ObjectOperation {
   void getxattrs(std::map<std::string,ceph::buffer::list> *pattrs, int *prval) {
     add_op(CEPH_OSD_OP_GETXATTRS);
     if (pattrs || prval) {
-      unsigned p = ops.size() - 1;
-      C_ObjectOperation_decodevals *h
-       = new C_ObjectOperation_decodevals(0, pattrs, nullptr, prval);
-      out_handler[p] = h;
-      out_bl[p] = &h->bl;
-      out_rval[p] = prval;
+      set_handler(CB_ObjectOperation_decodevals(0, pattrs, nullptr, prval,
+                                               nullptr));
+      out_rval.back() = prval;
     }
   }
+  void getxattrs(boost::system::error_code* ec,
+                boost::container::flat_map<std::string, ceph::buffer::list> *pattrs) {
+    add_op(CEPH_OSD_OP_GETXATTRS);
+    set_handler(CB_ObjectOperation_decodevals(0, pattrs, nullptr, nullptr, ec));
+    out_ec.back() = ec;
+  }
   void setxattr(const char *name, const ceph::buffer::list& bl) {
     add_xattr(CEPH_OSD_OP_SETXATTR, name, bl);
   }
+  void setxattr(std::string_view name, const ceph::buffer::list& bl) {
+    add_xattr(CEPH_OSD_OP_SETXATTR, name, bl);
+  }
   void setxattr(const char *name, const std::string& s) {
     ceph::buffer::list bl;
     bl.append(s);
@@ -611,11 +849,19 @@ struct ObjectOperation {
                const ceph::buffer::list& bl) {
     add_xattr_cmp(CEPH_OSD_OP_CMPXATTR, name, cmp_op, cmp_mode, bl);
   }
+  void cmpxattr(std::string_view name, uint8_t cmp_op, uint8_t cmp_mode,
+               const ceph::buffer::list& bl) {
+    add_xattr_cmp(CEPH_OSD_OP_CMPXATTR, name, cmp_op, cmp_mode, bl);
+  }
   void rmxattr(const char *name) {
     ceph::buffer::list bl;
     add_xattr(CEPH_OSD_OP_RMXATTR, name, bl);
   }
-  void setxattrs(std::map<std::string, ceph::buffer::list>& attrs) {
+  void rmxattr(std::string_view name) {
+    ceph::buffer::list bl;
+    add_xattr(CEPH_OSD_OP_RMXATTR, name, bl);
+  }
+  void setxattrs(map<string, ceph::buffer::list>& attrs) {
     using ceph::encode;
     ceph::buffer::list bl;
     encode(attrs, bl);
@@ -648,14 +894,28 @@ struct ObjectOperation {
     op.op.extent.length = bl.length();
     op.indata.claim_append(bl);
     if (prval || ptruncated || out_set) {
-      unsigned p = ops.size() - 1;
-      C_ObjectOperation_decodekeys *h =
-       new C_ObjectOperation_decodekeys(max_to_get, out_set, ptruncated, prval);
-      out_handler[p] = h;
-      out_bl[p] = &h->bl;
-      out_rval[p] = prval;
+      set_handler(CB_ObjectOperation_decodekeys(max_to_get, out_set, ptruncated, prval,
+                                               nullptr));
+      out_rval.back() = prval;
     }
   }
+  void omap_get_keys(std::optional<std::string_view> start_after,
+                    uint64_t max_to_get,
+                    boost::system::error_code* ec,
+                    boost::container::flat_set<std::string> *out_set,
+                    bool *ptruncated) {
+    OSDOp& op = add_op(CEPH_OSD_OP_OMAPGETKEYS);
+    ceph::buffer::list bl;
+    encode(start_after ? *start_after : std::string_view{}, bl);
+    encode(max_to_get, bl);
+    op.op.extent.offset = 0;
+    op.op.extent.length = bl.length();
+    op.indata.claim_append(bl);
+    set_handler(
+      CB_ObjectOperation_decodekeys(max_to_get, out_set, ptruncated, nullptr,
+                                   ec));
+    out_ec.back() = ec;
+  }
 
   void omap_get_vals(const std::string &start_after,
                     const std::string &filter_prefix,
@@ -673,19 +933,34 @@ struct ObjectOperation {
     op.op.extent.length = bl.length();
     op.indata.claim_append(bl);
     if (prval || out_set || ptruncated) {
-      unsigned p = ops.size() - 1;
-      C_ObjectOperation_decodevals *h =
-       new C_ObjectOperation_decodevals(max_to_get, out_set, ptruncated, prval);
-      out_handler[p] = h;
-      out_bl[p] = &h->bl;
-      out_rval[p] = prval;
+      set_handler(CB_ObjectOperation_decodevals(max_to_get, out_set, ptruncated,
+                                               prval, nullptr));
+      out_rval.back() = prval;
     }
   }
 
+  void omap_get_vals(std::optional<std::string_view> start_after,
+                    std::optional<std::string_view> filter_prefix,
+                    uint64_t max_to_get,
+                    boost::system::error_code* ec,
+                    boost::container::flat_map<std::string, ceph::buffer::list> *out_set,
+                    bool *ptruncated) {
+    OSDOp &op = add_op(CEPH_OSD_OP_OMAPGETVALS);
+    ceph::buffer::list bl;
+    encode(start_after ? *start_after : std::string_view{}, bl);
+    encode(max_to_get, bl);
+    encode(filter_prefix ? *start_after : std::string_view{}, bl);
+    op.op.extent.offset = 0;
+    op.op.extent.length = bl.length();
+    op.indata.claim_append(bl);
+    set_handler(CB_ObjectOperation_decodevals(max_to_get, out_set, ptruncated,
+                                             nullptr, ec));
+    out_ec.back() = ec;
+  }
+
   void omap_get_vals_by_keys(const std::set<std::string> &to_get,
-                           std::map<std::string, ceph::buffer::list> *out_set,
-                           int *prval) {
-    using ceph::encode;
+                            std::map<std::string, ceph::buffer::list> *out_set,
+                            int *prval) {
     OSDOp &op = add_op(CEPH_OSD_OP_OMAPGETVALSBYKEYS);
     ceph::buffer::list bl;
     encode(to_get, bl);
@@ -693,16 +968,28 @@ struct ObjectOperation {
     op.op.extent.length = bl.length();
     op.indata.claim_append(bl);
     if (prval || out_set) {
-      unsigned p = ops.size() - 1;
-      C_ObjectOperation_decodevals *h =
-       new C_ObjectOperation_decodevals(0, out_set, nullptr, prval);
-      out_handler[p] = h;
-      out_bl[p] = &h->bl;
-      out_rval[p] = prval;
+      set_handler(CB_ObjectOperation_decodevals(0, out_set, nullptr, prval,
+                                               nullptr));
+      out_rval.back() = prval;
     }
   }
 
-  void omap_cmp(const std::map<std::string, std::pair<ceph::buffer::list,int> > &assertions,
+  void omap_get_vals_by_keys(
+    const boost::container::flat_set<std::string>& to_get,
+    boost::system::error_code* ec,
+    boost::container::flat_map<std::string, ceph::buffer::list> *out_set) {
+    OSDOp &op = add_op(CEPH_OSD_OP_OMAPGETVALSBYKEYS);
+    ceph::buffer::list bl;
+    encode(to_get, bl);
+    op.op.extent.offset = 0;
+    op.op.extent.length = bl.length();
+    op.indata.claim_append(bl);
+    set_handler(CB_ObjectOperation_decodevals(0, out_set, nullptr, nullptr,
+                                             ec));
+    out_ec.back() = ec;
+  }
+
+  void omap_cmp(const std::map<std::string, pair<ceph::buffer::list,int> > &assertions,
                int *prval) {
     using ceph::encode;
     OSDOp &op = add_op(CEPH_OSD_OP_OMAP_CMP);
@@ -717,6 +1004,18 @@ struct ObjectOperation {
     }
   }
 
+  void omap_cmp(const boost::container::flat_map<
+                 std::string, pair<ceph::buffer::list, int>>& assertions,
+               boost::system::error_code *ec) {
+    OSDOp &op = add_op(CEPH_OSD_OP_OMAP_CMP);
+    ceph::buffer::list bl;
+    encode(assertions, bl);
+    op.op.extent.offset = 0;
+    op.op.extent.length = bl.length();
+    op.indata.claim_append(bl);
+    out_ec.back() = ec;
+  }
+
   struct C_ObjectOperation_copyget : public Context {
     ceph::buffer::list bl;
     object_copy_cursor_t *cursor;
@@ -805,7 +1104,7 @@ struct ObjectOperation {
        if (out_truncate_size)
          *out_truncate_size = copy_reply.truncate_size;
        *cursor = copy_reply.cursor;
-      } catch (ceph::buffer::error& e) {
+      } catch (const ceph::buffer::error& e) {
        if (prval)
          *prval = -EIO;
       }
@@ -846,7 +1145,7 @@ struct ObjectOperation {
                                    out_reqid_return_codes, truncate_seq,
                                    truncate_size, prval);
     out_bl[p] = &h->bl;
-    out_handler[p] = h;
+    set_handler(h);
   }
 
   void undirty() {
@@ -869,7 +1168,7 @@ struct ObjectOperation {
        decode(isdirty, p);
        if (pisdirty)
          *pisdirty = isdirty;
-      } catch (ceph::buffer::error& e) {
+      } catch (const ceph::buffer::error& e) {
        if (prval)
          *prval = -EIO;
       }
@@ -883,7 +1182,7 @@ struct ObjectOperation {
     C_ObjectOperation_isdirty *h =
       new C_ObjectOperation_isdirty(pisdirty, prval);
     out_bl[p] = &h->bl;
-    out_handler[p] = h;
+    set_handler(h);
   }
 
   struct C_ObjectOperation_hit_set_ls : public Context {
@@ -918,7 +1217,7 @@ struct ObjectOperation {
        }
        if (putls)
          putls->swap(ls);
-      } catch (ceph::buffer::error& e) {
+      } catch (const ceph::buffer::error& e) {
        r = -EIO;
       }
       if (prval)
@@ -943,7 +1242,7 @@ struct ObjectOperation {
     C_ObjectOperation_hit_set_ls *h =
       new C_ObjectOperation_hit_set_ls(pls, NULL, prval);
     out_bl[p] = &h->bl;
-    out_handler[p] = h;
+    set_handler(h);
   }
   void hit_set_ls(std::list<std::pair<ceph::real_time, ceph::real_time> > *pls,
                  int *prval) {
@@ -953,7 +1252,7 @@ struct ObjectOperation {
     C_ObjectOperation_hit_set_ls *h =
       new C_ObjectOperation_hit_set_ls(NULL, pls, prval);
     out_bl[p] = &h->bl;
-    out_handler[p] = h;
+    set_handler(h);
   }
 
   /**
@@ -981,8 +1280,19 @@ struct ObjectOperation {
     out_rval[p] = prval;
   }
 
-  void omap_set(const std::map<std::string, ceph::buffer::list>& map) {
-    using ceph::encode;
+  void omap_get_header(boost::system::error_code* ec, ceph::buffer::list *bl) {
+    add_op(CEPH_OSD_OP_OMAPGETHEADER);
+    out_bl.back() = bl;
+    out_ec.back() = ec;
+  }
+
+  void omap_set(const map<string, ceph::buffer::list> &map) {
+    ceph::buffer::list bl;
+    encode(map, bl);
+    add_data(CEPH_OSD_OP_OMAPSETVALS, 0, bl.length(), bl);
+  }
+
+  void omap_set(const boost::container::flat_map<string, ceph::buffer::list>& map) {
     ceph::buffer::list bl;
     encode(map, bl);
     add_data(CEPH_OSD_OP_OMAPSETVALS, 0, bl.length(), bl);
@@ -1002,6 +1312,11 @@ struct ObjectOperation {
     encode(to_remove, bl);
     add_data(CEPH_OSD_OP_OMAPRMKEYS, 0, bl.length(), bl);
   }
+  void omap_rm_keys(const boost::container::flat_set<std::string>& to_remove) {
+    ceph::buffer::list bl;
+    encode(to_remove, bl);
+    add_data(CEPH_OSD_OP_OMAPRMKEYS, 0, bl.length(), bl);
+  }
 
   void omap_rm_range(std::string_view key_begin, std::string_view key_end) {
     ceph::buffer::list bl;
@@ -1021,6 +1336,30 @@ struct ObjectOperation {
     add_call(CEPH_OSD_OP_CALL, cname, method, indata, outdata, ctx, prval);
   }
 
+  void call(std::string_view cname, std::string_view method,
+           const ceph::buffer::list& indata, boost::system::error_code* ec) {
+    add_call(CEPH_OSD_OP_CALL, cname, method, indata, NULL, NULL, NULL);
+    out_ec.back() = ec;
+  }
+
+  void call(std::string_view cname, std::string_view method, const ceph::buffer::list& indata,
+           boost::system::error_code* ec, ceph::buffer::list *outdata) {
+    add_call(CEPH_OSD_OP_CALL, cname, method, indata, outdata, nullptr, nullptr);
+    out_ec.back() = ec;
+  }
+  void call(std::string_view cname, std::string_view method,
+           const ceph::buffer::list& indata,
+           fu2::unique_function<void (boost::system::error_code,
+                                      const ceph::buffer::list&) &&> f) {
+    add_call(CEPH_OSD_OP_CALL, cname, method, indata, std::move(f));
+  }
+  void call(std::string_view cname, std::string_view method,
+           const ceph::buffer::list& indata,
+           fu2::unique_function<void (boost::system::error_code, int,
+                                      const ceph::buffer::list&) &&> f) {
+    add_call(CEPH_OSD_OP_CALL, cname, method, indata, std::move(f));
+  }
+
   // watch/notify
   void watch(uint64_t cookie, __u8 op, uint32_t timeout = 0) {
     OSDOp& osd_op = add_op(CEPH_OSD_OP_WATCH);
@@ -1053,26 +1392,36 @@ struct ObjectOperation {
 
   void list_watchers(std::list<obj_watch_t> *out,
                     int *prval) {
-    (void)add_op(CEPH_OSD_OP_LIST_WATCHERS);
+    add_op(CEPH_OSD_OP_LIST_WATCHERS);
     if (prval || out) {
-      unsigned p = ops.size() - 1;
-      C_ObjectOperation_decodewatchers *h =
-       new C_ObjectOperation_decodewatchers(out, prval);
-      out_handler[p] = h;
-      out_bl[p] = &h->bl;
-      out_rval[p] = prval;
+      set_handler(CB_ObjectOperation_decodewatchers(out, prval, nullptr));
+      out_rval.back() = prval;
     }
   }
+  void list_watchers(vector<RADOS::ObjWatcher>* out,
+                    boost::system::error_code* ec) {
+    add_op(CEPH_OSD_OP_LIST_WATCHERS);
+    set_handler(CB_ObjectOperation_decodewatchersneo(out, nullptr, ec));
+    out_ec.back() = ec;
+  }
 
-  void list_snaps(librados::snap_set_t *out, int *prval) {
-    (void)add_op(CEPH_OSD_OP_LIST_SNAPS);
-    if (prval || out) {
-      unsigned p = ops.size() - 1;
-      C_ObjectOperation_decodesnaps *h =
-       new C_ObjectOperation_decodesnaps(out, prval);
-      out_handler[p] = h;
-      out_bl[p] = &h->bl;
-      out_rval[p] = prval;
+  void list_snaps(librados::snap_set_t *out, int *prval,
+                 boost::system::error_code* ec = nullptr) {
+    add_op(CEPH_OSD_OP_LIST_SNAPS);
+    if (prval || out || ec) {
+      set_handler(CB_ObjectOperation_decodesnaps(out, nullptr, prval, ec));
+      out_rval.back() = prval;
+      out_ec.back() = ec;
+    }
+  }
+
+  void list_snaps(RADOS::SnapSet *out, int *prval,
+                 boost::system::error_code* ec = nullptr) {
+    add_op(CEPH_OSD_OP_LIST_SNAPS);
+    if (prval || out || ec) {
+      set_handler(CB_ObjectOperation_decodesnaps(nullptr, out, prval, ec));
+      out_rval.back() = prval;
+      out_ec.back() = ec;
     }
   }
 
@@ -1218,10 +1567,11 @@ struct ObjectOperation {
     out_bl.resize(sops.size());
     out_handler.resize(sops.size());
     out_rval.resize(sops.size());
+    out_ec.resize(sops.size());
     for (uint32_t i = 0; i < sops.size(); i++) {
       out_bl[i] = &sops[i].outdata;
-      out_handler[i] = NULL;
       out_rval[i] = &sops[i].rval;
+      out_ec[i] = nullptr;
     }
   }
 
@@ -1237,12 +1587,27 @@ struct ObjectOperation {
   }
 };
 
+// Render an ObjectOperation as a bracketed, space-separated list of its
+// constituent ops, e.g. "[read write]".
+inline std::ostream& operator <<(std::ostream& m, const ObjectOperation& oo) {
+  m << '[';
+  for (auto i = oo.ops.cbegin(); i != oo.ops.cend(); ++i) {
+    if (i != oo.ops.cbegin())
+      m << ' ';
+    m << *i;
+  }
+  m << ']';
+  return m;
+}
 
-// ----------------
 
+// ----------------
 
 class Objecter : public md_config_obs_t, public Dispatcher {
 public:
+  using OpSignature = void(boost::system::error_code);
+  using OpCompletion = ceph::async::Completion<OpSignature>;
+
   // config observer bits
   const char** get_tracked_conf_keys() const override;
   void handle_conf_change(const ConfigProxy& conf,
@@ -1251,10 +1616,13 @@ public:
 public:
   Messenger *messenger;
   MonClient *monc;
-  Finisher *finisher;
-  ZTracer::Endpoint trace_endpoint;
+  boost::asio::io_context& service;
+  // The guaranteed sequenced, one-at-a-time execution and apparently
+  // people sometimes depend on this.
+  boost::asio::io_context::strand finish_strand{service};
+  ZTracer::Endpoint trace_endpoint{"0.0.0.0", 0, "Objecter"};
 private:
-  std::unique_ptr<OSDMap> osdmap;
+  std::unique_ptr<OSDMap> osdmap{std::make_unique<OSDMap>()};
 public:
   using Dispatcher::cct;
   std::multimap<std::string,std::string> crush_location;
@@ -1289,7 +1657,8 @@ private:
                : epoch(epoch), up(up), up_primary(up_primary),
                  acting(acting), acting_primary(acting_primary) {}
   };
-  std::shared_mutex pg_mapping_lock;
+  ceph::shared_mutex pg_mapping_lock =
+    ceph::make_shared_mutex("Objecter::pg_mapping_lock");
   // pool -> pg mapping
   std::map<int64_t, std::vector<pg_mapping_t>> pg_mappings;
 
@@ -1344,14 +1713,11 @@ private:
   version_t last_seen_osdmap_version = 0;
   version_t last_seen_pgmap_version = 0;
 
-  mutable std::shared_mutex rwlock;
-  using lock_guard = std::lock_guard<decltype(rwlock)>;
-  using unique_lock = std::unique_lock<decltype(rwlock)>;
-  using shared_lock = boost::shared_lock<decltype(rwlock)>;
-  using shunique_lock = ceph::shunique_lock<decltype(rwlock)>;
+  mutable ceph::shared_mutex rwlock =
+          ceph::make_shared_mutex("Objecter::rwlock");
   ceph::timer<ceph::coarse_mono_clock> timer;
 
-  PerfCounters *logger = nullptr;
+  PerfCounters* logger = nullptr;
 
   uint64_t tick_event = 0;
 
@@ -1447,92 +1813,198 @@ public:
     void dump(ceph::Formatter *f) const;
   };
 
+  std::unique_ptr<ceph::async::Completion<void(boost::system::error_code)>>
+  OpContextVert(Context* c) {
+    if (c)
+      return ceph::async::Completion<void(boost::system::error_code)>::create(
+       service.get_executor(),
+       [c = std::unique_ptr<Context>(c)]
+       (boost::system::error_code e) mutable {
+         c.release()->complete(ceph::from_error_code(e));
+       });
+    else
+      return nullptr;
+  }
+
+  template<typename T>
+  std::unique_ptr<ceph::async::Completion<void(boost::system::error_code, T)>>
+  OpContextVert(Context* c, T* p) {
+
+    if (c || p)
+      return
+       ceph::async::Completion<void(boost::system::error_code, T)>::create(
+         service.get_executor(),
+         [c = std::unique_ptr<Context>(c), p]
+         (boost::system::error_code e, T r) mutable {
+             if (p)
+               *p = std::move(r);
+             if (c)
+               c.release()->complete(ceph::from_error_code(e));
+           });
+    else
+      return nullptr;
+  }
+
+  template<typename T>
+  std::unique_ptr<ceph::async::Completion<void(boost::system::error_code, T)>>
+  OpContextVert(Context* c, T& p) {
+    if (c)
+      return ceph::async::Completion<
+       void(boost::system::error_code, T)>::create(
+         service.get_executor(),
+         [c = std::unique_ptr<Context>(c), &p]
+         (boost::system::error_code e, T r) mutable {
+           p = std::move(r);
+           if (c)
+             c.release()->complete(ceph::from_error_code(e));
+         });
+    else
+      return nullptr;
+  }
+
   struct Op : public RefCountedObject {
-    OSDSession *session;
-    int incarnation;
+    OSDSession *session = nullptr;
+    int incarnation = 0;
 
     op_target_t target;
 
-    ConnectionRef con;  // for rx buffer only
-    uint64_t features; // explicitly specified op features
+    ConnectionRef con = nullptr;  // for rx buffer only
+    uint64_t features = CEPH_FEATURES_SUPPORTED_DEFAULT; // explicitly specified op features
 
     std::vector<OSDOp> ops;
 
-    snapid_t snapid;
+    snapid_t snapid = CEPH_NOSNAP;
     SnapContext snapc;
     ceph::real_time mtime;
 
-    ceph::buffer::list *outbl;
+    ceph::buffer::list *outbl = nullptr;
     std::vector<ceph::buffer::list*> out_bl;
-    std::vector<Context*> out_handler;
+    std::vector<fu2::unique_function<void(boost::system::error_code, int,
+                                         const ceph::buffer::list& bl) &&>> out_handler;
     std::vector<int*> out_rval;
+    std::vector<boost::system::error_code*> out_ec;
+
+    int priority = 0;
+    using OpSig = void(boost::system::error_code);
+    using OpComp = ceph::async::Completion<OpSig>;
+    // Due to an irregularity of cmpxattr, we actualy need the 'int'
+    // value for onfinish for legacy librados users. As such just
+    // preserve the Context* in this one case. That way we can have
+    // our callers just pass in a unique_ptr<OpComp> and not deal with
+    // our signature in Objecter being different than the exposed
+    // signature in RADOS.
+    //
+    // Add a function for the linger case, where we want better
+    // semantics than Context, but still need to be under the completion_lock.
+    std::variant<std::unique_ptr<OpComp>, fu2::unique_function<OpSig>,
+                Context*> onfinish;
+    uint64_t ontimeout = 0;
 
-    int priority;
-    Context *onfinish;
-    uint64_t ontimeout;
-
-    ceph_tid_t tid;
-    int attempts;
+    ceph_tid_t tid = 0;
+    int attempts = 0;
 
     version_t *objver;
-    epoch_t *reply_epoch;
+    epoch_t *reply_epoch = nullptr;
 
     ceph::coarse_mono_time stamp;
 
-    epoch_t map_dne_bound;
+    epoch_t map_dne_bound = 0;
 
-    int budget;
+    int budget = -1;
 
     /// true if we should resend this message on failure
-    bool should_resend;
+    bool should_resend = true;
 
     /// true if the throttle budget is get/put on a series of OPs,
     /// instead of per OP basis, when this flag is set, the budget is
     /// acquired before sending the very first OP of the series and
     /// released upon receiving the last OP reply.
-    bool ctx_budgeted;
+    bool ctx_budgeted = false;
 
     int *data_offset;
 
     osd_reqid_t reqid; // explicitly setting reqid
     ZTracer::Trace trace;
 
-    Op(const object_t& o, const object_locator_t& ol, std::vector<OSDOp>& op,
-       int f, Context *fin, version_t *ov, int *offset = NULL,
+    static bool has_completion(decltype(onfinish)& f) {
+      return std::visit([](auto&& arg) { return bool(arg);}, f);
+    }
+    bool has_completion() {
+      return has_completion(onfinish);
+    }
+
+    static void complete(decltype(onfinish)&& f, boost::system::error_code ec,
+                        int r) {
+      std::visit([ec, r](auto&& arg) {
+                  if constexpr (std::is_same_v<std::decay_t<decltype(arg)>,
+                                Context*>) {
+                    arg->complete(r);
+                  } else if constexpr (std::is_same_v<std::decay_t<decltype(arg)>,
+                             fu2::unique_function<OpSig>>) {
+                    std::move(arg)(ec);
+                   } else {
+                    arg->defer(std::move(arg), ec);
+                  }
+                }, std::move(f));
+    }
+    void complete(boost::system::error_code ec, int r) {
+      complete(std::move(onfinish), ec, r);
+    }
+
+    Op(const object_t& o, const object_locator_t& ol, vector<OSDOp>&& _ops,
+       int f, std::unique_ptr<OpComp>&& fin,
+       version_t *ov, int *offset = nullptr,
        ZTracer::Trace *parent_trace = nullptr) :
-      session(NULL), incarnation(0),
       target(o, ol, f),
-      con(NULL),
-      features(CEPH_FEATURES_SUPPORTED_DEFAULT),
-      snapid(CEPH_NOSNAP),
-      outbl(NULL),
-      priority(0),
+      ops(std::move(_ops)),
+      out_bl(ops.size(), nullptr),
+      out_handler(ops.size()),
+      out_rval(ops.size(), nullptr),
+      out_ec(ops.size(), nullptr),
+      onfinish(std::move(fin)),
+      objver(ov),
+      data_offset(offset) {
+      if (target.base_oloc.key == o)
+       target.base_oloc.key.clear();
+      if (parent_trace && parent_trace->valid()) {
+        trace.init("op", nullptr, parent_trace);
+        trace.event("start");
+      }
+    }
+    Op(const object_t& o, const object_locator_t& ol, vector<OSDOp>&& _ops,
+       int f, Context* fin, version_t *ov, int *offset = nullptr,
+       ZTracer::Trace *parent_trace = nullptr) :
+      target(o, ol, f),
+      ops(std::move(_ops)),
+      out_bl(ops.size(), nullptr),
+      out_handler(ops.size()),
+      out_rval(ops.size(), nullptr),
+      out_ec(ops.size(), nullptr),
       onfinish(fin),
-      ontimeout(0),
-      tid(0),
-      attempts(0),
       objver(ov),
-      reply_epoch(NULL),
-      map_dne_bound(0),
-      budget(-1),
-      should_resend(true),
-      ctx_budgeted(false),
       data_offset(offset) {
-      ops.swap(op);
-
-      /* initialize out_* to match op std::vector */
-      out_bl.resize(ops.size());
-      out_rval.resize(ops.size());
-      out_handler.resize(ops.size());
-      for (unsigned i = 0; i < ops.size(); i++) {
-       out_bl[i] = NULL;
-       out_handler[i] = NULL;
-       out_rval[i] = NULL;
+      if (target.base_oloc.key == o)
+       target.base_oloc.key.clear();
+      if (parent_trace && parent_trace->valid()) {
+        trace.init("op", nullptr, parent_trace);
+        trace.event("start");
       }
+    }
 
+    Op(const object_t& o, const object_locator_t& ol,  vector<OSDOp>&& _ops,
+       int f, fu2::unique_function<OpSig>&& fin, version_t *ov, int *offset = nullptr,
+       ZTracer::Trace *parent_trace = nullptr) :
+      target(o, ol, f),
+      ops(std::move(_ops)),
+      out_bl(ops.size(), nullptr),
+      out_handler(ops.size()),
+      out_rval(ops.size(), nullptr),
+      out_ec(ops.size(), nullptr),
+      onfinish(std::move(fin)),
+      objver(ov),
+      data_offset(offset) {
       if (target.base_oloc.key == o)
        target.base_oloc.key.clear();
-
       if (parent_trace && parent_trace->valid()) {
         trace.init("op", nullptr, parent_trace);
         trace.event("start");
@@ -1545,10 +2017,6 @@ public:
 
   private:
     ~Op() override {
-      while (!out_handler.empty()) {
-       delete out_handler.back();
-       out_handler.pop_back();
-      }
       trace.event("finish");
     }
   };
@@ -1662,40 +2130,43 @@ public:
 
   struct PoolStatOp {
     ceph_tid_t tid;
-    std::list<std::string> pools;
-
-    std::map<std::string,pool_stat_t> *pool_stats;
-    bool *per_pool;
-    Context *onfinish;
-    uint64_t ontimeout;
-
+    std::vector<std::string> pools;
+    using OpSig = void(boost::system::error_code,
+                      boost::container::flat_map<std::string, pool_stat_t>,
+                      bool);
+    using OpComp = ceph::async::Completion<OpSig>;
+    std::unique_ptr<OpComp> onfinish;
+    std::uint64_t ontimeout;
     ceph::coarse_mono_time last_submit;
   };
 
   struct StatfsOp {
     ceph_tid_t tid;
-    struct ceph_statfs *stats;
     boost::optional<int64_t> data_pool;
-    Context *onfinish;
+    using OpSig = void(boost::system::error_code,
+                      const struct ceph_statfs);
+    using OpComp = ceph::async::Completion<OpSig>;
+
+    std::unique_ptr<OpComp> onfinish;
     uint64_t ontimeout;
 
     ceph::coarse_mono_time last_submit;
   };
 
   struct PoolOp {
-    ceph_tid_t tid;
-    int64_t pool;
+    ceph_tid_t tid = 0;
+    int64_t pool = 0;
     std::string name;
-    Context *onfinish;
-    uint64_t ontimeout;
-    int pool_op;
-    int16_t crush_rule;
-    snapid_t snapid;
-    ceph::buffer::list *blp;
-
+    using OpSig = void(boost::system::error_code, ceph::buffer::list);
+    using OpComp = ceph::async::Completion<OpSig>;
+    std::unique_ptr<OpComp> onfinish;
+    uint64_t ontimeout = 0;
+    int pool_op = 0;
+    int16_t crush_rule = 0;
+    snapid_t snapid = 0;
     ceph::coarse_mono_time last_submit;
-    PoolOp() : tid(0), pool(0), onfinish(NULL), ontimeout(0), pool_op(0),
-              crush_rule(0), snapid(0), blp(NULL) {}
+
+    PoolOp() {}
   };
 
   // -- osd commands --
@@ -1704,8 +2175,6 @@ public:
     ceph_tid_t tid = 0;
     std::vector<std::string> cmd;
     ceph::buffer::list inbl;
-    ceph::buffer::list *poutbl = nullptr;
-    std::string *prs = nullptr;
 
     // target_osd == -1 means target_pg is valid
     const int target_osd = -1;
@@ -1717,108 +2186,91 @@ public:
     int map_check_error = 0; // error to return if std::map check fails
     const char *map_check_error_str = nullptr;
 
-    Context *onfinish = nullptr;
+    using OpSig = void(boost::system::error_code, std::string,
+                      ceph::buffer::list);
+    using OpComp = ceph::async::Completion<OpSig>;
+    std::unique_ptr<OpComp> onfinish;
+
     uint64_t ontimeout = 0;
     ceph::coarse_mono_time last_submit;
 
     CommandOp(
       int target_osd,
-      const std::vector<std::string> &cmd,
-      ceph::buffer::list inbl,
-      ceph::buffer::list *poutbl,
-      std::string *prs,
-      Context *onfinish)
-      : cmd(cmd),
-       inbl(inbl),
-       poutbl(poutbl),
-       prs(prs),
+      std::vector<string>&& cmd,
+      ceph::buffer::list&& inbl,
+      decltype(onfinish)&& onfinish)
+      : cmd(std::move(cmd)),
+       inbl(std::move(inbl)),
        target_osd(target_osd),
-       onfinish(onfinish) {}
+       onfinish(std::move(onfinish)) {}
 
     CommandOp(
       pg_t pgid,
-      const std::vector<std::string> &cmd,
-      ceph::buffer::list inbl,
-      ceph::buffer::list *poutbl,
-      std::string *prs,
-      Context *onfinish)
-      : cmd(cmd),
-       inbl(inbl),
-       poutbl(poutbl),
-       prs(prs),
+      std::vector<string>&& cmd,
+      ceph::buffer::list&& inbl,
+      decltype(onfinish)&& onfinish)
+      : cmd(std::move(cmd)),
+       inbl(std::move(inbl)),
        target_pg(pgid),
        target(pgid),
-       onfinish(onfinish) {}
-
+       onfinish(std::move(onfinish)) {}
   };
 
   void submit_command(CommandOp *c, ceph_tid_t *ptid);
-  int _calc_command_target(CommandOp *c, shunique_lock &sul);
-  void _assign_command_session(CommandOp *c, shunique_lock &sul);
+  int _calc_command_target(CommandOp *c,
+                          ceph::shunique_lock<ceph::shared_mutex> &sul);
+  void _assign_command_session(CommandOp *c,
+                              ceph::shunique_lock<ceph::shared_mutex> &sul);
   void _send_command(CommandOp *c);
-  int command_op_cancel(OSDSession *s, ceph_tid_t tid, int r);
-  void _finish_command(CommandOp *c, int r, std::string rs);
+  int command_op_cancel(OSDSession *s, ceph_tid_t tid,
+                       boost::system::error_code ec);
+  void _finish_command(CommandOp *c, boost::system::error_code ec,
+                      std::string&& rs, ceph::buffer::list&& bl);
   void handle_command_reply(MCommandReply *m);
 
-
   // -- lingering ops --
 
-  struct WatchContext {
-    // this simply mirrors librados WatchCtx2
-    virtual void handle_notify(uint64_t notify_id,
-                              uint64_t cookie,
-                              uint64_t notifier_id,
-                              ceph::buffer::list& bl) = 0;
-    virtual void handle_error(uint64_t cookie, int err) = 0;
-    virtual ~WatchContext() {}
-  };
-
   struct LingerOp : public RefCountedObject {
-    uint64_t linger_id;
-
-    op_target_t target;
-
-    snapid_t snap;
+    Objecter *objecter;
+    uint64_t linger_id{0};
+    op_target_t target{object_t(), object_locator_t(), 0};
+    snapid_t snap{CEPH_NOSNAP};
     SnapContext snapc;
     ceph::real_time mtime;
 
     std::vector<OSDOp> ops;
     ceph::buffer::list inbl;
-    ceph::buffer::list *poutbl;
-    version_t *pobjver;
+    version_t *pobjver{nullptr};
 
-    bool is_watch;
+    bool is_watch{false};
     ceph::coarse_mono_time watch_valid_thru; ///< send time for last acked ping
-    int last_error;  ///< error from last failed ping|reconnect, if any
-    std::shared_mutex watch_lock;
-    using lock_guard = std::unique_lock<decltype(watch_lock)>;
-    using unique_lock = std::unique_lock<decltype(watch_lock)>;
-    using shared_lock = boost::shared_lock<decltype(watch_lock)>;
-    using shunique_lock = ceph::shunique_lock<decltype(watch_lock)>;
+    boost::system::error_code last_error;  ///< error from last failed ping|reconnect, if any
+    ceph::shared_mutex watch_lock;
 
     // queue of pending async operations, with the timestamp of
     // when they were queued.
     std::list<ceph::coarse_mono_time> watch_pending_async;
 
-    uint32_t register_gen;
-    bool registered;
-    bool canceled;
-    Context *on_reg_commit;
-
-    // we trigger these from an async finisher
-    Context *on_notify_finish;
-    ceph::buffer::list *notify_result_bl;
-    uint64_t notify_id;
-
-    WatchContext *watch_context;
-
-    OSDSession *session;
-
-    Objecter *objecter;
-    int ctx_budget;
-    ceph_tid_t register_tid;
-    ceph_tid_t ping_tid;
-    epoch_t map_dne_bound;
+    uint32_t register_gen{0};
+    bool registered{false};
+    bool canceled{false};
+    using OpSig = void(boost::system::error_code, ceph::buffer::list);
+    using OpComp = ceph::async::Completion<OpSig>;
+    std::unique_ptr<OpComp> on_reg_commit;
+    std::unique_ptr<OpComp> on_notify_finish;
+    uint64_t notify_id{0};
+
+    fu2::unique_function<void(boost::system::error_code,
+                             uint64_t notify_id,
+                             uint64_t cookie,
+                             uint64_t notifier_id,
+                             ceph::buffer::list&& bl)> handle;
+    OSDSession *session{nullptr};
+
+    int ctx_budget{-1};
+    ceph_tid_t register_tid{0};
+    ceph_tid_t ping_tid{0};
+    epoch_t map_dne_bound{0};
 
     void _queued_async() {
       // watch_lock ust be locked unique
@@ -1830,81 +2282,53 @@ public:
       watch_pending_async.pop_front();
     }
 
-    explicit LingerOp(Objecter *o) : linger_id(0),
-                           target(object_t(), object_locator_t(), 0),
-                           snap(CEPH_NOSNAP), poutbl(NULL), pobjver(NULL),
-                           is_watch(false), last_error(0),
-                           register_gen(0),
-                           registered(false),
-                           canceled(false),
-                           on_reg_commit(NULL),
-                           on_notify_finish(NULL),
-                           notify_result_bl(NULL),
-                           notify_id(0),
-                           watch_context(NULL),
-                           session(NULL),
-                           objecter(o),
-                           ctx_budget(-1),
-                           register_tid(0),
-                           ping_tid(0),
-                           map_dne_bound(0) {}
-
-    const LingerOp &operator=(const LingerOp& r) = delete;
+    explicit LingerOp(Objecter *o, uint64_t linger_id)
+      : objecter(o), linger_id(linger_id),
+       watch_lock(ceph::make_shared_mutex(
+                    fmt::format("LingerOp::watch_lock #{}", linger_id))) {}
+
+    const LingerOp& operator=(const LingerOp& r) = delete;
     LingerOp(const LingerOp& o) = delete;
 
     uint64_t get_cookie() {
       return reinterpret_cast<uint64_t>(this);
     }
-
-  private:
-    ~LingerOp() override {
-      delete watch_context;
-    }
   };
 
-  struct C_Linger_Commit : public Context {
+  struct CB_Linger_Commit {
     Objecter *objecter;
-    LingerOp *info;
+    boost::intrusive_ptr<LingerOp> info;
     ceph::buffer::list outbl;  // used for notify only
-    C_Linger_Commit(Objecter *o, LingerOp *l) : objecter(o), info(l) {
-      info->get();
-    }
-    ~C_Linger_Commit() override {
-      info->put();
-    }
-    void finish(int r) override {
-      objecter->_linger_commit(info, r, outbl);
+    CB_Linger_Commit(Objecter *o, LingerOp *l) : objecter(o), info(l) {}
+    ~CB_Linger_Commit() = default;
+
+    void operator()(boost::system::error_code ec) {
+      objecter->_linger_commit(info.get(), ec, outbl);
     }
   };
 
-  struct C_Linger_Reconnect : public Context {
+  struct CB_Linger_Reconnect {
     Objecter *objecter;
-    LingerOp *info;
-    C_Linger_Reconnect(Objecter *o, LingerOp *l) : objecter(o), info(l) {
-      info->get();
-    }
-    ~C_Linger_Reconnect() override {
-      info->put();
-    }
-    void finish(int r) override {
-      objecter->_linger_reconnect(info, r);
+    boost::intrusive_ptr<LingerOp> info;
+    CB_Linger_Reconnect(Objecter *o, LingerOp *l) : objecter(o), info(l) {}
+    ~CB_Linger_Reconnect() = default;
+
+    void operator()(boost::system::error_code ec) {
+      objecter->_linger_reconnect(info.get(), ec);
+      info.reset();
     }
   };
 
-  struct C_Linger_Ping : public Context {
+  struct CB_Linger_Ping {
     Objecter *objecter;
-    LingerOp *info;
+    boost::intrusive_ptr<LingerOp> info;
     ceph::coarse_mono_time sent;
     uint32_t register_gen;
-    C_Linger_Ping(Objecter *o, LingerOp *l)
-      : objecter(o), info(l), register_gen(info->register_gen) {
-      info->get();
-    }
-    ~C_Linger_Ping() override {
-      info->put();
-    }
-    void finish(int r) override {
-      objecter->_linger_ping(info, r, sent, register_gen);
+    CB_Linger_Ping(Objecter *o, LingerOp *l, ceph::coarse_mono_time s)
+      : objecter(o), info(l), sent(s), register_gen(info->register_gen) {}
+    void operator()(boost::system::error_code ec) {
+      objecter->_linger_ping(info.get(), ec, sent, register_gen);
+      info.reset();
     }
   };
 
@@ -1923,12 +2347,6 @@ public:
   };
 
   struct OSDSession : public RefCountedObject {
-    std::shared_mutex lock;
-    using lock_guard = std::lock_guard<decltype(lock)>;
-    using unique_lock = std::unique_lock<decltype(lock)>;
-    using shared_lock = boost::shared_lock<decltype(lock)>;
-    using shunique_lock = ceph::shunique_lock<decltype(lock)>;
-
     // pending ops
     std::map<ceph_tid_t,Op*> ops;
     std::map<uint64_t, LingerOp*> linger_ops;
@@ -1939,16 +2357,17 @@ public:
     std::map<uint64_t,OSDBackoff*> backoffs_by_id;
 
     int osd;
+    ceph::shared_mutex lock;
+
     int incarnation;
     ConnectionRef con;
     int num_locks;
     std::unique_ptr<std::mutex[]> completion_locks;
-    using unique_completion_lock = std::unique_lock<
-      decltype(completion_locks)::element_type>;
-
 
     OSDSession(CephContext *cct, int o) :
-      osd(o), incarnation(0), con(NULL),
+      osd(o), lock(ceph::make_shared_mutex(
+                    fmt::format("OSDSession::lock #{}", o))),
+      incarnation(0), con(NULL),
       num_locks(cct->_conf->objecter_completion_locks_per_session),
       completion_locks(new std::mutex[num_locks]) {}
 
@@ -1956,7 +2375,7 @@ public:
 
     bool is_homeless() { return (osd == -1); }
 
-    unique_completion_lock get_lock(object_t& oid);
+    std::unique_lock<std::mutex> get_lock(object_t& oid);
   };
   std::map<int,OSDSession*> osd_sessions;
 
@@ -1987,7 +2406,8 @@ public:
   std::map<ceph_tid_t,PoolOp*> pool_ops;
   std::atomic<unsigned> num_homeless_ops{0};
 
-  OSDSession *homeless_session;
+  OSDSession* homeless_session = new OSDSession(cct, -1);
+
 
   // ops waiting for an osdmap with a new pool or confirmation that
   // the pool does not exist (may be expanded to other uses later)
@@ -1995,7 +2415,9 @@ public:
   std::map<ceph_tid_t, Op*> check_latest_map_ops;
   std::map<ceph_tid_t, CommandOp*> check_latest_map_commands;
 
-  std::map<epoch_t,std::list< std::pair<Context*, int> > > waiting_for_map;
+  std::map<epoch_t,
+          std::vector<std::pair<std::unique_ptr<OpCompletion>,
+                                boost::system::error_code>>> waiting_for_map;
 
   ceph::timespan mon_timeout;
   ceph::timespan osd_timeout;
@@ -2028,7 +2450,7 @@ public:
   int _calc_target(op_target_t *t, Connection *con,
                   bool any_change = false);
   int _map_session(op_target_t *op, OSDSession **s,
-                  shunique_lock& lc);
+                  ceph::shunique_lock<ceph::shared_mutex>& lc);
 
   void _session_op_assign(OSDSession *s, Op *op);
   void _session_op_remove(OSDSession *s, Op *op);
@@ -2037,28 +2459,35 @@ public:
   void _session_command_op_assign(OSDSession *to, CommandOp *op);
   void _session_command_op_remove(OSDSession *from, CommandOp *op);
 
-  int _assign_op_target_session(Op *op, shunique_lock& lc,
+  int _assign_op_target_session(Op *op, ceph::shunique_lock<ceph::shared_mutex>& lc,
                                bool src_session_locked,
                                bool dst_session_locked);
-  int _recalc_linger_op_target(LingerOp *op, shunique_lock& lc);
-
-  void _linger_submit(LingerOp *info, shunique_lock& sul);
-  void _send_linger(LingerOp *info, shunique_lock& sul);
-  void _linger_commit(LingerOp *info, int r, ceph::buffer::list& outbl);
-  void _linger_reconnect(LingerOp *info, int r);
+  int _recalc_linger_op_target(LingerOp *op,
+                              ceph::shunique_lock<ceph::shared_mutex>& lc);
+
+  void _linger_submit(LingerOp *info,
+                     ceph::shunique_lock<ceph::shared_mutex>& sul);
+  void _send_linger(LingerOp *info,
+                   ceph::shunique_lock<ceph::shared_mutex>& sul);
+  void _linger_commit(LingerOp *info, boost::system::error_code ec,
+                     ceph::buffer::list& outbl);
+  void _linger_reconnect(LingerOp *info, boost::system::error_code ec);
   void _send_linger_ping(LingerOp *info);
-  void _linger_ping(LingerOp *info, int r, ceph::coarse_mono_time sent,
-                   uint32_t register_gen);
-  int _normalize_watch_error(int r);
+  void _linger_ping(LingerOp *info, boost::system::error_code ec,
+                   ceph::coarse_mono_time sent, uint32_t register_gen);
+  boost::system::error_code _normalize_watch_error(boost::system::error_code ec);
 
-  friend class C_DoWatchError;
+  friend class CB_DoWatchError;
 public:
-  void linger_callback_flush(Context *ctx) {
-    finisher->queue(ctx);
+  template<typename CT>
+  auto linger_callback_flush(CT&& ct) {
+    boost::asio::async_completion<CT, void(void)> init(ct);
+    boost::asio::defer(finish_strand, std::move(init.completion_handler));
+    return init.result.get();
   }
 
 private:
-  void _check_op_pool_dne(Op *op, unique_lock *sl);
+  void _check_op_pool_dne(Op *op, std::unique_lock<ceph::shared_mutex> *sl);
   void _send_op_map_check(Op *op);
   void _op_cancel_map_check(Op *op);
   void _check_linger_pool_dne(LingerOp *op, bool *need_unregister);
@@ -2069,9 +2498,11 @@ private:
   void _command_cancel_map_check(CommandOp *op);
 
   void _kick_requests(OSDSession *session, std::map<uint64_t, LingerOp *>& lresend);
-  void _linger_ops_resend(std::map<uint64_t, LingerOp *>& lresend, unique_lock& ul);
+  void _linger_ops_resend(std::map<uint64_t, LingerOp *>& lresend,
+                         std::unique_lock<ceph::shared_mutex>& ul);
 
-  int _get_session(int osd, OSDSession **session, shunique_lock& sul);
+  int _get_session(int osd, OSDSession **session,
+                  ceph::shunique_lock<ceph::shared_mutex>& sul);
   void put_session(OSDSession *s);
   void get_session(OSDSession *s);
   void _reopen_session(OSDSession *session);
@@ -2089,8 +2520,9 @@ private:
    * If throttle_op needs to throttle it will unlock client_lock.
    */
   int calc_op_budget(const std::vector<OSDOp>& ops);
-  void _throttle_op(Op *op, shunique_lock& sul, int op_size = 0);
-  int _take_op_budget(Op *op, shunique_lock& sul) {
+  void _throttle_op(Op *op, ceph::shunique_lock<ceph::shared_mutex>& sul,
+                   int op_size = 0);
+  int _take_op_budget(Op *op, ceph::shunique_lock<ceph::shared_mutex>& sul) {
     ceph_assert(sul && sul.mutex() == &rwlock);
     int op_budget = calc_op_budget(op->ops);
     if (keep_balanced_budget) {
@@ -2103,18 +2535,21 @@ private:
     return op_budget;
   }
   int take_linger_budget(LingerOp *info);
-  friend struct WatchContext; // to invoke put_up_budget_bytes
   void put_op_budget_bytes(int op_budget) {
     ceph_assert(op_budget >= 0);
     op_throttle_bytes.put(op_budget);
     op_throttle_ops.put(1);
   }
   void put_nlist_context_budget(NListContext *list_context);
-  Throttle op_throttle_bytes, op_throttle_ops;
-
+  Throttle op_throttle_bytes{cct, "objecter_bytes",
+                            static_cast<int64_t>(
+                              cct->_conf->objecter_inflight_op_bytes)};
+  Throttle op_throttle_ops{cct, "objecter_ops",
+                          static_cast<int64_t>(
+                            cct->_conf->objecter_inflight_ops)};
  public:
-  Objecter(CephContext *cct_, Messenger *m, MonClient *mc,
-          Finisher *fin,
+  Objecter(CephContext *cct, Messenger *m, MonClient *mc,
+          boost::asio::io_context& service,
           double mon_timeout,
           double osd_timeout);
   ~Objecter() override;
@@ -2166,7 +2601,7 @@ private:
     std::map<ceph_tid_t, Op*>& need_resend,
     std::list<LingerOp*>& need_resend_linger,
     std::map<ceph_tid_t, CommandOp*>& need_resend_command,
-    shunique_lock& sul);
+    ceph::shunique_lock<ceph::shared_mutex>& sul);
 
   int64_t get_object_hash_position(int64_t pool, const std::string& key,
                                   const std::string& ns);
@@ -2200,6 +2635,27 @@ private:
   void handle_osd_map(class MOSDMap *m);
   void wait_for_osd_map(epoch_t e=0);
 
+  template<typename CompletionToken>
+  auto wait_for_osd_map(CompletionToken&& token) {
+    boost::asio::async_completion<CompletionToken, void()> init(token);
+    unique_lock l(rwlock);
+    if (osdmap->get_epoch()) {
+      l.unlock();
+      boost::asio::dispatch(std::move(init.completion_handler));
+    } else {
+      waiting_for_map[0].emplace_back(
+       OpCompletion::create(
+         service.get_executor(),
+         [c = std::move(init.completion_handler)]
+         (boost::system::error_code) mutable {
+           std::move(c)();
+         }), boost::system::error_code{});
+      l.unlock();
+    }
+    return init.result.get();
+  }
+
+
   /**
    * Get std::list of entities blacklisted since this was last called,
    * and reset the std::list.
@@ -2224,15 +2680,17 @@ private:
                              const OSDMap &new_osd_map);
 
   // low-level
-  void _op_submit(Op *op, shunique_lock& lc, ceph_tid_t *ptid);
-  void _op_submit_with_budget(Op *op, shunique_lock& lc,
+  void _op_submit(Op *op, ceph::shunique_lock<ceph::shared_mutex>& lc,
+                 ceph_tid_t *ptid);
+  void _op_submit_with_budget(Op *op,
+                             ceph::shunique_lock<ceph::shared_mutex>& lc,
                              ceph_tid_t *ptid,
                              int *ctx_budget = NULL);
   // public interface
 public:
   void op_submit(Op *op, ceph_tid_t *ptid = NULL, int *ctx_budget = NULL);
   bool is_active() {
-    shared_lock l(rwlock);
+    std::shared_lock l(rwlock);
     return !((!inflight_ops) && linger_ops.empty() &&
             poolstat_ops.empty() && statfs_ops.empty());
   }
@@ -2258,11 +2716,87 @@ public:
   void set_client_incarnation(int inc) { client_inc = inc; }
 
   bool have_map(epoch_t epoch);
-  /// wait for epoch; true if we already have it
-  bool wait_for_map(epoch_t epoch, Context *c, int err=0);
-  void _wait_for_new_map(Context *c, epoch_t epoch, int err=0);
-  void wait_for_latest_osdmap(Context *fin);
-  void get_latest_version(epoch_t oldest, epoch_t neweset, Context *fin);
+
+  struct CB_Objecter_GetVersion {
+    Objecter *objecter;
+    std::unique_ptr<OpCompletion> fin;
+
+    CB_Objecter_GetVersion(Objecter *o, std::unique_ptr<OpCompletion> c)
+      : objecter(o), fin(std::move(c)) {}
+    void operator()(boost::system::error_code ec, version_t newest,
+                   version_t oldest) {
+      if (ec == boost::system::errc::resource_unavailable_try_again) {
+       // try again as instructed
+       objecter->wait_for_latest_osdmap(std::move(fin));
+      } else if (ec) {
+       ceph::async::post(std::move(fin), ec);
+      } else {
+       auto l = std::unique_lock(objecter->rwlock);
+       objecter->_get_latest_version(oldest, newest, std::move(fin),
+                                     std::move(l));
+      }
+    }
+  };
+
+  template<typename CompletionToken>
+  typename boost::asio::async_result<CompletionToken, OpSignature>::return_type
+  wait_for_map(epoch_t epoch, CompletionToken&& token) {
+    boost::asio::async_completion<CompletionToken, OpSignature> init(token);
+
+    if (osdmap->get_epoch() >= epoch) {
+      boost::asio::post(service,
+                       ceph::async::bind_handler(
+                         std::move(init.completion_handler),
+                         boost::system::error_code()));
+    } else {
+      monc->get_version("osdmap",
+                       CB_Objecter_GetVersion(
+                         this,
+                         OpCompletion::create(service.get_executor(),
+                                              std::move(init.completion_handler))));
+    }
+    return init.result.get();
+  }
+
+  void _wait_for_new_map(std::unique_ptr<OpCompletion>, epoch_t epoch,
+                        boost::system::error_code = {});
+
+  template<typename CompletionToken>
+  typename boost::asio::async_result<CompletionToken, OpSignature>::return_type
+  wait_for_latest_osdmap(CompletionToken&& token) {
+    boost::asio::async_completion<CompletionToken, OpSignature> init(token);
+
+    monc->get_version("osdmap",
+                     CB_Objecter_GetVersion(
+                       this,
+                       OpCompletion::create(service.get_executor(),
+                                            std::move(init.completion_handler))));
+    return init.result.get();
+  }
+
+  void wait_for_latest_osdmap(std::unique_ptr<OpCompletion> c) {
+    monc->get_version("osdmap",
+                     CB_Objecter_GetVersion(this, std::move(c)));
+  }
+
+  template<typename CompletionToken>
+  auto get_latest_version(epoch_t oldest, epoch_t newest,
+                         CompletionToken&& token) {
+    boost::asio::async_completion<CompletionToken, OpSignature> init(token);
+    {
+      std::unique_lock wl(rwlock);
+      _get_latest_version(oldest, newest,
+                         OpCompletion::create(
+                           service.get_executor(),
+                           std::move(init.completion_handler)),
+                         std::move(wl));
+    }
+    return init.result.get();
+  }
+
+  void _get_latest_version(epoch_t oldest, epoch_t neweset,
+                          std::unique_ptr<OpCompletion> fin,
+                          std::unique_lock<ceph::shared_mutex>&& ul);
 
   /** Get the current set of global op flags */
   int get_global_op_flags() const { return global_op_flags; }
@@ -2294,32 +2828,52 @@ public:
   epoch_t op_cancel_writes(int r, int64_t pool=-1);
 
   // commands
-  void osd_command(int osd, const std::vector<std::string>& cmd,
-                 const ceph::buffer::list& inbl, ceph_tid_t *ptid,
-                 ceph::buffer::list *poutbl, std::string *prs, Context *onfinish) {
+  void osd_command(int osd, std::vector<std::string> cmd,
+                  ceph::buffer::list inbl, ceph_tid_t *ptid,
+                  decltype(CommandOp::onfinish)&& onfinish) {
     ceph_assert(osd >= 0);
-    CommandOp *c = new CommandOp(
+    auto c = new CommandOp(
       osd,
-      cmd,
-      inbl,
-      poutbl,
-      prs,
-      onfinish);
+      std::move(cmd),
+      std::move(inbl),
+      std::move(onfinish));
     submit_command(c, ptid);
   }
-  void pg_command(pg_t pgid, const std::vector<std::string>& cmd,
-                const ceph::buffer::list& inbl, ceph_tid_t *ptid,
-                ceph::buffer::list *poutbl, std::string *prs, Context *onfinish) {
-    CommandOp *c = new CommandOp(
+  template<typename CompletionToken>
+  auto osd_command(int osd, std::vector<std::string> cmd,
+                  ceph::buffer::list inbl, ceph_tid_t *ptid,
+                  CompletionToken&& token) {
+    boost::asio::async_completion<CompletionToken,
+                                 CommandOp::OpSig> init(token);
+    osd_command(osd, std::move(cmd), std::move(inbl), ptid,
+               CommandOp::OpComp::create(service.get_executor(),
+                                         std::move(init.completion_handler)));
+    return init.result.get();
+  }
+
+  void pg_command(pg_t pgid, std::vector<std::string> cmd,
+                 ceph::buffer::list inbl, ceph_tid_t *ptid,
+                 decltype(CommandOp::onfinish)&& onfinish) {
+    auto *c = new CommandOp(
       pgid,
-      cmd,
-      inbl,
-      poutbl,
-      prs,
-      onfinish);
+      std::move(cmd),
+      std::move(inbl),
+      std::move(onfinish));
     submit_command(c, ptid);
   }
 
+  template<typename CompletionToken>
+  auto pg_command(pg_t pgid, std::vector<std::string> cmd,
+                 ceph::buffer::list inbl, ceph_tid_t *ptid,
+                 CompletionToken&& token) {
+    boost::asio::async_completion<CompletionToken,
+                                 CommandOp::OpSig> init(token);
+    pg_command(pgid, std::move(cmd), std::move(inbl), ptid,
+              CommandOp::OpComp::create(service.get_executor(),
+                                        std::move(init.completion_handler)));
+    return init.result.get();
+  }
+
   // mid-level helpers
   Op *prepare_mutate_op(
     const object_t& oid, const object_locator_t& oloc,
@@ -2328,15 +2882,18 @@ public:
     Context *oncommit, version_t *objver = NULL,
     osd_reqid_t reqid = osd_reqid_t(),
     ZTracer::Trace *parent_trace = nullptr) {
-    Op *o = new Op(oid, oloc, op.ops, flags | global_op_flags |
-                  CEPH_OSD_FLAG_WRITE, oncommit, objver, nullptr, parent_trace);
+    Op *o = new Op(oid, oloc, std::move(op.ops), flags | global_op_flags |
+                  CEPH_OSD_FLAG_WRITE, oncommit, objver,
+                  nullptr, parent_trace);
     o->priority = op.priority;
     o->mtime = mtime;
     o->snapc = snapc;
     o->out_rval.swap(op.out_rval);
     o->out_bl.swap(op.out_bl);
     o->out_handler.swap(op.out_handler);
+    o->out_ec.swap(op.out_ec);
     o->reqid = reqid;
+    op.clear();
     return o;
   }
   ceph_tid_t mutate(
@@ -2351,6 +2908,27 @@ public:
     op_submit(o, &tid);
     return tid;
   }
+
+  void mutate(const object_t& oid, const object_locator_t& oloc,
+             ObjectOperation&& op, const SnapContext& snapc,
+             ceph::real_time mtime, int flags,
+             std::unique_ptr<Op::OpComp>&& oncommit,
+             version_t *objver = NULL, osd_reqid_t reqid = osd_reqid_t()) {
+    Op *o = new Op(oid, oloc, std::move(op.ops), flags | global_op_flags |
+                  CEPH_OSD_FLAG_WRITE, std::move(oncommit), objver,
+                  nullptr);
+    o->priority = op.priority;
+    o->mtime = mtime;
+    o->snapc = snapc;
+    o->out_bl.swap(op.out_bl);
+    o->out_handler.swap(op.out_handler);
+    o->out_rval.swap(op.out_rval);
+    o->out_ec.swap(op.out_ec);
+    o->reqid = reqid;
+    op.clear();
+    op_submit(o);
+  }
+
   Op *prepare_read_op(
     const object_t& oid, const object_locator_t& oloc,
     ObjectOperation& op,
@@ -2359,8 +2937,9 @@ public:
     int *data_offset = NULL,
     uint64_t features = 0,
     ZTracer::Trace *parent_trace = nullptr) {
-    Op *o = new Op(oid, oloc, op.ops, flags | global_op_flags |
-                  CEPH_OSD_FLAG_READ, onack, objver, data_offset, parent_trace);
+    Op *o = new Op(oid, oloc, std::move(op.ops), flags | global_op_flags |
+                  CEPH_OSD_FLAG_READ, onack, objver,
+                  data_offset, parent_trace);
     o->priority = op.priority;
     o->snapid = snapid;
     o->outbl = pbl;
@@ -2369,6 +2948,8 @@ public:
     o->out_bl.swap(op.out_bl);
     o->out_handler.swap(op.out_handler);
     o->out_rval.swap(op.out_rval);
+    o->out_ec.swap(op.out_ec);
+    op.clear();
     return o;
   }
   ceph_tid_t read(
@@ -2386,13 +2967,38 @@ public:
     op_submit(o, &tid);
     return tid;
   }
+
+  void read(const object_t& oid, const object_locator_t& oloc,
+           ObjectOperation&& op, snapid_t snapid, ceph::buffer::list *pbl,
+           int flags, std::unique_ptr<Op::OpComp>&& onack,
+           version_t *objver = nullptr, int *data_offset = nullptr,
+           uint64_t features = 0) {
+    Op *o = new Op(oid, oloc, std::move(op.ops), flags | global_op_flags |
+                  CEPH_OSD_FLAG_READ, std::move(onack), objver,
+                  data_offset);
+    o->priority = op.priority;
+    o->snapid = snapid;
+    o->outbl = pbl;
+    if (!o->outbl && op.size() == 1 && op.out_bl[0]->length())
+       o->outbl = op.out_bl[0];
+    o->out_bl.swap(op.out_bl);
+    o->out_handler.swap(op.out_handler);
+    o->out_rval.swap(op.out_rval);
+    o->out_ec.swap(op.out_ec);
+    if (features)
+      o->features = features;
+    op.clear();
+    op_submit(o);
+  }
+
+
   Op *prepare_pg_read_op(
     uint32_t hash, object_locator_t oloc,
     ObjectOperation& op, ceph::buffer::list *pbl, int flags,
     Context *onack, epoch_t *reply_epoch,
     int *ctx_budget) {
     Op *o = new Op(object_t(), oloc,
-                  op.ops,
+                  std::move(op.ops),
                   flags | global_op_flags | CEPH_OSD_FLAG_READ |
                   CEPH_OSD_FLAG_IGNORE_OVERLAY,
                   onack, NULL);
@@ -2404,11 +3010,13 @@ public:
     o->out_bl.swap(op.out_bl);
     o->out_handler.swap(op.out_handler);
     o->out_rval.swap(op.out_rval);
+    o->out_ec.swap(op.out_ec);
     o->reply_epoch = reply_epoch;
     if (ctx_budget) {
       // budget is tracked by listing context
       o->ctx_budgeted = true;
     }
+    op.clear();
     return o;
   }
   ceph_tid_t pg_read(
@@ -2423,6 +3031,35 @@ public:
     return tid;
   }
 
+  ceph_tid_t pg_read(
+    uint32_t hash, object_locator_t oloc,
+    ObjectOperation& op, ceph::buffer::list *pbl, int flags,
+    std::unique_ptr<Op::OpComp>&& onack, epoch_t *reply_epoch, int *ctx_budget) {
+    ceph_tid_t tid;
+    Op *o = new Op(object_t(), oloc,
+                  std::move(op.ops),
+                  flags | global_op_flags | CEPH_OSD_FLAG_READ |
+                  CEPH_OSD_FLAG_IGNORE_OVERLAY,
+                  std::move(onack), nullptr);
+    o->target.precalc_pgid = true;
+    o->target.base_pgid = pg_t(hash, oloc.pool);
+    o->priority = op.priority;
+    o->snapid = CEPH_NOSNAP;
+    o->outbl = pbl;
+    o->out_bl.swap(op.out_bl);
+    o->out_handler.swap(op.out_handler);
+    o->out_rval.swap(op.out_rval);
+    o->out_ec.swap(op.out_ec);
+    o->reply_epoch = reply_epoch;
+    if (ctx_budget) {
+      // budget is tracked by listing context
+      o->ctx_budgeted = true;
+    }
+    op_submit(o, &tid, ctx_budget);
+    op.clear();
+    return tid;
+  }
+
   // caller owns a ref
   LingerOp *linger_register(const object_t& oid, const object_locator_t& oloc,
                            int flags);
@@ -2430,19 +3067,39 @@ public:
                          ObjectOperation& op,
                          const SnapContext& snapc, ceph::real_time mtime,
                          ceph::buffer::list& inbl,
-                         Context *onfinish,
+                         decltype(info->on_reg_commit)&& oncommit,
                          version_t *objver);
+  ceph_tid_t linger_watch(LingerOp *info,
+                         ObjectOperation& op,
+                         const SnapContext& snapc, ceph::real_time mtime,
+                         ceph::buffer::list& inbl,
+                         Context* onfinish,
+                         version_t *objver) {
+    return linger_watch(info, op, snapc, mtime, inbl,
+                       OpContextVert<ceph::buffer::list>(onfinish, nullptr), objver);
+  }
   ceph_tid_t linger_notify(LingerOp *info,
                           ObjectOperation& op,
                           snapid_t snap, ceph::buffer::list& inbl,
-                          ceph::buffer::list *poutbl,
-                          Context *onack,
+                          decltype(LingerOp::on_reg_commit)&& onfinish,
                           version_t *objver);
-  int linger_check(LingerOp *info);
+  ceph_tid_t linger_notify(LingerOp *info,
+                          ObjectOperation& op,
+                          snapid_t snap, ceph::buffer::list& inbl,
+                          ceph::buffer::list *poutbl,
+                          Context* onack,
+                          version_t *objver) {
+    return linger_notify(info, op, snap, inbl,
+                        OpContextVert(onack, poutbl),
+                        objver);
+  }
+  tl::expected<ceph::timespan,
+              boost::system::error_code> linger_check(LingerOp *info);
   void linger_cancel(LingerOp *info);  // releases a reference
   void _linger_cancel(LingerOp *info);
 
-  void _do_watch_notify(LingerOp *info, MWatchNotify *m);
+  void _do_watch_notify(boost::intrusive_ptr<LingerOp> info,
+                        boost::intrusive_ptr<MWatchNotify> m);
 
   /**
    * set up initial ops in the op std::vector, and allocate a final op slot.
@@ -2481,7 +3138,7 @@ public:
     int i = init_ops(ops, 1, extra_ops);
     ops[i].op.op = CEPH_OSD_OP_STAT;
     C_Stat *fin = new C_Stat(psize, pmtime, onfinish);
-    Op *o = new Op(oid, oloc, ops, flags | global_op_flags |
+    Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags |
                   CEPH_OSD_FLAG_READ, fin, objver);
     o->snapid = snap;
     o->outbl = &fin->bl;
@@ -2513,8 +3170,9 @@ public:
     ops[i].op.extent.truncate_size = 0;
     ops[i].op.extent.truncate_seq = 0;
     ops[i].op.flags = op_flags;
-    Op *o = new Op(oid, oloc, ops, flags | global_op_flags |
-                  CEPH_OSD_FLAG_READ, onfinish, objver, nullptr, parent_trace);
+    Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags |
+                  CEPH_OSD_FLAG_READ, onfinish, objver,
+                  nullptr, parent_trace);
     o->snapid = snap;
     o->outbl = pbl;
     return o;
@@ -2545,7 +3203,7 @@ public:
     ops[i].op.extent.truncate_seq = 0;
     ops[i].indata = cmp_bl;
     ops[i].op.flags = op_flags;
-    Op *o = new Op(oid, oloc, ops, flags | global_op_flags |
+    Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags |
                   CEPH_OSD_FLAG_READ, onfinish, objver);
     o->snapid = snap;
     return o;
@@ -2577,7 +3235,7 @@ public:
     ops[i].op.extent.truncate_size = trunc_size;
     ops[i].op.extent.truncate_seq = trunc_seq;
     ops[i].op.flags = op_flags;
-    Op *o = new Op(oid, oloc, ops, flags | global_op_flags |
+    Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags |
                   CEPH_OSD_FLAG_READ, onfinish, objver);
     o->snapid = snap;
     o->outbl = pbl;
@@ -2596,7 +3254,7 @@ public:
     ops[i].op.extent.length = len;
     ops[i].op.extent.truncate_size = 0;
     ops[i].op.extent.truncate_seq = 0;
-    Op *o = new Op(oid, oloc, ops, flags | global_op_flags |
+    Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags |
                   CEPH_OSD_FLAG_READ, onfinish, objver);
     o->snapid = snap;
     o->outbl = pbl;
@@ -2615,7 +3273,7 @@ public:
     ops[i].op.xattr.value_len = 0;
     if (name)
       ops[i].indata.append(name, ops[i].op.xattr.name_len);
-    Op *o = new Op(oid, oloc, ops, flags | global_op_flags |
+    Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags |
                   CEPH_OSD_FLAG_READ, onfinish, objver);
     o->snapid = snap;
     o->outbl = pbl;
@@ -2632,7 +3290,7 @@ public:
     int i = init_ops(ops, 1, extra_ops);
     ops[i].op.op = CEPH_OSD_OP_GETXATTRS;
     C_GetAttrs *fin = new C_GetAttrs(attrset, onfinish);
-    Op *o = new Op(oid, oloc, ops, flags | global_op_flags |
+    Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags |
                   CEPH_OSD_FLAG_READ, fin, objver);
     o->snapid = snap;
     o->outbl = &fin->bl;
@@ -2656,7 +3314,7 @@ public:
                     const SnapContext& snapc, int flags,
                     Context *oncommit,
                     version_t *objver = NULL) {
-    Op *o = new Op(oid, oloc, ops, flags | global_op_flags |
+    Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags |
                   CEPH_OSD_FLAG_WRITE, oncommit, objver);
     o->mtime = mtime;
     o->snapc = snapc;
@@ -2680,8 +3338,8 @@ public:
     ops[i].op.extent.truncate_seq = 0;
     ops[i].indata = bl;
     ops[i].op.flags = op_flags;
-    Op *o = new Op(oid, oloc, ops, flags | global_op_flags |
-                  CEPH_OSD_FLAG_WRITE, oncommit, objver,
+    Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags |
+                  CEPH_OSD_FLAG_WRITE, std::move(oncommit), objver,
                    nullptr, parent_trace);
     o->mtime = mtime;
     o->snapc = snapc;
@@ -2714,7 +3372,7 @@ public:
     ops[i].op.extent.truncate_size = 0;
     ops[i].op.extent.truncate_seq = 0;
     ops[i].indata = bl;
-    Op *o = new Op(oid, oloc, ops, flags | global_op_flags |
+    Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags |
                   CEPH_OSD_FLAG_WRITE, oncommit, objver);
     o->mtime = mtime;
     o->snapc = snapc;
@@ -2749,7 +3407,7 @@ public:
     ops[i].op.extent.truncate_seq = trunc_seq;
     ops[i].indata = bl;
     ops[i].op.flags = op_flags;
-    Op *o = new Op(oid, oloc, ops, flags | global_op_flags |
+    Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags |
                   CEPH_OSD_FLAG_WRITE, oncommit, objver);
     o->mtime = mtime;
     o->snapc = snapc;
@@ -2770,7 +3428,7 @@ public:
     ops[i].op.extent.length = bl.length();
     ops[i].indata = bl;
     ops[i].op.flags = op_flags;
-    Op *o = new Op(oid, oloc, ops, flags | global_op_flags |
+    Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags |
                   CEPH_OSD_FLAG_WRITE, oncommit, objver);
     o->mtime = mtime;
     o->snapc = snapc;
@@ -2804,7 +3462,7 @@ public:
     ops[i].op.writesame.data_length = bl.length();
     ops[i].indata = bl;
     ops[i].op.flags = op_flags;
-    Op *o = new Op(oid, oloc, ops, flags | global_op_flags |
+    Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags |
                   CEPH_OSD_FLAG_WRITE, oncommit, objver);
     o->mtime = mtime;
     o->snapc = snapc;
@@ -2837,7 +3495,7 @@ public:
     ops[i].op.extent.offset = trunc_size;
     ops[i].op.extent.truncate_size = trunc_size;
     ops[i].op.extent.truncate_seq = trunc_seq;
-    Op *o = new Op(oid, oloc, ops, flags | global_op_flags |
+    Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags |
                   CEPH_OSD_FLAG_WRITE, oncommit, objver);
     o->mtime = mtime;
     o->snapc = snapc;
@@ -2854,7 +3512,7 @@ public:
     ops[i].op.op = CEPH_OSD_OP_ZERO;
     ops[i].op.extent.offset = off;
     ops[i].op.extent.length = len;
-    Op *o = new Op(oid, oloc, ops, flags | global_op_flags |
+    Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags |
                   CEPH_OSD_FLAG_WRITE, oncommit, objver);
     o->mtime = mtime;
     o->snapc = snapc;
@@ -2871,7 +3529,7 @@ public:
     int i = init_ops(ops, 1, extra_ops);
     ops[i].op.op = CEPH_OSD_OP_ROLLBACK;
     ops[i].op.snap.snapid = snapid;
-    Op *o = new Op(oid, oloc, ops, CEPH_OSD_FLAG_WRITE, oncommit, objver);
+    Op *o = new Op(oid, oloc, std::move(ops), CEPH_OSD_FLAG_WRITE, oncommit, objver);
     o->mtime = mtime;
     o->snapc = snapc;
     ceph_tid_t tid;
@@ -2887,7 +3545,7 @@ public:
     int i = init_ops(ops, 1, extra_ops);
     ops[i].op.op = CEPH_OSD_OP_CREATE;
     ops[i].op.flags = create_flags;
-    Op *o = new Op(oid, oloc, ops, global_flags | global_op_flags |
+    Op *o = new Op(oid, oloc, std::move(ops), global_flags | global_op_flags |
                   CEPH_OSD_FLAG_WRITE, oncommit, objver);
     o->mtime = mtime;
     o->snapc = snapc;
@@ -2903,7 +3561,7 @@ public:
     std::vector<OSDOp> ops;
     int i = init_ops(ops, 1, extra_ops);
     ops[i].op.op = CEPH_OSD_OP_DELETE;
-    Op *o = new Op(oid, oloc, ops, flags | global_op_flags |
+    Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags |
                   CEPH_OSD_FLAG_WRITE, oncommit, objver);
     o->mtime = mtime;
     o->snapc = snapc;
@@ -2934,8 +3592,9 @@ public:
     if (name)
       ops[i].indata.append(name, ops[i].op.xattr.name_len);
     ops[i].indata.append(bl);
-    Op *o = new Op(oid, oloc, ops, flags | global_op_flags |
-                  CEPH_OSD_FLAG_WRITE, oncommit, objver);
+    Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags |
+                  CEPH_OSD_FLAG_WRITE, oncommit,
+                  objver);
     o->mtime = mtime;
     o->snapc = snapc;
     ceph_tid_t tid;
@@ -2954,7 +3613,7 @@ public:
     ops[i].op.xattr.value_len = 0;
     if (name)
       ops[i].indata.append(name, ops[i].op.xattr.name_len);
-    Op *o = new Op(oid, oloc, ops, flags | global_op_flags |
+    Op *o = new Op(oid, oloc, std::move(ops), flags | global_op_flags |
                   CEPH_OSD_FLAG_WRITE, oncommit, objver);
     o->mtime = mtime;
     o->snapc = snapc;
@@ -2970,29 +3629,30 @@ public:
 
   hobject_t enumerate_objects_begin();
   hobject_t enumerate_objects_end();
-  //hobject_t enumerate_objects_begin(int n, int m);
+
+  template<typename T>
+  friend struct EnumerationContext;
+  template<typename T>
+  friend struct CB_EnumerateReply;
+  template<typename T>
   void enumerate_objects(
     int64_t pool_id,
-    const std::string &ns,
-    const hobject_t &start,
-    const hobject_t &end,
+    std::string_view ns,
+    hobject_t start,
+    hobject_t end,
     const uint32_t max,
-    const ceph::buffer::list &filter_bl,
-    std::list<librados::ListObjectImpl> *result, 
-    hobject_t *next,
-    Context *on_finish);
-
+    const ceph::buffer::list& filter_bl,
+    fu2::unique_function<void(boost::system::error_code,
+                             std::vector<T>,
+                             hobject_t) &&> on_finish);
+  template<typename T>
+  void _issue_enumerate(hobject_t start,
+                       std::unique_ptr<EnumerationContext<T>>);
+  template<typename T>
   void _enumerate_reply(
-      ceph::buffer::list &bl,
-      int r,
-      const hobject_t &end,
-      const int64_t pool_id,
-      int budget,
-      epoch_t reply_epoch,
-      std::list<librados::ListObjectImpl> *result, 
-      hobject_t *next,
-      Context *on_finish);
-  friend class C_EnumerateReply;
+    ceph::buffer::list&& bl,
+    boost::system::error_code ec,
+    std::unique_ptr<EnumerationContext<T>>&& ectx);
 
   // -------------------------
   // pool ops
@@ -3000,18 +3660,66 @@ private:
   void pool_op_submit(PoolOp *op);
   void _pool_op_submit(PoolOp *op);
   void _finish_pool_op(PoolOp *op, int r);
-  void _do_delete_pool(int64_t pool, Context *onfinish);
-public:
-  int create_pool_snap(int64_t pool, std::string& snapName, Context *onfinish);
-  int allocate_selfmanaged_snap(int64_t pool, snapid_t *psnapid,
-                               Context *onfinish);
-  int delete_pool_snap(int64_t pool, std::string& snapName, Context *onfinish);
-  int delete_selfmanaged_snap(int64_t pool, snapid_t snap, Context *onfinish);
+  void _do_delete_pool(int64_t pool,
+                      decltype(PoolOp::onfinish)&& onfinish);
 
-  int create_pool(std::string& name, Context *onfinish,
-                 int crush_rule=-1);
-  int delete_pool(int64_t pool, Context *onfinish);
-  int delete_pool(const std::string& name, Context *onfinish);
+public:
+  void create_pool_snap(int64_t pool, std::string_view snapName,
+                       decltype(PoolOp::onfinish)&& onfinish);
+  void create_pool_snap(int64_t pool, std::string_view snapName,
+                       Context* c) {
+    create_pool_snap(pool, snapName,
+                    OpContextVert<ceph::buffer::list>(c, nullptr));
+  }
+  void allocate_selfmanaged_snap(int64_t pool,
+                                std::unique_ptr<ceph::async::Completion<
+                                void(boost::system::error_code,
+                                     snapid_t)>> onfinish);
+  void allocate_selfmanaged_snap(int64_t pool, snapid_t* psnapid,
+                                Context* c) {
+    allocate_selfmanaged_snap(pool,
+                             OpContextVert(c, psnapid));
+  }
+  void delete_pool_snap(int64_t pool, std::string_view snapName,
+                       decltype(PoolOp::onfinish)&& onfinish);
+  void delete_pool_snap(int64_t pool, std::string_view snapName,
+                       Context* c) {
+    delete_pool_snap(pool, snapName,
+                    OpContextVert<ceph::buffer::list>(c, nullptr));
+  }
+
+  void delete_selfmanaged_snap(int64_t pool, snapid_t snap,
+                              decltype(PoolOp::onfinish)&& onfinish);
+  void delete_selfmanaged_snap(int64_t pool, snapid_t snap,
+                              Context* c) {
+    delete_selfmanaged_snap(pool, snap,
+                           OpContextVert<ceph::buffer::list>(c, nullptr));
+  }
+
+
+  void create_pool(std::string_view name,
+                  decltype(PoolOp::onfinish)&& onfinish,
+                  int crush_rule=-1);
+  void create_pool(std::string_view name, Context *onfinish,
+                 int crush_rule=-1) {
+    create_pool(name,
+               OpContextVert<ceph::buffer::list>(onfinish, nullptr),
+               crush_rule);
+  }
+  void delete_pool(int64_t pool,
+                  decltype(PoolOp::onfinish)&& onfinish);
+  void delete_pool(int64_t pool,
+                  Context* onfinish) {
+    delete_pool(pool, OpContextVert<ceph::buffer::list>(onfinish, nullptr));
+  }
+
+  void delete_pool(std::string_view name,
+                  decltype(PoolOp::onfinish)&& onfinish);
+
+  void delete_pool(std::string_view name,
+                  Context* onfinish) {
+    delete_pool(name, OpContextVert<ceph::buffer::list>(onfinish, nullptr));
+  }
 
   void handle_pool_op_reply(MPoolOpReply *m);
   int pool_op_cancel(ceph_tid_t tid, int r);
@@ -3022,10 +3730,19 @@ private:
   void _poolstat_submit(PoolStatOp *op);
 public:
   void handle_get_pool_stats_reply(MGetPoolStatsReply *m);
-  void get_pool_stats(std::list<std::string>& pools,
-                     std::map<std::string,pool_stat_t> *result,
-                     bool *per_pool,
-                     Context *onfinish);
+  void get_pool_stats(const std::vector<std::string>& pools,
+                     decltype(PoolStatOp::onfinish)&& onfinish);
+  template<typename CompletionToken>
+  auto get_pool_stats(const std::vector<std::string>& pools,
+                     CompletionToken&& token) {
+    boost::asio::async_completion<CompletionToken,
+                                 PoolStatOp::OpSig> init(token);
+    get_pool_stats(pools,
+                  PoolStatOp::OpComp::create(
+                    service.get_executor(),
+                    std::move(init.completion_handler)));
+    return init.result.get();
+  }
   int pool_stat_op_cancel(ceph_tid_t tid, int r);
   void _finish_pool_stat_op(PoolStatOp *op, int r);
 
@@ -3035,8 +3752,21 @@ private:
   void _fs_stats_submit(StatfsOp *op);
 public:
   void handle_fs_stats_reply(MStatfsReply *m);
+  void get_fs_stats(boost::optional<int64_t> poolid,
+                   decltype(StatfsOp::onfinish)&& onfinish);
+  template<typename CompletionToken>
+  auto get_fs_stats(boost::optional<int64_t> poolid,
+                   CompletionToken&& token) {
+    boost::asio::async_completion<CompletionToken, StatfsOp::OpSig> init(token);
+    get_fs_stats(poolid,
+                StatfsOp::OpComp::create(service.get_executor(),
+                                         std::move(init.completion_handler)));
+    return init.result.get();
+  }
   void get_fs_stats(struct ceph_statfs& result, boost::optional<int64_t> poolid,
-                   Context *onfinish);
+                   Context *onfinish) {
+    get_fs_stats(poolid, OpContextVert(onfinish, result));
+  }
   int statfs_op_cancel(ceph_tid_t tid, int r);
   void _finish_statfs_op(StatfsOp *op, int r);
 
@@ -3139,7 +3869,9 @@ public:
 
 private:
   epoch_t epoch_barrier = 0;
-  bool retry_writes_after_first_reply;
+  bool retry_writes_after_first_reply =
+    cct->_conf->objecter_retry_writes_after_first_reply;
+
 public:
   void set_epoch_barrier(epoch_t epoch);
 
index 14962e84d27a3bbf6024688db20cfbadaf5da42c..135794da9f7f8a713ebf4e7cb628f08f2a703b24 100644 (file)
@@ -190,10 +190,13 @@ target_link_libraries(unittest_librados_config
   librados
   ${BLKID_LIBRARIES} ${GSSAPI_LIBRARIES} ${OPENLDAP_LIBRARIES})
 
-add_executable(ceph_test_rados_completion_speed
-  completion_speed.cc)
-target_link_libraries(ceph_test_rados_completion_speed
-  librados ${UNITTEST_LIBS} radostest-cxx)
+# Removing this test. We can't shove it into Finisher as it's not a
+# Context any more, and wrapping it to adapt it would be less fair.
+
+#add_executable(ceph_test_rados_completion_speed
+#  completion_speed.cc)
+#target_link_libraries(ceph_test_rados_completion_speed
+#  librados ${UNITTEST_LIBS} radostest-cxx)
 
 add_executable(ceph_test_rados_op_speed
   op_speed.cc)
index d62b0d7104a736f40a3d9707e1b271d3e4af5a98..613a18f247e175b4f648126d72a228b8a7dc9552 100644 (file)
@@ -263,7 +263,7 @@ class ClientStub : public TestStub
     dout(10) << "ClientStub::" << __func__ << " starting messenger at "
            << messenger->get_myaddrs() << dendl;
 
-    objecter.reset(new Objecter(cct, messenger.get(), &monc, NULL, 0, 0));
+    objecter.reset(new Objecter(cct, messenger.get(), &monc, poolctx, 0, 0));
     ceph_assert(objecter.get() != NULL);
     objecter->set_balanced_budget();
 
index 3e867aa735f277b777eae0a9c6ab6f0422f41f8c..cc4de9ff1b8b2586efbc38b48f7cf45beb0ea58e 100644 (file)
@@ -28,7 +28,7 @@ MDSUtility::MDSUtility() :
   monc = new MonClient(g_ceph_context, poolctx);
   messenger = Messenger::create_client_messenger(g_ceph_context, "mds");
   fsmap = new FSMap();
-  objecter = new Objecter(g_ceph_context, messenger, monc, NULL, 0, 0);
+  objecter = new Objecter(g_ceph_context, messenger, monc, poolctx, 0, 0);
 }