git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
cls/fifo: Use neorados interface
author: Adam C. Emerson <aemerson@redhat.com>
Tue, 4 Feb 2020 16:33:48 +0000 (11:33 -0500)
committer: Adam C. Emerson <aemerson@redhat.com>
Wed, 9 Sep 2020 02:09:40 +0000 (22:09 -0400)
Rewrite around the asynchronous ASIO-based RADOS interface, and
include support for calls from multiple threads.

`allocate_unique` pattern contributed by
Casey Bodley <cbodley@redhat.com>

Co-authored-by: Casey Bodley <cbodley@redhat.com>
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
16 files changed:
src/cls/CMakeLists.txt
src/cls/fifo/cls_fifo.cc
src/cls/fifo/cls_fifo_client.cc [deleted file]
src/cls/fifo/cls_fifo_client.h [deleted file]
src/cls/fifo/cls_fifo_ops.cc [deleted file]
src/cls/fifo/cls_fifo_ops.h
src/cls/fifo/cls_fifo_types.cc [deleted file]
src/cls/fifo/cls_fifo_types.h
src/common/options.cc
src/neorados/CMakeLists.txt
src/neorados/cls/fifo.cc [new file with mode: 0644]
src/neorados/cls/fifo.h [new file with mode: 0644]
src/test/CMakeLists.txt
src/test/cls_fifo/CMakeLists.txt
src/test/cls_fifo/bench_cls_fifo.cc [new file with mode: 0644]
src/test/cls_fifo/test_cls_fifo.cc

index a100dd04b68e380de007deed70ea0b2c3f19bc0a..a18b6bf3ea76a1af7b18252737cdc18dd62fc14d 100644 (file)
@@ -329,7 +329,7 @@ add_library(cls_2pc_queue_client STATIC ${cls_2pc_queue_client_srcs})
 add_subdirectory(cmpomap)
 
 # cls_fifo
-set(cls_fifo_srcs fifo/cls_fifo.cc fifo/cls_fifo_types.cc)
+set(cls_fifo_srcs fifo/cls_fifo.cc)
 add_library(cls_fifo SHARED ${cls_fifo_srcs})
 set_target_properties(cls_fifo PROPERTIES
   VERSION "1.0.0"
@@ -337,9 +337,3 @@ set_target_properties(cls_fifo PROPERTIES
   INSTALL_RPATH ""
   CXX_VISIBILITY_PRESET hidden)
 install(TARGETS cls_fifo DESTINATION ${cls_dir})
-
-set(cls_fifo_client_srcs
-  fifo/cls_fifo_client.cc
-  fifo/cls_fifo_types.cc
-  fifo/cls_fifo_ops.cc)
-add_library(cls_fifo_client STATIC ${cls_fifo_client_srcs})
index 553ad303573982e29691d92cee936e2e81df7da0..baa94dc8eb830cee037fa0261f8dc00ac8273406 100644 (file)
@@ -8,26 +8,32 @@
  *
  */
 
-#include <errno.h>
+#include <cerrno>
+#include <optional>
+#include <string>
+
+#undef FMT_HEADER_ONLY
+#define FMT_HEADER_ONLY 1
+#include <fmt/format.h>
+
+#include "include/buffer.h"
+#include "include/types.h"
 
 #include "objclass/objclass.h"
 
 #include "cls/fifo/cls_fifo_ops.h"
 #include "cls/fifo/cls_fifo_types.h"
 
-
-using namespace rados::cls::fifo;
-
-
 CLS_VER(1,0)
 CLS_NAME(fifo)
 
+namespace rados::cls::fifo {
 
-#define CLS_FIFO_MAX_PART_HEADER_SIZE 512
+static constexpr auto CLS_FIFO_MAX_PART_HEADER_SIZE = 512;
 
-static uint32_t part_entry_overhead;
+static std::uint32_t part_entry_overhead;
 
-struct cls_fifo_entry_header_pre {
+struct entry_header_pre {
   __le64 magic;
   __le64 pre_size;
   __le64 header_size;
@@ -36,66 +42,64 @@ struct cls_fifo_entry_header_pre {
   __le32 reserved;
 } __attribute__ ((packed));
 
-struct cls_fifo_entry_header {
+struct entry_header {
   ceph::real_time mtime;
 
-  void encode(bufferlist &bl) const {
+  void encode(ceph::buffer::list& bl) const {
     ENCODE_START(1, 1, bl);
     encode(mtime, bl);
     ENCODE_FINISH(bl);
   }
-  void decode(bufferlist::const_iterator &bl) {
+  void decode(ceph::buffer::list::const_iterator& bl) {
     DECODE_START(1, bl);
     decode(mtime, bl);
     DECODE_FINISH(bl);
   }
 };
-WRITE_CLASS_ENCODER(cls_fifo_entry_header)
+WRITE_CLASS_ENCODER(entry_header)
 
+namespace {
 
-static string new_oid_prefix(string id, std::optional<string>& val)
+std::string new_oid_prefix(std::string id, std::optional<std::string>& val)
 {
+  static constexpr auto PREFIX_RND_SIZE = 12;
   if (val) {
     return *val;
   }
 
-#define PREFIX_RND_SIZE 12
-
   char buf[PREFIX_RND_SIZE + 1];
   buf[PREFIX_RND_SIZE] = 0;
 
   cls_gen_rand_base64(buf, sizeof(buf) - 1);
 
-  char s[id.size() + 1 + sizeof(buf) + 16];
-  snprintf(s, sizeof(s), "%s.%s", id.c_str(), buf);
-  return s;
+  return fmt::format("{}.{}", id, buf);
 }
 
-static int write_header(cls_method_context_t hctx,
-                        fifo_info_t& header,
-                        bool inc_ver = true)
+int write_header(cls_method_context_t hctx,
+                info& header,
+                bool inc_ver = true)
 {
-  if (header.objv.instance.empty()) {
-#define HEADER_INSTANCE_SIZE 16
-  char buf[HEADER_INSTANCE_SIZE + 1];
-  buf[HEADER_INSTANCE_SIZE] = 0;
-  cls_gen_rand_base64(buf, sizeof(buf) - 1);
-
-    header.objv.instance = buf;
+  static constexpr auto HEADER_INSTANCE_SIZE = 16;
+  if (header.version.instance.empty()) {
+    char buf[HEADER_INSTANCE_SIZE + 1];
+    buf[HEADER_INSTANCE_SIZE] = 0;
+    cls_gen_rand_base64(buf, sizeof(buf) - 1);
+    header.version.instance = buf;
   }
   if (inc_ver) {
-    ++header.objv.ver;
+    ++header.version.ver;
   }
-  bufferlist bl;
+  ceph::buffer::list bl;
   encode(header, bl);
   return cls_cxx_write_full(hctx, &bl);
 }
 
-static int read_part_header(cls_method_context_t hctx,
-                            fifo_part_header_t *part_header)
+int read_part_header(cls_method_context_t hctx,
+                    part_header* part_header)
 {
-  bufferlist bl;
-  int r = cls_cxx_read2(hctx, 0, CLS_FIFO_MAX_PART_HEADER_SIZE, &bl, CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
+  ceph::buffer::list bl;
+  int r = cls_cxx_read2(hctx, 0, CLS_FIFO_MAX_PART_HEADER_SIZE, &bl,
+                       CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
   if (r < 0) {
     CLS_ERR("ERROR: %s(): cls_cxx_read2() on obj returned %d", __func__, r);
     return r;
@@ -104,34 +108,40 @@ static int read_part_header(cls_method_context_t hctx,
   auto iter = bl.cbegin();
   try {
     decode(*part_header, iter);
-  } catch (buffer::error& err) {
+  } catch (const ceph::buffer::error& err) {
     CLS_ERR("ERROR: %s(): failed decoding part header", __func__);
     return -EIO;
   }
 
-  CLS_LOG(20, "%s():%d read part_header:\n"
-           "\ttag=%s\n"
-           "\tmagic=0x%llx\n"
-           "\tmin_ofs=%lld\n"
-           "\tmax_ofs=%lld\n"
-           "\tmin_index=%lld\n"
-           "\tmax_index=%lld\n",
-           __func__, __LINE__,
-           part_header->tag.c_str(), 
-           (long long)part_header->magic,
-           (long long)part_header->min_ofs,
-           (long long)part_header->max_ofs,
-           (long long)part_header->min_index,
-           (long long)part_header->max_index);
+  using ceph::operator <<;
+  std::ostringstream ss;
+  ss << part_header->max_time;
+  CLS_LOG(10, "%s():%d read part_header:\n"
+         "\ttag=%s\n"
+         "\tmagic=0x%" PRIx64 "\n"
+         "\tmin_ofs=%" PRId64 "\n"
+         "\tlast_ofs=%" PRId64 "\n"
+         "\tnext_ofs=%" PRId64 "\n"
+         "\tmin_index=%" PRId64 "\n"
+         "\tmax_index=%" PRId64 "\n"
+         "\tmax_time=%s\n",
+         __func__, __LINE__,
+         part_header->tag.c_str(),
+         part_header->magic,
+         part_header->min_ofs,
+         part_header->last_ofs,
+         part_header->next_ofs,
+         part_header->min_index,
+         part_header->max_index,
+         ss.str().c_str());
 
   return 0;
-
 }
 
-static int write_part_header(cls_method_context_t hctx,
-                             fifo_part_header_t& part_header)
+int write_part_header(cls_method_context_t hctx,
+                     part_header& part_header)
 {
-  bufferlist bl;
+  ceph::buffer::list bl;
   encode(part_header, bl);
 
   if (bl.length() > CLS_FIFO_MAX_PART_HEADER_SIZE) {
@@ -140,7 +150,7 @@ static int write_part_header(cls_method_context_t hctx,
   }
 
   int r = cls_cxx_write2(hctx, 0, bl.length(),
-                     &bl, CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
+                        &bl, CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
   if (r < 0) {
     CLS_LOG(10, "%s(): failed to write part header: r=%d",
             __func__, r);
@@ -150,11 +160,11 @@ static int write_part_header(cls_method_context_t hctx,
   return 0;
 }
 
-static int read_header(cls_method_context_t hctx,
-                       std::optional<fifo_objv_t> objv,
-                       fifo_info_t *info)
+int read_header(cls_method_context_t hctx,
+               std::optional<objv> objv,
+               info* info)
 {
-  uint64_t size;
+  std::uint64_t size;
 
   int r = cls_cxx_stat2(hctx, &size, nullptr);
   if (r < 0) {
@@ -162,47 +172,64 @@ static int read_header(cls_method_context_t hctx,
     return r;
   }
 
-  bufferlist bl;
+  ceph::buffer::list bl;
   r = cls_cxx_read2(hctx, 0, size, &bl, CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
   if (r < 0) {
     CLS_ERR("ERROR: %s(): cls_cxx_read2() on obj returned %d", __func__, r);
     return r;
   }
 
+  if (r == 0) {
+    CLS_ERR("ERROR: %s(): Zero length object, returning ENODATA", __func__);
+    return -ENODATA;
+  }
+
   try {
     auto iter = bl.cbegin();
     decode(*info, iter);
-  } catch (buffer::error& err) {
+  } catch (const ceph::buffer::error& err) {
     CLS_ERR("ERROR: %s(): failed decoding header", __func__);
     return -EIO;
   }
 
-  if (objv &&
-      !(info->objv == *objv)) {
-    string s1 = info->objv.to_str();
-    string s2 = objv->to_str();
-    CLS_LOG(10, "%s(): version mismatch (header=%s, req=%s), cancelled operation", __func__, s1.c_str(), s2.c_str());
+  if (objv && !(info->version== *objv)) {
+    auto s1 = info->version.to_str();
+    auto s2 = objv->to_str();
+    CLS_LOG(10, "%s(): version mismatch (header=%s, req=%s), canceled operation",
+           __func__, s1.c_str(), s2.c_str());
     return -ECANCELED;
   }
 
   return 0;
 }
 
-static int fifo_meta_create_op(cls_method_context_t hctx,
-                          bufferlist *in, bufferlist *out)
+int create_meta(cls_method_context_t hctx,
+               ceph::buffer::list* in, ceph::buffer::list* out)
 {
-  CLS_LOG(20, "%s", __func__);
+  CLS_LOG(10, "%s", __func__);
 
-  cls_fifo_meta_create_op op;
+  op::create_meta op;
   try {
     auto iter = in->cbegin();
     decode(op, iter);
-  } catch (const buffer::error &err) {
+  } catch (const ceph::buffer::error& err) {
     CLS_ERR("ERROR: %s(): failed to decode request", __func__);
     return -EINVAL;
   }
 
-  uint64_t size;
+  if (op.id.empty()) {
+    CLS_LOG(10, "%s(): ID cannot be empty", __func__);
+    return -EINVAL;
+  }
+
+  if (op.max_part_size == 0 ||
+      op.max_entry_size == 0 ||
+      op.max_entry_size > op.max_part_size) {
+    CLS_ERR("ERROR: %s(): invalid dimensions.", __func__);
+    return -EINVAL;
+  }
+
+  std::uint64_t size;
 
   int r = cls_cxx_stat2(hctx, &size, nullptr);
   if (r < 0 && r != -ENOENT) {
@@ -215,18 +242,18 @@ static int fifo_meta_create_op(cls_method_context_t hctx,
   }
 
   if (r == 0) {
-    bufferlist bl;
+    ceph::buffer::list bl;
     r = cls_cxx_read2(hctx, 0, size, &bl, CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
     if (r < 0) {
       CLS_ERR("ERROR: %s(): cls_cxx_read2() on obj returned %d", __func__, r);
       return r;
     }
 
-    fifo_info_t header;
+    info header;
     try {
       auto iter = bl.cbegin();
       decode(header, iter);
-    } catch (buffer::error& err) {
+    } catch (const ceph::buffer::error& err) {
       CLS_ERR("ERROR: %s(): failed decoding header", __func__);
       return -EIO;
     }
@@ -234,32 +261,33 @@ static int fifo_meta_create_op(cls_method_context_t hctx,
     if (!(header.id == op.id &&
           (!op.oid_prefix ||
            header.oid_prefix == *op.oid_prefix) &&
-          (!op.objv ||
-           header.objv == *op.objv))) {
-      CLS_LOG(10, "%s(): failed to re-create existing queue with different params", __func__);
+          (!op.version ||
+           header.version == *op.version))) {
+      CLS_LOG(10, "%s(): failed to re-create existing queue "
+             "with different params", __func__);
       return -EEXIST;
     }
 
     return 0; /* already exists */
   }
-  fifo_info_t header;
-  
+  info header;
+
   header.id = op.id;
-  if (op.objv) {
-    header.objv = *op.objv;
+  if (op.version) {
+    header.version = *op.version;
   } else {
-#define DEFAULT_INSTANCE_SIZE 16
+    static constexpr auto DEFAULT_INSTANCE_SIZE = 16;
     char buf[DEFAULT_INSTANCE_SIZE + 1];
     cls_gen_rand_base64(buf, sizeof(buf));
     buf[DEFAULT_INSTANCE_SIZE] = '\0';
-    header.objv.instance = buf;
-    header.objv.ver = 1;
+    header.version.instance = buf;
+    header.version.ver = 1;
   }
   header.oid_prefix = new_oid_prefix(op.id, op.oid_prefix);
 
-  header.data_params.max_part_size = op.max_part_size;
-  header.data_params.max_entry_size = op.max_entry_size;
-  header.data_params.full_size_threshold = op.max_part_size - op.max_entry_size - part_entry_overhead;
+  header.params.max_part_size = op.max_part_size;
+  header.params.max_entry_size = op.max_entry_size;
+  header.params.full_size_threshold = op.max_part_size - op.max_entry_size - part_entry_overhead;
 
   r = write_header(hctx, header, false);
   if (r < 0) {
@@ -270,41 +298,46 @@ static int fifo_meta_create_op(cls_method_context_t hctx,
   return 0;
 }
 
-static int fifo_meta_update_op(cls_method_context_t hctx,
-                                bufferlist *in, bufferlist *out)
+int update_meta(cls_method_context_t hctx, ceph::buffer::list* in,
+               ceph::buffer::list* out)
 {
-  CLS_LOG(20, "%s", __func__);
+  CLS_LOG(10, "%s", __func__);
 
-  cls_fifo_meta_update_op op;
+  op::update_meta op;
   try {
     auto iter = in->cbegin();
     decode(op, iter);
-  } catch (const buffer::error &err) {
+  } catch (const ceph::buffer::error& err) {
     CLS_ERR("ERROR: %s(): failed to decode request", __func__);
     return -EINVAL;
   }
 
-  fifo_info_t header;
-
-  int r = read_header(hctx, op.objv, &header);
-  if (r < 0) {
-    return r;
+  if (op.version.empty()) {
+    CLS_LOG(10, "%s(): no version supplied", __func__);
+    return -EINVAL;
   }
 
-  string err;
+  info header;
 
-  r = header.apply_update(op.tail_part_num,
-                          op.head_part_num,
-                          op.min_push_part_num,
-                          op.max_push_part_num,
-                          op.journal_entries_add,
-                          op.journal_entries_rm,
-                          &err);
+  int r = read_header(hctx, op.version, &header);
   if (r < 0) {
-    CLS_LOG(10, "%s(): %s", __func__, err.c_str());
     return r;
   }
 
+  auto err = header.apply_update(fifo::update()
+                                .tail_part_num(op.tail_part_num)
+                                .head_part_num(op.head_part_num)
+                                .min_push_part_num(op.min_push_part_num)
+                                .max_push_part_num(op.max_push_part_num)
+                                .journal_entries_add(
+                                  std::move(op.journal_entries_add))
+                                .journal_entries_rm(
+                                  std::move(op.journal_entries_rm)));
+  if (err) {
+    CLS_LOG(10, "%s(): %s", __func__, err->c_str());
+    return -EINVAL;
+  }
+
   r = write_header(hctx, header);
   if (r < 0) {
     CLS_LOG(10, "%s(): failed to write header: r=%d", __func__, r);
@@ -314,22 +347,22 @@ static int fifo_meta_update_op(cls_method_context_t hctx,
   return 0;
 }
 
-static int fifo_meta_get_op(cls_method_context_t hctx,
-                          bufferlist *in, bufferlist *out)
+int get_meta(cls_method_context_t hctx, ceph::buffer::list* in,
+            ceph::buffer::list* out)
 {
-  CLS_LOG(20, "%s", __func__);
+  CLS_LOG(10, "%s", __func__);
 
-  cls_fifo_meta_get_op op;
+  op::get_meta op;
   try {
     auto iter = in->cbegin();
     decode(op, iter);
-  } catch (const buffer::error &err) {
+  } catch (const ceph::buffer::error &err) {
     CLS_ERR("ERROR: %s(): failed to decode request", __func__);
     return -EINVAL;
   }
 
-  cls_fifo_meta_get_op_reply reply;
-  int r = read_header(hctx, op.objv, &reply.info);
+  op::get_meta_reply reply;
+  int r = read_header(hctx, op.version, &reply.info);
   if (r < 0) {
     return r;
   }
@@ -342,21 +375,26 @@ static int fifo_meta_get_op(cls_method_context_t hctx,
   return 0;
 }
 
-static int fifo_part_init_op(cls_method_context_t hctx,
-                             bufferlist *in, bufferlist *out)
+int init_part(cls_method_context_t hctx, ceph::buffer::list* in,
+             ceph::buffer::list *out)
 {
-  CLS_LOG(20, "%s", __func__);
+  CLS_LOG(10, "%s", __func__);
 
-  cls_fifo_part_init_op op;
+  op::init_part op;
   try {
     auto iter = in->cbegin();
     decode(op, iter);
-  } catch (const buffer::error &err) {
+  } catch (const ceph::buffer::error &err) {
     CLS_ERR("ERROR: %s(): failed to decode request", __func__);
     return -EINVAL;
   }
 
-  uint64_t size;
+  std::uint64_t size;
+
+  if (op.tag.empty()) {
+    CLS_LOG(10, "%s(): tag required", __func__);
+    return -EINVAL;
+  }
 
   int r = cls_cxx_stat2(hctx, &size, nullptr);
   if (r < 0 && r != -ENOENT) {
@@ -364,7 +402,7 @@ static int fifo_part_init_op(cls_method_context_t hctx,
     return r;
   }
   if (r == 0 && size > 0) {
-    fifo_part_header_t part_header;
+    part_header part_header;
     r = read_part_header(hctx, &part_header);
     if (r < 0) {
       CLS_LOG(10, "%s(): failed to read part header", __func__);
@@ -372,23 +410,27 @@ static int fifo_part_init_op(cls_method_context_t hctx,
     }
 
     if (!(part_header.tag == op.tag &&
-          part_header.params == op.data_params)) {
-      CLS_LOG(10, "%s(): failed to re-create existing part with different params", __func__);
+          part_header.params == op.params)) {
+      CLS_LOG(10, "%s(): failed to re-create existing part with different "
+             "params", __func__);
       return -EEXIST;
     }
 
     return 0; /* already exists */
   }
 
-  fifo_part_header_t part_header;
-  
+  part_header part_header;
+
   part_header.tag = op.tag;
-  part_header.params = op.data_params;
+  part_header.params = op.params;
 
   part_header.min_ofs = CLS_FIFO_MAX_PART_HEADER_SIZE;
-  part_header.max_ofs = part_header.min_ofs;
+  part_header.last_ofs = 0;
+  part_header.next_ofs = part_header.min_ofs;
+  part_header.max_time = ceph::real_clock::now();
 
-  cls_gen_random_bytes((char *)&part_header.magic, sizeof(part_header.magic));
+  cls_gen_random_bytes(reinterpret_cast<char *>(&part_header.magic),
+                      sizeof(part_header.magic));
 
   r = write_part_header(hctx, part_header);
   if (r < 0) {
@@ -399,26 +441,31 @@ static int fifo_part_init_op(cls_method_context_t hctx,
   return 0;
 }
 
-static bool full_part(const fifo_part_header_t& part_header)
+bool full_part(const part_header& part_header)
 {
-  return (part_header.max_ofs > part_header.params.full_size_threshold);
+  return (part_header.next_ofs > part_header.params.full_size_threshold);
 }
 
-static int fifo_part_push_op(cls_method_context_t hctx,
-                             bufferlist *in, bufferlist *out)
+int push_part(cls_method_context_t hctx, ceph::buffer::list* in,
+             ceph::buffer::list* out)
 {
-  CLS_LOG(20, "%s", __func__);
+  CLS_LOG(10, "%s", __func__);
 
-  cls_fifo_part_push_op op;
+  op::push_part op;
   try {
     auto iter = in->cbegin();
     decode(op, iter);
-  } catch (const buffer::error &err) {
+  } catch (const ceph::buffer::error& err) {
     CLS_ERR("ERROR: %s(): failed to decode request", __func__);
     return -EINVAL;
   }
 
-  fifo_part_header_t part_header;
+  if (op.tag.empty()) {
+    CLS_LOG(10, "%s(): tag required", __func__);
+    return -EINVAL;
+  }
+
+  part_header part_header;
   int r = read_part_header(hctx, &part_header);
   if (r < 0) {
     CLS_LOG(10, "%s(): failed to read part header", __func__);
@@ -430,9 +477,10 @@ static int fifo_part_push_op(cls_method_context_t hctx,
     return -EINVAL;
   }
 
-  uint64_t effective_len = op.total_len + op.data_bufs.size() * part_entry_overhead;
+  std::uint64_t effective_len = op.total_len + op.data_bufs.size() *
+    part_entry_overhead;
 
-  if (effective_len > part_header.params.max_entry_size + part_entry_overhead) {
+  if (effective_len > part_header.params.max_part_size) {
     return -EINVAL;
   }
 
@@ -440,57 +488,69 @@ static int fifo_part_push_op(cls_method_context_t hctx,
     return -ERANGE;
   }
 
-  struct cls_fifo_entry_header entry_header;
-  entry_header.mtime = real_clock::now();
-
-  bufferlist entry_header_bl;
+  auto now = ceph::real_clock::now();
+  struct entry_header entry_header = { now };
+  ceph::buffer::list entry_header_bl;
   encode(entry_header, entry_header_bl);
 
   auto max_index = part_header.max_index;
-  auto ofs = part_header.max_ofs;
+  const auto write_ofs = part_header.next_ofs;
+  auto ofs = part_header.next_ofs;
 
-  cls_fifo_entry_header_pre pre_header;
+  entry_header_pre pre_header;
   pre_header.magic = part_header.magic;
   pre_header.pre_size = sizeof(pre_header);
   pre_header.reserved = 0;
 
-  uint64_t total_data = 0;
-
+  std::uint64_t total_data = 0;
   for (auto& data : op.data_bufs) {
     total_data += data.length();
+  }
+  if (total_data != op.total_len) {
+    CLS_LOG(10, "%s(): length mismatch: op.total_len=%" PRId64
+           " total data received=%" PRId64,
+            __func__, op.total_len, total_data);
+    return -EINVAL;
+  }
+
+
+  int entries_pushed = 0;
+  ceph::buffer::list all_data;
+  for (auto& data : op.data_bufs) {
+    if (full_part(part_header))
+      break;
 
     pre_header.header_size = entry_header_bl.length();
     pre_header.data_size = data.length();
     pre_header.index = max_index;
 
-    bufferptr pre((char *)&pre_header, sizeof(pre_header));
-    bufferlist all_data;
+    bufferptr pre(reinterpret_cast<char*>(&pre_header), sizeof(pre_header));
+    auto entry_write_len = pre.length() + entry_header_bl.length() + data.length();
     all_data.append(pre);
     all_data.append(entry_header_bl);
     all_data.claim_append(data);
 
-    auto write_len = all_data.length();
-
-    r = cls_cxx_write2(hctx, ofs, write_len,
-                       &all_data, CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
-    if (r < 0) {
-      CLS_LOG(10, "%s(): failed to write entry (ofs=%lld len=%lld): r=%d",
-              __func__, (long long)part_header.max_ofs, (long long)write_len, r);
-      return r;
-    }
-
-    ofs += write_len;
+    part_header.last_ofs = ofs;
+    ofs += entry_write_len;
     ++max_index;
+    ++entries_pushed;
+    part_header.max_index = max_index;
+    part_header.next_ofs = ofs;
   }
+  part_header.max_time = now;
 
-  if (total_data != op.total_len) {
-    CLS_LOG(10, "%s(): length mismatch: op.total_len=%lld total data received=%lld",
-            __func__, (long long)op.total_len, (long long)total_data);
-    return -EINVAL;
+  auto write_len = all_data.length();
+
+  r = cls_cxx_write2(hctx, write_ofs, write_len,
+                    &all_data, CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
+
+  if (r < 0) {
+    CLS_LOG(10,"%s(): failed to write entries (ofs=%" PRIu64
+           " len=%u): r=%d", __func__, write_ofs,
+           write_len, r);
+    return r;
   }
 
-  part_header.max_index = max_index;
-  part_header.max_ofs = ofs;
 
   r = write_part_header(hctx, part_header);
   if (r < 0) {
@@ -498,56 +558,60 @@ static int fifo_part_push_op(cls_method_context_t hctx,
     return r;
   }
 
-  return 0;
+  if (entries_pushed == 0) {
+    CLS_LOG(0, "%s(): pushed no entries? Can't happen!", __func__);
+    return -EFAULT;
+  }
+
+  return entries_pushed;
 }
 
 class EntryReader {
-  static constexpr uint64_t prefetch_len = (128 * 1024);
+  static constexpr std::uint64_t prefetch_len = (128 * 1024);
 
   cls_method_context_t hctx;
 
-  fifo_part_header_t& part_header;
+  const fifo::part_header& part_header;
 
-  uint64_t ofs;
-  bufferlist data;
+  std::uint64_t ofs;
+  ceph::buffer::list data;
 
-  int fetch(uint64_t num_bytes);
-  int read(uint64_t num_bytes, bufferlist *pbl);
-  int peek(uint64_t num_bytes, char *dest);
-  int seek(uint64_t num_bytes);
+  int fetch(std::uint64_t num_bytes);
+  int read(std::uint64_t num_bytes, ceph::buffer::list* pbl);
+  int peek(std::uint64_t num_bytes, char *dest);
+  int seek(std::uint64_t num_bytes);
 
 public:
-  EntryReader(cls_method_context_t _hctx,
-              fifo_part_header_t& _part_header,
-              uint64_t _ofs) : hctx(_hctx),
-                               part_header(_part_header),
-                               ofs(_ofs) {
-    if (ofs < part_header.min_ofs) {
-      ofs = part_header.min_ofs;
-    }
-  }
-
-  uint64_t get_ofs() const {
+  EntryReader(cls_method_context_t hctx,
+              const fifo::part_header& part_header,
+              uint64_t ofs) : hctx(hctx),
+                             part_header(part_header),
+                             ofs(ofs < part_header.min_ofs ?
+                                 part_header.min_ofs :
+                                 ofs) {}
+
+  std::uint64_t get_ofs() const {
     return ofs;
   }
 
   bool end() const {
-    return (ofs >= part_header.max_ofs);
+    return (ofs >= part_header.next_ofs);
   }
 
-  int peek_pre_header(cls_fifo_entry_header_pre *pre_header);
-  int get_next_entry(bufferlist *pbl,
-                     uint64_t *pofs,
-                     ceph::real_time *pmtime);
+  int peek_pre_header(entry_header_pre* pre_header);
+  int get_next_entry(ceph::buffer::list* pbl,
+                     std::uint64_t* pofs,
+                     ceph::real_time* pmtime);
 };
 
 
-int EntryReader::fetch(uint64_t num_bytes)
+int EntryReader::fetch(std::uint64_t num_bytes)
 {
-  CLS_LOG(20, "%s(): fetch %d bytes, ofs=%d data.length()=%d", __func__, (int)num_bytes, (int)ofs, (int)data.length());
+  CLS_LOG(10, "%s(): fetch %d bytes, ofs=%d data.length()=%d", __func__, (int)num_bytes, (int)ofs, (int)data.length());
   if (data.length() < num_bytes) {
-    bufferlist bl;
-    CLS_LOG(20, "%s(): reading %d bytes at ofs=%d", __func__, (int)prefetch_len, (int)ofs + data.length());
+    ceph::buffer::list bl;
+    CLS_LOG(10, "%s(): reading % " PRId64 " bytes at ofs=%" PRId64, __func__,
+           prefetch_len, ofs + data.length());
     int r = cls_cxx_read2(hctx, ofs + data.length(), prefetch_len, &bl, CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
     if (r < 0) {
       CLS_ERR("ERROR: %s(): cls_cxx_read2() on obj returned %d", __func__, r);
@@ -556,15 +620,16 @@ int EntryReader::fetch(uint64_t num_bytes)
     data.claim_append(bl);
   }
 
-  if ((unsigned)num_bytes > data.length()) {
-    CLS_LOG(20, "%s(): requested %lld bytes, but only %lld were available", __func__, (long long)num_bytes, (long long)data.length());
+  if (static_cast<unsigned>(num_bytes) > data.length()) {
+    CLS_LOG(10, "%s(): requested %" PRId64 " bytes, but only "
+           "%u were available", __func__, num_bytes, data.length());
     return -ERANGE;
   }
 
   return 0;
 }
 
-int EntryReader::read(uint64_t num_bytes, bufferlist *pbl)
+int EntryReader::read(std::uint64_t num_bytes, ceph::buffer::list* pbl)
 {
   int r = fetch(num_bytes);
   if (r < 0) {
@@ -577,7 +642,7 @@ int EntryReader::read(uint64_t num_bytes, bufferlist *pbl)
   return 0;
 }
 
-int EntryReader::peek(uint64_t num_bytes, char *dest)
+int EntryReader::peek(std::uint64_t num_bytes, char* dest)
 {
   int r = fetch(num_bytes);
   if (r < 0) {
@@ -589,23 +654,25 @@ int EntryReader::peek(uint64_t num_bytes, char *dest)
   return 0;
 }
 
-int EntryReader::seek(uint64_t num_bytes)
+int EntryReader::seek(std::uint64_t num_bytes)
 {
-  bufferlist bl;
+  ceph::buffer::list bl;
 
-  CLS_LOG(20, "%s():%d: num_bytes=%d", __func__, __LINE__, (int)num_bytes);
+  CLS_LOG(10, "%s():%d: num_bytes=%" PRIu64, __func__, __LINE__, num_bytes);
   return read(num_bytes, &bl);
 }
 
-int EntryReader::peek_pre_header(cls_fifo_entry_header_pre *pre_header)
+int EntryReader::peek_pre_header(entry_header_pre* pre_header)
 {
   if (end()) {
     return -ENOENT;
   }
 
-  int r = peek(sizeof(*pre_header), (char *)pre_header);
+  int r = peek(sizeof(*pre_header),
+              reinterpret_cast<char*>(pre_header));
   if (r < 0) {
-    CLS_ERR("ERROR: %s(): peek() size=%d failed: r=%d", __func__, (int)sizeof(pre_header), r);
+    CLS_ERR("ERROR: %s(): peek() size=%zu failed: r=%d", __func__,
+           sizeof(pre_header), r);
     return r;
   }
 
@@ -618,11 +685,11 @@ int EntryReader::peek_pre_header(cls_fifo_entry_header_pre *pre_header)
 }
 
 
-int EntryReader::get_next_entry(bufferlist *pbl,
-                                uint64_t *pofs,
-                                ceph::real_time *pmtime)
+int EntryReader::get_next_entry(ceph::buffer::list* pbl,
+                                std::uint64_t* pofs,
+                                ceph::real_time* pmtime)
 {
-  cls_fifo_entry_header_pre pre_header;
+  entry_header_pre pre_header;
   int r = peek_pre_header(&pre_header);
   if (r < 0) {
     CLS_ERR("ERROR: %s(): peek_pre_header() failed: r=%d", __func__, r);
@@ -633,26 +700,27 @@ int EntryReader::get_next_entry(bufferlist *pbl,
     *pofs = ofs;
   }
 
-  CLS_LOG(20, "%s():%d: pre_header.pre_size=%d", __func__, __LINE__, (int)pre_header.pre_size);
+  CLS_LOG(10, "%s():%d: pre_header.pre_size=%llu", __func__, __LINE__,
+         pre_header.pre_size);
   r = seek(pre_header.pre_size);
   if (r < 0) {
     CLS_ERR("ERROR: %s(): failed to seek: r=%d", __func__, r);
     return r;
   }
 
-  bufferlist header;
-  CLS_LOG(20, "%s():%d: pre_header.header_size=%d", __func__, __LINE__, (int)pre_header.header_size);
+  ceph::buffer::list header;
+  CLS_LOG(10, "%s():%d: pre_header.header_size=%d", __func__, __LINE__, (int)pre_header.header_size);
   r = read(pre_header.header_size, &header);
   if (r < 0) {
     CLS_ERR("ERROR: %s(): failed to read entry header: r=%d", __func__, r);
     return r;
   }
 
-  cls_fifo_entry_header entry_header;
+  entry_header entry_header;
   auto iter = header.cbegin();
   try {
     decode(entry_header, iter);
-  } catch (buffer::error& err) {
+  } catch (ceph::buffer::error& err) {
     CLS_ERR("%s(): failed decoding entry header", __func__);
     return -EIO;
   }
@@ -678,21 +746,21 @@ int EntryReader::get_next_entry(bufferlist *pbl,
   return 0;
 }
 
-static int fifo_part_trim_op(cls_method_context_t hctx,
-                             bufferlist *in, bufferlist *out)
+int trim_part(cls_method_context_t hctx,
+             ceph::buffer::list *in, ceph::buffer::list *out)
 {
-  CLS_LOG(20, "%s", __func__);
+  CLS_LOG(10, "%s", __func__);
 
-  cls_fifo_part_trim_op op;
+  op::trim_part op;
   try {
     auto iter = in->cbegin();
     decode(op, iter);
-  } catch (const buffer::error &err) {
+  } catch (const ceph::buffer::error &err) {
     CLS_ERR("ERROR: %s(): failed to decode request", __func__);
     return -EINVAL;
   }
 
-  fifo_part_header_t part_header;
+  part_header part_header;
   int r = read_part_header(hctx, &part_header);
   if (r < 0) {
     CLS_LOG(10, "%s(): failed to read part header", __func__);
@@ -709,7 +777,7 @@ static int fifo_part_trim_op(cls_method_context_t hctx,
     return 0;
   }
 
-  if (op.ofs >= part_header.max_ofs) {
+  if (op.ofs >= part_header.next_ofs) {
     if (full_part(part_header)) {
       /*
        * trim full part completely: remove object
@@ -718,18 +786,18 @@ static int fifo_part_trim_op(cls_method_context_t hctx,
       r = cls_cxx_remove(hctx);
       if (r < 0) {
         CLS_LOG(0, "%s(): ERROR: cls_cxx_remove() returned r=%d", __func__, r);
-        return r;
+       return r;
       }
 
       return 0;
     }
-    
-    part_header.min_ofs = part_header.max_ofs;
+
+    part_header.min_ofs = part_header.next_ofs;
     part_header.min_index = part_header.max_index;
   } else {
     EntryReader reader(hctx, part_header, op.ofs);
 
-    cls_fifo_entry_header_pre pre_header;
+    entry_header_pre pre_header;
     int r = reader.peek_pre_header(&pre_header);
     if (r < 0) {
       return r;
@@ -737,7 +805,8 @@ static int fifo_part_trim_op(cls_method_context_t hctx,
 
     r = reader.get_next_entry(nullptr, nullptr, nullptr);
     if (r < 0) {
-      CLS_ERR("ERROR: %s(): unexpected failure at get_next_entry(): r=%d", __func__, r);
+      CLS_ERR("ERROR: %s(): unexpected failure at get_next_entry(): r=%d",
+             __func__, r);
       return r;
     }
 
@@ -754,12 +823,12 @@ static int fifo_part_trim_op(cls_method_context_t hctx,
   return 0;
 }
 
-static int fifo_part_list_op(cls_method_context_t hctx,
-                             bufferlist *in, bufferlist *out)
+int list_part(cls_method_context_t hctx, ceph::buffer::list* in,
+             ceph::buffer::list* out)
 {
-  CLS_LOG(20, "%s", __func__);
+  CLS_LOG(10, "%s", __func__);
 
-  cls_fifo_part_list_op op;
+  op::list_part op;
   try {
     auto iter = in->cbegin();
     decode(op, iter);
@@ -768,7 +837,7 @@ static int fifo_part_list_op(cls_method_context_t hctx,
     return -EINVAL;
   }
 
-  fifo_part_header_t part_header;
+  part_header part_header;
   int r = read_part_header(hctx, &part_header);
   if (r < 0) {
     CLS_LOG(10, "%s(): failed to read part header", __func__);
@@ -792,22 +861,21 @@ static int fifo_part_list_op(cls_method_context_t hctx,
     }
   }
 
-  cls_fifo_part_list_op_reply reply;
+  op::list_part_reply reply;
 
   reply.tag = part_header.tag;
 
-#define LIST_MAX_ENTRIES 512
-
-  auto max_entries = std::min(op.max_entries, (int)LIST_MAX_ENTRIES);
+  auto max_entries = std::min(op.max_entries, op::MAX_LIST_ENTRIES);
 
   for (int i = 0; i < max_entries && !reader.end(); ++i) {
-    bufferlist data;
+    ceph::buffer::list data;
     ceph::real_time mtime;
-    uint64_t ofs;
+    std::uint64_t ofs;
 
     r = reader.get_next_entry(&data, &ofs, &mtime);
     if (r < 0) {
-      CLS_ERR("ERROR: %s(): unexpected failure at get_next_entry(): r=%d", __func__, r);
+      CLS_ERR("ERROR: %s(): unexpected failure at get_next_entry(): r=%d",
+             __func__, r);
       return r;
     }
 
@@ -822,21 +890,21 @@ static int fifo_part_list_op(cls_method_context_t hctx,
   return 0;
 }
 
-static int fifo_part_get_info_op(cls_method_context_t hctx,
-                                 bufferlist *in, bufferlist *out)
+int get_part_info(cls_method_context_t hctx, ceph::buffer::list *in,
+                 ceph::buffer::list *out)
 {
-  CLS_LOG(20, "%s", __func__);
+  CLS_LOG(10, "%s", __func__);
 
-  cls_fifo_part_get_info_op op;
+  op::get_part_info op;
   try {
     auto iter = in->cbegin();
     decode(op, iter);
-  } catch (const buffer::error &err) {
+  } catch (const ceph::buffer::error &err) {
     CLS_ERR("ERROR: %s(): failed to decode request", __func__);
     return -EINVAL;
   }
 
-  cls_fifo_part_get_info_op_reply reply;
+  op::get_part_info_reply reply;
 
   int r = read_part_header(hctx, &reply.header);
   if (r < 0) {
@@ -848,60 +916,63 @@ static int fifo_part_get_info_op(cls_method_context_t hctx,
 
   return 0;
 }
+}
+} // namespace rados::cls::fifo
 
 CLS_INIT(fifo)
 {
-  CLS_LOG(20, "Loaded fifo class!");
+  using namespace rados::cls::fifo;
+  CLS_LOG(10, "Loaded fifo class!");
 
   cls_handle_t h_class;
-  cls_method_handle_t h_fifo_meta_create_op;
-  cls_method_handle_t h_fifo_meta_get_op;
-  cls_method_handle_t h_fifo_meta_update_op;
-  cls_method_handle_t h_fifo_part_init_op;
-  cls_method_handle_t h_fifo_part_push_op;
-  cls_method_handle_t h_fifo_part_trim_op;
-  cls_method_handle_t h_fifo_part_list_op;
-  cls_method_handle_t h_fifo_part_get_info_op;
-
-  cls_register("fifo", &h_class);
-  cls_register_cxx_method(h_class, "fifo_meta_create",
+  cls_method_handle_t h_create_meta;
+  cls_method_handle_t h_get_meta;
+  cls_method_handle_t h_update_meta;
+  cls_method_handle_t h_init_part;
+  cls_method_handle_t h_push_part;
+  cls_method_handle_t h_trim_part;
+  cls_method_handle_t h_list_part;
+  cls_method_handle_t h_get_part_info;
+
+  cls_register(op::CLASS, &h_class);
+  cls_register_cxx_method(h_class, op::CREATE_META,
                           CLS_METHOD_RD | CLS_METHOD_WR,
-                          fifo_meta_create_op, &h_fifo_meta_create_op);
+                          create_meta, &h_create_meta);
 
-  cls_register_cxx_method(h_class, "fifo_meta_get",
+  cls_register_cxx_method(h_class, op::GET_META,
                           CLS_METHOD_RD,
-                          fifo_meta_get_op, &h_fifo_meta_get_op);
+                          get_meta, &h_get_meta);
 
-  cls_register_cxx_method(h_class, "fifo_meta_update",
+  cls_register_cxx_method(h_class, op::UPDATE_META,
                           CLS_METHOD_RD | CLS_METHOD_WR,
-                          fifo_meta_update_op, &h_fifo_meta_update_op);
+                          update_meta, &h_update_meta);
 
-  cls_register_cxx_method(h_class, "fifo_part_init",
+  cls_register_cxx_method(h_class, op::INIT_PART,
                           CLS_METHOD_RD | CLS_METHOD_WR,
-                          fifo_part_init_op, &h_fifo_part_init_op);
+                          init_part, &h_init_part);
 
-  cls_register_cxx_method(h_class, "fifo_part_push",
+  cls_register_cxx_method(h_class, op::PUSH_PART,
                           CLS_METHOD_RD | CLS_METHOD_WR,
-                          fifo_part_push_op, &h_fifo_part_push_op);
+                          push_part, &h_push_part);
 
-  cls_register_cxx_method(h_class, "fifo_part_trim",
+  cls_register_cxx_method(h_class, op::TRIM_PART,
                           CLS_METHOD_RD | CLS_METHOD_WR,
-                          fifo_part_trim_op, &h_fifo_part_trim_op);
+                          trim_part, &h_trim_part);
 
-  cls_register_cxx_method(h_class, "fifo_part_list",
+  cls_register_cxx_method(h_class, op::LIST_PART,
                           CLS_METHOD_RD,
-                          fifo_part_list_op, &h_fifo_part_list_op);
+                          list_part, &h_list_part);
 
-  cls_register_cxx_method(h_class, "fifo_part_get_info",
+  cls_register_cxx_method(h_class, op::GET_PART_INFO,
                           CLS_METHOD_RD,
-                          fifo_part_get_info_op, &h_fifo_part_get_info_op);
+                          get_part_info, &h_get_part_info);
 
   /* calculate entry overhead */
-  struct cls_fifo_entry_header entry_header;
-  bufferlist entry_header_bl;
+  struct entry_header entry_header;
+  ceph::buffer::list entry_header_bl;
   encode(entry_header, entry_header_bl);
 
-  part_entry_overhead = sizeof(cls_fifo_entry_header_pre) + entry_header_bl.length();
+  part_entry_overhead = sizeof(entry_header_pre) + entry_header_bl.length();
 
   return;
 }
diff --git a/src/cls/fifo/cls_fifo_client.cc b/src/cls/fifo/cls_fifo_client.cc
deleted file mode 100644 (file)
index f4888fd..0000000
+++ /dev/null
@@ -1,1070 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2019 Red Hat, Inc.
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation.  See file COPYING.
- *
- */
-
-#include "include/rados/librados.hpp"
-#include "common/dout.h"
-
-#include "auth/Crypto.h"
-
-using namespace librados;
-
-#include "cls/fifo/cls_fifo_ops.h"
-#include "cls/fifo/cls_fifo_client.h"
-
-
-#define dout_subsys ceph_subsys_objclass
-
-
-namespace rados {
-  namespace cls {
-    namespace fifo {
-      int ClsFIFO::meta_create(librados::ObjectWriteOperation *rados_op,
-                               const string& id,
-                               const MetaCreateParams& params) {
-        cls_fifo_meta_create_op op;
-
-        auto& state = params.state;
-
-        if (id.empty()) {
-          return -EINVAL;
-        }
-
-        op.id = id;
-        op.objv = state.objv;
-        op.oid_prefix = state.oid_prefix;
-        op.max_part_size = state.max_part_size;
-        op.max_entry_size = state.max_entry_size;
-        op.exclusive = state.exclusive;
-
-        if (op.max_part_size == 0 ||
-            op.max_entry_size == 0 ||
-            op.max_entry_size > op.max_part_size) {
-          return -EINVAL;
-        }
-
-        bufferlist in;
-        encode(op, in);
-        rados_op->exec("fifo", "fifo_meta_create", in);
-
-        return 0;
-      }
-
-      int ClsFIFO::meta_get(librados::IoCtx& ioctx,
-                            const string& oid,
-                            const MetaGetParams& params,
-                            fifo_info_t *result,
-                            uint32_t *part_header_size,
-                            uint32_t *part_entry_overhead) {
-        cls_fifo_meta_get_op op;
-
-        auto& state = params.state;
-
-        op.objv = state.objv;
-
-        librados::ObjectReadOperation rop;
-
-        bufferlist in;
-        bufferlist out;
-        int op_ret;
-        encode(op, in);
-        rop.exec("fifo", "fifo_meta_get", in, &out, &op_ret);
-
-        int r = ioctx.operate(oid, &rop, nullptr);
-        if (r < 0) {
-          return r;
-        }
-
-        if (op_ret < 0) {
-          return op_ret;
-        }
-
-        cls_fifo_meta_get_op_reply reply;
-        auto iter = out.cbegin();
-        try {
-          decode(reply, iter);
-        } catch (buffer::error& err) {
-          return -EIO;
-        }
-
-        *result = reply.info;
-
-        if (part_header_size) {
-          *part_header_size = reply.part_header_size;
-        }
-
-        if (part_entry_overhead) {
-          *part_entry_overhead = reply.part_entry_overhead;
-        }
-
-        return 0;
-      }
-
-      int ClsFIFO::meta_update(librados::ObjectWriteOperation *rados_op,
-                               const MetaUpdateParams& params) {
-        cls_fifo_meta_update_op op;
-
-        auto& state = params.state;
-
-        if (state.objv.empty()) {
-          return -EINVAL;
-        }
-
-        op.objv = state.objv;
-        op.tail_part_num = state.tail_part_num;
-        op.head_part_num = state.head_part_num;
-        op.min_push_part_num = state.min_push_part_num;
-        op.max_push_part_num = state.max_push_part_num;
-        op.journal_entries_add = state.journal_entries_add;
-        op.journal_entries_rm = state.journal_entries_rm;
-
-        bufferlist in;
-        encode(op, in);
-        rados_op->exec("fifo", "fifo_meta_update", in);
-
-        return 0;
-      }
-
-      int ClsFIFO::part_init(librados::ObjectWriteOperation *rados_op,
-                             const PartInitParams& params) {
-        cls_fifo_part_init_op op;
-
-        auto& state = params.state;
-
-        if (state.tag.empty()) {
-          return -EINVAL;
-        }
-
-        op.tag = state.tag;
-        op.data_params = state.data_params;
-
-        bufferlist in;
-        encode(op, in);
-        rados_op->exec("fifo", "fifo_part_init", in);
-
-        return 0;
-      }
-
-      int ClsFIFO::push_part(librados::ObjectWriteOperation *rados_op,
-                             const PushPartParams& params) {
-        cls_fifo_part_push_op op;
-
-        auto& state = params.state;
-
-        if (state.tag.empty()) {
-          return -EINVAL;
-        }
-
-        op.tag = state.tag;
-        op.data_bufs = state.data_bufs;
-        op.total_len = state.total_len;
-
-        bufferlist in;
-        encode(op, in);
-        rados_op->exec("fifo", "fifo_part_push", in);
-
-        return 0;
-      }
-
-      int ClsFIFO::trim_part(librados::ObjectWriteOperation *rados_op,
-                             const TrimPartParams& params) {
-        cls_fifo_part_trim_op op;
-
-        auto& state = params.state;
-
-        op.tag = state.tag;
-        op.ofs = state.ofs;
-
-        bufferlist in;
-        encode(op, in);
-        rados_op->exec("fifo", "fifo_part_trim", in);
-
-        return 0;
-      }
-
-      int ClsFIFO::list_part(librados::IoCtx& ioctx,
-                             const string& oid,
-                             const ListPartParams& params,
-                             std::vector<cls_fifo_part_list_entry_t> *pentries,
-                             bool *more,
-                             bool *full_part,
-                             string *ptag)
-      {
-        cls_fifo_part_list_op op;
-
-        auto& state = params.state;
-
-        op.tag = state.tag;
-        op.ofs = state.ofs;
-        op.max_entries = state.max_entries;
-
-        librados::ObjectReadOperation rop;
-
-        bufferlist in;
-        bufferlist out;
-        int op_ret;
-        encode(op, in);
-        rop.exec("fifo", "fifo_part_list", in, &out, &op_ret);
-
-        int r = ioctx.operate(oid, &rop, nullptr);
-        if (r < 0) {
-          return r;
-        }
-
-        if (op_ret < 0) {
-          return op_ret;
-        }
-
-        cls_fifo_part_list_op_reply reply;
-        auto iter = out.cbegin();
-        try {
-          decode(reply, iter);
-        } catch (buffer::error& err) {
-          return -EIO;
-        }
-
-        if (pentries) {
-          *pentries = std::move(reply.entries);
-        }
-
-        if (more) {
-          *more = reply.more;
-        }
-
-        if (full_part) {
-          *full_part = reply.full_part;
-        }
-
-        if (ptag) {
-          *ptag = reply.tag;
-        }
-
-        return 0;
-      }
-
-      int ClsFIFO::get_part_info(librados::IoCtx& ioctx,
-                                 const string& oid,
-                                 rados::cls::fifo::fifo_part_header_t *header)
-      {
-        cls_fifo_part_get_info_op op;
-
-        librados::ObjectReadOperation rop;
-
-        bufferlist in;
-        bufferlist out;
-        int op_ret;
-        encode(op, in);
-        rop.exec("fifo", "fifo_part_get_info", in, &out, &op_ret);
-
-        int r = ioctx.operate(oid, &rop, nullptr);
-        if (r < 0) {
-          return r;
-        }
-
-        if (op_ret < 0) {
-          return op_ret;
-        }
-
-        cls_fifo_part_get_info_op_reply reply;
-        auto iter = out.cbegin();
-        try {
-          decode(reply, iter);
-        } catch (buffer::error& err) {
-          return -EIO;
-        }
-
-        if (header) {
-          *header = std::move(reply.header);
-        }
-
-        return 0;
-      }
-
-      string FIFO::craft_marker(int64_t part_num,
-                                   uint64_t part_ofs)
-      {
-        char buf[64];
-        snprintf(buf, sizeof(buf), "%lld:%lld", (long long)part_num, (long long)part_ofs);
-        return string(buf);
-      }
-
-      bool FIFO::parse_marker(const string& marker,
-                              int64_t *part_num,
-                              uint64_t *part_ofs)
-      {
-        if (marker.empty()) {
-          *part_num = meta_info.tail_part_num;
-          *part_ofs = 0;
-          return true;
-        }
-
-        auto pos = marker.find(':');
-        if (pos == string::npos) {
-          return false;
-        }
-
-        auto first = marker.substr(0, pos);
-        auto second = marker.substr(pos + 1);
-
-        string err;
-
-        *part_num = (int64_t)strict_strtoll(first.c_str(), 10, &err);
-        if (!err.empty()) {
-          return false;
-        }
-
-        *part_ofs = (uint64_t)strict_strtoll(second.c_str(), 10, &err);
-        if (!err.empty()) {
-          return false;
-        }
-
-        return true;
-      }
-
-      int FIFO::init_ioctx(librados::Rados *rados,
-                           const string& pool,
-                           std::optional<string> pool_ns)
-      {
-        _ioctx.emplace();
-        int r = rados->ioctx_create(pool.c_str(), *_ioctx);
-        if (r < 0) {
-          return r;
-        }
-
-        if (pool_ns && !pool_ns->empty()) {
-          _ioctx->set_namespace(*pool_ns);
-        }
-
-        ioctx = &(*_ioctx);
-
-        return 0;
-      }
-
-      int ClsFIFO::MetaUpdateParams::apply_update(CephContext *cct,
-                                                  fifo_info_t *info)
-      {
-        string err;
-
-        int r = info->apply_update(state.tail_part_num,
-                                   state.head_part_num,
-                                   state.min_push_part_num,
-                                   state.max_push_part_num,
-                                   state.journal_entries_add,
-                                   state.journal_entries_rm,
-                                   &err);
-        if (r < 0) {
-          ldout(cct, 0) << __func__ << "(): ERROR: " << err << dendl;
-          return r;
-        }
-
-        ++info->objv.ver;
-
-        return 0;
-      }
-
-      int FIFO::update_meta(ClsFIFO::MetaUpdateParams& update_params,
-                            bool *canceled)
-      {
-        update_params.objv(meta_info.objv);
-
-        librados::ObjectWriteOperation wop;
-        int r = ClsFIFO::meta_update(&wop, update_params);
-        if (r < 0) {
-          return r;
-        }
-
-        r = ioctx->operate(meta_oid, &wop);
-        if (r < 0 && r != -ECANCELED) {
-          return r;
-        }
-
-        *canceled = (r == -ECANCELED);
-
-        if (!*canceled) {
-          r = update_params.apply_update(cct, &meta_info);
-          if (r < 0) { /* should really not happen,
-                          but if it does, let's treat it as if race was detected */
-            *canceled = true;
-          }
-        }
-
-        if (*canceled) {
-          r = do_read_meta();
-        }
-        if (r < 0) {
-          return r;
-        }
-
-        return 0;
-      }
-
-      int FIFO::do_read_meta(std::optional<fifo_objv_t> objv)
-      {
-        ClsFIFO::MetaGetParams get_params;
-        if (objv) {
-          get_params.objv(*objv);
-        }
-        int r = ClsFIFO::meta_get(*ioctx,
-                                  meta_oid,
-                                  get_params,
-                                  &meta_info,
-                                  &part_header_size,
-                                  &part_entry_overhead);
-        if (r < 0) {
-          return r;
-        }
-
-        return 0;
-      }
-
-      int FIFO::create_part(int64_t part_num, const string& tag,
-                            int64_t& max_part_num) {
-        librados::ObjectWriteOperation op;
-
-        op.create(true); /* exclusive */
-        int r = ClsFIFO::part_init(&op,
-                                   ClsFIFO::PartInitParams()
-                                   .tag(tag)
-                                   .data_params(meta_info.data_params));
-        if (r < 0) {
-          return r;
-        }
-
-        r = ioctx->operate(meta_info.part_oid(part_num), &op);
-        if (r < 0) {
-          return r;
-        }
-
-        if (part_num > max_part_num) {
-          max_part_num = part_num;
-        }
-
-        return 0;
-      }
-
-      int FIFO::remove_part(int64_t part_num, const string& tag,
-                            int64_t& tail_part_num) {
-        librados::ObjectWriteOperation op;
-        op.remove();
-        int r = ioctx->operate(meta_info.part_oid(part_num), &op);
-        if (r == -ENOENT) {
-          r = 0;
-        }
-        if (r < 0) {
-          return r;
-        }
-
-        if (part_num >= tail_part_num) {
-          tail_part_num = part_num + 1;
-        }
-
-        return 0;
-      }
-
-      int FIFO::process_journal_entry(const fifo_journal_entry_t& entry,
-                                      int64_t& tail_part_num,
-                                      int64_t& head_part_num,
-                                      int64_t& max_part_num)
-      {
-
-        switch (entry.op) {
-          case fifo_journal_entry_t::Op::OP_CREATE:
-            return create_part(entry.part_num, entry.part_tag, max_part_num);
-          case fifo_journal_entry_t::Op::OP_SET_HEAD:
-            if (entry.part_num > head_part_num) {
-              head_part_num = entry.part_num;
-            }
-            return 0;
-          case fifo_journal_entry_t::Op::OP_REMOVE:
-            return remove_part(entry.part_num, entry.part_tag, tail_part_num);
-        default:
-          /* nothing to do */
-          break;
-        }
-
-        return -EIO;
-      }
-
-      int FIFO::process_journal_entries(vector<fifo_journal_entry_t> *processed,
-                                        int64_t& tail_part_num,
-                                        int64_t& head_part_num,
-                                        int64_t& max_part_num)
-      {
-        for (auto& iter : meta_info.journal) {
-          auto& entry = iter.second;
-          int r = process_journal_entry(entry, tail_part_num, head_part_num, max_part_num);
-          if (r < 0) {
-            ldout(cct, 10) << __func__ << "(): ERROR: failed processing journal entry for part=" << entry.part_num << dendl;
-          } else {
-            processed->push_back(entry);
-          }
-        }
-
-        return 0;
-      }
-
-      int FIFO::process_journal()
-      {
-        vector<fifo_journal_entry_t> processed;
-
-        int64_t new_tail = meta_info.tail_part_num;
-        int64_t new_head = meta_info.head_part_num;
-        int64_t new_max = meta_info.max_push_part_num;
-
-        int r = process_journal_entries(&processed, new_tail, new_head, new_max);
-        if (r < 0) {
-          return r;
-        }
-
-        if (processed.empty()) {
-          return 0;
-        }
-
-#define RACE_RETRY 10
-
-        int i;
-
-        for (i = 0; i < RACE_RETRY; ++i) {
-          bool canceled;
-
-          std::optional<int64_t> tail_part_num;
-          std::optional<int64_t> head_part_num;
-          std::optional<int64_t> max_part_num;
-
-          if (new_tail > meta_info.tail_part_num) {
-            tail_part_num = new_tail;
-          }
-
-          if (new_head > meta_info.head_part_num) {
-            head_part_num = new_head;
-          }
-
-          if (new_max > meta_info.max_push_part_num) {
-            max_part_num = new_max;
-          }
-
-          if (processed.empty() &&
-              !tail_part_num &&
-              !max_part_num) {
-            /* nothing to update anymore */
-            break;
-          }
-
-          r = update_meta(ClsFIFO::MetaUpdateParams()
-                          .journal_entries_rm(processed)
-                          .tail_part_num(tail_part_num)
-                          .head_part_num(head_part_num)
-                          .max_push_part_num(max_part_num),
-                          &canceled);
-          if (r < 0) {
-            return r;
-          }
-
-          if (canceled) {
-            vector<fifo_journal_entry_t> new_processed;
-
-            for (auto& e : processed) {
-              auto jiter = meta_info.journal.find(e.part_num);
-              if (jiter == meta_info.journal.end() || /* journal entry was already processed */
-                  !(jiter->second == e)) {
-                continue;
-              }
-              
-              new_processed.push_back(e);
-            }
-            processed = std::move(new_processed);
-            continue;
-          }
-          break;
-        }
-        if (i == RACE_RETRY) {
-          ldout(cct, 0) << "ERROR: " << __func__ << "(): race check failed too many times, likely a bug" << dendl;
-          return -ECANCELED;
-        }
-        return 0;
-      }
-
-      static const char alphanum_plain_table[]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
-
-      void gen_rand_alphanumeric_plain(CephContext *cct, char *dest, int size) /* size should be the required string size + 1 */
-      {
-        cct->random()->get_bytes(dest, size);
-
-        int i;
-        for (i = 0; i < size - 1; i++) {
-          int pos = (unsigned)dest[i];
-          dest[i] = alphanum_plain_table[pos % (sizeof(alphanum_plain_table) - 1)];
-        }
-        dest[i] = '\0';
-      }
-
-      static string generate_tag(CephContext *cct)
-      {
-#define HEADER_TAG_SIZE 16
-        char buf[HEADER_TAG_SIZE + 1];
-        buf[HEADER_TAG_SIZE] = 0;
-        gen_rand_alphanumeric_plain(cct, buf, sizeof(buf));
-        return string(buf);
-      }
-
-      int FIFO::prepare_new_part(bool is_head)
-      {
-        fifo_journal_entry_t jentry;
-
-        meta_info.prepare_next_journal_entry(&jentry, generate_tag(cct));
-
-        int64_t new_head_part_num = meta_info.head_part_num;
-
-        std::optional<fifo_journal_entry_t> new_head_jentry;
-        if (is_head) {
-          new_head_jentry = jentry;
-          new_head_jentry->op = fifo_journal_entry_t::OP_SET_HEAD;
-          new_head_part_num = jentry.part_num;
-        }
-
-        int r;
-        bool canceled;
-
-        int i;
-
-        for (i = 0; i < RACE_RETRY; ++i) {
-          r = update_meta(ClsFIFO::MetaUpdateParams()
-                          .journal_entry_add(jentry)
-                          .journal_entry_add(new_head_jentry),
-                          &canceled);
-          if (r < 0) {
-            return r;
-          }
-
-          if (canceled) {
-            if (meta_info.max_push_part_num >= jentry.part_num &&
-                meta_info.head_part_num >= new_head_part_num) { /* raced, but new part was already written */
-              return 0;
-            }
-
-            auto iter = meta_info.journal.find(jentry.part_num);
-            if (iter == meta_info.journal.end()) {
-              continue;
-            }
-          }
-          break;
-        }
-        if (i == RACE_RETRY) {
-          ldout(cct, 0) << "ERROR: " << __func__ << "(): race check failed too many times, likely a bug" << dendl;
-          return -ECANCELED;
-        }
-
-        r = process_journal();
-        if (r < 0) {
-          return r;
-        }
-
-        return 0;
-      }
-
-      int FIFO::prepare_new_head()
-      {
-        int64_t new_head_num = meta_info.head_part_num + 1;
-
-        if (meta_info.max_push_part_num < new_head_num) {
-          int r = prepare_new_part(true);
-          if (r < 0) {
-            return r;
-          }
-
-          if (meta_info.max_push_part_num < new_head_num) {
-            ldout(cct, 0) << "ERROR: " << __func__ << ": after new part creation: meta_info.max_push_part_num="
-              << meta_info.max_push_part_num << " new_head_num=" << meta_info.max_push_part_num << dendl;
-            return -EIO;
-          }
-
-          return 0;
-        }
-
-        int i;
-
-        for (i = 0; i < RACE_RETRY; ++i) {
-          bool canceled;
-          int r = update_meta(ClsFIFO::MetaUpdateParams()
-                              .head_part_num(new_head_num),
-                              &canceled);
-          if (r < 0) {
-            return r;
-          }
-
-          if (canceled) {
-            if (meta_info.head_part_num < new_head_num) {
-              continue;
-            }
-          }
-          break;
-        }
-        if (i == RACE_RETRY) {
-          ldout(cct, 0) << "ERROR: " << __func__ << "(): race check failed too many times, likely a bug" << dendl;
-          return -ECANCELED;
-        }
-
-        
-        return 0;
-      }
-
-      int FIFO::open(bool create,
-                        std::optional<ClsFIFO::MetaCreateParams> create_params)
-      {
-        if (!ioctx) {
-          return -EINVAL;
-        }
-
-        if (create) {
-          librados::ObjectWriteOperation op;
-
-          ClsFIFO::MetaCreateParams default_params;
-          ClsFIFO::MetaCreateParams *params = (create_params ? &(*create_params) : &default_params);
-
-          int r = ClsFIFO::meta_create(&op, id, *params);
-          if (r < 0) {
-            return r;
-          }
-
-          r = ioctx->operate(meta_oid, &op);
-          if (r < 0) {
-            return r;
-          }
-        }
-
-        std::optional<fifo_objv_t> objv = (create_params ?  create_params->state.objv : nullopt);
-
-        int r = do_read_meta(objv);
-        if (r < 0) {
-          return r;
-        }
-
-        is_open = true;
-
-        return 0;
-      }
-
-      int FIFO::read_meta(std::optional<fifo_objv_t> objv)
-      {
-        if (!is_open) {
-          return -EINVAL;
-        }
-
-        return do_read_meta(objv);
-      }
-
-      int FIFO::push_entries(int64_t part_num, std::vector<bufferlist>& data_bufs)
-      {
-        if (!is_open) {
-          return -EINVAL;
-        }
-
-        librados::ObjectWriteOperation op;
-
-        int r = ClsFIFO::push_part(&op, ClsFIFO::PushPartParams()
-                                   .tag(meta_info.head_tag)
-                                   .data_bufs(data_bufs));
-        if (r < 0) {
-          return r;
-        }
-
-        r = ioctx->operate(meta_info.part_oid(part_num), &op);
-        if (r < 0) {
-          return r;
-        }
-
-        return 0;
-      }
-
-      int FIFO::trim_part(int64_t part_num,
-                          uint64_t ofs,
-                          std::optional<string> tag)
-      {
-        if (!is_open) {
-          return -EINVAL;
-        }
-
-        librados::ObjectWriteOperation op;
-
-        int r = ClsFIFO::trim_part(&op, ClsFIFO::TrimPartParams()
-                                        .tag(tag)
-                                        .ofs(ofs));
-        if (r < 0) {
-          return r;
-        }
-
-        r = ioctx->operate(meta_info.part_oid(part_num), &op);
-        if (r < 0) {
-          return r;
-        }
-
-        return 0;
-      }
-
-      int FIFO::push(bufferlist& bl)
-      {
-        std::vector<bufferlist> data_bufs;
-        data_bufs.push_back(bl);
-
-        return push(data_bufs);
-      }
-
-      int FIFO::push(vector<bufferlist>& data_bufs)
-      {
-        if (!is_open) {
-          return -EINVAL;
-        }
-
-        int r;
-
-        if (meta_info.need_new_head()) {
-          r = prepare_new_head();
-          if (r < 0) {
-            return r;
-          }
-        }
-
-        int i;
-
-        auto iter = data_bufs.begin();
-
-        while (iter != data_bufs.end()) {
-          uint64_t batch_len = 0;
-
-          vector<bufferlist> batch;
-
-          for (; iter != data_bufs.end(); ++iter) {
-            auto& data = *iter;
-            auto data_len = data.length();
-            auto max_entry_size = meta_info.data_params.max_entry_size;
-
-            if (data_len > max_entry_size) {
-              ldout(cct, 10) << __func__ << "(): entry too large: " << data_len << " > " <<  meta_info.data_params.max_entry_size << dendl;
-              return -EINVAL;
-            }
-
-            if (batch_len + data_len > max_entry_size) {
-              break;
-            }
-
-            batch_len +=  data_len + part_entry_overhead; /* we can send entry with data_len up to max_entry_size,
-                                                             however, we want to also account the overhead when dealing
-                                                             with multiple entries. Previous check doesn't account
-                                                             for overhead on purpose. */
-
-            batch.push_back(data);
-          }
-
-
-          for (i = 0; i < RACE_RETRY; ++i) {
-            r = push_entries(meta_info.head_part_num, batch);
-            if (r == -ERANGE) {
-              r = prepare_new_head();
-              if (r < 0) {
-                return r;
-              }
-              continue;
-            }
-            if (r < 0) {
-              return r;
-            }
-            break;
-          }
-          if (i == RACE_RETRY) {
-            ldout(cct, 0) << "ERROR: " << __func__ << "(): race check failed too many times, likely a bug" << dendl;
-            return -ECANCELED;
-          }
-        }
-
-        return 0;
-      }
-
-      int FIFO::list(int max_entries,
-                     std::optional<string> marker,
-                     vector<fifo_entry> *result,
-                     bool *more)
-      {
-        if (!is_open) {
-          return -EINVAL;
-        }
-
-        *more = false;
-
-        int64_t part_num = meta_info.tail_part_num;
-        uint64_t ofs = 0;
-
-        if (marker) {
-          if (!parse_marker(*marker, &part_num, &ofs)) {
-            ldout(cct, 20) << __func__ << "(): failed to parse marker (" << *marker << ")" << dendl;
-            return -EINVAL;
-          }
-        }
-
-        result->clear();
-        result->reserve(max_entries);
-
-        bool part_more{false};
-        bool part_full{false};
-
-        while (max_entries > 0) {
-          std::vector<cls_fifo_part_list_entry_t> entries;
-          int r = ClsFIFO::list_part(*ioctx,
-                                     meta_info.part_oid(part_num),
-                                     ClsFIFO::ListPartParams()
-                                     .ofs(ofs)
-                                     .max_entries(max_entries),
-                                     &entries,
-                                     &part_more,
-                                     &part_full,
-                                     nullptr);
-          if (r == -ENOENT) {
-            r = do_read_meta();
-            if (r < 0) {
-              return r;
-            }
-
-            if (part_num < meta_info.tail_part_num) {
-              /* raced with trim? restart */
-              result->clear();
-              part_num = meta_info.tail_part_num;
-              ofs = 0;
-              continue;
-            }
-
-            /* assuming part was not written yet, so end of data */
-
-            *more = false;
-
-            return 0;
-          }
-          if (r < 0) {
-            ldout(cct, 20) << __func__ << "(): ClsFIFO::list_part() on oid=" << meta_info.part_oid(part_num) << " returned r=" << r << dendl;
-            return r;
-          }
-
-          for (auto& entry : entries) {
-            fifo_entry e;
-            e.data = std::move(entry.data);
-            e.marker = craft_marker(part_num, entry.ofs);
-            e.mtime = entry.mtime;
-
-            result->push_back(e);
-          }
-          max_entries -= entries.size();
-
-          if (max_entries > 0 &&
-              part_more) {
-            continue;
-          }
-
-          if (!part_full) { /* head part is not full */
-            break;
-          }
-
-          ++part_num;
-          ofs = 0;
-        }
-
-        *more = part_full || part_more;
-
-        return 0;
-      }
-
-      int FIFO::trim(const string& marker)
-      {
-        if (!is_open) {
-          return -EINVAL;
-        }
-
-        int64_t part_num;
-        uint64_t ofs;
-
-        if (!parse_marker(marker, &part_num, &ofs)) {
-          ldout(cct, 20) << __func__ << "(): failed to parse marker: marker=" << marker << dendl;
-          return -EINVAL;
-        }
-
-        for (int64_t pn = meta_info.tail_part_num; pn < part_num; ++pn) {
-          int r = trim_part(pn, meta_info.data_params.max_part_size, std::nullopt);
-          if (r < 0 &&
-              r != -ENOENT) {
-            ldout(cct, 0) << __func__ << "(): ERROR: trim_part() on part=" << pn << " returned r=" << r << dendl;
-            return r;
-          }
-        }
-
-        int r = trim_part(part_num, ofs, std::nullopt);
-        if (r < 0 &&
-            r != -ENOENT) {
-          ldout(cct, 0) << __func__ << "(): ERROR: trim_part() on part=" << part_num << " returned r=" << r << dendl;
-          return r;
-        }
-
-        if (part_num <= meta_info.tail_part_num) {
-          /* don't need to modify meta info */
-          return 0;
-        }
-
-        int i;
-
-        for (i = 0; i < RACE_RETRY; ++i) {
-          bool canceled;
-          int r = update_meta(ClsFIFO::MetaUpdateParams()
-                              .tail_part_num(part_num),
-                              &canceled);
-          if (r < 0) {
-            return r;
-          }
-
-          if (canceled) {
-            if (meta_info.tail_part_num < part_num) {
-              continue;
-            }
-          }
-          break;
-
-          if (i == RACE_RETRY) {
-            ldout(cct, 0) << "ERROR: " << __func__ << "(): race check failed too many times, likely a bug" << dendl;
-            return -ECANCELED;
-          }
-        }
-
-        return 0;
-      }
-
-      int FIFO::get_part_info(int64_t part_num,
-                              fifo_part_info *result)
-      {
-        if (!is_open) {
-          return -EINVAL;
-        }
-
-        fifo_part_header_t header;
-
-        int r = ClsFIFO::get_part_info(*ioctx,
-                                       meta_info.part_oid(part_num),
-                                       &header);
-        if (r < 0) {
-          return r;
-        }
-
-        *result = std::move(header);
-
-        return 0;
-      }
-
-    } // namespace fifo
-  } // namespace cls
-} // namespace rados
-
diff --git a/src/cls/fifo/cls_fifo_client.h b/src/cls/fifo/cls_fifo_client.h
deleted file mode 100644 (file)
index 02e5681..0000000
+++ /dev/null
@@ -1,382 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2019 Red Hat, Inc.
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation.  See file COPYING.
- *
- */
-
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-
-#pragma once
-
-#include "cls/fifo/cls_fifo_types.h"
-
-namespace rados {
-  namespace cls {
-    namespace fifo {
-
-      class ClsFIFO {
-      public:
-
-        /* create */
-
-        struct MetaCreateParams {
-          struct State {
-            static constexpr uint64_t default_max_part_size = 4 * 1024 * 1024;
-            static constexpr uint64_t default_max_entry_size = 32 * 1024;
-            std::optional<fifo_objv_t> objv;
-            std::optional<std::string> oid_prefix;
-            bool exclusive{false};
-            uint64_t max_part_size{default_max_part_size};
-            uint64_t max_entry_size{default_max_entry_size};
-          } state;
-
-          MetaCreateParams& oid_prefix(const std::string& oid_prefix) {
-            state.oid_prefix = oid_prefix;
-            return *this;
-          }
-          MetaCreateParams& exclusive(bool exclusive) {
-            state.exclusive = exclusive;
-            return *this;
-          }
-          MetaCreateParams& max_part_size(uint64_t max_part_size) {
-            state.max_part_size = max_part_size;
-            return *this;
-          }
-          MetaCreateParams& max_entry_size(uint64_t max_entry_size) {
-            state.max_entry_size = max_entry_size;
-            return *this;
-          }
-          MetaCreateParams& objv(const fifo_objv_t& objv) {
-            state.objv = objv;
-            return *this;
-          }
-          MetaCreateParams& objv(const std::string& instance, uint64_t ver) {
-            state.objv = fifo_objv_t{instance, ver};
-            return *this;
-          }
-        };
-
-        static int meta_create(librados::ObjectWriteOperation *op,
-                               const string& id,
-                               const MetaCreateParams& params);
-
-        /* get info */
-
-        struct MetaGetParams {
-          struct State {
-            std::optional<fifo_objv_t> objv;
-          } state;
-
-          MetaGetParams& objv(std::optional<fifo_objv_t>& v) {
-            state.objv = v;
-            return *this;
-          }
-          MetaGetParams& objv(const fifo_objv_t& v) {
-            state.objv = v;
-            return *this;
-          }
-          MetaGetParams& objv(const std::string& instance, uint64_t ver) {
-            state.objv = fifo_objv_t{instance, ver};
-            return *this;
-          }
-        };
-        static int meta_get(librados::IoCtx& ioctx,
-                            const string& oid,
-                            const MetaGetParams& params,
-                            rados::cls::fifo::fifo_info_t *result,
-                            uint32_t *part_header_size,
-                            uint32_t *part_entry_overhead);
-
-        /* update */
-
-        struct MetaUpdateParams {
-          struct State {
-            rados::cls::fifo::fifo_objv_t objv;
-
-            std::optional<uint64_t> tail_part_num;
-            std::optional<uint64_t> head_part_num;
-            std::optional<uint64_t> min_push_part_num;
-            std::optional<uint64_t> max_push_part_num;
-            std::vector<rados::cls::fifo::fifo_journal_entry_t> journal_entries_add;
-            std::vector<rados::cls::fifo::fifo_journal_entry_t> journal_entries_rm;
-          } state;
-
-          MetaUpdateParams& objv(const fifo_objv_t& objv) {
-            state.objv = objv;
-            return *this;
-          }
-          MetaUpdateParams& tail_part_num(std::optional<uint64_t> tail_part_num) {
-            state.tail_part_num = tail_part_num;
-            return *this;
-          }
-          MetaUpdateParams& tail_part_num(uint64_t tail_part_num) {
-            state.tail_part_num = tail_part_num;
-            return *this;
-          }
-          MetaUpdateParams& head_part_num(std::optional<uint64_t> head_part_num) {
-            state.head_part_num = head_part_num;
-            return *this;
-          }
-          MetaUpdateParams& head_part_num(uint64_t head_part_num) {
-            state.head_part_num = head_part_num;
-            return *this;
-          }
-          MetaUpdateParams& min_push_part_num(uint64_t num) {
-            state.min_push_part_num = num;
-            return *this;
-          }
-          MetaUpdateParams& max_push_part_num(std::optional<uint64_t> num) {
-            state.max_push_part_num = num;
-            return *this;
-          }
-          MetaUpdateParams& max_push_part_num(uint64_t num) {
-            state.max_push_part_num = num;
-            return *this;
-          }
-          MetaUpdateParams& journal_entry_add(std::optional<rados::cls::fifo::fifo_journal_entry_t> entry) {
-            if (entry) {
-              state.journal_entries_add.push_back(*entry);
-            }
-            return *this;
-          }
-          MetaUpdateParams& journal_entry_add(const rados::cls::fifo::fifo_journal_entry_t& entry) {
-            state.journal_entries_add.push_back(entry);
-            return *this;
-          }
-          MetaUpdateParams& journal_entries_rm(std::vector<rados::cls::fifo::fifo_journal_entry_t>& entries) {
-            state.journal_entries_rm = entries;
-            return *this;
-          }
-
-          int apply_update(CephContext *cct,
-                           rados::cls::fifo::fifo_info_t *info);
-        };
-
-        static int meta_update(librados::ObjectWriteOperation *rados_op,
-                                const MetaUpdateParams& params);
-        /* init part */
-
-        struct PartInitParams {
-          struct State {
-            string tag;
-            rados::cls::fifo::fifo_data_params_t data_params;
-          } state;
-
-          PartInitParams& tag(const std::string& tag) {
-            state.tag = tag;
-            return *this;
-          }
-          PartInitParams& data_params(const rados::cls::fifo::fifo_data_params_t& data_params) {
-            state.data_params = data_params;
-            return *this;
-          }
-        };
-
-        static int part_init(librados::ObjectWriteOperation *op,
-                             const PartInitParams& params);
-
-       /* push part */
-
-        struct PushPartParams {
-          struct State {
-            string tag;
-           std::vector<bufferlist> data_bufs;
-           uint64_t total_len{0};
-          } state;
-
-          PushPartParams& tag(const std::string& tag) {
-            state.tag = tag;
-            return *this;
-          }
-          PushPartParams& data(bufferlist& bl) {
-           state.total_len += bl.length();
-            state.data_bufs.emplace_back(bl);
-            return *this;
-          }
-          PushPartParams& data_bufs(std::vector<bufferlist>& dbs) {
-           for (auto& bl : dbs) {
-             data(bl);
-           }
-            return *this;
-          }
-        };
-
-        static int push_part(librados::ObjectWriteOperation *op,
-                             const PushPartParams& params);
-       /* trim part */
-
-        struct TrimPartParams {
-          struct State {
-            std::optional<string> tag;
-            uint64_t ofs;
-          } state;
-
-          TrimPartParams& tag(std::optional<std::string> tag) {
-            state.tag = tag;
-            return *this;
-          }
-          TrimPartParams& ofs(uint64_t ofs) {
-            state.ofs = ofs;
-            return *this;
-          }
-        };
-
-        static int trim_part(librados::ObjectWriteOperation *op,
-                             const TrimPartParams& params);
-       /* list part */
-
-        struct ListPartParams {
-          struct State {
-            std::optional<string> tag;
-            uint64_t ofs;
-            int max_entries{100};
-          } state;
-
-          ListPartParams& tag(const std::string& tag) {
-            state.tag = tag;
-            return *this;
-          }
-          ListPartParams& ofs(uint64_t ofs) {
-            state.ofs = ofs;
-            return *this;
-          }
-          ListPartParams& max_entries(int _max_entries) {
-            state.max_entries = _max_entries;
-            return *this;
-          }
-        };
-
-        static int list_part(librados::IoCtx& ioctx,
-                             const string& oid,
-                             const ListPartParams& params,
-                             std::vector<cls_fifo_part_list_entry_t> *pentries,
-                             bool *more,
-                             bool *full_part = nullptr,
-                             string *ptag = nullptr);
-
-        static int get_part_info(librados::IoCtx& ioctx,
-                                 const string& oid,
-                                 rados::cls::fifo::fifo_part_header_t *header);
-      };
-
-      struct fifo_entry {
-        bufferlist data;
-        string marker;
-        ceph::real_time mtime;
-      };
-
-      using fifo_part_info = rados::cls::fifo::fifo_part_header_t;
-
-      class FIFO {
-        CephContext *cct;
-        string id;
-
-        string meta_oid;
-
-        std::optional<librados::IoCtx> _ioctx;
-        librados::IoCtx *ioctx{nullptr};
-
-        fifo_info_t meta_info;
-
-        uint32_t part_header_size;
-        uint32_t part_entry_overhead;
-
-        bool is_open{false};
-
-        string craft_marker(int64_t part_num,
-                        uint64_t part_ofs);
-
-        bool parse_marker(const string& marker,
-                          int64_t *part_num,
-                          uint64_t *part_ofs);
-
-        int update_meta(ClsFIFO::MetaUpdateParams& update_params,
-                        bool *canceled);
-        int do_read_meta(std::optional<fifo_objv_t> objv = std::nullopt);
-
-        int create_part(int64_t part_num, const string& tag,
-                        int64_t& max_part_num);
-        int remove_part(int64_t part_num, const string& tag,
-                        int64_t& tail_part_num);
-
-        int process_journal_entry(const fifo_journal_entry_t& entry,
-                                  int64_t& tail_part_num,
-                                  int64_t& head_part_num,
-                                  int64_t& max_part_num);
-        int process_journal_entries(vector<fifo_journal_entry_t> *processed,
-                                    int64_t& tail_part_num,
-                                    int64_t& head_part_num,
-                                    int64_t& max_part_num);
-        int process_journal();
-
-        int prepare_new_part(bool is_head);
-        int prepare_new_head();
-
-       int push_entries(int64_t part_num, std::vector<bufferlist>& data_bufs);
-        int trim_part(int64_t part_num,
-                      uint64_t ofs,
-                      std::optional<string> tag);
-
-      public:
-        FIFO(CephContext *_cct,
-             const string& _id,
-             librados::IoCtx *_ioctx = nullptr) : cct(_cct),
-                                                  id(_id),
-                                                  ioctx(_ioctx) {
-          meta_oid = id;
-        }
-
-        int init_ioctx(librados::Rados *rados,
-                       const string& pool,
-                       std::optional<string> pool_ns);
-
-        void set_ioctx(librados::IoCtx *_ioctx) {
-          ioctx = ioctx;
-        }
-
-        int open(bool create,
-                 std::optional<ClsFIFO::MetaCreateParams> create_params = std::nullopt);
-
-        int read_meta(std::optional<fifo_objv_t> objv = std::nullopt);
-
-        const fifo_info_t& get_meta() const {
-          return meta_info;
-        }
-
-        void get_part_layout_info(uint32_t *header_size, uint32_t *entry_overhead) {
-          if (header_size) {
-            *header_size = part_header_size;
-          }
-
-          if (entry_overhead) {
-            *entry_overhead = part_entry_overhead;
-          }
-        }
-
-        int push(bufferlist& bl);
-       int push(vector<bufferlist>& bl);
-
-        int list(int max_entries,
-                 std::optional<string> marker,
-                 vector<fifo_entry> *result,
-                 bool *more);
-
-        int trim(const string& marker);
-
-        int get_part_info(int64_t part_num,
-                          fifo_part_info *result);
-      };
-    } // namespace fifo
-  }  // namespace cls
-} // namespace rados
diff --git a/src/cls/fifo/cls_fifo_ops.cc b/src/cls/fifo/cls_fifo_ops.cc
deleted file mode 100644 (file)
index e69de29..0000000
index 2a8ceab3eaaf32d9f5b52540e3d0084529ceed7a..0adae1e5d39ae96c373d89758207e491d2f09cd1 100644 (file)
 
 #pragma once
 
+#include <cstdint>
+#include <deque>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "include/buffer.h"
+#include "include/encoding.h"
 #include "include/types.h"
-#include "include/utime.h"
+
 #include "cls/fifo/cls_fifo_types.h"
 
-struct cls_fifo_meta_create_op
+namespace rados::cls::fifo::op {
+struct create_meta
 {
-  string id;
-  std::optional<rados::cls::fifo::fifo_objv_t> objv;
+  std::string id;
+  std::optional<objv> version;
   struct {
-    string name;
-    string ns;
+    std::string name;
+    std::string ns;
   } pool;
-  std::optional<string> oid_prefix;
+  std::optional<std::string> oid_prefix;
 
-  uint64_t max_part_size{0};
-  uint64_t max_entry_size{0};
+  std::uint64_t max_part_size{0};
+  std::uint64_t max_entry_size{0};
 
   bool exclusive{false};
 
-  void encode(bufferlist &bl) const {
+  void encode(ceph::buffer::list& bl) const {
     ENCODE_START(1, 1, bl);
     encode(id, bl);
-    encode(objv, bl);
+    encode(version, bl);
     encode(pool.name, bl);
     encode(pool.ns, bl);
     encode(oid_prefix, bl);
@@ -47,10 +55,10 @@ struct cls_fifo_meta_create_op
     encode(exclusive, bl);
     ENCODE_FINISH(bl);
   }
-  void decode(bufferlist::const_iterator &bl) {
+  void decode(ceph::buffer::list::const_iterator& bl) {
     DECODE_START(1, bl);
     decode(id, bl);
-    decode(objv, bl);
+    decode(version, bl);
     decode(pool.name, bl);
     decode(pool.ns, bl);
     decode(oid_prefix, bl);
@@ -60,39 +68,40 @@ struct cls_fifo_meta_create_op
     DECODE_FINISH(bl);
   }
 };
-WRITE_CLASS_ENCODER(cls_fifo_meta_create_op)
+WRITE_CLASS_ENCODER(create_meta)
 
-struct cls_fifo_meta_get_op
+struct get_meta
 {
-  std::optional<rados::cls::fifo::fifo_objv_t> objv;
+  std::optional<objv> version;
 
-  void encode(bufferlist &bl) const {
+  void encode(ceph::buffer::list& bl) const {
     ENCODE_START(1, 1, bl);
-    encode(objv, bl);
+    encode(version, bl);
     ENCODE_FINISH(bl);
   }
-  void decode(bufferlist::const_iterator &bl) {
+  void decode(ceph::buffer::list::const_iterator& bl) {
     DECODE_START(1, bl);
-    decode(objv, bl);
+    decode(version, bl);
     DECODE_FINISH(bl);
   }
 };
-WRITE_CLASS_ENCODER(cls_fifo_meta_get_op)
+WRITE_CLASS_ENCODER(get_meta)
 
-struct cls_fifo_meta_get_op_reply
+struct get_meta_reply
 {
-  rados::cls::fifo::fifo_info_t info;
-  uint32_t part_header_size{0};
-  uint32_t part_entry_overhead{0}; /* per entry extra data that is stored */
+  fifo::info info;
+  std::uint32_t part_header_size{0};
+  /* per entry extra data that is stored */
+  std::uint32_t part_entry_overhead{0};
 
-  void encode(bufferlist &bl) const {
+  void encode(ceph::buffer::list& bl) const {
     ENCODE_START(1, 1, bl);
     encode(info, bl);
     encode(part_header_size, bl);
     encode(part_entry_overhead, bl);
     ENCODE_FINISH(bl);
   }
-  void decode(bufferlist::const_iterator &bl) {
+  void decode(ceph::buffer::list::const_iterator& bl) {
     DECODE_START(1, bl);
     decode(info, bl);
     decode(part_header_size, bl);
@@ -100,22 +109,22 @@ struct cls_fifo_meta_get_op_reply
     DECODE_FINISH(bl);
   }
 };
-WRITE_CLASS_ENCODER(cls_fifo_meta_get_op_reply)
+WRITE_CLASS_ENCODER(get_meta_reply)
 
-struct cls_fifo_meta_update_op
+struct update_meta
 {
-  rados::cls::fifo::fifo_objv_t objv;
+  objv version;
 
-  std::optional<uint64_t> tail_part_num;
-  std::optional<uint64_t> head_part_num;
-  std::optional<uint64_t> min_push_part_num;
-  std::optional<uint64_t> max_push_part_num;
-  std::vector<rados::cls::fifo::fifo_journal_entry_t> journal_entries_add;
-  std::vector<rados::cls::fifo::fifo_journal_entry_t> journal_entries_rm;
+  std::optional<std::uint64_t> tail_part_num;
+  std::optional<std::uint64_t> head_part_num;
+  std::optional<std::uint64_t> min_push_part_num;
+  std::optional<std::uint64_t> max_push_part_num;
+  std::vector<journal_entry> journal_entries_add;
+  std::vector<journal_entry> journal_entries_rm;
 
-  void encode(bufferlist &bl) const {
+  void encode(ceph::buffer::list& bl) const {
     ENCODE_START(1, 1, bl);
-    encode(objv, bl);
+    encode(version, bl);
     encode(tail_part_num, bl);
     encode(head_part_num, bl);
     encode(min_push_part_num, bl);
@@ -124,9 +133,9 @@ struct cls_fifo_meta_update_op
     encode(journal_entries_rm, bl);
     ENCODE_FINISH(bl);
   }
-  void decode(bufferlist::const_iterator &bl) {
+  void decode(ceph::buffer::list::const_iterator& bl) {
     DECODE_START(1, bl);
-    decode(objv, bl);
+    decode(version, bl);
     decode(tail_part_num, bl);
     decode(head_part_num, bl);
     decode(min_push_part_num, bl);
@@ -136,42 +145,42 @@ struct cls_fifo_meta_update_op
     DECODE_FINISH(bl);
   }
 };
-WRITE_CLASS_ENCODER(cls_fifo_meta_update_op)
+WRITE_CLASS_ENCODER(update_meta)
 
-struct cls_fifo_part_init_op
+struct init_part
 {
-  string tag;
-  rados::cls::fifo::fifo_data_params_t data_params;
+  std::string tag;
+  data_params params;
 
-  void encode(bufferlist &bl) const {
+  void encode(ceph::buffer::list& bl) const {
     ENCODE_START(1, 1, bl);
     encode(tag, bl);
-    encode(data_params, bl);
+    encode(params, bl);
     ENCODE_FINISH(bl);
   }
-  void decode(bufferlist::const_iterator &bl) {
+  void decode(ceph::buffer::list::const_iterator& bl) {
     DECODE_START(1, bl);
     decode(tag, bl);
-    decode(data_params, bl);
+    decode(params, bl);
     DECODE_FINISH(bl);
   }
 };
-WRITE_CLASS_ENCODER(cls_fifo_part_init_op)
+WRITE_CLASS_ENCODER(init_part)
 
-struct cls_fifo_part_push_op
+struct push_part
 {
-  string tag;
-  std::vector<bufferlist> data_bufs;
-  uint64_t total_len{0};
+  std::string tag;
+  std::deque<ceph::buffer::list> data_bufs;
+  std::uint64_t total_len{0};
 
-  void encode(bufferlist &bl) const {
+  void encode(ceph::buffer::list& bl) const {
     ENCODE_START(1, 1, bl);
     encode(tag, bl);
     encode(data_bufs, bl);
     encode(total_len, bl);
     ENCODE_FINISH(bl);
   }
-  void decode(bufferlist::const_iterator &bl) {
+  void decode(ceph::buffer::list::const_iterator& bl) {
     DECODE_START(1, bl);
     decode(tag, bl);
     decode(data_bufs, bl);
@@ -179,42 +188,42 @@ struct cls_fifo_part_push_op
     DECODE_FINISH(bl);
   }
 };
-WRITE_CLASS_ENCODER(cls_fifo_part_push_op)
+WRITE_CLASS_ENCODER(push_part)
 
-struct cls_fifo_part_trim_op
+struct trim_part
 {
-  std::optional<string> tag;
-  uint64_t ofs{0};
+  std::optional<std::string> tag;
+  std::uint64_t ofs{0};
 
-  void encode(bufferlist &bl) const {
+  void encode(ceph::buffer::list& bl) const {
     ENCODE_START(1, 1, bl);
     encode(tag, bl);
     encode(ofs, bl);
     ENCODE_FINISH(bl);
   }
-  void decode(bufferlist::const_iterator &bl) {
+  void decode(ceph::buffer::list::const_iterator& bl) {
     DECODE_START(1, bl);
     decode(tag, bl);
     decode(ofs, bl);
     DECODE_FINISH(bl);
   }
 };
-WRITE_CLASS_ENCODER(cls_fifo_part_trim_op)
+WRITE_CLASS_ENCODER(trim_part)
 
-struct cls_fifo_part_list_op
+struct list_part  // request fields: optional tag, start offset, max entry count (default 100)
 {
-  std::optional<string> tag;
-  uint64_t ofs{0};
+  std::optional<std::string> tag;
+  std::uint64_t ofs{0};
   int max_entries{100};
 
-  void encode(bufferlist &bl) const {
+  void encode(ceph::buffer::list& bl) const {
     ENCODE_START(1, 1, bl);
     encode(tag, bl);
     encode(ofs, bl);
     encode(max_entries, bl);
     ENCODE_FINISH(bl);
   }
-  void decode(bufferlist::const_iterator &bl) {
+  void decode(ceph::buffer::list::const_iterator& bl) {
     DECODE_START(1, bl);
     decode(tag, bl);
     decode(ofs, bl);
@@ -222,17 +231,18 @@ struct cls_fifo_part_list_op
     DECODE_FINISH(bl);
   }
 };
-WRITE_CLASS_ENCODER(cls_fifo_part_list_op)
+WRITE_CLASS_ENCODER(list_part)
+inline constexpr int MAX_LIST_ENTRIES = 512;
 
-struct cls_fifo_part_list_op_reply
+struct list_part_reply
 {
-  string tag;
-  vector<rados::cls::fifo::cls_fifo_part_list_entry_t> entries;
+  std::string tag;
+  std::vector<part_list_entry> entries;
   bool more{false};
   bool full_part{false}; /* whether part is full or still can be written to.
                             A non full part is by definition head part */
 
-  void encode(bufferlist &bl) const {
+  void encode(ceph::buffer::list& bl) const {
     ENCODE_START(1, 1, bl);
     encode(tag, bl);
     encode(entries, bl);
@@ -240,7 +250,7 @@ struct cls_fifo_part_list_op_reply
     encode(full_part, bl);
     ENCODE_FINISH(bl);
   }
-  void decode(bufferlist::const_iterator &bl) {
+  void decode(ceph::buffer::list::const_iterator& bl) {
     DECODE_START(1, bl);
     decode(tag, bl);
     decode(entries, bl);
@@ -249,34 +259,45 @@ struct cls_fifo_part_list_op_reply
     DECODE_FINISH(bl);
   }
 };
-WRITE_CLASS_ENCODER(cls_fifo_part_list_op_reply)
+WRITE_CLASS_ENCODER(list_part_reply)
 
-struct cls_fifo_part_get_info_op
+struct get_part_info  // carries no fields; encode/decode only emit the START/FINISH framing
 {
-  void encode(bufferlist &bl) const {
+  void encode(ceph::buffer::list& bl) const {
     ENCODE_START(1, 1, bl);
     ENCODE_FINISH(bl);
   }
-  void decode(bufferlist::const_iterator &bl) {
+  void decode(ceph::buffer::list::const_iterator& bl) {
     DECODE_START(1, bl);
     DECODE_FINISH(bl);
   }
 };
-WRITE_CLASS_ENCODER(cls_fifo_part_get_info_op)
+WRITE_CLASS_ENCODER(get_part_info)
 
-struct cls_fifo_part_get_info_op_reply
+struct get_part_info_reply  // wraps a single part_header, encoded as the only field
 {
-  rados::cls::fifo::fifo_part_header_t header;
+  part_header header;
 
-  void encode(bufferlist &bl) const {
+  void encode(ceph::buffer::list& bl) const {
     ENCODE_START(1, 1, bl);
     encode(header, bl);
     ENCODE_FINISH(bl);
   }
-  void decode(bufferlist::const_iterator &bl) {
+  void decode(ceph::buffer::list::const_iterator& bl) {
     DECODE_START(1, bl);
     decode(header, bl);
     DECODE_FINISH(bl);
   }
 };
-WRITE_CLASS_ENCODER(cls_fifo_part_get_info_op_reply)
+WRITE_CLASS_ENCODER(get_part_info_reply)
+
+inline constexpr auto CLASS = "fifo";
+inline constexpr auto CREATE_META = "create_meta";
+inline constexpr auto GET_META = "get_meta";
+inline constexpr auto UPDATE_META = "update_meta";
+inline constexpr auto INIT_PART = "init_part";
+inline constexpr auto PUSH_PART = "push_part";
+inline constexpr auto TRIM_PART = "trim_part";
+inline constexpr auto LIST_PART = "part_list";
+inline constexpr auto GET_PART_INFO = "get_part_info";
+} // namespace rados::cls::fifo::op
diff --git a/src/cls/fifo/cls_fifo_types.cc b/src/cls/fifo/cls_fifo_types.cc
deleted file mode 100644 (file)
index b182551..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2019 Red Hat, Inc.
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation.  See file COPYING.
- *
- */
-
-#include "cls_fifo_types.h"
-
-string rados::cls::fifo::fifo_info_t::part_oid(int64_t part_num)
-{
-  char buf[oid_prefix.size() + 32];
-  snprintf(buf, sizeof(buf), "%s.%lld", oid_prefix.c_str(), (long long)part_num);
-
-  return string(buf);
-}
-
-void rados::cls::fifo::fifo_info_t::prepare_next_journal_entry(fifo_journal_entry_t *entry, const string& tag)
-{
-  entry->op = fifo_journal_entry_t::Op::OP_CREATE;
-  entry->part_num = max_push_part_num + 1;
-  entry->part_tag = tag;
-}
-
-int rados::cls::fifo::fifo_info_t::apply_update(std::optional<uint64_t>& _tail_part_num,
-                                                std::optional<uint64_t>& _head_part_num,
-                                                std::optional<uint64_t>& _min_push_part_num,
-                                                std::optional<uint64_t>& _max_push_part_num,
-                                                std::vector<rados::cls::fifo::fifo_journal_entry_t>& journal_entries_add,
-                                                std::vector<rados::cls::fifo::fifo_journal_entry_t>& journal_entries_rm,
-                                                string *err)
-{
-  if (_tail_part_num) {
-    tail_part_num = *_tail_part_num;
-  }
-
-  if (_min_push_part_num) {
-    min_push_part_num = *_min_push_part_num;
-  }
-
-  if (_max_push_part_num) {
-    max_push_part_num = *_max_push_part_num;
-  }
-
-  for (auto& entry : journal_entries_add) {
-    auto iter = journal.find(entry.part_num);
-    if (iter != journal.end() &&
-        iter->second.op == entry.op) {
-      /* don't allow multiple concurrent (same) operations on the same part,
-         racing clients should use objv to avoid races anyway */
-      if (err) {
-        stringstream ss;
-        ss << "multiple concurrent operations on same part are not allowed, part num=" << entry.part_num;
-        *err = ss.str();
-      }
-      return -EINVAL;
-    }
-
-    if (entry.op == fifo_journal_entry_t::Op::OP_CREATE) {
-      tags[entry.part_num] = entry.part_tag;
-    }
-
-    journal.insert(std::pair<int64_t, fifo_journal_entry_t>(entry.part_num, std::move(entry)));
-  }
-
-  for (auto& entry : journal_entries_rm) {
-    journal.erase(entry.part_num);
-  }
-
-  if (_head_part_num) {
-    tags.erase(head_part_num);
-    head_part_num = *_head_part_num;
-    auto iter = tags.find(head_part_num);
-    if (iter != tags.end()) {
-      head_tag = iter->second;
-    } else {
-      head_tag.erase();
-    }
-  }
-
-  return 0;
-}
index bdc1ed773edf8c0b6d6141fd01d90bc2076989f8..749f66e7b96e67184377e75495c4777fcf7563c0 100644 (file)
 
 #pragma once
 
-
+#include <cstdint>
+#include <map>
+#include <optional>
+#include <ostream>
+#include <string>
+#include <vector>
+
+#undef FMT_HEADER_ONLY
+#define FMT_HEADER_ONLY 1
+#include <fmt/format.h>
+
+#include "include/buffer.h"
 #include "include/encoding.h"
 #include "include/types.h"
 
+#include "common/ceph_time.h"
 
 class JSONObj;
 
-namespace rados {
-  namespace cls {
-    namespace fifo {
-      struct fifo_objv_t {
-        string instance;
-        uint64_t ver{0};
-
-        void encode(bufferlist &bl) const {
-          ENCODE_START(1, 1, bl);
-          encode(instance, bl);
-          encode(ver, bl);
-          ENCODE_FINISH(bl);
-        }
-        void decode(bufferlist::const_iterator &bl) {
-          DECODE_START(1, bl);
-          decode(instance, bl);
-          decode(ver, bl);
-          DECODE_FINISH(bl);
-        }
-        void dump(Formatter *f) const;
-        void decode_json(JSONObj *obj);
-
-        bool operator==(const fifo_objv_t& rhs) const {
-          return (instance == rhs.instance &&
-                  ver == rhs.ver);
-        }
-
-        bool empty() const {
-          return instance.empty();
-        }
-
-        string to_str() {
-          char buf[instance.size() + 32];
-          snprintf(buf, sizeof(buf), "%s{%lld}", instance.c_str(), (long long)ver);
-          return string(buf);
-        }
-      };
-      WRITE_CLASS_ENCODER(rados::cls::fifo::fifo_objv_t)
-
-      struct fifo_data_params_t {
-        uint64_t max_part_size{0};
-        uint64_t max_entry_size{0};
-        uint64_t full_size_threshold{0};
-
-        void encode(bufferlist &bl) const {
-          ENCODE_START(1, 1, bl);
-          encode(max_part_size, bl);
-          encode(max_entry_size, bl);
-          encode(full_size_threshold, bl);
-          ENCODE_FINISH(bl);
-        }
-        void decode(bufferlist::const_iterator &bl) {
-          DECODE_START(1, bl);
-          decode(max_part_size, bl);
-          decode(max_entry_size, bl);
-          decode(full_size_threshold, bl);
-          DECODE_FINISH(bl);
-        }
-        void dump(Formatter *f) const;
-        void decode_json(JSONObj *obj);
-
-        bool operator==(const fifo_data_params_t& rhs) const {
-          return (max_part_size == rhs.max_part_size &&
-                  max_entry_size == rhs.max_entry_size &&
-                  full_size_threshold == rhs.full_size_threshold);
-        }
-      };
-      WRITE_CLASS_ENCODER(rados::cls::fifo::fifo_data_params_t)
-
-      struct fifo_journal_entry_t {
-        enum Op {
-          OP_UNKNOWN  = 0,
-          OP_CREATE   = 1,
-          OP_SET_HEAD = 2,
-          OP_REMOVE   = 3,
-        } op{OP_UNKNOWN};
-
-        int64_t part_num{0};
-        string part_tag;
-
-        void encode(bufferlist &bl) const {
-          ENCODE_START(1, 1, bl);
-          encode((int)op, bl);
-          encode(part_num, bl);
-          encode(part_tag, bl);
-          ENCODE_FINISH(bl);
-        }
-        void decode(bufferlist::const_iterator &bl) {
-          DECODE_START(1, bl);
-          int i;
-          decode(i, bl);
-          op = (Op)i;
-          decode(part_num, bl);
-          decode(part_tag, bl);
-          DECODE_FINISH(bl);
-        }
-        void dump(Formatter *f) const;
-
-        bool operator==(const fifo_journal_entry_t& e) {
-          return (op == e.op &&
-                  part_num == e.part_num &&
-                  part_tag == e.part_tag);
-        }
-      };
-      WRITE_CLASS_ENCODER(rados::cls::fifo::fifo_journal_entry_t)
-
-      struct fifo_info_t {
-        string id;
-        fifo_objv_t objv;
-        string oid_prefix;
-        fifo_data_params_t data_params;
-
-        int64_t tail_part_num{0};
-        int64_t head_part_num{-1};
-        int64_t min_push_part_num{0};
-        int64_t max_push_part_num{-1};
-
-        string head_tag;
-        map<int64_t, string> tags;
-
-        std::multimap<int64_t, fifo_journal_entry_t> journal;
-
-        bool need_new_head() {
-          return (head_part_num < min_push_part_num);
-        }
-
-        bool need_new_part() {
-          return (max_push_part_num < min_push_part_num);
-        }
-
-        string part_oid(int64_t part_num);
-        void prepare_next_journal_entry(fifo_journal_entry_t *entry, const string& tag);
-
-        int apply_update(std::optional<uint64_t>& _tail_part_num,
-                         std::optional<uint64_t>& _head_part_num,
-                         std::optional<uint64_t>& _min_push_part_num,
-                         std::optional<uint64_t>& _max_push_part_num,
-                         std::vector<rados::cls::fifo::fifo_journal_entry_t>& journal_entries_add,
-                         std::vector<rados::cls::fifo::fifo_journal_entry_t>& journal_entries_rm,
-                         string *err);
-
-        void encode(bufferlist &bl) const {
-          ENCODE_START(1, 1, bl);
-          encode(id, bl);
-          encode(objv, bl);
-          encode(oid_prefix, bl);
-          encode(data_params, bl);
-          encode(tail_part_num, bl);
-          encode(head_part_num, bl);
-          encode(min_push_part_num, bl);
-          encode(max_push_part_num, bl);
-          encode(tags, bl);
-          encode(head_tag, bl);
-          encode(journal, bl);
-          ENCODE_FINISH(bl);
-        }
-        void decode(bufferlist::const_iterator &bl) {
-          DECODE_START(1, bl);
-          decode(id, bl);
-          decode(objv, bl);
-          decode(oid_prefix, bl);
-          decode(data_params, bl);
-          decode(tail_part_num, bl);
-          decode(head_part_num, bl);
-          decode(min_push_part_num, bl);
-          decode(max_push_part_num, bl);
-          decode(tags, bl);
-          decode(head_tag, bl);
-          decode(journal, bl);
-          DECODE_FINISH(bl);
-        }
-        void dump(Formatter *f) const;
-        void decode_json(JSONObj *obj);
-      };
-      WRITE_CLASS_ENCODER(rados::cls::fifo::fifo_info_t)
-
-      struct cls_fifo_part_list_entry_t {
-        bufferlist data;
-        uint64_t ofs;
-        ceph::real_time mtime;
-
-        cls_fifo_part_list_entry_t() {}
-        cls_fifo_part_list_entry_t(bufferlist&& _data,
-                                   uint64_t _ofs,
-                                   ceph::real_time _mtime) : data(std::move(_data)), ofs(_ofs), mtime(_mtime) {}
-
-
-        void encode(bufferlist &bl) const {
-          ENCODE_START(1, 1, bl);
-          encode(data, bl);
-          encode(ofs, bl);
-          encode(mtime, bl);
-          ENCODE_FINISH(bl);
-        }
-        void decode(bufferlist::const_iterator &bl) {
-          DECODE_START(1, bl);
-          decode(data, bl);
-          decode(ofs, bl);
-          decode(mtime, bl);
-          DECODE_FINISH(bl);
-        }
-      };
-      WRITE_CLASS_ENCODER(rados::cls::fifo::cls_fifo_part_list_entry_t)
-
-      struct fifo_part_header_t {
-        string tag;
-
-        fifo_data_params_t params;
-
-        uint64_t magic{0};
-
-        uint64_t min_ofs{0};
-        uint64_t max_ofs{0};
-        uint64_t min_index{0};
-        uint64_t max_index{0};
-
-        void encode(bufferlist &bl) const {
-          ENCODE_START(1, 1, bl);
-          encode(tag, bl);
-          encode(params, bl);
-          encode(magic, bl);
-          encode(min_ofs, bl);
-          encode(max_ofs, bl);
-          encode(min_index, bl);
-          encode(max_index, bl);
-          ENCODE_FINISH(bl);
-        }
-        void decode(bufferlist::const_iterator &bl) {
-          DECODE_START(1, bl);
-          decode(tag, bl);
-          decode(params, bl);
-          decode(magic, bl);
-          decode(min_ofs, bl);
-          decode(max_ofs, bl);
-          decode(min_index, bl);
-          decode(max_index, bl);
-          DECODE_FINISH(bl);
-        }
-      };
-      WRITE_CLASS_ENCODER(fifo_part_header_t)
+namespace rados::cls::fifo {
+struct objv { // version stamp: an instance string plus a numeric version
+  std::string instance;
+  std::uint64_t ver{0};
+
+  void encode(ceph::buffer::list& bl) const { // field order must stay in step with decode()
+    ENCODE_START(1, 1, bl);
+    encode(instance, bl);
+    encode(ver, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(ceph::buffer::list::const_iterator& bl) { // reads instance, then ver
+    DECODE_START(1, bl);
+    decode(instance, bl);
+    decode(ver, bl);
+    DECODE_FINISH(bl);
+  }
+  void dump(ceph::Formatter* f) const;
+  void decode_json(JSONObj* obj);
+
+  bool operator ==(const objv& rhs) const { // equal only when both instance and ver match
+    return (instance == rhs.instance &&
+           ver == rhs.ver);
+  }
+  bool operator !=(const objv& rhs) const { // exact negation of operator ==
+    return (instance != rhs.instance ||
+           ver != rhs.ver);
+  }
+  bool same_or_later(const objv& rhs) const {
+    return (instance == rhs.instance || // NOTE(review): with `||`, a matching instance passes regardless of ver; confirm `&&` was not intended
+           ver >= rhs.ver);
+  }
+
+  bool empty() const { // "empty" is decided by the instance string alone; ver is not consulted
+    return instance.empty();
+  }
+
+  std::string to_str() const { // renders as instance{ver}; the doubled braces are fmt escapes
+    return fmt::format("{}{{{}}}", instance, ver);
+  }
+};
+WRITE_CLASS_ENCODER(objv)
+// Stream an objv using the textual form produced by objv::to_str().
+// The return type is spelled std::ostream so this header does not depend
+// on a using-directive leaking in from another include.
+inline std::ostream& operator <<(std::ostream& os, const objv& objv)
+{
+  return os << objv.to_str();
+}
+
+struct data_params { // three size parameters, all defaulting to 0
+  std::uint64_t max_part_size{0};
+  std::uint64_t max_entry_size{0};
+  std::uint64_t full_size_threshold{0};
+
+  void encode(ceph::buffer::list& bl) const { // field order must stay in step with decode()
+    ENCODE_START(1, 1, bl);
+    encode(max_part_size, bl);
+    encode(max_entry_size, bl);
+    encode(full_size_threshold, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(ceph::buffer::list::const_iterator& bl) { // reads the fields in the order encode() wrote them
+    DECODE_START(1, bl);
+    decode(max_part_size, bl);
+    decode(max_entry_size, bl);
+    decode(full_size_threshold, bl);
+    DECODE_FINISH(bl);
+  }
+  void dump(ceph::Formatter* f) const;
+  void decode_json(JSONObj* obj);
+
+  bool operator ==(const data_params& rhs) const { // member-wise comparison of all three fields
+    return (max_part_size == rhs.max_part_size &&
+           max_entry_size == rhs.max_entry_size &&
+           full_size_threshold == rhs.full_size_threshold);
+  }
+};
+WRITE_CLASS_ENCODER(data_params)
+// Write the three members of a data_params as comma-separated
+// "name: value" pairs.
+inline std::ostream& operator <<(std::ostream& m, const data_params& d) {
+  m << "max_part_size: " << d.max_part_size << ", ";
+  m << "max_entry_size: " << d.max_entry_size << ", ";
+  m << "full_size_threshold: " << d.full_size_threshold;
+  return m;
+}
+
+// One journaled operation: which operation (op), the part number it
+// applies to, and a tag string for that part.
+struct journal_entry {
+  enum class Op {
+    unknown  = 0,
+    create   = 1,
+    set_head = 2,
+    remove   = 3,
+  } op{Op::unknown};
+
+  std::int64_t part_num{0};
+  std::string part_tag;
+
+  // Field order here is mirrored by decode() and must not change.
+  void encode(ceph::buffer::list& bl) const {
+    ENCODE_START(1, 1, bl);
+    // op is written as a plain int; decode() converts it back.
+    encode(static_cast<int>(op), bl);
+    encode(part_num, bl);
+    encode(part_tag, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(ceph::buffer::list::const_iterator& bl) {
+    DECODE_START(1, bl);
+    int i;
+    decode(i, bl);
+    // No range check: an out-of-range integer becomes an Op value that
+    // matches none of the enumerators.
+    op = static_cast<Op>(i);
+    decode(part_num, bl);
+    decode(part_tag, bl);
+    DECODE_FINISH(bl);
+  }
+  void dump(ceph::Formatter* f) const;
+
+  // Member-wise equality.  const-qualified so that const entries (for
+  // example elements reached through a const container) can be compared.
+  bool operator ==(const journal_entry& e) const {
+    return (op == e.op &&
+           part_num == e.part_num &&
+           part_tag == e.part_tag);
+  }
+};
+WRITE_CLASS_ENCODER(journal_entry)
+// Print the symbolic name of a journal_entry::Op, or "Bad value: N" when
+// the value is not one of the enumerators.
+inline std::ostream& operator <<(std::ostream& m, const journal_entry::Op& o) {
+  const char* label = nullptr;
+  switch (o) {
+  case journal_entry::Op::unknown:
+    label = "Op::unknown";
+    break;
+  case journal_entry::Op::create:
+    label = "Op::create";
+    break;
+  case journal_entry::Op::set_head:
+    label = "Op::set_head";
+    break;
+  case journal_entry::Op::remove:
+    label = "Op::remove";
+    break;
+  }
+  if (label != nullptr) {
+    return m << label;
+  }
+  return m << "Bad value: " << static_cast<int>(o);
+}
+// Write a journal_entry as comma-separated "name: value" pairs.
+inline std::ostream& operator <<(std::ostream& m, const journal_entry& j) {
+  m << "op: " << j.op << ", ";
+  m << "part_num: " << j.part_num <<  ", ";
+  m << "part_tag: " << j.part_tag;
+  return m;
+}
+
+// This is actually a useful builder, since otherwise we end up with
+// four uint64_ts in a row and only care about a subset at a time.
+class update {
+  std::optional<std::uint64_t> tail_part_num_; // unset optionals are treated as "no change" by info::apply_update
+  std::optional<std::uint64_t> head_part_num_;
+  std::optional<std::uint64_t> min_push_part_num_;
+  std::optional<std::uint64_t> max_push_part_num_;
+  std::vector<fifo::journal_entry> journal_entries_add_;
+  std::vector<fifo::journal_entry> journal_entries_rm_;
+
+public:
+
+  update&& tail_part_num(std::optional<std::uint64_t> num) noexcept { // setter; hands back *this as an rvalue so calls can be chained
+    tail_part_num_ = num;
+    return std::move(*this);
+  }
+  auto tail_part_num() const noexcept { // getter; returns the optional by value
+    return tail_part_num_;
+  }
+
+  update&& head_part_num(std::optional<std::uint64_t> num) noexcept { // same setter/getter pairing as tail_part_num
+    head_part_num_ = num;
+    return std::move(*this);
+  }
+  auto head_part_num() const noexcept {
+    return head_part_num_;
+  }
+
+  update&& min_push_part_num(std::optional<std::uint64_t> num)
+    noexcept {
+    min_push_part_num_ = num;
+    return std::move(*this);
+  }
+  auto min_push_part_num() const noexcept {
+    return min_push_part_num_;
+  }
+
+  update&& max_push_part_num(std::optional<std::uint64_t> num) noexcept {
+    max_push_part_num_ = num;
+    return std::move(*this);
+  }
+  auto max_push_part_num() const noexcept {
+    return max_push_part_num_;
+  }
+
+  update&& journal_entry_add(fifo::journal_entry entry) { // appends a single entry to the add list
+    journal_entries_add_.push_back(std::move(entry));
+    return std::move(*this);
+  }
+  update&& journal_entries_add(
+    std::optional<std::vector<fifo::journal_entry>>&& entries) { // replaces the whole add list; an empty optional clears it
+    if (entries) {
+      journal_entries_add_ = std::move(*entries);
+    } else {
+      journal_entries_add_.clear();
+    }
+    return std::move(*this);
+  }
+  const auto& journal_entries_add() const & noexcept { // lvalue access: read-only view of the add list
+    return journal_entries_add_;
+  }
+  auto&& journal_entries_add() && noexcept { // rvalue access: lets the caller move the add list out
+    return std::move(journal_entries_add_);
+  }
+
+  update&& journal_entry_rm(fifo::journal_entry entry) { // appends a single entry to the remove list
+    journal_entries_rm_.push_back(std::move(entry));
+    return std::move(*this);
+  }
+  update&& journal_entries_rm(
+    std::optional<std::vector<fifo::journal_entry>>&& entries) { // replaces the whole remove list; an empty optional clears it
+    if (entries) {
+      journal_entries_rm_ = std::move(*entries);
+    } else {
+      journal_entries_rm_.clear();
+    }
+    return std::move(*this);
+  }
+  const auto& journal_entries_rm() const & noexcept { // lvalue access: read-only view of the remove list
+    return journal_entries_rm_;
+  }
+  auto&& journal_entries_rm() && noexcept { // rvalue access: lets the caller move the remove list out
+    return std::move(journal_entries_rm_);
+  }
+  friend std::ostream& operator <<(std::ostream& m, const update& u); // the printer reads the private members directly
+};
+// Print only the parts of an update that are set or non-empty, separated
+// by ", ".  An update with nothing set prints "(none)".
+inline std::ostream& operator <<(std::ostream& m, const update& u) {
+  bool prev = false;
+  // Emits the separator before every field except the first one printed,
+  // and records that something has now been printed.
+  auto separate = [&m, &prev] {
+    if (prev)
+      m << ", ";
+    prev = true;
+  };
+
+  if (u.tail_part_num_) {
+    separate();
+    m << "tail_part_num: " << *u.tail_part_num_;
+  }
+  if (u.head_part_num_) {
+    separate();
+    m << "head_part_num: " << *u.head_part_num_;
+  }
+  if (u.min_push_part_num_) {
+    separate();
+    m << "min_push_part_num: " << *u.min_push_part_num_;
+  }
+  if (u.max_push_part_num_) {
+    separate();
+    m << "max_push_part_num: " << *u.max_push_part_num_;
+  }
+  if (!u.journal_entries_add_.empty()) {
+    separate();
+    m << "journal_entries_add: {" << u.journal_entries_add_ << "}";
+  }
+  if (!u.journal_entries_rm_.empty()) {
+    separate();
+    m << "journal_entries_rm: {" << u.journal_entries_rm_ << "}";
+  }
+
+  if (!prev)
+    m << "(none)";
+  return m;
+}
 
+// FIFO metadata: identity and version, the object-name prefix and size
+// parameters, the tail/head/push part numbers, per-part tags, and the
+// journal of part operations keyed by part number.
+struct info {
+  std::string id;
+  objv version;
+  std::string oid_prefix;
+  data_params params;
+
+  std::int64_t tail_part_num{0};
+  std::int64_t head_part_num{-1};
+  std::int64_t min_push_part_num{0};
+  std::int64_t max_push_part_num{-1};
+
+  std::string head_tag;
+  // Fully qualified so the header does not rely on a using-directive.
+  std::map<std::int64_t, std::string> tags;
+
+  // A multimap: one part number may have several journaled operations.
+  std::multimap<std::int64_t, journal_entry> journal;
+
+  // True while the head part number is below the minimum push part.
+  bool need_new_head() const {
+    return (head_part_num < min_push_part_num);
+  }
+
+  // True while the maximum push part is below the minimum push part.
+  bool need_new_part() const {
+    return (max_push_part_num < min_push_part_num);
+  }
+
+  // Field order here is mirrored by decode() and must not change.
+  // Note that tags is written before head_tag.
+  void encode(ceph::buffer::list& bl) const {
+    ENCODE_START(1, 1, bl);
+    encode(id, bl);
+    encode(version, bl);
+    encode(oid_prefix, bl);
+    encode(params, bl);
+    encode(tail_part_num, bl);
+    encode(head_part_num, bl);
+    encode(min_push_part_num, bl);
+    encode(max_push_part_num, bl);
+    encode(tags, bl);
+    encode(head_tag, bl);
+    encode(journal, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(ceph::buffer::list::const_iterator& bl) {
+    DECODE_START(1, bl);
+    decode(id, bl);
+    decode(version, bl);
+    decode(oid_prefix, bl);
+    decode(params, bl);
+    decode(tail_part_num, bl);
+    decode(head_part_num, bl);
+    decode(min_push_part_num, bl);
+    decode(max_push_part_num, bl);
+    decode(tags, bl);
+    decode(head_tag, bl);
+    decode(journal, bl);
+    DECODE_FINISH(bl);
+  }
+  void dump(ceph::Formatter* f) const;
+  void decode_json(JSONObj* obj);
+
+  // Object name of a part: "<oid_prefix>.<part_num>".
+  std::string part_oid(std::int64_t part_num) const {
+    return fmt::format("{}.{}", oid_prefix, part_num);
+  }
+
+  // Build (without applying) a create entry for the part numbered one
+  // past max_push_part_num, carrying the given tag.
+  journal_entry next_journal_entry(std::string tag) const {
+    journal_entry entry;
+    entry.op = journal_entry::Op::create;
+    entry.part_num = max_push_part_num + 1;
+    entry.part_tag = std::move(tag);
+    return entry;
+  }
+
+  // Apply the set fields of `update` to this object.
+  //
+  // Returns std::nullopt on success, or an error message when a journal
+  // entry to be added repeats an operation already journaled for the
+  // same part.  The part-number fields are assigned before the journal
+  // entries are checked, and entries are inserted one at a time, so an
+  // error return can leave this object partially updated.
+  std::optional<std::string>
+  apply_update(const update& update) {
+    if (update.tail_part_num()) {
+      tail_part_num = *update.tail_part_num();
+    }
+
+    if (update.min_push_part_num()) {
+      min_push_part_num = *update.min_push_part_num();
+    }
+
+    if (update.max_push_part_num()) {
+      max_push_part_num = *update.max_push_part_num();
+    }
+
+    for (const auto& entry : update.journal_entries_add()) {
+      /* don't allow multiple concurrent (same) operations on the same part,
+         racing clients should use objv to avoid races anyway.  journal is a
+         multimap, so every entry stored for this part is examined; find()
+         would only hand back one of them. */
+      const auto [first, last] = journal.equal_range(entry.part_num);
+      for (auto iter = first; iter != last; ++iter) {
+        if (iter->second.op == entry.op) {
+          return fmt::format("multiple concurrent operations on same part are not "
+                             "allowed, part num={}", entry.part_num);
+        }
+      }
+
+      if (entry.op == journal_entry::Op::create) {
+       tags[entry.part_num] = entry.part_tag;
+      }
+
+      journal.emplace(entry.part_num, entry);
     }
+
+    // Removal is by part number: every journaled entry for that part goes.
+    for (const auto& entry : update.journal_entries_rm()) {
+      journal.erase(entry.part_num);
+    }
+
+    if (update.head_part_num()) {
+      // Drop the tag of the old head, then pick up the new head's tag
+      // if one is recorded.
+      tags.erase(head_part_num);
+      head_part_num = *update.head_part_num();
+      auto iter = tags.find(head_part_num);
+      if (iter != tags.end()) {
+       head_tag = iter->second;
+      } else {
+       head_tag.erase();
+      }
+    }
+
+    return std::nullopt;
   }
+};
+WRITE_CLASS_ENCODER(info)
+// Print every member of an info as comma-separated "name: value" pairs.
+// Nested aggregates (params, tags, journal) are wrapped in braces; the
+// closing brace after the journal keeps the output balanced.
+inline std::ostream& operator <<(std::ostream& m, const info& i) {
+  return m << "id: " << i.id << ", "
+          << "version: " << i.version << ", "
+          << "oid_prefix: " << i.oid_prefix << ", "
+          << "params: {" << i.params << "}, "
+          << "tail_part_num: " << i.tail_part_num << ", "
+          << "head_part_num: " << i.head_part_num << ", "
+          << "min_push_part_num: " << i.min_push_part_num << ", "
+          << "max_push_part_num: " << i.max_push_part_num << ", "
+          << "head_tag: " << i.head_tag << ", "
+          << "tags: {" << i.tags << "}, "
+          << "journal: {" << i.journal << "}";
 }
 
-static inline ostream& operator<<(ostream& os, const rados::cls::fifo::fifo_objv_t& objv)
-{
-  return os << objv.instance << "{" << objv.ver << "}";
+struct part_list_entry { // one listed entry: its data buffer, an offset, and a timestamp
+  ceph::buffer::list data;
+  std::uint64_t ofs = 0;
+  ceph::real_time mtime;
+
+  part_list_entry() {}
+  part_list_entry(ceph::buffer::list&& data,
+                 uint64_t ofs,
+                 ceph::real_time mtime)
+    : data(std::move(data)), ofs(ofs), mtime(mtime) {} // parameters shadow the members; the buffer is moved in, not copied
+
+
+  void encode(ceph::buffer::list& bl) const { // field order must stay in step with decode()
+    ENCODE_START(1, 1, bl);
+    encode(data, bl);
+    encode(ofs, bl);
+    encode(mtime, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(ceph::buffer::list::const_iterator& bl) { // reads data, ofs, mtime in that order
+    DECODE_START(1, bl);
+    decode(data, bl);
+    decode(ofs, bl);
+    decode(mtime, bl);
+    DECODE_FINISH(bl);
+  }
+};
+WRITE_CLASS_ENCODER(part_list_entry)
+// Write a part_list_entry as comma-separated "name: value" pairs.
+inline std::ostream& operator <<(std::ostream& m,
+                                const part_list_entry& p) {
+  // Bring ceph's stream operators into scope for the members below.
+  using ceph::operator <<;
+  m << "data: " << p.data << ", ";
+  m << "ofs: " << p.ofs << ", ";
+  m << "mtime: " << p.mtime;
+  return m;
 }
 
+struct part_header { // per-part header: tag, params, a magic value, offsets, indices, and a timestamp
+  std::string tag;
+
+  data_params params;
+
+  std::uint64_t magic{0};
+
+  std::uint64_t min_ofs{0};
+  std::uint64_t last_ofs{0};
+  std::uint64_t next_ofs{0};
+  std::uint64_t min_index{0};
+  std::uint64_t max_index{0};
+  ceph::real_time max_time;
+
+  void encode(ceph::buffer::list& bl) const { // field order must stay in step with decode()
+    ENCODE_START(1, 1, bl);
+    encode(tag, bl);
+    encode(params, bl);
+    encode(magic, bl);
+    encode(min_ofs, bl);
+    encode(last_ofs, bl);
+    encode(next_ofs, bl);
+    encode(min_index, bl);
+    encode(max_index, bl);
+    encode(max_time, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(ceph::buffer::list::const_iterator& bl) { // reads the fields in the order encode() wrote them
+    DECODE_START(1, bl);
+    decode(tag, bl);
+    decode(params, bl);
+    decode(magic, bl);
+    decode(min_ofs, bl);
+    decode(last_ofs, bl);
+    decode(next_ofs, bl);
+    decode(min_index, bl);
+    decode(max_index, bl);
+    decode(max_time, bl);
+    DECODE_FINISH(bl);
+  }
+};
+WRITE_CLASS_ENCODER(part_header)
+// Write every member of a part_header as comma-separated "name: value"
+// pairs, with the nested params wrapped in braces.
+inline std::ostream& operator <<(std::ostream& m, const part_header& p) {
+  // Bring ceph's stream operators into scope for the members below.
+  using ceph::operator <<;
+  m << "tag: " << p.tag << ", ";
+  m << "params: {" << p.params << "}, ";
+  m << "magic: " << p.magic << ", ";
+  m << "min_ofs: " << p.min_ofs << ", ";
+  m << "last_ofs: " << p.last_ofs << ", ";
+  m << "next_ofs: " << p.next_ofs << ", ";
+  m << "min_index: " << p.min_index << ", ";
+  m << "max_index: " << p.max_index << ", ";
+  m << "max_time: " << p.max_time;
+  return m;
+}
+} // namespace rados::cls::fifo
index 919241515b9fdf02ca1dc074f9ee0db01b46ba37..b94a4715174eef93815919d856f25646577b53cd 100644 (file)
@@ -3306,11 +3306,11 @@ std::vector<Option> get_global_options() {
     .set_description(""),
 
     Option("osd_class_load_list", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-    .set_default("cephfs hello journal lock log numops " "otp rbd refcount rgw rgw_gc timeindex user version cas cmpomap queue 2pc_queue")
+    .set_default("cephfs hello journal lock log numops " "otp rbd refcount rgw rgw_gc timeindex user version cas cmpomap queue 2pc_queue fifo")
     .set_description(""),
 
     Option("osd_class_default_list", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-    .set_default("cephfs hello journal lock log numops " "otp rbd refcount rgw rgw_gc timeindex user version cas cmpomap queue 2pc_queue")
+    .set_default("cephfs hello journal lock log numops " "otp rbd refcount rgw rgw_gc timeindex user version cas cmpomap queue 2pc_queue fifo")
     .set_description(""),
 
     Option("osd_check_for_log_corruption", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
index 3e56edf15a69add0c3123c932bf18feca5ddf4f2..50272374d2b5e0727a9fe053d3c313193e760374 100644 (file)
@@ -35,3 +35,6 @@ target_link_libraries(libneorados PRIVATE
 #   ${BLKID_LIBRARIES} ${CRYPTO_LIBS} ${EXTRALIBS})
 # target_link_libraries(libneorados ${rados_libs})
 # install(TARGETS libneorados DESTINATION ${CMAKE_INSTALL_LIBDIR})
+add_library(neorados_cls_fifo STATIC cls/fifo.cc)
+target_link_libraries(neorados_cls_fifo PRIVATE
+       libneorados ceph-common fmt::fmt)
diff --git a/src/neorados/cls/fifo.cc b/src/neorados/cls/fifo.cc
new file mode 100644 (file)
index 0000000..84f20a5
--- /dev/null
@@ -0,0 +1,384 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2020 Red Hat <contact@redhat.com>
+ * Author: Adam C. Emerson
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#include <cstdint>
+#include <numeric>
+#include <optional>
+#include <string_view>
+
+#undef FMT_HEADER_ONLY
+#define FMT_HEADER_ONLY 1
+#include <fmt/format.h>
+
+#include <boost/system/error_code.hpp>
+
+#include "include/neorados/RADOS.hpp"
+
+#include "include/buffer.h"
+
+#include "common/random_string.h"
+
+#include "cls/fifo/cls_fifo_types.h"
+#include "cls/fifo/cls_fifo_ops.h"
+
+#include "fifo.h"
+
+namespace neorados::cls::fifo {
+namespace bs = boost::system;
+namespace cb = ceph::buffer;
+namespace fifo = rados::cls::fifo;
+
+void create_meta(WriteOp& op, std::string_view id,
+                std::optional<fifo::objv> objv,
+                std::optional<std::string_view> oid_prefix,
+                bool exclusive,
+                std::uint64_t max_part_size,
+                std::uint64_t max_entry_size)
+{
+  fifo::op::create_meta cm;
+
+  cm.id = id;
+  cm.version = objv;
+  cm.oid_prefix = oid_prefix;
+  cm.max_part_size = max_part_size;
+  cm.max_entry_size = max_entry_size;
+  cm.exclusive = exclusive;
+
+  cb::list in;
+  encode(cm, in);
+  op.exec(fifo::op::CLASS, fifo::op::CREATE_META, in);
+}
+
+void get_meta(ReadOp& op, std::optional<fifo::objv> objv,
+             bs::error_code* ec_out, fifo::info* info,
+             std::uint32_t* part_header_size,
+             std::uint32_t* part_entry_overhead)
+{
+  fifo::op::get_meta gm;
+  gm.version = objv;
+  cb::list in;
+  encode(gm, in);
+  op.exec(fifo::op::CLASS, fifo::op::GET_META, in,
+         [ec_out, info, part_header_size,
+          part_entry_overhead](bs::error_code ec, const cb::list& bl) {
+           fifo::op::get_meta_reply reply;
+           if (!ec) try {
+               auto iter = bl.cbegin();
+               decode(reply, iter);
+             } catch (const cb::error& err) {
+               ec = err.code();
+             }
+           if (ec_out) *ec_out = ec;
+           if (info) *info = std::move(reply.info);
+           if (part_header_size) *part_header_size = reply.part_header_size;
+           if (part_entry_overhead)
+               *part_entry_overhead = reply.part_entry_overhead;
+         });
+};
+
+/// Append an UPDATE_META class call to `op`.
+///
+/// Copies the expected version and every field of `update` into a
+/// fifo::op::update_meta request and encodes it onto the write operation.
+void update_meta(WriteOp& op, const fifo::objv& objv,
+                const fifo::update& update)
+{
+  fifo::op::update_meta request;
+
+  request.version = objv;
+  request.tail_part_num = update.tail_part_num();
+  request.head_part_num = update.head_part_num();
+  request.min_push_part_num = update.min_push_part_num();
+  request.max_push_part_num = update.max_push_part_num();
+  // `update` is a const reference, so std::move() yields a const rvalue here.
+  request.journal_entries_add = std::move(update).journal_entries_add();
+  request.journal_entries_rm = std::move(update).journal_entries_rm();
+
+  cb::list payload;
+  encode(request, payload);
+  op.exec(fifo::op::CLASS, fifo::op::UPDATE_META, payload);
+}
+
+/// Append an INIT_PART class call, carrying the part's tag and data
+/// parameters, to `op`.
+void part_init(WriteOp& op, std::string_view tag,
+              fifo::data_params params)
+{
+  fifo::op::init_part request;
+  request.tag = tag;
+  request.params = params;
+
+  cb::list payload;
+  encode(request, payload);
+  op.exec(fifo::op::CLASS, fifo::op::INIT_PART, payload);
+}
+
+void push_part(WriteOp& op, std::string_view tag,
+              std::deque<cb::list> data_bufs,
+              fu2::unique_function<void(bs::error_code, int)> f)
+{
+  fifo::op::push_part pp;
+
+  pp.tag = tag;
+  pp.data_bufs = data_bufs;
+  pp.total_len = 0;
+
+  for (const auto& bl : data_bufs)
+    pp.total_len += bl.length();
+
+  cb::list in;
+  encode(pp, in);
+  op.exec(fifo::op::CLASS, fifo::op::PUSH_PART, in,
+         [f = std::move(f)](bs::error_code ec, int r, const cb::list&) mutable {
+           std::move(f)(ec, r);
+         });
+  op.returnvec();
+}
+
+/// Append a TRIM_PART class call, carrying an optional tag and the
+/// offset `ofs`, to `op`.
+void trim_part(WriteOp& op,
+              std::optional<std::string_view> tag,
+              std::uint64_t ofs)
+{
+  fifo::op::trim_part request;
+  request.tag = tag;
+  request.ofs = ofs;
+
+  cb::list payload;
+  encode(request, payload);
+  op.exec(fifo::op::CLASS, fifo::op::TRIM_PART, payload);
+}
+
+void list_part(ReadOp& op,
+              std::optional<string_view> tag,
+              std::uint64_t ofs,
+              std::uint64_t max_entries,
+              bs::error_code* ec_out,
+              std::vector<fifo::part_list_entry>* entries,
+              bool* more,
+              bool* full_part,
+              std::string* ptag)
+{
+  fifo::op::list_part lp;
+
+  lp.tag = tag;
+  lp.ofs = ofs;
+  lp.max_entries = max_entries;
+
+  bufferlist in;
+  encode(lp, in);
+  op.exec(fifo::op::CLASS, fifo::op::LIST_PART, in,
+         [entries, more, full_part, ptag, ec_out](bs::error_code ec,
+                                                  const cb::list& bl) {
+           if (ec) {
+             if (ec_out) *ec_out = ec;
+             return;
+           }
+
+           fifo::op::list_part_reply reply;
+           auto iter = bl.cbegin();
+           try {
+             decode(reply, iter);
+           } catch (const cb::error& err) {
+             if (ec_out) *ec_out = ec;
+             return;
+           }
+
+           if (entries) *entries = std::move(reply.entries);
+           if (more) *more = reply.more;
+           if (full_part) *full_part = reply.full_part;
+           if (ptag) *ptag = reply.tag;
+         });
+}
+
+void get_part_info(ReadOp& op,
+                  bs::error_code* out_ec,
+                  fifo::part_header* header)
+{
+  fifo::op::get_part_info gpi;
+
+  bufferlist in;
+  encode(gpi, in);
+  op.exec(fifo::op::CLASS, fifo::op::GET_PART_INFO, in,
+         [out_ec, header](bs::error_code ec, const cb::list& bl) {
+           if (ec) {
+             if (out_ec) *out_ec = ec;
+           }
+           fifo::op::get_part_info_reply reply;
+           auto iter = bl.cbegin();
+           try {
+             decode(reply, iter);
+           } catch (const cb::error& err) {
+             if (out_ec) *out_ec = ec;
+             return;
+           }
+
+           if (header) *header = std::move(reply.header);
+         });
+}
+
+std::optional<marker> FIFO::to_marker(std::string_view s) {
+  marker m;
+  if (s.empty()) {
+    m.num = info.tail_part_num;
+    m.ofs = 0;
+    return m;
+  }
+
+  auto pos = s.find(':');
+  if (pos == string::npos) {
+    return std::nullopt;
+  }
+
+  auto num = s.substr(0, pos);
+  auto ofs = s.substr(pos + 1);
+
+  auto n = ceph::parse<decltype(m.num)>(num);
+  if (!n) {
+    return std::nullopt;
+  }
+  m.num = *n;
+  auto o = ceph::parse<decltype(m.ofs)>(ofs);
+  if (!o) {
+    return std::nullopt;
+  }
+  m.ofs = *o;
+  return m;
+}
+
+bs::error_code FIFO::apply_update(fifo::info* info,
+                                 const fifo::objv& objv,
+                                 const fifo::update& update) {
+  std::unique_lock l(m);
+  auto err = info->apply_update(update);
+  if (objv != info->version) {
+    ldout(r->cct(), 0) << __func__ << "(): Raced locally!" << dendl;
+    return errc::raced;
+  }
+  if (err) {
+    ldout(r->cct(), 0) << __func__ << "(): ERROR: " << err << dendl;
+    return errc::update_failed;
+  }
+
+  ++info->version.ver;
+
+  return {};
+}
+
+/// Produce a random 16-character tag via gen_rand_alphanumeric_plain().
+std::string FIFO::generate_tag() const
+{
+  constexpr int tag_length = 16;
+  return gen_rand_alphanumeric_plain(r->cct(), tag_length);
+}
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wnon-virtual-dtor"
+/// Error category for neorados::cls::fifo::errc values.
+///
+/// Supplies names and messages for the FIFO error codes, maps them to
+/// generic error conditions, and (through from_code) to negative errno
+/// values.
+class error_category : public ceph::converting_category {
+public:
+  error_category(){}
+  /// Name of the category.
+  const char* name() const noexcept override;
+  /// Message for `ev`; the buffer arguments are unused.
+  const char* message(int ev, char*, std::size_t) const noexcept override;
+  /// Message for `ev` as a std::string.
+  std::string message(int ev) const override;
+  /// Generic condition corresponding to `ev`.
+  bs::error_condition default_error_condition(int ev) const noexcept
+    override;
+  /// True if `ev` maps to condition `c`.
+  bool equivalent(int ev, const bs::error_condition& c) const
+    noexcept override;
+  // Keep the base-class overloads of equivalent() visible.
+  using ceph::converting_category::equivalent;
+  /// Negative errno value corresponding to `ev`.
+  int from_code(int ev) const noexcept override;
+};
+#pragma GCC diagnostic pop
+#pragma clang diagnostic pop
+
+/// The category's name.
+const char* error_category::name() const noexcept {
+  return "FIFO";
+}
+
+/// Return a static, human-readable description of `ev`.
+///
+/// Zero is reported as "No error"; values that are not errc enumerators
+/// are reported as "Unknown error". The buffer arguments are ignored.
+const char* error_category::message(int ev, char*, std::size_t) const noexcept {
+  if (ev == 0) {
+    return "No error";
+  }
+
+  const auto code = static_cast<errc>(ev);
+  switch (code) {
+  case errc::raced: {
+    return "Retry-race count exceeded";
+  }
+  case errc::inconsistency: {
+    return "Inconsistent result! New head before old head";
+  }
+  case errc::entry_too_large: {
+    return "Pushed entry too large";
+  }
+  case errc::invalid_marker: {
+    return "Invalid marker string";
+  }
+  case errc::update_failed: {
+    return "Update failed";
+  }
+  }
+
+  return "Unknown error";
+}
+
+/// std::string flavour of message(); forwards to the buffer overload,
+/// which ignores its buffer arguments.
+std::string error_category::message(int ev) const {
+  return message(ev, nullptr, 0);
+}
+
+bs::error_condition
+error_category::default_error_condition(int ev) const noexcept {
+  switch (static_cast<errc>(ev)) {
+  case errc::raced:
+    return bs::errc::operation_canceled;
+
+  case errc::inconsistency:
+    return bs::errc::io_error;
+
+  case errc::entry_too_large:
+    return bs::errc::value_too_large;
+
+  case errc::invalid_marker:
+    return bs::errc::invalid_argument;
+
+  case errc::update_failed:
+    return bs::errc::invalid_argument;
+  }
+
+  return { ev, *this };
+}
+
+/// `ev` is equivalent to `c` exactly when its default condition equals `c`.
+bool error_category::equivalent(int ev, const bs::error_condition& c) const noexcept {
+  return default_error_condition(ev) == c;
+}
+
+int error_category::from_code(int ev) const noexcept {
+  switch (static_cast<errc>(ev)) {
+  case errc::raced:
+    return -ECANCELED;
+
+  case errc::inconsistency:
+    return -EIO;
+
+  case errc::entry_too_large:
+    return -E2BIG;
+
+  case errc::invalid_marker:
+    return -EINVAL;
+
+  case errc::update_failed:
+    return -EINVAL;
+
+  }
+  return -EDOM;
+}
+
+const bs::error_category& error_category() noexcept {
+  static const class error_category c;
+  return c;
+}
+
+}
diff --git a/src/neorados/cls/fifo.h b/src/neorados/cls/fifo.h
new file mode 100644 (file)
index 0000000..3bdf55a
--- /dev/null
@@ -0,0 +1,1747 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2020 Red Hat <contact@redhat.com>
+ * Author: Adam C. Emerson
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#ifndef CEPH_NEORADOS_CLS_FIFIO_H
+#define CEPH_NEORADOS_CLS_FIFIO_H
+
+#include <cstdint>
+#include <deque>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <string_view>
+#include <vector>
+
+#include <boost/asio.hpp>
+#include <boost/system/error_code.hpp>
+
+#undef FMT_HEADER_ONLY
+#define FMT_HEADER_ONLY 1
+#include <fmt/format.h>
+
+#include "include/neorados/RADOS.hpp"
+#include "include/buffer.h"
+
+#include "common/allocate_unique.h"
+#include "common/async/bind_handler.h"
+#include "common/async/bind_like.h"
+#include "common/async/completion.h"
+#include "common/async/forward_handler.h"
+
+#include "common/dout.h"
+
+#include "cls/fifo/cls_fifo_types.h"
+#include "cls/fifo/cls_fifo_ops.h"
+
+namespace neorados::cls::fifo {
+namespace ba = boost::asio;
+namespace bs = boost::system;
+namespace ca = ceph::async;
+namespace cb = ceph::buffer;
+namespace fifo = rados::cls::fifo;
+
+/// Debug-output subsystem; presumably consumed by the ldout() calls in
+/// this file — TODO confirm.
+inline constexpr auto dout_subsys = ceph_subsys_rados;
+/// Default for the max_part_size parameter of create_meta()/FIFO::create():
+/// 4 MiB.
+inline constexpr std::uint64_t default_max_part_size = 4 * 1024 * 1024;
+/// Default for the max_entry_size parameter of create_meta()/FIFO::create():
+/// 32 KiB.
+inline constexpr std::uint64_t default_max_entry_size = 32 * 1024;
+/// NOTE(review): presumably the retry limit after which errc::raced is
+/// reported; its uses are not visible in this section — confirm.
+inline constexpr auto MAX_RACE_RETRIES = 10;
+
+
+const boost::system::error_category& error_category() noexcept;
+
+/// Error codes specific to the FIFO client. Values start at 1 so that 0
+/// keeps its meaning of "no error".
+enum class errc {
+  raced = 1,       ///< "Retry-race count exceeded"
+  inconsistency,   ///< "Inconsistent result! New head before old head"
+  entry_too_large, ///< "Pushed entry too large"
+  invalid_marker,  ///< "Invalid marker string"
+  update_failed    ///< "Update failed"
+};
+}
+
+// Register neorados::cls::fifo::errc with Boost.System: it is an
+// error_code enum (value = true) and not an error_condition enum
+// (value = false).
+namespace boost::system {
+template<>
+struct is_error_code_enum<::neorados::cls::fifo::errc> {
+  static const bool value = true;
+};
+template<>
+struct is_error_condition_enum<::neorados::cls::fifo::errc> {
+  static const bool value = false;
+};
+}
+
+namespace neorados::cls::fifo {
+/// Explicit conversion: build an error_code holding `e` in the FIFO
+/// error category.
+inline bs::error_code make_error_code(errc e) noexcept {
+  return { static_cast<int>(e), error_category() };
+}
+
+/// Build an error_code holding `e` in the FIFO error category.
+///
+/// NOTE(review): the body is identical to make_error_code(); the name
+/// suggests something else may have been intended — confirm with callers.
+inline bs::error_code make_error_category(errc e) noexcept {
+  return { static_cast<int>(e), error_category() };
+}
+
+// Low-level helpers. Each one encodes a request and appends the
+// corresponding class call to the supplied operation; nothing is sent
+// until the operation itself is executed. Output pointers may be null
+// and are written when the operation completes.
+
+/// Append a CREATE_META call to `op`.
+void create_meta(WriteOp& op, std::string_view id,
+                std::optional<fifo::objv> objv,
+                std::optional<std::string_view> oid_prefix,
+                bool exclusive = false,
+                std::uint64_t max_part_size = default_max_part_size,
+                std::uint64_t max_entry_size = default_max_entry_size);
+/// Append a GET_META call to `op`; results are stored through the
+/// non-null output pointers.
+void get_meta(ReadOp& op, std::optional<fifo::objv> objv,
+             bs::error_code* ec_out, fifo::info* info,
+             std::uint32_t* part_header_size,
+             std::uint32_t* part_entry_overhead);
+
+/// Append an UPDATE_META call carrying the fields of `desc` to `op`.
+void update_meta(WriteOp& op, const fifo::objv& objv,
+                const fifo::update& desc);
+
+/// Append an INIT_PART call to `op`.
+void part_init(WriteOp& op, std::string_view tag,
+              fifo::data_params params);
+
+/// Append a PUSH_PART call to `op`; the callback receives the error
+/// code and integer result of the call.
+void push_part(WriteOp& op, std::string_view tag,
+              std::deque<cb::list> data_bufs,
+              fu2::unique_function<void(bs::error_code, int)>);
+/// Append a TRIM_PART call to `op`.
+void trim_part(WriteOp& op, std::optional<std::string_view> tag,
+              std::uint64_t ofs);
+/// Append a LIST_PART call to `op`; results are stored through the
+/// non-null output pointers.
+void list_part(ReadOp& op,
+              std::optional<std::string_view> tag,
+              std::uint64_t ofs,
+              std::uint64_t max_entries,
+              bs::error_code* ec_out,
+              std::vector<fifo::part_list_entry>* entries,
+              bool* more,
+              bool* full_part,
+              std::string* ptag);
+/// Append a GET_PART_INFO call to `op`; the decoded header is stored
+/// through `header` when non-null.
+void get_part_info(ReadOp& op,
+                  bs::error_code* out_ec,
+                  fifo::part_header* header);
+
+/// A position within the FIFO: a part number plus a byte offset into
+/// that part.
+struct marker {
+  std::int64_t num = 0;  ///< Part number
+  std::uint64_t ofs = 0; ///< Offset within the part
+
+  marker() = default;
+  marker(std::int64_t num, std::uint64_t ofs) : num(num), ofs(ofs) {}
+
+  /// The largest representable marker.
+  static marker max() {
+    return { std::numeric_limits<decltype(num)>::max(),
+            std::numeric_limits<decltype(ofs)>::max() };
+  }
+
+  /// Render as "<num>:<ofs>", each field zero-padded to 20 characters.
+  /// Const so that it can be called on const markers.
+  std::string to_string() const {
+    return fmt::format("{:0>20}:{:0>20}", num, ofs);
+  }
+};
+
+/// One entry returned by FIFO::list().
+struct list_entry {
+  cb::list data;         ///< Contents of the entry
+  std::string marker;    ///< String representing the entry's position
+  ceph::real_time mtime; ///< Time (on the OSD) at which the entry was pushed
+};
+
+using part_info = fifo::part_header;
+
+namespace detail {
+template<typename Handler>
+class JournalProcessor;
+}
+
+/// Completions, Handlers, and CompletionTokens
+/// ===========================================
+///
+/// This class is based on Boost.Asio. For information, see
+/// https://www.boost.org/doc/libs/1_74_0/doc/html/boost_asio.html
+///
+/// In summary, Asio's design is that of functions taking completion
+/// handlers. Every handler has a signature, like
+/// (boost::system::error_code, std::string). The completion handler
+/// receives the result of the function, and the signature is the type
+/// of that result.
+///
+/// The completion handler is specified with a CompletionToken. The
+/// CompletionToken is any type that has a specialization of
+/// async_completion and async_result. See
+/// https://www.boost.org/doc/libs/1_74_0/doc/html/boost_asio/reference/async_completion.html
+/// and https://www.boost.org/doc/libs/1_74_0/doc/html/boost_asio/reference/async_result.html
+///
+/// The return type of a function taking a CompletionToken is
+/// async_result<CompletionToken, Signature>::return_type.
+///
+/// Functions
+/// ---------
+///
+/// The default implementations treat whatever value is described as a
+/// function, whose parameters correspond to the signature, and calls
+/// it upon completion.
+///
+/// EXAMPLE:
+/// Let f be an asynchronous function whose signature is (bs::error_code, int)
+/// Let g be an asynchronous function whose signature is
+/// (bs::error_code, int, std::string).
+///
+///
+///    f([](bs::error_code ec, int i) { ... });
+///    g([](bs::error_code ec, int i, std::string s) { ... });
+///
+/// Will schedule asynchronous tasks, and the provided lambdas will be
+/// called on completion. In this case, f and g return void.
+///
+/// There are other specializations. Commonly used ones are:
+///
+/// Futures
+/// -------
+///
+/// A CompletionToken of boost::asio::use_future will complete with a
+/// promise whose type matches (minus any initial error_code) the
+/// function's signature. The corresponding future is returned. If the
+/// error_code of the result is non-zero, the future is set with an
+/// exception of type boost::asio::system_error.
+///
+/// See https://www.boost.org/doc/libs/1_74_0/doc/html/boost_asio/reference/use_future_t.html
+///
+/// EXAMPLE:
+///
+/// std::future<int> fi = f(ba::use_future);
+/// std::future<std::tuple<int, std::string>> fg = g(ba::use_future);
+///
+/// Coroutines
+/// ----------
+///
+/// A CompletionToken of type spawn::yield_context suspends execution
+/// of the current coroutine until completion of the operation. See
+/// src/spawn/README.md
+/// https://www.boost.org/doc/libs/1_74_0/doc/html/boost_asio/reference/spawn.html and
+/// https://www.boost.org/doc/libs/1_74_0/doc/html/boost_asio/reference/yield_context.html
+///
+/// Operations given this CompletionToken return their results, modulo
+/// any leading error_code. A non-zero error code will be thrown, by
+/// default, but may be bound to a variable instead with the overload
+/// of the array-subscript operator.
+///
+/// EXAMPLE:
+/// // Within a function with a yield_context parameter named y
+///
+/// try {
+///    int i = f(y);
+/// } catch (const bs::system_error& ec) { ... }
+///
+/// bs::error_code ec;
+/// auto [i, s] = g(y[ec]);
+///
+/// Blocking calls
+/// --------------
+///
+/// ceph::async::use_blocked, defined in src/common/async/blocked_completion.h
+/// Suspends the current thread of execution, returning the results of
+/// the operation on resumption. Its calling convention is analogous to
+/// that of yield_context.
+///
+/// EXAMPLE:
+/// try {
+///    int i = f(ca::use_blocked);
+/// } catch (const bs::system_error& e) { ... }
+///
+/// bs::error_code ec;
+/// auto [i, s] = g(ca::use_blocked[ec]);
+///
+/// librados Completions
+/// --------------------
+///
+/// If src/common/async/librados_completion.h is included in the
+/// current translation unit, then librados::AioCompletion* may be used
+/// as a CompletionToken. This is only permitted when the completion
+/// signature is either bs::error_code or void. The return type of
+/// functions provided a CompletionToken of AioCompletion* is void. If
+/// the signature includes an error code and the error code is set,
+/// then the error is translated to an int which is set as the result
+/// of the AioCompletion.
+///
+/// EXAMPLE:
+/// // Assume an asynchronous function h whose signature is bs::error_code.
+///
+/// AioCompletion* c = Rados::aio_create_completion();
+/// h(c);
+/// int r = c->get_return_value();
+///
+/// See also src/test/cls_fifo/bench_cls_fifo.cc for a full, simple
+/// example of a program using this class with coroutines.
+///
+///
+/// Markers
+/// =======
+///
+/// Markers represent a position within the FIFO. Internally, they are
+/// part/offset pairs. Externally, they are ordered but otherwise
+/// opaque strings. Markers that compare lower denote positions closer
+/// to the tail.
+///
+/// A marker is returned with every entry from a list() operation. They
+/// may be supplied to a list operation to resume from a given
+/// position, and must be supplied to trim to give the position to which
+/// to trim.
+
+class FIFO {
+public:
+
+  FIFO(const FIFO&) = delete;
+  FIFO& operator =(const FIFO&) = delete;
+  FIFO(FIFO&&) = delete;
+  FIFO& operator =(FIFO&&) = delete;
+
+  /// Open an existing FIFO.
+  /// Signature: (bs::error_code ec, std::unique_ptr<FIFO> f)
+  template<typename CT>
+  static auto open(RADOS& r, //< RADOS handle
+                  const IOContext& ioc, //< Context for pool, namespace, etc.
+                  Object oid, //< OID for the 'main' object of the FIFO
+                  CT&& ct, //< CompletionToken
+                  /// Fail if the FIFO is not at this version
+                  std::optional<fifo::objv> objv = std::nullopt,
+                  /// Default executor. By default use the one
+                  /// associated with the RADOS handle.
+                  std::optional<ba::executor> executor = std::nullopt) {
+    ba::async_completion<CT, void(bs::error_code,
+                                 std::unique_ptr<FIFO>)> init(ct);
+    // Prefer the executor/allocator associated with the handler; fall
+    // back to the supplied executor or the RADOS handle's.
+    auto e = ba::get_associated_executor(init.completion_handler,
+                                        executor.value_or(r.get_executor()));
+    auto a = ba::get_associated_allocator(init.completion_handler);
+    // `r` is captured by reference below, so the RADOS handle must
+    // outlive the operation.
+    _read_meta_(
+      &r, oid, ioc, objv,
+      ca::bind_ea(
+       e, a,
+       [&r, ioc, oid, executor, handler = std::move(init.completion_handler)]
+       (bs::error_code ec, fifo::info info,
+        std::uint32_t size, std::uint32_t over) mutable {
+         // The FIFO object is constructed, and later handed to the
+         // handler, whether or not `ec` is set.
+         std::unique_ptr<FIFO> f(
+           new FIFO(r, ioc, oid, executor.value_or(r.get_executor())));
+         f->info = info;
+         f->part_header_size = size;
+         f->part_entry_overhead = over;
+         // If there are journal entries, process them, in case
+         // someone crashed mid-transaction.
+         if (!ec && !info.journal.empty()) {
+           auto e = ba::get_associated_executor(handler, f->get_executor());
+           auto a = ba::get_associated_allocator(handler);
+           // Keep a raw pointer: `f` is moved into the continuation
+           // before _process_journal is invoked on it.
+           auto g = f.get();
+           g->_process_journal(
+             ca::bind_ea(
+               e, a,
+               [f = std::move(f),
+                handler = std::move(handler)](bs::error_code ec) mutable {
+                 std::move(handler)(ec, std::move(f));
+               }));
+           return;
+         }
+         std::move(handler)(ec, std::move(f));
+         return;
+       }));
+    return init.result.get();
+  }
+
+  /// Open an existing or create a new FIFO.
+  /// Signature: (bs::error_code ec, std::unique_ptr<FIFO> f)
+  template<typename CT>
+  static auto create(RADOS& r, /// RADOS handle
+                    const IOContext& ioc, /// Context for pool, namespace, etc.
+                    Object oid, /// OID for the 'main' object of the FIFO
+                    CT&& ct, /// CompletionToken
+                    /// Fail if FIFO exists and is not this version
+                    std::optional<fifo::objv> objv = std::nullopt,
+                    /// Custom prefix for parts
+                    std::optional<std::string_view> oid_prefix = std::nullopt,
+                    /// Fail if FIFO already exists
+                    bool exclusive = false,
+                    /// Size at which a part is considered full
+                    std::uint64_t max_part_size = default_max_part_size,
+                    /// Maximum size of any entry
+                    std::uint64_t max_entry_size = default_max_entry_size,
+                    /// Default executor. By default use the one
+                    /// associated with the RADOS handle.
+                    std::optional<ba::executor> executor = std::nullopt) {
+    ba::async_completion<CT, void(bs::error_code,
+                                 std::unique_ptr<FIFO>)> init(ct);
+    // Step 1: issue the CREATE_META write.
+    WriteOp op;
+    create_meta(op, oid, objv, oid_prefix, exclusive, max_part_size,
+               max_entry_size);
+    auto e = ba::get_associated_executor(init.completion_handler,
+                                        executor.value_or(r.get_executor()));
+    auto a = ba::get_associated_allocator(init.completion_handler);
+    // `r` is captured by reference below, so the RADOS handle must
+    // outlive the operation.
+    r.execute(
+      oid, ioc, std::move(op),
+      ca::bind_ea(
+       e, a,
+       [objv, &r, ioc, oid, executor, handler = std::move(init.completion_handler)]
+       (bs::error_code ec) mutable {
+         // A failed create completes immediately with a null FIFO.
+         if (ec) {
+           std::move(handler)(ec, nullptr);
+           return;
+         }
+         auto e = ba::get_associated_executor(
+           handler, executor.value_or(r.get_executor()));
+         auto a = ba::get_associated_allocator(handler);
+         // Step 2: read the metadata back and build the FIFO object.
+         FIFO::_read_meta_(
+           &r, oid, ioc, objv,
+           ca::bind_ea(
+             e, a,
+             [&r, ioc, executor, oid, handler = std::move(handler)]
+             (bs::error_code ec, fifo::info info,
+              std::uint32_t size, std::uint32_t over) mutable {
+               // The FIFO object is constructed, and later handed to
+               // the handler, whether or not `ec` is set.
+               std::unique_ptr<FIFO> f(
+                 new FIFO(r, ioc, oid, executor.value_or(r.get_executor())));
+               f->info = info;
+               f->part_header_size = size;
+               f->part_entry_overhead = over;
+               // Step 3: if the metadata carries journal entries,
+               // process them before completing.
+               if (!ec && !info.journal.empty()) {
+                 auto e = ba::get_associated_executor(handler,
+                                                      f->get_executor());
+                 auto a = ba::get_associated_allocator(handler);
+                 // Keep a raw pointer: `f` is moved into the
+                 // continuation before _process_journal is invoked.
+                 auto g = f.get();
+                 g->_process_journal(
+                   ca::bind_ea(
+                     e, a,
+                     [f = std::move(f), handler = std::move(handler)]
+                     (bs::error_code ec) mutable {
+                       std::move(handler)(ec, std::move(f));
+                     }));
+                 return;
+               }
+               std::move(handler)(ec, std::move(f));
+             }));
+           }));
+    return init.result.get();
+  }
+
+  /// Force a re-read of FIFO metadata.
+  /// Signature: (bs::error_code ec)
+  template<typename CT>
+  auto read_meta(CT&& ct, //< CompletionToken
+                /// Fail if FIFO not at this version
+                std::optional<fifo::objv> objv = std::nullopt) {
+    std::unique_lock l(m);
+    auto version = info.version;
+    l.unlock();
+    ba::async_completion<CT, void(bs::error_code)> init(ct);
+    auto e = ba::get_associated_executor(init.completion_handler,
+                                        get_executor());
+    auto a = ba::get_associated_allocator(init.completion_handler);
+    _read_meta_(
+      r, oid, ioc, objv,
+      ca::bind_ea(
+       e, a,
+       [this, version, handler = std::move(init.completion_handler)]
+       (bs::error_code ec, fifo::info newinfo,
+        std::uint32_t size, std::uint32_t over) mutable {
+         std::unique_lock l(m);
+         if (version == info.version) {
+           info = newinfo;
+           part_header_size = size;
+           part_entry_overhead = over;
+         }
+         l.unlock();
+         return std::move(handler)(ec);
+       }));
+    return init.result.get();
+  }
+
+  /// Return a reference to currently known metadata
+  // NOTE(review): returns a reference to the member without taking the
+  // mutex, while other methods modify `info` under it — callers should
+  // presumably not use this concurrently with updates; confirm.
+  const fifo::info& meta() const {
+    return info;
+  }
+
+  /// Return header size and entry overhead of partitions.
+  // Returns {part_header_size, part_entry_overhead} as last stored by
+  // open/create/read_meta. The members are read without taking the mutex.
+  std::pair<std::uint32_t, std::uint32_t> get_part_layout_info() {
+    return {part_header_size, part_entry_overhead};
+  }
+
+  /// Push a single entry to the FIFO.
+  /// Signature: (bs::error_code)
+  template<typename CT>
+  auto push(const cb::list& bl, //< Bufferlist holding entry to push
+           CT&& ct //< CompletionToken
+    ) {
+    // Wrap the single entry in a one-element vector and defer to the
+    // multi-entry overload.
+    return push(std::vector{ bl }, std::forward<CT>(ct));
+  }
+
+  /// Push many entries to the FIFO.
+  /// Signature: (bs::error_code)
+  template<typename CT>
+  auto push(const std::vector<cb::list>& data_bufs, //< Entries to push
+           CT&& ct //< CompletionToken
+    ) {
+    ba::async_completion<CT, void(bs::error_code)> init(ct);
+    std::unique_lock l(m);
+    auto max_entry_size = info.params.max_entry_size;
+    auto need_new_head = info.need_new_head();
+    l.unlock();
+    auto e = ba::get_associated_executor(init.completion_handler,
+                                        get_executor());
+    auto a = ba::get_associated_allocator(init.completion_handler);
+    if (data_bufs.empty() ) {
+      // Can't fail if you don't try.
+      e.post(ca::bind_handler(std::move(init.completion_handler),
+                                 bs::error_code{}), a);
+      return init.result.get();
+    }
+
+    // Validate sizes
+    for (const auto& bl : data_bufs) {
+      if (bl.length() > max_entry_size) {
+       ldout(r->cct(), 10) << __func__ << "(): entry too large: "
+                           << bl.length() << " > "
+                           << info.params.max_entry_size << dendl;
+       e.post(ca::bind_handler(std::move(init.completion_handler),
+                                   errc::entry_too_large), a);
+       return init.result.get();
+      }
+    }
+
+    auto p = ca::bind_ea(e, a,
+                        Pusher(this, {data_bufs.begin(), data_bufs.end()},
+                                 {}, 0, std::move(init.completion_handler)));
+
+    if (need_new_head) {
+      _prepare_new_head(std::move(p));
+    } else {
+      e.dispatch(std::move(p), a);
+    }
+    return init.result.get();
+  }
+
+  /// List the entries in a FIFO
+  /// Signature: (bs::error_code ec, std::vector<list_entry> entries, bool more)
+  ///
+  /// More is true if entries beyond the last exist.
+  /// The list entries are of the form:
+  /// data - Contents of the entry
+  /// marker - String representing the position of this entry within the FIFO.
+  /// mtime - Time (on the OSD) at which the entry was pushed.
+  template<typename CT>
+  auto list(int max_entries, //< Maximum number of entries to fetch
+           /// Optionally, a marker indicating the position after
+           /// which to begin listing. If null, begin at the tail.
+           std::optional<std::string_view> markstr,
+           CT&& ct //< CompletionToken
+    ) {
+    ba::async_completion<CT, void(bs::error_code,
+                                 std::vector<list_entry>, bool)> init(ct);
+    std::unique_lock l(m);
+    std::int64_t part_num = info.tail_part_num;
+    l.unlock();
+    std::uint64_t ofs = 0;
+    auto a = ba::get_associated_allocator(init.completion_handler);
+    auto e = ba::get_associated_executor(init.completion_handler);
+
+    if (markstr) {
+      auto marker = to_marker(*markstr);
+      if (!marker) {
+       ldout(r->cct(), 0) << __func__
+                          << "(): failed to parse marker (" << *markstr
+                          << ")" << dendl;
+       e.post(ca::bind_handler(std::move(init.completion_handler),
+                               errc::invalid_marker,
+                               std::vector<list_entry>{}, false), a);
+       return init.result.get();
+      }
+      part_num = marker->num;
+      ofs = marker->ofs;
+    }
+
+    using handler_type = decltype(init.completion_handler);
+    auto ls = ceph::allocate_unique<Lister<handler_type>>(
+      a, this, part_num, ofs, max_entries,
+      std::move(init.completion_handler));
+    ls.release()->list();
+    return init.result.get();
+  }
+
+  /// Trim entries from the tail to the given position
+  /// Signature: (bs::error_code)
+  template<typename CT>
+  auto trim(std::string_view markstr, //< Position to which to trim, inclusive
+           CT&& ct //< CompletionToken
+    ) {
+    auto m = to_marker(markstr);
+    ba::async_completion<CT, void(bs::error_code)> init(ct);
+    auto a = ba::get_associated_allocator(init.completion_handler);
+    auto e = ba::get_associated_executor(init.completion_handler);
+    if (!m) {
+      ldout(r->cct(), 0) << __func__ << "(): failed to parse marker: marker="
+                        << markstr << dendl;
+      e.post(ca::bind_handler(std::move(init.completion_handler),
+                             errc::invalid_marker), a);
+      return init.result.get();
+    } else {
+      using handler_type = decltype(init.completion_handler);
+      auto t = ceph::allocate_unique<Trimmer<handler_type>>(
+       a, this, m->num, m->ofs, std::move(init.completion_handler));
+      t.release()->trim();
+    }
+    return init.result.get();
+  }
+
+  /// Get information about a specific partition
+  /// Signature: (bs::error_code, part_info)
+  ///
+  /// part_info has the following entries
+  /// tag - A random string identifying this partition. Used internally
+  ///       as a sanity check to make sure operations haven't been misdirected
+  /// params - Data parameters, identical for every partition within a
+  ///          FIFO and the same as what is returned from get_part_layout()
+  /// magic - A random magic number, used internally as a prefix to
+  ///         every entry stored on the OSD to ensure sync
+  /// min_ofs - Offset of the first entry
+  /// max_ofs - Offset of the highest entry
+  /// min_index - Minimum entry index
+  /// max_index - Maximum entry index
+  /// max_time - Time of the latest push
+  ///
+  /// The difference between ofs and index is that ofs is a byte
+  /// offset. Index is a count. Nothing really uses indices, but
+  /// they're tracked and sanity-checked as an invariant on the OSD.
+  ///
+  /// max_ofs and max_time are the two that have been used externally
+  /// so far.
+  template<typename CT>
+  auto get_part_info(int64_t part_num, // The number of the partition
+                    CT&& ct // CompletionToken
+    ) {
+
+    ba::async_completion<CT, void(bs::error_code, part_info)> init(ct);
+    fifo::op::get_part_info gpi;
+    cb::list in;
+    encode(gpi, in);
+    ReadOp op;
+    auto e = ba::get_associated_executor(init.completion_handler,
+                                        get_executor());
+    auto a = ba::get_associated_allocator(init.completion_handler);
+    auto reply = ceph::allocate_unique<
+      ExecDecodeCB<fifo::op::get_part_info_reply>>(a);
+
+    op.exec(fifo::op::CLASS, fifo::op::GET_PART_INFO, in,
+           std::ref(*reply));
+    std::unique_lock l(m);
+    auto part_oid = info.part_oid(part_num);
+    l.unlock();
+    r->execute(part_oid, ioc, std::move(op), nullptr,
+              ca::bind_ea(e, a,
+                          PartInfoGetter(std::move(init.completion_handler),
+                                         std::move(reply))));
+    return init.result.get();
+  }
+
+  using executor_type = ba::executor;
+
+  /// Return the default executor, as specified at creation.
+  ///
+  /// This is the `executor` member set by the constructor. Operations
+  /// in this class pass it to get_associated_executor() as the
+  /// fallback for handlers that carry no executor of their own.
+  ba::executor get_executor() const {
+    return executor;
+  }
+
+private:
+  template<typename Handler>
+  friend class detail::JournalProcessor;
+  RADOS* const r;
+  const IOContext ioc;
+  const Object oid;
+  std::mutex m;
+
+  fifo::info info;
+
+  std::uint32_t part_header_size = 0xdeadbeef;
+  std::uint32_t part_entry_overhead = 0xdeadbeef;
+
+  ba::executor executor;
+
+  std::optional<marker> to_marker(std::string_view s);
+
+  /// Destroy and free an object that was allocated with the allocator
+  /// associated with `handler`.
+  ///
+  /// handler - Handler whose associated allocator (rebound to T) is used
+  /// t - Object to destroy; its storage is released as a single T
+  ///
+  /// Goes through std::allocator_traits rather than calling
+  /// destroy()/deallocate() on the allocator directly: `destroy` is an
+  /// optional allocator member and std::allocator no longer has it as
+  /// of C++20.
+  template<typename Handler, typename T>
+  static void assoc_delete(const Handler& handler, T* t) {
+    using traits = typename std::allocator_traits<
+      typename ba::associated_allocator<Handler>::type>
+      ::template rebind_traits<T>;
+    typename traits::allocator_type a(
+      ba::get_associated_allocator(handler));
+    traits::destroy(a, t);
+    traits::deallocate(a, t, 1);
+  }
+
+  /// Private constructor: records the RADOS handle, the IOContext and
+  /// object the FIFO lives in, and the default executor later returned
+  /// by get_executor().
+  ///
+  /// All by-value arguments are moved into their members so that no
+  /// extra copy is made.
+  FIFO(RADOS& r,
+       IOContext ioc,
+       Object oid,
+       ba::executor executor)
+    : r(&r), ioc(std::move(ioc)), oid(std::move(oid)),
+      executor(std::move(executor)) {}
+
+  std::string generate_tag() const;
+
+  template <typename T>
+  struct ExecDecodeCB {
+    bs::error_code ec;
+    T result;
+    void operator()(bs::error_code e, const cb::list& r) {
+      if (e) {
+        ec = e;
+        return;
+      }
+      try {
+        auto p = r.begin();
+        using ceph::decode;
+        decode(result, p);
+      } catch (const cb::error& err) {
+        ec = err.code();
+      }
+    }
+  };
+
+  template<typename Handler>
+  class MetaReader {
+    Handler handler;
+    using allocator_type = boost::asio::associated_allocator_t<Handler>;
+    using decoder_type = ExecDecodeCB<fifo::op::get_meta_reply>;
+    using decoder_ptr = ceph::allocated_unique_ptr<decoder_type, allocator_type>;
+    decoder_ptr decoder;
+  public:
+    MetaReader(Handler&& handler, decoder_ptr&& decoder)
+      : handler(std::move(handler)), decoder(std::move(decoder)) {}
+
+    void operator ()(bs::error_code ec) {
+      if (!ec) {
+        ec = decoder->ec;
+      }
+      auto reply = std::move(decoder->result);
+      decoder.reset(); // free handler-allocated memory before dispatching
+
+      std::move(handler)(ec, std::move(reply.info),
+                        std::move(reply.part_header_size),
+                        std::move(reply.part_entry_overhead));
+    }
+  };
+
+  // Renamed to get around a compiler bug in Bionic that kept
+  // complaining we weren't capturing 'this' to make a static function call.
+  template<typename Handler>
+  static void _read_meta_(RADOS* r, const Object& oid, const IOContext& ioc,
+                         std::optional<fifo::objv> objv,
+                         Handler&& handler, /* error_code, info, uint64,
+                                               uint64 */
+                         std::optional<ba::executor> executor = std::nullopt){
+    fifo::op::get_meta gm;
+
+    gm.version = objv;
+
+    cb::list in;
+    encode(gm, in);
+    ReadOp op;
+
+    auto a = ba::get_associated_allocator(handler);
+    auto reply =
+      ceph::allocate_unique<ExecDecodeCB<fifo::op::get_meta_reply>>(a);
+
+    auto e = ba::get_associated_executor(handler);
+    op.exec(fifo::op::CLASS, fifo::op::GET_META, in, std::ref(*reply));
+    r->execute(oid, ioc, std::move(op), nullptr,
+              ca::bind_ea(e, a, MetaReader(std::move(handler),
+                                           std::move(reply))));
+  };
+
+  template<typename Handler>
+  void _read_meta(Handler&& handler /* error_code */) {
+    auto e = ba::get_associated_executor(handler, get_executor());
+    auto a = ba::get_associated_allocator(handler);
+    _read_meta_(r, oid, ioc,
+               nullopt,
+               ca::bind_ea(
+                 e, a,
+                 [this,
+                  handler = std::move(handler)](bs::error_code ec,
+                                                fifo::info&& info,
+                                                std::uint64_t phs,
+                                                std::uint64_t peo) mutable {
+                   std::unique_lock l(m);
+                   if (ec) {
+                     l.unlock();
+                     std::move(handler)(ec);
+                     return;
+                   }
+                   // We have a newer version already!
+                   if (!info.version.same_or_later(this->info.version)) {
+                     l.unlock();
+                     std::move(handler)(bs::error_code{});
+                     return;
+                   }
+                   this->info = std::move(info);
+                   part_header_size = phs;
+                   part_entry_overhead = peo;
+                   l.unlock();
+                   std::move(handler)(bs::error_code{});
+                 }), get_executor());
+  }
+
+  bs::error_code apply_update(fifo::info* info,
+                             const fifo::objv& objv,
+                             const fifo::update& update);
+
+
+  template<typename Handler>
+  void _update_meta(const fifo::update& update,
+                   fifo::objv version,
+                   Handler&& handler /* error_code, bool */) {
+    WriteOp op;
+
+    cls::fifo::update_meta(op, info.version, update);
+
+    auto a = ba::get_associated_allocator(handler);
+    auto e = ba::get_associated_executor(handler, get_executor());
+
+    r->execute(
+      oid, ioc, std::move(op),
+      ca::bind_ea(
+       e, a,
+       [this, e, a, version, update,
+        handler = std::move(handler)](bs::error_code ec) mutable {
+         if (ec && ec != bs::errc::operation_canceled) {
+           std::move(handler)(ec, bool{});
+           return;
+         }
+
+         auto canceled = (ec == bs::errc::operation_canceled);
+
+         if (!canceled) {
+           ec = apply_update(&info,
+                             version,
+                             update);
+           if (ec) {
+             canceled = true;
+           }
+         }
+
+         if (canceled) {
+           _read_meta(
+             ca::bind_ea(
+               e, a,
+               [handler = std::move(handler)](bs::error_code ec) mutable {
+                 std::move(handler)(ec, ec ? false : true);
+               }));
+           return;
+         }
+         std::move(handler)(ec, false);
+         return;
+       }));
+  }
+
+  /// Start a JournalProcessor and let it run.
+  ///
+  /// handler - Completion handler, signature (error_code)
+  ///
+  /// The processor is allocated with the allocator associated with
+  /// `handler` itself. The other self-owning helpers in this class are
+  /// allocated the same way and freed through assoc_delete(), which
+  /// also queries the handler directly. Looking the allocator up
+  /// through a std::ref wrapper may not see the handler's allocator.
+  /// NOTE(review): JournalProcessor's own cleanup is outside this
+  /// block; confirm it frees itself via the handler's allocator.
+  template<typename Handler>
+  auto _process_journal(Handler&& handler /* error_code */) {
+    auto a = ba::get_associated_allocator(handler);
+    auto j = ceph::allocate_unique<detail::JournalProcessor<Handler>>(
+      a, this, std::move(handler));
+    // Ownership passes to the processor from here on.
+    auto p = j.release();
+    p->process();
+  }
+
+  template<typename Handler>
+  class NewPartPreparer {
+    FIFO* f;
+    Handler handler;
+    std::vector<fifo::journal_entry> jentries;
+    int i;
+    std::int64_t new_head_part_num;
+
+  public:
+
+    void operator ()(bs::error_code ec, bool canceled) {
+      if (ec) {
+       std::move(handler)(ec);
+       return;
+      }
+
+      if (canceled) {
+       std::unique_lock l(f->m);
+       auto iter = f->info.journal.find(jentries.front().part_num);
+       auto max_push_part_num = f->info.max_push_part_num;
+       auto head_part_num = f->info.head_part_num;
+       auto version = f->info.version;
+       auto found = (iter != f->info.journal.end());
+       l.unlock();
+       if ((max_push_part_num >= jentries.front().part_num &&
+           head_part_num >= new_head_part_num)) {
+         /* raced, but new part was already written */
+         std::move(handler)(bs::error_code{});
+         return;
+       }
+       if (i >= MAX_RACE_RETRIES) {
+         std::move(handler)(errc::raced);
+         return;
+       }
+       if (!found) {
+         auto e = ba::get_associated_executor(handler, f->get_executor());
+         auto a = ba::get_associated_allocator(handler);
+         f->_update_meta(fifo::update{}
+                         .journal_entries_add(jentries),
+                          version,
+                         ca::bind_ea(
+                           e, a,
+                           NewPartPreparer(f, std::move(handler),
+                                           jentries,
+                                           i + 1, new_head_part_num)));
+         return;
+       }
+       // Fall through. We still need to process the journal.
+      }
+      f->_process_journal(std::move(handler));
+      return;
+    }
+
+    NewPartPreparer(FIFO* f,
+                   Handler&& handler,
+                   std::vector<fifo::journal_entry> jentries,
+                   int i, std::int64_t new_head_part_num)
+      : f(f), handler(std::move(handler)), jentries(std::move(jentries)),
+       i(i), new_head_part_num(new_head_part_num) {}
+  };
+
+  template<typename Handler>
+  void _prepare_new_part(bool is_head,
+                        Handler&& handler /* error_code */) {
+    std::unique_lock l(m);
+    std::vector jentries = { info.next_journal_entry(generate_tag()) };
+    std::int64_t new_head_part_num = info.head_part_num;
+    auto version = info.version;
+
+    if (is_head) {
+      auto new_head_jentry = jentries.front();
+      new_head_jentry.op = fifo::journal_entry::Op::set_head;
+      new_head_part_num = jentries.front().part_num;
+      jentries.push_back(std::move(new_head_jentry));
+    }
+    l.unlock();
+
+    auto e = ba::get_associated_executor(handler, get_executor());
+    auto a = ba::get_associated_allocator(handler);
+    _update_meta(fifo::update{}.journal_entries_add(jentries),
+                version,
+                ca::bind_ea(
+                  e, a,
+                  NewPartPreparer(this, std::move(handler),
+                                  jentries, 0, new_head_part_num)));
+  }
+
+  template<typename Handler>
+  class NewHeadPreparer {
+    FIFO* f;
+    Handler handler;
+    int i;
+    std::int64_t new_head_num;
+
+  public:
+
+    void operator ()(bs::error_code ec, bool canceled) {
+      std::unique_lock l(f->m);
+      auto head_part_num = f->info.head_part_num;
+      auto version = f->info.version;
+      l.unlock();
+
+      if (ec) {
+       std::move(handler)(ec);
+       return;
+      }
+      if (canceled) {
+       if (i >= MAX_RACE_RETRIES) {
+         std::move(handler)(errc::raced);
+         return;
+       }
+
+       // Raced, but there's still work to do!
+       if (head_part_num < new_head_num) {
+         auto e = ba::get_associated_executor(handler, f->get_executor());
+         auto a = ba::get_associated_allocator(handler);
+         f->_update_meta(fifo::update{}.head_part_num(new_head_num),
+                         version,
+                         ca::bind_ea(
+                           e, a,
+                           NewHeadPreparer(f, std::move(handler),
+                                           i + 1,
+                                           new_head_num)));
+         return;
+       }
+      }
+      // Either we succeeded, or we were raced by someone who did it for us.
+      std::move(handler)(bs::error_code{});
+      return;
+    }
+
+    NewHeadPreparer(FIFO* f,
+                   Handler&& handler,
+                   int i, std::int64_t new_head_num)
+      : f(f), handler(std::move(handler)), i(i), new_head_num(new_head_num) {}
+  };
+
+  template<typename Handler>
+  void _prepare_new_head(Handler&& handler /* error_code */) {
+    std::unique_lock l(m);
+    int64_t new_head_num = info.head_part_num + 1;
+    auto max_push_part_num = info.max_push_part_num;
+    auto version = info.version;
+    l.unlock();
+
+    if (max_push_part_num < new_head_num) {
+      auto e = ba::get_associated_executor(handler, get_executor());
+      auto a = ba::get_associated_allocator(handler);
+      _prepare_new_part(
+       true,
+       ca::bind_ea(
+         e, a,
+         [this, new_head_num,
+          handler = std::move(handler)](bs::error_code ec) mutable {
+           if (ec) {
+             handler(ec);
+             return;
+           }
+           std::unique_lock l(m);
+           if (info.max_push_part_num < new_head_num) {
+             l.unlock();
+             ldout(r->cct(), 0)
+               << "ERROR: " << __func__
+               << ": after new part creation: meta_info.max_push_part_num="
+               << info.max_push_part_num << " new_head_num="
+               << info.max_push_part_num << dendl;
+             std::move(handler)(errc::inconsistency);
+           } else {
+             l.unlock();
+             std::move(handler)(bs::error_code{});
+           }
+         }));
+      return;
+    }
+    auto e = ba::get_associated_executor(handler, get_executor());
+    auto a = ba::get_associated_allocator(handler);
+    _update_meta(fifo::update{}.head_part_num(new_head_num),
+                version,
+                ca::bind_ea(
+                  e, a,
+                  NewHeadPreparer(this, std::move(handler), 0,
+                                  new_head_num)));
+  }
+
+  /// Callback object for a class call that delivers a ready-made T.
+  ///
+  /// Stores the error in `ec` when there is one, otherwise copies the
+  /// delivered value into `result`.
+  template<typename T>
+  struct ExecHandleCB {
+    bs::error_code ec;
+    T result;
+    void operator()(bs::error_code e, const T& t) {
+      if (!e) {
+        result = t;
+      } else {
+        ec = e;
+      }
+    }
+  };
+
+  template<typename Handler>
+  class EntryPusher {
+    Handler handler;
+    using allocator_type = boost::asio::associated_allocator_t<Handler>;
+    using decoder_type = ExecHandleCB<int>;
+    using decoder_ptr = ceph::allocated_unique_ptr<decoder_type, allocator_type>;
+    decoder_ptr decoder;
+
+  public:
+
+    EntryPusher(Handler&& handler, decoder_ptr&& decoder)
+      : handler(std::move(handler)), decoder(std::move(decoder)) {}
+
+    void operator ()(bs::error_code ec) {
+      if (!ec) {
+        ec = decoder->ec;
+      }
+      auto reply = std::move(decoder->result);
+      decoder.reset(); // free handler-allocated memory before dispatching
+
+      std::move(handler)(ec, std::move(reply));
+    }
+  };
+
+  template<typename Handler>
+  auto push_entries(const std::deque<cb::list>& data_bufs,
+                   Handler&& handler /* error_code, int */) {
+    WriteOp op;
+    std::unique_lock l(m);
+    auto head_part_num = info.head_part_num;
+    auto tag = info.head_tag;
+    auto oid = info.part_oid(head_part_num);
+    l.unlock();
+
+    auto a = ba::get_associated_allocator(handler);
+    auto reply = ceph::allocate_unique<ExecHandleCB<int>>(a);
+
+    auto e = ba::get_associated_executor(handler, get_executor());
+    push_part(op, tag, data_bufs, std::ref(*reply));
+    return r->execute(oid, ioc, std::move(op),
+                     ca::bind_ea(e, a, EntryPusher(std::move(handler),
+                                                   std::move(reply))));
+  }
+
+  template<typename CT>
+  auto trim_part(int64_t part_num,
+                uint64_t ofs,
+                std::optional<std::string_view> tag,
+                CT&& ct) {
+    WriteOp op;
+    cls::fifo::trim_part(op, tag, ofs);
+    return r->execute(info.part_oid(part_num), ioc, std::move(op),
+                     std::forward<CT>(ct));
+  }
+
+
+  /// Handler-style state machine that pushes a queue of entries in
+  /// batches.
+  ///
+  /// `remaining` holds entries not yet batched, `batch` the entries of
+  /// the push currently in flight, and `i` counts consecutive
+  /// canceled new-head attempts. The object passes itself on (as a
+  /// newly constructed Pusher) as the completion handler of each
+  /// asynchronous step, and calls `handler` once, with the final
+  /// error_code.
+  template<typename Handler>
+  class Pusher {
+    FIFO* f;
+    std::deque<cb::list> remaining;
+    std::deque<cb::list> batch;
+    int i;
+    Handler handler;
+
+    // Drop the first `successes` entries of the batch, top it up from
+    // `remaining`, and push it. Completes the handler with success
+    // when nothing is left to send.
+    void prep_then_push(const unsigned successes) {
+      std::unique_lock l(f->m);
+      auto max_part_size = f->info.params.max_part_size;
+      auto part_entry_overhead = f->part_entry_overhead;
+      l.unlock();
+
+      // Length (data plus per-entry overhead) of what stays in the batch.
+      uint64_t batch_len = 0;
+      if (successes > 0) {
+       if (successes == batch.size()) {
+         batch.clear();
+       } else  {
+         batch.erase(batch.begin(), batch.begin() + successes);
+         for (const auto& b : batch) {
+           batch_len +=  b.length() + part_entry_overhead;
+         }
+       }
+      }
+
+      if (batch.empty() && remaining.empty()) {
+       std::move(handler)(bs::error_code{});
+       return;
+      }
+
+      while (!remaining.empty() &&
+            (remaining.front().length() + batch_len <= max_part_size)) {
+
+       /* We can send entries with data_len up to max_entry_size,
+          however, we want to also account the overhead when
+          dealing with multiple entries. Previous check doesn't
+          account for overhead on purpose. */
+       batch_len += remaining.front().length() + part_entry_overhead;
+       batch.push_back(std::move(remaining.front()));
+       remaining.pop_front();
+      }
+      push();
+    }
+
+    // Send the current batch. The continuation is a new Pusher that
+    // takes `remaining` and `handler` by move and a copy of `batch`.
+    void push() {
+      auto e = ba::get_associated_executor(handler, f->get_executor());
+      auto a = ba::get_associated_allocator(handler);
+      f->push_entries(batch,
+                     ca::bind_ea(e, a,
+                                 Pusher(f, std::move(remaining),
+                                        batch, i,
+                                        std::move(handler))));
+    }
+
+  public:
+
+    // Initial call!
+    void operator ()() {
+      prep_then_push(0);
+    }
+
+    // Called with response to push_entries
+    // `r` is passed on to prep_then_push() as the number of entries
+    // to drop from the front of the batch. result_out_of_range asks
+    // for a new head part instead.
+    void operator ()(bs::error_code ec, int r) {
+      if (ec == bs::errc::result_out_of_range) {
+       auto e = ba::get_associated_executor(handler, f->get_executor());
+       auto a = ba::get_associated_allocator(handler);
+       f->_prepare_new_head(
+         ca::bind_ea(e, a,
+                     Pusher(f, std::move(remaining),
+                            std::move(batch), i,
+                            std::move(handler))));
+       return;
+      }
+      if (ec) {
+       std::move(handler)(ec);
+       return;
+      }
+      i = 0; // We've made forward progress, so reset the race counter!
+      prep_then_push(r);
+    }
+
+    // Called with response to prepare_new_head
+    // A canceled attempt counts against MAX_RACE_RETRIES; any other
+    // error ends the operation. Otherwise the pending batch is
+    // (re)sent, or a new one is built if the batch is empty.
+    void operator ()(bs::error_code ec) {
+      if (ec == bs::errc::operation_canceled) {
+       if (i == MAX_RACE_RETRIES) {
+         ldout(f->r->cct(), 0)
+           << "ERROR: " << __func__
+           << "(): race check failed too many times, likely a bug" << dendl;
+         std::move(handler)(make_error_code(errc::raced));
+         return;
+       }
+       ++i;
+      } else if (ec) {
+       std::move(handler)(ec);
+       return;
+      }
+
+      if (batch.empty()) {
+       prep_then_push(0);
+       return;
+      } else {
+       push();
+       return;
+      }
+    }
+
+    // f - Owning FIFO; remaining/batch - entry queues as described on
+    // the class; i - current race retry count; handler - final
+    // completion handler, signature (error_code).
+    Pusher(FIFO* f, std::deque<cb::list>&& remaining,
+          std::deque<cb::list> batch, int i,
+          Handler&& handler)
+      : f(f), remaining(std::move(remaining)),
+       batch(std::move(batch)), i(i),
+       handler(std::move(handler)) {}
+  };
+
+  template<typename Handler>
+  class Lister {
+    FIFO* f;
+    std::vector<list_entry> result;
+    bool more = false;
+    std::int64_t part_num;
+    std::uint64_t ofs;
+    int max_entries;
+    bs::error_code ec_out;
+    std::vector<fifo::part_list_entry> entries;
+    bool part_more = false;
+    bool part_full = false;
+    Handler handler;
+
+    void handle(bs::error_code ec) {
+      auto h = std::move(handler);
+      auto m = more;
+      auto r = std::move(result);
+
+      FIFO::assoc_delete(h, this);
+      std::move(h)(ec, std::move(r), m);
+    }
+
+  public:
+    Lister(FIFO* f, std::int64_t part_num, std::uint64_t ofs, int max_entries,
+          Handler&& handler)
+      : f(f), part_num(part_num), ofs(ofs), max_entries(max_entries),
+       handler(std::move(handler)) {
+      result.reserve(max_entries);
+    }
+
+
+    Lister(const Lister&) = delete;
+    Lister& operator =(const Lister&) = delete;
+    Lister(Lister&&) = delete;
+    Lister& operator =(Lister&&) = delete;
+
+    void list() {
+      if (max_entries > 0) {
+       ReadOp op;
+       ec_out.clear();
+       part_more = false;
+       part_full = false;
+       entries.clear();
+
+       std::unique_lock l(f->m);
+       auto part_oid = f->info.part_oid(part_num);
+       l.unlock();
+
+       list_part(op,
+                 {},
+                 ofs,
+                 max_entries,
+                 &ec_out,
+                 &entries,
+                 &part_more,
+                 &part_full,
+                 nullptr);
+       auto e = ba::get_associated_executor(handler, f->get_executor());
+       auto a = ba::get_associated_allocator(handler);
+       f->r->execute(
+         part_oid,
+         f->ioc,
+         std::move(op),
+         nullptr,
+         ca::bind_ea(
+           e, a,
+           [t = std::unique_ptr<Lister>(this), this,
+            part_oid](bs::error_code ec) mutable {
+             t.release();
+             if (ec == bs::errc::no_such_file_or_directory) {
+               auto e = ba::get_associated_executor(handler,
+                                                    f->get_executor());
+               auto a = ba::get_associated_allocator(handler);
+               f->_read_meta(
+                 ca::bind_ea(
+                   e, a,
+                   [this](bs::error_code ec) mutable {
+                     if (ec) {
+                       handle(ec);
+                       return;
+                     }
+
+                     if (part_num < f->info.tail_part_num) {
+                       /* raced with trim? restart */
+                       max_entries += result.size();
+                       result.clear();
+                       part_num = f->info.tail_part_num;
+                       ofs = 0;
+                       list();
+                     }
+                     /* assuming part was not written yet, so end of data */
+                     more = false;
+                     handle({});
+                     return;
+                   }));
+               return;
+             }
+             if (ec) {
+               ldout(f->r->cct(), 0)
+                 << __func__
+                 << "(): list_part() on oid=" << part_oid
+                 << " returned ec=" << ec.message() << dendl;
+               handle(ec);
+               return;
+             }
+             if (ec_out) {
+               ldout(f->r->cct(), 0)
+                 << __func__
+                 << "(): list_part() on oid=" << f->info.part_oid(part_num)
+                 << " returned ec=" << ec_out.message() << dendl;
+               handle(ec_out);
+               return;
+             }
+
+             more = part_full || part_more;
+             for (auto& entry : entries) {
+               list_entry e;
+               e.data = std::move(entry.data);
+               e.marker = marker{part_num, entry.ofs}.to_string();
+               e.mtime = entry.mtime;
+               result.push_back(std::move(e));
+             }
+             max_entries -= entries.size();
+             entries.clear();
+             if (max_entries > 0 &&
+                 part_more) {
+               list();
+               return;
+             }
+
+             if (!part_full) { /* head part is not full */
+               handle({});
+               return;
+             }
+             ++part_num;
+             ofs = 0;
+           list();
+           }));
+      } else {
+       handle({});
+       return;
+      }
+    }
+  };
+
+  template<typename Handler>
+  class Trimmer {
+    FIFO* f;
+    std::int64_t part_num;
+    std::uint64_t ofs;
+    Handler handler;
+    std::int64_t pn;
+    int i = 0;
+
+    void handle(bs::error_code ec) {
+      auto h = std::move(handler);
+
+      FIFO::assoc_delete(h, this);
+      return std::move(h)(ec);
+    }
+
+    void update() {
+      std::unique_lock l(f->m);
+      auto objv = f->info.version;
+      l.unlock();
+      auto a = ba::get_associated_allocator(handler);
+      auto e = ba::get_associated_executor(handler, f->get_executor());
+      f->_update_meta(
+       fifo::update{}.tail_part_num(part_num),
+       objv,
+       ca::bind_ea(
+         e, a,
+         [this, t = std::unique_ptr<Trimmer>(this)](bs::error_code ec,
+                                                    bool canceled) mutable {
+           t.release();
+           if (canceled)
+             if (i >= MAX_RACE_RETRIES) {
+               ldout(f->r->cct(), 0)
+                 << "ERROR: " << __func__
+                 << "(): race check failed too many times, likely a bug"
+                 << dendl;
+               handle(errc::raced);
+               return;
+             }
+           std::unique_lock l(f->m);
+           auto tail_part_num = f->info.tail_part_num;
+           l.unlock();
+           if (tail_part_num < part_num) {
+             ++i;
+             update();
+             return;
+           }
+           handle({});
+           return;
+         }));
+    }
+
+  public:
+    Trimmer(FIFO* f, std::int64_t part_num, std::uint64_t ofs,
+           Handler&& handler)
+      : f(f), part_num(part_num), ofs(ofs), handler(std::move(handler)) {
+      std::unique_lock l(f->m);
+      pn = f->info.tail_part_num;
+    }
+
+    void trim() {
+      auto a = ba::get_associated_allocator(handler);
+      auto e = ba::get_associated_executor(handler, f->get_executor());
+      if (pn < part_num) {
+       std::unique_lock l(f->m);
+       auto max_part_size = f->info.params.max_part_size;
+       l.unlock();
+       f->trim_part(
+         pn, max_part_size, std::nullopt,
+         ca::bind_ea(
+           e, a,
+           [t = std::unique_ptr<Trimmer>(this),
+            this](bs::error_code ec) mutable {
+             t.release();
+             if (ec && ec != bs::errc::no_such_file_or_directory) {
+               ldout(f->r->cct(), 0)
+                 << __func__ << "(): ERROR: trim_part() on part="
+                 << pn << " returned ec=" << ec.message() << dendl;
+               handle(ec);
+               return;
+             }
+             ++pn;
+             trim();
+           }));
+       return;
+      }
+      f->trim_part(
+       part_num, ofs, std::nullopt,
+       ca::bind_ea(
+         e, a,
+         [t = std::unique_ptr<Trimmer>(this),
+           this](bs::error_code ec) mutable {
+           t.release();
+           if (ec && ec != bs::errc::no_such_file_or_directory) {
+             ldout(f->r->cct(), 0)
+               << __func__ << "(): ERROR: trim_part() on part=" << part_num
+               << " returned ec=" << ec.message() << dendl;
+             handle(ec);
+             return;
+           }
+           std::unique_lock l(f->m);
+           auto tail_part_num = f->info.tail_part_num;
+           l.unlock();
+           if (part_num <= tail_part_num) {
+             /* don't need to modify meta info */
+             handle({});
+             return;
+           }
+           update();
+         }));
+    }
+  };
+
+  template<typename Handler>
+  class PartInfoGetter {
+    Handler handler;
+    using allocator_type = boost::asio::associated_allocator_t<Handler>;
+    using decoder_type = ExecDecodeCB<fifo::op::get_part_info_reply>;
+    using decoder_ptr = ceph::allocated_unique_ptr<decoder_type, allocator_type>;
+    decoder_ptr decoder;
+  public:
+    PartInfoGetter(Handler&& handler, decoder_ptr&& decoder)
+      : handler(std::move(handler)), decoder(std::move(decoder)) {}
+
+    void operator ()(bs::error_code ec) {
+      if (!ec) {
+        ec = decoder->ec;
+      }
+      auto reply = std::move(decoder->result);
+      decoder.reset(); // free handler-allocated memory before dispatching
+
+      auto p = ca::bind_handler(std::move(handler),
+                               ec, std::move(reply.header));
+      std::move(p)();
+    }
+  };
+
+
+};
+
+namespace detail {
+template<typename Handler>
+class JournalProcessor {
+private:
+  FIFO* const fifo;
+  Handler handler;
+
+  std::vector<fifo::journal_entry> processed;
+  std::multimap<std::int64_t, fifo::journal_entry> journal;
+  std::multimap<std::int64_t, fifo::journal_entry>::iterator iter;
+  std::int64_t new_tail;
+  std::int64_t new_head;
+  std::int64_t new_max;
+  int race_retries = 0;
+
+  template<typename CT>
+  auto create_part(int64_t part_num, std::string_view tag, CT&& ct) {
+    WriteOp op;
+    op.create(false); /* We don't need exclusivity, part_init ensures
+                        we're creating from the  same journal entry. */
+    std::unique_lock l(fifo->m);
+    part_init(op, tag, fifo->info.params);
+    auto oid = fifo->info.part_oid(part_num);
+    l.unlock();
+    return fifo->r->execute(oid, fifo->ioc,
+                           std::move(op), std::forward<CT>(ct));
+  }
+
+  template<typename CT>
+  auto remove_part(int64_t part_num, std::string_view tag, CT&& ct) {
+    WriteOp op;
+    op.remove();
+    std::unique_lock l(fifo->m);
+    auto oid = fifo->info.part_oid(part_num);
+    l.unlock();
+    return fifo->r->execute(oid, fifo->ioc,
+                           std::move(op), std::forward<CT>(ct));
+  }
+
+  template<typename PP>
+  void process_journal_entry(const fifo::journal_entry& entry,
+                            PP&& pp) {
+    switch (entry.op) {
+    case fifo::journal_entry::Op::unknown:
+      std::move(pp)(errc::inconsistency);
+      return;
+      break;
+
+    case fifo::journal_entry::Op::create:
+      create_part(entry.part_num, entry.part_tag, std::move(pp));
+      return;
+      break;
+    case fifo::journal_entry::Op::set_head:
+      ba::post(ba::get_associated_executor(handler, fifo->get_executor()),
+                     [pp = std::move(pp)]() mutable {
+                       std::move(pp)(bs::error_code{});
+                     });
+      return;
+      break;
+    case fifo::journal_entry::Op::remove:
+      remove_part(entry.part_num, entry.part_tag, std::move(pp));
+      return;
+      break;
+    }
+    std::move(pp)(errc::inconsistency);
+    return;
+  }
+
+  /// Build the completion for one journal entry, bound to the user
+  /// handler's associated executor and allocator. On success it folds the
+  /// entry into new_max/new_head/new_tail, records it in `processed`,
+  /// advances `iter` and continues with process(); on failure it finishes
+  /// the whole operation with errc::inconsistency.
+  auto journal_entry_finisher(const fifo::journal_entry& entry) {
+    auto a = ba::get_associated_allocator(handler);
+    auto e = ba::get_associated_executor(handler, fifo->get_executor());
+    return
+      ca::bind_ea(
+       e, a,
+       // The closure holds `this` in a unique_ptr while the request is
+       // outstanding -- presumably so the processor is destroyed if the
+       // completion is dropped without being called (TODO confirm).
+       [t = std::unique_ptr<JournalProcessor>(this), this,
+        entry](bs::error_code ec) mutable {
+         // Invoked: give up the closure's ownership; from here on the
+         // object is released through handle().
+         t.release();
+         // Removing a part that is already gone counts as success.
+         if (entry.op == fifo::journal_entry::Op::remove &&
+             ec == bs::errc::no_such_file_or_directory)
+           ec.clear();
+
+         if (ec) {
+           ldout(fifo->r->cct(), 0)
+             << __func__
+             << "(): ERROR: failed processing journal entry for part="
+             << entry.part_num << " with error " << ec.message()
+             << " Bug or inconsistency." << dendl;
+           // The original error is logged above; the caller only sees
+           // errc::inconsistency.
+           handle(errc::inconsistency);
+           return;
+         } else {
+           switch (entry.op) {
+           case fifo::journal_entry::Op::unknown:
+             // Can't happen. Filtered out in process_journal_entry.
+             abort();
+             break;
+
+           case fifo::journal_entry::Op::create:
+             if (entry.part_num > new_max) {
+               new_max = entry.part_num;
+             }
+             break;
+           case fifo::journal_entry::Op::set_head:
+             if (entry.part_num > new_head) {
+               new_head = entry.part_num;
+             }
+             break;
+           case fifo::journal_entry::Op::remove:
+             // The new tail is the part after the one just removed.
+             if (entry.part_num >= new_tail) {
+               new_tail = entry.part_num + 1;
+             }
+             break;
+           }
+           processed.push_back(entry);
+         }
+         // Move on to the next entry of the snapshot (or postprocess).
+         ++iter;
+         process();
+       });
+  }
+
+  struct JournalPostprocessor {
+    std::unique_ptr<JournalProcessor> j_;
+    bool first;
+    void operator ()(bs::error_code ec, bool canceled) {
+      std::optional<int64_t> tail_part_num;
+      std::optional<int64_t> head_part_num;
+      std::optional<int64_t> max_part_num;
+
+      auto j = j_.release();
+
+      if (!first && !ec && !canceled) {
+       j->handle({});
+       return;
+      }
+
+      if (canceled) {
+       if (j->race_retries >= MAX_RACE_RETRIES) {
+         ldout(j->fifo->r->cct(), 0) << "ERROR: " << __func__ <<
+           "(): race check failed too many times, likely a bug" << dendl;
+         j->handle(errc::raced);
+         return;
+       }
+
+       ++j->race_retries;
+
+       std::vector<fifo::journal_entry> new_processed;
+       std::unique_lock l(j->fifo->m);
+       for (auto& e : j->processed) {
+         auto jiter = j->fifo->info.journal.find(e.part_num);
+         /* journal entry was already processed */
+         if (jiter == j->fifo->info.journal.end() ||
+             !(jiter->second == e)) {
+           continue;
+         }
+         new_processed.push_back(e);
+       }
+       j->processed = std::move(new_processed);
+      }
+
+      std::unique_lock l(j->fifo->m);
+      auto objv = j->fifo->info.version;
+      if (j->new_tail > j->fifo->info.tail_part_num) {
+       tail_part_num = j->new_tail;
+      }
+
+      if (j->new_head > j->fifo->info.head_part_num) {
+       head_part_num = j->new_head;
+      }
+
+      if (j->new_max > j->fifo->info.max_push_part_num) {
+       max_part_num = j->new_max;
+      }
+      l.unlock();
+
+      if (j->processed.empty() &&
+         !tail_part_num &&
+         !max_part_num) {
+       /* nothing to update anymore */
+       j->handle({});
+       return;
+      }
+      auto a = ba::get_associated_allocator(j->handler);
+      auto e = ba::get_associated_executor(j->handler, j->fifo->get_executor());
+      j->fifo->_update_meta(fifo::update{}
+                           .tail_part_num(tail_part_num)
+                           .head_part_num(head_part_num)
+                           .max_push_part_num(max_part_num)
+                           .journal_entries_rm(j->processed),
+                            objv,
+                            ca::bind_ea(
+                             e, a,
+                             JournalPostprocessor{j, false}));
+      return;
+    }
+
+    JournalPostprocessor(JournalProcessor* j, bool first)
+      : j_(j), first(first) {}
+  };
+
+  /// Called once every journal entry has been handled: finish right away
+  /// when nothing was processed, otherwise start the metadata update.
+  void postprocess() {
+    if (!processed.empty()) {
+      // First (synchronous) pass: no error and no cancellation yet.
+      JournalPostprocessor first_pass(this, true);
+      first_pass({}, false);
+      return;
+    }
+    handle({});
+  }
+
+  /// Finish the whole operation: hand `ec` to the user's handler on its
+  /// associated executor. This passes `this` to FIFO::assoc_delete(), so
+  /// no member may be touched afterwards.
+  void handle(bs::error_code ec) {
+    // Everything needed from `handler` is fetched, and the handler itself
+    // moved out, before assoc_delete() is given `this`.
+    auto e = ba::get_associated_executor(handler, fifo->get_executor());
+    auto a = ba::get_associated_allocator(handler);
+    auto h = std::move(handler);
+    // Presumably destroys and frees this object using the handler's
+    // associated allocator -- TODO confirm against FIFO::assoc_delete.
+    FIFO::assoc_delete(h, this);
+    e.dispatch(ca::bind_handler(std::move(h), ec), a);
+    return;
+  }
+
+public:
+
+  /// Take a snapshot of the FIFO's journal and current part numbers
+  /// (under fifo->m) and take ownership of the completion handler.
+  JournalProcessor(FIFO* fifo, Handler&& handler)
+    : fifo(fifo), handler(std::move(handler)) {
+    std::unique_lock l(fifo->m);
+    journal = fifo->info.journal;
+    // `iter` refers to our private copy, not to fifo->info.journal.
+    iter = journal.begin();
+    new_tail = fifo->info.tail_part_num;
+    new_head = fifo->info.head_part_num;
+    new_max = fifo->info.max_push_part_num;
+  }
+
+  // Neither copyable nor movable: completions capture `this` by pointer.
+  JournalProcessor(const JournalProcessor&) = delete;
+  JournalProcessor& operator =(const JournalProcessor&) = delete;
+  JournalProcessor(JournalProcessor&&) = delete;
+  JournalProcessor& operator =(JournalProcessor&&) = delete;
+
+  /// Handle the journal entry under `iter`, or run postprocessing once
+  /// the whole snapshot has been consumed.
+  void process() {
+    if (iter == journal.end()) {
+      postprocess();
+      return;
+    }
+    // Work on a copy of the entry; the finisher keeps its own copy too.
+    const auto entry = iter->second;
+    process_journal_entry(entry,
+                         journal_entry_finisher(entry));
+  }
+};
+}
+}
+
+#endif // CEPH_RADOS_CLS_FIFIO_H
index c261a313e3bb86fd544a2fd29ce642b7497b0df6..5f1ac9ff734f2927df3d57033e26ed33c864ca34 100644 (file)
@@ -8,7 +8,9 @@ target_include_directories(unit-main PRIVATE
   $<TARGET_PROPERTY:GTest::GTest,INTERFACE_INCLUDE_DIRECTORIES>)
 
 add_subdirectory(cls_hello)
+if(WITH_BOOST_CONTEXT)
 add_subdirectory(cls_fifo)
+endif()
 add_subdirectory(cls_lock)
 add_subdirectory(cls_cas)
 add_subdirectory(cls_log)
index 49487f8b795bb7906df902a0ce306175690843dd..492e6e1f9a1c4f2c7c99883c5cbc36e8d27a1fff 100644 (file)
@@ -1,17 +1,40 @@
+if(WITH_BOOST_CONTEXT)
 add_executable(ceph_test_cls_fifo
   test_cls_fifo.cc
   )
+target_include_directories(ceph_test_cls_fifo PRIVATE
+  $<TARGET_PROPERTY:spawn,INTERFACE_INCLUDE_DIRECTORIES>)
 target_link_libraries(ceph_test_cls_fifo
-  cls_fifo_client
-  librados
-  global
+  neorados_cls_fifo
+  libneorados
+  spawn
   ${UNITTEST_LIBS}
   ${BLKID_LIBRARIES}
   ${CMAKE_DL_LIBS}
   ${CRYPTO_LIBS}
   ${EXTRALIBS}
-  radostest-cxx
+  neoradostest-support
   )
 install(TARGETS
   ceph_test_cls_fifo
   DESTINATION ${CMAKE_INSTALL_BINDIR})
+
+# Benchmark/inspection tool for the neorados FIFO.
+add_executable(ceph_bench_cls_fifo
+  bench_cls_fifo.cc
+  )
+target_include_directories(ceph_bench_cls_fifo PRIVATE
+  $<TARGET_PROPERTY:spawn,INTERFACE_INCLUDE_DIRECTORIES>)
+target_link_libraries(ceph_bench_cls_fifo
+  neorados_cls_fifo
+  libneorados
+  spawn
+  ${UNITTEST_LIBS}
+  ${BLKID_LIBRARIES}
+  ${CMAKE_DL_LIBS}
+  ${CRYPTO_LIBS}
+  ${EXTRALIBS}
+  )
+# Install the benchmark itself; ceph_test_cls_fifo already has its own
+# install() rule earlier in this file.
+install(TARGETS
+  ceph_bench_cls_fifo
+  DESTINATION ${CMAKE_INSTALL_BINDIR})
+endif()
diff --git a/src/test/cls_fifo/bench_cls_fifo.cc b/src/test/cls_fifo/bench_cls_fifo.cc
new file mode 100644 (file)
index 0000000..b990fbe
--- /dev/null
@@ -0,0 +1,462 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2020 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+
+#include <cerrno>
+#include <chrono>
+#include <cstdint>
+#include <exception>
+#include <future>
+#include <iostream>
+#include <string_view>
+
+#include <boost/asio.hpp>
+#include <boost/system/error_code.hpp>
+#include <boost/program_options.hpp>
+
+#undef FMT_HEADER_ONLY
+#define FMT_HEADER_ONLY 1
+#include <fmt/chrono.h>
+#include <fmt/format.h>
+#include <fmt/ostream.h>
+
+#include <spawn/spawn.hpp>
+
+#include "include/neorados/RADOS.hpp"
+
+#include "neorados/cls/fifo.h"
+
+namespace ba = boost::asio;
+namespace bs = boost::system;
+namespace bpo = boost::program_options;
+namespace cb = ceph::buffer;
+namespace R = neorados;
+namespace RCf = neorados::cls::fifo;
+namespace fifo = rados::cls::fifo;
+namespace s = spawn;
+namespace sc = std::chrono;
+
+namespace {
+static constexpr auto PUSH = 0x01 << 0;
+static constexpr auto PULL = 0x01 << 1;
+static constexpr auto BOTH = PUSH | PULL;
+static constexpr auto CLEAN = 0x01 << 2;
+static constexpr auto METADATA = 0x01 << 3;
+static constexpr auto PARTINFO = 0x01 << 4;
+static constexpr auto LIST = 0x01 << 5;
+
+/// Result of one benchmark run: how many entries were moved and how long
+/// it took.
+struct benchmark {
+  std::uint32_t entries = 0;
+  sc::duration<double> elapsed = 0ns;
+
+  benchmark() = default;
+  benchmark(std::uint32_t entries, sc::duration<double> elapsed)
+    : entries(entries), elapsed(elapsed) {}
+
+  /// Entries per second; the elapsed time is clamped to at least 1ns so
+  /// the division is never by zero.
+  std::uint64_t ratio() const {
+    const sc::duration<double> floor(1ns);
+    const auto span = std::max(elapsed, floor);
+    return entries/span.count();
+  }
+};
+
+benchmark push(RCf::FIFO& f, const std::uint32_t count,
+              const std::uint32_t entry_size, const std::uint32_t push_entries,
+              s::yield_context y)
+{
+  cb::list entry;
+  entry.push_back(cb::create_small_page_aligned(entry_size));
+  entry.zero();
+
+  std::vector entries(std::min(count, push_entries), entry);
+  auto remaining = count;
+  auto start = sc::steady_clock::now();
+  while (remaining) {
+    if (entries.size() > remaining) {
+      entries.resize(remaining);
+    }
+    f.push(entries, y);
+    remaining -= entries.size();
+  }
+  auto finish = sc::steady_clock::now();
+  return benchmark(count, (finish - start));
+}
+
+benchmark pull(RCf::FIFO& f, const std::uint32_t count,
+              const std::uint32_t pull_entries, s::yield_context y)
+{
+  auto remaining = count;
+  std::uint32_t got = 0;
+
+  auto start = sc::steady_clock::now();
+  while (remaining) {
+    auto [result, more] = f.list(std::min(remaining, pull_entries),
+                                std::nullopt, y);
+    if (result.empty())
+      break;
+    got += result.size();
+    remaining -= result.size();
+    f.trim(result.back().marker, y);
+  }
+  auto finish = sc::steady_clock::now();
+  return benchmark(got, (finish - start));
+}
+
+void concurpull(const std::string& oid, const std::int64_t pool,
+               const std::uint32_t count, const std::uint32_t pull_entries,
+               std::promise<benchmark> notify, const bool* const exit_early)
+{
+  ba::io_context c;
+  benchmark bench;
+  std::exception_ptr ex;
+  s::spawn(
+    c,
+    [&](s::yield_context y) {
+      try {
+       auto r = R::RADOS::Builder{}.build(c, y);
+       R::IOContext ioc(pool);
+       auto f = RCf::FIFO::open(r, ioc, oid, y);
+       auto remaining = count;
+       std::uint32_t got = 0;
+
+       auto start = sc::steady_clock::now();
+       while (remaining) {
+         if (*exit_early) break;
+         auto [result, more] =
+           f->list(std::min(remaining, pull_entries), std::nullopt, y);
+         if (result.empty()) {
+           // We just keep going assuming they'll push more.
+           continue;
+         }
+         got += result.size();
+         remaining -= result.size();
+         if (*exit_early) break;
+         f->trim(result.back().marker, y);
+       }
+       auto finish = sc::steady_clock::now();
+       bench.entries = got;
+       bench.elapsed = finish - start;
+      } catch (const std::exception&) {
+       ex = std::current_exception();
+      }
+    });
+  c.run();
+  if (ex) {
+    notify.set_exception(std::current_exception());
+  } else {
+    notify.set_value(bench);
+  }
+}
+
+void clean(R::RADOS& r, const R::IOContext& ioc, RCf::FIFO& f,
+          s::yield_context y)
+{
+  f.read_meta(y);
+  const auto info = f.meta();
+  if (info.head_part_num > -1) {
+    for (auto i = info.tail_part_num; i <= info.head_part_num; ++i) {
+      R::WriteOp op;
+      op.remove();
+      r.execute(info.part_oid(i), ioc, std::move(op), y);
+    }
+  }
+  R::WriteOp op;
+  op.remove();
+  r.execute(info.id, ioc, std::move(op), y);
+}
+}
+
+int main(int argc, char* argv[])
+{
+  const std::string_view prog(argv[0]);
+  std::string command;
+  try {
+    std::uint32_t count = 0;
+    std::string oid;
+    std::string pool;
+    std::uint32_t entry_size = 0;
+    std::uint32_t push_entries = 0;
+    std::uint32_t pull_entries = 0;
+    std::uint64_t max_part_size = 0;
+    std::uint64_t max_entry_size = 0;
+    std::int64_t part_num = 0;
+    std::string marker;
+
+    bpo::options_description desc(fmt::format("{} options", prog));
+    desc.add_options()
+      ("help", "show help")
+      ("oid", bpo::value<std::string>(&oid)->default_value("fifo"s),
+       "the base oid for the fifo")
+      ("pool", bpo::value<std::string>(&pool)->default_value("fifo_benchmark"s),
+       "the base oid for the fifo")
+      ("count", bpo::value<std::uint32_t>(&count)->default_value(1024),
+       "total count of items")
+      ("entry-size", bpo::value<std::uint32_t>(&entry_size)->default_value(64),
+       "size of entries to push")
+      ("push-entries",
+       bpo::value<std::uint32_t>(&push_entries)
+       ->default_value(512), "entries to push per call")
+      ("max-part-size", bpo::value<std::uint64_t>(&max_part_size)
+       ->default_value(RCf::default_max_part_size),
+       "maximum entry size allowed by FIFO")
+      ("max-entry-size", bpo::value<std::uint64_t>(&max_entry_size)
+       ->default_value(RCf::default_max_entry_size),
+       "maximum entry size allowed by FIFO")
+      ("pull-entries",
+       bpo::value<uint32_t>(&pull_entries)
+       ->default_value(512), "entries to pull per call")
+      ("part-num",
+       bpo::value<int64_t>(&part_num)
+       ->default_value(-1), "partition number, -1 for head")
+      ("marker", bpo::value<std::string>(&marker), "marker to begin list")
+      ("command", bpo::value<std::string>(&command),
+       "the operation to perform");
+
+    bpo::positional_options_description p;
+    p.add("command", 1);
+
+    bpo::variables_map vm;
+
+    bpo::store(bpo::command_line_parser(argc, argv).
+              options(desc).positional(p).run(), vm);
+
+    bpo::notify(vm);
+
+    if (vm.count("help")) {
+      fmt::print(std::cout, "{}", desc);
+      fmt::print(std::cout, "\n{} commands:\n", prog);
+      fmt::print(std::cout, "    push\t\t\t push entries into fifo\n");
+      fmt::print(std::cout, "    pull\t\t\t retrieve and trim entries\n");
+      fmt::print(std::cout, "    both\t\t\t both at once, in two threads\n");
+      fmt::print(std::cout, "    metadata\t\t\t print metadata\n");
+      fmt::print(std::cout, "    partinfo\t\t\t print metadata\n");
+      fmt::print(std::cout, "    list\t\t\t list entries\n");
+      fmt::print(std::cout, "    clean\t\t\t clean up\n");
+      return 0;
+    }
+
+
+    if (vm.find("command") == vm.end()) {
+      fmt::print(std::cerr, "{}: a command is required\n", prog);
+      return 1;
+    }
+
+    int op = 0;
+    if (command == "push"s) {
+      op = PUSH;
+    } else if (command == "pull"s) {
+      op = PULL;
+    } else if (command == "both"s) {
+      op = BOTH;
+    } else if (command == "clean"s) {
+      op = CLEAN;
+    } else if (command == "metadata"s) {
+      op = METADATA;
+    } else if (command == "partinfo"s) {
+      op = PARTINFO;
+    } else if (command == "list"s) {
+      op = LIST;
+    } else {
+      fmt::print(std::cerr, "{}: {} is not a valid command\n",
+                prog, command);
+      return 1;
+    }
+
+    if (!(op & PULL) && !vm["pull-entries"].defaulted()) {
+      fmt::print(std::cerr, "{}: pull-entries is only meaningful when pulling\n",
+                prog);
+      return 1;
+    }
+
+    if (!(op & PUSH)) {
+      for (const auto& p : { "entry-size"s, "push-entries"s, "max-part-size"s,
+           "max-entry-size"s }) {
+       if (!vm[p].defaulted()) {
+         fmt::print(std::cerr, "{}: {} is only meaningful when pushing\n",
+                    prog, p);
+         return 1;
+       }
+      }
+    }
+
+    if (!(op & BOTH) && !(op & LIST) && !vm["count"].defaulted()) {
+      fmt::print(std::cerr, "{}: count is only meaningful when pulling, pushing, both, or listing\n",
+                prog);
+      return 1;
+    }
+
+    if (!(op & PARTINFO) && !vm["part-num"].defaulted()) {
+      fmt::print(std::cerr, "{}: part-num is only meaningful when getting part info\n",
+                prog);
+      return 1;
+    }
+
+    if (count == 0) {
+      fmt::print(std::cerr, "{}: count must be nonzero\n", prog);
+      return 1;
+    }
+
+    if ((op & PULL) && (pull_entries == 0)) {
+      fmt::print(std::cerr,
+                "{}: pull-entries must be nonzero\n", prog);
+      return 1;
+    }
+
+    if (!(op & LIST) && vm.count("marker") > 0) {
+      fmt::print(std::cerr, "{}: marker is only meaningful when listing\n",
+                prog);
+      return 1;
+    }
+
+    if (op & PUSH) {
+      if (entry_size == 0) {
+       fmt::print(std::cerr, "{}: entry-size must be nonzero\n", prog);
+       return 1;
+      }
+      if (push_entries== 0) {
+       fmt::print(std::cerr, "{}: push-entries must be nonzero\n", prog);
+       return 1;
+      }
+      if (max_entry_size == 0) {
+       fmt::print(std::cerr, "{}: max-entry-size must be nonzero\n", prog);
+       return 1;
+      }
+      if (max_part_size == 0) {
+       fmt::print(std::cerr, "{}: max-part-size must be nonzero\n", prog);
+       return 1;
+      }
+      if (entry_size > max_entry_size) {
+       fmt::print(std::cerr,
+                  "{}: entry-size may not be greater than max-entry-size\n",
+                  prog);
+       return 1;
+      }
+      if (max_entry_size >= max_part_size) {
+       fmt::print(std::cerr,
+                  "{}: max-entry-size may be less than max-part-size\n",
+                  prog);
+       return 1;
+      }
+    }
+
+    ba::io_context c;
+    benchmark pushmark, pullmark;
+    fifo::info meta;
+    fifo::part_header partinfo;
+    bool more = false;
+    std::vector<RCf::list_entry> entries;
+    s::spawn(
+      c,
+      [&](s::yield_context y) {
+       auto r = R::RADOS::Builder{}.build(c, y);
+       bs::error_code ec;
+       std::int64_t pid;
+       pid = r.lookup_pool(pool, y[ec]);
+       if (ec) {
+         r.create_pool(pool, std::nullopt, y);
+         pid = r.lookup_pool(pool, y);
+       }
+       const R::IOContext ioc(pid);
+       auto f = RCf::FIFO::create(r, ioc, oid, y, std::nullopt,
+                                  std::nullopt, false, max_part_size,
+                                  max_entry_size);
+
+       switch (op) {
+       case PUSH:
+         pushmark = push(*f, count, entry_size, push_entries, y);
+         break;
+
+       case PULL:
+         pullmark = pull(*f, count, pull_entries, y);
+         break;
+
+       case METADATA:
+         meta = f->meta();
+         break;
+
+       case PARTINFO:
+         meta = f->meta();
+         if (part_num == -1) {
+           part_num = meta.head_part_num;
+         }
+         partinfo = f->get_part_info(part_num, y);
+         break;
+
+       case LIST:
+         if (vm.count("marker") == 0) {
+           std::tie(entries, more) = f->list(count, std::nullopt, y);
+         } else {
+           std::tie(entries, more) = f->list(count, marker, y);
+         }
+         break;
+
+       case BOTH: {
+         std::promise<benchmark> notify;
+         bool exit_early = false;
+
+         auto notifier = notify.get_future();
+         std::thread t(concurpull, oid, pid, count, pull_entries,
+                       std::move(notify), &exit_early);
+         t.detach();
+         try {
+           pushmark = push(*f, count, entry_size, push_entries, y);
+         } catch (const std::exception&) {
+           exit_early = true;
+           notifier.wait();
+           throw;
+         }
+         pullmark = notifier.get();
+       }
+       }
+
+       if (op & CLEAN)
+         clean(r, ioc, *f, y);
+      });
+    c.run();
+    if (op & PUSH) {
+      fmt::print("Pushed {} in {} at {}/s\n",
+                pushmark.entries, pushmark.elapsed, pushmark.ratio());
+    }
+    if (op & PULL) {
+      if (pullmark.entries == count) {
+       fmt::print(std::cout, "Pulled {} in {} at {}/s\n",
+                  pullmark.entries, pullmark.elapsed, pullmark.ratio());
+      } else {
+       fmt::print(std::cout, "Pulled {} (of {} requested), in {} at {}/s\n",
+                  pullmark.entries, count, pullmark.elapsed, pullmark.ratio());
+      }
+    }
+    if (op & METADATA) {
+      fmt::print(std::cout, "Metadata: [{}]\n", meta);
+    }
+    if (op & PARTINFO) {
+      fmt::print(std::cout, "Info for partition {}: [{}]\n", part_num, partinfo);
+    }
+    if (op & LIST) {
+      for (const auto& entry : entries) {
+       fmt::print(std::cout, "{}\t{}\n", entry.marker, entry.mtime);
+      }
+      if (more) {
+       fmt::print(std::cout, "...");
+      }
+    }
+  } catch (const std::exception& e) {
+    if (command.empty()) {
+      fmt::print(std::cerr, "{}: {}\n", prog, e.what());
+    } else {
+      fmt::print(std::cerr, "{}: {}: {}\n", prog, command, e.what());
+    }
+    return 1;
+  }
+
+  return 0;
+}
index 58286f47cd2f33f22de0a768b1023ecaace34b02..23f106c49caef0e0a98af29d86f59c6060ca0540 100644 (file)
@@ -1,4 +1,4 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- 
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
 // vim: ts=8 sw=2 smarttab
 /*
  * Ceph - scalable distributed file system
  *
  * This is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software 
+ * License version 2.1, as published by the Free Software
  * Foundation.  See file COPYING.
- * 
+ *
  */
 
+#include <cerrno>
 #include <iostream>
-#include <errno.h>
+#include <string_view>
 
-#include "include/types.h"
-#include "include/rados/librados.hpp"
+#include <boost/asio.hpp>
+#include <boost/system/error_code.hpp>
 
-#include "test/librados/test_cxx.h"
-#include "global/global_context.h"
+#include <spawn/spawn.hpp>
 
-#include "gtest/gtest.h"
+#include "include/scope_guard.h"
+#include "include/types.h"
+#include "include/neorados/RADOS.hpp"
 
-using namespace librados;
+#include "cls/fifo/cls_fifo_ops.h"
 
-#include "cls/fifo/cls_fifo_client.h"
+#include "neorados/cls/fifo.h"
 
+#include "test/neorados/common_tests.h"
 
-using namespace rados::cls::fifo;
+#include "gtest/gtest.h"
 
-static CephContext *cct(librados::IoCtx& ioctx)
+namespace R = neorados;
+namespace ba = boost::asio;
+namespace bs = boost::system;
+namespace cb = ceph::buffer;
+namespace fifo = rados::cls::fifo;
+namespace RCf = neorados::cls::fifo;
+namespace s = spawn;
+
+namespace {
+// Test helper: build a create_meta write op from the given parameters
+// and execute it against `oid`. The outcome is reported through `y`, so
+// callers pass `y[ec]` to capture an error code instead of throwing.
+void fifo_create(R::RADOS& r,
+                const R::IOContext& ioc,
+                const R::Object& oid,
+                std::string_view id,
+                s::yield_context y,
+                std::optional<fifo::objv> objv = std::nullopt,
+                std::optional<std::string_view> oid_prefix = std::nullopt,
+                bool exclusive = false,
+                std::uint64_t max_part_size = RCf::default_max_part_size,
+                std::uint64_t max_entry_size = RCf::default_max_entry_size)
 {
-  return reinterpret_cast<CephContext *>(ioctx.cct());
+  R::WriteOp op;
+  RCf::create_meta(op, id, objv, oid_prefix, exclusive, max_part_size,
+                  max_entry_size);
+  r.execute(oid, ioc, std::move(op), y);
 }
-
-static int fifo_create(IoCtx& ioctx,
-                       const string& oid,
-                       const string& id,
-                       const ClsFIFO::MetaCreateParams& params)
-{
-  ObjectWriteOperation op;
-
-  int r = ClsFIFO::meta_create(&op, id, params);
-  if (r < 0) {
-    return r;
-  }
-
-  return ioctx.operate(oid, &op);
 }
 
 TEST(ClsFIFO, TestCreate) {
-  Rados cluster;
-  std::string pool_name = get_temp_pool_name();
-  ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
-  IoCtx ioctx;
-  cluster.ioctx_create(pool_name.c_str(), ioctx);
-
-  string fifo_id = "fifo";
-  string oid = fifo_id;
-
-  ASSERT_EQ(-EINVAL, fifo_create(ioctx, oid, string(),
-                                  ClsFIFO::MetaCreateParams()));
-
-  ASSERT_EQ(-EINVAL, fifo_create(ioctx, oid, fifo_id,
-                     ClsFIFO::MetaCreateParams()
-                     .max_part_size(0)));
-
-  ASSERT_EQ(-EINVAL, fifo_create(ioctx, oid, fifo_id,
-                     ClsFIFO::MetaCreateParams()
-                     .max_entry_size(0)));
-  
-  /* first successful create */
-  ASSERT_EQ(0, fifo_create(ioctx, oid, fifo_id,
-               ClsFIFO::MetaCreateParams()));
-
-  uint64_t size;
-  struct timespec ts;
-  ASSERT_EQ(0, ioctx.stat2(oid, &size, &ts));
-  ASSERT_GT(size, 0);
-
-  /* test idempotency */
-  ASSERT_EQ(0, fifo_create(ioctx, oid, fifo_id,
-               ClsFIFO::MetaCreateParams()));
-
-  uint64_t size2;
-  struct timespec ts2;
-  ASSERT_EQ(0, ioctx.stat2(oid, &size2, &ts2));
-  ASSERT_EQ(size2, size);
-
-  ASSERT_EQ(-EEXIST, fifo_create(ioctx, oid, fifo_id,
-               ClsFIFO::MetaCreateParams()
-               .exclusive(true)));
-
-  ASSERT_EQ(-EEXIST, fifo_create(ioctx, oid, fifo_id,
-               ClsFIFO::MetaCreateParams()
-               .oid_prefix("myprefix")
-               .exclusive(false)));
-
-  ASSERT_EQ(-EEXIST, fifo_create(ioctx, oid, "foo",
-               ClsFIFO::MetaCreateParams()
-               .exclusive(false)));
-
-  ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
+  ba::io_context c;
+  auto fifo_id = "fifo"sv;
+  R::Object oid(fifo_id);
+
+  s::spawn(c, [&](s::yield_context y) {
+               auto r = R::RADOS::Builder{}.build(c, y);
+               auto pool = create_pool(r, get_temp_pool_name(), y);
+               auto sg = make_scope_guard(
+                 [&] {
+                   r.delete_pool(pool, y);
+                 });
+               R::IOContext ioc(pool);
+               bs::error_code ec;
+               fifo_create(r, ioc, oid, ""s, y[ec]);
+               EXPECT_EQ(bs::errc::invalid_argument, ec);
+               fifo_create(r, ioc, oid, fifo_id, y[ec], std::nullopt,
+                           std::nullopt, false, 0);
+               EXPECT_EQ(bs::errc::invalid_argument, ec);
+               fifo_create(r, ioc, oid, {}, y[ec],
+                           std::nullopt, std::nullopt,
+                           false, RCf::default_max_part_size, 0);
+               EXPECT_EQ(bs::errc::invalid_argument, ec);
+               fifo_create(r, ioc, oid, fifo_id, y);
+               {
+                 std::uint64_t size;
+                 std::uint64_t size2;
+                 {
+                   R::ReadOp op;
+                   op.stat(&size, nullptr);
+                   r.execute(oid, ioc, std::move(op),
+                             nullptr, y);
+                   EXPECT_GT(size, 0);
+                 }
+
+                 {
+                   R::ReadOp op;
+                   op.stat(&size2, nullptr);
+                   r.execute(oid, ioc, std::move(op), nullptr, y);
+                 }
+                 EXPECT_EQ(size2, size);
+               }
+               /* test idempotency */
+               fifo_create(r, ioc, oid, fifo_id, y);
+               fifo_create(r, ioc, oid, {}, y[ec], std::nullopt,
+                           std::nullopt, false);
+               EXPECT_EQ(bs::errc::invalid_argument, ec);
+               fifo_create(r, ioc, oid, {}, y[ec], std::nullopt,
+                           "myprefix"sv, false);
+               EXPECT_EQ(bs::errc::invalid_argument, ec);
+               fifo_create(r, ioc, oid, "foo"sv, y[ec],
+                           std::nullopt, std::nullopt, false);
+               EXPECT_EQ(bs::errc::file_exists, ec);
+             });
+  c.run();
 }
 
 TEST(ClsFIFO, TestGetInfo) {
-  Rados cluster;
-  std::string pool_name = get_temp_pool_name();
-  ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
-  IoCtx ioctx;
-  cluster.ioctx_create(pool_name.c_str(), ioctx);
-
-  string fifo_id = "fifo";
-  string oid = fifo_id;
-
-  fifo_info_t info;
-
-  /* first successful create */
-  ASSERT_EQ(0, fifo_create(ioctx, oid, fifo_id,
-               ClsFIFO::MetaCreateParams()));
-
-  uint32_t part_header_size;
-  uint32_t part_entry_overhead;
-
-  ASSERT_EQ(0, ClsFIFO::meta_get(ioctx, oid,
-               ClsFIFO::MetaGetParams(), &info,
-               &part_header_size, &part_entry_overhead));
-
-  ASSERT_GT(part_header_size, 0);
-  ASSERT_GT(part_entry_overhead, 0);
-
-  ASSERT_TRUE(!info.objv.instance.empty());
-
-  ASSERT_EQ(0, ClsFIFO::meta_get(ioctx, oid,
-               ClsFIFO::MetaGetParams()
-               .objv(info.objv),
-               &info,
-               &part_header_size, &part_entry_overhead));
-
-  fifo_objv_t objv;
-  objv.instance="foo";
-  objv.ver = 12;
-  ASSERT_EQ(-ECANCELED, ClsFIFO::meta_get(ioctx, oid,
-               ClsFIFO::MetaGetParams()
-               .objv(objv),
-               &info,
-               &part_header_size, &part_entry_overhead));
-
-  ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
+  // Exercises get_meta: without a version, with the version just read,
+  // and with a version that cannot match (which must throw).
+  ba::io_context c;
+  auto fifo_id = "fifo"sv;
+  R::Object oid(fifo_id);
+
+  s::spawn(c, [&](s::yield_context y) {
+               auto r = R::RADOS::Builder{}.build(c, y);
+               auto pool = create_pool(r, get_temp_pool_name(), y);
+               // Delete the temporary pool when the coroutine body exits.
+               auto sg = make_scope_guard(
+                 [&] {
+                   r.delete_pool(pool, y);
+                 });
+               R::IOContext ioc(pool);
+               /* first successful create */
+               fifo_create(r, ioc, oid, fifo_id, y);
+
+               fifo::info info;
+               std::uint32_t part_header_size;
+               std::uint32_t part_entry_overhead;
+               {
+                 // No version constraint.
+                 R::ReadOp op;
+                 RCf::get_meta(op, std::nullopt,
+                               nullptr, &info, &part_header_size,
+                               &part_entry_overhead);
+                 r.execute(oid, ioc, std::move(op), nullptr, y);
+                 EXPECT_GT(part_header_size, 0);
+                 EXPECT_GT(part_entry_overhead, 0);
+                 EXPECT_FALSE(info.version.instance.empty());
+               }
+               {
+                 // The version we just read must be accepted.
+                 R::ReadOp op;
+                 RCf::get_meta(op, info.version,
+                               nullptr, &info, &part_header_size,
+                               &part_entry_overhead);
+                 r.execute(oid, ioc, std::move(op), nullptr, y);
+               }
+               {
+                 // A made-up version must be rejected.
+                 R::ReadOp op;
+                 fifo::objv objv;
+                 objv.instance = "foo";
+                 objv.ver = 12;
+                 RCf::get_meta(op, objv,
+                               nullptr, &info, &part_header_size,
+                               &part_entry_overhead);
+                 ASSERT_ANY_THROW(r.execute(oid, ioc, std::move(op),
+                                            nullptr, y));
+               }
+             });
+  c.run();
 }
 
 TEST(FIFO, TestOpenDefault) {
-  Rados cluster;
-  std::string pool_name = get_temp_pool_name();
-  ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
-  IoCtx ioctx;
-  cluster.ioctx_create(pool_name.c_str(), ioctx);
-
-  string fifo_id = "fifo";
-
-  FIFO fifo(cct(ioctx), fifo_id, &ioctx);
-
-  /* pre-open ops that should fail */
-  ASSERT_EQ(-EINVAL, fifo.read_meta());
-
-  bufferlist bl;
-  ASSERT_EQ(-EINVAL, fifo.push(bl));
-
-  ASSERT_EQ(-EINVAL, fifo.list(100, nullopt, nullptr, nullptr));
-  ASSERT_EQ(-EINVAL, fifo.trim(string()));
-
-  ASSERT_EQ(-ENOENT, fifo.open(false));
-
-  /* first successful create */
-  ASSERT_EQ(0, fifo.open(true));
-
-  ASSERT_EQ(0, fifo.read_meta()); /* force reading from backend */
-
-  auto info = fifo.get_meta();
-
-  ASSERT_EQ(info.id, fifo_id);
-
-  ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
+  ba::io_context c;        // Test: create a FIFO with only the required arguments,
+  auto fifo_id = "fifo"s;  // then check that its metadata reports this id.
+
+  s::spawn(c, [&](s::yield_context y) {  // coroutine body; driven by c.run() at the bottom
+               auto r = R::RADOS::Builder{}.build(c, y);
+               auto pool = create_pool(r, get_temp_pool_name(), y);
+               auto sg = make_scope_guard(  // the guard's callback deletes the temporary pool
+                 [&] {
+                   r.delete_pool(pool, y);
+                 });
+               R::IOContext ioc(pool);
+               auto fifo = RCf::FIFO::create(r, ioc, fifo_id, y);
+               // force reading from backend so meta() below is not just
+               // whatever create() left in the handle
+               fifo->read_meta(y);
+               auto info = fifo->meta();
+               EXPECT_EQ(info.id, fifo_id);  // the only property this test checks
+             });
+  c.run();
 }
 
 TEST(FIFO, TestOpenParams) {
-  Rados cluster;
-  std::string pool_name = get_temp_pool_name();
-  ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
-  IoCtx ioctx;
-  cluster.ioctx_create(pool_name.c_str(), ioctx);
-
-  string fifo_id = "fifo";
-
-  FIFO fifo(cct(ioctx), fifo_id, &ioctx);
-
-  uint64_t max_part_size = 10 * 1024;
-  uint64_t max_entry_size = 128;
-  string oid_prefix = "foo.123.";
-
-  fifo_objv_t objv;
-  objv.instance = "fooz";
-  objv.ver = 10;
-
-
-  /* first successful create */
-  ASSERT_EQ(0, fifo.open(true,
-                         ClsFIFO::MetaCreateParams()
-                         .max_part_size(max_part_size)
-                         .max_entry_size(max_entry_size)
-                         .oid_prefix(oid_prefix)
-                         .objv(objv)));
-
-  ASSERT_EQ(0, fifo.read_meta()); /* force reading from backend */
-
-  auto info = fifo.get_meta();
-
-  ASSERT_EQ(info.id, fifo_id);
-  ASSERT_EQ(info.data_params.max_part_size, max_part_size);
-  ASSERT_EQ(info.data_params.max_entry_size, max_entry_size);
-  ASSERT_EQ(info.objv, objv);
-
-  ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
+  ba::io_context c;         // Test: create a FIFO with explicit version, prefix and
+  auto fifo_id = "fifo"sv;  // size limits, then check they appear in its metadata.
+
+  s::spawn(c, [&](s::yield_context y) {  // coroutine body; driven by c.run() at the bottom
+               auto r = R::RADOS::Builder{}.build(c, y);
+               auto pool = create_pool(r, get_temp_pool_name(), y);
+               auto sg = make_scope_guard(  // the guard's callback deletes the temporary pool
+                 [&] {
+                   r.delete_pool(pool, y);
+                 });
+               R::IOContext ioc(pool);
+
+               const std::uint64_t max_part_size = 10 * 1024;
+               const std::uint64_t max_entry_size = 128;
+               auto oid_prefix = "foo.123."sv;  // passed to create() but not asserted on below
+               fifo::objv objv;
+               objv.instance = "fooz"s;
+               objv.ver = 10;
+
+               /* first successful create, with every optional argument given.
+                  NOTE(review): the meaning of the `false` argument is defined
+                  by RCf::FIFO::create and is not visible here -- confirm. */
+               auto f = RCf::FIFO::create(r, ioc, fifo_id, y, objv, oid_prefix,
+                                          false, max_part_size,
+                                          max_entry_size);
+
+
+               /* force reading from backend before inspecting the metadata */
+               f->read_meta(y);
+               auto info = f->meta();
+               ASSERT_EQ(info.id, fifo_id);
+               ASSERT_EQ(info.params.max_part_size, max_part_size);
+               ASSERT_EQ(info.params.max_entry_size, max_entry_size);
+               ASSERT_EQ(info.version, objv);  // the version supplied at creation is expected back unchanged
+             });
+  c.run();
 }
 
-template <class T>
-static int decode_entry(fifo_entry& entry,
-                        T *val,
-                        string *marker)
+namespace {
+template<class T>  // Decodes entry.data as a T; returns {decoded value, entry.marker}.
+std::pair<T, std::string> decode_entry(const RCf::list_entry& entry)
 {
-  *marker = entry.marker;
+  T val;
   auto iter = entry.data.cbegin();
-
-  try {
-    decode(*val, iter);
-  } catch (buffer::error& err) {
-    return -EIO;
-  }
-
-  return 0;
+  decode(val, iter);  // not wrapped in try/catch (the removed version mapped buffer::error to -EIO)
+  return std::make_pair(std::move(val), entry.marker);  // the marker is copied out of the const entry
 }
-
-TEST(FIFO, TestPushListTrim) {
-  Rados cluster;
-  std::string pool_name = get_temp_pool_name();
-  ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
-  IoCtx ioctx;
-  cluster.ioctx_create(pool_name.c_str(), ioctx);
-
-  string fifo_id = "fifo";
-
-  FIFO fifo(cct(ioctx), fifo_id, &ioctx);
-
-  /* first successful create */
-  ASSERT_EQ(0, fifo.open(true));
-
-  uint32_t max_entries = 10;
-
-  for (uint32_t i = 0; i < max_entries; ++i) {
-    bufferlist bl;
-    encode(i, bl);
-    ASSERT_EQ(0, fifo.push(bl));
-  }
-
-  string marker;
-
-  /* get entries one by one */
-
-  for (uint32_t i = 0; i < max_entries; ++i) {
-    vector<fifo_entry> result;
-    bool more;
-    ASSERT_EQ(0, fifo.list(1, marker, &result, &more));
-
-    bool expected_more = (i != (max_entries - 1));
-    ASSERT_EQ(expected_more, more);
-    ASSERT_EQ(1, result.size());
-
-    uint32_t val;
-    ASSERT_EQ(0, decode_entry(result.front(), &val, &marker));
-
-    ASSERT_EQ(i, val);
-  }
-
-  /* get all entries at once */
-  vector<fifo_entry> result;
-  bool more;
-  ASSERT_EQ(0, fifo.list(max_entries * 10, string(), &result, &more));
-
-  ASSERT_FALSE(more);
-  ASSERT_EQ(max_entries, result.size());
-
-  string markers[max_entries];
-
-
-  for (uint32_t i = 0; i < max_entries; ++i) {
-    uint32_t val;
-
-    ASSERT_EQ(0, decode_entry(result[i], &val, &markers[i]));
-    ASSERT_EQ(i, val);
-  }
-
-  uint32_t min_entry = 0;
-
-  /* trim one entry */
-  fifo.trim(markers[min_entry]);
-  ++min_entry;
-
-  ASSERT_EQ(0, fifo.list(max_entries * 10, string(), &result, &more));
-
-  ASSERT_FALSE(more);
-  ASSERT_EQ(max_entries - min_entry, result.size());
-
-  for (uint32_t i = min_entry; i < max_entries; ++i) {
-    uint32_t val;
-
-    ASSERT_EQ(0, decode_entry(result[i - min_entry], &val, &markers[i]));
-    ASSERT_EQ(i, val);
-  }
-
-
-  ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
 }
 
-TEST(FIFO, TestPushTooBig) {
-  Rados cluster;
-  std::string pool_name = get_temp_pool_name();
-  ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
-  IoCtx ioctx;
-  cluster.ioctx_create(pool_name.c_str(), ioctx);
-
-  string fifo_id = "fifo";
-
-  FIFO fifo(cct(ioctx), fifo_id, &ioctx);
-
-  uint64_t max_part_size = 2048;
-  uint64_t max_entry_size = 128;
-
-  char buf[max_entry_size + 1];
-  memset(buf, 0, sizeof(buf));
 
-  /* first successful create */
-  ASSERT_EQ(0, fifo.open(true,
-                         ClsFIFO::MetaCreateParams()
-                         .max_part_size(max_part_size)
-                         .max_entry_size(max_entry_size)));
 
-  bufferlist bl;
-  bl.append(buf, sizeof(buf));
-
-  ASSERT_EQ(-EINVAL, fifo.push(bl));
-
-
-  ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
+TEST(FIFO, TestPushListTrim) {  // Push ten encoded integers, list them singly and in bulk, trim one, list again.
+  ba::io_context c;
+  auto fifo_id = "fifo"sv;
+
+  s::spawn(c, [&](s::yield_context y) mutable {  // coroutine body; driven by c.run() at the bottom
+               auto r = R::RADOS::Builder{}.build(c, y);
+               auto pool = create_pool(r, get_temp_pool_name(), y);
+               auto sg = make_scope_guard(  // the guard's callback deletes the temporary pool
+                 [&] {
+                   r.delete_pool(pool, y);
+                 });
+               R::IOContext ioc(pool);
+               auto f = RCf::FIFO::create(r, ioc, fifo_id, y);
+               static constexpr auto max_entries = 10u;
+               for (uint32_t i = 0; i < max_entries; ++i) {
+                 cb::list bl;
+                 encode(i, bl);  // the payload of each entry is its encoded index
+                 f->push(bl, y);
+               }
+
+               std::optional<std::string> marker;  // empty for the first list call, then the marker of the entry just read
+               /* get entries one by one, resuming from the previous marker */
+
+               for (auto i = 0u; i < max_entries; ++i) {
+                 auto [result, more] = f->list(1, marker, y);
+
+                 bool expected_more = (i != (max_entries - 1));  // only the final entry should report no more
+                 ASSERT_EQ(expected_more, more);
+                 ASSERT_EQ(1, result.size());
+
+                 std::uint32_t val;
+                 std::tie(val, marker) =
+                   decode_entry<std::uint32_t>(result.front());
+
+                 ASSERT_EQ(i, val);  // entries are expected back in push order
+               }
+
+               /* get all entries at once, remembering each entry's marker */
+               std::string markers[max_entries];
+               std::uint32_t min_entry = 0;  // index of the first entry expected to remain
+               {
+                 auto [result, more] = f->list(max_entries * 10, std::nullopt,
+                                               y);
+
+                 ASSERT_FALSE(more);
+                 ASSERT_EQ(max_entries, result.size());
+
+
+                 for (auto i = 0u; i < max_entries; ++i) {
+                   std::uint32_t val;
+
+                   std::tie(val, markers[i]) =
+                     decode_entry<std::uint32_t>(result[i]);
+                   ASSERT_EQ(i, val);
+                 }
+
+
+                 /* trim one entry, using the first entry's marker */
+                 f->trim(markers[min_entry], y);
+                 ++min_entry;
+               }
+
+               auto [result, more] = f->list(max_entries * 10,  // list from the start again after the trim
+                                             std::nullopt, y);
+
+               ASSERT_FALSE(more);
+               ASSERT_EQ(max_entries - min_entry, result.size());  // exactly one entry fewer than before
+
+               for (auto i = min_entry; i < max_entries; ++i) {
+                 std::uint32_t val;
+
+                 std::tie(val, markers[i - min_entry]) =  // markers[] is overwritten here but not read afterwards
+                   decode_entry<std::uint32_t>(result[i - min_entry]);
+                 ASSERT_EQ(i, val);
+               }
+
+             });
+  c.run();
 }
 
-TEST(FIFO, TestMultipleParts) {
-  Rados cluster;
-  std::string pool_name = get_temp_pool_name();
-  ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
-  IoCtx ioctx;
-  cluster.ioctx_create(pool_name.c_str(), ioctx);
-
-  string fifo_id = "fifo";
-
-  FIFO fifo(cct(ioctx), fifo_id, &ioctx);
-
-  uint64_t max_part_size = 2048;
-  uint64_t max_entry_size = 128;
-
-  char buf[max_entry_size];
-  memset(buf, 0, sizeof(buf));
-
-  /* create */
-  ASSERT_EQ(0, fifo.open(true,
-                         ClsFIFO::MetaCreateParams()
-                         .max_part_size(max_part_size)
-                         .max_entry_size(max_entry_size)));
-
-  uint32_t part_header_size;
-  uint32_t part_entry_overhead;
-
-  fifo.get_part_layout_info(&part_header_size, &part_entry_overhead);
-
-  int entries_per_part = (max_part_size - part_header_size) / (max_entry_size + part_entry_overhead);
-
-  int max_entries = entries_per_part * 4 + 1;
-
-  /* push enough entries */
-  for (int i = 0; i < max_entries; ++i) {
-    bufferlist bl;
-
-    *(int *)buf = i;
-    bl.append(buf, sizeof(buf));
-
-    ASSERT_EQ(0, fifo.push(bl));
-  }
-
-  auto info = fifo.get_meta();
-
-  ASSERT_EQ(info.id, fifo_id);
-  ASSERT_GT(info.head_part_num, 0); /* head should have advanced */
-
-
-  /* list all at once */
-  vector<fifo_entry> result;
-  bool more;
-  ASSERT_EQ(0, fifo.list(max_entries, string(), &result, &more));
-  ASSERT_EQ(false, more);
-
-  ASSERT_EQ(max_entries, result.size());
-
-  for (int i = 0; i < max_entries; ++i) {
-    auto& bl = result[i].data;
-    ASSERT_EQ(i, *(int *)bl.c_str());
-  }
-
 
-  /* list one at a time */
-  string marker;
-  for (int i = 0; i < max_entries; ++i) {
-    ASSERT_EQ(0, fifo.list(1, marker, &result, &more));
-
-    ASSERT_EQ(result.size(), 1);
-    bool expected_more = (i != (max_entries - 1));
-    ASSERT_EQ(expected_more, more);
-
-    auto& entry = result[0];
-
-    auto& bl = entry.data;
-    marker = entry.marker;
-
-    ASSERT_EQ(i, *(int *)bl.c_str());
-  }
-
-  /* trim one at a time */
-  marker.clear();
-  for (int i = 0; i < max_entries; ++i) {
-    /* read single entry */
-    ASSERT_EQ(0, fifo.list(1, marker, &result, &more));
-
-    ASSERT_EQ(result.size(), 1);
-    bool expected_more = (i != (max_entries - 1));
-    ASSERT_EQ(expected_more, more);
-
-    marker = result[0].marker;
-
-    /* trim */
-    ASSERT_EQ(0, fifo.trim(marker));
-
-    /* check tail */
-    info = fifo.get_meta();
-    ASSERT_EQ(info.tail_part_num, i / entries_per_part);
-
-    /* try to read all again, see how many entries left */
-    ASSERT_EQ(0, fifo.list(max_entries, marker, &result, &more));
-    ASSERT_EQ(max_entries - i - 1, result.size());
-    ASSERT_EQ(false, more);
-  }
-
-  /* tail now should point at head */
-  info = fifo.get_meta();
-  ASSERT_EQ(info.head_part_num, info.tail_part_num);
-
-  fifo_part_info part_info;
-
-  /* check old tails are removed */
-  for (int i = 0; i < info.tail_part_num; ++i) {
-    ASSERT_EQ(-ENOENT, fifo.get_part_info(i, &part_info));
-  }
-
-  /* check curent tail exists */
-  ASSERT_EQ(0, fifo.get_part_info(info.tail_part_num, &part_info));
-
-  ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
+TEST(FIFO, TestPushTooBig) {  // Pushing an entry one byte over max_entry_size must fail with entry_too_large.
+  ba::io_context c;
+  auto fifo_id = "fifo"sv;
+  static constexpr auto max_part_size = 2048ull;
+  static constexpr auto max_entry_size = 128ull;
+
+  s::spawn(c, [&](s::yield_context y) {  // coroutine body; driven by c.run() at the bottom
+               auto r = R::RADOS::Builder{}.build(c, y);
+               auto pool = create_pool(r, get_temp_pool_name(), y);
+               auto sg = make_scope_guard(  // the guard's callback deletes the temporary pool
+                 [&] {
+                   r.delete_pool(pool, y);
+                 });
+               R::IOContext ioc(pool);
+
+               auto f = RCf::FIFO::create(r, ioc, fifo_id, y, std::nullopt,
+                                          std::nullopt, false, max_part_size,
+                                          max_entry_size);
+
+               char buf[max_entry_size + 1];  // deliberately one byte larger than the configured limit
+               memset(buf, 0, sizeof(buf));
+
+               cb::list bl;
+               bl.append(buf, sizeof(buf));
+
+               bs::error_code ec;
+               f->push(bl, y[ec]);  // y[ec] reports the failure through ec instead of throwing
+               EXPECT_EQ(RCf::errc::entry_too_large, ec);
+             });
+  c.run();
 }
 
-TEST(FIFO, TestTwoPushers) {
-  Rados cluster;
-  std::string pool_name = get_temp_pool_name();
-  ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
-  IoCtx ioctx;
-  cluster.ioctx_create(pool_name.c_str(), ioctx);
-
-  string fifo_id = "fifo";
 
-  FIFO fifo(cct(ioctx), fifo_id, &ioctx);
-
-  uint64_t max_part_size = 2048;
-  uint64_t max_entry_size = 128;
+TEST(FIFO, TestMultipleParts) {
+  ba::io_context c;
+  auto fifo_id = "fifo"sv;
+  static constexpr auto max_part_size = 2048ull;
+  static constexpr auto max_entry_size = 128ull;
 
-  char buf[max_entry_size];
-  memset(buf, 0, sizeof(buf));
+  s::spawn(c, [&](s::yield_context y) mutable {
+               auto r = R::RADOS::Builder{}.build(c, y);
+               auto pool = create_pool(r, get_temp_pool_name(), y);
+               auto sg = make_scope_guard(
+                 [&] {
+                   r.delete_pool(pool, y);
+                 });
+               R::IOContext ioc(pool);
+
+               auto f = RCf::FIFO::create(r, ioc, fifo_id, y, std::nullopt,
+                                          std::nullopt, false, max_part_size,
+                                          max_entry_size);
+
+
+               char buf[max_entry_size];
+               memset(buf, 0, sizeof(buf));
+
+               const auto [part_header_size, part_entry_overhead] =
+                 f->get_part_layout_info();
+
+               const auto entries_per_part =
+                 (max_part_size - part_header_size) /
+                 (max_entry_size + part_entry_overhead);
+
+               const auto max_entries = entries_per_part * 4 + 1;
+
+               /* push enough entries */
+               for (auto i = 0u; i < max_entries; ++i) {
+                 cb::list bl;
+
+                 *(int *)buf = i;
+                 bl.append(buf, sizeof(buf));
+
+                 f->push(bl, y);
+               }
+
+               auto info = f->meta();
+
+               ASSERT_EQ(info.id, fifo_id);
+               /* head should have advanced */
+               ASSERT_GT(info.head_part_num, 0);
+
+
+               /* list all at once */
+               auto [result, more] = f->list(max_entries, std::nullopt, y);
+               EXPECT_EQ(false, more);
+
+               ASSERT_EQ(max_entries, result.size());
+
+               for (auto i = 0u; i < max_entries; ++i) {
+                 auto& bl = result[i].data;
+                 ASSERT_EQ(i, *(int *)bl.c_str());
+               }
+
+               std::optional<std::string> marker;
+               /* get entries one by one */
+
+               for (auto i = 0u; i < max_entries; ++i) {
+                 auto [result, more] = f->list(1, marker, y);
+                 ASSERT_EQ(result.size(), 1);
+                 const bool expected_more = (i != (max_entries - 1));
+                 ASSERT_EQ(expected_more, more);
+
+                 std::uint32_t val;
+                 std::tie(val, marker) =
+                   decode_entry<std::uint32_t>(result.front());
+
+                 auto& entry = result.front();
+                 auto& bl = entry.data;
+                 ASSERT_EQ(i, *(int *)bl.c_str());
+                 marker = entry.marker;
+               }
+
+               /* trim one at a time */
+               marker.reset();
+               for (auto i = 0u; i < max_entries; ++i) {
+                 /* read single entry */
+                 {
+                   auto [result, more] = f->list(1, marker, y);
+                   ASSERT_EQ(result.size(), 1);
+                   const bool expected_more = (i != (max_entries - 1));
+                   ASSERT_EQ(expected_more, more);
+
+                   marker = result.front().marker;
+
+                   f->trim(*marker, y);
+                 }
+
+                 /* check tail */
+                 info = f->meta();
+                 ASSERT_EQ(info.tail_part_num, i / entries_per_part);
+
+                 /* try to read all again, see how many entries left */
+                 auto [result, more] = f->list(max_entries, marker, y);
+                 ASSERT_EQ(max_entries - i - 1, result.size());
+                 ASSERT_EQ(false, more);
+               }
+
+               /* tail now should point at head */
+               info = f->meta();
+               ASSERT_EQ(info.head_part_num, info.tail_part_num);
+
+               /* check old tails are removed */
+               for (auto i = 0; i < info.tail_part_num; ++i) {
+                 bs::error_code ec;
+                 f->get_part_info(i, y[ec]);
+                 ASSERT_EQ(bs::errc::no_such_file_or_directory, ec);
+               }
+               /* check current tail exists */
+               f->get_part_info(info.tail_part_num, y);
+             });
+  c.run();
+}
 
-  /* create */
-  ASSERT_EQ(0, fifo.open(true,
-                         ClsFIFO::MetaCreateParams()
-                         .max_part_size(max_part_size)
-                         .max_entry_size(max_entry_size)));
 
-  uint32_t part_header_size;
-  uint32_t part_entry_overhead;
+TEST(FIFO, TestTwoPushers) {
+  ba::io_context c;
+  auto fifo_id = "fifo"sv;
+  static constexpr auto max_part_size = 2048ull;
+  static constexpr auto max_entry_size = 128ull;
 
-  fifo.get_part_layout_info(&part_header_size, &part_entry_overhead);
+  s::spawn(c, [&](s::yield_context y) {
+               auto r = R::RADOS::Builder{}.build(c, y);
+               auto pool = create_pool(r, get_temp_pool_name(), y);
+               auto sg = make_scope_guard(
+                 [&] {
+                   r.delete_pool(pool, y);
+                 });
+               R::IOContext ioc(pool);
 
-  int entries_per_part = (max_part_size - part_header_size) / (max_entry_size + part_entry_overhead);
+               auto f = RCf::FIFO::create(r, ioc, fifo_id, y, std::nullopt,
+                                          std::nullopt, false, max_part_size,
+                                          max_entry_size);
 
-  int max_entries = entries_per_part * 4 + 1;
 
-  FIFO fifo2(cct(ioctx), fifo_id, &ioctx);
 
-  /* open second one */
-  ASSERT_EQ(0, fifo2.open(true,
-                         ClsFIFO::MetaCreateParams()));
+               char buf[max_entry_size];
+               memset(buf, 0, sizeof(buf));
 
-  vector<FIFO *> fifos(2);
-  fifos[0] = &fifo;
-  fifos[1] = &fifo2;
 
-  for (int i = 0; i < max_entries; ++i) {
-    bufferlist bl;
+               auto [part_header_size, part_entry_overhead] =
+                 f->get_part_layout_info();
 
-    *(int *)buf = i;
-    bl.append(buf, sizeof(buf));
+               const auto entries_per_part =
+                 (max_part_size - part_header_size) /
+                 (max_entry_size + part_entry_overhead);
 
-    auto& f = fifos[i % fifos.size()];
+               const auto max_entries = entries_per_part * 4 + 1;
 
-    ASSERT_EQ(0, f->push(bl));
-  }
+               auto f2 = RCf::FIFO::open(r, ioc, fifo_id, y);
 
-  /* list all by both */
-  vector<fifo_entry> result;
-  bool more;
-  ASSERT_EQ(0, fifo2.list(max_entries, string(), &result, &more));
+               std::vector fifos{&f, &f2};
 
-  ASSERT_EQ(false, more);
+               for (auto i = 0u; i < max_entries; ++i) {
+                 cb::list bl;
+                 *(int *)buf = i;
+                 bl.append(buf, sizeof(buf));
 
-  ASSERT_EQ(max_entries, result.size());
+                 auto& f = fifos[i % fifos.size()];
 
-  ASSERT_EQ(0, fifo.list(max_entries, string(), &result, &more));
-  ASSERT_EQ(false, more);
+                 (*f)->push(bl, y);
+               }
 
-  ASSERT_EQ(max_entries, result.size());
+               /* list all by both */
+               {
+                 auto [result, more] = f2->list(max_entries, std::nullopt, y);
 
-  for (int i = 0; i < max_entries; ++i) {
-    auto& bl = result[i].data;
-    ASSERT_EQ(i, *(int *)bl.c_str());
-  }
+                 ASSERT_EQ(false, more);
+                 ASSERT_EQ(max_entries, result.size());
+               }
+               auto [result, more] = f2->list(max_entries, std::nullopt, y);
+               ASSERT_EQ(false, more);
+               ASSERT_EQ(max_entries, result.size());
 
-  ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
+               for (auto i = 0u; i < max_entries; ++i) {
+                 auto& bl = result[i].data;
+                 ASSERT_EQ(i, *(int *)bl.c_str());
+               }
+             });
+  c.run();
 }
 
-TEST(FIFO, TestTwoPushersTrim) {
-  Rados cluster;
-  std::string pool_name = get_temp_pool_name();
-  ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
-  IoCtx ioctx;
-  cluster.ioctx_create(pool_name.c_str(), ioctx);
-
-  string fifo_id = "fifo";
 
-  FIFO fifo1(cct(ioctx), fifo_id, &ioctx);
-
-  uint64_t max_part_size = 2048;
-  uint64_t max_entry_size = 128;
-
-  char buf[max_entry_size];
-  memset(buf, 0, sizeof(buf));
-
-  /* create */
-  ASSERT_EQ(0, fifo1.open(true,
-                          ClsFIFO::MetaCreateParams()
-                          .max_part_size(max_part_size)
-                          .max_entry_size(max_entry_size)));
+TEST(FIFO, TestTwoPushersTrim) {  // Two handles on one FIFO: push via both, trim via f1, list the remainder via f2.
+  ba::io_context c;
+  auto fifo_id = "fifo"sv;
+  static constexpr auto max_part_size = 2048ull;
+  static constexpr auto max_entry_size = 128ull;
 
-  uint32_t part_header_size;
-  uint32_t part_entry_overhead;
+  s::spawn(c, [&](s::yield_context y) {  // coroutine body; driven by c.run() at the bottom
+               auto r = R::RADOS::Builder{}.build(c, y);
+               auto pool = create_pool(r, get_temp_pool_name(), y);
+               auto sg = make_scope_guard(  // the guard's callback deletes the temporary pool
+                 [&] {
+                   r.delete_pool(pool, y);
+                 });
+               R::IOContext ioc(pool);
 
-  fifo1.get_part_layout_info(&part_header_size, &part_entry_overhead);
+               auto f1 = RCf::FIFO::create(r, ioc, fifo_id, y, std::nullopt,
+                                           std::nullopt, false, max_part_size,
+                                           max_entry_size);
 
-  int entries_per_part = (max_part_size - part_header_size) / (max_entry_size + part_entry_overhead);
+               char buf[max_entry_size];
+               memset(buf, 0, sizeof(buf));
 
-  int max_entries = entries_per_part * 4 + 1;
 
-  FIFO fifo2(cct(ioctx), fifo_id, &ioctx);
+               auto [part_header_size, part_entry_overhead] =
+                 f1->get_part_layout_info();
 
-  /* open second one */
-  ASSERT_EQ(0, fifo2.open(true,
-                         ClsFIFO::MetaCreateParams()));
+               const auto entries_per_part =  // how many maximum-size entries fit in one part
+                 (max_part_size - part_header_size) /
+                 (max_entry_size + part_entry_overhead);
 
-  /* push one entry to fifo2 and the rest to fifo1 */
+               const auto max_entries = entries_per_part * 4 + 1;  // four parts' worth plus one more
 
-  for (int i = 0; i < max_entries; ++i) {
-    bufferlist bl;
+               auto f2 = RCf::FIFO::open(r, ioc, fifo_id, y);  // second handle on the same FIFO
 
-    *(int *)buf = i;
-    bl.append(buf, sizeof(buf));
+               /* push one entry to f2 and the rest to f1 */
 
-    FIFO *f = (i < 1 ? &fifo2 : &fifo1);
+               for (auto i = 0u; i < max_entries; ++i) {
+                 cb::list bl;
 
-    ASSERT_EQ(0, f->push(bl));
-  }
+                 *(int *)buf = i;  // the leading int of each payload is the entry index
+                 bl.append(buf, sizeof(buf));
 
-  /* trim half by fifo1 */
-  int num = max_entries / 2;
+                 auto f = (i < 1 ? &f2 : &f1);  // only entry 0 goes through f2
+                 (*f)->push(bl, y);
+               }
 
-  vector<fifo_entry> result;
-  bool more;
-  ASSERT_EQ(0, fifo1.list(num, string(), &result, &more));
+               /* trim half by f1 (the handle returned by create) */
+               auto num = max_entries / 2;
 
-  ASSERT_EQ(true, more);
-  ASSERT_EQ(num, result.size());
+               std::string marker;  // declared outside the scope below because the final listing reuses it
+               {
+                 auto [result, more] = f1->list(num, std::nullopt, y);
 
-  for (int i = 0; i < num; ++i) {
-    auto& bl = result[i].data;
-    ASSERT_EQ(i, *(int *)bl.c_str());
-  }
+                 ASSERT_EQ(true, more);
+                 ASSERT_EQ(num, result.size());
 
-  auto& entry = result[num - 1];
-  auto& marker = entry.marker;
+                 for (auto i = 0u; i < num; ++i) {
+                   auto& bl = result[i].data;
+                   ASSERT_EQ(i, *(int *)bl.c_str());
+                 }
 
-  ASSERT_EQ(0, fifo1.trim(marker));
+                 auto& entry = result[num - 1];  // last entry of the first half
+                 marker = entry.marker;
 
-  /* list what's left by fifo2 */
+                 f1->trim(marker, y);
 
-  int left = max_entries - num;
+                 /* what's left is listed through f2 once this scope closes */
 
-  ASSERT_EQ(0, fifo2.list(left, marker, &result, &more));
-  ASSERT_EQ(left, result.size());
-  ASSERT_EQ(false, more);
+               }
 
-  for (int i = num; i < max_entries; ++i) {
-    auto& bl = result[i - num].data;
-    ASSERT_EQ(i, *(int *)bl.c_str());
-  }
+               const auto left = max_entries - num;
+               auto [result, more] = f2->list(left, marker, y);  // the other handle lists, starting from the trim marker
+               ASSERT_EQ(left, result.size());
+               ASSERT_EQ(false, more);
 
-  ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
+               for (auto i = num; i < max_entries; ++i) {
+                 auto& bl = result[i - num].data;
+                 ASSERT_EQ(i, *(int *)bl.c_str());  // remaining entries keep their original indices
+               }
+             });
+  c.run();
 }
 
 TEST(FIFO, TestPushBatch) {
-  Rados cluster;
-  std::string pool_name = get_temp_pool_name();
-  ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
-  IoCtx ioctx;
-  cluster.ioctx_create(pool_name.c_str(), ioctx);
-
-  string fifo_id = "fifo";
-
-  FIFO fifo(cct(ioctx), fifo_id, &ioctx);
-
-  uint64_t max_part_size = 2048;
-  uint64_t max_entry_size = 128;
-
-  char buf[max_entry_size];
-  memset(buf, 0, sizeof(buf));
+  ba::io_context c;         // Test: push a whole vector of entries in one call and
+  auto fifo_id = "fifo"sv;  // check that they are all listed back in order.
+  static constexpr auto max_part_size = 2048ull;
+  static constexpr auto max_entry_size = 128ull;
 
-  /* create */
-  ASSERT_EQ(0, fifo.open(true,
-                          ClsFIFO::MetaCreateParams()
-                          .max_part_size(max_part_size)
-                          .max_entry_size(max_entry_size)));
+  s::spawn(c, [&](s::yield_context y) {  // coroutine body; driven by c.run() at the bottom
+               auto r = R::RADOS::Builder{}.build(c, y);
+               auto pool = create_pool(r, get_temp_pool_name(), y);
+               auto sg = make_scope_guard(  // the guard's callback deletes the temporary pool
+                 [&] {
+                   r.delete_pool(pool, y);
+                 });
+               R::IOContext ioc(pool);
 
-  uint32_t part_header_size;
-  uint32_t part_entry_overhead;
+               auto f = RCf::FIFO::create(r, ioc, fifo_id, y, std::nullopt,
+                                          std::nullopt, false, max_part_size,
+                                          max_entry_size);
 
-  fifo.get_part_layout_info(&part_header_size, &part_entry_overhead);
 
-  int entries_per_part = (max_part_size - part_header_size) / (max_entry_size + part_entry_overhead);
+               char buf[max_entry_size];
+               memset(buf, 0, sizeof(buf));
 
-  int max_entries = entries_per_part * 4 + 1; /* enough entries to span multiple parts */
+               auto [part_header_size, part_entry_overhead]
+                 = f->get_part_layout_info();
 
-  vector<bufferlist> bufs;
+               auto entries_per_part =  // how many maximum-size entries fit in one part
+                 (max_part_size - part_header_size) /
+                 (max_entry_size + part_entry_overhead);
 
-  for (int i = 0; i < max_entries; ++i) {
-    bufferlist bl;
+               auto max_entries = entries_per_part * 4 + 1; /* enough entries to span multiple parts */
 
-    *(int *)buf = i;
-    bl.append(buf, sizeof(buf));
+               std::vector<cb::list> bufs;
 
-    bufs.push_back(bl);
-  }
+               for (auto i = 0u; i < max_entries; ++i) {
+                 cb::list bl;
 
-  ASSERT_EQ(0, fifo.push(bufs));
+                 *(int *)buf = i;  // the leading int of each payload is the entry index
+                 bl.append(buf, sizeof(buf));
 
-  /* list all */
+                 bufs.push_back(bl);
+               }
 
-  vector<fifo_entry> result;
-  bool more;
-  ASSERT_EQ(0, fifo.list(max_entries, string(), &result, &more));
+               f->push(bufs, y);  // one call carrying every entry
 
-  ASSERT_EQ(false, more);
-  ASSERT_EQ(max_entries, result.size());
+               /* list all entries with a single call */
 
-  for (int i = 0; i < max_entries; ++i) {
-    auto& bl = result[i].data;
-    ASSERT_EQ(i, *(int *)bl.c_str());
-  }
+               auto [result, more] = f->list(max_entries, std::nullopt, y);
+               ASSERT_EQ(false, more);
+               ASSERT_EQ(max_entries, result.size());
 
-  auto& info = fifo.get_meta();
-  ASSERT_EQ(info.head_part_num, 4);
+               for (auto i = 0u; i < max_entries; ++i) {
+                 auto& bl = result[i].data;
+                 ASSERT_EQ(i, *(int *)bl.c_str());  // order within the batch must be preserved
+               }
 
-  ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
+               auto& info = f->meta();
+               ASSERT_EQ(info.head_part_num, 4);  // four full parts plus one entry; presumably parts are numbered from 0 -- confirm
+             });
+  c.run();
 }