add_subdirectory(cmpomap)
# cls_fifo
-set(cls_fifo_srcs fifo/cls_fifo.cc fifo/cls_fifo_types.cc)
+set(cls_fifo_srcs fifo/cls_fifo.cc)
add_library(cls_fifo SHARED ${cls_fifo_srcs})
set_target_properties(cls_fifo PROPERTIES
VERSION "1.0.0"
INSTALL_RPATH ""
CXX_VISIBILITY_PRESET hidden)
install(TARGETS cls_fifo DESTINATION ${cls_dir})
-
-set(cls_fifo_client_srcs
- fifo/cls_fifo_client.cc
- fifo/cls_fifo_types.cc
- fifo/cls_fifo_ops.cc)
-add_library(cls_fifo_client STATIC ${cls_fifo_client_srcs})
*
*/
-#include <errno.h>
+#include <cerrno>
+#include <cinttypes>
+#include <optional>
+#include <string>
+
+#undef FMT_HEADER_ONLY
+#define FMT_HEADER_ONLY 1
+#include <fmt/format.h>
+
+#include "include/buffer.h"
+#include "include/types.h"
#include "objclass/objclass.h"
#include "cls/fifo/cls_fifo_ops.h"
#include "cls/fifo/cls_fifo_types.h"
-
-using namespace rados::cls::fifo;
-
-
CLS_VER(1,0)
CLS_NAME(fifo)
+namespace rados::cls::fifo {
-#define CLS_FIFO_MAX_PART_HEADER_SIZE 512
+static constexpr auto CLS_FIFO_MAX_PART_HEADER_SIZE = 512;
-static uint32_t part_entry_overhead;
+static std::uint32_t part_entry_overhead;
-struct cls_fifo_entry_header_pre {
+struct entry_header_pre {
__le64 magic;
__le64 pre_size;
__le64 header_size;
__le32 reserved;
} __attribute__ ((packed));
-struct cls_fifo_entry_header {
+struct entry_header {
ceph::real_time mtime;
- void encode(bufferlist &bl) const {
+ void encode(ceph::buffer::list& bl) const {
ENCODE_START(1, 1, bl);
encode(mtime, bl);
ENCODE_FINISH(bl);
}
- void decode(bufferlist::const_iterator &bl) {
+ void decode(ceph::buffer::list::const_iterator& bl) {
DECODE_START(1, bl);
decode(mtime, bl);
DECODE_FINISH(bl);
}
};
-WRITE_CLASS_ENCODER(cls_fifo_entry_header)
+WRITE_CLASS_ENCODER(entry_header)
+namespace {
-static string new_oid_prefix(string id, std::optional<string>& val)
+std::string new_oid_prefix(std::string id, std::optional<std::string>& val)
{
+ static constexpr auto PREFIX_RND_SIZE = 12;
if (val) {
return *val;
}
-#define PREFIX_RND_SIZE 12
-
char buf[PREFIX_RND_SIZE + 1];
buf[PREFIX_RND_SIZE] = 0;
cls_gen_rand_base64(buf, sizeof(buf) - 1);
- char s[id.size() + 1 + sizeof(buf) + 16];
- snprintf(s, sizeof(s), "%s.%s", id.c_str(), buf);
- return s;
+ return fmt::format("{}.{}", id, buf);
}
-static int write_header(cls_method_context_t hctx,
- fifo_info_t& header,
- bool inc_ver = true)
+int write_header(cls_method_context_t hctx,
+ info& header,
+ bool inc_ver = true)
{
- if (header.objv.instance.empty()) {
-#define HEADER_INSTANCE_SIZE 16
- char buf[HEADER_INSTANCE_SIZE + 1];
- buf[HEADER_INSTANCE_SIZE] = 0;
- cls_gen_rand_base64(buf, sizeof(buf) - 1);
-
- header.objv.instance = buf;
+ static constexpr auto HEADER_INSTANCE_SIZE = 16;
+ if (header.version.instance.empty()) {
+ char buf[HEADER_INSTANCE_SIZE + 1];
+ buf[HEADER_INSTANCE_SIZE] = 0;
+ cls_gen_rand_base64(buf, sizeof(buf) - 1);
+ header.version.instance = buf;
}
if (inc_ver) {
- ++header.objv.ver;
+ ++header.version.ver;
}
- bufferlist bl;
+ ceph::buffer::list bl;
encode(header, bl);
return cls_cxx_write_full(hctx, &bl);
}
-static int read_part_header(cls_method_context_t hctx,
- fifo_part_header_t *part_header)
+int read_part_header(cls_method_context_t hctx,
+ part_header* part_header)
{
- bufferlist bl;
- int r = cls_cxx_read2(hctx, 0, CLS_FIFO_MAX_PART_HEADER_SIZE, &bl, CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
+ ceph::buffer::list bl;
+ int r = cls_cxx_read2(hctx, 0, CLS_FIFO_MAX_PART_HEADER_SIZE, &bl,
+ CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
if (r < 0) {
CLS_ERR("ERROR: %s(): cls_cxx_read2() on obj returned %d", __func__, r);
return r;
auto iter = bl.cbegin();
try {
decode(*part_header, iter);
- } catch (buffer::error& err) {
+ } catch (const ceph::buffer::error& err) {
CLS_ERR("ERROR: %s(): failed decoding part header", __func__);
return -EIO;
}
- CLS_LOG(20, "%s():%d read part_header:\n"
- "\ttag=%s\n"
- "\tmagic=0x%llx\n"
- "\tmin_ofs=%lld\n"
- "\tmax_ofs=%lld\n"
- "\tmin_index=%lld\n"
- "\tmax_index=%lld\n",
- __func__, __LINE__,
- part_header->tag.c_str(),
- (long long)part_header->magic,
- (long long)part_header->min_ofs,
- (long long)part_header->max_ofs,
- (long long)part_header->min_index,
- (long long)part_header->max_index);
+ using ceph::operator <<;
+ std::ostringstream ss;
+ ss << part_header->max_time;
+ CLS_LOG(10, "%s():%d read part_header:\n"
+ "\ttag=%s\n"
+ "\tmagic=0x%" PRIx64 "\n"
+ "\tmin_ofs=%" PRId64 "\n"
+ "\tlast_ofs=%" PRId64 "\n"
+ "\tnext_ofs=%" PRId64 "\n"
+ "\tmin_index=%" PRId64 "\n"
+ "\tmax_index=%" PRId64 "\n"
+ "\tmax_time=%s\n",
+ __func__, __LINE__,
+ part_header->tag.c_str(),
+ part_header->magic,
+ part_header->min_ofs,
+ part_header->last_ofs,
+ part_header->next_ofs,
+ part_header->min_index,
+ part_header->max_index,
+ ss.str().c_str());
return 0;
-
}
-static int write_part_header(cls_method_context_t hctx,
- fifo_part_header_t& part_header)
+int write_part_header(cls_method_context_t hctx,
+ part_header& part_header)
{
- bufferlist bl;
+ ceph::buffer::list bl;
encode(part_header, bl);
if (bl.length() > CLS_FIFO_MAX_PART_HEADER_SIZE) {
}
int r = cls_cxx_write2(hctx, 0, bl.length(),
- &bl, CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
+ &bl, CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
if (r < 0) {
CLS_LOG(10, "%s(): failed to write part header: r=%d",
__func__, r);
return 0;
}
-static int read_header(cls_method_context_t hctx,
- std::optional<fifo_objv_t> objv,
- fifo_info_t *info)
+int read_header(cls_method_context_t hctx,
+ std::optional<objv> objv,
+ info* info)
{
- uint64_t size;
+ std::uint64_t size;
int r = cls_cxx_stat2(hctx, &size, nullptr);
if (r < 0) {
return r;
}
- bufferlist bl;
+ ceph::buffer::list bl;
r = cls_cxx_read2(hctx, 0, size, &bl, CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
if (r < 0) {
CLS_ERR("ERROR: %s(): cls_cxx_read2() on obj returned %d", __func__, r);
return r;
}
+ if (r == 0) {
+ CLS_ERR("ERROR: %s(): Zero length object, returning ENODATA", __func__);
+ return -ENODATA;
+ }
+
try {
auto iter = bl.cbegin();
decode(*info, iter);
- } catch (buffer::error& err) {
+ } catch (const ceph::buffer::error& err) {
CLS_ERR("ERROR: %s(): failed decoding header", __func__);
return -EIO;
}
- if (objv &&
- !(info->objv == *objv)) {
- string s1 = info->objv.to_str();
- string s2 = objv->to_str();
- CLS_LOG(10, "%s(): version mismatch (header=%s, req=%s), cancelled operation", __func__, s1.c_str(), s2.c_str());
+  if (objv && !(info->version == *objv)) {
+ auto s1 = info->version.to_str();
+ auto s2 = objv->to_str();
+ CLS_LOG(10, "%s(): version mismatch (header=%s, req=%s), canceled operation",
+ __func__, s1.c_str(), s2.c_str());
return -ECANCELED;
}
return 0;
}
-static int fifo_meta_create_op(cls_method_context_t hctx,
- bufferlist *in, bufferlist *out)
+int create_meta(cls_method_context_t hctx,
+ ceph::buffer::list* in, ceph::buffer::list* out)
{
- CLS_LOG(20, "%s", __func__);
+ CLS_LOG(10, "%s", __func__);
- cls_fifo_meta_create_op op;
+ op::create_meta op;
try {
auto iter = in->cbegin();
decode(op, iter);
- } catch (const buffer::error &err) {
+ } catch (const ceph::buffer::error& err) {
CLS_ERR("ERROR: %s(): failed to decode request", __func__);
return -EINVAL;
}
- uint64_t size;
+ if (op.id.empty()) {
+ CLS_LOG(10, "%s(): ID cannot be empty", __func__);
+ return -EINVAL;
+ }
+
+ if (op.max_part_size == 0 ||
+ op.max_entry_size == 0 ||
+ op.max_entry_size > op.max_part_size) {
+ CLS_ERR("ERROR: %s(): invalid dimensions.", __func__);
+ return -EINVAL;
+ }
+
+ std::uint64_t size;
int r = cls_cxx_stat2(hctx, &size, nullptr);
if (r < 0 && r != -ENOENT) {
}
if (r == 0) {
- bufferlist bl;
+ ceph::buffer::list bl;
r = cls_cxx_read2(hctx, 0, size, &bl, CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
if (r < 0) {
CLS_ERR("ERROR: %s(): cls_cxx_read2() on obj returned %d", __func__, r);
return r;
}
- fifo_info_t header;
+ info header;
try {
auto iter = bl.cbegin();
decode(header, iter);
- } catch (buffer::error& err) {
+ } catch (const ceph::buffer::error& err) {
CLS_ERR("ERROR: %s(): failed decoding header", __func__);
return -EIO;
}
if (!(header.id == op.id &&
(!op.oid_prefix ||
header.oid_prefix == *op.oid_prefix) &&
- (!op.objv ||
- header.objv == *op.objv))) {
- CLS_LOG(10, "%s(): failed to re-create existing queue with different params", __func__);
+ (!op.version ||
+ header.version == *op.version))) {
+ CLS_LOG(10, "%s(): failed to re-create existing queue "
+ "with different params", __func__);
return -EEXIST;
}
return 0; /* already exists */
}
- fifo_info_t header;
-
+ info header;
+
header.id = op.id;
- if (op.objv) {
- header.objv = *op.objv;
+ if (op.version) {
+ header.version = *op.version;
} else {
-#define DEFAULT_INSTANCE_SIZE 16
+ static constexpr auto DEFAULT_INSTANCE_SIZE = 16;
char buf[DEFAULT_INSTANCE_SIZE + 1];
cls_gen_rand_base64(buf, sizeof(buf));
buf[DEFAULT_INSTANCE_SIZE] = '\0';
- header.objv.instance = buf;
- header.objv.ver = 1;
+ header.version.instance = buf;
+ header.version.ver = 1;
}
header.oid_prefix = new_oid_prefix(op.id, op.oid_prefix);
- header.data_params.max_part_size = op.max_part_size;
- header.data_params.max_entry_size = op.max_entry_size;
- header.data_params.full_size_threshold = op.max_part_size - op.max_entry_size - part_entry_overhead;
+ header.params.max_part_size = op.max_part_size;
+ header.params.max_entry_size = op.max_entry_size;
+ header.params.full_size_threshold = op.max_part_size - op.max_entry_size - part_entry_overhead;
r = write_header(hctx, header, false);
if (r < 0) {
return 0;
}
-static int fifo_meta_update_op(cls_method_context_t hctx,
- bufferlist *in, bufferlist *out)
+int update_meta(cls_method_context_t hctx, ceph::buffer::list* in,
+ ceph::buffer::list* out)
{
- CLS_LOG(20, "%s", __func__);
+ CLS_LOG(10, "%s", __func__);
- cls_fifo_meta_update_op op;
+ op::update_meta op;
try {
auto iter = in->cbegin();
decode(op, iter);
- } catch (const buffer::error &err) {
+ } catch (const ceph::buffer::error& err) {
CLS_ERR("ERROR: %s(): failed to decode request", __func__);
return -EINVAL;
}
- fifo_info_t header;
-
- int r = read_header(hctx, op.objv, &header);
- if (r < 0) {
- return r;
+ if (op.version.empty()) {
+ CLS_LOG(10, "%s(): no version supplied", __func__);
+ return -EINVAL;
}
- string err;
+ info header;
- r = header.apply_update(op.tail_part_num,
- op.head_part_num,
- op.min_push_part_num,
- op.max_push_part_num,
- op.journal_entries_add,
- op.journal_entries_rm,
- &err);
+ int r = read_header(hctx, op.version, &header);
if (r < 0) {
- CLS_LOG(10, "%s(): %s", __func__, err.c_str());
return r;
}
+ auto err = header.apply_update(fifo::update()
+ .tail_part_num(op.tail_part_num)
+ .head_part_num(op.head_part_num)
+ .min_push_part_num(op.min_push_part_num)
+ .max_push_part_num(op.max_push_part_num)
+ .journal_entries_add(
+ std::move(op.journal_entries_add))
+ .journal_entries_rm(
+ std::move(op.journal_entries_rm)));
+ if (err) {
+ CLS_LOG(10, "%s(): %s", __func__, err->c_str());
+ return -EINVAL;
+ }
+
r = write_header(hctx, header);
if (r < 0) {
CLS_LOG(10, "%s(): failed to write header: r=%d", __func__, r);
return 0;
}
-static int fifo_meta_get_op(cls_method_context_t hctx,
- bufferlist *in, bufferlist *out)
+int get_meta(cls_method_context_t hctx, ceph::buffer::list* in,
+ ceph::buffer::list* out)
{
- CLS_LOG(20, "%s", __func__);
+ CLS_LOG(10, "%s", __func__);
- cls_fifo_meta_get_op op;
+ op::get_meta op;
try {
auto iter = in->cbegin();
decode(op, iter);
- } catch (const buffer::error &err) {
+ } catch (const ceph::buffer::error &err) {
CLS_ERR("ERROR: %s(): failed to decode request", __func__);
return -EINVAL;
}
- cls_fifo_meta_get_op_reply reply;
- int r = read_header(hctx, op.objv, &reply.info);
+ op::get_meta_reply reply;
+ int r = read_header(hctx, op.version, &reply.info);
if (r < 0) {
return r;
}
return 0;
}
-static int fifo_part_init_op(cls_method_context_t hctx,
- bufferlist *in, bufferlist *out)
+int init_part(cls_method_context_t hctx, ceph::buffer::list* in,
+ ceph::buffer::list *out)
{
- CLS_LOG(20, "%s", __func__);
+ CLS_LOG(10, "%s", __func__);
- cls_fifo_part_init_op op;
+ op::init_part op;
try {
auto iter = in->cbegin();
decode(op, iter);
- } catch (const buffer::error &err) {
+ } catch (const ceph::buffer::error &err) {
CLS_ERR("ERROR: %s(): failed to decode request", __func__);
return -EINVAL;
}
- uint64_t size;
+ std::uint64_t size;
+
+ if (op.tag.empty()) {
+ CLS_LOG(10, "%s(): tag required", __func__);
+ return -EINVAL;
+ }
int r = cls_cxx_stat2(hctx, &size, nullptr);
if (r < 0 && r != -ENOENT) {
return r;
}
if (r == 0 && size > 0) {
- fifo_part_header_t part_header;
+ part_header part_header;
r = read_part_header(hctx, &part_header);
if (r < 0) {
CLS_LOG(10, "%s(): failed to read part header", __func__);
}
if (!(part_header.tag == op.tag &&
- part_header.params == op.data_params)) {
- CLS_LOG(10, "%s(): failed to re-create existing part with different params", __func__);
+ part_header.params == op.params)) {
+ CLS_LOG(10, "%s(): failed to re-create existing part with different "
+ "params", __func__);
return -EEXIST;
}
return 0; /* already exists */
}
- fifo_part_header_t part_header;
-
+ part_header part_header;
+
part_header.tag = op.tag;
- part_header.params = op.data_params;
+ part_header.params = op.params;
part_header.min_ofs = CLS_FIFO_MAX_PART_HEADER_SIZE;
- part_header.max_ofs = part_header.min_ofs;
+ part_header.last_ofs = 0;
+ part_header.next_ofs = part_header.min_ofs;
+ part_header.max_time = ceph::real_clock::now();
- cls_gen_random_bytes((char *)&part_header.magic, sizeof(part_header.magic));
+ cls_gen_random_bytes(reinterpret_cast<char *>(&part_header.magic),
+ sizeof(part_header.magic));
r = write_part_header(hctx, part_header);
if (r < 0) {
return 0;
}
-static bool full_part(const fifo_part_header_t& part_header)
+bool full_part(const part_header& part_header)
{
- return (part_header.max_ofs > part_header.params.full_size_threshold);
+ return (part_header.next_ofs > part_header.params.full_size_threshold);
}
-static int fifo_part_push_op(cls_method_context_t hctx,
- bufferlist *in, bufferlist *out)
+int push_part(cls_method_context_t hctx, ceph::buffer::list* in,
+ ceph::buffer::list* out)
{
- CLS_LOG(20, "%s", __func__);
+ CLS_LOG(10, "%s", __func__);
- cls_fifo_part_push_op op;
+ op::push_part op;
try {
auto iter = in->cbegin();
decode(op, iter);
- } catch (const buffer::error &err) {
+ } catch (const ceph::buffer::error& err) {
CLS_ERR("ERROR: %s(): failed to decode request", __func__);
return -EINVAL;
}
- fifo_part_header_t part_header;
+ if (op.tag.empty()) {
+ CLS_LOG(10, "%s(): tag required", __func__);
+ return -EINVAL;
+ }
+
+ part_header part_header;
int r = read_part_header(hctx, &part_header);
if (r < 0) {
CLS_LOG(10, "%s(): failed to read part header", __func__);
return -EINVAL;
}
- uint64_t effective_len = op.total_len + op.data_bufs.size() * part_entry_overhead;
+ std::uint64_t effective_len = op.total_len + op.data_bufs.size() *
+ part_entry_overhead;
- if (effective_len > part_header.params.max_entry_size + part_entry_overhead) {
+ if (effective_len > part_header.params.max_part_size) {
return -EINVAL;
}
return -ERANGE;
}
- struct cls_fifo_entry_header entry_header;
- entry_header.mtime = real_clock::now();
-
- bufferlist entry_header_bl;
+ auto now = ceph::real_clock::now();
+ struct entry_header entry_header = { now };
+ ceph::buffer::list entry_header_bl;
encode(entry_header, entry_header_bl);
auto max_index = part_header.max_index;
- auto ofs = part_header.max_ofs;
+ const auto write_ofs = part_header.next_ofs;
+ auto ofs = part_header.next_ofs;
- cls_fifo_entry_header_pre pre_header;
+ entry_header_pre pre_header;
pre_header.magic = part_header.magic;
pre_header.pre_size = sizeof(pre_header);
pre_header.reserved = 0;
- uint64_t total_data = 0;
-
+ std::uint64_t total_data = 0;
for (auto& data : op.data_bufs) {
total_data += data.length();
+ }
+ if (total_data != op.total_len) {
+    CLS_LOG(10, "%s(): length mismatch: op.total_len=%" PRIu64
+	    " total data received=%" PRIu64,
+	    __func__, op.total_len, total_data);
+ return -EINVAL;
+ }
+
+
+ int entries_pushed = 0;
+ ceph::buffer::list all_data;
+ for (auto& data : op.data_bufs) {
+ if (full_part(part_header))
+ break;
pre_header.header_size = entry_header_bl.length();
pre_header.data_size = data.length();
pre_header.index = max_index;
- bufferptr pre((char *)&pre_header, sizeof(pre_header));
- bufferlist all_data;
+ bufferptr pre(reinterpret_cast<char*>(&pre_header), sizeof(pre_header));
+ auto entry_write_len = pre.length() + entry_header_bl.length() + data.length();
all_data.append(pre);
all_data.append(entry_header_bl);
all_data.claim_append(data);
- auto write_len = all_data.length();
-
- r = cls_cxx_write2(hctx, ofs, write_len,
- &all_data, CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
- if (r < 0) {
- CLS_LOG(10, "%s(): failed to write entry (ofs=%lld len=%lld): r=%d",
- __func__, (long long)part_header.max_ofs, (long long)write_len, r);
- return r;
- }
-
- ofs += write_len;
+ part_header.last_ofs = ofs;
+ ofs += entry_write_len;
++max_index;
+ ++entries_pushed;
+ part_header.max_index = max_index;
+ part_header.next_ofs = ofs;
}
+ part_header.max_time = now;
- if (total_data != op.total_len) {
- CLS_LOG(10, "%s(): length mismatch: op.total_len=%lld total data received=%lld",
- __func__, (long long)op.total_len, (long long)total_data);
- return -EINVAL;
+ auto write_len = all_data.length();
+
+ r = cls_cxx_write2(hctx, write_ofs, write_len,
+ &all_data, CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
+
+ if (r < 0) {
+ CLS_LOG(10,"%s(): failed to write entries (ofs=%" PRIu64
+ " len=%u): r=%d", __func__, write_ofs,
+ write_len, r);
+ return r;
}
- part_header.max_index = max_index;
- part_header.max_ofs = ofs;
r = write_part_header(hctx, part_header);
if (r < 0) {
return r;
}
- return 0;
+ if (entries_pushed == 0) {
+ CLS_LOG(0, "%s(): pushed no entries? Can't happen!", __func__);
+ return -EFAULT;
+ }
+
+ return entries_pushed;
}
class EntryReader {
- static constexpr uint64_t prefetch_len = (128 * 1024);
+ static constexpr std::uint64_t prefetch_len = (128 * 1024);
cls_method_context_t hctx;
- fifo_part_header_t& part_header;
+ const fifo::part_header& part_header;
- uint64_t ofs;
- bufferlist data;
+ std::uint64_t ofs;
+ ceph::buffer::list data;
- int fetch(uint64_t num_bytes);
- int read(uint64_t num_bytes, bufferlist *pbl);
- int peek(uint64_t num_bytes, char *dest);
- int seek(uint64_t num_bytes);
+ int fetch(std::uint64_t num_bytes);
+ int read(std::uint64_t num_bytes, ceph::buffer::list* pbl);
+ int peek(std::uint64_t num_bytes, char *dest);
+ int seek(std::uint64_t num_bytes);
public:
- EntryReader(cls_method_context_t _hctx,
- fifo_part_header_t& _part_header,
- uint64_t _ofs) : hctx(_hctx),
- part_header(_part_header),
- ofs(_ofs) {
- if (ofs < part_header.min_ofs) {
- ofs = part_header.min_ofs;
- }
- }
-
- uint64_t get_ofs() const {
+ EntryReader(cls_method_context_t hctx,
+ const fifo::part_header& part_header,
+ uint64_t ofs) : hctx(hctx),
+ part_header(part_header),
+ ofs(ofs < part_header.min_ofs ?
+ part_header.min_ofs :
+ ofs) {}
+
+ std::uint64_t get_ofs() const {
return ofs;
}
bool end() const {
- return (ofs >= part_header.max_ofs);
+ return (ofs >= part_header.next_ofs);
}
- int peek_pre_header(cls_fifo_entry_header_pre *pre_header);
- int get_next_entry(bufferlist *pbl,
- uint64_t *pofs,
- ceph::real_time *pmtime);
+ int peek_pre_header(entry_header_pre* pre_header);
+ int get_next_entry(ceph::buffer::list* pbl,
+ std::uint64_t* pofs,
+ ceph::real_time* pmtime);
};
-int EntryReader::fetch(uint64_t num_bytes)
+int EntryReader::fetch(std::uint64_t num_bytes)
{
- CLS_LOG(20, "%s(): fetch %d bytes, ofs=%d data.length()=%d", __func__, (int)num_bytes, (int)ofs, (int)data.length());
+ CLS_LOG(10, "%s(): fetch %d bytes, ofs=%d data.length()=%d", __func__, (int)num_bytes, (int)ofs, (int)data.length());
if (data.length() < num_bytes) {
- bufferlist bl;
- CLS_LOG(20, "%s(): reading %d bytes at ofs=%d", __func__, (int)prefetch_len, (int)ofs + data.length());
+ ceph::buffer::list bl;
+    CLS_LOG(10, "%s(): reading %" PRIu64 " bytes at ofs=%" PRIu64, __func__,
+	    prefetch_len, ofs + data.length());
int r = cls_cxx_read2(hctx, ofs + data.length(), prefetch_len, &bl, CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
if (r < 0) {
CLS_ERR("ERROR: %s(): cls_cxx_read2() on obj returned %d", __func__, r);
data.claim_append(bl);
}
- if ((unsigned)num_bytes > data.length()) {
- CLS_LOG(20, "%s(): requested %lld bytes, but only %lld were available", __func__, (long long)num_bytes, (long long)data.length());
+ if (static_cast<unsigned>(num_bytes) > data.length()) {
+ CLS_LOG(10, "%s(): requested %" PRId64 " bytes, but only "
+ "%u were available", __func__, num_bytes, data.length());
return -ERANGE;
}
return 0;
}
-int EntryReader::read(uint64_t num_bytes, bufferlist *pbl)
+int EntryReader::read(std::uint64_t num_bytes, ceph::buffer::list* pbl)
{
int r = fetch(num_bytes);
if (r < 0) {
return 0;
}
-int EntryReader::peek(uint64_t num_bytes, char *dest)
+int EntryReader::peek(std::uint64_t num_bytes, char* dest)
{
int r = fetch(num_bytes);
if (r < 0) {
return 0;
}
-int EntryReader::seek(uint64_t num_bytes)
+int EntryReader::seek(std::uint64_t num_bytes)
{
- bufferlist bl;
+ ceph::buffer::list bl;
- CLS_LOG(20, "%s():%d: num_bytes=%d", __func__, __LINE__, (int)num_bytes);
+ CLS_LOG(10, "%s():%d: num_bytes=%" PRIu64, __func__, __LINE__, num_bytes);
return read(num_bytes, &bl);
}
-int EntryReader::peek_pre_header(cls_fifo_entry_header_pre *pre_header)
+int EntryReader::peek_pre_header(entry_header_pre* pre_header)
{
if (end()) {
return -ENOENT;
}
- int r = peek(sizeof(*pre_header), (char *)pre_header);
+ int r = peek(sizeof(*pre_header),
+ reinterpret_cast<char*>(pre_header));
if (r < 0) {
- CLS_ERR("ERROR: %s(): peek() size=%d failed: r=%d", __func__, (int)sizeof(pre_header), r);
+ CLS_ERR("ERROR: %s(): peek() size=%zu failed: r=%d", __func__,
+	    sizeof(*pre_header), r);
return r;
}
}
-int EntryReader::get_next_entry(bufferlist *pbl,
- uint64_t *pofs,
- ceph::real_time *pmtime)
+int EntryReader::get_next_entry(ceph::buffer::list* pbl,
+ std::uint64_t* pofs,
+ ceph::real_time* pmtime)
{
- cls_fifo_entry_header_pre pre_header;
+ entry_header_pre pre_header;
int r = peek_pre_header(&pre_header);
if (r < 0) {
CLS_ERR("ERROR: %s(): peek_pre_header() failed: r=%d", __func__, r);
*pofs = ofs;
}
- CLS_LOG(20, "%s():%d: pre_header.pre_size=%d", __func__, __LINE__, (int)pre_header.pre_size);
+ CLS_LOG(10, "%s():%d: pre_header.pre_size=%llu", __func__, __LINE__,
+ pre_header.pre_size);
r = seek(pre_header.pre_size);
if (r < 0) {
CLS_ERR("ERROR: %s(): failed to seek: r=%d", __func__, r);
return r;
}
- bufferlist header;
- CLS_LOG(20, "%s():%d: pre_header.header_size=%d", __func__, __LINE__, (int)pre_header.header_size);
+ ceph::buffer::list header;
+ CLS_LOG(10, "%s():%d: pre_header.header_size=%d", __func__, __LINE__, (int)pre_header.header_size);
r = read(pre_header.header_size, &header);
if (r < 0) {
CLS_ERR("ERROR: %s(): failed to read entry header: r=%d", __func__, r);
return r;
}
- cls_fifo_entry_header entry_header;
+ entry_header entry_header;
auto iter = header.cbegin();
try {
decode(entry_header, iter);
- } catch (buffer::error& err) {
+ } catch (ceph::buffer::error& err) {
CLS_ERR("%s(): failed decoding entry header", __func__);
return -EIO;
}
return 0;
}
-static int fifo_part_trim_op(cls_method_context_t hctx,
- bufferlist *in, bufferlist *out)
+int trim_part(cls_method_context_t hctx,
+ ceph::buffer::list *in, ceph::buffer::list *out)
{
- CLS_LOG(20, "%s", __func__);
+ CLS_LOG(10, "%s", __func__);
- cls_fifo_part_trim_op op;
+ op::trim_part op;
try {
auto iter = in->cbegin();
decode(op, iter);
- } catch (const buffer::error &err) {
+ } catch (const ceph::buffer::error &err) {
CLS_ERR("ERROR: %s(): failed to decode request", __func__);
return -EINVAL;
}
- fifo_part_header_t part_header;
+ part_header part_header;
int r = read_part_header(hctx, &part_header);
if (r < 0) {
CLS_LOG(10, "%s(): failed to read part header", __func__);
return 0;
}
- if (op.ofs >= part_header.max_ofs) {
+ if (op.ofs >= part_header.next_ofs) {
if (full_part(part_header)) {
/*
* trim full part completely: remove object
r = cls_cxx_remove(hctx);
if (r < 0) {
CLS_LOG(0, "%s(): ERROR: cls_cxx_remove() returned r=%d", __func__, r);
- return r;
+ return r;
}
return 0;
}
-
- part_header.min_ofs = part_header.max_ofs;
+
+ part_header.min_ofs = part_header.next_ofs;
part_header.min_index = part_header.max_index;
} else {
EntryReader reader(hctx, part_header, op.ofs);
- cls_fifo_entry_header_pre pre_header;
+ entry_header_pre pre_header;
int r = reader.peek_pre_header(&pre_header);
if (r < 0) {
return r;
r = reader.get_next_entry(nullptr, nullptr, nullptr);
if (r < 0) {
- CLS_ERR("ERROR: %s(): unexpected failure at get_next_entry(): r=%d", __func__, r);
+ CLS_ERR("ERROR: %s(): unexpected failure at get_next_entry(): r=%d",
+ __func__, r);
return r;
}
return 0;
}
-static int fifo_part_list_op(cls_method_context_t hctx,
- bufferlist *in, bufferlist *out)
+int list_part(cls_method_context_t hctx, ceph::buffer::list* in,
+ ceph::buffer::list* out)
{
- CLS_LOG(20, "%s", __func__);
+ CLS_LOG(10, "%s", __func__);
- cls_fifo_part_list_op op;
+ op::list_part op;
try {
auto iter = in->cbegin();
decode(op, iter);
return -EINVAL;
}
- fifo_part_header_t part_header;
+ part_header part_header;
int r = read_part_header(hctx, &part_header);
if (r < 0) {
CLS_LOG(10, "%s(): failed to read part header", __func__);
}
}
- cls_fifo_part_list_op_reply reply;
+ op::list_part_reply reply;
reply.tag = part_header.tag;
-#define LIST_MAX_ENTRIES 512
-
- auto max_entries = std::min(op.max_entries, (int)LIST_MAX_ENTRIES);
+ auto max_entries = std::min(op.max_entries, op::MAX_LIST_ENTRIES);
for (int i = 0; i < max_entries && !reader.end(); ++i) {
- bufferlist data;
+ ceph::buffer::list data;
ceph::real_time mtime;
- uint64_t ofs;
+ std::uint64_t ofs;
r = reader.get_next_entry(&data, &ofs, &mtime);
if (r < 0) {
- CLS_ERR("ERROR: %s(): unexpected failure at get_next_entry(): r=%d", __func__, r);
+ CLS_ERR("ERROR: %s(): unexpected failure at get_next_entry(): r=%d",
+ __func__, r);
return r;
}
return 0;
}
-static int fifo_part_get_info_op(cls_method_context_t hctx,
- bufferlist *in, bufferlist *out)
+int get_part_info(cls_method_context_t hctx, ceph::buffer::list *in,
+ ceph::buffer::list *out)
{
- CLS_LOG(20, "%s", __func__);
+ CLS_LOG(10, "%s", __func__);
- cls_fifo_part_get_info_op op;
+ op::get_part_info op;
try {
auto iter = in->cbegin();
decode(op, iter);
- } catch (const buffer::error &err) {
+ } catch (const ceph::buffer::error &err) {
CLS_ERR("ERROR: %s(): failed to decode request", __func__);
return -EINVAL;
}
- cls_fifo_part_get_info_op_reply reply;
+ op::get_part_info_reply reply;
int r = read_part_header(hctx, &reply.header);
if (r < 0) {
return 0;
}
+}
+} // namespace rados::cls::fifo
CLS_INIT(fifo)
{
- CLS_LOG(20, "Loaded fifo class!");
+ using namespace rados::cls::fifo;
+ CLS_LOG(10, "Loaded fifo class!");
cls_handle_t h_class;
- cls_method_handle_t h_fifo_meta_create_op;
- cls_method_handle_t h_fifo_meta_get_op;
- cls_method_handle_t h_fifo_meta_update_op;
- cls_method_handle_t h_fifo_part_init_op;
- cls_method_handle_t h_fifo_part_push_op;
- cls_method_handle_t h_fifo_part_trim_op;
- cls_method_handle_t h_fifo_part_list_op;
- cls_method_handle_t h_fifo_part_get_info_op;
-
- cls_register("fifo", &h_class);
- cls_register_cxx_method(h_class, "fifo_meta_create",
+ cls_method_handle_t h_create_meta;
+ cls_method_handle_t h_get_meta;
+ cls_method_handle_t h_update_meta;
+ cls_method_handle_t h_init_part;
+ cls_method_handle_t h_push_part;
+ cls_method_handle_t h_trim_part;
+ cls_method_handle_t h_list_part;
+ cls_method_handle_t h_get_part_info;
+
+ cls_register(op::CLASS, &h_class);
+ cls_register_cxx_method(h_class, op::CREATE_META,
CLS_METHOD_RD | CLS_METHOD_WR,
- fifo_meta_create_op, &h_fifo_meta_create_op);
+ create_meta, &h_create_meta);
- cls_register_cxx_method(h_class, "fifo_meta_get",
+ cls_register_cxx_method(h_class, op::GET_META,
CLS_METHOD_RD,
- fifo_meta_get_op, &h_fifo_meta_get_op);
+ get_meta, &h_get_meta);
- cls_register_cxx_method(h_class, "fifo_meta_update",
+ cls_register_cxx_method(h_class, op::UPDATE_META,
CLS_METHOD_RD | CLS_METHOD_WR,
- fifo_meta_update_op, &h_fifo_meta_update_op);
+ update_meta, &h_update_meta);
- cls_register_cxx_method(h_class, "fifo_part_init",
+ cls_register_cxx_method(h_class, op::INIT_PART,
CLS_METHOD_RD | CLS_METHOD_WR,
- fifo_part_init_op, &h_fifo_part_init_op);
+ init_part, &h_init_part);
- cls_register_cxx_method(h_class, "fifo_part_push",
+ cls_register_cxx_method(h_class, op::PUSH_PART,
CLS_METHOD_RD | CLS_METHOD_WR,
- fifo_part_push_op, &h_fifo_part_push_op);
+ push_part, &h_push_part);
- cls_register_cxx_method(h_class, "fifo_part_trim",
+ cls_register_cxx_method(h_class, op::TRIM_PART,
CLS_METHOD_RD | CLS_METHOD_WR,
- fifo_part_trim_op, &h_fifo_part_trim_op);
+ trim_part, &h_trim_part);
- cls_register_cxx_method(h_class, "fifo_part_list",
+ cls_register_cxx_method(h_class, op::LIST_PART,
CLS_METHOD_RD,
- fifo_part_list_op, &h_fifo_part_list_op);
+ list_part, &h_list_part);
- cls_register_cxx_method(h_class, "fifo_part_get_info",
+ cls_register_cxx_method(h_class, op::GET_PART_INFO,
CLS_METHOD_RD,
- fifo_part_get_info_op, &h_fifo_part_get_info_op);
+ get_part_info, &h_get_part_info);
/* calculate entry overhead */
- struct cls_fifo_entry_header entry_header;
- bufferlist entry_header_bl;
+ struct entry_header entry_header;
+ ceph::buffer::list entry_header_bl;
encode(entry_header, entry_header_bl);
- part_entry_overhead = sizeof(cls_fifo_entry_header_pre) + entry_header_bl.length();
+ part_entry_overhead = sizeof(entry_header_pre) + entry_header_bl.length();
return;
}
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2019 Red Hat, Inc.
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-#include "include/rados/librados.hpp"
-#include "common/dout.h"
-
-#include "auth/Crypto.h"
-
-using namespace librados;
-
-#include "cls/fifo/cls_fifo_ops.h"
-#include "cls/fifo/cls_fifo_client.h"
-
-
-#define dout_subsys ceph_subsys_objclass
-
-
-namespace rados {
- namespace cls {
- namespace fifo {
- int ClsFIFO::meta_create(librados::ObjectWriteOperation *rados_op,
- const string& id,
- const MetaCreateParams& params) {
- cls_fifo_meta_create_op op;
-
- auto& state = params.state;
-
- if (id.empty()) {
- return -EINVAL;
- }
-
- op.id = id;
- op.objv = state.objv;
- op.oid_prefix = state.oid_prefix;
- op.max_part_size = state.max_part_size;
- op.max_entry_size = state.max_entry_size;
- op.exclusive = state.exclusive;
-
- if (op.max_part_size == 0 ||
- op.max_entry_size == 0 ||
- op.max_entry_size > op.max_part_size) {
- return -EINVAL;
- }
-
- bufferlist in;
- encode(op, in);
- rados_op->exec("fifo", "fifo_meta_create", in);
-
- return 0;
- }
-
- int ClsFIFO::meta_get(librados::IoCtx& ioctx,
- const string& oid,
- const MetaGetParams& params,
- fifo_info_t *result,
- uint32_t *part_header_size,
- uint32_t *part_entry_overhead) {
- cls_fifo_meta_get_op op;
-
- auto& state = params.state;
-
- op.objv = state.objv;
-
- librados::ObjectReadOperation rop;
-
- bufferlist in;
- bufferlist out;
- int op_ret;
- encode(op, in);
- rop.exec("fifo", "fifo_meta_get", in, &out, &op_ret);
-
- int r = ioctx.operate(oid, &rop, nullptr);
- if (r < 0) {
- return r;
- }
-
- if (op_ret < 0) {
- return op_ret;
- }
-
- cls_fifo_meta_get_op_reply reply;
- auto iter = out.cbegin();
- try {
- decode(reply, iter);
- } catch (buffer::error& err) {
- return -EIO;
- }
-
- *result = reply.info;
-
- if (part_header_size) {
- *part_header_size = reply.part_header_size;
- }
-
- if (part_entry_overhead) {
- *part_entry_overhead = reply.part_entry_overhead;
- }
-
- return 0;
- }
-
- int ClsFIFO::meta_update(librados::ObjectWriteOperation *rados_op,
- const MetaUpdateParams& params) {
- cls_fifo_meta_update_op op;
-
- auto& state = params.state;
-
- if (state.objv.empty()) {
- return -EINVAL;
- }
-
- op.objv = state.objv;
- op.tail_part_num = state.tail_part_num;
- op.head_part_num = state.head_part_num;
- op.min_push_part_num = state.min_push_part_num;
- op.max_push_part_num = state.max_push_part_num;
- op.journal_entries_add = state.journal_entries_add;
- op.journal_entries_rm = state.journal_entries_rm;
-
- bufferlist in;
- encode(op, in);
- rados_op->exec("fifo", "fifo_meta_update", in);
-
- return 0;
- }
-
- int ClsFIFO::part_init(librados::ObjectWriteOperation *rados_op,
- const PartInitParams& params) {
- cls_fifo_part_init_op op;
-
- auto& state = params.state;
-
- if (state.tag.empty()) {
- return -EINVAL;
- }
-
- op.tag = state.tag;
- op.data_params = state.data_params;
-
- bufferlist in;
- encode(op, in);
- rados_op->exec("fifo", "fifo_part_init", in);
-
- return 0;
- }
-
- int ClsFIFO::push_part(librados::ObjectWriteOperation *rados_op,
- const PushPartParams& params) {
- cls_fifo_part_push_op op;
-
- auto& state = params.state;
-
- if (state.tag.empty()) {
- return -EINVAL;
- }
-
- op.tag = state.tag;
- op.data_bufs = state.data_bufs;
- op.total_len = state.total_len;
-
- bufferlist in;
- encode(op, in);
- rados_op->exec("fifo", "fifo_part_push", in);
-
- return 0;
- }
-
- int ClsFIFO::trim_part(librados::ObjectWriteOperation *rados_op,
- const TrimPartParams& params) {
- cls_fifo_part_trim_op op;
-
- auto& state = params.state;
-
- op.tag = state.tag;
- op.ofs = state.ofs;
-
- bufferlist in;
- encode(op, in);
- rados_op->exec("fifo", "fifo_part_trim", in);
-
- return 0;
- }
-
- int ClsFIFO::list_part(librados::IoCtx& ioctx,
- const string& oid,
- const ListPartParams& params,
- std::vector<cls_fifo_part_list_entry_t> *pentries,
- bool *more,
- bool *full_part,
- string *ptag)
- {
- cls_fifo_part_list_op op;
-
- auto& state = params.state;
-
- op.tag = state.tag;
- op.ofs = state.ofs;
- op.max_entries = state.max_entries;
-
- librados::ObjectReadOperation rop;
-
- bufferlist in;
- bufferlist out;
- int op_ret;
- encode(op, in);
- rop.exec("fifo", "fifo_part_list", in, &out, &op_ret);
-
- int r = ioctx.operate(oid, &rop, nullptr);
- if (r < 0) {
- return r;
- }
-
- if (op_ret < 0) {
- return op_ret;
- }
-
- cls_fifo_part_list_op_reply reply;
- auto iter = out.cbegin();
- try {
- decode(reply, iter);
- } catch (buffer::error& err) {
- return -EIO;
- }
-
- if (pentries) {
- *pentries = std::move(reply.entries);
- }
-
- if (more) {
- *more = reply.more;
- }
-
- if (full_part) {
- *full_part = reply.full_part;
- }
-
- if (ptag) {
- *ptag = reply.tag;
- }
-
- return 0;
- }
-
- int ClsFIFO::get_part_info(librados::IoCtx& ioctx,
- const string& oid,
- rados::cls::fifo::fifo_part_header_t *header)
- {
- cls_fifo_part_get_info_op op;
-
- librados::ObjectReadOperation rop;
-
- bufferlist in;
- bufferlist out;
- int op_ret;
- encode(op, in);
- rop.exec("fifo", "fifo_part_get_info", in, &out, &op_ret);
-
- int r = ioctx.operate(oid, &rop, nullptr);
- if (r < 0) {
- return r;
- }
-
- if (op_ret < 0) {
- return op_ret;
- }
-
- cls_fifo_part_get_info_op_reply reply;
- auto iter = out.cbegin();
- try {
- decode(reply, iter);
- } catch (buffer::error& err) {
- return -EIO;
- }
-
- if (header) {
- *header = std::move(reply.header);
- }
-
- return 0;
- }
-
- string FIFO::craft_marker(int64_t part_num,
- uint64_t part_ofs)
- {
- char buf[64];
- snprintf(buf, sizeof(buf), "%lld:%lld", (long long)part_num, (long long)part_ofs);
- return string(buf);
- }
-
- bool FIFO::parse_marker(const string& marker,
- int64_t *part_num,
- uint64_t *part_ofs)
- {
- if (marker.empty()) {
- *part_num = meta_info.tail_part_num;
- *part_ofs = 0;
- return true;
- }
-
- auto pos = marker.find(':');
- if (pos == string::npos) {
- return false;
- }
-
- auto first = marker.substr(0, pos);
- auto second = marker.substr(pos + 1);
-
- string err;
-
- *part_num = (int64_t)strict_strtoll(first.c_str(), 10, &err);
- if (!err.empty()) {
- return false;
- }
-
- *part_ofs = (uint64_t)strict_strtoll(second.c_str(), 10, &err);
- if (!err.empty()) {
- return false;
- }
-
- return true;
- }
-
- int FIFO::init_ioctx(librados::Rados *rados,
- const string& pool,
- std::optional<string> pool_ns)
- {
- _ioctx.emplace();
- int r = rados->ioctx_create(pool.c_str(), *_ioctx);
- if (r < 0) {
- return r;
- }
-
- if (pool_ns && !pool_ns->empty()) {
- _ioctx->set_namespace(*pool_ns);
- }
-
- ioctx = &(*_ioctx);
-
- return 0;
- }
-
- int ClsFIFO::MetaUpdateParams::apply_update(CephContext *cct,
- fifo_info_t *info)
- {
- string err;
-
- int r = info->apply_update(state.tail_part_num,
- state.head_part_num,
- state.min_push_part_num,
- state.max_push_part_num,
- state.journal_entries_add,
- state.journal_entries_rm,
- &err);
- if (r < 0) {
- ldout(cct, 0) << __func__ << "(): ERROR: " << err << dendl;
- return r;
- }
-
- ++info->objv.ver;
-
- return 0;
- }
-
- int FIFO::update_meta(ClsFIFO::MetaUpdateParams& update_params,
- bool *canceled)
- {
- update_params.objv(meta_info.objv);
-
- librados::ObjectWriteOperation wop;
- int r = ClsFIFO::meta_update(&wop, update_params);
- if (r < 0) {
- return r;
- }
-
- r = ioctx->operate(meta_oid, &wop);
- if (r < 0 && r != -ECANCELED) {
- return r;
- }
-
- *canceled = (r == -ECANCELED);
-
- if (!*canceled) {
- r = update_params.apply_update(cct, &meta_info);
- if (r < 0) { /* should really not happen,
- but if it does, let's treat it as if race was detected */
- *canceled = true;
- }
- }
-
- if (*canceled) {
- r = do_read_meta();
- }
- if (r < 0) {
- return r;
- }
-
- return 0;
- }
-
- int FIFO::do_read_meta(std::optional<fifo_objv_t> objv)
- {
- ClsFIFO::MetaGetParams get_params;
- if (objv) {
- get_params.objv(*objv);
- }
- int r = ClsFIFO::meta_get(*ioctx,
- meta_oid,
- get_params,
- &meta_info,
- &part_header_size,
- &part_entry_overhead);
- if (r < 0) {
- return r;
- }
-
- return 0;
- }
-
- int FIFO::create_part(int64_t part_num, const string& tag,
- int64_t& max_part_num) {
- librados::ObjectWriteOperation op;
-
- op.create(true); /* exclusive */
- int r = ClsFIFO::part_init(&op,
- ClsFIFO::PartInitParams()
- .tag(tag)
- .data_params(meta_info.data_params));
- if (r < 0) {
- return r;
- }
-
- r = ioctx->operate(meta_info.part_oid(part_num), &op);
- if (r < 0) {
- return r;
- }
-
- if (part_num > max_part_num) {
- max_part_num = part_num;
- }
-
- return 0;
- }
-
- int FIFO::remove_part(int64_t part_num, const string& tag,
- int64_t& tail_part_num) {
- librados::ObjectWriteOperation op;
- op.remove();
- int r = ioctx->operate(meta_info.part_oid(part_num), &op);
- if (r == -ENOENT) {
- r = 0;
- }
- if (r < 0) {
- return r;
- }
-
- if (part_num >= tail_part_num) {
- tail_part_num = part_num + 1;
- }
-
- return 0;
- }
-
- int FIFO::process_journal_entry(const fifo_journal_entry_t& entry,
- int64_t& tail_part_num,
- int64_t& head_part_num,
- int64_t& max_part_num)
- {
-
- switch (entry.op) {
- case fifo_journal_entry_t::Op::OP_CREATE:
- return create_part(entry.part_num, entry.part_tag, max_part_num);
- case fifo_journal_entry_t::Op::OP_SET_HEAD:
- if (entry.part_num > head_part_num) {
- head_part_num = entry.part_num;
- }
- return 0;
- case fifo_journal_entry_t::Op::OP_REMOVE:
- return remove_part(entry.part_num, entry.part_tag, tail_part_num);
- default:
- /* nothing to do */
- break;
- }
-
- return -EIO;
- }
-
- int FIFO::process_journal_entries(vector<fifo_journal_entry_t> *processed,
- int64_t& tail_part_num,
- int64_t& head_part_num,
- int64_t& max_part_num)
- {
- for (auto& iter : meta_info.journal) {
- auto& entry = iter.second;
- int r = process_journal_entry(entry, tail_part_num, head_part_num, max_part_num);
- if (r < 0) {
- ldout(cct, 10) << __func__ << "(): ERROR: failed processing journal entry for part=" << entry.part_num << dendl;
- } else {
- processed->push_back(entry);
- }
- }
-
- return 0;
- }
-
- int FIFO::process_journal()
- {
- vector<fifo_journal_entry_t> processed;
-
- int64_t new_tail = meta_info.tail_part_num;
- int64_t new_head = meta_info.head_part_num;
- int64_t new_max = meta_info.max_push_part_num;
-
- int r = process_journal_entries(&processed, new_tail, new_head, new_max);
- if (r < 0) {
- return r;
- }
-
- if (processed.empty()) {
- return 0;
- }
-
-#define RACE_RETRY 10
-
- int i;
-
- for (i = 0; i < RACE_RETRY; ++i) {
- bool canceled;
-
- std::optional<int64_t> tail_part_num;
- std::optional<int64_t> head_part_num;
- std::optional<int64_t> max_part_num;
-
- if (new_tail > meta_info.tail_part_num) {
- tail_part_num = new_tail;
- }
-
- if (new_head > meta_info.head_part_num) {
- head_part_num = new_head;
- }
-
- if (new_max > meta_info.max_push_part_num) {
- max_part_num = new_max;
- }
-
- if (processed.empty() &&
- !tail_part_num &&
- !max_part_num) {
- /* nothing to update anymore */
- break;
- }
-
- r = update_meta(ClsFIFO::MetaUpdateParams()
- .journal_entries_rm(processed)
- .tail_part_num(tail_part_num)
- .head_part_num(head_part_num)
- .max_push_part_num(max_part_num),
- &canceled);
- if (r < 0) {
- return r;
- }
-
- if (canceled) {
- vector<fifo_journal_entry_t> new_processed;
-
- for (auto& e : processed) {
- auto jiter = meta_info.journal.find(e.part_num);
- if (jiter == meta_info.journal.end() || /* journal entry was already processed */
- !(jiter->second == e)) {
- continue;
- }
-
- new_processed.push_back(e);
- }
- processed = std::move(new_processed);
- continue;
- }
- break;
- }
- if (i == RACE_RETRY) {
- ldout(cct, 0) << "ERROR: " << __func__ << "(): race check failed too many times, likely a bug" << dendl;
- return -ECANCELED;
- }
- return 0;
- }
-
- static const char alphanum_plain_table[]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
-
- void gen_rand_alphanumeric_plain(CephContext *cct, char *dest, int size) /* size should be the required string size + 1 */
- {
- cct->random()->get_bytes(dest, size);
-
- int i;
- for (i = 0; i < size - 1; i++) {
- int pos = (unsigned)dest[i];
- dest[i] = alphanum_plain_table[pos % (sizeof(alphanum_plain_table) - 1)];
- }
- dest[i] = '\0';
- }
-
- static string generate_tag(CephContext *cct)
- {
-#define HEADER_TAG_SIZE 16
- char buf[HEADER_TAG_SIZE + 1];
- buf[HEADER_TAG_SIZE] = 0;
- gen_rand_alphanumeric_plain(cct, buf, sizeof(buf));
- return string(buf);
- }
-
- int FIFO::prepare_new_part(bool is_head)
- {
- fifo_journal_entry_t jentry;
-
- meta_info.prepare_next_journal_entry(&jentry, generate_tag(cct));
-
- int64_t new_head_part_num = meta_info.head_part_num;
-
- std::optional<fifo_journal_entry_t> new_head_jentry;
- if (is_head) {
- new_head_jentry = jentry;
- new_head_jentry->op = fifo_journal_entry_t::OP_SET_HEAD;
- new_head_part_num = jentry.part_num;
- }
-
- int r;
- bool canceled;
-
- int i;
-
- for (i = 0; i < RACE_RETRY; ++i) {
- r = update_meta(ClsFIFO::MetaUpdateParams()
- .journal_entry_add(jentry)
- .journal_entry_add(new_head_jentry),
- &canceled);
- if (r < 0) {
- return r;
- }
-
- if (canceled) {
- if (meta_info.max_push_part_num >= jentry.part_num &&
- meta_info.head_part_num >= new_head_part_num) { /* raced, but new part was already written */
- return 0;
- }
-
- auto iter = meta_info.journal.find(jentry.part_num);
- if (iter == meta_info.journal.end()) {
- continue;
- }
- }
- break;
- }
- if (i == RACE_RETRY) {
- ldout(cct, 0) << "ERROR: " << __func__ << "(): race check failed too many times, likely a bug" << dendl;
- return -ECANCELED;
- }
-
- r = process_journal();
- if (r < 0) {
- return r;
- }
-
- return 0;
- }
-
- int FIFO::prepare_new_head()
- {
- int64_t new_head_num = meta_info.head_part_num + 1;
-
- if (meta_info.max_push_part_num < new_head_num) {
- int r = prepare_new_part(true);
- if (r < 0) {
- return r;
- }
-
- if (meta_info.max_push_part_num < new_head_num) {
- ldout(cct, 0) << "ERROR: " << __func__ << ": after new part creation: meta_info.max_push_part_num="
- << meta_info.max_push_part_num << " new_head_num=" << meta_info.max_push_part_num << dendl;
- return -EIO;
- }
-
- return 0;
- }
-
- int i;
-
- for (i = 0; i < RACE_RETRY; ++i) {
- bool canceled;
- int r = update_meta(ClsFIFO::MetaUpdateParams()
- .head_part_num(new_head_num),
- &canceled);
- if (r < 0) {
- return r;
- }
-
- if (canceled) {
- if (meta_info.head_part_num < new_head_num) {
- continue;
- }
- }
- break;
- }
- if (i == RACE_RETRY) {
- ldout(cct, 0) << "ERROR: " << __func__ << "(): race check failed too many times, likely a bug" << dendl;
- return -ECANCELED;
- }
-
-
- return 0;
- }
-
- int FIFO::open(bool create,
- std::optional<ClsFIFO::MetaCreateParams> create_params)
- {
- if (!ioctx) {
- return -EINVAL;
- }
-
- if (create) {
- librados::ObjectWriteOperation op;
-
- ClsFIFO::MetaCreateParams default_params;
- ClsFIFO::MetaCreateParams *params = (create_params ? &(*create_params) : &default_params);
-
- int r = ClsFIFO::meta_create(&op, id, *params);
- if (r < 0) {
- return r;
- }
-
- r = ioctx->operate(meta_oid, &op);
- if (r < 0) {
- return r;
- }
- }
-
- std::optional<fifo_objv_t> objv = (create_params ? create_params->state.objv : nullopt);
-
- int r = do_read_meta(objv);
- if (r < 0) {
- return r;
- }
-
- is_open = true;
-
- return 0;
- }
-
- int FIFO::read_meta(std::optional<fifo_objv_t> objv)
- {
- if (!is_open) {
- return -EINVAL;
- }
-
- return do_read_meta(objv);
- }
-
- int FIFO::push_entries(int64_t part_num, std::vector<bufferlist>& data_bufs)
- {
- if (!is_open) {
- return -EINVAL;
- }
-
- librados::ObjectWriteOperation op;
-
- int r = ClsFIFO::push_part(&op, ClsFIFO::PushPartParams()
- .tag(meta_info.head_tag)
- .data_bufs(data_bufs));
- if (r < 0) {
- return r;
- }
-
- r = ioctx->operate(meta_info.part_oid(part_num), &op);
- if (r < 0) {
- return r;
- }
-
- return 0;
- }
-
- int FIFO::trim_part(int64_t part_num,
- uint64_t ofs,
- std::optional<string> tag)
- {
- if (!is_open) {
- return -EINVAL;
- }
-
- librados::ObjectWriteOperation op;
-
- int r = ClsFIFO::trim_part(&op, ClsFIFO::TrimPartParams()
- .tag(tag)
- .ofs(ofs));
- if (r < 0) {
- return r;
- }
-
- r = ioctx->operate(meta_info.part_oid(part_num), &op);
- if (r < 0) {
- return r;
- }
-
- return 0;
- }
-
- int FIFO::push(bufferlist& bl)
- {
- std::vector<bufferlist> data_bufs;
- data_bufs.push_back(bl);
-
- return push(data_bufs);
- }
-
- int FIFO::push(vector<bufferlist>& data_bufs)
- {
- if (!is_open) {
- return -EINVAL;
- }
-
- int r;
-
- if (meta_info.need_new_head()) {
- r = prepare_new_head();
- if (r < 0) {
- return r;
- }
- }
-
- int i;
-
- auto iter = data_bufs.begin();
-
- while (iter != data_bufs.end()) {
- uint64_t batch_len = 0;
-
- vector<bufferlist> batch;
-
- for (; iter != data_bufs.end(); ++iter) {
- auto& data = *iter;
- auto data_len = data.length();
- auto max_entry_size = meta_info.data_params.max_entry_size;
-
- if (data_len > max_entry_size) {
- ldout(cct, 10) << __func__ << "(): entry too large: " << data_len << " > " << meta_info.data_params.max_entry_size << dendl;
- return -EINVAL;
- }
-
- if (batch_len + data_len > max_entry_size) {
- break;
- }
-
- batch_len += data_len + part_entry_overhead; /* we can send entry with data_len up to max_entry_size,
- however, we want to also account the overhead when dealing
- with multiple entries. Previous check doesn't account
- for overhead on purpose. */
-
- batch.push_back(data);
- }
-
-
- for (i = 0; i < RACE_RETRY; ++i) {
- r = push_entries(meta_info.head_part_num, batch);
- if (r == -ERANGE) {
- r = prepare_new_head();
- if (r < 0) {
- return r;
- }
- continue;
- }
- if (r < 0) {
- return r;
- }
- break;
- }
- if (i == RACE_RETRY) {
- ldout(cct, 0) << "ERROR: " << __func__ << "(): race check failed too many times, likely a bug" << dendl;
- return -ECANCELED;
- }
- }
-
- return 0;
- }
-
- int FIFO::list(int max_entries,
- std::optional<string> marker,
- vector<fifo_entry> *result,
- bool *more)
- {
- if (!is_open) {
- return -EINVAL;
- }
-
- *more = false;
-
- int64_t part_num = meta_info.tail_part_num;
- uint64_t ofs = 0;
-
- if (marker) {
- if (!parse_marker(*marker, &part_num, &ofs)) {
- ldout(cct, 20) << __func__ << "(): failed to parse marker (" << *marker << ")" << dendl;
- return -EINVAL;
- }
- }
-
- result->clear();
- result->reserve(max_entries);
-
- bool part_more{false};
- bool part_full{false};
-
- while (max_entries > 0) {
- std::vector<cls_fifo_part_list_entry_t> entries;
- int r = ClsFIFO::list_part(*ioctx,
- meta_info.part_oid(part_num),
- ClsFIFO::ListPartParams()
- .ofs(ofs)
- .max_entries(max_entries),
- &entries,
- &part_more,
- &part_full,
- nullptr);
- if (r == -ENOENT) {
- r = do_read_meta();
- if (r < 0) {
- return r;
- }
-
- if (part_num < meta_info.tail_part_num) {
- /* raced with trim? restart */
- result->clear();
- part_num = meta_info.tail_part_num;
- ofs = 0;
- continue;
- }
-
- /* assuming part was not written yet, so end of data */
-
- *more = false;
-
- return 0;
- }
- if (r < 0) {
- ldout(cct, 20) << __func__ << "(): ClsFIFO::list_part() on oid=" << meta_info.part_oid(part_num) << " returned r=" << r << dendl;
- return r;
- }
-
- for (auto& entry : entries) {
- fifo_entry e;
- e.data = std::move(entry.data);
- e.marker = craft_marker(part_num, entry.ofs);
- e.mtime = entry.mtime;
-
- result->push_back(e);
- }
- max_entries -= entries.size();
-
- if (max_entries > 0 &&
- part_more) {
- continue;
- }
-
- if (!part_full) { /* head part is not full */
- break;
- }
-
- ++part_num;
- ofs = 0;
- }
-
- *more = part_full || part_more;
-
- return 0;
- }
-
- int FIFO::trim(const string& marker)
- {
- if (!is_open) {
- return -EINVAL;
- }
-
- int64_t part_num;
- uint64_t ofs;
-
- if (!parse_marker(marker, &part_num, &ofs)) {
- ldout(cct, 20) << __func__ << "(): failed to parse marker: marker=" << marker << dendl;
- return -EINVAL;
- }
-
- for (int64_t pn = meta_info.tail_part_num; pn < part_num; ++pn) {
- int r = trim_part(pn, meta_info.data_params.max_part_size, std::nullopt);
- if (r < 0 &&
- r != -ENOENT) {
- ldout(cct, 0) << __func__ << "(): ERROR: trim_part() on part=" << pn << " returned r=" << r << dendl;
- return r;
- }
- }
-
- int r = trim_part(part_num, ofs, std::nullopt);
- if (r < 0 &&
- r != -ENOENT) {
- ldout(cct, 0) << __func__ << "(): ERROR: trim_part() on part=" << part_num << " returned r=" << r << dendl;
- return r;
- }
-
- if (part_num <= meta_info.tail_part_num) {
- /* don't need to modify meta info */
- return 0;
- }
-
- int i;
-
- for (i = 0; i < RACE_RETRY; ++i) {
- bool canceled;
- int r = update_meta(ClsFIFO::MetaUpdateParams()
- .tail_part_num(part_num),
- &canceled);
- if (r < 0) {
- return r;
- }
-
- if (canceled) {
- if (meta_info.tail_part_num < part_num) {
- continue;
- }
- }
- break;
-
- if (i == RACE_RETRY) {
- ldout(cct, 0) << "ERROR: " << __func__ << "(): race check failed too many times, likely a bug" << dendl;
- return -ECANCELED;
- }
- }
-
- return 0;
- }
-
- int FIFO::get_part_info(int64_t part_num,
- fifo_part_info *result)
- {
- if (!is_open) {
- return -EINVAL;
- }
-
- fifo_part_header_t header;
-
- int r = ClsFIFO::get_part_info(*ioctx,
- meta_info.part_oid(part_num),
- &header);
- if (r < 0) {
- return r;
- }
-
- *result = std::move(header);
-
- return 0;
- }
-
- } // namespace fifo
- } // namespace cls
-} // namespace rados
-
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2019 Red Hat, Inc.
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-
-#pragma once
-
-#include "cls/fifo/cls_fifo_types.h"
-
-namespace rados {
- namespace cls {
- namespace fifo {
-
- class ClsFIFO {
- public:
-
- /* create */
-
- struct MetaCreateParams {
- struct State {
- static constexpr uint64_t default_max_part_size = 4 * 1024 * 1024;
- static constexpr uint64_t default_max_entry_size = 32 * 1024;
- std::optional<fifo_objv_t> objv;
- std::optional<std::string> oid_prefix;
- bool exclusive{false};
- uint64_t max_part_size{default_max_part_size};
- uint64_t max_entry_size{default_max_entry_size};
- } state;
-
- MetaCreateParams& oid_prefix(const std::string& oid_prefix) {
- state.oid_prefix = oid_prefix;
- return *this;
- }
- MetaCreateParams& exclusive(bool exclusive) {
- state.exclusive = exclusive;
- return *this;
- }
- MetaCreateParams& max_part_size(uint64_t max_part_size) {
- state.max_part_size = max_part_size;
- return *this;
- }
- MetaCreateParams& max_entry_size(uint64_t max_entry_size) {
- state.max_entry_size = max_entry_size;
- return *this;
- }
- MetaCreateParams& objv(const fifo_objv_t& objv) {
- state.objv = objv;
- return *this;
- }
- MetaCreateParams& objv(const std::string& instance, uint64_t ver) {
- state.objv = fifo_objv_t{instance, ver};
- return *this;
- }
- };
-
- static int meta_create(librados::ObjectWriteOperation *op,
- const string& id,
- const MetaCreateParams& params);
-
- /* get info */
-
- struct MetaGetParams {
- struct State {
- std::optional<fifo_objv_t> objv;
- } state;
-
- MetaGetParams& objv(std::optional<fifo_objv_t>& v) {
- state.objv = v;
- return *this;
- }
- MetaGetParams& objv(const fifo_objv_t& v) {
- state.objv = v;
- return *this;
- }
- MetaGetParams& objv(const std::string& instance, uint64_t ver) {
- state.objv = fifo_objv_t{instance, ver};
- return *this;
- }
- };
- static int meta_get(librados::IoCtx& ioctx,
- const string& oid,
- const MetaGetParams& params,
- rados::cls::fifo::fifo_info_t *result,
- uint32_t *part_header_size,
- uint32_t *part_entry_overhead);
-
- /* update */
-
- struct MetaUpdateParams {
- struct State {
- rados::cls::fifo::fifo_objv_t objv;
-
- std::optional<uint64_t> tail_part_num;
- std::optional<uint64_t> head_part_num;
- std::optional<uint64_t> min_push_part_num;
- std::optional<uint64_t> max_push_part_num;
- std::vector<rados::cls::fifo::fifo_journal_entry_t> journal_entries_add;
- std::vector<rados::cls::fifo::fifo_journal_entry_t> journal_entries_rm;
- } state;
-
- MetaUpdateParams& objv(const fifo_objv_t& objv) {
- state.objv = objv;
- return *this;
- }
- MetaUpdateParams& tail_part_num(std::optional<uint64_t> tail_part_num) {
- state.tail_part_num = tail_part_num;
- return *this;
- }
- MetaUpdateParams& tail_part_num(uint64_t tail_part_num) {
- state.tail_part_num = tail_part_num;
- return *this;
- }
- MetaUpdateParams& head_part_num(std::optional<uint64_t> head_part_num) {
- state.head_part_num = head_part_num;
- return *this;
- }
- MetaUpdateParams& head_part_num(uint64_t head_part_num) {
- state.head_part_num = head_part_num;
- return *this;
- }
- MetaUpdateParams& min_push_part_num(uint64_t num) {
- state.min_push_part_num = num;
- return *this;
- }
- MetaUpdateParams& max_push_part_num(std::optional<uint64_t> num) {
- state.max_push_part_num = num;
- return *this;
- }
- MetaUpdateParams& max_push_part_num(uint64_t num) {
- state.max_push_part_num = num;
- return *this;
- }
- MetaUpdateParams& journal_entry_add(std::optional<rados::cls::fifo::fifo_journal_entry_t> entry) {
- if (entry) {
- state.journal_entries_add.push_back(*entry);
- }
- return *this;
- }
- MetaUpdateParams& journal_entry_add(const rados::cls::fifo::fifo_journal_entry_t& entry) {
- state.journal_entries_add.push_back(entry);
- return *this;
- }
- MetaUpdateParams& journal_entries_rm(std::vector<rados::cls::fifo::fifo_journal_entry_t>& entries) {
- state.journal_entries_rm = entries;
- return *this;
- }
-
- int apply_update(CephContext *cct,
- rados::cls::fifo::fifo_info_t *info);
- };
-
- static int meta_update(librados::ObjectWriteOperation *rados_op,
- const MetaUpdateParams& params);
- /* init part */
-
- struct PartInitParams {
- struct State {
- string tag;
- rados::cls::fifo::fifo_data_params_t data_params;
- } state;
-
- PartInitParams& tag(const std::string& tag) {
- state.tag = tag;
- return *this;
- }
- PartInitParams& data_params(const rados::cls::fifo::fifo_data_params_t& data_params) {
- state.data_params = data_params;
- return *this;
- }
- };
-
- static int part_init(librados::ObjectWriteOperation *op,
- const PartInitParams& params);
-
- /* push part */
-
- struct PushPartParams {
- struct State {
- string tag;
- std::vector<bufferlist> data_bufs;
- uint64_t total_len{0};
- } state;
-
- PushPartParams& tag(const std::string& tag) {
- state.tag = tag;
- return *this;
- }
- PushPartParams& data(bufferlist& bl) {
- state.total_len += bl.length();
- state.data_bufs.emplace_back(bl);
- return *this;
- }
- PushPartParams& data_bufs(std::vector<bufferlist>& dbs) {
- for (auto& bl : dbs) {
- data(bl);
- }
- return *this;
- }
- };
-
- static int push_part(librados::ObjectWriteOperation *op,
- const PushPartParams& params);
- /* trim part */
-
- struct TrimPartParams {
- struct State {
- std::optional<string> tag;
- uint64_t ofs;
- } state;
-
- TrimPartParams& tag(std::optional<std::string> tag) {
- state.tag = tag;
- return *this;
- }
- TrimPartParams& ofs(uint64_t ofs) {
- state.ofs = ofs;
- return *this;
- }
- };
-
- static int trim_part(librados::ObjectWriteOperation *op,
- const TrimPartParams& params);
- /* list part */
-
- struct ListPartParams {
- struct State {
- std::optional<string> tag;
- uint64_t ofs;
- int max_entries{100};
- } state;
-
- ListPartParams& tag(const std::string& tag) {
- state.tag = tag;
- return *this;
- }
- ListPartParams& ofs(uint64_t ofs) {
- state.ofs = ofs;
- return *this;
- }
- ListPartParams& max_entries(int _max_entries) {
- state.max_entries = _max_entries;
- return *this;
- }
- };
-
- static int list_part(librados::IoCtx& ioctx,
- const string& oid,
- const ListPartParams& params,
- std::vector<cls_fifo_part_list_entry_t> *pentries,
- bool *more,
- bool *full_part = nullptr,
- string *ptag = nullptr);
-
- static int get_part_info(librados::IoCtx& ioctx,
- const string& oid,
- rados::cls::fifo::fifo_part_header_t *header);
- };
-
- struct fifo_entry {
- bufferlist data;
- string marker;
- ceph::real_time mtime;
- };
-
- using fifo_part_info = rados::cls::fifo::fifo_part_header_t;
-
- class FIFO {
- CephContext *cct;
- string id;
-
- string meta_oid;
-
- std::optional<librados::IoCtx> _ioctx;
- librados::IoCtx *ioctx{nullptr};
-
- fifo_info_t meta_info;
-
- uint32_t part_header_size;
- uint32_t part_entry_overhead;
-
- bool is_open{false};
-
- string craft_marker(int64_t part_num,
- uint64_t part_ofs);
-
- bool parse_marker(const string& marker,
- int64_t *part_num,
- uint64_t *part_ofs);
-
- int update_meta(ClsFIFO::MetaUpdateParams& update_params,
- bool *canceled);
- int do_read_meta(std::optional<fifo_objv_t> objv = std::nullopt);
-
- int create_part(int64_t part_num, const string& tag,
- int64_t& max_part_num);
- int remove_part(int64_t part_num, const string& tag,
- int64_t& tail_part_num);
-
- int process_journal_entry(const fifo_journal_entry_t& entry,
- int64_t& tail_part_num,
- int64_t& head_part_num,
- int64_t& max_part_num);
- int process_journal_entries(vector<fifo_journal_entry_t> *processed,
- int64_t& tail_part_num,
- int64_t& head_part_num,
- int64_t& max_part_num);
- int process_journal();
-
- int prepare_new_part(bool is_head);
- int prepare_new_head();
-
- int push_entries(int64_t part_num, std::vector<bufferlist>& data_bufs);
- int trim_part(int64_t part_num,
- uint64_t ofs,
- std::optional<string> tag);
-
- public:
- FIFO(CephContext *_cct,
- const string& _id,
- librados::IoCtx *_ioctx = nullptr) : cct(_cct),
- id(_id),
- ioctx(_ioctx) {
- meta_oid = id;
- }
-
- int init_ioctx(librados::Rados *rados,
- const string& pool,
- std::optional<string> pool_ns);
-
- void set_ioctx(librados::IoCtx *_ioctx) {
- ioctx = ioctx;
- }
-
- int open(bool create,
- std::optional<ClsFIFO::MetaCreateParams> create_params = std::nullopt);
-
- int read_meta(std::optional<fifo_objv_t> objv = std::nullopt);
-
- const fifo_info_t& get_meta() const {
- return meta_info;
- }
-
- void get_part_layout_info(uint32_t *header_size, uint32_t *entry_overhead) {
- if (header_size) {
- *header_size = part_header_size;
- }
-
- if (entry_overhead) {
- *entry_overhead = part_entry_overhead;
- }
- }
-
- int push(bufferlist& bl);
- int push(vector<bufferlist>& bl);
-
- int list(int max_entries,
- std::optional<string> marker,
- vector<fifo_entry> *result,
- bool *more);
-
- int trim(const string& marker);
-
- int get_part_info(int64_t part_num,
- fifo_part_info *result);
- };
- } // namespace fifo
- } // namespace cls
-} // namespace rados
#pragma once
+#include <cstdint>
+#include <deque>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "include/buffer.h"
+#include "include/encoding.h"
#include "include/types.h"
-#include "include/utime.h"
+
#include "cls/fifo/cls_fifo_types.h"
-struct cls_fifo_meta_create_op
+namespace rados::cls::fifo::op {
+struct create_meta
{
- string id;
- std::optional<rados::cls::fifo::fifo_objv_t> objv;
+ std::string id;
+ std::optional<objv> version;
struct {
- string name;
- string ns;
+ std::string name;
+ std::string ns;
} pool;
- std::optional<string> oid_prefix;
+ std::optional<std::string> oid_prefix;
- uint64_t max_part_size{0};
- uint64_t max_entry_size{0};
+ std::uint64_t max_part_size{0};
+ std::uint64_t max_entry_size{0};
bool exclusive{false};
- void encode(bufferlist &bl) const {
+ void encode(ceph::buffer::list& bl) const {
ENCODE_START(1, 1, bl);
encode(id, bl);
- encode(objv, bl);
+ encode(version, bl);
encode(pool.name, bl);
encode(pool.ns, bl);
encode(oid_prefix, bl);
encode(exclusive, bl);
ENCODE_FINISH(bl);
}
- void decode(bufferlist::const_iterator &bl) {
+ void decode(ceph::buffer::list::const_iterator& bl) {
DECODE_START(1, bl);
decode(id, bl);
- decode(objv, bl);
+ decode(version, bl);
decode(pool.name, bl);
decode(pool.ns, bl);
decode(oid_prefix, bl);
DECODE_FINISH(bl);
}
};
-WRITE_CLASS_ENCODER(cls_fifo_meta_create_op)
+WRITE_CLASS_ENCODER(create_meta)
-struct cls_fifo_meta_get_op
+struct get_meta
{
- std::optional<rados::cls::fifo::fifo_objv_t> objv;
+ std::optional<objv> version;
- void encode(bufferlist &bl) const {
+ void encode(ceph::buffer::list& bl) const {
ENCODE_START(1, 1, bl);
- encode(objv, bl);
+ encode(version, bl);
ENCODE_FINISH(bl);
}
- void decode(bufferlist::const_iterator &bl) {
+ void decode(ceph::buffer::list::const_iterator& bl) {
DECODE_START(1, bl);
- decode(objv, bl);
+ decode(version, bl);
DECODE_FINISH(bl);
}
};
-WRITE_CLASS_ENCODER(cls_fifo_meta_get_op)
+WRITE_CLASS_ENCODER(get_meta)
-struct cls_fifo_meta_get_op_reply
+struct get_meta_reply
{
- rados::cls::fifo::fifo_info_t info;
- uint32_t part_header_size{0};
- uint32_t part_entry_overhead{0}; /* per entry extra data that is stored */
+ fifo::info info;
+ std::uint32_t part_header_size{0};
+ /* per entry extra data that is stored */
+ std::uint32_t part_entry_overhead{0};
- void encode(bufferlist &bl) const {
+ void encode(ceph::buffer::list& bl) const {
ENCODE_START(1, 1, bl);
encode(info, bl);
encode(part_header_size, bl);
encode(part_entry_overhead, bl);
ENCODE_FINISH(bl);
}
- void decode(bufferlist::const_iterator &bl) {
+ void decode(ceph::buffer::list::const_iterator& bl) {
DECODE_START(1, bl);
decode(info, bl);
decode(part_header_size, bl);
DECODE_FINISH(bl);
}
};
-WRITE_CLASS_ENCODER(cls_fifo_meta_get_op_reply)
+WRITE_CLASS_ENCODER(get_meta_reply)
-struct cls_fifo_meta_update_op
+struct update_meta
{
- rados::cls::fifo::fifo_objv_t objv;
+ objv version;
- std::optional<uint64_t> tail_part_num;
- std::optional<uint64_t> head_part_num;
- std::optional<uint64_t> min_push_part_num;
- std::optional<uint64_t> max_push_part_num;
- std::vector<rados::cls::fifo::fifo_journal_entry_t> journal_entries_add;
- std::vector<rados::cls::fifo::fifo_journal_entry_t> journal_entries_rm;
+ std::optional<std::uint64_t> tail_part_num;
+ std::optional<std::uint64_t> head_part_num;
+ std::optional<std::uint64_t> min_push_part_num;
+ std::optional<std::uint64_t> max_push_part_num;
+ std::vector<journal_entry> journal_entries_add;
+ std::vector<journal_entry> journal_entries_rm;
- void encode(bufferlist &bl) const {
+ void encode(ceph::buffer::list& bl) const {
ENCODE_START(1, 1, bl);
- encode(objv, bl);
+ encode(version, bl);
encode(tail_part_num, bl);
encode(head_part_num, bl);
encode(min_push_part_num, bl);
encode(journal_entries_rm, bl);
ENCODE_FINISH(bl);
}
- void decode(bufferlist::const_iterator &bl) {
+ void decode(ceph::buffer::list::const_iterator& bl) {
DECODE_START(1, bl);
- decode(objv, bl);
+ decode(version, bl);
decode(tail_part_num, bl);
decode(head_part_num, bl);
decode(min_push_part_num, bl);
DECODE_FINISH(bl);
}
};
-WRITE_CLASS_ENCODER(cls_fifo_meta_update_op)
+WRITE_CLASS_ENCODER(update_meta)
-struct cls_fifo_part_init_op
+struct init_part
{
- string tag;
- rados::cls::fifo::fifo_data_params_t data_params;
+ std::string tag;
+ data_params params;
- void encode(bufferlist &bl) const {
+ void encode(ceph::buffer::list& bl) const {
ENCODE_START(1, 1, bl);
encode(tag, bl);
- encode(data_params, bl);
+ encode(params, bl);
ENCODE_FINISH(bl);
}
- void decode(bufferlist::const_iterator &bl) {
+ void decode(ceph::buffer::list::const_iterator& bl) {
DECODE_START(1, bl);
decode(tag, bl);
- decode(data_params, bl);
+ decode(params, bl);
DECODE_FINISH(bl);
}
};
-WRITE_CLASS_ENCODER(cls_fifo_part_init_op)
+WRITE_CLASS_ENCODER(init_part)
-struct cls_fifo_part_push_op
+struct push_part
{
- string tag;
- std::vector<bufferlist> data_bufs;
- uint64_t total_len{0};
+ std::string tag;
+ std::deque<ceph::buffer::list> data_bufs;
+ std::uint64_t total_len{0};
- void encode(bufferlist &bl) const {
+ void encode(ceph::buffer::list& bl) const {
ENCODE_START(1, 1, bl);
encode(tag, bl);
encode(data_bufs, bl);
encode(total_len, bl);
ENCODE_FINISH(bl);
}
- void decode(bufferlist::const_iterator &bl) {
+ void decode(ceph::buffer::list::const_iterator& bl) {
DECODE_START(1, bl);
decode(tag, bl);
decode(data_bufs, bl);
DECODE_FINISH(bl);
}
};
-WRITE_CLASS_ENCODER(cls_fifo_part_push_op)
+WRITE_CLASS_ENCODER(push_part)
-struct cls_fifo_part_trim_op
+struct trim_part
{
- std::optional<string> tag;
- uint64_t ofs{0};
+ std::optional<std::string> tag;
+ std::uint64_t ofs{0};
- void encode(bufferlist &bl) const {
+ void encode(ceph::buffer::list& bl) const {
ENCODE_START(1, 1, bl);
encode(tag, bl);
encode(ofs, bl);
ENCODE_FINISH(bl);
}
- void decode(bufferlist::const_iterator &bl) {
+ void decode(ceph::buffer::list::const_iterator& bl) {
DECODE_START(1, bl);
decode(tag, bl);
decode(ofs, bl);
DECODE_FINISH(bl);
}
};
-WRITE_CLASS_ENCODER(cls_fifo_part_trim_op)
+WRITE_CLASS_ENCODER(trim_part)
-struct cls_fifo_part_list_op
+struct list_part
{
std::optional<string> tag;
- uint64_t ofs{0};
+ std::uint64_t ofs{0};
int max_entries{100};
- void encode(bufferlist &bl) const {
+ void encode(ceph::buffer::list& bl) const {
ENCODE_START(1, 1, bl);
encode(tag, bl);
encode(ofs, bl);
encode(max_entries, bl);
ENCODE_FINISH(bl);
}
- void decode(bufferlist::const_iterator &bl) {
+ void decode(ceph::buffer::list::const_iterator& bl) {
DECODE_START(1, bl);
decode(tag, bl);
decode(ofs, bl);
DECODE_FINISH(bl);
}
};
-WRITE_CLASS_ENCODER(cls_fifo_part_list_op)
+WRITE_CLASS_ENCODER(list_part)
+inline constexpr int MAX_LIST_ENTRIES = 512;
-struct cls_fifo_part_list_op_reply
+struct list_part_reply
{
- string tag;
- vector<rados::cls::fifo::cls_fifo_part_list_entry_t> entries;
+ std::string tag;
+ std::vector<part_list_entry> entries;
bool more{false};
bool full_part{false}; /* whether part is full or still can be written to.
A non full part is by definition head part */
- void encode(bufferlist &bl) const {
+ void encode(ceph::buffer::list& bl) const {
ENCODE_START(1, 1, bl);
encode(tag, bl);
encode(entries, bl);
encode(full_part, bl);
ENCODE_FINISH(bl);
}
- void decode(bufferlist::const_iterator &bl) {
+ void decode(ceph::buffer::list::const_iterator& bl) {
DECODE_START(1, bl);
decode(tag, bl);
decode(entries, bl);
DECODE_FINISH(bl);
}
};
-WRITE_CLASS_ENCODER(cls_fifo_part_list_op_reply)
+WRITE_CLASS_ENCODER(list_part_reply)
-struct cls_fifo_part_get_info_op
+struct get_part_info
{
- void encode(bufferlist &bl) const {
+ void encode(ceph::buffer::list &bl) const {
ENCODE_START(1, 1, bl);
ENCODE_FINISH(bl);
}
- void decode(bufferlist::const_iterator &bl) {
+ void decode(ceph::buffer::list::const_iterator &bl) {
DECODE_START(1, bl);
DECODE_FINISH(bl);
}
};
-WRITE_CLASS_ENCODER(cls_fifo_part_get_info_op)
+WRITE_CLASS_ENCODER(get_part_info)
-struct cls_fifo_part_get_info_op_reply
+struct get_part_info_reply
{
- rados::cls::fifo::fifo_part_header_t header;
+ part_header header;
- void encode(bufferlist &bl) const {
+ void encode(ceph::buffer::list &bl) const {
ENCODE_START(1, 1, bl);
encode(header, bl);
ENCODE_FINISH(bl);
}
- void decode(bufferlist::const_iterator &bl) {
+ void decode(ceph::buffer::list::const_iterator &bl) {
DECODE_START(1, bl);
decode(header, bl);
DECODE_FINISH(bl);
}
};
-WRITE_CLASS_ENCODER(cls_fifo_part_get_info_op_reply)
+WRITE_CLASS_ENCODER(get_part_info_reply)
+
+inline constexpr auto CLASS = "fifo";
+inline constexpr auto CREATE_META = "create_meta";
+inline constexpr auto GET_META = "get_meta";
+inline constexpr auto UPDATE_META = "update_meta";
+inline constexpr auto INIT_PART = "init_part";
+inline constexpr auto PUSH_PART = "push_part";
+inline constexpr auto TRIM_PART = "trim_part";
+inline constexpr auto LIST_PART = "part_list";
+inline constexpr auto GET_PART_INFO = "get_part_info";
+} // namespace rados::cls::fifo::op
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2019 Red Hat, Inc.
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-#include "cls_fifo_types.h"
-
-string rados::cls::fifo::fifo_info_t::part_oid(int64_t part_num)
-{
- char buf[oid_prefix.size() + 32];
- snprintf(buf, sizeof(buf), "%s.%lld", oid_prefix.c_str(), (long long)part_num);
-
- return string(buf);
-}
-
-void rados::cls::fifo::fifo_info_t::prepare_next_journal_entry(fifo_journal_entry_t *entry, const string& tag)
-{
- entry->op = fifo_journal_entry_t::Op::OP_CREATE;
- entry->part_num = max_push_part_num + 1;
- entry->part_tag = tag;
-}
-
-int rados::cls::fifo::fifo_info_t::apply_update(std::optional<uint64_t>& _tail_part_num,
- std::optional<uint64_t>& _head_part_num,
- std::optional<uint64_t>& _min_push_part_num,
- std::optional<uint64_t>& _max_push_part_num,
- std::vector<rados::cls::fifo::fifo_journal_entry_t>& journal_entries_add,
- std::vector<rados::cls::fifo::fifo_journal_entry_t>& journal_entries_rm,
- string *err)
-{
- if (_tail_part_num) {
- tail_part_num = *_tail_part_num;
- }
-
- if (_min_push_part_num) {
- min_push_part_num = *_min_push_part_num;
- }
-
- if (_max_push_part_num) {
- max_push_part_num = *_max_push_part_num;
- }
-
- for (auto& entry : journal_entries_add) {
- auto iter = journal.find(entry.part_num);
- if (iter != journal.end() &&
- iter->second.op == entry.op) {
- /* don't allow multiple concurrent (same) operations on the same part,
- racing clients should use objv to avoid races anyway */
- if (err) {
- stringstream ss;
- ss << "multiple concurrent operations on same part are not allowed, part num=" << entry.part_num;
- *err = ss.str();
- }
- return -EINVAL;
- }
-
- if (entry.op == fifo_journal_entry_t::Op::OP_CREATE) {
- tags[entry.part_num] = entry.part_tag;
- }
-
- journal.insert(std::pair<int64_t, fifo_journal_entry_t>(entry.part_num, std::move(entry)));
- }
-
- for (auto& entry : journal_entries_rm) {
- journal.erase(entry.part_num);
- }
-
- if (_head_part_num) {
- tags.erase(head_part_num);
- head_part_num = *_head_part_num;
- auto iter = tags.find(head_part_num);
- if (iter != tags.end()) {
- head_tag = iter->second;
- } else {
- head_tag.erase();
- }
- }
-
- return 0;
-}
#pragma once
-
+#include <cstdint>
+#include <map>
+#include <optional>
+#include <ostream>
+#include <string>
+#include <vector>
+
+#undef FMT_HEADER_ONLY
+#define FMT_HEADER_ONLY 1
+#include <fmt/format.h>
+
+#include "include/buffer.h"
#include "include/encoding.h"
#include "include/types.h"
+#include "common/ceph_time.h"
class JSONObj;
-namespace rados {
- namespace cls {
- namespace fifo {
- struct fifo_objv_t {
- string instance;
- uint64_t ver{0};
-
- void encode(bufferlist &bl) const {
- ENCODE_START(1, 1, bl);
- encode(instance, bl);
- encode(ver, bl);
- ENCODE_FINISH(bl);
- }
- void decode(bufferlist::const_iterator &bl) {
- DECODE_START(1, bl);
- decode(instance, bl);
- decode(ver, bl);
- DECODE_FINISH(bl);
- }
- void dump(Formatter *f) const;
- void decode_json(JSONObj *obj);
-
- bool operator==(const fifo_objv_t& rhs) const {
- return (instance == rhs.instance &&
- ver == rhs.ver);
- }
-
- bool empty() const {
- return instance.empty();
- }
-
- string to_str() {
- char buf[instance.size() + 32];
- snprintf(buf, sizeof(buf), "%s{%lld}", instance.c_str(), (long long)ver);
- return string(buf);
- }
- };
- WRITE_CLASS_ENCODER(rados::cls::fifo::fifo_objv_t)
-
- struct fifo_data_params_t {
- uint64_t max_part_size{0};
- uint64_t max_entry_size{0};
- uint64_t full_size_threshold{0};
-
- void encode(bufferlist &bl) const {
- ENCODE_START(1, 1, bl);
- encode(max_part_size, bl);
- encode(max_entry_size, bl);
- encode(full_size_threshold, bl);
- ENCODE_FINISH(bl);
- }
- void decode(bufferlist::const_iterator &bl) {
- DECODE_START(1, bl);
- decode(max_part_size, bl);
- decode(max_entry_size, bl);
- decode(full_size_threshold, bl);
- DECODE_FINISH(bl);
- }
- void dump(Formatter *f) const;
- void decode_json(JSONObj *obj);
-
- bool operator==(const fifo_data_params_t& rhs) const {
- return (max_part_size == rhs.max_part_size &&
- max_entry_size == rhs.max_entry_size &&
- full_size_threshold == rhs.full_size_threshold);
- }
- };
- WRITE_CLASS_ENCODER(rados::cls::fifo::fifo_data_params_t)
-
- struct fifo_journal_entry_t {
- enum Op {
- OP_UNKNOWN = 0,
- OP_CREATE = 1,
- OP_SET_HEAD = 2,
- OP_REMOVE = 3,
- } op{OP_UNKNOWN};
-
- int64_t part_num{0};
- string part_tag;
-
- void encode(bufferlist &bl) const {
- ENCODE_START(1, 1, bl);
- encode((int)op, bl);
- encode(part_num, bl);
- encode(part_tag, bl);
- ENCODE_FINISH(bl);
- }
- void decode(bufferlist::const_iterator &bl) {
- DECODE_START(1, bl);
- int i;
- decode(i, bl);
- op = (Op)i;
- decode(part_num, bl);
- decode(part_tag, bl);
- DECODE_FINISH(bl);
- }
- void dump(Formatter *f) const;
-
- bool operator==(const fifo_journal_entry_t& e) {
- return (op == e.op &&
- part_num == e.part_num &&
- part_tag == e.part_tag);
- }
- };
- WRITE_CLASS_ENCODER(rados::cls::fifo::fifo_journal_entry_t)
-
- struct fifo_info_t {
- string id;
- fifo_objv_t objv;
- string oid_prefix;
- fifo_data_params_t data_params;
-
- int64_t tail_part_num{0};
- int64_t head_part_num{-1};
- int64_t min_push_part_num{0};
- int64_t max_push_part_num{-1};
-
- string head_tag;
- map<int64_t, string> tags;
-
- std::multimap<int64_t, fifo_journal_entry_t> journal;
-
- bool need_new_head() {
- return (head_part_num < min_push_part_num);
- }
-
- bool need_new_part() {
- return (max_push_part_num < min_push_part_num);
- }
-
- string part_oid(int64_t part_num);
- void prepare_next_journal_entry(fifo_journal_entry_t *entry, const string& tag);
-
- int apply_update(std::optional<uint64_t>& _tail_part_num,
- std::optional<uint64_t>& _head_part_num,
- std::optional<uint64_t>& _min_push_part_num,
- std::optional<uint64_t>& _max_push_part_num,
- std::vector<rados::cls::fifo::fifo_journal_entry_t>& journal_entries_add,
- std::vector<rados::cls::fifo::fifo_journal_entry_t>& journal_entries_rm,
- string *err);
-
- void encode(bufferlist &bl) const {
- ENCODE_START(1, 1, bl);
- encode(id, bl);
- encode(objv, bl);
- encode(oid_prefix, bl);
- encode(data_params, bl);
- encode(tail_part_num, bl);
- encode(head_part_num, bl);
- encode(min_push_part_num, bl);
- encode(max_push_part_num, bl);
- encode(tags, bl);
- encode(head_tag, bl);
- encode(journal, bl);
- ENCODE_FINISH(bl);
- }
- void decode(bufferlist::const_iterator &bl) {
- DECODE_START(1, bl);
- decode(id, bl);
- decode(objv, bl);
- decode(oid_prefix, bl);
- decode(data_params, bl);
- decode(tail_part_num, bl);
- decode(head_part_num, bl);
- decode(min_push_part_num, bl);
- decode(max_push_part_num, bl);
- decode(tags, bl);
- decode(head_tag, bl);
- decode(journal, bl);
- DECODE_FINISH(bl);
- }
- void dump(Formatter *f) const;
- void decode_json(JSONObj *obj);
- };
- WRITE_CLASS_ENCODER(rados::cls::fifo::fifo_info_t)
-
- struct cls_fifo_part_list_entry_t {
- bufferlist data;
- uint64_t ofs;
- ceph::real_time mtime;
-
- cls_fifo_part_list_entry_t() {}
- cls_fifo_part_list_entry_t(bufferlist&& _data,
- uint64_t _ofs,
- ceph::real_time _mtime) : data(std::move(_data)), ofs(_ofs), mtime(_mtime) {}
-
-
- void encode(bufferlist &bl) const {
- ENCODE_START(1, 1, bl);
- encode(data, bl);
- encode(ofs, bl);
- encode(mtime, bl);
- ENCODE_FINISH(bl);
- }
- void decode(bufferlist::const_iterator &bl) {
- DECODE_START(1, bl);
- decode(data, bl);
- decode(ofs, bl);
- decode(mtime, bl);
- DECODE_FINISH(bl);
- }
- };
- WRITE_CLASS_ENCODER(rados::cls::fifo::cls_fifo_part_list_entry_t)
-
- struct fifo_part_header_t {
- string tag;
-
- fifo_data_params_t params;
-
- uint64_t magic{0};
-
- uint64_t min_ofs{0};
- uint64_t max_ofs{0};
- uint64_t min_index{0};
- uint64_t max_index{0};
-
- void encode(bufferlist &bl) const {
- ENCODE_START(1, 1, bl);
- encode(tag, bl);
- encode(params, bl);
- encode(magic, bl);
- encode(min_ofs, bl);
- encode(max_ofs, bl);
- encode(min_index, bl);
- encode(max_index, bl);
- ENCODE_FINISH(bl);
- }
- void decode(bufferlist::const_iterator &bl) {
- DECODE_START(1, bl);
- decode(tag, bl);
- decode(params, bl);
- decode(magic, bl);
- decode(min_ofs, bl);
- decode(max_ofs, bl);
- decode(min_index, bl);
- decode(max_index, bl);
- DECODE_FINISH(bl);
- }
- };
- WRITE_CLASS_ENCODER(fifo_part_header_t)
+namespace rados::cls::fifo {
+struct objv {
+ std::string instance;
+ std::uint64_t ver{0};
+
+ void encode(ceph::buffer::list& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(instance, bl);
+ encode(ver, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(ceph::buffer::list::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(instance, bl);
+ decode(ver, bl);
+ DECODE_FINISH(bl);
+ }
+ void dump(ceph::Formatter* f) const;
+ void decode_json(JSONObj* obj);
+
+ bool operator ==(const objv& rhs) const {
+ return (instance == rhs.instance &&
+ ver == rhs.ver);
+ }
+ bool operator !=(const objv& rhs) const {
+ return (instance != rhs.instance ||
+ ver != rhs.ver);
+ }
+ bool same_or_later(const objv& rhs) const {
+ return (instance == rhs.instance ||
+ ver >= rhs.ver);
+ }
+
+ bool empty() const {
+ return instance.empty();
+ }
+
+ std::string to_str() const {
+ return fmt::format("{}{{{}}}", instance, ver);
+ }
+};
+WRITE_CLASS_ENCODER(objv)
+inline std::ostream& operator <<(std::ostream& os, const objv& objv)
+{
+ return os << objv.to_str();
+}
+
+struct data_params {
+ std::uint64_t max_part_size{0};
+ std::uint64_t max_entry_size{0};
+ std::uint64_t full_size_threshold{0};
+
+ void encode(ceph::buffer::list& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(max_part_size, bl);
+ encode(max_entry_size, bl);
+ encode(full_size_threshold, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(ceph::buffer::list::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(max_part_size, bl);
+ decode(max_entry_size, bl);
+ decode(full_size_threshold, bl);
+ DECODE_FINISH(bl);
+ }
+ void dump(ceph::Formatter* f) const;
+ void decode_json(JSONObj* obj);
+
+ bool operator ==(const data_params& rhs) const {
+ return (max_part_size == rhs.max_part_size &&
+ max_entry_size == rhs.max_entry_size &&
+ full_size_threshold == rhs.full_size_threshold);
+ }
+};
+WRITE_CLASS_ENCODER(data_params)
+inline std::ostream& operator <<(std::ostream& m, const data_params& d) {
+ return m << "max_part_size: " << d.max_part_size << ", "
+ << "max_entry_size: " << d.max_entry_size << ", "
+ << "full_size_threshold: " << d.full_size_threshold;
+}
+
+struct journal_entry {
+ enum class Op {
+ unknown = 0,
+ create = 1,
+ set_head = 2,
+ remove = 3,
+ } op{Op::unknown};
+
+ std::int64_t part_num{0};
+ std::string part_tag;
+
+ void encode(ceph::buffer::list& bl) const {
+ ENCODE_START(1, 1, bl);
+    encode(static_cast<int>(op), bl);
+ encode(part_num, bl);
+ encode(part_tag, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(ceph::buffer::list::const_iterator& bl) {
+ DECODE_START(1, bl);
+ int i;
+ decode(i, bl);
+ op = static_cast<Op>(i);
+ decode(part_num, bl);
+ decode(part_tag, bl);
+ DECODE_FINISH(bl);
+ }
+ void dump(ceph::Formatter* f) const;
+
+  bool operator ==(const journal_entry& e) const {
+ return (op == e.op &&
+ part_num == e.part_num &&
+ part_tag == e.part_tag);
+ }
+};
+WRITE_CLASS_ENCODER(journal_entry)
+inline std::ostream& operator <<(std::ostream& m, const journal_entry::Op& o) {
+ switch (o) {
+ case journal_entry::Op::unknown:
+ return m << "Op::unknown";
+ case journal_entry::Op::create:
+ return m << "Op::create";
+ case journal_entry::Op::set_head:
+ return m << "Op::set_head";
+ case journal_entry::Op::remove:
+ return m << "Op::remove";
+ }
+ return m << "Bad value: " << static_cast<int>(o);
+}
+inline std::ostream& operator <<(std::ostream& m, const journal_entry& j) {
+ return m << "op: " << j.op << ", "
+ << "part_num: " << j.part_num << ", "
+ << "part_tag: " << j.part_tag;
+}
+
+// This is actually a useful builder, since otherwise we end up with
+// four uint64_ts in a row and only care about a subset at a time.
+class update {
+ std::optional<std::uint64_t> tail_part_num_;
+ std::optional<std::uint64_t> head_part_num_;
+ std::optional<std::uint64_t> min_push_part_num_;
+ std::optional<std::uint64_t> max_push_part_num_;
+ std::vector<fifo::journal_entry> journal_entries_add_;
+ std::vector<fifo::journal_entry> journal_entries_rm_;
+
+public:
+
+ update&& tail_part_num(std::optional<std::uint64_t> num) noexcept {
+ tail_part_num_ = num;
+ return std::move(*this);
+ }
+ auto tail_part_num() const noexcept {
+ return tail_part_num_;
+ }
+
+ update&& head_part_num(std::optional<std::uint64_t> num) noexcept {
+ head_part_num_ = num;
+ return std::move(*this);
+ }
+ auto head_part_num() const noexcept {
+ return head_part_num_;
+ }
+
+ update&& min_push_part_num(std::optional<std::uint64_t> num)
+ noexcept {
+ min_push_part_num_ = num;
+ return std::move(*this);
+ }
+ auto min_push_part_num() const noexcept {
+ return min_push_part_num_;
+ }
+
+ update&& max_push_part_num(std::optional<std::uint64_t> num) noexcept {
+ max_push_part_num_ = num;
+ return std::move(*this);
+ }
+ auto max_push_part_num() const noexcept {
+ return max_push_part_num_;
+ }
+
+ update&& journal_entry_add(fifo::journal_entry entry) {
+ journal_entries_add_.push_back(std::move(entry));
+ return std::move(*this);
+ }
+ update&& journal_entries_add(
+ std::optional<std::vector<fifo::journal_entry>>&& entries) {
+ if (entries) {
+ journal_entries_add_ = std::move(*entries);
+ } else {
+ journal_entries_add_.clear();
+ }
+ return std::move(*this);
+ }
+ const auto& journal_entries_add() const & noexcept {
+ return journal_entries_add_;
+ }
+ auto&& journal_entries_add() && noexcept {
+ return std::move(journal_entries_add_);
+ }
+
+ update&& journal_entry_rm(fifo::journal_entry entry) {
+ journal_entries_rm_.push_back(std::move(entry));
+ return std::move(*this);
+ }
+ update&& journal_entries_rm(
+ std::optional<std::vector<fifo::journal_entry>>&& entries) {
+ if (entries) {
+ journal_entries_rm_ = std::move(*entries);
+ } else {
+ journal_entries_rm_.clear();
+ }
+ return std::move(*this);
+ }
+ const auto& journal_entries_rm() const & noexcept {
+ return journal_entries_rm_;
+ }
+ auto&& journal_entries_rm() && noexcept {
+ return std::move(journal_entries_rm_);
+ }
+ friend std::ostream& operator <<(std::ostream& m, const update& u);
+};
+inline std::ostream& operator <<(std::ostream& m, const update& u) {
+ bool prev = false;
+ if (u.tail_part_num_) {
+ m << "tail_part_num: " << *u.tail_part_num_;
+ prev = true;
+ }
+ if (u.head_part_num_) {
+ if (prev)
+ m << ", ";
+ m << "head_part_num: " << *u.head_part_num_;
+ prev = true;
+ }
+ if (u.min_push_part_num_) {
+ if (prev)
+ m << ", ";
+ m << "min_push_part_num: " << *u.min_push_part_num_;
+ prev = true;
+ }
+ if (u.max_push_part_num_) {
+ if (prev)
+ m << ", ";
+ m << "max_push_part_num: " << *u.max_push_part_num_;
+ prev = true;
+ }
+ if (!u.journal_entries_add_.empty()) {
+ if (prev)
+ m << ", ";
+ m << "journal_entries_add: {" << u.journal_entries_add_ << "}";
+ prev = true;
+ }
+ if (!u.journal_entries_rm_.empty()) {
+ if (prev)
+ m << ", ";
+ m << "journal_entries_rm: {" << u.journal_entries_rm_ << "}";
+ prev = true;
+ }
+ if (!prev)
+ m << "(none)";
+ return m;
+}
+struct info {
+ std::string id;
+ objv version;
+ std::string oid_prefix;
+ data_params params;
+
+ std::int64_t tail_part_num{0};
+ std::int64_t head_part_num{-1};
+ std::int64_t min_push_part_num{0};
+ std::int64_t max_push_part_num{-1};
+
+ std::string head_tag;
+  std::map<int64_t, std::string> tags;
+
+ std::multimap<int64_t, journal_entry> journal;
+
+ bool need_new_head() const {
+ return (head_part_num < min_push_part_num);
+ }
+
+ bool need_new_part() const {
+ return (max_push_part_num < min_push_part_num);
+ }
+
+ void encode(ceph::buffer::list& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(id, bl);
+ encode(version, bl);
+ encode(oid_prefix, bl);
+ encode(params, bl);
+ encode(tail_part_num, bl);
+ encode(head_part_num, bl);
+ encode(min_push_part_num, bl);
+ encode(max_push_part_num, bl);
+ encode(tags, bl);
+ encode(head_tag, bl);
+ encode(journal, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(ceph::buffer::list::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(id, bl);
+ decode(version, bl);
+ decode(oid_prefix, bl);
+ decode(params, bl);
+ decode(tail_part_num, bl);
+ decode(head_part_num, bl);
+ decode(min_push_part_num, bl);
+ decode(max_push_part_num, bl);
+ decode(tags, bl);
+ decode(head_tag, bl);
+ decode(journal, bl);
+ DECODE_FINISH(bl);
+ }
+ void dump(ceph::Formatter* f) const;
+ void decode_json(JSONObj* obj);
+
+ std::string part_oid(std::int64_t part_num) const {
+ return fmt::format("{}.{}", oid_prefix, part_num);
+ }
+
+ journal_entry next_journal_entry(std::string tag) const {
+ journal_entry entry;
+ entry.op = journal_entry::Op::create;
+ entry.part_num = max_push_part_num + 1;
+ entry.part_tag = std::move(tag);
+ return entry;
+ }
+
+ std::optional<std::string>
+ apply_update(const update& update) {
+ if (update.tail_part_num()) {
+ tail_part_num = *update.tail_part_num();
+ }
+
+ if (update.min_push_part_num()) {
+ min_push_part_num = *update.min_push_part_num();
+ }
+
+ if (update.max_push_part_num()) {
+ max_push_part_num = *update.max_push_part_num();
+ }
+
+ for (const auto& entry : update.journal_entries_add()) {
+ auto iter = journal.find(entry.part_num);
+ if (iter != journal.end() &&
+ iter->second.op == entry.op) {
+ /* don't allow multiple concurrent (same) operations on the same part,
+ racing clients should use objv to avoid races anyway */
+ return fmt::format("multiple concurrent operations on same part are not "
+ "allowed, part num={}", entry.part_num);
+ }
+
+ if (entry.op == journal_entry::Op::create) {
+ tags[entry.part_num] = entry.part_tag;
+ }
+
+ journal.emplace(entry.part_num, entry);
}
+
+ for (const auto& entry : update.journal_entries_rm()) {
+ journal.erase(entry.part_num);
+ }
+
+ if (update.head_part_num()) {
+ tags.erase(head_part_num);
+ head_part_num = *update.head_part_num();
+ auto iter = tags.find(head_part_num);
+ if (iter != tags.end()) {
+ head_tag = iter->second;
+ } else {
+ head_tag.erase();
+ }
+ }
+
+ return std::nullopt;
}
+};
+WRITE_CLASS_ENCODER(info)
+inline std::ostream& operator <<(std::ostream& m, const info& i) {
+ return m << "id: " << i.id << ", "
+ << "version: " << i.version << ", "
+ << "oid_prefix: " << i.oid_prefix << ", "
+ << "params: {" << i.params << "}, "
+ << "tail_part_num: " << i.tail_part_num << ", "
+ << "head_part_num: " << i.head_part_num << ", "
+ << "min_push_part_num: " << i.min_push_part_num << ", "
+ << "max_push_part_num: " << i.max_push_part_num << ", "
+ << "head_tag: " << i.head_tag << ", "
+ << "tags: {" << i.tags << "}, "
+           << "journal: {" << i.journal << "}";
}
-static inline ostream& operator<<(ostream& os, const rados::cls::fifo::fifo_objv_t& objv)
-{
- return os << objv.instance << "{" << objv.ver << "}";
+struct part_list_entry {
+ ceph::buffer::list data;
+ std::uint64_t ofs = 0;
+ ceph::real_time mtime;
+
+ part_list_entry() {}
+ part_list_entry(ceph::buffer::list&& data,
+ uint64_t ofs,
+ ceph::real_time mtime)
+ : data(std::move(data)), ofs(ofs), mtime(mtime) {}
+
+
+ void encode(ceph::buffer::list& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(data, bl);
+ encode(ofs, bl);
+ encode(mtime, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(ceph::buffer::list::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(data, bl);
+ decode(ofs, bl);
+ decode(mtime, bl);
+ DECODE_FINISH(bl);
+ }
+};
+WRITE_CLASS_ENCODER(part_list_entry)
+inline std::ostream& operator <<(std::ostream& m,
+ const part_list_entry& p) {
+ using ceph::operator <<;
+ return m << "data: " << p.data << ", "
+ << "ofs: " << p.ofs << ", "
+ << "mtime: " << p.mtime;
}
+struct part_header {
+ std::string tag;
+
+ data_params params;
+
+ std::uint64_t magic{0};
+
+ std::uint64_t min_ofs{0};
+ std::uint64_t last_ofs{0};
+ std::uint64_t next_ofs{0};
+ std::uint64_t min_index{0};
+ std::uint64_t max_index{0};
+ ceph::real_time max_time;
+
+ void encode(ceph::buffer::list& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(tag, bl);
+ encode(params, bl);
+ encode(magic, bl);
+ encode(min_ofs, bl);
+ encode(last_ofs, bl);
+ encode(next_ofs, bl);
+ encode(min_index, bl);
+ encode(max_index, bl);
+ encode(max_time, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(ceph::buffer::list::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(tag, bl);
+ decode(params, bl);
+ decode(magic, bl);
+ decode(min_ofs, bl);
+ decode(last_ofs, bl);
+ decode(next_ofs, bl);
+ decode(min_index, bl);
+ decode(max_index, bl);
+ decode(max_time, bl);
+ DECODE_FINISH(bl);
+ }
+};
+WRITE_CLASS_ENCODER(part_header)
+inline std::ostream& operator <<(std::ostream& m, const part_header& p) {
+ using ceph::operator <<;
+ return m << "tag: " << p.tag << ", "
+ << "params: {" << p.params << "}, "
+ << "magic: " << p.magic << ", "
+ << "min_ofs: " << p.min_ofs << ", "
+ << "last_ofs: " << p.last_ofs << ", "
+ << "next_ofs: " << p.next_ofs << ", "
+ << "min_index: " << p.min_index << ", "
+ << "max_index: " << p.max_index << ", "
+ << "max_time: " << p.max_time;
+}
+} // namespace rados::cls::fifo
.set_description(""),
Option("osd_class_load_list", Option::TYPE_STR, Option::LEVEL_ADVANCED)
- .set_default("cephfs hello journal lock log numops " "otp rbd refcount rgw rgw_gc timeindex user version cas cmpomap queue 2pc_queue")
+ .set_default("cephfs hello journal lock log numops " "otp rbd refcount rgw rgw_gc timeindex user version cas cmpomap queue 2pc_queue fifo")
.set_description(""),
Option("osd_class_default_list", Option::TYPE_STR, Option::LEVEL_ADVANCED)
- .set_default("cephfs hello journal lock log numops " "otp rbd refcount rgw rgw_gc timeindex user version cas cmpomap queue 2pc_queue")
+ .set_default("cephfs hello journal lock log numops " "otp rbd refcount rgw rgw_gc timeindex user version cas cmpomap queue 2pc_queue fifo")
.set_description(""),
Option("osd_check_for_log_corruption", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
# ${BLKID_LIBRARIES} ${CRYPTO_LIBS} ${EXTRALIBS})
# target_link_libraries(libneorados ${rados_libs})
# install(TARGETS libneorados DESTINATION ${CMAKE_INSTALL_LIBDIR})
+add_library(neorados_cls_fifo STATIC cls/fifo.cc)
+target_link_libraries(neorados_cls_fifo PRIVATE
+ libneorados ceph-common fmt::fmt)
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2020 Red Hat <contact@redhat.com>
+ * Author: Adam C. Emerson
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <cstdint>
+#include <numeric>
+#include <optional>
+#include <string_view>
+
+#undef FMT_HEADER_ONLY
+#define FMT_HEADER_ONLY 1
+#include <fmt/format.h>
+
+#include <boost/system/error_code.hpp>
+
+#include "include/neorados/RADOS.hpp"
+
+#include "include/buffer.h"
+
+#include "common/random_string.h"
+
+#include "cls/fifo/cls_fifo_types.h"
+#include "cls/fifo/cls_fifo_ops.h"
+
+#include "fifo.h"
+
+namespace neorados::cls::fifo {
+namespace bs = boost::system;
+namespace cb = ceph::buffer;
+namespace fifo = rados::cls::fifo;
+
+// Queue a CREATE_META class call on 'op'. Encodes a
+// fifo::op::create_meta request carrying the FIFO id, an optional
+// required metadata version and part-OID prefix, the exclusivity
+// flag, and the partition layout parameters.
+void create_meta(WriteOp& op, std::string_view id,
+                 std::optional<fifo::objv> objv,
+                 std::optional<std::string_view> oid_prefix,
+                 bool exclusive,
+                 std::uint64_t max_part_size,
+                 std::uint64_t max_entry_size)
+{
+  fifo::op::create_meta cm;
+
+  cm.id = id;
+  cm.version = objv;
+  cm.oid_prefix = oid_prefix;
+  cm.max_part_size = max_part_size;
+  cm.max_entry_size = max_entry_size;
+  cm.exclusive = exclusive;
+
+  cb::list in;
+  encode(cm, in);
+  op.exec(fifo::op::CLASS, fifo::op::CREATE_META, in);
+}
+
+// Queue a GET_META class call on 'op'. On completion the decoded
+// reply is written through the out-pointers (any may be null to
+// ignore that field). On exec or decode failure *ec_out receives the
+// error and the remaining out-params receive the values of a
+// default-constructed reply; callers must check *ec_out first.
+void get_meta(ReadOp& op, std::optional<fifo::objv> objv,
+              bs::error_code* ec_out, fifo::info* info,
+              std::uint32_t* part_header_size,
+              std::uint32_t* part_entry_overhead)
+{
+  fifo::op::get_meta gm;
+  gm.version = objv;
+  cb::list in;
+  encode(gm, in);
+  op.exec(fifo::op::CLASS, fifo::op::GET_META, in,
+          [ec_out, info, part_header_size,
+           part_entry_overhead](bs::error_code ec, const cb::list& bl) {
+            fifo::op::get_meta_reply reply;
+            if (!ec) try {
+                auto iter = bl.cbegin();
+                decode(reply, iter);
+              } catch (const cb::error& err) {
+                ec = err.code();
+              }
+            if (ec_out) *ec_out = ec;
+            if (info) *info = std::move(reply.info);
+            if (part_header_size) *part_header_size = reply.part_header_size;
+            if (part_entry_overhead)
+              *part_entry_overhead = reply.part_entry_overhead;
+          });
+}
+
+// Queue an UPDATE_META class call on 'op', applying 'update' against
+// the FIFO metadata at version 'objv'.
+void update_meta(WriteOp& op, const fifo::objv& objv,
+                 const fifo::update& update)
+{
+  fifo::op::update_meta um;
+
+  um.version = objv;
+  um.tail_part_num = update.tail_part_num();
+  um.head_part_num = update.head_part_num();
+  um.min_push_part_num = update.min_push_part_num();
+  um.max_push_part_num = update.max_push_part_num();
+  // 'update' is a const reference: std::move() on it would bind
+  // const&& and still copy, so copy explicitly rather than write a
+  // misleading move.
+  um.journal_entries_add = update.journal_entries_add();
+  um.journal_entries_rm = update.journal_entries_rm();
+
+  cb::list in;
+  encode(um, in);
+  op.exec(fifo::op::CLASS, fifo::op::UPDATE_META, in);
+}
+
+// Queue an INIT_PART class call on 'op': initialize one part object
+// with the given tag and shared data parameters.
+void part_init(WriteOp& op, std::string_view tag,
+               fifo::data_params params)
+{
+  fifo::op::init_part ip;
+
+  ip.tag = tag;
+  ip.params = params;
+
+  cb::list in;
+  encode(ip, in);
+  op.exec(fifo::op::CLASS, fifo::op::INIT_PART, in);
+}
+
+// Queue a PUSH_PART class call on 'op' appending 'data_bufs' to a
+// part. 'f' is invoked with the op's error code and return value
+// (returnvec() is enabled so the per-op result is delivered).
+void push_part(WriteOp& op, std::string_view tag,
+               std::deque<cb::list> data_bufs,
+               fu2::unique_function<void(bs::error_code, int)> f)
+{
+  fifo::op::push_part pp;
+
+  pp.tag = tag;
+  pp.total_len = 0;
+  // Sum the entry sizes first, then move the (by-value sink)
+  // buffers into the request instead of copying the whole deque.
+  for (const auto& bl : data_bufs)
+    pp.total_len += bl.length();
+  pp.data_bufs = std::move(data_bufs);
+
+  cb::list in;
+  encode(pp, in);
+  op.exec(fifo::op::CLASS, fifo::op::PUSH_PART, in,
+          [f = std::move(f)](bs::error_code ec, int r, const cb::list&) mutable {
+            std::move(f)(ec, r);
+          });
+  op.returnvec();
+}
+
+// Queue a TRIM_PART class call on 'op': trim one part up to (and
+// including) offset 'ofs'.
+void trim_part(WriteOp& op,
+               std::optional<std::string_view> tag,
+               std::uint64_t ofs)
+{
+  fifo::op::trim_part tp;
+
+  tp.tag = tag;
+  tp.ofs = ofs;
+
+  // Use the cb::list alias for consistency with the rest of this file.
+  cb::list in;
+  encode(tp, in);
+  op.exec(fifo::op::CLASS, fifo::op::TRIM_PART, in);
+}
+
+// Queue a LIST_PART class call on 'op': list up to 'max_entries'
+// entries of one part starting at offset 'ofs'. Results are written
+// through the out-pointers (any may be null to ignore that field);
+// on failure only *ec_out is touched.
+void list_part(ReadOp& op,
+               std::optional<std::string_view> tag,
+               std::uint64_t ofs,
+               std::uint64_t max_entries,
+               bs::error_code* ec_out,
+               std::vector<fifo::part_list_entry>* entries,
+               bool* more,
+               bool* full_part,
+               std::string* ptag)
+{
+  fifo::op::list_part lp;
+
+  lp.tag = tag;
+  lp.ofs = ofs;
+  lp.max_entries = max_entries;
+
+  cb::list in;
+  encode(lp, in);
+  op.exec(fifo::op::CLASS, fifo::op::LIST_PART, in,
+          [entries, more, full_part, ptag, ec_out](bs::error_code ec,
+                                                   const cb::list& bl) {
+            if (ec) {
+              if (ec_out) *ec_out = ec;
+              return;
+            }
+
+            fifo::op::list_part_reply reply;
+            auto iter = bl.cbegin();
+            try {
+              decode(reply, iter);
+            } catch (const cb::error& err) {
+              // Report the decode failure; 'ec' is success here.
+              if (ec_out) *ec_out = err.code();
+              return;
+            }
+
+            if (entries) *entries = std::move(reply.entries);
+            if (more) *more = reply.more;
+            if (full_part) *full_part = reply.full_part;
+            if (ptag) *ptag = reply.tag;
+          });
+}
+
+// Queue a GET_PART_INFO class call on 'op'. On success *header
+// receives the part header; on failure only *out_ec is touched.
+void get_part_info(ReadOp& op,
+                   bs::error_code* out_ec,
+                   fifo::part_header* header)
+{
+  fifo::op::get_part_info gpi;
+
+  cb::list in;
+  encode(gpi, in);
+  op.exec(fifo::op::CLASS, fifo::op::GET_PART_INFO, in,
+          [out_ec, header](bs::error_code ec, const cb::list& bl) {
+            if (ec) {
+              if (out_ec) *out_ec = ec;
+              // Don't fall through and decode a reply we never got.
+              return;
+            }
+            fifo::op::get_part_info_reply reply;
+            auto iter = bl.cbegin();
+            try {
+              decode(reply, iter);
+            } catch (const cb::error& err) {
+              // Report the decode failure; 'ec' is success here.
+              if (out_ec) *out_ec = err.code();
+              return;
+            }
+
+            if (header) *header = std::move(reply.header);
+          });
+}
+
+// Parse an external marker string ("<part>:<ofs>") into a marker.
+// The empty string denotes the current tail. Returns std::nullopt on
+// malformed input.
+std::optional<marker> FIFO::to_marker(std::string_view s) {
+  marker m;
+  if (s.empty()) {
+    m.num = info.tail_part_num;
+    m.ofs = 0;
+    return m;
+  }
+
+  auto pos = s.find(':');
+  // 's' is a string_view, so compare against string_view::npos.
+  if (pos == std::string_view::npos) {
+    return std::nullopt;
+  }
+
+  auto num = s.substr(0, pos);
+  auto ofs = s.substr(pos + 1);
+
+  auto n = ceph::parse<decltype(m.num)>(num);
+  if (!n) {
+    return std::nullopt;
+  }
+  m.num = *n;
+  auto o = ceph::parse<decltype(m.ofs)>(ofs);
+  if (!o) {
+    return std::nullopt;
+  }
+  m.ofs = *o;
+  return m;
+}
+
+// Apply 'update' to the cached metadata under the lock. 'objv' is
+// the version the update was computed against; a mismatch means we
+// raced with another local updater.
+bs::error_code FIFO::apply_update(fifo::info* info,
+                                  const fifo::objv& objv,
+                                  const fifo::update& update) {
+  std::unique_lock l(m);
+  // Check the version *before* mutating: otherwise a raced update
+  // would already have been applied to *info by the time we bail.
+  if (objv != info->version) {
+    ldout(r->cct(), 0) << __func__ << "(): Raced locally!" << dendl;
+    return errc::raced;
+  }
+  auto err = info->apply_update(update);
+  if (err) {
+    ldout(r->cct(), 0) << __func__ << "(): ERROR: " << err << dendl;
+    return errc::update_failed;
+  }
+
+  ++info->version.ver;
+
+  return {};
+}
+
+// Produce a random 16-character alphanumeric tag for a new part header.
+std::string FIFO::generate_tag() const
+{
+  static constexpr auto HEADER_TAG_SIZE = 16;
+  return gen_rand_alphanumeric_plain(r->cct(), HEADER_TAG_SIZE);
+}
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wnon-virtual-dtor"
+// Error category for FIFO-specific errc values. Derives from
+// ceph::converting_category so codes can be mapped back to classic
+// negative-errno values via from_code(). The single instance lives
+// as a function-local static in error_category() below and is never
+// deleted through a base pointer, hence the -Wnon-virtual-dtor
+// suppression around this declaration.
+class error_category : public ceph::converting_category {
+public:
+  error_category(){}
+  const char* name() const noexcept override;
+  const char* message(int ev, char*, std::size_t) const noexcept override;
+  std::string message(int ev) const override;
+  bs::error_condition default_error_condition(int ev) const noexcept
+    override;
+  bool equivalent(int ev, const bs::error_condition& c) const
+    noexcept override;
+  using ceph::converting_category::equivalent;
+  int from_code(int ev) const noexcept override;
+};
+#pragma GCC diagnostic pop
+#pragma clang diagnostic pop
+
+// Category name shown when error codes are printed.
+const char* error_category::name() const noexcept {
+  return "FIFO";
+}
+
+// Human-readable description of an errc value. This buffer-based
+// overload returns static strings and ignores the supplied buffer.
+const char* error_category::message(int ev, char*, std::size_t) const noexcept {
+  if (ev == 0)
+    return "No error";
+
+  switch (static_cast<errc>(ev)) {
+  case errc::raced:
+    return "Retry-race count exceeded";
+
+  case errc::inconsistency:
+    return "Inconsistent result! New head before old head";
+
+  case errc::entry_too_large:
+    return "Pushed entry too large";
+
+  case errc::invalid_marker:
+    return "Invalid marker string";
+
+  case errc::update_failed:
+    return "Update failed";
+  }
+
+  return "Unknown error";
+}
+
+// std::string overload; delegates to the static-string overload.
+std::string error_category::message(int ev) const {
+  return message(ev, nullptr, 0);
+}
+
+// Map FIFO-specific errors onto generic error conditions so callers
+// can compare portably (e.g. ec == bs::errc::invalid_argument).
+bs::error_condition
+error_category::default_error_condition(int ev) const noexcept {
+  switch (static_cast<errc>(ev)) {
+  case errc::raced:
+    return bs::errc::operation_canceled;
+
+  case errc::inconsistency:
+    return bs::errc::io_error;
+
+  case errc::entry_too_large:
+    return bs::errc::value_too_large;
+
+  case errc::invalid_marker:
+    return bs::errc::invalid_argument;
+
+  case errc::update_failed:
+    return bs::errc::invalid_argument;
+  }
+
+  // Unrecognized value: the condition is the code itself.
+  return { ev, *this };
+}
+
+// A code is equivalent to any condition its default condition matches.
+bool error_category::equivalent(int ev, const bs::error_condition& c) const noexcept {
+  return default_error_condition(ev) == c;
+}
+
+// Convert an errc value to the classic negative-errno convention
+// used by librados-style interfaces.
+int error_category::from_code(int ev) const noexcept {
+  switch (static_cast<errc>(ev)) {
+  case errc::raced:
+    return -ECANCELED;
+
+  case errc::inconsistency:
+    return -EIO;
+
+  case errc::entry_too_large:
+    return -E2BIG;
+
+  case errc::invalid_marker:
+    return -EINVAL;
+
+  case errc::update_failed:
+    return -EINVAL;
+
+  }
+  // No meaningful errno mapping for unknown values.
+  return -EDOM;
+}
+
+// Return the singleton FIFO error category instance.
+const bs::error_category& error_category() noexcept {
+  static const class error_category c;
+  return c;
+}
+
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2020 Red Hat <contact@redhat.com>
+ * Author: Adam C. Emerson
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_NEORADOS_CLS_FIFIO_H
+#define CEPH_NEORADOS_CLS_FIFIO_H
+
+#include <cstdint>
+#include <deque>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <string_view>
+#include <vector>
+
+#include <boost/asio.hpp>
+#include <boost/system/error_code.hpp>
+
+#undef FMT_HEADER_ONLY
+#define FMT_HEADER_ONLY 1
+#include <fmt/format.h>
+
+#include "include/neorados/RADOS.hpp"
+#include "include/buffer.h"
+
+#include "common/allocate_unique.h"
+#include "common/async/bind_handler.h"
+#include "common/async/bind_like.h"
+#include "common/async/completion.h"
+#include "common/async/forward_handler.h"
+
+#include "common/dout.h"
+
+#include "cls/fifo/cls_fifo_types.h"
+#include "cls/fifo/cls_fifo_ops.h"
+
+namespace neorados::cls::fifo {
+namespace ba = boost::asio;
+namespace bs = boost::system;
+namespace ca = ceph::async;
+namespace cb = ceph::buffer;
+namespace fifo = rados::cls::fifo;
+
+inline constexpr auto dout_subsys = ceph_subsys_rados;
+inline constexpr std::uint64_t default_max_part_size = 4 * 1024 * 1024;
+inline constexpr std::uint64_t default_max_entry_size = 32 * 1024;
+inline constexpr auto MAX_RACE_RETRIES = 10;
+
+
+const boost::system::error_category& error_category() noexcept;
+
+enum class errc {
+ raced = 1,
+ inconsistency,
+ entry_too_large,
+ invalid_marker,
+ update_failed
+};
+}
+
+// Register errc with Boost.System so values convert implicitly to
+// bs::error_code (via make_error_code) but not to error_condition.
+namespace boost::system {
+template<>
+struct is_error_code_enum<::neorados::cls::fifo::errc> {
+  static const bool value = true;
+};
+template<>
+struct is_error_condition_enum<::neorados::cls::fifo::errc> {
+  static const bool value = false;
+};
+}
+
+namespace neorados::cls::fifo {
+// explicit conversion:
+// Hook required by Boost.System's is_error_code_enum registration.
+inline bs::error_code make_error_code(errc e) noexcept {
+  return { static_cast<int>(e), error_category() };
+}
+
+// NOTE(review): this appears to be a typo for make_error_condition —
+// it duplicates make_error_code exactly, and Boost.System requires
+// no hook named make_error_category. Confirm before relying on it.
+inline bs::error_code make_error_category(errc e) noexcept {
+  return { static_cast<int>(e), error_category() };
+}
+
+void create_meta(WriteOp& op, std::string_view id,
+ std::optional<fifo::objv> objv,
+ std::optional<std::string_view> oid_prefix,
+ bool exclusive = false,
+ std::uint64_t max_part_size = default_max_part_size,
+ std::uint64_t max_entry_size = default_max_entry_size);
+void get_meta(ReadOp& op, std::optional<fifo::objv> objv,
+ bs::error_code* ec_out, fifo::info* info,
+ std::uint32_t* part_header_size,
+ std::uint32_t* part_entry_overhead);
+
+void update_meta(WriteOp& op, const fifo::objv& objv,
+ const fifo::update& desc);
+
+void part_init(WriteOp& op, std::string_view tag,
+ fifo::data_params params);
+
+void push_part(WriteOp& op, std::string_view tag,
+ std::deque<cb::list> data_bufs,
+ fu2::unique_function<void(bs::error_code, int)>);
+void trim_part(WriteOp& op, std::optional<std::string_view> tag,
+ std::uint64_t ofs);
+void list_part(ReadOp& op,
+ std::optional<std::string_view> tag,
+ std::uint64_t ofs,
+ std::uint64_t max_entries,
+ bs::error_code* ec_out,
+ std::vector<fifo::part_list_entry>* entries,
+ bool* more,
+ bool* full_part,
+ std::string* ptag);
+void get_part_info(ReadOp& op,
+ bs::error_code* out_ec,
+ fifo::part_header* header);
+
+// A FIFO position: part number plus byte offset within that part.
+// to_string() serializes both fields as zero-padded 20-digit decimal
+// ("<num>:<ofs>") so external marker strings sort lexicographically
+// in position order.
+struct marker {
+  std::int64_t num = 0;
+  std::uint64_t ofs = 0;
+
+  marker() = default;
+  marker(std::int64_t num, std::uint64_t ofs) : num(num), ofs(ofs) {}
+  // Marker beyond any valid position (used as an upper bound).
+  static marker max() {
+    return { std::numeric_limits<decltype(num)>::max(),
+             std::numeric_limits<decltype(ofs)>::max() };
+  }
+
+  std::string to_string() {
+    return fmt::format("{:0>20}:{:0>20}", num, ofs);
+  }
+};
+
+// One entry returned by FIFO::list(): the payload, its position
+// marker, and the (OSD-side) time it was pushed.
+struct list_entry {
+  cb::list data;
+  std::string marker;
+  ceph::real_time mtime;
+};
+
+using part_info = fifo::part_header;
+
+namespace detail {
+template<typename Handler>
+class JournalProcessor;
+}
+
+/// Completions, Handlers, and CompletionTokens
+/// ===========================================
+///
+/// This class is based on Boost.Asio. For information, see
+/// https://www.boost.org/doc/libs/1_74_0/doc/html/boost_asio.html
+///
+/// As summary, Asio's design is that of functions taking completion
+/// handlers. Every handler has a signature, like
+/// (boost::system::error_code, std::string). The completion handler
+/// receives the result of the function, and the signature is the type
+/// of that result.
+///
+/// The completion handler is specified with a CompletionToken. The
+/// CompletionToken is any type that has a specialization of
+/// async_complete and async_result. See
+/// https://www.boost.org/doc/libs/1_74_0/doc/html/boost_asio/reference/async_completion.html
+/// and https://www.boost.org/doc/libs/1_74_0/doc/html/boost_asio/reference/async_result.html
+///
+/// The return type of a function taking a CompletionToken is
+/// async_result<CompletionToken, Signature>::return_type.
+///
+/// Functions
+/// ---------
+///
+/// The default implementations treat whatever value is described as a
+/// function, whose parameters correspond to the signature, and calls
+/// it upon completion.
+///
+/// EXAMPLE:
+/// Let f be an asynchronous function whose signature is (bs::error_code, int)
+/// Let g be an asynchronous function whose signature is
+/// (bs::error_code, int, std::string).
+///
+///
+/// f([](bs::error_code ec, int i) { ... });
+/// g([](bs::error_code ec, int i, std::string s) { ... });
+///
+/// Will schedule asynchronous tasks, and the provided lambdas will be
+/// called on completion. In this case, f and g return void.
+///
+/// There are other specializations. Commonly used ones are.
+///
+/// Futures
+/// -------
+///
+/// A CompletionToken of boost::asio::use_future will complete with a
+/// promise whose type matches (minus any initial error_code) the
+/// function's signature. The corresponding future is returned. If the
+/// error_code of the result is non-zero, the future is set with an
+/// exception of type boost::asio::system_error.
+///
+/// See https://www.boost.org/doc/libs/1_74_0/doc/html/boost_asio/reference/use_future_t.html
+///
+/// EXAMPLE:
+///
+/// std::future<int> i = f(ba::use_future);
+/// std::future<std::tuple<int, std::string>> is = g(ba::use_future);
+///
+/// Coroutines
+/// ----------
+///
+/// A CompletionToken of type spawn::yield_context suspends execution
+/// of the current coroutine until completion of the operation. See
+/// src/spawn/README.md
+/// https://www.boost.org/doc/libs/1_74_0/doc/html/boost_asio/reference/spawn.html and
+/// https://www.boost.org/doc/libs/1_74_0/doc/html/boost_asio/reference/yield_context.html
+///
+/// Operations given this CompletionToken return their results, modulo
+/// any leading error_code. A non-zero error code will be thrown, by
+/// default, but may be bound to a variable instead with the overload
+/// of the array-subscript operator.
+///
+/// EXAMPLE:
+/// // Within a function with a yield_context parameter named y
+///
+/// try {
+/// int i = f(y);
+/// } catch (const bs::system_error& ec) { ... }
+///
+/// bs::error_code ec;
+/// auto [i, s] = g(y[ec]);
+///
+/// Blocking calls
+/// --------------
+///
+/// ceph::async::use_blocked, defined in src/common/async/blocked_completion.h
+/// Suspends the current thread of execution, returning the results of
+/// the operation on resumption. Its calling convention is analogous to
+/// that of yield_context.
+///
+/// EXAMPLE:
+/// try {
+/// int i = f(ca::use_blocked);
+/// } catch (const bs::system_error& e) { ... }
+///
+/// bs::error_code ec;
+/// auto [i, s] = g(ca::use_blocked[ec]);
+///
+/// librados Completions
+/// --------------------
+///
+/// If src/common/async/librados_completion.h is included in the
+/// current translation unit, then librados::AioCompletion* may be used
+/// as a CompletionToken. This is only permitted when the completion
+/// signature is either bs::system_error or void. The return type of
+/// functions provided a CompletionToken of AioCompletion* is void. If
+/// the signature includes an error code and the error code is set,
+/// then the error is translated to an int which is set as the result
+/// of the AioCompletion.
+///
+/// EXAMPLE:
+/// // Assume an asynchronous function h whose signature is bs::error_code.
+///
+/// AioCompletion* c = Rados::aio_create_completion();
+/// h(c);
+/// int r = c->get_return_value();
+///
+/// See also src/test/cls_fifo/bench_cls_fifo.cc for a full, simple
+/// example of a program using this class with coroutines.
+///
+///
+/// Markers
+/// =======
+///
+/// Markers represent a position within the FIFO. Internally, they are
+/// part/offset pairs. Externally, they are ordered but otherwise
+/// opaque strings. Markers that compare lower denote positions closer
+/// to the tail.
+///
+/// A marker is returned with every entry from a list() operation. They
+/// may be supplied to a list operation to resume from a given
+/// position, and must be supplied to trim to give the position to
+/// which to trim.
+
+class FIFO {
+public:
+
+ FIFO(const FIFO&) = delete;
+ FIFO& operator =(const FIFO&) = delete;
+ FIFO(FIFO&&) = delete;
+ FIFO& operator =(FIFO&&) = delete;
+
+  /// Open an existing FIFO.
+  /// Signature: (bs::error_code ec, std::unique_ptr<FIFO> f)
+  ///
+  /// Reads the FIFO metadata, constructs the FIFO object and, if the
+  /// journal is non-empty (a previous writer may have crashed
+  /// mid-transaction), replays the journal before completing. The
+  /// handler always receives a constructed FIFO pointer, even
+  /// alongside a non-zero error code — check ec before using it.
+  template<typename CT>
+  static auto open(RADOS& r, //< RADOS handle
+                   const IOContext& ioc, //< Context for pool, namespace, etc.
+                   Object oid, //< OID for the 'main' object of the FIFO
+                   CT&& ct, //< CompletionToken
+                   /// Fail if is not this version
+                   std::optional<fifo::objv> objv = std::nullopt,
+                   /// Default executor. By default use the one
+                   /// associated with the RADOS handle.
+                   std::optional<ba::executor> executor = std::nullopt) {
+    ba::async_completion<CT, void(bs::error_code,
+                                  std::unique_ptr<FIFO>)> init(ct);
+    auto e = ba::get_associated_executor(init.completion_handler,
+                                         executor.value_or(r.get_executor()));
+    auto a = ba::get_associated_allocator(init.completion_handler);
+    _read_meta_(
+      &r, oid, ioc, objv,
+      ca::bind_ea(
+        e, a,
+        [&r, ioc, oid, executor, handler = std::move(init.completion_handler)]
+        (bs::error_code ec, fifo::info info,
+         std::uint32_t size, std::uint32_t over) mutable {
+          std::unique_ptr<FIFO> f(
+            new FIFO(r, ioc, oid, executor.value_or(r.get_executor())));
+          f->info = info;
+          f->part_header_size = size;
+          f->part_entry_overhead = over;
+          // If there are journal entries, process them, in case
+          // someone crashed mid-transaction.
+          if (!ec && !info.journal.empty()) {
+            auto e = ba::get_associated_executor(handler, f->get_executor());
+            auto a = ba::get_associated_allocator(handler);
+            auto g = f.get();
+            g->_process_journal(
+              ca::bind_ea(
+                e, a,
+                [f = std::move(f),
+                 handler = std::move(handler)](bs::error_code ec) mutable {
+                  std::move(handler)(ec, std::move(f));
+                }));
+            return;
+          }
+          std::move(handler)(ec, std::move(f));
+          return;
+        }));
+    return init.result.get();
+  }
+
+  /// Open an existing or create a new FIFO.
+  /// Signature: (bs::error_code ec, std::unique_ptr<FIFO> f)
+  ///
+  /// Issues CREATE_META (which fails on an existing FIFO only when
+  /// 'exclusive' is set), then re-reads the metadata and, like
+  /// open(), replays any pending journal before completing.
+  template<typename CT>
+  static auto create(RADOS& r, /// RADOS handle
+                     const IOContext& ioc, /// Context for pool, namespace, etc.
+                     Object oid, /// OID for the 'main' object of the FIFO
+                     CT&& ct, /// CompletionToken
+                     /// Fail if FIFO exists and is not this version
+                     std::optional<fifo::objv> objv = std::nullopt,
+                     /// Custom prefix for parts
+                     std::optional<std::string_view> oid_prefix = std::nullopt,
+                     /// Fail if FIFO already exists
+                     bool exclusive = false,
+                     /// Size at which a part is considered full
+                     std::uint64_t max_part_size = default_max_part_size,
+                     /// Maximum size of any entry
+                     std::uint64_t max_entry_size = default_max_entry_size,
+                     /// Default executor. By default use the one
+                     /// associated with the RADOS handle.
+                     std::optional<ba::executor> executor = std::nullopt) {
+    ba::async_completion<CT, void(bs::error_code,
+                                  std::unique_ptr<FIFO>)> init(ct);
+    WriteOp op;
+    create_meta(op, oid, objv, oid_prefix, exclusive, max_part_size,
+                max_entry_size);
+    auto e = ba::get_associated_executor(init.completion_handler,
+                                         executor.value_or(r.get_executor()));
+    auto a = ba::get_associated_allocator(init.completion_handler);
+    r.execute(
+      oid, ioc, std::move(op),
+      ca::bind_ea(
+        e, a,
+        [objv, &r, ioc, oid, executor, handler = std::move(init.completion_handler)]
+        (bs::error_code ec) mutable {
+          if (ec) {
+            std::move(handler)(ec, nullptr);
+            return;
+          }
+          auto e = ba::get_associated_executor(
+            handler, executor.value_or(r.get_executor()));
+          auto a = ba::get_associated_allocator(handler);
+          FIFO::_read_meta_(
+            &r, oid, ioc, objv,
+            ca::bind_ea(
+              e, a,
+              [&r, ioc, executor, oid, handler = std::move(handler)]
+              (bs::error_code ec, fifo::info info,
+               std::uint32_t size, std::uint32_t over) mutable {
+                std::unique_ptr<FIFO> f(
+                  new FIFO(r, ioc, oid, executor.value_or(r.get_executor())));
+                f->info = info;
+                f->part_header_size = size;
+                f->part_entry_overhead = over;
+                if (!ec && !info.journal.empty()) {
+                  auto e = ba::get_associated_executor(handler,
+                                                       f->get_executor());
+                  auto a = ba::get_associated_allocator(handler);
+                  auto g = f.get();
+                  g->_process_journal(
+                    ca::bind_ea(
+                      e, a,
+                      [f = std::move(f), handler = std::move(handler)]
+                      (bs::error_code ec) mutable {
+                        std::move(handler)(ec, std::move(f));
+                      }));
+                  return;
+                }
+                std::move(handler)(ec, std::move(f));
+              }));
+        }));
+    return init.result.get();
+  }
+
+  /// Force a re-read of FIFO metadata.
+  /// Signature: (bs::error_code ec)
+  ///
+  /// The fresh metadata is installed only if the cached version is
+  /// unchanged from when the re-read began, so this never overwrites
+  /// the result of a concurrent update with older data.
+  template<typename CT>
+  auto read_meta(CT&& ct, //< CompletionToken
+                 /// Fail if FIFO not at this version
+                 std::optional<fifo::objv> objv = std::nullopt) {
+    std::unique_lock l(m);
+    auto version = info.version;
+    l.unlock();
+    ba::async_completion<CT, void(bs::error_code)> init(ct);
+    auto e = ba::get_associated_executor(init.completion_handler,
+                                         get_executor());
+    auto a = ba::get_associated_allocator(init.completion_handler);
+    _read_meta_(
+      r, oid, ioc, objv,
+      ca::bind_ea(
+        e, a,
+        [this, version, handler = std::move(init.completion_handler)]
+        (bs::error_code ec, fifo::info newinfo,
+         std::uint32_t size, std::uint32_t over) mutable {
+          std::unique_lock l(m);
+          if (version == info.version) {
+            info = newinfo;
+            part_header_size = size;
+            part_entry_overhead = over;
+          }
+          l.unlock();
+          return std::move(handler)(ec);
+        }));
+    return init.result.get();
+  }
+
+  /// Return a reference to currently known metadata
+  ///
+  /// NOTE(review): 'info' is guarded by 'm' in the mutating paths but
+  /// read here without the lock — confirm callers tolerate a torn or
+  /// stale view.
+  const fifo::info& meta() const {
+    return info;
+  }
+
+  /// Return header size and entry overhead of partitions, as
+  /// reported by the OSD when the metadata was last read.
+  std::pair<std::uint32_t, std::uint32_t> get_part_layout_info() {
+    return {part_header_size, part_entry_overhead};
+  }
+
+  /// Push a single entry to the FIFO.
+  /// Signature: (bs::error_code)
+  ///
+  /// Delegates to the batch overload with a one-element vector.
+  template<typename CT>
+  auto push(const cb::list& bl, //< Bufferlist holding entry to push
+            CT&& ct //< CompletionToken
+            ) {
+    return push(std::vector{ bl }, std::forward<CT>(ct));
+  }
+
+  /// Push many entries to the FIFO.
+  /// Signature: (bs::error_code)
+  ///
+  /// An empty batch completes immediately with success. Each entry
+  /// is validated against max_entry_size before any I/O is issued;
+  /// oversized entries fail the whole batch with
+  /// errc::entry_too_large. If the cached metadata says a new head
+  /// part is needed, one is prepared before pushing.
+  template<typename CT>
+  auto push(const std::vector<cb::list>& data_bufs, //< Entries to push
+            CT&& ct //< CompletionToken
+            ) {
+    ba::async_completion<CT, void(bs::error_code)> init(ct);
+    std::unique_lock l(m);
+    auto max_entry_size = info.params.max_entry_size;
+    auto need_new_head = info.need_new_head();
+    l.unlock();
+    auto e = ba::get_associated_executor(init.completion_handler,
+                                         get_executor());
+    auto a = ba::get_associated_allocator(init.completion_handler);
+    if (data_bufs.empty() ) {
+      // Can't fail if you don't try.
+      e.post(ca::bind_handler(std::move(init.completion_handler),
+                              bs::error_code{}), a);
+      return init.result.get();
+    }
+
+    // Validate sizes
+    for (const auto& bl : data_bufs) {
+      if (bl.length() > max_entry_size) {
+        ldout(r->cct(), 10) << __func__ << "(): entry too large: "
+                            << bl.length() << " > "
+                            << info.params.max_entry_size << dendl;
+        e.post(ca::bind_handler(std::move(init.completion_handler),
+                                errc::entry_too_large), a);
+        return init.result.get();
+      }
+    }
+
+    auto p = ca::bind_ea(e, a,
+                         Pusher(this, {data_bufs.begin(), data_bufs.end()},
+                                {}, 0, std::move(init.completion_handler)));
+
+    if (need_new_head) {
+      _prepare_new_head(std::move(p));
+    } else {
+      e.dispatch(std::move(p), a);
+    }
+    return init.result.get();
+  }
+
+  /// List the entries in a FIFO
+  /// Signature(bs::error_code ec, bs::vector<list_entry> entries, bool more)
+  ///
+  /// More is true if entries beyond the last exist.
+  /// The list entries are of the form:
+  /// data - Contents of the entry
+  /// marker - String representing the position of this entry within the FIFO.
+  /// mtime - Time (on the OSD) at which the entry was pushed.
+  template<typename CT>
+  auto list(int max_entries, //< Maximum number of entries to fetch
+            /// Optionally, a marker indicating the position after
+            /// which to begin listing. If null, begin at the tail.
+            std::optional<std::string_view> markstr,
+            CT&& ct //< CompletionToken
+            ) {
+    ba::async_completion<CT, void(bs::error_code,
+                                  std::vector<list_entry>, bool)> init(ct);
+    std::unique_lock l(m);
+    std::int64_t part_num = info.tail_part_num;
+    l.unlock();
+    std::uint64_t ofs = 0;
+    auto a = ba::get_associated_allocator(init.completion_handler);
+    // NOTE(review): no fallback executor is supplied here, unlike
+    // push()/read_meta() which fall back to get_executor() — confirm
+    // this is intended.
+    auto e = ba::get_associated_executor(init.completion_handler);
+
+    if (markstr) {
+      auto marker = to_marker(*markstr);
+      if (!marker) {
+        ldout(r->cct(), 0) << __func__
+                           << "(): failed to parse marker (" << *markstr
+                           << ")" << dendl;
+        e.post(ca::bind_handler(std::move(init.completion_handler),
+                                errc::invalid_marker,
+                                std::vector<list_entry>{}, false), a);
+        return init.result.get();
+      }
+      part_num = marker->num;
+      ofs = marker->ofs;
+    }
+
+    using handler_type = decltype(init.completion_handler);
+    // Lister manages its own lifetime; it frees itself via the
+    // handler's associated allocator when the listing completes.
+    auto ls = ceph::allocate_unique<Lister<handler_type>>(
+      a, this, part_num, ofs, max_entries,
+      std::move(init.completion_handler));
+    ls.release()->list();
+    return init.result.get();
+  }
+
+  /// Trim entries from the tail to the given position
+  /// Signature: (bs::error_code)
+  ///
+  /// 'markstr' must be a marker previously returned by list(); a
+  /// malformed marker completes with errc::invalid_marker.
+  template<typename CT>
+  auto trim(std::string_view markstr, //< Position to which to trim, inclusive
+            CT&& ct //< CompletionToken
+            ) {
+    auto m = to_marker(markstr);
+    ba::async_completion<CT, void(bs::error_code)> init(ct);
+    auto a = ba::get_associated_allocator(init.completion_handler);
+    // NOTE(review): no fallback executor supplied (cf. push()) —
+    // confirm intended.
+    auto e = ba::get_associated_executor(init.completion_handler);
+    if (!m) {
+      ldout(r->cct(), 0) << __func__ << "(): failed to parse marker: marker="
+                         << markstr << dendl;
+      e.post(ca::bind_handler(std::move(init.completion_handler),
+                              errc::invalid_marker), a);
+      return init.result.get();
+    } else {
+      using handler_type = decltype(init.completion_handler);
+      // Trimmer manages its own lifetime, like Lister in list().
+      auto t = ceph::allocate_unique<Trimmer<handler_type>>(
+        a, this, m->num, m->ofs, std::move(init.completion_handler));
+      t.release()->trim();
+    }
+    return init.result.get();
+  }
+
+  /// Get information about a specific partition
+  /// Signature: (bs::error_code, part_info)
+  ///
+  /// part_info has the following entries
+  /// tag - A random string identifying this partition. Used internally
+  /// as a sanity check to make sure operations haven't been misdirected
+  /// params - Data parameters, identical for every partition within a
+  /// FIFO and the same as what is returned from get_part_layout()
+  /// magic - A random magic number, used internally as a prefix to
+  /// every entry stored on the OSD to ensure sync
+  /// min_ofs - Offset of the first entry
+  /// max_ofs - Offset of the highest entry
+  /// min_index - Minimum entry index
+  /// max_index - Maximum entry index
+  /// max_time - Time of the latest push
+  ///
+  /// The difference between ofs and index is that ofs is a byte
+  /// offset. Index is a count. Nothing really uses indices, but
+  /// they're tracked and sanity-checked as an invariant on the OSD.
+  ///
+  /// max_ofs and max_time are the two that have been used externally
+  /// so far.
+  template<typename CT>
+  auto get_part_info(int64_t part_num, // The number of the partition
+                     CT&& ct // CompletionToken
+                     ) {
+
+    ba::async_completion<CT, void(bs::error_code, part_info)> init(ct);
+    // Encodes the GET_PART_INFO request directly rather than going
+    // through the free get_part_info(), since the reply is decoded
+    // by a handler-allocated ExecDecodeCB.
+    fifo::op::get_part_info gpi;
+    cb::list in;
+    encode(gpi, in);
+    ReadOp op;
+    auto e = ba::get_associated_executor(init.completion_handler,
+                                         get_executor());
+    auto a = ba::get_associated_allocator(init.completion_handler);
+    auto reply = ceph::allocate_unique<
+      ExecDecodeCB<fifo::op::get_part_info_reply>>(a);
+
+    op.exec(fifo::op::CLASS, fifo::op::GET_PART_INFO, in,
+            std::ref(*reply));
+    std::unique_lock l(m);
+    auto part_oid = info.part_oid(part_num);
+    l.unlock();
+    r->execute(part_oid, ioc, std::move(op), nullptr,
+               ca::bind_ea(e, a,
+                           PartInfoGetter(std::move(init.completion_handler),
+                                          std::move(reply))));
+    return init.result.get();
+  }
+
+  /// Executor type, for Asio's associated-executor machinery.
+  using executor_type = ba::executor;
+
+  /// Return the default executor, as specified at creation.
+  ba::executor get_executor() const {
+    return executor;
+  }
+
+private:
+ template<typename Handler>
+ friend class detail::JournalProcessor;
+ RADOS* const r;
+ const IOContext ioc;
+ const Object oid;
+ std::mutex m;
+
+ fifo::info info;
+
+ std::uint32_t part_header_size = 0xdeadbeef;
+ std::uint32_t part_entry_overhead = 0xdeadbeef;
+
+ ba::executor executor;
+
+ std::optional<marker> to_marker(std::string_view s);
+
+  /// Destroy and deallocate '*t' using the allocator associated with
+  /// 'handler', rebound to T.
+  ///
+  /// NOTE(review): calls the allocator's member destroy/deallocate
+  /// directly rather than via std::allocator_traits — this requires
+  /// the associated allocator to provide those members; confirm.
+  template<typename Handler, typename T>
+  static void assoc_delete(const Handler& handler, T* t) {
+    typename std::allocator_traits<typename ba::associated_allocator<Handler>::type>
+      ::template rebind_alloc<T> a(
+        ba::get_associated_allocator(handler));
+    a.destroy(t);
+    a.deallocate(t, 1);
+  }
+
+  /// Private constructor: instances are only created through the
+  /// open()/create() factories, which also populate the metadata.
+  FIFO(RADOS& r,
+       IOContext ioc,
+       Object oid,
+       ba::executor executor)
+    : r(&r), ioc(std::move(ioc)), oid(oid), executor(executor) {}
+
+ std::string generate_tag() const;
+
+  /// Callback for ReadOp::exec that stores the exec error code and
+  /// decodes the reply payload into 'result'; decode failures are
+  /// captured in 'ec' as well.
+  template <typename T>
+  struct ExecDecodeCB {
+    bs::error_code ec;
+    T result;
+    void operator()(bs::error_code e, const cb::list& r) {
+      if (e) {
+        ec = e;
+        return;
+      }
+      try {
+        auto p = r.begin();
+        using ceph::decode;
+        decode(result, p);
+      } catch (const cb::error& err) {
+        ec = err.code();
+      }
+    }
+  };
+
+  /// Completion adapter for _read_meta_: merges the exec error with
+  /// the decode error, releases the handler-allocated decoder, then
+  /// invokes the user handler with the reply fields.
+  template<typename Handler>
+  class MetaReader {
+    Handler handler;
+    using allocator_type = boost::asio::associated_allocator_t<Handler>;
+    using decoder_type = ExecDecodeCB<fifo::op::get_meta_reply>;
+    using decoder_ptr = ceph::allocated_unique_ptr<decoder_type, allocator_type>;
+    decoder_ptr decoder;
+  public:
+    MetaReader(Handler&& handler, decoder_ptr&& decoder)
+      : handler(std::move(handler)), decoder(std::move(decoder)) {}
+
+    void operator ()(bs::error_code ec) {
+      if (!ec) {
+        ec = decoder->ec;
+      }
+      auto reply = std::move(decoder->result);
+      decoder.reset(); // free handler-allocated memory before dispatching
+
+      std::move(handler)(ec, std::move(reply.info),
+                         std::move(reply.part_header_size),
+                         std::move(reply.part_entry_overhead));
+    }
+  };
+
+  // Renamed to get around a compiler bug in Bionic that kept
+  // complaining we weren't capturing 'this' to make a static function call.
+  //
+  // Issue a GET_META read on 'oid' and deliver the decoded reply to
+  // 'handler' via MetaReader. The reply buffer is allocated with the
+  // handler's associated allocator and freed before the handler runs.
+  template<typename Handler>
+  static void _read_meta_(RADOS* r, const Object& oid, const IOContext& ioc,
+                          std::optional<fifo::objv> objv,
+                          Handler&& handler, /* error_code, info, uint64,
+                                                uint64 */
+                          std::optional<ba::executor> executor = std::nullopt){
+    fifo::op::get_meta gm;
+
+    gm.version = objv;
+
+    cb::list in;
+    encode(gm, in);
+    ReadOp op;
+
+    auto a = ba::get_associated_allocator(handler);
+    auto reply =
+      ceph::allocate_unique<ExecDecodeCB<fifo::op::get_meta_reply>>(a);
+
+    auto e = ba::get_associated_executor(handler);
+    op.exec(fifo::op::CLASS, fifo::op::GET_META, in, std::ref(*reply));
+    r->execute(oid, ioc, std::move(op), nullptr,
+               ca::bind_ea(e, a, MetaReader(std::move(handler),
+                                            std::move(reply))));
+  };
+
+  /// Refresh this FIFO's cached metadata from the cluster. The cached
+  /// info is only overwritten if the fetched version is the same or
+  /// newer than what we already hold.
+  template<typename Handler>
+  void _read_meta(Handler&& handler /* error_code */) {
+    auto e = ba::get_associated_executor(handler, get_executor());
+    auto a = ba::get_associated_allocator(handler);
+    _read_meta_(r, oid, ioc,
+                nullopt,
+                ca::bind_ea(
+                  e, a,
+                  [this,
+                   handler = std::move(handler)](bs::error_code ec,
+                                                 fifo::info&& info,
+                                                 std::uint64_t phs,
+                                                 std::uint64_t peo) mutable {
+                    std::unique_lock l(m);
+                    if (ec) {
+                      l.unlock();
+                      std::move(handler)(ec);
+                      return;
+                    }
+                    // We have a newer version already!
+                    if (!info.version.same_or_later(this->info.version)) {
+                      l.unlock();
+                      std::move(handler)(bs::error_code{});
+                      return;
+                    }
+                    this->info = std::move(info);
+                    part_header_size = phs;
+                    part_entry_overhead = peo;
+                    l.unlock();
+                    std::move(handler)(bs::error_code{});
+                  }), get_executor());
+  }
+
+ bs::error_code apply_update(fifo::info* info,
+ const fifo::objv& objv,
+ const fifo::update& update);
+
+
+  /// Apply `update` to the FIFO metadata object, conditioned on
+  /// `version`. On success the update is mirrored into the local cache;
+  /// on a version race (operation_canceled, or a failed local apply)
+  /// the cache is re-read and the handler is told canceled=true.
+  /// NOTE(review): the on-wire op is built from the member
+  /// `info.version` (read without holding m) while the `version`
+  /// argument is only used for the local apply_update — confirm this
+  /// asymmetry is intended.
+  template<typename Handler>
+  void _update_meta(const fifo::update& update,
+                    fifo::objv version,
+                    Handler&& handler /* error_code, bool */) {
+    WriteOp op;
+
+    cls::fifo::update_meta(op, info.version, update);
+
+    auto a = ba::get_associated_allocator(handler);
+    auto e = ba::get_associated_executor(handler, get_executor());
+
+    r->execute(
+      oid, ioc, std::move(op),
+      ca::bind_ea(
+        e, a,
+        [this, e, a, version, update,
+         handler = std::move(handler)](bs::error_code ec) mutable {
+          if (ec && ec != bs::errc::operation_canceled) {
+            std::move(handler)(ec, bool{});
+            return;
+          }
+
+          auto canceled = (ec == bs::errc::operation_canceled);
+
+          if (!canceled) {
+            // Keep the local cache consistent with what we just wrote.
+            ec = apply_update(&info,
+                              version,
+                              update);
+            if (ec) {
+              canceled = true;
+            }
+          }
+
+          if (canceled) {
+            // Raced with another writer: refresh the cache and report
+            // cancellation (true) unless the re-read itself fails.
+            _read_meta(
+              ca::bind_ea(
+                e, a,
+                [handler = std::move(handler)](bs::error_code ec) mutable {
+                  std::move(handler)(ec, ec ? false : true);
+                }));
+            return;
+          }
+          std::move(handler)(ec, false);
+          return;
+        }));
+  }
+
+  /// Kick off asynchronous processing of the metadata journal. The
+  /// JournalProcessor owns itself (allocated here, freed in its own
+  /// handle()) and completes `handler` when the journal is drained.
+  /// NOTE(review): the allocator is looked up via std::ref(handler),
+  /// i.e. on the reference_wrapper, which yields the default allocator
+  /// rather than the handler's associated one — confirm intended.
+  template<typename Handler>
+  auto _process_journal(Handler&& handler /* error_code */) {
+    auto a = ba::get_associated_allocator(std::ref(handler));
+    auto j = ceph::allocate_unique<detail::JournalProcessor<Handler>>(
+      a, this, std::move(handler));
+    auto p = j.release();
+    p->process();
+  }
+
+  /// Continuation for _prepare_new_part: retries the journal-entry
+  /// metadata update on races (up to MAX_RACE_RETRIES), short-circuits
+  /// if another client already created the part, and finally hands off
+  /// to _process_journal.
+  template<typename Handler>
+  class NewPartPreparer {
+    FIFO* f;
+    Handler handler;
+    std::vector<fifo::journal_entry> jentries;
+    int i;                              // race-retry counter
+    std::int64_t new_head_part_num;
+
+  public:
+
+    void operator ()(bs::error_code ec, bool canceled) {
+      if (ec) {
+        std::move(handler)(ec);
+        return;
+      }
+
+      if (canceled) {
+        // Snapshot the relevant cached state under the lock.
+        std::unique_lock l(f->m);
+        auto iter = f->info.journal.find(jentries.front().part_num);
+        auto max_push_part_num = f->info.max_push_part_num;
+        auto head_part_num = f->info.head_part_num;
+        auto version = f->info.version;
+        auto found = (iter != f->info.journal.end());
+        l.unlock();
+        if ((max_push_part_num >= jentries.front().part_num &&
+             head_part_num >= new_head_part_num)) {
+          /* raced, but new part was already written */
+          std::move(handler)(bs::error_code{});
+          return;
+        }
+        if (i >= MAX_RACE_RETRIES) {
+          std::move(handler)(errc::raced);
+          return;
+        }
+        if (!found) {
+          // Our journal entry vanished in the race: re-add it and retry.
+          auto e = ba::get_associated_executor(handler, f->get_executor());
+          auto a = ba::get_associated_allocator(handler);
+          f->_update_meta(fifo::update{}
+                          .journal_entries_add(jentries),
+                          version,
+                          ca::bind_ea(
+                            e, a,
+                            NewPartPreparer(f, std::move(handler),
+                                            jentries,
+                                            i + 1, new_head_part_num)));
+          return;
+        }
+        // Fall through. We still need to process the journal.
+      }
+      f->_process_journal(std::move(handler));
+      return;
+    }
+
+    NewPartPreparer(FIFO* f,
+                    Handler&& handler,
+                    std::vector<fifo::journal_entry> jentries,
+                    int i, std::int64_t new_head_part_num)
+      : f(f), handler(std::move(handler)), jentries(std::move(jentries)),
+        i(i), new_head_part_num(new_head_part_num) {}
+  };
+
+  /// Journal the creation of the next part (and, if `is_head`, a
+  /// set_head entry for it), then drive the journal to completion via
+  /// NewPartPreparer.
+  template<typename Handler>
+  void _prepare_new_part(bool is_head,
+                         Handler&& handler /* error_code */) {
+    std::unique_lock l(m);
+    std::vector jentries = { info.next_journal_entry(generate_tag()) };
+    std::int64_t new_head_part_num = info.head_part_num;
+    auto version = info.version;
+
+    if (is_head) {
+      // Add a second journal entry that moves the head to the new part.
+      auto new_head_jentry = jentries.front();
+      new_head_jentry.op = fifo::journal_entry::Op::set_head;
+      new_head_part_num = jentries.front().part_num;
+      jentries.push_back(std::move(new_head_jentry));
+    }
+    l.unlock();
+
+    auto e = ba::get_associated_executor(handler, get_executor());
+    auto a = ba::get_associated_allocator(handler);
+    _update_meta(fifo::update{}.journal_entries_add(jentries),
+                 version,
+                 ca::bind_ea(
+                   e, a,
+                   NewPartPreparer(this, std::move(handler),
+                                   jentries, 0, new_head_part_num)));
+  }
+
+  /// Continuation for _prepare_new_head's head_part_num update: retries
+  /// on races (up to MAX_RACE_RETRIES) until either we or a concurrent
+  /// client has advanced the head to new_head_num.
+  template<typename Handler>
+  class NewHeadPreparer {
+    FIFO* f;
+    Handler handler;
+    int i;                       // race-retry counter
+    std::int64_t new_head_num;
+
+  public:
+
+    void operator ()(bs::error_code ec, bool canceled) {
+      std::unique_lock l(f->m);
+      auto head_part_num = f->info.head_part_num;
+      auto version = f->info.version;
+      l.unlock();
+
+      if (ec) {
+        std::move(handler)(ec);
+        return;
+      }
+      if (canceled) {
+        if (i >= MAX_RACE_RETRIES) {
+          std::move(handler)(errc::raced);
+          return;
+        }
+
+        // Raced, but there's still work to do!
+        if (head_part_num < new_head_num) {
+          auto e = ba::get_associated_executor(handler, f->get_executor());
+          auto a = ba::get_associated_allocator(handler);
+          f->_update_meta(fifo::update{}.head_part_num(new_head_num),
+                          version,
+                          ca::bind_ea(
+                            e, a,
+                            NewHeadPreparer(f, std::move(handler),
+                                            i + 1,
+                                            new_head_num)));
+          return;
+        }
+      }
+      // Either we succeeded, or we were raced by someone who did it for us.
+      std::move(handler)(bs::error_code{});
+      return;
+    }
+
+    NewHeadPreparer(FIFO* f,
+                    Handler&& handler,
+                    int i, std::int64_t new_head_num)
+      : f(f), handler(std::move(handler)), i(i), new_head_num(new_head_num) {}
+  };
+
+  /// Advance the head to the next part. If the cluster has not yet
+  /// created that part (max_push_part_num lags behind), create it first
+  /// via _prepare_new_part; otherwise just bump head_part_num through
+  /// NewHeadPreparer.
+  template<typename Handler>
+  void _prepare_new_head(Handler&& handler /* error_code */) {
+    std::unique_lock l(m);
+    int64_t new_head_num = info.head_part_num + 1;
+    auto max_push_part_num = info.max_push_part_num;
+    auto version = info.version;
+    l.unlock();
+
+    if (max_push_part_num < new_head_num) {
+      auto e = ba::get_associated_executor(handler, get_executor());
+      auto a = ba::get_associated_allocator(handler);
+      _prepare_new_part(
+        true,
+        ca::bind_ea(
+          e, a,
+          [this, new_head_num,
+           handler = std::move(handler)](bs::error_code ec) mutable {
+            if (ec) {
+              std::move(handler)(ec);
+              return;
+            }
+            std::unique_lock l(m);
+            // Snapshot under the lock so the log line below reports a
+            // consistent value.
+            auto max_push_part_num = info.max_push_part_num;
+            l.unlock();
+            if (max_push_part_num < new_head_num) {
+              // Fixed: previously this printed info.max_push_part_num
+              // for the new_head_num= field (copy-paste), and re-read
+              // info after unlocking.
+              ldout(r->cct(), 0)
+                << "ERROR: " << __func__
+                << ": after new part creation: meta_info.max_push_part_num="
+                << max_push_part_num << " new_head_num="
+                << new_head_num << dendl;
+              std::move(handler)(errc::inconsistency);
+            } else {
+              std::move(handler)(bs::error_code{});
+            }
+          }));
+      return;
+    }
+    auto e = ba::get_associated_executor(handler, get_executor());
+    auto a = ba::get_associated_allocator(handler);
+    _update_meta(fifo::update{}.head_part_num(new_head_num),
+                 version,
+                 ca::bind_ea(
+                   e, a,
+                   NewHeadPreparer(this, std::move(handler), 0,
+                                   new_head_num)));
+  }
+
+ template<typename T>
+ struct ExecHandleCB {
+ bs::error_code ec;
+ T result;
+ void operator()(bs::error_code e, const T& t) {
+ if (e) {
+ ec = e;
+ return;
+ }
+ result = t;
+ }
+ };
+
+  /// Intermediate completion handler for push_entries: folds the stored
+  /// per-op error into the operation's error code, frees the
+  /// handler-allocated result buffer, then delivers the count of
+  /// entries pushed.
+  template<typename Handler>
+  class EntryPusher {
+    Handler handler;
+    using allocator_type = boost::asio::associated_allocator_t<Handler>;
+    using decoder_type = ExecHandleCB<int>;
+    using decoder_ptr = ceph::allocated_unique_ptr<decoder_type, allocator_type>;
+    decoder_ptr decoder;
+
+  public:
+
+    EntryPusher(Handler&& handler, decoder_ptr&& decoder)
+      : handler(std::move(handler)), decoder(std::move(decoder)) {}
+
+    void operator ()(bs::error_code ec) {
+      if (!ec) {
+        ec = decoder->ec;
+      }
+      auto reply = std::move(decoder->result);
+      decoder.reset(); // free handler-allocated memory before dispatching
+
+      std::move(handler)(ec, std::move(reply));
+    }
+  };
+
+  /// Push a batch of entries to the current head part. The handler
+  /// receives (error_code, int) where the int is the cls return value
+  /// (number of entries accepted, or negative on part-full).
+  template<typename Handler>
+  auto push_entries(const std::deque<cb::list>& data_bufs,
+                    Handler&& handler /* error_code, int */) {
+    WriteOp op;
+    std::unique_lock l(m);
+    auto head_part_num = info.head_part_num;
+    auto tag = info.head_tag;
+    auto oid = info.part_oid(head_part_num);  // shadows the meta oid member
+    l.unlock();
+
+    auto a = ba::get_associated_allocator(handler);
+    auto reply = ceph::allocate_unique<ExecHandleCB<int>>(a);
+
+    auto e = ba::get_associated_executor(handler, get_executor());
+    push_part(op, tag, data_bufs, std::ref(*reply));
+    return r->execute(oid, ioc, std::move(op),
+                      ca::bind_ea(e, a, EntryPusher(std::move(handler),
+                                                    std::move(reply))));
+  }
+
+  /// Trim entries of one part up to offset `ofs`, optionally matching
+  /// `tag`.
+  /// NOTE(review): unlike the neighboring methods, info.part_oid() is
+  /// read here without holding m — confirm callers guarantee this is
+  /// safe.
+  template<typename CT>
+  auto trim_part(int64_t part_num,
+                 uint64_t ofs,
+                 std::optional<std::string_view> tag,
+                 CT&& ct) {
+    WriteOp op;
+    cls::fifo::trim_part(op, tag, ofs);
+    return r->execute(info.part_oid(part_num), ioc, std::move(op),
+                      std::forward<CT>(ct));
+  }
+
+
+  /// Drives FIFO::push: slices the queued entries into batches that fit
+  /// within the current head part's size budget, pushes each batch, and
+  /// prepares a new head part when the current one fills up
+  /// (result_out_of_range from push_entries).
+  template<typename Handler>
+  class Pusher {
+    FIFO* f;
+    std::deque<cb::list> remaining;  // entries not yet batched
+    std::deque<cb::list> batch;      // entries in flight / to send next
+    int i;                           // race-retry counter
+    Handler handler;
+
+    // Drop the first `successes` entries (already persisted), then
+    // refill the batch from `remaining` up to the part-size budget and
+    // push again — or complete if nothing is left.
+    void prep_then_push(const unsigned successes) {
+      std::unique_lock l(f->m);
+      auto max_part_size = f->info.params.max_part_size;
+      auto part_entry_overhead = f->part_entry_overhead;
+      l.unlock();
+
+      uint64_t batch_len = 0;
+      if (successes > 0) {
+        if (successes == batch.size()) {
+          batch.clear();
+        } else {
+          batch.erase(batch.begin(), batch.begin() + successes);
+          for (const auto& b : batch) {
+            batch_len += b.length() + part_entry_overhead;
+          }
+        }
+      }
+
+      if (batch.empty() && remaining.empty()) {
+        std::move(handler)(bs::error_code{});
+        return;
+      }
+
+      while (!remaining.empty() &&
+             (remaining.front().length() + batch_len <= max_part_size)) {
+
+        /* We can send entries with data_len up to max_entry_size,
+           however, we want to also account the overhead when
+           dealing with multiple entries. Previous check doesn't
+           account for overhead on purpose. */
+        batch_len += remaining.front().length() + part_entry_overhead;
+        batch.push_back(std::move(remaining.front()));
+        remaining.pop_front();
+      }
+      push();
+    }
+
+    // Send the current batch; the next Pusher instance receives the
+    // remaining queue and a copy of the batch (push_entries reads the
+    // member by reference while it is still alive).
+    void push() {
+      auto e = ba::get_associated_executor(handler, f->get_executor());
+      auto a = ba::get_associated_allocator(handler);
+      f->push_entries(batch,
+                      ca::bind_ea(e, a,
+                                  Pusher(f, std::move(remaining),
+                                         batch, i,
+                                         std::move(handler))));
+    }
+
+  public:
+
+    // Initial call!
+    void operator ()() {
+      prep_then_push(0);
+    }
+
+    // Called with response to push_entries
+    void operator ()(bs::error_code ec, int r) {
+      if (ec == bs::errc::result_out_of_range) {
+        // Head part is full: open a new head, then retry this batch.
+        auto e = ba::get_associated_executor(handler, f->get_executor());
+        auto a = ba::get_associated_allocator(handler);
+        f->_prepare_new_head(
+          ca::bind_ea(e, a,
+                      Pusher(f, std::move(remaining),
+                             std::move(batch), i,
+                             std::move(handler))));
+        return;
+      }
+      if (ec) {
+        std::move(handler)(ec);
+        return;
+      }
+      i = 0; // We've made forward progress, so reset the race counter!
+      prep_then_push(r);
+    }
+
+    // Called with response to prepare_new_head
+    void operator ()(bs::error_code ec) {
+      if (ec == bs::errc::operation_canceled) {
+        // NOTE(review): uses i == MAX_RACE_RETRIES where other retry
+        // paths use >= — confirm i cannot step past the limit.
+        if (i == MAX_RACE_RETRIES) {
+          ldout(f->r->cct(), 0)
+            << "ERROR: " << __func__
+            << "(): race check failed too many times, likely a bug" << dendl;
+          std::move(handler)(make_error_code(errc::raced));
+          return;
+        }
+        ++i;
+      } else if (ec) {
+        std::move(handler)(ec);
+        return;
+      }
+
+      if (batch.empty()) {
+        prep_then_push(0);
+        return;
+      } else {
+        push();
+        return;
+      }
+    }
+
+    Pusher(FIFO* f, std::deque<cb::list>&& remaining,
+           std::deque<cb::list> batch, int i,
+           Handler&& handler)
+      : f(f), remaining(std::move(remaining)),
+        batch(std::move(batch)), i(i),
+        handler(std::move(handler)) {}
+  };
+
+  /// Drives FIFO::list: reads entries part by part starting at
+  /// (part_num, ofs) until max_entries are collected or the head part
+  /// is exhausted. Self-owning: allocated by the caller, freed in
+  /// handle() via FIFO::assoc_delete.
+  template<typename Handler>
+  class Lister {
+    FIFO* f;
+    std::vector<list_entry> result;
+    bool more = false;
+    std::int64_t part_num;
+    std::uint64_t ofs;
+    int max_entries;
+    bs::error_code ec_out;
+    std::vector<fifo::part_list_entry> entries;
+    bool part_more = false;
+    bool part_full = false;
+    Handler handler;
+
+    // Complete the operation: free ourselves first, then invoke the
+    // final handler with the collected entries.
+    void handle(bs::error_code ec) {
+      auto h = std::move(handler);
+      auto m = more;
+      auto r = std::move(result);
+
+      FIFO::assoc_delete(h, this);
+      std::move(h)(ec, std::move(r), m);
+    }
+
+  public:
+    Lister(FIFO* f, std::int64_t part_num, std::uint64_t ofs, int max_entries,
+           Handler&& handler)
+      : f(f), part_num(part_num), ofs(ofs), max_entries(max_entries),
+        handler(std::move(handler)) {
+      result.reserve(max_entries);
+    }
+
+
+    Lister(const Lister&) = delete;
+    Lister& operator =(const Lister&) = delete;
+    Lister(Lister&&) = delete;
+    Lister& operator =(Lister&&) = delete;
+
+    void list() {
+      if (max_entries > 0) {
+        ReadOp op;
+        ec_out.clear();
+        part_more = false;
+        part_full = false;
+        entries.clear();
+
+        std::unique_lock l(f->m);
+        auto part_oid = f->info.part_oid(part_num);
+        l.unlock();
+
+        list_part(op,
+                  {},
+                  ofs,
+                  max_entries,
+                  &ec_out,
+                  &entries,
+                  &part_more,
+                  &part_full,
+                  nullptr);
+        auto e = ba::get_associated_executor(handler, f->get_executor());
+        auto a = ba::get_associated_allocator(handler);
+        f->r->execute(
+          part_oid,
+          f->ioc,
+          std::move(op),
+          nullptr,
+          ca::bind_ea(
+            e, a,
+            // unique_ptr guards deletion if the lambda is destroyed
+            // without running; released on entry.
+            [t = std::unique_ptr<Lister>(this), this,
+             part_oid](bs::error_code ec) mutable {
+              t.release();
+              if (ec == bs::errc::no_such_file_or_directory) {
+                auto e = ba::get_associated_executor(handler,
+                                                     f->get_executor());
+                auto a = ba::get_associated_allocator(handler);
+                f->_read_meta(
+                  ca::bind_ea(
+                    e, a,
+                    [this](bs::error_code ec) mutable {
+                      if (ec) {
+                        handle(ec);
+                        return;
+                      }
+
+                      if (part_num < f->info.tail_part_num) {
+                        /* raced with trim? restart */
+                        max_entries += result.size();
+                        result.clear();
+                        part_num = f->info.tail_part_num;
+                        ofs = 0;
+                        list();
+                        // Fixed: without this return we would fall
+                        // through and complete the handler while the
+                        // restarted list() was still in flight,
+                        // double-completing and using freed state.
+                        return;
+                      }
+                      /* assuming part was not written yet, so end of data */
+                      more = false;
+                      handle({});
+                      return;
+                    }));
+                return;
+              }
+              if (ec) {
+                ldout(f->r->cct(), 0)
+                  << __func__
+                  << "(): list_part() on oid=" << part_oid
+                  << " returned ec=" << ec.message() << dendl;
+                handle(ec);
+                return;
+              }
+              if (ec_out) {
+                ldout(f->r->cct(), 0)
+                  << __func__
+                  << "(): list_part() on oid=" << f->info.part_oid(part_num)
+                  << " returned ec=" << ec_out.message() << dendl;
+                handle(ec_out);
+                return;
+              }
+
+              more = part_full || part_more;
+              for (auto& entry : entries) {
+                list_entry e;
+                e.data = std::move(entry.data);
+                e.marker = marker{part_num, entry.ofs}.to_string();
+                e.mtime = entry.mtime;
+                result.push_back(std::move(e));
+              }
+              max_entries -= entries.size();
+              entries.clear();
+              if (max_entries > 0 &&
+                  part_more) {
+                list();
+                return;
+              }
+
+              if (!part_full) { /* head part is not full */
+                handle({});
+                return;
+              }
+              ++part_num;
+              ofs = 0;
+              list();
+            }));
+      } else {
+        handle({});
+        return;
+      }
+    }
+  };
+
+  /// Drives FIFO::trim: removes whole parts strictly before part_num,
+  /// trims part_num up to ofs, then advances tail_part_num in the
+  /// metadata. Self-owning: freed in handle() via FIFO::assoc_delete.
+  template<typename Handler>
+  class Trimmer {
+    FIFO* f;
+    std::int64_t part_num;  // trim target part
+    std::uint64_t ofs;      // trim target offset within part_num
+    Handler handler;
+    std::int64_t pn;        // current part being removed (tail..part_num)
+    int i = 0;              // race-retry counter
+
+    void handle(bs::error_code ec) {
+      auto h = std::move(handler);
+
+      FIFO::assoc_delete(h, this);
+      return std::move(h)(ec);
+    }
+
+    // Persist tail_part_num = part_num, retrying on races.
+    // NOTE(review): the error code from _update_meta is never examined
+    // here, and the `if (canceled)` guards only the retry-limit check
+    // (no braces) — on a persistent error this loops via update() until
+    // the race limit; confirm that is the intended behavior.
+    void update() {
+      std::unique_lock l(f->m);
+      auto objv = f->info.version;
+      l.unlock();
+      auto a = ba::get_associated_allocator(handler);
+      auto e = ba::get_associated_executor(handler, f->get_executor());
+      f->_update_meta(
+        fifo::update{}.tail_part_num(part_num),
+        objv,
+        ca::bind_ea(
+          e, a,
+          [this, t = std::unique_ptr<Trimmer>(this)](bs::error_code ec,
+                                                     bool canceled) mutable {
+            t.release();
+            if (canceled)
+              if (i >= MAX_RACE_RETRIES) {
+                ldout(f->r->cct(), 0)
+                  << "ERROR: " << __func__
+                  << "(): race check failed too many times, likely a bug"
+                  << dendl;
+                handle(errc::raced);
+                return;
+              }
+            std::unique_lock l(f->m);
+            auto tail_part_num = f->info.tail_part_num;
+            l.unlock();
+            if (tail_part_num < part_num) {
+              ++i;
+              update();
+              return;
+            }
+            handle({});
+            return;
+          }));
+    }
+
+  public:
+    Trimmer(FIFO* f, std::int64_t part_num, std::uint64_t ofs,
+            Handler&& handler)
+      : f(f), part_num(part_num), ofs(ofs), handler(std::move(handler)) {
+      std::unique_lock l(f->m);
+      pn = f->info.tail_part_num;
+    }
+
+    void trim() {
+      auto a = ba::get_associated_allocator(handler);
+      auto e = ba::get_associated_executor(handler, f->get_executor());
+      if (pn < part_num) {
+        // Fully trim every part before the target part; ENOENT is fine
+        // (part already gone).
+        std::unique_lock l(f->m);
+        auto max_part_size = f->info.params.max_part_size;
+        l.unlock();
+        f->trim_part(
+          pn, max_part_size, std::nullopt,
+          ca::bind_ea(
+            e, a,
+            [t = std::unique_ptr<Trimmer>(this),
+             this](bs::error_code ec) mutable {
+              t.release();
+              if (ec && ec != bs::errc::no_such_file_or_directory) {
+                ldout(f->r->cct(), 0)
+                  << __func__ << "(): ERROR: trim_part() on part="
+                  << pn << " returned ec=" << ec.message() << dendl;
+                handle(ec);
+                return;
+              }
+              ++pn;
+              trim();
+            }));
+        return;
+      }
+      f->trim_part(
+        part_num, ofs, std::nullopt,
+        ca::bind_ea(
+          e, a,
+          [t = std::unique_ptr<Trimmer>(this),
+           this](bs::error_code ec) mutable {
+            t.release();
+            if (ec && ec != bs::errc::no_such_file_or_directory) {
+              ldout(f->r->cct(), 0)
+                << __func__ << "(): ERROR: trim_part() on part=" << part_num
+                << " returned ec=" << ec.message() << dendl;
+              handle(ec);
+              return;
+            }
+            std::unique_lock l(f->m);
+            auto tail_part_num = f->info.tail_part_num;
+            l.unlock();
+            if (part_num <= tail_part_num) {
+              /* don't need to modify meta info */
+              handle({});
+              return;
+            }
+            update();
+          }));
+    }
+  };
+
+  /// Intermediate completion handler for part-info queries: folds any
+  /// decode error into the operation's error code, frees the
+  /// handler-allocated decode buffer, then delivers the part header.
+  template<typename Handler>
+  class PartInfoGetter {
+    Handler handler;
+    using allocator_type = boost::asio::associated_allocator_t<Handler>;
+    using decoder_type = ExecDecodeCB<fifo::op::get_part_info_reply>;
+    using decoder_ptr = ceph::allocated_unique_ptr<decoder_type, allocator_type>;
+    decoder_ptr decoder;
+  public:
+    PartInfoGetter(Handler&& handler, decoder_ptr&& decoder)
+      : handler(std::move(handler)), decoder(std::move(decoder)) {}
+
+    void operator ()(bs::error_code ec) {
+      if (!ec) {
+        ec = decoder->ec;
+      }
+      auto reply = std::move(decoder->result);
+      decoder.reset(); // free handler-allocated memory before dispatching
+
+      auto p = ca::bind_handler(std::move(handler),
+                                ec, std::move(reply.header));
+      std::move(p)();
+    }
+  };
+
+
+};
+
+namespace detail {
+// Applies every entry in the FIFO's metadata journal (create part,
+// set head, remove part), then writes back the resulting tail/head/max
+// part numbers and removes the processed journal entries, retrying on
+// version races. Self-owning: freed in handle() via FIFO::assoc_delete.
+template<typename Handler>
+class JournalProcessor {
+private:
+  FIFO* const fifo;
+  Handler handler;
+
+  std::vector<fifo::journal_entry> processed;
+  // Private snapshot of the journal taken in the constructor; iter
+  // walks this copy, not the live fifo->info.journal.
+  std::multimap<std::int64_t, fifo::journal_entry> journal;
+  std::multimap<std::int64_t, fifo::journal_entry>::iterator iter;
+  std::int64_t new_tail;
+  std::int64_t new_head;
+  std::int64_t new_max;
+  int race_retries = 0;
+
+  template<typename CT>
+  auto create_part(int64_t part_num, std::string_view tag, CT&& ct) {
+    WriteOp op;
+    op.create(false); /* We don't need exclusivity, part_init ensures
+                         we're creating from the same journal entry. */
+    std::unique_lock l(fifo->m);
+    part_init(op, tag, fifo->info.params);
+    auto oid = fifo->info.part_oid(part_num);
+    l.unlock();
+    return fifo->r->execute(oid, fifo->ioc,
+                            std::move(op), std::forward<CT>(ct));
+  }
+
+  template<typename CT>
+  auto remove_part(int64_t part_num, std::string_view tag, CT&& ct) {
+    WriteOp op;
+    op.remove();
+    std::unique_lock l(fifo->m);
+    auto oid = fifo->info.part_oid(part_num);
+    l.unlock();
+    return fifo->r->execute(oid, fifo->ioc,
+                            std::move(op), std::forward<CT>(ct));
+  }
+
+  // Dispatch one journal entry to its async operation; `pp` is the
+  // continuation (journal_entry_finisher). set_head is a pure metadata
+  // change, so it just posts success.
+  template<typename PP>
+  void process_journal_entry(const fifo::journal_entry& entry,
+                             PP&& pp) {
+    switch (entry.op) {
+    case fifo::journal_entry::Op::unknown:
+      std::move(pp)(errc::inconsistency);
+      return;
+      break;
+
+    case fifo::journal_entry::Op::create:
+      create_part(entry.part_num, entry.part_tag, std::move(pp));
+      return;
+      break;
+    case fifo::journal_entry::Op::set_head:
+      ba::post(ba::get_associated_executor(handler, fifo->get_executor()),
+               [pp = std::move(pp)]() mutable {
+                 std::move(pp)(bs::error_code{});
+               });
+      return;
+      break;
+    case fifo::journal_entry::Op::remove:
+      remove_part(entry.part_num, entry.part_tag, std::move(pp));
+      return;
+      break;
+    }
+    std::move(pp)(errc::inconsistency);
+    return;
+  }
+
+  // Continuation after one journal entry's operation completes: record
+  // its effect on tail/head/max, remember it as processed, and move on
+  // to the next entry.
+  auto journal_entry_finisher(const fifo::journal_entry& entry) {
+    auto a = ba::get_associated_allocator(handler);
+    auto e = ba::get_associated_executor(handler, fifo->get_executor());
+    return
+      ca::bind_ea(
+        e, a,
+        [t = std::unique_ptr<JournalProcessor>(this), this,
+         entry](bs::error_code ec) mutable {
+          t.release();
+          // Removing an already-removed part is not an error.
+          if (entry.op == fifo::journal_entry::Op::remove &&
+              ec == bs::errc::no_such_file_or_directory)
+            ec.clear();
+
+          if (ec) {
+            ldout(fifo->r->cct(), 0)
+              << __func__
+              << "(): ERROR: failed processing journal entry for part="
+              << entry.part_num << " with error " << ec.message()
+              << " Bug or inconsistency." << dendl;
+            handle(errc::inconsistency);
+            return;
+          } else {
+            switch (entry.op) {
+            case fifo::journal_entry::Op::unknown:
+              // Can't happen. Filtered out in process_journal_entry.
+              abort();
+              break;
+
+            case fifo::journal_entry::Op::create:
+              if (entry.part_num > new_max) {
+                new_max = entry.part_num;
+              }
+              break;
+            case fifo::journal_entry::Op::set_head:
+              if (entry.part_num > new_head) {
+                new_head = entry.part_num;
+              }
+              break;
+            case fifo::journal_entry::Op::remove:
+              if (entry.part_num >= new_tail) {
+                new_tail = entry.part_num + 1;
+              }
+              break;
+            }
+            processed.push_back(entry);
+          }
+          ++iter;
+          process();
+        });
+    }
+
+  // Writes the accumulated results back to the metadata object,
+  // pruning already-applied entries and retrying on races. `first`
+  // marks the synthetic initial invocation from postprocess().
+  struct JournalPostprocessor {
+    std::unique_ptr<JournalProcessor> j_;
+    bool first;
+    void operator ()(bs::error_code ec, bool canceled) {
+      std::optional<int64_t> tail_part_num;
+      std::optional<int64_t> head_part_num;
+      std::optional<int64_t> max_part_num;
+
+      // Release ownership: the processor frees itself in handle().
+      auto j = j_.release();
+
+      if (!first && !ec && !canceled) {
+        j->handle({});
+        return;
+      }
+
+      if (canceled) {
+        if (j->race_retries >= MAX_RACE_RETRIES) {
+          ldout(j->fifo->r->cct(), 0) << "ERROR: " << __func__ <<
+            "(): race check failed too many times, likely a bug" << dendl;
+          j->handle(errc::raced);
+          return;
+        }
+
+        ++j->race_retries;
+
+        // Keep only entries still present (and unchanged) in the
+        // freshly re-read journal.
+        std::vector<fifo::journal_entry> new_processed;
+        std::unique_lock l(j->fifo->m);
+        for (auto& e : j->processed) {
+          auto jiter = j->fifo->info.journal.find(e.part_num);
+          /* journal entry was already processed */
+          if (jiter == j->fifo->info.journal.end() ||
+              !(jiter->second == e)) {
+            continue;
+          }
+          new_processed.push_back(e);
+        }
+        j->processed = std::move(new_processed);
+      }
+
+      std::unique_lock l(j->fifo->m);
+      auto objv = j->fifo->info.version;
+      if (j->new_tail > j->fifo->info.tail_part_num) {
+        tail_part_num = j->new_tail;
+      }
+
+      if (j->new_head > j->fifo->info.head_part_num) {
+        head_part_num = j->new_head;
+      }
+
+      if (j->new_max > j->fifo->info.max_push_part_num) {
+        max_part_num = j->new_max;
+      }
+      l.unlock();
+
+      // NOTE(review): head_part_num is not tested here — confirm a
+      // pending head-only update cannot be dropped by this early-out.
+      if (j->processed.empty() &&
+          !tail_part_num &&
+          !max_part_num) {
+        /* nothing to update anymore */
+        j->handle({});
+        return;
+      }
+      auto a = ba::get_associated_allocator(j->handler);
+      auto e = ba::get_associated_executor(j->handler, j->fifo->get_executor());
+      j->fifo->_update_meta(fifo::update{}
+                            .tail_part_num(tail_part_num)
+                            .head_part_num(head_part_num)
+                            .max_push_part_num(max_part_num)
+                            .journal_entries_rm(j->processed),
+                            objv,
+                            ca::bind_ea(
+                              e, a,
+                              JournalPostprocessor{j, false}));
+      return;
+    }
+
+    JournalPostprocessor(JournalProcessor* j, bool first)
+      : j_(j), first(first) {}
+  };
+
+  void postprocess() {
+    if (processed.empty()) {
+      handle({});
+      return;
+    }
+    JournalPostprocessor(this, true)({}, false);
+  }
+
+  // Complete: free ourselves, then dispatch the final handler.
+  void handle(bs::error_code ec) {
+    auto e = ba::get_associated_executor(handler, fifo->get_executor());
+    auto a = ba::get_associated_allocator(handler);
+    auto h = std::move(handler);
+    FIFO::assoc_delete(h, this);
+    e.dispatch(ca::bind_handler(std::move(h), ec), a);
+    return;
+  }
+
+public:
+
+  JournalProcessor(FIFO* fifo, Handler&& handler)
+    : fifo(fifo), handler(std::move(handler)) {
+    std::unique_lock l(fifo->m);
+    journal = fifo->info.journal;
+    iter = journal.begin();
+    new_tail = fifo->info.tail_part_num;
+    new_head = fifo->info.head_part_num;
+    new_max = fifo->info.max_push_part_num;
+  }
+
+  JournalProcessor(const JournalProcessor&) = delete;
+  JournalProcessor& operator =(const JournalProcessor&) = delete;
+  JournalProcessor(JournalProcessor&&) = delete;
+  JournalProcessor& operator =(JournalProcessor&&) = delete;
+
+  // Process the next journal entry, or write back results when done.
+  void process() {
+    if (iter != journal.end()) {
+      const auto entry = iter->second;
+      process_journal_entry(entry,
+                            journal_entry_finisher(entry));
+      return;
+    } else {
+      postprocess();
+      return;
+    }
+  }
+};
+}
+}
+
+#endif // CEPH_RADOS_CLS_FIFIO_H
$<TARGET_PROPERTY:GTest::GTest,INTERFACE_INCLUDE_DIRECTORIES>)
add_subdirectory(cls_hello)
+# The cls_fifo tests use neorados coroutine support, which requires
+# Boost.Context; skip the test subdirectory when it is unavailable.
+if(WITH_BOOST_CONTEXT)
add_subdirectory(cls_fifo)
+endif()
add_subdirectory(cls_lock)
add_subdirectory(cls_cas)
add_subdirectory(cls_log)
+# Test now links against the neorados-based fifo client instead of the
+# old librados cls_fifo_client, and needs spawn's headers for the
+# coroutine yield contexts.
+if(WITH_BOOST_CONTEXT)
add_executable(ceph_test_cls_fifo
  test_cls_fifo.cc
  )
+target_include_directories(ceph_test_cls_fifo PRIVATE
+  $<TARGET_PROPERTY:spawn,INTERFACE_INCLUDE_DIRECTORIES>)
target_link_libraries(ceph_test_cls_fifo
-  cls_fifo_client
-  librados
-  global
+  neorados_cls_fifo
+  libneorados
+  spawn
  ${UNITTEST_LIBS}
  ${BLKID_LIBRARIES}
  ${CMAKE_DL_LIBS}
  ${CRYPTO_LIBS}
  ${EXTRALIBS}
-  radostest-cxx
+  neoradostest-support
  )
install(TARGETS
  ceph_test_cls_fifo
  DESTINATION ${CMAKE_INSTALL_BINDIR})
+
+# Benchmark driver for the neorados fifo client.
+add_executable(ceph_bench_cls_fifo
+  bench_cls_fifo.cc
+  )
+target_include_directories(ceph_bench_cls_fifo PRIVATE
+  $<TARGET_PROPERTY:spawn,INTERFACE_INCLUDE_DIRECTORIES>)
+target_link_libraries(ceph_bench_cls_fifo
+  neorados_cls_fifo
+  libneorados
+  spawn
+  ${UNITTEST_LIBS}
+  ${BLKID_LIBRARIES}
+  ${CMAKE_DL_LIBS}
+  ${CRYPTO_LIBS}
+  ${EXTRALIBS}
+  )
+# Fixed: this block previously installed ceph_test_cls_fifo a second
+# time, leaving the benchmark binary uninstalled.
+install(TARGETS
+  ceph_bench_cls_fifo
+  DESTINATION ${CMAKE_INSTALL_BINDIR})
+endif()
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2020 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#include <cerrno>
+#include <chrono>
+#include <cstdint>
+#include <exception>
+#include <future>
+#include <iostream>
+#include <string_view>
+
+#include <boost/asio.hpp>
+#include <boost/system/error_code.hpp>
+#include <boost/program_options.hpp>
+
+#undef FMT_HEADER_ONLY
+#define FMT_HEADER_ONLY 1
+#include <fmt/chrono.h>
+#include <fmt/format.h>
+#include <fmt/ostream.h>
+
+#include <spawn/spawn.hpp>
+
+#include "include/neorados/RADOS.hpp"
+
+#include "neorados/cls/fifo.h"
+
+namespace ba = boost::asio;
+namespace bs = boost::system;
+namespace bpo = boost::program_options;
+namespace cb = ceph::buffer;
+namespace R = neorados;
+namespace RCf = neorados::cls::fifo;
+namespace fifo = rados::cls::fifo;
+namespace s = spawn;
+namespace sc = std::chrono;
+
+namespace {
+// Operation-selection bit flags; PUSH and PULL combine into BOTH, the
+// rest are standalone commands.
+static constexpr auto PUSH = 0x01 << 0;
+static constexpr auto PULL = 0x01 << 1;
+static constexpr auto BOTH = PUSH | PULL;
+static constexpr auto CLEAN = 0x01 << 2;
+static constexpr auto METADATA = 0x01 << 3;
+static constexpr auto PARTINFO = 0x01 << 4;
+static constexpr auto LIST = 0x01 << 5;
+
+// Throughput record: how many entries were moved and how long it took.
+struct benchmark {
+  std::uint32_t entries = 0;
+  sc::duration<double> elapsed = 0ns;
+
+  // Entries per second, truncated to an integer. elapsed is clamped to
+  // at least 1ns so a zero duration cannot divide by zero.
+  std::uint64_t ratio() const {
+    return entries/std::max(elapsed,
+                            sc::duration<double>(1ns)).count();
+  }
+  benchmark() = default;
+  benchmark(std::uint32_t entries, sc::duration<double> elapsed)
+    : entries(entries), elapsed(elapsed) {}
+};
+
+// Push `count` zero-filled entries of `entry_size` bytes into the FIFO,
+// `push_entries` at a time, and return the measured throughput. Only
+// the push calls are timed; buffer setup happens beforehand.
+benchmark push(RCf::FIFO& f, const std::uint32_t count,
+               const std::uint32_t entry_size, const std::uint32_t push_entries,
+               s::yield_context y)
+{
+  cb::list entry;
+  entry.push_back(cb::create_small_page_aligned(entry_size));
+  entry.zero();
+
+  // All batch slots share the same underlying buffer (bufferlists are
+  // refcounted), so this is cheap.
+  std::vector entries(std::min(count, push_entries), entry);
+  auto remaining = count;
+  auto start = sc::steady_clock::now();
+  while (remaining) {
+    if (entries.size() > remaining) {
+      entries.resize(remaining);
+    }
+    f.push(entries, y);
+    remaining -= entries.size();
+  }
+  auto finish = sc::steady_clock::now();
+  return benchmark(count, (finish - start));
+}
+
+// List-and-trim up to `count` entries from the FIFO, `pull_entries` at
+// a time, stopping early if the FIFO empties; returns throughput for
+// the entries actually retrieved.
+// NOTE(review): `remaining -= result.size()` assumes list() never
+// returns more than requested — confirm against the FIFO contract.
+benchmark pull(RCf::FIFO& f, const std::uint32_t count,
+               const std::uint32_t pull_entries, s::yield_context y)
+{
+  auto remaining = count;
+  std::uint32_t got = 0;
+
+  auto start = sc::steady_clock::now();
+  while (remaining) {
+    auto [result, more] = f.list(std::min(remaining, pull_entries),
+                                 std::nullopt, y);
+    if (result.empty())
+      break;
+    got += result.size();
+    remaining -= result.size();
+    f.trim(result.back().marker, y);
+  }
+  auto finish = sc::steady_clock::now();
+  return benchmark(got, (finish - start));
+}
+
+// Thread entry point for the "both" command: opens its own connection
+// and io_context, pulls up to `count` entries (spinning when the FIFO
+// is momentarily empty, on the assumption the pusher will catch up),
+// and reports the result — value or exception — through `notify`.
+// `exit_early` lets the parent request a prompt stop.
+void concurpull(const std::string& oid, const std::int64_t pool,
+                const std::uint32_t count, const std::uint32_t pull_entries,
+                std::promise<benchmark> notify, const bool* const exit_early)
+{
+  ba::io_context c;
+  benchmark bench;
+  std::exception_ptr ex;
+  s::spawn(
+    c,
+    [&](s::yield_context y) {
+      try {
+        auto r = R::RADOS::Builder{}.build(c, y);
+        R::IOContext ioc(pool);
+        auto f = RCf::FIFO::open(r, ioc, oid, y);
+        auto remaining = count;
+        std::uint32_t got = 0;
+
+        auto start = sc::steady_clock::now();
+        while (remaining) {
+          if (*exit_early) break;
+          auto [result, more] =
+            f->list(std::min(remaining, pull_entries), std::nullopt, y);
+          if (result.empty()) {
+            // We just keep going assuming they'll push more.
+            continue;
+          }
+          got += result.size();
+          remaining -= result.size();
+          if (*exit_early) break;
+          f->trim(result.back().marker, y);
+        }
+        auto finish = sc::steady_clock::now();
+        bench.entries = got;
+        bench.elapsed = finish - start;
+      } catch (const std::exception&) {
+        ex = std::current_exception();
+      }
+    });
+  c.run();
+  if (ex) {
+    // Fixed: previously passed std::current_exception() here, which is
+    // null outside a catch block — the captured pointer must be used.
+    notify.set_exception(ex);
+  } else {
+    notify.set_value(bench);
+  }
+}
+
+// Remove every part object belonging to the FIFO (tail..head, when a
+// head exists) and finally the metadata object itself.
+void clean(R::RADOS& r, const R::IOContext& ioc, RCf::FIFO& f,
+           s::yield_context y)
+{
+  // Refresh metadata first so we see the current part range.
+  f.read_meta(y);
+  const auto info = f.meta();
+  if (info.head_part_num > -1) {
+    for (auto i = info.tail_part_num; i <= info.head_part_num; ++i) {
+      R::WriteOp op;
+      op.remove();
+      r.execute(info.part_oid(i), ioc, std::move(op), y);
+    }
+  }
+  R::WriteOp op;
+  op.remove();
+  r.execute(info.id, ioc, std::move(op), y);
+}
+}
+
+int main(int argc, char* argv[])
+{
+ const std::string_view prog(argv[0]);
+ std::string command;
+ try {
+ std::uint32_t count = 0;
+ std::string oid;
+ std::string pool;
+ std::uint32_t entry_size = 0;
+ std::uint32_t push_entries = 0;
+ std::uint32_t pull_entries = 0;
+ std::uint64_t max_part_size = 0;
+ std::uint64_t max_entry_size = 0;
+ std::int64_t part_num = 0;
+ std::string marker;
+
+ bpo::options_description desc(fmt::format("{} options", prog));
+ desc.add_options()
+ ("help", "show help")
+ ("oid", bpo::value<std::string>(&oid)->default_value("fifo"s),
+ "the base oid for the fifo")
+ ("pool", bpo::value<std::string>(&pool)->default_value("fifo_benchmark"s),
+ "the base oid for the fifo")
+ ("count", bpo::value<std::uint32_t>(&count)->default_value(1024),
+ "total count of items")
+ ("entry-size", bpo::value<std::uint32_t>(&entry_size)->default_value(64),
+ "size of entries to push")
+ ("push-entries",
+ bpo::value<std::uint32_t>(&push_entries)
+ ->default_value(512), "entries to push per call")
+ ("max-part-size", bpo::value<std::uint64_t>(&max_part_size)
+ ->default_value(RCf::default_max_part_size),
+ "maximum entry size allowed by FIFO")
+ ("max-entry-size", bpo::value<std::uint64_t>(&max_entry_size)
+ ->default_value(RCf::default_max_entry_size),
+ "maximum entry size allowed by FIFO")
+ ("pull-entries",
+ bpo::value<uint32_t>(&pull_entries)
+ ->default_value(512), "entries to pull per call")
+ ("part-num",
+ bpo::value<int64_t>(&part_num)
+ ->default_value(-1), "partition number, -1 for head")
+ ("marker", bpo::value<std::string>(&marker), "marker to begin list")
+ ("command", bpo::value<std::string>(&command),
+ "the operation to perform");
+
+ bpo::positional_options_description p;
+ p.add("command", 1);
+
+ bpo::variables_map vm;
+
+ bpo::store(bpo::command_line_parser(argc, argv).
+ options(desc).positional(p).run(), vm);
+
+ bpo::notify(vm);
+
+ if (vm.count("help")) {
+ fmt::print(std::cout, "{}", desc);
+ fmt::print(std::cout, "\n{} commands:\n", prog);
+ fmt::print(std::cout, " push\t\t\t push entries into fifo\n");
+ fmt::print(std::cout, " pull\t\t\t retrieve and trim entries\n");
+ fmt::print(std::cout, " both\t\t\t both at once, in two threads\n");
+ fmt::print(std::cout, " metadata\t\t\t print metadata\n");
+      fmt::print(std::cout, "  partinfo\t\t\t print part info\n");
+ fmt::print(std::cout, " list\t\t\t list entries\n");
+ fmt::print(std::cout, " clean\t\t\t clean up\n");
+ return 0;
+ }
+
+
+ if (vm.find("command") == vm.end()) {
+ fmt::print(std::cerr, "{}: a command is required\n", prog);
+ return 1;
+ }
+
+ int op = 0;
+ if (command == "push"s) {
+ op = PUSH;
+ } else if (command == "pull"s) {
+ op = PULL;
+ } else if (command == "both"s) {
+ op = BOTH;
+ } else if (command == "clean"s) {
+ op = CLEAN;
+ } else if (command == "metadata"s) {
+ op = METADATA;
+ } else if (command == "partinfo"s) {
+ op = PARTINFO;
+ } else if (command == "list"s) {
+ op = LIST;
+ } else {
+ fmt::print(std::cerr, "{}: {} is not a valid command\n",
+ prog, command);
+ return 1;
+ }
+
+ if (!(op & PULL) && !vm["pull-entries"].defaulted()) {
+ fmt::print(std::cerr, "{}: pull-entries is only meaningful when pulling\n",
+ prog);
+ return 1;
+ }
+
+ if (!(op & PUSH)) {
+ for (const auto& p : { "entry-size"s, "push-entries"s, "max-part-size"s,
+ "max-entry-size"s }) {
+ if (!vm[p].defaulted()) {
+ fmt::print(std::cerr, "{}: {} is only meaningful when pushing\n",
+ prog, p);
+ return 1;
+ }
+ }
+ }
+
+ if (!(op & BOTH) && !(op & LIST) && !vm["count"].defaulted()) {
+ fmt::print(std::cerr, "{}: count is only meaningful when pulling, pushing, both, or listing\n",
+ prog);
+ return 1;
+ }
+
+ if (!(op & PARTINFO) && !vm["part-num"].defaulted()) {
+ fmt::print(std::cerr, "{}: part-num is only meaningful when getting part info\n",
+ prog);
+ return 1;
+ }
+
+ if (count == 0) {
+ fmt::print(std::cerr, "{}: count must be nonzero\n", prog);
+ return 1;
+ }
+
+ if ((op & PULL) && (pull_entries == 0)) {
+ fmt::print(std::cerr,
+ "{}: pull-entries must be nonzero\n", prog);
+ return 1;
+ }
+
+ if (!(op & LIST) && vm.count("marker") > 0) {
+ fmt::print(std::cerr, "{}: marker is only meaningful when listing\n",
+ prog);
+ return 1;
+ }
+
+ if (op & PUSH) {
+ if (entry_size == 0) {
+ fmt::print(std::cerr, "{}: entry-size must be nonzero\n", prog);
+ return 1;
+ }
+      if (push_entries == 0) {
+ fmt::print(std::cerr, "{}: push-entries must be nonzero\n", prog);
+ return 1;
+ }
+ if (max_entry_size == 0) {
+ fmt::print(std::cerr, "{}: max-entry-size must be nonzero\n", prog);
+ return 1;
+ }
+ if (max_part_size == 0) {
+ fmt::print(std::cerr, "{}: max-part-size must be nonzero\n", prog);
+ return 1;
+ }
+ if (entry_size > max_entry_size) {
+ fmt::print(std::cerr,
+ "{}: entry-size may not be greater than max-entry-size\n",
+ prog);
+ return 1;
+ }
+ if (max_entry_size >= max_part_size) {
+ fmt::print(std::cerr,
+		   "{}: max-entry-size must be less than max-part-size\n",
+ prog);
+ return 1;
+ }
+ }
+
+ ba::io_context c;
+ benchmark pushmark, pullmark;
+ fifo::info meta;
+ fifo::part_header partinfo;
+ bool more = false;
+ std::vector<RCf::list_entry> entries;
+ s::spawn(
+ c,
+ [&](s::yield_context y) {
+ auto r = R::RADOS::Builder{}.build(c, y);
+ bs::error_code ec;
+ std::int64_t pid;
+ pid = r.lookup_pool(pool, y[ec]);
+ if (ec) {
+ r.create_pool(pool, std::nullopt, y);
+ pid = r.lookup_pool(pool, y);
+ }
+ const R::IOContext ioc(pid);
+ auto f = RCf::FIFO::create(r, ioc, oid, y, std::nullopt,
+ std::nullopt, false, max_part_size,
+ max_entry_size);
+
+ switch (op) {
+ case PUSH:
+ pushmark = push(*f, count, entry_size, push_entries, y);
+ break;
+
+ case PULL:
+ pullmark = pull(*f, count, pull_entries, y);
+ break;
+
+ case METADATA:
+ meta = f->meta();
+ break;
+
+ case PARTINFO:
+ meta = f->meta();
+ if (part_num == -1) {
+ part_num = meta.head_part_num;
+ }
+ partinfo = f->get_part_info(part_num, y);
+ break;
+
+ case LIST:
+ if (vm.count("marker") == 0) {
+ std::tie(entries, more) = f->list(count, std::nullopt, y);
+ } else {
+ std::tie(entries, more) = f->list(count, marker, y);
+ }
+ break;
+
+ case BOTH: {
+ std::promise<benchmark> notify;
+ bool exit_early = false;
+
+ auto notifier = notify.get_future();
+ std::thread t(concurpull, oid, pid, count, pull_entries,
+ std::move(notify), &exit_early);
+ t.detach();
+ try {
+ pushmark = push(*f, count, entry_size, push_entries, y);
+ } catch (const std::exception&) {
+ exit_early = true;
+ notifier.wait();
+ throw;
+ }
+ pullmark = notifier.get();
+ }
+ }
+
+ if (op & CLEAN)
+ clean(r, ioc, *f, y);
+ });
+ c.run();
+ if (op & PUSH) {
+ fmt::print("Pushed {} in {} at {}/s\n",
+ pushmark.entries, pushmark.elapsed, pushmark.ratio());
+ }
+ if (op & PULL) {
+ if (pullmark.entries == count) {
+ fmt::print(std::cout, "Pulled {} in {} at {}/s\n",
+ pullmark.entries, pullmark.elapsed, pullmark.ratio());
+ } else {
+ fmt::print(std::cout, "Pulled {} (of {} requested), in {} at {}/s\n",
+ pullmark.entries, count, pullmark.elapsed, pullmark.ratio());
+ }
+ }
+ if (op & METADATA) {
+ fmt::print(std::cout, "Metadata: [{}]\n", meta);
+ }
+ if (op & PARTINFO) {
+ fmt::print(std::cout, "Info for partition {}: [{}]\n", part_num, partinfo);
+ }
+ if (op & LIST) {
+ for (const auto& entry : entries) {
+ fmt::print(std::cout, "{}\t{}\n", entry.marker, entry.mtime);
+ }
+ if (more) {
+ fmt::print(std::cout, "...");
+ }
+ }
+ } catch (const std::exception& e) {
+ if (command.empty()) {
+ fmt::print(std::cerr, "{}: {}\n", prog, e.what());
+ } else {
+ fmt::print(std::cerr, "{}: {}: {}\n", prog, command, e.what());
+ }
+ return 1;
+ }
+
+ return 0;
+}
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
+ * License version 2.1, as published by the Free Software
* Foundation. See file COPYING.
- *
+ *
*/
+#include <cerrno>
#include <iostream>
-#include <errno.h>
+#include <string_view>
-#include "include/types.h"
-#include "include/rados/librados.hpp"
+#include <boost/asio.hpp>
+#include <boost/system/error_code.hpp>
-#include "test/librados/test_cxx.h"
-#include "global/global_context.h"
+#include <spawn/spawn.hpp>
-#include "gtest/gtest.h"
+#include "include/scope_guard.h"
+#include "include/types.h"
+#include "include/neorados/RADOS.hpp"
-using namespace librados;
+#include "cls/fifo/cls_fifo_ops.h"
-#include "cls/fifo/cls_fifo_client.h"
+#include "neorados/cls/fifo.h"
+#include "test/neorados/common_tests.h"
-using namespace rados::cls::fifo;
+#include "gtest/gtest.h"
-static CephContext *cct(librados::IoCtx& ioctx)
+namespace R = neorados;
+namespace ba = boost::asio;
+namespace bs = boost::system;
+namespace cb = ceph::buffer;
+namespace fifo = rados::cls::fifo;
+namespace RCf = neorados::cls::fifo;
+namespace s = spawn;
+
+namespace {
+void fifo_create(R::RADOS& r,
+ const R::IOContext& ioc,
+ const R::Object& oid,
+ std::string_view id,
+ s::yield_context y,
+ std::optional<fifo::objv> objv = std::nullopt,
+ std::optional<std::string_view> oid_prefix = std::nullopt,
+ bool exclusive = false,
+ std::uint64_t max_part_size = RCf::default_max_part_size,
+ std::uint64_t max_entry_size = RCf::default_max_entry_size)
{
- return reinterpret_cast<CephContext *>(ioctx.cct());
+ R::WriteOp op;
+ RCf::create_meta(op, id, objv, oid_prefix, exclusive, max_part_size,
+ max_entry_size);
+ r.execute(oid, ioc, std::move(op), y);
}
-
-static int fifo_create(IoCtx& ioctx,
- const string& oid,
- const string& id,
- const ClsFIFO::MetaCreateParams& params)
-{
- ObjectWriteOperation op;
-
- int r = ClsFIFO::meta_create(&op, id, params);
- if (r < 0) {
- return r;
- }
-
- return ioctx.operate(oid, &op);
}
TEST(ClsFIFO, TestCreate) {
- Rados cluster;
- std::string pool_name = get_temp_pool_name();
- ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
- IoCtx ioctx;
- cluster.ioctx_create(pool_name.c_str(), ioctx);
-
- string fifo_id = "fifo";
- string oid = fifo_id;
-
- ASSERT_EQ(-EINVAL, fifo_create(ioctx, oid, string(),
- ClsFIFO::MetaCreateParams()));
-
- ASSERT_EQ(-EINVAL, fifo_create(ioctx, oid, fifo_id,
- ClsFIFO::MetaCreateParams()
- .max_part_size(0)));
-
- ASSERT_EQ(-EINVAL, fifo_create(ioctx, oid, fifo_id,
- ClsFIFO::MetaCreateParams()
- .max_entry_size(0)));
-
- /* first successful create */
- ASSERT_EQ(0, fifo_create(ioctx, oid, fifo_id,
- ClsFIFO::MetaCreateParams()));
-
- uint64_t size;
- struct timespec ts;
- ASSERT_EQ(0, ioctx.stat2(oid, &size, &ts));
- ASSERT_GT(size, 0);
-
- /* test idempotency */
- ASSERT_EQ(0, fifo_create(ioctx, oid, fifo_id,
- ClsFIFO::MetaCreateParams()));
-
- uint64_t size2;
- struct timespec ts2;
- ASSERT_EQ(0, ioctx.stat2(oid, &size2, &ts2));
- ASSERT_EQ(size2, size);
-
- ASSERT_EQ(-EEXIST, fifo_create(ioctx, oid, fifo_id,
- ClsFIFO::MetaCreateParams()
- .exclusive(true)));
-
- ASSERT_EQ(-EEXIST, fifo_create(ioctx, oid, fifo_id,
- ClsFIFO::MetaCreateParams()
- .oid_prefix("myprefix")
- .exclusive(false)));
-
- ASSERT_EQ(-EEXIST, fifo_create(ioctx, oid, "foo",
- ClsFIFO::MetaCreateParams()
- .exclusive(false)));
-
- ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
+ ba::io_context c;
+ auto fifo_id = "fifo"sv;
+ R::Object oid(fifo_id);
+
+ s::spawn(c, [&](s::yield_context y) {
+ auto r = R::RADOS::Builder{}.build(c, y);
+ auto pool = create_pool(r, get_temp_pool_name(), y);
+ auto sg = make_scope_guard(
+ [&] {
+ r.delete_pool(pool, y);
+ });
+ R::IOContext ioc(pool);
+ bs::error_code ec;
+ fifo_create(r, ioc, oid, ""s, y[ec]);
+ EXPECT_EQ(bs::errc::invalid_argument, ec);
+ fifo_create(r, ioc, oid, fifo_id, y[ec], std::nullopt,
+ std::nullopt, false, 0);
+ EXPECT_EQ(bs::errc::invalid_argument, ec);
+ fifo_create(r, ioc, oid, {}, y[ec],
+ std::nullopt, std::nullopt,
+ false, RCf::default_max_part_size, 0);
+ EXPECT_EQ(bs::errc::invalid_argument, ec);
+ fifo_create(r, ioc, oid, fifo_id, y);
+ {
+ std::uint64_t size;
+ std::uint64_t size2;
+ {
+ R::ReadOp op;
+ op.stat(&size, nullptr);
+ r.execute(oid, ioc, std::move(op),
+ nullptr, y);
+ EXPECT_GT(size, 0);
+ }
+
+ {
+ R::ReadOp op;
+ op.stat(&size2, nullptr);
+ r.execute(oid, ioc, std::move(op), nullptr, y);
+ }
+ EXPECT_EQ(size2, size);
+ }
+ /* test idempotency */
+ fifo_create(r, ioc, oid, fifo_id, y);
+ fifo_create(r, ioc, oid, {}, y[ec], std::nullopt,
+ std::nullopt, false);
+ EXPECT_EQ(bs::errc::invalid_argument, ec);
+ fifo_create(r, ioc, oid, {}, y[ec], std::nullopt,
+ "myprefix"sv, false);
+ EXPECT_EQ(bs::errc::invalid_argument, ec);
+ fifo_create(r, ioc, oid, "foo"sv, y[ec],
+ std::nullopt, std::nullopt, false);
+ EXPECT_EQ(bs::errc::file_exists, ec);
+ });
+ c.run();
}
TEST(ClsFIFO, TestGetInfo) {
- Rados cluster;
- std::string pool_name = get_temp_pool_name();
- ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
- IoCtx ioctx;
- cluster.ioctx_create(pool_name.c_str(), ioctx);
-
- string fifo_id = "fifo";
- string oid = fifo_id;
-
- fifo_info_t info;
-
- /* first successful create */
- ASSERT_EQ(0, fifo_create(ioctx, oid, fifo_id,
- ClsFIFO::MetaCreateParams()));
-
- uint32_t part_header_size;
- uint32_t part_entry_overhead;
-
- ASSERT_EQ(0, ClsFIFO::meta_get(ioctx, oid,
- ClsFIFO::MetaGetParams(), &info,
- &part_header_size, &part_entry_overhead));
-
- ASSERT_GT(part_header_size, 0);
- ASSERT_GT(part_entry_overhead, 0);
-
- ASSERT_TRUE(!info.objv.instance.empty());
-
- ASSERT_EQ(0, ClsFIFO::meta_get(ioctx, oid,
- ClsFIFO::MetaGetParams()
- .objv(info.objv),
- &info,
- &part_header_size, &part_entry_overhead));
-
- fifo_objv_t objv;
- objv.instance="foo";
- objv.ver = 12;
- ASSERT_EQ(-ECANCELED, ClsFIFO::meta_get(ioctx, oid,
- ClsFIFO::MetaGetParams()
- .objv(objv),
- &info,
- &part_header_size, &part_entry_overhead));
-
- ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
+ ba::io_context c;
+ auto fifo_id = "fifo"sv;
+ R::Object oid(fifo_id);
+
+ s::spawn(c, [&](s::yield_context y) {
+ auto r = R::RADOS::Builder{}.build(c, y);
+ auto pool = create_pool(r, get_temp_pool_name(), y);
+ auto sg = make_scope_guard(
+ [&] {
+ r.delete_pool(pool, y);
+ });
+ R::IOContext ioc(pool);
+ /* first successful create */
+ fifo_create(r, ioc, oid, fifo_id, y);
+
+ fifo::info info;
+ std::uint32_t part_header_size;
+ std::uint32_t part_entry_overhead;
+ {
+ R::ReadOp op;
+ RCf::get_meta(op, std::nullopt,
+ nullptr, &info, &part_header_size,
+ &part_entry_overhead);
+ r.execute(oid, ioc, std::move(op), nullptr, y);
+ EXPECT_GT(part_header_size, 0);
+ EXPECT_GT(part_entry_overhead, 0);
+ EXPECT_FALSE(info.version.instance.empty());
+ }
+ {
+ R::ReadOp op;
+ RCf::get_meta(op, info.version,
+ nullptr, &info, &part_header_size,
+ &part_entry_overhead);
+ r.execute(oid, ioc, std::move(op), nullptr, y);
+ }
+ {
+ R::ReadOp op;
+ fifo::objv objv;
+ objv.instance = "foo";
+ objv.ver = 12;
+ RCf::get_meta(op, objv,
+ nullptr, &info, &part_header_size,
+ &part_entry_overhead);
+ ASSERT_ANY_THROW(r.execute(oid, ioc, std::move(op),
+ nullptr, y));
+ }
+ });
+ c.run();
}
TEST(FIFO, TestOpenDefault) {
- Rados cluster;
- std::string pool_name = get_temp_pool_name();
- ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
- IoCtx ioctx;
- cluster.ioctx_create(pool_name.c_str(), ioctx);
-
- string fifo_id = "fifo";
-
- FIFO fifo(cct(ioctx), fifo_id, &ioctx);
-
- /* pre-open ops that should fail */
- ASSERT_EQ(-EINVAL, fifo.read_meta());
-
- bufferlist bl;
- ASSERT_EQ(-EINVAL, fifo.push(bl));
-
- ASSERT_EQ(-EINVAL, fifo.list(100, nullopt, nullptr, nullptr));
- ASSERT_EQ(-EINVAL, fifo.trim(string()));
-
- ASSERT_EQ(-ENOENT, fifo.open(false));
-
- /* first successful create */
- ASSERT_EQ(0, fifo.open(true));
-
- ASSERT_EQ(0, fifo.read_meta()); /* force reading from backend */
-
- auto info = fifo.get_meta();
-
- ASSERT_EQ(info.id, fifo_id);
-
- ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
+ ba::io_context c;
+ auto fifo_id = "fifo"s;
+
+ s::spawn(c, [&](s::yield_context y) {
+ auto r = R::RADOS::Builder{}.build(c, y);
+ auto pool = create_pool(r, get_temp_pool_name(), y);
+ auto sg = make_scope_guard(
+ [&] {
+ r.delete_pool(pool, y);
+ });
+ R::IOContext ioc(pool);
+ auto fifo = RCf::FIFO::create(r, ioc, fifo_id, y);
+ // force reading from backend
+ fifo->read_meta(y);
+ auto info = fifo->meta();
+ EXPECT_EQ(info.id, fifo_id);
+ });
+ c.run();
}
TEST(FIFO, TestOpenParams) {
- Rados cluster;
- std::string pool_name = get_temp_pool_name();
- ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
- IoCtx ioctx;
- cluster.ioctx_create(pool_name.c_str(), ioctx);
-
- string fifo_id = "fifo";
-
- FIFO fifo(cct(ioctx), fifo_id, &ioctx);
-
- uint64_t max_part_size = 10 * 1024;
- uint64_t max_entry_size = 128;
- string oid_prefix = "foo.123.";
-
- fifo_objv_t objv;
- objv.instance = "fooz";
- objv.ver = 10;
-
-
- /* first successful create */
- ASSERT_EQ(0, fifo.open(true,
- ClsFIFO::MetaCreateParams()
- .max_part_size(max_part_size)
- .max_entry_size(max_entry_size)
- .oid_prefix(oid_prefix)
- .objv(objv)));
-
- ASSERT_EQ(0, fifo.read_meta()); /* force reading from backend */
-
- auto info = fifo.get_meta();
-
- ASSERT_EQ(info.id, fifo_id);
- ASSERT_EQ(info.data_params.max_part_size, max_part_size);
- ASSERT_EQ(info.data_params.max_entry_size, max_entry_size);
- ASSERT_EQ(info.objv, objv);
-
- ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
+ ba::io_context c;
+ auto fifo_id = "fifo"sv;
+
+ s::spawn(c, [&](s::yield_context y) {
+ auto r = R::RADOS::Builder{}.build(c, y);
+ auto pool = create_pool(r, get_temp_pool_name(), y);
+ auto sg = make_scope_guard(
+ [&] {
+ r.delete_pool(pool, y);
+ });
+ R::IOContext ioc(pool);
+
+ const std::uint64_t max_part_size = 10 * 1024;
+ const std::uint64_t max_entry_size = 128;
+ auto oid_prefix = "foo.123."sv;
+ fifo::objv objv;
+ objv.instance = "fooz"s;
+ objv.ver = 10;
+
+ /* first successful create */
+ auto f = RCf::FIFO::create(r, ioc, fifo_id, y, objv, oid_prefix,
+ false, max_part_size,
+ max_entry_size);
+
+
+ /* force reading from backend */
+ f->read_meta(y);
+ auto info = f->meta();
+ ASSERT_EQ(info.id, fifo_id);
+ ASSERT_EQ(info.params.max_part_size, max_part_size);
+ ASSERT_EQ(info.params.max_entry_size, max_entry_size);
+ ASSERT_EQ(info.version, objv);
+ });
+ c.run();
}
-template <class T>
-static int decode_entry(fifo_entry& entry,
- T *val,
- string *marker)
+namespace {
+template<class T>
+std::pair<T, std::string> decode_entry(const RCf::list_entry& entry)
{
- *marker = entry.marker;
+ T val;
auto iter = entry.data.cbegin();
-
- try {
- decode(*val, iter);
- } catch (buffer::error& err) {
- return -EIO;
- }
-
- return 0;
+ decode(val, iter);
+ return std::make_pair(std::move(val), entry.marker);
}
-
-TEST(FIFO, TestPushListTrim) {
- Rados cluster;
- std::string pool_name = get_temp_pool_name();
- ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
- IoCtx ioctx;
- cluster.ioctx_create(pool_name.c_str(), ioctx);
-
- string fifo_id = "fifo";
-
- FIFO fifo(cct(ioctx), fifo_id, &ioctx);
-
- /* first successful create */
- ASSERT_EQ(0, fifo.open(true));
-
- uint32_t max_entries = 10;
-
- for (uint32_t i = 0; i < max_entries; ++i) {
- bufferlist bl;
- encode(i, bl);
- ASSERT_EQ(0, fifo.push(bl));
- }
-
- string marker;
-
- /* get entries one by one */
-
- for (uint32_t i = 0; i < max_entries; ++i) {
- vector<fifo_entry> result;
- bool more;
- ASSERT_EQ(0, fifo.list(1, marker, &result, &more));
-
- bool expected_more = (i != (max_entries - 1));
- ASSERT_EQ(expected_more, more);
- ASSERT_EQ(1, result.size());
-
- uint32_t val;
- ASSERT_EQ(0, decode_entry(result.front(), &val, &marker));
-
- ASSERT_EQ(i, val);
- }
-
- /* get all entries at once */
- vector<fifo_entry> result;
- bool more;
- ASSERT_EQ(0, fifo.list(max_entries * 10, string(), &result, &more));
-
- ASSERT_FALSE(more);
- ASSERT_EQ(max_entries, result.size());
-
- string markers[max_entries];
-
-
- for (uint32_t i = 0; i < max_entries; ++i) {
- uint32_t val;
-
- ASSERT_EQ(0, decode_entry(result[i], &val, &markers[i]));
- ASSERT_EQ(i, val);
- }
-
- uint32_t min_entry = 0;
-
- /* trim one entry */
- fifo.trim(markers[min_entry]);
- ++min_entry;
-
- ASSERT_EQ(0, fifo.list(max_entries * 10, string(), &result, &more));
-
- ASSERT_FALSE(more);
- ASSERT_EQ(max_entries - min_entry, result.size());
-
- for (uint32_t i = min_entry; i < max_entries; ++i) {
- uint32_t val;
-
- ASSERT_EQ(0, decode_entry(result[i - min_entry], &val, &markers[i]));
- ASSERT_EQ(i, val);
- }
-
-
- ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
}
-TEST(FIFO, TestPushTooBig) {
- Rados cluster;
- std::string pool_name = get_temp_pool_name();
- ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
- IoCtx ioctx;
- cluster.ioctx_create(pool_name.c_str(), ioctx);
-
- string fifo_id = "fifo";
-
- FIFO fifo(cct(ioctx), fifo_id, &ioctx);
-
- uint64_t max_part_size = 2048;
- uint64_t max_entry_size = 128;
-
- char buf[max_entry_size + 1];
- memset(buf, 0, sizeof(buf));
- /* first successful create */
- ASSERT_EQ(0, fifo.open(true,
- ClsFIFO::MetaCreateParams()
- .max_part_size(max_part_size)
- .max_entry_size(max_entry_size)));
- bufferlist bl;
- bl.append(buf, sizeof(buf));
-
- ASSERT_EQ(-EINVAL, fifo.push(bl));
-
-
- ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
+TEST(FIFO, TestPushListTrim) {
+ ba::io_context c;
+ auto fifo_id = "fifo"sv;
+
+ s::spawn(c, [&](s::yield_context y) mutable {
+ auto r = R::RADOS::Builder{}.build(c, y);
+ auto pool = create_pool(r, get_temp_pool_name(), y);
+ auto sg = make_scope_guard(
+ [&] {
+ r.delete_pool(pool, y);
+ });
+ R::IOContext ioc(pool);
+ auto f = RCf::FIFO::create(r, ioc, fifo_id, y);
+ static constexpr auto max_entries = 10u;
+ for (uint32_t i = 0; i < max_entries; ++i) {
+ cb::list bl;
+ encode(i, bl);
+ f->push(bl, y);
+ }
+
+ std::optional<std::string> marker;
+ /* get entries one by one */
+
+ for (auto i = 0u; i < max_entries; ++i) {
+ auto [result, more] = f->list(1, marker, y);
+
+ bool expected_more = (i != (max_entries - 1));
+ ASSERT_EQ(expected_more, more);
+ ASSERT_EQ(1, result.size());
+
+ std::uint32_t val;
+ std::tie(val, marker) =
+ decode_entry<std::uint32_t>(result.front());
+
+ ASSERT_EQ(i, val);
+ }
+
+ /* get all entries at once */
+ std::string markers[max_entries];
+ std::uint32_t min_entry = 0;
+ {
+ auto [result, more] = f->list(max_entries * 10, std::nullopt,
+ y);
+
+ ASSERT_FALSE(more);
+ ASSERT_EQ(max_entries, result.size());
+
+
+ for (auto i = 0u; i < max_entries; ++i) {
+ std::uint32_t val;
+
+ std::tie(val, markers[i]) =
+ decode_entry<std::uint32_t>(result[i]);
+ ASSERT_EQ(i, val);
+ }
+
+
+ /* trim one entry */
+ f->trim(markers[min_entry], y);
+ ++min_entry;
+ }
+
+ auto [result, more] = f->list(max_entries * 10,
+ std::nullopt, y);
+
+ ASSERT_FALSE(more);
+ ASSERT_EQ(max_entries - min_entry, result.size());
+
+ for (auto i = min_entry; i < max_entries; ++i) {
+ std::uint32_t val;
+
+ std::tie(val, markers[i - min_entry]) =
+ decode_entry<std::uint32_t>(result[i - min_entry]);
+ ASSERT_EQ(i, val);
+ }
+
+ });
+ c.run();
}
-TEST(FIFO, TestMultipleParts) {
- Rados cluster;
- std::string pool_name = get_temp_pool_name();
- ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
- IoCtx ioctx;
- cluster.ioctx_create(pool_name.c_str(), ioctx);
-
- string fifo_id = "fifo";
-
- FIFO fifo(cct(ioctx), fifo_id, &ioctx);
-
- uint64_t max_part_size = 2048;
- uint64_t max_entry_size = 128;
-
- char buf[max_entry_size];
- memset(buf, 0, sizeof(buf));
-
- /* create */
- ASSERT_EQ(0, fifo.open(true,
- ClsFIFO::MetaCreateParams()
- .max_part_size(max_part_size)
- .max_entry_size(max_entry_size)));
-
- uint32_t part_header_size;
- uint32_t part_entry_overhead;
-
- fifo.get_part_layout_info(&part_header_size, &part_entry_overhead);
-
- int entries_per_part = (max_part_size - part_header_size) / (max_entry_size + part_entry_overhead);
-
- int max_entries = entries_per_part * 4 + 1;
-
- /* push enough entries */
- for (int i = 0; i < max_entries; ++i) {
- bufferlist bl;
-
- *(int *)buf = i;
- bl.append(buf, sizeof(buf));
-
- ASSERT_EQ(0, fifo.push(bl));
- }
-
- auto info = fifo.get_meta();
-
- ASSERT_EQ(info.id, fifo_id);
- ASSERT_GT(info.head_part_num, 0); /* head should have advanced */
-
-
- /* list all at once */
- vector<fifo_entry> result;
- bool more;
- ASSERT_EQ(0, fifo.list(max_entries, string(), &result, &more));
- ASSERT_EQ(false, more);
-
- ASSERT_EQ(max_entries, result.size());
-
- for (int i = 0; i < max_entries; ++i) {
- auto& bl = result[i].data;
- ASSERT_EQ(i, *(int *)bl.c_str());
- }
-
- /* list one at a time */
- string marker;
- for (int i = 0; i < max_entries; ++i) {
- ASSERT_EQ(0, fifo.list(1, marker, &result, &more));
-
- ASSERT_EQ(result.size(), 1);
- bool expected_more = (i != (max_entries - 1));
- ASSERT_EQ(expected_more, more);
-
- auto& entry = result[0];
-
- auto& bl = entry.data;
- marker = entry.marker;
-
- ASSERT_EQ(i, *(int *)bl.c_str());
- }
-
- /* trim one at a time */
- marker.clear();
- for (int i = 0; i < max_entries; ++i) {
- /* read single entry */
- ASSERT_EQ(0, fifo.list(1, marker, &result, &more));
-
- ASSERT_EQ(result.size(), 1);
- bool expected_more = (i != (max_entries - 1));
- ASSERT_EQ(expected_more, more);
-
- marker = result[0].marker;
-
- /* trim */
- ASSERT_EQ(0, fifo.trim(marker));
-
- /* check tail */
- info = fifo.get_meta();
- ASSERT_EQ(info.tail_part_num, i / entries_per_part);
-
- /* try to read all again, see how many entries left */
- ASSERT_EQ(0, fifo.list(max_entries, marker, &result, &more));
- ASSERT_EQ(max_entries - i - 1, result.size());
- ASSERT_EQ(false, more);
- }
-
- /* tail now should point at head */
- info = fifo.get_meta();
- ASSERT_EQ(info.head_part_num, info.tail_part_num);
-
- fifo_part_info part_info;
-
- /* check old tails are removed */
- for (int i = 0; i < info.tail_part_num; ++i) {
- ASSERT_EQ(-ENOENT, fifo.get_part_info(i, &part_info));
- }
-
- /* check curent tail exists */
- ASSERT_EQ(0, fifo.get_part_info(info.tail_part_num, &part_info));
-
- ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
+TEST(FIFO, TestPushTooBig) {
+ ba::io_context c;
+ auto fifo_id = "fifo"sv;
+ static constexpr auto max_part_size = 2048ull;
+ static constexpr auto max_entry_size = 128ull;
+
+ s::spawn(c, [&](s::yield_context y) {
+ auto r = R::RADOS::Builder{}.build(c, y);
+ auto pool = create_pool(r, get_temp_pool_name(), y);
+ auto sg = make_scope_guard(
+ [&] {
+ r.delete_pool(pool, y);
+ });
+ R::IOContext ioc(pool);
+
+ auto f = RCf::FIFO::create(r, ioc, fifo_id, y, std::nullopt,
+ std::nullopt, false, max_part_size,
+ max_entry_size);
+
+ char buf[max_entry_size + 1];
+ memset(buf, 0, sizeof(buf));
+
+ cb::list bl;
+ bl.append(buf, sizeof(buf));
+
+ bs::error_code ec;
+ f->push(bl, y[ec]);
+ EXPECT_EQ(RCf::errc::entry_too_large, ec);
+ });
+ c.run();
}
-TEST(FIFO, TestTwoPushers) {
- Rados cluster;
- std::string pool_name = get_temp_pool_name();
- ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
- IoCtx ioctx;
- cluster.ioctx_create(pool_name.c_str(), ioctx);
-
- string fifo_id = "fifo";
- FIFO fifo(cct(ioctx), fifo_id, &ioctx);
-
- uint64_t max_part_size = 2048;
- uint64_t max_entry_size = 128;
+TEST(FIFO, TestMultipleParts) {
+ ba::io_context c;
+ auto fifo_id = "fifo"sv;
+ static constexpr auto max_part_size = 2048ull;
+ static constexpr auto max_entry_size = 128ull;
- char buf[max_entry_size];
- memset(buf, 0, sizeof(buf));
+ s::spawn(c, [&](s::yield_context y) mutable {
+ auto r = R::RADOS::Builder{}.build(c, y);
+ auto pool = create_pool(r, get_temp_pool_name(), y);
+ auto sg = make_scope_guard(
+ [&] {
+ r.delete_pool(pool, y);
+ });
+ R::IOContext ioc(pool);
+
+ auto f = RCf::FIFO::create(r, ioc, fifo_id, y, std::nullopt,
+ std::nullopt, false, max_part_size,
+ max_entry_size);
+
+
+ char buf[max_entry_size];
+ memset(buf, 0, sizeof(buf));
+
+ const auto [part_header_size, part_entry_overhead] =
+ f->get_part_layout_info();
+
+ const auto entries_per_part =
+ (max_part_size - part_header_size) /
+ (max_entry_size + part_entry_overhead);
+
+ const auto max_entries = entries_per_part * 4 + 1;
+
+ /* push enough entries */
+ for (auto i = 0u; i < max_entries; ++i) {
+ cb::list bl;
+
+ *(int *)buf = i;
+ bl.append(buf, sizeof(buf));
+
+ f->push(bl, y);
+ }
+
+ auto info = f->meta();
+
+ ASSERT_EQ(info.id, fifo_id);
+ /* head should have advanced */
+ ASSERT_GT(info.head_part_num, 0);
+
+
+ /* list all at once */
+ auto [result, more] = f->list(max_entries, std::nullopt, y);
+ EXPECT_EQ(false, more);
+
+ ASSERT_EQ(max_entries, result.size());
+
+ for (auto i = 0u; i < max_entries; ++i) {
+ auto& bl = result[i].data;
+ ASSERT_EQ(i, *(int *)bl.c_str());
+ }
+
+ std::optional<std::string> marker;
+ /* get entries one by one */
+
+ for (auto i = 0u; i < max_entries; ++i) {
+ auto [result, more] = f->list(1, marker, y);
+ ASSERT_EQ(result.size(), 1);
+ const bool expected_more = (i != (max_entries - 1));
+ ASSERT_EQ(expected_more, more);
+
+ std::uint32_t val;
+ std::tie(val, marker) =
+ decode_entry<std::uint32_t>(result.front());
+
+ auto& entry = result.front();
+ auto& bl = entry.data;
+ ASSERT_EQ(i, *(int *)bl.c_str());
+ marker = entry.marker;
+ }
+
+ /* trim one at a time */
+ marker.reset();
+ for (auto i = 0u; i < max_entries; ++i) {
+ /* read single entry */
+ {
+ auto [result, more] = f->list(1, marker, y);
+ ASSERT_EQ(result.size(), 1);
+ const bool expected_more = (i != (max_entries - 1));
+ ASSERT_EQ(expected_more, more);
+
+ marker = result.front().marker;
+
+ f->trim(*marker, y);
+ }
+
+ /* check tail */
+ info = f->meta();
+ ASSERT_EQ(info.tail_part_num, i / entries_per_part);
+
+ /* try to read all again, see how many entries left */
+ auto [result, more] = f->list(max_entries, marker, y);
+ ASSERT_EQ(max_entries - i - 1, result.size());
+ ASSERT_EQ(false, more);
+ }
+
+ /* tail now should point at head */
+ info = f->meta();
+ ASSERT_EQ(info.head_part_num, info.tail_part_num);
+
+ /* check old tails are removed */
+ for (auto i = 0; i < info.tail_part_num; ++i) {
+ bs::error_code ec;
+ f->get_part_info(i, y[ec]);
+ ASSERT_EQ(bs::errc::no_such_file_or_directory, ec);
+ }
+ /* check current tail exists */
+ f->get_part_info(info.tail_part_num, y);
+ });
+ c.run();
+}
- /* create */
- ASSERT_EQ(0, fifo.open(true,
- ClsFIFO::MetaCreateParams()
- .max_part_size(max_part_size)
- .max_entry_size(max_entry_size)));
- uint32_t part_header_size;
- uint32_t part_entry_overhead;
+TEST(FIFO, TestTwoPushers) {
+ ba::io_context c;
+ auto fifo_id = "fifo"sv;
+ static constexpr auto max_part_size = 2048ull;
+ static constexpr auto max_entry_size = 128ull;
- fifo.get_part_layout_info(&part_header_size, &part_entry_overhead);
+ s::spawn(c, [&](s::yield_context y) {
+ auto r = R::RADOS::Builder{}.build(c, y);
+ auto pool = create_pool(r, get_temp_pool_name(), y);
+ auto sg = make_scope_guard(
+ [&] {
+ r.delete_pool(pool, y);
+ });
+ R::IOContext ioc(pool);
- int entries_per_part = (max_part_size - part_header_size) / (max_entry_size + part_entry_overhead);
+ auto f = RCf::FIFO::create(r, ioc, fifo_id, y, std::nullopt,
+ std::nullopt, false, max_part_size,
+ max_entry_size);
- int max_entries = entries_per_part * 4 + 1;
- FIFO fifo2(cct(ioctx), fifo_id, &ioctx);
- /* open second one */
- ASSERT_EQ(0, fifo2.open(true,
- ClsFIFO::MetaCreateParams()));
+ char buf[max_entry_size];
+ memset(buf, 0, sizeof(buf));
- vector<FIFO *> fifos(2);
- fifos[0] = &fifo;
- fifos[1] = &fifo2;
- for (int i = 0; i < max_entries; ++i) {
- bufferlist bl;
+ auto [part_header_size, part_entry_overhead] =
+ f->get_part_layout_info();
- *(int *)buf = i;
- bl.append(buf, sizeof(buf));
+ const auto entries_per_part =
+ (max_part_size - part_header_size) /
+ (max_entry_size + part_entry_overhead);
- auto& f = fifos[i % fifos.size()];
+ const auto max_entries = entries_per_part * 4 + 1;
- ASSERT_EQ(0, f->push(bl));
- }
+ auto f2 = RCf::FIFO::open(r, ioc, fifo_id, y);
- /* list all by both */
- vector<fifo_entry> result;
- bool more;
- ASSERT_EQ(0, fifo2.list(max_entries, string(), &result, &more));
+ std::vector fifos{&f, &f2};
- ASSERT_EQ(false, more);
+ for (auto i = 0u; i < max_entries; ++i) {
+ cb::list bl;
+ *(int *)buf = i;
+ bl.append(buf, sizeof(buf));
- ASSERT_EQ(max_entries, result.size());
+ auto& f = fifos[i % fifos.size()];
- ASSERT_EQ(0, fifo.list(max_entries, string(), &result, &more));
- ASSERT_EQ(false, more);
+ (*f)->push(bl, y);
+ }
- ASSERT_EQ(max_entries, result.size());
+ /* list all by both */
+ {
+ auto [result, more] = f2->list(max_entries, std::nullopt, y);
- for (int i = 0; i < max_entries; ++i) {
- auto& bl = result[i].data;
- ASSERT_EQ(i, *(int *)bl.c_str());
- }
+ ASSERT_EQ(false, more);
+ ASSERT_EQ(max_entries, result.size());
+ }
+      auto [result, more] = f->list(max_entries, std::nullopt, y);
+ ASSERT_EQ(false, more);
+ ASSERT_EQ(max_entries, result.size());
- ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
+ for (auto i = 0u; i < max_entries; ++i) {
+ auto& bl = result[i].data;
+ ASSERT_EQ(i, *(int *)bl.c_str());
+ }
+ });
+ c.run();
}
-TEST(FIFO, TestTwoPushersTrim) {
- Rados cluster;
- std::string pool_name = get_temp_pool_name();
- ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
- IoCtx ioctx;
- cluster.ioctx_create(pool_name.c_str(), ioctx);
-
- string fifo_id = "fifo";
- FIFO fifo1(cct(ioctx), fifo_id, &ioctx);
-
- uint64_t max_part_size = 2048;
- uint64_t max_entry_size = 128;
-
- char buf[max_entry_size];
- memset(buf, 0, sizeof(buf));
-
- /* create */
- ASSERT_EQ(0, fifo1.open(true,
- ClsFIFO::MetaCreateParams()
- .max_part_size(max_part_size)
- .max_entry_size(max_entry_size)));
+TEST(FIFO, TestTwoPushersTrim) {
+ ba::io_context c;
+ auto fifo_id = "fifo"sv;
+ static constexpr auto max_part_size = 2048ull;
+ static constexpr auto max_entry_size = 128ull;
- uint32_t part_header_size;
- uint32_t part_entry_overhead;
+ s::spawn(c, [&](s::yield_context y) {
+ auto r = R::RADOS::Builder{}.build(c, y);
+ auto pool = create_pool(r, get_temp_pool_name(), y);
+ auto sg = make_scope_guard(
+ [&] {
+ r.delete_pool(pool, y);
+ });
+ R::IOContext ioc(pool);
- fifo1.get_part_layout_info(&part_header_size, &part_entry_overhead);
+ auto f1 = RCf::FIFO::create(r, ioc, fifo_id, y, std::nullopt,
+ std::nullopt, false, max_part_size,
+ max_entry_size);
- int entries_per_part = (max_part_size - part_header_size) / (max_entry_size + part_entry_overhead);
+ char buf[max_entry_size];
+ memset(buf, 0, sizeof(buf));
- int max_entries = entries_per_part * 4 + 1;
- FIFO fifo2(cct(ioctx), fifo_id, &ioctx);
+ auto [part_header_size, part_entry_overhead] =
+ f1->get_part_layout_info();
- /* open second one */
- ASSERT_EQ(0, fifo2.open(true,
- ClsFIFO::MetaCreateParams()));
+ const auto entries_per_part =
+ (max_part_size - part_header_size) /
+ (max_entry_size + part_entry_overhead);
- /* push one entry to fifo2 and the rest to fifo1 */
+ const auto max_entries = entries_per_part * 4 + 1;
- for (int i = 0; i < max_entries; ++i) {
- bufferlist bl;
+ auto f2 = RCf::FIFO::open(r, ioc, fifo_id, y);
- *(int *)buf = i;
- bl.append(buf, sizeof(buf));
+ /* push one entry to f2 and the rest to f1 */
- FIFO *f = (i < 1 ? &fifo2 : &fifo1);
+ for (auto i = 0u; i < max_entries; ++i) {
+ cb::list bl;
- ASSERT_EQ(0, f->push(bl));
- }
+ *(int *)buf = i;
+ bl.append(buf, sizeof(buf));
- /* trim half by fifo1 */
- int num = max_entries / 2;
+ auto f = (i < 1 ? &f2 : &f1);
+ (*f)->push(bl, y);
+ }
- vector<fifo_entry> result;
- bool more;
- ASSERT_EQ(0, fifo1.list(num, string(), &result, &more));
+ /* trim half by fifo1 */
+ auto num = max_entries / 2;
- ASSERT_EQ(true, more);
- ASSERT_EQ(num, result.size());
+ std::string marker;
+ {
+ auto [result, more] = f1->list(num, std::nullopt, y);
- for (int i = 0; i < num; ++i) {
- auto& bl = result[i].data;
- ASSERT_EQ(i, *(int *)bl.c_str());
- }
+ ASSERT_EQ(true, more);
+ ASSERT_EQ(num, result.size());
- auto& entry = result[num - 1];
- auto& marker = entry.marker;
+ for (auto i = 0u; i < num; ++i) {
+ auto& bl = result[i].data;
+ ASSERT_EQ(i, *(int *)bl.c_str());
+ }
- ASSERT_EQ(0, fifo1.trim(marker));
+ auto& entry = result[num - 1];
+ marker = entry.marker;
- /* list what's left by fifo2 */
+ f1->trim(marker, y);
- int left = max_entries - num;
+ /* list what's left by fifo2 */
- ASSERT_EQ(0, fifo2.list(left, marker, &result, &more));
- ASSERT_EQ(left, result.size());
- ASSERT_EQ(false, more);
+ }
- for (int i = num; i < max_entries; ++i) {
- auto& bl = result[i - num].data;
- ASSERT_EQ(i, *(int *)bl.c_str());
- }
+ const auto left = max_entries - num;
+ auto [result, more] = f2->list(left, marker, y);
+ ASSERT_EQ(left, result.size());
+ ASSERT_EQ(false, more);
- ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
+ for (auto i = num; i < max_entries; ++i) {
+ auto& bl = result[i - num].data;
+ ASSERT_EQ(i, *(int *)bl.c_str());
+ }
+ });
+ c.run();
}
TEST(FIFO, TestPushBatch) {
- Rados cluster;
- std::string pool_name = get_temp_pool_name();
- ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
- IoCtx ioctx;
- cluster.ioctx_create(pool_name.c_str(), ioctx);
-
- string fifo_id = "fifo";
-
- FIFO fifo(cct(ioctx), fifo_id, &ioctx);
-
- uint64_t max_part_size = 2048;
- uint64_t max_entry_size = 128;
-
- char buf[max_entry_size];
- memset(buf, 0, sizeof(buf));
+ ba::io_context c;
+ auto fifo_id = "fifo"sv;
+ static constexpr auto max_part_size = 2048ull;
+ static constexpr auto max_entry_size = 128ull;
- /* create */
- ASSERT_EQ(0, fifo.open(true,
- ClsFIFO::MetaCreateParams()
- .max_part_size(max_part_size)
- .max_entry_size(max_entry_size)));
+ s::spawn(c, [&](s::yield_context y) {
+ auto r = R::RADOS::Builder{}.build(c, y);
+ auto pool = create_pool(r, get_temp_pool_name(), y);
+ auto sg = make_scope_guard(
+ [&] {
+ r.delete_pool(pool, y);
+ });
+ R::IOContext ioc(pool);
- uint32_t part_header_size;
- uint32_t part_entry_overhead;
+ auto f = RCf::FIFO::create(r, ioc, fifo_id, y, std::nullopt,
+ std::nullopt, false, max_part_size,
+ max_entry_size);
- fifo.get_part_layout_info(&part_header_size, &part_entry_overhead);
- int entries_per_part = (max_part_size - part_header_size) / (max_entry_size + part_entry_overhead);
+ char buf[max_entry_size];
+ memset(buf, 0, sizeof(buf));
- int max_entries = entries_per_part * 4 + 1; /* enough entries to span multiple parts */
+ auto [part_header_size, part_entry_overhead]
+ = f->get_part_layout_info();
- vector<bufferlist> bufs;
+ auto entries_per_part =
+ (max_part_size - part_header_size) /
+ (max_entry_size + part_entry_overhead);
- for (int i = 0; i < max_entries; ++i) {
- bufferlist bl;
+ auto max_entries = entries_per_part * 4 + 1; /* enough entries to span multiple parts */
- *(int *)buf = i;
- bl.append(buf, sizeof(buf));
+ std::vector<cb::list> bufs;
- bufs.push_back(bl);
- }
+ for (auto i = 0u; i < max_entries; ++i) {
+ cb::list bl;
- ASSERT_EQ(0, fifo.push(bufs));
+ *(int *)buf = i;
+ bl.append(buf, sizeof(buf));
- /* list all */
+ bufs.push_back(bl);
+ }
- vector<fifo_entry> result;
- bool more;
- ASSERT_EQ(0, fifo.list(max_entries, string(), &result, &more));
+ f->push(bufs, y);
- ASSERT_EQ(false, more);
- ASSERT_EQ(max_entries, result.size());
+ /* list all */
- for (int i = 0; i < max_entries; ++i) {
- auto& bl = result[i].data;
- ASSERT_EQ(i, *(int *)bl.c_str());
- }
+ auto [result, more] = f->list(max_entries, std::nullopt, y);
+ ASSERT_EQ(false, more);
+ ASSERT_EQ(max_entries, result.size());
- auto& info = fifo.get_meta();
- ASSERT_EQ(info.head_part_num, 4);
+ for (auto i = 0u; i < max_entries; ++i) {
+ auto& bl = result[i].data;
+ ASSERT_EQ(i, *(int *)bl.c_str());
+ }
- ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
+ auto& info = f->meta();
+ ASSERT_EQ(info.head_part_num, 4);
+ });
+ c.run();
}