add_library(crimson-os
- cyan_store.cc
- cyan_collection.cc
- cyan_object.cc
futurized_store.cc
${PROJECT_SOURCE_DIR}/src/os/Transaction.cc)
+add_subdirectory(cyanstore)
target_link_libraries(crimson-os
+ crimson-cyanstore
crimson)
+++ /dev/null
-#include "cyan_collection.h"
-
-#include "cyan_object.h"
-
-namespace crimson::os
-{
-
-Collection::Collection(const coll_t& c)
- : FuturizedCollection{c}
-{}
-
-Collection::~Collection() = default;
-
-Collection::ObjectRef Collection::create_object() const
-{
- return new crimson::os::Object{};
-}
-
-Collection::ObjectRef Collection::get_object(ghobject_t oid)
-{
- auto o = object_hash.find(oid);
- if (o == object_hash.end())
- return ObjectRef();
- return o->second;
-}
-
-Collection::ObjectRef Collection::get_or_create_object(ghobject_t oid)
-{
- auto result = object_hash.emplace(oid, ObjectRef{});
- if (result.second)
- object_map[oid] = result.first->second = create_object();
- return result.first->second;
-}
-
-uint64_t Collection::used_bytes() const
-{
- uint64_t result = 0;
- for (auto& obj : object_map) {
- result += obj.second->get_size();
- }
- return result;
-}
-
-void Collection::encode(bufferlist& bl) const
-{
- ENCODE_START(1, 1, bl);
- encode(xattr, bl);
- encode(use_page_set, bl);
- uint32_t s = object_map.size();
- encode(s, bl);
- for (auto& [oid, obj] : object_map) {
- encode(oid, bl);
- obj->encode(bl);
- }
- ENCODE_FINISH(bl);
-}
-
-void Collection::decode(bufferlist::const_iterator& p)
-{
- DECODE_START(1, p);
- decode(xattr, p);
- decode(use_page_set, p);
- uint32_t s;
- decode(s, p);
- while (s--) {
- ghobject_t k;
- decode(k, p);
- auto o = create_object();
- o->decode(p);
- object_map.insert(make_pair(k, o));
- object_hash.insert(make_pair(k, o));
- }
- DECODE_FINISH(p);
-}
-
-}
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#pragma once
-
-#include <string>
-#include <unordered_map>
-#include <boost/intrusive_ptr.hpp>
-#include <boost/smart_ptr/intrusive_ref_counter.hpp>
-
-#include "include/buffer.h"
-#include "osd/osd_types.h"
-
-#include "futurized_collection.h"
-
-namespace crimson::os {
-
-class Object;
-/**
- * a collection also orders transactions
- *
- * Any transactions queued under a given collection will be applied in
- * sequence. Transactions queued under different collections may run
- * in parallel.
- *
- * ObjectStore users may get collection handles with open_collection() (or,
- * for bootstrapping a new collection, create_new_collection()).
- */
-struct Collection final : public FuturizedCollection {
- using ObjectRef = boost::intrusive_ptr<Object>;
- int bits = 0;
- // always use bufferlist object for testing
- bool use_page_set = false;
- std::unordered_map<ghobject_t, ObjectRef> object_hash; ///< for lookup
- std::map<ghobject_t, ObjectRef> object_map; ///< for iteration
- std::map<std::string,bufferptr> xattr;
- bool exists = true;
-
- Collection(const coll_t& c);
- ~Collection() final;
-
- ObjectRef create_object() const;
- ObjectRef get_object(ghobject_t oid);
- ObjectRef get_or_create_object(ghobject_t oid);
- uint64_t used_bytes() const;
-
- void encode(bufferlist& bl) const;
- void decode(bufferlist::const_iterator& p);
-};
-
-}
+++ /dev/null
-#include "cyan_object.h"
-#include "include/encoding.h"
-
-namespace crimson::os {
-
-size_t Object::get_size() const {
- return data.length();
-}
-
-ceph::bufferlist Object::read(uint64_t offset, uint64_t len)
-{
- bufferlist ret;
- ret.substr_of(data, offset, len);
- return ret;
-}
-
-int Object::write(uint64_t offset, const bufferlist &src)
-{
- unsigned len = src.length();
- // before
- bufferlist newdata;
- if (get_size() >= offset) {
- newdata.substr_of(data, 0, offset);
- } else {
- if (get_size()) {
- newdata.substr_of(data, 0, get_size());
- }
- newdata.append_zero(offset - get_size());
- }
-
- newdata.append(src);
-
- // after
- if (get_size() > offset + len) {
- bufferlist tail;
- tail.substr_of(data, offset + len, get_size() - (offset + len));
- newdata.append(tail);
- }
-
- data.claim(newdata);
- return 0;
-}
-
-int Object::clone(Object *src, uint64_t srcoff, uint64_t len,
- uint64_t dstoff)
-{
- bufferlist bl;
- if (srcoff == dstoff && len == src->get_size()) {
- data = src->data;
- return 0;
- }
- bl.substr_of(src->data, srcoff, len);
- return write(dstoff, bl);
-
-}
-
-int Object::truncate(uint64_t size)
-{
- if (get_size() > size) {
- bufferlist bl;
- bl.substr_of(data, 0, size);
- data.claim(bl);
- } else if (get_size() == size) {
- // do nothing
- } else {
- data.append_zero(size - get_size());
- }
- return 0;
-}
-
-void Object::encode(bufferlist& bl) const {
- ENCODE_START(1, 1, bl);
- encode(data, bl);
- encode(xattr, bl);
- encode(omap_header, bl);
- encode(omap, bl);
- ENCODE_FINISH(bl);
-}
-
-void Object::decode(bufferlist::const_iterator& p) {
- DECODE_START(1, p);
- decode(data, p);
- decode(xattr, p);
- decode(omap_header, p);
- decode(omap, p);
- DECODE_FINISH(p);
-}
-
-}
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-#include <cstddef>
-#include <map>
-#include <string>
-#include <boost/intrusive_ptr.hpp>
-#include <boost/smart_ptr/intrusive_ref_counter.hpp>
-#include "include/buffer.h"
-
-namespace crimson::os {
-
-struct Object : public boost::intrusive_ref_counter<
- Object,
- boost::thread_unsafe_counter>
-{
- using bufferlist = ceph::bufferlist;
-
- bufferlist data;
- // use transparent comparator for better performance, see
- // https://en.cppreference.com/w/cpp/utility/functional/less_void
- std::map<std::string,bufferptr,std::less<>> xattr;
- bufferlist omap_header;
- std::map<std::string,bufferlist> omap;
-
- typedef boost::intrusive_ptr<Object> Ref;
-
- Object() = default;
-
- // interface for object data
- size_t get_size() const;
- ceph::bufferlist read(uint64_t offset, uint64_t len);
- int write(uint64_t offset, const bufferlist &bl);
- int clone(Object *src, uint64_t srcoff, uint64_t len,
- uint64_t dstoff);
- int truncate(uint64_t offset);
-
- void encode(bufferlist& bl) const;
- void decode(bufferlist::const_iterator& p);
-};
-}
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#include "cyan_store.h"
-
-#include <boost/algorithm/string/trim.hpp>
-#include <fmt/format.h>
-#include <fmt/ostream.h>
-
-#include "common/safe_io.h"
-#include "os/Transaction.h"
-
-#include "crimson/common/buffer_io.h"
-#include "crimson/common/config_proxy.h"
-#include "crimson/os/cyan_collection.h"
-#include "crimson/os/cyan_object.h"
-
-namespace {
- seastar::logger& logger() {
- return crimson::get_logger(ceph_subsys_filestore);
- }
-}
-
-using crimson::common::local_conf;
-
-namespace crimson::os {
-
-using ObjectRef = boost::intrusive_ptr<Object>;
-
-CyanStore::CyanStore(const std::string& path)
- : path{path}
-{}
-
-CyanStore::~CyanStore() = default;
-
-seastar::future<> CyanStore::mount()
-{
- ceph::bufferlist bl;
- std::string fn = path + "/collections";
- std::string err;
- if (int r = bl.read_file(fn.c_str(), &err); r < 0) {
- throw std::runtime_error("read_file");
- }
-
- std::set<coll_t> collections;
- auto p = bl.cbegin();
- ceph::decode(collections, p);
-
- for (auto& coll : collections) {
- std::string fn = fmt::format("{}/{}", path, coll);
- ceph::bufferlist cbl;
- if (int r = cbl.read_file(fn.c_str(), &err); r < 0) {
- throw std::runtime_error("read_file");
- }
- boost::intrusive_ptr<Collection> c{new Collection{coll}};
- auto p = cbl.cbegin();
- c->decode(p);
- coll_map[coll] = c;
- used_bytes += c->used_bytes();
- }
- return seastar::now();
-}
-
-seastar::future<> CyanStore::umount()
-{
- return seastar::do_with(std::set<coll_t>{}, [this](auto& collections) {
- return seastar::do_for_each(coll_map, [&collections, this](auto& coll) {
- auto& [col, ch] = coll;
- collections.insert(col);
- ceph::bufferlist bl;
- ceph_assert(ch);
- ch->encode(bl);
- std::string fn = fmt::format("{}/{}", path, col);
- return ceph::buffer::write_file(std::move(bl), fn);
- }).then([&collections, this] {
- ceph::bufferlist bl;
- ceph::encode(collections, bl);
- std::string fn = fmt::format("{}/collections", path);
- return ceph::buffer::write_file(std::move(bl), fn);
- });
- });
-}
-
-seastar::future<> CyanStore::mkfs(uuid_d new_osd_fsid)
-{
- return read_meta("fsid").then([=](auto r, auto fsid_str) {
- if (r == -ENOENT) {
- if (new_osd_fsid.is_zero()) {
- osd_fsid.generate_random();
- } else {
- osd_fsid = new_osd_fsid;
- }
- return write_meta("fsid", fmt::format("{}", osd_fsid));
- } else if (r < 0) {
- throw std::runtime_error("read_meta");
- } else {
- logger().info("{} already has fsid {}", __func__, fsid_str);
- if (!osd_fsid.parse(fsid_str.c_str())) {
- throw std::runtime_error("failed to parse fsid");
- } else if (osd_fsid != new_osd_fsid) {
- logger().error("on-disk fsid {} != provided {}", osd_fsid, new_osd_fsid);
- throw std::runtime_error("unmatched osd_fsid");
- } else {
- return seastar::now();
- }
- }
- }).then([this]{
- std::string fn = path + "/collections";
- ceph::bufferlist bl;
- std::set<coll_t> collections;
- ceph::encode(collections, bl);
- return ceph::buffer::write_file(std::move(bl), fn);
- }).then([this] {
- return write_meta("type", "memstore");
- });
-}
-
-store_statfs_t CyanStore::stat() const
-{
- logger().debug("{}", __func__);
- store_statfs_t st;
- st.total = crimson::common::local_conf().get_val<Option::size_t>("memstore_device_bytes");
- st.available = st.total - used_bytes;
- return st;
-}
-
-seastar::future<std::vector<ghobject_t>, ghobject_t>
-CyanStore::list_objects(CollectionRef ch,
- const ghobject_t& start,
- const ghobject_t& end,
- uint64_t limit) const
-{
- auto c = static_cast<Collection*>(ch.get());
- logger().debug("{} {} {} {} {}",
- __func__, c->get_cid(), start, end, limit);
- std::vector<ghobject_t> objects;
- objects.reserve(limit);
- ghobject_t next = ghobject_t::get_max();
- for (const auto& [oid, obj] :
- boost::make_iterator_range(c->object_map.lower_bound(start),
- c->object_map.end())) {
- std::ignore = obj;
- if (oid >= end || objects.size() >= limit) {
- next = oid;
- break;
- }
- objects.push_back(oid);
- }
- return seastar::make_ready_future<std::vector<ghobject_t>, ghobject_t>(
- std::move(objects), next);
-}
-
-seastar::future<CollectionRef> CyanStore::create_new_collection(const coll_t& cid)
-{
- auto c = new Collection{cid};
- new_coll_map[cid] = c;
- return seastar::make_ready_future<CollectionRef>(c);
-}
-
-seastar::future<CollectionRef> CyanStore::open_collection(const coll_t& cid)
-{
- return seastar::make_ready_future<CollectionRef>(_get_collection(cid));
-}
-
-seastar::future<std::vector<coll_t>> CyanStore::list_collections()
-{
- std::vector<coll_t> collections;
- for (auto& coll : coll_map) {
- collections.push_back(coll.first);
- }
- return seastar::make_ready_future<std::vector<coll_t>>(std::move(collections));
-}
-
-CyanStore::read_errorator::future<ceph::bufferlist> CyanStore::read(
- CollectionRef ch,
- const ghobject_t& oid,
- uint64_t offset,
- size_t len,
- uint32_t op_flags)
-{
- auto c = static_cast<Collection*>(ch.get());
- logger().debug("{} {} {} {}~{}",
- __func__, c->get_cid(), oid, offset, len);
- if (!c->exists) {
- return crimson::ct_error::enoent::make();
- }
- ObjectRef o = c->get_object(oid);
- if (!o) {
- return crimson::ct_error::enoent::make();
- }
- if (offset >= o->get_size())
- return read_errorator::make_ready_future<ceph::bufferlist>();
- size_t l = len;
- if (l == 0 && offset == 0) // note: len == 0 means read the entire object
- l = o->get_size();
- else if (offset + l > o->get_size())
- l = o->get_size() - offset;
- return read_errorator::make_ready_future<ceph::bufferlist>(o->read(offset, l));
-}
-
-CyanStore::get_attr_errorator::future<ceph::bufferptr> CyanStore::get_attr(
- CollectionRef ch,
- const ghobject_t& oid,
- std::string_view name) const
-{
- auto c = static_cast<Collection*>(ch.get());
- logger().debug("{} {} {}",
- __func__, c->get_cid(), oid);
- auto o = c->get_object(oid);
- if (!o) {
- return crimson::ct_error::enoent::make();
- }
- if (auto found = o->xattr.find(name); found != o->xattr.end()) {
- return get_attr_errorator::make_ready_future<ceph::bufferptr>(found->second);
- } else {
- return crimson::ct_error::enodata::make();
- }
-}
-
-seastar::future<CyanStore::attrs_t> CyanStore::get_attrs(CollectionRef ch,
- const ghobject_t& oid)
-{
- auto c = static_cast<Collection*>(ch.get());
- logger().debug("{} {} {}",
- __func__, c->get_cid(), oid);
- auto o = c->get_object(oid);
- if (!o) {
- throw std::runtime_error(fmt::format("object does not exist: {}", oid));
- }
- return seastar::make_ready_future<attrs_t>(o->xattr);
-}
-
-seastar::future<CyanStore::omap_values_t>
-CyanStore::omap_get_values(CollectionRef ch,
- const ghobject_t& oid,
- const omap_keys_t& keys)
-{
- auto c = static_cast<Collection*>(ch.get());
- logger().debug("{} {} {}",
- __func__, c->get_cid(), oid);
- auto o = c->get_object(oid);
- if (!o) {
- throw std::runtime_error(fmt::format("object does not exist: {}", oid));
- }
- omap_values_t values;
- for (auto& key : keys) {
- if (auto found = o->omap.find(key); found != o->omap.end()) {
- values.insert(*found);
- }
- }
- return seastar::make_ready_future<omap_values_t>(std::move(values));
-}
-
-seastar::future<bool, CyanStore::omap_values_t>
-CyanStore::omap_get_values(
- CollectionRef ch,
- const ghobject_t &oid,
- const std::optional<string> &start
- ) {
- auto c = static_cast<Collection*>(ch.get());
- logger().debug(
- "{} {} {}",
- __func__, c->get_cid(), oid);
- auto o = c->get_object(oid);
- if (!o) {
- throw std::runtime_error(fmt::format("object does not exist: {}", oid));
- }
- omap_values_t values;
- for (auto i = start ? o->omap.upper_bound(*start) : o->omap.begin();
- values.size() < MAX_KEYS_PER_OMAP_GET_CALL && i != o->omap.end();
- ++i) {
- values.insert(*i);
- }
- return seastar::make_ready_future<bool, omap_values_t>(
- true, values);
-}
-
-seastar::future<> CyanStore::do_transaction(CollectionRef ch,
- ceph::os::Transaction&& t)
-{
- using ceph::os::Transaction;
- int r = 0;
- try {
- auto i = t.begin();
- while (i.have_op()) {
- r = 0;
- switch (auto op = i.decode_op(); op->op) {
- case Transaction::OP_NOP:
- break;
- case Transaction::OP_REMOVE:
- {
- coll_t cid = i.get_cid(op->cid);
- ghobject_t oid = i.get_oid(op->oid);
- r = _remove(cid, oid);
- if (r == -ENOENT) {
- r = 0;
- }
- }
- break;
- case Transaction::OP_TOUCH:
- {
- coll_t cid = i.get_cid(op->cid);
- ghobject_t oid = i.get_oid(op->oid);
- r = _touch(cid, oid);
- }
- break;
- case Transaction::OP_WRITE:
- {
- coll_t cid = i.get_cid(op->cid);
- ghobject_t oid = i.get_oid(op->oid);
- uint64_t off = op->off;
- uint64_t len = op->len;
- uint32_t fadvise_flags = i.get_fadvise_flags();
- ceph::bufferlist bl;
- i.decode_bl(bl);
- r = _write(cid, oid, off, len, bl, fadvise_flags);
- }
- break;
- case Transaction::OP_TRUNCATE:
- {
- coll_t cid = i.get_cid(op->cid);
- ghobject_t oid = i.get_oid(op->oid);
- uint64_t off = op->off;
- r = _truncate(cid, oid, off);
- }
- break;
- case Transaction::OP_SETATTR:
- {
- coll_t cid = i.get_cid(op->cid);
- ghobject_t oid = i.get_oid(op->oid);
- std::string name = i.decode_string();
- ceph::bufferlist bl;
- i.decode_bl(bl);
- std::map<std::string, bufferptr> to_set;
- to_set[name] = bufferptr(bl.c_str(), bl.length());
- r = _setattrs(cid, oid, to_set);
- }
- break;
- case Transaction::OP_MKCOLL:
- {
- coll_t cid = i.get_cid(op->cid);
- r = _create_collection(cid, op->split_bits);
- }
- break;
- case Transaction::OP_OMAP_SETKEYS:
- {
- coll_t cid = i.get_cid(op->cid);
- ghobject_t oid = i.get_oid(op->oid);
- std::map<std::string, ceph::bufferlist> aset;
- i.decode_attrset(aset);
- r = _omap_set_values(cid, oid, std::move(aset));
- }
- break;
- case Transaction::OP_OMAP_SETHEADER:
- {
- const coll_t &cid = i.get_cid(op->cid);
- const ghobject_t &oid = i.get_oid(op->oid);
- ceph::bufferlist bl;
- i.decode_bl(bl);
- r = _omap_set_header(cid, oid, bl);
- }
- break;
- case Transaction::OP_OMAP_RMKEYS:
- {
- const coll_t &cid = i.get_cid(op->cid);
- const ghobject_t &oid = i.get_oid(op->oid);
- omap_keys_t keys;
- i.decode_keyset(keys);
- r = _omap_rmkeys(cid, oid, keys);
- }
- break;
- case Transaction::OP_OMAP_RMKEYRANGE:
- {
- const coll_t &cid = i.get_cid(op->cid);
- const ghobject_t &oid = i.get_oid(op->oid);
- string first, last;
- first = i.decode_string();
- last = i.decode_string();
- r = _omap_rmkeyrange(cid, oid, first, last);
- }
- break;
- case Transaction::OP_COLL_HINT:
- {
- ceph::bufferlist hint;
- i.decode_bl(hint);
- // ignored
- break;
- }
- default:
- logger().error("bad op {}", static_cast<unsigned>(op->op));
- abort();
- }
- if (r < 0) {
- break;
- }
- }
- } catch (std::exception &e) {
- logger().error("{} got exception {}", __func__, e);
- r = -EINVAL;
- }
- if (r < 0) {
- logger().error(" transaction dump:\n");
- JSONFormatter f(true);
- f.open_object_section("transaction");
- t.dump(&f);
- f.close_section();
- std::stringstream str;
- f.flush(str);
- logger().error("{}", str.str());
- ceph_assert(r == 0);
- }
- for (auto i : {
- t.get_on_applied(),
- t.get_on_commit(),
- t.get_on_applied_sync()}) {
- if (i) {
- i->complete(0);
- }
- }
- return seastar::now();
-}
-
-int CyanStore::_remove(const coll_t& cid, const ghobject_t& oid)
-{
- logger().debug("{} cid={} oid={}",
- __func__, cid, oid);
- auto c = _get_collection(cid);
- if (!c)
- return -ENOENT;
-
- auto i = c->object_hash.find(oid);
- if (i == c->object_hash.end())
- return -ENOENT;
- used_bytes -= i->second->get_size();
- c->object_hash.erase(i);
- c->object_map.erase(oid);
- return 0;
-}
-
-int CyanStore::_touch(const coll_t& cid, const ghobject_t& oid)
-{
- logger().debug("{} cid={} oid={}",
- __func__, cid, oid);
- auto c = _get_collection(cid);
- if (!c)
- return -ENOENT;
-
- c->get_or_create_object(oid);
- return 0;
-}
-
-int CyanStore::_write(const coll_t& cid, const ghobject_t& oid,
- uint64_t offset, size_t len, const ceph::bufferlist& bl,
- uint32_t fadvise_flags)
-{
- logger().debug("{} {} {} {} ~ {}",
- __func__, cid, oid, offset, len);
- assert(len == bl.length());
-
- auto c = _get_collection(cid);
- if (!c)
- return -ENOENT;
-
- ObjectRef o = c->get_or_create_object(oid);
- if (len > 0 && !local_conf()->memstore_debug_omit_block_device_write) {
- const ssize_t old_size = o->get_size();
- o->write(offset, bl);
- used_bytes += (o->get_size() - old_size);
- }
-
- return 0;
-}
-
-int CyanStore::_omap_set_values(
- const coll_t& cid,
- const ghobject_t& oid,
- std::map<std::string, ceph::bufferlist> &&aset)
-{
- logger().debug(
- "{} {} {} {} keys",
- __func__, cid, oid, aset.size());
-
- auto c = _get_collection(cid);
- if (!c)
- return -ENOENT;
-
- ObjectRef o = c->get_or_create_object(oid);
- for (auto &&i: aset) {
- o->omap.insert(std::move(i));
- }
- return 0;
-}
-
-int CyanStore::_omap_set_header(
- const coll_t& cid,
- const ghobject_t& oid,
- const ceph::bufferlist &header)
-{
- logger().debug(
- "{} {} {} {} bytes",
- __func__, cid, oid, header.length());
-
- auto c = _get_collection(cid);
- if (!c)
- return -ENOENT;
-
- ObjectRef o = c->get_or_create_object(oid);
- o->omap_header = header;
- return 0;
-}
-
-int CyanStore::_omap_rmkeys(
- const coll_t& cid,
- const ghobject_t& oid,
- const omap_keys_t& aset)
-{
- logger().debug(
- "{} {} {} {} keys",
- __func__, cid, oid, aset.size());
-
- auto c = _get_collection(cid);
- if (!c)
- return -ENOENT;
-
- ObjectRef o = c->get_or_create_object(oid);
- for (auto &i: aset) {
- o->omap.erase(i);
- }
- return 0;
-}
-
-int CyanStore::_omap_rmkeyrange(
- const coll_t& cid,
- const ghobject_t& oid,
- const std::string &first,
- const std::string &last)
-{
- logger().debug(
- "{} {} {} first={} last={}",
- __func__, cid, oid, first, last);
-
- auto c = _get_collection(cid);
- if (!c)
- return -ENOENT;
-
- ObjectRef o = c->get_or_create_object(oid);
- for (auto i = o->omap.lower_bound(first);
- i != o->omap.end() && i->first <= last;
- o->omap.erase(i++));
- return 0;
-}
-
-int CyanStore::_truncate(const coll_t& cid, const ghobject_t& oid, uint64_t size)
-{
- logger().debug("{} cid={} oid={} size={}",
- __func__, cid, oid, size);
- auto c = _get_collection(cid);
- if (!c)
- return -ENOENT;
-
- ObjectRef o = c->get_object(oid);
- if (!o)
- return -ENOENT;
- if (local_conf()->memstore_debug_omit_block_device_write)
- return 0;
- const ssize_t old_size = o->get_size();
- int r = o->truncate(size);
- used_bytes += (o->get_size() - old_size);
- return r;
-}
-
-int CyanStore::_setattrs(const coll_t& cid, const ghobject_t& oid,
- std::map<std::string,bufferptr>& aset)
-{
- logger().debug("{} cid={} oid={}",
- __func__, cid, oid);
- auto c = _get_collection(cid);
- if (!c)
- return -ENOENT;
-
- ObjectRef o = c->get_object(oid);
- if (!o)
- return -ENOENT;
- for (std::map<std::string, bufferptr>::const_iterator p = aset.begin();
- p != aset.end(); ++p)
- o->xattr[p->first] = p->second;
- return 0;
-}
-
-int CyanStore::_create_collection(const coll_t& cid, int bits)
-{
- auto result = coll_map.try_emplace(cid);
- if (!result.second)
- return -EEXIST;
- auto p = new_coll_map.find(cid);
- assert(p != new_coll_map.end());
- result.first->second = p->second;
- result.first->second->bits = bits;
- new_coll_map.erase(p);
- return 0;
-}
-
-boost::intrusive_ptr<Collection> CyanStore::_get_collection(const coll_t& cid)
-{
- auto cp = coll_map.find(cid);
- if (cp == coll_map.end())
- return {};
- return cp->second;
-}
-
-seastar::future<> CyanStore::write_meta(const std::string& key,
- const std::string& value)
-{
- std::string v = value;
- v += "\n";
- if (int r = safe_write_file(path.c_str(), key.c_str(),
- v.c_str(), v.length(), 0600);
- r < 0) {
- throw std::runtime_error{fmt::format("unable to write_meta({})", key)};
- }
- return seastar::make_ready_future<>();
-}
-
-seastar::future<int, std::string> CyanStore::read_meta(const std::string& key)
-{
- std::string fsid(4096, '\0');
- int r = safe_read_file(path.c_str(), key.c_str(), fsid.data(), fsid.size());
- if (r > 0) {
- fsid.resize(r);
- // drop trailing newlines
- boost::algorithm::trim_right_if(fsid,
- [](unsigned char c) {return isspace(c);});
- } else {
- fsid.clear();
- }
- return seastar::make_ready_future<int, std::string>(r, fsid);
-}
-
-uuid_d CyanStore::get_fsid() const
-{
- return osd_fsid;
-}
-
-unsigned CyanStore::get_max_attr_name_length() const
-{
- // arbitrary limitation exactly like in the case of MemStore.
- return 256;
-}
-}
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#pragma once
-
-#include <string>
-#include <unordered_map>
-#include <map>
-#include <typeinfo>
-#include <vector>
-
-#include <optional>
-#include <seastar/core/future.hh>
-
-#include "osd/osd_types.h"
-#include "include/uuid.h"
-
-#include "futurized_store.h"
-
-namespace ceph::os {
-class Transaction;
-}
-
-namespace crimson::os {
-class Collection;
-
-class CyanStore final : public FuturizedStore {
- constexpr static unsigned MAX_KEYS_PER_OMAP_GET_CALL = 32;
-
- const std::string path;
- std::unordered_map<coll_t, boost::intrusive_ptr<Collection>> coll_map;
- std::map<coll_t, boost::intrusive_ptr<Collection>> new_coll_map;
- uint64_t used_bytes = 0;
- uuid_d osd_fsid;
-
-public:
-
- CyanStore(const std::string& path);
- ~CyanStore() final;
-
- seastar::future<> mount() final;
- seastar::future<> umount() final;
-
- seastar::future<> mkfs(uuid_d new_osd_fsid) final;
- store_statfs_t stat() const final;
-
- read_errorator::future<ceph::bufferlist> read(
- CollectionRef c,
- const ghobject_t& oid,
- uint64_t offset,
- size_t len,
- uint32_t op_flags = 0) final;
- get_attr_errorator::future<ceph::bufferptr> get_attr(
- CollectionRef c,
- const ghobject_t& oid,
- std::string_view name) const final;
- seastar::future<attrs_t> get_attrs(CollectionRef c,
- const ghobject_t& oid) final;
-
- seastar::future<omap_values_t> omap_get_values(
- CollectionRef c,
- const ghobject_t& oid,
- const omap_keys_t& keys) final;
-
- seastar::future<std::vector<ghobject_t>, ghobject_t> list_objects(
- CollectionRef c,
- const ghobject_t& start,
- const ghobject_t& end,
- uint64_t limit) const final;
-
- /// Retrieves paged set of values > start (if present)
- seastar::future<bool, omap_values_t> omap_get_values(
- CollectionRef c, ///< [in] collection
- const ghobject_t &oid, ///< [in] oid
- const std::optional<std::string> &start ///< [in] start, empty for begin
- ) final; ///< @return <done, values> values.empty() iff done
-
- seastar::future<CollectionRef> create_new_collection(const coll_t& cid) final;
- seastar::future<CollectionRef> open_collection(const coll_t& cid) final;
- seastar::future<std::vector<coll_t>> list_collections() final;
-
- seastar::future<> do_transaction(CollectionRef ch,
- ceph::os::Transaction&& txn) final;
-
- seastar::future<> write_meta(const std::string& key,
- const std::string& value) final;
- seastar::future<int, std::string> read_meta(const std::string& key) final;
- uuid_d get_fsid() const final;
- unsigned get_max_attr_name_length() const final;
-
-private:
- int _remove(const coll_t& cid, const ghobject_t& oid);
- int _touch(const coll_t& cid, const ghobject_t& oid);
- int _write(const coll_t& cid, const ghobject_t& oid,
- uint64_t offset, size_t len, const ceph::bufferlist& bl,
- uint32_t fadvise_flags);
- int _omap_set_values(
- const coll_t& cid,
- const ghobject_t& oid,
- std::map<std::string, ceph::bufferlist> &&aset);
- int _omap_set_header(
- const coll_t& cid,
- const ghobject_t& oid,
- const ceph::bufferlist &header);
- int _omap_rmkeys(
- const coll_t& cid,
- const ghobject_t& oid,
- const omap_keys_t& aset);
- int _omap_rmkeyrange(
- const coll_t& cid,
- const ghobject_t& oid,
- const std::string &first,
- const std::string &last);
- int _truncate(const coll_t& cid, const ghobject_t& oid, uint64_t size);
- int _setattrs(const coll_t& cid, const ghobject_t& oid,
- std::map<std::string,bufferptr>& aset);
- int _create_collection(const coll_t& cid, int bits);
- boost::intrusive_ptr<Collection> _get_collection(const coll_t& cid);
-};
-
-}
--- /dev/null
+add_library(crimson-cyanstore
+ cyan_store.cc
+ cyan_collection.cc
+ cyan_object.cc)
+target_link_libraries(crimson-cyanstore
+ crimson)
--- /dev/null
+#include "cyan_collection.h"
+
+#include "cyan_object.h"
+
+namespace crimson::os
+{
+
+Collection::Collection(const coll_t& c)
+ : FuturizedCollection{c}
+{}
+
+Collection::~Collection() = default;
+
+Collection::ObjectRef Collection::create_object() const
+{
+ return new crimson::os::Object{};
+}
+
+Collection::ObjectRef Collection::get_object(ghobject_t oid)
+{
+ auto o = object_hash.find(oid);
+ if (o == object_hash.end())
+ return ObjectRef();
+ return o->second;
+}
+
+Collection::ObjectRef Collection::get_or_create_object(ghobject_t oid)
+{
+ auto result = object_hash.emplace(oid, ObjectRef{});
+ if (result.second)
+ object_map[oid] = result.first->second = create_object();
+ return result.first->second;
+}
+
+uint64_t Collection::used_bytes() const
+{
+ uint64_t result = 0;
+ for (auto& obj : object_map) {
+ result += obj.second->get_size();
+ }
+ return result;
+}
+
+void Collection::encode(bufferlist& bl) const
+{
+ ENCODE_START(1, 1, bl);
+ encode(xattr, bl);
+ encode(use_page_set, bl);
+ uint32_t s = object_map.size();
+ encode(s, bl);
+ for (auto& [oid, obj] : object_map) {
+ encode(oid, bl);
+ obj->encode(bl);
+ }
+ ENCODE_FINISH(bl);
+}
+
+void Collection::decode(bufferlist::const_iterator& p)
+{
+ DECODE_START(1, p);
+ decode(xattr, p);
+ decode(use_page_set, p);
+ uint32_t s;
+ decode(s, p);
+ while (s--) {
+ ghobject_t k;
+ decode(k, p);
+ auto o = create_object();
+ o->decode(p);
+ object_map.insert(make_pair(k, o));
+ object_hash.insert(make_pair(k, o));
+ }
+ DECODE_FINISH(p);
+}
+
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <string>
+#include <unordered_map>
+#include <boost/intrusive_ptr.hpp>
+#include <boost/smart_ptr/intrusive_ref_counter.hpp>
+
+#include "include/buffer.h"
+#include "osd/osd_types.h"
+
+#include "crimson/os/futurized_collection.h"
+
+namespace crimson::os {
+
+class Object;
+/**
+ * a collection also orders transactions
+ *
+ * Any transactions queued under a given collection will be applied in
+ * sequence. Transactions queued under different collections may run
+ * in parallel.
+ *
+ * ObjectStore users may get collection handles with open_collection() (or,
+ * for bootstrapping a new collection, create_new_collection()).
+ */
+struct Collection final : public FuturizedCollection {
+ using ObjectRef = boost::intrusive_ptr<Object>;
+ int bits = 0;
+ // always use bufferlist object for testing
+ bool use_page_set = false;
+ std::unordered_map<ghobject_t, ObjectRef> object_hash; ///< for lookup
+ std::map<ghobject_t, ObjectRef> object_map; ///< for iteration
+ std::map<std::string,bufferptr> xattr;
+ bool exists = true;
+
+ Collection(const coll_t& c);
+ ~Collection() final;
+
+ ObjectRef create_object() const;
+ ObjectRef get_object(ghobject_t oid);
+ ObjectRef get_or_create_object(ghobject_t oid);
+ uint64_t used_bytes() const;
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& p);
+};
+
+}
--- /dev/null
+#include "cyan_object.h"
+#include "include/encoding.h"
+
+namespace crimson::os {
+
+size_t Object::get_size() const {
+ return data.length();
+}
+
+ceph::bufferlist Object::read(uint64_t offset, uint64_t len)
+{
+ bufferlist ret;
+ ret.substr_of(data, offset, len);
+ return ret;
+}
+
+int Object::write(uint64_t offset, const bufferlist &src)
+{
+ unsigned len = src.length();
+ // before
+ bufferlist newdata;
+ if (get_size() >= offset) {
+ newdata.substr_of(data, 0, offset);
+ } else {
+ if (get_size()) {
+ newdata.substr_of(data, 0, get_size());
+ }
+ newdata.append_zero(offset - get_size());
+ }
+
+ newdata.append(src);
+
+ // after
+ if (get_size() > offset + len) {
+ bufferlist tail;
+ tail.substr_of(data, offset + len, get_size() - (offset + len));
+ newdata.append(tail);
+ }
+
+ data.claim(newdata);
+ return 0;
+}
+
+int Object::clone(Object *src, uint64_t srcoff, uint64_t len,
+ uint64_t dstoff)
+{
+ bufferlist bl;
+ if (srcoff == dstoff && len == src->get_size()) {
+ data = src->data;
+ return 0;
+ }
+ bl.substr_of(src->data, srcoff, len);
+ return write(dstoff, bl);
+
+}
+
+int Object::truncate(uint64_t size)
+{
+ if (get_size() > size) {
+ bufferlist bl;
+ bl.substr_of(data, 0, size);
+ data.claim(bl);
+ } else if (get_size() == size) {
+ // do nothing
+ } else {
+ data.append_zero(size - get_size());
+ }
+ return 0;
+}
+
+void Object::encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(data, bl);
+ encode(xattr, bl);
+ encode(omap_header, bl);
+ encode(omap, bl);
+ ENCODE_FINISH(bl);
+}
+
+void Object::decode(bufferlist::const_iterator& p) {
+ DECODE_START(1, p);
+ decode(data, p);
+ decode(xattr, p);
+ decode(omap_header, p);
+ decode(omap, p);
+ DECODE_FINISH(p);
+}
+
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#include <cstddef>
+#include <map>
+#include <string>
+#include <boost/intrusive_ptr.hpp>
+#include <boost/smart_ptr/intrusive_ref_counter.hpp>
+#include "include/buffer.h"
+
+namespace crimson::os {
+
+struct Object : public boost::intrusive_ref_counter<
+ Object,
+ boost::thread_unsafe_counter>
+{
+ using bufferlist = ceph::bufferlist;
+
+ bufferlist data;
+ // use transparent comparator for better performance, see
+ // https://en.cppreference.com/w/cpp/utility/functional/less_void
+ std::map<std::string,bufferptr,std::less<>> xattr;
+ bufferlist omap_header;
+ std::map<std::string,bufferlist> omap;
+
+ typedef boost::intrusive_ptr<Object> Ref;
+
+ Object() = default;
+
+ // interface for object data
+ size_t get_size() const;
+ ceph::bufferlist read(uint64_t offset, uint64_t len);
+ int write(uint64_t offset, const bufferlist &bl);
+ int clone(Object *src, uint64_t srcoff, uint64_t len,
+ uint64_t dstoff);
+ int truncate(uint64_t offset);
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& p);
+};
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "cyan_store.h"
+
+#include <boost/algorithm/string/trim.hpp>
+#include <fmt/format.h>
+#include <fmt/ostream.h>
+
+#include "common/safe_io.h"
+#include "os/Transaction.h"
+
+#include "crimson/common/buffer_io.h"
+#include "crimson/common/config_proxy.h"
+#include "cyan_collection.h"
+#include "cyan_object.h"
+
+namespace {
+ seastar::logger& logger() {
+ return crimson::get_logger(ceph_subsys_filestore);
+ }
+}
+
+using crimson::common::local_conf;
+
+namespace crimson::os {
+
+using ObjectRef = boost::intrusive_ptr<Object>;
+
+CyanStore::CyanStore(const std::string& path)
+ : path{path}
+{}
+
+CyanStore::~CyanStore() = default;
+
+seastar::future<> CyanStore::mount()
+{
+ ceph::bufferlist bl;
+ std::string fn = path + "/collections";
+ std::string err;
+ if (int r = bl.read_file(fn.c_str(), &err); r < 0) {
+ throw std::runtime_error("read_file");
+ }
+
+ std::set<coll_t> collections;
+ auto p = bl.cbegin();
+ ceph::decode(collections, p);
+
+ for (auto& coll : collections) {
+ std::string fn = fmt::format("{}/{}", path, coll);
+ ceph::bufferlist cbl;
+ if (int r = cbl.read_file(fn.c_str(), &err); r < 0) {
+ throw std::runtime_error("read_file");
+ }
+ boost::intrusive_ptr<Collection> c{new Collection{coll}};
+ auto p = cbl.cbegin();
+ c->decode(p);
+ coll_map[coll] = c;
+ used_bytes += c->used_bytes();
+ }
+ return seastar::now();
+}
+
+seastar::future<> CyanStore::umount()
+{
+ return seastar::do_with(std::set<coll_t>{}, [this](auto& collections) {
+ return seastar::do_for_each(coll_map, [&collections, this](auto& coll) {
+ auto& [col, ch] = coll;
+ collections.insert(col);
+ ceph::bufferlist bl;
+ ceph_assert(ch);
+ ch->encode(bl);
+ std::string fn = fmt::format("{}/{}", path, col);
+ return ceph::buffer::write_file(std::move(bl), fn);
+ }).then([&collections, this] {
+ ceph::bufferlist bl;
+ ceph::encode(collections, bl);
+ std::string fn = fmt::format("{}/collections", path);
+ return ceph::buffer::write_file(std::move(bl), fn);
+ });
+ });
+}
+
+seastar::future<> CyanStore::mkfs(uuid_d new_osd_fsid)
+{
+ return read_meta("fsid").then([=](auto r, auto fsid_str) {
+ if (r == -ENOENT) {
+ if (new_osd_fsid.is_zero()) {
+ osd_fsid.generate_random();
+ } else {
+ osd_fsid = new_osd_fsid;
+ }
+ return write_meta("fsid", fmt::format("{}", osd_fsid));
+ } else if (r < 0) {
+ throw std::runtime_error("read_meta");
+ } else {
+ logger().info("{} already has fsid {}", __func__, fsid_str);
+ if (!osd_fsid.parse(fsid_str.c_str())) {
+ throw std::runtime_error("failed to parse fsid");
+ } else if (osd_fsid != new_osd_fsid) {
+ logger().error("on-disk fsid {} != provided {}", osd_fsid, new_osd_fsid);
+ throw std::runtime_error("unmatched osd_fsid");
+ } else {
+ return seastar::now();
+ }
+ }
+ }).then([this]{
+ std::string fn = path + "/collections";
+ ceph::bufferlist bl;
+ std::set<coll_t> collections;
+ ceph::encode(collections, bl);
+ return ceph::buffer::write_file(std::move(bl), fn);
+ }).then([this] {
+ return write_meta("type", "memstore");
+ });
+}
+
+store_statfs_t CyanStore::stat() const
+{
+ logger().debug("{}", __func__);
+ store_statfs_t st;
+ st.total = crimson::common::local_conf().get_val<Option::size_t>("memstore_device_bytes");
+ st.available = st.total - used_bytes;
+ return st;
+}
+
+seastar::future<std::vector<ghobject_t>, ghobject_t>
+CyanStore::list_objects(CollectionRef ch,
+ const ghobject_t& start,
+ const ghobject_t& end,
+ uint64_t limit) const
+{
+ auto c = static_cast<Collection*>(ch.get());
+ logger().debug("{} {} {} {} {}",
+ __func__, c->get_cid(), start, end, limit);
+ std::vector<ghobject_t> objects;
+ objects.reserve(limit);
+ ghobject_t next = ghobject_t::get_max();
+ for (const auto& [oid, obj] :
+ boost::make_iterator_range(c->object_map.lower_bound(start),
+ c->object_map.end())) {
+ std::ignore = obj;
+ if (oid >= end || objects.size() >= limit) {
+ next = oid;
+ break;
+ }
+ objects.push_back(oid);
+ }
+ return seastar::make_ready_future<std::vector<ghobject_t>, ghobject_t>(
+ std::move(objects), next);
+}
+
+seastar::future<CollectionRef> CyanStore::create_new_collection(const coll_t& cid)
+{
+ auto c = new Collection{cid};
+ new_coll_map[cid] = c;
+ return seastar::make_ready_future<CollectionRef>(c);
+}
+
+seastar::future<CollectionRef> CyanStore::open_collection(const coll_t& cid)
+{
+ return seastar::make_ready_future<CollectionRef>(_get_collection(cid));
+}
+
+seastar::future<std::vector<coll_t>> CyanStore::list_collections()
+{
+ std::vector<coll_t> collections;
+ for (auto& coll : coll_map) {
+ collections.push_back(coll.first);
+ }
+ return seastar::make_ready_future<std::vector<coll_t>>(std::move(collections));
+}
+
+CyanStore::read_errorator::future<ceph::bufferlist> CyanStore::read(
+ CollectionRef ch,
+ const ghobject_t& oid,
+ uint64_t offset,
+ size_t len,
+ uint32_t op_flags)
+{
+ auto c = static_cast<Collection*>(ch.get());
+ logger().debug("{} {} {} {}~{}",
+ __func__, c->get_cid(), oid, offset, len);
+ if (!c->exists) {
+ return crimson::ct_error::enoent::make();
+ }
+ ObjectRef o = c->get_object(oid);
+ if (!o) {
+ return crimson::ct_error::enoent::make();
+ }
+ if (offset >= o->get_size())
+ return read_errorator::make_ready_future<ceph::bufferlist>();
+ size_t l = len;
+ if (l == 0 && offset == 0) // note: len == 0 means read the entire object
+ l = o->get_size();
+ else if (offset + l > o->get_size())
+ l = o->get_size() - offset;
+ return read_errorator::make_ready_future<ceph::bufferlist>(o->read(offset, l));
+}
+
+CyanStore::get_attr_errorator::future<ceph::bufferptr> CyanStore::get_attr(
+ CollectionRef ch,
+ const ghobject_t& oid,
+ std::string_view name) const
+{
+ auto c = static_cast<Collection*>(ch.get());
+ logger().debug("{} {} {}",
+ __func__, c->get_cid(), oid);
+ auto o = c->get_object(oid);
+ if (!o) {
+ return crimson::ct_error::enoent::make();
+ }
+ if (auto found = o->xattr.find(name); found != o->xattr.end()) {
+ return get_attr_errorator::make_ready_future<ceph::bufferptr>(found->second);
+ } else {
+ return crimson::ct_error::enodata::make();
+ }
+}
+
+seastar::future<CyanStore::attrs_t> CyanStore::get_attrs(CollectionRef ch,
+ const ghobject_t& oid)
+{
+ auto c = static_cast<Collection*>(ch.get());
+ logger().debug("{} {} {}",
+ __func__, c->get_cid(), oid);
+ auto o = c->get_object(oid);
+ if (!o) {
+ throw std::runtime_error(fmt::format("object does not exist: {}", oid));
+ }
+ return seastar::make_ready_future<attrs_t>(o->xattr);
+}
+
+seastar::future<CyanStore::omap_values_t>
+CyanStore::omap_get_values(CollectionRef ch,
+ const ghobject_t& oid,
+ const omap_keys_t& keys)
+{
+ auto c = static_cast<Collection*>(ch.get());
+ logger().debug("{} {} {}",
+ __func__, c->get_cid(), oid);
+ auto o = c->get_object(oid);
+ if (!o) {
+ throw std::runtime_error(fmt::format("object does not exist: {}", oid));
+ }
+ omap_values_t values;
+ for (auto& key : keys) {
+ if (auto found = o->omap.find(key); found != o->omap.end()) {
+ values.insert(*found);
+ }
+ }
+ return seastar::make_ready_future<omap_values_t>(std::move(values));
+}
+
+seastar::future<bool, CyanStore::omap_values_t>
+CyanStore::omap_get_values(
+ CollectionRef ch,
+ const ghobject_t &oid,
+ const std::optional<string> &start
+ ) {
+ auto c = static_cast<Collection*>(ch.get());
+ logger().debug(
+ "{} {} {}",
+ __func__, c->get_cid(), oid);
+ auto o = c->get_object(oid);
+ if (!o) {
+ throw std::runtime_error(fmt::format("object does not exist: {}", oid));
+ }
+ omap_values_t values;
+ for (auto i = start ? o->omap.upper_bound(*start) : o->omap.begin();
+ values.size() < MAX_KEYS_PER_OMAP_GET_CALL && i != o->omap.end();
+ ++i) {
+ values.insert(*i);
+ }
+ return seastar::make_ready_future<bool, omap_values_t>(
+ true, values);
+}
+
+seastar::future<> CyanStore::do_transaction(CollectionRef ch,
+ ceph::os::Transaction&& t)
+{
+ using ceph::os::Transaction;
+ int r = 0;
+ try {
+ auto i = t.begin();
+ while (i.have_op()) {
+ r = 0;
+ switch (auto op = i.decode_op(); op->op) {
+ case Transaction::OP_NOP:
+ break;
+ case Transaction::OP_REMOVE:
+ {
+ coll_t cid = i.get_cid(op->cid);
+ ghobject_t oid = i.get_oid(op->oid);
+ r = _remove(cid, oid);
+ if (r == -ENOENT) {
+ r = 0;
+ }
+ }
+ break;
+ case Transaction::OP_TOUCH:
+ {
+ coll_t cid = i.get_cid(op->cid);
+ ghobject_t oid = i.get_oid(op->oid);
+ r = _touch(cid, oid);
+ }
+ break;
+ case Transaction::OP_WRITE:
+ {
+ coll_t cid = i.get_cid(op->cid);
+ ghobject_t oid = i.get_oid(op->oid);
+ uint64_t off = op->off;
+ uint64_t len = op->len;
+ uint32_t fadvise_flags = i.get_fadvise_flags();
+ ceph::bufferlist bl;
+ i.decode_bl(bl);
+ r = _write(cid, oid, off, len, bl, fadvise_flags);
+ }
+ break;
+ case Transaction::OP_TRUNCATE:
+ {
+ coll_t cid = i.get_cid(op->cid);
+ ghobject_t oid = i.get_oid(op->oid);
+ uint64_t off = op->off;
+ r = _truncate(cid, oid, off);
+ }
+ break;
+ case Transaction::OP_SETATTR:
+ {
+ coll_t cid = i.get_cid(op->cid);
+ ghobject_t oid = i.get_oid(op->oid);
+ std::string name = i.decode_string();
+ ceph::bufferlist bl;
+ i.decode_bl(bl);
+ std::map<std::string, bufferptr> to_set;
+ to_set[name] = bufferptr(bl.c_str(), bl.length());
+ r = _setattrs(cid, oid, to_set);
+ }
+ break;
+ case Transaction::OP_MKCOLL:
+ {
+ coll_t cid = i.get_cid(op->cid);
+ r = _create_collection(cid, op->split_bits);
+ }
+ break;
+ case Transaction::OP_OMAP_SETKEYS:
+ {
+ coll_t cid = i.get_cid(op->cid);
+ ghobject_t oid = i.get_oid(op->oid);
+ std::map<std::string, ceph::bufferlist> aset;
+ i.decode_attrset(aset);
+ r = _omap_set_values(cid, oid, std::move(aset));
+ }
+ break;
+ case Transaction::OP_OMAP_SETHEADER:
+ {
+ const coll_t &cid = i.get_cid(op->cid);
+ const ghobject_t &oid = i.get_oid(op->oid);
+ ceph::bufferlist bl;
+ i.decode_bl(bl);
+ r = _omap_set_header(cid, oid, bl);
+ }
+ break;
+ case Transaction::OP_OMAP_RMKEYS:
+ {
+ const coll_t &cid = i.get_cid(op->cid);
+ const ghobject_t &oid = i.get_oid(op->oid);
+ omap_keys_t keys;
+ i.decode_keyset(keys);
+ r = _omap_rmkeys(cid, oid, keys);
+ }
+ break;
+ case Transaction::OP_OMAP_RMKEYRANGE:
+ {
+ const coll_t &cid = i.get_cid(op->cid);
+ const ghobject_t &oid = i.get_oid(op->oid);
+ string first, last;
+ first = i.decode_string();
+ last = i.decode_string();
+ r = _omap_rmkeyrange(cid, oid, first, last);
+ }
+ break;
+ case Transaction::OP_COLL_HINT:
+ {
+ ceph::bufferlist hint;
+ i.decode_bl(hint);
+ // ignored
+ break;
+ }
+ default:
+ logger().error("bad op {}", static_cast<unsigned>(op->op));
+ abort();
+ }
+ if (r < 0) {
+ break;
+ }
+ }
+ } catch (std::exception &e) {
+ logger().error("{} got exception {}", __func__, e);
+ r = -EINVAL;
+ }
+ if (r < 0) {
+ logger().error(" transaction dump:\n");
+ JSONFormatter f(true);
+ f.open_object_section("transaction");
+ t.dump(&f);
+ f.close_section();
+ std::stringstream str;
+ f.flush(str);
+ logger().error("{}", str.str());
+ ceph_assert(r == 0);
+ }
+ for (auto i : {
+ t.get_on_applied(),
+ t.get_on_commit(),
+ t.get_on_applied_sync()}) {
+ if (i) {
+ i->complete(0);
+ }
+ }
+ return seastar::now();
+}
+
+int CyanStore::_remove(const coll_t& cid, const ghobject_t& oid)
+{
+ logger().debug("{} cid={} oid={}",
+ __func__, cid, oid);
+ auto c = _get_collection(cid);
+ if (!c)
+ return -ENOENT;
+
+ auto i = c->object_hash.find(oid);
+ if (i == c->object_hash.end())
+ return -ENOENT;
+ used_bytes -= i->second->get_size();
+ c->object_hash.erase(i);
+ c->object_map.erase(oid);
+ return 0;
+}
+
+int CyanStore::_touch(const coll_t& cid, const ghobject_t& oid)
+{
+ logger().debug("{} cid={} oid={}",
+ __func__, cid, oid);
+ auto c = _get_collection(cid);
+ if (!c)
+ return -ENOENT;
+
+ c->get_or_create_object(oid);
+ return 0;
+}
+
+int CyanStore::_write(const coll_t& cid, const ghobject_t& oid,
+ uint64_t offset, size_t len, const ceph::bufferlist& bl,
+ uint32_t fadvise_flags)
+{
+ logger().debug("{} {} {} {} ~ {}",
+ __func__, cid, oid, offset, len);
+ assert(len == bl.length());
+
+ auto c = _get_collection(cid);
+ if (!c)
+ return -ENOENT;
+
+ ObjectRef o = c->get_or_create_object(oid);
+ if (len > 0 && !local_conf()->memstore_debug_omit_block_device_write) {
+ const ssize_t old_size = o->get_size();
+ o->write(offset, bl);
+ used_bytes += (o->get_size() - old_size);
+ }
+
+ return 0;
+}
+
+int CyanStore::_omap_set_values(
+ const coll_t& cid,
+ const ghobject_t& oid,
+ std::map<std::string, ceph::bufferlist> &&aset)
+{
+ logger().debug(
+ "{} {} {} {} keys",
+ __func__, cid, oid, aset.size());
+
+ auto c = _get_collection(cid);
+ if (!c)
+ return -ENOENT;
+
+ ObjectRef o = c->get_or_create_object(oid);
+ for (auto &&i: aset) {
+ o->omap.insert(std::move(i));
+ }
+ return 0;
+}
+
+int CyanStore::_omap_set_header(
+ const coll_t& cid,
+ const ghobject_t& oid,
+ const ceph::bufferlist &header)
+{
+ logger().debug(
+ "{} {} {} {} bytes",
+ __func__, cid, oid, header.length());
+
+ auto c = _get_collection(cid);
+ if (!c)
+ return -ENOENT;
+
+ ObjectRef o = c->get_or_create_object(oid);
+ o->omap_header = header;
+ return 0;
+}
+
+int CyanStore::_omap_rmkeys(
+ const coll_t& cid,
+ const ghobject_t& oid,
+ const omap_keys_t& aset)
+{
+ logger().debug(
+ "{} {} {} {} keys",
+ __func__, cid, oid, aset.size());
+
+ auto c = _get_collection(cid);
+ if (!c)
+ return -ENOENT;
+
+ ObjectRef o = c->get_or_create_object(oid);
+ for (auto &i: aset) {
+ o->omap.erase(i);
+ }
+ return 0;
+}
+
+int CyanStore::_omap_rmkeyrange(
+ const coll_t& cid,
+ const ghobject_t& oid,
+ const std::string &first,
+ const std::string &last)
+{
+ logger().debug(
+ "{} {} {} first={} last={}",
+ __func__, cid, oid, first, last);
+
+ auto c = _get_collection(cid);
+ if (!c)
+ return -ENOENT;
+
+ ObjectRef o = c->get_or_create_object(oid);
+ for (auto i = o->omap.lower_bound(first);
+ i != o->omap.end() && i->first <= last;
+ o->omap.erase(i++));
+ return 0;
+}
+
+int CyanStore::_truncate(const coll_t& cid, const ghobject_t& oid, uint64_t size)
+{
+ logger().debug("{} cid={} oid={} size={}",
+ __func__, cid, oid, size);
+ auto c = _get_collection(cid);
+ if (!c)
+ return -ENOENT;
+
+ ObjectRef o = c->get_object(oid);
+ if (!o)
+ return -ENOENT;
+ if (local_conf()->memstore_debug_omit_block_device_write)
+ return 0;
+ const ssize_t old_size = o->get_size();
+ int r = o->truncate(size);
+ used_bytes += (o->get_size() - old_size);
+ return r;
+}
+
+int CyanStore::_setattrs(const coll_t& cid, const ghobject_t& oid,
+ std::map<std::string,bufferptr>& aset)
+{
+ logger().debug("{} cid={} oid={}",
+ __func__, cid, oid);
+ auto c = _get_collection(cid);
+ if (!c)
+ return -ENOENT;
+
+ ObjectRef o = c->get_object(oid);
+ if (!o)
+ return -ENOENT;
+ for (std::map<std::string, bufferptr>::const_iterator p = aset.begin();
+ p != aset.end(); ++p)
+ o->xattr[p->first] = p->second;
+ return 0;
+}
+
+int CyanStore::_create_collection(const coll_t& cid, int bits)
+{
+ auto result = coll_map.try_emplace(cid);
+ if (!result.second)
+ return -EEXIST;
+ auto p = new_coll_map.find(cid);
+ assert(p != new_coll_map.end());
+ result.first->second = p->second;
+ result.first->second->bits = bits;
+ new_coll_map.erase(p);
+ return 0;
+}
+
+boost::intrusive_ptr<Collection> CyanStore::_get_collection(const coll_t& cid)
+{
+ auto cp = coll_map.find(cid);
+ if (cp == coll_map.end())
+ return {};
+ return cp->second;
+}
+
+seastar::future<> CyanStore::write_meta(const std::string& key,
+ const std::string& value)
+{
+ std::string v = value;
+ v += "\n";
+ if (int r = safe_write_file(path.c_str(), key.c_str(),
+ v.c_str(), v.length(), 0600);
+ r < 0) {
+ throw std::runtime_error{fmt::format("unable to write_meta({})", key)};
+ }
+ return seastar::make_ready_future<>();
+}
+
+seastar::future<int, std::string> CyanStore::read_meta(const std::string& key)
+{
+ std::string fsid(4096, '\0');
+ int r = safe_read_file(path.c_str(), key.c_str(), fsid.data(), fsid.size());
+ if (r > 0) {
+ fsid.resize(r);
+ // drop trailing newlines
+ boost::algorithm::trim_right_if(fsid,
+ [](unsigned char c) {return isspace(c);});
+ } else {
+ fsid.clear();
+ }
+ return seastar::make_ready_future<int, std::string>(r, fsid);
+}
+
+uuid_d CyanStore::get_fsid() const
+{
+ return osd_fsid;
+}
+
+unsigned CyanStore::get_max_attr_name_length() const
+{
+ // arbitrary limitation exactly like in the case of MemStore.
+ return 256;
+}
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <string>
+#include <unordered_map>
+#include <map>
+#include <typeinfo>
+#include <vector>
+
+#include <optional>
+#include <seastar/core/future.hh>
+
+#include "osd/osd_types.h"
+#include "include/uuid.h"
+
+#include "crimson/os/futurized_store.h"
+
+namespace ceph::os {
+class Transaction;
+}
+
+namespace crimson::os {
+class Collection;
+
+class CyanStore final : public FuturizedStore {
+ constexpr static unsigned MAX_KEYS_PER_OMAP_GET_CALL = 32;
+
+ const std::string path;
+ std::unordered_map<coll_t, boost::intrusive_ptr<Collection>> coll_map;
+ std::map<coll_t, boost::intrusive_ptr<Collection>> new_coll_map;
+ uint64_t used_bytes = 0;
+ uuid_d osd_fsid;
+
+public:
+
+ CyanStore(const std::string& path);
+ ~CyanStore() final;
+
+ seastar::future<> mount() final;
+ seastar::future<> umount() final;
+
+ seastar::future<> mkfs(uuid_d new_osd_fsid) final;
+ store_statfs_t stat() const final;
+
+ read_errorator::future<ceph::bufferlist> read(
+ CollectionRef c,
+ const ghobject_t& oid,
+ uint64_t offset,
+ size_t len,
+ uint32_t op_flags = 0) final;
+ get_attr_errorator::future<ceph::bufferptr> get_attr(
+ CollectionRef c,
+ const ghobject_t& oid,
+ std::string_view name) const final;
+ seastar::future<attrs_t> get_attrs(CollectionRef c,
+ const ghobject_t& oid) final;
+
+ seastar::future<omap_values_t> omap_get_values(
+ CollectionRef c,
+ const ghobject_t& oid,
+ const omap_keys_t& keys) final;
+
+ seastar::future<std::vector<ghobject_t>, ghobject_t> list_objects(
+ CollectionRef c,
+ const ghobject_t& start,
+ const ghobject_t& end,
+ uint64_t limit) const final;
+
+ /// Retrieves paged set of values > start (if present)
+ seastar::future<bool, omap_values_t> omap_get_values(
+ CollectionRef c, ///< [in] collection
+ const ghobject_t &oid, ///< [in] oid
+ const std::optional<std::string> &start ///< [in] start, empty for begin
+ ) final; ///< @return <done, values> values.empty() iff done
+
+ seastar::future<CollectionRef> create_new_collection(const coll_t& cid) final;
+ seastar::future<CollectionRef> open_collection(const coll_t& cid) final;
+ seastar::future<std::vector<coll_t>> list_collections() final;
+
+ seastar::future<> do_transaction(CollectionRef ch,
+ ceph::os::Transaction&& txn) final;
+
+ seastar::future<> write_meta(const std::string& key,
+ const std::string& value) final;
+ seastar::future<int, std::string> read_meta(const std::string& key) final;
+ uuid_d get_fsid() const final;
+ unsigned get_max_attr_name_length() const final;
+
+private:
+ int _remove(const coll_t& cid, const ghobject_t& oid);
+ int _touch(const coll_t& cid, const ghobject_t& oid);
+ int _write(const coll_t& cid, const ghobject_t& oid,
+ uint64_t offset, size_t len, const ceph::bufferlist& bl,
+ uint32_t fadvise_flags);
+ int _omap_set_values(
+ const coll_t& cid,
+ const ghobject_t& oid,
+ std::map<std::string, ceph::bufferlist> &&aset);
+ int _omap_set_header(
+ const coll_t& cid,
+ const ghobject_t& oid,
+ const ceph::bufferlist &header);
+ int _omap_rmkeys(
+ const coll_t& cid,
+ const ghobject_t& oid,
+ const omap_keys_t& aset);
+ int _omap_rmkeyrange(
+ const coll_t& cid,
+ const ghobject_t& oid,
+ const std::string &first,
+ const std::string &last);
+ int _truncate(const coll_t& cid, const ghobject_t& oid, uint64_t size);
+ int _setattrs(const coll_t& cid, const ghobject_t& oid,
+ std::map<std::string,bufferptr>& aset);
+ int _create_collection(const coll_t& cid, int bits);
+ boost::intrusive_ptr<Collection> _get_collection(const coll_t& cid);
+};
+
+}
#include "futurized_store.h"
-#include "cyan_store.h"
+#include "cyanstore/cyan_store.h"
namespace crimson::os {
#include "crimson/mon/MonClient.h"
#include "crimson/net/Connection.h"
#include "crimson/net/Messenger.h"
+#include "crimson/os/cyanstore/cyan_object.h"
#include "crimson/os/futurized_collection.h"
-#include "crimson/os/cyan_object.h"
#include "crimson/os/futurized_store.h"
#include "crimson/osd/heartbeat.h"
#include "crimson/osd/osd_meta.h"
#include "crimson/net/Connection.h"
#include "crimson/net/Messenger.h"
-#include "crimson/os/cyan_store.h"
+#include "crimson/os/cyanstore/cyan_store.h"
#include "crimson/os/futurized_collection.h"
#include "crimson/osd/exceptions.h"
#include "crimson/osd/pg_meta.h"
#include "messages/MOSDOp.h"
#include "os/Transaction.h"
+#include "crimson/os/cyanstore/cyan_object.h"
#include "crimson/os/futurized_collection.h"
-#include "crimson/os/cyan_object.h"
#include "crimson/os/futurized_store.h"
#include "replicated_backend.h"
#include "ec_backend.h"
#include "messages/MOSDRepOpReply.h"
#include "crimson/common/log.h"
-#include "crimson/os/cyan_object.h"
+#include "crimson/os/cyanstore/cyan_object.h"
#include "crimson/os/futurized_store.h"
#include "crimson/osd/shard_services.h"
#include "osd/osd_perf_counters.h"
#include "osd/PeeringState.h"
-#include "crimson/osd/osdmap_service.h"
-#include "crimson/os/cyan_store.h"
#include "crimson/mgr/client.h"
#include "crimson/mon/MonClient.h"
#include "crimson/net/Messenger.h"
#include "crimson/net/Connection.h"
-#include "crimson/os/cyan_store.h"
+#include "crimson/os/cyanstore/cyan_store.h"
+#include "crimson/osd/osdmap_service.h"
#include "messages/MOSDPGTemp.h"
#include "messages/MOSDPGCreated.h"
#include "messages/MOSDPGNotify.h"
#ifdef WITH_SEASTAR
#include <seastar/core/future.hh>
#include "crimson/os/futurized_store.h"
-#include "crimson/os/cyan_collection.h"
+#include "crimson/os/cyanstore/cyan_collection.h"
#endif
constexpr auto PGLOG_INDEXED_OBJECTS = 1 << 0;