level: dev
desc: The record fullness threshold to flush a journal batch
default: 0.95
+- name: seastore_default_max_object_size
+ type: uint
+ level: dev
+ desc: default logical address space reservation for seastore objects' data
+ default: 16777216
+- name: seastore_default_object_metadata_reservation
+ type: uint
+ level: dev
+ desc: default logical address space reservation for seastore objects' metadata
+ default: 16777216
}
namespace crimson::os::seastore {
-
-/**
- * MAX_OBJECT_SIZE
- *
- * For now, we allocate a fixed region of laddr space of size MAX_OBJECT_SIZE
- * for any object. In the future, once we have the ability to remap logical
- * mappings (necessary for clone), we'll add the ability to grow and shrink
- * these regions and remove this assumption.
- */
-static constexpr extent_len_t MAX_OBJECT_SIZE = Onode::DEFAULT_DATA_RESERVATION;
#define assert_aligned(x) ceph_assert(((x)%ctx.tm.get_block_size()) == 0)
using context_t = ObjectDataHandler::context_t;
extent_len_t size)
{
LOG_PREFIX(ObjectDataHandler::prepare_data_reservation);
- ceph_assert(size <= MAX_OBJECT_SIZE);
+ ceph_assert(size <= max_object_size);
if (!object_data.is_null()) {
- ceph_assert(object_data.get_reserved_data_len() == MAX_OBJECT_SIZE);
+ ceph_assert(object_data.get_reserved_data_len() == max_object_size);
DEBUGT("reservation present: {}~{}",
ctx.t,
object_data.get_reserved_data_base(),
DEBUGT("reserving: {}~{}",
ctx.t,
ctx.onode.get_data_hint(),
- MAX_OBJECT_SIZE);
+ max_object_size);
return ctx.tm.reserve_region(
ctx.t,
ctx.onode.get_data_hint(),
- MAX_OBJECT_SIZE
- ).si_then([&object_data](auto pin) {
- ceph_assert(pin->get_length() == MAX_OBJECT_SIZE);
+ max_object_size
+ ).si_then([max_object_size=max_object_size, &object_data](auto pin) {
+ ceph_assert(pin->get_length() == max_object_size);
object_data.update_reserved(
pin->get_laddr(),
pin->get_length());
public:
using base_iertr = TransactionManager::base_iertr;
+ /**
+  * Builds a handler that reserves and checks laddr regions of size mos.
+  *
+  * Declared explicit so that a bare integer can never be converted into a
+  * handler implicitly; callers construct it directly, e.g.
+  * ObjectDataHandler(max_object_size).read(...).
+  *
+  * @param mos value stored in max_object_size
+  */
+ explicit ObjectDataHandler(uint32_t mos) : max_object_size(mos) {}
+
struct context_t {
TransactionManager &tm;
Transaction &t;
context_t ctx,
object_data_t &object_data,
extent_len_t size);
+private:
+ /**
+ * max_object_size
+ *
+ * For now, we allocate a fixed region of laddr space of size max_object_size
+ * for any object. In the future, once we have the ability to remap logical
+ * mappings (necessary for clone), we'll add the ability to grow and shrink
+ * these regions and remove this assumption.
+ */
+ const uint32_t max_object_size = 0;
};
}
{
protected:
virtual laddr_t get_hint() const = 0;
+ const uint32_t default_metadata_offset = 0;
+ const uint32_t default_metadata_range = 0;
public:
- static constexpr uint32_t DEFAULT_DATA_RESERVATION = 16<<20;
- static constexpr uint32_t DEFAULT_METADATA_OFFSET =
- DEFAULT_DATA_RESERVATION;
- static constexpr uint32_t DEFAULT_METADATA_RANGE = 16<<20;
+ Onode(uint32_t ddr, uint32_t dmr) // stores the two values get_metadata_hint() later combines with get_hint()
+ : default_metadata_offset(ddr), // ddr becomes the fixed offset added to get_hint()
+ default_metadata_range(dmr) // dmr becomes the modulus applied to the random offset
+ {}
virtual const onode_layout_t &get_layout() const = 0;
virtual onode_layout_t &get_mutable_layout(Transaction &t) = 0;
virtual ~Onode() = default;
laddr_t get_metadata_hint() const {
- return get_hint() + DEFAULT_METADATA_OFFSET +
- ((uint32_t)std::rand() % DEFAULT_METADATA_RANGE);
+ assert(default_metadata_offset);
+ assert(default_metadata_range);
+ return get_hint() + default_metadata_offset +
+ ((uint32_t)std::rand() % default_metadata_range);
}
laddr_t get_data_hint() const {
return get_hint();
DEBUGT("no entry for {}", trans, hoid);
return crimson::ct_error::enoent::make();
}
- auto val = OnodeRef(new FLTreeOnode(cursor.value()));
+ auto val = OnodeRef(new FLTreeOnode(
+ default_data_reservation,
+ default_metadata_range,
+ cursor.value()));
return get_onode_iertr::make_ready_future<OnodeRef>(
val
);
return tree.insert(
trans, hoid,
OnodeTree::tree_value_config_t{sizeof(onode_layout_t)}
- ).si_then([&trans, &hoid, FNAME](auto p)
+ ).si_then([this, &trans, &hoid, FNAME](auto p)
-> get_or_create_onode_ret {
auto [cursor, created] = std::move(p);
- auto val = OnodeRef(new FLTreeOnode(cursor.value()));
+ auto val = OnodeRef(new FLTreeOnode(
+ default_data_reservation,
+ default_metadata_range,
+ cursor.value()));
if (created) {
DEBUGT("created onode for entry for {}", trans, hoid);
val->get_mutable_layout(trans) = onode_layout_t{};
FLTreeOnode& operator=(const FLTreeOnode&) = delete;
template <typename... T>
- FLTreeOnode(T&&... args) : Value(std::forward<T>(args)...) {}
+ FLTreeOnode(uint32_t ddr, uint32_t dmr, T&&... args) // ddr/dmr go to the Onode base; the remaining args are forwarded to Value
+ : Onode(ddr, dmr),
+ Value(std::forward<T>(args)...) {}
+
+ template <typename... T>
+ FLTreeOnode(T&&... args) // overload without reservation sizes: every argument is forwarded to Value
+ : Onode(0, 0), // zero offset and range; Onode::get_metadata_hint() asserts that both are non-zero
+ Value(std::forward<T>(args)...) {}
struct Recorder : public ValueDeltaRecorder {
Recorder(bufferlist &bl) : ValueDeltaRecorder(bl) {}
using OnodeTree = Btree<FLTreeOnode>;
+using crimson::common::get_conf;
+
class FLTreeOnodeManager : public crimson::os::seastore::OnodeManager {
OnodeTree tree;
+ uint32_t default_data_reservation = 0;
+ uint32_t default_metadata_offset = 0;
+ uint32_t default_metadata_range = 0;
public:
FLTreeOnodeManager(TransactionManager &tm) : // builds the onode tree on tm and reads the reservation sizes from config
- tree(NodeExtentManager::create_seastore(tm)) {}
+ tree(NodeExtentManager::create_seastore(tm)),
+ default_data_reservation( // NOTE(review): fetched as uint64_t but the member is uint32_t, so a larger configured value is truncated
+ get_conf<uint64_t>("seastore_default_max_object_size")),
+ default_metadata_offset(default_data_reservation), // valid because default_data_reservation is declared, and so initialized, before this member
+ default_metadata_range( // NOTE(review): same uint64_t -> uint32_t narrowing as default_data_reservation
+ get_conf<uint64_t>("seastore_default_object_metadata_reservation"))
+ {}
// Delegates mkfs to the onode tree using transaction t and returns
// whatever tree.mkfs(t) yields.
mkfs_ret mkfs(Transaction &t) {
return tree.mkfs(t);
}
};
+using crimson::common::get_conf;
+
SeaStore::SeaStore(
const std::string& root,
MDStoreRef mdstore,
segment_manager(std::move(sm)),
transaction_manager(std::move(tm)),
collection_manager(std::move(cm)),
- onode_manager(std::move(om))
+ onode_manager(std::move(om)),
+ max_object_size(
+ get_conf<uint64_t>("seastore_default_max_object_size"))
{
register_metrics();
}
size - offset :
std::min(size - offset, len);
- return ObjectDataHandler().read(
+ return ObjectDataHandler(max_object_size).read(
ObjectDataHandler::context_t{
*transaction_manager,
t,
return seastar::do_with(
std::move(_bl),
[=, &ctx, &onode](auto &bl) {
- return ObjectDataHandler().write(
+ return ObjectDataHandler(max_object_size).write(
ObjectDataHandler::context_t{
*transaction_manager,
*ctx.transaction,
LOG_PREFIX(SeaStore::_truncate);
DEBUGT("onode={} size={}", *ctx.transaction, *onode, size);
onode->get_mutable_layout(*ctx.transaction).size = size;
- return ObjectDataHandler().truncate(
+ return ObjectDataHandler(max_object_size).truncate(
ObjectDataHandler::context_t{
*transaction_manager,
*ctx.transaction,
TransactionManagerRef transaction_manager;
CollectionManagerRef collection_manager;
OnodeManagerRef onode_manager;
+ const uint32_t max_object_size = 0;
using tm_iertr = TransactionManager::base_iertr;
using tm_ret = tm_iertr::future<>;
using namespace crimson::os;
using namespace crimson::os::seastore;
+#define MAX_OBJECT_SIZE (16<<20) // ObjectDataHandler constructor argument used by this test
+#define DEFAULT_OBJECT_DATA_RESERVATION (16<<20) // first TestOnode constructor argument (ddr)
+#define DEFAULT_OBJECT_METADATA_RESERVATION (16<<20) // second TestOnode constructor argument (dmr)
+
namespace {
[[maybe_unused]] seastar::logger& logger() {
return crimson::get_logger(ceph_subsys_test);
bool dirty = false;
public:
+ TestOnode(uint32_t ddr, uint32_t dmr) : Onode(ddr, dmr) {} // passes both values unchanged to the Onode base constructor
// Returns a const reference to `layout`; overrides the pure virtual
// Onode::get_layout().
const onode_layout_t &get_layout() const final {
return layout;
}
offset,
len));
with_trans_intr(t, [&](auto &t) {
- return ObjectDataHandler().write(
+ return ObjectDataHandler(MAX_OBJECT_SIZE).write(
ObjectDataHandler::context_t{
*tm,
t,
0,
size - offset);
with_trans_intr(t, [&](auto &t) {
- return ObjectDataHandler().truncate(
+ return ObjectDataHandler(MAX_OBJECT_SIZE).truncate(
ObjectDataHandler::context_t{
*tm,
t,
void read(Transaction &t, objaddr_t offset, extent_len_t len) {
bufferlist bl = with_trans_intr(t, [&](auto &t) {
- return ObjectDataHandler().read(
+ return ObjectDataHandler(MAX_OBJECT_SIZE).read(
ObjectDataHandler::context_t{
*tm,
t,
}
seastar::future<> set_up_fut() final {
- onode = new TestOnode{};
+ onode = new TestOnode(
+ DEFAULT_OBJECT_DATA_RESERVATION,
+ DEFAULT_OBJECT_METADATA_RESERVATION);
known_contents = buffer::create(4<<20 /* 4MB */);
memset(known_contents.c_str(), 0, known_contents.length());
size = 0;