random_block_manager/block_rb_manager.cc
random_block_manager/rbm_device.cc
random_block_manager/nvme_block_device.cc
+ random_block_manager/hdd_device.cc
random_block_manager/avlallocator.cc
journal/segmented_journal.cc
journal/segment_allocator.cc
virtual const std::set<device_id_t>& get_device_ids() const = 0;
+ virtual backend_type_t get_backend_type() const = 0;
+
virtual std::size_t get_reclaim_size_per_cycle() const = 0;
#ifdef UNIT_TESTS_BUILT
return sm_group->get_device_ids();
}
+ /// Reports the backend kind handled by this cleaner: always SEGMENTED.
+ backend_type_t get_backend_type() const final {
+ return backend_type_t::SEGMENTED;
+ }
+
std::size_t get_reclaim_size_per_cycle() const final {
return config.reclaim_bytes_per_cycle;
}
return rb_group->get_device_ids();
}
+ /// Reports the backend kind handled by this cleaner: always RANDOM_BLOCK.
+ backend_type_t get_backend_type() const final {
+ return backend_type_t::RANDOM_BLOCK;
+ }
+
std::size_t get_reclaim_size_per_cycle() const final {
return 0;
}
ceph_assert(shard_infos[i].size > block_size &&
shard_infos[i].size % block_size == 0);
ceph_assert_always(shard_infos[i].size <= DEVICE_OFF_MAX);
- ceph_assert(journal_size > 0 && journal_size % block_size == 0);
+ ceph_assert((journal_size > 0 && journal_size % block_size == 0) ||
+ config.spec.dtype == device_type_t::RANDOM_BLOCK_HDD);
ceph_assert(shard_infos[i].start_offset < total_size &&
shard_infos[i].start_offset % block_size == 0);
}
});
} else {
ceph_assert(btype != backend_type_t::NONE);
- return get_rb_device(device
+ return get_rb_device(device, dtype
).then([](DeviceRef ret) {
return ret;
});
{
LOG_PREFIX(ExtentPlacementManager::init);
writer_refs.clear();
- auto cold_segment_cleaner = dynamic_cast<SegmentCleaner*>(cold_cleaner.get());
dynamic_max_rewrite_generation = hot_tier_generations - 1;
- if (cold_segment_cleaner) {
+ if (cold_cleaner) {
dynamic_max_rewrite_generation = hot_tier_generations + cold_tier_generations - 1;
}
DEBUG("dynamic_max_rewrite_generation: {}, "
for (auto *rb : rb_cleaner->get_rb_group()->get_rb_managers()) {
add_device(rb->get_device());
}
- }
-
- if (cold_segment_cleaner) {
- // Cold DATA Segments
- for (rewrite_gen_t gen = hot_tier_generations; gen <= dynamic_max_rewrite_generation; ++gen) {
- writer_refs.emplace_back(std::make_unique<SegmentedOolWriter>(store_index,
- data_category_t::DATA, gen, *cold_segment_cleaner,
- *ool_segment_seq_allocator));
+ for (rewrite_gen_t gen = OOL_GENERATION; gen < hot_tier_generations; ++gen) {
data_writers_by_gen[generation_to_writer(gen)] = writer_refs.back().get();
}
- for (rewrite_gen_t gen = hot_tier_generations; gen <= dynamic_max_rewrite_generation; ++gen) {
- // Cold METADATA Segments
- writer_refs.emplace_back(std::make_unique<SegmentedOolWriter>(store_index,
- data_category_t::METADATA, gen, *cold_segment_cleaner,
- *ool_segment_seq_allocator));
+ for (rewrite_gen_t gen = OOL_GENERATION; gen < hot_tier_generations; ++gen) {
md_writers_by_gen[generation_to_writer(gen)] = writer_refs.back().get();
}
- for (auto *device : cold_segment_cleaner->get_segment_manager_group()
- ->get_segment_managers()) {
- add_device(device);
- }
}
+ if (cold_cleaner) {
+ if (cold_cleaner->get_backend_type() == backend_type_t::SEGMENTED) {
+ auto cold_segment_cleaner = static_cast<SegmentCleaner*>(cold_cleaner.get());
+ for (rewrite_gen_t gen = hot_tier_generations; gen <= dynamic_max_rewrite_generation; ++gen) {
+ writer_refs.emplace_back(std::make_unique<SegmentedOolWriter>(store_index,
+ data_category_t::DATA, gen, *cold_segment_cleaner,
+ *ool_segment_seq_allocator));
+ data_writers_by_gen[generation_to_writer(gen)] = writer_refs.back().get();
+ }
+ for (rewrite_gen_t gen = hot_tier_generations; gen <= dynamic_max_rewrite_generation; ++gen) {
+ writer_refs.emplace_back(std::make_unique<SegmentedOolWriter>(store_index,
+ data_category_t::METADATA, gen, *cold_segment_cleaner,
+ *ool_segment_seq_allocator));
+ md_writers_by_gen[generation_to_writer(gen)] = writer_refs.back().get();
+ }
+ for (auto *device : cold_segment_cleaner->get_segment_manager_group()
+ ->get_segment_managers()) {
+ add_device(device);
+ }
+ } else {
+ ceph_assert(cold_cleaner->get_backend_type() == backend_type_t::RANDOM_BLOCK);
+ auto rb_cleaner = static_cast<RBMCleaner*>(cold_cleaner.get());
+ ceph_assert(rb_cleaner);
+ writer_refs.emplace_back(std::make_unique<RandomBlockOolWriter>(rb_cleaner));
+ for (rewrite_gen_t gen = hot_tier_generations; gen <= dynamic_max_rewrite_generation; ++gen) {
+ data_writers_by_gen[generation_to_writer(gen)] = writer_refs.back().get();
+ }
+ for (rewrite_gen_t gen = hot_tier_generations; gen <= dynamic_max_rewrite_generation; ++gen) {
+ md_writers_by_gen[generation_to_writer(gen)] = writer_refs.back().get();
+ }
+ for (auto *rb : rb_cleaner->get_rb_group()->get_rb_managers()) {
+ add_device(rb->get_device());
+ }
+ }
+ }
+
+ auto cold_cleaner_ = cold_cleaner.get();
background_process.init(std::move(trimmer),
std::move(cleaner),
std::move(cold_cleaner),
hot_tier_generations,
pinboard);
ceph_assert(get_main_backend_type() != backend_type_t::NONE);
- if (cold_segment_cleaner) {
+ if (cold_cleaner_) {
ceph_assert(get_main_backend_type() == backend_type_t::SEGMENTED);
ceph_assert(background_process.has_cold_tier());
} else {
#include "crimson/os/seastore/random_block_manager.h"
#include "crimson/os/seastore/random_block_manager/nvme_block_device.h"
#include "crimson/os/seastore/random_block_manager/rbm_device.h"
+#include "crimson/os/seastore/random_block_manager/hdd_device.h"
namespace crimson::os::seastore {
seastar::future<random_block_device::RBMDeviceRef>
get_rb_device(
- const std::string &device)
+  const std::string &device, device_type_t dtype)
{
- return seastar::make_ready_future<random_block_device::RBMDeviceRef>(
- std::make_unique<
- random_block_device::nvme::NVMeBlockDevice
- >(device + "/block"));
+  // Every random-block device is addressed through "<device>/block";
+  // only the concrete device class depends on dtype.
+  const std::string block_path = device + "/block";
+  random_block_device::RBMDeviceRef rb_device;
+  if (dtype == device_type_t::RANDOM_BLOCK_HDD) {
+    rb_device =
+      std::make_unique<random_block_device::RotationalDevice>(block_path);
+  } else {
+    rb_device =
+      std::make_unique<random_block_device::nvme::NVMeBlockDevice>(block_path);
+  }
+  return seastar::make_ready_future<random_block_device::RBMDeviceRef>(
+    std::move(rb_device));
}
}
}
seastar::future<std::unique_ptr<random_block_device::RBMDevice>>
- get_rb_device(const std::string &device);
+ get_rb_device(const std::string &device, device_type_t dtype);
std::ostream &operator<<(std::ostream &out, const rbm_extent_state_t &state);
}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "crimson/common/errorator-utils.h"
+#include "crimson/os/seastore/logging.h"
+#include "crimson/os/seastore/random_block_manager/hdd_device.h"
+
+SET_SUBSYS(seastore_device);
+
+namespace crimson::os::seastore::random_block_device {
+
+/// Remember how many device shards were requested and start the sharded
+/// MultiShardDevices service.
+///
+/// @param shard_nums total number of device shards
+/// @return future resolved once shard_devices.start() completes
+///
+/// Each service instance is asked to create
+/// ceil(shard_nums / seastar::smp::count) devices for device_path.
+seastar::future<> RotationalDevice::start(uint32_t shard_nums) {
+  // The log prefix must name this class so messages are attributed correctly.
+  LOG_PREFIX(RotationalDevice::start);
+  device_shard_nums = shard_nums;
+  // Round up so that every requested shard has a device instance.
+  auto num_shard_services = (device_shard_nums + seastar::smp::count - 1) /
+    seastar::smp::count;
+  DEBUG("device_shard_nums={} seastar::smp={}, num_shard_services={}",
+    device_shard_nums, seastar::smp::count, num_shard_services);
+  return shard_devices.start(num_shard_services, device_path);
+}
+
+/// Format the device: log the configuration, then delegate to
+/// do_primary_mkfs() on the first device of the local shard, passing
+/// seastar::smp::count and 0 as the remaining arguments.
+RotationalDevice::mkfs_ret RotationalDevice::mkfs(device_config_t config) {
+  LOG_PREFIX(RotationalDevice::mkfs);
+  INFO("{}", config);
+  auto &primary = *shard_devices.local().mshard_devices[0];
+  return primary.do_primary_mkfs(config, seastar::smp::count, 0);
+}
+
+/// Mount every device of every shard service by calling do_shard_mount()
+/// on each one; any error from do_shard_mount() trips assert_all.
+RotationalDevice::mount_ret RotationalDevice::mount() {
+  LOG_PREFIX(RotationalDevice::mount);
+  DEBUG("mount");
+  // Mounts a single device and treats every error as fatal.
+  auto mount_one = [](auto &mshard_device) {
+    return mshard_device->do_shard_mount(
+    ).handle_error(
+      crimson::ct_error::assert_all{
+        "Invalid error in RBMDevice::do_mount"
+      }
+    );
+  };
+  return shard_devices.invoke_on_all([mount_one](auto &local_device) {
+    return seastar::do_for_each(local_device.mshard_devices, mount_one);
+  });
+}
+
+/// Read bptr.length() bytes at `offset` into `bptr`.
+/// A dma_read exception, or a byte count different from the requested
+/// length, is reported as input_output_error.
+read_ertr::future<> RotationalDevice::read(
+  uint64_t offset,
+  bufferptr &bptr)
+{
+  const auto wanted = bptr.length();
+  auto to_io_error = [](auto e) -> read_ertr::future<size_t> {
+    return crimson::ct_error::input_output_error::make();
+  };
+  auto verify_length = [wanted](auto nread) -> read_ertr::future<> {
+    if (nread == wanted) {
+      return read_ertr::now();
+    }
+    return crimson::ct_error::input_output_error::make();
+  };
+  return device.dma_read(offset, bptr.c_str(), wanted
+  ).handle_exception(std::move(to_io_error)
+  ).then(std::move(verify_length));
+}
+
+/// Vectored read: fill the buffers in `ptrs`, in order, from a single
+/// dma_read starting at `offset`.
+///
+/// @param offset device offset to start reading from
+/// @param ptrs   destination buffers; each length must be a multiple of
+///               super.block_size (asserted)
+/// @return ready future when `ptrs` is empty; input_output_error when
+///         dma_read throws or returns a byte count other than the summed
+///         buffer length
+///
+/// NOTE(review): `ptrs` itself is not kept alive past this call; presumably
+/// the caller retains the underlying buffers until the read resolves — confirm.
+read_ertr::future<> RotationalDevice::_readv(
+  uint64_t offset,
+  std::vector<bufferptr> ptrs) {
+  // The log prefix must name this class so messages are attributed correctly.
+  LOG_PREFIX(RotationalDevice::_readv);
+  DEBUG("block: read offset {}, {} buffers", offset, ptrs.size());
+  if (ptrs.size() == 0) {
+    return read_ertr::now();
+  }
+
+  std::vector<iovec> iov;
+  iov.reserve(ptrs.size());  // one entry per buffer, so allocate once
+  size_t length = 0;
+  for (auto &ptr : ptrs) {
+    length += ptr.length();
+    assert((ptr.length() % super.block_size) == 0);
+    iov.emplace_back(ptr.c_str(), ptr.length());
+  }
+  return device.dma_read(offset, std::move(iov)
+  ).handle_exception(
+    [FNAME](auto e) -> read_ertr::future<size_t> {
+    ERROR("read: dma_read got error{}", e);
+    return crimson::ct_error::input_output_error::make();
+  }).then([length, FNAME](auto result) -> read_ertr::future<> {
+    if (result != length) {
+      ERROR("read: dma_read got error with not proper length");
+      return crimson::ct_error::input_output_error::make();
+    }
+    return read_ertr::now();
+  });
+}
+
+/// Write the contents of `bptr` at `offset`.
+/// A dma_write exception, or a byte count different from bptr.length(),
+/// is reported as input_output_error. `stream` is not used here.
+write_ertr::future<> RotationalDevice::write(
+  uint64_t offset,
+  bufferptr bptr,
+  uint16_t stream)
+{
+  const auto to_write = bptr.length();
+  // The buffer is handed to do_with so it stays owned for the duration of
+  // the write issued inside the callback.
+  return seastar::do_with(
+    std::move(bptr),
+    [this, offset, to_write](auto &buf) {
+    auto to_io_error = [](auto e) -> write_ertr::future<size_t> {
+      return crimson::ct_error::input_output_error::make();
+    };
+    auto verify_length = [to_write](auto nwritten) -> write_ertr::future<> {
+      if (nwritten == to_write) {
+        return write_ertr::now();
+      }
+      return crimson::ct_error::input_output_error::make();
+    };
+    return device.dma_write(offset, buf.c_str(), to_write
+    ).handle_exception(std::move(to_io_error)
+    ).then(std::move(verify_length));
+  });
+}
+
+/// Open `path` with `mode` through seastar::open_file_dma and keep the
+/// resulting handle in `device`; any exception becomes input_output_error.
+open_ertr::future<> RotationalDevice::open(
+  const std::string &path,
+  seastar::open_flags mode)
+{
+  auto keep_handle = [this](auto file) {
+    device = std::move(file);
+  };
+  auto to_io_error = [](auto e) -> open_ertr::future<> {
+    return crimson::ct_error::input_output_error::make();
+  };
+  return seastar::open_file_dma(path, mode
+  ).then(std::move(keep_handle)
+  ).handle_exception(std::move(to_io_error));
+}
+
+// Write a whole bufferlist starting at `offset`.
+// The list is first rebuilt aligned to super.block_size. Each entry returned
+// by prepare_iovs() is then written at `offset + entry.offset`; a dma_write
+// that throws, or reports a byte count other than entry.length, becomes
+// input_output_error. `stream` is not used here.
+write_ertr::future<> RotationalDevice::writev(
+ uint64_t offset,
+ ceph::bufferlist bl,
+ uint16_t stream) {
+ bl.rebuild_aligned(super.block_size);
+
+ // Both the iov descriptors and the bufferlist itself are passed to do_with,
+ // which hands them to the callback by reference.
+ return seastar::do_with(
+ bl.prepare_iovs(),
+ std::move(bl),
+ [this, offset](auto& iovs, auto& bl)
+ {
+ return write_ertr::parallel_for_each(
+ iovs,
+ [this, offset](auto& p) mutable
+ {
+ // p.offset is added to the caller's base offset for this entry.
+ auto off = offset + p.offset;
+ auto len = p.length;
+ auto& iov = p.iov;
+ return device.dma_write(off, std::move(iov)
+ ).handle_exception(
+ [](auto e) -> write_ertr::future<size_t>
+ {
+ return crimson::ct_error::input_output_error::make();
+ }).then([len](size_t written) -> write_ertr::future<> {
+ // A short write is treated as an I/O error, not retried.
+ if (written != len) {
+ return crimson::ct_error::input_output_error::make();
+ }
+ return write_ertr::now();
+ });
+ });
+ });
+}
+
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include "crimson/os/seastore/random_block_manager/rbm_device.h"
+
+namespace crimson::os::seastore::random_block_device {
+/// RBMDevice implementation that reports itself as
+/// device_type_t::RANDOM_BLOCK_HDD and performs I/O through a
+/// seastar::file handle installed by open().
+class RotationalDevice : public RBMDevice {
+public:
+  /// @param device_path path used by start() and stat_device()
+  /// @param store_index forwarded to the RBMDevice base
+  RotationalDevice(
+    std::string device_path,
+    store_index_t store_index = 0)
+    : RBMDevice(store_index),
+      // by-value sink parameter: move it into the member instead of copying
+      device_path(std::move(device_path))
+  {}
+  ~RotationalDevice() = default;
+
+  /// Device interface
+
+  seastar::future<> start(uint32_t shard_nums) final;
+
+  /// Stop the sharded per-shard device container.
+  seastar::future<> stop() final {
+    return shard_devices.stop();
+  }
+
+  /// Return the local shard's device at `store_index` (bounds are asserted).
+  Device& get_sharded_device(store_index_t store_index) final {
+    assert(store_index < shard_devices.local().mshard_devices.size());
+    return *shard_devices.local().mshard_devices[store_index];
+  }
+
+  mkfs_ret mkfs(device_config_t config) final;
+
+  mount_ret mount() final;
+
+  device_type_t get_device_type() const final {
+    return device_type_t::RANDOM_BLOCK_HDD;
+  }
+
+  /// Close the file handle held in `device`.
+  close_ertr::future<> close() final {
+    return device.close();
+  }
+
+  /// RBMDevice interface
+
+  read_ertr::future<> read(
+    uint64_t offset,
+    bufferptr &bptr) final;
+  read_ertr::future<> _readv(
+    uint64_t offset,
+    std::vector<bufferptr> ptrs) final;
+
+  write_ertr::future<> write(
+    uint64_t offset,
+    bufferptr bptr,
+    uint16_t stream = 0) final;
+
+  open_ertr::future<> open(
+    const std::string &path,
+    seastar::open_flags mode) final;
+
+  write_ertr::future<> writev(
+    uint64_t offset,
+    ceph::bufferlist bl,
+    uint16_t stream = 0) final;
+
+  /// Stat device_path, then open it to query the file size and overwrite
+  /// stat.size with that value before closing the temporary handle.
+  /// Any exception along the way becomes input_output_error.
+  stat_device_ret stat_device() final {
+    return seastar::file_stat(device_path, seastar::follow_symlink::yes
+    ).then([this](auto stat) {
+      return seastar::open_file_dma(
+        device_path,
+        seastar::open_flags::rw | seastar::open_flags::dsync
+      ).then([stat](auto file) mutable {
+        return seastar::do_with(std::move(file), [stat](auto &file) mutable {
+          return file.size().then([stat](auto size) mutable {
+            stat.size = size;
+            return stat;
+          }).then([file](auto stat) mutable {
+            return file.close().then([stat] {
+              return stat;
+            });
+          });
+        });
+      });
+    }).handle_exception([](auto e) -> stat_device_ret {
+      return crimson::ct_error::input_output_error::make();
+    });
+  }
+
+  std::string get_device_path() const final {
+    return device_path;
+  }
+
+private:
+  std::string device_path;
+  seastar::file device;
+  seastar::sharded<MultiShardDevices<RotationalDevice>> shard_devices;
+};
+
+}
int namespace_id; // TODO: multi namespaces
std::string device_path;
- class MultiShardDevices {
- public:
- std::vector<std::unique_ptr<NVMeBlockDevice>> mshard_devices;
-
- public:
- MultiShardDevices(size_t count,
- const std::string path)
- : mshard_devices() {
- mshard_devices.reserve(count);
- for (size_t store_index = 0; store_index < count; ++store_index) {
- mshard_devices.emplace_back(std::make_unique<NVMeBlockDevice>(
- path, store_index));
- }
- }
- ~MultiShardDevices() {
- mshard_devices.clear();
- }
- };
- seastar::sharded<MultiShardDevices> shard_devices;
+ seastar::sharded<MultiShardDevices<NVMeBlockDevice>> shard_devices;
};
}
maybe_create = check_create_device(get_device_path(), size);
}
-
co_await std::move(maybe_create);
auto st = co_await stat_device(
).safe_then([] (auto st) mutable {
);
}
+ config.spec.id |= 0x80;
const size_t cur_block_size = (*st).block_size;
const size_t cur_total_size = (*st).size;
- ceph_assert_always(journal_size > 0);
+ ceph_assert_always(journal_size > 0 ||
+ config.spec.dtype == device_type_t::RANDOM_BLOCK_HDD);
ceph_assert_always(cur_total_size >= journal_size);
ceph_assert_always(shard_num > 0);
return super.config.spec.magic;
}
- device_type_t get_device_type() const final {
+ virtual device_type_t get_device_type() const {
return device_type_t::RANDOM_BLOCK_SSD;
}
uint64_t journal_size = DEFAULT_TEST_CBJOURNAL_SIZE,
uint64_t data_size = DEFAULT_TEST_CBJOURNAL_SIZE);
+/// Holds `count` heap-allocated devices of type T, all created from the
+/// same path with store indexes 0 .. count-1.
+///
+/// T must be constructible from (path, store_index). The devices are
+/// released by the vector's own destructor, so no user-written destructor
+/// is needed.
+template <typename T>
+class MultiShardDevices {
+public:
+  std::vector<std::unique_ptr<T>> mshard_devices;
+
+  /// @param count number of devices to create
+  /// @param path  device path handed to every T; taken by reference so
+  ///              constructing the container does not copy the string
+  MultiShardDevices(size_t count,
+                    const std::string &path)
+    : mshard_devices() {
+    mshard_devices.reserve(count);
+    for (size_t store_index = 0; store_index < count; ++store_index) {
+      mshard_devices.emplace_back(std::make_unique<T>(
+        path, store_index));
+    }
+  }
+};
+
}
if (type == "RANDOM_BLOCK_SSD") {
return device_type_t::RANDOM_BLOCK_SSD;
}
+ if (type == "RANDOM_BLOCK_HDD") {
+ return device_type_t::RANDOM_BLOCK_HDD;
+ }
return device_type_t::NONE;
}
return out << "RANDOM_BLOCK_SSD";
case device_type_t::RANDOM_BLOCK_EPHEMERAL:
return out << "RANDOM_BLOCK_EPHEMERAL";
+ case device_type_t::RANDOM_BLOCK_HDD:
+ return out << "RANDOM_BLOCK_HDD";
default:
return out << "INVALID_DEVICE_TYPE!";
}
EPHEMERAL_MAIN,
RANDOM_BLOCK_SSD,
RANDOM_BLOCK_EPHEMERAL,
+ RANDOM_BLOCK_HDD,
NUM_TYPES
};
auto rbs = std::make_unique<RBMDeviceGroup>();
auto backref_manager = create_backref_manager(*cache);
SegmentManagerGroupRef cold_sms = nullptr;
+ RBMDeviceGroupRef cold_rbs = nullptr;
std::vector<SegmentProvider*> segment_providers_by_id{DEVICE_ID_MAX, nullptr};
auto p_backend_type = primary_device->get_backend_type();
+ INFO("primary backend: {}", p_backend_type);
if (p_backend_type == backend_type_t::SEGMENTED) {
auto dtype = primary_device->get_device_type();
dtype != device_type_t::EPHEMERAL_COLD);
sms->add_segment_manager(static_cast<SegmentManager*>(primary_device));
} else {
+ assert(p_backend_type != backend_type_t::NONE);
auto rbm = std::make_unique<BlockRBManager>(
static_cast<RBMDevice*>(primary_device), "", is_test);
rbs->add_rb_manager(std::move(rbm));
for (auto &p_dev : secondary_devices) {
if (p_dev->get_backend_type() == backend_type_t::SEGMENTED) {
if (p_dev->get_device_type() == primary_device->get_device_type()) {
+ INFO("add {} to main segment backend", device_id_printer_t{p_dev->get_device_id()});
sms->add_segment_manager(static_cast<SegmentManager*>(p_dev));
} else {
if (!cold_sms) {
cold_sms = std::make_unique<SegmentManagerGroup>();
}
+ INFO("add {} to cold segment backend", device_id_printer_t{p_dev->get_device_id()});
cold_sms->add_segment_manager(static_cast<SegmentManager*>(p_dev));
}
} else {
+ // Validate the secondary device itself (not the primary) before it is
+ // cast to RBMDevice* below.
+ assert(p_dev->get_backend_type() != backend_type_t::NONE);
auto rbm = std::make_unique<BlockRBManager>(
static_cast<RBMDevice*>(p_dev), "", is_test);
- rbs->add_rb_manager(std::move(rbm));
+ if (p_dev->get_device_type() == primary_device->get_device_type()) {
+ INFO("add {} to rbm backend", device_id_printer_t{p_dev->get_device_id()});
+ rbs->add_rb_manager(std::move(rbm));
+ } else {
+ if (!cold_rbs) {
+ cold_rbs = std::make_unique<RBMDeviceGroup>();
+ }
+ INFO("add {} to cold rbm backend", device_id_printer_t{p_dev->get_device_id()});
+ cold_rbs->add_rb_manager(std::move(rbm));
+ }
}
}
AsyncCleanerRef cleaner;
JournalRef journal;
- SegmentCleanerRef cold_segment_cleaner = nullptr;
+ AsyncCleanerRef cold_cleaner = nullptr;
if (cold_sms) {
- cold_segment_cleaner = SegmentCleaner::create(
+ assert(!cold_rbs);
+ auto segment_cleaner = SegmentCleaner::create(
store_index,
cleaner_config,
std::move(cold_sms),
cleaner_is_detailed,
/* is_cold = */ true);
if (backend_type == backend_type_t::SEGMENTED) {
- for (auto id : cold_segment_cleaner->get_device_ids()) {
+ for (auto id : segment_cleaner->get_device_ids()) {
segment_providers_by_id[id] =
- static_cast<SegmentProvider*>(cold_segment_cleaner.get());
+ static_cast<SegmentProvider*>(segment_cleaner.get());
}
}
+ cold_cleaner = std::move(segment_cleaner);
+ } else if (cold_rbs) {
+ cold_cleaner = RBMCleaner::create(
+ store_index,
+ std::move(cold_rbs),
+ *backref_manager,
+ *lba_manager,
+ cleaner_is_detailed);
}
if (backend_type == backend_type_t::SEGMENTED) {
epm->init(std::move(journal_trimmer),
std::move(cleaner),
- std::move(cold_segment_cleaner),
+ std::move(cold_cleaner),
cache->get_extent_pinboard());
epm->set_primary_device(primary_device);