denc(v.secondary_devices, p);
DENC_FINISH(p);
}
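+  // helpers to build a device_config_t for mkfs: the primary device
+  // records the whole secondary device set, while a secondary device
+  // only records its own spec under the shared osd fsid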
+ static device_config_t create_primary(
+ uuid_d new_osd_fsid,
+ device_id_t id,
+ device_type_t d_type,
+ secondary_device_set_t sds) {
+ return device_config_t{
+ true,
+ device_spec_t{
+ (magic_t)std::rand(),
+ d_type,
+ id},
+ seastore_meta_t{new_osd_fsid},
+ sds};
+ }
+ static device_config_t create_secondary(
+ uuid_d new_osd_fsid,
+ device_id_t id,
+ device_type_t d_type,
+ magic_t magic) {
+ return device_config_t{
+ false,
+ device_spec_t{
+ magic,
+ d_type,
+ id},
+ seastore_meta_t{new_osd_fsid},
+ secondary_device_set_t()};
+ }
};
std::ostream& operator<<(std::ostream&, const device_config_t&);
* Represents a general device regardless of the underlying medium.
*/
class Device {
+// interfaces used by the device as a whole
public:
virtual ~Device() {}
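+  // start/stop the device as a cross-shard service; the defaults are
+  // no-ops for devices that are not sharded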
+ virtual seastar::future<> start() {
+ return seastar::now();
+ }
+
+ virtual seastar::future<> stop() {
+ return seastar::now();
+ }
+  // called on each shard to get the shard-local device reference
+ virtual Device& get_sharded_device() {
+ return *this;
+ }
+
+ using access_ertr = crimson::errorator<
+ crimson::ct_error::input_output_error,
+ crimson::ct_error::permission_denied,
+ crimson::ct_error::enoent>;
+
+ using mkfs_ertr = access_ertr;
+ using mkfs_ret = mkfs_ertr::future<>;
+ virtual mkfs_ret mkfs(device_config_t) = 0;
+
+ using mount_ertr = access_ertr;
+  using mount_ret = mount_ertr::future<>;
+ virtual mount_ret mount() = 0;
+
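+  // construct a concrete Device implementation (e.g. a segment manager)
+  // for the given path and device type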
+ static seastar::future<DeviceRef> make_device(
+ const std::string &device,
+ device_type_t dtype);
+
+// interfaces used by each device shard
+public:
virtual device_id_t get_device_id() const = 0;
virtual magic_t get_magic() const = 0;
virtual secondary_device_set_t& get_secondary_devices() = 0;
- using access_ertr = crimson::errorator<
- crimson::ct_error::input_output_error,
- crimson::ct_error::permission_denied,
- crimson::ct_error::enoent>;
-
- using mkfs_ertr = access_ertr;
- using mkfs_ret = mkfs_ertr::future<>;
- virtual mkfs_ret mkfs(device_config_t) = 0;
-
- using mount_ertr = access_ertr;
- using mount_ret = access_ertr::future<>;
- virtual mount_ret mount() = 0;
-
using close_ertr = crimson::errorator<
crimson::ct_error::input_output_error>;
virtual close_ertr::future<> close() = 0;
return read_ertr::make_ready_future<bufferptr>(std::move(*ptrref));
});
}
-
- static seastar::future<DeviceRef> make_device(
- const std::string &device,
- device_type_t dtype);
};
}
throttler(
get_conf<uint64_t>("seastore_max_concurrent_transactions"))
{
- device.reset(dev);
+ device = &(dev->get_sharded_device());
register_metrics();
}
#else
bool is_test = false;
#endif
- return shard_stores.start(root, nullptr, is_test)
- .then([this] {
- return shard_stores.invoke_on_all([](auto& local_store) {
- return local_store.make_shard_stores();
- });
+ using crimson::common::get_conf;
+ std::string type = get_conf<std::string>("seastore_main_device_type");
+ device_type_t d_type = string_to_device_type(type);
+ assert(d_type == device_type_t::SSD ||
+ d_type == device_type_t::RANDOM_BLOCK_SSD);
+
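+  // create and start the primary device once, then hand a pointer to it
+  // to every shard store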
+ ceph_assert(root != "");
+ return Device::make_device(root, d_type
+ ).then([this](DeviceRef device_obj) {
+ device = std::move(device_obj);
+ return device->start();
+ }).then([this, is_test] {
+ ceph_assert(device);
+ return shard_stores.start(root, device.get(), is_test);
});
}
-seastar::future<> SeaStore::test_start(DeviceRef device)
+seastar::future<> SeaStore::test_start(DeviceRef device_obj)
{
- if (device) {
- ceph_assert(root == "");
- return shard_stores.start_single(root, device.release(), true);
- } else {
- ceph_assert(0 == "impossible no device");
- }
+ ceph_assert(device_obj);
+ ceph_assert(root == "");
+ device = std::move(device_obj);
+ return shard_stores.start_single(root, device.get(), true);
}
-
seastar::future<> SeaStore::stop()
{
ceph_assert(seastar::this_shard_id() == primary_core);
- return shard_stores.stop();
-}
-
-seastar::future<> SeaStore::Shard::make_shard_stores()
-{
- if (root != "") {
- using crimson::common::get_conf;
- std::string type = get_conf<std::string>("seastore_main_device_type");
- device_type_t d_type = string_to_device_type(type);
- assert(d_type == device_type_t::SSD ||
- d_type == device_type_t::RANDOM_BLOCK_SSD);
-
- return Device::make_device(
- root, d_type
- ).then([this](DeviceRef device_obj) {
- device = std::move(device_obj);
- });
- }
- return seastar::now();
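+  // stop secondary devices first, then the primary device, and finally
+  // the per-shard stores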
+ return seastar::do_for_each(secondaries, [](auto& sec_dev) {
+ return sec_dev->stop();
+ }).then([this] {
+ secondaries.clear();
+ if (device) {
+ return device->stop();
+ } else {
+ return seastar::now();
+ }
+ }).then([this] {
+ return shard_stores.stop();
+ });
}
SeaStore::mount_ertr::future<> SeaStore::test_mount()
{
-
ceph_assert(seastar::this_shard_id() == primary_core);
- shard_stores.local().init_managers();
- return shard_stores.local().get_transaction_manager()->mount(
- ).handle_error(
- crimson::ct_error::assert_all{
- "Invalid error in SeaStore::test_mount"
- }
- );
+ return shard_stores.local().mount_managers();
}
-SeaStore::mount_ertr::future<> SeaStore::Shard::mount()
+SeaStore::mount_ertr::future<> SeaStore::mount()
{
+ ceph_assert(seastar::this_shard_id() == primary_core);
return device->mount(
).safe_then([this] {
- auto sec_devices = device->get_secondary_devices();
+ auto sec_devices = device->get_sharded_device().get_secondary_devices();
return crimson::do_for_each(sec_devices, [this](auto& device_entry) {
device_id_t id = device_entry.first;
magic_t magic = device_entry.second.magic;
std::string path =
fmt::format("{}/block.{}.{}", root, dtype, std::to_string(id));
return Device::make_device(path, dtype
- ).then([this, magic](DeviceRef sec_dev) {
- return sec_dev->mount(
- ).safe_then([this, sec_dev=std::move(sec_dev), magic]() mutable {
- boost::ignore_unused(magic); // avoid clang warning;
- assert(sec_dev->get_magic() == magic);
- secondaries.emplace_back(std::move(sec_dev));
+      ).then([this, magic](DeviceRef sec_dev) {
+ return sec_dev->start(
+ ).then([this, magic, sec_dev = std::move(sec_dev)]() mutable {
+ return sec_dev->mount(
+ ).safe_then([this, sec_dev=std::move(sec_dev), magic]() mutable {
+          boost::ignore_unused(magic);  // avoid clang warning
+ assert(sec_dev->get_sharded_device().get_magic() == magic);
+ secondaries.emplace_back(std::move(sec_dev));
+ });
+ }).safe_then([this] {
+ return set_secondaries();
});
});
+ }).safe_then([this] {
+ return shard_stores.invoke_on_all([](auto &local_store) {
+ return local_store.mount_managers();
+ });
});
- }).safe_then([this] {
- init_managers();
- return transaction_manager->mount();
}).handle_error(
crimson::ct_error::assert_all{
- "Invalid error in Shard::mount"
+ "Invalid error in SeaStore::mount"
}
);
}
+seastar::future<> SeaStore::Shard::mount_managers()
+{
+ init_managers();
+ return transaction_manager->mount(
+ ).handle_error(
+ crimson::ct_error::assert_all{
+ "Invalid error in mount_managers"
+ });
+}
+
+seastar::future<> SeaStore::umount()
+{
+ ceph_assert(seastar::this_shard_id() == primary_core);
+ return shard_stores.invoke_on_all([](auto &local_store) {
+ return local_store.umount();
+ });
+}
+
seastar::future<> SeaStore::Shard::umount()
{
return [this] {
);
}
-seastar::future<>
-SeaStore::Shard::mkfs(
- secondary_device_set_t &sds,
- uuid_d new_osd_fsid)
-{
- device_type_t d_type = device->get_device_type();
- device_id_t id = (d_type == device_type_t::RANDOM_BLOCK_SSD) ?
- static_cast<device_id_t>(DEVICE_ID_RANDOM_BLOCK_MIN) : 0;
-
- return device->mkfs(
- device_config_t{
- true,
- device_spec_t{
- (magic_t)std::rand(),
- d_type,
- id},
- seastore_meta_t{new_osd_fsid},
- sds}
- ).safe_then([this] {
- return crimson::do_for_each(secondaries, [](auto& sec_dev) {
- return sec_dev->mount();
- });
- }).safe_then([this] {
- return device->mount();
- }).safe_then([this] {
- return mkfs_managers();
- }).handle_error(
- crimson::ct_error::assert_all{
- "Invalid error in SeaStore::Shard::mkfs"
- }
- );
-}
-
-seastar::future<> SeaStore::Shard::sec_mkfs(
- const std::string path,
- device_type_t dtype,
- device_id_t id,
- secondary_device_set_t &sds,
- uuid_d new_osd_fsid)
-{
- return Device::make_device(path, dtype
- ).then([this, &sds, id, dtype, new_osd_fsid](DeviceRef sec_dev) {
- magic_t magic = (magic_t)std::rand();
- sds.emplace(
- (device_id_t)id,
- device_spec_t{magic, dtype, (device_id_t)id});
- return sec_dev->mkfs(
- device_config_t{
- false,
- device_spec_t{
- magic,
- dtype,
- (device_id_t)id},
- seastore_meta_t{new_osd_fsid},
- secondary_device_set_t()}
- ).safe_then([this, sec_dev=std::move(sec_dev), id]() mutable {
- LOG_PREFIX(SeaStore::Shard::sec_mkfs);
- DEBUG("mkfs: finished for device {}", id);
- secondaries.emplace_back(std::move(sec_dev));
- }).handle_error(crimson::ct_error::assert_all{"not possible"});
- });
-}
-
-seastar::future<> SeaStore::_mkfs(uuid_d new_osd_fsid)
+seastar::future<> SeaStore::set_secondaries()
{
- ceph_assert(seastar::this_shard_id() == primary_core);
- return shard_stores.local().mkfs_managers(
- ).then([this, new_osd_fsid] {
- return prepare_meta(new_osd_fsid);
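+  // the secondary device just added sits at the back of secondaries;
+  // hand its shard-local reference to every shard store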
+ auto sec_dev_ite = secondaries.rbegin();
+ Device* sec_dev = sec_dev_ite->get();
+ return shard_stores.invoke_on_all([sec_dev](auto &local_store) {
+ local_store.set_secondaries(sec_dev->get_sharded_device());
});
}
if (done == 0) {
return seastar::now();
}
- return _mkfs(new_osd_fsid);
+ return shard_stores.local().mkfs_managers(
+ ).then([this, new_osd_fsid] {
+ return prepare_meta(new_osd_fsid);
+ });
});
}
return seastar::now();
} else {
return seastar::do_with(
- std::vector<secondary_device_set_t>(),
+ secondary_device_set_t(),
[this, new_osd_fsid](auto& sds) {
- sds.resize(seastar::smp::count);
auto fut = seastar::now();
LOG_PREFIX(SeaStore::mkfs);
DEBUG("root: {}", root);
}
auto id = std::stoi(entry_name.substr(dtype_end + 1));
std::string path = fmt::format("{}/{}", root, entry_name);
- return shard_stores.invoke_on_all(
- [&sds, id, path, dtype, new_osd_fsid]
- (auto &local_store) {
- return local_store.sec_mkfs(
- path,
- dtype,
- id,
- sds[seastar::this_shard_id()],
- new_osd_fsid);
+ return Device::make_device(path, dtype
+ ).then([this, &sds, id, dtype, new_osd_fsid](DeviceRef sec_dev) {
+ auto p_sec_dev = sec_dev.get();
+ secondaries.emplace_back(std::move(sec_dev));
+ return p_sec_dev->start(
+ ).then([&sds, id, dtype, new_osd_fsid, p_sec_dev]() {
+ magic_t magic = (magic_t)std::rand();
+ sds.emplace(
+ (device_id_t)id,
+ device_spec_t{magic, dtype, (device_id_t)id});
+ return p_sec_dev->mkfs(device_config_t::create_secondary(
+ new_osd_fsid, id, dtype, magic)
+ ).handle_error(crimson::ct_error::assert_all{"not possible"});
+ });
+ }).then([this] {
+ return set_secondaries();
});
}
return seastar::now();
});
}
return fut.then([this, &sds, new_osd_fsid] {
- return shard_stores.invoke_on_all(
- [&sds, new_osd_fsid](auto &local_store) {
- return local_store.mkfs(
- sds[seastar::this_shard_id()], new_osd_fsid);
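+      // the primary device id is 0 for segmented SSDs and
+      // DEVICE_ID_RANDOM_BLOCK_MIN for random-block SSDs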
+ device_id_t id = 0;
+ device_type_t d_type = device->get_device_type();
+ assert(d_type == device_type_t::SSD ||
+ d_type == device_type_t::RANDOM_BLOCK_SSD);
+ if (d_type == device_type_t::RANDOM_BLOCK_SSD) {
+ id = static_cast<device_id_t>(DEVICE_ID_RANDOM_BLOCK_MIN);
+ }
+
+ return device->mkfs(
+ device_config_t::create_primary(new_osd_fsid, id, d_type, sds)
+ );
+ }).safe_then([this] {
+ return crimson::do_for_each(secondaries, [](auto& sec_dev) {
+ return sec_dev->mount();
});
});
- }).then([this, new_osd_fsid] {
+ }).safe_then([this] {
+ return device->mount();
+ }).safe_then([this] {
+ return shard_stores.invoke_on_all([] (auto &local_store) {
+ return local_store.mkfs_managers();
+ });
+ }).safe_then([this, new_osd_fsid] {
return prepare_meta(new_osd_fsid);
- }).then([this] {
+ }).safe_then([this] {
return umount();
- });
+ }).handle_error(
+ crimson::ct_error::assert_all{
+ "Invalid error in SeaStore::mkfs"
+ }
+ );
}
});
}
collection_manager.reset();
onode_manager.reset();
- std::vector<Device*> sec_devices;
- for (auto &dev : secondaries) {
- sec_devices.emplace_back(dev.get());
- }
transaction_manager = make_transaction_manager(
- device.get(), sec_devices, is_test);
+ device, secondaries, is_test);
collection_manager = std::make_unique<collection_manager::FlatCollectionManager>(
*transaction_manager);
onode_manager = std::make_unique<crimson::os::seastore::onode::FLTreeOnodeManager>(
// only exposed to SeaStore
public:
- mount_ertr::future<> mount();
-
seastar::future<> umount();
+ // init managers and mount transaction_manager
+ seastar::future<> mount_managers();
- seastar::future<> mkfs(
- secondary_device_set_t &sds,
- uuid_d new_osd_fsid);
+ void set_secondaries(Device& sec_dev) {
+ secondaries.emplace_back(&sec_dev);
+ }
using coll_core_t = FuturizedStore::coll_core_t;
seastar::future<std::vector<coll_core_t>> list_collections();
store_statfs_t stat() const;
uuid_d get_fsid() const;
- // for each shard store make device
- seastar::future<> make_shard_stores();
seastar::future<> mkfs_managers();
void init_managers();
- TransactionManagerRef& get_transaction_manager() {
- return transaction_manager;
- }
- // for secondaries device mkfs
- seastar::future<> sec_mkfs(
- const std::string path,
- device_type_t dtype,
- device_id_t id,
- secondary_device_set_t &sds,
- uuid_d new_osd_fsid);
-
- DeviceRef get_primary_device_ref() {
- return std::move(device);
- }
-
private:
struct internal_context_t {
CollectionRef ch;
private:
std::string root;
- DeviceRef device;
+  Device* device = nullptr;
const uint32_t max_object_size;
bool is_test;
- std::vector<DeviceRef> secondaries;
+ std::vector<Device*> secondaries;
TransactionManagerRef transaction_manager;
CollectionManagerRef collection_manager;
OnodeManagerRef onode_manager;
seastar::future<> start() final;
seastar::future<> stop() final;
- mount_ertr::future<> mount() final {
- ceph_assert(seastar::this_shard_id() == primary_core);
- return shard_stores.invoke_on_all(
- [](auto &local_store) {
- return local_store.mount().handle_error(
- crimson::ct_error::assert_all{
- "Invalid error in SeaStore::mount"
- });
- });
- }
-
- seastar::future<> umount() final {
- ceph_assert(seastar::this_shard_id() == primary_core);
- return shard_stores.invoke_on_all(
- [](auto &local_store) {
- return local_store.umount();
- });
- }
+ mount_ertr::future<> mount() final;
+ seastar::future<> umount() final;
mkfs_ertr::future<> mkfs(uuid_d new_osd_fsid) final;
seastar::future<store_statfs_t> stat() const final;
mkfs_ertr::future<> test_mkfs(uuid_d new_osd_fsid);
DeviceRef get_primary_device_ref() {
- ceph_assert(seastar::this_shard_id() == primary_core);
- return shard_stores.local().get_primary_device_ref();
+ return std::move(device);
}
seastar::future<> test_start(DeviceRef dev);
seastar::future<> prepare_meta(uuid_d new_osd_fsid);
- seastar::future<> _mkfs(uuid_d new_osd_fsid);
+ seastar::future<> set_secondaries();
private:
std::string root;
MDStoreRef mdstore;
+ DeviceRef device;
+ std::vector<DeviceRef> secondaries;
seastar::sharded<SeaStore::Shard> shard_stores;
};
namespace crimson::os::seastore {
+std::ostream& operator<<(std::ostream& out, const block_shard_info_t& sf)
+{
+ out << "("
+ << "size=" << sf.size
+ << ", segments=" <<sf.segments
+ << ", tracker_offset=" <<sf.tracker_offset
+ << ", first_segment_offset=" <<sf.first_segment_offset
+ <<")";
+ return out;
+}
+
std::ostream& operator<<(std::ostream& out, const block_sm_superblock_t& sb)
{
out << "superblock("
- << "size=" << sb.size
+ << "shard_num=" << sb.shard_num
<< ", segment_size=" << sb.segment_size
<< ", block_size=" << sb.block_size
- << ", segments=" << sb.segments
- << ", tracker_offset=" << sb.tracker_offset
- << ", first_segment_offset=" << sb.first_segment_offset
- << ", config=" << sb.config
+ << ", shard_info:";
+ for (auto &sf : sb.shard_infos) {
+ out << sf
+ << ",";
+ }
+ out << "config=" << sb.config
<< ")";
return out;
}
static_cast<size_t>(0),
[&](auto &nr_zones) {
return seastar::open_file_dma(
- device + "/block" + std::to_string(seastar::this_shard_id()),
+ device + "/block",
seastar::open_flags::rw
).then([&](auto file) {
return seastar::do_with(
if (nr_zones != 0) {
return std::make_unique<
segment_manager::zns::ZNSSegmentManager
- >(device + "/block" + std::to_string(seastar::this_shard_id()));
+ >(device + "/block");
} else {
return std::make_unique<
segment_manager::block::BlockSegmentManager
- >(device + "/block" + std::to_string(seastar::this_shard_id()), dtype);
+ >(device + "/block", dtype);
}
});
});
return seastar::make_ready_future<crimson::os::seastore::SegmentManagerRef>(
std::make_unique<
segment_manager::block::BlockSegmentManager
- >(device + "/block" + std::to_string(seastar::this_shard_id()), dtype));
+ >(device + "/block", dtype));
#endif
}
namespace crimson::os::seastore {
+using std::vector;
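+
+// describes one shard's partition of a block device: its usable size,
+// segment count, and the offsets of its tracker and first segment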
+struct block_shard_info_t {
+ std::size_t size;
+ std::size_t segments;
+ uint64_t tracker_offset;
+ uint64_t first_segment_offset;
+
+ DENC(block_shard_info_t, v, p) {
+ DENC_START(1, 1, p);
+ denc(v.size, p);
+ denc(v.segments, p);
+ denc(v.tracker_offset, p);
+ denc(v.first_segment_offset, p);
+ DENC_FINISH(p);
+ }
+};
+
struct block_sm_superblock_t {
- size_t size = 0;
+ unsigned int shard_num = 0;
size_t segment_size = 0;
size_t block_size = 0;
- size_t segments = 0;
- uint64_t tracker_offset = 0;
- uint64_t first_segment_offset = 0;
+ std::vector<block_shard_info_t> shard_infos;
device_config_t config;
DENC(block_sm_superblock_t, v, p) {
DENC_START(1, 1, p);
- denc(v.size, p);
+ denc(v.shard_num, p);
denc(v.segment_size, p);
denc(v.block_size, p);
- denc(v.segments, p);
- denc(v.tracker_offset, p);
- denc(v.first_segment_offset, p);
+ denc(v.shard_infos, p);
denc(v.config, p);
DENC_FINISH(p);
}
void validate() const {
+    ceph_assert(shard_num == seastar::smp::count);
+    ceph_assert(shard_infos.size() == shard_num);
ceph_assert(block_size > 0);
ceph_assert(segment_size > 0 &&
segment_size % block_size == 0);
ceph_assert_always(segment_size <= SEGMENT_OFF_MAX);
- ceph_assert(size > segment_size &&
- size % block_size == 0);
- ceph_assert_always(size <= DEVICE_OFF_MAX);
- ceph_assert(segments > 0);
- ceph_assert_always(segments <= DEVICE_SEGMENT_ID_MAX);
- ceph_assert(tracker_offset > 0 &&
- tracker_offset % block_size == 0);
- ceph_assert(first_segment_offset > tracker_offset &&
- first_segment_offset % block_size == 0);
+    for (unsigned int i = 0; i < seastar::smp::count; i++) {
+ ceph_assert(shard_infos[i].size > segment_size &&
+ shard_infos[i].size % block_size == 0);
+ ceph_assert_always(shard_infos[i].size <= DEVICE_OFF_MAX);
+ ceph_assert(shard_infos[i].segments > 0);
+ ceph_assert_always(shard_infos[i].segments <= DEVICE_SEGMENT_ID_MAX);
+ ceph_assert(shard_infos[i].tracker_offset > 0 &&
+ shard_infos[i].tracker_offset % block_size == 0);
+ ceph_assert(shard_infos[i].first_segment_offset > shard_infos[i].tracker_offset &&
+ shard_infos[i].first_segment_offset % block_size == 0);
+ }
ceph_assert(config.spec.magic != 0);
ceph_assert(get_default_backend_of_device(config.spec.dtype) ==
backend_type_t::SEGMENTED);
}
};
+std::ostream& operator<<(std::ostream&, const block_shard_info_t&);
std::ostream& operator<<(std::ostream&, const block_sm_superblock_t&);
class Segment : public boost::intrusive_ref_counter<
}
+WRITE_CLASS_DENC(
+ crimson::os::seastore::block_shard_info_t
+)
WRITE_CLASS_DENC(
crimson::os::seastore::block_sm_superblock_t
)
#if FMT_VERSION >= 90000
+template <> struct fmt::formatter<crimson::os::seastore::block_shard_info_t> : fmt::ostream_formatter {};
template <> struct fmt::formatter<crimson::os::seastore::block_sm_superblock_t> : fmt::ostream_formatter {};
#endif
bptr.length(),
bptr);
}
-
+using std::vector;
static
block_sm_superblock_t make_superblock(
device_id_t device_id,
using crimson::common::get_conf;
auto config_size = get_conf<Option::size_t>(
- "seastore_device_size")/seastar::smp::count;
+ "seastore_device_size");
size_t size = (data.size == 0) ? config_size : data.size;
auto config_segment_size = get_conf<Option::size_t>(
"seastore_segment_size");
size_t raw_segments = size / config_segment_size;
- size_t tracker_size = SegmentStateTracker::get_raw_size(
- raw_segments,
+ size_t shard_tracker_size = SegmentStateTracker::get_raw_size(
+ raw_segments / seastar::smp::count,
data.block_size);
- size_t tracker_off = data.block_size;
- size_t first_seg_off = tracker_size + tracker_off;
- size_t segments = (size - first_seg_off) / config_segment_size;
- size_t available_size = segments * config_segment_size;
+ size_t total_tracker_size = shard_tracker_size * seastar::smp::count;
+  size_t tracker_off = data.block_size;  // trackers start right after the superblock
+ size_t segments = (size - tracker_off - total_tracker_size) / config_segment_size;
+ size_t segments_per_shard = segments / seastar::smp::count;
+
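+  // resulting on-disk layout:
+  // [superblock][tracker 0 .. tracker N-1][shard 0 segments]...[shard N-1 segments]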
+ vector<block_shard_info_t> shard_infos(seastar::smp::count);
+ for (unsigned int i = 0; i < seastar::smp::count; i++) {
+ shard_infos[i].size = segments_per_shard * config_segment_size;
+ shard_infos[i].segments = segments_per_shard;
+ shard_infos[i].tracker_offset = tracker_off + i * shard_tracker_size;
+ shard_infos[i].first_segment_offset = tracker_off + total_tracker_size
+ + i * segments_per_shard * config_segment_size;
+ }
- INFO("{} disk_size={}, available_size={}, segment_size={}, segments={}, "
- "block_size={}, tracker_off={}, first_seg_off={}",
+ INFO("{} disk_size={}, segment_size={}, block_size={}",
device_id_printer_t{device_id},
size,
- available_size,
config_segment_size,
- segments,
- data.block_size,
- tracker_off,
- first_seg_off);
+ data.block_size);
+ for (unsigned int i = 0; i < seastar::smp::count; i++) {
+ INFO("shard {} infos:", i, shard_infos[i]);
+ }
return block_sm_superblock_t{
- available_size,
+ seastar::smp::count,
config_segment_size,
data.block_size,
- segments,
- tracker_off,
- first_seg_off,
+ shard_infos,
std::move(sm_config)
};
}
stats.closed_segments_unused_bytes += unused_bytes;
stats.metadata_write.increment(tracker->get_size());
return tracker->write_out(
- get_device_id(), device, superblock.tracker_offset);
+ get_device_id(), device,
+ shard_info.tracker_offset);
}
Segment::write_ertr::future<> BlockSegmentManager::segment_write(
BlockSegmentManager::mount_ret BlockSegmentManager::mount()
{
- LOG_PREFIX(BlockSegmentManager::mount);
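+  // mount every shard's portion of the device via shard_mount()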
+ return shard_devices.invoke_on_all([](auto &local_device) {
+ return local_device.shard_mount(
+ ).handle_error(
+ crimson::ct_error::assert_all{
+ "Invalid error in BlockSegmentManager::mount"
+ });
+ });
+}
+
+BlockSegmentManager::mount_ret BlockSegmentManager::shard_mount()
+{
+ LOG_PREFIX(BlockSegmentManager::shard_mount);
return open_device(
device_path
).safe_then([=, this](auto p) {
return read_superblock(device, sd);
}).safe_then([=, this](auto sb) {
set_device_id(sb.config.spec.id);
- INFO("{} read {}", device_id_printer_t{get_device_id()}, sb);
    sb.validate();
+    shard_info = sb.shard_infos[seastar::this_shard_id()];
+    INFO("{} read {}", device_id_printer_t{get_device_id()}, shard_info);
superblock = sb;
stats.data_read.increment(
ceph::encoded_sizeof<block_sm_superblock_t>(superblock));
tracker = std::make_unique<SegmentStateTracker>(
- superblock.segments,
+ shard_info.segments,
superblock.block_size);
stats.data_read.increment(tracker->get_size());
return tracker->read_in(
get_device_id(),
device,
- superblock.tracker_offset
+ shard_info.tracker_offset
).safe_then([this] {
for (device_segment_id_t i = 0; i < tracker->get_capacity(); ++i) {
if (tracker->get(i) == segment_state_t::OPEN) {
}
stats.metadata_write.increment(tracker->get_size());
return tracker->write_out(
- get_device_id(), device, superblock.tracker_offset);
+ get_device_id(), device,
+ shard_info.tracker_offset);
});
}).safe_then([this, FNAME] {
INFO("{} complete", device_id_printer_t{get_device_id()});
BlockSegmentManager::mkfs_ret BlockSegmentManager::mkfs(
device_config_t sm_config)
{
- LOG_PREFIX(BlockSegmentManager::mkfs);
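+  // mkfs runs in two phases: shard 0 writes the shared superblock
+  // (primary_mkfs), then every shard initializes its own segment state
+  // tracker (shard_mkfs)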
+ return shard_devices.local().primary_mkfs(sm_config
+ ).safe_then([this] {
+ return shard_devices.invoke_on_all([](auto &local_device) {
+ return local_device.shard_mkfs(
+ ).handle_error(
+ crimson::ct_error::assert_all{
+ "Invalid error in BlockSegmentManager::mkfs"
+ });
+ });
+ });
+}
+
+BlockSegmentManager::mkfs_ret BlockSegmentManager::primary_mkfs(
+ device_config_t sm_config)
+{
+ LOG_PREFIX(BlockSegmentManager::primary_mkfs);
ceph_assert(sm_config.spec.dtype == superblock.config.spec.dtype);
set_device_id(sm_config.spec.id);
INFO("{} path={}, {}",
check_create_device_ret maybe_create = check_create_device_ertr::now();
using crimson::common::get_conf;
if (get_conf<bool>("seastore_block_create")) {
- auto size =
- get_conf<Option::size_t>("seastore_device_size")/seastar::smp::count;
+ auto size = get_conf<Option::size_t>("seastore_device_size");
maybe_create = check_create_device(device_path, size);
}
stats.metadata_write.increment(
ceph::encoded_sizeof<block_sm_superblock_t>(sb));
return write_superblock(get_device_id(), device, sb);
- }).safe_then([&, FNAME, this] {
- DEBUG("{} superblock written", device_id_printer_t{get_device_id()});
- tracker.reset(new SegmentStateTracker(sb.segments, sb.block_size));
- stats.metadata_write.increment(tracker->get_size());
- return tracker->write_out(
- get_device_id(), device, sb.tracker_offset);
}).finally([&] {
return device.close();
}).safe_then([FNAME, this] {
});
}
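+
+// on each shard: re-read the superblock written by primary_mkfs and
+// write out the initial segment state tracker for this shard's region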
+BlockSegmentManager::mkfs_ret BlockSegmentManager::shard_mkfs()
+{
+ LOG_PREFIX(BlockSegmentManager::shard_mkfs);
+ return open_device(
+ device_path
+ ).safe_then([this](auto p) {
+ device = std::move(p.first);
+ auto sd = p.second;
+ return read_superblock(device, sd);
+ }).safe_then([this, FNAME](auto sb) {
+    set_device_id(sb.config.spec.id);
+    sb.validate();
+    shard_info = sb.shard_infos[seastar::this_shard_id()];
+    INFO("{} read {}", device_id_printer_t{get_device_id()}, shard_info);
+ tracker.reset(new SegmentStateTracker(
+ shard_info.segments, sb.block_size));
+ stats.metadata_write.increment(tracker->get_size());
+ return tracker->write_out(
+ get_device_id(), device,
+ shard_info.tracker_offset);
+ }).finally([this] {
+ return device.close();
+ }).safe_then([FNAME, this] {
+ INFO("{} complete", device_id_printer_t{get_device_id()});
+ return mkfs_ertr::now();
+ });
+}
+
BlockSegmentManager::close_ertr::future<> BlockSegmentManager::close()
{
LOG_PREFIX(BlockSegmentManager::close);
tracker->set(s_id, segment_state_t::OPEN);
stats.metadata_write.increment(tracker->get_size());
return tracker->write_out(
- get_device_id(), device, superblock.tracker_offset
+ get_device_id(), device,
+ shard_info.tracker_offset
).safe_then([this, id, FNAME] {
++stats.opened_segments;
DEBUG("{} done", id);
++stats.released_segments;
stats.metadata_write.increment(tracker->get_size());
return tracker->write_out(
- get_device_id(), device, superblock.tracker_offset);
+ get_device_id(), device,
+ shard_info.tracker_offset);
}
SegmentManager::read_ertr::future<> BlockSegmentManager::read(
* state analagous to that of the segments of a zns device.
*/
class BlockSegmentManager final : public SegmentManager {
+// interfaces used by Device
public:
+  seastar::future<> start() final {
+ return shard_devices.start(device_path, superblock.config.spec.dtype);
+ }
+
+  seastar::future<> stop() final {
+ return shard_devices.stop();
+ }
+
+ Device& get_sharded_device() final {
+ return shard_devices.local();
+ }
mount_ret mount() final;
mkfs_ret mkfs(device_config_t) final;
-
+// interfaces used by each shard device
+public:
close_ertr::future<> close();
BlockSegmentManager(
return superblock.config.spec.dtype;
}
size_t get_available_size() const final {
- return superblock.size;
+ return shard_info.size;
}
extent_len_t get_block_size() const {
return superblock.block_size;
std::string device_path;
std::unique_ptr<SegmentStateTracker> tracker;
+ block_shard_info_t shard_info;
block_sm_superblock_t superblock;
seastar::file device;
size_t get_offset(paddr_t addr) {
auto& seg_addr = addr.as_seg_paddr();
- return superblock.first_segment_offset +
+ return shard_info.first_segment_offset +
(seg_addr.get_segment_id().device_segment_id() * superblock.segment_size) +
seg_addr.get_segment_off();
}
Segment::close_ertr::future<> segment_close(
segment_id_t id, segment_off_t write_pointer);
+
+private:
+ // shard 0 mkfs
+ mkfs_ret primary_mkfs(device_config_t);
+ // all shards mkfs
+ mkfs_ret shard_mkfs();
+ // all shards mount
+ mount_ret shard_mount();
+
+ seastar::sharded<BlockSegmentManager> shard_devices;
};
}