From 72f564cc7d9fcbd5b853f75fa9fa9b35e7f7d000 Mon Sep 17 00:00:00 2001 From: chunmei Date: Tue, 4 Apr 2023 07:37:16 +0000 Subject: [PATCH] crimson/os: static assign segments to each shard and make device sharded Signed-off-by: chunmei (cherry picked from commit ad4f7bbf15785b851db8e7d6f8a8608a362b2f74) --- src/crimson/os/seastore/device.h | 77 +++-- src/crimson/os/seastore/seastore.cc | 269 ++++++++---------- src/crimson/os/seastore/seastore.h | 58 +--- src/crimson/os/seastore/segment_manager.cc | 31 +- src/crimson/os/seastore/segment_manager.h | 55 ++-- .../os/seastore/segment_manager/block.cc | 129 ++++++--- .../os/seastore/segment_manager/block.h | 30 +- 7 files changed, 377 insertions(+), 272 deletions(-) diff --git a/src/crimson/os/seastore/device.h b/src/crimson/os/seastore/device.h index c49d1ff6aa205..2cdc5d02e9bf6 100644 --- a/src/crimson/os/seastore/device.h +++ b/src/crimson/os/seastore/device.h @@ -45,6 +45,34 @@ struct device_config_t { denc(v.secondary_devices, p); DENC_FINISH(p); } + static device_config_t create_primary( + uuid_d new_osd_fsid, + device_id_t id, + device_type_t d_type, + secondary_device_set_t sds) { + return device_config_t{ + true, + device_spec_t{ + (magic_t)std::rand(), + d_type, + id}, + seastore_meta_t{new_osd_fsid}, + sds}; + } + static device_config_t create_secondary( + uuid_d new_osd_fsid, + device_id_t id, + device_type_t d_type, + magic_t magic) { + return device_config_t{ + false, + device_spec_t{ + magic, + d_type, + id}, + seastore_meta_t{new_osd_fsid}, + secondary_device_set_t()}; + } }; std::ostream& operator<<(std::ostream&, const device_config_t&); @@ -58,9 +86,41 @@ using DeviceRef = std::unique_ptr; * Represents a general device regardless of the underlying medium. */ class Device { +// interfaces used by device public: virtual ~Device() {} + virtual seastar::future<> start() { + return seastar::now(); + } + + virtual seastar::future<> stop() { + return seastar::now(); + } + // called on the shard to get this shard device; + virtual Device& get_sharded_device() { + return *this; + } + + using access_ertr = crimson::errorator< + crimson::ct_error::input_output_error, + crimson::ct_error::permission_denied, + crimson::ct_error::enoent>; + + using mkfs_ertr = access_ertr; + using mkfs_ret = mkfs_ertr::future<>; + virtual mkfs_ret mkfs(device_config_t) = 0; + + using mount_ertr = access_ertr; + using mount_ret = access_ertr::future<>; + virtual mount_ret mount() = 0; + + static seastar::future make_device( + const std::string &device, + device_type_t dtype); + +// interfaces used by each device shard +public: virtual device_id_t get_device_id() const = 0; virtual magic_t get_magic() const = 0; @@ -77,19 +137,6 @@ public: virtual secondary_device_set_t& get_secondary_devices() = 0; - using access_ertr = crimson::errorator< - crimson::ct_error::input_output_error, - crimson::ct_error::permission_denied, - crimson::ct_error::enoent>; - - using mkfs_ertr = access_ertr; - using mkfs_ret = mkfs_ertr::future<>; - virtual mkfs_ret mkfs(device_config_t) = 0; - - using mount_ertr = access_ertr; - using mount_ret = access_ertr::future<>; - virtual mount_ret mount() = 0; - using close_ertr = crimson::errorator< crimson::ct_error::input_output_error>; virtual close_ertr::future<> close() = 0; @@ -115,10 +162,6 @@ public: return read_ertr::make_ready_future(std::move(*ptrref)); }); } - - static seastar::future make_device( - const std::string &device, - device_type_t dtype); }; } diff --git a/src/crimson/os/seastore/seastore.cc b/src/crimson/os/seastore/seastore.cc index 95cef4d1d0e25..b44d6696701df 100644 --- a/src/crimson/os/seastore/seastore.cc +++ b/src/crimson/os/seastore/seastore.cc @@ -124,7 +124,7 @@ SeaStore::Shard::Shard( throttler( get_conf("seastore_max_concurrent_transactions")) { - device.reset(dev); + device = &(dev->get_sharded_device()); register_metrics(); } @@ -200,67 +200,60 @@ seastar::future<> SeaStore::start() #else bool is_test = false; #endif - return shard_stores.start(root, nullptr, is_test) - .then([this] { - return shard_stores.invoke_on_all([](auto& local_store) { - return local_store.make_shard_stores(); - }); + using crimson::common::get_conf; + std::string type = get_conf("seastore_main_device_type"); + device_type_t d_type = string_to_device_type(type); + assert(d_type == device_type_t::SSD || + d_type == device_type_t::RANDOM_BLOCK_SSD); + + ceph_assert(root != ""); + return Device::make_device(root, d_type + ).then([this](DeviceRef device_obj) { + device = std::move(device_obj); + return device->start(); + }).then([this, is_test] { + ceph_assert(device); + return shard_stores.start(root, device.get(), is_test); }); } -seastar::future<> SeaStore::test_start(DeviceRef device) +seastar::future<> SeaStore::test_start(DeviceRef device_obj) { - if (device) { - ceph_assert(root == ""); - return shard_stores.start_single(root, device.release(), true); - } else { - ceph_assert(0 == "impossible no device"); - } + ceph_assert(device_obj); + ceph_assert(root == ""); + device = std::move(device_obj); + return shard_stores.start_single(root, device.get(), true); } - seastar::future<> SeaStore::stop() { ceph_assert(seastar::this_shard_id() == primary_core); - return shard_stores.stop(); -} - -seastar::future<> SeaStore::Shard::make_shard_stores() -{ - if (root != "") { - using crimson::common::get_conf; - std::string type = get_conf("seastore_main_device_type"); - device_type_t d_type = string_to_device_type(type); - assert(d_type == device_type_t::SSD || - d_type == device_type_t::RANDOM_BLOCK_SSD); - - return Device::make_device( - root, d_type - ).then([this](DeviceRef device_obj) { - device = std::move(device_obj); - }); - } - return seastar::now(); + return seastar::do_for_each(secondaries, [](auto& sec_dev) { + return sec_dev->stop(); + }).then([this] { + secondaries.clear(); + if (device) { + return device->stop(); + } else { + return seastar::now(); + } + }).then([this] { + return shard_stores.stop(); + }); } SeaStore::mount_ertr::future<> SeaStore::test_mount() { - ceph_assert(seastar::this_shard_id() == primary_core); - shard_stores.local().init_managers(); - return shard_stores.local().get_transaction_manager()->mount( - ).handle_error( - crimson::ct_error::assert_all{ - "Invalid error in SeaStore::test_mount" - } - ); + return shard_stores.local().mount_managers(); } -SeaStore::mount_ertr::future<> SeaStore::Shard::mount() +SeaStore::mount_ertr::future<> SeaStore::mount() { + ceph_assert(seastar::this_shard_id() == primary_core); return device->mount( ).safe_then([this] { - auto sec_devices = device->get_secondary_devices(); + auto sec_devices = device->get_sharded_device().get_secondary_devices(); return crimson::do_for_each(sec_devices, [this](auto& device_entry) { device_id_t id = device_entry.first; magic_t magic = device_entry.second.magic; @@ -268,25 +261,49 @@ SeaStore::mount_ertr::future<> SeaStore::Shard::mount() std::string path = fmt::format("{}/block.{}.{}", root, dtype, std::to_string(id)); return Device::make_device(path, dtype - ).then([this, magic](DeviceRef sec_dev) { - return sec_dev->mount( - ).safe_then([this, sec_dev=std::move(sec_dev), magic]() mutable { - boost::ignore_unused(magic); // avoid clang warning; - assert(sec_dev->get_magic() == magic); - secondaries.emplace_back(std::move(sec_dev)); + ).then([this, path, magic](DeviceRef sec_dev) { + return sec_dev->start( + ).then([this, magic, sec_dev = std::move(sec_dev)]() mutable { + return sec_dev->mount( + ).safe_then([this, sec_dev=std::move(sec_dev), magic]() mutable { + boost::ignore_unused(magic); // avoid clang warning; + assert(sec_dev->get_sharded_device().get_magic() == magic); + secondaries.emplace_back(std::move(sec_dev)); + }); + }).safe_then([this] { + return set_secondaries(); }); }); + }).safe_then([this] { + return shard_stores.invoke_on_all([](auto &local_store) { + return local_store.mount_managers(); + }); }); - }).safe_then([this] { - init_managers(); - return transaction_manager->mount(); }).handle_error( crimson::ct_error::assert_all{ - "Invalid error in Shard::mount" + "Invalid error in SeaStore::mount" } ); } +seastar::future<> SeaStore::Shard::mount_managers() +{ + init_managers(); + return transaction_manager->mount( + ).handle_error( + crimson::ct_error::assert_all{ + "Invalid error in mount_managers" + }); +} + +seastar::future<> SeaStore::umount() +{ + ceph_assert(seastar::this_shard_id() == primary_core); + return shard_stores.invoke_on_all([](auto &local_store) { + return local_store.umount(); + }); +} + seastar::future<> SeaStore::Shard::umount() { return [this] { @@ -367,75 +384,12 @@ SeaStore::Shard::mkfs_managers() ); } -seastar::future<> -SeaStore::Shard::mkfs( - secondary_device_set_t &sds, - uuid_d new_osd_fsid) -{ - device_type_t d_type = device->get_device_type(); - device_id_t id = (d_type == device_type_t::RANDOM_BLOCK_SSD) ? - static_cast(DEVICE_ID_RANDOM_BLOCK_MIN) : 0; - - return device->mkfs( - device_config_t{ - true, - device_spec_t{ - (magic_t)std::rand(), - d_type, - id}, - seastore_meta_t{new_osd_fsid}, - sds} - ).safe_then([this] { - return crimson::do_for_each(secondaries, [](auto& sec_dev) { - return sec_dev->mount(); - }); - }).safe_then([this] { - return device->mount(); - }).safe_then([this] { - return mkfs_managers(); - }).handle_error( - crimson::ct_error::assert_all{ - "Invalid error in SeaStore::Shard::mkfs" - } - ); -} - -seastar::future<> SeaStore::Shard::sec_mkfs( - const std::string path, - device_type_t dtype, - device_id_t id, - secondary_device_set_t &sds, - uuid_d new_osd_fsid) -{ - return Device::make_device(path, dtype - ).then([this, &sds, id, dtype, new_osd_fsid](DeviceRef sec_dev) { - magic_t magic = (magic_t)std::rand(); - sds.emplace( - (device_id_t)id, - device_spec_t{magic, dtype, (device_id_t)id}); - return sec_dev->mkfs( - device_config_t{ - false, - device_spec_t{ - magic, - dtype, - (device_id_t)id}, - seastore_meta_t{new_osd_fsid}, - secondary_device_set_t()} - ).safe_then([this, sec_dev=std::move(sec_dev), id]() mutable { - LOG_PREFIX(SeaStore::Shard::sec_mkfs); - DEBUG("mkfs: finished for device {}", id); - secondaries.emplace_back(std::move(sec_dev)); - }).handle_error(crimson::ct_error::assert_all{"not possible"}); - }); -} - -seastar::future<> SeaStore::_mkfs(uuid_d new_osd_fsid) +seastar::future<> SeaStore::set_secondaries() { - ceph_assert(seastar::this_shard_id() == primary_core); - return shard_stores.local().mkfs_managers( - ).then([this, new_osd_fsid] { - return prepare_meta(new_osd_fsid); + auto sec_dev_ite = secondaries.rbegin(); + Device* sec_dev = sec_dev_ite->get(); + return shard_stores.invoke_on_all([sec_dev](auto &local_store) { + local_store.set_secondaries(sec_dev->get_sharded_device()); }); } @@ -447,7 +401,10 @@ SeaStore::mkfs_ertr::future<> SeaStore::test_mkfs(uuid_d new_osd_fsid) if (done == 0) { return seastar::now(); } - return _mkfs(new_osd_fsid); + return shard_stores.local().mkfs_managers( + ).then([this, new_osd_fsid] { + return prepare_meta(new_osd_fsid); + }); }); } @@ -481,9 +438,8 @@ SeaStore::mkfs_ertr::future<> SeaStore::mkfs(uuid_d new_osd_fsid) return seastar::now(); } else { return seastar::do_with( - std::vector(), + secondary_device_set_t(), [this, new_osd_fsid](auto& sds) { - sds.resize(seastar::smp::count); auto fut = seastar::now(); LOG_PREFIX(SeaStore::mkfs); DEBUG("root: {}", root); @@ -510,15 +466,22 @@ SeaStore::mkfs_ertr::future<> SeaStore::mkfs(uuid_d new_osd_fsid) } auto id = std::stoi(entry_name.substr(dtype_end + 1)); std::string path = fmt::format("{}/{}", root, entry_name); - return shard_stores.invoke_on_all( - [&sds, id, path, dtype, new_osd_fsid] - (auto &local_store) { - return local_store.sec_mkfs( - path, - dtype, - id, - sds[seastar::this_shard_id()], - new_osd_fsid); + return Device::make_device(path, dtype + ).then([this, &sds, id, dtype, new_osd_fsid](DeviceRef sec_dev) { + auto p_sec_dev = sec_dev.get(); + secondaries.emplace_back(std::move(sec_dev)); + return p_sec_dev->start( + ).then([&sds, id, dtype, new_osd_fsid, p_sec_dev]() { + magic_t magic = (magic_t)std::rand(); + sds.emplace( + (device_id_t)id, + device_spec_t{magic, dtype, (device_id_t)id}); + return p_sec_dev->mkfs(device_config_t::create_secondary( + new_osd_fsid, id, dtype, magic) + ).handle_error(crimson::ct_error::assert_all{"not possible"}); + }); + }).then([this] { + return set_secondaries(); }); } return seastar::now(); @@ -527,17 +490,37 @@ SeaStore::mkfs_ertr::future<> SeaStore::mkfs(uuid_d new_osd_fsid) }); } return fut.then([this, &sds, new_osd_fsid] { - return shard_stores.invoke_on_all( - [&sds, new_osd_fsid](auto &local_store) { - return local_store.mkfs( - sds[seastar::this_shard_id()], new_osd_fsid); + device_id_t id = 0; + device_type_t d_type = device->get_device_type(); + assert(d_type == device_type_t::SSD || + d_type == device_type_t::RANDOM_BLOCK_SSD); + if (d_type == device_type_t::RANDOM_BLOCK_SSD) { + id = static_cast(DEVICE_ID_RANDOM_BLOCK_MIN); + } + + return device->mkfs( + device_config_t::create_primary(new_osd_fsid, id, d_type, sds) + ); + }).safe_then([this] { + return crimson::do_for_each(secondaries, [](auto& sec_dev) { + return sec_dev->mount(); }); }); - }).then([this, new_osd_fsid] { + }).safe_then([this] { + return device->mount(); + }).safe_then([this] { + return shard_stores.invoke_on_all([] (auto &local_store) { + return local_store.mkfs_managers(); + }); + }).safe_then([this, new_osd_fsid] { return prepare_meta(new_osd_fsid); - }).then([this] { + }).safe_then([this] { return umount(); - }); + }).handle_error( + crimson::ct_error::assert_all{ + "Invalid error in SeaStore::mkfs" + } + ); } }); } @@ -2057,12 +2040,8 @@ void SeaStore::Shard::init_managers() collection_manager.reset(); onode_manager.reset(); - std::vector sec_devices; - for (auto &dev : secondaries) { - sec_devices.emplace_back(dev.get()); - } transaction_manager = make_transaction_manager( - device.get(), sec_devices, is_test); + device, secondaries, is_test); collection_manager = std::make_unique( *transaction_manager); onode_manager = std::make_unique( diff --git a/src/crimson/os/seastore/seastore.h b/src/crimson/os/seastore/seastore.h index 8e43f275f18fc..df4323df55736 100644 --- a/src/crimson/os/seastore/seastore.h +++ b/src/crimson/os/seastore/seastore.h @@ -173,13 +173,13 @@ public: // only exposed to SeaStore public: - mount_ertr::future<> mount(); - seastar::future<> umount(); + // init managers and mount transaction_manager + seastar::future<> mount_managers(); - seastar::future<> mkfs( - secondary_device_set_t &sds, - uuid_d new_osd_fsid); + void set_secondaries(Device& sec_dev) { + secondaries.emplace_back(&sec_dev); + } using coll_core_t = FuturizedStore::coll_core_t; seastar::future> list_collections(); @@ -190,28 +190,11 @@ public: store_statfs_t stat() const; uuid_d get_fsid() const; - // for each shard store make device - seastar::future<> make_shard_stores(); seastar::future<> mkfs_managers(); void init_managers(); - TransactionManagerRef& get_transaction_manager() { - return transaction_manager; - } - // for secondaries device mkfs - seastar::future<> sec_mkfs( - const std::string path, - device_type_t dtype, - device_id_t id, - secondary_device_set_t &sds, - uuid_d new_osd_fsid); - - DeviceRef get_primary_device_ref() { - return std::move(device); - } - private: struct internal_context_t { CollectionRef ch; @@ -452,11 +435,11 @@ public: private: std::string root; - DeviceRef device; + Device* device; const uint32_t max_object_size; bool is_test; - std::vector secondaries; + std::vector secondaries; TransactionManagerRef transaction_manager; CollectionManagerRef collection_manager; OnodeManagerRef onode_manager; @@ -476,24 +459,8 @@ public: seastar::future<> start() final; seastar::future<> stop() final; - mount_ertr::future<> mount() final { - ceph_assert(seastar::this_shard_id() == primary_core); - return shard_stores.invoke_on_all( - [](auto &local_store) { - return local_store.mount().handle_error( - crimson::ct_error::assert_all{ - "Invalid error in SeaStore::mount" - }); - }); - } - - seastar::future<> umount() final { - ceph_assert(seastar::this_shard_id() == primary_core); - return shard_stores.invoke_on_all( - [](auto &local_store) { - return local_store.umount(); - }); - } + mount_ertr::future<> mount() final; + seastar::future<> umount() final; mkfs_ertr::future<> mkfs(uuid_d new_osd_fsid) final; seastar::future stat() const final; @@ -532,8 +499,7 @@ public: mkfs_ertr::future<> test_mkfs(uuid_d new_osd_fsid); DeviceRef get_primary_device_ref() { - ceph_assert(seastar::this_shard_id() == primary_core); - return shard_stores.local().get_primary_device_ref(); + return std::move(device); } seastar::future<> test_start(DeviceRef dev); @@ -543,11 +509,13 @@ private: seastar::future<> prepare_meta(uuid_d new_osd_fsid); - seastar::future<> _mkfs(uuid_d new_osd_fsid); + seastar::future<> set_secondaries(); private: std::string root; MDStoreRef mdstore; + DeviceRef device; + std::vector secondaries; seastar::sharded shard_stores; }; diff --git a/src/crimson/os/seastore/segment_manager.cc b/src/crimson/os/seastore/segment_manager.cc index 44192965fcee2..098a9b068f8fe 100644 --- a/src/crimson/os/seastore/segment_manager.cc +++ b/src/crimson/os/seastore/segment_manager.cc @@ -13,16 +13,29 @@ SET_SUBSYS(seastore_device); namespace crimson::os::seastore { +std::ostream& operator<<(std::ostream& out, const block_shard_info_t& sf) +{ + out << "(" + << "size=" << sf.size + << ", segments=" <(0), [&](auto &nr_zones) { return seastar::open_file_dma( - device + "/block" + std::to_string(seastar::this_shard_id()), + device + "/block", seastar::open_flags::rw ).then([&](auto file) { return seastar::do_with( @@ -67,11 +80,11 @@ LOG_PREFIX(SegmentManager::get_segment_manager); if (nr_zones != 0) { return std::make_unique< segment_manager::zns::ZNSSegmentManager - >(device + "/block" + std::to_string(seastar::this_shard_id())); + >(device + "/block"); } else { return std::make_unique< segment_manager::block::BlockSegmentManager - >(device + "/block" + std::to_string(seastar::this_shard_id()), dtype); + >(device + "/block", dtype); } }); }); @@ -79,7 +92,7 @@ LOG_PREFIX(SegmentManager::get_segment_manager); return seastar::make_ready_future( std::make_unique< segment_manager::block::BlockSegmentManager - >(device + "/block" + std::to_string(seastar::this_shard_id()), dtype)); + >(device + "/block", dtype)); #endif } diff --git a/src/crimson/os/seastore/segment_manager.h b/src/crimson/os/seastore/segment_manager.h index b3e0d16184672..1669d124a6b8a 100644 --- a/src/crimson/os/seastore/segment_manager.h +++ b/src/crimson/os/seastore/segment_manager.h @@ -20,43 +20,59 @@ namespace crimson::os::seastore { +using std::vector; +struct block_shard_info_t { + std::size_t size; + std::size_t segments; + uint64_t tracker_offset; + uint64_t first_segment_offset; + + DENC(block_shard_info_t, v, p) { + DENC_START(1, 1, p); + denc(v.size, p); + denc(v.segments, p); + denc(v.tracker_offset, p); + denc(v.first_segment_offset, p); + DENC_FINISH(p); + } +}; + struct block_sm_superblock_t { - size_t size = 0; + unsigned int shard_num = 0; size_t segment_size = 0; size_t block_size = 0; - size_t segments = 0; - uint64_t tracker_offset = 0; - uint64_t first_segment_offset = 0; + std::vector shard_infos; device_config_t config; DENC(block_sm_superblock_t, v, p) { DENC_START(1, 1, p); - denc(v.size, p); + denc(v.shard_num, p); denc(v.segment_size, p); denc(v.block_size, p); - denc(v.segments, p); - denc(v.tracker_offset, p); - denc(v.first_segment_offset, p); + denc(v.shard_infos, p); denc(v.config, p); DENC_FINISH(p); } void validate() const { + ceph_assert(shard_num == seastar::smp::count); ceph_assert(block_size > 0); ceph_assert(segment_size > 0 && segment_size % block_size == 0); ceph_assert_always(segment_size <= SEGMENT_OFF_MAX); - ceph_assert(size > segment_size && - size % block_size == 0); - ceph_assert_always(size <= DEVICE_OFF_MAX); - ceph_assert(segments > 0); - ceph_assert_always(segments <= DEVICE_SEGMENT_ID_MAX); - ceph_assert(tracker_offset > 0 && - tracker_offset % block_size == 0); - ceph_assert(first_segment_offset > tracker_offset && - first_segment_offset % block_size == 0); + for (unsigned int i = 0; i < seastar::smp::count; i ++) { + ceph_assert(shard_infos[i].size > segment_size && + shard_infos[i].size % block_size == 0); + ceph_assert_always(shard_infos[i].size <= DEVICE_OFF_MAX); + ceph_assert(shard_infos[i].segments > 0); + ceph_assert_always(shard_infos[i].segments <= DEVICE_SEGMENT_ID_MAX); + ceph_assert(shard_infos[i].tracker_offset > 0 && + shard_infos[i].tracker_offset % block_size == 0); + ceph_assert(shard_infos[i].first_segment_offset > shard_infos[i].tracker_offset && + shard_infos[i].first_segment_offset % block_size == 0); + } ceph_assert(config.spec.magic != 0); ceph_assert(get_default_backend_of_device(config.spec.dtype) == backend_type_t::SEGMENTED); @@ -75,6 +91,7 @@ struct block_sm_superblock_t { } }; +std::ostream& operator<<(std::ostream&, const block_shard_info_t&); std::ostream& operator<<(std::ostream&, const block_sm_superblock_t&); class Segment : public boost::intrusive_ref_counter< @@ -186,10 +203,14 @@ public: } +WRITE_CLASS_DENC( + crimson::os::seastore::block_shard_info_t +) WRITE_CLASS_DENC( crimson::os::seastore::block_sm_superblock_t ) #if FMT_VERSION >= 90000 +template <> struct fmt::formatter : fmt::ostream_formatter {}; template <> struct fmt::formatter : fmt::ostream_formatter {}; #endif diff --git a/src/crimson/os/seastore/segment_manager/block.cc b/src/crimson/os/seastore/segment_manager/block.cc index 8ed119cb9f707..ca1060d09816d 100644 --- a/src/crimson/os/seastore/segment_manager/block.cc +++ b/src/crimson/os/seastore/segment_manager/block.cc @@ -195,7 +195,7 @@ SegmentStateTracker::read_in( bptr.length(), bptr); } - +using std::vector; static block_sm_superblock_t make_superblock( device_id_t device_id, @@ -206,39 +206,44 @@ block_sm_superblock_t make_superblock( using crimson::common::get_conf; auto config_size = get_conf( - "seastore_device_size")/seastar::smp::count; + "seastore_device_size"); size_t size = (data.size == 0) ? config_size : data.size; auto config_segment_size = get_conf( "seastore_segment_size"); size_t raw_segments = size / config_segment_size; - size_t tracker_size = SegmentStateTracker::get_raw_size( - raw_segments, + size_t shard_tracker_size = SegmentStateTracker::get_raw_size( + raw_segments / seastar::smp::count, data.block_size); - size_t tracker_off = data.block_size; - size_t first_seg_off = tracker_size + tracker_off; - size_t segments = (size - first_seg_off) / config_segment_size; - size_t available_size = segments * config_segment_size; + size_t total_tracker_size = shard_tracker_size * seastar::smp::count; + size_t tracker_off = data.block_size; //superblock + size_t segments = (size - tracker_off - total_tracker_size) / config_segment_size; + size_t segments_per_shard = segments / seastar::smp::count; + + vector shard_infos(seastar::smp::count); + for (unsigned int i = 0; i < seastar::smp::count; i++) { + shard_infos[i].size = segments_per_shard * config_segment_size; + shard_infos[i].segments = segments_per_shard; + shard_infos[i].tracker_offset = tracker_off + i * shard_tracker_size; + shard_infos[i].first_segment_offset = tracker_off + total_tracker_size + + i * segments_per_shard * config_segment_size; + } - INFO("{} disk_size={}, available_size={}, segment_size={}, segments={}, " - "block_size={}, tracker_off={}, first_seg_off={}", + INFO("{} disk_size={}, segment_size={}, block_size={}", device_id_printer_t{device_id}, size, - available_size, config_segment_size, - segments, - data.block_size, - tracker_off, - first_seg_off); + data.block_size); + for (unsigned int i = 0; i < seastar::smp::count; i++) { + INFO("shard {} infos:", i, shard_infos[i]); + } return block_sm_superblock_t{ - available_size, + seastar::smp::count, config_segment_size, data.block_size, - segments, - tracker_off, - first_seg_off, + shard_infos, std::move(sm_config) }; } @@ -449,7 +454,8 @@ Segment::close_ertr::future<> BlockSegmentManager::segment_close( stats.closed_segments_unused_bytes += unused_bytes; stats.metadata_write.increment(tracker->get_size()); return tracker->write_out( - get_device_id(), device, superblock.tracker_offset); + get_device_id(), device, + shard_info.tracker_offset); } Segment::write_ertr::future<> BlockSegmentManager::segment_write( @@ -474,7 +480,18 @@ BlockSegmentManager::~BlockSegmentManager() BlockSegmentManager::mount_ret BlockSegmentManager::mount() { - LOG_PREFIX(BlockSegmentManager::mount); + return shard_devices.invoke_on_all([](auto &local_device) { + return local_device.shard_mount( + ).handle_error( + crimson::ct_error::assert_all{ + "Invalid error in BlockSegmentManager::mount" + }); + }); +} + +BlockSegmentManager::mount_ret BlockSegmentManager::shard_mount() +{ + LOG_PREFIX(BlockSegmentManager::shard_mount); return open_device( device_path ).safe_then([=, this](auto p) { @@ -483,19 +500,20 @@ BlockSegmentManager::mount_ret BlockSegmentManager::mount() return read_superblock(device, sd); }).safe_then([=, this](auto sb) { set_device_id(sb.config.spec.id); - INFO("{} read {}", device_id_printer_t{get_device_id()}, sb); + shard_info = sb.shard_infos[seastar::this_shard_id()]; + INFO("{} read {}", device_id_printer_t{get_device_id()}, shard_info); sb.validate(); superblock = sb; stats.data_read.increment( ceph::encoded_sizeof(superblock)); tracker = std::make_unique( - superblock.segments, + shard_info.segments, superblock.block_size); stats.data_read.increment(tracker->get_size()); return tracker->read_in( get_device_id(), device, - superblock.tracker_offset + shard_info.tracker_offset ).safe_then([this] { for (device_segment_id_t i = 0; i < tracker->get_capacity(); ++i) { if (tracker->get(i) == segment_state_t::OPEN) { @@ -504,7 +522,8 @@ BlockSegmentManager::mount_ret BlockSegmentManager::mount() } stats.metadata_write.increment(tracker->get_size()); return tracker->write_out( - get_device_id(), device, superblock.tracker_offset); + get_device_id(), device, + shard_info.tracker_offset); }); }).safe_then([this, FNAME] { INFO("{} complete", device_id_printer_t{get_device_id()}); @@ -515,7 +534,22 @@ BlockSegmentManager::mount_ret BlockSegmentManager::mount() BlockSegmentManager::mkfs_ret BlockSegmentManager::mkfs( device_config_t sm_config) { - LOG_PREFIX(BlockSegmentManager::mkfs); + return shard_devices.local().primary_mkfs(sm_config + ).safe_then([this] { + return shard_devices.invoke_on_all([](auto &local_device) { + return local_device.shard_mkfs( + ).handle_error( + crimson::ct_error::assert_all{ + "Invalid error in BlockSegmentManager::mkfs" + }); + }); + }); +} + +BlockSegmentManager::mkfs_ret BlockSegmentManager::primary_mkfs( + device_config_t sm_config) +{ + LOG_PREFIX(BlockSegmentManager::primary_mkfs); ceph_assert(sm_config.spec.dtype == superblock.config.spec.dtype); set_device_id(sm_config.spec.id); INFO("{} path={}, {}", @@ -530,8 +564,7 @@ BlockSegmentManager::mkfs_ret BlockSegmentManager::mkfs( check_create_device_ret maybe_create = check_create_device_ertr::now(); using crimson::common::get_conf; if (get_conf("seastore_block_create")) { - auto size = - get_conf("seastore_device_size")/seastar::smp::count; + auto size = get_conf("seastore_device_size"); maybe_create = check_create_device(device_path, size); } @@ -544,12 +577,6 @@ BlockSegmentManager::mkfs_ret BlockSegmentManager::mkfs( stats.metadata_write.increment( ceph::encoded_sizeof(sb)); return write_superblock(get_device_id(), device, sb); - }).safe_then([&, FNAME, this] { - DEBUG("{} superblock written", device_id_printer_t{get_device_id()}); - tracker.reset(new SegmentStateTracker(sb.segments, sb.block_size)); - stats.metadata_write.increment(tracker->get_size()); - return tracker->write_out( - get_device_id(), device, sb.tracker_offset); }).finally([&] { return device.close(); }).safe_then([FNAME, this] { @@ -559,6 +586,34 @@ BlockSegmentManager::mkfs_ret BlockSegmentManager::mkfs( }); } +BlockSegmentManager::mkfs_ret BlockSegmentManager::shard_mkfs() +{ + LOG_PREFIX(BlockSegmentManager::shard_mkfs); + return open_device( + device_path + ).safe_then([this](auto p) { + device = std::move(p.first); + auto sd = p.second; + return read_superblock(device, sd); + }).safe_then([this, FNAME](auto sb) { + set_device_id(sb.config.spec.id); + shard_info = sb.shard_infos[seastar::this_shard_id()]; + INFO("{} read {}", device_id_printer_t{get_device_id()}, shard_info); + sb.validate(); + tracker.reset(new SegmentStateTracker( + shard_info.segments, sb.block_size)); + stats.metadata_write.increment(tracker->get_size()); + return tracker->write_out( + get_device_id(), device, + shard_info.tracker_offset); + }).finally([this] { + return device.close(); + }).safe_then([FNAME, this] { + INFO("{} complete", device_id_printer_t{get_device_id()}); + return mkfs_ertr::now(); + }); +} + BlockSegmentManager::close_ertr::future<> BlockSegmentManager::close() { LOG_PREFIX(BlockSegmentManager::close); @@ -589,7 +644,8 @@ SegmentManager::open_ertr::future BlockSegmentManager::open( tracker->set(s_id, segment_state_t::OPEN); stats.metadata_write.increment(tracker->get_size()); return tracker->write_out( - get_device_id(), device, superblock.tracker_offset + get_device_id(), device, + shard_info.tracker_offset ).safe_then([this, id, FNAME] { ++stats.opened_segments; DEBUG("{} done", id); @@ -622,7 +678,8 @@ SegmentManager::release_ertr::future<> BlockSegmentManager::release( ++stats.released_segments; stats.metadata_write.increment(tracker->get_size()); return tracker->write_out( - get_device_id(), device, superblock.tracker_offset); + get_device_id(), device, + shard_info.tracker_offset); } SegmentManager::read_ertr::future<> BlockSegmentManager::read( diff --git a/src/crimson/os/seastore/segment_manager/block.h b/src/crimson/os/seastore/segment_manager/block.h index 4fa715ba71ff4..495d0d104516a 100644 --- a/src/crimson/os/seastore/segment_manager/block.h +++ b/src/crimson/os/seastore/segment_manager/block.h @@ -110,11 +110,24 @@ public: * state analagous to that of the segments of a zns device. */ class BlockSegmentManager final : public SegmentManager { +// interfaces used by Device public: + seastar::future<> start() { + return shard_devices.start(device_path, superblock.config.spec.dtype); + } + + seastar::future<> stop() { + return shard_devices.stop(); + } + + Device& get_sharded_device() final { + return shard_devices.local(); + } mount_ret mount() final; mkfs_ret mkfs(device_config_t) final; - +// interfaces used by each shard device +public: close_ertr::future<> close(); BlockSegmentManager( @@ -140,7 +153,7 @@ public: return superblock.config.spec.dtype; } size_t get_available_size() const final { - return superblock.size; + return shard_info.size; } extent_len_t get_block_size() const { return superblock.block_size; @@ -205,6 +218,7 @@ private: std::string device_path; std::unique_ptr tracker; + block_shard_info_t shard_info; block_sm_superblock_t superblock; seastar::file device; @@ -218,7 +232,7 @@ private: size_t get_offset(paddr_t addr) { auto& seg_addr = addr.as_seg_paddr(); - return superblock.first_segment_offset + + return shard_info.first_segment_offset + (seg_addr.get_segment_id().device_segment_id() * superblock.segment_size) + seg_addr.get_segment_off(); } @@ -233,6 +247,16 @@ private: Segment::close_ertr::future<> segment_close( segment_id_t id, segment_off_t write_pointer); + +private: + // shard 0 mkfs + mkfs_ret primary_mkfs(device_config_t); + // all shards mkfs + mkfs_ret shard_mkfs(); + // all shards mount + mount_ret shard_mount(); + + seastar::sharded shard_devices; }; } -- 2.39.5