From: Kefu Chai Date: Wed, 23 Jan 2019 05:49:37 +0000 (+0800) Subject: crimson/osd: load/store osdmap from/to store X-Git-Tag: v14.1.0~263^2~6 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=3b0db1db56cd3135aff1d2a65d01a30f40b359f3;p=ceph-ci.git crimson/osd: load/store osdmap from/to store Signed-off-by: Kefu Chai --- diff --git a/src/crimson/osd/CMakeLists.txt b/src/crimson/osd/CMakeLists.txt index 9121bc1025d..6bf330eebaa 100644 --- a/src/crimson/osd/CMakeLists.txt +++ b/src/crimson/osd/CMakeLists.txt @@ -1,6 +1,7 @@ add_executable(crimson-osd + chained_dispatchers.cc main.cc osd.cc - chained_dispatchers.cc) + osd_meta.cc) target_link_libraries(crimson-osd crimson-common crimson-os crimson) diff --git a/src/crimson/osd/osd.cc b/src/crimson/osd/osd.cc index 062b7b74188..a0e8a2cb773 100644 --- a/src/crimson/osd/osd.cc +++ b/src/crimson/osd/osd.cc @@ -9,6 +9,7 @@ #include "crimson/os/cyan_object.h" #include "crimson/os/cyan_store.h" #include "crimson/os/Transaction.h" +#include "crimson/osd/osd_meta.h" namespace { seastar::logger& logger() { @@ -94,14 +95,12 @@ seastar::future<> OSD::mkfs(uuid_d cluster_fsid) superblock.whoami = whoami; superblock.compat_features = get_osd_initial_compat_set(); - bufferlist bl; - encode(superblock, bl); - - auto ch = store->create_new_collection(coll_t::meta()); + meta_coll = make_unique<OSDMeta>( + store->create_new_collection(coll_t::meta()), store.get()); ceph::os::Transaction t; - t.create_collection(coll_t::meta(), 0); - t.write(coll_t::meta(), OSD_SUPERBLOCK_GOBJECT, 0, bl.length(), bl); - return store->do_transaction(ch, std::move(t)); + meta_coll->create(t); + meta_coll->store_superblock(t, superblock); + return store->do_transaction(meta_coll->collection(), std::move(t)); }).then([cluster_fsid, this] { store->write_meta("ceph_fsid", cluster_fsid.to_string()); store->write_meta("whoami", std::to_string(whoami)); @@ -115,10 +114,14 @@ seastar::future<> OSD::start() const auto data_path = 
local_conf().get_val<std::string>("osd_data"); store = std::make_unique<ceph::os::CyanStore>(data_path); return store->mount().then([this] { - meta_coll = store->open_collection(coll_t::meta()); - return read_superblock(); - }).then([this] { - osdmap = get_map(superblock.current_epoch); + meta_coll = make_unique<OSDMeta>(store->open_collection(coll_t::meta()), + store.get()); + return meta_coll->load_superblock(); + }).then([this](OSDSuperblock&& sb) { + superblock = std::move(sb); + return get_map(superblock.current_epoch); + }).then([this](seastar::lw_shared_ptr<OSDMap> map) { + osdmap = std::move(map); return client_msgr->start(&dispatchers); }).then([this] { return monc.start(); @@ -238,35 +241,77 @@ seastar::future<> OSD::ms_handle_remote_reset(ceph::net::ConnectionRef conn) return seastar::now(); } -seastar::lw_shared_ptr<OSDMap> OSD::get_map(epoch_t e) +seastar::future<seastar::lw_shared_ptr<OSDMap>> OSD::get_map(epoch_t e) { // TODO: use LRU cache for managing osdmap, fallback to disk if we have to - return osdmaps[e]; + if (auto found = osdmaps.find(e); found != osdmaps.end()) { + return seastar::make_ready_future<seastar::lw_shared_ptr<OSDMap>>( + found->second); + } else { + return load_map_bl(e).then([e, this](bufferlist bl) { + auto osdmap = seastar::make_lw_shared<OSDMap>(); + osdmap->decode(bl); + osdmaps.emplace(e, osdmap); + return seastar::make_ready_future<decltype(osdmap)>(std::move(osdmap)); + }); + } +} + +void OSD::store_map_bl(ceph::os::Transaction& t, + epoch_t e, bufferlist&& bl) +{ + meta_coll->store_map(t, e, bl); + map_bl_cache[e] = std::move(bl); +} + +seastar::future<bufferlist> OSD::load_map_bl(epoch_t e) +{ + if (auto found = map_bl_cache.find(e); found != map_bl_cache.end()) { + return seastar::make_ready_future<bufferlist>(found->second); + } else { + return meta_coll->load_map(e); + } } -void OSD::store_maps(epoch_t start, Ref<MOSDMap> m) +seastar::future<> OSD::store_maps(ceph::os::Transaction& t, + epoch_t start, Ref<MOSDMap> m) { - for (epoch_t e = start; e <= m->get_last(); e++) { - seastar::lw_shared_ptr<OSDMap> o; + return seastar::do_for_each(boost::counting_iterator<epoch_t>(start), + boost::counting_iterator<epoch_t>(m->get_last() 
+ 1), + [&t, m, this](epoch_t e) { if (auto p = m->maps.find(e); p != m->maps.end()) { - o = seastar::make_lw_shared<OSDMap>(); + auto o = seastar::make_lw_shared<OSDMap>(); o->decode(p->second); + logger().info("store_maps osdmap.{}", e); + store_map_bl(t, e, std::move(std::move(p->second))); + osdmaps.emplace(e, std::move(o)); + return seastar::now(); } else if (auto p = m->incremental_maps.find(e); p != m->incremental_maps.end()) { - o = get_map(e - 1); OSDMap::Incremental inc; auto i = p->second.cbegin(); inc.decode(i); - o->apply_incremental(inc); + return load_map_bl(e - 1) + .then([&t, e, inc=std::move(inc), this](bufferlist bl) { + auto o = seastar::make_lw_shared<OSDMap>(); + o->decode(bl); + o->apply_incremental(inc); + bufferlist fbl; + o->encode(fbl, inc.encode_features | CEPH_FEATURE_RESERVED); + store_map_bl(t, e, std::move(fbl)); + osdmaps.emplace(e, std::move(o)); + return seastar::now(); + }); } else { logger().error("MOSDMap lied about what maps it had?"); + return seastar::now(); } - osdmaps[e] = std::move(o); - } + }); } seastar::future<> OSD::osdmap_subscribe(version_t epoch, bool force_request) { + logger().info("{}({})", __func__, epoch); if (monc.sub_want_increment("osdmap", epoch, CEPH_SUBSCRIBE_ONETIME) || force_request) { return monc.renew_subs(); @@ -275,24 +320,6 @@ seastar::future<> OSD::osdmap_subscribe(version_t epoch, bool force_request) } } -void OSD::write_superblock(ceph::os::Transaction& t) -{ - bufferlist bl; - encode(superblock, bl); - t.write(meta_coll->cid, OSD_SUPERBLOCK_GOBJECT, 0, bl.length(), bl); -} - -seastar::future<> OSD::read_superblock() -{ - // just-enough superblock so mon can ack my MOSDBoot - return store->read(meta_coll, OSD_SUPERBLOCK_GOBJECT, 0, 0) - .then([this] (bufferlist&& bl) { - auto p = bl.cbegin(); - decode(superblock, p); - return seastar::now(); - }); -} - seastar::future<> OSD::handle_osd_map(ceph::net::ConnectionRef conn, Ref<MOSDMap> m) { @@ -333,24 +360,30 @@ seastar::future<> OSD::handle_osd_map(ceph::net::ConnectionRef conn, 
skip_maps = true; start = first; } - // TODO: store new maps: queue for disk and put in the osdmap cache - store_maps(start, m); - // even if this map isn't from a mon, we may have satisfied our subscription - monc.sub_got("osdmap", last); - if (!superblock.oldest_map || skip_maps) { - superblock.oldest_map = first; - } - superblock.newest_map = last; - superblock.current_epoch = last; + return seastar::do_with(ceph::os::Transaction{}, + [=](auto& t) { + return store_maps(t, start, m).then([=, &t] { + // even if this map isn't from a mon, we may have satisfied our subscription + monc.sub_got("osdmap", last); + if (!superblock.oldest_map || skip_maps) { + superblock.oldest_map = first; + } + superblock.newest_map = last; + superblock.current_epoch = last; - // note in the superblock that we were clean thru the prior epoch - if (boot_epoch && boot_epoch >= superblock.mounted) { - superblock.mounted = boot_epoch; - superblock.clean_thru = last; - } - // TODO: write to superblock and commit the transaction - return committed_osd_maps(start, last, m); + // note in the superblock that we were clean thru the prior epoch + if (boot_epoch && boot_epoch >= superblock.mounted) { + superblock.mounted = boot_epoch; + superblock.clean_thru = last; + } + meta_coll->store_superblock(t, superblock); + return store->do_transaction(meta_coll->collection(), std::move(t)); + }); + }).then([=] { + // TODO: write to superblock and commit the transaction + return committed_osd_maps(start, last, m); + }); } seastar::future<> OSD::committed_osd_maps(version_t first, @@ -359,54 +392,56 @@ seastar::future<> OSD::committed_osd_maps(version_t first, { logger().info("osd.{}: committed_osd_maps({}, {})", whoami, first, last); // advance through the new maps - for (epoch_t cur = first; cur <= last; cur++) { - osdmap = get_map(cur); - if (up_epoch != 0 && - osdmap->is_up(whoami) && - osdmap->get_addrs(whoami) == client_msgr->get_myaddrs()) { - up_epoch = osdmap->get_epoch(); - if (!boot_epoch) { - 
boot_epoch = osdmap->get_epoch(); + return seastar::parallel_for_each(boost::irange(first, last + 1), + [this](epoch_t cur) { + return get_map(cur).then([this](seastar::lw_shared_ptr<OSDMap> o) { + if (up_epoch != 0 && + osdmap->is_up(whoami) && + osdmap->get_addrs(whoami) == client_msgr->get_myaddrs()) { + up_epoch = osdmap->get_epoch(); + if (!boot_epoch) { + boot_epoch = osdmap->get_epoch(); + } + } + }); + }).then([m, this] { + if (osdmap->is_up(whoami) && + osdmap->get_addrs(whoami) == client_msgr->get_myaddrs() && + bind_epoch < osdmap->get_up_from(whoami)) { + if (state.is_booting()) { + logger().info("osd.{}: activating...", whoami); + state.set_active(); + beacon_timer.arm_periodic( + std::chrono::seconds(local_conf()->osd_beacon_report_interval)); } } - } - - if (osdmap->is_up(whoami) && - osdmap->get_addrs(whoami) == client_msgr->get_myaddrs() && - bind_epoch < osdmap->get_up_from(whoami)) { - if (state.is_booting()) { - logger().info("osd.{}: activating...", whoami); - state.set_active(); - beacon_timer.arm_periodic( - std::chrono::seconds(local_conf()->osd_beacon_report_interval)); - } - } - if (state.is_active()) { - logger().info("osd.{}: now active", whoami); - if (!osdmap->exists(whoami)) { - return shutdown(); - } - if (should_restart()) { - return restart(); + if (state.is_active()) { + logger().info("osd.{}: now active", whoami); + if (!osdmap->exists(whoami)) { + return shutdown(); + } + if (should_restart()) { + return restart(); + } else { + return seastar::now(); + } + } else if (state.is_preboot()) { + logger().info("osd.{}: now preboot", whoami); + + if (m->get_source().is_mon()) { + logger().info("osd.{}: _preboot", whoami); + return _preboot(m->oldest_map, m->newest_map); + } else { + logger().info("osd.{}: start_boot", whoami); + return start_boot(); + } } else { + logger().info("osd.{}: now ???", whoami); + // XXX return seastar::now(); } - } else if (state.is_preboot()) { - logger().info("osd.{}: now preboot", whoami); - - if 
(m->get_source().is_mon()) { - logger().info("osd.{}: _preboot", whoami); - return _preboot(m->oldest_map, m->newest_map); - } else { - logger().info("osd.{}: start_boot", whoami); - return start_boot(); - } - } else { - logger().info("osd.{}: now ???", whoami); - // XXX - return seastar::now(); - } + }); } bool OSD::should_restart() const @@ -458,3 +493,8 @@ seastar::future<> OSD::send_beacon() min_last_epoch_clean); return monc.send_message(m); } + +ghobject_t OSD::get_osdmap_pobject_name(epoch_t epoch) { + string name = fmt::format("osdmap.{}", epoch); + return ghobject_t(hobject_t(sobject_t(object_t(name), 0))); +} diff --git a/src/crimson/osd/osd.h b/src/crimson/osd/osd.h index 47d77e48f77..0498c97d23b 100644 --- a/src/crimson/osd/osd.h +++ b/src/crimson/osd/osd.h @@ -15,6 +15,7 @@ class MOSDMap; class OSDMap; +class OSDMeta; namespace ceph::net { class Messenger; @@ -23,6 +24,7 @@ namespace ceph::net { namespace ceph::os { class CyanStore; struct Collection; + class Transaction; } class OSD : public ceph::net::Dispatcher { @@ -38,11 +40,11 @@ class OSD : public ceph::net::Dispatcher { // TODO: use LRU cache std::map<epoch_t, seastar::lw_shared_ptr<OSDMap>> osdmaps; + std::map<epoch_t, bufferlist> map_bl_cache; seastar::lw_shared_ptr<OSDMap> osdmap; // TODO: use a wrapper for ObjectStore std::unique_ptr<ceph::os::CyanStore> store; - using CollectionRef = boost::intrusive_ptr<ceph::os::Collection>; - CollectionRef meta_coll; + std::unique_ptr<OSDMeta> meta_coll; OSDState state; @@ -72,16 +74,22 @@ public: seastar::future<> start(); seastar::future<> stop(); + static ghobject_t get_osdmap_pobject_name(epoch_t epoch); + private: seastar::future<> start_boot(); seastar::future<> _preboot(version_t newest_osdmap, version_t oldest_osdmap); seastar::future<> _send_boot(); - seastar::lw_shared_ptr<OSDMap> get_map(epoch_t e); - // TODO: should batch the write op along with superdisk modification as a - // transaction - void store_maps(epoch_t start, Ref<MOSDMap> m); + seastar::future<seastar::lw_shared_ptr<OSDMap>> get_map(epoch_t e); + seastar::future<bufferlist> load_map_bl(epoch_t e); + void store_map_bl(ceph::os::Transaction& t, + epoch_t e, 
bufferlist&& bl); + seastar::future<> store_maps(ceph::os::Transaction& t, + epoch_t start, Ref<MOSDMap> m); seastar::future<> osdmap_subscribe(version_t epoch, bool force_request); + + void write_superblock(ceph::os::Transaction& t); seastar::future<> read_superblock(); seastar::future<> handle_osd_map(ceph::net::ConnectionRef conn, diff --git a/src/crimson/osd/osd_meta.cc b/src/crimson/osd/osd_meta.cc new file mode 100644 index 00000000000..6eb225fe8b9 --- /dev/null +++ b/src/crimson/osd/osd_meta.cc @@ -0,0 +1,80 @@ +#include "osd_meta.h" + +#include "crimson/os/cyan_collection.h" +#include "crimson/os/cyan_store.h" +#include "crimson/os/Transaction.h" + +void OSDMeta::create(ceph::os::Transaction& t) +{ + t.create_collection(coll->cid, 0); +} + +void OSDMeta::store_map(ceph::os::Transaction& t, + epoch_t e, const bufferlist& m) +{ + t.write(coll->cid, osdmap_oid(e), 0, m.length(), m); +} + +seastar::future<bufferlist> OSDMeta::load_map(epoch_t e) +{ + return store->read(coll, + osdmap_oid(e), 0, 0, + CEPH_OSD_OP_FLAG_FADVISE_WILLNEED); +} + +void OSDMeta::store_superblock(ceph::os::Transaction& t, + const OSDSuperblock& superblock) +{ + bufferlist bl; + encode(superblock, bl); + t.write(coll->cid, superblock_oid(), 0, bl.length(), bl); +} + +seastar::future<OSDSuperblock> OSDMeta::load_superblock() +{ + return store->read(coll, superblock_oid(), 0, 0) + .then([this] (bufferlist&& bl) { + auto p = bl.cbegin(); + OSDSuperblock superblock; + decode(superblock, p); + return seastar::make_ready_future<OSDSuperblock>(std::move(superblock)); + }); +} + +seastar::future<pg_pool_t, std::string, OSDMeta::ec_profile_t> +OSDMeta::load_final_pool_info(int64_t pool) { + return store->read(coll, final_pool_info_oid(pool), + 0, 0).then([this] (bufferlist&& bl) { + auto p = bl.cbegin(); + pg_pool_t pi; + string name; + ec_profile_t ec_profile; + decode(pi, p); + decode(name, p); + decode(ec_profile, p); + return seastar::make_ready_future<pg_pool_t, string, ec_profile_t>(std::move(pi), + std::move(name), + std::move(ec_profile)); + }); +} + +ghobject_t OSDMeta::osdmap_oid(epoch_t epoch) +{ + string name = 
fmt::format("osdmap.{}", epoch); + return ghobject_t(hobject_t(sobject_t(object_t(name), 0))); +} + +ghobject_t OSDMeta::final_pool_info_oid(int64_t pool) +{ + string name = fmt::format("final_pool_{}", pool); + return ghobject_t(hobject_t(sobject_t(object_t(name), CEPH_NOSNAP))); +} + +ghobject_t OSDMeta::superblock_oid() +{ + return ghobject_t(hobject_t(sobject_t(object_t("osd_superblock"), 0))); +} diff --git a/src/crimson/osd/osd_meta.h b/src/crimson/osd/osd_meta.h new file mode 100644 index 00000000000..936d954815f --- /dev/null +++ b/src/crimson/osd/osd_meta.h @@ -0,0 +1,53 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <map> +#include <string> +#include <seastar/core/future.hh> +#include "osd/osd_types.h" + +namespace ceph::os { + class CyanStore; + class Collection; + class Transaction; +} + +/// metadata shared across PGs, or put in another way, +/// metadata not specific to certain PGs. +class OSDMeta { + template<typename T> using Ref = boost::intrusive_ptr<T>; + + ceph::os::CyanStore* store; + Ref<ceph::os::Collection> coll; + +public: + OSDMeta(Ref<ceph::os::Collection> coll, + ceph::os::CyanStore* store) + : store{store}, coll{coll} + {} + + + auto collection() { + return coll; + } + void create(ceph::os::Transaction& t); + + void store_map(ceph::os::Transaction& t, + epoch_t e, const bufferlist& m); + seastar::future<bufferlist> load_map(epoch_t e); + + void store_superblock(ceph::os::Transaction& t, + const OSDSuperblock& sb); + seastar::future<OSDSuperblock> load_superblock(); + + using ec_profile_t = std::map<string, string>; + seastar::future<pg_pool_t, std::string, ec_profile_t> load_final_pool_info(int64_t pool); +private: + static ghobject_t osdmap_oid(epoch_t epoch); + static ghobject_t final_pool_info_oid(int64_t pool); + static ghobject_t superblock_oid(); +};