From 547338694a5ad9a62bc3f5d1490594754efe14df Mon Sep 17 00:00:00 2001 From: Shraddha Agrawal Date: Tue, 27 Jan 2026 19:12:54 +0530 Subject: [PATCH] crimson/osd: add verbose DEBUG logs for OSD startup This commit adds verbose logs for each step of OSD startup to aid in debugging OSD startup failures. Signed-off-by: Shraddha Agrawal --- src/crimson/osd/main.cc | 51 +++++++++++++++++++++++++++++++++---- src/crimson/osd/osd.cc | 42 +++++++++++++++++++++--------- src/crimson/osd/osd_meta.cc | 8 +++++- 3 files changed, 83 insertions(+), 18 deletions(-) diff --git a/src/crimson/osd/main.cc b/src/crimson/osd/main.cc index 461a2652ecc..a24ace8c470 100644 --- a/src/crimson/osd/main.cc +++ b/src/crimson/osd/main.cc @@ -33,6 +33,8 @@ #include "global/pidfile.h" #include "osd.h" +SET_SUBSYS(osd); + using namespace std::literals; namespace bpo = boost::program_options; using crimson::common::local_conf; @@ -89,6 +91,9 @@ static std::ofstream maybe_set_logger() int main(int argc, const char* argv[]) { + LOG_PREFIX(OSD::main); + + INFO("parsing early config"); auto early_config_result = crimson::osd::get_early_config(argc, argv); if (!early_config_result.has_value()) { int r = early_config_result.error(); @@ -96,14 +101,18 @@ int main(int argc, const char* argv[]) return r; } auto &early_config = early_config_result.value(); + INFO("early config parsed successfully"); auto seastar_n_early_args = early_config.get_early_args(); auto config_proxy_args = early_config.get_ceph_args(); + INFO("initializing seastar app_template"); seastar::app_template::config app_cfg; app_cfg.name = "Crimson"; app_cfg.auto_handle_sigint_sigterm = false; seastar::app_template app(std::move(app_cfg)); + + INFO("registering CLI options"); app.add_options() ("mkkey", "generate a new secret key. " "This is normally used in combination with --mkfs") @@ -120,6 +129,7 @@ int main(int argc, const char* argv[]) "Prometheus metrics prefix"); try { + INFO("entering seastar runtime"); return app.run( seastar_n_early_args.size(), const_cast(seastar_n_early_args.data()), @@ -127,31 +137,46 @@ int main(int argc, const char* argv[]) auto& config = app.configuration(); return seastar::async([&] { try { + INFO("seastar runtime started"); + FatalSignal fatal_signal; seastar_apps_lib::stop_signal should_stop; + if (config.count("debug")) { + INFO("enabling debug logging"); seastar::global_logger_registry().set_all_loggers_level( seastar::log_level::debug ); } if (config.count("trace")) { + INFO("enabling trace logging"); seastar::global_logger_registry().set_all_loggers_level( seastar::log_level::trace ); } + + DEBUG("starting sharded config service"); sharded_conf().start( early_config.init_params.name, early_config.cluster_name).get(); local_conf().start().get(); auto stop_conf = seastar::deferred_stop(sharded_conf()); + + DEBUG("starting performance counters"); sharded_perf_coll().start().get(); auto stop_perf_coll = seastar::deferred_stop(sharded_perf_coll()); + + DEBUG("parsing config files"); local_conf().parse_config_files(early_config.conf_file_list).get(); local_conf().parse_env().get(); local_conf().parse_argv(config_proxy_args).get(); + + DEBUG("initializing logger output"); auto log_file_stream = maybe_set_logger(); auto reset_logger = seastar::defer([] { logger().set_ostream(std::cerr); }); + + DEBUG("writing pidfile"); if (const auto ret = pidfile_write(local_conf()->pid_file); ret == -EACCES || ret == -EAGAIN) { ceph_abort_msg( @@ -160,6 +185,8 @@ int main(int argc, const char* argv[]) ceph_abort_msg(fmt::format("pidfile_write failed with {} {}", ret, cpp_strerror(-ret))); } + + DEBUG("setting ignore SIGHUP"); // just ignore SIGHUP, we don't reread settings. keep in mind signals // handled by S* must be blocked for alien threads (see AlienStore). seastar::handle_signal(SIGHUP, [] {}); @@ -169,6 +196,8 @@ int main(int argc, const char* argv[]) std::any stop_prometheus; if (uint16_t prom_port = config["prometheus_port"].as(); prom_port != 0) { + + DEBUG("starting prometheus server on port {}", prom_port); prom_server.start("prometheus").get(); stop_prometheus = seastar::make_shared(seastar::deferred_stop(prom_server)); @@ -184,6 +213,7 @@ int main(int argc, const char* argv[]) }).get(); } + DEBUG("creating messengers"); const int whoami = std::stoi(local_conf()->name.get_id()); const auto nonce = crimson::osd::get_nonce(); crimson::net::MessengerRef cluster_msgr, client_msgr; @@ -202,27 +232,35 @@ int main(int argc, const char* argv[]) nonce, true); } + + DEBUG("creating object store"); auto store = crimson::os::FuturizedStore::create( local_conf().get_val("osd_objectstore"), local_conf().get_val("osd_data"), local_conf().get_config_values()); - logger().info("passed objectstore is {}", local_conf().get_val("osd_objectstore")); + INFO("passed objectstore is {}", local_conf().get_val("osd_objectstore")); + crimson::osd::OSD osd( whoami, nonce, std::ref(should_stop.abort_source()), std::ref(*store), cluster_msgr, client_msgr, hb_front_msgr, hb_back_msgr); if (config.count("mkkey")) { + DEBUG("generating keyring"); make_keyring().get(); } + if (local_conf()->no_mon_config) { - logger().info("bypassing the config fetch due to --no-mon-config"); + INFO("bypassing the config fetch due to --no-mon-config"); } else { + DEBUG("fetching config from monitors"); crimson::osd::populate_config_from_mon().get(); } if (config.count("mkfs")) { + DEBUG("running mkfs"); auto osd_uuid = local_conf().get_val("osd_uuid"); if (osd_uuid.is_zero()) { + DEBUG("uuid not specified, generating random osd uuid"); // use a random osd uuid if not specified osd_uuid.generate_random(); } @@ -234,20 +272,23 @@ int main(int argc, const char* argv[]) config["osdspec-affinity"].as()).get(); } if (config.count("mkkey") || config.count("mkfs")) { + DEBUG("exiting, mkkey {}, mkfs {}", config.count("mkkey"), config.count("mkfs")); return EXIT_SUCCESS; } else { + DEBUG("starting OSD services"); osd.start().get(); } - logger().info("crimson startup completed"); + INFO("crimson startup completed"); + should_stop.wait().get(); - logger().info("crimson shutting down"); + INFO("crimson shutting down"); osd.stop().get(); // stop()s registered using defer() are called here } catch (...) { logger().error("startup failed: {}", std::current_exception()); return EXIT_FAILURE; } - logger().info("crimson shutdown complete"); + INFO("crimson shutdown complete"); return EXIT_SUCCESS; }); }); diff --git a/src/crimson/osd/osd.cc b/src/crimson/osd/osd.cc index b914b3fc42b..b3acb5f4c6d 100644 --- a/src/crimson/osd/osd.cc +++ b/src/crimson/osd/osd.cc @@ -130,6 +130,7 @@ OSD::OSD(int id, uint32_t nonce, clog(log_client.create_channel()) { LOG_PREFIX(OSD::OSD); + DEBUG(""); ceph_assert(seastar::this_shard_id() == PRIMARY_CORE); for (auto msgr : {std::ref(cluster_msgr), std::ref(public_msgr), std::ref(hb_front_msgr), std::ref(hb_back_msgr)}) { @@ -182,10 +183,13 @@ CompatSet get_osd_initial_compat_set() seastar::future<> OSD::open_meta_coll() { + LOG_PREFIX(OSD::open_meta_coll); ceph_assert(seastar::this_shard_id() == PRIMARY_CORE); + DEBUG("opening metadata collection"); return store.get_sharded_store().open_collection( coll_t::meta() - ).then([this](auto ch) { + ).then([this, FNAME](auto ch) { + DEBUG("registering metadata collection"); pg_shard_manager.init_meta_coll(ch, store.get_sharded_store()); return seastar::now(); }); @@ -257,14 +261,18 @@ seastar::future OSD::get_perf_reports() { seastar::future OSD::open_or_create_meta_coll(FuturizedStore &store) { - return store.get_sharded_store().open_collection(coll_t::meta()).then([&store](auto ch) { + LOG_PREFIX(OSD::open_or_create_meta_coll); + DEBUG(""); + return store.get_sharded_store().open_collection(coll_t::meta()).then([&store, FNAME](auto ch) { if (!ch) { + DEBUG("creating new metadata collection"); return store.get_sharded_store().create_new_collection( coll_t::meta() ).then([&store](auto ch) { return OSDMeta(ch, store.get_sharded_store()); }); } else { + DEBUG("meta collection already exists"); return seastar::make_ready_future(ch, store.get_sharded_store()); } }); @@ -278,15 +286,17 @@ seastar::future<> OSD::mkfs( std::string osdspec_affinity) { LOG_PREFIX(OSD::mkfs); - + DEBUG("starting store mkfs"); co_await store.start(); + DEBUG("calling store mkfs"); co_await store.mkfs(osd_uuid).handle_error( crimson::stateful_ec::assert_failure(fmt::format( "{} error creating empty object store in {}", FNAME, local_conf().get_val("osd_data")).c_str()) ); + DEBUG("mounting store mkfs"); co_await store.mount().handle_error( crimson::stateful_ec::assert_failure(fmt::format( "{} error mounting object store in {}", @@ -336,6 +346,7 @@ seastar::future<> OSD::_write_superblock( std::move(meta_coll), std::move(superblock), [&store, FNAME](auto &meta_coll, auto &superblock) { + DEBUG("try loading existing superblock"); return meta_coll.load_superblock( ).safe_then([&superblock, FNAME](OSDSuperblock&& sb) { if (sb.cluster_fsid != superblock.cluster_fsid) { @@ -352,19 +363,18 @@ seastar::future<> OSD::_write_superblock( crimson::ct_error::enoent::handle([&store, &meta_coll, &superblock, FNAME] { // meta collection does not yet, create superblock - INFO("{} writing superblock cluster_fsid {} osd_fsid {}", - "_write_superblock", + INFO("writing superblock cluster_fsid {} osd_fsid {}", superblock.cluster_fsid, superblock.osd_fsid); ceph::os::Transaction t; meta_coll.create(t); meta_coll.store_superblock(t, superblock); - DEBUG("OSD::_write_superblock: do_transaction..."); + DEBUG("do_transaction: create meta collection and store superblock"); return store.get_sharded_store().do_transaction( meta_coll.collection(), std::move(t)); }), - crimson::ct_error::assert_all("_write_superbock error") + crimson::ct_error::assert_all("_write_superblock error") ); }); } @@ -451,6 +461,7 @@ seastar::future<> OSD::start() } startup_time = ceph::mono_clock::now(); ceph_assert(seastar::this_shard_id() == PRIMARY_CORE); + DEBUG("starting store"); return store.start().then([this] { return pg_to_shard_mappings.start(0, seastar::smp::count ).then([this] { @@ -475,6 +486,7 @@ seastar::future<> OSD::start() heartbeat.reset(new Heartbeat{ whoami, get_shard_services(), *monc, *hb_front_msgr, *hb_back_msgr}); + DEBUG("mounting store"); return store.mount().handle_error( crimson::stateful_ec::assert_failure(fmt::format( "{} error mounting object store in {}", @@ -511,8 +523,10 @@ seastar::future<> OSD::start() stats_timer.arm_periodic(std::chrono::seconds(stats_seconds)); } + DEBUG("open metadata collection"); return open_meta_coll(); - }).then([this] { + }).then([this, FNAME] { + DEBUG("loading superblock"); return pg_shard_manager.get_meta_coll().load_superblock( ).handle_error( crimson::ct_error::assert_all("open_meta_coll error") @@ -532,8 +546,9 @@ seastar::future<> OSD::start() return shard_services.invoke_on_all([this](auto &local_service) { local_service.local_state.osdmap_gate.got_map(osdmap->get_epoch()); }); - }).then([this] { + }).then([this, FNAME] { bind_epoch = osdmap->get_epoch(); + DEBUG("loading PGs"); return pg_shard_manager.load_pgs(store); }).then([this, FNAME] { uint64_t osd_required = @@ -574,10 +589,12 @@ seastar::future<> OSD::start() [FNAME] (const std::error_code& e) { ERROR("public messenger bind(): {}", e); }))); - }).then_unpack([this] { + }).then_unpack([this, FNAME] { + DEBUG("starting mon and mgr clients"); return seastar::when_all_succeed(monc->start(), mgrc->start()); - }).then_unpack([this] { + }).then_unpack([this, FNAME] { + DEBUG("adding to crush"); return _add_me_to_crush(); }).then([this] { return _add_device_class(); @@ -609,7 +626,8 @@ seastar::future<> OSD::start() return start_asok_admin(); }).then([this] { return log_client.set_fsid(monc->get_fsid()); - }).then([this] { + }).then([this, FNAME] { + DEBUG("starting boot"); return start_boot(); }); } diff --git a/src/crimson/osd/osd_meta.cc b/src/crimson/osd/osd_meta.cc index 018ecba00af..5d60e0862a4 100644 --- a/src/crimson/osd/osd_meta.cc +++ b/src/crimson/osd/osd_meta.cc @@ -11,6 +11,8 @@ #include "os/Transaction.h" #include "osd/OSDMap.h" +SET_SUBSYS(osd); + using std::string; void OSDMeta::create(ceph::os::Transaction& t) @@ -68,11 +70,15 @@ void OSDMeta::store_superblock(ceph::os::Transaction& t, OSDMeta::load_superblock_ret OSDMeta::load_superblock() { + LOG_PREFIX(OSDMeta::load_superblock); + DEBUG(""); return store.read( coll, superblock_oid(), 0, 0 - ).safe_then([] (bufferlist&& bl) { + ).safe_then([FNAME] (bufferlist&& bl) { + DEBUG("successfully read superblock"); auto p = bl.cbegin(); OSDSuperblock superblock; + DEBUG("decoding superblock bufferlist"); decode(superblock, p); return seastar::make_ready_future(std::move(superblock)); }); -- 2.47.3