From 47d627736a7a199c6bf66e1117029ce2b77b404d Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Wed, 1 Aug 2018 16:33:22 -0500
Subject: [PATCH] osd: drain peering wq in start_boot, not _committed_maps

We can't safely block in _committed_osd_maps because we are being run
by the store's finisher threads, and we may have to wait for a PG to
split and then merge via that same queue and deadlock.

Do not hold osd_lock while waiting as this can interfere with *other*
objectstore completions that take osd_lock.

Signed-off-by: Sage Weil
---
 src/osd/OSD.cc | 35 ++++++++++++++++++++++++++++-------
 src/osd/OSD.h  |  1 +
 2 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 668771af71013..41f3b9699dd94 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -2048,6 +2048,7 @@ OSD::OSD(CephContext *cct_, ObjectStore *store_,
   map_lock("OSD::map_lock"),
   last_pg_create_epoch(0),
   mon_report_lock("OSD::mon_report_lock"),
+  boot_finisher(cct),
   up_thru_wanted(0),
   requested_full_first(0),
   requested_full_last(0),
@@ -2527,6 +2528,8 @@ int OSD::init()
   service.recovery_request_timer.init();
   service.sleep_timer.init();
 
+  boot_finisher.start();
+
   // mount.
   dout(2) << "init " << dev_path << " (looks like "
           << (store_is_rotational ? "hdd" : "ssd") << ")"
@@ -3495,8 +3498,11 @@ int OSD::shutdown()
   dout(10) << "stopping agent" << dendl;
   service.agent_stop();
 
+  boot_finisher.wait_for_empty();
+
   osd_lock.Lock();
 
+  boot_finisher.stop();
   reset_heartbeat_peers();
 
   tick_timer.shutdown();
@@ -5523,7 +5529,27 @@ void OSD::_preboot(epoch_t oldest, epoch_t newest)
     send_full_update();
   } else if (osdmap->get_epoch() >= oldest - 1 &&
              osdmap->get_epoch() + cct->_conf->osd_map_message_max > newest) {
-    _send_boot();
+
+    // wait for pgs to fully catch up in a different thread, since
+    // this thread might be required for splitting and merging PGs to
+    // make progress.
+    boot_finisher.queue(
+      new FunctionContext(
+        [this](int r) {
+          Mutex::Locker l(osd_lock);
+          if (is_preboot()) {
+            dout(10) << __func__ << " waiting for peering work to drain"
+                     << dendl;
+            osd_lock.Unlock();
+            for (auto shard : shards) {
+              shard->wait_min_pg_epoch(osdmap->get_epoch());
+            }
+            osd_lock.Lock();
+          }
+          if (is_preboot()) {
+            _send_boot();
+          }
+        }));
     return;
   }
 
@@ -7925,12 +7951,7 @@ void OSD::_committed_osd_maps(epoch_t first, epoch_t last, MOSDMap *m)
   if (is_active() || is_waiting_for_healthy())
     maybe_update_heartbeat_peers();
 
-  if (!is_active()) {
-    dout(10) << " not yet active; waiting for peering work to drain" << dendl;
-    for (auto shard : shards) {
-      shard->wait_min_pg_epoch(last);
-    }
-  } else {
+  if (is_active()) {
     activate_map();
   }
 
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index 43bea30445809..a0e9b373b6e26 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -1947,6 +1947,7 @@ protected:
   // == monitor interaction ==
   Mutex mon_report_lock;
  utime_t last_mon_report;
+  Finisher boot_finisher;
 
   // -- boot --
   void start_boot();
-- 
2.39.5
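
For readers outside the Ceph tree, the idea behind the patch can be shown with a small
standalone sketch: the potentially blocking "wait for PGs, then boot" step is handed to a
dedicated worker thread instead of running inline on the objectstore's completion thread,
and the big lock is released around the wait so other completions that need it can still
make progress. This is an illustration only, built on the C++ standard library; the names
(Finisher, Booter, wait_for_pgs, send_boot) are stand-ins, not Ceph's actual Finisher,
FunctionContext, or osd_lock types.

#include <chrono>
#include <condition_variable>
#include <functional>
#include <iostream>
#include <mutex>
#include <queue>
#include <thread>

// Tiny single-threaded "finisher": runs queued callbacks, in order, on its own thread.
class Finisher {
  std::thread worker_;
  std::mutex m_;
  std::condition_variable cv_;
  std::queue<std::function<void()>> q_;
  bool stopping_ = false;

public:
  void start() {
    worker_ = std::thread([this] {
      std::unique_lock<std::mutex> l(m_);
      while (true) {
        cv_.wait(l, [this] { return stopping_ || !q_.empty(); });
        if (q_.empty())
          return;                     // stopping_ is set and nothing is left to run
        auto fn = std::move(q_.front());
        q_.pop();
        l.unlock();                   // never hold the queue lock while running a callback
        fn();
        l.lock();
      }
    });
  }
  void queue(std::function<void()> fn) {
    std::lock_guard<std::mutex> l(m_);
    q_.push(std::move(fn));
    cv_.notify_one();
  }
  void stop() {                       // drains queued work, then joins the thread
    {
      std::lock_guard<std::mutex> l(m_);
      stopping_ = true;
      cv_.notify_one();
    }
    worker_.join();
  }
};

struct Booter {
  std::mutex osd_lock;                // stand-in for OSD::osd_lock
  bool preboot = true;                // stand-in for is_preboot()
  Finisher boot_finisher;             // dedicated thread for the blocking boot step

  void wait_for_pgs() {               // stand-in for shard->wait_min_pg_epoch(...)
    std::this_thread::sleep_for(std::chrono::milliseconds(50));
  }
  void send_boot() { std::cout << "boot message sent\n"; }

  void preboot_step() {
    // Queue the potentially blocking work instead of doing it inline, so the
    // caller's thread (the store's completion thread in the real OSD) never blocks.
    boot_finisher.queue([this] {
      std::unique_lock<std::mutex> l(osd_lock);
      if (preboot) {
        l.unlock();                   // do NOT hold the big lock while waiting
        wait_for_pgs();
        l.lock();
      }
      if (preboot)                    // re-check: state may have changed while unlocked
        send_boot();
    });
  }
};

int main() {
  Booter b;
  b.boot_finisher.start();
  b.preboot_step();
  b.boot_finisher.stop();             // waits for the queued boot step to finish
}

The essential points mirror the patch: the caller's thread never blocks, the lock is
dropped before the wait and re-taken afterwards, and the preboot check is repeated in
case state changed while the lock was released.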