From 492e6351e5723f7d97defb01d66a3505fd33a82c Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Mon, 7 May 2012 12:51:55 -0800 Subject: [PATCH] OSD: do not drop osd_lock in handle_osd_map PGs have their map updates done in a different thread. Thus, we no longer need to grab the pg locks. activate_map no longer requires the map_lock in order to allow us to queue events for the pgs. Signed-off-by: Samuel Just --- src/osd/OSD.cc | 103 +++++-------------------------------------------- src/osd/OSD.h | 6 +-- 2 files changed, 11 insertions(+), 98 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index bc5f4e6afc948..8764110536c0c 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -623,7 +623,6 @@ OSD::OSD(int id, Messenger *internal_messenger, Messenger *external_messenger, monc(mc), logger(NULL), store(NULL), - map_in_progress(false), clog(external_messenger->cct, client_messenger, &mc->monmap, LogClient::NO_FLAGS), whoami(id), dev_path(dev), journal_path(jdev), @@ -676,15 +675,11 @@ OSD::OSD(int id, Messenger *internal_messenger, Messenger *external_messenger, watch_timer(external_messenger->cct, watch_lock) { monc->set_messenger(client_messenger); - - map_in_progress_cond = new Cond(); - } OSD::~OSD() { delete authorize_handler_registry; - delete map_in_progress_cond; delete class_handler; g_ceph_context->get_perfcounters_collection()->remove(logger); delete logger; @@ -2891,14 +2886,6 @@ void OSD::_dispatch(Message *m) dout(20) << "_dispatch " << m << " " << *m << dendl; Session *session = NULL; - if (map_in_progress_cond) { //can't dispatch while map is being updated! - if (map_in_progress) { - dout(25) << "waiting for handle_osd_map to complete before dispatching" << dendl; - while (map_in_progress) - map_in_progress_cond->Wait(osd_lock); - } - } - logger->set(l_osd_buf, buffer::get_total_alloc()); switch (m->get_type()) { @@ -3242,17 +3229,6 @@ void OSD::handle_osd_map(MOSDMap *m) skip_maps = true; } - if (map_in_progress_cond) { - if (map_in_progress) { - dout(15) << "waiting for prior handle_osd_map to complete" << dendl; - while (map_in_progress) { - map_in_progress_cond->Wait(osd_lock); - } - } - dout(10) << "locking handle_osd_map permissions" << dendl; - map_in_progress = true; - } - ObjectStore::Transaction t; // store new maps: queue for disk and put in the osdmap cache @@ -3312,32 +3288,6 @@ void OSD::handle_osd_map(MOSDMap *m) assert(0 == "MOSDMap lied about what maps it had?"); } -/* - // TODOSAM: find strategy for cluster snaps - // check for cluster snapshot - string cluster_snap; - for (epoch_t cur = start; cur <= last && cluster_snap.length() == 0; cur++) { - OSDMapRef newmap = get_map(cur); - cluster_snap = newmap->get_cluster_snapshot(); - } - - // flush here so that the peering code can re-read any pg data off - // disk that it needs to... say for backlog generation. (hmm, is - // this really needed?) - osd_lock.Unlock(); - if (cluster_snap.length()) { - dout(0) << "creating cluster snapshot '" << cluster_snap << "'" << dendl; - int r = store->snapshot(cluster_snap); - if (r) - dout(0) << "failed to create cluster snapshot: " << cpp_strerror(r) << dendl; - } else { - store->flush(); - } - osd_lock.Lock(); - - assert(osd_lock.is_locked()); - */ - if (superblock.oldest_map) { for (epoch_t e = superblock.oldest_map; e < m->oldest_map; ++e) { dout(20) << " removing old osdmap epoch " << e << dendl; @@ -3395,9 +3345,6 @@ void OSD::handle_osd_map(MOSDMap *m) dout(1) << "state: booting -> active" << dendl; state = STATE_ACTIVE; } - - // yay! - activate_map(t, fin->contexts); } // write and unlock pgs @@ -3459,8 +3406,6 @@ void OSD::handle_osd_map(MOSDMap *m) } } - // process waiters - take_waiters(waiting_for_osdmap); // note in the superblock that we were clean thru the prior epoch if (boot_epoch && boot_epoch >= superblock.mounted) { @@ -3468,12 +3413,6 @@ void OSD::handle_osd_map(MOSDMap *m) superblock.clean_thru = osdmap->get_epoch(); } - // completion - Mutex ulock("OSD::handle_osd_map() ulock"); - Cond ucond; - bool udone; - fin->add(new C_SafeCond(&ulock, &ucond, &udone)); - // superblock and commit write_superblock(t); int r = store->apply_transaction(t, fin); @@ -3488,23 +3427,8 @@ void OSD::handle_osd_map(MOSDMap *m) clear_map_bl_cache_pins(); map_lock.put_write(); - /* - * wait for this to be stable. - * - * NOTE: This is almost certainly overkill. The important things - * that need to be stable to make the peering/recovery stuff correct - * include: - * - * - last_epoch_started, since that bounds how far back in time we - * check old peers - * - ??? - */ - osd_lock.Unlock(); - ulock.Lock(); - while (!udone) - ucond.Wait(ulock); - ulock.Unlock(); - osd_lock.Lock(); + // yay! + activate_map(); if (m->newest_map && m->newest_map > last) { dout(10) << " msg say newest map is " << m->newest_map << ", requesting more" << dendl; @@ -3521,12 +3445,6 @@ void OSD::handle_osd_map(MOSDMap *m) shutdown(); m->put(); - - if (map_in_progress_cond) { - map_in_progress = false; - dout(15) << "unlocking map_in_progress" << dendl; - map_in_progress_cond->Signal(); - } } void OSD::advance_pg(epoch_t osd_epoch, PG *pg, PG::RecoveryCtx *rctx) @@ -3659,7 +3577,7 @@ void OSD::advance_map(ObjectStore::Transaction& t, C_Contexts *tfin) } } -void OSD::activate_map(ObjectStore::Transaction& t, list& tfin) +void OSD::activate_map() { assert(osd_lock.is_locked()); @@ -3678,7 +3596,7 @@ void OSD::activate_map(ObjectStore::Transaction& t, list& tfin) it != pg_map.end(); it++) { PG *pg = it->second; - pg->lock_with_map_lock_held(); + pg->lock(); if (pg->is_primary()) num_pg_primary++; else if (pg->is_replica()) @@ -3707,16 +3625,15 @@ void OSD::activate_map(ObjectStore::Transaction& t, list& tfin) wake_all_pg_waiters(); // the pg mapping may have shifted maybe_update_heartbeat_peers(); -/* - // TODOSAM: solve map_cache trimming problem - trim_map_cache(oldest_last_clean); - */ if (osdmap->test_flag(CEPH_OSDMAP_FULL)) { dout(10) << " osdmap flagged full, doing onetime osdmap subscribe" << dendl; monc->sub_want("osdmap", osdmap->get_epoch() + 1, CEPH_SUBSCRIBE_ONETIME); monc->renew_subs(); } + + // process waiters + take_waiters(waiting_for_osdmap); } @@ -5349,11 +5266,11 @@ void OSD::process_peering_event(PG *pg) } pg->unlock(); } - do_notifies(notify_list); - do_queries(query_map); - do_infos(info_map); { Mutex::Locker l(osd_lock); + do_notifies(notify_list); + do_queries(query_map); + do_infos(info_map); send_pg_temp(); } } diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 31f79b065f215..ffbfbf67dd7d8 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -142,10 +142,6 @@ protected: PerfCounters *logger; ObjectStore *store; - // cover OSDMap update data when using multiple msgrs - Cond *map_in_progress_cond; - bool map_in_progress; - LogClient clog; int whoami; @@ -442,7 +438,7 @@ private: void advance_pg(epoch_t advance_to, PG *pg, PG::RecoveryCtx *rctx); void advance_map(ObjectStore::Transaction& t, C_Contexts *tfin); - void activate_map(ObjectStore::Transaction& t, list& tfin); + void activate_map(); // osd map cache (past osd maps) Mutex map_cache_lock; -- 2.39.5