From 8b5f0f71bf66da1d6fa3f319d88eba34329794fb Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 14 Nov 2008 16:56:59 -0800 Subject: [PATCH] osd: maintain a cache of past osd maps during repeering It's expensive and stupid to load and reparse them for each PG. --- src/osd/OSD.cc | 42 +++++++++++++++++++++++++++++++----------- src/osd/OSD.h | 8 +++++++- src/osd/PG.cc | 8 +++----- 3 files changed, 41 insertions(+), 17 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index e7b82e7233026..cec7888a91ea2 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -270,6 +270,8 @@ OSD::OSD(int id, Messenger *m, MonMap *mm, const char *dev) : qlen_calc(3), iat_averager(g_conf.osd_flash_crowd_iat_alpha), finished_lock("OSD::finished_lock"), + osdmap(NULL), + map_cache_lock("OSD::map_cache_lock"), snap_trimmer_lock("OSD::snap_trimmer_lock"), snap_trimmer_thread(this), pg_stat_queue_lock("OSD::pg_stat_queue_lock"), @@ -732,10 +734,9 @@ void OSD::calc_priors_during(pg_t pgid, epoch_t start, epoch_t end, set& ps dout(15) << "calc_priors_during " << pgid << " [" << start << "," << end << ")" << dendl; for (epoch_t e = start; e < end; e++) { - OSDMap oldmap; - get_map(e, oldmap); + OSDMap *oldmap = get_map(e); vector acting; - oldmap.pg_to_acting_osds(pgid, acting); + oldmap->pg_to_acting_osds(pgid, acting); dout(20) << " " << pgid << " in epoch " << e << " was " << acting << dendl; int added = 0; for (unsigned i=0; i from; e--) { // verify during intermediate epoch (e-1) - OSDMap oldmap; - get_map(e-1, oldmap); + OSDMap *oldmap = get_map(e-1); vector acting; - oldmap.pg_to_acting_osds(pgid, acting); + oldmap->pg_to_acting_osds(pgid, acting); // acting set change? if (acting != last && @@ -937,8 +937,7 @@ void OSD::heartbeat() messenger->get_myinst()); return; } - - + // get CPU load avg ifstream in("/proc/loadavg"); if (in.is_open()) { @@ -1900,6 +1899,8 @@ void OSD::activate_map(ObjectStore::Transaction& t) map< int, map > query_map; // peer -> PG -> get_summary_since map info_map; // peer -> message + clear_map_cache(); // we're done with it + // scan pg's for (hash_map::iterator it = pg_map.begin(); it != pg_map.end(); @@ -1979,8 +1980,15 @@ bool OSD::get_inc_map_bl(epoch_t e, bufferlist& bl) return store->read(0, get_inc_osdmap_pobject_name(e), 0, 0, bl) >= 0; } -void OSD::get_map(epoch_t epoch, OSDMap &m) +OSDMap *OSD::get_map(epoch_t epoch) { + Mutex::Locker l(map_cache_lock); + + if (map_cache.count(epoch)) + return map_cache[epoch]; + + OSDMap *map = new OSDMap; + // find a complete map list incs; epoch_t e; @@ -1988,7 +1996,7 @@ void OSD::get_map(epoch_t epoch, OSDMap &m) bufferlist bl; if (get_map_bl(e, bl)) { //dout(10) << "get_map " << epoch << " full " << e << dendl; - m.decode(bl); + map->decode(bl); break; } else { OSDMap::Incremental inc; @@ -2002,11 +2010,23 @@ void OSD::get_map(epoch_t epoch, OSDMap &m) // apply incrementals for (e++; e <= epoch; e++) { //dout(10) << "get_map " << epoch << " inc " << e << dendl; - m.apply_incremental( incs.front() ); + map->apply_incremental( incs.front() ); incs.pop_front(); } + + map_cache[epoch] = map; + return map; } +void OSD::clear_map_cache() +{ + Mutex::Locker l(map_cache_lock); + for (map::iterator p = map_cache.begin(); + p != map_cache.end(); + p++) + delete p->second; + map_cache.clear(); +} bool OSD::get_inc_map(epoch_t e, OSDMap::Incremental &inc) { diff --git a/src/osd/OSD.h b/src/osd/OSD.h index b4ac9ed338277..aa8ed2a37d378 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -263,7 +263,13 @@ private: void advance_map(ObjectStore::Transaction& t, interval_set& removed_snaps); void activate_map(ObjectStore::Transaction& t); - void get_map(epoch_t e, OSDMap &m); + // osd map cache (past osd maps) + map map_cache; + Mutex map_cache_lock; + + OSDMap* get_map(epoch_t e); + void clear_map_cache(); + bool get_map_bl(epoch_t e, bufferlist& bl); bool get_inc_map_bl(epoch_t e, bufferlist& bl); bool get_inc_map(epoch_t e, OSDMap::Incremental &inc); diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 78f7432770e07..a999081e0c587 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -657,9 +657,8 @@ void PG::build_prior() epoch_t stop = MAX(1, info.history.last_epoch_started); dout(10) << "build_prior considering interval " << first_epoch << " down to " << stop << dendl; - OSDMap *nextmap = new OSDMap; - osd->get_map(last_epoch, *nextmap); - + OSDMap *nextmap = osd->get_map(last_epoch); + for (; last_epoch >= stop; last_epoch = first_epoch-1) { OSDMap *lastmap = nextmap; assert(last_epoch == lastmap->get_epoch()); @@ -668,9 +667,8 @@ void PG::build_prior() lastmap->pg_to_acting_osds(get_pgid(), acting); // calc first_epoch, first_map - nextmap = new OSDMap; for (first_epoch = last_epoch; first_epoch > stop; first_epoch--) { - osd->get_map(first_epoch-1, *nextmap); + nextmap = osd->get_map(first_epoch-1); vector t; nextmap->pg_to_acting_osds(get_pgid(), t); if (t != acting) -- 2.39.5