From 6951d2345a5d837c3b14103bd4d8f5ee4407c937 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Mon, 22 Jul 2013 16:00:07 -0700 Subject: [PATCH] OSD: tolerate holes in stored maps We may have holes in stored maps during init_splits_between and advance_pg. In either case, we should simply skip the missing maps. Fixes: #5677 Signed-off-by: Samuel Just Reviewed-by: Sage Weil --- src/osd/OSD.cc | 20 ++++++++++++++------ src/osd/OSD.h | 7 ++++++- src/osd/PG.cc | 1 - src/osd/osd_types.h | 7 +++++++ 4 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 4be586f3d1556..bc3aa604fecf6 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -291,11 +291,13 @@ void OSDService::init_splits_between(pg_t pgid, // Ok, a split happened, so we need to walk the osdmaps set new_pgs; // pgs to scan on each map new_pgs.insert(pgid); + OSDMapRef curmap(get_map(frommap->get_epoch())); for (epoch_t e = frommap->get_epoch() + 1; e <= tomap->get_epoch(); ++e) { - OSDMapRef curmap(get_map(e-1)); - OSDMapRef nextmap(get_map(e)); + OSDMapRef nextmap(try_get_map(e)); + if (!nextmap) + continue; set even_newer_pgs; // pgs added in this loop for (set::iterator i = new_pgs.begin(); i != new_pgs.end(); ++i) { set split_pgs; @@ -307,7 +309,9 @@ void OSDService::init_splits_between(pg_t pgid, } } new_pgs.insert(even_newer_pgs.begin(), even_newer_pgs.end()); + curmap = nextmap; } + assert(curmap == tomap); // we must have had both frommap and tomap } } @@ -5177,7 +5181,9 @@ void OSD::advance_pg( for (; next_epoch <= osd_epoch; ++next_epoch) { - OSDMapRef nextmap = get_map(next_epoch); + OSDMapRef nextmap = service.try_get_map(next_epoch); + if (!nextmap) + continue; vector newup, newacting; nextmap->pg_to_up_acting_osds(pg->info.pgid, newup, newacting); @@ -5511,7 +5517,7 @@ OSDMapRef OSDService::_add_map(OSDMap *o) return l; } -OSDMapRef OSDService::get_map(epoch_t epoch) +OSDMapRef OSDService::try_get_map(epoch_t epoch) { Mutex::Locker l(map_cache_lock); OSDMapRef retval = map_cache.lookup(epoch); @@ -5524,8 +5530,10 @@ OSDMapRef OSDService::get_map(epoch_t epoch) if (epoch > 0) { dout(20) << "get_map " << epoch << " - loading and decoding " << map << dendl; bufferlist bl; - bool ok = _get_map_bl(epoch, bl); - assert(ok); + if (!_get_map_bl(epoch, bl)) { + delete map; + return OSDMapRef(); + } map->decode(bl); } else { dout(20) << "get_map " << epoch << " - return initial " << map << dendl; diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 04ad4dcd7d7e8..f9ceaf81bf391 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -441,7 +441,12 @@ public: SimpleLRU map_bl_cache; SimpleLRU map_bl_inc_cache; - OSDMapRef get_map(epoch_t e); + OSDMapRef try_get_map(epoch_t e); + OSDMapRef get_map(epoch_t e) { + OSDMapRef ret(try_get_map(e)); + assert(ret); + return ret; + } OSDMapRef add_map(OSDMap *o) { Mutex::Locker l(map_cache_lock); return _add_map(o); diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 7373357db1123..9f957b8e05444 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -5032,7 +5032,6 @@ void PG::handle_advance_map(OSDMapRef osdmap, OSDMapRef lastmap, vector& newup, vector& newacting, RecoveryCtx *rctx) { - assert(osdmap->get_epoch() == (lastmap->get_epoch() + 1)); assert(lastmap->get_epoch() == osdmap_ref->get_epoch()); assert(lastmap == osdmap_ref); dout(10) << "handle_advance_map " << newup << "/" << newacting << dendl; diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 3a6db4d8315fe..ca3dcc192b0c3 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1141,6 +1141,13 @@ struct pg_history_t { epoch_t last_epoch_clean; // lower bound on last epoch the PG was completely clean. epoch_t last_epoch_split; // as parent + /** + * In the event of a map discontinuity, same_*_since may reflect the first + * map the osd has seen in the new map sequence rather than the actual start + * of the interval. This is ok since a discontinuity at epoch e means there + * must have been a clean interval between e and now and that we cannot be + * in the active set during the interval containing e. + */ epoch_t same_up_since; // same acting set since epoch_t same_interval_since; // same acting AND up set since epoch_t same_primary_since; // same primary at least back through this epoch. -- 2.39.5