From 019b28b6397dbc3111faa6a8eb57349a48f408d8 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Mon, 20 Apr 2015 23:45:57 -0700 Subject: [PATCH] OSD: handle the case where we resurrected an old, deleted pg Prior to giant, we would skip pgs in load_pgs which were not present in the current osdmap. Those pgs would eventually refer to very old osdmaps, which we no longer have, causing the assertion failure in 11429 once the osd is finally upgraded to a version which does not skip the pgs. Instead, if we do not have the map for the pg epoch, complain to the osd log and skip the pg. Fixes: 11429 Signed-off-by: Samuel Just (cherry picked from commit fbfd50de5b9b40d71d2e768418a8eca28b1afaca) Conflicts: src/osd/OSD.cc resolved by adding a new comment line --- src/osd/OSD.cc | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 291e6f6f19b31..c0f4bdd0e8ebb 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -2114,7 +2114,29 @@ void OSD::load_pgs() bufferlist bl; epoch_t map_epoch = PG::peek_map_epoch(store, coll_t(pgid), service.infos_oid, &bl); - PG *pg = _open_lock_pg(map_epoch == 0 ? osdmap : service.get_map(map_epoch), pgid); + PG *pg = NULL; + if (map_epoch > 0) { + OSDMapRef pgosdmap = service.try_get_map(map_epoch); + if (!pgosdmap) { + if (!osdmap->have_pg_pool(pgid.pool())) { + derr << __func__ << ": could not find map for epoch " << map_epoch + << " on pg " << pgid << ", but the pool is not present in the " + << "current map, so this is probably a result of bug 10617. " + << "Skipping the pg for now, you can use ceph_objectstore_tool " + << "to clean it up later." << dendl; + continue; + } else { + derr << __func__ << ": have pgid " << pgid << " at epoch " + << map_epoch << ", but missing map. Crashing." 
+ << dendl; + assert(0 == "Missing map in load_pgs"); + } + } + pg = _open_lock_pg(pgosdmap, pgid); + } else { + pg = _open_lock_pg(osdmap, pgid); + } + // there can be no waiters here, so we don't call wake_pg_waiters // read pg state, log pg->read_state(store, bl); -- 2.39.5