From 951fc2fae45a7ee9be1deb204d73c85ff3a9b718 Mon Sep 17 00:00:00 2001
From: Samuel Just
Date: Thu, 30 May 2013 15:11:58 -0700
Subject: [PATCH] OSD: resurrect a parent if it splits into the pg we want to create

When attempting to create a new pg object in response to a
peering message, there are 3 cases:
1) That pg is currently being deleted.  In this case, we
cancel the deletion and resurrect the pg at the epoch at
which it had been deleted.
2) A pg is being deleted which would have split into the
pg we want to create had it not been deleted.  In that case,
we resurrect that pg at the map at which it had been deleted
and let the request wait on the impending split.
3) Neither that pg nor a parent can be resurrected.  In this
case, we create a new pg at the map epoch of the peering
request.

Fixes: #5154
Signed-off-by: Samuel Just
---
 src/osd/OSD.cc | 193 ++++++++++++++++++++++++++++++++++++++++---------
 src/osd/OSD.h  |  33 +++++++-
 2 files changed, 188 insertions(+), 38 deletions(-)

diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 8a4da41da8a3a..cdfa4d417bacb 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -1639,9 +1639,68 @@ void OSD::add_newly_split_pg(PG *pg, PG::RecoveryCtx *rctx)
     _remove_pg(pg);
 }
 
+OSD::res_result OSD::_try_resurrect_pg(
+  OSDMapRef curmap, pg_t pgid, pg_t *resurrected, PGRef *old_pg_state)
+{
+  assert(resurrected);
+  assert(old_pg_state);
+  // find pgid itself, or its nearest ancestor, among the deleting pgs
+  DeletingStateRef df;
+  pg_t cur(pgid);
+  while (true) {
+    df = service.deleting_pgs.lookup(cur); // must query cur, not pgid
+    if (df || !cur.ps()) // found one, or ps() == 0: nothing left to try
+      break;
+    cur = cur.get_parent();
+  }
+  if (!df)
+    return RES_NONE; // good to go
+
+  df->old_pg_state->lock();
+  OSDMapRef create_map = df->old_pg_state->get_osdmap();
+  df->old_pg_state->unlock();
+
+  set<pg_t> children;
+  if (cur == pgid) {
+    if (df->try_stop_deletion()) {
+      dout(10) << __func__ << ": halted deletion on pg " << pgid << dendl;
+      *resurrected = cur;
+      *old_pg_state = df->old_pg_state;
+      service.deleting_pgs.remove(pgid); // PG is no longer being removed!
+      return RES_SELF;
+    } else {
+      // raced, ensure we don't see DeletingStateRef when we try to
+      // delete this pg
+      service.deleting_pgs.remove(pgid);
+      return RES_NONE;
+    }
+  } else if (cur.is_split(create_map->get_pg_num(cur.pool()),
+                          curmap->get_pg_num(cur.pool()),
+                          &children) &&
+             children.count(pgid)) {
+    if (df->try_stop_deletion()) {
+      dout(10) << __func__ << ": halted deletion on ancestor pg " << cur
+               << " of pg " << pgid << dendl;
+      *resurrected = cur;
+      *old_pg_state = df->old_pg_state;
+      service.deleting_pgs.remove(cur); // PG is no longer being removed!
+      return RES_PARENT;
+    } else {
+      /* this is not a problem, failing to cancel proves that all objects
+       * have been removed, so no hobject_t overlap is possible
+       */
+      return RES_NONE;
+    }
+  }
+  return RES_NONE;
+}
+
 PG *OSD::_create_lock_pg(
   OSDMapRef createmap,
-  pg_t pgid, bool newly_created, bool hold_map_lock,
+  pg_t pgid,
+  bool newly_created,
+  bool hold_map_lock,
+  bool backfill,
   int role, vector<int>& up, vector<int>& acting, pg_history_t history,
   pg_interval_map_t& pi,
   ObjectStore::Transaction& t)
@@ -1651,22 +1710,7 @@ PG *OSD::_create_lock_pg(
 
   PG *pg = _open_lock_pg(createmap, pgid, true, hold_map_lock);
 
-  DeletingStateRef df = service.deleting_pgs.lookup(pgid);
-  bool backfill = false;
-
-  if (df && df->try_stop_deletion()) {
-    dout(10) << __func__ << ": halted deletion on pg " << pgid << dendl;
-    backfill = true;
-    service.deleting_pgs.remove(pgid); // PG is no longer being removed!
-  } else {
-    if (df) {
-      // raced, ensure we don't see DeletingStateRef when we try to
-      // delete this pg
-      service.deleting_pgs.remove(pgid);
-    }
-    // either it's not deleting, or we failed to get to it in time
-    t.create_collection(coll_t(pgid));
-  }
+  service.init_splits_between(pgid, pg->get_osdmap(), service.get_osdmap());
 
   pg->init(role, up, acting, history, pi, backfill, &t);
 
@@ -1980,8 +2024,6 @@ void OSD::handle_pg_peering_evt(
     return;
   }
 
-  PG *pg;
-
   if (!_have_pg(info.pgid)) {
     // same primary?
     if (!osdmap->have_pg_pool(info.pgid.pool()))
@@ -2028,24 +2070,104 @@ void OSD::handle_pg_peering_evt(
       assert(!info.dne());  // and pg exists if we are hearing about it
     }
 
-    // ok, create PG locally using provided Info and History
+    // do we need to resurrect a deleting pg?
+    pg_t resurrected;
+    PGRef old_pg_state;
+    res_result result = _try_resurrect_pg(
+      service.get_osdmap(),
+      info.pgid,
+      &resurrected,
+      &old_pg_state);
+
     PG::RecoveryCtx rctx = create_context();
-    pg = _create_lock_pg(
-      get_map(epoch),
-      info.pgid, create, false, role, up, acting, history, pi,
-      *rctx.transaction);
-    pg->handle_create(&rctx);
-    pg->write_if_dirty(*rctx.transaction);
-    dispatch_context(rctx, pg, osdmap);
+    switch (result) {
+    case RES_NONE: {
+      // ok, create the pg locally using provided Info and History
+      rctx.transaction->create_collection(coll_t(info.pgid));
+      PG *pg = _create_lock_pg(
+        get_map(epoch),
+        info.pgid, create, false, result == RES_SELF,
+        role, up, acting, history, pi,
+        *rctx.transaction);
+      pg->handle_create(&rctx);
+      pg->write_if_dirty(*rctx.transaction);
+      dispatch_context(rctx, pg, osdmap);
+
+      dout(10) << *pg << " is new" << dendl;
+
+      // kick any waiters
+      wake_pg_waiters(pg->info.pgid);
 
-    dout(10) << *pg << " is new" << dendl;
+      pg->queue_peering_event(evt);
+      pg->unlock();
+      return;
+    }
+    case RES_SELF: {
+      old_pg_state->lock();
+      PG *pg = _create_lock_pg(
+        old_pg_state->get_osdmap(),
+        resurrected,
+        false,
+        false,
+        true,
+        old_pg_state->role,
+        old_pg_state->up,
+        old_pg_state->acting,
+        old_pg_state->info.history,
+        old_pg_state->past_intervals,
+        *rctx.transaction);
+      old_pg_state->unlock();
+      pg->handle_create(&rctx);
+      pg->write_if_dirty(*rctx.transaction);
+      dispatch_context(rctx, pg, osdmap);
 
-    // kick any waiters
-    wake_pg_waiters(pg->info.pgid);
+      dout(10) << *pg << " is new (resurrected)" << dendl;
 
+      // kick any waiters
+      wake_pg_waiters(pg->info.pgid);
+
+      pg->queue_peering_event(evt);
+      pg->unlock();
+      return;
+    }
+    case RES_PARENT: {
+      assert(old_pg_state);
+      old_pg_state->lock();
+      PG *parent = _create_lock_pg(
+        old_pg_state->get_osdmap(),
+        resurrected,
+        false,
+        false,
+        true,
+        old_pg_state->role,
+        old_pg_state->up,
+        old_pg_state->acting,
+        old_pg_state->info.history,
+        old_pg_state->past_intervals,
+        *rctx.transaction
+        );
+      old_pg_state->unlock();
+      parent->handle_create(&rctx);
+      parent->write_if_dirty(*rctx.transaction);
+      dispatch_context(rctx, parent, osdmap);
+
+      dout(10) << *parent << " is new" << dendl;
+
+      // kick any waiters
+      wake_pg_waiters(parent->info.pgid);
+
+      assert(service.splitting(info.pgid));
+      peering_wait_for_split[info.pgid].push_back(evt);
+
+      //parent->queue_peering_event(evt);
+      parent->queue_null(osdmap->get_epoch(), osdmap->get_epoch());
+      parent->unlock();
+      return;
+    }
+    }
   } else {
     // already had it.  did the mapping change?
-    pg = _lookup_lock_pg(info.pgid);
+    PG *pg = _lookup_lock_pg(info.pgid);
     if (epoch < pg->info.history.same_interval_since) {
       dout(10) << *pg << " get_or_create_pg acting changed in "
                << pg->info.history.same_interval_since
@@ -2053,10 +2175,10 @@ void OSD::handle_pg_peering_evt(
       pg->unlock();
       return;
     }
+    pg->queue_peering_event(evt);
+    pg->unlock();
+    return;
   }
-
-  pg->queue_peering_event(evt);
-  pg->unlock();
 }
 
 
@@ -5391,10 +5513,11 @@ void OSD::handle_pg_create(OpRequestRef op)
       if (can_create_pg(pgid)) {
         pg_interval_map_t pi;
+        rctx.transaction->create_collection(coll_t(pgid)); // before pg->init
         pg = _create_lock_pg(
-          osdmap, pgid, true, false,
+          osdmap, pgid, true, false, false,
           0, creating_pgs[pgid].acting, creating_pgs[pgid].acting,
           history, pi,
           *rctx.transaction);
         pg->info.last_epoch_started = pg->info.history.last_epoch_started;
         creating_pgs.erase(pgid);
         wake_pg_waiters(pg->info.pgid);
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index a91b657934b77..0e35250b79aa5 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -1049,10 +1049,37 @@ protected:
   PG   *_open_lock_pg(OSDMapRef createmap,
                       pg_t pg, bool no_lockdep_check=false,
                       bool hold_map_lock=false);
+  enum res_result {
+    RES_PARENT,    // resurrected a parent
+    RES_SELF,      // resurrected self
+    RES_NONE       // nothing relevant deleting
+  };
+  /**
+   * Try to halt the deletion of pgid, or of the nearest ancestor of pgid
+   * found in service.deleting_pgs (walking get_parent() until ps() == 0).
+   *
+   * @param curmap map whose pg_num is compared against the deleting pg's
+   *        own map to decide whether that pg splits into pgid
+   * @param pgid pg the caller wants to create
+   * @param resurrected [out] pg whose deletion was halted; must not be
+   *        NULL, written only for RES_SELF and RES_PARENT
+   * @param old_pg_state [out] old_pg_state of the halted deletion; must
+   *        not be NULL, written only for RES_SELF and RES_PARENT
+   * @return RES_SELF if pgid itself was deleting and try_stop_deletion()
+   *         succeeded, RES_PARENT if an ancestor which splits into pgid
+   *         was deleting and try_stop_deletion() succeeded, RES_NONE
+   *         otherwise
+   */
+  res_result _try_resurrect_pg(
+    OSDMapRef curmap, pg_t pgid, pg_t *resurrected, PGRef *old_pg_state);
   PG   *_create_lock_pg(OSDMapRef createmap,
-                        pg_t pgid, bool newly_created,
-                        bool hold_map_lock, int role,
-                        vector<int>& up, vector<int>& acting,
+                        pg_t pgid,
+                        bool newly_created,
+                        bool hold_map_lock,
+                        bool backfill,
+                        int role,
+                        vector<int>& up,
+                        vector<int>& acting,
                         pg_history_t history,
                         pg_interval_map_t& pi,
                         ObjectStore::Transaction& t);
-- 
2.39.5