_remove_pg(pg);
}
+OSD::res_result OSD::_try_resurrect_pg(
+ OSDMapRef curmap, pg_t pgid, pg_t *resurrected, PGRef *old_pg_state)
+{
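+ // Check whether pgid, or an ancestor it would be split from, is still
+ // being deleted; if so, try to cancel that deletion so the existing
+ // collection can be reused (the caller then backfills the pg).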
+ assert(resurrected);
+ assert(old_pg_state);
+ // find nearest ancestor
+ DeletingStateRef df;
+ pg_t cur(pgid);
+ while (true) {
+ df = service.deleting_pgs.lookup(cur);
+ if (df)
+ break;
+ if (!cur.ps())
+ break;
+ cur = cur.get_parent();
+ }
+ if (!df)
+ return RES_NONE; // good to go
+
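+ // the deleting pg's last osdmap gives the pg_num before any later split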
+ df->old_pg_state->lock();
+ OSDMapRef create_map = df->old_pg_state->get_osdmap();
+ df->old_pg_state->unlock();
+
+ set<pg_t> children;
+ if (cur == pgid) {
+ if (df->try_stop_deletion()) {
+ dout(10) << __func__ << ": halted deletion on pg " << pgid << dendl;
+ *resurrected = cur;
+ *old_pg_state = df->old_pg_state;
+ service.deleting_pgs.remove(pgid); // PG is no longer being removed!
+ return RES_SELF;
+ } else {
+ // raced, ensure we don't see DeletingStateRef when we try to
+ // delete this pg
+ service.deleting_pgs.remove(pgid);
+ return RES_NONE;
+ }
+ } else if (cur.is_split(create_map->get_pg_num(cur.pool()),
+ curmap->get_pg_num(cur.pool()),
+ &children) &&
+ children.count(pgid)) {
+ if (df->try_stop_deletion()) {
+ dout(10) << __func__ << ": halted deletion on ancestor pg " << cur
+ << dendl;
+ *resurrected = cur;
+ *old_pg_state = df->old_pg_state;
+ service.deleting_pgs.remove(cur); // PG is no longer being removed!
+ return RES_PARENT;
+ } else {
+ /* this is not a problem, failing to cancel proves that all objects
+ * have been removed, so no hobject_t overlap is possible
+ */
+ return RES_NONE;
+ }
+ }
+ return RES_NONE;
+}
+
PG *OSD::_create_lock_pg(
OSDMapRef createmap,
- pg_t pgid, bool newly_created, bool hold_map_lock,
+ pg_t pgid,
+ bool newly_created,
+ bool hold_map_lock,
+ bool backfill,
int role, vector<int>& up, vector<int>& acting, pg_history_t history,
pg_interval_map_t& pi,
ObjectStore::Transaction& t)
PG *pg = _open_lock_pg(createmap, pgid, true, hold_map_lock);
- DeletingStateRef df = service.deleting_pgs.lookup(pgid);
- bool backfill = false;
-
- if (df && df->try_stop_deletion()) {
- dout(10) << __func__ << ": halted deletion on pg " << pgid << dendl;
- backfill = true;
- service.deleting_pgs.remove(pgid); // PG is no longer being removed!
- } else {
- if (df) {
- // raced, ensure we don't see DeletingStateRef when we try to
- // delete this pg
- service.deleting_pgs.remove(pgid);
- }
- // either it's not deleting, or we failed to get to it in time
- t.create_collection(coll_t(pgid));
- }
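+ // note any splits that occur between the map the pg was created with and
+ // the current map, so the resulting children are tracked as splitting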
+ service.init_splits_between(pgid, pg->get_osdmap(), service.get_osdmap());
pg->init(role, up, acting, history, pi, backfill, &t);
return pg;
}
- PG *pg;
-
if (!_have_pg(info.pgid)) {
// same primary?
if (!osdmap->have_pg_pool(info.pgid.pool()))
assert(!info.dne()); // and pg exists if we are hearing about it
}
- // ok, create PG locally using provided Info and History
+ // do we need to resurrect a deleting pg?
+ pg_t resurrected;
+ PGRef old_pg_state;
+ res_result result = _try_resurrect_pg(
+ service.get_osdmap(),
+ info.pgid,
+ &resurrected,
+ &old_pg_state);
+
PG::RecoveryCtx rctx = create_context();
- pg = _create_lock_pg(
- get_map(epoch),
- info.pgid, create, false, role, up, acting, history, pi,
- *rctx.transaction);
- pg->handle_create(&rctx);
- pg->write_if_dirty(*rctx.transaction);
- dispatch_context(rctx, pg, osdmap);
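+ // three cases: nothing is deleting (create normally), the pg itself is
+ // mid-deletion (resurrect it), or a would-be ancestor is mid-deletion
+ // (resurrect the parent and wait for the split)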
+ switch (result) {
+ case RES_NONE: {
+ // ok, create the pg locally using provided Info and History
+ rctx.transaction->create_collection(coll_t(info.pgid));
+ PG *pg = _create_lock_pg(
+ get_map(epoch),
+ info.pgid, create, false, result == RES_SELF,
+ role, up, acting, history, pi,
+ *rctx.transaction);
+ pg->handle_create(&rctx);
+ pg->write_if_dirty(*rctx.transaction);
+ dispatch_context(rctx, pg, osdmap);
+
+ dout(10) << *pg << " is new" << dendl;
+
+ // kick any waiters
+ wake_pg_waiters(pg->info.pgid);
- dout(10) << *pg << " is new" << dendl;
+ pg->queue_peering_event(evt);
+ pg->unlock();
+ return;
+ }
+ case RES_SELF: {
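+ // the pg itself was being deleted: rebuild it from its old state and
+ // mark it for backfill instead of creating a fresh collection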
+ old_pg_state->lock();
+ PG *pg = _create_lock_pg(
+ old_pg_state->get_osdmap(),
+ resurrected,
+ false,
+ false,
+ true,
+ old_pg_state->role,
+ old_pg_state->up,
+ old_pg_state->acting,
+ old_pg_state->info.history,
+ old_pg_state->past_intervals,
+ *rctx.transaction);
+ old_pg_state->unlock();
+ pg->handle_create(&rctx);
+ pg->write_if_dirty(*rctx.transaction);
+ dispatch_context(rctx, pg, osdmap);
- // kick any waiters
- wake_pg_waiters(pg->info.pgid);
+ dout(10) << *pg << " is new (resurrected)" << dendl;
+ // kick any waiters
+ wake_pg_waiters(pg->info.pgid);
+
+ pg->queue_peering_event(evt);
+ pg->unlock();
+ return;
+ }
+ case RES_PARENT: {
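+ // an ancestor was being deleted: resurrect the parent; the requested
+ // child will appear when the parent splits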
+ assert(old_pg_state);
+ old_pg_state->lock();
+ PG *parent = _create_lock_pg(
+ old_pg_state->get_osdmap(),
+ resurrected,
+ false,
+ false,
+ true,
+ old_pg_state->role,
+ old_pg_state->up,
+ old_pg_state->acting,
+ old_pg_state->info.history,
+ old_pg_state->past_intervals,
+ *rctx.transaction
+ );
+ old_pg_state->unlock();
+ parent->handle_create(&rctx);
+ parent->write_if_dirty(*rctx.transaction);
+ dispatch_context(rctx, parent, osdmap);
+
+ dout(10) << *parent << " is new" << dendl;
+
+ // kick any waiters
+ wake_pg_waiters(parent->info.pgid);
+
+ assert(service.splitting(info.pgid));
+ peering_wait_for_split[info.pgid].push_back(evt);
+
+ // don't queue the event on the parent; it was queued above and will be
+ // delivered to the child once the split completes
+ parent->queue_null(osdmap->get_epoch(), osdmap->get_epoch());
+ parent->unlock();
+ return;
+ }
+ }
} else {
// already had it. did the mapping change?
- pg = _lookup_lock_pg(info.pgid);
+ PG *pg = _lookup_lock_pg(info.pgid);
if (epoch < pg->info.history.same_interval_since) {
dout(10) << *pg << " get_or_create_pg acting changed in "
<< pg->info.history.same_interval_since
pg->unlock();
return;
}
+ pg->queue_peering_event(evt);
+ pg->unlock();
+ return;
}
-
- pg->queue_peering_event(evt);
- pg->unlock();
}
if (can_create_pg(pgid)) {
pg_interval_map_t pi;
+ rctx.transaction->create_collection(coll_t(pgid));
pg = _create_lock_pg(
- osdmap, pgid, true, false,
+ osdmap, pgid, true, false, false,
0, creating_pgs[pgid].acting, creating_pgs[pgid].acting,
history, pi,
*rctx.transaction);
pg->info.last_epoch_started = pg->info.history.last_epoch_started;
creating_pgs.erase(pgid);
wake_pg_waiters(pg->info.pgid);