From ae210722b4a3eea256ca3379ec1388c94e224695 Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Wed, 3 Jan 2018 11:48:37 -0600
Subject: [PATCH] osd: prime pg_slots for to-be-split children

Once we know which PGs are about to be created, we instantiate their
pg_slot and mark them waiting_for_pg, which blocks all incoming events
until the split completes, the PG is installed, and we call
wake_pg_waiters().

Signed-off-by: Sage Weil
---
 src/osd/OSD.cc | 15 +++++++++++++++
 src/osd/OSD.h  |  3 +++
 2 files changed, 18 insertions(+)

diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 427418672c2..c26c26af975 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -8149,6 +8149,9 @@ void OSD::split_pgs(
   OSDMapRef nextmap,
   PG::RecoveryCtx *rctx)
 {
+  // make sure to-be-split children are blocked in wq
+  op_shardedwq.prime_splits(childpgids);
+
   unsigned pg_num = nextmap->get_pg_num(
     parent->pg_id.pool());
   parent->update_snap_mapper_bits(
@@ -9477,6 +9480,18 @@ void OSD::ShardedOpWQ::wake_pg_waiters(spg_t pgid)
   }
 }
 
+void OSD::ShardedOpWQ::prime_splits(const set<spg_t>& pgs)
+{
+  dout(20) << __func__ << " " << pgs << dendl;
+  for (auto pgid : pgs) {
+    unsigned shard_index = pgid.hash_to_shard(shard_list.size());
+    ShardData* sdata = shard_list[shard_index];
+    Mutex::Locker l(sdata->sdata_op_ordering_lock);
+    ShardData::pg_slot& slot = sdata->pg_slots[pgid];
+    slot.waiting_for_pg = true;
+  }
+}
+
 void OSD::ShardedOpWQ::prune_pg_waiters(OSDMapRef osdmap, int whoami)
 {
   unsigned pushes_to_free = 0;
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index 83df4fd5031..1b16133fc6a 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -1680,6 +1680,9 @@ private:
     void _wake_pg_slot(spg_t pgid, ShardData *sdata, ShardData::pg_slot& slot,
                        unsigned *pushes_to_free);
 
+    /// prime slots for splitting pgs
+    void prime_splits(const set<spg_t>& pgs);
+
     /// prune ops (and possibly pg_slots) for pgs that shouldn't be here
     void prune_pg_waiters(OSDMapRef osdmap, int whoami);
 
-- 
2.39.5