service.await_reserved_maps();
service.publish_map(osdmap);
- int num_pg_primary = 0, num_pg_replica = 0, num_pg_stray = 0;
-
- unsigned pushes_to_free = 0;
+ // prime splits
set<spg_t> newly_split;
for (auto& shard : shards) {
- shard->consume_map(osdmap, &pushes_to_free, &newly_split);
+ shard->identify_splits(osdmap, &newly_split);
}
if (!newly_split.empty()) {
for (auto& shard : shards) {
assert(newly_split.empty());
}
+ unsigned pushes_to_free = 0;
+ for (auto& shard : shards) {
+ shard->consume_map(osdmap, &pushes_to_free);
+ }
+
vector<spg_t> pgids;
_get_pgids(&pgids);
// count (FIXME)
+ int num_pg_primary = 0, num_pg_replica = 0, num_pg_stray = 0;
vector<PGRef> pgs;
_get_pgs(&pgs);
for (auto& pg : pgs) {
void OSDShard::consume_map(
OSDMapRef& new_osdmap,
- unsigned *pushes_to_free,
- set<spg_t> *new_children)
+ unsigned *pushes_to_free)
{
Mutex::Locker l(sdata_op_ordering_lock);
OSDMapRef old_osdmap;
OSDShardPGSlot *slot = p->second.get();
const spg_t& pgid = p->first;
dout(20) << __func__ << " " << pgid << dendl;
- if (old_osdmap &&
- (slot->pg || slot->waiting_for_split)) {
- // only prime children for parent slots that are attached to a
- // pg or are waiting_for_split (because their ancestor is
- // attached to a pg).
- osd->service.identify_split_children(old_osdmap, new_osdmap, pgid,
- new_children);
- }
if (slot->waiting_for_split) {
dout(20) << __func__ << " " << pgid
<< " waiting for split" << dendl;
}
++p;
}
- _prime_splits(new_children);
if (queued) {
sdata_lock.Lock();
sdata_cond.SignalOne();
++slot->requeue_seq;
}
+// Scan this shard's pg_slots and collect into *pgids the child PGs that
+// the shard's currently-installed osdmap would split into as of
+// as_of_osdmap (delegates the actual child computation to
+// OSDService::identify_split_children).  Read-only with respect to the
+// slots themselves; holds sdata_op_ordering_lock for the duration.
+void OSDShard::identify_splits(OSDMapRef as_of_osdmap, set<spg_t> *pgids)
+{
+  Mutex::Locker l(sdata_op_ordering_lock);
+  // osdmap may still be unset if this shard has not consumed a map yet;
+  // in that case there is no "old" epoch to diff against, so do nothing.
+  if (osdmap) {
+    for (auto& i : pg_slots) {
+      const spg_t& pgid = i.first;
+      auto *slot = i.second.get();
+      // Only consider parent slots that are attached to a pg or are
+      // waiting_for_split (because their ancestor is attached to a pg) --
+      // the same filter the removed inline logic in consume_map applied.
+      if (slot->pg || slot->waiting_for_split) {
+        osd->service.identify_split_children(osdmap, as_of_osdmap, pgid, pgids);
+      }
+    }
+  }
+}
+
void OSDShard::prime_splits(OSDMapRef as_of_osdmap, set<spg_t> *pgids)
{
Mutex::Locker l(sdata_op_ordering_lock);
discard any slots with no pg (and not waiting_for_split) that no
longer map to the current host.
- Some notes:
-
- - There is theoretical race between query (which can proceed if the pg doesn't
- exist) and split (which may materialize a PG in a different shard):
- - osd has epoch E
- - shard 0 processes notify on P from epoch E-1
- - shard 0 identifies P splits to P+C in epoch E
- - shard 1 receives query for P (epoch E), returns DNE
- - shard 1 installs C in shard 0 with waiting_for_split
-
- This can't really be fixed without ordering queries over all shards. In
- practice, it is very unlikely to occur, since only the primary sends a
- notify (or other creating event) and only the primary who sends a query.
- Even if it does happen, the instantiated P is empty, so reporting DNE vs
- empty C is minimal harm.
-
*/
struct OSDShardPGSlot {
/// push osdmap into shard
void consume_map(
OSDMapRef& osdmap,
- unsigned *pushes_to_free,
- set<spg_t> *new_children);
+ unsigned *pushes_to_free);
void _wake_pg_slot(spg_t pgid, OSDShardPGSlot *slot);
+ void identify_splits(OSDMapRef as_of_osdmap, set<spg_t> *pgids);
void _prime_splits(set<spg_t> *pgids);
void prime_splits(OSDMapRef as_of_osdmap, set<spg_t> *pgids);
void register_and_wake_split_child(PG *pg);