From: Sage Weil Date: Mon, 31 Dec 2018 17:05:03 +0000 (-0600) Subject: osd: reliably send pg_created messages to the mon X-Git-Tag: v14.1.0~526^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F25731%2Fhead;p=ceph.git osd: reliably send pg_created messages to the mon The OSD has to reliably deliver a pg_created message to the mon in order for the mon to clear the pool's CREATING flag. Previously, a mon connection reset would drop the message. Restructure this to: - queue a message any time a PG peers and the pool has the CREATING flag - track pending messages in OSDService - resend on mon connect - prune messages for pools that no longer have the CREATING flag This new strategy can result in resends of these messages to the mon in cases where the mon already knows the PG was created. However, pool creation is rare, and these extra messages are cheap. And we can avoid this overhead if we like by limiting the number of PGs that the mon can create explicitly if we choose (by lowering mon_osd_max_initial_pgs). 
Fixes: http://tracker.ceph.com/issues/37775 Signed-off-by: Sage Weil --- diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 287a1ed0b9ca..c3337d301cc7 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -1028,12 +1028,46 @@ void OSDService::send_pg_temp() void OSDService::send_pg_created(pg_t pgid) { + std::lock_guard l(pg_created_lock); dout(20) << __func__ << dendl; - if (osdmap->require_osd_release >= CEPH_RELEASE_LUMINOUS) { + auto o = get_osdmap(); + if (o->require_osd_release >= CEPH_RELEASE_LUMINOUS) { + pg_created.insert(pgid); monc->send_mon_message(new MOSDPGCreated(pgid)); } } +void OSDService::send_pg_created() +{ + std::lock_guard l(pg_created_lock); + dout(20) << __func__ << dendl; + auto o = get_osdmap(); + if (o->require_osd_release >= CEPH_RELEASE_LUMINOUS) { + for (auto pgid : pg_created) { + monc->send_mon_message(new MOSDPGCreated(pgid)); + } + } +} + +void OSDService::prune_pg_created() +{ + std::lock_guard l(pg_created_lock); + dout(20) << __func__ << dendl; + auto o = get_osdmap(); + auto i = pg_created.begin(); + while (i != pg_created.end()) { + auto p = o->get_pg_pool(i->pool()); + if (!p || !p->has_flag(pg_pool_t::FLAG_CREATING)) { + dout(20) << __func__ << " pruning " << *i << dendl; + i = pg_created.erase(i); + } else { + dout(20) << __func__ << " keeping " << *i << dendl; + ++i; + } + } +} + + // -------------------------------------- // dispatch @@ -5395,6 +5429,7 @@ void OSD::ms_handle_connect(Connection *con) service.clear_sent_ready_to_merge(); service.send_pg_temp(); service.send_ready_to_merge(); + service.send_pg_created(); requeue_failures(); send_failures(); @@ -8373,6 +8408,8 @@ void OSD::consume_map() ceph_assert(merge_pgs.empty()); } + service.prune_pg_created(); + unsigned pushes_to_free = 0; for (auto& shard : shards) { shard->consume_map(osdmap, &pushes_to_free); diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 358c3fd1bc63..d1c16d6bf332 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -748,7 +748,11 @@ public: void 
requeue_pg_temp(); void send_pg_temp(); + ceph::mutex pg_created_lock = ceph::make_mutex("OSDService::pg_created_lock"); + set<pg_t> pg_created; void send_pg_created(pg_t pgid); + void prune_pg_created(); + void send_pg_created(); AsyncReserver<spg_t> snap_reserver; void queue_recovery_context(PG *pg, GenContext<ThreadPool::TPHandle&> *c); diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 6ac9b4b9849f..88b84b9423e0 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -3376,10 +3376,6 @@ void PG::publish_stats_to_osd() if ((state & (PG_STATE_ACTIVE|PG_STATE_PEERED)) && !(info.stats.state & (PG_STATE_ACTIVE|PG_STATE_PEERED))) info.stats.last_became_peered = now; - if (!(state & PG_STATE_CREATING) && - (info.stats.state & PG_STATE_CREATING)) { - osd->send_pg_created(get_pgid().pgid); - } info.stats.state = state; } @@ -8477,8 +8473,7 @@ boost::statechart::result PG::RecoveryState::Active::react(const AllReplicasActi pg->state_set(PG_STATE_ACTIVE); } - // info.last_epoch_started is set during activate() - if (pg->info.history.last_epoch_started == 0) { + if (pg->pool.info.has_flag(pg_pool_t::FLAG_CREATING)) { pg->osd->send_pg_created(pgid); }