From 7ef7a13a9b4a020e42e7d73ab20089bceb083ac7 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Tue, 27 Feb 2024 17:56:52 -0800 Subject: [PATCH] mon/OSDMonitor: always create pgs in the epoch in which the pool was created The logic here didn't actually work. If update_pending_pgs doesn't get to a particular PG (because of mon_osd_max_creating_pgs limitation) in the epoch in which the pool was created, the pg will end up with a same_interval_since at that epoch rather than the pool creation epoch. This can cause an IO submitted by a client based on the epoch in which the pool was created to be rejected by the OSD without an interval change actually occurring and therefore without the client resending the op. In order to make this limit actually function, once we actually process a pending pg, we'd have to go back to the OSDMap at which the pool was created and work forward to get the correct interval bound. That seems even more expensive, so instead this patch simply removes the limit. Fixes: https://tracker.ceph.com/issues/64546 Signed-off-by: Samuel Just --- src/common/options/mon.yaml.in | 8 -------- src/mon/OSDMonitor.cc | 17 +++++------------ 2 files changed, 5 insertions(+), 20 deletions(-) diff --git a/src/common/options/mon.yaml.in b/src/common/options/mon.yaml.in index ff8813c982f94..947343799e555 100644 --- a/src/common/options/mon.yaml.in +++ b/src/common/options/mon.yaml.in @@ -1249,14 +1249,6 @@ options: services: - mon with_legacy: true -- name: mon_osd_max_creating_pgs - type: int - level: advanced - desc: maximum number of PGs the mon will create at once - default: 1024 - services: - - mon - with_legacy: true - name: mon_osd_max_initial_pgs type: int level: advanced diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index c3429b5fd2ed0..27347107c0cf3 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -1251,10 +1251,8 @@ OSDMonitor::update_pending_pgs(const OSDMap::Incremental& inc, } // process queue - unsigned max = std::max(1, 
g_conf()->mon_osd_max_creating_pgs); const auto total = pending_creatings.pgs.size(); - while (pending_creatings.pgs.size() < max && - !pending_creatings.queue.empty()) { + while (!pending_creatings.queue.empty()) { auto p = pending_creatings.queue.begin(); int64_t poolid = p->first; dout(10) << __func__ << " pool " << poolid @@ -1262,21 +1260,16 @@ OSDMonitor::update_pending_pgs(const OSDMap::Incremental& inc, << " modified " << p->second.modified << " [" << p->second.start << "-" << p->second.end << ")" << dendl; - int64_t n = std::min(max - pending_creatings.pgs.size(), - p->second.end - p->second.start); - ps_t first = p->second.start; - ps_t end = first + n; - for (ps_t ps = first; ps < end; ++ps) { + for (ps_t ps = p->second.start; ps < p->second.end; ++ps) { const pg_t pgid{ps, static_cast(poolid)}; - // NOTE: use the *current* epoch as the PG creation epoch so that the - // OSD does not have to generate a long set of PastIntervals. + // The current epoch must be the pool creation epoch pending_creatings.pgs.emplace( pgid, - creating_pgs_t::pg_create_info(inc.epoch, + creating_pgs_t::pg_create_info(p->second.created, p->second.modified)); dout(10) << __func__ << " adding " << pgid << dendl; } - p->second.start = end; + p->second.start = p->second.end; if (p->second.done()) { dout(10) << __func__ << " done with queue for " << poolid << dendl; pending_creatings.queue.erase(p); -- 2.39.5