mon/OSDMonitor: always create pgs in the epoch in which the pool was created
author Samuel Just <sjust@redhat.com>
Wed, 28 Feb 2024 01:56:52 +0000 (17:56 -0800)
committer Samuel Just <sjust@redhat.com>
Wed, 6 Mar 2024 20:55:28 +0000 (12:55 -0800)
The logic here didn't actually work.  If update_pending_pgs doesn't get
to a particular PG (because of the mon_osd_max_creating_pgs limit) in
the epoch in which the pool was created, the PG ends up with a
same_interval_since set to the later epoch in which it is finally
processed rather than the pool creation epoch.
This can cause an IO submitted by a client based on the epoch in which
the pool was created to be rejected by the OSD without an interval
change actually occurring and therefore without the client resending the
op.
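
A minimal standalone sketch of the failure mode (the struct, the epoch
values, and the check below are illustrative stand-ins, not actual Ceph
code):

#include <cstdint>
#include <iostream>

struct pg_state {
  uint32_t same_interval_since;  // interval boundary the OSD records for the PG
};

int main() {
  uint32_t pool_create_epoch = 100;  // client submits IO against this epoch
  uint32_t pg_create_epoch = 103;    // mon only reached this PG a few epochs later

  // With the old limit, the PG's interval starts at the later epoch.
  pg_state pg{pg_create_epoch};

  // The OSD treats the op's epoch as predating the PG's interval and drops it,
  // expecting an interval change to make the client resend.  But the client
  // never saw an interval change, so the op is never resent.
  if (pool_create_epoch < pg.same_interval_since)
    std::cout << "op discarded; client does not resend\n";
  return 0;
}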

To make this limit actually work, once we process a pending PG we would
have to go back to the OSDMap epoch at which the pool was created and
work forward to compute the correct interval bound.  That would be even
more expensive, so this patch simply removes the limit.
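
A simplified sketch of the approach the patch takes (the container types
below are stand-ins; only the shape of the loop mirrors the real
update_pending_pgs):

#include <cstdint>
#include <map>
#include <utility>

struct create_range {
  uint64_t start = 0, end = 0;
  uint32_t created = 0;  // epoch the pool was created in
  bool done() const { return start >= end; }
};

int main() {
  // pool id -> range of placement-group seeds still to create
  std::map<int64_t, create_range> queue;
  queue[1] = create_range{0, 8, 100};

  // (pool id, ps) -> creation epoch recorded for the new PG
  std::map<std::pair<int64_t, uint64_t>, uint32_t> pgs;

  // Drain the whole queue: no mon_osd_max_creating_pgs cap any more, and each
  // PG is stamped with its pool's creation epoch rather than the current one.
  while (!queue.empty()) {
    auto p = queue.begin();
    for (uint64_t ps = p->second.start; ps < p->second.end; ++ps)
      pgs.emplace(std::make_pair(p->first, ps), p->second.created);
    p->second.start = p->second.end;
    if (p->second.done())
      queue.erase(p);
  }
  return 0;
}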

Fixes: https://tracker.ceph.com/issues/64546
Signed-off-by: Samuel Just <sjust@redhat.com>
src/common/options/mon.yaml.in
src/mon/OSDMonitor.cc

index ff8813c982f940c38c51597639d1a5ee7fa0e558..947343799e55543b752bb5d7c295ecd63eb2b3b7 100644 (file)
@@ -1249,14 +1249,6 @@ options:
   services:
   - mon
   with_legacy: true
-- name: mon_osd_max_creating_pgs
-  type: int
-  level: advanced
-  desc: maximum number of PGs the mon will create at once
-  default: 1024
-  services:
-  - mon
-  with_legacy: true
 - name: mon_osd_max_initial_pgs
   type: int
   level: advanced
index c3429b5fd2ed0a410de55481c2ecf107a20dfe76..27347107c0cf36d9553870cf69472369dd910ec9 100644 (file)
@@ -1251,10 +1251,8 @@ OSDMonitor::update_pending_pgs(const OSDMap::Incremental& inc,
   }
 
   // process queue
-  unsigned max = std::max<int64_t>(1, g_conf()->mon_osd_max_creating_pgs);
   const auto total = pending_creatings.pgs.size();
-  while (pending_creatings.pgs.size() < max &&
-        !pending_creatings.queue.empty()) {
+  while (!pending_creatings.queue.empty()) {
     auto p = pending_creatings.queue.begin();
     int64_t poolid = p->first;
     dout(10) << __func__ << " pool " << poolid
@@ -1262,21 +1260,16 @@ OSDMonitor::update_pending_pgs(const OSDMap::Incremental& inc,
             << " modified " << p->second.modified
             << " [" << p->second.start << "-" << p->second.end << ")"
             << dendl;
-    int64_t n = std::min<int64_t>(max - pending_creatings.pgs.size(),
-                                 p->second.end - p->second.start);
-    ps_t first = p->second.start;
-    ps_t end = first + n;
-    for (ps_t ps = first; ps < end; ++ps) {
+    for (ps_t ps = p->second.start; ps < p->second.end; ++ps) {
       const pg_t pgid{ps, static_cast<uint64_t>(poolid)};
-      // NOTE: use the *current* epoch as the PG creation epoch so that the
-      // OSD does not have to generate a long set of PastIntervals.
+      // The current epoch must be the pool creation epoch
       pending_creatings.pgs.emplace(
        pgid,
-       creating_pgs_t::pg_create_info(inc.epoch,
+       creating_pgs_t::pg_create_info(p->second.created,
                                       p->second.modified));
       dout(10) << __func__ << " adding " << pgid << dendl;
     }
-    p->second.start = end;
+    p->second.start = p->second.end;
     if (p->second.done()) {
       dout(10) << __func__ << " done with queue for " << poolid << dendl;
       pending_creatings.queue.erase(p);