]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr: add simple controller to adjust pg[p]_num_actual
authorSage Weil <sage@redhat.com>
Fri, 6 Apr 2018 15:26:10 +0000 (10:26 -0500)
committerSage Weil <sage@redhat.com>
Fri, 7 Sep 2018 17:08:40 +0000 (12:08 -0500)
This is a pretty trivial controller.  It adds some constraints that were
obviously not there before when the user could set these values to anything
they wanted, but does not implement all of the "nice" stepping that we'll
eventually want.  That can come later.

Splits:
- throttle pg_num increases, currently using the same config option
(mon_osd_max_creating_pgs) that we used to throttle pg creation
- do not increase pg_num until the initial pg creation has completed.

Merges:
- wait until the source and target pgs for merge are active and clean
before doing a merge.

Adjust pgp_num all at once for now.

Signed-off-by: Sage Weil <sage@redhat.com>
src/mgr/DaemonServer.cc
src/mgr/DaemonServer.h

index 14e7db45d5ed94e9a0f754c7511e7bb7a5a31de2..7ea22bc9a3fbe5c33b0d24e40ace0730d8b93721 100644 (file)
@@ -352,6 +352,7 @@ void DaemonServer::tick()
 {
   dout(10) << dendl;
   send_report();
+  adjust_pgs();
 
   schedule_tick_locked(
     g_conf().get_val<std::chrono::seconds>("mgr_tick_period").count());
@@ -2214,6 +2215,182 @@ void DaemonServer::send_report()
   monc->send_mon_message(m);
 }
 
+void DaemonServer::adjust_pgs()
+{
+  dout(20) << dendl;
+  unsigned max = std::max<int64_t>(1, g_conf()->mon_osd_max_creating_pgs);
+
+  map<string,unsigned> pg_num_to_set;
+  map<string,unsigned> pgp_num_to_set;
+  cluster_state.with_pgmap([&](const PGMap& pg_map) {
+      unsigned creating_or_unknown = 0;
+      for (auto& i : pg_map.num_pg_by_state) {
+       if ((i.first & (PG_STATE_CREATING)) ||
+           i.first == 0) {
+         creating_or_unknown += i.second;
+       }
+      }
+      unsigned left = max;
+      if (creating_or_unknown >= max) {
+       return;
+      }
+      dout(10) << "creating_or_unknown " << creating_or_unknown
+              << " max_creating " << max << dendl;
+      cluster_state.with_osdmap([&](const OSDMap& osdmap) {
+         if (pg_map.last_osdmap_epoch != osdmap.get_epoch()) {
+           // do nothing if maps aren't in sync
+           dout(10) << "last_osdmap_epoch " << pg_map.last_osdmap_epoch
+                    << " osdmap " << osdmap.get_epoch() << dendl;
+           //return;
+         }
+         for (auto& i : osdmap.get_pools()) {
+           const pg_pool_t& p = i.second;
+
+           // adjust pg_num?
+           if (p.get_pg_num_target() != p.get_pg_num()) {
+             dout(20) << "pool " << i.first
+                      << " pg_num " << p.get_pg_num()
+                      << " target " << p.get_pg_num_target()
+                      << dendl;
+             if (p.get_pg_num() != p.get_pg_num_pending()) {
+               dout(10) << "pool " << i.first
+                        << " target " << p.get_pg_num_target()
+                        << " pg_num " << p.get_pg_num()
+                        << " - pg_num_pending != pg_num, waiting"
+                        << dendl;
+               // FIXME: we might consider allowing pg_num increases without
+               // waiting for the previously planned merge to complete.
+             } else if (p.get_pg_num_target() < p.get_pg_num()) {
+               // pg_num decrease (merge)
+               pg_t merge_source(p.get_pg_num() - 1, i.first);
+               pg_t merge_target = merge_source.get_parent();
+               bool ok = true;
+               auto q = pg_map.pg_stat.find(merge_source);
+               if (q == pg_map.pg_stat.end()) {
+                 dout(10) << "pool " << i.first
+                          << " target " << p.get_pg_num_target()
+                          << " pg_num " << p.get_pg_num()
+                          << " - no state for " << merge_source
+                          << " (merge source)"
+                          << dendl;
+                 ok = false;
+               } else if (!(q->second.state & (PG_STATE_ACTIVE |
+                                               PG_STATE_CLEAN))) {
+                 dout(10) << "pool " << i.first
+                          << " target " << p.get_pg_num_target()
+                          << " pg_num " << p.get_pg_num()
+                          << " - merge source " << merge_source
+                          << " not clean (" << pg_state_string(q->second.state)
+                          << ")" << dendl;
+                 ok = false;
+               }
+               q = pg_map.pg_stat.find(merge_target);
+               if (q == pg_map.pg_stat.end()) {
+                 dout(10) << "pool " << i.first
+                          << " target " << p.get_pg_num_target()
+                          << " pg_num " << p.get_pg_num()
+                          << " - no state for " << merge_target
+                          << " (merge target)"
+                          << dendl;
+                 ok = false;
+               } else if (!(q->second.state & (PG_STATE_ACTIVE |
+                                               PG_STATE_CLEAN))) {
+                 dout(10) << "pool " << i.first
+                          << " target " << p.get_pg_num_target()
+                          << " pg_num " << p.get_pg_num()
+                          << " - merge target " << merge_target
+                          << " not clean (" << pg_state_string(q->second.state)
+                          << ")" << dendl;
+                 ok = false;
+               }
+               if (ok) {
+                 unsigned target = p.get_pg_num() - 1;
+                 dout(10) << "pool " << i.first
+                          << " target " << p.get_pg_num_target()
+                          << " pg_num " << p.get_pg_num()
+                          << " -> " << target
+                          << " (merging " << merge_source
+                          << " and " << merge_target
+                          << ")" << dendl;
+                 pg_num_to_set[osdmap.get_pool_name(i.first)] = target;
+               }
+             } else if (p.get_pg_num_target() > p.get_pg_num()) {
+               // pg_num increase (split)
+               bool active = true;
+               auto q = pg_map.num_pg_by_pool_state.find(i.first);
+               if (q != pg_map.num_pg_by_pool_state.end()) {
+                 for (auto& j : q->second) {
+                   if ((j.first & (PG_STATE_ACTIVE|PG_STATE_PEERED)) == 0) {
+                     dout(20) << "pool " << i.first << " has " << j.second
+                              << " pgs in " << pg_state_string(j.first)
+                              << dendl;
+                     active = false;
+                   }
+                 }
+               } else {
+                 active = false;
+               }
+               if (!active) {
+                 dout(10) << "pool " << i.first
+                          << " target " << p.get_pg_num_target()
+                          << " pg_num " << p.get_pg_num()
+                          << " - not all pgs active"
+                          << dendl;
+               } else {
+                 unsigned add = std::min(
+                   left,
+                   p.get_pg_num_target() - p.get_pg_num());
+                 unsigned target = p.get_pg_num() + add;
+                 left -= add;
+                 dout(10) << "pool " << i.first
+                          << " target " << p.get_pg_num_target()
+                          << " pg_num " << p.get_pg_num()
+                          << " -> " << target << dendl;
+                 pg_num_to_set[osdmap.get_pool_name(i.first)] = target;
+               }
+             }
+           }
+
+           // adjust pgp_num?
+           unsigned target = std::min(p.get_pg_num_pending(),
+                                      p.get_pgp_num_target());
+           if (target != p.get_pgp_num()) {
+             // FIXME: we should throttle this to limit mispalced objects, like
+             // we do in the balancer module.
+             dout(10) << "pool " << i.first
+                      << " pgp target " << p.get_pgp_num_target()
+                      << " pgp_num " << p.get_pgp_num()
+                      << " -> " << target << dendl;
+             pgp_num_to_set[osdmap.get_pool_name(i.first)] = target;
+           }
+           if (left == 0) {
+             return;
+           }
+         }
+       });
+    });
+  for (auto i : pg_num_to_set) {
+    const string cmd =
+      "{"
+      "\"prefix\": \"osd pool set\", "
+      "\"pool\": \"" + i.first + "\", "
+      "\"var\": \"pg_num_actual\", "
+      "\"val\": \"" + stringify(i.second) + "\""
+      "}";
+    monc->start_mon_command({cmd}, {}, nullptr, nullptr, nullptr);
+  }
+  for (auto i : pgp_num_to_set) {
+    const string cmd =
+      "{"
+      "\"prefix\": \"osd pool set\", "
+      "\"pool\": \"" + i.first + "\", "
+      "\"var\": \"pgp_num_actual\", "
+      "\"val\": \"" + stringify(i.second) + "\""
+      "}";
+    monc->start_mon_command({cmd}, {}, nullptr, nullptr, nullptr);
+  }
+}
+
 void DaemonServer::got_service_map()
 {
   Mutex::Locker l(lock);
index 419a00ddf47fb553e70a9c9f3d24173062e08114..aa522612b0b8ca09b02d97eff3c2312564d25d2d 100644 (file)
@@ -150,6 +150,7 @@ public:
   void send_report();
   void got_service_map();
   void got_mgr_map();
+  void adjust_pgs();
 
   void _send_configure(ConnectionRef c);