From: Sage Weil Date: Fri, 6 Apr 2018 15:26:10 +0000 (-0500) Subject: mgr: add simple controller to adjust pg[p]_num_actual X-Git-Tag: v14.0.1~371^2~51 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=a5274c75e263dd61d9569cfb9dd730cf7921d1e6;p=ceph-ci.git mgr: add simple controller to adjust pg[p]_num_actual This is a pretty trivial controller. It adds some constraints that were obviously not there before when the user could set these values to anything they wanted, but does not implement all of the "nice" stepping that we'll eventually want. That can come later. Splits: - throttle pg_num increases, currently using the same config option (mon_osd_max_creating_pgs) that we used to throttle pg creation - do not increase pg_num until the initial pg creation has completed. Merges: - wait until the source and target pgs for merge are active and clean before doing a merge. Adjust pgp_num all at once for now. Signed-off-by: Sage Weil --- diff --git a/src/mgr/DaemonServer.cc b/src/mgr/DaemonServer.cc index 14e7db45d5e..7ea22bc9a3f 100644 --- a/src/mgr/DaemonServer.cc +++ b/src/mgr/DaemonServer.cc @@ -352,6 +352,7 @@ void DaemonServer::tick() { dout(10) << dendl; send_report(); + adjust_pgs(); schedule_tick_locked( g_conf().get_val("mgr_tick_period").count()); @@ -2214,6 +2215,182 @@ void DaemonServer::send_report() monc->send_mon_message(m); } +void DaemonServer::adjust_pgs() +{ + dout(20) << dendl; + unsigned max = std::max(1, g_conf()->mon_osd_max_creating_pgs); + + map pg_num_to_set; + map pgp_num_to_set; + cluster_state.with_pgmap([&](const PGMap& pg_map) { + unsigned creating_or_unknown = 0; + for (auto& i : pg_map.num_pg_by_state) { + if ((i.first & (PG_STATE_CREATING)) || + i.first == 0) { + creating_or_unknown += i.second; + } + } + unsigned left = max; + if (creating_or_unknown >= max) { + return; + } + dout(10) << "creating_or_unknown " << creating_or_unknown + << " max_creating " << max << dendl; + cluster_state.with_osdmap([&](const OSDMap& osdmap) { + if (pg_map.last_osdmap_epoch != osdmap.get_epoch()) { + // do nothing if maps aren't in sync + dout(10) << "last_osdmap_epoch " << pg_map.last_osdmap_epoch + << " osdmap " << osdmap.get_epoch() << dendl; + //return; + } + for (auto& i : osdmap.get_pools()) { + const pg_pool_t& p = i.second; + + // adjust pg_num? + if (p.get_pg_num_target() != p.get_pg_num()) { + dout(20) << "pool " << i.first + << " pg_num " << p.get_pg_num() + << " target " << p.get_pg_num_target() + << dendl; + if (p.get_pg_num() != p.get_pg_num_pending()) { + dout(10) << "pool " << i.first + << " target " << p.get_pg_num_target() + << " pg_num " << p.get_pg_num() + << " - pg_num_pending != pg_num, waiting" + << dendl; + // FIXME: we might consider allowing pg_num increases without + // waiting for the previously planned merge to complete. + } else if (p.get_pg_num_target() < p.get_pg_num()) { + // pg_num decrease (merge) + pg_t merge_source(p.get_pg_num() - 1, i.first); + pg_t merge_target = merge_source.get_parent(); + bool ok = true; + auto q = pg_map.pg_stat.find(merge_source); + if (q == pg_map.pg_stat.end()) { + dout(10) << "pool " << i.first + << " target " << p.get_pg_num_target() + << " pg_num " << p.get_pg_num() + << " - no state for " << merge_source + << " (merge source)" + << dendl; + ok = false; + } else if (!(q->second.state & (PG_STATE_ACTIVE | + PG_STATE_CLEAN))) { + dout(10) << "pool " << i.first + << " target " << p.get_pg_num_target() + << " pg_num " << p.get_pg_num() + << " - merge source " << merge_source + << " not clean (" << pg_state_string(q->second.state) + << ")" << dendl; + ok = false; + } + q = pg_map.pg_stat.find(merge_target); + if (q == pg_map.pg_stat.end()) { + dout(10) << "pool " << i.first + << " target " << p.get_pg_num_target() + << " pg_num " << p.get_pg_num() + << " - no state for " << merge_target + << " (merge target)" + << dendl; + ok = false; + } else if (!(q->second.state & (PG_STATE_ACTIVE | + PG_STATE_CLEAN))) { + dout(10) << "pool " << i.first + << " target " << p.get_pg_num_target() + << " pg_num " << p.get_pg_num() + << " - merge target " << merge_target + << " not clean (" << pg_state_string(q->second.state) + << ")" << dendl; + ok = false; + } + if (ok) { + unsigned target = p.get_pg_num() - 1; + dout(10) << "pool " << i.first + << " target " << p.get_pg_num_target() + << " pg_num " << p.get_pg_num() + << " -> " << target + << " (merging " << merge_source + << " and " << merge_target + << ")" << dendl; + pg_num_to_set[osdmap.get_pool_name(i.first)] = target; + } + } else if (p.get_pg_num_target() > p.get_pg_num()) { + // pg_num increase (split) + bool active = true; + auto q = pg_map.num_pg_by_pool_state.find(i.first); + if (q != pg_map.num_pg_by_pool_state.end()) { + for (auto& j : q->second) { + if ((j.first & (PG_STATE_ACTIVE|PG_STATE_PEERED)) == 0) { + dout(20) << "pool " << i.first << " has " << j.second + << " pgs in " << pg_state_string(j.first) + << dendl; + active = false; + } + } + } else { + active = false; + } + if (!active) { + dout(10) << "pool " << i.first + << " target " << p.get_pg_num_target() + << " pg_num " << p.get_pg_num() + << " - not all pgs active" + << dendl; + } else { + unsigned add = std::min( + left, + p.get_pg_num_target() - p.get_pg_num()); + unsigned target = p.get_pg_num() + add; + left -= add; + dout(10) << "pool " << i.first + << " target " << p.get_pg_num_target() + << " pg_num " << p.get_pg_num() + << " -> " << target << dendl; + pg_num_to_set[osdmap.get_pool_name(i.first)] = target; + } + } + } + + // adjust pgp_num? + unsigned target = std::min(p.get_pg_num_pending(), + p.get_pgp_num_target()); + if (target != p.get_pgp_num()) { + // FIXME: we should throttle this to limit mispalced objects, like + // we do in the balancer module. + dout(10) << "pool " << i.first + << " pgp target " << p.get_pgp_num_target() + << " pgp_num " << p.get_pgp_num() + << " -> " << target << dendl; + pgp_num_to_set[osdmap.get_pool_name(i.first)] = target; + } + if (left == 0) { + return; + } + } + }); + }); + for (auto i : pg_num_to_set) { + const string cmd = + "{" + "\"prefix\": \"osd pool set\", " + "\"pool\": \"" + i.first + "\", " + "\"var\": \"pg_num_actual\", " + "\"val\": \"" + stringify(i.second) + "\"" + "}"; + monc->start_mon_command({cmd}, {}, nullptr, nullptr, nullptr); + } + for (auto i : pgp_num_to_set) { + const string cmd = + "{" + "\"prefix\": \"osd pool set\", " + "\"pool\": \"" + i.first + "\", " + "\"var\": \"pgp_num_actual\", " + "\"val\": \"" + stringify(i.second) + "\"" + "}"; + monc->start_mon_command({cmd}, {}, nullptr, nullptr, nullptr); + } +} + void DaemonServer::got_service_map() { Mutex::Locker l(lock); diff --git a/src/mgr/DaemonServer.h b/src/mgr/DaemonServer.h index 419a00ddf47..aa522612b0b 100644 --- a/src/mgr/DaemonServer.h +++ b/src/mgr/DaemonServer.h @@ -150,6 +150,7 @@ public: void send_report(); void got_service_map(); void got_mgr_map(); + void adjust_pgs(); void _send_configure(ConnectionRef c);