OPTION(mds_bal_split_wr, OPT_FLOAT)
OPTION(mds_bal_split_bits, OPT_INT)
OPTION(mds_bal_merge_size, OPT_INT)
-OPTION(mds_bal_fragment_interval, OPT_INT) // seconds
OPTION(mds_bal_fragment_size_max, OPT_INT) // order of magnitude higher than split size
OPTION(mds_bal_fragment_fast_factor, OPT_FLOAT) // multiple of size_max that triggers immediate split
OPTION(mds_bal_idle_threshold, OPT_FLOAT)
#include "MDCache.h"
#include "Locker.h"
#include "MDBalancer.h"
+#include "Migrator.h"
#include "CInode.h"
#include "CDir.h"
#include "CDentry.h"
#include "include/compat.h"
#include "mdstypes.h"
+#include "mon/MonClient.h"
#include "MDBalancer.h"
#include "MDSRank.h"
-#include "mon/MonClient.h"
#include "MDSMap.h"
#include "CInode.h"
#include "CDir.h"
return 0;
}
+MDBalancer::MDBalancer(MDSRank *m, Messenger *msgr, MonClient *monc) : // ctor: stores the three collaborators and snapshots the fragmentation options
+ mds(m), messenger(msgr), mon_client(monc)
+{
+ bal_fragment_dirs = g_conf().get_val<bool>("mds_bal_fragment_dirs"); // cached so maybe_fragment() can test a member instead of querying the config
+ bal_fragment_interval = g_conf().get_val<int64_t>("mds_bal_fragment_interval"); // cached; also passed to timer.add_event_after() when a split/merge is queued
+}
+
+void MDBalancer::handle_conf_change(const ConfigProxy& conf, // refreshes the cached fragmentation options that are named in `changed`
+ const std::set <std::string> &changed,
+ const MDSMap &mds_map) // NOTE(review): `conf` and `mds_map` are not used in this body; values are re-read through g_conf()
+{
+ if (changed.count("mds_bal_fragment_dirs")) // an option is re-read only when its key is listed as changed
+ bal_fragment_dirs = g_conf().get_val<bool>("mds_bal_fragment_dirs");
+ if (changed.count("mds_bal_fragment_interval"))
+ bal_fragment_interval = g_conf().get_val<int64_t>("mds_bal_fragment_interval");
+}
+
void MDBalancer::handle_export_pins(void)
{
auto &q = mds->mdcache->export_pin_queue;
void MDBalancer::tick()
{
static int num_bal_times = g_conf()->mds_bal_max;
- auto bal_interval = mds->cct->_conf.get_val<int64_t>("mds_bal_interval");
- auto bal_max_until = mds->cct->_conf.get_val<int64_t>("mds_bal_max_until");
+ auto bal_interval = g_conf().get_val<int64_t>("mds_bal_interval");
+ auto bal_max_until = g_conf().get_val<int64_t>("mds_bal_max_until");
time now = clock::now();
if (g_conf()->mds_bal_export_pin) {
<< " oid=" << oid << " oloc=" << oloc << dendl;
/* timeout: if we waste half our time waiting for RADOS, then abort! */
+ auto bal_interval = g_conf().get_val<int64_t>("mds_bal_interval");
lock.Lock();
- int ret_t = cond.WaitInterval(lock, utime_t(mds->cct->_conf.get_val<int64_t>("mds_bal_interval")/2, 0));
+ int ret_t = cond.WaitInterval(lock, utime_t(bal_interval / 2, 0));
lock.Unlock();
/* success: store the balancer in memory and set the version. */
// Set a timer to really do the split: we don't do it immediately
// so that bursts of ops on a directory have a chance to go through
// before we freeze it.
- mds->timer.add_event_after(g_conf()->mds_bal_fragment_interval,
+ mds->timer.add_event_after(bal_fragment_interval,
new FunctionContext(callback));
}
}
if (merge_pending.count(frag) == 0) {
dout(20) << __func__ << " enqueued dir " << *dir << dendl;
merge_pending.insert(frag);
- mds->timer.add_event_after(g_conf()->mds_bal_fragment_interval,
+ mds->timer.add_event_after(bal_fragment_interval,
new FunctionContext(callback));
} else {
dout(20) << __func__ << " dir already in queue " << *dir << dendl;
void MDBalancer::maybe_fragment(CDir *dir, bool hot)
{
// split/merge
- if (mds->cct->_conf.get_val<bool>("mds_bal_fragment_dirs") &&
- g_conf()->mds_bal_fragment_interval > 0 &&
- !dir->inode->is_base() && // not root/base (for now at least)
- dir->is_auth()) {
+ if (bal_fragment_dirs && bal_fragment_interval > 0 &&
+ dir->is_auth() && !dir->inode->is_base()) { // not root/base (for now at least)
// split
if (g_conf()->mds_bal_split_size > 0 && (dir->should_split() || hot)) {
#include "common/Clock.h"
#include "common/Cond.h"
+class MDSMap;
class MDSRank;
class Message;
class MHeartbeat;
using time = ceph::coarse_mono_time;
friend class C_Bal_SendHeartbeat;
- MDBalancer(MDSRank *m, Messenger *msgr, MonClient *monc) :
- mds(m), messenger(msgr), mon_client(monc) { }
+ MDBalancer(MDSRank *m, Messenger *msgr, MonClient *monc);
+
+ void handle_conf_change(const ConfigProxy& conf,
+ const std::set <std::string> &changed,
+ const MDSMap &mds_map);
int proc_message(Message *m);
int dump_loads(Formatter *f);
private:
+ bool bal_fragment_dirs; // cached value of "mds_bal_fragment_dirs"; gates the split/merge path in maybe_fragment()
+ int64_t bal_fragment_interval; // cached value of "mds_bal_fragment_interval"; maybe_fragment() requires it > 0, and it is the delay given to timer.add_event_after()
+
typedef struct {
std::map<mds_rank_t, double> targets;
std::map<mds_rank_t, double> imported;
cap_imports_num_opening = 0;
opening_root = open = false;
- lru.lru_set_midpoint(cache_mid());
+
+ cache_inode_limit = g_conf().get_val<int64_t>("mds_cache_size");
+ cache_memory_limit = g_conf().get_val<uint64_t>("mds_cache_memory_limit");
+ cache_reservation = g_conf().get_val<double>("mds_cache_reservation");
+ cache_health_threshold = g_conf().get_val<double>("mds_health_cache_threshold");
+
+ lru.lru_set_midpoint(g_conf().get_val<double>("mds_cache_mid"));
bottom_lru.lru_set_midpoint(0);
}
}
+void MDCache::handle_conf_change(const ConfigProxy& conf, // refreshes the cached cache-limit options, then forwards the change to the migrator and balancer
+ const std::set <std::string> &changed,
+ const MDSMap &mdsmap)
+{
+ if (changed.count("mds_cache_size")) // each cached value is re-read only when its option key is in `changed`
+ cache_inode_limit = g_conf().get_val<int64_t>("mds_cache_size");
+ if (changed.count("mds_cache_memory_limit"))
+ cache_memory_limit = g_conf().get_val<uint64_t>("mds_cache_memory_limit");
+ if (changed.count("mds_cache_reservation"))
+ cache_reservation = g_conf().get_val<double>("mds_cache_reservation");
+ if (changed.count("mds_health_cache_threshold"))
+ cache_health_threshold = g_conf().get_val<double>("mds_health_cache_threshold");
+ if (changed.count("mds_cache_mid")) // not stored in a member: applied straight to the LRU midpoint
+ lru.lru_set_midpoint(g_conf().get_val<double>("mds_cache_mid"));
+ migrator->handle_conf_change(conf, changed, mdsmap); // forwarded unconditionally, whatever keys changed
+ mds->balancer->handle_conf_change(conf, changed, mdsmap); // NOTE(review): assumes mds->balancer is already set when this runs — confirm
+}
void MDCache::log_stat()
{
- mds->logger->set(l_mds_inode_max, cache_limit_inodes() == 0 ? INT_MAX : cache_limit_inodes());
+ mds->logger->set(l_mds_inode_max, cache_inode_limit ? : INT_MAX);
mds->logger->set(l_mds_inodes, lru.lru_get_size());
mds->logger->set(l_mds_inodes_pinned, lru.lru_get_num_pinned());
mds->logger->set(l_mds_inodes_top, lru.lru_get_top());
bool MDCache::trim(uint64_t count)
{
uint64_t used = cache_size();
- uint64_t limit = cache_limit_memory();
+ uint64_t limit = cache_memory_limit;
map<mds_rank_t, MCacheExpire*> expiremap;
dout(7) << "trim bytes_used=" << bytes2str(used)
<< " limit=" << bytes2str(limit)
- << " reservation=" << cache_reservation()
+ << " reservation=" << cache_reservation
<< "% count=" << count << dendl;
// process delayed eval_stray()
bool exceeded_size_limit;
+private:
+ uint64_t cache_inode_limit; // cached "mds_cache_size" (read as int64_t, stored unsigned); 0 disables the inode-count terms in cache_toofull_ratio()/cache_overfull()
+ uint64_t cache_memory_limit; // cached "mds_cache_memory_limit"
+ double cache_reservation; // cached "mds_cache_reservation"; used as a fraction: reserve = limit * (1.0 - cache_reservation)
+ double cache_health_threshold;
+
public:
- static uint64_t cache_limit_inodes(void) {
- return g_conf().get_val<int64_t>("mds_cache_size");
- }
- static uint64_t cache_limit_memory(void) {
- return g_conf().get_val<uint64_t>("mds_cache_memory_limit");
- }
- static double cache_reservation(void) {
- return g_conf().get_val<double>("mds_cache_reservation");
- }
- static double cache_mid(void) {
- return g_conf().get_val<double>("mds_cache_mid");
+ uint64_t cache_limit_inodes(void) { // returns the cached inode limit; NOTE(review): no longer static and not const, so const callers cannot use it — consider marking it const
+ return cache_inode_limit;
}
- static double cache_health_threshold(void) {
- return g_conf().get_val<double>("mds_health_cache_threshold");
+ uint64_t cache_limit_memory(void) { // returns the cached memory limit; NOTE(review): no longer static and not const, so const callers cannot use it — consider marking it const
+ return cache_memory_limit;
}
double cache_toofull_ratio(void) const {
- uint64_t inode_limit = cache_limit_inodes();
- double inode_reserve = inode_limit*(1.0-cache_reservation());
- double memory_reserve = cache_limit_memory()*(1.0-cache_reservation());
- return fmax(0.0, fmax((cache_size()-memory_reserve)/memory_reserve, inode_limit == 0 ? 0.0 : (CInode::count()-inode_reserve)/inode_reserve));
+ double inode_reserve = cache_inode_limit*(1.0-cache_reservation); // inode limit scaled down by the reservation fraction
+ double memory_reserve = cache_memory_limit*(1.0-cache_reservation); // memory limit scaled down the same way
+ return fmax(0.0, fmax((cache_size()-memory_reserve)/memory_reserve, cache_inode_limit == 0 ? 0.0 : (CInode::count()-inode_reserve)/inode_reserve)); // larger of the two relative overshoots, floored at 0; the inode term is 0 when no inode limit is set
}
bool cache_toofull(void) const {
return cache_toofull_ratio() > 0.0;
return mempool::get_pool(mempool::mds_co::id).allocated_bytes();
}
bool cache_overfull(void) const {
- uint64_t inode_limit = cache_limit_inodes();
- return (inode_limit > 0 && CInode::count() > inode_limit*cache_health_threshold()) || (cache_size() > cache_limit_memory()*cache_health_threshold());
+ return (cache_inode_limit > 0 && CInode::count() > cache_inode_limit*cache_health_threshold) || (cache_size() > cache_memory_limit*cache_health_threshold); // true when the inode count (only if an inode limit is set) or cache_size() exceeds its limit multiplied by cache_health_threshold
}
void advance_stray() {
public:
explicit MDCache(MDSRank *m, PurgeQueue &purge_queue_);
~MDCache();
+ void handle_conf_change(const ConfigProxy& conf,
+ const std::set <std::string> &changed,
+ const MDSMap &mds_map);
// debug
void log_stat();
"clog_to_syslog",
"clog_to_syslog_facility",
"clog_to_syslog_level",
+ "clog_to_graylog",
+ "clog_to_graylog_host",
+ "clog_to_graylog_port",
+ // MDCache
+ "mds_cache_size",
+ "mds_cache_memory_limit",
+ "mds_cache_reservation",
+ "mds_health_cache_threshold",
+ "mds_cache_mid",
+ // MDBalancer
+ "mds_bal_fragment_dirs",
+ "mds_bal_fragment_interval",
// PurgeQueue
"mds_max_purge_ops",
"mds_max_purge_ops_per_pg",
"mds_max_purge_files",
+ // Migrator
"mds_inject_migrator_session_race",
- "clog_to_graylog",
- "clog_to_graylog_host",
- "clog_to_graylog_port",
"host",
"fsid",
NULL
#include "SnapClient.h"
#include "SnapServer.h"
#include "MDBalancer.h"
+#include "Migrator.h"
#include "Locker.h"
#include "Server.h"
#include "InoTable.h"
#include "MDSMap.h"
#include "SessionMap.h"
#include "MDCache.h"
-#include "Migrator.h"
#include "MDLog.h"
#include "PurgeQueue.h"
#include "osdc/Journaler.h"
void handle_conf_change(const ConfigProxy& conf,
const std::set <std::string> &changed)
{
- mdcache->migrator->handle_conf_change(conf, changed, *mdsmap);
+ mdcache->handle_conf_change(conf, changed, *mdsmap); // MDCache::handle_conf_change() forwards to the migrator and the balancer itself
purge_queue.handle_conf_change(conf, changed, *mdsmap);
}