* adjusting
*/
- void hit(double v = 1.0) {
+ void hit(utime_t now, double v = 1.0) { // record a hit of weight v; the caller now supplies the timestamp
+ decay(now); // called before accumulating; presumably ages the counter up to `now` (decay() is not visible here)
delta += v;
}
void adjust(double a) {
- delta += a;
+ val += a; // untimed variant: a is applied straight to val (previously delta), with no decay() call on this path
+ }
+ void adjust(utime_t now, double a) {
+ decay(now); // timestamped variant: calls decay(now) first, then applies a to val
+ val += a;
}
/**
}
void reset() {
+ // convenience overload: reset against the current clock reading
+ reset(g_clock.now());
+ }
+ // Reset the counter: record `now` as the last decay time and zero both val and delta.
+ void reset(utime_t now) {
- last_decay = g_clock.now();
+ last_decay = now; // honour the caller's timestamp instead of re-reading the clock
val = delta = 0;
}
debug_mds: 1,
debug_mds_balancer: 1,
debug_mds_log: 1,
+ debug_mds_migrator: 1,
debug_buffer: 0,
debug_filer: 0,
debug_objecter: 0,
mds_cache_size: MDS_CACHE_SIZE,
mds_cache_mid: .7,
- mds_decay_halflife: 30,
+ mds_decay_halflife: 10,
mds_beacon_interval: 5, //30.0,
- mds_beacon_grace: 15, //60*60.0,
+ mds_beacon_grace: 30, //60*60.0,
mds_log: true,
mds_log_max_len: MDS_CACHE_SIZE / 3,
mds_bal_max_until: -1,
mds_bal_mode: 0,
+ mds_bal_min_rebalance: .2, // must be this much above average before we export anything
mds_bal_min_start: .2, // if we need less than this, we don't do anything
mds_bal_need_min: .8, // take within this range of what we need
mds_bal_need_max: 1.2,
g_conf.debug_mds_log = atoi(args[++i]);
else
g_debug_after_conf.debug_mds_log = atoi(args[++i]);
+ else if (strcmp(args[i], "--debug_mds_migrator") == 0)
+ if (!g_conf.debug_after)
+ g_conf.debug_mds_migrator = atoi(args[++i]);
+ else
+ g_debug_after_conf.debug_mds_migrator = atoi(args[++i]);
else if (strcmp(args[i], "--debug_buffer") == 0)
if (!g_conf.debug_after)
g_conf.debug_buffer = atoi(args[++i]);
int debug_mds;
int debug_mds_balancer;
int debug_mds_log;
+ int debug_mds_migrator;
int debug_buffer;
int debug_filer;
int debug_objecter;
int mds_bal_max_until;
int mds_bal_mode;
+ float mds_bal_min_rebalance;
float mds_bal_min_start;
float mds_bal_need_min;
float mds_bal_need_max;
set<int> dir_rep_by; // if dir_rep == REP_LIST
// popularity
- meta_load_t popularity[MDS_NPOP];
+ dirfrag_load_vec_t pop_me;
+ dirfrag_load_vec_t pop_nested;
+ dirfrag_load_vec_t pop_auth_subtree;
+ dirfrag_load_vec_t pop_auth_subtree_nested;
+
utime_t last_popularity_sample;
// friends
// for giving to clients
void get_dist_spec(set<int>& ls, int auth) {
- if (( popularity[MDS_POP_CURDOM].pop[META_POP_IRD].get() >
+ if (( pop_auth_subtree.get(META_POP_IRD).get() >
g_conf.mds_bal_replicate_threshold)) {
//if (!cached_by.empty() && inode.ino > 1) dout(1) << "distributed spec for " << *this << endl;
for (map<int,int>::iterator p = replicas_begin();
version_t committed_version;
version_t committed_version_equivalent;
uint32_t state;
- meta_load_t popularity_justme;
- meta_load_t popularity_curdom;
+ dirfrag_load_vec_t pop_me;
+ dirfrag_load_vec_t pop_auth_subtree;
int32_t dir_rep;
} st;
map<int,int> replicas;
public:
CDirExport() {}
- CDirExport(CDir *dir) {
+ CDirExport(CDir *dir, utime_t now) {
memset(&st, 0, sizeof(st));
assert(dir->get_version() == dir->get_projected_version());
st.committed_version_equivalent = dir->committed_version_equivalent;
st.state = dir->state;
st.dir_rep = dir->dir_rep;
-
- st.popularity_justme.take( dir->popularity[MDS_POP_JUSTME] );
- st.popularity_curdom.take( dir->popularity[MDS_POP_CURDOM] );
- dir->popularity[MDS_POP_ANYDOM] -= st.popularity_curdom;
- dir->popularity[MDS_POP_NESTED] -= st.popularity_curdom;
+
+ st.pop_me = dir->pop_me;
+ st.pop_auth_subtree = dir->pop_auth_subtree;
+ dir->pop_auth_subtree_nested -= dir->pop_auth_subtree;
+ dir->pop_me.zero(now);
+ dir->pop_auth_subtree.zero(now);
rep_by = dir->dir_rep_by;
replicas = dir->replica_map;
(st.state & CDir::MASK_STATE_EXPORTED);
dir->dir_rep = st.dir_rep;
- dir->popularity[MDS_POP_JUSTME] += st.popularity_justme;
- dir->popularity[MDS_POP_CURDOM] += st.popularity_curdom;
- dir->popularity[MDS_POP_ANYDOM] += st.popularity_curdom;
- dir->popularity[MDS_POP_NESTED] += st.popularity_curdom;
+ dir->pop_me = st.pop_me;
+ dir->pop_auth_subtree = st.pop_auth_subtree;
+ dir->pop_auth_subtree_nested += dir->pop_auth_subtree;
dir->replica_nonce = 0; // no longer defined
int nested_auth_pins;
public:
- meta_load_t popularity[MDS_NPOP];
+ inode_load_vec_t pop;
// friends
friend class Server;
struct st_ {
inode_t inode;
- meta_load_t popularity_justme;
- meta_load_t popularity_curdom;
+ inode_load_vec_t pop;
+
bool is_dirty; // dirty inode?
int num_caps;
public:
CInodeExport() {}
- CInodeExport(CInode *in) {
+ CInodeExport(CInode *in, utime_t now) {
st.inode = in->inode;
symlink = in->symlink;
dirfragtree = in->dirfragtree;
in->filelock._encode(locks);
in->dirlock._encode(locks);
- st.popularity_justme.take( in->popularity[MDS_POP_JUSTME] );
- st.popularity_curdom.take( in->popularity[MDS_POP_CURDOM] );
- in->popularity[MDS_POP_ANYDOM] -= st.popularity_curdom;
- in->popularity[MDS_POP_NESTED] -= st.popularity_curdom;
+ st.pop = in->pop;
+ in->pop.zero(now);
// steal WRITER caps from inode
in->take_client_caps(cap_map);
in->symlink = symlink;
in->dirfragtree = dirfragtree;
- in->popularity[MDS_POP_JUSTME] += st.popularity_justme;
- in->popularity[MDS_POP_CURDOM] += st.popularity_curdom;
- in->popularity[MDS_POP_ANYDOM] += st.popularity_curdom;
- in->popularity[MDS_POP_NESTED] += st.popularity_curdom;
+ in->pop = st.pop;
if (st.is_dirty)
in->_mark_dirty();
// sample?
if ((double)now - (double)last_sample > g_conf.mds_bal_sample_interval) {
- dout(10) << "tick last_sample now " << now << endl;
+ dout(15) << "tick last_sample now " << now << endl;
last_sample = now;
}
mds_load_t MDBalancer::get_load()
{
mds_load_t load;
- if (mds->mdcache->get_root())
- load.root =
- mds->mdcache->get_root()->popularity[MDS_POP_ANYDOM];
- // +
- // mds->mdcache->get_root()->popularity[MDS_POP_NESTED];
+
+ if (mds->mdcache->get_root()) {
+ list<CDir*> ls;
+ mds->mdcache->get_root()->get_dirfrags(ls);
+ for (list<CDir*>::iterator p = ls.begin();
+ p != ls.end();
+ p++) {
+ load.auth += (*p)->pop_auth_subtree_nested;
+ load.all += (*p)->pop_nested;
+ }
+ }
load.req_rate = mds->get_req_rate();
load.queue_len = mds->messenger->get_dispatch_queue_len();
void MDBalancer::send_heartbeat()
{
+ utime_t now = g_clock.now();
if (!mds->mdcache->get_root()) {
dout(5) << "no root on send_heartbeat" << endl;
mds->mdcache->open_root(new C_Bal_SendHeartbeat(mds));
int from = im->inode->authority().first;
if (from == mds->get_nodeid()) continue;
if (im->get_inode()->is_stray()) continue;
- import_map[from] += im->popularity[MDS_POP_CURDOM].meta_load();
+ import_map[from] += im->pop_auth_subtree.meta_load(now);
}
mds_import_map[ mds->get_nodeid() ] = import_map;
dout(25) << "=== got heartbeat " << m->get_beat() << " from " << m->get_source().num() << " " << m->get_load() << endl;
if (!mds->mdcache->get_root()) {
- dout(10) << "no root on handle" << endl;
+ dout(10) << "opening root on handle_heartbeat" << endl;
mds->mdcache->open_root(new C_MDS_RetryMessage(mds, m));
return;
}
{
int cluster_size = mds->get_mds_map()->get_num_mds();
int whoami = mds->get_nodeid();
+ utime_t now = g_clock.now();
dump_pop_map();
// rescale! turn my mds_load back into meta_load units
double load_fac = 1.0;
- if (mds_load[whoami].mds_load() > 0) {
- load_fac = mds_load[whoami].root.meta_load() / mds_load[whoami].mds_load();
+ if (mds_load[whoami].mds_load(now) > 0) {
+ load_fac = mds_load[whoami].auth.meta_load(now) / mds_load[whoami].mds_load(now);
dout(7) << " load_fac is " << load_fac
- << " <- " << mds_load[whoami].root.meta_load()
- << " / " << mds_load[whoami].mds_load()
- << endl;
+ << " <- " << mds_load[whoami].auth.meta_load(now)
+ << " / " << mds_load[whoami].mds_load(now)
+ << endl;
}
double total_load = 0;
multimap<double,int> load_map;
for (int i=0; i<cluster_size; i++) {
- double l = mds_load[i].mds_load() * load_fac;
+ double l = mds_load[i].mds_load(now) * load_fac;
mds_meta_load[i] = l;
if (whoami == 0)
dout(-5) << " mds" << i
- << " meta load " << mds_load[i]
- << " = " << mds_load[i].mds_load()
- << " --> " << l << endl;
+ << " " << mds_load[i]
+ << " = " << mds_load[i].mds_load(now)
+ << " ~ " << l << endl;
if (whoami == i) my_load = l;
total_load += l;
<< endl;
// under or over?
- if (my_load < target_load) {
- dout(5) << " i am underloaded, doing nothing." << endl;
+ if (my_load < target_load * (1.0 + g_conf.mds_bal_min_rebalance)) {
+ dout(5) << " i am underloaded or barely overloaded, doing nothing." << endl;
show_imports();
return;
}
- dout(5) << " i am overloaded" << endl;
+ dout(5) << " i am sufficiently overloaded" << endl;
// first separate exporters and importers
if (true) {
// analyze import_map; do any matches i can
- dout(5) << " matching exporters to import sources" << endl;
+ dout(15) << " matching exporters to import sources" << endl;
// big -> small exporters
for (multimap<double,int>::reverse_iterator ex = exporters.rbegin();
if (1) {
if (beat % 2 == 1) {
// old way
- dout(5) << " matching big exporters to big importers" << endl;
+ dout(15) << " matching big exporters to big importers" << endl;
// big exporters to big importers
multimap<double,int>::reverse_iterator ex = exporters.rbegin();
multimap<double,int>::iterator im = importers.begin();
}
} else {
// new way
- dout(5) << " matching small exporters to big importers" << endl;
+ dout(15) << " matching small exporters to big importers" << endl;
// small exporters to big importers
multimap<double,int>::iterator ex = exporters.begin();
multimap<double,int>::iterator im = importers.begin();
CDir *im = *it;
if (im->get_inode()->is_stray()) continue;
- double pop = im->popularity[MDS_POP_CURDOM].meta_load();
+ double pop = im->pop_auth_subtree.meta_load(now);
if (pop < g_conf.mds_bal_idle_threshold &&
im->inode != mds->mdcache->get_root() &&
im->inode->authority().first != mds->get_nodeid()) {
mds->mdcache->migrator->export_dir(im, im->inode->authority().first);
continue;
}
+
import_pop_map[ pop ] = im;
int from = im->inode->authority().first;
dout(15) << " map: i imported " << *im << " from " << from << endl;
if (amount < MIN_OFFLOAD) continue;
- dout(-5) << " sending " << amount << " to mds" << target
+ dout(5) << "want to send " << amount << " to mds" << target
//<< " .. " << (*it).second << " * " << load_fac
<< " -> " << amount
<< endl;//" .. fudge is " << fudge << endl;
if (dir->inode->is_root()) continue;
if (dir->is_freezing() || dir->is_frozen()) continue; // export pbly already in progress
- double pop = dir->popularity[MDS_POP_CURDOM].meta_load();
+ double pop = dir->pop_auth_subtree.meta_load(now);
assert(dir->inode->authority().first == target); // cuz that's how i put it in the map, dummy
if (pop <= amount-have) {
pot != candidates.end();
pot++) {
if ((*pot)->get_inode()->is_stray()) continue;
- find_exports(*pot, amount, exports, have, already_exporting);
- if (have > amount-MIN_OFFLOAD) {
+ find_exports(*pot, amount, exports, have, already_exporting, now);
+ if (have > amount-MIN_OFFLOAD)
break;
- }
}
//fudge = amount - have;
total_sent += have;
for (list<CDir*>::iterator it = exports.begin(); it != exports.end(); it++) {
- dout(-5) << " exporting to mds" << target
- << " fragment " << **it
- << " pop " << (*it)->popularity[MDS_POP_CURDOM].meta_load()
+ dout(-5) << " - exporting "
+ << (*it)->pop_auth_subtree.meta_load(now)
+ << " to mds" << target
+ << " " << **it
<< endl;
mds->mdcache->migrator->export_dir(*it, target);
-
- // hack! only do one dir.
- break;
}
}
double amount,
list<CDir*>& exports,
double& have,
- set<CDir*>& already_exporting)
+ set<CDir*>& already_exporting,
+ utime_t now)
{
double need = amount - have;
if (need < amount * g_conf.mds_bal_min_start)
double midchunk = need * g_conf.mds_bal_midchunk;
double minchunk = need * g_conf.mds_bal_minchunk;
- list<CDir*> bigger;
+ list<CDir*> bigger_rep, bigger_unrep;
multimap<double, CDir*> smaller;
- double dir_pop = dir->popularity[MDS_POP_CURDOM].meta_load();
- double dir_sum = 0;
- dout(-7) << " find_exports in " << dir_pop << " " << *dir << " need " << need << " (" << needmin << " - " << needmax << ")" << endl;
+ double dir_pop = dir->pop_auth_subtree.meta_load(now);
+ dout(7) << " find_exports in " << dir_pop << " " << *dir << " need " << need << " (" << needmin << " - " << needmax << ")" << endl;
+ double subdir_sum = 0;
for (CDir_map_t::iterator it = dir->begin();
it != dir->end();
it++) {
for (list<CDir*>::iterator p = dfls.begin();
p != dfls.end();
++p) {
- CDir *dir = *p;
- if (!dir->is_auth()) continue;
- if (already_exporting.count(dir)) continue;
+ CDir *subdir = *p;
+ if (!subdir->is_auth()) continue;
+ if (already_exporting.count(subdir)) continue;
- if (dir->is_frozen()) continue; // can't export this right now!
- //if (in->dir->get_size() == 0) continue; // don't export empty dirs, even if they're not complete. for now!
+ if (subdir->is_frozen()) continue; // can't export this right now!
// how popular?
- double pop = dir->popularity[MDS_POP_CURDOM].meta_load();
- dir_sum += pop;
- dout(20) << " pop " << pop << " " << *dir << endl;
+ double pop = subdir->pop_auth_subtree.meta_load(now);
+ subdir_sum += pop;
+ dout(15) << " subdir pop " << pop << " " << *subdir << endl;
if (pop < minchunk) continue;
// lucky find?
if (pop > needmin && pop < needmax) {
- exports.push_back(dir);
+ exports.push_back(subdir);
+ already_exporting.insert(subdir);
have += pop;
return;
}
- if (pop > need)
- bigger.push_back(dir);
- else
- smaller.insert(pair<double,CDir*>(pop, dir));
+ if (pop > need) {
+ if (subdir->is_rep())
+ bigger_rep.push_back(subdir);
+ else
+ bigger_unrep.push_back(subdir);
+ } else
+ smaller.insert(pair<double,CDir*>(pop, subdir));
}
}
- dout(7) << " .. sum " << dir_sum << " / " << dir_pop << endl;
+ dout(15) << " sum " << subdir_sum << " / " << dir_pop << endl;
// grab some sufficiently big small items
multimap<double,CDir*>::reverse_iterator it;
if ((*it).first < midchunk)
break; // try later
- dout(7) << " taking smaller " << *(*it).second << endl;
+ dout(7) << " taking smaller " << *(*it).second << endl;
exports.push_back((*it).second);
already_exporting.insert((*it).second);
}
// apprently not enough; drill deeper into the hierarchy (if non-replicated)
- for (list<CDir*>::iterator it = bigger.begin();
- it != bigger.end();
+ for (list<CDir*>::iterator it = bigger_unrep.begin();
+ it != bigger_unrep.end();
it++) {
- if ((*it)->is_rep()) continue;
- dout(7) << " descending into " << **it << endl;
- find_exports(*it, amount, exports, have, already_exporting);
+ dout(15) << " descending into " << **it << endl;
+ find_exports(*it, amount, exports, have, already_exporting, now);
if (have > needmin)
return;
}
for (;
it != smaller.rend();
it++) {
-
- dout(7) << " taking (much) smaller " << it->first << " " << *(*it).second << endl;
+ dout(7) << " taking (much) smaller " << it->first << " " << *(*it).second << endl;
exports.push_back((*it).second);
already_exporting.insert((*it).second);
return;
}
- // ok fine, drill inot replicated dirs
- for (list<CDir*>::iterator it = bigger.begin();
- it != bigger.end();
+ // ok fine, drill into replicated dirs
+ for (list<CDir*>::iterator it = bigger_rep.begin();
+ it != bigger_rep.end();
it++) {
- if (!(*it)->is_rep()) continue;
- dout(7) << " descending into replicated " << **it << endl;
- find_exports(*it, amount, exports, have, already_exporting);
+ dout(7) << " descending into replicated " << **it << endl;
+ find_exports(*it, amount, exports, have, already_exporting, now);
if (have > needmin)
return;
}
-void MDBalancer::hit_inode(CInode *in, int type)
+void MDBalancer::hit_inode(utime_t now, CInode *in, int type)
{
+ // hit inode
+ in->pop.get(type).hit(now); // bump the inode's own counter for this op type, stamped with `now`
+
+ if (in->get_parent_dir()) // nothing further to hit when the inode has no parent dir
+ hit_dir(now, in->get_parent_dir(), type); // hit_dir's `amount` argument is left at its declared default (1.0)
+}
+/*
// hit me
- in->popularity[MDS_POP_JUSTME].pop[type].hit();
- in->popularity[MDS_POP_NESTED].pop[type].hit();
+ in->popularity[MDS_POP_JUSTME].pop[type].hit(now);
+ in->popularity[MDS_POP_NESTED].pop[type].hit(now);
if (in->is_auth()) {
- in->popularity[MDS_POP_CURDOM].pop[type].hit();
- in->popularity[MDS_POP_ANYDOM].pop[type].hit();
+ in->popularity[MDS_POP_CURDOM].pop[type].hit(now);
+ in->popularity[MDS_POP_ANYDOM].pop[type].hit(now);
dout(20) << "hit_inode " << type << " pop "
- << in->popularity[MDS_POP_JUSTME].pop[type].get() << " me, "
- << in->popularity[MDS_POP_NESTED].pop[type].get() << " nested, "
- << in->popularity[MDS_POP_CURDOM].pop[type].get() << " curdom, "
- << in->popularity[MDS_POP_CURDOM].pop[type].get() << " anydom"
+ << in->popularity[MDS_POP_JUSTME].pop[type].get(now) << " me, "
+ << in->popularity[MDS_POP_NESTED].pop[type].get(now) << " nested, "
+ << in->popularity[MDS_POP_CURDOM].pop[type].get(now) << " curdom, "
+ << in->popularity[MDS_POP_CURDOM].pop[type].get(now) << " anydom"
<< " on " << *in
<< endl;
} else {
dout(20) << "hit_inode " << type << " pop "
- << in->popularity[MDS_POP_JUSTME].pop[type].get() << " me, "
- << in->popularity[MDS_POP_NESTED].pop[type].get() << " nested, "
+ << in->popularity[MDS_POP_JUSTME].pop[type].get(now) << " me, "
+ << in->popularity[MDS_POP_NESTED].pop[type].get(now) << " nested, "
<< " on " << *in
<< endl;
}
// hit auth up to import
CDir *dir = in->get_parent_dir();
- if (dir) hit_dir(dir, type);
-}
+ if (dir) hit_dir(now, dir, type);
+*/
-void MDBalancer::hit_dir(CDir *dir, int type)
+void MDBalancer::hit_dir(utime_t now, CDir *dir, int type, double amount)
{
// hit me
- dir->popularity[MDS_POP_JUSTME].pop[type].hit();
+ dir->pop_me.get(type).hit(now, amount);
+
+ /*
+ dir->popularity[MDS_POP_JUSTME].pop[type].hit(now, amount);
// hit modify counter, if this was a modify
if (g_conf.num_mds > 2 && // FIXME >2 thing
}
}
+ */
- hit_recursive(dir, type);
-}
-
-
-
-void MDBalancer::hit_recursive(CDir *dir, int type)
-{
- bool anydom = dir->is_auth();
- bool curdom = dir->is_auth();
-
- float rd_adj = 0.0;
-
// replicate?
- if (dir->last_popularity_sample < last_sample) {
- float dir_pop = dir->popularity[MDS_POP_CURDOM].pop[type].get(); // hmm??
+ double rd_adj = 0;
+ if (type == META_POP_IRD &&
+ dir->last_popularity_sample < last_sample) {
+ float dir_pop = dir->pop_auth_subtree.get(type).get(now); // hmm??
dir->last_popularity_sample = last_sample;
- dout(20) << "hit_recursive " << type << " pop " << dir_pop << " curdom " << *dir << endl;
+ dout(20) << "hit_dir " << type << " pop " << dir_pop << " in " << *dir << endl;
if (dir->is_auth()) {
if (!dir->is_rep() &&
dir_pop >= g_conf.mds_bal_replicate_threshold) {
// replicate
- float rdp = dir->popularity[MDS_POP_JUSTME].pop[META_POP_IRD].get();
+ float rdp = dir->pop_me.get(META_POP_IRD).get(now);
rd_adj = rdp / mds->get_mds_map()->get_num_mds() - rdp;
rd_adj /= 2.0; // temper somewhat
dir->dir_rep = CDir::REP_ALL;
mds->mdcache->send_dir_updates(dir, true);
- dir->popularity[MDS_POP_JUSTME].pop[META_POP_IRD].adjust(rd_adj);
- dir->popularity[MDS_POP_CURDOM].pop[META_POP_IRD].adjust(rd_adj);
+ dir->pop_me.get(META_POP_IRD).adjust(rd_adj);
+ dir->pop_auth_subtree.get(META_POP_IRD).adjust(rd_adj);
}
if (!dir->ino() != 1 &&
}
}
+ // adjust ancestors
+ bool hit_subtree = dir->is_auth(); // current auth subtree (if any)
+ bool hit_subtree_nested = dir->is_auth(); // all nested auth subtrees
while (dir) {
- CInode *in = dir->inode;
-
- dir->popularity[MDS_POP_NESTED].pop[type].hit();
- in->popularity[MDS_POP_NESTED].pop[type].hit();
-
- if (rd_adj != 0.0) dir->popularity[MDS_POP_NESTED].pop[META_POP_IRD].adjust(rd_adj);
-
- if (anydom) {
- dir->popularity[MDS_POP_ANYDOM].pop[type].hit();
- in->popularity[MDS_POP_ANYDOM].pop[type].hit();
- }
+ dir->pop_nested.get(type).hit(now, amount);
+ if (rd_adj != 0.0)
+ dir->pop_nested.get(META_POP_IRD).adjust(now, rd_adj);
- if (curdom) {
- dir->popularity[MDS_POP_CURDOM].pop[type].hit();
- in->popularity[MDS_POP_CURDOM].pop[type].hit();
+ if (hit_subtree) {
+ dir->pop_auth_subtree.get(type).hit(now, amount);
+ if (rd_adj != 0.0)
+ dir->pop_auth_subtree.get(META_POP_IRD).adjust(now, rd_adj);
}
+
+ if (hit_subtree_nested) {
+ dir->pop_auth_subtree_nested.get(type).hit(now, amount);
+ if (rd_adj != 0.0)
+ dir->pop_auth_subtree_nested.get(META_POP_IRD).adjust(now, rd_adj);
+ }
if (dir->is_subtree_root())
- curdom = false; // end of auth domain, stop hitting auth counters.
+ hit_subtree = false; // end of auth domain, stop hitting auth counters.
dir = dir->inode->get_parent_dir();
}
}
/*
- * subtract off an exported chunk
+ * subtract off an exported chunk.
+ * this excludes *dir itself (encode_export_dir should have taken care of that)
+ * we _just_ do the parents' nested counters.
+ *
+ * NOTE: call me _after_ forcing *dir into a subtree root,
+ * but _before_ doing the encode_export_dirs.
*/
void MDBalancer::subtract_export(CDir *dir)
{
- meta_load_t curdom = dir->popularity[MDS_POP_CURDOM];
-
- bool in_domain = !dir->is_subtree_root();
-
+ dirfrag_load_vec_t subload = dir->pop_auth_subtree; // copy taken up front: `dir` is reassigned during the walk below
+
while (true) {
- CInode *in = dir->inode;
-
- in->popularity[MDS_POP_ANYDOM] -= curdom;
- if (in_domain) in->popularity[MDS_POP_CURDOM] -= curdom;
-
- dir = in->get_parent_dir();
+ dir = dir->inode->get_parent_dir(); // step up before adjusting, so the starting dir's own counters are never touched
if (!dir) break;
- if (dir->is_subtree_root()) in_domain = false;
-
- dir->popularity[MDS_POP_ANYDOM] -= curdom;
- if (in_domain) dir->popularity[MDS_POP_CURDOM] -= curdom;
+ dir->pop_nested -= subload;
+ dir->pop_auth_subtree_nested -= subload;
}
}
void MDBalancer::add_import(CDir *dir)
{
- meta_load_t curdom = dir->popularity[MDS_POP_CURDOM];
-
- bool in_domain = !dir->is_subtree_root();
+ dirfrag_load_vec_t subload = dir->pop_auth_subtree; // load of the imported subtree, copied before `dir` is reassigned below
while (true) {
- CInode *in = dir->inode;
-
- in->popularity[MDS_POP_ANYDOM] += curdom;
- if (in_domain) in->popularity[MDS_POP_CURDOM] += curdom;
-
- dir = in->get_parent_dir();
+ dir = dir->inode->get_parent_dir(); // step up first: only ancestors are credited, never the imported dir itself
if (!dir) break;
- if (dir->is_subtree_root()) in_domain = false;
-
- dir->popularity[MDS_POP_ANYDOM] += curdom;
- if (in_domain) dir->popularity[MDS_POP_CURDOM] += curdom;
- }
-
+ dir->pop_nested += subload;
+ dir->pop_auth_subtree_nested += subload;
+ }
}
void MDBalancer::dump_pop_map()
{
char fn[20];
- sprintf(fn, "popdump.%d.mds%d", beat_epoch, mds->get_nodeid());
+ static int x = 0;
+ sprintf(fn, "popdump.%d.mds%d.%d", beat_epoch, mds->get_nodeid(), x);
+ x++;
dout(1) << "dump_pop_map to " << fn << endl;
iq.pop_front();
// pop stats
- for (int a=0; a<MDS_NPOP; a++)
+ /*for (int a=0; a<MDS_NPOP; a++)
for (int b=0; b<META_NPOP; b++)
myfile << in->popularity[a].pop[b].get(now) << "\t";
+ */
// filename last
string p;
double amount,
list<CDir*>& exports,
double& have,
- set<CDir*>& already_exporting);
+ set<CDir*>& already_exporting,
+ utime_t now);
void subtract_export(class CDir *ex);
void add_import(class CDir *im);
- void hit_inode(class CInode *in, int type=0);
- void hit_dir(class CDir *dir, int type=0);
- void hit_recursive(class CDir *dir, int type=0);
+ void hit_inode(utime_t now, class CInode *in, int type);
+ void hit_dir(utime_t now, class CDir *dir, int type, double amount=1.0);
+ void hit_recursive(utime_t now, class CDir *dir, int type, double amount, double rd_adj);
void show_imports(bool external=false);
void MDCache::log_stat(Logger *logger)
{
if (get_root()) {
- logger->set("popanyd", (int)get_root()->popularity[MDS_POP_ANYDOM].meta_load());
- logger->set("popnest", (int)get_root()->popularity[MDS_POP_NESTED].meta_load());
+ utime_t now = g_clock.now();
+ //logger->set("pop", (int)get_root()->pop_nested.meta_load(now));
+ //logger->set("popauth", (int)get_root()->pop_auth_subtree_nested.meta_load(now));
}
logger->set("c", lru.lru_get_size());
logger->set("cpin", lru.lru_get_num_pinned());
// i am now the subtree root.
root = dir;
+ // adjust recursive pop counters
+ if (dir->is_auth()) {
+ CDir *p = dir->get_parent_dir();
+ while (p) {
+ p->pop_auth_subtree -= dir->pop_auth_subtree;
+ if (p->is_subtree_root()) break;
+ p = p->inode->get_parent_dir();
+ }
+ }
+
eval_subtree_root(dir);
}
subtrees.erase(dir);
subtrees[parent].erase(dir);
+ // adjust popularity?
+ if (dir->is_auth()) {
+ CDir *p = dir->get_parent_dir();
+ while (p) {
+ p->pop_auth_subtree += dir->pop_auth_subtree;
+ if (p->is_subtree_root()) break;
+ p = p->inode->get_parent_dir();
+ }
+ }
+
eval_subtree_root(dir);
// journal inode?
}
// done!
- dout(1) << "shutdown done." << endl;
+ dout(2) << "shutdown done." << endl;
return true;
}
if (logger) {
req_rate = logger->get("req");
- logger->set("l", (int)load.mds_load());
+ logger->set("l", (int)load.mds_load(g_clock.now()));
logger->set("q", messenger->get_dispatch_queue_len());
logger->set("buf", buffer_total_alloc);
if (oldstate == MDSMap::STATE_REJOIN ||
oldstate == MDSMap::STATE_RECONNECT)
recovery_done();
-
- dout(1) << "now active" << endl;
finish_contexts(waiting_for_active); // kick waiters
} else if (is_replay()) {
replay_start();
#include "config.h"
#undef dout
-#define dout(l) if (l<=g_conf.debug || l <= g_conf.debug_mds) cout << g_clock.now() << " mds" << mds->get_nodeid() << ".migrator "
+#define dout(l) if (l<=g_conf.debug || l <= g_conf.debug_mds || l <= g_conf.debug_mds_migrator) cout << g_clock.now() << " mds" << mds->get_nodeid() << ".migrator "
// set ambiguous auth
cache->adjust_subtree_auth(dir, dest, mds->get_nodeid());
+
+ // take away the popularity we're sending.
+ mds->balancer->subtract_export(dir);
// fill export message with cache data
+ utime_t now = g_clock.now();
C_Contexts *fin = new C_Contexts; // collect all the waiters
map<int,entity_inst_t> exported_client_map;
int num_exported_inodes = encode_export_dir( export_data[dir],
dir, // base
dir, // recur start point
dest,
- exported_client_map );
+ exported_client_map,
+ now );
bufferlist bl;
::_encode(exported_client_map, bl);
export_data[dir].push_front(bl);
// queue up the finisher
dir->add_waiter( CDir::WAIT_UNFREEZE, fin );
- // take away the popularity we're sending. FIXME: do this later?
- mds->balancer->subtract_export(dir);
-
// stats
if (mds->logger) mds->logger->inc("ex");
if (mds->logger) mds->logger->inc("iex", num_exported_inodes);
* used by: encode_export_dir, file_rename (if foreign)
*/
void Migrator::encode_export_inode(CInode *in, bufferlist& enc_state, int new_auth,
- map<int,entity_inst_t>& exported_client_map)
+ map<int,entity_inst_t>& exported_client_map,
+ utime_t now)
{
// tell (all) clients about migrating caps.. mark STALE
for (map<int, Capability>::iterator it = in->client_caps.begin();
// add inode
assert(!in->is_replica(mds->get_nodeid()));
- CInodeExport istate( in );
+ CInodeExport istate(in, now);
istate._encode( enc_state );
// we're export this inode; fix inode state
CDir *basedir,
CDir *dir,
int newauth,
- map<int,entity_inst_t>& exported_client_map)
+ map<int,entity_inst_t>& exported_client_map,
+ utime_t now)
{
int num_exported = 0;
// dir
bufferlist enc_dir;
- CDirExport dstate(dir);
+ CDirExport dstate(dir, now);
dstate._encode( enc_dir );
// release open_by
// -- inode
enc_dir.append("I", 1); // inode dentry
- encode_export_inode(in, enc_dir, newauth, exported_client_map); // encode, and (update state for) export
+ encode_export_inode(in, enc_dir, newauth, exported_client_map, now); // encode, and (update state for) export
// directory?
list<CDir*> dfs;
// subdirs
for (list<CDir*>::iterator it = subdirs.begin(); it != subdirs.end(); it++)
num_exported += encode_export_dir(dirstatelist, fin, basedir, *it, newauth,
- exported_client_map);
+ exported_client_map, now);
return num_exported;
}
void Migrator::export_finish(CDir *dir)
{
- dout(7) << "export_finish " << *dir << endl;
+ dout(5) << "export_finish " << *dir << endl;
if (export_state.count(dir) == 0) {
dout(7) << "target must have failed, not sending final commit message. export succeeded anyway." << endl;
cache->process_delayed_expire(dir);
// ok now finish contexts
- dout(5) << "finishing any waiters on imported data" << endl;
+ dout(10) << "finishing any waiters on imported data" << endl;
dir->finish_waiting(CDir::WAIT_IMPORTED);
cache->show_subtrees();
void export_empty_import(CDir *dir);
void encode_export_inode(CInode *in, bufferlist& enc_state, int newauth,
- map<int,entity_inst_t>& exported_client_map);
+ map<int,entity_inst_t>& exported_client_map,
+ utime_t now);
int encode_export_dir(list<bufferlist>& dirstatelist,
class C_Contexts *fin,
CDir *basedir,
CDir *dir,
int newauth,
- map<int,entity_inst_t>& exported_client_map);
+ map<int,entity_inst_t>& exported_client_map,
+ utime_t now);
void add_export_finish_waiter(CDir *dir, Context *c) {
export_finish_waiters[dir].push_back(c);
// apply
in->pop_and_dirty_projected_inode();
+ mds->balancer->hit_inode(mdr->now, in, META_POP_IWR);
+
// reply
MClientReply *reply = new MClientReply(mdr->client_request, 0);
reply->set_result(0);
mdlog->submit_entry(le);
mdlog->wait_for_sync(new C_MDS_inode_update_finish(mds, mdr, cur));
-
- mds->balancer->hit_inode(cur, META_POP_IWR);
}
mdlog->submit_entry(le);
mdlog->wait_for_sync(new C_MDS_inode_update_finish(mds, mdr, cur));
-
- mds->balancer->hit_inode(cur, META_POP_IWR);
}
mdlog->submit_entry(le);
mdlog->wait_for_sync(new C_MDS_inode_update_finish(mds, mdr, cur));
-
- mds->balancer->hit_inode(cur, META_POP_IWR);
}
reply->set_result(0);
// bump popularity. NOTE: this doesn't quite capture it.
- mds->balancer->hit_dir(dir, META_POP_IRD);
+ mds->balancer->hit_dir(g_clock.now(), dir, META_POP_IRD, numfiles);
// reply
reply_request(mdr, reply, diri);
// dir inode's mtime
mds->server->dirty_dn_diri(dn, dirpv, newi->inode.ctime);
+ // hit pop
+ mds->balancer->hit_inode(mdr->now, newi, META_POP_IWR);
+ //mds->balancer->hit_dir(mdr->now, dn->get_dir(), META_POP_DWR);
+
// reply
MClientReply *reply = new MClientReply(mdr->client_request, 0);
reply->set_result(0);
// log + wait
mdlog->submit_entry(le);
mdlog->wait_for_sync(new C_MDS_mknod_finish(mds, mdr, dn, newi, dirpv));
-
- // hit pop
- mds->balancer->hit_inode(newi, META_POP_IWR);
- mds->balancer->hit_dir(dn->get_dir(), META_POP_DWR);
}
mdlog->submit_entry(le);
mdlog->wait_for_sync(new C_MDS_mknod_finish(mds, mdr, dn, newi, dirpv));
- // hit pop
- mds->balancer->hit_inode(newi, META_POP_IWR);
- mds->balancer->hit_dir(dn->get_dir(), META_POP_DWR);
-
-
/* old export heuristic. pbly need to reimplement this at some point.
if (
diri->dir->is_auth() &&
// log + wait
mdlog->submit_entry(le);
mdlog->wait_for_sync(new C_MDS_mknod_finish(mds, mdr, dn, newi, dirpv));
-
- // hit pop
- mds->balancer->hit_inode(newi, META_POP_IWR);
- mds->balancer->hit_dir(dn->get_dir(), META_POP_DWR);
}
mdlog->submit_entry(le);
mdlog->wait_for_sync(new C_MDS_link_local_finish(mds, mdr, dn, targeti, dnpv, tipv, dirpv));
-
- // bump target popularity
- mds->balancer->hit_inode(targeti, META_POP_IWR);
- mds->balancer->hit_dir(dn->get_dir(), META_POP_DWR);
}
void Server::_link_local_finish(MDRequest *mdr, CDentry *dn, CInode *targeti,
// new dentry dir mtime
dirty_dn_diri(dn, dirpv, mdr->now);
+ // bump target popularity
+ mds->balancer->hit_inode(mdr->now, targeti, META_POP_IWR);
+ //mds->balancer->hit_dir(mdr->now, dn->get_dir(), META_POP_DWR);
+
// reply
MClientReply *reply = new MClientReply(mdr->client_request, 0);
reply_request(mdr, reply, dn->get_dir()->get_inode()); // FIXME: imprecise ref
// log + wait
mdlog->submit_entry(le);
mdlog->wait_for_sync(new C_MDS_link_remote_finish(mds, mdr, dn, targeti, dirpv));
-
- // bump target popularity
- mds->balancer->hit_inode(targeti, META_POP_IWR);
- mds->balancer->hit_dir(dn->get_dir(), META_POP_DWR);
}
void Server::_link_remote_finish(MDRequest *mdr, CDentry *dn, CInode *targeti,
// dir inode's mtime
dirty_dn_diri(dn, dirpv, mdr->now);
+ // bump target popularity
+ mds->balancer->hit_inode(mdr->now, targeti, META_POP_IWR);
+ //mds->balancer->hit_dir(mdr->now, dn->get_dir(), META_POP_DWR);
+
// reply
MClientReply *reply = new MClientReply(mdr->client_request, 0);
reply_request(mdr, reply, dn->get_dir()->get_inode()); // FIXME: imprecise ref
le->metablob.add_dir_context(targeti->get_parent_dir());
le->metablob.add_primary_dentry(dn, true, targeti, pi); // update old primary
mds->mdlog->submit_entry(le, new C_MDS_SlaveLinkPrep(this, mdr, targeti, old_ctime, inc));
-
- mds->balancer->hit_inode(targeti, META_POP_IWR);
}
class C_MDS_SlaveLinkCommit : public Context {
// update the target
targeti->pop_and_dirty_projected_inode();
+ // hit pop
+ mds->balancer->hit_inode(mdr->now, targeti, META_POP_IWR);
+
// ack
MMDSSlaveRequest *reply = new MMDSSlaveRequest(mdr->reqid, MMDSSlaveRequest::OP_LINKPREPACK);
mds->send_message_mds(reply, mdr->slave_to_mds, MDS_PORT_SERVER);
mdlog->submit_entry(le);
mdlog->wait_for_sync(new C_MDS_unlink_local_finish(mds, mdr, dn, straydn,
dirpv));
-
- // bump pop
- mds->balancer->hit_dir(dn->dir, META_POP_DWR);
}
void Server::_unlink_local_finish(MDRequest *mdr,
if (mdr->dst_reanchor_atid)
mds->anchorclient->commit(mdr->dst_reanchor_atid);
+ // bump pop
+ //mds->balancer->hit_dir(mdr->now, dn->dir, META_POP_DWR);
+
// reply
MClientReply *reply = new MClientReply(mdr->client_request, 0);
reply_request(mdr, reply, dn->dir->get_inode()); // FIXME: imprecise ref
// log + wait
mdlog->submit_entry(le);
mdlog->wait_for_sync(fin);
-
- mds->balancer->hit_dir(dn->dir, META_POP_DWR);
}
void Server::_unlink_remote_finish(MDRequest *mdr,
if (mdr->dst_reanchor_atid)
mds->anchorclient->commit(mdr->dst_reanchor_atid);
+ //mds->balancer->hit_dir(mdr->now, dn->dir, META_POP_DWR);
+
// reply
MClientReply *reply = new MClientReply(mdr->client_request, 0);
reply_request(mdr, reply, dn->dir->get_inode()); // FIXME: imprecise ref
// log + wait
mdlog->submit_entry(le);
mdlog->wait_for_sync(fin);
-
- // bump popularity
- if (srcdn->is_auth())
- mds->balancer->hit_dir(srcdn->get_dir(), META_POP_DWR);
- mds->balancer->hit_dir(destdn->get_dir(), META_POP_DWR);
- if (destdn->is_remote() &&
- destdn->inode->is_auth())
- mds->balancer->hit_inode(destdn->get_inode(), META_POP_IWR);
}
if (mdr->src_reanchor_atid) mds->anchorclient->commit(mdr->src_reanchor_atid);
if (mdr->dst_reanchor_atid) mds->anchorclient->commit(mdr->dst_reanchor_atid);
+ // bump popularity
+ //if (srcdn->is_auth())
+ //mds->balancer->hit_dir(mdr->now, srcdn->get_dir(), META_POP_DWR);
+ // mds->balancer->hit_dir(mdr->now, destdn->get_dir(), META_POP_DWR);
+ if (destdn->is_remote() &&
+ destdn->inode->is_auth())
+ mds->balancer->hit_inode(mdr->now, destdn->get_inode(), META_POP_IWR);
+
// reply
MClientReply *reply = new MClientReply(mdr->client_request, 0);
reply_request(mdr, reply, destdn->dir->get_inode()); // FIXME: imprecise ref
ESlaveUpdate *le = new ESlaveUpdate(mdlog, "slave_rename_prep", mdr->reqid, mdr->slave_to_mds, ESlaveUpdate::OP_PREPARE);
_rename_prepare(mdr, &le->metablob, srcdn, destdn, straydn);
mds->mdlog->submit_entry(le, new C_MDS_SlaveRenamePrep(this, mdr, srcdn, destdn, straydn));
-
- // bump popularity
- if (srcdn->is_auth())
- mds->balancer->hit_dir(srcdn->get_dir(), META_POP_DWR);
- if (destdn->inode->is_auth())
- mds->balancer->hit_inode(destdn->inode, META_POP_IWR);
-
} else {
// don't journal.
dout(10) << "not journaling, i'm not auth for anything, and srci isn't open" << endl;
// set up commit waiter
mdr->slave_commit = new C_MDS_SlaveRenameCommit(this, mdr, srcdn, destdn, straydn);
+ // bump popularity
+ //if (srcdn->is_auth())
+ //mds->balancer->hit_dir(mdr->now, srcdn->get_dir(), META_POP_DWR);
+ if (destdn->inode->is_auth())
+ mds->balancer->hit_inode(mdr->now, destdn->inode, META_POP_IWR);
+
// done.
delete mdr->slave_request;
mdr->slave_request = 0;
map<int,entity_inst_t> exported_client_map;
bufferlist inodebl;
mdcache->migrator->encode_export_inode(mdr->srcdn->inode, inodebl, mdr->slave_to_mds,
- exported_client_map);
+ exported_client_map,
+ mdr->now);
::_encode(exported_client_map, reply->inode_export);
reply->inode_export.claim_append(inodebl);
mdlog->submit_entry(le);
mdlog->wait_for_sync(fin);
-
- // hit pop
- mds->balancer->hit_inode(cur, META_POP_IWR);
}
<< " on " << *cur << endl;
// hit pop
+ mdr->now = g_clock.now();
if (cmode == FILE_MODE_RW ||
cmode == FILE_MODE_W)
- mds->balancer->hit_inode(cur, META_POP_IWR);
+ mds->balancer->hit_inode(mdr->now, cur, META_POP_IWR);
else
- mds->balancer->hit_inode(cur, META_POP_IRD);
+ mds->balancer->hit_inode(mdr->now, cur, META_POP_IRD);
// reply
MClientReply *reply = new MClientReply(req, 0);
void finish(int r) {
assert(r == 0);
+ // hit pop
+ mds->balancer->hit_inode(mdr->now, in, META_POP_IWR);
+
// purge also...
mds->mdcache->purge_inode(&in->inode, 0);
mds->mdcache->wait_for_purge(in->inode.ino, 0,
// ================================================================
+#define META_POP_IRD     0  // inode hit recorded on open when the mode is not FILE_MODE_RW/FILE_MODE_W
+#define META_POP_IWR     1  // inode hit recorded on write-mode opens and on link/rename/mknod-style updates
+#define META_POP_READDIR 2  // presumably readdir operations on a dirfrag -- confirm against callers
+#define META_POP_FETCH   3  // presumably dirfrag fetches -- confirm against callers
+#define META_POP_STORE   4  // presumably dirfrag stores -- confirm against callers
+#define META_NPOP        5  // number of META_POP_* indices defined above
+
+/**
+ * inode_load_vec_t
+ * Small fixed-size array of DecayCounters, indexed by META_POP_* type.
+ * It holds NUM (2) counters, so only indices 0 and 1
+ * (META_POP_IRD, META_POP_IWR) are valid here.
+ */
+class inode_load_vec_t {
+  static const int NUM = 2;
+  DecayCounter vec[NUM];
+public:
+  /**
+   * Return the counter for popularity type t.
+   * t must lie in [0, NUM).
+   */
+  DecayCounter &get(int t) {
+    // check both bounds: a negative t would otherwise index before vec[0]
+    assert(t >= 0 && t < NUM);
+    return vec[t];
+  }
+  /** Reset every counter, handing 'now' to DecayCounter::reset(). */
+  void zero(utime_t now) {
+    for (int i=0; i<NUM; i++)
+      vec[i].reset(now);
+  }
+};
+
+/**
+ * dirfrag_load_vec_t
+ * Fixed-size array of DecayCounters, one per META_POP_* type
+ * (NUM == 5 entries, indices META_POP_IRD .. META_POP_STORE).
+ * Members are public because the free operators below touch vec directly.
+ */
+class dirfrag_load_vec_t {
+public:
+  static const int NUM = 5;
+  DecayCounter vec[NUM];
+  /**
+   * Return the counter for popularity type t.
+   * t must lie in [0, NUM).
+   */
+  DecayCounter &get(int t) {
+    // check both bounds: a negative t would otherwise index before vec[0]
+    assert(t >= 0 && t < NUM);
+    return vec[t];
+  }
+  /** Apply DecayCounter::adjust(now, d) to every counter. */
+  void adjust(utime_t now, double d) {
+    for (int i=0; i<NUM; i++)
+      vec[i].adjust(now, d);
+  }
+  /** Reset every counter, handing 'now' to DecayCounter::reset(). */
+  void zero(utime_t now) {
+    for (int i=0; i<NUM; i++)
+      vec[i].reset(now);
+  }
+  /**
+   * Weighted sum of all five counters as read at 'now'.
+   * Weights: IRD x1, IWR x2, READDIR x1, FETCH x2, STORE x4.
+   */
+  double meta_load(utime_t now) {
+    return
+      1*vec[META_POP_IRD].get(now) +
+      2*vec[META_POP_IWR].get(now) +
+      1*vec[META_POP_READDIR].get(now) +
+      2*vec[META_POP_FETCH].get(now) +
+      4*vec[META_POP_STORE].get(now);
+  }
+};
+
+// Element-wise add: each counter in l is adjusted by the matching
+// counter's get_last() value from r.  Returns l.
+inline dirfrag_load_vec_t& operator+=(dirfrag_load_vec_t& l, dirfrag_load_vec_t& r)
+{
+  int idx = 0;
+  while (idx < dirfrag_load_vec_t::NUM) {
+    DecayCounter& target = l.vec[idx];
+    target.adjust(r.vec[idx].get_last());
+    ++idx;
+  }
+  return l;
+}
+
+// Element-wise subtract: each counter in l is adjusted by the negated
+// get_last() value of the matching counter in r.  Returns l.
+inline dirfrag_load_vec_t& operator-=(dirfrag_load_vec_t& l, dirfrag_load_vec_t& r)
+{
+  int idx = 0;
+  while (idx < dirfrag_load_vec_t::NUM) {
+    DecayCounter& target = l.vec[idx];
+    target.adjust(-r.vec[idx].get_last());
+    ++idx;
+  }
+  return l;
+}
+
+// Print a load vector as "[a,b]".  Only the first two counters
+// (indices 0 and 1) are shown; the remaining entries are not printed.
+inline ostream& operator<<(ostream& out, dirfrag_load_vec_t& dl)
+{
+  out << "[";
+  out << dl.vec[0].get();
+  out << ",";
+  out << dl.vec[1].get();
+  out << "]";
+  return out;
+}
+
+
/* meta_load_t
* hierarchical load for an inode/dir and its children
*/
-#define META_POP_IRD 0
-#define META_POP_IWR 1
-#define META_POP_DWR 2
-//#define META_POP_LOG 3
-//#define META_POP_FDIR 4
-//#define META_POP_CDIR 4
-#define META_NPOP 3
-
+/*
class meta_load_t {
public:
DecayCounter pop[META_NPOP];
- double meta_load() {
- return pop[META_POP_IRD].get() + 2*pop[META_POP_IWR].get();
+ double meta_load(utime_t now) {
+ return
+ pop[META_POP_IRD].get(now) +
+ 2*(pop[META_POP_IWR].get(now));
}
void take(meta_load_t& other) {
inline ostream& operator<<( ostream& out, meta_load_t& load )
{
- return out << "<rd " << load.pop[META_POP_IRD].get()
- << ", wr " << load.pop[META_POP_IWR].get()
- << ">";
+ return out << "<rwd "
+ << load.pop[META_POP_IRD].get() << "/"
+ << load.pop[META_POP_IWR].get()
+ << " "
+ << load.pop[META_POP_IRD].get_last_vel() << "/"
+ << load.pop[META_POP_IWR].get_last_vel()
+ << ">";
}
l.pop[i].adjust(r.pop[i].get());
return l;
}
-
+*/
/* mds_load_t
// popularity classes
#define MDS_POP_JUSTME 0 // just me (this dir or inode)
#define MDS_POP_NESTED 1 // me + children, auth or not
-#define MDS_POP_CURDOM 2 // me + children in current auth domain
-#define MDS_POP_ANYDOM 3 // me + children in any (nested) auth domain
+#define MDS_POP_CURDOM 2 // (if auth) me + children in current auth domain
+#define MDS_POP_ANYDOM 3 // (if auth) me + children in any (nested) auth domain
#define MDS_NPOP 4
class mds_load_t {
public:
- meta_load_t root;
+ dirfrag_load_vec_t auth;
+ dirfrag_load_vec_t all;
double req_rate;
double cache_hit_rate;
mds_load_t() :
req_rate(0), cache_hit_rate(0), queue_len(0) { }
- double mds_load() {
+ double mds_load(utime_t now) {
switch(g_conf.mds_bal_mode) {
case 0:
- return root.meta_load()
- + req_rate
- + 10.0*queue_len;
+ return
+ .8 * auth.meta_load(now) +
+ .2 * all.meta_load(now) +
+ req_rate +
+ 10.0 * queue_len;
case 1:
return req_rate + 10.0*queue_len;
inline ostream& operator<<( ostream& out, mds_load_t& load )
{
- return out << "mdsload<" << load.root
+ return out << "mdsload<" << load.auth << "/" << load.all
<< ", req " << load.req_rate
<< ", hr " << load.cache_hit_rate
<< ", qlen " << load.queue_len
!g_conf.objecter_buffer_uncommitted) {
dout(0) << "kick_requests missing commit, cannot replay: objecter_buffer_uncommitted == FALSE" << endl;
} else {
- dout(0) << "kick_requests missing commit, replay write " << tid
+ dout(3) << "kick_requests missing commit, replay write " << tid
<< " v " << wr->tid_version[tid] << endl;
modifyx_submit(wr, wr->waitfor_commit[tid], tid);
}
}
else if (wr->waitfor_ack.count(tid)) {
- dout(0) << "kick_requests missing ack, resub write " << tid << endl;
+ dout(3) << "kick_requests missing ack, resub write " << tid << endl;
modifyx_submit(wr, wr->waitfor_ack[tid], tid);
}
}
// READ
OSDRead *rd = op_read[tid];
op_read.erase(tid);
- dout(0) << "kick_requests resub read " << tid << endl;
+ dout(3) << "kick_requests resub read " << tid << endl;
// resubmit
readx_submit(rd, rd->ops[tid], true);
rd->ops.erase(tid);
}
- else if (op_stat.count(tid)) {
- OSDStat *st = op_stat[tid];
- op_stat.erase(tid);
-
- dout(0) << "kick_requests resub stat " << tid << endl;
+ else if (op_stat.count(tid)) {
+ OSDStat *st = op_stat[tid];
+ op_stat.erase(tid);
+
+ dout(3) << "kick_requests resub stat " << tid << endl;
// resubmit
stat_submit(st);
- }
+ }
else
assert(0);