#include "DecayCounter.h"
#include "Formatter.h"
+#include "include/encoding.h"
+
void DecayCounter::encode(bufferlist& bl) const
{
ENCODE_START(4, 4, bl);
ENCODE_FINISH(bl);
}
-void DecayCounter::decode(const utime_t &t, bufferlist::const_iterator &p)
+void DecayCounter::decode(bufferlist::const_iterator &p)
{
DECODE_START_LEGACY_COMPAT_LEN(4, 4, 4, p);
if (struct_v < 2) {
decode(val, p);
decode(delta, p);
decode(vel, p);
- last_decay = t;
+ last_decay = clock::now();
DECODE_FINISH(p);
}
// Write this counter's value, delta and velocity to the formatter as floats.
// decay(rate) is invoked first so that any pending decay is applied before
// the fields are written; this is callable here because decay() is const.
void DecayCounter::dump(Formatter *f) const
{
+ decay(rate);
f->dump_float("value", val);
f->dump_float("delta", delta);
f->dump_float("velocity", vel);
}
// Append two heap-allocated counters to ls: the first with val/delta/vel
// preset to 3.0/2.0/1.0, the second left default-constructed. With this
// change the counters are built without a placeholder utime_t.
// NOTE(review): raw owning pointers are pushed into ls; presumably the caller
// is responsible for deleting them -- confirm against the test harness.
-void DecayCounter::generate_test_instances(list<DecayCounter*>& ls)
+void DecayCounter::generate_test_instances(std::list<DecayCounter*>& ls)
{
- utime_t fake_time;
- DecayCounter *counter = new DecayCounter(fake_time);
+ DecayCounter *counter = new DecayCounter();
counter->val = 3.0;
counter->delta = 2.0;
counter->vel = 1.0;
ls.push_back(counter);
- counter = new DecayCounter(fake_time);
+ counter = new DecayCounter();
ls.push_back(counter);
}
// Apply exponential decay for the time elapsed since last_decay.
//
// The new version samples clock::now() itself instead of taking a timestamp
// argument. If now is later than last_decay and more than 0.1 seconds have
// elapsed (the old threshold was 1.0), it:
//   - computes (val+delta) * exp(el * rate.k) and snaps results below .01 to 0,
//   - updates the velocity approximation,
//   - stores the new value, clears delta and records now in last_decay.
// In every other case it returns without modifying state; the old else-branch
// that reset last_decay when now was earlier than last_decay is removed.
// The method is const; the members it writes are declared mutable.
-void DecayCounter::decay(utime_t now, const DecayRate &rate)
+void DecayCounter::decay(const DecayRate &rate) const
{
- if (now >= last_decay) {
- double el = (double)(now - last_decay);
- if (el >= 1.0) {
- // calculate new value
- double newval = (val+delta) * exp(el * rate.k);
- if (newval < .01)
- newval = 0.0;
-
- // calculate velocity approx
- vel += (newval - val) * el;
- vel *= exp(el * rate.k);
+ auto now = clock::now();
+ if (now > last_decay) {
+ double el = std::chrono::duration<double>(now - last_decay).count();
+ if (el <= 0.1)
+ return; /* no need to decay for small differences */
- val = newval;
- delta = 0;
- last_decay = now;
+ // calculate new value
+ double newval = (val+delta) * exp(el * rate.k);
+ if (newval < .01) {
+ newval = 0.0;
}
- } else {
- last_decay = now;
+
+ // calculate velocity approx
+ vel += (newval - val) * el;
+ vel *= exp(el * rate.k);
+
+ val = newval;
+ delta = 0;
+ last_decay = now;
}
}
#ifndef CEPH_DECAYCOUNTER_H
#define CEPH_DECAYCOUNTER_H
-#include "include/utime.h"
+#include "include/buffer.h"
+#include "common/Formatter.h"
+#include "common/ceph_time.h"
+
+#include <cmath>
+#include <list>
+#include <sstream>
/**
 * Exponential decay constant used by DecayCounter, stored as
 * k = ln(.5)/half_life.
 *
 * A default-constructed DecayRate has k = 0. DecayCounter is a friend and
 * reads k directly when decaying.
 */
class DecayRate {
- double k; // k = ln(.5)/half_life
-
- friend class DecayCounter;
-
public:
- DecayRate() : k(0) {}
- DecayRate(const DecayRate &dr) : k(dr.k) {}
+ friend class DecayCounter;
+ DecayRate() {}
// cppcheck-suppress noExplicitConstructor
DecayRate(double hl) { set_halflife(hl); }
+ DecayRate(const DecayRate &dr) : k(dr.k) {}
+
// Derive k from the half-life hl. hl is used as a divisor without validation.
void set_halflife(double hl) {
- k = ::log(.5) / hl;
+ k = log(.5) / hl;
}
+
+private:
+ double k = 0; // k = ln(.5)/half_life
};
class DecayCounter {
public:
- double val; // value
- double delta; // delta since last decay
- double vel; // recent velocity
- utime_t last_decay; // time of last decay
- DecayRate rate;
+ using time = ceph::coarse_mono_time;
+ using clock = ceph::coarse_mono_clock;
+
+ DecayCounter() : DecayCounter(DecayRate()) {}
+ explicit DecayCounter(const DecayRate &rate) : last_decay(clock::now()), rate(rate) {}
void encode(bufferlist& bl) const;
- void decode(const utime_t &t, bufferlist::const_iterator& p);
+ void decode(bufferlist::const_iterator& p);
void dump(Formatter *f) const;
- static void generate_test_instances(list<DecayCounter*>& ls);
-
- explicit DecayCounter(const utime_t &now)
- : val(0), delta(0), vel(0), last_decay(now)
- {
- }
-
- explicit DecayCounter(const utime_t &now, const DecayRate &rate)
- : val(0), delta(0), vel(0), last_decay(now), rate(rate)
- {
- }
-
- // these two functions are for the use of our dencoder testing infrastructure
- DecayCounter() : val(0), delta(0), vel(0), last_decay() {}
-
- void decode(bufferlist::const_iterator& p) {
- utime_t fake_time;
- decode(fake_time, p);
- }
+ static void generate_test_instances(std::list<DecayCounter*>& ls);
/**
* reading
*/
// Return the current total (val + delta) after applying any pending decay
// with the counter's own rate. The overloads that took a timestamp and an
// explicit rate are removed; the remaining accessor is const.
- double get(utime_t now, const DecayRate& rate) {
- decay(now, rate);
- return val+delta;
- }
- double get(utime_t now) {
- decay(now, rate);
+ double get() const {
+ decay(rate);
return val+delta;
}
return vel;
}
- utime_t get_last_decay() const {
+ time get_last_decay() const {
return last_decay;
}
* adjusting
*/
// Record a hit of weight v (default 1.0): apply any pending decay with the
// counter's own rate, add v to delta, and return the updated total val+delta.
// The overloads taking a timestamp and an explicit rate are removed.
- double hit(utime_t now, const DecayRate& rate, double v = 1.0) {
- decay(now, rate);
- delta += v;
- return val+delta;
- }
- double hit(utime_t now, double v = 1.0) {
- decay(now, rate);
+ double hit(double v = 1.0) {
+ decay(rate);
delta += v;
return val+delta;
}
// Add a to val, clamping val at zero if the result is negative.
// With this change pending decay is applied first. Only val is adjusted and
// clamped here; delta is left as is.
void adjust(double a) {
+ decay(rate);
val += a;
if (val < 0)
val = 0;
}
- void adjust(utime_t now, const DecayRate& rate, double a) {
- decay(now, rate);
- adjust(a);
- }
+
void scale(double f) {
val *= f;
delta *= f;
* decay etc.
*/
// Zero val and delta and restart decay timing from clock::now() (previously
// from a caller-supplied timestamp). vel is not modified here.
- void reset(utime_t now) {
- last_decay = now;
+ void reset() {
+ last_decay = clock::now();
val = delta = 0;
}
- void decay(utime_t now, const DecayRate &rate);
+protected:
+ void decay(const DecayRate &rate) const;
+
+private:
+ mutable double val = 0.0; // value
+ mutable double delta = 0.0; // delta since last decay
+ mutable double vel = 0.0; // recent velocity
+ mutable time last_decay = clock::zero(); // time of last decay
+ DecayRate rate;
};
// Free-function encode/decode wrappers that forward to the DecayCounter
// member functions. The decode overload that took a utime_t is dropped; the
// remaining decode forwards directly to c.decode(p).
-inline void encode(const DecayCounter &c, bufferlist &bl) { c.encode(bl); }
-inline void decode(DecayCounter &c, const utime_t &t, bufferlist::const_iterator &p) {
- c.decode(t, p);
+inline void encode(const DecayCounter &c, bufferlist &bl) {
+ c.encode(bl);
}
-// for dencoder
inline void decode(DecayCounter &c, bufferlist::const_iterator &p) {
- utime_t t;
- c.decode(t, p);
+ c.decode(p);
}
// Stream a counter as "[C <value>]" with the value in scientific notation at
// precision 2. Formatting is done in a local ostringstream, so the precision
// and flags of out itself are not changed. The value comes from d.get(),
// which calls decay() before returning val+delta.
+inline std::ostream& operator<<(std::ostream& out, const DecayCounter& d) {
+ std::ostringstream oss;
+ oss.precision(2);
+ double val = d.get();
+ oss << "[C " << std::scientific << val << "]";
+ return out << oss.str();
+}
#endif
#endif
return time_point(seconds(ts.tv_sec) + nanoseconds(ts.tv_nsec));
}
+
// True when t equals this clock's "zero" sentinel, time_point::min().
+ static bool is_zero(const time_point& t) {
+ return (t == time_point::min());
+ }
+
// Return the "zero" sentinel time for this clock, defined as time_point::min().
+ static time_point zero() {
+ return time_point::min();
+ }
};
// So that our subtractions produce negative spans rather than
OPTION(mds_bal_split_wr, OPT_FLOAT)
OPTION(mds_bal_split_bits, OPT_INT)
OPTION(mds_bal_merge_size, OPT_INT)
-OPTION(mds_bal_interval, OPT_INT) // seconds
OPTION(mds_bal_fragment_interval, OPT_INT) // seconds
OPTION(mds_bal_fragment_size_max, OPT_INT) // order of magnitude higher than split size
OPTION(mds_bal_fragment_fast_factor, OPT_FLOAT) // multiple of size_max that triggers immediate split
num_dirty(0), committing_version(0), committed_version(0),
dir_auth_pins(0), request_pins(0),
dir_rep(REP_NONE),
- pop_me(ceph_clock_now()),
- pop_nested(ceph_clock_now()),
- pop_auth_subtree(ceph_clock_now()),
- pop_auth_subtree_nested(ceph_clock_now()),
+ pop_me(mdcache->decayrate),
+ pop_nested(mdcache->decayrate),
+ pop_auth_subtree(mdcache->decayrate),
+ pop_auth_subtree_nested(mdcache->decayrate),
+ pop_spread(mdcache->decayrate),
pop_lru_subdirs(member_offset(CInode, item_pop_lru)),
num_dentries_nested(0), num_dentries_auth_subtree(0),
num_dentries_auth_subtree_nested(0),
get(PIN_TEMPEXPORTING);
}
// Finish exporting this dirfrag: keep only the state bits in
// MASK_STATE_EXPORT_KEPT, subtract pop_auth_subtree from pop_nested and
// pop_auth_subtree_nested, zero pop_me and pop_auth_subtree, drop the
// PIN_TEMPEXPORTING pin and clear dirty_old_rstat.
// The load-vector calls no longer take a timestamp or decay rate.
-void CDir::finish_export(utime_t now)
+void CDir::finish_export()
{
state &= MASK_STATE_EXPORT_KEPT;
- pop_nested.sub(now, cache->decayrate, pop_auth_subtree);
- pop_auth_subtree_nested.sub(now, cache->decayrate, pop_auth_subtree);
- pop_me.zero(now);
- pop_auth_subtree.zero(now);
+ pop_nested.sub(pop_auth_subtree);
+ pop_auth_subtree_nested.sub(pop_auth_subtree);
+ pop_me.zero();
+ pop_auth_subtree.zero();
put(PIN_TEMPEXPORTING);
dirty_old_rstat.clear();
}
-void CDir::decode_import(bufferlist::const_iterator& blp, utime_t now, LogSegment *ls)
+void CDir::decode_import(bufferlist::const_iterator& blp, LogSegment *ls)
{
decode(first, blp);
decode(fnode, blp);
decode(dir_rep, blp);
- decode(pop_me, now, blp);
- decode(pop_auth_subtree, now, blp);
- pop_nested.add(now, cache->decayrate, pop_auth_subtree);
- pop_auth_subtree_nested.add(now, cache->decayrate, pop_auth_subtree);
+ decode(pop_me, blp);
+ decode(pop_auth_subtree, blp);
+ pop_nested.add(pop_auth_subtree);
+ pop_auth_subtree_nested.add(pop_auth_subtree);
decode(dir_rep_by, blp);
decode(get_replicas(), blp);
}
}
// Abort an import of this dirfrag. In the lines shown: asserts the dirfrag is
// auth, clears STATE_AUTH, marks it clean if dirty, subtracts
// pop_auth_subtree from the nested counters and zeroes pop_me and
// pop_auth_subtree. The load-vector calls no longer take a timestamp or rate.
// NOTE(review): this is diff context; unchanged statements between the shown
// lines may be elided -- confirm against the full file.
-void CDir::abort_import(utime_t now)
+void CDir::abort_import()
{
assert(is_auth());
state_clear(CDir::STATE_AUTH);
if (is_dirty())
mark_clean();
- pop_nested.sub(now, cache->decayrate, pop_auth_subtree);
- pop_auth_subtree_nested.sub(now, cache->decayrate, pop_auth_subtree);
- pop_me.zero(now);
- pop_auth_subtree.zero(now);
+ pop_nested.sub(pop_auth_subtree);
+ pop_auth_subtree_nested.sub(pop_auth_subtree);
+ pop_me.zero();
+ pop_auth_subtree.zero();
}
}
}
// Dump this dirfrag's load to the formatter: its path and dirfrag id as
// streams, followed by one object section each for pop_me, pop_nested,
// pop_auth_subtree and pop_auth_subtree_nested.
// The timestamp and decay-rate parameters are removed; each load vector's
// dump() is now called with the formatter only.
-void CDir::dump_load(Formatter *f, utime_t now, const DecayRate& rate)
+void CDir::dump_load(Formatter *f)
{
f->dump_stream("path") << get_path();
f->dump_stream("dirfrag") << dirfrag();
f->open_object_section("pop_me");
- pop_me.dump(f, now, rate);
+ pop_me.dump(f);
f->close_section();
f->open_object_section("pop_nested");
- pop_nested.dump(f, now, rate);
+ pop_nested.dump(f);
f->close_section();
f->open_object_section("pop_auth_subtree");
- pop_auth_subtree.dump(f, now, rate);
+ pop_auth_subtree.dump(f);
f->close_section();
f->open_object_section("pop_auth_subtree_nested");
- pop_auth_subtree_nested.dump(f, now, rate);
+ pop_auth_subtree_nested.dump(f);
f->close_section();
}
#include <string>
#include <string_view>
-#include "common/DecayCounter.h"
#include "common/bloom_filter.hpp"
#include "common/config.h"
#include "include/buffer_fwd.h"
ostream& operator<<(ostream& out, const class CDir& dir);
class CDir : public MDSCacheObject, public Counter<CDir> {
+ using time = ceph::coarse_mono_time;
+ using clock = ceph::coarse_mono_clock;
+
friend ostream& operator<<(ostream& out, const class CDir& dir);
public:
dirfrag_load_vec_t pop_auth_subtree;
dirfrag_load_vec_t pop_auth_subtree_nested;
- mono_time last_popularity_sample;
+ time last_popularity_sample = clock::zero();
load_spread_t pop_spread;
// -- import/export --
void encode_export(bufferlist& bl);
- void finish_export(utime_t now);
+ void finish_export();
void abort_export() {
put(PIN_TEMPEXPORTING);
}
- void decode_import(bufferlist::const_iterator& blp, utime_t now, LogSegment *ls);
- void abort_import(utime_t now);
+ void decode_import(bufferlist::const_iterator& blp, LogSegment *ls);
+ void abort_import();
// -- auth pins --
bool can_auth_pin() const override { return is_auth() && !(is_frozen() || is_freezing()); }
ostream& print_db_line_prefix(ostream& out) override;
void print(ostream& out) override;
void dump(Formatter *f, int flags = DUMP_DEFAULT) const;
- void dump_load(Formatter *f, utime_t now, const DecayRate& rate);
+ void dump_load(Formatter *f);
};
#endif
return out;
}
-
// Out-of-line CInode constructor (the inline definition in the header is
// replaced by a declaration in this change).
// Stores the owning cache and the [first, last] snapid range, initializes each
// item_* member and each lock with this, and constructs pop from the cache's
// decay rate (c->decayrate) instead of a wall-clock timestamp.
// Sets STATE_AUTH when auth is true.
+CInode::CInode(MDCache *c, bool auth, snapid_t f, snapid_t l)
+ :
+ mdcache(c),
+ first(f), last(l),
+ item_dirty(this),
+ item_caps(this),
+ item_open_file(this),
+ item_dirty_parent(this),
+ item_dirty_dirfrag_dir(this),
+ item_dirty_dirfrag_nest(this),
+ item_dirty_dirfrag_dirfragtree(this),
+ pop(c->decayrate),
+ versionlock(this, &versionlock_type),
+ authlock(this, &authlock_type),
+ linklock(this, &linklock_type),
+ dirfragtreelock(this, &dirfragtreelock_type),
+ filelock(this, &filelock_type),
+ xattrlock(this, &xattrlock_type),
+ snaplock(this, &snaplock_type),
+ nestlock(this, &nestlock_type),
+ flocklock(this, &flocklock_type),
+ policylock(this, &policylock_type)
+{
+ if (auth) state_set(STATE_AUTH);
+}
void CInode::print(ostream& out)
{
out << *this;
}
-
-
void CInode::add_need_snapflush(CInode *snapin, snapid_t snapid, client_t client)
{
dout(10) << __func__ << " client." << client << " snapid " << snapid << " on " << snapin << dendl;
get(PIN_TEMPEXPORTING);
}
-void CInode::finish_export(utime_t now)
+void CInode::finish_export()
{
state &= MASK_STATE_EXPORT_KEPT;
- pop.zero(now);
+ pop.zero();
// just in case!
//dirlock.clear_updated();
mark_dirty_parent(ls);
}
- decode(pop, ceph_clock_now(), p);
+ decode(pop, p);
decode(get_replicas(), p);
if (is_replicated())
// ---------------------------
CInode() = delete;
- CInode(MDCache *c, bool auth=true, snapid_t f=2, snapid_t l=CEPH_NOSNAP) :
- mdcache(c),
- first(f), last(l),
- item_dirty(this),
- item_caps(this),
- item_open_file(this),
- item_dirty_parent(this),
- item_dirty_dirfrag_dir(this),
- item_dirty_dirfrag_nest(this),
- item_dirty_dirfrag_dirfragtree(this),
- pop(ceph_clock_now()),
- versionlock(this, &versionlock_type),
- authlock(this, &authlock_type),
- linklock(this, &linklock_type),
- dirfragtreelock(this, &dirfragtreelock_type),
- filelock(this, &filelock_type),
- xattrlock(this, &xattrlock_type),
- snaplock(this, &snaplock_type),
- nestlock(this, &nestlock_type),
- flocklock(this, &flocklock_type),
- policylock(this, &policylock_type)
- {
- if (auth) state_set(STATE_AUTH);
- }
+ CInode(MDCache *c, bool auth=true, snapid_t f=2, snapid_t l=CEPH_NOSNAP);
~CInode() override {
close_dirfrags();
close_snaprealm();
// -- import/export --
void encode_export(bufferlist& bl);
- void finish_export(utime_t now);
+ void finish_export();
void abort_export() {
put(PIN_TEMPEXPORTING);
assert(state_test(STATE_EXPORTINGCAPS));
}
issue_caps_set(need_issue);
- utime_t now = ceph_clock_now();
- mds->balancer->hit_inode(now, in, META_POP_IWR);
+ mds->balancer->hit_inode(in, META_POP_IWR);
// auth unpin after issuing caps
mut->cleanup();
void MDBalancer::tick()
{
static int num_bal_times = g_conf->mds_bal_max;
- static mono_time first = mono_clock::now();
- mono_time now = mono_clock::now();
- ceph::timespan elapsed = now - first;
+ auto bal_interval = mds->cct->_conf->get_val<int64_t>("mds_bal_interval");
+ auto bal_max_until = mds->cct->_conf->get_val<int64_t>("mds_bal_max_until");
+ time now = clock::now();
if (g_conf->mds_bal_export_pin) {
handle_export_pins();
}
// sample?
- if (chrono::duration<double> (now - last_sample).count() >
+ if (chrono::duration<double>(now-last_sample).count() >
g_conf->mds_bal_sample_interval) {
dout(15) << "tick last_sample now " << now << dendl;
last_sample = now;
// We can use duration_cast below, although the result is an int,
// because the values from g_conf are also integers.
// balance?
- if (mds->get_nodeid() == 0 &&
- g_conf->mds_bal_interval > 0 &&
- (num_bal_times ||
- (g_conf->mds_bal_max_until >= 0 &&
- duration_cast<chrono::seconds>(elapsed).count() >
- g_conf->mds_bal_max_until)) &&
- mds->is_active() &&
- duration_cast<chrono::seconds>(now - last_heartbeat).count() >=
- g_conf->mds_bal_interval) {
+ if (mds->get_nodeid() == 0
+ && mds->is_active()
+ && bal_interval > 0
+ && duration_cast<chrono::seconds>(now - last_heartbeat).count() >= bal_interval
+ && (num_bal_times || (bal_max_until >= 0 && mds->get_uptime().count() > bal_max_until))) {
last_heartbeat = now;
send_heartbeat();
num_bal_times--;
};
-double mds_load_t::mds_load()
+double mds_load_t::mds_load() const
{
switch(g_conf->mds_bal_mode) {
case 0:
return 0;
}
-mds_load_t MDBalancer::get_load(utime_t now)
+mds_load_t MDBalancer::get_load()
{
- mds_load_t load(now);
+ auto now = clock::now();
+
+ mds_load_t load{DecayRate()}; /* zero DecayRate! */
if (mds->mdcache->get_root()) {
list<CDir*> ls;
mds->mdcache->get_root()->get_dirfrags(ls);
- for (list<CDir*>::iterator p = ls.begin();
- p != ls.end();
- ++p) {
- load.auth.add(now, mds->mdcache->decayrate, (*p)->pop_auth_subtree_nested);
- load.all.add(now, mds->mdcache->decayrate, (*p)->pop_nested);
+ for (auto &d : ls) {
+ load.auth.add(d->pop_auth_subtree_nested);
+ load.all.add(d->pop_nested);
}
} else {
dout(20) << "get_load no root, no load" << dendl;
uint64_t num_requests = mds->get_num_requests();
bool new_req_rate = false;
- if (last_get_load != utime_t() &&
+ if (last_get_load != clock::zero() &&
now > last_get_load &&
num_requests >= last_num_requests) {
- utime_t el = now;
- el -= last_get_load;
- if (el.sec() >= 1) {
+ double el = std::chrono::duration<double>(now-last_get_load).count();
+ if (el >= 1.0) {
load.req_rate = (num_requests - last_num_requests) / (double)el;
new_req_rate = true;
}
<< " oid=" << oid << " oloc=" << oloc << dendl;
/* timeout: if we waste half our time waiting for RADOS, then abort! */
- double t = ceph_clock_now() + g_conf->mds_bal_interval/2;
- utime_t timeout;
- timeout.set_from_double(t);
lock.Lock();
- int ret_t = cond.WaitUntil(lock, timeout);
+ int ret_t = cond.WaitInterval(lock, utime_t(mds->cct->_conf->get_val<int64_t>("mds_bal_interval")/2, 0));
lock.Unlock();
/* success: store the balancer in memory and set the version. */
void MDBalancer::send_heartbeat()
{
- utime_t now = ceph_clock_now();
-
if (mds->is_cluster_degraded()) {
dout(10) << "send_heartbeat degraded" << dendl;
return;
}
// my load
- mds_load_t load = get_load(now);
+ mds_load_t load = get_load();
mds->logger->set(l_mds_load_cent, 100 * load.mds_load());
mds->logger->set(l_mds_dispatch_queue_len, load.queue_len);
- mds_load[mds->get_nodeid()] = load;
+ auto em = mds_load.emplace(std::piecewise_construct, std::forward_as_tuple(mds->get_nodeid()), std::forward_as_tuple(load));
+ if (!em.second) {
+ em.first->second = load;
+ }
// import_map -- how much do i import from whom
map<mds_rank_t, float> import_map;
mds_rank_t from = im->inode->authority().first;
if (from == mds->get_nodeid()) continue;
if (im->get_inode()->is_stray()) continue;
- import_map[from] += im->pop_auth_subtree.meta_load(now, mds->mdcache->decayrate);
+ import_map[from] += im->pop_auth_subtree.meta_load();
}
mds_import_map[ mds->get_nodeid() ] = import_map;
}
}
- mds_load[who] = m->get_load();
+ {
+ auto em = mds_load.emplace(std::piecewise_construct, std::forward_as_tuple(who), std::forward_as_tuple(m->get_load()));
+ if (!em.second) {
+ em.first->second = m->get_load();
+ }
+ }
mds_import_map[who] = m->get_import_map();
{
} else {
int cluster_size = mds->get_mds_map()->get_num_in_mds();
mds_rank_t whoami = mds->get_nodeid();
- rebalance_time = ceph_clock_now();
+ rebalance_time = clock::now();
dout(5) << " prep_rebalance: cluster loads are" << dendl;
double load_fac = 1.0;
map<mds_rank_t, mds_load_t>::iterator m = mds_load.find(whoami);
if ((m != mds_load.end()) && (m->second.mds_load() > 0)) {
- double metald = m->second.auth.meta_load(rebalance_time, mds->mdcache->decayrate);
+ double metald = m->second.auth.meta_load();
double mdsld = m->second.mds_load();
load_fac = metald / mdsld;
dout(7) << " load_fac is " << load_fac
/* prepare for balancing */
int cluster_size = mds->get_mds_map()->get_num_in_mds();
- rebalance_time = ceph_clock_now();
+ rebalance_time = clock::now();
mds->mdcache->migrator->clear_export_queue();
/* fill in the metrics for each mds by grabbing load struct */
continue; // export pbly already in progress
mds_rank_t from = diri->authority().first;
- double pop = dir->pop_auth_subtree.meta_load(rebalance_time, mds->mdcache->decayrate);
+ double pop = dir->pop_auth_subtree.meta_load();
if (g_conf->mds_bal_idle_threshold > 0 &&
pop < g_conf->mds_bal_idle_threshold &&
diri != mds->mdcache->get_root() &&
for (auto dir : exports) {
dout(5) << " - exporting " << dir->pop_auth_subtree
- << " " << dir->pop_auth_subtree.meta_load(rebalance_time, mds->mdcache->decayrate)
+ << " " << dir->pop_auth_subtree.meta_load()
<< " to mds." << target << " " << *dir << dendl;
mds->mdcache->migrator->export_dir_nicely(dir, target);
}
double& have,
set<CDir*>& already_exporting)
{
- utime_t now = ceph_clock_now();
- if ((double)(now - rebalance_time) > 0.1) {
+ auto now = clock::now();
+ auto duration = std::chrono::duration<double>(now-rebalance_time).count();
+ if (duration > 0.1) {
derr << " balancer runs too long" << dendl_impl;
have = amount;
return;
list<CDir*> bigger_rep, bigger_unrep;
multimap<double, CDir*> smaller;
- double dir_pop = dir->pop_auth_subtree.meta_load(rebalance_time, mds->mdcache->decayrate);
+ double dir_pop = dir->pop_auth_subtree.meta_load();
dout(7) << " find_exports in " << dir_pop << " " << *dir << " need " << need << " (" << needmin << " - " << needmax << ")" << dendl;
double subdir_sum = 0;
continue; // can't export this right now!
// how popular?
- double pop = subdir->pop_auth_subtree.meta_load(rebalance_time, mds->mdcache->decayrate);
+ double pop = subdir->pop_auth_subtree.meta_load();
subdir_sum += pop;
dout(15) << " subdir pop " << pop << " " << *subdir << dendl;
}
}
// Record a hit of the given type on inode in, then, if the inode has a parent
// dentry, propagate the hit to that dentry's directory via hit_dir().
// The timestamp parameter is removed; the counter's hit() is now called
// without a timestamp or decay rate.
-void MDBalancer::hit_inode(const utime_t& now, CInode *in, int type, int who)
+void MDBalancer::hit_inode(CInode *in, int type, int who)
{
// hit inode
- in->pop.get(type).hit(now, mds->mdcache->decayrate);
+ in->pop.get(type).hit();
if (in->get_parent_dn())
- hit_dir(now, in->get_parent_dn()->get_dir(), type, who);
+ hit_dir(in->get_parent_dn()->get_dir(), type, who);
}
void MDBalancer::maybe_fragment(CDir *dir, bool hot)
}
}
-void MDBalancer::hit_dir(const utime_t& now, CDir *dir, int type, int who, double amount)
+void MDBalancer::hit_dir(CDir *dir, int type, int who, double amount)
{
// hit me
- double v = dir->pop_me.get(type).hit(now, mds->mdcache->decayrate, amount);
+ double v = dir->pop_me.get(type).hit(amount);
const bool hot = (v > g_conf->mds_bal_split_rd && type == META_POP_IRD) ||
(v > g_conf->mds_bal_split_wr && type == META_POP_IWR);
// replicate?
if (type == META_POP_IRD && who >= 0) {
- dir->pop_spread.hit(now, mds->mdcache->decayrate, who);
+ dir->pop_spread.hit(who);
}
double rd_adj = 0.0;
if (type == META_POP_IRD &&
dir->last_popularity_sample < last_sample) {
- double dir_pop = dir->pop_auth_subtree.get(type).get(now, mds->mdcache->decayrate); // hmm??
+ double dir_pop = dir->pop_auth_subtree.get(type).get(); // hmm??
dir->last_popularity_sample = last_sample;
- double pop_sp = dir->pop_spread.get(now, mds->mdcache->decayrate);
+ double pop_sp = dir->pop_spread.get();
dir_pop += pop_sp * 10;
//if (dir->ino() == inodeno_t(0x10000000002))
if (!dir->is_rep() &&
dir_pop >= g_conf->mds_bal_replicate_threshold) {
// replicate
- double rdp = dir->pop_me.get(META_POP_IRD).get(now, mds->mdcache->decayrate);
+ double rdp = dir->pop_me.get(META_POP_IRD).get();
rd_adj = rdp / mds->get_mds_map()->get_num_in_mds() - rdp;
rd_adj /= 2.0; // temper somewhat
while (true) {
CDir *pdir = dir->inode->get_parent_dir();
- dir->pop_nested.get(type).hit(now, mds->mdcache->decayrate, amount);
+ dir->pop_nested.get(type).hit(amount);
if (rd_adj != 0.0)
- dir->pop_nested.get(META_POP_IRD).adjust(now, mds->mdcache->decayrate, rd_adj);
+ dir->pop_nested.get(META_POP_IRD).adjust(rd_adj);
if (hit_subtree) {
- dir->pop_auth_subtree.get(type).hit(now, mds->mdcache->decayrate, amount);
+ dir->pop_auth_subtree.get(type).hit(amount);
if (rd_adj != 0.0)
- dir->pop_auth_subtree.get(META_POP_IRD).adjust(now, mds->mdcache->decayrate, rd_adj);
+ dir->pop_auth_subtree.get(META_POP_IRD).adjust(rd_adj);
if (dir->is_subtree_root())
hit_subtree = false; // end of auth domain, stop hitting auth counters.
}
if (hit_subtree_nested) {
- dir->pop_auth_subtree_nested.get(type).hit(now, mds->mdcache->decayrate, amount);
+ dir->pop_auth_subtree_nested.get(type).hit(amount);
if (rd_adj != 0.0)
- dir->pop_auth_subtree_nested.get(META_POP_IRD).adjust(now, mds->mdcache->decayrate, rd_adj);
+ dir->pop_auth_subtree_nested.get(META_POP_IRD).adjust(rd_adj);
}
if (!pdir) break;
dir = pdir;
* NOTE: call me _after_ forcing *dir into a subtree root,
* but _before_ doing the encode_export_dirs.
*/
-void MDBalancer::subtract_export(CDir *dir, utime_t now)
+void MDBalancer::subtract_export(CDir *dir)
{
dirfrag_load_vec_t subload = dir->pop_auth_subtree;
dir = dir->inode->get_parent_dir();
if (!dir) break;
- dir->pop_nested.sub(now, mds->mdcache->decayrate, subload);
- dir->pop_auth_subtree_nested.sub(now, mds->mdcache->decayrate, subload);
+ dir->pop_nested.sub(subload);
+ dir->pop_auth_subtree_nested.sub(subload);
}
}
-void MDBalancer::add_import(CDir *dir, utime_t now)
+void MDBalancer::add_import(CDir *dir)
{
dirfrag_load_vec_t subload = dir->pop_auth_subtree;
dir = dir->inode->get_parent_dir();
if (!dir) break;
- dir->pop_nested.add(now, mds->mdcache->decayrate, subload);
- dir->pop_auth_subtree_nested.add(now, mds->mdcache->decayrate, subload);
+ dir->pop_nested.add(subload);
+ dir->pop_auth_subtree_nested.add(subload);
}
}
-void MDBalancer::adjust_pop_for_rename(CDir *pdir, CDir *dir, utime_t now, bool inc)
+void MDBalancer::adjust_pop_for_rename(CDir *pdir, CDir *dir, bool inc)
{
- DecayRate& rate = mds->mdcache->decayrate;
-
bool adjust_subtree_nest = dir->is_auth();
bool adjust_subtree = adjust_subtree_nest && !dir->is_subtree_root();
CDir *cur = dir;
while (true) {
if (inc) {
- pdir->pop_nested.add(now, rate, dir->pop_nested);
+ pdir->pop_nested.add(dir->pop_nested);
if (adjust_subtree) {
- pdir->pop_auth_subtree.add(now, rate, dir->pop_auth_subtree);
+ pdir->pop_auth_subtree.add(dir->pop_auth_subtree);
pdir->pop_lru_subdirs.push_front(&cur->get_inode()->item_pop_lru);
}
if (adjust_subtree_nest)
- pdir->pop_auth_subtree_nested.add(now, rate, dir->pop_auth_subtree_nested);
+ pdir->pop_auth_subtree_nested.add(dir->pop_auth_subtree_nested);
} else {
- pdir->pop_nested.sub(now, rate, dir->pop_nested);
+ pdir->pop_nested.sub(dir->pop_nested);
if (adjust_subtree)
- pdir->pop_auth_subtree.sub(now, rate, dir->pop_auth_subtree);
+ pdir->pop_auth_subtree.sub(dir->pop_auth_subtree);
if (adjust_subtree_nest)
- pdir->pop_auth_subtree_nested.sub(now, rate, dir->pop_auth_subtree_nested);
+ pdir->pop_auth_subtree_nested.sub(dir->pop_auth_subtree_nested);
}
if (pdir->is_subtree_root())
int MDBalancer::dump_loads(Formatter *f)
{
- utime_t now = ceph_clock_now();
- DecayRate& decayrate = mds->mdcache->decayrate;
-
list<CDir*> dfs;
if (mds->mdcache->get_root()) {
mds->mdcache->get_root()->get_dirfrags(dfs);
dfs.pop_front();
f->open_object_section("dir");
- dir->dump_load(f, now, decayrate);
+ dir->dump_load(f);
f->close_section();
for (auto it = dir->begin(); it != dir->end(); ++it) {
f->open_object_section("mds_load");
{
- auto dump_mds_load = [f, now](mds_load_t& load) {
+ auto dump_mds_load = [f](mds_load_t& load) {
f->dump_float("request_rate", load.req_rate);
f->dump_float("cache_hit_rate", load.cache_hit_rate);
f->dump_float("queue_length", load.queue_len);
f->dump_float("cpu_load", load.cpu_load_avg);
f->dump_float("mds_load", load.mds_load());
- DecayRate rate; // no decay
f->open_object_section("auth_dirfrags");
- load.auth.dump(f, now, rate);
+ load.auth.dump(f);
f->close_section();
f->open_object_section("all_dirfrags");
- load.all.dump(f, now, rate);
+ load.all.dump(f);
f->close_section();
};
class MonClient;
class MDBalancer {
- friend class C_Bal_SendHeartbeat;
public:
+ using clock = ceph::coarse_mono_clock;
+ using time = ceph::coarse_mono_time;
+ friend class C_Bal_SendHeartbeat;
+
MDBalancer(MDSRank *m, Messenger *msgr, MonClient *monc) :
mds(m), messenger(msgr), mon_client(monc) { }
*/
void tick();
- void subtract_export(CDir *ex, utime_t now);
- void add_import(CDir *im, utime_t now);
- void adjust_pop_for_rename(CDir *pdir, CDir *dir, utime_t now, bool inc);
+ void subtract_export(CDir *ex);
+ void add_import(CDir *im);
+ void adjust_pop_for_rename(CDir *pdir, CDir *dir, bool inc);
- void hit_inode(const utime_t& now, CInode *in, int type, int who=-1);
- void hit_dir(const utime_t& now, CDir *dir, int type, int who=-1, double amount=1.0);
+ void hit_inode(CInode *in, int type, int who=-1);
+ void hit_dir(CDir *dir, int type, int who=-1, double amount=1.0);
void queue_split(const CDir *dir, bool fast);
void queue_merge(CDir *dir);
void handle_export_pins(void);
- mds_load_t get_load(utime_t now);
+ mds_load_t get_load();
int localize_balancer();
void send_heartbeat();
void handle_heartbeat(MHeartbeat *m);
string bal_code;
string bal_version;
- mono_time last_heartbeat = mono_clock::zero();
- mono_time last_sample = mono_clock::zero();
- utime_t rebalance_time; //ensure a consistent view of load for rebalance
+ time last_heartbeat = clock::zero();
+ time last_sample = clock::zero();
+ time rebalance_time = clock::zero(); //ensure a consistent view of load for rebalance
- utime_t last_get_load;
+ time last_get_load = clock::zero();
uint64_t last_num_requests = 0;
// Dirfrags which are marked to be passed on to MDCache::[split|merge]_dir
// adjust recursive pop counters
if (adjust_pop && dir->is_auth()) {
- utime_t now = ceph_clock_now();
CDir *p = dir->get_parent_dir();
while (p) {
- p->pop_auth_subtree.sub(now, decayrate, dir->pop_auth_subtree);
+ p->pop_auth_subtree.sub(dir->pop_auth_subtree);
if (p->is_subtree_root()) break;
p = p->inode->get_parent_dir();
}
// adjust popularity?
if (adjust_pop && dir->is_auth()) {
- utime_t now = ceph_clock_now();
CDir *cur = dir;
CDir *p = dir->get_parent_dir();
while (p) {
- p->pop_auth_subtree.add(now, decayrate, dir->pop_auth_subtree);
+ p->pop_auth_subtree.add(dir->pop_auth_subtree);
p->pop_lru_subdirs.push_front(&cur->get_inode()->item_pop_lru);
if (p->is_subtree_root()) break;
cur = p;
{
dout(10) << "adjust_subtree_after_rename " << *diri << " from " << *olddir << dendl;
- //show_subtrees();
- utime_t now = ceph_clock_now();
-
CDir *newdir = diri->get_parent_dir();
if (pop) {
dout(10) << " new parent " << *newparent << dendl;
if (olddir != newdir)
- mds->balancer->adjust_pop_for_rename(olddir, dir, now, false);
+ mds->balancer->adjust_pop_for_rename(olddir, dir, false);
if (oldparent == newparent) {
dout(10) << "parent unchanged for " << *dir << " at " << *oldparent << dendl;
}
if (olddir != newdir)
- mds->balancer->adjust_pop_for_rename(newdir, dir, now, true);
+ mds->balancer->adjust_pop_for_rename(newdir, dir, true);
}
show_subtrees();
finisher->start();
}
-void MDSRank::update_targets(utime_t now)
+void MDSRank::update_targets()
{
// get MonMap's idea of my export_targets
const set<mds_rank_t>& map_targets = mdsmap->get_mds_info(get_nodeid()).export_targets;
auto it = export_targets.begin();
while (it != export_targets.end()) {
mds_rank_t rank = it->first;
- double val = it->second.get(now);
- dout(20) << "export target mds." << rank << " value is " << val << " @ " << now << dendl;
+ auto &counter = it->second;
+ dout(20) << "export target mds." << rank << " is " << counter << dendl;
+ double val = counter.get();
if (val <= 0.01) {
dout(15) << "export target mds." << rank << " is no longer an export target" << dendl;
export_targets.erase(it++);
}
}
// Record activity toward export target `rank` by hitting its decay counter
// with `amount`. A negative amount selects the default
// 100.0/mds_bal_target_decay.
// emplace() creates the counter with DecayRate(mds_bal_target_decay) only if
// the rank has no entry yet; an existing counter is kept as is.
// In the new version the hit is applied before logging, so the debug line
// prints the counter after the hit rather than the raw amount and timestamp.
-void MDSRank::hit_export_target(utime_t now, mds_rank_t rank, double amount)
+void MDSRank::hit_export_target(mds_rank_t rank, double amount)
{
double rate = g_conf->mds_bal_target_decay;
if (amount < 0.0) {
amount = 100.0/g_conf->mds_bal_target_decay; /* a good default for "i am trying to keep this export_target active" */
}
- auto em = export_targets.emplace(std::piecewise_construct, std::forward_as_tuple(rank), std::forward_as_tuple(now, DecayRate(rate)));
+ auto em = export_targets.emplace(std::piecewise_construct, std::forward_as_tuple(rank), std::forward_as_tuple(DecayRate(rate)));
+ auto &counter = em.first->second;
+ counter.hit(amount);
if (em.second) {
- dout(15) << "hit export target (new) " << amount << " @ " << now << dendl;
+ dout(15) << "hit export target (new) is " << counter << dendl;
} else {
- dout(15) << "hit export target " << amount << " @ " << now << dendl;
+ dout(15) << "hit export target is " << counter << dendl;
}
- em.first->second.hit(now, amount);
}
class C_MDS_MonCommand : public MDSInternalContext {
}
if (is_active() || is_stopping()) {
- update_targets(ceph_clock_now());
+ update_targets();
}
// shut down?
void dump_status(Formatter *f) const;
- void hit_export_target(utime_t now, mds_rank_t rank, double amount=-1.0);
+ void hit_export_target(mds_rank_t rank, double amount=-1.0);
bool is_export_target(mds_rank_t rank) {
const set<mds_rank_t>& map_targets = mdsmap->get_mds_info(get_nodeid()).export_targets;
return map_targets.count(rank);
// <<<
/* Update MDSMap export_targets for this rank. Called on ::tick(). */
- void update_targets(utime_t now);
+ void update_targets();
friend class C_MDS_MonCommand;
void _mon_command_finish(int r, std::string_view cmd, std::string_view outs);
}
}
- mds->hit_export_target(ceph_clock_now(), dest, -1);
+ mds->hit_export_target(dest, -1);
dir->auth_pin(this);
dir->state_set(CDir::STATE_EXPORTING);
{
CDir *dir = cache->get_dirfrag(m->get_dirfrag());
mds_rank_t dest(m->get_source().num());
- utime_t now = ceph_clock_now();
assert(dir);
dout(7) << "export_discover_ack from " << m->get_source()
<< " on " << *dir << dendl;
- mds->hit_export_target(now, dest, -1);
+ mds->hit_export_target(dest, -1);
map<CDir*,export_state_t>::iterator it = export_state.find(dir);
if (it == export_state.end() ||
{
CDir *dir = cache->get_dirfrag(m->get_dirfrag());
mds_rank_t dest(m->get_source().num());
- utime_t now = ceph_clock_now();
assert(dir);
dout(7) << "export_prep_ack " << *dir << dendl;
- mds->hit_export_target(now, dest, -1);
+ mds->hit_export_target(dest, -1);
map<CDir*,export_state_t>::iterator it = export_state.find(dir);
if (it == export_state.end() ||
cache->adjust_subtree_auth(dir, mds->get_nodeid(), dest);
// take away the popularity we're sending.
- utime_t now = ceph_clock_now();
- mds->balancer->subtract_export(dir, now);
+ mds->balancer->subtract_export(dir);
// fill export message with cache data
MExportDir *req = new MExportDir(dir->dirfrag(), it->second.tid);
map<client_t,entity_inst_t> exported_client_map;
uint64_t num_exported_inodes = encode_export_dir(req->export_data,
dir, // recur start point
- exported_client_map,
- now);
+ exported_client_map);
encode(exported_client_map, req->client_map,
mds->mdsmap->get_up_features());
mds->send_message_mds(req, dest);
assert(g_conf->mds_kill_export_at != 8);
- mds->hit_export_target(now, dest, num_exported_inodes+1);
+ mds->hit_export_target(dest, num_exported_inodes+1);
// stats
if (mds->logger) mds->logger->inc(l_mds_exported);
mds->locker->eval(in, CEPH_CAP_LOCKS);
}
-void Migrator::finish_export_inode(CInode *in, utime_t now, mds_rank_t peer,
+void Migrator::finish_export_inode(CInode *in, mds_rank_t peer,
map<client_t,Capability::Import>& peer_imported,
list<MDSInternalContextBase*>& finished)
{
// waiters
in->take_waiting(CInode::WAIT_ANY_MASK, finished);
- in->finish_export(now);
+ in->finish_export();
finish_export_inode_caps(in, peer, peer_imported);
}
uint64_t Migrator::encode_export_dir(bufferlist& exportbl,
CDir *dir,
- map<client_t,entity_inst_t>& exported_client_map,
- utime_t now)
+ map<client_t,entity_inst_t>& exported_client_map)
{
uint64_t num_exported = 0;
// subdirs
for (auto &dir : subdirs)
- num_exported += encode_export_dir(exportbl, dir, exported_client_map, now);
+ num_exported += encode_export_dir(exportbl, dir, exported_client_map);
return num_exported;
}
-void Migrator::finish_export_dir(CDir *dir, utime_t now, mds_rank_t peer,
+void Migrator::finish_export_dir(CDir *dir, mds_rank_t peer,
map<inodeno_t,map<client_t,Capability::Import> >& peer_imported,
list<MDSInternalContextBase*>& finished, int *num_dentries)
{
dir->take_waiting(CDir::WAIT_ANY_MASK, finished); // all dir waiters
// pop
- dir->finish_export(now);
+ dir->finish_export();
// dentries
list<CDir*> subdirs;
// inode?
if (dn->get_linkage()->is_primary()) {
- finish_export_inode(in, now, peer, peer_imported[in->ino()], finished);
+ finish_export_inode(in, peer, peer_imported[in->ino()], finished);
// subdirs?
in->get_nested_dirfrags(subdirs);
// subdirs
for (list<CDir*>::iterator it = subdirs.begin(); it != subdirs.end(); ++it)
- finish_export_dir(*it, now, peer, peer_imported, finished, num_dentries);
+ finish_export_dir(*it, peer, peer_imported, finished, num_dentries);
}
class C_MDS_ExportFinishLogged : public MigratorLogContext {
{
CDir *dir = cache->get_dirfrag(m->get_dirfrag());
mds_rank_t dest(m->get_source().num());
- utime_t now = ceph_clock_now();
assert(dir);
assert(dir->is_frozen_tree_root()); // i'm exporting!
// yay!
dout(7) << "handle_export_ack " << *dir << dendl;
- mds->hit_export_target(now, dest, -1);
+ mds->hit_export_target(dest, -1);
map<CDir*,export_state_t>::iterator it = export_state.find(dir);
assert(it != export_state.end());
{
CDir *dir = cache->get_dirfrag(m->get_dirfrag());
mds_rank_t dest(m->get_source().num());
- utime_t now = ceph_clock_now();
assert(dir);
mds_rank_t from = mds_rank_t(m->get_source().num());
- mds->hit_export_target(now, dest, -1);
+ mds->hit_export_target(dest, -1);
auto export_state_entry = export_state.find(dir);
if (export_state_entry != export_state.end()) {
// finish export (adjust local cache state)
int num_dentries = 0;
list<MDSInternalContextBase*> finished;
- finish_export_dir(dir, ceph_clock_now(), it->second.peer,
+ finish_export_dir(dir, it->second.peer,
it->second.peer_imported, finished, &num_dentries);
assert(!dir->is_auth());
assert(it->second.tid == m->get_tid());
assert(it->second.peer == oldauth);
- utime_t now = ceph_clock_now();
-
if (!dir->get_inode()->dirfragtree.is_leaf(dir->get_frag()))
dir->get_inode()->dirfragtree.force_to_leaf(g_ceph_context, dir->get_frag());
le,
mds->mdlog->get_current_segment(),
it->second.peer_exports,
- it->second.updated_scatterlocks,
- now);
+ it->second.updated_scatterlocks);
}
dout(10) << " " << m->bounds.size() << " imported bounds" << dendl;
cache->verify_subtree_bounds(dir, import_bounds);
// adjust popularity
- mds->balancer->add_import(dir, now);
+ mds->balancer->add_import(dir);
dout(7) << "handle_export_dir did " << *dir << dendl;
import_state_t& stat = import_state[dir->dirfrag()];
stat.state = IMPORT_ABORTING;
- utime_t now = ceph_clock_now();
-
set<CDir*> bounds;
cache->get_subtree_bounds(dir, bounds);
q.pop_front();
// dir
- cur->abort_import(now);
+ cur->abort_import();
for (auto &p : *cur) {
CDentry *dn = p.second;
EImportStart *le,
LogSegment *ls,
map<CInode*,map<client_t,Capability::Export> >& peer_exports,
- list<ScatterLock*>& updated_scatterlocks, utime_t now)
+ list<ScatterLock*>& updated_scatterlocks)
{
// set up dir
dirfrag_t df;
dout(7) << "decode_import_dir " << *dir << dendl;
// assimilate state
- dir->decode_import(blp, now, ls);
+ dir->decode_import(blp, ls);
// adjust replica list
//assert(!dir->is_replica(oldauth)); // not true on failed export
map<client_t,entity_inst_t>& exported_client_map);
void encode_export_inode_caps(CInode *in, bool auth_cap, bufferlist& bl,
map<client_t,entity_inst_t>& exported_client_map);
- void finish_export_inode(CInode *in, utime_t now, mds_rank_t target,
+ void finish_export_inode(CInode *in, mds_rank_t target,
map<client_t,Capability::Import>& peer_imported,
list<MDSInternalContextBase*>& finished);
void finish_export_inode_caps(CInode *in, mds_rank_t target,
uint64_t encode_export_dir(bufferlist& exportbl,
CDir *dir,
- map<client_t,entity_inst_t>& exported_client_map,
- utime_t now);
- void finish_export_dir(CDir *dir, utime_t now, mds_rank_t target,
+ map<client_t,entity_inst_t>& exported_client_map);
+ void finish_export_dir(CDir *dir, mds_rank_t target,
map<inodeno_t,map<client_t,Capability::Import> >& peer_imported,
list<MDSInternalContextBase*>& finished, int *num_dentries);
EImportStart *le,
LogSegment *ls,
map<CInode*, map<client_t,Capability::Export> >& cap_imports,
- list<ScatterLock*>& updated_scatterlocks, utime_t now);
+ list<ScatterLock*>& updated_scatterlocks);
void import_reverse(CDir *dir);
// value for them. (currently this matters for xattrs and inline data)
mdr->getattr_caps = mask;
- mds->balancer->hit_inode(now, ref, META_POP_IRD,
- req->get_source().num());
+ mds->balancer->hit_inode(ref, META_POP_IRD, req->get_source().num());
// reply
dout(10) << "reply to stat on " << *req << dendl;
// hit pop
if (cmode & CEPH_FILE_MODE_WR)
- mds->balancer->hit_inode(now, cur, META_POP_IWR);
+ mds->balancer->hit_inode(cur, META_POP_IWR);
else
- mds->balancer->hit_inode(now, cur, META_POP_IRD,
+ mds->balancer->hit_inode(cur, META_POP_IRD,
mdr->client_request->get_source().num());
CDentry *dn = 0;
MDRequestRef null_ref;
get_mds()->mdcache->send_dentry_link(dn, null_ref);
- utime_t now = ceph_clock_now();
- get_mds()->balancer->hit_inode(now, newi, META_POP_IWR);
+ get_mds()->balancer->hit_inode(newi, META_POP_IWR);
server->respond_to_request(mdr, 0);
mdr->reply_extra_bl = dirbl;
// bump popularity. NOTE: this doesn't quite capture it.
- mds->balancer->hit_dir(now, dir, META_POP_IRD, -1, numfiles);
+ mds->balancer->hit_dir(dir, META_POP_IRD, -1, numfiles);
// reply
mdr->tracei = diri;
mds->mdcache->do_realm_invalidate_and_update_notify(in, op);
}
- utime_t now = ceph_clock_now();
- get_mds()->balancer->hit_inode(now, in, META_POP_IWR);
+ get_mds()->balancer->hit_inode(in, META_POP_IWR);
server->respond_to_request(mdr, 0);
mdr->apply();
- utime_t now = ceph_clock_now();
- get_mds()->balancer->hit_inode(now, in, META_POP_IWR);
+ get_mds()->balancer->hit_inode(in, META_POP_IWR);
server->respond_to_request(mdr, 0);
}
get_mds()->locker->share_inode_max_size(newi);
// hit pop
- utime_t now = ceph_clock_now();
- get_mds()->balancer->hit_inode(now, newi, META_POP_IWR);
+ get_mds()->balancer->hit_inode(newi, META_POP_IWR);
// reply
server->respond_to_request(mdr, 0);
}
// bump target popularity
- utime_t now = ceph_clock_now();
- mds->balancer->hit_inode(now, targeti, META_POP_IWR);
- mds->balancer->hit_dir(now, dn->get_dir(), META_POP_IWR);
+ mds->balancer->hit_inode(targeti, META_POP_IWR);
+ mds->balancer->hit_dir(dn->get_dir(), META_POP_IWR);
// reply
respond_to_request(mdr, 0);
mdcache->send_dentry_unlink(dn, NULL, null_ref);
// bump target popularity
- utime_t now = ceph_clock_now();
- mds->balancer->hit_inode(now, targeti, META_POP_IWR);
- mds->balancer->hit_dir(now, dn->get_dir(), META_POP_IWR);
+ mds->balancer->hit_inode(targeti, META_POP_IWR);
+ mds->balancer->hit_dir(dn->get_dir(), META_POP_IWR);
// reply
respond_to_request(mdr, 0);
mdr->apply();
// hit pop
- utime_t now = ceph_clock_now();
- mds->balancer->hit_inode(now, targeti, META_POP_IWR);
+ mds->balancer->hit_inode(targeti, META_POP_IWR);
// done.
mdr->slave_request->put();
}
// bump pop
- utime_t now = ceph_clock_now();
- mds->balancer->hit_dir(now, dn->get_dir(), META_POP_IWR);
+ mds->balancer->hit_dir(dn->get_dir(), META_POP_IWR);
// reply
respond_to_request(mdr, 0);
assert(g_conf->mds_kill_rename_at != 6);
// bump popularity
- utime_t now = ceph_clock_now();
- mds->balancer->hit_dir(now, srcdn->get_dir(), META_POP_IWR);
+ mds->balancer->hit_dir(srcdn->get_dir(), META_POP_IWR);
if (destdnl->is_remote() && in->is_auth())
- mds->balancer->hit_inode(now, in, META_POP_IWR);
+ mds->balancer->hit_inode(in, META_POP_IWR);
// did we import srci? if so, explicitly ack that import that, before we unlock and reply.
CDentry::linkage_t *destdnl = destdn->get_linkage();
// bump popularity
- utime_t now = ceph_clock_now();
- mds->balancer->hit_dir(now, srcdn->get_dir(), META_POP_IWR);
+ mds->balancer->hit_dir(srcdn->get_dir(), META_POP_IWR);
if (destdnl->get_inode() && destdnl->get_inode()->is_auth())
- mds->balancer->hit_inode(now, destdnl->get_inode(), META_POP_IWR);
+ mds->balancer->hit_inode(destdnl->get_inode(), META_POP_IWR);
// done.
mdr->slave_request->put();
decode(peer_imported, bp);
dout(10) << " finishing inode export on " << *destdnl->get_inode() << dendl;
- mdcache->migrator->finish_export_inode(destdnl->get_inode(), ceph_clock_now(),
+ mdcache->migrator->finish_export_inode(destdnl->get_inode(),
mdr->slave_to_mds, peer_imported, finished);
mds->queue_waiters(finished); // this includes SINGLEAUTH waiters.
ENCODE_FINISH(bl);
}
-void inode_load_vec_t::decode(const utime_t &t, bufferlist::const_iterator &p)
+void inode_load_vec_t::decode(bufferlist::const_iterator &p)
{
DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, p);
for (auto &i : vec) {
- decode(i, t, p);
+ decode(i, p);
}
DECODE_FINISH(p);
}
-void inode_load_vec_t::dump(Formatter *f)
+void inode_load_vec_t::dump(Formatter *f) const
{
f->open_array_section("Decay Counters");
for (const auto &i : vec) {
void inode_load_vec_t::generate_test_instances(list<inode_load_vec_t*>& ls)
{
- utime_t sample;
- ls.push_back(new inode_load_vec_t(sample));
+ ls.push_back(new inode_load_vec_t(DecayRate()));
}
f->close_section();
}
-void dirfrag_load_vec_t::dump(Formatter *f, utime_t now, const DecayRate& rate)
+void dirfrag_load_vec_t::dump(Formatter *f, const DecayRate& rate) const // NOTE(review): `rate` is not read anywhere in the new body; every value below comes from an argument-less call
{
- f->dump_float("meta_load", meta_load(now, rate));
- f->dump_float("IRD", get(META_POP_IRD).get(now, rate));
- f->dump_float("IWR", get(META_POP_IWR).get(now, rate));
- f->dump_float("READDIR", get(META_POP_READDIR).get(now, rate));
- f->dump_float("FETCH", get(META_POP_FETCH).get(now, rate));
- f->dump_float("STORE", get(META_POP_STORE).get(now, rate));
+ f->dump_float("meta_load", meta_load()); // combined figure first, then one field per counter
+ f->dump_float("IRD", get(META_POP_IRD).get());
+ f->dump_float("IWR", get(META_POP_IWR).get());
+ f->dump_float("READDIR", get(META_POP_READDIR).get());
+ f->dump_float("FETCH", get(META_POP_FETCH).get());
+ f->dump_float("STORE", get(META_POP_STORE).get());
}
-void dirfrag_load_vec_t::generate_test_instances(list<dirfrag_load_vec_t*>& ls)
+void dirfrag_load_vec_t::generate_test_instances(std::list<dirfrag_load_vec_t*>& ls)
{
- utime_t sample;
- ls.push_back(new dirfrag_load_vec_t(sample));
+ ls.push_back(new dirfrag_load_vec_t(DecayRate()));
}
/*
ENCODE_FINISH(bl);
}
-void mds_load_t::decode(const utime_t &t, bufferlist::const_iterator &bl) {
+void mds_load_t::decode(bufferlist::const_iterator &bl) {
DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
- decode(auth, t, bl);
- decode(all, t, bl);
+ decode(auth, bl);
+ decode(all, bl);
decode(req_rate, bl);
decode(cache_hit_rate, bl);
decode(queue_len, bl);
f->close_section();
}
-void mds_load_t::generate_test_instances(list<mds_load_t*>& ls)
+void mds_load_t::generate_test_instances(std::list<mds_load_t*>& ls)
{
- utime_t sample;
- ls.push_back(new mds_load_t(sample));
+ ls.push_back(new mds_load_t(DecayRate()));
}
/*
#define META_NPOP 5
class inode_load_vec_t {
- static const int NUM = 2;
- std::array<DecayCounter, NUM> vec;
public:
- explicit inode_load_vec_t(const utime_t &now)
- : vec{DecayCounter(now), DecayCounter(now)}
- {}
- // for dencoder infrastructure
- inode_load_vec_t() {}
+ using time = DecayCounter::time;
+ using clock = DecayCounter::clock;
+ static const size_t NUM = 2; // NOTE(review): get() below no longer asserts t < NUM, and vec[t] on a std::array is an unchecked access, so callers must keep t in range
+
+ inode_load_vec_t() : vec{DecayCounter(DecayRate()), DecayCounter(DecayRate())} {} // default: both counters are built from a default-constructed DecayRate
+ inode_load_vec_t(const DecayRate &rate) : vec{DecayCounter(rate), DecayCounter(rate)} {} // both counters are built from the same rate; NOTE(review): not explicit, unlike the constructor it replaces
+
DecayCounter &get(int t) {
- assert(t < NUM);
return vec[t];
}
- void zero(utime_t now) {
- for (int i=0; i<NUM; i++)
- vec[i].reset(now);
+ void zero() { // calls reset() on every counter in vec
+ for (auto &d : vec) {
+ d.reset();
+ }
}
void encode(bufferlist &bl) const;
- void decode(const utime_t &t, bufferlist::const_iterator &p);
- // for dencoder
- void decode(bufferlist::const_iterator& p) { utime_t sample; decode(sample, p); }
- void dump(Formatter *f);
+ void decode(bufferlist::const_iterator& p); // single decode entry point; the timestamp-taking overload is removed
+ void dump(Formatter *f) const;
static void generate_test_instances(list<inode_load_vec_t*>& ls);
+
+private:
+ std::array<DecayCounter, NUM> vec; // storage is now private; outside code goes through get()
};
-inline void encode(const inode_load_vec_t &c, bufferlist &bl) { c.encode(bl); }
-inline void decode(inode_load_vec_t & c, const utime_t &t, bufferlist::const_iterator &p) {
- c.decode(t, p);
+inline void encode(const inode_load_vec_t &c, bufferlist &bl) {
+ c.encode(bl);
}
-// for dencoder
inline void decode(inode_load_vec_t & c, bufferlist::const_iterator &p) {
- utime_t sample;
- c.decode(sample, p);
+ c.decode(p);
}
class dirfrag_load_vec_t {
public:
- static const int NUM = 5;
- std::array<DecayCounter, NUM> vec;
- explicit dirfrag_load_vec_t(const utime_t &now)
- : vec{
- DecayCounter(now),
- DecayCounter(now),
- DecayCounter(now),
- DecayCounter(now),
- DecayCounter(now)
- }
+ using time = DecayCounter::time;
+ using clock = DecayCounter::clock;
+ static const size_t NUM = 5; // NOTE(review): both get() overloads below drop the old t < NUM assertion; vec[t] on a std::array is unchecked
+
+ dirfrag_load_vec_t() : // default: all five counters are built from a default-constructed DecayRate
+ vec{DecayCounter(DecayRate()),
+ DecayCounter(DecayRate()),
+ DecayCounter(DecayRate()),
+ DecayCounter(DecayRate()),
+ DecayCounter(DecayRate())
+ }
+ {}
+ dirfrag_load_vec_t(const DecayRate &rate) : // all five counters are built from the same rate; NOTE(review): not explicit, unlike the constructor it replaces
+ vec{DecayCounter(rate), DecayCounter(rate), DecayCounter(rate), DecayCounter(rate), DecayCounter(rate)}
{}
- // for dencoder infrastructure
- dirfrag_load_vec_t() {}
+
void encode(bufferlist &bl) const {
ENCODE_START(2, 2, bl);
for (const auto &i : vec) {
}
ENCODE_FINISH(bl);
}
- void decode(const utime_t &t, bufferlist::const_iterator &p) {
+ void decode(bufferlist::const_iterator &p) { // decodes each counter in vec order; no timestamp is passed any more
DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, p);
for (auto &i : vec) {
- decode(i, t, p);
+ decode(i, p);
}
DECODE_FINISH(p);
}
- // for dencoder infrastructure
- void decode(bufferlist::const_iterator& p) {
- utime_t sample;
- decode(sample, p);
- }
void dump(Formatter *f) const;
- void dump(Formatter *f, utime_t now, const DecayRate& rate);
- static void generate_test_instances(list<dirfrag_load_vec_t*>& ls);
+ void dump(Formatter *f, const DecayRate& rate) const;
+ static void generate_test_instances(std::list<dirfrag_load_vec_t*>& ls);
- DecayCounter &get(int t) {
- assert(t < NUM);
- return vec[t];
+ const DecayCounter &get(int t) const { // read-only overload, usable from const members
+ return vec[t];
+ }
+ DecayCounter &get(int t) { // mutable overload
+ return vec[t];
}
- void adjust(utime_t now, const DecayRate& rate, double d) {
+ void adjust(double d) { // forwards d to adjust() on every counter
for (auto &i : vec) {
- i.adjust(now, rate, d);
+ i.adjust(d);
}
}
- void zero(utime_t now) {
+ void zero() { // calls reset() on every counter
for (auto &i : vec) {
- i.reset(now);
+ i.reset();
}
}
- double meta_load(utime_t now, const DecayRate& rate) {
- return
- 1*vec[META_POP_IRD].get(now, rate) +
- 2*vec[META_POP_IWR].get(now, rate) +
- 1*vec[META_POP_READDIR].get(now, rate) +
- 2*vec[META_POP_FETCH].get(now, rate) +
- 4*vec[META_POP_STORE].get(now, rate);
- }
double meta_load() const {
return
- 1*vec[META_POP_IRD].get_last() +
- 2*vec[META_POP_IWR].get_last() +
- 1*vec[META_POP_READDIR].get_last() +
- 2*vec[META_POP_FETCH].get_last() +
- 4*vec[META_POP_STORE].get_last();
+ 1*vec[META_POP_IRD].get() + // weighted sum: IRD x1, IWR x2, READDIR x1, FETCH x2, STORE x4; reads get() where the old code read get_last()
+ 2*vec[META_POP_IWR].get() +
+ 1*vec[META_POP_READDIR].get() +
+ 2*vec[META_POP_FETCH].get() +
+ 4*vec[META_POP_STORE].get();
}
- void add(utime_t now, DecayRate& rate, dirfrag_load_vec_t& r) {
- for (int i=0; i<dirfrag_load_vec_t::NUM; i++)
- vec[i].adjust(r.vec[i].get(now, rate));
+ void add(dirfrag_load_vec_t& r) { // adjusts each counter by the matching counter value of r; NOTE(review): r is only read here, so a const reference may suffice — confirm
+ for (size_t i=0; i<dirfrag_load_vec_t::NUM; i++)
+ vec[i].adjust(r.vec[i].get());
}
- void sub(utime_t now, DecayRate& rate, dirfrag_load_vec_t& r) {
- for (int i=0; i<dirfrag_load_vec_t::NUM; i++)
- vec[i].adjust(-r.vec[i].get(now, rate));
+ void sub(dirfrag_load_vec_t& r) { // adjusts each counter by the negated matching counter value of r
+ for (size_t i=0; i<dirfrag_load_vec_t::NUM; i++)
+ vec[i].adjust(-r.vec[i].get());
}
void scale(double f) {
- for (int i=0; i<dirfrag_load_vec_t::NUM; i++)
+ for (size_t i=0; i<dirfrag_load_vec_t::NUM; i++)
vec[i].scale(f);
}
+
+private:
+ friend inline std::ostream& operator<<(std::ostream& out, const dirfrag_load_vec_t& dl); // NOTE(review): `inline` on a friend declaration that is not a definition is only permitted if the function was already declared inline — confirm against the toolchain
+ std::array<DecayCounter, NUM> vec; // storage is now private; the friend operator<< above can still reach it
};
-inline void encode(const dirfrag_load_vec_t &c, bufferlist &bl) { c.encode(bl); }
-inline void decode(dirfrag_load_vec_t& c, const utime_t &t, bufferlist::const_iterator &p) {
- c.decode(t, p);
+inline void encode(const dirfrag_load_vec_t &c, bufferlist &bl) {
+ c.encode(bl);
}
-// this for dencoder
inline void decode(dirfrag_load_vec_t& c, bufferlist::const_iterator &p) {
- utime_t sample;
- c.decode(sample, p);
+ c.decode(p);
}
inline std::ostream& operator<<(std::ostream& out, const dirfrag_load_vec_t& dl)
}
-
-
-
-
/* mds_load_t
* mds load
*/
struct mds_load_t {
+ using clock = dirfrag_load_vec_t::clock;
+ using time = dirfrag_load_vec_t::time;
+
dirfrag_load_vec_t auth;
dirfrag_load_vec_t all;
+ mds_load_t() : auth(DecayRate()), all(DecayRate()) {} // default: both vectors are built from a default-constructed DecayRate
+ mds_load_t(const DecayRate &rate) : auth(rate), all(rate) {} // both vectors are built from the same rate; NOTE(review): not explicit, unlike the constructor it replaces
+
double req_rate = 0.0;
double cache_hit_rate = 0.0;
double queue_len = 0.0;
double cpu_load_avg = 0.0;
- explicit mds_load_t(const utime_t &t) : auth(t), all(t) {}
- // mostly for the dencoder infrastructure
- mds_load_t() : auth(), all() {}
-
- double mds_load(); // defiend in MDBalancer.cc
+ double mds_load() const; // defined in MDBalancer.cc
void encode(bufferlist& bl) const;
- void decode(const utime_t& now, bufferlist::const_iterator& bl);
- //this one is for dencoder infrastructure
- void decode(bufferlist::const_iterator& bl) { utime_t sample; decode(sample, bl); }
+ void decode(bufferlist::const_iterator& bl); // single decode entry point; the timestamp-taking overload is removed
void dump(Formatter *f) const;
- static void generate_test_instances(list<mds_load_t*>& ls);
+ static void generate_test_instances(std::list<mds_load_t*>& ls);
};
-inline void encode(const mds_load_t &c, bufferlist &bl) { c.encode(bl); }
-inline void decode(mds_load_t &c, const utime_t &t, bufferlist::const_iterator &p) {
- c.decode(t, p);
+inline void encode(const mds_load_t &c, bufferlist &bl) {
+ c.encode(bl);
}
-// this one is for dencoder
inline void decode(mds_load_t &c, bufferlist::const_iterator &p) {
- utime_t sample;
- c.decode(sample, p);
+ c.decode(p);
}
inline std::ostream& operator<<(std::ostream& out, const mds_load_t& load)
class load_spread_t {
public:
+ using time = DecayCounter::time;
+ using clock = DecayCounter::clock;
static const int MAX = 4;
int last[MAX];
int p = 0, n = 0;
DecayCounter count;
public:
- load_spread_t() : count(ceph_clock_now())
+ load_spread_t() = delete; // default construction is forbidden: a DecayRate must be supplied
+ load_spread_t(const DecayRate &rate) : count(rate) // count is built from rate; the body sets every slot of last to -1
{
for (int i=0; i<MAX; i++)
last[i] = -1;
}
- double hit(utime_t now, const DecayRate& rate, int who) {
+ double hit(int who) { // returns count.get_last() without a new hit when who is among the first n entries of last
for (int i=0; i<n; i++)
if (last[i] == who)
return count.get_last();
if (p == MAX) p = 0;
- return count.hit(now, rate);
+ return count.hit(); // otherwise calls count.hit() and returns its result
}
- double get(utime_t now, const DecayRate& rate) {
- return count.get(now, rate);
+ double get() const { // forwards to count.get(); callers no longer pass a time or rate
+ return count.get();
}
};
#include "include/types.h"
#include "msg/Message.h"
+#include "common/DecayCounter.h"
class MHeartbeat : public Message {
mds_load_t load;
return import_map;
}
- MHeartbeat()
- : Message(MSG_MDS_HEARTBEAT), load(utime_t()) { }
+ MHeartbeat() : Message(MSG_MDS_HEARTBEAT), load(DecayRate()) {}
MHeartbeat(mds_load_t& load, int beat)
: Message(MSG_MDS_HEARTBEAT),
load(load) {
}
void decode_payload() override {
auto p = payload.cbegin();
- utime_t now(ceph_clock_now());
- decode(load, now, p);
+ decode(load, p);
decode(beat, p);
decode(import_map, p);
}
#include "common/DecayCounter.h"
#include <list>
+
+#include <unistd.h>
using namespace std;
struct RealCounter {
DecayCounter dc(hl);
RealCounter rc;
- utime_t now = ceph_clock_now();
+ DecayCounter::time now = DecayCounter::clock::now();
for (int ms=0; ms < 300*1000; ms++) {
if (ms % 30000 == 0) {
<< endl;
}
- now += .001;
+ usleep(1);
+ now = DecayCounter::clock::now();
}
}