From b65ef7d2d0c1c554a7ac97de6367b276e2192289 Mon Sep 17 00:00:00 2001 From: sage Date: Wed, 1 Mar 2006 13:54:34 +0000 Subject: [PATCH] *** empty log message *** git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@710 29311d96-e01e-0410-9327-a35deaab8ce9 --- ceph/client/SyntheticClient.cc | 2 +- ceph/mds/CDir.h | 2 +- ceph/mds/MDBalancer.cc | 65 +++++++++++++++++++++------------- ceph/mds/MDBalancer.h | 5 +-- ceph/mds/MDS.cc | 34 +++++++++--------- ceph/mds/mdstypes.h | 26 +++++++------- 6 files changed, 77 insertions(+), 57 deletions(-) diff --git a/ceph/client/SyntheticClient.cc b/ceph/client/SyntheticClient.cc index df501e6310554..d0dd65660f296 100644 --- a/ceph/client/SyntheticClient.cc +++ b/ceph/client/SyntheticClient.cc @@ -673,7 +673,7 @@ int SyntheticClient::make_dirs(const char *basedir, int dirs, int files, int dep // children char d[500]; - dout(5-depth) << "make_dirs " << basedir << " dirs " << dirs << " files " << files << " depth " << depth << endl; + dout(3) << "make_dirs " << basedir << " dirs " << dirs << " files " << files << " depth " << depth << endl; for (int i=0; imknod(d, 0644); diff --git a/ceph/mds/CDir.h b/ceph/mds/CDir.h index db2da4d321bb2..5aed7e608bf54 100644 --- a/ceph/mds/CDir.h +++ b/ceph/mds/CDir.h @@ -366,7 +366,7 @@ class CDir { // for giving to clients void get_dist_spec(set& ls, int auth) { - if (( popularity[MDS_POP_CURDOM].pop[META_POP_RD].get() > g_conf.mds_bal_replicate_threshold)) { + if (( popularity[MDS_POP_CURDOM].pop[META_POP_IRD].get() > g_conf.mds_bal_replicate_threshold)) { //if (!cached_by.empty() && inode.ino > 1) dout(1) << "distributed spec for " << *this << endl; ls = open_by; if (!ls.empty()) ls.insert(auth); diff --git a/ceph/mds/MDBalancer.cc b/ceph/mds/MDBalancer.cc index aa208f9481380..4ff240f450d8d 100644 --- a/ceph/mds/MDBalancer.cc +++ b/ceph/mds/MDBalancer.cc @@ -245,25 +245,36 @@ void MDBalancer::do_rebalance(int beat) dout(5) << " do_rebalance: cluster loads are" << endl; + // rescale! turn my mds_load back into meta_load units + double load_fac = mds_load[whoami].root.meta_load() / mds_load[whoami].mds_load(); + dout(-7) << " load_fac is " << load_fac + << " <- " << mds_load[whoami].root.meta_load() << " / " << mds_load[whoami].mds_load() + << endl; + double total_load = 0; multimap load_map; for (int i=0; i " << l << endl; - total_load += l; + dout(-5) << " mds" << i + << " meta load " << mds_load[i] + << " = " << mds_load[i].mds_load() + << " --> " << l << endl; + if (whoami == i) my_load = l; + total_load += l; load_map.insert(pair( l, i )); } - dout(5) << " total load " << total_load << endl; - - // my load - dout(5) << " my load " << my_load << endl; // target load target_load = total_load / (double)cluster_size; - dout(5) << " target load " << target_load << endl; + dout(5) << "do_rebalance: my load " << my_load + << " target " << target_load + << " total " << total_load + << endl; // under or over? if (my_load < target_load) { @@ -361,6 +372,7 @@ void MDBalancer::do_rebalance(int beat) } + // make a sorted list of my imports map import_pop_map; multimap import_from_map; @@ -381,6 +393,8 @@ void MDBalancer::do_rebalance(int beat) import_from_map.insert(pair(from, *it)); } + + // do my exports! set already_exporting; double total_sent = 0; @@ -390,22 +404,25 @@ void MDBalancer::do_rebalance(int beat) it != my_targets.end(); it++) { - double fac = 1.0; + /* + double fac = 1.0; if (false && total_goal > 0 && total_sent > 0) { fac = total_goal / total_sent; dout(-5) << " total sent is " << total_sent << " / " << total_goal << " -> fac 1/ " << fac << endl; if (fac > 1.0) fac = 1.0; } fac = .9 - .4 * ((float)g_conf.num_mds / 128.0); // hack magic fixme - + */ + int target = (*it).first; - double amount = (*it).second * fac; + double amount = (*it).second;// * load_fac; total_goal += amount; if (amount < MIN_OFFLOAD) continue; - dout(-5) << " sending " << amount << " to mds" << target - << " .. " << (*it).second << " * " << fac << " -> " << amount + dout(5) << " sending " << amount << " to mds" << target + //<< " .. " << (*it).second << " * " << load_fac + << " -> " << amount << endl;//" .. fudge is " << fudge << endl; double have = 0; @@ -511,9 +528,9 @@ void MDBalancer::find_exports(CDir *dir, list bigger; multimap smaller; - double dirpop = dir->popularity[MDS_POP_CURDOM].meta_load(); - dout(-7) << " find_exports .. pop is " << dirpop << " in " << *dir << " .. i need " << need << " (" << needmin << " - " << needmax << ")" << endl; - double dirsum = 0.0; + double dir_pop = dir->popularity[MDS_POP_CURDOM].meta_load(); + double dir_sum = 0; + dout(7) << " find_exports in " << dir_pop << " " << *dir << " need " << need << " (" << needmin << " - " << needmax << ")" << endl; for (CDir_map_t::iterator it = dir->begin(); it != dir->end(); @@ -532,7 +549,7 @@ void MDBalancer::find_exports(CDir *dir, // how popular? double pop = in->dir->popularity[MDS_POP_CURDOM].meta_load(); - dirsum += pop; + dir_sum += pop; dout(20) << " pop " << pop << " " << *in->dir << endl; if (pop < minchunk) continue; @@ -549,7 +566,7 @@ void MDBalancer::find_exports(CDir *dir, else smaller.insert(pair(pop, in->dir)); } - dout(-20) << " .. sum is " << dirsum << " / " << dirpop << endl; + dout(-7) << " .. sum " << dir_sum << " / " << dir_pop << endl; // grab some sufficiently big small items multimap::reverse_iterator it; @@ -636,8 +653,8 @@ void MDBalancer::hit_dir(CDir *dir, int type) dir->is_auth() && !dir->inode->is_root()) { // not root (for now at least) // hash this dir? (later?) - if (((v > g_conf.mds_bal_hash_rd && type == META_POP_RD) || - (v > g_conf.mds_bal_hash_wr && type == META_POP_WR)) && + if (((v > g_conf.mds_bal_hash_rd && type == META_POP_IRD) || + (v > g_conf.mds_bal_hash_wr && type == META_POP_IWR)) && !(dir->is_hashed() || dir->is_hashing()) && hash_queue.count(dir->ino()) == 0) { dout(0) << "hit_dir " << type << " pop is " << v << ", putting in hash_queue: " << *dir << endl; @@ -664,7 +681,7 @@ void MDBalancer::hit_recursive(CDir *dir, int type) if (!dir->is_rep() && dir_pop >= g_conf.mds_bal_replicate_threshold) { // replicate - float rdp = dir->popularity[MDS_POP_JUSTME].pop[META_POP_RD].get(); + float rdp = dir->popularity[MDS_POP_JUSTME].pop[META_POP_IRD].get(); rd_adj = rdp / mds->get_cluster()->get_num_mds() - rdp; rd_adj /= 2.0; // temper somewhat @@ -673,8 +690,8 @@ void MDBalancer::hit_recursive(CDir *dir, int type) dir->dir_rep = CDIR_REP_ALL; mds->mdcache->send_dir_updates(dir, true); - dir->popularity[MDS_POP_JUSTME].pop[META_POP_RD].adjust(rd_adj); - dir->popularity[MDS_POP_CURDOM].pop[META_POP_RD].adjust(rd_adj); + dir->popularity[MDS_POP_JUSTME].pop[META_POP_IRD].adjust(rd_adj); + dir->popularity[MDS_POP_CURDOM].pop[META_POP_IRD].adjust(rd_adj); } if (dir->is_rep() && @@ -694,7 +711,7 @@ void MDBalancer::hit_recursive(CDir *dir, int type) dir->popularity[MDS_POP_NESTED].pop[type].hit(); in->popularity[MDS_POP_NESTED].pop[type].hit(); - if (rd_adj != 0.0) dir->popularity[MDS_POP_NESTED].pop[META_POP_RD].adjust(rd_adj); + if (rd_adj != 0.0) dir->popularity[MDS_POP_NESTED].pop[META_POP_IRD].adjust(rd_adj); if (anydom) { dir->popularity[MDS_POP_ANYDOM].pop[type].hit(); diff --git a/ceph/mds/MDBalancer.h b/ceph/mds/MDBalancer.h index 3ce819586c4ed..f83ad748a3b7e 100644 --- a/ceph/mds/MDBalancer.h +++ b/ceph/mds/MDBalancer.h @@ -47,6 +47,7 @@ class MDBalancer { // per-epoch scatter/gathered info hash_map mds_load; + hash_map mds_meta_load; map > mds_import_map; // per-epoch state @@ -58,10 +59,10 @@ class MDBalancer { double try_match(int ex, double& maxex, int im, double& maxim); double get_maxim(int im) { - return target_load - mds_load[im].mds_load() - imported[im]; + return target_load - mds_meta_load[im] - imported[im]; } double get_maxex(int ex) { - return mds_load[ex].mds_load() - target_load - exported[ex]; + return mds_meta_load[ex] - target_load - exported[ex]; } public: diff --git a/ceph/mds/MDS.cc b/ceph/mds/MDS.cc index 0dce7237b2aba..bbf51ac0ea9ab 100644 --- a/ceph/mds/MDS.cc +++ b/ceph/mds/MDS.cc @@ -1130,7 +1130,7 @@ void MDS::handle_client_stat(MClientRequest *req, mdcache->inode_file_read_finish(ref); - balancer->hit_inode(ref, META_POP_RD); + balancer->hit_inode(ref, META_POP_IRD); // reply reply_request(req, reply, ref); @@ -1157,7 +1157,7 @@ void MDS::handle_client_utime(MClientRequest *req, mdcache->inode_file_write_finish(cur); - balancer->hit_inode(cur, META_POP_WR); + balancer->hit_inode(cur, META_POP_IWR); // init reply MClientReply *reply = new MClientReply(req, 0); @@ -1192,7 +1192,7 @@ void MDS::handle_client_chmod(MClientRequest *req, mdcache->inode_hard_write_finish(cur); - balancer->hit_inode(cur, META_POP_WR); + balancer->hit_inode(cur, META_POP_IWR); // start reply MClientReply *reply = new MClientReply(req, 0); @@ -1222,7 +1222,7 @@ void MDS::handle_client_chown(MClientRequest *req, mdcache->inode_hard_write_finish(cur); - balancer->hit_inode(cur, META_POP_WR); + balancer->hit_inode(cur, META_POP_IWR); // start reply MClientReply *reply = new MClientReply(req, 0); @@ -1516,7 +1516,7 @@ void MDS::handle_client_readdir(MClientRequest *req, dout(10) << "reply to " << *req << " readdir " << numfiles << " files" << endl; reply->set_result(0); - balancer->hit_dir(cur->dir); + //balancer->hit_dir(cur->dir); // reply reply_request(req, reply, cur); @@ -1536,7 +1536,7 @@ void MDS::handle_client_mknod(MClientRequest *req, CInode *ref) newi->inode.mode &= ~INODE_TYPE_MASK; newi->inode.mode |= INODE_MODE_FILE; - balancer->hit_inode(newi, META_POP_WR); + balancer->hit_inode(newi, META_POP_IWR); // commit commit_request(req, new MClientReply(req, 0), ref, @@ -1636,7 +1636,7 @@ CInode *MDS::mknod(MClientRequest *req, CInode *diri, bool okexist) dir->link_inode(dn, newi); // bump modify pop - balancer->hit_dir(dir, true); + balancer->hit_dir(dir, META_POP_DWR); // mark dirty dn->mark_dirty(); @@ -1859,7 +1859,7 @@ void MDS::handle_client_link_finish(MClientRequest *req, CInode *ref, dn->link_remote( targeti ); // since we have it dn->mark_dirty(); - balancer->hit_dir(dn->dir); + balancer->hit_dir(dn->dir, META_POP_DWR); // done! commit_request(req, new MClientReply(req, 0), ref, @@ -2044,7 +2044,7 @@ void MDS::handle_client_unlink(MClientRequest *req, } - balancer->hit_dir(dn->dir); + balancer->hit_dir(dn->dir, META_POP_DWR); // it's locked, unlink! MClientReply *reply = new MClientReply(req,0); @@ -2548,8 +2548,8 @@ void MDS::handle_client_rename_local(MClientRequest *req, } } - balancer->hit_dir(srcdn->dir); - balancer->hit_dir(destdn->dir); + balancer->hit_dir(srcdn->dir, META_POP_DWR); + balancer->hit_dir(destdn->dir, META_POP_DWR); // we're golden. // everything is xlocked by us, we rule, etc. @@ -2587,7 +2587,7 @@ void MDS::handle_client_mkdir(MClientRequest *req, CInode *diri) newdir->mark_complete(); newdir->mark_dirty(); - balancer->hit_dir(newdir); + balancer->hit_dir(newdir, META_POP_DWR); // commit commit_request(req, new MClientReply(req, 0), diri, @@ -2620,7 +2620,7 @@ void MDS::handle_client_symlink(MClientRequest *req, CInode *diri) // set target newi->symlink = req->get_sarg(); - balancer->hit_inode(newi, META_POP_WR); + balancer->hit_dir(diri->dir, META_POP_DWR); // commit commit_request(req, new MClientReply(req, 0), diri, @@ -2654,7 +2654,7 @@ void MDS::handle_client_truncate(MClientRequest *req, CInode *cur) mdcache->inode_file_write_finish(cur); - balancer->hit_inode(cur, META_POP_WR); + balancer->hit_inode(cur, META_POP_IWR); // start reply MClientReply *reply = new MClientReply(req, 0); @@ -2716,12 +2716,12 @@ void MDS::handle_client_open(MClientRequest *req, Capability *cap = mdcache->issue_new_caps(cur, mode, req); if (!cap) return; // can't issue (yet), so wait! - dout(12) << "open gets caps " << cap_string(cap->pending()) << endl; + dout(12) << "open gets caps " << cap_string(cap->pending()) << " for " << req->get_source() << " on " << *cur << endl; - balancer->hit_inode(cur, META_POP_RD); + balancer->hit_inode(cur, META_POP_IRD); // reply - MClientReply *reply = new MClientReply(req, 0); // fh # is return code + MClientReply *reply = new MClientReply(req, 0); reply->set_file_caps(cap->pending()); reply->set_file_caps_seq(cap->get_last_seq()); reply->set_file_data_version(fdv); diff --git a/ceph/mds/mdstypes.h b/ceph/mds/mdstypes.h index 99d60a7dc4fcb..0d83360545497 100644 --- a/ceph/mds/mdstypes.h +++ b/ceph/mds/mdstypes.h @@ -9,23 +9,26 @@ using namespace std; #include "config.h" #include "common/DecayCounter.h" +#include + /* meta_load_t * hierarchical load for an inode/dir and it's children */ -#define META_POP_RD 0 -#define META_POP_WR 1 -#define META_POP_LOG 2 -#define META_POP_FDIR 3 -#define META_POP_CDIR 4 -#define META_NPOP 5 +#define META_POP_IRD 0 +#define META_POP_IWR 1 +#define META_POP_DWR 2 +//#define META_POP_LOG 3 +//#define META_POP_FDIR 4 +//#define META_POP_CDIR 4 +#define META_NPOP 3 class meta_load_t { public: DecayCounter pop[META_NPOP]; double meta_load() { - return pop[META_POP_RD].get() + pop[META_POP_WR].get(); + return pop[META_POP_IRD].get() + 2*pop[META_POP_IWR].get(); } void take(meta_load_t& other) { @@ -38,8 +41,8 @@ class meta_load_t { inline ostream& operator<<( ostream& out, meta_load_t& load ) { - return out << "metaload"; } @@ -86,15 +89,14 @@ class mds_load_t { double mds_load() { switch(g_conf.mds_bal_mode) { case 0: - return root.pop[META_POP_RD].get() - + 2.0*root.pop[META_POP_WR].get() + return root.meta_load() + req_rate + 10.0*queue_len; case 1: return req_rate + 10.0*queue_len; - } + assert(0); return 0; } -- 2.39.5