SAGE:
-- mpimessenger: allow self-addressed messages
-- change export_proxy_*'s to lists (faster)
+- mount, unmount, distribute osdcluster
-- make balancer export empty dirs
+//- mpimessenger: allow self-addressed messages
+//- change export_proxy_*'s to lists (faster)
+
+//- make balancer export empty dirs
+
+/- finish testing foreign renames
+
+- no-cache tests
+
+- basic client read, write.
+
+- symlink loops
- fix MExportAck and others to use dir+dentry, not inode
(otherwise this all breaks with hard links.. altho it probably needs reworking already!)
- fix noperm crap
- clean up xlock interaction
-- finish testing foreign renames
-- symlink loops
- fix logging model for data safety
- inodeupdate
#define __CLOCK_H
#include <sys/time.h>
+#include <time.h>
class Clock {
protected:
public:
Clock();
- double gettime();
void settime(double tm);
+
+ double gettime();
+ time_t get_unixtime() {
+ return time(0);
+ }
+
};
extern Clock g_clock;
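For reference, gettime() and settime() are only declared in this hunk; their definitions live in the corresponding .cc file, which is not shown. A purely hypothetical sketch of what they might look like, assuming a simple offset-on-top-of-wall-clock design (the offset member and both method bodies are assumptions; only the declarations and get_unixtime() come from the patch):

  // hypothetical sketch, not the actual implementation
  #include <sys/time.h>
  #include <time.h>

  class Clock {
   protected:
    double offset;                     // assumed: adjustment applied to wall-clock time
   public:
    Clock() : offset(0.0) {}

    double gettime() {                 // seconds since epoch, with sub-second precision
      struct timeval tv;
      gettimeofday(&tv, 0);
      return tv.tv_sec + tv.tv_usec / 1000000.0 + offset;
    }
    void settime(double tm) {          // shift the clock so gettime() reads ~tm from now on
      offset += tm - gettime();
    }
    time_t get_unixtime() {            // whole seconds, as added in this hunk
      return time(0);
    }
  };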
mds_log_before_reply: true,
// --- fakeclient (mds regression testing) ---
- num_fakeclient: 1000,
+ num_fakeclient: 100,
fakeclient_requests: 100,
fakeclient_deterministic: false,
//cout << " load is " << load << " have " << mds_load.size() << endl;
int cluster_size = mds->get_cluster()->get_num_mds();
- if (mds_load.size() == cluster_size)
+ if (mds_load.size() == cluster_size) {
+ // let's go!
+ export_empties();
do_rebalance();
+ }
// done
delete m;
}
+void MDBalancer::export_empties()
+{
+ dout(5) << "export_empties checking for empty imports" << endl;
+
+ for (set<CDir*>::iterator it = mds->mdcache->imports.begin();
+ it != mds->mdcache->imports.end();
+ it++) {
+ CDir *dir = *it;
+
+ if (!dir->inode->is_root() && dir->get_size() == 0)
+ mds->mdcache->export_empty_import(dir);
+ }
+}
+
void MDBalancer::do_rebalance()
{
int cluster_size = mds->get_cluster()->get_num_mds();
void send_heartbeat();
void handle_heartbeat(MHeartbeat *m);
+ void export_empties();
void do_rebalance();
void find_exports(CDir *dir,
double amount,
// proxy
dir->state_set(CDIR_STATE_PROXY);
dir->get(CDIR_PIN_PROXY);
- export_proxy_dirinos[basedir].insert(dir->ino());
+ export_proxy_dirinos[basedir].push_back(dir->ino());
if (!dir->is_clean())
dir->mark_clean();
}
// add to proxy
- export_proxy_inos[basedir].insert(in->ino());
+ export_proxy_inos[basedir].push_back(in->ino());
in->state_set(CINODE_STATE_PROXY);
in->get(CINODE_PIN_PROXY);
// unpin proxies
// inodes
- for (set<inodeno_t>::iterator it = export_proxy_inos[dir].begin();
+ for (list<inodeno_t>::iterator it = export_proxy_inos[dir].begin();
it != export_proxy_inos[dir].end();
it++) {
CInode *in = get_inode(*it);
export_proxy_inos.erase(dir);
// dirs
- for (set<inodeno_t>::iterator it = export_proxy_dirinos[dir].begin();
+ for (list<inodeno_t>::iterator it = export_proxy_dirinos[dir].begin();
it != export_proxy_dirinos[dir].end();
it++) {
CDir *dir = get_inode(*it)->dir;
set<CDir*> imports; // includes root (on mds0)
set<CDir*> exports;
map<CDir*,set<CDir*> > nested_exports;
- //multimap<CDir*,CDir*> nested_exports; // nested exports of (imports|root)
- // hashing madness
- multimap<CDir*, int> unhash_waiting; // nodes i am waiting for UnhashDirAck's from
- multimap<inodeno_t, inodeno_t> import_hashed_replicate_waiting; // nodes i am waiting to discover to complete my import of a hashed dir
- // maps frozen_dir_ino's to waiting-for-discover ino's.
- multimap<inodeno_t, inodeno_t> import_hashed_frozen_waiting; // dirs i froze (for the above)
- // maps import_root_ino's to frozen dir ino's (with pending discovers)
-
// export fun
map<CDir*, set<int> > export_notify_ack_waiting; // nodes i am waiting to get export_notify_ack's from
- map<CDir*, set<inodeno_t> > export_proxy_inos;
- map<CDir*, set<inodeno_t> > export_proxy_dirinos;
+ map<CDir*, list<inodeno_t> > export_proxy_inos;
+ map<CDir*, list<inodeno_t> > export_proxy_dirinos;
set<inodeno_t> stray_export_warnings; // notifies i haven't seen
map<inodeno_t, MExportDirNotify*> stray_export_notifies;
set<inodeno_t> stray_rename_warnings; // notifies i haven't seen
map<inodeno_t, MRenameNotify*> stray_rename_notifies;
+ // hashing madness
+ multimap<CDir*, int> unhash_waiting; // nodes i am waiting for UnhashDirAck's from
+ multimap<inodeno_t, inodeno_t> import_hashed_replicate_waiting; // nodes i am waiting to discover to complete my import of a hashed dir
+ // maps frozen_dir_ino's to waiting-for-discover ino's.
+ multimap<inodeno_t, inodeno_t> import_hashed_frozen_waiting; // dirs i froze (for the above)
+ // maps import_root_ino's to frozen dir ino's (with pending discovers)
+
+
+
public:
// active MDS requests
map<Message*, active_request_t> active_requests;
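The TODO entry above describes the container switch as "(faster)": the proxy ino collections are only appended to during an export and then walked once when the proxies are unpinned, so nothing needs the ordering or uniqueness a std::set maintains. A minimal stand-alone sketch of the trade-off (inodeno_t is stubbed out here; the real typedef lives elsewhere in the tree):

  #include <stdint.h>
  #include <list>
  #include <set>

  typedef uint64_t inodeno_t;     // stand-in; real typedef is elsewhere

  std::set<inodeno_t>  proxy_set;
  std::list<inodeno_t> proxy_list;

  void remember(inodeno_t ino) {
    proxy_set.insert(ino);        // O(log n): tree node allocation, ordering, dedup check
    proxy_list.push_back(ino);    // O(1): append only, which is all the exporter needs
  }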
newi->inode.mode = req->get_iarg();
newi->inode.uid = req->get_caller_uid();
newi->inode.gid = req->get_caller_gid();
- newi->inode.ctime = 1; // now, FIXME
- newi->inode.mtime = 1; // now, FIXME
- newi->inode.atime = 1; // now, FIXME
+ newi->inode.ctime = newi->inode.mtime = newi->inode.atime = g_clock.get_unixtime(); // now
// link
if (!dn)
// ok, done passing buck.
// src dentry
- CDentry *srcdn = srcdir->lookup(srcname); // FIXME for hard links
+ CDentry *srcdn = srcdir->lookup(srcname);
// xlocked?
if (srcdn && !srcdn->can_read(req)) {
// FIXME: is this necessary?
- /*
if (destdn->inode) {
if (destdn->inode->is_dir()) {
dout(7) << "handle_client_rename_local failing, dest exists and is a dir: " << *destdn->inode << endl;
+ assert(0);
reply_request(req, -EINVAL);
return;
}
if (srcdn->inode->is_dir()) {
dout(7) << "handle_client_rename_local failing, dest exists and src is a dir: " << *destdn->inode << endl;
+ assert(0);
reply_request(req, -EINVAL);
return;
}
// REQXLOCKC, which will only allow you to lock a file.
// so we know dest is a file, or non-existent
if (!destlocal) {
-
+ if (srcdn->inode->is_dir()) {
+ // help: maybe the dest exists and is a file? ..... FIXME
+ } else {
+ // we're fine, src is file, dest is file|dne
+ }
}
}
- */
+
// we're golden.
dout(10) << "mpi_sending " << size << " byte message to rank " << rank << " tag " << tag << endl;
// sending
- ASSERT(MPI_Send((void*)buf,
- size,
- MPI_CHAR,
- rank,
- tag,
- MPI_COMM_WORLD) == MPI_SUCCESS);
+ MPI_Request req; // non-blocking, in case we send to ourselves from same thread
+ ASSERT(MPI_Isend((void*)buf,
+ size,
+ MPI_CHAR,
+ rank,
+ tag,
+ MPI_COMM_WORLD,
+ &req) == MPI_SUCCESS);
}
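The switch to MPI_Isend is what makes self-addressed messages workable: a blocking MPI_Send to our own rank can stall forever when the message exceeds MPI's internal buffering and no matching receive has been posted yet, while the non-blocking variant returns immediately. One caveat, shown here only as a sketch and not as part of the patch: the request handle returned by MPI_Isend should eventually be completed or released, and the buffer must stay untouched until the send actually drains. Something along these lines, with buf/size/rank/tag as in the surrounding code:

  MPI_Request req;
  int rc = MPI_Isend((void*)buf, size, MPI_CHAR, rank, tag,
                     MPI_COMM_WORLD, &req);
  ASSERT(rc == MPI_SUCCESS);  // call kept outside ASSERT, in case ASSERT compiles away

  // Either block until the message has left the buffer
  // (which would reintroduce the self-send stall if no receive is posted yet):
  //   MPI_Wait(&req, MPI_STATUS_IGNORE);
  // or hand the request back to MPI and let it complete in the background,
  // on the assumption that buf remains valid until the receiver picks it up:
  MPI_Request_free(&req);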