From 008a98e1440bb41a010c0acbfb9d40073602757b Mon Sep 17 00:00:00 2001 From: sageweil Date: Fri, 2 Nov 2007 18:00:32 +0000 Subject: [PATCH] migrate_stray; filepath fixup; messenger _myaddr/inst cleanup; server rename pin bugfix git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@2013 29311d96-e01e-0410-9327-a35deaab8ce9 --- branches/sage/mds/TODO | 37 +++--- branches/sage/mds/client/Client.cc | 29 +++-- branches/sage/mds/client/Client.h | 4 +- branches/sage/mds/client/SyntheticClient.cc | 16 +-- branches/sage/mds/include/filepath.h | 77 +++++++----- branches/sage/mds/include/types.h | 22 +--- branches/sage/mds/mds/CDentry.cc | 28 +++-- branches/sage/mds/mds/CDentry.h | 5 +- branches/sage/mds/mds/CDir.cc | 2 +- branches/sage/mds/mds/CInode.cc | 18 ++- branches/sage/mds/mds/CInode.h | 7 +- branches/sage/mds/mds/ClientMap.h | 21 ++-- branches/sage/mds/mds/Locker.cc | 11 +- branches/sage/mds/mds/MDBalancer.cc | 2 +- branches/sage/mds/mds/MDCache.cc | 114 +++++++++++++++--- branches/sage/mds/mds/MDCache.h | 3 +- branches/sage/mds/mds/MDS.cc | 12 +- branches/sage/mds/mds/MDS.h | 4 + branches/sage/mds/mds/Migrator.cc | 15 ++- branches/sage/mds/mds/Migrator.h | 4 +- branches/sage/mds/mds/Server.cc | 82 +++++++------ branches/sage/mds/mds/journal.cc | 4 +- branches/sage/mds/mds/mdstypes.h | 24 ++-- branches/sage/mds/messages/MClientFileCaps.h | 8 +- branches/sage/mds/messages/MClientReply.h | 4 + branches/sage/mds/messages/MClientRequest.h | 38 +++--- branches/sage/mds/messages/MDirUpdate.h | 13 +- .../sage/mds/messages/MExportDirDiscover.h | 8 +- branches/sage/mds/messages/MExportStrays.h | 43 ------- branches/sage/mds/messages/MMDSSlaveRequest.h | 12 +- branches/sage/mds/msg/FakeMessenger.cc | 9 +- branches/sage/mds/msg/FakeMessenger.h | 9 -- branches/sage/mds/msg/Message.cc | 1 + branches/sage/mds/msg/Message.h | 6 - branches/sage/mds/msg/Messenger.h | 20 +-- branches/sage/mds/msg/SimpleMessenger.cc | 37 +----- branches/sage/mds/msg/SimpleMessenger.h | 6 +- branches/sage/mds/msg/msg_types.h | 2 +- 38 files changed, 417 insertions(+), 340 deletions(-) delete mode 100644 branches/sage/mds/messages/MExportStrays.h diff --git a/branches/sage/mds/TODO b/branches/sage/mds/TODO index 5e1a37b7441a4..5c670b9694ce1 100644 --- a/branches/sage/mds/TODO +++ b/branches/sage/mds/TODO @@ -53,36 +53,26 @@ mdsmon -- clean up client mds session vs mdsmap behavior mds bugs -- open file rejournaling vs capped log... - - open files vs shutdown in general! need to export any caps on replicated metadata -- stray purge on shutdown - +- stray migration + - purge on shutdown - rename slave in-memory rollback on failure - - fix purge_stray bug - try_remove_unlinked_dn thing - - proper handling of cache expire messages during rejoin phase? mds +- stray reintegration - extend/clean up filepath to allow paths relative to an ino - fix path_traverse - fix reconnect/rejoin open file weirdness - -- get rid of C*Discover objects for replicate_to .. encode to bufferlists directly - -- stray reintegration - - real chdir (directory "open") - relative metadata ops - +- get rid of C*Discover objects for replicate_to .. encode to bufferlists directly? - consistency points/snapshots - dentry versions vs dirfrags... - - detect and deal with client failure - failure during reconnect vs clientmap. although probalby the whole thing needs a larger overhaul... @@ -113,6 +103,13 @@ mds - delayed replica caps release... we need to set a timer event? (and cancel it when appropriate?) +client +- clean up client mds session vs mdsmap behavior? +- client caps migration races + - caps need a seq number; reap logic needs to be a bit smarter + - also needs cope with mds failures +- fstat + osdmon - allow fresh replacement osds. add osd_created in osdmap, probably @@ -202,11 +199,14 @@ reliability ebofs - allow holes +- allow btree sets +- optionally scrub deallocated extents +- clone() + +- map ObjectStore - verify proper behavior of conflicting/overlapping reads of clones - combine inodes and/or cnodes into same blocks -- allow btree sets instead of maps -- eliminate nodepools - nonblocking write on missing onodes? - fix bug in node rotation on insert (and reenable) - fix NEAR_LAST_FWD (?) @@ -255,11 +255,6 @@ crush -client -- fstat -- mixed lazy and non-lazy io will clobber each others' caps in the buffer cache.. how to isolate.. -- test client caps migration w/ mds exports -- some heuristic behavior to consolidate caps to inode auth? diff --git a/branches/sage/mds/client/Client.cc b/branches/sage/mds/client/Client.cc index 117b2b08e401d..08f431bf84835 100644 --- a/branches/sage/mds/client/Client.cc +++ b/branches/sage/mds/client/Client.cc @@ -508,8 +508,7 @@ Dentry *Client::lookup(filepath& path) Dentry *dn = 0; for (unsigned i=0; isend_message(r, mdsmap->get_inst(mds), MDS_PORT_SERVER); @@ -1107,6 +1108,17 @@ void Client::handle_file_caps(MClientFileCaps *m) if (m->get_op() == MClientFileCaps::OP_IMPORT) { int other = m->get_mds(); + /* + * FIXME: there is a race here.. if the caps are exported twice in succession, + * you may get the second import before the first, in which case the middle MDS's + * import and then export won't be handled properly. + * there should be a sequence number attached to the cap, incremented each time + * it is exported... + */ + /* + * FIXME: handle mds failures + */ + if (in && in->stale_caps.count(other)) { dout(5) << "handle_file_caps on ino " << m->get_ino() << " from mds" << mds << " imported from mds" << other << dendl; @@ -1196,6 +1208,7 @@ void Client::handle_file_caps(MClientFileCaps *m) << " seq " << m->get_seq() << " " << cap_string(m->get_caps()) << ", which we don't want caps for, releasing." << dendl; + m->set_op(MClientFileCaps::OP_ACK); m->set_caps(0); m->set_wanted(0); messenger->send_message(m, m->get_source_inst(), MDS_PORT_LOCKER); @@ -1585,7 +1598,7 @@ int Client::_link(const char *existing, const char *newname) MClientRequest *req = new MClientRequest(MDS_OP_LINK, messenger->get_myinst()); req->set_path(newname); - req->set_sarg(existing); + req->set_path2(existing); // FIXME where does FUSE maintain user information req->set_caller_uid(getuid()); @@ -1664,7 +1677,7 @@ int Client::_rename(const char *from, const char *to) { MClientRequest *req = new MClientRequest(MDS_OP_RENAME, messenger->get_myinst()); req->set_path(from); - req->set_sarg(to); + req->set_path2(to); // FIXME where does FUSE maintain user information req->set_caller_uid(getuid()); @@ -1792,7 +1805,7 @@ int Client::_symlink(const char *target, const char *link) { MClientRequest *req = new MClientRequest(MDS_OP_SYMLINK, messenger->get_myinst()); req->set_path(link); - req->set_sarg(target); + req->set_path2(target); // FIXME where does FUSE maintain user information req->set_caller_uid(getuid()); @@ -1879,7 +1892,7 @@ int Client::_do_lstat(const char *path, int mask, Inode **in) req = new MClientRequest(MDS_OP_LSTAT, messenger->get_myinst()); req->args.stat.mask = mask; - req->set_path(fpath); + req->set_filepath(fpath); MClientReply *reply = make_request(req); res = reply->get_result(); @@ -2297,7 +2310,7 @@ int Client::_readdir_get_frag(DirResult *dirp) diri = inode_map[ino]; dout(10) << "_readdir_get_frag got diri " << diri << " " << diri->inode.ino << dendl; assert(diri); - assert(diri->inode.mode & INODE_MODE_DIR); + assert(diri->inode.is_dir()); } if (!dirp->inode && diri) { diff --git a/branches/sage/mds/client/Client.h b/branches/sage/mds/client/Client.h index 727098906c617..fff9829c90ec5 100644 --- a/branches/sage/mds/client/Client.h +++ b/branches/sage/mds/client/Client.h @@ -210,9 +210,7 @@ class Inode { inodeno_t ino() { return inode.ino; } - bool is_dir() { - return (inode.mode & INODE_TYPE_MASK) == INODE_MODE_DIR; - } + bool is_dir() { return inode.is_dir(); } int file_caps() { int c = 0; diff --git a/branches/sage/mds/client/SyntheticClient.cc b/branches/sage/mds/client/SyntheticClient.cc index 270ebafec010f..4ac6ab356081a 100644 --- a/branches/sage/mds/client/SyntheticClient.cc +++ b/branches/sage/mds/client/SyntheticClient.cc @@ -264,7 +264,7 @@ string SyntheticClient::get_sarg(int seq) } if (a.length() == 0 || a == "~") { char s[20]; - sprintf(s,"syn.%d.%d", client->whoami, seq); + sprintf(s,"/syn.%d.%d", client->whoami, seq); a = s; } return a; @@ -1365,7 +1365,7 @@ int SyntheticClient::clean_dir(string& basedir) continue; } - if ((st.st_mode & INODE_TYPE_MASK) == INODE_MODE_DIR) { + if ((st.st_mode & S_IFMT) == S_IFDIR) { clean_dir(file); client->rmdir(file.c_str()); } else { @@ -1434,7 +1434,7 @@ int SyntheticClient::full_walk(string& basedir) file.c_str()); - if ((st.st_mode & INODE_TYPE_MASK) == INODE_MODE_DIR) { + if ((st.st_mode & S_IFMT) == S_IFDIR) { dirq.push_back(file); } } @@ -1650,10 +1650,10 @@ int SyntheticClient::open_shared(int num, int count) for (int n=0; nopen(d,O_RDONLY); - fds.push_back(fd); + if (fd > 0) fds.push_back(fd); } - if (1) + if (false && client->get_nodeid() == 0) for (int n=0; nunlink(d); @@ -2825,10 +2825,10 @@ void SyntheticClient::import_find(const char *base, const char *find, bool data) assert(modestring.length() == 10); mode_t mode = 0; switch (modestring[0]) { - case 'd': mode |= INODE_MODE_DIR; break; - case 'l': mode |= INODE_MODE_SYMLINK; break; + case 'd': mode |= S_IFDIR; break; + case 'l': mode |= S_IFLNK; break; default: - case '-': mode |= INODE_MODE_FILE; break; + case '-': mode |= S_IFREG; break; } if (modestring[1] == 'r') mode |= 0400; if (modestring[2] == 'w') mode |= 0200; diff --git a/branches/sage/mds/include/filepath.h b/branches/sage/mds/include/filepath.h index 4425e1d7c5b3a..c92663049bf1f 100644 --- a/branches/sage/mds/include/filepath.h +++ b/branches/sage/mds/include/filepath.h @@ -30,13 +30,13 @@ using namespace std; #include "buffer.h" - +#include "encodable.h" class filepath { /** path - * can be relative "a/b/c" or absolute "/a/b/c". */ - string path; + inodeno_t ino; // base inode + string path; // relative path /** bits - path segemtns * this is ['a', 'b', 'c'] for both the aboslute and relative case. @@ -74,40 +74,29 @@ class filepath { } public: - filepath() {} - filepath(const string& s) { - set_path(s); - } - filepath(const char* s) { - set_path(s); - } + filepath() : ino(0) {} + filepath(const string& s, inodeno_t i=1) : ino(i), path(s) { } + filepath(const char* s, inodeno_t i=1) : ino(i), path(s) { } filepath(const filepath& o) { - set_path(o.get_path()); + ino = o.ino; + path = o.path; + bits = o.bits; } - // accessors - const string& get_path() const { - return path; - } - const char *c_str() const { - return path.c_str(); - } + inodeno_t get_ino() const { return ino; } + const string& get_path() const { return path; } + const char *c_str() const { return path.c_str(); } - int length() const { - return path.length(); - } + int length() const { return path.length(); } unsigned depth() const { if (bits.empty() && path.length() > 0) parse_bits(); return bits.size(); } - bool empty() const { - return path.length() == 0; - } + bool empty() const { return path.length() == 0; } - // FIXME: const-edness - bool absolute() { return path.length() && path[0] == '/'; } - bool relative() { return !absolute(); } + bool absolute() const { return ino > 0; } + bool relative() const { return !absolute(); } const string& operator[](int i) const { if (bits.empty() && path.length() > 0) parse_bits(); @@ -121,6 +110,7 @@ class filepath { filepath prefixpath(int s) const { filepath t; + t.ino = ino; for (int i=0; i 1) + out << '#' << hex << path.get_ino() << dec; + if (path.get_ino() > 0 && path.depth()) + out << '/'; return out << path.get_path(); } diff --git a/branches/sage/mds/include/types.h b/branches/sage/mds/include/types.h index cf8374d329a77..39f6afdf2376f 100644 --- a/branches/sage/mds/include/types.h +++ b/branches/sage/mds/include/types.h @@ -150,11 +150,6 @@ namespace __gnu_cxx { } -#define INODE_MODE_FILE 0100000 // S_IFREG -#define INODE_MODE_SYMLINK 0120000 // S_IFLNK -#define INODE_MODE_DIR 0040000 // S_IFDIR -#define INODE_TYPE_MASK 0170000 - #define FILE_MODE_R 1 #define FILE_MODE_W 2 #define FILE_MODE_RW (1|2) @@ -179,6 +174,9 @@ namespace __gnu_cxx { inline int DT_TO_MODE(int dt) { return dt << 12; } +inline unsigned char MODE_TO_DT(int mode) { + return mode >> 12; +} struct inode_t { // base (immutable) @@ -208,19 +206,11 @@ struct inode_t { version_t file_data_version; // auth only // file type - bool is_symlink() { return (mode & INODE_TYPE_MASK) == INODE_MODE_SYMLINK; } - bool is_dir() { return (mode & INODE_TYPE_MASK) == INODE_MODE_DIR; } - bool is_file() { return (mode & INODE_TYPE_MASK) == INODE_MODE_FILE; } - - // corresponding d_types - static const unsigned char DT_REG = 8; - static const unsigned char DT_DIR = 4; - static const unsigned char DT_LNK = 10; + bool is_symlink() { return (mode & S_IFMT) == S_IFLNK; } + bool is_dir() { return (mode & S_IFMT) == S_IFDIR; } + bool is_file() { return (mode & S_IFMT) == S_IFREG; } }; -inline unsigned char MODE_TO_DT(int mode) { - return mode >> 12; -} diff --git a/branches/sage/mds/mds/CDentry.cc b/branches/sage/mds/mds/CDentry.cc index 2b6bb3470e8a8..0ffedb20bc330 100644 --- a/branches/sage/mds/mds/CDentry.cc +++ b/branches/sage/mds/mds/CDentry.cc @@ -41,7 +41,7 @@ ostream& CDentry::print_db_line_prefix(ostream& out) ostream& operator<<(ostream& out, CDentry& dn) { - string path; + filepath path; dn.make_path(path); out << "[dentry " << path; @@ -59,10 +59,10 @@ ostream& operator<<(ostream& out, CDentry& dn) if (dn.is_null()) out << " NULL"; if (dn.is_remote()) { out << " REMOTE("; - switch (dn.get_remote_d_type()) { - case inode_t::DT_REG: out << "reg"; break; - case inode_t::DT_DIR: out << "dir"; break; - case inode_t::DT_LNK: out << "lnk"; break; + switch (dn.get_remote_d_type() << 12) { + case S_IFREG: out << "reg"; break; + case S_IFDIR: out << "dir"; break; + case S_IFLNK: out << "lnk"; break; default: assert(0); } out << ")"; @@ -186,10 +186,10 @@ void CDentry::mark_new() state_set(STATE_NEW); } -void CDentry::make_path(string& s) +void CDentry::make_path_string(string& s) { if (dir) { - dir->inode->make_path(s); + dir->inode->make_path_string(s); } else { s = "???"; } @@ -197,6 +197,19 @@ void CDentry::make_path(string& s) s += name; } +void CDentry::make_path(filepath& fp) +{ + assert(dir); + if (dir->inode->is_base()) + fp.set_ino(dir->inode->ino()); // base case + else if (dir->inode->get_parent_dn()) + dir->inode->get_parent_dn()->make_path(fp); // recurse + else + fp.set_ino(dir->inode->ino()); // relative but not base? hrm! + fp.push_dentry(name); +} + +/* void CDentry::make_path(string& s, inodeno_t tobase) { assert(dir); @@ -211,6 +224,7 @@ void CDentry::make_path(string& s, inodeno_t tobase) } s += name; } +*/ /** make_anchor_trace * construct an anchor trace for this dentry, as if it were linked to *in. diff --git a/branches/sage/mds/mds/CDentry.h b/branches/sage/mds/mds/CDentry.h index 416792beb8778..e28aa2e380b09 100644 --- a/branches/sage/mds/mds/CDentry.h +++ b/branches/sage/mds/mds/CDentry.h @@ -26,6 +26,7 @@ using namespace std; #include "include/buffer.h" #include "include/lru.h" #include "include/xlist.h" +#include "include/filepath.h" #include "mdstypes.h" #include "SimpleLock.h" @@ -184,8 +185,8 @@ public: const CDentry& operator= (const CDentry& right); // misc - void make_path(string& p); - void make_path(string& p, inodeno_t tobase); + void make_path_string(string& s); + void make_path(filepath& fp); void make_anchor_trace(vector& trace, CInode *in); // -- version -- diff --git a/branches/sage/mds/mds/CDir.cc b/branches/sage/mds/mds/CDir.cc index b4663b269c659..adaf5fa6c0d4f 100644 --- a/branches/sage/mds/mds/CDir.cc +++ b/branches/sage/mds/mds/CDir.cc @@ -45,7 +45,7 @@ ostream& operator<<(ostream& out, CDir& dir) { - string path; + filepath path; dir.get_inode()->make_path(path); out << "[dir " << dir.dirfrag() << " " << path << "/"; if (dir.is_auth()) { diff --git a/branches/sage/mds/mds/CInode.cc b/branches/sage/mds/mds/CInode.cc index d5981eea5ca3b..cb436254de13e 100644 --- a/branches/sage/mds/mds/CInode.cc +++ b/branches/sage/mds/mds/CInode.cc @@ -46,7 +46,7 @@ ostream& CInode::print_db_line_prefix(ostream& out) ostream& operator<<(ostream& out, CInode& in) { - string path; + filepath path; in.make_path(path); out << "[inode " << in.inode.ino << " " << path << (in.is_dir() ? "/ ":" "); if (in.is_auth()) { @@ -351,10 +351,10 @@ CInode *CInode::get_parent_inode() -void CInode::make_path(string& s) +void CInode::make_path_string(string& s) { if (parent) { - parent->make_path(s); + parent->make_path_string(s); } else if (is_root()) { s = ""; // root @@ -370,6 +370,14 @@ void CInode::make_path(string& s) } } +void CInode::make_path(filepath& fp) +{ + if (parent) + parent->make_path(fp); + else + fp.set_ino(ino()); +} + void CInode::make_anchor_trace(vector& trace) { if (parent) { @@ -738,8 +746,8 @@ void CInode::adjust_nested_auth_pins(int a) pair CInode::authority() { - if (force_auth.first >= 0) - return force_auth; + if (inode_auth.first >= 0) + return inode_auth; if (parent) return parent->dir->authority(); diff --git a/branches/sage/mds/mds/CInode.h b/branches/sage/mds/mds/CInode.h index ef05b81b2c6cf..2677f3d227881 100644 --- a/branches/sage/mds/mds/CInode.h +++ b/branches/sage/mds/mds/CInode.h @@ -185,7 +185,7 @@ public: CDentry *parent; // primary link set remote_parents; // if hard linked - pair force_auth; + pair inode_auth; // -- distributed state -- protected: @@ -229,7 +229,7 @@ public: last_journaled(0), last_open_journaled(0), //hack_accessed(true), stickydir_ref(0), - parent(0), force_auth(CDIR_AUTH_DEFAULT), + parent(0), inode_auth(CDIR_AUTH_DEFAULT), replica_caps_wanted(0), xlist_dirty(this), xlist_open_file(this), xlist_dirty_inode_mtime(this), xlist_purging_inode(this), @@ -280,7 +280,8 @@ public: } // -- misc -- - void make_path(string& s); + void make_path_string(string& s); + void make_path(filepath& s); void make_anchor_trace(vector& trace); void name_stray_dentry(string& dname); diff --git a/branches/sage/mds/mds/ClientMap.h b/branches/sage/mds/mds/ClientMap.h index b16abdbef7b91..59b3dde49b6be 100644 --- a/branches/sage/mds/mds/ClientMap.h +++ b/branches/sage/mds/mds/ClientMap.h @@ -125,19 +125,18 @@ public: private: // -- completed requests -- - // client id -> tid -> result code - map > completed_requests; // completed client requests - map > waiting_for_trim; + // who -> { tid set ... } + map > completed_requests; + map > waiting_for_trim; version_t requestmapv; public: void add_completed_request(metareqid_t ri) { - completed_requests[ri.client].insert(ri.tid); + completed_requests[ri.name].insert(ri.tid); requestmapv++; } - void trim_completed_requests(int client, - tid_t mintid) { // zero means trim all! - map >::iterator p = completed_requests.find(client); + void trim_completed_requests(entity_name_t who, tid_t mintid) { // zero means trim all! + map >::iterator p = completed_requests.find(who); if (p == completed_requests.end()) return; @@ -148,7 +147,7 @@ public: completed_requests.erase(p); // kick waiters - map >::iterator q = waiting_for_trim.find(client); + map >::iterator q = waiting_for_trim.find(who); if (q != waiting_for_trim.end()) { list fls; while (!q->second.empty() && @@ -162,11 +161,11 @@ public: } } void add_trim_waiter(metareqid_t ri, Context *c) { - waiting_for_trim[ri.client][ri.tid] = c; + waiting_for_trim[ri.name][ri.tid] = c; } bool have_completed_request(metareqid_t ri) { - return completed_requests.count(ri.client) && - completed_requests[ri.client].count(ri.tid); + return completed_requests.count(ri.name) && + completed_requests[ri.name].count(ri.tid); } diff --git a/branches/sage/mds/mds/Locker.cc b/branches/sage/mds/mds/Locker.cc index f5503d1345f31..f9b29bf23fee8 100644 --- a/branches/sage/mds/mds/Locker.cc +++ b/branches/sage/mds/mds/Locker.cc @@ -237,6 +237,7 @@ bool Locker::acquire_locks(MDRequest *mdr, MDSCacheObjectInfo info; (*q)->set_object_info(info); req->get_authpins().push_back(info); + mdr->pin(*q); } mds->send_message_mds(req, p->first, MDS_PORT_SERVER); @@ -452,7 +453,7 @@ Capability* Locker::issue_new_caps(CInode *in, dout(7) << "issue_new_caps for mode " << mode << " on " << *in << dendl; // my needs - int my_client = req->get_client(); + int my_client = req->get_client().num(); int my_want = 0; if (mode & FILE_MODE_R) my_want |= CAP_FILE_RDCACHE | CAP_FILE_RD; if (mode & FILE_MODE_W) my_want |= CAP_FILE_WRBUFFER | CAP_FILE_WR; @@ -1565,6 +1566,14 @@ void Locker::scatter_writebehind(ScatterLock *lock) CInode *in = (CInode*)lock->get_parent(); dout(10) << "scatter_writebehind " << in->inode.mtime << " on " << *lock << " on " << *in << dendl; + // hack: + if (in->is_base()) { + dout(10) << "scatter_writebehind just clearing updated flag for base inode " << *in << dendl; + lock->clear_updated(); + scatter_eval_gather(lock); + return; + } + // journal write-behind. inode_t *pi = in->project_inode(); pi->mtime = in->inode.mtime; // make sure an intermediate version isn't goofing us up diff --git a/branches/sage/mds/mds/MDBalancer.cc b/branches/sage/mds/mds/MDBalancer.cc index 8e9d0e2dd46fa..933c8306a7526 100644 --- a/branches/sage/mds/mds/MDBalancer.cc +++ b/branches/sage/mds/mds/MDBalancer.cc @@ -1001,7 +1001,7 @@ void MDBalancer::dump_pop_map() // filename last string p; - in->make_path(p); + in->make_path_string(p); myfile << "." << p; if (dir->get_frag() != frag_t()) myfile << "___" << (unsigned)dir->get_frag(); diff --git a/branches/sage/mds/mds/MDCache.cc b/branches/sage/mds/mds/MDCache.cc index 8456b5bb555b1..25ae8e6030379 100644 --- a/branches/sage/mds/mds/MDCache.cc +++ b/branches/sage/mds/mds/MDCache.cc @@ -211,7 +211,7 @@ CInode *MDCache::create_root_inode() root->inode.ino = MDS_INO_ROOT; // make it up (FIXME) - root->inode.mode = 0755 | INODE_MODE_DIR; + root->inode.mode = 0755 | S_IFDIR; root->inode.size = 0; root->inode.ctime = root->inode.mtime = g_clock.now(); @@ -219,7 +219,7 @@ CInode *MDCache::create_root_inode() root->inode.nlink = 1; root->inode.layout = g_OSD_MDDirLayout; - root->force_auth = pair(0, CDIR_AUTH_UNKNOWN); + root->inode_auth = pair(0, CDIR_AUTH_UNKNOWN); add_inode( root ); @@ -262,7 +262,7 @@ CInode *MDCache::create_stray_inode(int whose) in->inode.ino = MDS_INO_STRAY(whose); // make it up (FIXME) - in->inode.mode = 0755 | INODE_MODE_DIR; + in->inode.mode = 0755 | S_IFDIR; in->inode.size = 0; in->inode.ctime = in->inode.mtime = g_clock.now(); @@ -2224,7 +2224,7 @@ void MDCache::handle_cache_rejoin_strong(MMDSCacheRejoin *strong) if (!in) in = rejoin_invent_inode(p->first.ino); if (!in->is_dir()) { assert(in->state_test(CInode::STATE_REJOINUNDEF)); - in->inode.mode = INODE_MODE_DIR; + in->inode.mode = S_IFDIR; } dir = in->get_or_open_dirfrag(this, p->first.frag); } else { @@ -3570,6 +3570,61 @@ bool MDCache::shutdown_pass() } } + // export caps? + // note: this runs more often than it should. + static bool exported_caps = false; + static set exported_caps_in; + if (!exported_caps) { + dout(7) << "searching for caps to export" << dendl; + exported_caps = true; + + list dirq; + for (map >::iterator p = subtrees.begin(); + p != subtrees.end(); + ++p) { + if (exported_caps_in.count(p->first)) continue; + if (p->first->is_auth() || + p->first->is_ambiguous_auth()) + exported_caps = false; // we'll have to try again + else { + dirq.push_back(p->first); + exported_caps_in.insert(p->first); + } + } + while (!dirq.empty()) { + CDir *dir = dirq.front(); + dirq.pop_front(); + for (CDir::map_t::iterator p = dir->items.begin(); + p != dir->items.end(); + ++p) { + CDentry *dn = p->second; + if (!dn->is_primary()) continue; + CInode *in = dn->get_inode(); + if (in->is_dir()) + in->get_nested_dirfrags(dirq); + if (in->is_any_caps() && + !in->state_test(CInode::STATE_EXPORTINGCAPS)) + migrator->export_caps(in); + } + } + } + + static bool exported_strays = false; + if (!exported_strays && stray && mds->get_nodeid() > 0) { + list dfs; + stray->get_dirfrags(dfs); + while (!dfs.empty()) { + CDir *dir = dfs.front(); + dfs.pop_front(); + for (CDir::map_t::iterator p = dir->items.begin(); + p != dir->items.end(); + p++) { + CDentry *dn = p->second; + migrate_stray(dn, 0); // send to root! + } + } + exported_strays = true; + } // subtrees map not empty yet? if (!subtrees.empty()) { @@ -3730,7 +3785,7 @@ Context *MDCache::_get_waiter(MDRequest *mdr, Message *req) } int MDCache::path_traverse(MDRequest *mdr, Message *req, // who - CInode *base, filepath& origpath, // what + filepath& origpath, // what vector& trace, // result bool follow_trailing_symlink, // how int onfail) @@ -3746,11 +3801,17 @@ int MDCache::path_traverse(MDRequest *mdr, Message *req, // who set< pair > symlinks_resolved; // root - CInode *cur = base; - if (!cur) cur = get_root(); + CInode *cur = get_inode(origpath.get_ino()); if (cur == NULL) { - dout(7) << "traverse: i don't have root" << dendl; - open_root(_get_waiter(mdr, req)); + dout(7) << "traverse: opening base ino " << origpath.get_ino() << dendl; + if (origpath.get_ino() == MDS_INO_ROOT) + open_root(_get_waiter(mdr, req)); + else if (MDS_INO_IS_STRAY(origpath.get_ino())) + open_foreign_stray(origpath.get_ino() - MDS_INO_STRAY_OFFSET, _get_waiter(mdr, req)); + else { + assert(0); // hrm.. broken + return -EIO; + } return 1; } @@ -3766,7 +3827,6 @@ int MDCache::path_traverse(MDRequest *mdr, Message *req, // who while (depth < path.depth()) { dout(12) << "traverse: path seg depth " << depth << " = " << path[depth] << dendl; - // ENOTDIR? if (!cur->is_dir()) { dout(7) << "traverse: " << *cur << " not a dir " << dendl; return -ENOTDIR; @@ -4739,8 +4799,20 @@ void MDCache::reintegrate_stray(CDentry *dn, CDentry *rlink) void MDCache::migrate_stray(CDentry *dn, int dest) { - dout(10) << "migrate_stray to mds" << dest << " " << *dn << dendl; + dout(10) << "migrate_stray to mds" << dest << " " << *dn << " " << *dn->inode << dendl; + + // rename it to another mds. + string dname; + dn->get_inode()->name_stray_dentry(dname); + filepath src(dname, MDS_INO_STRAY(mds->get_nodeid())); + filepath dst(dname, MDS_INO_STRAY(dest)); + + MClientRequest *req = new MClientRequest(MDS_OP_RENAME, mds->messenger->get_myinst()); + req->set_filepath(src); + req->set_filepath2(dst); + req->set_tid(mds->issue_tid()); + mds->send_message_mds(req, dest, MDS_PORT_SERVER); } @@ -5216,7 +5288,6 @@ void MDCache::handle_discover_reply(MDiscoverReply *m) // add base inode cur = add_replica_inode(m->get_inode(0), NULL, finished); - cur->force_auth = pair(m->get_source().num(), CDIR_AUTH_UNKNOWN); dout(7) << "discover_reply got base inode " << *cur << dendl; @@ -5460,12 +5531,20 @@ CInode *MDCache::add_replica_inode(CInodeDiscover& dis, CDentry *dn, listis_base()) { + if (in->ino() == MDS_INO_ROOT) + in->inode_auth.first = 0; + else if (MDS_INO_IS_STRAY(in->ino())) + in->inode_auth.first = in->ino() - MDS_INO_STRAY_OFFSET; + else + assert(0); + } + dout(10) << "add_replica_inode added " << *in << dendl; if (dn && dn->is_null()) dn->dir->link_primary_inode(dn, in); } else { dis.update_inode(in); - dout(10) << "add_replica_inode added " << *in << dendl; + dout(10) << "add_replica_inode had " << *in << dendl; } if (dn) { @@ -5488,7 +5567,6 @@ CDentry *MDCache::add_replica_stray(bufferlist &bl, CInode *in, int from) CInodeDiscover indis; indis._decode(bl, off); CInode *strayin = add_replica_inode(indis, NULL, finished); - strayin->force_auth = pair(from, CDIR_AUTH_UNKNOWN); dout(15) << "strayin " << *strayin << dendl; // dir @@ -5579,7 +5657,7 @@ int MDCache::send_dir_updates(CDir *dir, bool bcast) dout(7) << "sending dir_update on " << *dir << " bcast " << bcast << " to " << who << dendl; - string path; + filepath path; dir->inode->make_path(path); int whoami = mds->get_nodeid(); @@ -5619,7 +5697,7 @@ void MDCache::handle_dir_update(MDirUpdate *m) dout(5) << "trying discover on dir_update for " << path << dendl; int r = path_traverse(0, m, - 0, path, trace, true, + path, trace, true, MDS_TRAVERSE_DISCOVER); if (r > 0) return; @@ -5687,7 +5765,7 @@ void MDCache::handle_dentry_unlink(MDentryUnlink *m) // send caps to auth (if we're not already) if (in->is_any_caps() && !in->state_test(CInode::STATE_EXPORTINGCAPS)) - migrator->export_caps(in, in->authority().first); + migrator->export_caps(in); lru.lru_bottouch(straydn); // move stray to end of lru diff --git a/branches/sage/mds/mds/MDCache.h b/branches/sage/mds/mds/MDCache.h index 896ee791c367b..61a38f98049a7 100644 --- a/branches/sage/mds/mds/MDCache.h +++ b/branches/sage/mds/mds/MDCache.h @@ -582,8 +582,7 @@ public: CDentry *get_or_create_stray_dentry(CInode *in); Context *_get_waiter(MDRequest *mdr, Message *req); - int path_traverse(MDRequest *mdr, Message *req, - CInode *base, filepath& path, + int path_traverse(MDRequest *mdr, Message *req, filepath& path, vector& trace, bool follow_trailing_sym, int onfail); bool path_is_mine(filepath& path); diff --git a/branches/sage/mds/mds/MDS.cc b/branches/sage/mds/mds/MDS.cc index bf9f41cfac851..0ec62b7f4ed05 100644 --- a/branches/sage/mds/mds/MDS.cc +++ b/branches/sage/mds/mds/MDS.cc @@ -75,6 +75,8 @@ MDS::MDS(int whoami, Messenger *m, MonMap *mm) : this->whoami = whoami; + last_tid = 0; + monmap = mm; messenger = m; @@ -251,7 +253,8 @@ void MDS::send_message_mds(Message *m, int mds, int port, int fromport) void MDS::forward_message_mds(Message *req, int mds, int port) { // client request? - if (req->get_type() == MSG_CLIENT_REQUEST) { + if (req->get_type() == MSG_CLIENT_REQUEST && + ((MClientRequest*)req)->get_client_inst().name.is_client()) { MClientRequest *creq = (MClientRequest*)req; creq->inc_num_fwd(); // inc forward counter @@ -260,8 +263,13 @@ void MDS::forward_message_mds(Message *req, int mds, int port) creq->get_client_inst()); if (!creq->is_idempotent()) { + /* don't actually forward if non-idempotent! + * client has to do it. although the MDS will ignore duplicate requests, + * the affected metadata may migrate, in which case the new authority + * won't have the metareq_id in the completed request map. + */ delete req; - return; // don't actually forward if non-idempotent! client has to do it. + return; } } diff --git a/branches/sage/mds/mds/MDS.h b/branches/sage/mds/mds/MDS.h index 6d3f90dc465af..89f6e6d278dd3 100644 --- a/branches/sage/mds/mds/MDS.h +++ b/branches/sage/mds/mds/MDS.h @@ -118,6 +118,8 @@ class MDS : public Dispatcher { map peer_mdsmap_epoch; + tid_t last_tid; // for mds-initiated requests (e.g. stray rename) + public: void wait_for_active(Context *c) { waiting_for_active.push_back(c); @@ -142,6 +144,8 @@ class MDS : public Dispatcher { void set_want_state(int s); + tid_t issue_tid() { return ++last_tid; } + // -- waiters -- list finished_queue; diff --git a/branches/sage/mds/mds/Migrator.cc b/branches/sage/mds/mds/Migrator.cc index bc7c82396fe55..ea1e9f1b216c2 100644 --- a/branches/sage/mds/mds/Migrator.cc +++ b/branches/sage/mds/mds/Migrator.cc @@ -52,6 +52,10 @@ #include "messages/MExportCaps.h" #include "messages/MExportCapsAck.h" + + + + #include "config.h" #define dout(l) if (l<=g_conf.debug || l <= g_conf.debug_mds || l <= g_conf.debug_mds_migrator) *_dout << dbeginl << g_clock.now() << " mds" << mds->get_nodeid() << ".migrator " @@ -1394,9 +1398,7 @@ void Migrator::handle_export_discover(MExportDirDiscover *m) // must discover it! filepath fpath(m->get_path()); vector trace; - int r = cache->path_traverse(0, m, - 0, fpath, trace, true, - MDS_TRAVERSE_DISCOVER); + int r = cache->path_traverse(0, m, fpath, trace, true, MDS_TRAVERSE_DISCOVER); if (r > 0) return; // wait if (r < 0) { dout(7) << "handle_export_discover_2 failed to discover or not dir " << m->get_path() << ", NAK" << dendl; @@ -2223,8 +2225,9 @@ void Migrator::handle_export_notify(MExportDirNotify *m) -void Migrator::export_caps(CInode *in, int dest) +void Migrator::export_caps(CInode *in) { + int dest = in->authority().first; dout(7) << "export_caps to mds" << dest << " " << *in << dendl; assert(in->is_any_caps()); @@ -2306,3 +2309,7 @@ void Migrator::logged_import_caps(CInode *in, mds->send_message_mds(new MExportCapsAck(in->ino()), from, MDS_PORT_MIGRATOR); } + + + + diff --git a/branches/sage/mds/mds/Migrator.h b/branches/sage/mds/mds/Migrator.h index ca84968ac5d57..b32a6a1a4f33f 100644 --- a/branches/sage/mds/mds/Migrator.h +++ b/branches/sage/mds/mds/Migrator.h @@ -200,7 +200,7 @@ public: } void clear_export_proxy_pins(CDir *dir); - void export_caps(CInode *in, int dest); + void export_caps(CInode *in); protected: void handle_export_discover_ack(MExportDirDiscoverAck *m); @@ -216,6 +216,7 @@ public: void handle_export_caps_ack(MExportCapsAck *m); + friend class C_MDC_ExportFreeze; friend class C_MDS_ExportFinishLogged; friend class C_M_ExportGo; @@ -270,7 +271,6 @@ protected: // bystander void handle_export_notify(MExportDirNotify *m); - }; diff --git a/branches/sage/mds/mds/Server.cc b/branches/sage/mds/mds/Server.cc index 3a45b625f8a53..56d6960bba33c 100644 --- a/branches/sage/mds/mds/Server.cc +++ b/branches/sage/mds/mds/Server.cc @@ -93,7 +93,7 @@ void Server::dispatch(Message *m) } // active? - if (!mds->is_active()) { + if (!mds->is_active() && !mds->is_stopping()) { dout(3) << "not active yet, waiting" << dendl; mds->wait_for_active(new C_MDS_RetryMessage(mds, m)); return; @@ -196,7 +196,7 @@ void Server::_session_logged(entity_inst_t client_inst, bool open, version_t cma mds->clientmap.close_session(from); // purge completed requests from clientmap - mds->clientmap.trim_completed_requests(from, 0); + mds->clientmap.trim_completed_requests(client_inst.name, 0); } else { // close must have been canceled (by an import?) ... assert(!open); @@ -459,9 +459,14 @@ void Server::reply_request(MDRequest *mdr, MClientReply *reply, CInode *tracei) if (tracei) { reply->set_trace_dist( tracei, mds->get_nodeid() ); } + + reply->set_mdsmap_epoch(mds->mdsmap->get_epoch()); // send reply - messenger->send_message(reply, req->get_client_inst()); + if (req->get_client_inst().name.is_mds()) + delete reply; // mds doesn't need a reply + else + messenger->send_message(reply, req->get_client_inst()); // finish request mdcache->request_finish(mdr); @@ -477,12 +482,12 @@ void Server::reply_request(MDRequest *mdr, MClientReply *reply, CInode *tracei) void Server::handle_client_request(MClientRequest *req) { dout(4) << "handle_client_request " << *req << dendl; - int client = req->get_client(); if (logger) logger->inc("hcreq"); - if (!mds->is_active()) { - dout(5) << " not active, discarding client request." << dendl; + if (!mds->is_active() && + !(mds->is_stopping() && req->get_client_inst().name.is_mds())) { + dout(5) << " not active (or stopping+mds), discarding request." << dendl; delete req; return; } @@ -494,12 +499,18 @@ void Server::handle_client_request(MClientRequest *req) } // active session? - if (!mds->clientmap.have_session(client)) { - dout(5) << "no session for client" << client << ", dropping" << dendl; + if (req->get_client_inst().name.is_client() && + !mds->clientmap.have_session(req->get_client_inst().name.num())) { + dout(5) << "no session for " << req->get_client_inst().name << ", dropping" << dendl; delete req; return; } + // old mdsmap? + if (req->get_mdsmap_epoch() < mds->mdsmap->get_epoch()) { + // send it? hrm, this isn't ideal; they may get a lot of copies if + // they have a high request rate. + } // okay, i want CInode *ref = 0; @@ -508,8 +519,7 @@ void Server::handle_client_request(MClientRequest *req) if (req->get_retry_attempt()) { if (mds->clientmap.have_completed_request(req->get_reqid())) { dout(5) << "already completed " << req->get_reqid() << dendl; - mds->messenger->send_message(new MClientReply(req, 0), - req->get_client_inst()); + mds->messenger->send_message(new MClientReply(req, 0), req->get_client_inst()); delete req; return; } @@ -517,7 +527,7 @@ void Server::handle_client_request(MClientRequest *req) // trim completed_request list if (req->get_oldest_client_tid() > 0) { dout(15) << " oldest_client_tid=" << req->get_oldest_client_tid() << dendl; - mds->clientmap.trim_completed_requests(client, + mds->clientmap.trim_completed_requests(req->get_client_inst().name, req->get_oldest_client_tid()); } @@ -1046,7 +1056,7 @@ CDir *Server::traverse_to_auth_dir(MDRequest *mdr, vector &trace, file // traverse to parent dir int r = mdcache->path_traverse(mdr, mdr->client_request, - 0, refpath, trace, true, + refpath, trace, true, MDS_TRAVERSE_FORWARD); if (r > 0) return 0; // delayed if (r < 0) { @@ -1057,7 +1067,7 @@ CDir *Server::traverse_to_auth_dir(MDRequest *mdr, vector &trace, file // open inode CInode *diri; if (trace.empty()) - diri = mdcache->get_root(); + diri = mdcache->get_inode(refpath.get_ino()); else diri = mdcache->get_dentry_inode(trace[trace.size()-1], mdr); if (!diri) @@ -1086,7 +1096,7 @@ CInode* Server::rdlock_path_pin_ref(MDRequest *mdr, bool want_auth) filepath refpath = req->get_filepath(); vector trace; int r = mdcache->path_traverse(mdr, req, - 0, refpath, + refpath, trace, req->follow_trailing_symlink(), MDS_TRAVERSE_FORWARD); if (r > 0) return false; // delayed @@ -1296,7 +1306,7 @@ version_t Server::predirty_dn_diri(MDRequest *mdr, CDentry *dn, EMetaBlob *blob) version_t dirpv = 0; CInode *diri = dn->dir->inode; - if (diri->is_root()) return 0; + if (diri->is_base()) return 0; if (diri->is_auth()) { assert(mdr->wrlocks.count(&diri->dirlock)); @@ -1717,8 +1727,8 @@ void Server::handle_client_mknod(MDRequest *mdr) // it's a file. newi->inode.rdev = req->args.mknod.rdev; newi->inode.mode = req->args.mknod.mode; - newi->inode.mode &= ~INODE_TYPE_MASK; - newi->inode.mode |= INODE_MODE_FILE; + newi->inode.mode &= ~S_IFMT; + newi->inode.mode |= S_IFREG; newi->inode.version = dn->pre_dirty() - 1; // prepare finisher @@ -1752,8 +1762,8 @@ void Server::handle_client_mkdir(MDRequest *mdr) // it's a directory. newi->inode.mode = req->args.mkdir.mode; - newi->inode.mode &= ~INODE_TYPE_MASK; - newi->inode.mode |= INODE_MODE_DIR; + newi->inode.mode &= ~S_IFMT; + newi->inode.mode |= S_IFDIR; newi->inode.layout = g_OSD_MDDirLayout; newi->inode.version = dn->pre_dirty() - 1; @@ -1808,9 +1818,9 @@ void Server::handle_client_symlink(MDRequest *mdr) assert(newi); // it's a symlink - newi->inode.mode &= ~INODE_TYPE_MASK; - newi->inode.mode |= INODE_MODE_SYMLINK; - newi->symlink = req->get_sarg(); + newi->inode.mode &= ~S_IFMT; + newi->inode.mode |= S_IFLNK; + newi->symlink = req->get_path2(); newi->inode.version = dn->pre_dirty() - 1; // prepare finisher @@ -1837,7 +1847,7 @@ void Server::handle_client_link(MDRequest *mdr) MClientRequest *req = mdr->client_request; dout(7) << "handle_client_link " << req->get_filepath() - << " to " << req->get_sarg() + << " to " << req->get_filepath2() << dendl; // traverse to dest dir, make sure it's ours. @@ -1849,11 +1859,11 @@ void Server::handle_client_link(MDRequest *mdr) dout(7) << "handle_client_link link " << dname << " in " << *dir << dendl; // traverse to link target - filepath targetpath = req->get_sarg(); + filepath targetpath = req->get_filepath2(); dout(7) << "handle_client_link discovering target " << targetpath << dendl; vector targettrace; int r = mdcache->path_traverse(mdr, req, - 0, targetpath, targettrace, false, + targetpath, targettrace, false, MDS_TRAVERSE_DISCOVER); if (r > 0) return; // wait if (targettrace.empty()) r = -EINVAL; @@ -2267,7 +2277,7 @@ void Server::handle_client_unlink(MDRequest *mdr) // traverse to path vector trace; int r = mdcache->path_traverse(mdr, req, - 0, req->get_filepath(), trace, false, + req->get_filepath(), trace, false, MDS_TRAVERSE_FORWARD); if (r > 0) return; if (trace.empty()) r = -EINVAL; // can't unlink root @@ -2697,7 +2707,7 @@ void Server::handle_client_rename(MDRequest *mdr) // traverse to dest dir (not dest) // we do this FIRST, because the rename should occur on the // destdn's auth. - const filepath &destpath = req->get_sarg(); + const filepath &destpath = req->get_filepath2(); const string &destname = destpath.last_dentry(); vector desttrace; CDir *destdir = traverse_to_auth_dir(mdr, desttrace, destpath); @@ -2709,7 +2719,7 @@ void Server::handle_client_rename(MDRequest *mdr) filepath srcpath = req->get_filepath(); vector srctrace; int r = mdcache->path_traverse(mdr, req, - 0, srcpath, srctrace, false, + srcpath, srctrace, false, MDS_TRAVERSE_DISCOVER); if (r > 0) return; if (srctrace.empty()) r = -EINVAL; // can't rename root @@ -3257,7 +3267,6 @@ void Server::_rename_apply(MDRequest *mdr, CDentry *srcdn, CDentry *destdn, CDen // srcdn inode import? if (!srcdn->is_auth() && destdn->is_auth()) { assert(mdr->more()->inode_import.length() > 0); - assert(destdn->inode->is_dirty()); // finish cap imports finish_force_open_sessions(mdr->more()->imported_client_map); @@ -3328,7 +3337,7 @@ void Server::handle_slave_rename_prep(MDRequest *mdr) dout(10) << " dest " << destpath << dendl; vector trace; int r = mdcache->path_traverse(mdr, mdr->slave_request, - 0, destpath, trace, false, + destpath, trace, false, MDS_TRAVERSE_DISCOVERXLOCK); if (r > 0) return; assert(r == 0); // we shouldn't get an error here! @@ -3342,7 +3351,7 @@ void Server::handle_slave_rename_prep(MDRequest *mdr) filepath srcpath(mdr->slave_request->srcdnpath); dout(10) << " src " << srcpath << dendl; r = mdcache->path_traverse(mdr, mdr->slave_request, - 0, srcpath, trace, false, + srcpath, trace, false, MDS_TRAVERSE_DISCOVERXLOCK); if (r > 0) return; assert(r == 0); // we shouldn't get an error here! @@ -3740,11 +3749,16 @@ void Server::handle_client_open(MDRequest *mdr) if (!cur) return; // regular file? - if ((cur->inode.mode & INODE_TYPE_MASK) != INODE_MODE_FILE) { - dout(7) << "not a regular file " << *cur << dendl; + if (!cur->inode.is_file() && !cur->inode.is_dir()) { + dout(7) << "not a file or dir " << *cur << dendl; reply_request(mdr, -EINVAL); // FIXME what error do we want? return; } + // can only open a dir rdonly, no flags. + if (cur->inode.is_dir() && (cmode != FILE_MODE_R || flags != 0)) { + reply_request(mdr, -EINVAL); + return; + } // hmm, check permissions or something. @@ -4007,7 +4021,7 @@ void Server::handle_client_openc(MDRequest *mdr) // it's a file. in->inode.mode = req->args.open.mode; - in->inode.mode |= INODE_MODE_FILE; + in->inode.mode |= S_IFREG; in->inode.version = dn->pre_dirty() - 1; // prepare finisher diff --git a/branches/sage/mds/mds/journal.cc b/branches/sage/mds/mds/journal.cc index 2cf32d99ca337..0d08b4e6aa456 100644 --- a/branches/sage/mds/mds/journal.cc +++ b/branches/sage/mds/mds/journal.cc @@ -756,7 +756,7 @@ void ESession::replay(MDS *mds) // hrm, this isn't very pretty. if (!open) - mds->clientmap.trim_completed_requests(client_inst.name.num(), 0); + mds->clientmap.trim_completed_requests(client_inst.name, 0); } else { dout(10) << "ESession.replay clientmap " << mds->clientmap.get_version() @@ -766,7 +766,7 @@ void ESession::replay(MDS *mds) mds->clientmap.open_session(client_inst); } else { mds->clientmap.close_session(client_inst.name.num()); - mds->clientmap.trim_completed_requests(client_inst.name.num(), 0); + mds->clientmap.trim_completed_requests(client_inst.name, 0); } mds->clientmap.reset_projected(); // make it follow version. } diff --git a/branches/sage/mds/mds/mdstypes.h b/branches/sage/mds/mds/mdstypes.h index a2f779757255e..f64bb908e9498 100644 --- a/branches/sage/mds/mds/mdstypes.h +++ b/branches/sage/mds/mds/mdstypes.h @@ -54,30 +54,30 @@ using namespace std; struct metareqid_t { + entity_name_t name; uint64_t tid; - int32_t client; - int32_t _pad; - metareqid_t() : tid(0), client(-1), _pad(0) {} - metareqid_t(int c, tid_t t) : tid(t), client(c), _pad(0) {} + metareqid_t() : tid(0) {} + //metareqid_t(int c, tid_t t) : tid(t) { name = entity_name_t::CLIENT(c); } + metareqid_t(entity_name_t n, tid_t t) : name(n), tid(t) {} }; inline ostream& operator<<(ostream& out, const metareqid_t& r) { - return out << "client" << r.client << ":" << r.tid; + return out << r.name << ":" << r.tid; } inline bool operator==(const metareqid_t& l, const metareqid_t& r) { - return (l.client == r.client) && (l.tid == r.tid); + return (l.name == r.name) && (l.tid == r.tid); } inline bool operator!=(const metareqid_t& l, const metareqid_t& r) { - return (l.client != r.client) || (l.tid != r.tid); + return (l.name != r.name) || (l.tid != r.tid); } inline bool operator<(const metareqid_t& l, const metareqid_t& r) { - return (l.client < r.client) || - (l.client == r.client && l.tid < r.tid); + return (l.name < r.name) || + (l.name == r.name && l.tid < r.tid); } inline bool operator<=(const metareqid_t& l, const metareqid_t& r) { - return (l.client < r.client) || - (l.client == r.client && l.tid <= r.tid); + return (l.name < r.name) || + (l.name == r.name && l.tid <= r.tid); } inline bool operator>(const metareqid_t& l, const metareqid_t& r) { return !(l <= r); } inline bool operator>=(const metareqid_t& l, const metareqid_t& r) { return !(l < r); } @@ -86,7 +86,7 @@ namespace __gnu_cxx { template<> struct hash { size_t operator()(const metareqid_t &r) const { hash H; - return H(r.client) ^ H(r.tid); + return H(r.name.num()) ^ H(r.name.type()) ^ H(r.tid); } }; } diff --git a/branches/sage/mds/messages/MClientFileCaps.h b/branches/sage/mds/messages/MClientFileCaps.h index e6fe60b37343b..758205902b41b 100644 --- a/branches/sage/mds/messages/MClientFileCaps.h +++ b/branches/sage/mds/messages/MClientFileCaps.h @@ -18,13 +18,19 @@ #include "msg/Message.h" #include "mds/Capability.h" + class MClientFileCaps : public Message { public: static const int OP_GRANT = 0; // mds->client grant. static const int OP_ACK = 1; // client->mds ack (if prior grant was a recall) - static const int OP_RELEASE = 2; // mds closed the cap + static const int OP_RELEASE = 2; // mds->client release cap (*) static const int OP_EXPORT = 3; // mds has exported the cap static const int OP_IMPORT = 4; // mds has imported the cap from get_mds() + /* + * (*) it's a bit counterintuitive, but the mds has to + * close the cap because the client isn't able to tell + * if a concurrent open() would map to the same inode. + */ static const char* get_opname(int op) { switch (op) { case OP_GRANT: return "grant"; diff --git a/branches/sage/mds/messages/MClientReply.h b/branches/sage/mds/messages/MClientReply.h index 760dcc971ebad..f50384000f2f5 100644 --- a/branches/sage/mds/messages/MClientReply.h +++ b/branches/sage/mds/messages/MClientReply.h @@ -127,6 +127,7 @@ class MClientReply : public Message { // reply data struct st_ { long tid; + epoch_t mdsmap_epoch; int op; int result; // error code unsigned char file_caps; // for open @@ -150,6 +151,9 @@ class MClientReply : public Message { long get_tid() { return st.tid; } int get_op() { return st.op; } + void set_mdsmap_epoch(epoch_t e) { st.mdsmap_epoch = e; } + epoch_t get_mdsmap_epoch() { return st.mdsmap_epoch; } + int get_result() { return st.result; } const string& get_path() { return path; } diff --git a/branches/sage/mds/messages/MClientRequest.h b/branches/sage/mds/messages/MClientRequest.h index 8f03044cf5a4f..d0c4a57a4af8a 100644 --- a/branches/sage/mds/messages/MClientRequest.h +++ b/branches/sage/mds/messages/MClientRequest.h @@ -81,15 +81,16 @@ class MClientRequest : public Message { inodeno_t mds_wants_replica_in_dirino; entity_inst_t client_inst; + epoch_t mdsmap_epoch; int op; int caller_uid, caller_gid; inodeno_t cwd_ino; + } st; // path arguments - filepath path; - string sarg; + filepath path, path2; public: // fixed size arguments. in a union. @@ -146,9 +147,12 @@ class MClientRequest : public Message { this->st.client_inst = ci; } + void set_mdsmap_epoch(epoch_t e) { st.mdsmap_epoch = e; } + epoch_t get_mdsmap_epoch() { return st.mdsmap_epoch; } + metareqid_t get_reqid() { // FIXME: for now, assume clients always have 1 incarnation - return metareqid_t(st.client_inst.name.num(), st.tid); + return metareqid_t(st.client_inst.name, st.tid); } int get_open_file_mode() { @@ -214,18 +218,19 @@ class MClientRequest : public Message { void set_retry_attempt(int a) { st.retry_attempt = a; } void set_path(string& p) { path.set_path(p); } void set_path(const char *p) { path.set_path(p); } - void set_path(const filepath& fp) { path = fp; } + void set_filepath(const filepath& fp) { path = fp; } + void set_path2(string& p) { path2.set_path(p); } + void set_path2(const char *p) { path2.set_path(p); } + void set_filepath2(const filepath& fp) { path2 = fp; } void set_caller_uid(int u) { st.caller_uid = u; } void set_caller_gid(int g) { st.caller_gid = g; } - void set_sarg(string& arg) { this->sarg = arg; } - void set_sarg(const char *arg) { this->sarg = arg; } void set_mds_wants_replica_in_dirino(inodeno_t dirino) { st.mds_wants_replica_in_dirino = dirino; } void set_client_inst(const entity_inst_t& i) { st.client_inst = i; } const entity_inst_t& get_client_inst() { return st.client_inst; } + entity_name_t get_client() { return st.client_inst.name; } - int get_client() { return st.client_inst.name.num(); } tid_t get_tid() { return st.tid; } tid_t get_oldest_client_tid() { return st.oldest_client_tid; } int get_num_fwd() { return st.num_fwd; } @@ -233,10 +238,11 @@ class MClientRequest : public Message { int get_op() { return st.op; } int get_caller_uid() { return st.caller_uid; } int get_caller_gid() { return st.caller_gid; } - //inodeno_t get_ino() { return st.ino; } const string& get_path() { return path.get_path(); } filepath& get_filepath() { return path; } - string& get_sarg() { return sarg; } + const string& get_path2() { return path.get_path(); } + filepath& get_filepath2() { return path2; } + inodeno_t get_mds_wants_replica_in_dirino() { return st.mds_wants_replica_in_dirino; } @@ -249,19 +255,19 @@ class MClientRequest : public Message { payload.copy(off, sizeof(args), (char*)&args); off += sizeof(args); path._decode(payload, off); - ::_decode(sarg, payload, off); + path2._decode(payload, off); } void encode_payload() { payload.append((char*)&st, sizeof(st)); payload.append((char*)&args, sizeof(args)); path._encode(payload); - ::_encode(sarg, payload); + path2._encode(payload); } char *get_type_name() { return "creq"; } void print(ostream& out) { - out << "clientreq(client" << get_client() + out << "clientreq(" << get_client() << "." << get_tid() << " "; switch(get_op()) { @@ -311,10 +317,10 @@ class MClientRequest : public Message { out << "unknown=" << get_op(); assert(0); } - if (get_path().length()) - out << " " << get_path(); - if (get_sarg().length()) - out << " " << get_sarg(); + if (!get_filepath().empty()) + out << " " << get_filepath(); + if (!get_filepath2().empty()) + out << " " << get_filepath2(); if (st.retry_attempt) out << " RETRY=" << st.retry_attempt; out << ")"; diff --git a/branches/sage/mds/messages/MDirUpdate.h b/branches/sage/mds/messages/MDirUpdate.h index 0db32208efd45..87d7e4fa7389b 100644 --- a/branches/sage/mds/messages/MDirUpdate.h +++ b/branches/sage/mds/messages/MDirUpdate.h @@ -25,14 +25,14 @@ class MDirUpdate : public Message { int discover; } st; set dir_rep_by; - string path; + filepath path; public: dirfrag_t get_dirfrag() { return st.dirfrag; } int get_dir_rep() { return st.dir_rep; } set& get_dir_rep_by() { return dir_rep_by; } bool should_discover() { return st.discover > 0; } - string& get_path() { return path; } + filepath& get_path() { return path; } void tried_discover() { if (st.discover) st.discover--; @@ -42,7 +42,7 @@ class MDirUpdate : public Message { MDirUpdate(dirfrag_t dirfrag, int dir_rep, set& dir_rep_by, - string& path, + filepath& path, bool discover = false) : Message(MSG_MDS_DIRUPDATE) { this->st.dirfrag = dirfrag; @@ -52,19 +52,22 @@ class MDirUpdate : public Message { this->path = path; } virtual char *get_type_name() { return "dir_update"; } + void print(ostream& out) { + out << "dir_update(" << get_dirfrag() << ")"; + } virtual void decode_payload() { int off = 0; payload.copy(off, sizeof(st), (char*)&st); off += sizeof(st); ::_decode(dir_rep_by, payload, off); - ::_decode(path, payload, off); + path._decode(payload, off); } virtual void encode_payload() { payload.append((char*)&st, sizeof(st)); ::_encode(dir_rep_by, payload); - ::_encode(path, payload); + path._encode(payload); } }; diff --git a/branches/sage/mds/messages/MExportDirDiscover.h b/branches/sage/mds/messages/MExportDirDiscover.h index c311d1e87e940..01c61a67648c3 100644 --- a/branches/sage/mds/messages/MExportDirDiscover.h +++ b/branches/sage/mds/messages/MExportDirDiscover.h @@ -21,12 +21,12 @@ class MExportDirDiscover : public Message { dirfrag_t dirfrag; - string path; + filepath path; public: inodeno_t get_ino() { return dirfrag.ino; } dirfrag_t get_dirfrag() { return dirfrag; } - string& get_path() { return path; } + filepath& get_path() { return path; } bool started; @@ -47,12 +47,12 @@ class MExportDirDiscover : public Message { virtual void decode_payload() { bufferlist::iterator p = payload.begin(); ::_decode_simple(dirfrag, p); - ::_decode_simple(path, p); + path._decode(p); } virtual void encode_payload() { ::_encode_simple(dirfrag, payload); - ::_encode_simple(path, payload); + path._encode(payload); } }; diff --git a/branches/sage/mds/messages/MExportStrays.h b/branches/sage/mds/messages/MExportStrays.h deleted file mode 100644 index be055c0312c89..0000000000000 --- a/branches/sage/mds/messages/MExportStrays.h +++ /dev/null @@ -1,43 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2004-2006 Sage Weil - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - * - */ - - -#ifndef __MEXPORTSTRAYS_H -#define __MEXPORTSTRAYS_H - -#include "msg/Message.h" - - -class MExportStrays : public Message { - public: - bufferlist state; - - MExportStrays() : - Message(MSG_MDS_EXPORTSTRAYS) {} - - virtual char *get_type_name() { return "SEx"; } - void print(ostream& o) { - o << "export_strays"; - } - - virtual void decode_payload() { - state = payload; - } - virtual void encode_payload() { - payload = state; - } - -}; - -#endif diff --git a/branches/sage/mds/messages/MMDSSlaveRequest.h b/branches/sage/mds/messages/MMDSSlaveRequest.h index 5ef65223ec1c9..a5c2339fd4cd5 100644 --- a/branches/sage/mds/messages/MMDSSlaveRequest.h +++ b/branches/sage/mds/messages/MMDSSlaveRequest.h @@ -77,8 +77,8 @@ class MMDSSlaveRequest : public Message { public: // for rename prep - string srcdnpath; - string destdnpath; + filepath srcdnpath; + filepath destdnpath; set witnesses; bufferlist inode_export; version_t inode_export_v; @@ -110,8 +110,8 @@ public: ::_encode(lock_type, payload); object_info._encode(payload); ::_encode_complex(authpins, payload); - ::_encode(srcdnpath, payload); - ::_encode(destdnpath, payload); + srcdnpath._encode(payload); + destdnpath._encode(payload); ::_encode(witnesses, payload); ::_encode(now, payload); ::_encode(inode_export, payload); @@ -126,8 +126,8 @@ public: ::_decode_simple(lock_type, p); object_info._decode(p); ::_decode_complex(authpins, p); - ::_decode_simple(srcdnpath, p); - ::_decode_simple(destdnpath, p); + srcdnpath._decode(p); + destdnpath._decode(p); ::_decode_simple(witnesses, p); ::_decode_simple(now, p); ::_decode_simple(inode_export, p); diff --git a/branches/sage/mds/msg/FakeMessenger.cc b/branches/sage/mds/msg/FakeMessenger.cc index ee80df3dc0626..77434c35e64da 100644 --- a/branches/sage/mds/msg/FakeMessenger.cc +++ b/branches/sage/mds/msg/FakeMessenger.cc @@ -291,7 +291,8 @@ FakeMessenger::FakeMessenger(entity_name_t me) : Messenger(me) lock.Unlock(); - dout(0) << "fakemessenger " << get_myname() << " messenger is " << this << " at " << _myinst << dendl; + dout(0) << "fakemessenger " << get_myname() << " messenger is " << this + << " at " << get_myaddr() << dendl; qlen = 0; @@ -341,12 +342,8 @@ int FakeMessenger::shutdown() void FakeMessenger::reset_myname(entity_name_t m) { dout(1) << "reset_myname from " << get_myname() << " to " << m << dendl; - _set_myname(m); - - directory.erase(_myinst.addr); _myinst.name = m; - directory[_myinst.addr] = this; - + // put myself in the fail queue? if (g_fake_kill_after.count(m)) { utime_t w = start_time; diff --git a/branches/sage/mds/msg/FakeMessenger.h b/branches/sage/mds/msg/FakeMessenger.h index 2284ea110b51f..57379133812a9 100644 --- a/branches/sage/mds/msg/FakeMessenger.h +++ b/branches/sage/mds/msg/FakeMessenger.h @@ -32,8 +32,6 @@ class FakeMessenger : public Messenger { int qlen; list incoming; // incoming queue - entity_inst_t _myinst; - public: bool failed; @@ -42,13 +40,6 @@ class FakeMessenger : public Messenger { virtual int shutdown(); - const entity_inst_t& get_myinst() { - return _myinst; - }; - const entity_addr_t& get_myaddr() { - return _myinst.addr; - } - void reset_myname(entity_name_t m); // msg interface diff --git a/branches/sage/mds/msg/Message.cc b/branches/sage/mds/msg/Message.cc index ebffe647c62f0..e3de6ce0677ba 100644 --- a/branches/sage/mds/msg/Message.cc +++ b/branches/sage/mds/msg/Message.cc @@ -84,6 +84,7 @@ using namespace std; #include "messages/MExportCaps.h" #include "messages/MExportCapsAck.h" + #include "messages/MDentryUnlink.h" #include "messages/MHeartbeat.h" diff --git a/branches/sage/mds/msg/Message.h b/branches/sage/mds/msg/Message.h index b570722ad1888..cb106251a425b 100644 --- a/branches/sage/mds/msg/Message.h +++ b/branches/sage/mds/msg/Message.h @@ -127,12 +127,6 @@ #define MSG_MDS_EXPORTDIRNOTIFYACK 159 #define MSG_MDS_EXPORTDIRFINISH 160 -#define MSG_MDS_EXPORTSTRAY 161 -#define MSG_MDS_EXPORTSTRAYNOTIFY 162 -#define MSG_MDS_EXPORTSTRAYNOTIFYACK 163 -#define MSG_MDS_EXPORTSTRAYACK 164 -#define MSG_MDS_EXPORTSTRAYFINISH 165 - #define MSG_MDS_EXPORTCAPS 166 #define MSG_MDS_EXPORTCAPSACK 167 diff --git a/branches/sage/mds/msg/Messenger.h b/branches/sage/mds/msg/Messenger.h index d29441a744ca0..85ac2745ad0d7 100644 --- a/branches/sage/mds/msg/Messenger.h +++ b/branches/sage/mds/msg/Messenger.h @@ -34,21 +34,23 @@ class Timer; class Messenger { private: Dispatcher *dispatcher; - entity_name_t _myname; + +protected: + entity_inst_t _myinst; public: - Messenger(entity_name_t w) : dispatcher(0), _myname(w) { } + Messenger(entity_name_t w) : dispatcher(0) { + _myinst.name = w; + } virtual ~Messenger() { } // accessors - entity_name_t get_myname() { return _myname; } - void _set_myname(entity_name_t m) { _myname = m; } - + entity_name_t get_myname() { return _myinst.name; } + const entity_addr_t& get_myaddr() { return _myinst.addr; } + const entity_inst_t& get_myinst() { return _myinst; } + + void _set_myname(entity_name_t m) { _myinst.name = m; } virtual void reset_myname(entity_name_t m) = 0; - - virtual const entity_addr_t &get_myaddr() = 0; - - entity_inst_t get_myinst() { return entity_inst_t(_myname, get_myaddr()); } // hrmpf. virtual int get_dispatch_queue_len() { return 0; }; diff --git a/branches/sage/mds/msg/SimpleMessenger.cc b/branches/sage/mds/msg/SimpleMessenger.cc index 7e29f033d83b5..51ff661ebb7ec 100644 --- a/branches/sage/mds/msg/SimpleMessenger.cc +++ b/branches/sage/mds/msg/SimpleMessenger.cc @@ -1379,13 +1379,6 @@ int Rank::EntityMessenger::send_first_message(Dispatcher *d, return 0; } - -const entity_addr_t &Rank::EntityMessenger::get_myaddr() -{ - return rank.my_addr; -} - - void Rank::EntityMessenger::reset_myname(entity_name_t newname) { rank.lock.Lock(); @@ -1402,8 +1395,6 @@ void Rank::EntityMessenger::reset_myname(entity_name_t newname) } - - void Rank::EntityMessenger::mark_down(entity_addr_t a) { rank.mark_down(a); @@ -1411,34 +1402,8 @@ void Rank::EntityMessenger::mark_down(entity_addr_t a) void Rank::mark_down(entity_addr_t addr) { - //if (my_rank == 0) return; // ugh.. rank0 already handles this stuff in the namer lock.Lock(); - /* - if (entity_map.count(a) && - entity_map[a] > inst) { - dout(10) << "mark_down " << a << " inst " << inst << " < " << entity_map[a] << dendl; - derr(10) << "mark_down " << a << " inst " << inst << " < " << entity_map[a] << dendl; - // do nothing! - } else { - if (entity_map.count(a) == 0) { - // don't know it - dout(10) << "mark_down " << a << " inst " << inst << " ... unknown by me" << dendl; - derr(10) << "mark_down " << a << " inst " << inst << " ... unknown by me" << dendl; - } else { - // know it - assert(entity_map[a] <= inst); - dout(10) << "mark_down " << a << " inst " << inst << dendl; - derr(10) << "mark_down " << a << " inst " << inst << dendl; - - entity_map.erase(a); - - if (rank_pipe.count(inst)) { - rank_pipe[inst]->close(); - rank_pipe.erase(inst); - } - } - } - */ + // FIXME lock.Unlock(); } diff --git a/branches/sage/mds/msg/SimpleMessenger.h b/branches/sage/mds/msg/SimpleMessenger.h index 6bd417adc8e10..ae888a8f461b6 100644 --- a/branches/sage/mds/msg/SimpleMessenger.h +++ b/branches/sage/mds/msg/SimpleMessenger.h @@ -197,8 +197,8 @@ private: } public: - EntityMessenger(entity_name_t myaddr) : - Messenger(myaddr), + EntityMessenger(entity_name_t name) : + Messenger(name), stop(false), qlen(0), pqlen(0), dispatch_thread(this) { } @@ -215,8 +215,6 @@ private: dispatch_thread.join(); } - const entity_addr_t &get_myaddr(); - int get_dispatch_queue_len() { return qlen + pqlen; } void reset_myname(entity_name_t m); diff --git a/branches/sage/mds/msg/msg_types.h b/branches/sage/mds/msg/msg_types.h index 52b1e69c8886c..d5ce108583564 100644 --- a/branches/sage/mds/msg/msg_types.h +++ b/branches/sage/mds/msg/msg_types.h @@ -33,7 +33,7 @@ public: // cons entity_name_t() { v.type = v.num = 0; } - entity_name_t(int t, int n=NEW) { v.type = t; v.num = n; } + entity_name_t(int t, int n) { v.type = t; v.num = n; } // static cons static entity_name_t MON(int i=NEW) { return entity_name_t(TYPE_MON, i); } -- 2.39.5