From af52f93fbe315c1e043888ac6b97f5a25afbd47e Mon Sep 17 00:00:00 2001 From: sageweil Date: Fri, 27 Jul 2007 23:38:34 +0000 Subject: [PATCH] rewrote client readdir, cleaned up readdir handling on server etc. git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1564 29311d96-e01e-0410-9327-a35deaab8ce9 --- branches/sage/mds/TODO | 5 - branches/sage/mds/client/Client.cc | 262 +++++++++++--------- branches/sage/mds/client/Client.h | 63 +++-- branches/sage/mds/client/SyntheticClient.cc | 40 +-- branches/sage/mds/client/SyntheticClient.h | 4 +- branches/sage/mds/client/fuse.cc | 87 ++++--- branches/sage/mds/fakefuse.cc | 4 + branches/sage/mds/include/frag.h | 12 + branches/sage/mds/include/types.h | 44 ++-- branches/sage/mds/mds/Server.cc | 39 +-- branches/sage/mds/mds/Server.h | 4 +- branches/sage/mds/messages/MClientReply.h | 42 ++-- 12 files changed, 372 insertions(+), 234 deletions(-) diff --git a/branches/sage/mds/TODO b/branches/sage/mds/TODO index d5c242c9a9cee..f91c4885e4348 100644 --- a/branches/sage/mds/TODO +++ b/branches/sage/mds/TODO @@ -80,11 +80,6 @@ sage mds / - via subtree_merge_at -- journal epoch, or something similar - - reduce size of EMetaBlob by skipping context when inode was already journaled since the last - SubtreeMap - - - hmm, should we move ESubtreeMap out of the journal? that would avoid all the icky weirdness in shutdown, with periodic logging, etc. diff --git a/branches/sage/mds/client/Client.cc b/branches/sage/mds/client/Client.cc index 5c28abf590610..a4531a186c4e5 100644 --- a/branches/sage/mds/client/Client.cc +++ b/branches/sage/mds/client/Client.cc @@ -319,14 +319,14 @@ Inode* Client::insert_inode(Dir *dir, InodeStat *st, const string& dname) dout(12) << " new dentry+node with ino " << st->inode.ino << endl; } else { // actually update info - dout(12) << " stat inode mask is " << st->inode.mask << endl; + dout(12) << " stat inode mask is " << st->mask << endl; dn->inode->inode = st->inode; // ...but don't clobber our mtime, size! - if ((dn->inode->inode.mask & INODE_MASK_SIZE) == 0 && + if ((dn->inode->mask & INODE_MASK_SIZE) == 0 && dn->inode->file_wr_size > dn->inode->inode.size) dn->inode->inode.size = dn->inode->file_wr_size; - if ((dn->inode->inode.mask & INODE_MASK_MTIME) == 0 && + if ((dn->inode->mask & INODE_MASK_MTIME) == 0 && dn->inode->file_wr_mtime > dn->inode->inode.mtime) dn->inode->inode.mtime = dn->inode->file_wr_mtime; } @@ -1746,7 +1746,7 @@ int Client::_lstat(const char *path, int mask, Inode **in) utime_t now = g_clock.real_now(); if (dn && now <= dn->inode->valid_until && - ((dn->inode->inode.mask & INODE_MASK_ALL_STAT) == INODE_MASK_ALL_STAT)) { + ((dn->inode->mask & INODE_MASK_ALL_STAT) == INODE_MASK_ALL_STAT)) { inode = dn->inode->inode; dout(10) << "lstat cache hit w/ sufficient inode.mask, valid until " << dn->inode->valid_until << endl; @@ -1785,40 +1785,23 @@ int Client::_lstat(const char *path, int mask, Inode **in) } -void Client::fill_stat(inode_t& inode, struct stat *st) +int Client::fill_stat(Inode *in, struct stat *st) { memset(st, 0, sizeof(struct stat)); - st->st_ino = inode.ino; - st->st_mode = inode.mode; - st->st_nlink = inode.nlink; - st->st_uid = inode.uid; - st->st_gid = inode.gid; - st->st_ctime = MAX(inode.ctime, inode.mtime); - st->st_atime = inode.atime; - st->st_mtime = inode.mtime; - st->st_size = inode.size; - st->st_blocks = inode.size ? ((inode.size - 1) / 4096 + 1):0; + st->st_ino = in->inode.ino; + st->st_mode = in->inode.mode; + st->st_nlink = in->inode.nlink; + st->st_uid = in->inode.uid; + st->st_gid = in->inode.gid; + st->st_ctime = MAX(in->inode.ctime, in->inode.mtime); + st->st_atime = in->inode.atime; + st->st_mtime = in->inode.mtime; + st->st_size = in->inode.size; + st->st_blocks = in->inode.size ? ((in->inode.size - 1) / 4096 + 1):0; st->st_blksize = 4096; + return in->mask; } -void Client::fill_statlite(inode_t& inode, struct statlite *st) -{ - memset(st, 0, sizeof(struct stat)); - st->st_ino = inode.ino; - st->st_mode = inode.mode; - st->st_nlink = inode.nlink; - st->st_uid = inode.uid; - st->st_gid = inode.gid; -#ifndef DARWIN - // FIXME what's going on here with darwin? - st->st_ctime = MAX(inode.ctime, inode.mtime); - st->st_atime = inode.atime; - st->st_mtime = inode.mtime; -#endif - st->st_size = inode.size; - st->st_blocks = inode.size ? ((inode.size - 1) / 4096 + 1):0; - st->st_blksize = 4096; - /* S_REQUIREBLKSIZE(st->st_litemask); if (inode.mask & INODE_MASK_BASE) S_REQUIRECTIME(st->st_litemask); @@ -1829,7 +1812,6 @@ void Client::fill_statlite(inode_t& inode, struct statlite *st) if (inode.mask & INODE_MASK_MTIME) S_REQUIREMTIME(st->st_litemask); if (inode.mask & INODE_MASK_ATIME) S_REQUIREATIME(st->st_litemask); */ -} int Client::lstat(const char *relpath, struct stat *stbuf) @@ -1849,7 +1831,7 @@ int Client::lstat(const char *relpath, struct stat *stbuf) int res = _lstat(path, INODE_MASK_ALL_STAT, &in); if (res == 0) { assert(in); - fill_stat(in->inode,stbuf); + fill_stat(in, stbuf); dout(10) << "stat sez size = " << in->inode.size << " mode = " << oct << stbuf->st_mode << dec << " ino = " << stbuf->st_ino << endl; } @@ -1859,6 +1841,7 @@ int Client::lstat(const char *relpath, struct stat *stbuf) } +/* int Client::lstatlite(const char *relpath, struct statlite *stl) { client_lock.Lock(); @@ -1891,7 +1874,7 @@ int Client::lstatlite(const char *relpath, struct statlite *stl) client_lock.Unlock(); return res; } - +*/ int Client::chmod(const char *relpath, mode_t mode) @@ -2046,88 +2029,112 @@ int Client::mknod(const char *relpath, mode_t mode) -//readdir usually include inode info for each entry except of locked entries +int Client::getdir(const char *relpath, list& contents) +{ + DIR *d; + int r = opendir(relpath, &d); + if (r < 0) return r; + + struct dirent de; + int n = 0; + while (readdir_r(d, &de) == 0) { + contents.push_back(de.d_name); + n++; + } + closedir(d); + + return n; +} + -// -// getdir -// fyi: typedef int (*dirfillerfunc_t) (void *handle, const char *name, int type, inodeno_t ino); +/** POSIX stubs **/ -int Client::getdir(const char *relpath, map& contents) +int Client::opendir(const char *name, DIR **dirpp) { - client_lock.Lock(); + *((DirResult**)dirpp) = new DirResult(name); + return 0; +} - string abspath; - mkabspath(relpath, abspath); - const char *path = abspath.c_str(); +bool Client::_readdir_have_next(DirResult *dirp) +{ + return dirp->buffer.count(dirp->frag()); +} - dout(3) << "op: client->getdir(\"" << path << "\", dir_contents);" << endl; - tout << "getdir" << endl; - tout << path << endl; +void Client::_readdir_add_dirent(DirResult *dirp, const string& name, Inode *in) +{ + frag_t fg = dirp->frag(); + struct stat st; + int stmask; + stmask = fill_stat(in, &st); + dirp->buffer[fg].push_back(DirEntry(name, st, stmask)); +} +void Client::_readdir_get_next(DirResult *dirp) +{ + // get the current frag. + frag_t fg = dirp->frag(); + assert(dirp->buffer.count(fg) == 0); + + client_lock.Lock(); MClientRequest *req = new MClientRequest(MDS_OP_READDIR, messenger->get_myinst()); - req->set_path(path); + req->set_path(dirp->path); + req->args.readdir.frag = fg; // FIXME where does FUSE maintain user information req->set_caller_uid(getuid()); req->set_caller_gid(getgid()); - - //FIXME enforce caller uid rights? - + MClientReply *reply = make_request(req); int res = reply->get_result(); insert_trace(reply); - + if (res == 0) { - - // dir contents to cache! + // stuff dir contents to cache, DirResult inodeno_t ino = reply->get_ino(); Inode *diri = inode_map[ ino ]; assert(diri); assert(diri->inode.mode & INODE_MODE_DIR); - // add . and ..? - string dot("."); - contents[dot] = diri->inode; - if (diri != root) { + if (fg.is_leftmost()) { + // add . and ..? + string dot("."); string dotdot(".."); - contents[dotdot] = diri->dn->dir->parent_inode->inode; + _readdir_add_dirent(dirp, dot, diri); + if (diri->dn) + _readdir_add_dirent(dirp, dotdot, diri->dn->dir->parent_inode); } - + // the rest? - if (!reply->get_dir_in().empty()) { + if (!reply->get_dir_dn().empty()) { // only open dir if we're actually adding stuff to it! Dir *dir = diri->open_dir(); assert(dir); utime_t now = g_clock.real_now(); - list::const_iterator pdn = reply->get_dir_dn().begin(); - for (list::const_iterator pin = reply->get_dir_in().begin(); - pin != reply->get_dir_in().end(); - ++pin, ++pdn) { - // ignore . - if (*pdn == ".") - continue; - + list::const_iterator pin = reply->get_dir_in().begin(); + for (list::const_iterator pdn = reply->get_dir_dn().begin(); + pdn != reply->get_dir_dn().end(); + ++pdn, ++pin) { // count entries res++; - - // put in cache - Inode *in = this->insert_inode(dir, *pin, *pdn); - - if (g_conf.client_cache_stat_ttl) { - in->valid_until = now; + + // put in cache + Inode *in = this->insert_inode(dir, *pin, *pdn); + + if (g_conf.client_cache_stat_ttl) { + in->valid_until = now; in->valid_until += g_conf.client_cache_stat_ttl; } - else if (g_conf.client_cache_readdir_ttl) { - in->valid_until = now; + else if (g_conf.client_cache_readdir_ttl) { + in->valid_until = now; in->valid_until += g_conf.client_cache_readdir_ttl; } - - // contents to caller too! + + // contents to caller too! dout(15) << "getdir including " << *pdn << " to " << in->inode.ino << endl; - contents[*pdn] = in->inode; + _readdir_add_dirent(dirp, *pdn, in); } if (dir->is_empty()) close_dir(dir); @@ -2135,30 +2142,53 @@ int Client::getdir(const char *relpath, map& contents) // FIXME: remove items in cache that weren't in my readdir? // *** + } else { + dirp->set_end(); } - delete reply; //fix thing above first + delete reply; client_lock.Unlock(); - return res; } - -/** POSIX stubs **/ - -DIR *Client::opendir(const char *name) +void Client::_readdir_advance_frag(DirResult *dirp) { - DirResult *d = new DirResult; - d->size = getdir(name, d->contents); - d->p = d->contents.begin(); - d->off = 0; - return (DIR*)d; + frag_t fg = dirp->frag(); + dirp->buffer.erase(fg); + dirp->next_frag(); } -int Client::closedir(DIR *dir) -{ - DirResult *d = (DirResult*)dir; - delete d; +int Client::readdir_r(DIR *d, struct dirent *de) +{ + return readdirplus_r(d, de, 0, 0); +} + +int Client::readdirplus_r(DIR *d, struct dirent *de, struct stat *st, int *stmask) +{ + DirResult *dirp = (DirResult*)d; + + // do i have this frag? + if (!_readdir_have_next(dirp)) + _readdir_get_next(dirp); + + if (dirp->is_end()) + return -1; // end of directory + + frag_t fg = dirp->frag(); + uint32_t pos = dirp->fragpos(); + + assert(dirp->buffer.count(fg)); + + vector &ent = dirp->buffer[fg]; + assert(pos < ent.size()); + _readdir_fill_dirent(de, &ent[pos], dirp->offset); + if (st) *st = ent[pos].st; + if (stmask) *stmask = ent[pos].stmask; + pos++; + dirp->offset++; + if (pos == ent.size()) + _readdir_advance_frag(dirp); + return 0; } @@ -2169,8 +2199,24 @@ int Client::closedir(DIR *dir) // unsigned char d_type; /* type of file */ // char d_name[256]; /* filename */ //}; +void Client::_readdir_fill_dirent(struct dirent *de, DirEntry *entry, off_t off) +{ + de->d_ino = entry->st.st_ino; + de->d_off = off + 1; + de->d_reclen = 1; + de->d_type = MODE_TO_DT(entry->st.st_mode); + strncpy(de->d_name, entry->d_name.c_str(), 256); +} + +int Client::closedir(DIR *dir) +{ + DirResult *d = (DirResult*)dir; + delete d; + return 0; +} + -struct dirent *Client::readdir(DIR *dirp) +/*struct dirent *Client::readdir(DIR *dirp) { DirResult *d = (DirResult*)dirp; @@ -2204,35 +2250,29 @@ struct dirent *Client::readdir(DIR *dirp) return &d->dp.d_dirent; } +*/ void Client::rewinddir(DIR *dirp) { DirResult *d = (DirResult*)dirp; - d->p = d->contents.begin(); - d->off = 0; + d->offset = 0; + d->buffer.clear(); } off_t Client::telldir(DIR *dirp) { DirResult *d = (DirResult*)dirp; - return d->off; + return d->offset; } void Client::seekdir(DIR *dirp, off_t offset) { DirResult *d = (DirResult*)dirp; - - d->p = d->contents.begin(); - d->off = 0; - - if (offset >= d->size) offset = d->size-1; - while (offset > 0) { - ++d->p; - ++d->off; - --offset; - } + d->offset = offset; } + +/* struct dirent_plus *Client::readdirplus(DIR *dirp) { DirResult *d = (DirResult*)dirp; @@ -2280,7 +2320,7 @@ struct dirent_plus *Client::readdirplus(DIR *dirp) return &d->dp; } - +*/ /* struct dirent_lite *Client::readdirlite(DIR *dirp) { diff --git a/branches/sage/mds/client/Client.h b/branches/sage/mds/client/Client.h index 457efca53ef2d..96c427a494c8c 100644 --- a/branches/sage/mds/client/Client.h +++ b/branches/sage/mds/client/Client.h @@ -124,6 +124,7 @@ class Inode { public: inode_t inode; // the actual inode utime_t valid_until; + int mask; // about the dir (if this is one!) int dir_auth; @@ -319,15 +320,36 @@ class Client : public Dispatcher { public: /* getdir result */ + struct DirEntry { + string d_name; + struct stat st; + int stmask; + DirEntry(const string &s) : d_name(s), stmask(0) {} + DirEntry(const string &n, struct stat& s, int stm) : d_name(n), st(s), stmask(stm) {} + }; + struct DirResult { + static const int SHIFT = 28; + static const int MASK = 0xfffffff; + static const off_t END = 1ULL << (SHIFT + 32); + string path; - map contents; - map::iterator p; - int off; - int size; - struct dirent_plus dp; - struct dirent_lite dl; - DirResult() : p(contents.end()), off(-1), size(0) {} + off_t offset; // high bits: frag_t, low bits: an offset + map > buffer; + DirResult(const char *p) : path(p), offset(0) { } + + frag_t frag() { return frag_t(offset >> SHIFT); } + unsigned fragpos() { return offset & MASK; } + + void next_frag() { + frag_t fg = offset >> SHIFT; + if (fg.is_rightmost()) + set_end(); + else + offset = fg.next() << SHIFT; + } + void set_end() { offset = END; } + bool is_end() { return (offset == END); } }; @@ -547,8 +569,7 @@ protected: // find dentry based on filepath Dentry *lookup(filepath& path); - void fill_stat(inode_t& inode, struct stat *st); - void fill_statlite(inode_t& inode, struct statlite *st); + int fill_stat(Inode *in, struct stat *st); // friends @@ -615,15 +636,21 @@ public: const string getcwd() { return cwd; } // namespace ops - int getdir(const char *path, list& contents); - int getdir(const char *path, map& contents); - - DIR *opendir(const char *name); - int closedir(DIR *dir); - struct dirent *readdir(DIR *dir); - void rewinddir(DIR *dir); - off_t telldir(DIR *dir); - void seekdir(DIR *dir, off_t offset); + int getdir(const char *relpath, list& names); // get the whole dir at once. + + bool _readdir_have_next(DirResult *dirp); + void _readdir_add_dirent(DirResult *dirp, const string& name, Inode *in); + void _readdir_get_next(DirResult *dirp); + void _readdir_advance_frag(DirResult *dirp); + void _readdir_fill_dirent(struct dirent *de, DirEntry *entry, off_t); + + int opendir(const char *name, DIR **dirpp); + int closedir(DIR *dirp); + int readdir_r(DIR *dirp, struct dirent *de); + int readdirplus_r(DIR *dirp, struct dirent *de, struct stat *st, int *stmask); + void rewinddir(DIR *dirp); + off_t telldir(DIR *dirp); + void seekdir(DIR *dirp, off_t offset); struct dirent_plus *readdirplus(DIR *dirp); int readdirplus_r(DIR *dirp, struct dirent_plus *entry, struct dirent_plus **result); diff --git a/branches/sage/mds/client/SyntheticClient.cc b/branches/sage/mds/client/SyntheticClient.cc index 63df511183748..ddcea8199fc13 100644 --- a/branches/sage/mds/client/SyntheticClient.cc +++ b/branches/sage/mds/client/SyntheticClient.cc @@ -747,7 +747,7 @@ int SyntheticClient::play_trace(Trace& t, string& prefix) client->mknod(a, b); } else if (strcmp(op, "getdir") == 0) { const char *a = t.get_string(p); - map contents; + list contents; client->getdir(a, contents); } else if (strcmp(op, "open") == 0) { const char *a = t.get_string(p); @@ -808,19 +808,19 @@ int SyntheticClient::play_trace(Trace& t, string& prefix) int SyntheticClient::clean_dir(string& basedir) { // read dir - map contents; + list contents; int r = client->getdir(basedir.c_str(), contents); if (r < 0) { dout(1) << "readdir on " << basedir << " returns " << r << endl; return r; } - for (map::iterator it = contents.begin(); + for (list::iterator it = contents.begin(); it != contents.end(); it++) { - if (it->first == ".") continue; - if (it->first == "..") continue; - string file = basedir + "/" + it->first; + if (*it == ".") continue; + if (*it == "..") continue; + string file = basedir + "/" + *it; if (time_to_stop()) break; @@ -856,19 +856,20 @@ int SyntheticClient::full_walk(string& basedir) dirq.pop_front(); // read dir - map contents; + list contents; int r = client->getdir(dir.c_str(), contents); if (r < 0) { dout(1) << "readdir on " << dir << " returns " << r << endl; continue; } - for (map::iterator it = contents.begin(); + for (list::iterator it = contents.begin(); it != contents.end(); it++) { - if (it->first == ".") continue; - if (it->first == "..") continue; - string file = dir + "/" + it->first; + if (*it == "." || + *it == "..") + continue; + string file = dir + "/" + *it; struct stat st; int r = client->lstat(file.c_str(), &st); @@ -975,7 +976,7 @@ int SyntheticClient::read_dirs(const char *basedir, int dirs, int files, int dep char d[500]; dout(3) << "read_dirs " << basedir << " dirs " << dirs << " files " << files << " depth " << depth << endl; - map contents; + list contents; utime_t s = g_clock.now(); int r = client->getdir(basedir, contents); utime_t e = g_clock.now(); @@ -1629,16 +1630,19 @@ int SyntheticClient::random_walk(int num_req) if (op == MDS_OP_READDIR) { clear_dir(); - map c; + list c; r = client->getdir( cwd.c_str(), c ); - for (map::iterator it = c.begin(); + for (list::iterator it = c.begin(); it != c.end(); it++) { - //dout(DBL) << " got " << it->first << endl; - contents[it->first] = it->second; - if (it->second.is_dir()) - subdirs.insert(it->first); + //dout(DBL) << " got " << *it << endl; + assert(0); + /*contents[*it] = it->second; + if (it->second && + S_ISDIR(it->second->st_mode)) + subdirs.insert(*it); + */ } did_readdir = true; diff --git a/branches/sage/mds/client/SyntheticClient.h b/branches/sage/mds/client/SyntheticClient.h index dc1cf58121d26..c66393f67e203 100644 --- a/branches/sage/mds/client/SyntheticClient.h +++ b/branches/sage/mds/client/SyntheticClient.h @@ -80,7 +80,7 @@ class SyntheticClient { filepath cwd; - map contents; + map contents; set subdirs; bool did_readdir; set open_files; @@ -120,7 +120,7 @@ class SyntheticClient { r += cwd.last_dentry().c_str()[0]; // slightly permuted r %= contents.size(); - map::iterator it = contents.begin(); + map::iterator it = contents.begin(); while (r--) it++; n2 = cwd; diff --git a/branches/sage/mds/client/fuse.cc b/branches/sage/mds/client/fuse.cc index 855a3eb4a6766..5f4166dc541ca 100644 --- a/branches/sage/mds/client/fuse.cc +++ b/branches/sage/mds/client/fuse.cc @@ -28,7 +28,7 @@ #define _XOPEN_SOURCE 500 #endif -#define FUSE_USE_VERSION 25 +#define FUSE_USE_VERSION 26 #include #include @@ -76,28 +76,6 @@ static int ceph_readlink(const char *path, char *buf, size_t size) return 0; } - -static int ceph_getdir(const char *path, fuse_dirh_t h, fuse_dirfil_t filler) -{ - map contents; - - int res = client->getdir(path, contents); - if (res < 0) return res; - - // return contents to fuse via callback - for (map::iterator it = contents.begin(); - it != contents.end(); - it++) { - // (immutable) inode contents too. - res = filler(h, // fuse's handle - it->first.c_str(), // dentry as char* - it->second.mode & INODE_TYPE_MASK, // mask type bits from mode - it->second.ino); // ino.. 64->32 bit issue here? FIXME - if (res != 0) break; // fuse has had enough - } - return res; -} - static int ceph_mknod(const char *path, mode_t mode, dev_t rdev) { return client->mknod(path, mode); @@ -154,6 +132,9 @@ static int ceph_utime(const char *path, struct utimbuf *buf) } +// ------------------ +// file i/o + static int ceph_open(const char *path, struct fuse_file_info *fi) { int res; @@ -185,14 +166,11 @@ static int ceph_flush(const char *path, struct fuse_file_info *fi) return 0; } - static int ceph_statfs(const char *path, struct statvfs *stbuf) { return client->statfs(path, stbuf); } - - static int ceph_release(const char *path, struct fuse_file_info *fi) { fh_t fh = fi->fh; @@ -208,10 +186,54 @@ static int ceph_fsync(const char *path, int isdatasync, } +// --------------------- +// directory i/o + +static int ceph_opendir(const char *path, struct fuse_file_info *fi) +{ + DIR *dirp; + int r = client->opendir(path, &dirp); + if (r < 0) return r; + fi->fh = (uint64_t)(void*)dirp; + return 0; +} + +static int ceph_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t off, fuse_file_info *fi) +{ + DIR *dirp = (DIR*)fi->fh; + + client->seekdir(dirp, off); + + int res = 0; + struct dirent de; + struct stat st; + int stmask = 0; + while (res == 0) { + int r = client->readdirplus_r(dirp, &de, &st, &stmask); + if (r != 0) break; + res = filler(buf, + de.d_name, + &st, + client->telldir(dirp)); + } + return res; +} + +static int ceph_releasedir(const char *path, struct fuse_file_info *fi) +{ + DIR *dirp = (DIR*)fi->fh; + int r = client->closedir(dirp); // close the file + return r; +} + + + + + static struct fuse_operations ceph_oper = { getattr: ceph_getattr, readlink: ceph_readlink, - getdir: ceph_getdir, + getdir: 0, mknod: ceph_mknod, mkdir: ceph_mkdir, unlink: ceph_unlink, @@ -229,7 +251,14 @@ static struct fuse_operations ceph_oper = { statfs: ceph_statfs, flush: ceph_flush, release: ceph_release, - fsync: ceph_fsync + fsync: ceph_fsync, + setxattr: 0, + getxattr: 0, + listxattr: 0, + removexattr: 0, + opendir: ceph_opendir, + readdir: ceph_readdir, + releasedir: ceph_releasedir }; @@ -276,6 +305,6 @@ int ceph_fuse_main(Client *c, int argc, char *argv[]) // go fuse go cout << "ok, calling fuse_main" << endl; - int r = fuse_main(newargc, newargv, &ceph_oper); + int r = fuse_main(newargc, newargv, &ceph_oper, 0); return r; } diff --git a/branches/sage/mds/fakefuse.cc b/branches/sage/mds/fakefuse.cc index 66e5d550c1543..60dff87a8567c 100644 --- a/branches/sage/mds/fakefuse.cc +++ b/branches/sage/mds/fakefuse.cc @@ -59,6 +59,10 @@ public: int main(int argc, char **argv) { cerr << "fakefuse starting" << endl; + // stop on our own (by default) + g_conf.mon_stop_on_last_unmount = true; + g_conf.mon_stop_with_last_mds = true; + vector args; argv_to_vec(argc, argv, args); parse_config_options(args); diff --git a/branches/sage/mds/include/frag.h b/branches/sage/mds/include/frag.h index 6e64646eb691f..bdaf9f2b10a81 100644 --- a/branches/sage/mds/include/frag.h +++ b/branches/sage/mds/include/frag.h @@ -132,6 +132,18 @@ class frag_t { frag_t right_child() const { return frag_t(value() | (1<& inls, - list& dnls) + list& dnls, + list& inls) { int numfiles = 0; @@ -1463,15 +1463,26 @@ int Server::encode_dir_contents(CDir *dir, if (dn->is_null()) continue; CInode *in = dn->inode; - if (!in) - continue; // hmm, fixme!, what about REMOTE links? - - dout(12) << "including inode " << *in << endl; + InodeStat *st; + if (in) { + dout(12) << "including inode " << *in << endl; + + // add this item + // note: InodeStat makes note of whether inode data is readable. + st = new InodeStat(in, mds->get_nodeid()); + } else { + assert(dn->is_remote()); + dout(12) << "including inode-less (remote) dentry " << *dn << endl; + st = new InodeStat; + st->mask = 0; + memset(&st->inode, 0, sizeof(st->inode)); + st->inode.ino = dn->get_remote_ino(); + st->inode.mode = DT_TO_MODE(dn->get_remote_d_type()); + st->mask = InodeStat::MASK_INO | InodeStat::MASK_TYPE; + } - // add this item - // note: InodeStat makes note of whether inode data is readable. dnls.push_back( it->first ); - inls.push_back( new InodeStat(in, mds->get_nodeid()) ); + inls.push_back(st); numfiles++; } return numfiles; @@ -1525,7 +1536,7 @@ void Server::handle_client_readdir(MDRequest *mdr) // build dir contents list inls; list dnls; - int numfiles = encode_dir_contents(dir, inls, dnls); + int numfiles = encode_dir_contents(dir, dnls, inls); // . too //dnls.push_back("."); @@ -1534,7 +1545,7 @@ void Server::handle_client_readdir(MDRequest *mdr) // yay, reply MClientReply *reply = new MClientReply(req); - reply->take_dir_items(inls, dnls, numfiles); + reply->take_dir_items(dnls, inls, numfiles); dout(10) << "reply to " << *req << " readdir " << numfiles << " files" << endl; reply->set_result(fg); @@ -1858,7 +1869,7 @@ void Server::_link_local_finish(MDRequest *mdr, CDentry *dn, CInode *targeti, dout(10) << "_link_local_finish " << *dn << " to " << *targeti << endl; // link and unlock the NEW dentry - dn->dir->link_remote_inode(dn, targeti->ino(), targeti->inode.get_d_type()); + dn->dir->link_remote_inode(dn, targeti->ino(), MODE_TO_DT(targeti->inode.mode)); dn->mark_dirty(dnpv); // target inode @@ -1941,7 +1952,7 @@ void Server::_link_remote_finish(MDRequest *mdr, CDentry *dn, CInode *targeti, dout(10) << "_link_remote_finish " << *dn << " to " << *targeti << endl; // link the new dentry - dn->dir->link_remote_inode(dn, targeti->ino(), targeti->inode.get_d_type()); + dn->dir->link_remote_inode(dn, targeti->ino(), MODE_TO_DT(targeti->inode.mode)); dn->mark_dirty(dpv); // dir inode's mtime @@ -3052,7 +3063,7 @@ void Server::_rename_apply(MDRequest *mdr, CDentry *srcdn, CDentry *destdn, CDen if (srcdn->is_remote()) { // srcdn was remote. srcdn->dir->unlink_inode(srcdn); - destdn->dir->link_remote_inode(destdn, in->ino(), in->inode.get_d_type()); + destdn->dir->link_remote_inode(destdn, in->ino(), MODE_TO_DT(in->inode.mode)); if (destdn->is_auth()) destdn->mark_dirty(mdr->pvmap[destdn]); } else { diff --git a/branches/sage/mds/mds/Server.h b/branches/sage/mds/mds/Server.h index 2a32c1b41b968..c38e7589bd28d 100644 --- a/branches/sage/mds/mds/Server.h +++ b/branches/sage/mds/mds/Server.h @@ -93,7 +93,9 @@ public: void handle_client_chmod(MDRequest *mdr); void handle_client_chown(MDRequest *mdr); void handle_client_readdir(MDRequest *mdr); - int encode_dir_contents(CDir *dir, list& inls, list& dnls); + int encode_dir_contents(CDir *dir, + list& dnls, + list& inls); void handle_client_truncate(MDRequest *mdr); void handle_client_fsync(MDRequest *mdr); diff --git a/branches/sage/mds/messages/MClientReply.h b/branches/sage/mds/messages/MClientReply.h index e88c31ca47400..eff1636ce0e18 100644 --- a/branches/sage/mds/messages/MClientReply.h +++ b/branches/sage/mds/messages/MClientReply.h @@ -51,11 +51,19 @@ class CInode; */ class InodeStat { - public: + const static int MASK_INO = 1; + const static int MASK_TYPE = 2; + const static int MASK_BASE = 4; + const static int MASK_AUTH = 8; + const static int MASK_LINK = 16; + const static int MASK_FILE = 32; + const static int MASK_ALL = 0xffff; + inode_t inode; string symlink; // symlink content (if symlink) fragtree_t dirfragtree; + uint32_t mask; // mds distribution hints map dirfrag_auth; @@ -68,10 +76,9 @@ class InodeStat { inode(in->inode) { // inode.mask - inode.mask = INODE_MASK_BASE; - if (in->authlock.can_rdlock(0)) inode.mask |= INODE_MASK_AUTH; - if (in->linklock.can_rdlock(0)) inode.mask |= INODE_MASK_LINK; - if (in->filelock.can_rdlock(0)) inode.mask |= INODE_MASK_FILE; + if (!in->authlock.can_rdlock(0)) mask &= ~MASK_AUTH; + if (!in->linklock.can_rdlock(0)) mask &= ~MASK_LINK; + if (!in->filelock.can_rdlock(0)) mask &= ~MASK_FILE; // symlink content? if (in->is_symlink()) @@ -96,6 +103,7 @@ class InodeStat { } void _encode(bufferlist &bl) { + ::_encode(mask, bl); ::_encode(inode, bl); ::_encode(dirfrag_auth, bl); ::_encode(dirfrag_dist, bl); @@ -105,6 +113,7 @@ class InodeStat { } void _decode(bufferlist &bl, int& off) { + ::_decode(mask, bl, off); ::_decode(inode, bl, off); ::_decode(dirfrag_auth, bl, off); ::_decode(dirfrag_dist, bl, off); @@ -133,8 +142,8 @@ class MClientReply : public Message { list trace_in; list trace_dn; + list dir_dn; list dir_in; - list dir_dn; public: long get_tid() { return st.tid; } @@ -210,13 +219,12 @@ class MClientReply : public Message { trace_in.push_back(ci); } + // dir contents + ::_decode(dir_dn, payload, off); for (int i=0; i_decode(payload, off); dir_in.push_back(ci); - string dn; - ::_decode(dn, payload, off); - dir_dn.push_back(dn); } } virtual void encode_payload() { @@ -237,13 +245,11 @@ class MClientReply : public Message { } // dir contents - pdn = dir_dn.begin(); + ::_encode(dir_dn, payload); for (pin = dir_in.begin(); pin != dir_in.end(); - ++pin, ++pdn) { + ++pin) (*pin)->_encode(payload); - ::_encode(*pdn, payload); - } } // builders @@ -253,13 +259,14 @@ class MClientReply : public Message { dir_in.push_back(in); ++st._dir_size; }*/ - void take_dir_items(list& inls, - list& dnls, - int num) { - dir_in.swap(inls); + void take_dir_items(list& dnls, + list& inls, + int num) { dir_dn.swap(dnls); + dir_in.swap(inls); st._dir_size = num; } + /* void copy_dir_items(const list& inls, const list& dnls) { list::const_iterator pdn = dnls.begin(); @@ -275,6 +282,7 @@ class MClientReply : public Message { ++st._dir_size; } } + */ void set_trace_dist(CInode *in, int whoami) { st._num_trace_in = 0; -- 2.39.5