From 9bbc4720a51b51d4ecd616ddc82e953b8460311b Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 14 Jul 2008 13:22:43 -0700 Subject: [PATCH] mds: .snap mostly working --- src/client/Client.cc | 88 +++++++++++++++++++++--------- src/client/Client.h | 6 +-- src/include/nstring.h | 4 ++ src/include/types.h | 2 + src/mds/CDir.cc | 15 +++++- src/mds/CDir.h | 10 +--- src/mds/CInode.cc | 4 +- src/mds/MDCache.cc | 26 ++++++--- src/mds/MDCache.h | 10 ++-- src/mds/Migrator.cc | 4 +- src/mds/Server.cc | 121 +++++++++++++++++++++++++++--------------- src/mds/Server.h | 5 +- src/mds/mdstypes.h | 29 ++++++---- 13 files changed, 217 insertions(+), 107 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index dd6371c77ea9a..a1a644de28f18 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -85,6 +85,7 @@ ostream& operator<<(ostream &out, Inode &in) << " cap_refs=" << in.cap_refs << " open=" << in.open_by_mode << " ref=" << in.ref + << " parent=" << in.dn << ")"; return out; } @@ -540,13 +541,16 @@ Inode* Client::insert_trace(MClientReply *reply, utime_t from) } __u16 numi, numd; + __s16 snapdirpos; ::decode(numi, p); ::decode(numd, p); - dout(10) << "insert_trace got " << numi << " inodes, " << numd << " dentries" << dendl; + ::decode(snapdirpos, p); + dout(10) << "insert_trace got " << numi << " inodes, " << numd << " dentries, snapdir at " << snapdirpos << dendl; + int icount = 0; // decode - LeaseStat ilease[numi]; - InodeStat ist[numi]; + LeaseStat ilease[numi], snapdirlease; + InodeStat ist[numi], snapdirst; DirStat dst[numd]; string dname[numd]; LeaseStat dlease[numd]; @@ -561,7 +565,11 @@ Inode* Client::insert_trace(MClientReply *reply, utime_t from) inode: if (!ileft) goto done; - ileft--; + ileft--; icount++; + if (icount == snapdirpos) { + snapdirst.decode(p); + ::decode(snapdirlease, p); + } ist[ileft].decode(p); ::decode(ilease[ileft], p); @@ -590,6 +598,7 @@ Inode* Client::insert_trace(MClientReply *reply, utime_t from) curi = inode_map[vino]; } update_inode(curi, &ist[0], &ilease[0], from); + dout(10) << " (base) curi " << *curi << dendl; for (unsigned i=0; iopen_dir(); @@ -603,6 +612,20 @@ Inode* Client::insert_trace(MClientReply *reply, utime_t from) } curi = insert_dentry_inode(dir, dname[i], &dlease[i], &ist[i+1], &ilease[i+1], from); + dout(10) << " curi " << *curi << dendl; + + if ((int)i == numi-snapdirpos-1) { + Inode *snapdiri = open_snapdir(curi); + dout(10) << " snapdir " << *snapdiri << dendl; + char s[20]; + sprintf(s, "%llu", (unsigned long long)snapdirst.vino.snapid); + string snapname = s; + Dir *snapdir = snapdiri->open_dir(); + curi = insert_dentry_inode(snapdir, snapname, &snapdirlease, // FIXME + &snapdirst, &snapdirlease, from); + dout(10) << " snapped diri " << *curi << dendl; + } + update_dir_dist(curi, &dst[i]); // dir stat info is attached to inode... } assert(p.end()); @@ -2767,7 +2790,7 @@ int Client::_readdir_get_frag(DirResult *dirp) dout(10) << "_readdir_get_frag " << dirp << " on " << dirp->path << " fg " << fg << dendl; int op = CEPH_MDS_OP_READDIR; - if (dirp->inode && dirp->inode->snapid == SNAPDIR) + if (dirp->inode && dirp->inode->snapid == CEPH_SNAPDIR) op = CEPH_MDS_OP_LSSNAP; MClientRequest *req = new MClientRequest(op); @@ -3801,6 +3824,28 @@ int Client::_rmsnap(const filepath& path, const char *name, int uid, int gid) #define FUSE_SET_ATTR_ATIME (1 << 4) #define FUSE_SET_ATTR_MTIME (1 << 5) + +Inode *Client::open_snapdir(Inode *diri) +{ + Inode *in; + vinodeno_t vino(diri->ino(), CEPH_SNAPDIR); + if (!inode_map.count(vino)) { + in = new Inode(vino, &diri->inode.layout); + in->inode = diri->inode; + in->snapid = CEPH_SNAPDIR; + in->inode.mode = S_IFDIR | 0600; + in->dirfragtree.clear(); + inode_map[vino] = in; + in->snapdir_parent = diri; + diri->get(); + dout(10) << "open_snapdir created snapshot inode " << *in << dendl; + } else { + in = inode_map[vino]; + dout(10) << "open_snapdir had snapshot inode " << *in << dendl; + } + return in; +} + int Client::ll_lookup(vinodeno_t parent, const char *name, struct stat *attr, int uid, int gid) { Mutex::Locker lock(client_lock); @@ -3832,21 +3877,7 @@ int Client::ll_lookup(vinodeno_t parent, const char *name, struct stat *attr, in // .snapshot? if (dname == g_conf.client_snapdir && diri->snapid == CEPH_NOSNAP) { - vinodeno_t vino(parent.ino, SNAPDIR); - if (!inode_map.count(vino)) { - in = new Inode(vino, &diri->inode.layout); - in->inode = diri->inode; - in->snapid = SNAPDIR; - in->inode.mode = S_IFDIR | 0600; - in->dirfragtree.clear(); - inode_map[vino] = in; - in->snapdir_parent = diri; - diri->get(); - dout(10) << " created snapshot inode " << *in << dendl; - } else { - in = inode_map[vino]; - dout(10) << " had snapshot inode " << *in << dendl; - } + in = open_snapdir(diri); } else { // get the inode if (diri->dir && @@ -3862,7 +3893,16 @@ int Client::ll_lookup(vinodeno_t parent, const char *name, struct stat *attr, in filepath path; diri->make_path(path); path.push_dentry(name); - _do_lstat(path, 0, &in, uid, gid); + dout(10) << "ll_lookup on " << path << dendl; + //_do_lstat(path, 0, &in, uid, gid); + MClientRequest *req = new MClientRequest(CEPH_MDS_OP_LSTAT); + req->head.args.stat.mask = 0; + req->set_filepath(path); + + MClientReply *reply = make_request(req, uid, gid, &in, 0); + r = reply->get_result(); + dout(10) << "ll_lookup lstat res is " << r << dendl; + delete reply; } } @@ -3870,7 +3910,7 @@ int Client::ll_lookup(vinodeno_t parent, const char *name, struct stat *attr, in fill_stat(in, attr); _ll_get(in); } else { - r = -ENOENT; + if (!r) r = -ENOENT; attr->st_ino = 0; } @@ -4385,7 +4425,7 @@ int Client::ll_opendir(vinodeno_t vino, void **dirpp, int uid, int gid) assert(diri); filepath path; - if (vino.snapid == SNAPDIR) { + if (vino.snapid == CEPH_SNAPDIR) { Inode *livediri = inode_map[vinodeno_t(vino.ino, CEPH_NOSNAP)]; assert(livediri); livediri->make_path(path); @@ -4394,7 +4434,7 @@ int Client::ll_opendir(vinodeno_t vino, void **dirpp, int uid, int gid) dout(10) << " ino path is " << path << dendl; int r = 0; - if (vino.snapid == SNAPDIR) { + if (vino.snapid == CEPH_SNAPDIR) { *dirpp = new DirResult(path, diri); } else { r = _opendir(path.c_str(), (DirResult**)dirpp); diff --git a/src/client/Client.h b/src/client/Client.h index 7d8cb9f8e2f9c..c2e3b66f3c767 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -50,9 +50,6 @@ using std::fstream; using namespace __gnu_cxx; -#define SNAPDIR (CEPH_NOSNAP-1) - - class MStatfsReply; class MClientSession; @@ -628,6 +625,9 @@ protected: } } + Inode *open_snapdir(Inode *diri); + + // file handles, etc. filepath cwd; interval_set free_fd_set; // unused fds diff --git a/src/include/nstring.h b/src/include/nstring.h index ee7d448d4323b..01812dc1b6249 100644 --- a/src/include/nstring.h +++ b/src/include/nstring.h @@ -9,6 +9,10 @@ typedef tstring nstring; typedef cstring nstring; #endif +static inline bool operator==(const nstring &l, const char *s) { + return strcmp(l.c_str(), s) == 0; +} + static inline bool operator==(const nstring &l, const nstring &r) { return l.length() == r.length() && memcmp(l.data(), r.data(), l.length()) == 0; } diff --git a/src/include/types.h b/src/include/types.h index bdf65acf19819..03a7c3d3a701b 100644 --- a/src/include/types.h +++ b/src/include/types.h @@ -154,6 +154,8 @@ inline void decode(snapid_t &i, bufferlist::iterator &p) { decode(i.val, p); } inline ostream& operator<<(ostream& out, snapid_t s) { if (s == CEPH_NOSNAP) return out << "head"; + else if (s == CEPH_SNAPDIR) + return out << "snapdir"; else return out << s.val; } diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 06d071ddfe21f..96e398b23e1f3 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -179,7 +179,20 @@ CDir::CDir(CInode *in, frag_t fg, MDCache *mdcache, bool auth) : - +CDentry *CDir::lookup(const char *name, snapid_t snap) +{ + dout(20) << "lookup (" << snap << ", '" << name << "')" << dendl; + map_t::iterator iter = items.lower_bound(dentry_key_t(snap, name)); + if (iter == items.end()) + return 0; + if (iter->second->name == name && + iter->second->first <= snap) { + dout(20) << " hit -> " << iter->first << dendl; + return iter->second; + } + dout(20) << " miss -> " << iter->first << dendl; + return 0; +} diff --git a/src/mds/CDir.h b/src/mds/CDir.h index c96caf159cd7d..17073b8b90ce8 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -197,7 +197,7 @@ public: public: //typedef hash_map map_t; // there is a bug somewhere, valgrind me. //typedef map map_t; - typedef map map_t; + typedef map map_t; protected: // contents @@ -292,13 +292,7 @@ protected: CDentry* lookup(const nstring& ns, snapid_t snap=CEPH_NOSNAP) { return lookup(ns.c_str(), snap); } - CDentry* lookup(const char *n, snapid_t snap=CEPH_NOSNAP) { - map_t::iterator iter = items.find(dentry_key_t(snap, n)); - if (iter == items.end()) - return 0; - else - return iter->second; - } + CDentry* lookup(const char *n, snapid_t snap=CEPH_NOSNAP); CDentry* add_null_dentry(const nstring& dname, snapid_t first=1, snapid_t last=CEPH_NOSNAP); diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index bf6bd65708b6c..46ea94888cc23 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -1051,7 +1051,7 @@ void CInode::encode_inodestat(bufferlist& bl, snapid_t snapid) // pick a version! inode_t *i = &inode; bufferlist xbl; - if (!old_inodes.empty()) { + if (snapid && !old_inodes.empty()) { map::iterator p = old_inodes.lower_bound(snapid); if (p != old_inodes.end()) { assert(p->second.first <= snapid && snapid <= p->first); @@ -1066,7 +1066,7 @@ void CInode::encode_inodestat(bufferlist& bl, snapid_t snapid) struct ceph_mds_reply_inode e; memset(&e, 0, sizeof(e)); e.ino = i->ino; - e.snapid = snapid; + e.snapid = snapid ? (__u64)snapid:CEPH_NOSNAP; // 0 -> NOSNAP e.version = i->version; e.layout = i->layout; i->ctime.encode_timeval(&e.ctime); diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index dddd053cdb4fc..ec7669fa7face 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -1010,8 +1010,8 @@ void MDCache::journal_dirty_inode(EMetaBlob *metablob, CInode *in, snapid_t foll << *in << dendl; } } else { - // is dn within current snap? - if (follows < dn->first) { + // are we within the current snap? + if (follows < in->first) { metablob->add_primary_dentry(dn, true, in, in->get_projected_inode()); } else { snapid_t oldfirst = dn->first; @@ -1023,7 +1023,12 @@ void MDCache::journal_dirty_inode(EMetaBlob *metablob, CInode *in, snapid_t foll dout(10) << " olddn " << *olddn << dendl; metablob->add_primary_dentry(olddn, true); - metablob->add_primary_dentry(dn, true, in, in->get_projected_inode()); + inode_t *pi; + if (in->is_projected()) + pi = in->get_previous_projected_inode(); // mkdir/mknod/symlink don't bother to project new inodes + else + pi = &in->inode; + metablob->add_primary_dentry(dn, true, in, pi); } } } @@ -4196,7 +4201,7 @@ Context *MDCache::_get_waiter(MDRequest *mdr, Message *req) int MDCache::path_traverse(MDRequest *mdr, Message *req, // who filepath& origpath, // what vector& trace, // result - snapid_t *psnapid, + snapid_t *psnapid, CInode **psnapdiri, bool follow_trailing_symlink, // how int onfail) { @@ -4209,6 +4214,8 @@ int MDCache::path_traverse(MDRequest *mdr, Message *req, // who set< pair > symlinks_resolved; snapid_t snapid = CEPH_NOSNAP; + if (psnapdiri) + *psnapdiri = 0; // root CInode *cur = get_inode(origpath.get_ino()); @@ -4235,7 +4242,8 @@ int MDCache::path_traverse(MDRequest *mdr, Message *req, // who unsigned depth = 0; while (depth < path.depth()) { - dout(12) << "traverse: path seg depth " << depth << " = " << path[depth] << dendl; + dout(12) << "traverse: path seg depth " << depth << " '" << path[depth] + << "' snapid " << snapid << dendl; if (!cur->is_dir()) { dout(7) << "traverse: " << *cur << " not a dir " << dendl; @@ -4247,6 +4255,8 @@ int MDCache::path_traverse(MDRequest *mdr, Message *req, // who dout(10) << "traverse: snapdir" << dendl; snapid = CEPH_SNAPDIR; depth++; + assert(psnapdiri); + *psnapdiri = cur; continue; } if (snapid == CEPH_SNAPDIR) { @@ -4502,7 +4512,7 @@ int MDCache::path_traverse(MDRequest *mdr, Message *req, // who // success. if (psnapid) - *psnapid = CEPH_NOSNAP; + *psnapid = snapid; if (mds->logger) mds->logger->inc("thit"); return 0; } @@ -6252,8 +6262,8 @@ void MDCache::handle_dir_update(MDirUpdate *m) dout(5) << "trying discover on dir_update for " << path << dendl; int r = path_traverse(0, m, - path, trace, NULL, true, - MDS_TRAVERSE_DISCOVER); + path, trace, NULL, NULL, + true, MDS_TRAVERSE_DISCOVER); if (r > 0) return; assert(r == 0); diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index b4485dd5cb343..22d4d30177e88 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -225,6 +225,8 @@ struct MDRequest : public Mutation { vector trace; // original path traversal. CInode *ref; // reference inode. if there is only one, and its path is pinned. + CInode *ref_snapdiri; + snapid_t ref_snapid; // -- i am a slave request MMDSSlaveRequest *slave_request; // slave request (if one is pending; implies slave == true) @@ -276,19 +278,19 @@ struct MDRequest : public Mutation { // --------------------------------------------------- MDRequest() : - session(0), client_request(0), ref(0), + session(0), client_request(0), ref(0), ref_snapdiri(0), ref_snapid(CEPH_NOSNAP), slave_request(0), internal_op(-1), _more(0) {} MDRequest(metareqid_t ri, MClientRequest *req) : Mutation(ri), - session(0), client_request(req), ref(0), + session(0), client_request(req), ref(0), ref_snapdiri(0), slave_request(0), internal_op(-1), _more(0) {} MDRequest(metareqid_t ri, int by) : Mutation(ri, by), - session(0), client_request(0), ref(0), + session(0), client_request(0), ref(0), ref_snapdiri(0), slave_request(0), internal_op(-1), _more(0) {} @@ -742,7 +744,7 @@ public: Context *_get_waiter(MDRequest *mdr, Message *req); int path_traverse(MDRequest *mdr, Message *req, filepath& path, - vector& trace, snapid_t *psnap, + vector& trace, snapid_t *psnap, CInode **psnapdiri, bool follow_trailing_sym, int onfail); bool path_is_mine(filepath& path); diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index aae86f38a6ed9..6cc4e1dc0015f 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -986,7 +986,7 @@ int Migrator::encode_export_dir(bufferlist& exportbl, dout(7) << "encode_export_dir exporting " << *dn << dendl; // dn name - ::encode(it->first, exportbl); + ::encode(dn->name, exportbl); // state dn->encode_export(exportbl); @@ -1402,7 +1402,7 @@ void Migrator::handle_export_discover(MExportDirDiscover *m) // must discover it! filepath fpath(m->get_path()); vector trace; - int r = cache->path_traverse(0, m, fpath, trace, NULL, true, MDS_TRAVERSE_DISCOVER); + int r = cache->path_traverse(0, m, fpath, trace, NULL, NULL, true, MDS_TRAVERSE_DISCOVER); if (r > 0) return; // wait if (r < 0) { dout(7) << "handle_export_discover_2 failed to discover or not dir " << m->get_path() << ", NAK" << dendl; diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 573471672ad31..6f65c155b580f 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -555,8 +555,12 @@ void Server::reply_request(MDRequest *mdr, MClientReply *reply, CInode *tracei, // infer tracei/tracedn from mdr? + snapid_t snapid = CEPH_NOSNAP; + CInode *snapdiri = 0; if (!tracei && !tracedn && mdr->ref) { tracei = mdr->ref; + snapdiri = mdr->ref_snapdiri; + snapid = mdr->ref_snapid; dout(20) << "inferring tracei to be " << *tracei << dendl; if (!mdr->trace.empty()) { tracedn = mdr->trace.back(); @@ -578,7 +582,7 @@ void Server::reply_request(MDRequest *mdr, MClientReply *reply, CInode *tracei, } else { // send reply, with trace, and possible leases if (tracei || tracedn) - set_trace_dist(session, reply, tracei, tracedn); + set_trace_dist(session, reply, tracei, tracedn, snapid, snapdiri); messenger->send_message(reply, client_inst); } @@ -595,13 +599,15 @@ void Server::reply_request(MDRequest *mdr, MClientReply *reply, CInode *tracei, * * trace is in reverse order (i.e. root inode comes last) */ -void Server::set_trace_dist(Session *session, MClientReply *reply, CInode *in, CDentry *dn) +void Server::set_trace_dist(Session *session, MClientReply *reply, CInode *in, CDentry *dn, + snapid_t snapid, CInode *snapdiri) { // inode, dentry, dir, ..., inode bufferlist bl; int whoami = mds->get_nodeid(); int client = session->get_client(); __u16 numi = 0, numdn = 0; + __s16 snapdirpos = -1; // choose lease duration utime_t now = g_clock.now(); @@ -615,10 +621,19 @@ void Server::set_trace_dist(Session *session, MClientReply *reply, CInode *in, C } inode: - in->encode_inodestat(bl); - lmask = mds->locker->issue_client_lease(in, client, bl, now, session); numi++; - dout(20) << " trace added " << lmask << " " << *in << dendl; + in->encode_inodestat(bl, snapid); + lmask = mds->locker->issue_client_lease(in, client, bl, now, session); + dout(20) << " trace added " << lmask << " snapid " << snapid << " " << *in << dendl; + + if (snapid != CEPH_NOSNAP && in == snapdiri) { + snapid = CEPH_NOSNAP; + snapdirpos = numi; + dout(10) << " snapdiri at pos " << snapdirpos << dendl; + in->encode_inodestat(bl, snapid); + lmask = mds->locker->issue_client_lease(in, client, bl, now, session); + dout(20) << " trace added " << lmask << " snapid " << snapid << " " << *in << dendl; + } if (!dn) dn = in->get_parent_dn(); @@ -629,7 +644,7 @@ void Server::set_trace_dist(Session *session, MClientReply *reply, CInode *in, C ::encode(dn->get_name(), bl); lmask = mds->locker->issue_client_lease(dn, client, bl, now, session); numdn++; - dout(20) << " trace added " << lmask << " " << *dn << dendl; + dout(20) << " trace added " << lmask << " snapid " << snapid << " " << *dn << dendl; // dir #ifdef MDS_VERIFY_FRAGSTAT @@ -649,6 +664,7 @@ done: bufferlist fbl; ::encode(numi, fbl); ::encode(numdn, fbl); + ::encode(snapdirpos, fbl); fbl.claim_append(bl); reply->set_trace(fbl); } @@ -1249,8 +1265,8 @@ CDir *Server::traverse_to_auth_dir(MDRequest *mdr, vector &trace, file // traverse to parent dir snapid_t snapid; int r = mdcache->path_traverse(mdr, mdr->client_request, - refpath, trace, &snapid, false, - MDS_TRAVERSE_FORWARD); + refpath, trace, &snapid, &mdr->ref_snapdiri, + false, MDS_TRAVERSE_FORWARD); if (r > 0) return 0; // delayed if (r < 0) { reply_request(mdr, r); @@ -1277,7 +1293,7 @@ CDir *Server::traverse_to_auth_dir(MDRequest *mdr, vector &trace, file -CInode* Server::rdlock_path_pin_ref(MDRequest *mdr, snapid_t *psnapid, +CInode* Server::rdlock_path_pin_ref(MDRequest *mdr, bool want_auth, bool rdlock_dft) { dout(10) << "rdlock_path_pin_ref " << *mdr << dendl; @@ -1299,8 +1315,8 @@ CInode* Server::rdlock_path_pin_ref(MDRequest *mdr, snapid_t *psnapid, vector trace; int r = mdcache->path_traverse(mdr, req, refpath, - trace, psnapid, req->follow_trailing_symlink(), - MDS_TRAVERSE_FORWARD); + trace, &mdr->ref_snapid, &mdr->ref_snapdiri, + req->follow_trailing_symlink(), MDS_TRAVERSE_FORWARD); if (r > 0) return false; // delayed if (r < 0) { // error reply_request(mdr, r); @@ -1844,8 +1860,7 @@ void Server::handle_client_readdir(MDRequest *mdr) { MClientRequest *req = mdr->client_request; int client = req->get_orig_source().num(); - snapid_t snapid; - CInode *diri = rdlock_path_pin_ref(mdr, &snapid, false, true); // rdlock dirfragtreelock! + CInode *diri = rdlock_path_pin_ref(mdr, false, true); // rdlock dirfragtreelock! if (!diri) return; // it's a directory, right? @@ -1893,6 +1908,9 @@ void Server::handle_client_readdir(MDRequest *mdr) mdr->now = g_clock.real_now(); + snapid_t snapid = mdr->ref_snapid; + dout(10) << "snapid " << snapid << dendl; + // build dir contents bufferlist dirbl, dnbl; dir->encode_dirstat(dirbl, mds->get_nodeid()); @@ -1930,13 +1948,13 @@ void Server::handle_client_readdir(MDRequest *mdr) } assert(in); - dout(12) << "including inode " << *in << dendl; - // dentry + dout(12) << "including dn " << *dn << dendl; ::encode(dn->name, dnbl); mds->locker->issue_client_lease(dn, client, dnbl, mdr->now, mdr->session); // inode + dout(12) << "including inode " << *in << dendl; in->encode_inodestat(dnbl); mds->locker->issue_client_lease(in, client, dnbl, mdr->now, mdr->session); numfiles++; @@ -1972,9 +1990,10 @@ class C_MDS_mknod_finish : public Context { MDRequest *mdr; CDentry *dn; CInode *newi; + snapid_t follows; public: - C_MDS_mknod_finish(MDS *m, MDRequest *r, CDentry *d, CInode *ni) : - mds(m), mdr(r), dn(d), newi(ni) {} + C_MDS_mknod_finish(MDS *m, MDRequest *r, CDentry *d, CInode *ni, snapid_t f) : + mds(m), mdr(r), dn(d), newi(ni), follows(f) {} void finish(int r) { assert(r == 0); @@ -2012,7 +2031,9 @@ void Server::handle_client_mknod(MDRequest *mdr) CDentry *dn = rdlock_path_xlock_dentry(mdr, false, false); if (!dn) return; + snapid_t follows = dn->dir->inode->find_snaprealm()->get_latest_snap(); mdr->now = g_clock.real_now(); + CInode *newi = prepare_new_inode(mdr, dn->dir); assert(newi); @@ -2025,7 +2046,8 @@ void Server::handle_client_mknod(MDRequest *mdr) newi->inode.dirstat.rfiles = 1; newi->projected_parent = dn; - + dn->first = newi->first = follows+1; + dout(10) << "mknod mode " << newi->inode.mode << " rdev " << newi->inode.rdev << dendl; // prepare finisher @@ -2033,12 +2055,12 @@ void Server::handle_client_mknod(MDRequest *mdr) EUpdate *le = new EUpdate(mdlog, "mknod"); le->metablob.add_client_req(req->get_reqid()); le->metablob.add_allocated_ino(newi->ino(), mds->idalloc->get_version()); - + mds->locker->predirty_nested(mdr, &le->metablob, newi, dn->dir, PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); - mdcache->journal_dirty_inode(&le->metablob, newi, dn->dir->inode->find_snaprealm()->get_latest_snap()); + mdcache->journal_dirty_inode(&le->metablob, newi, follows); // log + wait - mdlog->submit_entry(le, new C_MDS_mknod_finish(mds, mdr, dn, newi)); + mdlog->submit_entry(le, new C_MDS_mknod_finish(mds, mdr, dn, newi, follows)); } @@ -2053,7 +2075,9 @@ void Server::handle_client_mkdir(MDRequest *mdr) if (!dn) return; // new inode + snapid_t follows = dn->dir->inode->find_snaprealm()->get_latest_snap(); mdr->now = g_clock.real_now(); + CInode *newi = prepare_new_inode(mdr, dn->dir); assert(newi); @@ -2066,6 +2090,8 @@ void Server::handle_client_mkdir(MDRequest *mdr) newi->inode.version = dn->pre_dirty() - 1; newi->inode.dirstat.rsubdirs = 1; + dn->first = newi->first = follows+1; + // ...and that new dir is empty. CDir *newdir = newi->get_or_open_dirfrag(mds->mdcache, frag_t()); newdir->mark_complete(); @@ -2080,11 +2106,11 @@ void Server::handle_client_mkdir(MDRequest *mdr) le->metablob.add_allocated_ino(newi->ino(), mds->idalloc->get_version()); mds->locker->predirty_nested(mdr, &le->metablob, newi, dn->dir, PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); //le->metablob.add_primary_dentry(dn, true, newi, &newi->inode); - mdcache->journal_dirty_inode(&le->metablob, newi, dn->dir->inode->find_snaprealm()->get_latest_snap()); + mdcache->journal_dirty_inode(&le->metablob, newi, follows); le->metablob.add_dir(newdir, true, true); // dirty AND complete // log + wait - mdlog->submit_entry(le, new C_MDS_mknod_finish(mds, mdr, dn, newi)); + mdlog->submit_entry(le, new C_MDS_mknod_finish(mds, mdr, dn, newi, follows)); } @@ -2098,6 +2124,7 @@ void Server::handle_client_symlink(MDRequest *mdr) if (!dn) return; mdr->now = g_clock.real_now(); + snapid_t follows = dn->dir->inode->find_snaprealm()->get_latest_snap(); CInode *newi = prepare_new_inode(mdr, dn->dir); assert(newi); @@ -2112,16 +2139,18 @@ void Server::handle_client_symlink(MDRequest *mdr) newi->inode.version = dn->pre_dirty() - 1; newi->inode.dirstat.rfiles = 1; + dn->first = newi->first = follows+1; + // prepare finisher mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "symlink"); le->metablob.add_client_req(req->get_reqid()); le->metablob.add_allocated_ino(newi->ino(), mds->idalloc->get_version()); mds->locker->predirty_nested(mdr, &le->metablob, newi, dn->dir, PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); - mdcache->journal_dirty_inode(&le->metablob, newi, dn->dir->inode->find_snaprealm()->get_latest_snap()); + mdcache->journal_dirty_inode(&le->metablob, newi, follows); // log + wait - mdlog->submit_entry(le, new C_MDS_mknod_finish(mds, mdr, dn, newi)); + mdlog->submit_entry(le, new C_MDS_mknod_finish(mds, mdr, dn, newi, follows)); } @@ -2151,8 +2180,8 @@ void Server::handle_client_link(MDRequest *mdr) dout(7) << "handle_client_link discovering target " << targetpath << dendl; vector targettrace; int r = mdcache->path_traverse(mdr, req, - targetpath, targettrace, NULL, false, - MDS_TRAVERSE_DISCOVER); + targetpath, targettrace, NULL, NULL, + false, MDS_TRAVERSE_DISCOVER); if (r > 0) return; // wait if (targettrace.empty()) r = -EINVAL; if (r < 0) { @@ -2690,8 +2719,8 @@ void Server::handle_client_unlink(MDRequest *mdr) // traverse to path vector trace; int r = mdcache->path_traverse(mdr, req, - req->get_filepath(), trace, NULL, false, - MDS_TRAVERSE_FORWARD); + req->get_filepath(), trace, NULL, NULL, + false, MDS_TRAVERSE_FORWARD); if (r > 0) return; if (trace.empty()) r = -EINVAL; // can't unlink root if (r < 0) { @@ -3039,8 +3068,8 @@ void Server::handle_client_rename(MDRequest *mdr) // traverse to src vector srctrace; int r = mdcache->path_traverse(mdr, req, - srcpath, srctrace, NULL, false, - MDS_TRAVERSE_DISCOVER); + srcpath, srctrace, NULL, NULL, + false, MDS_TRAVERSE_DISCOVER); if (r > 0) return; if (r < 0) { reply_request(mdr, r); @@ -3743,8 +3772,8 @@ void Server::handle_slave_rename_prep(MDRequest *mdr) dout(10) << " dest " << destpath << dendl; vector trace; int r = mdcache->path_traverse(mdr, mdr->slave_request, - destpath, trace, NULL, false, - MDS_TRAVERSE_DISCOVERXLOCK); + destpath, trace, NULL, NULL, + false, MDS_TRAVERSE_DISCOVERXLOCK); if (r > 0) return; assert(r == 0); // we shouldn't get an error here! @@ -3756,8 +3785,8 @@ void Server::handle_slave_rename_prep(MDRequest *mdr) filepath srcpath(mdr->slave_request->srcdnpath); dout(10) << " src " << srcpath << dendl; r = mdcache->path_traverse(mdr, mdr->slave_request, - srcpath, trace, NULL, false, - MDS_TRAVERSE_DISCOVERXLOCK); + srcpath, trace, NULL, NULL, + false, MDS_TRAVERSE_DISCOVERXLOCK); if (r > 0) return; assert(r == 0); @@ -4567,9 +4596,10 @@ class C_MDS_openc_finish : public Context { MDRequest *mdr; CDentry *dn; CInode *newi; + snapid_t follows; public: - C_MDS_openc_finish(MDS *m, MDRequest *r, CDentry *d, CInode *ni) : - mds(m), mdr(r), dn(d), newi(ni) {} + C_MDS_openc_finish(MDS *m, MDRequest *r, CDentry *d, CInode *ni, snapid_t f) : + mds(m), mdr(r), dn(d), newi(ni), follows(f) {} void finish(int r) { assert(r == 0); @@ -4624,6 +4654,8 @@ void Server::handle_client_openc(MDRequest *mdr) // create inode. mdr->now = g_clock.real_now(); + snapid_t follows = dn->dir->inode->find_snaprealm()->get_latest_snap(); + CInode *in = prepare_new_inode(mdr, dn->dir); assert(in); @@ -4634,6 +4666,9 @@ void Server::handle_client_openc(MDRequest *mdr) in->inode.version = dn->pre_dirty() - 1; in->inode.max_size = in->get_layout_size_increment(); in->inode.dirstat.rfiles = 1; + + in->projected_parent = dn; + dn->first = in->first = follows+1; // prepare finisher mdr->ls = mdlog->get_current_segment(); @@ -4641,10 +4676,10 @@ void Server::handle_client_openc(MDRequest *mdr) le->metablob.add_client_req(req->get_reqid()); le->metablob.add_allocated_ino(in->ino(), mds->idalloc->get_version()); mds->locker->predirty_nested(mdr, &le->metablob, in, dn->dir, PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); - le->metablob.add_primary_dentry(dn, true, in, &in->inode); + mdcache->journal_dirty_inode(&le->metablob, in, follows); // log + wait - C_MDS_openc_finish *fin = new C_MDS_openc_finish(mds, mdr, dn, in); + C_MDS_openc_finish *fin = new C_MDS_openc_finish(mds, mdr, dn, in, follows); mdlog->submit_entry(le, fin); } @@ -4680,8 +4715,8 @@ void Server::handle_client_lssnap(MDRequest *mdr) // traverse to path vector trace; int r = mdcache->path_traverse(mdr, req, - req->get_filepath(), trace, NULL, false, - MDS_TRAVERSE_FORWARD); + req->get_filepath(), trace, NULL, NULL, + false, MDS_TRAVERSE_FORWARD); if (r > 0) return; if (trace.empty()) r = -EINVAL; // can't snap root if (r < 0) { @@ -4766,8 +4801,8 @@ void Server::handle_client_mksnap(MDRequest *mdr) // traverse to path vector trace; int r = mdcache->path_traverse(mdr, req, - req->get_filepath(), trace, NULL, false, - MDS_TRAVERSE_FORWARD); + req->get_filepath(), trace, NULL, NULL, + false, MDS_TRAVERSE_FORWARD); if (r > 0) return; if (trace.empty()) r = -EINVAL; // can't snap root if (r < 0) { diff --git a/src/mds/Server.h b/src/mds/Server.h index cd3e93b4be999..c62027999630b 100644 --- a/src/mds/Server.h +++ b/src/mds/Server.h @@ -84,7 +84,8 @@ public: void dispatch_client_request(MDRequest *mdr); void reply_request(MDRequest *mdr, int r = 0, CInode *tracei = 0, CDentry *tracedn = 0); void reply_request(MDRequest *mdr, MClientReply *reply, CInode *tracei = 0, CDentry *tracedn = 0); - void set_trace_dist(Session *session, MClientReply *reply, CInode *in, CDentry *dn); + void set_trace_dist(Session *session, MClientReply *reply, CInode *in, CDentry *dn, + snapid_t snapid, CInode *snapdiri); void handle_slave_request(MMDSSlaveRequest *m); @@ -98,7 +99,7 @@ public: CDentry *prepare_null_dentry(MDRequest *mdr, CDir *dir, const string& dname, bool okexist=false); CInode* prepare_new_inode(MDRequest *mdr, CDir *dir); - CInode* rdlock_path_pin_ref(MDRequest *mdr, snapid_t *psnapid, bool want_auth, bool rdlock_dft=false); + CInode* rdlock_path_pin_ref(MDRequest *mdr, bool want_auth, bool rdlock_dft=false); CDentry* rdlock_path_xlock_dentry(MDRequest *mdr, bool okexist, bool mustexist); CDir* try_open_auth_dirfrag(CInode *diri, frag_t fg, MDRequest *mdr); diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index 8504f3b86b24e..43da6c2fc828f 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -321,18 +321,27 @@ WRITE_CLASS_ENCODER(fnode_t) // ======= // dentries -typedef pair dentry_key_t; +struct dentry_key_t { + snapid_t snapid; + const char *name; + dentry_key_t() : snapid(0), name(0) {} + dentry_key_t(snapid_t s, const char *n) : snapid(s), name(n) {} +}; -struct ltdentrykey +inline ostream& operator<<(ostream& out, const dentry_key_t &k) { - bool operator()(const dentry_key_t& k1, - const dentry_key_t& k2) const - { - return - k1.first < k2.first || - (k1.first == k2.first && strcmp(k1.second, k2.second) < 0); - } -}; + return out << "(" << k.name << "," << k.snapid << ")"; +} + +inline bool operator<(const dentry_key_t& k1, const dentry_key_t& k2) +{ + /* + * order by name, then snap + */ + int c = strcmp(k1.name, k2.name); + return + c < 0 || (c == 0 && k1.snapid < k2.snapid); +} -- 2.39.5