From 83f2e4fc46d78ec6d7915acb67591b713361e1ad Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 11 Jul 2008 21:45:07 -0700 Subject: [PATCH] client: hidden .snap dir, lssnap fixes --- src/client/Client.cc | 80 +++++++++++++++++++++++++++++++++---------- src/client/Client.h | 7 ++++ src/client/fuse_ll.cc | 11 ++++-- src/include/ceph_fs.h | 1 + src/include/frag.h | 3 ++ src/mds/CInode.cc | 66 +++++++++++++++++++++++++++++++++++ src/mds/CInode.h | 47 ++----------------------- src/mds/Server.cc | 54 ++++++++++++++++++++++++++--- src/mds/snap.cc | 8 ++--- src/mds/snap.h | 3 +- 10 files changed, 204 insertions(+), 76 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index b34a723a1d735..3740f20fa55eb 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -80,7 +80,8 @@ Logger *client_logger = 0; ostream& operator<<(ostream &out, Inode &in) { - out << in.inode.ino << "(" + out << in.inode.ino + << "s" << in.snapid << "(" << " cap_refs=" << in.cap_refs << " open=" << in.open_by_mode << " ref=" << in.ref @@ -2766,7 +2767,11 @@ int Client::_readdir_get_frag(DirResult *dirp) dout(10) << "_readdir_get_frag " << dirp << " on " << dirp->path << " fg " << fg << dendl; - MClientRequest *req = new MClientRequest(CEPH_MDS_OP_READDIR); + int op = CEPH_MDS_OP_READDIR; + if (dirp->inode && dirp->inode->snapid == SNAPDIR) + op = CEPH_MDS_OP_LSSNAP; + + MClientRequest *req = new MClientRequest(op); req->set_filepath(dirp->path); req->head.args.readdir.frag = fg; @@ -2786,6 +2791,8 @@ int Client::_readdir_get_frag(DirResult *dirp) dirp->inode = diri; diri->get(); } + if (!diri) + diri = dirp->inode; if (res == -EAGAIN) { dout(10) << "_readdir_get_frag got EAGAIN, retrying" << dendl; @@ -3823,22 +3830,42 @@ int Client::ll_lookup(vinodeno_t parent, const char *name, struct stat *attr, in goto out; } - // get the inode - if (diri->dir && - diri->dir->dentries.count(dname)) { - Dentry *dn = diri->dir->dentries[dname]; - if (dn->lease_mds >= 0 && dn->lease_ttl > now) { - touch_dn(dn); - in = dn->inode; - dout(1) << "ll_lookup " << parent << " " << name << " -> have valid lease on dentry" << dendl; + // .snapshot? + if (dname == g_conf.client_snapdir && + diri->snapid == CEPH_NOSNAP) { + vinodeno_t vino(parent.ino, SNAPDIR); + if (!inode_map.count(vino)) { + in = new Inode(vino, &diri->inode.layout); + in->inode = diri->inode; + in->snapid = SNAPDIR; + in->inode.mode = S_IFDIR | 0600; + in->dirfragtree.clear(); + inode_map[vino] = in; + diri->get(); + dout(10) << " created snapshot inode " << *in << dendl; + } else { + in = inode_map[vino]; + dout(10) << " had snapshot inode " << *in << dendl; + } + } else { + // get the inode + if (diri->dir && + diri->dir->dentries.count(dname)) { + Dentry *dn = diri->dir->dentries[dname]; + if (dn->lease_mds >= 0 && dn->lease_ttl > now) { + touch_dn(dn); + in = dn->inode; + dout(1) << "ll_lookup " << parent << " " << name << " -> have valid lease on dentry" << dendl; + } + } + if (!in) { + filepath path; + diri->make_path(path); + path.push_dentry(name); + _do_lstat(path, 0, &in, uid, gid); } - } - if (!in) { - filepath path; - diri->make_path(path); - path.push_dentry(name); - _do_lstat(path, 0, &in, uid, gid); } + if (in) { fill_stat(in, attr); _ll_get(in); @@ -3942,7 +3969,11 @@ int Client::ll_getattr(vinodeno_t vino, struct stat *attr, int uid, int gid) Inode *in = _ll_get_inode(vino); filepath path(in->ino()); - int res = _do_lstat(path, CEPH_STAT_MASK_INODE_ALL, &in, uid, gid); + int res; + if (vino.snapid < CEPH_NOSNAP) + res = 0; + else + res = _do_lstat(path, CEPH_STAT_MASK_INODE_ALL, &in, uid, gid); if (res == 0) fill_stat(in, attr); dout(3) << "ll_getattr " << vino << " = " << res << dendl; @@ -4352,11 +4383,22 @@ int Client::ll_opendir(vinodeno_t vino, void **dirpp, int uid, int gid) Inode *diri = inode_map[vino]; assert(diri); + filepath path; - diri->make_path(path); + if (vino.snapid == SNAPDIR) { + Inode *livediri = inode_map[vinodeno_t(vino.ino, CEPH_NOSNAP)]; + assert(livediri); + livediri->make_path(path); + } else + diri->make_path(path); dout(10) << " ino path is " << path << dendl; - int r = _opendir(path.c_str(), (DirResult**)dirpp); + int r = 0; + if (vino.snapid == SNAPDIR) { + *dirpp = new DirResult(path, diri); + } else { + r = _opendir(path.c_str(), (DirResult**)dirpp); + } tout << (unsigned long)*dirpp << std::endl; diff --git a/src/client/Client.h b/src/client/Client.h index 61582fa60a0ca..75ef31f85f041 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -50,6 +50,9 @@ using std::fstream; using namespace __gnu_cxx; +#define SNAPDIR (CEPH_NOSNAP-1) + + class MStatfsReply; class MClientSession; @@ -661,6 +664,10 @@ protected: in->put(n); if (in->ref == 0) { //cout << "put_inode deleting " << in << " " << in->inode.ino << std::endl; + if (in->snapid == SNAPDIR) { + vinodeno_t live(in->inode.ino, CEPH_NOSNAP); + put_inode(inode_map[live]); + } inode_map.erase(in->vino()); if (in == root) root = 0; delete in; diff --git a/src/client/fuse_ll.cc b/src/client/fuse_ll.cc index df89412f583bf..ec77555a6b1a5 100644 --- a/src/client/fuse_ll.cc +++ b/src/client/fuse_ll.cc @@ -35,9 +35,9 @@ int last_stag = 0; hash_map<__u64,int> snap_stag_map; hash_map stag_snap_map; -#define FINO_INO(x) ((x) & ((1ull<<40)-1ull)) -#define FINO_STAG(x) ((x) >> 40) -#define MAKE_FINO(i,s) ((i) | ((s) << 40)) +#define FINO_INO(x) ((x) & ((1ull<<48)-1ull)) +#define FINO_STAG(x) ((x) >> 48) +#define MAKE_FINO(i,s) ((i) | ((s) << 48)) static __u64 fino_snap(__u64 fino) { @@ -452,6 +452,9 @@ int ceph_fuse_ll_main(Client *c, int argc, const char *argv[]) client = c; + snap_stag_map[CEPH_NOSNAP] = 0; + stag_snap_map[0] = CEPH_NOSNAP; + // set up fuse argc/argv int newargc = 0; const char **newargv = (const char **) malloc((argc + 10) * sizeof(char *)); @@ -461,6 +464,8 @@ int ceph_fuse_ll_main(Client *c, int argc, const char *argv[]) newargv[newargc++] = "-o"; newargv[newargc++] = "allow_other"; + newargv[newargc++] = "-d"; + for (int argctr = 1; argctr < argc; argctr++) newargv[newargc++] = argv[argctr]; // go go gadget fuse diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 155d69a39f0f4..51e317c1f6cf2 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -599,6 +599,7 @@ static inline const char *ceph_mds_op_name(int op) case CEPH_MDS_OP_TRUNCATE: return "truncate"; case CEPH_MDS_OP_LTRUNCATE: return "ltruncate"; case CEPH_MDS_OP_FSYNC: return "fsync"; + case CEPH_MDS_OP_LSSNAP: return "lssnap"; case CEPH_MDS_OP_MKSNAP: return "mksnap"; case CEPH_MDS_OP_RMSNAP: return "rmsnap"; default: return "unknown"; diff --git a/src/include/frag.h b/src/include/frag.h index b142413c6247d..c62c55f67c8b7 100644 --- a/src/include/frag.h +++ b/src/include/frag.h @@ -162,6 +162,9 @@ public: void swap(fragtree_t& other) { _splits.swap(other._splits); } + void clear() { + _splits.clear(); + } // ------------- // accessors diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 08433b3307416..bf6bd65708b6c 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -983,6 +983,14 @@ CInodeDiscover* CInode::replicate_to( int rep ) // SNAP +snapid_t CInode::get_oldest_snap() +{ + snapid_t t = CEPH_NOSNAP; + if (!old_inodes.empty()) + t = old_inodes.begin()->second.first; + return MIN(t, first); +} + void CInode::open_snaprealm() { if (!snaprealm) { @@ -1038,6 +1046,64 @@ void CInode::decode_snap(bufferlist& snapbl) } +void CInode::encode_inodestat(bufferlist& bl, snapid_t snapid) +{ + // pick a version! + inode_t *i = &inode; + bufferlist xbl; + if (!old_inodes.empty()) { + map::iterator p = old_inodes.lower_bound(snapid); + if (p != old_inodes.end()) { + assert(p->second.first <= snapid && snapid <= p->first); + i = &p->second.inode; + ::encode(p->second.xattrs, xbl); + } + } + + /* + * note: encoding matches struct ceph_client_reply_inode + */ + struct ceph_mds_reply_inode e; + memset(&e, 0, sizeof(e)); + e.ino = i->ino; + e.snapid = snapid; + e.version = i->version; + e.layout = i->layout; + i->ctime.encode_timeval(&e.ctime); + i->mtime.encode_timeval(&e.mtime); + i->atime.encode_timeval(&e.atime); + e.time_warp_seq = i->time_warp_seq; + e.mode = i->mode; + e.uid = i->uid; + e.gid = i->gid; + e.nlink = i->nlink; + e.size = i->size; + e.max_size = i->max_size; + + e.files = i->dirstat.nfiles; + e.subdirs = i->dirstat.nsubdirs; + i->dirstat.rctime.encode_timeval(&e.rctime); + e.rbytes = i->dirstat.rbytes; + e.rfiles = i->dirstat.rfiles; + e.rsubdirs = i->dirstat.rsubdirs; + + e.rdev = i->rdev; + e.fragtree.nsplits = dirfragtree._splits.size(); + ::encode(e, bl); + for (map::iterator p = dirfragtree._splits.begin(); + p != dirfragtree._splits.end(); + p++) { + ::encode(p->first, bl); + ::encode(p->second, bl); + } + ::encode(symlink, bl); + + if (!xattrs.empty() && xbl.length() == 0) + ::encode(xattrs, xbl); + ::encode(xbl, bl); +} + + // IMPORT/EXPORT diff --git a/src/mds/CInode.h b/src/mds/CInode.h index a08d96701f0ff..38704db763c85 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -141,6 +141,7 @@ class CInode : public MDSCacheObject { map old_inodes; // key = last, value.first = first bool is_multiversion() { return snaprealm || inode.is_dir(); } + snapid_t get_oldest_snap(); loff_t last_journaled; // log offset for the last time i was journaled loff_t last_open_journaled; // log offset for the last journaled EOpen @@ -368,51 +369,7 @@ private: // for giving to clients - void encode_inodestat(bufferlist& bl) { - /* - * note: encoding matches struct ceph_client_reply_inode - */ - struct ceph_mds_reply_inode e; - memset(&e, 0, sizeof(e)); - e.ino = inode.ino; - e.version = inode.version; - e.layout = inode.layout; - inode.ctime.encode_timeval(&e.ctime); - inode.mtime.encode_timeval(&e.mtime); - inode.atime.encode_timeval(&e.atime); - e.time_warp_seq = inode.time_warp_seq; - e.mode = inode.mode; - e.uid = inode.uid; - e.gid = inode.gid; - e.nlink = inode.nlink; - e.size = inode.size; - e.max_size = inode.max_size; - - e.files = inode.dirstat.nfiles; - e.subdirs = inode.dirstat.nsubdirs; - inode.dirstat.rctime.encode_timeval(&e.rctime); - e.rbytes = inode.dirstat.rbytes; - e.rfiles = inode.dirstat.rfiles; - e.rsubdirs = inode.dirstat.rsubdirs; - - e.rdev = inode.rdev; - e.fragtree.nsplits = dirfragtree._splits.size(); - ::encode(e, bl); - for (map::iterator p = dirfragtree._splits.begin(); - p != dirfragtree._splits.end(); - p++) { - ::encode(p->first, bl); - ::encode(p->second, bl); - } - ::encode(symlink, bl); - - bufferlist xbl; - if (!xattrs.empty()) - ::encode(xattrs, xbl); - ::encode(xbl, bl); - - - } + void encode_inodestat(bufferlist& bl, snapid_t snapid=CEPH_NOSNAP); // -- locks -- diff --git a/src/mds/Server.cc b/src/mds/Server.cc index ba87d5b7b9e7f..b07caf7b021cd 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -4631,7 +4631,24 @@ void Server::handle_client_openc(MDRequest *mdr) +static void encode_empty_dirstat(bufferlist& bl) +{ + // encode fake dirstat + frag_t fg; + __s32 auth = CDIR_AUTH_PARENT; + __u32 zero; + ::encode(fg, bl); + ::encode(auth, bl); + ::encode(zero, bl); +} +static void encode_empty_lease(bufferlist& bl) +{ + LeaseStat e; + e.mask = -1; + e.duration_ms = -1; + ::encode(e, bl); +} // snaps @@ -4684,12 +4701,41 @@ void Server::handle_client_lssnap(MDRequest *mdr) return; SnapRealm *realm = diri->find_snaprealm(); - bufferlist snapinfo; - realm->get_snap_info(snapinfo); + map infomap; + realm->get_snap_info(infomap); + snapid_t oldest = diri->get_oldest_snap(); + dout(10) << " oldest snap for this inode is " << oldest << dendl; + + __u32 num = 0; + bufferlist dnbl; + for (map::iterator p = infomap.begin(); + p != infomap.end(); + p++) { + if (p->first < oldest) + continue; + + dout(10) << p->first << " -> " << *p->second << dendl; + + char nm[20]; + sprintf(nm, "%llu", (unsigned long long)p->second->snapid); + ::encode(nm, dnbl); + encode_empty_lease(dnbl); + diri->encode_inodestat(dnbl, p->first); + encode_empty_lease(dnbl); + num++; + } + + bufferlist dirbl; + + encode_empty_dirstat(dirbl); + + ::encode(num, dirbl); + dirbl.claim_append(dnbl); + MClientReply *reply = new MClientReply(req); - reply->set_dir_bl(snapinfo); - reply_request(mdr, reply); + reply->set_dir_bl(dirbl); + reply_request(mdr, reply, diri); } void Server::handle_client_mksnap(MDRequest *mdr) diff --git a/src/mds/snap.cc b/src/mds/snap.cc index 4a893717259b8..a85167656c829 100644 --- a/src/mds/snap.cc +++ b/src/mds/snap.cc @@ -134,7 +134,7 @@ const set& SnapRealm::update_snaps(snapid_t creating) } -void SnapRealm::get_snap_info(bufferlist& bl, snapid_t first, snapid_t last) +void SnapRealm::get_snap_info(map& infomap, snapid_t first, snapid_t last) { dout(10) << "get_snap_info snaps " << get_snaps() << dendl; @@ -142,7 +142,7 @@ void SnapRealm::get_snap_info(bufferlist& bl, snapid_t first, snapid_t last) for (map::iterator p = snaps.lower_bound(first); // first element >= first p != snaps.end() && p->first <= last; p++) - ::encode(p->second, bl); + infomap[p->first] = &p->second; // include snaps for parents during intervals that intersect [first,last] snapid_t thru = first; @@ -154,13 +154,13 @@ void SnapRealm::get_snap_info(bufferlist& bl, snapid_t first, snapid_t last) assert(oldparent->snaprealm); thru = MIN(last, p->first); - oldparent->snaprealm->get_snap_info(bl, + oldparent->snaprealm->get_snap_info(infomap, MAX(first, p->second.first), thru); ++thru; } if (thru <= last && parent) - parent->get_snap_info(bl, thru, last); + parent->get_snap_info(infomap, thru, last); } diff --git a/src/mds/snap.h b/src/mds/snap.h index 21555e5f82f19..83ffe46a94eef 100644 --- a/src/mds/snap.h +++ b/src/mds/snap.h @@ -122,9 +122,10 @@ struct SnapRealm { bool open_parents(MDRequest *mdr); void build_snap_set(set& s, snapid_t first, snapid_t last); + void get_snap_info(map& infomap, snapid_t first=0, snapid_t last=CEPH_NOSNAP); + const set& get_snaps(); const vector& get_snap_vector(); - void get_snap_info(bufferlist& snapinfo, snapid_t first=0, snapid_t last=CEPH_NOSNAP); const set& update_snaps(snapid_t adding=0); snapid_t get_latest_snap() { const set &snaps = get_snaps(); -- 2.39.5