From 82c970f6a21af87c082b8ecf1ed14ff8be957c2c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 8 Jan 2009 16:42:44 -0800 Subject: [PATCH] mds: send xattrs down to client efficiently Track what xattr_version the client (should) have in the capability so we can be smart about including the xattr blob in the inodestat and in caps messages. --- src/include/ceph_fs.h | 2 ++ src/mds/CInode.cc | 27 +++++++++++++++++++++------ src/mds/Capability.h | 3 ++- src/mds/Locker.cc | 25 ++++++++++++++++++------- src/mds/Server.cc | 4 +++- src/mds/mdstypes.h | 5 ++++- src/messages/MClientCaps.h | 4 ++++ 7 files changed, 54 insertions(+), 16 deletions(-) diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index d06e7a57890db..c201bab4a328f 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -852,6 +852,7 @@ struct ceph_mds_reply_inode { __le64 files, subdirs, rbytes, rfiles, rsubdirs; /* dir stats */ struct ceph_timespec rctime; struct ceph_frag_tree_head fragtree; + __le64 xattr_version; } __attribute__ ((packed)); /* followed by frag array, then symlink string, then xattr blob */ @@ -1049,6 +1050,7 @@ struct ceph_mds_caps { /* xattrlock */ __le32 xattr_len; + __le64 xattr_version; /* filelock */ __le64 size, max_size; diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 3e9b45d6ab07a..588d703ca207e 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -1290,7 +1290,9 @@ bool CInode::encode_inodestat(bufferlist& bl, Session *session, i = get_projected_inode(); else i = &inode; - bufferlist xbl; + + map *pxattrs = &xattrs; + if (snapid && is_multiversion()) { // for now at least, old_inodes is only defined/valid on the auth @@ -1305,7 +1307,7 @@ bool CInode::encode_inodestat(bufferlist& bl, Session *session, << dendl; assert(p->second.first <= snapid && snapid <= p->first); i = &p->second.inode; - ::encode(p->second.xattrs, xbl); + pxattrs = &p->second.xattrs; } } @@ -1340,8 +1342,12 @@ bool CInode::encode_inodestat(bufferlist& bl, Session *session, 
e.rdev = i->rdev; e.fragtree.nsplits = dirfragtree._splits.size(); - // include capability? Capability *cap = get_client_cap(client); + + bool had_latest_xattrs = cap && (cap->issued() & CEPH_CAP_XATTR_RDCACHE) && + cap->client_xattr_version == i->xattr_version; + + // include capability? if (snapid != CEPH_NOSNAP && !cap) { e.cap.caps = valid ? get_caps_allowed(false) : CEPH_STAT_CAP_INODE; e.cap.seq = 0; @@ -1381,6 +1387,18 @@ bool CInode::encode_inodestat(bufferlist& bl, Session *session, << " seq " << e.cap.seq << " mseq " << e.cap.mseq << dendl; + // xattr + bufferlist xbl; + e.xattr_version = i->xattr_version; + if (!had_latest_xattrs && + cap && + (cap->pending() & CEPH_CAP_XATTR_RDCACHE)) { + ::encode(*pxattrs, xbl); + if (cap) + cap->client_xattr_version = i->xattr_version; + dout(10) << "including xattrs version " << i->xattr_version << dendl; + } + // encode ::encode(e, bl); for (map::iterator p = dirfragtree._splits.begin(); @@ -1390,9 +1408,6 @@ bool CInode::encode_inodestat(bufferlist& bl, Session *session, ::encode(p->second, bl); } ::encode(symlink, bl); - - if (!xattrs.empty() && xbl.length() == 0) - ::encode(xattrs, xbl); ::encode(xbl, bl); return valid; diff --git a/src/mds/Capability.h b/src/mds/Capability.h index fb322db8b07a9..9e63e7e643ab7 100644 --- a/src/mds/Capability.h +++ b/src/mds/Capability.h @@ -203,6 +203,7 @@ public: int releasing; // only allow a single in-progress release (it may be waiting for log to flush) snapid_t client_follows; + version_t client_xattr_version; xlist::item session_caps_item; xlist *rdcaps_list; @@ -217,7 +218,7 @@ public: last_sent(0), mseq(0), suppress(0), stale(false), releasing(0), - client_follows(0), + client_follows(0), client_xattr_version(0), session_caps_item(this), rdcaps_list(rl), rdcaps_item(this), snaprealm_caps_item(this) { } ceph_seq_t get_mseq() { return mseq; } diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 9b6123afaf4c9..2e25643eebb5d 100644 --- a/src/mds/Locker.cc +++ 
b/src/mds/Locker.cc @@ -651,13 +651,24 @@ bool Locker::issue_caps(CInode *in) << " seq " << cap->get_last_seq() << " new pending " << ccap_string(after) << " was " << ccap_string(before) << dendl; - mds->send_message_client(new MClientCaps(CEPH_CAP_OP_GRANT, - in->inode, - in->find_snaprealm()->inode->ino(), - cap->get_last_seq(), - after, wanted, 0, - cap->get_mseq()), - it->first); + + MClientCaps *m = new MClientCaps(CEPH_CAP_OP_GRANT, + in->inode, + in->find_snaprealm()->inode->ino(), + cap->get_last_seq(), + after, wanted, 0, + cap->get_mseq()); + + // include xattrs if they're newer than what the client has + if ((after & CEPH_CAP_XATTR_RDCACHE) && + in->inode.xattr_version > cap->client_xattr_version) { + dout(10) << " including xattrs v " << in->inode.xattr_version << dendl; + ::encode(in->xattrs, m->xattrbl); + m->head.xattr_version = in->inode.xattr_version; + cap->client_xattr_version = in->inode.xattr_version; + } + + mds->send_message_client(m, it->first); } } } diff --git a/src/mds/Server.cc b/src/mds/Server.cc index de1658e29b89f..4ffde1ffeefc4 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -2027,6 +2027,7 @@ void Server::handle_client_setxattr(MDRequest *mdr) inode_t *pi = cur->project_inode(); pi->version = cur->pre_dirty(); pi->ctime = g_clock.real_now(); + pi->xattr_version++; // log + wait mdr->ls = mdlog->get_current_segment(); @@ -2086,7 +2087,8 @@ void Server::handle_client_removexattr(MDRequest *mdr) inode_t *pi = cur->project_inode(); pi->version = cur->pre_dirty(); pi->ctime = g_clock.real_now(); - + pi->xattr_version++; + // log + wait mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "removexattr"); diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index 8cd8d4e12c34b..e83e681cf4e91 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -299,13 +299,14 @@ struct inode_t { utime_t atime; // file data access time. 
uint32_t time_warp_seq; // count of (potential) mtime/atime timewarps (i.e., utimes()) - // dirfrag, recursive accounting + // dirfrag, recursive accounting frag_info_t dirstat; nest_info_t rstat, accounted_rstat; // special stuff version_t version; // auth only version_t file_data_version; // auth only + version_t xattr_version; // file type bool is_symlink() const { return (mode & S_IFMT) == S_IFLNK; } @@ -338,6 +339,7 @@ struct inode_t { ::encode(version, bl); ::encode(file_data_version, bl); + ::encode(xattr_version, bl); } void decode(bufferlist::iterator &p) { ::decode(ino, p); @@ -365,6 +367,7 @@ struct inode_t { ::decode(version, p); ::decode(file_data_version, p); + ::decode(xattr_version, p); } }; WRITE_CLASS_ENCODER(inode_t) diff --git a/src/messages/MClientCaps.h b/src/messages/MClientCaps.h index 4fe1f7f9dc8b6..139f96016a51c 100644 --- a/src/messages/MClientCaps.h +++ b/src/messages/MClientCaps.h @@ -128,6 +128,9 @@ class MClientCaps : public Message { if (head.time_warp_seq) out << " tws " << head.time_warp_seq; + if (head.xattr_version) + out << " xattrs(v=" << head.xattr_version << " l=" << xattrbl.length() << ")"; + out << ")"; } @@ -139,6 +142,7 @@ class MClientCaps : public Message { } void encode_payload() { head.snap_trace_len = snapbl.length(); + head.xattr_len = xattrbl.length(); ::encode(head, payload); ::encode_nohead(snapbl, payload); ::encode_nohead(xattrbl, payload); -- 2.39.5