From 8276576ce28e5c4c76374872b26f316291718b1f Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 3 Jan 2009 11:33:35 -0800 Subject: [PATCH] mds: issue RDCACHE caps to client when possible. add cap to do so if inode auth. --- src/TODO | 6 +++++- src/include/ceph_fs.h | 5 +++++ src/mds/CInode.cc | 27 ++++++++++++++++++++++++--- src/mds/CInode.h | 3 ++- src/mds/Locker.cc | 26 ++++++++++++++++---------- src/mds/Server.cc | 8 ++++---- 6 files changed, 56 insertions(+), 19 deletions(-) diff --git a/src/TODO b/src/TODO index f3e18ad8f7fea..c027e1785cd7d 100644 --- a/src/TODO +++ b/src/TODO @@ -105,6 +105,11 @@ userspace client - fix readdir vs fragment race by keeping a separate frag pos, and ignoring dentries below it mds +- caps + - make unwanted caps time out, like a lease + - simplify ro cap release + - share cap across multiple locks + - dentry caps? - xlock vs wrlock.. need more lock states? - dftlock is missing from rejoin phase - file size recovery gives (wrong) 4MB-increment results? @@ -166,7 +171,6 @@ crush - allow forcefeed for more complicated rule structures. (e.g. make force_stack a list< set >) osd -- 'sync' op should kick store to do an immediate commit - pg split should be a work queue - pg split needs to fix up pg stats. this is tricky with the clone overlap business... - generalize ack semantics? or just change ack from memory to journal? memory/journal/disk... diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 3321e6b15e74f..e9a12a8d8f542 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -937,6 +937,11 @@ static inline int ceph_flags_to_mode(int flags) CEPH_CAP_FILE_RDCACHE | \ CEPH_CAP_XATTR_RDCACHE) +#define CEPH_CAP_ANY_RD (CEPH_CAP_AUTH_RDCACHE | \ + CEPH_CAP_LINK_RDCACHE | \ + CEPH_CAP_XATTR_RDCACHE | \ + CEPH_CAP_FILE_RDCACHE | CEPH_CAP_FILE_RD) + #define CEPH_CAP_ANY_EXCL (CEPH_CAP_AUTH_EXCL | \ CEPH_CAP_LINK_EXCL | \ CEPH_CAP_XATTR_EXCL | \ diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 2f12ba7eba259..fa6d9500a42f7 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -1277,8 +1277,11 @@ void CInode::decode_snap_blob(bufferlist& snapbl) } -bool CInode::encode_inodestat(bufferlist& bl, Capability *cap, snapid_t snapid, bool projected) +bool CInode::encode_inodestat(bufferlist& bl, Session *session, + snapid_t snapid, bool projected) { + int client = session->inst.name.num(); + bool valid = true; // pick a version! @@ -1338,14 +1341,32 @@ bool CInode::encode_inodestat(bufferlist& bl, Capability *cap, snapid_t snapid, e.fragtree.nsplits = dirfragtree._splits.size(); // include capability? - if (snapid != CEPH_NOSNAP) { + Capability *cap = get_client_cap(client); + if (snapid != CEPH_NOSNAP && !cap) { e.cap.caps = valid ? get_caps_allowed(false) : CEPH_STAT_CAP_INODE; e.cap.seq = 0; e.cap.mseq = 0; e.cap.realm = 0; } else { + if (valid && !cap && is_auth()) { + // add a new cap + cap = add_client_cap(client, find_snaprealm()); + session->touch_cap(cap); + } if (cap && valid) { - e.cap.caps = cap->pending(); + bool loner = (get_loner() == client); + int issue = (cap->wanted() | CEPH_CAP_ANY_RD) & get_caps_allowed(loner); + int pending = cap->pending(); + if (issue & ~pending) { + dout(10) << "encode_inodestat issuing " << ccap_string(issue) + << ", pending was " << ccap_string(pending) << dendl; + cap->issue(issue); + pending = issue; + } else { + dout(10) << "encode_inodestat wanted to issue " << ccap_string(issue) + << ", already pending " << ccap_string(pending) << dendl; + } + e.cap.caps = pending; e.cap.seq = cap->get_last_seq(); e.cap.mseq = cap->get_mseq(); e.cap.realm = find_snaprealm()->inode->ino(); diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 7b9081013ad04..c7842ee4f560b 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -46,6 +46,7 @@ class CInode; class MDCache; class LogSegment; class SnapRealm; +class Session; ostream& operator<<(ostream& out, CInode& in); @@ -419,7 +420,7 @@ private: // for giving to clients - bool encode_inodestat(bufferlist& bl, Capability *cap, snapid_t snapid=CEPH_NOSNAP, bool projected=false); + bool encode_inodestat(bufferlist& bl, Session *session, snapid_t snapid=CEPH_NOSNAP, bool projected=false); // -- locks -- diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 6d93884851742..4ae34b2614119 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -553,11 +553,11 @@ Capability* Locker::issue_new_caps(CInode *in, } // issue caps (pot. incl new one) - issue_caps(in); // note: _eval above may have done this already... + //issue_caps(in); // note: _eval above may have done this already... // re-issue whatever we can - cap->issue(cap->pending()); - cap->set_last_open(); + //cap->issue(cap->pending()); + //cap->set_last_open(); // not used, atm. return cap; } @@ -613,31 +613,37 @@ bool Locker::issue_caps(CInode *in) int pending = cap->pending(); allowed &= ~careful | pending; // only allow "careful" bits if already issued + int wanted = cap->wanted(); + dout(20) << " client" << it->first << " pending " << ccap_string(pending) << " allowed " << ccap_string(allowed) - << " wanted " << ccap_string(cap->wanted()) + << " wanted " << ccap_string(wanted) << dendl; - if (cap->pending() != (cap->wanted() & allowed)) { + if (pending != (wanted & allowed)) { // issue nissued++; - int before = cap->pending(); - long seq = cap->issue(cap->wanted() & allowed); + // include caps that clients generally like, while we're at it. + int likes = CEPH_CAP_ANY_RD; + + int before = pending; + long seq = cap->issue((wanted|likes) & allowed); + int after = cap->pending(); if (seq > 0 && !cap->is_suppress()) { dout(7) << " sending MClientCaps to client" << it->first << " seq " << cap->get_last_seq() - << " new pending " << ccap_string(cap->pending()) << " was " << ccap_string(before) + << " new pending " << ccap_string(after) << " was " << ccap_string(before) << dendl; mds->send_message_client(new MClientCaps(CEPH_CAP_OP_GRANT, in->inode, in->find_snaprealm()->inode->ino(), cap->get_last_seq(), - cap->pending(), - cap->wanted(), + after, + wanted, cap->get_mseq()), it->first); } diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 95465dc2b6e82..23389688cf1e7 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -691,7 +691,7 @@ void Server::set_trace_dist(Session *session, MClientReply *reply, CInode *in, C inode: numi++; - in->encode_inodestat(bl, in->get_client_cap(client), snapid, projected); + in->encode_inodestat(bl, session, snapid, projected); dout(20) << "set_trace_dist added snapid " << snapid << " " << *in << dendl; if (snapid != CEPH_NOSNAP && in == snapdiri) { @@ -705,7 +705,7 @@ void Server::set_trace_dist(Session *session, MClientReply *reply, CInode *in, C // back to the live tree snapid = CEPH_NOSNAP; - in->encode_inodestat(bl, in->get_client_cap(client), snapid, false); + in->encode_inodestat(bl, session, snapid, false); numi++; dout(20) << "set_trace_dist added snapid " << snapid << " " << *in << dendl; @@ -2230,7 +2230,7 @@ void Server::handle_client_readdir(MDRequest *mdr) // inode dout(12) << "including inode " << *in << dendl; - bool valid = in->encode_inodestat(dnbl, in->get_client_cap(client), snapid); + bool valid = in->encode_inodestat(dnbl, mdr->session, snapid); assert(valid); numfiles++; @@ -5167,7 +5167,7 @@ void Server::handle_client_lssnap(MDRequest *mdr) else ::encode(p->second->get_long_name(), dnbl); encode_infinite_lease(dnbl); - diri->encode_inodestat(dnbl, NULL, p->first); + diri->encode_inodestat(dnbl, mdr->session, p->first); mds->locker->issue_client_lease(diri, client, dnbl, now, mdr->session); num++; } -- 2.39.5