From 24694e36d91fa7c3744e74d2189d2585e14312f9 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 8 Apr 2009 14:40:46 -0700 Subject: [PATCH] mds: adjust mds client request format to include optional releases The goal is to release caps and/or dentry leases in the same message as the request we are dropping them for. We will already get caps/leases reissued with the response, in most cases. Kill the mds replication hack while we're at it. That should be cleaned up if/when it is reincarnated. --- src/client/Client.cc | 2 +- src/include/ceph_fs.h | 18 ++++++++------ src/include/cstring.h | 15 ++++++++++- src/include/types.h | 2 +- src/kernel/mds_client.c | 34 +++++++++++-------------- src/mds/MDCache.cc | 4 +++ src/mds/Server.cc | 12 ++++----- src/messages/MClientRequest.h | 47 ++++++++++++++++++++++++++--------- 8 files changed, 85 insertions(+), 49 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index ca8b0838739e7..59673c139ffda 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -922,7 +922,7 @@ void Client::send_request(MetaRequest *request, int mds) r->copy_payload(request->request_payload); r->decode_payload(); r->set_retry_attempt(request->retry_attempt); - r->set_num_dentries_wanted(1); + r->set_dentry_wanted(); } request->request = 0; diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index f43d07d26e02e..5f8a5ba392221 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -44,7 +44,7 @@ #define CEPH_MDS_PROTOCOL 9 /* cluster internal */ #define CEPH_MON_PROTOCOL 4 /* cluster internal */ #define CEPH_OSDC_PROTOCOL 6 /* public/client */ -#define CEPH_MDSC_PROTOCOL 17 /* public/client */ +#define CEPH_MDSC_PROTOCOL 18 /* public/client */ #define CEPH_MONC_PROTOCOL 11 /* public/client */ @@ -792,24 +792,26 @@ union ceph_mds_request_args { } __attribute__ ((packed)) setlayout; } __attribute__ ((packed)); -#define CEPH_MDS_REQUEST_REPLAY 0xffff +#define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ 
+#define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */ struct ceph_mds_request_head { ceph_tid_t tid, oldest_client_tid; ceph_epoch_t mdsmap_epoch; /* on client */ - __le32 retry_attempt; /* REQUEST_REPLAY if replay */ - __le16 num_fwd; - __le16 num_dentries_wanted; - __le64 mds_wants_replica_in_dirino; + __le32 flags; + __u8 num_retry, num_fwd; + __le16 num_releases; /* little-endian like the other multi-byte fields; count of ceph_mds_request_release records */ __le32 op; __le32 caller_uid, caller_gid; __le64 ino; /* use this ino for openc, mkdir, mknod, etc. */ union ceph_mds_request_args args; } __attribute__ ((packed)); -struct ceph_inopath_item { +struct ceph_mds_request_release { __le64 ino; - __le32 dname_hash; + __le32 caps; + __le32 seq; + __le32 dname_len; /* if releasing a dentry lease, too. string follows. */ } __attribute__ ((packed)); /* client reply */ diff --git a/src/include/cstring.h b/src/include/cstring.h index eef7cda27716e..0307b854c4acf 100644 --- a/src/include/cstring.h +++ b/src/include/cstring.h @@ -106,9 +106,12 @@ class cstring { bl.append(_data, _len); } void decode(bufferlist::iterator &bl) { - if (_data) delete[] _data; __u32 l; ::decode(l, bl); + decode_nohead(l, bl); + } + void decode_nohead(int l, bufferlist::iterator& bl) { + if (_data) delete[] _data; _len = l; _data = new char[_len + 1]; bl.copy(_len, _data); @@ -117,4 +120,14 @@ class cstring { }; WRITE_CLASS_ENCODER(cstring) +inline void encode_nohead(const cstring& s, bufferlist& bl) +{ + bl.append(s.data(), s.length()); +} +inline void decode_nohead(int len, cstring& s, bufferlist::iterator& p) +{ + s.decode_nohead(len, p); +} + + #endif diff --git a/src/include/types.h b/src/include/types.h index 8fe5aace9c0df..76b4d4e08126a 100644 --- a/src/include/types.h +++ b/src/include/types.h @@ -201,6 +201,7 @@ WRITE_RAW_ENCODER(ceph_fsid_t) WRITE_RAW_ENCODER(ceph_file_layout) WRITE_RAW_ENCODER(ceph_client_ticket) WRITE_RAW_ENCODER(ceph_mds_request_head) +WRITE_RAW_ENCODER(ceph_mds_request_release) WRITE_RAW_ENCODER(ceph_mds_caps) 
WRITE_RAW_ENCODER(ceph_mds_cap_release) WRITE_RAW_ENCODER(ceph_mds_cap_item) @@ -212,7 +213,6 @@ WRITE_RAW_ENCODER(ceph_mds_reply_inode) WRITE_RAW_ENCODER(ceph_mds_cap_reconnect) WRITE_RAW_ENCODER(ceph_mds_snaprealm_reconnect) WRITE_RAW_ENCODER(ceph_frag_tree_split) -WRITE_RAW_ENCODER(ceph_inopath_item) WRITE_RAW_ENCODER(ceph_osd_request_head) WRITE_RAW_ENCODER(ceph_osd_reply_head) WRITE_RAW_ENCODER(ceph_osd_op) diff --git a/src/kernel/mds_client.c b/src/kernel/mds_client.c index f9f14636deb88..c05e7f23a98c6 100644 --- a/src/kernel/mds_client.c +++ b/src/kernel/mds_client.c @@ -1060,27 +1060,22 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, *ino = 0; if (rinode) { - *freepath = build_inode_path(rinode, ppath, - pathlen, ino); - dout(10, "create_request_message inode %p %llx.%llx\n", - rinode, ceph_ino(rinode), + *freepath = build_inode_path(rinode, ppath, pathlen, ino); + dout(10, " inode %p %llx.%llx\n", rinode, ceph_ino(rinode), ceph_snap(rinode)); } else if (rdentry) { - *freepath = build_dentry_path(rdentry, ppath, - pathlen, ino); - dout(10, "create_request_message dentry %p %llx/%.*s\n", - rdentry, *ino, *pathlen, *ppath); + *freepath = build_dentry_path(rdentry, ppath, pathlen, ino); + dout(10, " dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, + *ppath); } else if (rpath) { *ino = rino; *ppath = rpath; *pathlen = strlen(rpath); - dout(10, "create_request_message path %.*s\n", - *pathlen, rpath); + dout(10, " path %.*s\n", *pathlen, rpath); } if (*freepath < 0) return *freepath; - return 0; } @@ -1113,14 +1108,12 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, ret = set_request_path_attr(NULL, req->r_old_dentry, req->r_path2, req->r_ino2.ino, &path2, &pathlen2, &ino2, &freepath2); - if (ret < 0) { msg = ERR_PTR(ret); goto out_free1; } pathlen = pathlen1 + pathlen2 + 2*(sizeof(u32) + sizeof(u64)); - msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, sizeof(*head) + pathlen, 0, 0, NULL); if 
(IS_ERR(msg)) @@ -1131,8 +1124,6 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, end = msg->front.iov_base + msg->front.iov_len; head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); - head->num_fwd = 0; - head->mds_wants_replica_in_dirino = 0; head->op = cpu_to_le32(req->r_op); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) head->caller_uid = cpu_to_le32(current_fsuid()); @@ -1143,6 +1134,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, #endif head->args = req->r_args; + head->num_releases = 0; ceph_encode_filepath(&p, end, ino1, path1); ceph_encode_filepath(&p, end, ino2, path2); @@ -1180,6 +1172,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, { struct ceph_mds_request_head *rhead; struct ceph_msg *msg; + int flags = 0; req->r_attempts++; dout(10, "prepare_send_request %p tid %lld %s (attempt %d)\n", req, @@ -1199,15 +1192,16 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, rhead = msg->front.iov_base; rhead->tid = cpu_to_le64(req->r_tid); - if (req->r_got_safe) - rhead->retry_attempt = cpu_to_le32(CEPH_MDS_REQUEST_REPLAY); - else - rhead->retry_attempt = cpu_to_le32(req->r_attempts - 1); rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc)); + if (req->r_got_safe) + flags |= CEPH_MDS_FLAG_REPLAY; + if (req->r_locked_dir) + flags |= CEPH_MDS_FLAG_WANT_DENTRY; + rhead->flags = cpu_to_le32(flags); - rhead->num_fwd = cpu_to_le32(req->r_num_fwd); + rhead->num_fwd = req->r_num_fwd; /* __u8 field: a 32-bit swap would truncate to 0 on big-endian */ + rhead->num_retry = req->r_attempts - 1; /* __u8 field: store directly, no byte swap */ dout(20, " r_locked_dir = %p\n", req->r_locked_dir); - rhead->num_dentries_wanted = req->r_locked_dir ? 
1:0; if (req->r_target_inode && req->r_got_unsafe) rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode)); diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 7eed98fbe932b..57d5fd3da7479 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -5691,6 +5691,7 @@ int MDCache::path_traverse(MDRequest *mdr, Message *req, // who } // forwarder wants replicas? +#if 0 if (mdr && mdr->client_request && mdr->client_request->get_mds_wants_replica_in_dirino()) { dout(30) << "traverse: REP is here, " @@ -5721,6 +5722,7 @@ int MDCache::path_traverse(MDRequest *mdr, Message *req, // who } } } +#endif // add to trace, continue. trace.push_back(dn); @@ -5772,6 +5774,7 @@ int MDCache::path_traverse(MDRequest *mdr, Message *req, // who dout(7) << "traverse: forwarding, not auth for " << *curdir << dendl; +#if 0 // request replication? if (mdr && mdr->client_request && curdir->is_rep()) { dout(15) << "traverse: REP fw to mds" << dauth << ", requesting rep under " @@ -5779,6 +5782,7 @@ int MDCache::path_traverse(MDRequest *mdr, Message *req, // who mdr->client_request->set_mds_wants_replica_in_dirino(curdir->ino()); req->clear_payload(); // reencode! 
} +#endif if (mdr) request_forward(mdr, dauth.first); diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 7a2e2167d080c..c90646ccf8d7c 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -585,7 +585,7 @@ void Server::early_reply(MDRequest *mdr, CInode *tracei, CDentry *tracedn) if (tracei || tracedn) set_trace_dist(mdr->session, reply, tracei, tracedn, mdr->ref_snapid, mdr->ref_snapdiri, mdr->client_request->is_replay(), - mdr->client_request->get_num_dentries_wanted()); + mdr->client_request->get_dentry_wanted()); messenger->send_message(reply, client_inst); @@ -649,7 +649,7 @@ void Server::reply_request(MDRequest *mdr, MClientReply *reply, CInode *tracei, Session *session = mdr->session; bool did_early_reply = mdr->did_early_reply; entity_inst_t client_inst = req->get_orig_source_inst(); - int num_dentries_wanted = req->get_num_dentries_wanted(); + int dentry_wanted = req->get_dentry_wanted(); mdcache->request_finish(mdr); mdr = 0; @@ -661,7 +661,7 @@ void Server::reply_request(MDRequest *mdr, MClientReply *reply, CInode *tracei, // send reply, with trace, and possible leases if (!did_early_reply && // don't issue leases if we sent an earlier reply already (tracei || tracedn)) - set_trace_dist(session, reply, tracei, tracedn, snapid, snapdiri, is_replay, num_dentries_wanted); + set_trace_dist(session, reply, tracei, tracedn, snapid, snapdiri, is_replay, dentry_wanted); messenger->send_message(reply, client_inst); } @@ -708,7 +708,7 @@ void Server::encode_null_lease(bufferlist& bl) void Server::set_trace_dist(Session *session, MClientReply *reply, CInode *in, CDentry *dn, snapid_t snapid, CInode *snapdiri, - bool is_replay, int num_dentries_wanted) + bool is_replay, int dentry_wanted) { // inode, dentry, dir, ..., inode bufferlist bl; @@ -718,7 +718,7 @@ void Server::set_trace_dist(Session *session, MClientReply *reply, dout(20) << "set_trace_dist snapid " << snapid << dendl; - //assert((bool)dn == (bool)num_dentries_wanted); // not true for snapshot 
lookups + //assert((bool)dn == (bool)dentry_wanted); // not true for snapshot lookups // realm SnapRealm *realm = 0; @@ -4913,7 +4913,7 @@ void Server::handle_client_open(MDRequest *mdr) mdr->client_request->get_orig_source().num()); CDentry *dn = 0; - if (req->get_num_dentries_wanted()) { + if (req->get_dentry_wanted()) { assert(mdr->trace.size()); dn = mdr->trace.back(); } diff --git a/src/messages/MClientRequest.h b/src/messages/MClientRequest.h index 3a118893b4c17..62d29178bfe15 100644 --- a/src/messages/MClientRequest.h +++ b/src/messages/MClientRequest.h @@ -50,9 +50,27 @@ class MClientRequest : public Message { public: struct ceph_mds_request_head head; + struct Release { + mutable ceph_mds_request_release item; + nstring dname; + + void encode(bufferlist& bl) const { + item.dname_len = dname.length(); + ::encode(item, bl); + ::encode_nohead(dname, bl); + } + void decode(bufferlist::iterator& bl) { + ::decode(item, bl); + ::decode_nohead(item.dname_len, dname, bl); + } + }; + vector<Release> releases; // cap/dentry-lease releases carried with this request; count goes in head.num_releases + // path arguments filepath path, path2; + + public: // cons MClientRequest() : Message(CEPH_MSG_CLIENT_REQUEST) {} @@ -95,28 +113,31 @@ public: return false; } + int get_flags() { + return head.flags; + } bool is_replay() { - return head.retry_attempt == CEPH_MDS_REQUEST_REPLAY; + return get_flags() & CEPH_MDS_FLAG_REPLAY; } // normal fields void set_tid(tid_t t) { head.tid = t; } void set_oldest_client_tid(tid_t t) { head.oldest_client_tid = t; } void inc_num_fwd() { head.num_fwd = head.num_fwd + 1; } - void set_retry_attempt(int a) { head.retry_attempt = a; } + void set_retry_attempt(int a) { head.num_retry = a; } void set_filepath(const filepath& fp) { path = fp; } void set_filepath2(const filepath& fp) { path2 = fp; } void set_string2(const char *s) { path2.set_path(s, 0); } void set_caller_uid(unsigned u) { head.caller_uid = u; } void set_caller_gid(unsigned g) { head.caller_gid = g; } - void set_mds_wants_replica_in_dirino(inodeno_t dirino) { - 
head.mds_wants_replica_in_dirino = dirino; } - void set_num_dentries_wanted(int n) { head.num_dentries_wanted = n; } + void set_dentry_wanted() { + head.flags = head.flags | CEPH_MDS_FLAG_WANT_DENTRY; + } tid_t get_tid() { return head.tid; } tid_t get_oldest_client_tid() { return head.oldest_client_tid; } int get_num_fwd() { return head.num_fwd; } - int get_retry_attempt() { return head.retry_attempt; } + int get_retry_attempt() { return head.num_retry; } int get_op() { return head.op; } unsigned get_caller_uid() { return head.caller_uid; } unsigned get_caller_gid() { return head.caller_gid; } @@ -126,22 +147,22 @@ public: const string& get_path2() { return path2.get_path(); } filepath& get_filepath2() { return path2; } - inodeno_t get_mds_wants_replica_in_dirino() { - return inodeno_t(head.mds_wants_replica_in_dirino); - } - int get_num_dentries_wanted() { return head.num_dentries_wanted; } + int get_dentry_wanted() { return get_flags() & CEPH_MDS_FLAG_WANT_DENTRY; } void decode_payload() { bufferlist::iterator p = payload.begin(); ::decode(head, p); ::decode(path, p); ::decode(path2, p); + ::decode_nohead(head.num_releases, releases, p); } void encode_payload() { + head.num_releases = releases.size(); ::encode(head, payload); ::encode(path, payload); ::encode(path2, payload); + ::encode_nohead(releases, payload); } const char *get_type_name() { return "creq"; } @@ -153,11 +174,13 @@ public: out << " " << get_filepath(); if (!get_filepath2().empty()) out << " " << get_filepath2(); - if (head.retry_attempt) - out << " RETRY=" << head.retry_attempt; + if (head.num_retry) + out << " RETRY=" << head.num_retry; out << ")"; } }; +WRITE_CLASS_ENCODER(MClientRequest::Release) + #endif -- 2.39.5