From 4c89deb8c59a3d7512f385020e03cd7ebd88fe78 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 1 Apr 2009 14:52:13 -0700 Subject: [PATCH] mds: replace FINDINODE with LOOKUPHASH The nfs export code still needs to be fixed. --- src/TODO | 2 +- src/include/ceph_fs.h | 6 +-- src/kernel/debugfs.c | 3 +- src/kernel/export.c | 2 +- src/kernel/mds_client.c | 85 +++++++++++++++-------------------- src/mds/MDCache.cc | 70 ----------------------------- src/mds/MDCache.h | 2 - src/mds/Server.cc | 46 +++++++++++++++---- src/mds/Server.h | 2 +- src/messages/MClientRequest.h | 23 ++-------- 10 files changed, 83 insertions(+), 158 deletions(-) diff --git a/src/TODO b/src/TODO index b486cce23309b..c4574011eb71e 100644 --- a/src/TODO +++ b/src/TODO @@ -52,7 +52,7 @@ kclient caps /- size limit on readdir result, partial dirfrag readdir /- revisit unmount /- make request paths relative to a non-snapshotted inode. - +- fix nfs exportin - ENOSPC - flock diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 9193f25dbf0ee..2cbfc1fcc5949 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -693,12 +693,10 @@ struct ceph_mds_session_head { */ #define CEPH_MDS_OP_WRITE 0x001000 #define CEPH_MDS_OP_FOLLOW_LINK 0x010000 -#define CEPH_MDS_OP_INO_PATH 0x100000 enum { - CEPH_MDS_OP_FINDINODE = 0x100100, - CEPH_MDS_OP_LOOKUP = 0x00100, CEPH_MDS_OP_GETATTR = 0x00101, + CEPH_MDS_OP_LOOKUPHASH = 0x00102, CEPH_MDS_OP_SETXATTR = 0x01105, CEPH_MDS_OP_RMXATTR = 0x01106, CEPH_MDS_OP_SETLAYOUT = 0x01107, @@ -723,8 +721,8 @@ enum { static inline const char *ceph_mds_op_name(int op) { switch (op) { - case CEPH_MDS_OP_FINDINODE: return "findinode"; case CEPH_MDS_OP_LOOKUP: return "lookup"; + case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash"; case CEPH_MDS_OP_GETATTR: return "getattr"; case CEPH_MDS_OP_SETXATTR: return "setxattr"; case CEPH_MDS_OP_SETATTR: return "setattr"; diff --git a/src/kernel/debugfs.c b/src/kernel/debugfs.c index 10a892bc39ac8..97f637e430196 100644 --- 
a/src/kernel/debugfs.c +++ b/src/kernel/debugfs.c @@ -300,8 +300,7 @@ static int mdsc_show(struct seq_file *s, void *p) seq_printf(s, " %s", path); kfree(path); } - } else if (req->r_path2 && - req->r_op != CEPH_MDS_OP_FINDINODE) { + } else if (req->r_path2) { seq_printf(s, " %s", req->r_path2); } diff --git a/src/kernel/export.c b/src/kernel/export.c index ffc06d05fd5f1..ba09cbcbd63c1 100644 --- a/src/kernel/export.c +++ b/src/kernel/export.c @@ -81,7 +81,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, if (!inode) { struct ceph_mds_request *req; derr(10, "fh_to_dentry %llx.%x -- no inode\n", vino.ino, hash); - req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_FINDINODE, + req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPHASH, USE_ANY_MDS); if (IS_ERR(req)) return ERR_PTR(PTR_ERR(req)); diff --git a/src/kernel/mds_client.c b/src/kernel/mds_client.c index b9a2641894318..fb0b8b72a7f56 100644 --- a/src/kernel/mds_client.c +++ b/src/kernel/mds_client.c @@ -1006,52 +1006,45 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, int pathlen; int freepath1 = 0, freepath2 = 0; void *p, *end; - u32 fhlen = 0; - if (req->r_op == CEPH_MDS_OP_FINDINODE) { - fhlen = *(int *)req->r_path2; - path2 = NULL; - pathlen = sizeof(u32) + fhlen*sizeof(struct ceph_inopath_item); - } else { - if (req->r_inode) { - freepath1 = build_inode_path(req->r_inode, &path1, - &pathlen1, &ino1); - dout(10, "create_request_message inode %p %llx.%llx\n", - req->r_inode, ceph_ino(req->r_inode), - ceph_snap(req->r_inode)); - } else if (req->r_dentry) { - freepath1 = build_dentry_path(req->r_dentry, &path1, - &pathlen1, &ino1); - dout(10, "create_request_message dentry %p %llx/%.*s\n", - req->r_dentry, ino1, pathlen1, path1); - } else if (path1) { - pathlen1 = strlen(path1); - dout(10, "create_request_message path1 %.*s\n", - pathlen1, path1); - } - if (freepath1 < 0) { - msg = ERR_PTR(freepath1); - goto out; - } + if (req->r_inode) { + freepath1 = 
build_inode_path(req->r_inode, &path1, + &pathlen1, &ino1); + dout(10, "create_request_message inode %p %llx.%llx\n", + req->r_inode, ceph_ino(req->r_inode), + ceph_snap(req->r_inode)); + } else if (req->r_dentry) { + freepath1 = build_dentry_path(req->r_dentry, &path1, + &pathlen1, &ino1); + dout(10, "create_request_message dentry %p %llx/%.*s\n", + req->r_dentry, ino1, pathlen1, path1); + } else if (path1) { + pathlen1 = strlen(path1); + dout(10, "create_request_message path1 %.*s\n", + pathlen1, path1); + } + if (freepath1 < 0) { + msg = ERR_PTR(freepath1); + goto out; + } - if (req->r_old_dentry) { - freepath2 = build_dentry_path(req->r_old_dentry, &path2, - &pathlen2, &ino2); - dout(10, "create_request_message dentry %p %llx/%.*s\n", - req->r_old_dentry, ino2, pathlen2, path2); - if (freepath2 < 0) { - msg = ERR_PTR(freepath2); - goto out_free1; - } - } else if (path2) { - pathlen2 = strlen(path2); - dout(10, "create_request_message path2 %.*s\n", - pathlen2, path2); + if (req->r_old_dentry) { + freepath2 = build_dentry_path(req->r_old_dentry, &path2, + &pathlen2, &ino2); + dout(10, "create_request_message dentry %p %llx/%.*s\n", + req->r_old_dentry, ino2, pathlen2, path2); + if (freepath2 < 0) { + msg = ERR_PTR(freepath2); + goto out_free1; } - - pathlen = pathlen1 + pathlen2 + 2*(sizeof(u32) + sizeof(u64)); + } else if (path2) { + pathlen2 = strlen(path2); + dout(10, "create_request_message path2 %.*s\n", + pathlen2, path2); } + pathlen = pathlen1 + pathlen2 + 2*(sizeof(u32) + sizeof(u64)); + msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, sizeof(*head) + pathlen, 0, 0, NULL); if (IS_ERR(msg)) @@ -1074,14 +1067,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, #endif head->args = req->r_args; - if (req->r_op == CEPH_MDS_OP_FINDINODE) { - ceph_encode_32(&p, fhlen); - memcpy(p, path1, fhlen * sizeof(struct ceph_inopath_item)); - p += fhlen * sizeof(struct ceph_inopath_item); - } else { - ceph_encode_filepath(&p, end, ino1, path1); - 
ceph_encode_filepath(&p, end, ino2, path2); - } + ceph_encode_filepath(&p, end, ino1, path1); + ceph_encode_filepath(&p, end, ino2, path2); BUG_ON(p != end); diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index ab7b7f09b444f..1782897ba5780 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -5820,76 +5820,6 @@ bool MDCache::path_is_mine(filepath& path) -int MDCache::inopath_traverse(MDRequest *mdr, vector<ceph_inopath_item> &inopath) -{ - dout(10) << "inopath_traverse mdr " << *mdr << " inopath " << inopath << dendl; - - // find first... - int i; - CInode *cur = 0; - for (i=0; i<(int)inopath.size(); i++) { - cur = get_inode(inodeno_t(inopath[i].ino)); - if (cur) break; - dout(10) << " don't have " << inopath[i].ino << dendl; - } - if (!cur) - return -ESTALE; - - if (i == 0) { - dout(10) << " found " << *cur << dendl; - mdr->pin(cur); - mdr->ref = cur; - return 0; // yay - } - - dout(10) << " have ancestor " << *cur << dendl; - - // load up subdir - if (!cur->is_dir()) - return -ENOTDIR; - - frag_t fg = cur->dirfragtree[inopath[i].dname_hash]; - dout(10) << " hash " << inopath[i].dname_hash << " is frag " << fg << dendl; - - CDir *curdir = cur->get_dirfrag(fg); - if (!curdir) { - if (cur->is_auth()) { - // parent dir frozen_dir? - if (cur->is_frozen_dir()) { - dout(7) << "inopath_traverse: " << *cur->get_parent_dir() << " is frozen_dir, waiting" << dendl; - cur->get_parent_dn()->get_dir()->add_waiter(CDir::WAIT_UNFREEZE, _get_waiter(mdr, 0)); - return 1; - } - curdir = cur->get_or_open_dirfrag(this, fg); - } else { - open_remote_dirfrag(cur, fg, _get_waiter(mdr, 0)); - return 1; - } - } - assert(curdir); - - // forward to dir auth? 
- if (!curdir->is_auth()) { - if (curdir->is_ambiguous_auth()) { - // wait - dout(7) << "traverse: waiting for single auth in " << *curdir << dendl; - curdir->add_waiter(CDir::WAIT_SINGLEAUTH, _get_waiter(mdr, 0)); - return 1; - } - request_forward(mdr, curdir->authority().first); - return 2; - } - - if (curdir->is_complete()) - return -ESTALE; // give up? :( we _could_ try other frags... - - touch_inode(cur); - curdir->fetch(_get_waiter(mdr, 0)); - return 1; -} - - - /** * path_traverse_to_dir -- traverse to deepest dir we have * diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index a7bd66014691a..809d86963d7a9 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -897,8 +897,6 @@ public: } CDir *path_traverse_to_dir(filepath& path); - int inopath_traverse(MDRequest *mdr, vector<ceph_inopath_item>& inopath); - void open_remote_dirfrag(CInode *diri, frag_t fg, Context *fin); CInode *get_dentry_inode(CDentry *dn, MDRequest *mdr, bool projected=false); void open_remote_ino(inodeno_t ino, Context *fin, inodeno_t hadino=0, version_t hadv=0); diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 46d1662fa9f65..3bc6071fa008b 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -852,8 +852,8 @@ void Server::dispatch_client_request(MDRequest *mdr) assert(mdr->more()->waiting_on_slave.empty()); switch (req->get_op()) { - case CEPH_MDS_OP_FINDINODE: - handle_client_findinode(mdr); + case CEPH_MDS_OP_LOOKUPHASH: + handle_client_lookup_hash(mdr); break; // inodes ops. 
@@ -1759,15 +1759,43 @@ void Server::handle_client_stat(MDRequest *mdr) } -void Server::handle_client_findinode(MDRequest *mdr) +void Server::handle_client_lookup_hash(MDRequest *mdr) { MClientRequest *req = mdr->client_request; - int r = mdcache->inopath_traverse(mdr, req->inopath); - if (r > 0) - return; // delayed - dout(10) << "reply to findinode on " << *mdr->ref << dendl; - MClientReply *reply = new MClientReply(req, r); - reply_request(mdr, reply); + + CInode *in = mdcache->get_inode(req->get_filepath().get_ino()); + if (!in) { + // try the directory + CInode *diri = mdcache->get_inode(req->get_filepath2().get_ino()); + if (!diri) { + reply_request(mdr, -ESTALE); + return; + } + unsigned hash = atoi(req->get_filepath2()[0].c_str()); + frag_t fg = diri->dirfragtree[hash]; + CDir *dir = diri->get_or_open_dirfrag(mdcache, fg); + assert(dir); + if (!dir->is_auth()) { + if (dir->is_ambiguous_auth()) { + // wait + dout(7) << " waiting for single auth in " << *dir << dendl; + dir->add_waiter(CDir::WAIT_SINGLEAUTH, new C_MDS_RetryRequest(mdcache, mdr)); + return; + } + mdcache->request_forward(mdr, dir->authority().first); + return; + } + if (!dir->is_complete()) { + dir->fetch(0, new C_MDS_RetryRequest(mdcache, mdr)); + return; + } + reply_request(mdr, -ESTALE); + return; + } + + dout(10) << "reply to lookup_hash on " << *in << dendl; + MClientReply *reply = new MClientReply(req, 0); + reply_request(mdr, reply, in, in->get_parent_dn()); } diff --git a/src/mds/Server.h b/src/mds/Server.h index 6f81432bf7e8c..efe2d56ce9081 100644 --- a/src/mds/Server.h +++ b/src/mds/Server.h @@ -117,7 +117,7 @@ public: // requests on existing inodes. 
void handle_client_stat(MDRequest *mdr); - void handle_client_findinode(MDRequest *mdr); + void handle_client_lookup_hash(MDRequest *mdr); void handle_client_readdir(MDRequest *mdr); void handle_client_setattr(MDRequest *mdr); diff --git a/src/messages/MClientRequest.h b/src/messages/MClientRequest.h index 4e9705b9c495e..04996da05405b 100644 --- a/src/messages/MClientRequest.h +++ b/src/messages/MClientRequest.h @@ -46,17 +46,12 @@ // metadata ops. -static inline ostream& operator<<(ostream &out, const ceph_inopath_item &i) { - return out << i.ino << "." << i.dname_hash; -} - class MClientRequest : public Message { public: struct ceph_mds_request_head head; // path arguments filepath path, path2; - vector<ceph_inopath_item> inopath; public: // cons @@ -139,22 +134,14 @@ public: void decode_payload() { bufferlist::iterator p = payload.begin(); ::decode(head, p); - if (head.op == CEPH_MDS_OP_FINDINODE) { - ::decode(inopath, p); - } else { - ::decode(path, p); - ::decode(path2, p); - } + ::decode(path, p); + ::decode(path2, p); } void encode_payload() { ::encode(head, payload); - if (head.op == CEPH_MDS_OP_FINDINODE) { - ::encode(inopath, payload); - } else { - ::encode(path, payload); - ::encode(path2, payload); - } + ::encode(path, payload); + ::encode(path2, payload); } const char *get_type_name() { return "creq"; } @@ -166,8 +153,6 @@ public: out << " " << get_filepath(); if (!get_filepath2().empty()) out << " " << get_filepath2(); - if (!inopath.empty()) - out << " " << inopath; if (head.retry_attempt) out << " RETRY=" << head.retry_attempt; out << ")"; -- 2.39.5