From 6612493d769dc517f1a76fe4831ecba37d643e8e Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 27 Mar 2009 12:54:01 -0700 Subject: [PATCH] kclient: handle mds replies with no trace. Passes posix test suite. --- src/kernel/dir.c | 52 +++++++++++++++++++++++++++++----------------- src/kernel/file.c | 2 ++ src/kernel/inode.c | 42 ++++++++++++++++++++++++++----------- src/kernel/super.h | 1 + 4 files changed, 66 insertions(+), 31 deletions(-) diff --git a/src/kernel/dir.c b/src/kernel/dir.c index 77c540fe6b9e1..b26d98ab469bb 100644 --- a/src/kernel/dir.c +++ b/src/kernel/dir.c @@ -328,6 +328,29 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, return dentry; } +/* + * If we do a create but get no trace back from the MDS, follow up with + * a lookup (the VFS expects us to link up the provided dentry). + */ +int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry) +{ + struct dentry *result = ceph_lookup(dir, dentry, NULL); + + if (result && !IS_ERR(result)) { + /* + * We created the item, then did a lookup, and found + * it was already linked to another inode we already + * had in our cache (and thus got spliced). Link our + * dentry to that inode, but don't hash it, just in + * case the VFS wants to dereference it. + */ + BUG_ON(!result->d_inode); + d_instantiate(dentry, result->d_inode); + return 0; + } + return PTR_ERR(result); +} + static int ceph_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) { @@ -353,13 +376,8 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry, if (!ceph_caps_issued_mask(ceph_inode(dir), CEPH_CAP_FILE_EXCL)) ceph_release_caps(dir, CEPH_CAP_FILE_RDCACHE); err = ceph_mdsc_do_request(mdsc, dir, req); - if (!err && !req->r_reply_info.head->is_dentry) { - /* - * no trace. do lookup, in case we are called from create - * and the VFS needs a valid dentry. - */ - err = ceph_do_getattr(dentry, CEPH_STAT_CAP_INODE_ALL); - } + if (!err && !req->r_reply_info.head->is_dentry) + err = ceph_handle_notrace_create(dir, dentry); ceph_mdsc_put_request(req); if (err) d_drop(dentry); @@ -411,6 +429,8 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, if (!ceph_caps_issued_mask(ceph_inode(dir), CEPH_CAP_FILE_EXCL)) ceph_release_caps(dir, CEPH_CAP_FILE_RDCACHE); err = ceph_mdsc_do_request(mdsc, dir, req); + if (!err && !req->r_reply_info.head->is_dentry) + err = ceph_handle_notrace_create(dir, dentry); ceph_mdsc_put_request(req); if (err) d_drop(dentry); @@ -448,6 +468,8 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (!ceph_caps_issued_mask(ceph_inode(dir), CEPH_CAP_FILE_EXCL)) ceph_release_caps(dir, CEPH_CAP_FILE_RDCACHE); err = ceph_mdsc_do_request(mdsc, dir, req); + if (!err && !req->r_reply_info.head->is_dentry) + err = ceph_handle_notrace_create(dir, dentry); ceph_mdsc_put_request(req); out: if (err < 0) @@ -482,14 +504,8 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, err = ceph_mdsc_do_request(mdsc, dir, req); if (err) { d_drop(dentry); - } else if (!req->r_reply_info.head->is_dentry) { - /* no trace */ - struct inode *inode = old_dentry->d_inode; - inc_nlink(inode); - atomic_inc(&inode->i_count); - dget(dentry); - d_instantiate(dentry, inode); - } + } else if (!req->r_reply_info.head->is_dentry) + d_instantiate(dentry, igrab(old_dentry->d_inode)); ceph_mdsc_put_request(req); return err; } @@ -531,6 +547,8 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) ceph_mdsc_lease_release(mdsc, dir, dentry, CEPH_LOCK_DN); ceph_release_caps(inode, CEPH_CAP_LINK_RDCACHE); err = ceph_mdsc_do_request(mdsc, dir, req); + if (!err && !req->r_reply_info.head->is_dentry) + d_delete(dentry); ceph_mdsc_put_request(req); out: return err; @@ -567,14 +585,10 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, err = ceph_mdsc_do_request(mdsc, old_dir, req); if (!err && !req->r_reply_info.head->is_dentry) { /* - * no trace - * * Normally d_move() is done by fill_trace (called by * do_request, above). If there is no trace, we need * to do it here. */ - if (new_dentry->d_inode) - dput(new_dentry); d_move(old_dentry, new_dentry); } ceph_mdsc_put_request(req); diff --git a/src/kernel/file.c b/src/kernel/file.c index 110d5ff54b67a..573dfbe70ab57 100644 --- a/src/kernel/file.c +++ b/src/kernel/file.c @@ -196,6 +196,8 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, req->r_locked_dir = dir; /* caller holds dir->i_mutex */ err = ceph_mdsc_do_request(mdsc, parent_inode, req); dentry = ceph_finish_lookup(req, dentry, err); + if (!err && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) + err = ceph_handle_notrace_create(dir, dentry); if (!err) err = ceph_init_file(req->r_dentry->d_inode, file, req->r_fmode); diff --git a/src/kernel/inode.c b/src/kernel/inode.c index cc6384d15e87e..41adb67f03a1c 100644 --- a/src/kernel/inode.c +++ b/src/kernel/inode.c @@ -786,17 +786,35 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, #if 0 /* - * if we resend completed ops to a recovering mds, we get no - * trace. pretend this is the case to ensure the 'no trace' - * handlers in the callers behave. + * Debugging hook: + * + * If we resend completed ops to a recovering mds, we get no + * trace. Since that is very rare, pretend this is the case + * to ensure the 'no trace' handlers in the callers behave. + * + * Fill in inodes unconditionally to avoid breaking cap + * invariants. */ if (rinfo->head->op & CEPH_MDS_OP_WRITE) { - dout(0, "fill_trace faking empty trace on %d %s\n", - rinfo->head->op, - ceph_mds_op_name(rinfo->head->op)); - rinfo->trace_numi = 0; - rinfo->trace_numd = 0; - return 0; + dout(0, "fill_trace faking empty trace on %lld %s\n", + req->r_tid, ceph_mds_op_name(rinfo->head->op)); + if (rinfo->head->is_dentry) { + rinfo->head->is_dentry = 0; + err = fill_inode(req->r_locked_dir, + &rinfo->diri, rinfo->dirfrag, + session, req->r_request_started, -1); + } + if (rinfo->head->is_target) { + rinfo->head->is_target = 0; + ininfo = rinfo->targeti.in; + vino.ino = le64_to_cpu(ininfo->ino); + vino.snap = le64_to_cpu(ininfo->snapid); + in = ceph_get_inode(sb, vino); + err = fill_inode(in, &rinfo->targeti, NULL, + session, req->r_request_started, + req->r_fmode); + iput(in); + } } #endif @@ -815,7 +833,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, /* * update a dentry? */ - if (req->r_locked_dir) { + if (rinfo->head->is_dentry) { /* * lookup link rename : null -> possibly existing inode * mknod symlink mkdir : null -> new inode @@ -826,7 +844,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, bool have_dir_cap, have_lease; BUG_ON(!dn); - BUG_ON(!rinfo->head->is_dentry); + BUG_ON(!dir); BUG_ON(dn->d_parent->d_inode != dir); BUG_ON(ceph_ino(dir) != le64_to_cpu(rinfo->diri.in->ino)); @@ -894,7 +912,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, vino.ino = le64_to_cpu(ininfo->ino); vino.snap = le64_to_cpu(ininfo->snapid); if (!dn->d_inode) { - in = ceph_get_inode(dn->d_sb, vino); + in = ceph_get_inode(sb, vino); if (IS_ERR(in)) { derr(30, "get_inode badness\n"); err = PTR_ERR(in); diff --git a/src/kernel/super.h b/src/kernel/super.h index d076203f1c32e..c82948a33852f 100644 --- a/src/kernel/super.h +++ b/src/kernel/super.h @@ -781,6 +781,7 @@ extern const struct inode_operations ceph_dir_iops; extern struct dentry_operations ceph_dentry_ops, ceph_snap_dentry_ops, ceph_snapdir_dentry_ops; +extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry); extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, struct dentry *dentry, int err); -- 2.39.5