From 60ec1cbe0ecc1e57d22c0eccffcb3b30faf7f5b9 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 15 Jun 2009 15:35:10 -0700 Subject: [PATCH] kclient: fix I_COMPLETE The previous use of I_READDIR vs I_COMPLETE was flawed, mainly because the state was maintained on a per-inode basis, but readdir proceeds on a per-file basis. Instead of flags, maintain a counter in the inode that is incremented each time a dentry is released. When readdir starts, note the counter, and if it is the same when readdir completes, AND we did not do any forward seeks on the file handle, AND prepopulate succeeded on each hunk, then we can set I_COMPLETE. --- src/kernel/dir.c | 28 +++++++++++++++------------- src/kernel/inode.c | 5 ++++- src/kernel/mds_client.h | 2 ++ src/kernel/super.h | 3 ++- src/mds/Server.cc | 2 +- 5 files changed, 24 insertions(+), 16 deletions(-) diff --git a/src/kernel/dir.c b/src/kernel/dir.c index 0c472e7ad39e3..fdeff4b1e242c 100644 --- a/src/kernel/dir.c +++ b/src/kernel/dir.c @@ -157,7 +157,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) int err; u32 ftype; struct ceph_mds_reply_info_parsed *rinfo; - int complete = 0, len; + int len; const int max_entries = client->mount_args.max_readdir; dout(5, "readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); @@ -165,8 +165,8 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) return 0; if (filp->f_pos == 0) { - /* set I_READDIR at start of readdir */ - ceph_i_set(inode, CEPH_I_READDIR); + /* note dir version at start of readdir */ + fi->dir_release_count = ci->i_release_count; dout(10, "readdir off 0 -> '.'\n"); if (filldir(dirent, ".", 1, ceph_make_fpos(0, 0), @@ -242,8 +242,10 @@ more: (int)req->r_reply_info.dir_end, (int)req->r_reply_info.dir_complete); - if (req->r_reply_info.dir_complete) - complete = 1; + if (!req->r_did_prepopulate) { + dout(10, "readdir !did_prepopulate"); + fi->dir_release_count--; + } fi->off = fi->next_off; kfree(fi->last_name); @@ -312,15 +314,14 @@ more: fi->at_end = 1; /* - * if I_READDIR is still set, no dentries were released - * during the whole readdir, and we should have the complete - * dir contents in our cache. + * if dir_release_count still matches the dir, no dentries + * were released during the whole readdir, and we should have + * the complete dir contents in our cache. */ spin_lock(&inode->i_lock); - if (complete && (ci->i_ceph_flags & CEPH_I_READDIR)) { + if (ci->i_release_count == fi->dir_release_count) { dout(10, " marking %p complete\n", inode); ci->i_ceph_flags |= CEPH_I_COMPLETE; - ci->i_ceph_flags &= ~CEPH_I_READDIR; ci->i_max_offset = filp->f_pos; } spin_unlock(&inode->i_lock); @@ -364,9 +365,9 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) fi->at_end = 0; } - /* clear I_READDIR if we did a forward seek */ + /* bump dir_release_count if we did a forward seek */ if (offset > old_offset) - ceph_inode(inode)->i_ceph_flags &= ~CEPH_I_READDIR; + fi->dir_release_count--; } mutex_unlock(&inode->i_mutex); return retval; @@ -903,7 +904,8 @@ static void ceph_dentry_release(struct dentry *dentry) if (ci->i_rdcache_gen == di->lease_rdcache_gen) { dout(10, " clearing %p complete (d_release)\n", parent_inode); - ci->i_ceph_flags &= ~(CEPH_I_COMPLETE|CEPH_I_READDIR); + ci->i_ceph_flags &= ~CEPH_I_COMPLETE; + ci->i_release_count++; } spin_unlock(&parent_inode->i_lock); } diff --git a/src/kernel/inode.c b/src/kernel/inode.c index 5473ab6455289..7c2a90c892d2a 100644 --- a/src/kernel/inode.c +++ b/src/kernel/inode.c @@ -251,6 +251,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_version = 0; ci->i_time_warp_seq = 0; ci->i_ceph_flags = 0; + ci->i_release_count = 0; ci->i_symlink = NULL; ci->i_fragtree = RB_ROOT; @@ -854,7 +855,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, ceph_inode(req->r_locked_dir); dout(10, " clearing %p complete (empty trace)\n", req->r_locked_dir); - ci->i_ceph_flags &= ~(CEPH_I_READDIR | CEPH_I_COMPLETE); + ci->i_ceph_flags &= ~CEPH_I_COMPLETE; + ci->i_release_count++; } return 0; } @@ -1134,6 +1136,7 @@ retry_lookup: req->r_session, req->r_request_started); dput(dn); } + req->r_did_prepopulate = true; out: if (snapdir) { diff --git a/src/kernel/mds_client.h b/src/kernel/mds_client.h index 4661e35cd2cbe..4316c5b2cbe43 100644 --- a/src/kernel/mds_client.h +++ b/src/kernel/mds_client.h @@ -220,6 +220,8 @@ struct ceph_mds_request { struct list_head r_unsafe_item; /* per-session unsafe list item */ bool r_got_unsafe, r_got_safe; + bool r_did_prepopulate; + struct ceph_cap_reservation r_caps_reservation; int r_num_caps; }; diff --git a/src/kernel/super.h b/src/kernel/super.h index 4651284acef19..4a0381c009d0f 100644 --- a/src/kernel/super.h +++ b/src/kernel/super.h @@ -274,7 +274,6 @@ struct ceph_inode_xattrs_info { * Ceph inode. */ #define CEPH_I_COMPLETE 1 /* we have complete directory cached */ -#define CEPH_I_READDIR 2 /* no dentries trimmed since readdir start */ #define CEPH_I_NODELAY 4 /* do not delay cap release */ #define CEPH_I_FLUSH 8 /* do not delay cap send */ @@ -285,6 +284,7 @@ struct ceph_inode_info { u32 i_time_warp_seq; unsigned i_ceph_flags; + unsigned long i_release_count; struct ceph_file_layout i_layout; char *i_symlink; @@ -622,6 +622,7 @@ struct ceph_file_info { unsigned next_off; struct dentry *dentry; int at_end; + unsigned long dir_release_count; /* used for -o dirstat read() on directory thing */ char *dir_info; diff --git a/src/mds/Server.cc b/src/mds/Server.cc index a894243b1021f..4212f20f51b44 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -2209,7 +2209,7 @@ void Server::handle_client_readdir(MDRequest *mdr) } __u8 end = (it == dir->end()); - __u8 complete = (end && !offset); + __u8 complete = (end && !offset); // FIXME: what purpose does this serve // final blob bufferlist dirbl; -- 2.39.5