From 0a7e55946a0dc8204e6ffb895c487191da22df8b Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 15 Jun 2009 16:16:57 -0700 Subject: [PATCH] kclient: fix di->off calculation The dentry dir offset calculation wasn't taking into account the possibility of multiple readdir requests, which in turn meant bad results for readdir-from-dcache. Since doing this on the client side was a mess, the MDS includes a dentry offset for each readdir dentry within the dirfrag. This value is stored in di->offset (with adjustment in leftmost frag for . and ..), and that's the value that's passed back via filldir. --- src/client/Client.cc | 2 ++ src/include/ceph_fs.h | 2 +- src/kernel/dir.c | 10 ++++++---- src/kernel/inode.c | 4 ++-- src/kernel/mds_client.c | 8 ++++++-- src/kernel/mds_client.h | 1 + src/mds/Server.cc | 9 ++++++--- 7 files changed, 24 insertions(+), 12 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index dea62357915ed..4cd6ffc7b4c2f 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -638,6 +638,8 @@ Inode* Client::insert_trace(MetaRequest *request, utime_t from, int mds) string dname; LeaseStat dlease; while (numdn) { + __u32 pos; // dentry pos within the fragment + ::decode(pos, p); ::decode(dname, p); ::decode(dlease, p); InodeStat ist(p); diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 3be87ed6b2022..d443ada1bef56 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -27,7 +27,7 @@ #define CEPH_MDS_PROTOCOL 9 /* cluster internal */ #define CEPH_MON_PROTOCOL 4 /* cluster internal */ #define CEPH_OSDC_PROTOCOL 17 /* public/client */ -#define CEPH_MDSC_PROTOCOL 21 /* public/client */ +#define CEPH_MDSC_PROTOCOL 22 /* public/client */ #define CEPH_MONC_PROTOCOL 12 /* public/client */ diff --git a/src/kernel/dir.c b/src/kernel/dir.c index fdeff4b1e242c..c0fca4da62609 100644 --- a/src/kernel/dir.c +++ b/src/kernel/dir.c @@ -279,22 +279,24 @@ more: dout(10, "readdir frag %x num %d off %d fragoff %d skew %d\n", 
frag, rinfo->dir_nr, off, fi->off, skew); while (off >= skew && off+skew < rinfo->dir_nr) { - dout(10, "readdir off %d -> %d / %d name '%.*s'\n", + u64 pos = ceph_make_fpos(frag, rinfo->dir_pos[off+skew]); + + dout(10, "readdir off %d -> %d / %d %lld name '%.*s'\n", off, off+skew, - rinfo->dir_nr, rinfo->dir_dname_len[off+skew], + rinfo->dir_nr, pos, rinfo->dir_dname_len[off+skew], rinfo->dir_dname[off+skew]); ftype = le32_to_cpu(rinfo->dir_in[off+skew].in->mode) >> 12; if (filldir(dirent, rinfo->dir_dname[off+skew], rinfo->dir_dname_len[off+skew], - ceph_make_fpos(frag, off), + pos, le64_to_cpu(rinfo->dir_in[off+skew].in->ino), ftype) < 0) { dout(20, "filldir stopping us...\n"); return 0; } off++; - filp->f_pos++; + filp->f_pos = pos + 1; } if (fi->last_name) { diff --git a/src/kernel/inode.c b/src/kernel/inode.c index 7c2a90c892d2a..6c03b50212d05 100644 --- a/src/kernel/inode.c +++ b/src/kernel/inode.c @@ -1107,8 +1107,8 @@ retry_lookup: } di = dn->d_fsdata; - di->offset = ceph_make_fpos(frag, - i + (frag_is_leftmost(frag) ? 2 : 0)); + di->offset = ceph_make_fpos(frag, rinfo->dir_pos[i] + + (frag_is_leftmost(frag) ? 
2 : 0)); /* inode */ if (dn->d_inode) { diff --git a/src/kernel/mds_client.c b/src/kernel/mds_client.c index 68a82aa24e58a..3d949a288496a 100644 --- a/src/kernel/mds_client.c +++ b/src/kernel/mds_client.c @@ -136,19 +136,23 @@ static int parse_reply_info_dir(void **p, void *end, info->dir_in = kmalloc(num * (sizeof(*info->dir_in) + sizeof(*info->dir_dname) + sizeof(*info->dir_dname_len) + + sizeof(*info->dir_pos) + sizeof(*info->dir_dlease)), GFP_NOFS); if (info->dir_in == NULL) { err = -ENOMEM; goto out_bad; } - info->dir_dname = (void *)(info->dir_in + num); + info->dir_pos = (void *)(info->dir_in + num); + info->dir_dname = (void *)(info->dir_pos + num); info->dir_dname_len = (void *)(info->dir_dname + num); info->dir_dlease = (void *)(info->dir_dname_len + num); while (num) { /* dentry */ - ceph_decode_32_safe(p, end, info->dir_dname_len[i], bad); + ceph_decode_need(p, end, sizeof(u32)*2, bad); + ceph_decode_32(p, info->dir_pos[i]); + ceph_decode_32(p, info->dir_dname_len[i]); ceph_decode_need(p, end, info->dir_dname_len[i], bad); info->dir_dname[i] = *p; *p += info->dir_dname_len[i]; diff --git a/src/kernel/mds_client.h b/src/kernel/mds_client.h index 4316c5b2cbe43..fe0906973056a 100644 --- a/src/kernel/mds_client.h +++ b/src/kernel/mds_client.h @@ -86,6 +86,7 @@ struct ceph_mds_reply_info_parsed { u32 *dir_dname_len; struct ceph_mds_reply_lease **dir_dlease; struct ceph_mds_reply_info_in *dir_in; + u32 *dir_pos; u8 dir_complete, dir_end; /* encoded blob describing snapshot contexts for certain diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 4212f20f51b44..fe39108acd429 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -2143,13 +2143,11 @@ void Server::handle_client_readdir(MDRequest *mdr) __u32 numfiles = 0; + __u32 pos = 0; while (it != dir->end() && numfiles < max) { CDentry *dn = it->second; it++; - if (offset && strcmp(dn->get_name().c_str(), offset) <= 0) - continue; - if (dn->state_test(CDentry::STATE_PURGING)) continue; @@ -2168,6 
+2166,10 @@ void Server::handle_client_readdir(MDRequest *mdr) if (dn->last < snapid || dn->first > snapid) continue; + __u32 dpos = pos++; + if (offset && strcmp(dn->get_name().c_str(), offset) <= 0) + continue; + CInode *in = dnl->get_inode(); // remote link? @@ -2195,6 +2197,7 @@ void Server::handle_client_readdir(MDRequest *mdr) // dentry dout(12) << "including dn " << *dn << dendl; + ::encode(dpos, dnbl); ::encode(dn->name, dnbl); mds->locker->issue_client_lease(dn, client, dnbl, mdr->now, mdr->session); -- 2.39.5