From: Sage Weil Date: Sat, 10 May 2008 23:31:08 +0000 (-0700) Subject: kclient: make readdir handle fragments X-Git-Tag: v0.3~231 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=ba6e926922b1f02c47b1b5b6701acf7d5ead7653;p=ceph.git kclient: make readdir handle fragments --- diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 6b6eed999a80..0853d276f6aa 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -74,6 +74,32 @@ static inline __u32 frag_bits(__u32 f) { return f >> 24; } static inline __u32 frag_value(__u32 f) { return f & 0xffffffu; } static inline __u32 frag_mask(__u32 f) { return 0xffffffu >> (24-frag_bits(f)); } static inline __u32 frag_next(__u32 f) { return frag_make(frag_bits(f), frag_value(f)+1); } +static inline bool frag_is_leftmost(__u32 f) { + return frag_value(f) == 0; +} +static inline bool frag_is_rightmost(__u32 f) { + return frag_value(f) == frag_mask(f); +} +static inline int frag_compare(__u32 a, __u32 b) { + unsigned va = frag_value(a); + unsigned vb = frag_value(b); + if (va < vb) + return -1; + if (va > vb) + return 1; + va = frag_bits(a); + vb = frag_bits(b); + if (va < vb) + return -1; + if (va > vb) + return 1; + return 0; +} +static inline bool frag_contains_value(__u32 f, __u32 v) +{ + return (v & frag_mask(f)) == frag_value(f); +} + /* * object layout - how objects are mapped into PGs diff --git a/src/include/frag.h b/src/include/frag.h index bd6e85289c14..50e441373df2 100644 --- a/src/include/frag.h +++ b/src/include/frag.h @@ -77,17 +77,16 @@ class frag_t { _frag_t _enc; frag_t() : _enc(0) { } - frag_t(unsigned v, unsigned b) : _enc((b << 24) + - (v & (0xffffffffULL >> (32-b)))) { } + frag_t(unsigned v, unsigned b) : _enc(frag_make(b, v)) { } frag_t(_frag_t e) : _enc(e) { } // constructors void from_unsigned(unsigned e) { _enc = e; } // accessors - unsigned value() const { return _enc & 0xffffff; } - unsigned bits() const { return _enc >> 24; } - unsigned mask() const { return 0xffffffffULL >> (32-bits()); } + unsigned value() const { return frag_value(_enc); } + unsigned bits() const { return frag_bits(_enc); } + unsigned mask() const { return frag_mask(_enc); } operator _frag_t() const { return _enc; } @@ -119,7 +118,7 @@ class frag_t { // binary splitting frag_t get_sibling() const { assert(!is_root()); - return frag_t(_enc ^ (1 << (bits()-1))); + return frag_t(value() ^ (1 << (bits()-1)), bits()); } bool is_left() const { return @@ -140,14 +139,14 @@ class frag_t { // sequencing bool is_leftmost() const { - return value() == 0; + return frag_is_leftmost(_enc); } bool is_rightmost() const { - return value() == mask(); + return frag_is_rightmost(_enc); } frag_t next() const { assert(!is_rightmost()); - return frag_t(value() + 1, bits()); + return frag_t(frag_next(_enc)); } }; @@ -221,13 +220,13 @@ public: std::list q; q.push_back(under); while (!q.empty()) { - frag_t t = q.front(); - q.pop_front(); + frag_t t = q.back(); + q.pop_back(); int nb = get_split(t); if (nb) t.split(nb, q); // queue up children else - ls.push_back(t); // not spit, it's a leaf. + ls.push_front(t); // not spit, it's a leaf. } } diff --git a/src/kernel/dir.c b/src/kernel/dir.c index 7343e5a764a2..4bf70332ffed 100644 --- a/src/kernel/dir.c +++ b/src/kernel/dir.c @@ -102,7 +102,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; unsigned frag = fpos_frag(filp->f_pos); unsigned off = fpos_off(filp->f_pos); - unsigned skew = -2; + unsigned skew; int err; __u32 ftype; struct ceph_mds_reply_info *rinfo; @@ -113,8 +113,10 @@ nextfrag: struct ceph_mds_request *req; struct ceph_mds_request_head *rhead; + frag = ceph_choose_frag(ceph_inode(inode), frag); + /* query mds */ - dout(10, "dir_readdir querying mds for ino %llx frag %u\n", + dout(10, "dir_readdir querying mds for ino %llx frag %x\n", ceph_ino(inode), frag); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_READDIR, ceph_ino(inode), "", 0, 0); @@ -128,14 +130,14 @@ nextfrag: return err; } dout(10, "dir_readdir got and parsed readdir result=%d" - " on frag %u\n", err, frag); + " on frag %x\n", err, frag); if (fi->last_readdir) ceph_mdsc_put_request(fi->last_readdir); fi->last_readdir = req; } /* include . and .. with first fragment */ - if (frag == 0) { + if (frag_is_leftmost(frag)) { switch (off) { case 0: dout(10, "dir_readdir off 0 -> '.'\n"); @@ -154,10 +156,13 @@ nextfrag: off++; filp->f_pos++; } - } else skew = -2; + } else + skew = 0; rinfo = &fi->last_readdir->r_reply_info; + dout(10, "dir_readdir frag %x num %d off %d skew %d\n", frag, + rinfo->dir_nr, off, skew); while (off+skew < rinfo->dir_nr) { dout(10, "dir_readdir off %d -> %d / %d name '%.*s'\n", off, off+skew, @@ -182,7 +187,7 @@ nextfrag: frag = frag_next(frag); off = 0; filp->f_pos = make_fpos(frag, off); - dout(10, "dir_readdir next frag is %u\n", frag); + dout(10, "dir_readdir next frag is %x\n", frag); goto nextfrag; } diff --git a/src/kernel/inode.c b/src/kernel/inode.c index cae067114df1..e878bd59fca5 100644 --- a/src/kernel/inode.c +++ b/src/kernel/inode.c @@ -69,13 +69,15 @@ struct ceph_inode_frag *ceph_get_frag(struct ceph_inode_info *ci, u32 f) struct rb_node **p = &ci->i_fragtree.rb_node; struct rb_node *parent = NULL; struct ceph_inode_frag *frag; + int c; while (*p) { parent = *p; frag = rb_entry(parent, struct ceph_inode_frag, node); - if (f < frag->frag) + c = frag_compare(f, frag->frag); + if (c < 0) p = &(*p)->rb_left; - else if (f > frag->frag) + else if (c > 0) p = &(*p)->rb_right; else return frag; @@ -97,6 +99,40 @@ struct ceph_inode_frag *ceph_get_frag(struct ceph_inode_info *ci, u32 f) return frag; } +__u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v) +{ + u32 t = frag_make(0, 0); + struct ceph_inode_frag *frag; + unsigned nway, i; + u32 n; + + spin_lock(&ci->vfs_inode.i_lock); + while (1) { + WARN_ON(!frag_contains_value(t, v)); + frag = ceph_find_frag(ci, t); + if (!frag || frag->split_by == 0) + break; /* t is a leaf */ + + /* choose child */ + nway = 1 << frag->split_by; + dout(30, "choose_frag(%x) %x splits by %d (%d ways)\n", v, t, + frag->split_by, nway); + for (i = 0; i < nway; i++) { + n = frag_make(frag_bits(t) + frag->split_by, + frag_value(t) | (i << frag_bits(t))); + if (frag_contains_value(n, v)) { + t = n; + break; + } + } + BUG_ON(i == nway); + } + dout(30, "choose_frag(%x) = %x\n", v, t); + + spin_unlock(&ci->vfs_inode.i_lock); + return t; +} + static int ceph_fill_dirfrag(struct inode *inode, struct ceph_mds_reply_dirfrag *dirinfo) { diff --git a/src/kernel/super.h b/src/kernel/super.h index e90e1b7d24bd..c06ea22ea13a 100644 --- a/src/kernel/super.h +++ b/src/kernel/super.h @@ -227,9 +227,10 @@ static inline struct ceph_inode_frag *ceph_find_frag(struct ceph_inode_info *ci, while (n) { struct ceph_inode_frag *frag = rb_entry(n, struct ceph_inode_frag, node); - if (f < frag->frag) + int c = frag_compare(f, frag->frag); + if (c < 0) n = n->rb_left; - else if (f > frag->frag) + else if (c > 0) n = n->rb_right; else return frag; @@ -237,6 +238,8 @@ static inline struct ceph_inode_frag *ceph_find_frag(struct ceph_inode_info *ci, return NULL; } +extern __u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v); + struct ceph_dentry_info { struct dentry *dentry; struct ceph_mds_session *lease_session; diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 0bfa9929e791..5e6d4dad2753 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -140,7 +140,7 @@ frag_t CInode::pick_dirfrag(const string& dn) return frag_t(); // avoid the string hash if we can. __u32 h = ceph_full_name_hash((const unsigned char *)dn.data(), dn.length()); - return dirfragtree[h]; + return dirfragtree[h*h]; } void CInode::get_dirfrags_under(frag_t fg, list& ls) diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 6b23bc4c052d..0eea6d384d29 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -4196,7 +4196,7 @@ int MDCache::inopath_traverse(MDRequest *mdr, vector &inopath if (!cur->is_dir()) return -ENOTDIR; - frag_t fg = cur->dirfragtree[frag_t(inopath[i].dname_hash)]; + frag_t fg = cur->dirfragtree[inopath[i].dname_hash]; dout(10) << " hash " << inopath[i].dname_hash << " is frag " << fg << dendl; CDir *curdir = cur->get_dirfrag(fg); diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 5838d3307291..060f38573989 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -1768,7 +1768,7 @@ void Server::handle_client_readdir(MDRequest *mdr) frag_t fg = (__u32)req->head.args.readdir.frag; // does the frag exist? - if (diri->dirfragtree[fg] != fg) { + if (diri->dirfragtree[fg.value()] != fg) { dout(10) << "frag " << fg << " doesn't appear in fragtree " << diri->dirfragtree << dendl; reply_request(mdr, -EAGAIN); return;