/* Extract the value field: the low 24 bits of the encoded frag. */
static inline __u32 frag_value(__u32 f)
{
	const __u32 value_field = 0xffffffu;

	return f & value_field;
}
/* Mask made of the frag_bits(f) low-order bits of the 24-bit value field. */
static inline __u32 frag_mask(__u32 f)
{
	__u32 mask = 0xffffffu;

	mask >>= 24 - frag_bits(f);
	return mask;
}
/* Frag with the same bit count as f and a value one greater than f's. */
static inline __u32 frag_next(__u32 f)
{
	__u32 successor = frag_value(f) + 1;

	return frag_make(frag_bits(f), successor);
}
+/* True when the value field of f is zero. */
+static inline bool frag_is_leftmost(__u32 f)
+{
+	__u32 value = frag_value(f);
+
+	return !value;
+}
+/* True when the value field of f equals frag_mask(f). */
+static inline bool frag_is_rightmost(__u32 f)
+{
+	const __u32 value = frag_value(f);
+	const __u32 last = frag_mask(f);
+
+	return value == last;
+}
+/*
+ * Three-way ordering of two encoded frags: compare the value fields
+ * first and, only when those are equal, the bit counts.
+ * Returns -1, 0 or 1.
+ */
+static inline int frag_compare(__u32 a, __u32 b)
+{
+	unsigned lhs = frag_value(a);
+	unsigned rhs = frag_value(b);
+
+	if (lhs == rhs) {
+		/* values tie; the bit counts decide */
+		lhs = frag_bits(a);
+		rhs = frag_bits(b);
+	}
+	return (lhs > rhs) - (lhs < rhs);
+}
+/*
+ * True when v, restricted to the bits selected by frag_mask(f),
+ * equals the value field of f.
+ */
+static inline bool frag_contains_value(__u32 f, __u32 v)
+{
+	__u32 masked = v & frag_mask(f);
+
+	return masked == frag_value(f);
+}
+
/*
* object layout - how objects are mapped into PGs
_frag_t _enc;
// Default construction: the encoding starts out as 0.
frag_t()
  : _enc(0)
{
}
- frag_t(unsigned v, unsigned b) : _enc((b << 24) +
- (v & (0xffffffffULL >> (32-b)))) { }
+ frag_t(unsigned v, unsigned b) : _enc(frag_make(b, v)) { } // build the encoding from bit count b and value v via frag_make()
// Wrap an already-encoded value; e is stored unchanged.
frag_t(_frag_t e)
  : _enc(e)
{
}
// constructors
// Overwrite the stored encoding with e, taken as-is.
void from_unsigned(unsigned e)
{
  _enc = e;
}
// accessors
- unsigned value() const { return _enc & 0xffffff; }
- unsigned bits() const { return _enc >> 24; }
- unsigned mask() const { return 0xffffffffULL >> (32-bits()); }
+ unsigned value() const { return frag_value(_enc); } // value field of the encoding, as extracted by frag_value()
+ unsigned bits() const { return frag_bits(_enc); } // bit count of the encoding, as extracted by frag_bits()
+ unsigned mask() const { return frag_mask(_enc); } // mask for this frag, as computed by frag_mask()
// Implicit conversion back to the raw encoded form.
operator _frag_t() const
{
  return _enc;
}
// binary splitting
frag_t get_sibling() const {
assert(!is_root());
- return frag_t(_enc ^ (1 << (bits()-1)));
+ return frag_t(value() ^ (1 << (bits()-1)), bits()); // flip bit (bits()-1) of the value; the bit count is unchanged
}
bool is_left() const {
return
// sequencing
bool is_leftmost() const {
- return value() == 0;
+ return frag_is_leftmost(_enc); // delegates to frag_is_leftmost() on the raw encoding
}
bool is_rightmost() const {
- return value() == mask();
+ return frag_is_rightmost(_enc); // delegates to frag_is_rightmost() on the raw encoding
}
frag_t next() const {
assert(!is_rightmost());
- return frag_t(value() + 1, bits());
+ return frag_t(frag_next(_enc)); // successor encoding is computed by frag_next() and wrapped in a frag_t
}
};
std::list<frag_t> q;
q.push_back(under);
while (!q.empty()) {
- frag_t t = q.front();
- q.pop_front();
+ frag_t t = q.back();
+ q.pop_back();
int nb = get_split(t);
if (nb)
t.split(nb, q); // queue up children
else
- ls.push_back(t); // not spit, it's a leaf.
+ ls.push_front(t); // not spit, it's a leaf.
}
}
struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc;
unsigned frag = fpos_frag(filp->f_pos);
unsigned off = fpos_off(filp->f_pos);
- unsigned skew = -2;
+ unsigned skew;
int err;
__u32 ftype;
struct ceph_mds_reply_info *rinfo;
struct ceph_mds_request *req;
struct ceph_mds_request_head *rhead;
+ frag = ceph_choose_frag(ceph_inode(inode), frag);
+
/* query mds */
- dout(10, "dir_readdir querying mds for ino %llx frag %u\n",
+ dout(10, "dir_readdir querying mds for ino %llx frag %x\n",
ceph_ino(inode), frag);
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_READDIR,
ceph_ino(inode), "", 0, 0);
return err;
}
dout(10, "dir_readdir got and parsed readdir result=%d"
- " on frag %u\n", err, frag);
+ " on frag %x\n", err, frag);
if (fi->last_readdir)
ceph_mdsc_put_request(fi->last_readdir);
fi->last_readdir = req;
}
/* include . and .. with first fragment */
- if (frag == 0) {
+ if (frag_is_leftmost(frag)) {
switch (off) {
case 0:
dout(10, "dir_readdir off 0 -> '.'\n");
off++;
filp->f_pos++;
}
- } else
skew = -2;
+ } else
+ skew = 0;
rinfo = &fi->last_readdir->r_reply_info;
+ dout(10, "dir_readdir frag %x num %d off %d skew %d\n", frag,
+ rinfo->dir_nr, off, skew);
while (off+skew < rinfo->dir_nr) {
dout(10, "dir_readdir off %d -> %d / %d name '%.*s'\n",
off, off+skew,
frag = frag_next(frag);
off = 0;
filp->f_pos = make_fpos(frag, off);
- dout(10, "dir_readdir next frag is %u\n", frag);
+ dout(10, "dir_readdir next frag is %x\n", frag);
goto nextfrag;
}
struct rb_node **p = &ci->i_fragtree.rb_node;
struct rb_node *parent = NULL;
struct ceph_inode_frag *frag;
+ int c;
while (*p) {
parent = *p;
frag = rb_entry(parent, struct ceph_inode_frag, node);
- if (f < frag->frag)
+ c = frag_compare(f, frag->frag);
+ if (c < 0)
p = &(*p)->rb_left;
- else if (f > frag->frag)
+ else if (c > 0)
p = &(*p)->rb_right;
else
return frag;
return frag;
}
+/*
+ * Starting from frag_make(0, 0), repeatedly look the current frag up
+ * with ceph_find_frag() and step into the child that contains v.
+ * Stops at the first frag that is either not found or has
+ * split_by == 0, and returns it.
+ *
+ * The whole walk runs with ci->vfs_inode.i_lock held.
+ */
+__u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v)
+{
+	struct ceph_inode_frag *node;
+	u32 cur = frag_make(0, 0);
+	u32 child;
+	unsigned nway, i;
+
+	spin_lock(&ci->vfs_inode.i_lock);
+	for (;;) {
+		WARN_ON(!frag_contains_value(cur, v));
+		node = ceph_find_frag(ci, cur);
+		if (!node)
+			break;	/* cur is a leaf */
+		if (node->split_by == 0)
+			break;	/* cur is a leaf */
+
+		/* try each of the (1 << split_by) children in turn */
+		nway = 1 << node->split_by;
+		dout(30, "choose_frag(%x) %x splits by %d (%d ways)\n", v, cur,
+		     node->split_by, nway);
+		i = 0;
+		while (i < nway) {
+			child = frag_make(frag_bits(cur) + node->split_by,
+					  frag_value(cur) | (i << frag_bits(cur)));
+			if (frag_contains_value(child, v)) {
+				cur = child;
+				break;
+			}
+			i++;
+		}
+		/* no child matched v */
+		BUG_ON(i == nway);
+	}
+	dout(30, "choose_frag(%x) = %x\n", v, cur);
+
+	spin_unlock(&ci->vfs_inode.i_lock);
+	return cur;
+}
+
static int ceph_fill_dirfrag(struct inode *inode,
struct ceph_mds_reply_dirfrag *dirinfo)
{
while (n) {
struct ceph_inode_frag *frag =
rb_entry(n, struct ceph_inode_frag, node);
- if (f < frag->frag)
+ int c = frag_compare(f, frag->frag);
+ if (c < 0)
n = n->rb_left;
- else if (f > frag->frag)
+ else if (c > 0)
n = n->rb_right;
else
return frag;
return NULL;
}
+extern __u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v);
+
struct ceph_dentry_info {
struct dentry *dentry;
struct ceph_mds_session *lease_session;
return frag_t(); // avoid the string hash if we can.
__u32 h = ceph_full_name_hash((const unsigned char *)dn.data(), dn.length());
- return dirfragtree[h];
+ return dirfragtree[h*h];
}
void CInode::get_dirfrags_under(frag_t fg, list<CDir*>& ls)
if (!cur->is_dir())
return -ENOTDIR;
- frag_t fg = cur->dirfragtree[frag_t(inopath[i].dname_hash)];
+ frag_t fg = cur->dirfragtree[inopath[i].dname_hash];
dout(10) << " hash " << inopath[i].dname_hash << " is frag " << fg << dendl;
CDir *curdir = cur->get_dirfrag(fg);
frag_t fg = (__u32)req->head.args.readdir.frag;
// does the frag exist?
- if (diri->dirfragtree[fg] != fg) {
+ if (diri->dirfragtree[fg.value()] != fg) {
dout(10) << "frag " << fg << " doesn't appear in fragtree " << diri->dirfragtree << dendl;
reply_request(mdr, -EAGAIN);
return;