From 6bdbe2a946df2c97e094688a24abf0a190e87760 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 19 Aug 2008 15:46:56 -0700 Subject: [PATCH] kclient: use rbtree for inode caps; avoid looping in ceph_check_caps() --- src/kernel/addr.c | 16 +++++----- src/kernel/caps.c | 80 +++++++++++++++++++++++++++++++++------------- src/kernel/super.c | 2 +- src/kernel/super.h | 12 +++---- 4 files changed, 72 insertions(+), 38 deletions(-) diff --git a/src/kernel/addr.c b/src/kernel/addr.c index 40b59c40ac16d..8426326c1935e 100644 --- a/src/kernel/addr.c +++ b/src/kernel/addr.c @@ -25,12 +25,12 @@ static int ceph_set_page_dirty(struct page *page) return !TestSetPageDirty(page); if (TestSetPageDirty(page)) { - dout(20, "%p set_page_dirty %p -- already dirty\n", + dout(20, "%p set_page_dirty %p -- already dirty\n", mapping->host, page); return 0; } -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) spin_lock_irq(&mapping->tree_lock); #else write_lock_irq(&mapping->tree_lock); @@ -58,7 +58,7 @@ static int ceph_set_page_dirty(struct page *page) snapc = ceph_get_snap_context(ci->i_snaprealm->cached_context); page->private = (unsigned long)snapc; SetPagePrivate(page); - dout(20, "%p set_page_dirty %p %d -> %d (?)\n", + dout(20, "%p set_page_dirty %p %d -> %d (?)\n", mapping->host, page, atomic_read(&ci->i_wrbuffer_ref)-1, atomic_read(&ci->i_wrbuffer_ref)); @@ -66,7 +66,7 @@ static int ceph_set_page_dirty(struct page *page) mapping->host, page, snapc, snapc->seq, snapc->num_snaps); } else dout(20, "ANON set_page_dirty %p (raced truncate?)\n", page); -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) spin_unlock_irq(&mapping->tree_lock); #else write_unlock_irq(&mapping->tree_lock); @@ -95,13 +95,13 @@ static void ceph_invalidatepage(struct page *page, unsigned long offset) } ci = ceph_inode(page->mapping->host); if (offset == 0) { - dout(20, "%p invalidatepage %p idx %lu full dirty page %lu\n", + dout(20, "%p invalidatepage %p idx %lu full dirty page %lu\n", &ci->vfs_inode, page, page->index, offset); atomic_dec(&ci->i_wrbuffer_ref); ceph_put_snap_context((void *)page->private); ClearPagePrivate(page); } else - dout(20, "%p invalidatepage %p idx %lu partial dirty page\n", + dout(20, "%p invalidatepage %p idx %lu partial dirty page\n", &ci->vfs_inode, page, page->index); } @@ -435,7 +435,7 @@ get_more_pages: dout(20, "%p locked+cleaned page %p idx %lu\n", inode, page, page->index); - + if (pages) pages[locked_pages] = page; else if (locked_pages == 0) @@ -494,7 +494,7 @@ get_more_pages: if (i < wrote) SetPageUptodate(page); else { - dout(20, "%p redirtying page %p\n", + dout(20, "%p redirtying page %p\n", inode, page); wbc->pages_skipped++; ceph_set_page_dirty(page); diff --git a/src/kernel/caps.c b/src/kernel/caps.c index 240568a2441f4..e874ee77c7a0b 100644 --- a/src/kernel/caps.c +++ b/src/kernel/caps.c @@ -16,26 +16,55 @@ static struct ceph_inode_cap *__get_cap_for_mds(struct inode *inode, int mds) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_cap *cap; - struct list_head *p; - - list_for_each(p, &ci->i_caps) { - cap = list_entry(p, struct ceph_inode_cap, ci_caps); - if (cap->mds == mds) + struct rb_node *n = ci->i_caps.rb_node; + + while (n) { + cap = rb_entry(n, struct ceph_inode_cap, ci_node); + if (mds < cap->mds) + n = n->rb_left; + else if (mds > cap->mds) + n = n->rb_right; + else return cap; } return 0; } +static void __insert_cap_node(struct ceph_inode_info *ci, + struct ceph_inode_cap *new) +{ + struct rb_node **p = &ci->i_caps.rb_node; + struct rb_node *parent = NULL; + struct ceph_inode_cap *cap = 0; + + while (*p) { + parent = *p; + cap = rb_entry(parent, struct ceph_inode_cap, ci_node); + if (new->mds < cap->mds) + p = &(*p)->rb_left; + else if (new->mds > cap->mds) + p = &(*p)->rb_right; + else + BUG(); + } + + rb_link_node(&new->ci_node, parent, p); + rb_insert_color(&new->ci_node, &ci->i_caps); +} + int ceph_get_cap_mds(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_cap *cap; int mds = -1; + /* + * hmm, i guess _any_ cap will do, here? + */ spin_lock(&inode->i_lock); - if (!list_empty(&ci->i_caps)) { - cap = list_first_entry(&ci->i_caps, struct ceph_inode_cap, - ci_caps); + if (!RB_EMPTY_ROOT(&ci->i_caps)) { + cap = rb_entry(ci->i_caps.rb_node, struct ceph_inode_cap, + ci_node); mds = cap->mds; } spin_unlock(&inode->i_lock); @@ -97,7 +126,7 @@ retry: cap->flushed_snap = 0; cap->ci = ci; - list_add(&cap->ci_caps, &ci->i_caps); + __insert_cap_node(ci, cap); /* add to session cap list */ cap->session = session; @@ -138,12 +167,12 @@ int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented) { int have = ci->i_snap_caps; struct ceph_inode_cap *cap; - struct list_head *p; u32 gen; unsigned long ttl; + struct rb_node *p; - list_for_each(p, &ci->i_caps) { - cap = list_entry(p, struct ceph_inode_cap, ci_caps); + for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { + cap = rb_entry(p, struct ceph_inode_cap, ci_node); spin_lock(&cap->session->s_cap_lock); gen = cap->session->s_cap_gen; @@ -181,7 +210,7 @@ int __ceph_remove_cap(struct ceph_inode_cap *cap) session->s_nr_caps--; /* remove from inode list */ - list_del_init(&cap->ci_caps); + rb_erase(&cap->ci_node, &ci->i_caps); cap->session = 0; cap->mds = -1; /* mark unused */ @@ -189,7 +218,7 @@ int __ceph_remove_cap(struct ceph_inode_cap *cap) cap >= ci->i_static_caps + STATIC_CAPS) kfree(cap); - if (list_empty(&ci->i_caps)) { + if (RB_EMPTY_ROOT(&ci->i_caps)) { list_del_init(&ci->i_snaprealm_item); return 1; } @@ -242,11 +271,13 @@ void ceph_check_caps(struct ceph_inode_info *ci, int is_delayed, int flush_snap) struct ceph_mds_client *mdsc = &client->mdsc; struct inode *inode = &ci->vfs_inode; struct ceph_inode_cap *cap; - struct list_head *p; int wanted, used; struct ceph_mds_session *session = 0; /* if non-NULL, i hold s_mutex */ int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */ - + int revoking; + int mds = -1; + struct rb_node *p; + retry: spin_lock(&inode->i_lock); wanted = __ceph_caps_wanted(ci); @@ -256,10 +287,11 @@ retry: if (!is_delayed) __ceph_cap_delay_requeue(mdsc, ci); - - list_for_each(p, &ci->i_caps) { - int revoking; - cap = list_entry(p, struct ceph_inode_cap, ci_caps); + + for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { + cap = rb_entry(p, struct ceph_inode_cap, ci_node); + if (mds >= cap->mds) + continue; /* note: no side-effects allowed, until we take s_mutex */ revoking = cap->implemented & ~cap->issued; @@ -337,6 +369,8 @@ ack: } } + mds = cap->mds; /* remember mds, so we don't repeat */ + /* send_cap drops i_lock */ __ceph_mdsc_send_cap(mdsc, session, cap, used, wanted, flush_snap); @@ -759,7 +793,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, int mds = session->s_mds; unsigned mseq = le32_to_cpu(ex->migrate_seq); struct ceph_inode_cap *cap = 0, *t; - struct list_head *p; + struct rb_node *p; int was_last = 0; dout(10, "handle_cap_export inode %p ci %p mds%d mseq %d\n", @@ -768,8 +802,8 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, spin_lock(&inode->i_lock); /* make sure we haven't seen a higher mseq */ - list_for_each(p, &ci->i_caps) { - t = list_entry(p, struct ceph_inode_cap, ci_caps); + for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { + t = rb_entry(p, struct ceph_inode_cap, ci_node); if (t->mseq > mseq) { dout(10, " higher mseq on cap from mds%d\n", t->session->s_mds); diff --git a/src/kernel/super.c b/src/kernel/super.c index 60ccdb6165a10..3115c9fdf84ba 100644 --- a/src/kernel/super.c +++ b/src/kernel/super.c @@ -167,7 +167,7 @@ static struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_xattr_len = 0; ci->i_xattr_data = 0; - INIT_LIST_HEAD(&ci->i_caps); + ci->i_caps = RB_ROOT; for (i = 0; i < STATIC_CAPS; i++) ci->i_static_caps[i].mds = -1; for (i = 0; i < CEPH_FILE_MODE_NUM; i++) diff --git a/src/kernel/super.h b/src/kernel/super.h index cc7ac0d4f23e3..c8e6c2ba94491 100644 --- a/src/kernel/super.h +++ b/src/kernel/super.h @@ -170,16 +170,16 @@ static inline struct ceph_client *ceph_client(struct super_block *sb) */ struct ceph_inode_cap { - int mds; /* -1 if not used */ + struct ceph_inode_info *ci; + struct rb_node ci_node; /* per-ci cap tree */ + struct ceph_mds_session *session; + struct list_head session_caps; /* per-session caplist */ + int mds; /* must be -1 if not in use */ int issued; /* latest, from the mds */ int implemented; /* what we've implemneted (for tracking revocation) */ u32 seq, mseq, gen; int flags; /* stale, etc.? */ u64 flushed_snap; - struct ceph_inode_info *ci; - struct list_head ci_caps; /* per-ci caplist */ - struct ceph_mds_session *session; - struct list_head session_caps; /* per-session caplist */ }; #define MAX_DIRFRAG_REP 4 @@ -233,7 +233,7 @@ struct ceph_inode_info { int i_xattr_len; char *i_xattr_data; - struct list_head i_caps; + struct rb_root i_caps; struct ceph_inode_cap i_static_caps[STATIC_CAPS]; wait_queue_head_t i_cap_wq; unsigned long i_hold_caps_until; /* jiffies */ -- 2.39.5