From 215fb5b72a9bc7dd790f33bb9843abf21021fec1 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 18 Jan 2008 15:38:05 -0800 Subject: [PATCH] close mds sessions on umount --- src/TODO | 1 + src/kernel/client.c | 6 +- src/kernel/inode.c | 45 ++++++++++++-- src/kernel/mds_client.c | 127 ++++++++++++++++++++++++++++++---------- src/kernel/mds_client.h | 2 +- src/kernel/super.c | 3 +- src/kernel/super.h | 1 + src/start.sh | 2 +- 8 files changed, 144 insertions(+), 43 deletions(-) diff --git a/src/TODO b/src/TODO index f4ea3ca20ae08..0fba624a275d1 100644 --- a/src/TODO +++ b/src/TODO @@ -32,6 +32,7 @@ kernel client - mds client - handle file caps, ack back to mds, etc. - actually flush dirty data, too + - test ceph_fill_trace when files/directories are moved around by another client - osd client - readpages (readahead) - async (caching) mode diff --git a/src/kernel/client.c b/src/kernel/client.c index 7c5a6c90001ab..47caa323dd8f6 100644 --- a/src/kernel/client.c +++ b/src/kernel/client.c @@ -105,10 +105,11 @@ static int open_root_inode(struct ceph_client *client, struct ceph_mount_args *a err = le32_to_cpu(rinfo.head->result); if (err != 0) - goto out; + return err; if (rinfo.trace_nr == 0) { dout(10, "open_root_inode wtf, mds returns 0 but no trace\n"); - return -EINVAL; + err = -EINVAL; + goto out; } fs_root = client->sb->s_root; @@ -313,6 +314,7 @@ void ceph_destroy_client(struct ceph_client *cl) ceph_messenger_destroy(cl->msgr); put_client_counter(); kfree(cl); + dout(10, "destroy_client %p done\n", cl); } diff --git a/src/kernel/inode.c b/src/kernel/inode.c index da2ceef6a857b..ef76563ad386f 100644 --- a/src/kernel/inode.c +++ b/src/kernel/inode.c @@ -168,6 +168,8 @@ struct ceph_inode_cap *ceph_add_cap(struct inode *inode, struct ceph_mds_session for (i=0; i<ci->i_nr_caps; i++) if (ci->i_caps[i].mds == mds) break; if (i == ci->i_nr_caps) { + for (i=0; i<ci->i_nr_caps; i++) + if (ci->i_caps[i].mds < 0) break; if (i == ci->i_max_caps) { /* realloc */ void *o = ci->i_caps; @@ 
-182,26 +184,31 @@ struct ceph_inode_cap *ceph_add_cap(struct inode *inode, struct ceph_mds_session kfree(o); ci->i_max_caps *= 2; } + if (i == ci->i_nr_caps) + ci->i_nr_caps++; + ci->i_caps[i].ci = ci; ci->i_caps[i].caps = 0; ci->i_caps[i].mds = mds; ci->i_caps[i].seq = 0; ci->i_caps[i].flags = 0; - ci->i_nr_caps++; ci->i_caps[i].session = session; spin_lock(&session->s_cap_lock); list_add(&ci->i_caps[i].session_caps, &session->s_caps); session->s_nr_caps++; spin_unlock(&session->s_cap_lock); + + if (ci->i_nr_caps == 1) { + dout(10, "igrab on %p\n", inode); + igrab(inode); + } } dout(10, "add_cap inode %p (%lu) got cap %d %xh now %xh seq %d from %d\n", inode, inode->i_ino, i, cap, cap|ci->i_caps[i].caps, seq, mds); ci->i_caps[i].caps |= cap; ci->i_caps[i].seq = seq; - if (ci->i_nr_caps == 1) - igrab(inode); return &ci->i_caps[i]; } @@ -221,8 +228,35 @@ void __remove_cap(struct ceph_inode_cap *cap) spin_lock(&session->s_cap_lock); list_del(&cap->session_caps); session->s_nr_caps--; - spin_unlock(&session->s_cap_lock); cap->session = 0; + spin_unlock(&session->s_cap_lock); +} + +void ceph_remove_cap(struct ceph_inode_info *ci, int mds) +{ + int i; + int was = ci->i_nr_caps; + dout(10, "remove_cap on %p for mds%d\n", &ci->vfs_inode, mds); + for (i=0; i<ci->i_nr_caps; i++) { + if (ci->i_caps[i].mds != mds) + continue; + dout(10, "remove_cap removing %p\n", &ci->i_caps[i]); + __remove_cap(&ci->i_caps[i]); /* remove from list */ + if (i == ci->i_nr_caps-1) { + do { + ci->i_nr_caps--; + } while (ci->i_nr_caps && + ci->i_caps[ci->i_nr_caps-1].mds < 0); + break; + } + ci->i_caps[i].mds = -1; + ci->i_caps[i].caps = 0; + ci->i_caps[i].seq = 0; + } + if (was > 0 && ci->i_nr_caps == 0) { + dout(10, "iput on %p\n", &ci->vfs_inode); + iput(&ci->vfs_inode); + } } void ceph_remove_caps(struct ceph_inode_info *ci) @@ -232,13 +266,14 @@ void ceph_remove_caps(struct ceph_inode_info *ci) if (ci->i_nr_caps) { for (i=0; i<ci->i_nr_caps; i++) __remove_cap(&ci->i_caps[i]); - iput(&ci->vfs_inode); 
ci->i_nr_caps = 0; if (ci->i_caps != ci->i_caps_static) { kfree(ci->i_caps); ci->i_caps = ci->i_caps_static; ci->i_max_caps = STATIC_CAPS; } + dout(10, "iput on %p\n", &ci->vfs_inode); + iput(&ci->vfs_inode); } } diff --git a/src/kernel/mds_client.c b/src/kernel/mds_client.c index 0f0e25b931586..69c56d83b1e32 100644 --- a/src/kernel/mds_client.c +++ b/src/kernel/mds_client.c @@ -230,6 +230,43 @@ static int resume_session(struct ceph_mds_client *mdsc, struct ceph_mds_session return 0; } +static void close_session(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) +{ + int mds = session->s_mds; + struct ceph_msg *msg; + + dout(10, "close_session to mds%d\n", mds); + msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE, session->s_cap_seq); + if (IS_ERR(msg)) + return;// PTR_ERR(msg); /* fixme */ + session->s_state = CEPH_MDS_SESSION_CLOSING; + send_msg_mds(mdsc, msg, mds); +} + +static void remove_session_caps(struct ceph_mds_session *session) +{ + struct ceph_inode_cap *cap; + struct ceph_inode_info *ci; + + /* + * fixme: when we start locking the inode, make sure + * we don't deadlock with __remove_cap in inode.c. 
+ */ + dout(10, "remove_session_caps on %p\n", session); + spin_lock(&session->s_cap_lock); + while (session->s_nr_caps > 0) { + cap = list_entry(session->s_caps.next, struct ceph_inode_cap, session_caps); + ci = cap->ci; + igrab(&ci->vfs_inode); + dout(10, "removing cap %p, ci is %p, inode is %p\n", cap, ci, &ci->vfs_inode); + spin_unlock(&session->s_cap_lock); + ceph_remove_cap(ci, session->s_mds); + spin_lock(&session->s_cap_lock); + } + BUG_ON(session->s_nr_caps > 0); + spin_unlock(&session->s_cap_lock); +} + void ceph_mdsc_handle_session(struct ceph_mds_client *mdsc, struct ceph_msg *msg) { __u32 op; @@ -266,7 +303,9 @@ void ceph_mdsc_handle_session(struct ceph_mds_client *mdsc, struct ceph_msg *msg dout(1, "ignoring session close from mds%d, seq %llu < my seq %llu\n", msg->hdr.src.name.num, seq, session->s_cap_seq); } + remove_session_caps(session); ceph_mdsc_put_session(session); + complete(&mdsc->session_close_waiters); break; case CEPH_SESSION_RENEWCAPS: @@ -301,38 +340,6 @@ bad: /* exported functions */ -void schedule_delayed(struct ceph_mds_client *mdsc) -{ - schedule_delayed_work(&mdsc->delayed_work, HZ*60); -} - -void delayed_work(struct work_struct *work); - -void ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) -{ - spin_lock_init(&mdsc->lock); - mdsc->client = client; - mdsc->mdsmap = 0; /* none yet */ - mdsc->sessions = 0; - mdsc->max_sessions = 0; - mdsc->last_tid = 0; - INIT_RADIX_TREE(&mdsc->request_tree, GFP_KERNEL); - mdsc->last_requested_map = 0; - init_completion(&mdsc->map_waiters); - INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work); - - /* hack fixme */ - schedule_delayed(mdsc); -} - -void ceph_mdsc_stop(struct ceph_mds_client *mdsc) -{ - /* close sessions, caps */ - /* IMPLEMENT ME */ - cancel_delayed_work_sync(&mdsc->delayed_work); -} - - struct ceph_msg * ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, ceph_ino_t ino1, const char *path1, @@ -1108,6 +1115,15 @@ int ceph_mdsc_renew_caps(struct 
ceph_mds_client *mdsc) return 0; } + +/* + * delayed work -- renew caps with mds + */ +void schedule_delayed(struct ceph_mds_client *mdsc) +{ + schedule_delayed_work(&mdsc->delayed_work, HZ*60); +} + void delayed_work(struct work_struct *work) { int i; @@ -1126,4 +1142,51 @@ void delayed_work(struct work_struct *work) schedule_delayed(mdsc); } + +void ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) +{ + spin_lock_init(&mdsc->lock); + mdsc->client = client; + mdsc->mdsmap = 0; /* none yet */ + mdsc->sessions = 0; + mdsc->max_sessions = 0; + mdsc->last_tid = 0; + INIT_RADIX_TREE(&mdsc->request_tree, GFP_KERNEL); + mdsc->last_requested_map = 0; + init_completion(&mdsc->map_waiters); + init_completion(&mdsc->session_close_waiters); + INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work); + + /* hack fixme */ + schedule_delayed(mdsc); +} + +void ceph_mdsc_stop(struct ceph_mds_client *mdsc) +{ + int i; + int n; + + dout(10, "stop\n"); + + /* close sessions, caps */ + for (;;) { + dout(10, "closing sessions\n"); + n = 0; + for (i=0; i<mdsc->max_sessions; i++) { + if (mdsc->sessions[i] == 0 || + mdsc->sessions[i]->s_state >= CEPH_MDS_SESSION_CLOSING) + continue; + close_session(mdsc, mdsc->sessions[i]); + n++; + } + if (n == 0) break; + dout(10, "waiting for sessions to close\n"); + wait_for_completion(&mdsc->session_close_waiters); + } + + cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ +} + + + /* eof */ diff --git a/src/kernel/mds_client.h b/src/kernel/mds_client.h index 73408b11f7a1d..6a4945bcf3a41 100644 --- a/src/kernel/mds_client.h +++ b/src/kernel/mds_client.h @@ -62,7 +62,7 @@ struct ceph_mds_client { __u64 last_tid; /* id of most recent mds request */ struct radix_tree_root request_tree; /* pending mds requests */ __u64 last_requested_map; - struct completion map_waiters; + struct completion map_waiters, session_close_waiters; struct delayed_work delayed_work; /* delayed work */ }; diff --git a/src/kernel/super.c 
b/src/kernel/super.c index 6d34e1381bba5..0f7766e9a70d4 100644 --- a/src/kernel/super.c +++ b/src/kernel/super.c @@ -453,10 +453,9 @@ out: static void ceph_kill_sb(struct super_block *s) { struct ceph_client *client = s->s_fs_info; - dout(1, "kill_sb %p\n", s); - kill_anon_super(s); ceph_destroy_client(client); + kill_anon_super(s); } diff --git a/src/kernel/super.h b/src/kernel/super.h index 83eaded495e7a..528fe045f8af0 100644 --- a/src/kernel/super.h +++ b/src/kernel/super.h @@ -219,6 +219,7 @@ extern int ceph_get_inode(struct super_block *sb, unsigned long ino, struct inod extern int ceph_fill_inode(struct inode *inode, struct ceph_mds_reply_inode *info); extern struct ceph_inode_cap *ceph_find_cap(struct inode *inode, int want); extern struct ceph_inode_cap *ceph_add_cap(struct inode *inode, struct ceph_mds_session *session, u32 cap, u32 seq); +extern void ceph_remove_cap(struct ceph_inode_info *ci, int mds); extern void ceph_remove_caps(struct ceph_inode_info *ci); extern int ceph_handle_cap_grant(struct inode *inode, struct ceph_mds_file_caps *grant, struct ceph_mds_session *session); diff --git a/src/start.sh b/src/start.sh index f77c005d50d63..bf200ccfd0a8f 100755 --- a/src/start.sh +++ b/src/start.sh @@ -6,4 +6,4 @@ ./cosd --mkfs --osd 1 & ./cosd --mkfs --osd 2 & ./cosd --mkfs --osd 3 & -./cmds & +./cmds --debug_ms 1 --debug_mds 10 & -- 2.39.5