From 71b37d52b21dba3198083fb42c0caf5a5e9e135a Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 19 Dec 2007 17:39:11 -0800 Subject: [PATCH] kernel mds_client cleanup --- src/kernel/mds_client.c | 70 ++++++++++++++++++++++++--------------- src/kernel/mds_client.h | 73 ++++++++++++++++++++--------------------- 2 files changed, 79 insertions(+), 64 deletions(-) diff --git a/src/kernel/mds_client.c b/src/kernel/mds_client.c index d8132f84eab2f..55119f66faa8e 100644 --- a/src/kernel/mds_client.c +++ b/src/kernel/mds_client.c @@ -162,7 +162,7 @@ static void open_session(struct ceph_mds_client *mdsc, struct ceph_mds_session * /* connect */ if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) { - ceph_monc_request_mdsmap(&mdsc->client->monc, mdsc->mdsmap->m_epoch); /* race fixme */ + ceph_monc_request_mdsmap(&mdsc->client->monc, mdsc->mdsmap->m_epoch); return; } @@ -280,7 +280,6 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, /* encode head */ head->op = cpu_to_le32(op); ceph_encode_inst(&head->client_inst, &mdsc->client->msgr->inst); - /*FIXME: head->oldest_client_tid = cpu_to_le64(....);*/ /* encode paths */ ceph_encode_filepath(&p, end, ino1, path1); @@ -291,10 +290,24 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, return req; } +/* + * return oldest (lowest) tid in request tree, 0 if none. + */ +__u64 get_oldest_tid(struct ceph_mds_client *mdsc) +{ + struct ceph_mds_request *first; + if (radix_tree_gang_lookup(&mdsc->request_tree, + (void**)&first, 0, 1) <= 0) + return 0; + dout(10, "oldest tid is %llu\n", first->r_tid); + return first->r_tid; +} + int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, struct ceph_msg *msg, struct ceph_mds_reply_info *rinfo, int mds) { struct ceph_mds_request *req; + struct ceph_mds_request_head *rhead; struct ceph_mds_session *session; struct ceph_msg *reply = 0; int err; @@ -326,7 +339,9 @@ retry: if (mdsc->sessions[mds]->s_state != CEPH_MDS_SESSION_OPEN) { /* wait for session to open (or fail, or close) */ spin_unlock(&mdsc->lock); + dout(30, "mdsc_do_request waiting on session %p\n", session); wait_for_completion(&session->s_completion); + dout(30, "mdsc_do_request done waiting on session %p\n", session); put_session(session); spin_lock(&mdsc->lock); goto retry; @@ -334,41 +349,34 @@ retry: put_session(session); /* make request? */ - if (req->r_num_mds < 4) { - req->r_mds[req->r_num_mds++] = mds; - req->r_resend_mds = -1; /* forget any specific mds hint */ - req->r_attempts++; - send_msg_mds(mdsc, req->r_request, mds); - } + BUG_ON(req->r_num_mds >= 2); + req->r_mds[req->r_num_mds++] = mds; + req->r_resend_mds = -1; /* forget any specific mds hint */ + req->r_attempts++; + rhead = req->r_request->front.iov_base; + rhead->retry_attempt = cpu_to_le32(req->r_attempts); + rhead->oldest_client_tid = cpu_to_le64(get_oldest_tid(mdsc)); + send_msg_mds(mdsc, req->r_request, mds); /* wait */ spin_unlock(&mdsc->lock); wait_for_completion(&req->r_completion); + spin_lock(&mdsc->lock); - if (!req->r_reply) { - spin_lock(&mdsc->lock); + /* clean up request, parse reply */ + if (!req->r_reply) goto retry; - } reply = req->r_reply; - - spin_lock(&mdsc->lock); unregister_request(mdsc, req); spin_unlock(&mdsc->lock); - put_request(req); - - /* parse reply */ - err = ceph_mdsc_parse_reply_info(reply, rinfo); - if (err < 0) + if ((err = ceph_mdsc_parse_reply_info(reply, rinfo)) < 0) return err; dout(30, "mdsc_do_request done on %p\n", msg); return 0; } - - - void ceph_mdsc_handle_reply(struct ceph_mds_client *mdsc, struct ceph_msg *msg) { struct ceph_mds_request *req; @@ -404,7 +412,9 @@ done: return; } - +/* + * mds reply parsing + */ int parse_reply_info_in(void **p, void *end, struct ceph_mds_reply_info_in *info) { int err; @@ -569,6 +579,9 @@ void ceph_mdsc_destroy_reply_info(struct ceph_mds_reply_info *info) } +/* + * handle mds notification that our request has been forwarded. + */ void ceph_mdsc_handle_forward(struct ceph_mds_client *mdsc, struct ceph_msg *msg) { struct ceph_mds_request *req; @@ -619,16 +632,16 @@ void ceph_mdsc_handle_forward(struct ceph_mds_client *mdsc, struct ceph_msg *msg } put_request(req); - -out: return; bad: - derr(0, "corrupt forward message\n"); - goto out; + derr(0, "problem decoding message, err=%d\n", err); } +/* + * handle mds map update. + */ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg) { ceph_epoch_t epoch; @@ -680,6 +693,9 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg) } complete(&mdsc->map_waiters); + /* kick any requests pending for failed/recovering mds's */ + // FIXME + out: return; bad: @@ -689,3 +705,5 @@ bad2: dout(1, "no memory to decode new mdsmap\n"); goto out; } + +/* eof */ diff --git a/src/kernel/mds_client.h b/src/kernel/mds_client.h index b7bfd1948b293..bb262b7f0bcb6 100644 --- a/src/kernel/mds_client.h +++ b/src/kernel/mds_client.h @@ -12,7 +12,7 @@ struct ceph_client; /* - * state associated with an individual MDS<->client session + * state associated with each MDS<->client session */ enum { CEPH_MDS_SESSION_IDLE, @@ -21,47 +21,44 @@ enum { CEPH_MDS_SESSION_CLOSING }; struct ceph_mds_session { - int s_state; - __u64 s_cap_seq; /* cap message count/seq from mds */ - atomic_t s_ref; + int s_state; + __u64 s_cap_seq; /* cap message count/seq from mds */ + atomic_t s_ref; struct completion s_completion; }; +/* + * an in-flight request + */ struct ceph_mds_request { - __u64 r_tid; - struct ceph_msg *r_request; - struct ceph_msg *r_reply; + __u64 r_tid; + struct ceph_msg * r_request; /* original request */ + struct ceph_msg * r_reply; - __u32 r_mds[4]; /* set of mds's with whom request may be outstanding */ - int r_num_mds; /* items in r_mds */ + __u32 r_mds[2]; /* set of mds's with whom request may be outstanding */ + int r_num_mds; /* items in r_mds */ - int r_attempts; - int r_num_fwd; /* number of forward attempts */ - int r_resend_mds; /* mds to resend to next, if any*/ + int r_attempts; /* resend attempts */ + int r_num_fwd; /* number of forward attempts */ + int r_resend_mds; /* mds to resend to next, if any*/ - atomic_t r_ref; + atomic_t r_ref; struct completion r_completion; }; - /* * mds client state */ struct ceph_mds_client { - spinlock_t lock; - - struct ceph_client *client; - struct ceph_mdsmap *mdsmap; /* mds map */ - - /* mds sessions */ - struct ceph_mds_session **sessions; /* NULL if no session */ - int max_sessions; /* size of s_mds_sessions array */ - - __u64 last_tid; /* id of last mds request */ - struct radix_tree_root request_tree; /* pending mds requests */ - - __u64 last_requested_map; - struct completion map_waiters; + spinlock_t lock; /* protects all nested structures */ + struct ceph_client *client; + struct ceph_mdsmap *mdsmap; + struct ceph_mds_session **sessions; /* NULL if no session */ + int max_sessions; /* size of s_mds_sessions array */ + __u64 last_tid; /* id of most recent mds request */ + struct radix_tree_root request_tree; /* pending mds requests */ + __u64 last_requested_map; + struct completion map_waiters; }; /* @@ -69,25 +66,25 @@ struct ceph_mds_client { */ struct ceph_mds_reply_info_in { struct ceph_mds_reply_inode *in; - __u32 symlink_len; - char *symlink; + __u32 symlink_len; + char *symlink; }; struct ceph_mds_reply_info { - struct ceph_msg *reply; - struct ceph_mds_reply_head *head; + struct ceph_msg *reply; + struct ceph_mds_reply_head *head; - int trace_nr; + int trace_nr; struct ceph_mds_reply_info_in *trace_in; struct ceph_mds_reply_dirfrag **trace_dir; - char **trace_dname; - __u32 *trace_dname_len; + char **trace_dname; + __u32 *trace_dname_len; struct ceph_mds_reply_dirfrag *dir_dir; - int dir_nr; + int dir_nr; struct ceph_mds_reply_info_in *dir_in; - char **dir_dname; - __u32 *dir_dname_len; + char **dir_dname; + __u32 *dir_dname_len; }; -- 2.39.5