From cd539a72e545feb221d1a173b1a01ad2cdba12f5 Mon Sep 17 00:00:00 2001 From: sageweil Date: Tue, 13 Nov 2007 01:36:05 +0000 Subject: [PATCH] more bits for mounting git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@2059 29311d96-e01e-0410-9327-a35deaab8ce9 --- trunk/ceph/kernel/bufferlist.c | 61 ++++++----- trunk/ceph/kernel/bufferlist.h | 10 +- trunk/ceph/kernel/client.c | 190 +++++++++++++++++++++++++-------- trunk/ceph/kernel/client.h | 40 ++++--- trunk/ceph/kernel/mds_client.h | 2 +- trunk/ceph/kernel/messenger.c | 19 ++++ trunk/ceph/kernel/messenger.h | 9 +- trunk/ceph/kernel/mon_client.h | 6 +- trunk/ceph/kernel/monmap.h | 13 ++- trunk/ceph/kernel/osd_client.h | 5 +- 10 files changed, 248 insertions(+), 107 deletions(-) diff --git a/trunk/ceph/kernel/bufferlist.c b/trunk/ceph/kernel/bufferlist.c index 11ba113261dcc..fac98b2cb7615 100644 --- a/trunk/ceph/kernel/bufferlist.c +++ b/trunk/ceph/kernel/bufferlist.c @@ -138,42 +138,47 @@ void ceph_bl_iterator_advance(struct ceph_bufferlist *bl, { } - -__u64 ceph_bl_decode_u64(struct ceph_bufferlist *bl, struct ceph_bufferlist_iterator *bli) +int ceph_bl_decode_have(struct ceph_bufferlist *bl, struct ceph_bufferlist_iterator *bli, int s) { - __u64 r; - r = le64_to_cpu((__u64*)(bl->b_kv[bli->i_kv].iov_base + bli->i_off)); - ceph_bl_iterator_advance(bl, bli, sizeof(__u64)); - return r; + return 1; /* FIXME */ } -__s64 ceph_bl_decode_s64(struct ceph_bufferlist *bl, struct ceph_bufferlist_iterator *bli) + +int ceph_bl_copy(struct ceph_bufferlist *bl, struct ceph_bufferlist_iterator *bli, void *dest, int len) { - __s64 r; - r = le64_to_cpu((__s64*)(bl->b_kv[bli->i_kv].iov_base + bli->i_off)); - ceph_bl_iterator_advance(bl, bli, sizeof(__s64)); - return r; + if (!ceph_bl_decode_have(bl, bli, len)) + return -EINVAL; + + return 0; } -__u32 ceph_bl_decode_u32(struct ceph_bufferlist *bl, struct ceph_bufferlist_iterator *bli) +int ceph_bl_decode_64(struct ceph_bufferlist *bl, struct ceph_bufferlist_iterator *bli, __u64 *v) { - __u32 r; - r = le32_to_cpu(*(__u32*)(bl->b_kv[bli->i_kv].iov_base + bli->i_off)); - ceph_bl_iterator_advance(bl, bli, sizeof(__u32)); - return r; + if (!ceph_bl_decode_have(bl, bli, sizeof(*v))) + return -EINVAL; + *v = le64_to_cpu((__u64*)(bl->b_kv[bli->i_kv].iov_base + bli->i_off)); + ceph_bl_iterator_advance(bl, bli, sizeof(*v)); + return 0; } -__s32 ceph_bl_decode_s32(struct ceph_bufferlist *bl, struct ceph_bufferlist_iterator *bli) +int ceph_bl_decode_32(struct ceph_bufferlist *bl, struct ceph_bufferlist_iterator *bli, __u32 *v) { - __s32 r; - r = le32_to_cpu(*(__s32*)(bl->b_kv[bli->i_kv].iov_base + bli->i_off)); - ceph_bl_iterator_advance(bl, bli, sizeof(__s32)); - return r; + if (!ceph_bl_decode_have(bl, bli, sizeof(*v))) + return -EINVAL; + *v = le32_to_cpu((__u64*)(bl->b_kv[bli->i_kv].iov_base + bli->i_off)); + ceph_bl_iterator_advance(bl, bli, sizeof(*v)); + return 0; } - -__u8 ceph_bl_decode_u8(struct ceph_bufferlist *bl, struct ceph_bufferlist_iterator *bli) +int ceph_bl_decode_16(struct ceph_bufferlist *bl, struct ceph_bufferlist_iterator *bli, __u16 *v) { - __u8 r; - r = *(__u8*)(bl->b_kv[bli->i_kv].iov_base + bli->i_off); - ceph_bl_iterator_advance(bl, bli, sizeof(__u8)); - return r; + if (!ceph_bl_decode_have(bl, bli, sizeof(*v))) + return -EINVAL; + *v = le16_to_cpu((__u64*)(bl->b_kv[bli->i_kv].iov_base + bli->i_off)); + ceph_bl_iterator_advance(bl, bli, sizeof(*v)); + return 0; +} +int ceph_bl_decode_8(struct ceph_bufferlist *bl, struct ceph_bufferlist_iterator *bli, __u8 *v) +{ + if (!ceph_bl_decode_have(bl, bli, sizeof(*v))) + return -EINVAL; + ceph_bl_copy(bl, bli, v, sizeof(v)); + return 0; } - diff --git a/trunk/ceph/kernel/bufferlist.h b/trunk/ceph/kernel/bufferlist.h index 22bdca39f215d..0eef9739e97df 100644 --- a/trunk/ceph/kernel/bufferlist.h +++ b/trunk/ceph/kernel/bufferlist.h @@ -35,11 +35,11 @@ extern void ceph_bl_prepare_append(struct ceph_bufferlist *bl, int len); extern void ceph_bl_iterator_init(struct ceph_bufferlist_iterator *bli); -extern __u64 ceph_bl_decode_u64(struct ceph_bufferlist *bl, struct ceph_bufferlist_iterator *bli); -extern __s64 ceph_bl_decode_s64(struct ceph_bufferlist *bl, struct ceph_bufferlist_iterator *bli); -extern __u32 ceph_bl_decode_u32(struct ceph_bufferlist *bl, struct ceph_bufferlist_iterator *bli); -extern __s32 ceph_bl_decode_s32(struct ceph_bufferlist *bl, struct ceph_bufferlist_iterator *bli); -extern __u8 ceph_bl_decode_u8(struct ceph_bufferlist *bl, struct ceph_bufferlist_iterator *bli); +extern int ceph_bl_decode_64(struct ceph_bufferlist *bl, struct ceph_bufferlist_iterator *bli, __u64 *v); +extern int ceph_bl_decode_32(struct ceph_bufferlist *bl, struct ceph_bufferlist_iterator *bli, __u32 *v); +extern int ceph_bl_decode_16(struct ceph_bufferlist *bl, struct ceph_bufferlist_iterator *bli, __u16 *v); +extern int ceph_bl_decode_8(struct ceph_bufferlist *bl, struct ceph_bufferlist_iterator *bli, __u8 *v); +extern int ceph_bl_copy(struct ceph_bufferlist *bl, struct ceph_bufferlist_iterator *bli, void *p, int len); #endif diff --git a/trunk/ceph/kernel/client.c b/trunk/ceph/kernel/client.c index 42da02161f4f6..89538a19695df 100644 --- a/trunk/ceph/kernel/client.c +++ b/trunk/ceph/kernel/client.c @@ -7,45 +7,89 @@ /* debug level; defined in include/ceph_fs.h */ int ceph_debug = 10; + + /* - * dispatch -- called with incoming messages. - * - * should be fast and non-blocking, as it is called with locks held. + * create a fresh client instance */ -static void dispatch(struct ceph_client *client, struct ceph_message *msg) +static struct ceph_client *create_client(ceph_mount_args *args) { - dout(5, "dispatch %p type %d\n", (void*)msg, msg->hdr.type); + struct ceph_client *cl; - /* deliver the message */ - switch (msg->hdr.type) { - /* mds client */ - case CEPH_MSG_MDS_MAP: - ceph_mdsc_handle_map(&client->mds_client, msg); - break; - case CEPH_MSG_CLIENT_REPLY: - ceph_mdsc_handle_reply(&client->mds_client, msg); - break; - case CEPH_MSG_CLIENT_REQUEST_FORWARD: - ceph_mdsc_handle_forward(&client->mds_client, msg); - break; + cl = kmalloc(sizeof(*cl), GFP_KERNEL); + if (cl == NULL) + return ERR_PTR(-ENOMEM); + memset(cl, 0, sizeof(*cl)); - /* osd client */ - case CEPH_MSG_OSD_MAP: - ceph_osdc_handle_map(&client->osd_client, msg); - break; - case CEPH_MSG_OSD_OPREPLY: - ceph_osdc_handle_reply(&client->osd_client, msg); - break; + cl->whoami = -1; + ceph_monc_init(&cl->monc); + ceph_mdsc_init(&cl->mdsc, cl); + ceph_osdc_init(&cl->osdc); - default: - derr(1, "dispatch unknown message type %d\n", msg->hdr.type); - ceph_put_msg(msg); - } + return cl; } +/* + * try to mount + */ +static int mount(struct ceph_client *client, struct ceph_mount_args *args) +{ + struct ceph_message *mount_msg; + struct ceph_entity_inst inst; + int ret; + int attempts = 10; + + atomic_set(&client->mounting, 1); + /* send mount request */ + mount_msg = ceph_new_message(CEPH_MSG_CLIENT_MOUNT, 0); + if (IS_ERR(mount_msg)) + return PTR_ERR(mount_msg); + ceph_get_msg(mount_msg); /* grab ref; we may retry */ +trymount: + inst.name.type = CEPH_ENTITY_TYPE_MON; + inst.name.num = get_random_int() % args->num_mon; + inst.addr = args->mon_addr[inst.name.num]; + dout(1, "ceph_get_client requesting mount from mon%d, %d attempts left\n", + inst.name.num, attempts); + ceph_messenger_send(client->msgr, mount_msg, &inst); + + /* wait */ + err = wait_event_interruptible_timeout(client->mounted_wq, + atomic_read(&client->mounting) == 0, + 6*HZ); + if (err == -EINTR) + return err; + if (atomic_read(&client->mounting)) { + dout(1, "ceph_get_client still waiting for mount, attempts=%d\n", attempts); + if (--attempts) + goto trymount; + return -EIO; + } + + return 0; +} +/* + * the monitor responds to monmap to indicate mount success. + * (or, someday, to indicate a change in the monitor cluster?) + */ +static void handle_mon_map(struct ceph_client *client, struct ceph_message *msg) +{ + int err; + + dout(1, "handle_mon_map"); + /* parse */ + err = ceph_monmap_decode(&client->monc->monmap, &msg->payload); + if (err != 0) + return; + + /* mounted! */ + client->whoami = msg->dst.name.num; + if (atomic_dec_and_test(&client->mounting)) + wake_up(&client->mount_wq); +} @@ -55,21 +99,33 @@ static void dispatch(struct ceph_client *client, struct ceph_message *msg) * key: fsid.major ^ fsid.minor * value: struct ceph_client.fsid_item */ + +/* ignore all this until later RADIX_TREE(ceph_clients, GFP_KERNEL); +static struct ceph_client *get_client_fsid(struct ceph_fsid *fsid) +{ + +} + +static struct ceph_client *get_client_monaddr(struct ceph_entity_addr *monaddr) +{ + +} +*/ + struct ceph_client *ceph_get_client(ceph_mount_args *args) { struct ceph_client *client = 0; - struct sockaddr_in monaddr; - struct ceph_message *mount_msg; - int which; + int ret; /* existing, by fsid? */ + /* if (args->flags & CEPH_MOUNT_FSID) client = ceph_get_client_fsid(&args->fsid); if (client) return client; - + */ /* existing, by monitors? */ /* write me. */ @@ -77,29 +133,69 @@ struct ceph_client *ceph_get_client(ceph_mount_args *args) client = create_client(); if (IS_ERR(client)) return PTR_ERR(client); + atomic_inc(&client->nref); - /* send mount request */ - mount_msg = prepare_mount_request(client); - ceph_get_msg(mount_msg); /* grab ref; we may retry */ - which = 0; -trymount: - ceph_msgr_send(client->msgr, mount_msg, args->mon_addr[which]); - - /* ... wait ... */ - - - - - - + /* request mount */ + ret = mount(client, args); + if (ret < 0) { + ceph_put_client(client); + return ERR_PTR(ret); + } + return client; } void ceph_put_client(struct ceph_client *cl) { - + if (atomic_dec_and_test(&cl->nref)) { + dout(1, "ceph_put_client last put on %p\n", cl); + + /* unmount */ + /* ... */ + kfree(cl); + } } +/* + * dispatch -- called with incoming messages. + * + * should be fast and non-blocking, as it is called with locks held. + */ +static void dispatch(struct ceph_client *client, struct ceph_message *msg) +{ + dout(5, "dispatch %p type %d\n", (void*)msg, msg->hdr.type); + + /* deliver the message */ + switch (msg->hdr.type) { + /* me */ + case CEPH_MSG_MON_MAP: + handle_mon_map(client, msg); + break; + + /* mds client */ + case CEPH_MSG_MDS_MAP: + ceph_mdsc_handle_map(&client->mds_client, msg); + break; + case CEPH_MSG_CLIENT_REPLY: + ceph_mdsc_handle_reply(&client->mds_client, msg); + break; + case CEPH_MSG_CLIENT_REQUEST_FORWARD: + ceph_mdsc_handle_forward(&client->mds_client, msg); + break; + + /* osd client */ + case CEPH_MSG_OSD_MAP: + ceph_osdc_handle_map(&client->osd_client, msg); + break; + case CEPH_MSG_OSD_OPREPLY: + ceph_osdc_handle_reply(&client->osd_client, msg); + break; + + default: + derr(1, "dispatch unknown message type %d\n", msg->hdr.type); + ceph_put_msg(msg); + } +} diff --git a/trunk/ceph/kernel/client.h b/trunk/ceph/kernel/client.h index 8f106abc5f142..60253c63ec008 100644 --- a/trunk/ceph/kernel/client.h +++ b/trunk/ceph/kernel/client.h @@ -1,9 +1,12 @@ +#ifndef __FS_CEPH_CLIENT_H +#define __FS_CEPH_CLIENT_H + /* * client.h * - * ceph client instance. may be shared by multiple supers (mount points), - * if we are mounting the same cluster multiple times (e.g. at different - * offsets) + * ceph client instance. may be shared by multiple superblocks, + * if we are mounting the same cluster multiple times (e.g. at + * different relative server paths) */ #include @@ -17,25 +20,38 @@ struct ceph_mount_args; +enum { + MOUNTING, + MOUNTED, + UNMOUNTING, + UNMOUNTED +}; + /* - * CEPH per-filesystem client state + * per-filesystem client state * * possibly shared by multiple mount points, if they are * mounting the same ceph filesystem/cluster. */ struct ceph_client { - __u32 s_whoami; /* my client number */ - struct ceph_messenger *msgr; /* messenger instance */ + __u32 whoami; /* my client number */ + atomic_t nref; - struct ceph_monmap *monmap; /* monitor map */ + atomic_t mounting; + struct wait_queue mount_wq; - struct ceph_mon_client mon_client; - struct ceph_mds_client mds_client; - struct ceph_osd_client osd_client; + struct ceph_messenger *msgr; /* messenger instance */ + struct ceph_mon_client monc; + struct ceph_mds_client mdss; + struct ceph_osd_client osdc; - int s_ref; /* reference count (for each sb_info that points to me) */ - struct list_head fsid_item; + /* lets ignore all this until later */ + spinlock_t sb_lock; + int num_sb; /* reference count (for each sb_info that points to me) */ + struct list_head sb_list; }; extern struct ceph_client *ceph_get_client(ceph_mount_args *args); extern void ceph_put_client(struct ceph_client *cl); + +#endif diff --git a/trunk/ceph/kernel/mds_client.h b/trunk/ceph/kernel/mds_client.h index 72dc579a2fa84..e240f0f6e901d 100644 --- a/trunk/ceph/kernel/mds_client.h +++ b/trunk/ceph/kernel/mds_client.h @@ -61,7 +61,7 @@ struct ceph_mds_client { struct completion map_waiters; }; -extern void ceph_mdsc_init(struct ceph_mds_client *mdsc, +extern void ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client); extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct ceph_message *msg, int mds); extern void ceph_mdsc_handle_reply(struct ceph_mds_client *mdsc, struct ceph_message *msg); diff --git a/trunk/ceph/kernel/messenger.c b/trunk/ceph/kernel/messenger.c index 62e22210c6ae9..e6b2331ffdb1f 100644 --- a/trunk/ceph/kernel/messenger.c +++ b/trunk/ceph/kernel/messenger.c @@ -680,3 +680,22 @@ static struct ceph_messenger *new_messenger(void) return msgr; } + + +struct ceph_message *ceph_new_message(int type, int size) +{ + struct ceph_message *m; + + m = kmalloc(sizeof(*m), GFP_KERNEL); + if (m == NULL) + return ERR_PTR(-ENOMEM); + memset(m, 0, sizeof(*m)); + m.hdr.type = type; + + if (size) { + BUG_ON(size); /* implement me */ + } + + return m; +} + diff --git a/trunk/ceph/kernel/messenger.h b/trunk/ceph/kernel/messenger.h index b48c8283e13f4..3f95b56abc07a 100644 --- a/trunk/ceph/kernel/messenger.h +++ b/trunk/ceph/kernel/messenger.h @@ -82,12 +82,14 @@ struct ceph_connection { int error; /* error on connection */ }; -/* - * Inline functions.. - */ +/* messenger */ extern void ceph_messenger_send(struct ceph_messenger *msgr, struct ceph_message *msg, struct ceph_entity_inst *dest); + +/* messages */ +extern struct ceph_message *ceph_new_message(int type, int size); + static __inline__ void ceph_put_msg(struct ceph_message *msg) { if (atomic_dec_and_test(&msg->nref)) { ceph_bl_clear(&msg->payload); @@ -99,4 +101,5 @@ static __inline__ void ceph_get_msg(struct ceph_message *msg) { atomic_inc(&msg->nref); } + #endif diff --git a/trunk/ceph/kernel/mon_client.h b/trunk/ceph/kernel/mon_client.h index 2ee03eb2c5514..1bf30c094d196 100644 --- a/trunk/ceph/kernel/mon_client.h +++ b/trunk/ceph/kernel/mon_client.h @@ -1,16 +1,18 @@ #ifndef _FS_CEPH_MON_CLIENT_H #define _FS_CEPH_MON_CLIENT_H +struct ceph_mount_args; struct ceph_mon_client { int last_mon; /* last monitor i contacted */ - + struct ceph_monmap monmap; }; +extern void ceph_monc_init(struct ceph_mon_client *monc); +extern void ceph_monc_handle_monmap(struct ceph_mon_client *monc, struct ceph_message *m); extern void ceph_monc_request_mdsmap(struct ceph_mon_client *monc, __u64 have); extern void ceph_monc_request_osdmap(struct ceph_mon_client *monc, __u64 have); -extern void ceph_monc_request_mount(struct ceph_mon_client *monc); extern void ceph_monc_request_umount(struct ceph_mon_client *monc); extern void ceph_monc_report_failure(struct ceph_mon_client *monc, struct ceph_entity_inst *who); diff --git a/trunk/ceph/kernel/monmap.h b/trunk/ceph/kernel/monmap.h index 2f60c8a0c3436..e76a45977b7c9 100644 --- a/trunk/ceph/kernel/monmap.h +++ b/trunk/ceph/kernel/monmap.h @@ -1,19 +1,18 @@ #ifndef _FS_CEPH_MONMAP_H #define _FS_CEPH_MONMAP_H -#include +#include +#include "bufferlist.h" /* * monitor map */ struct ceph_monmap { - __u64 m_epoch; - __u32 m_num_mon; - __u32 m_last_mon; - struct ceph_entity_inst m_mon_inst; + __u64 epoch; + __u32 num_mon; + struct ceph_entity_inst *mon_inst; }; -extern int ceph_monmap_pick_mon(struct ceph_monmap *m); -extern int ceph_monmap_decode(struct ceph_monmap *m, struct kvec *v); +extern int ceph_monmap_decode(struct ceph_monmap *m, struct ceph_bufferlist *bl); #endif diff --git a/trunk/ceph/kernel/osd_client.h b/trunk/ceph/kernel/osd_client.h index 5d87864c90741..6d8cef636f1a3 100644 --- a/trunk/ceph/kernel/osd_client.h +++ b/trunk/ceph/kernel/osd_client.h @@ -14,8 +14,9 @@ struct ceph_osd_client { }; -extern void ceph_osdc_handle_reply(struct ceph_osd_client *mdsc, struct ceph_message *msg); -extern void ceph_osdc_handle_map(struct ceph_osd_client *mdsc, struct ceph_message *msg); +extern void ceph_osdc_init(struct ceph_osdc_init *osdc); +extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, struct ceph_message *msg); +extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_message *msg); #endif -- 2.39.5