From 53b5c467582901fe5b0760a1099360b072ea40e9 Mon Sep 17 00:00:00 2001 From: sageweil Date: Fri, 30 Nov 2007 23:42:10 +0000 Subject: [PATCH] monmap epoch fix; mon sends monmap to client on mount git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@2164 29311d96-e01e-0410-9327-a35deaab8ce9 --- trunk/ceph/kernel/Makefile | 2 +- trunk/ceph/kernel/client.c | 32 +++++++++++++++-------------- trunk/ceph/kernel/client.h | 4 ++-- trunk/ceph/kernel/mds_client.c | 7 ++++++- trunk/ceph/kernel/messenger.c | 6 +++++- trunk/ceph/kernel/mon_client.c | 36 +++++++++++++++++++++++++++++++++ trunk/ceph/kernel/mon_client.h | 12 +++++++++-- trunk/ceph/kernel/monmap.c | 26 ------------------------ trunk/ceph/kernel/monmap.h | 17 ---------------- trunk/ceph/kernel/osd_client.c | 29 +++++++++++++++++++++----- trunk/ceph/kernel/osd_client.h | 6 ++++++ trunk/ceph/mkmonmap.cc | 2 +- trunk/ceph/mon/ClientMonitor.cc | 6 ++++++ trunk/ceph/mon/MonMap.h | 3 ++- 14 files changed, 116 insertions(+), 72 deletions(-) delete mode 100644 trunk/ceph/kernel/monmap.c delete mode 100644 trunk/ceph/kernel/monmap.h diff --git a/trunk/ceph/kernel/Makefile b/trunk/ceph/kernel/Makefile index 3be93433beb56..3f62291a3c1d3 100644 --- a/trunk/ceph/kernel/Makefile +++ b/trunk/ceph/kernel/Makefile @@ -8,5 +8,5 @@ ceph-objs := super.o inode.o \ bufferlist.o ktcp.o messenger.o \ client.o \ mds_client.o mdsmap.o \ - mon_client.o monmap.o \ + mon_client.o \ osd_client.o crush/crush.o crush/mapper.o diff --git a/trunk/ceph/kernel/client.c b/trunk/ceph/kernel/client.c index a397bcce40f3f..03ffb907a7d68 100644 --- a/trunk/ceph/kernel/client.c +++ b/trunk/ceph/kernel/client.c @@ -98,7 +98,7 @@ static int mount(struct ceph_client *client, struct ceph_mount_args *args) int which; char r; - client->mounting = 07; /* wait for mon+mds+osd */ + client->mounting = 0; /* wait for mon+mds+osd */ /* send mount request */ trymount: @@ -117,11 +117,11 @@ trymount: /* wait */ dout(10, "mount waiting\n"); err = 
wait_event_interruptible_timeout(client->mount_wq, - (client->mounting == 0), + (find_first_zero_bit(&client->mounting, 4) == 4), 6*HZ); if (err == -EINTR) return err; - if (client->mounting) { + if (client->mounting < 7) { dout(10, "mount still waiting for mount, attempts=%d\n", attempts); if (--attempts) goto trymount; @@ -129,6 +129,7 @@ trymount: } /* get handle for mount path */ + dout(10, "mount got all maps; opening root directory\n"); err = ceph_mdsc_do(&client->mdsc, CEPH_MDS_OP_OPEN, CEPH_INO_ROOT, args->path, 0, 0); if (err < 0) @@ -142,16 +143,17 @@ trymount: * the monitor responds to monmap to indicate mount success. * (or, someday, to indicate a change in the monitor cluster?) */ -static void handle_mon_map(struct ceph_client *client, struct ceph_msg *msg) +static void handle_monmap(struct ceph_client *client, struct ceph_msg *msg) { int err; - dout(1, "handle_mon_map"); + dout(1, "handle_monmap had epoch %d\n", client->monc.monmap.epoch); /* parse */ - err = ceph_monmap_decode(&client->monc.monmap, msg->front.iov_base, + err = ceph_monmap_decode(&client->monc.monmap, + msg->front.iov_base, msg->front.iov_base + msg->front.iov_len); - if (err != 0) + if (err != 0) return; if (client->whoami < 0) { @@ -220,11 +222,11 @@ void ceph_put_client(struct ceph_client *cl) } -void got_first_map(struct ceph_client *client, int type) +void got_first_map(struct ceph_client *client, int num) { - dout(10, "got_first_map type %d\n", type); - clear_bit(type, &client->mounting); - if (client->mounting == 0) + set_bit(num, &client->mounting); + dout(10, "got_first_map num %d mounting now %lu\n", num, client->mounting); + if (find_first_bit(&client->mounting, 4) == 4) wake_up(&client->mount_wq); } @@ -247,9 +249,9 @@ void ceph_dispatch(struct ceph_client *client, struct ceph_msg *msg) /* me */ case CEPH_MSG_MON_MAP: had = client->monc.monmap.epoch ? 
1:0; - handle_mon_map(client, msg); + handle_monmap(client, msg); if (!had && client->monc.monmap.epoch) - got_first_map(client, 4); + got_first_map(client, 0); break; /* mds client */ @@ -257,7 +259,7 @@ void ceph_dispatch(struct ceph_client *client, struct ceph_msg *msg) had = client->mdsc.mdsmap ? 1:0; ceph_mdsc_handle_map(&client->mdsc, msg); if (!had && client->mdsc.mdsmap) - got_first_map(client, 2); + got_first_map(client, 1); break; case CEPH_MSG_CLIENT_REPLY: ceph_mdsc_handle_reply(&client->mdsc, msg); @@ -271,7 +273,7 @@ void ceph_dispatch(struct ceph_client *client, struct ceph_msg *msg) had = client->osdc.osdmap ? 1:0; ceph_osdc_handle_map(&client->osdc, msg); if (!had && client->osdc.osdmap) - got_first_map(client, 1); + got_first_map(client, 2); break; case CEPH_MSG_OSD_OPREPLY: ceph_osdc_handle_reply(&client->osdc, msg); diff --git a/trunk/ceph/kernel/client.h b/trunk/ceph/kernel/client.h index 41e87847adee1..9c2b02c9dd920 100644 --- a/trunk/ceph/kernel/client.h +++ b/trunk/ceph/kernel/client.h @@ -11,9 +11,9 @@ #include #include +#include #include "messenger.h" -#include "monmap.h" #include "mon_client.h" #include "mds_client.h" @@ -39,7 +39,7 @@ struct ceph_client { struct ceph_fsid fsid; atomic_t nref; - int mounting; /* map bitset; 4=mon, 2=mds, 1=osd map */ + unsigned long mounting; /* map bitset; 4=mon, 2=mds, 1=osd map */ wait_queue_head_t mount_wq; struct ceph_messenger *msgr; /* messenger instance */ diff --git a/trunk/ceph/kernel/mds_client.c b/trunk/ceph/kernel/mds_client.c index 63e4464e5bd80..1c7c26e5f03c6 100644 --- a/trunk/ceph/kernel/mds_client.c +++ b/trunk/ceph/kernel/mds_client.c @@ -87,8 +87,9 @@ static void register_session(struct ceph_mds_client *mdsc, int mds) /* register */ if (mds >= mdsc->max_sessions) { - /* realloc */ struct ceph_mds_session **sa; + /* realloc */ + dout(50, "mdsc register_session realloc to %d\n", mds); sa = kzalloc(mds * sizeof(struct ceph_mds_session), GFP_KERNEL); BUG_ON(sa == NULL); /* i am lazy */ if 
(mdsc->sessions) { @@ -293,14 +294,18 @@ retry: if (mds < 0) { /* wait for new mdsmap */ spin_unlock(&mdsc->lock); + dout(30, "mdsc_do_request waiting for new mdsmap\n"); wait_for_new_map(mdsc); spin_lock(&mdsc->lock); goto retry; } + dout(30, "mdsc_do_request chose mds%d\n", mds); /* get session */ session = get_session(mdsc, mds); + dout(30, "mdsc_do_request got session %p\n", session); + /* open? */ if (mdsc->sessions[mds]->s_state == CEPH_MDS_SESSION_IDLE) open_session(mdsc, session, mds); diff --git a/trunk/ceph/kernel/messenger.c b/trunk/ceph/kernel/messenger.c index 64ecfdd3edcd9..25c6df1a47ea3 100644 --- a/trunk/ceph/kernel/messenger.c +++ b/trunk/ceph/kernel/messenger.c @@ -392,7 +392,9 @@ static void try_write(struct work_struct *work) struct ceph_messenger *msgr; int ret = 1; + dout(30, "try_write start\n"); con = container_of(work, struct ceph_connection, swork); + spin_lock(&con->lock); msgr = con->msgr; more: @@ -437,6 +439,8 @@ more: clear_bit(WRITE_PENDING, &con->state); done: + dout(30, "try_write done\n"); + spin_unlock(&con->lock); return; } @@ -919,6 +923,7 @@ int ceph_msg_send(struct ceph_messenger *msgr, struct ceph_msg *msg) prepare_write_connect(msgr, con); dout(5, "ceph_msg_send initiating connect on %p new state %u\n", con, con->state); ret = ceph_tcp_connect(con); + dout(5, "ceph_msg_send done initiating connect on %p new state %u\n", con, con->state); if (ret < 0) { derr(1, "connection failure to peer %x:%d\n", ntohl(msg->hdr.dst.addr.ipaddr.sin_addr.s_addr), @@ -965,7 +970,6 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, int page_len, int page_of m->front.iov_base = kmalloc(front_len, GFP_KERNEL); if (m->front.iov_base == NULL) goto out2; - dout(50, "ceph_msg_new front is %p len %d\n", m->front.iov_base, front_len); } else { m->front.iov_base = 0; } diff --git a/trunk/ceph/kernel/mon_client.c b/trunk/ceph/kernel/mon_client.c index f379955804ad9..35984ac636a97 100644 --- a/trunk/ceph/kernel/mon_client.c +++ 
b/trunk/ceph/kernel/mon_client.c @@ -4,6 +4,41 @@ #include "mon_client.h" +int ceph_monmap_decode(struct ceph_monmap *m, void *p, void *end) +{ + int err; + void *old; + + dout(30, "monmap_decode %p %p\n", p, end); + + if ((err = ceph_decode_32(&p, end, &m->epoch)) < 0) + goto bad; + if ((err = ceph_decode_64(&p, end, &m->fsid.major)) < 0) + goto bad; + if ((err = ceph_decode_64(&p, end, &m->fsid.minor)) < 0) + goto bad; + if ((err = ceph_decode_32(&p, end, &m->num_mon)) < 0) + return err; + + old = m->mon_inst; + m->mon_inst = kmalloc(m->num_mon*sizeof(*m->mon_inst), GFP_KERNEL); + if (m->mon_inst == NULL) { + m->mon_inst = old; + return -ENOMEM; + } + kfree(old); + + if ((err = ceph_decode_copy(&p, end, m->mon_inst, m->num_mon*sizeof(m->mon_inst[0]))) < 0) + goto bad; + + dout(30, "monmap_decode got epoch %d, num_mon %d\n", m->epoch, m->num_mon); + return 0; + +bad: + dout(30, "monmap_decode failed with %d\n", err); + return err; +} + static int pick_mon(struct ceph_mon_client *monc, int notmon) { char r; @@ -22,6 +57,7 @@ void ceph_monc_init(struct ceph_mon_client *monc) } + void ceph_monc_request_mdsmap(struct ceph_mon_client *monc, __u64 have) { dout(5, "ceph_monc_request_mdsmap\n"); diff --git a/trunk/ceph/kernel/mon_client.h b/trunk/ceph/kernel/mon_client.h index d44b9c5e687ec..478ed1c10100a 100644 --- a/trunk/ceph/kernel/mon_client.h +++ b/trunk/ceph/kernel/mon_client.h @@ -1,18 +1,26 @@ #ifndef _FS_CEPH_MON_CLIENT_H #define _FS_CEPH_MON_CLIENT_H -#include "monmap.h" #include "messenger.h" struct ceph_mount_args; +struct ceph_monmap { + ceph_epoch_t epoch; + struct ceph_fsid fsid; + __u32 num_mon; + struct ceph_entity_inst *mon_inst; +}; + + struct ceph_mon_client { int last_mon; /* last monitor i contacted */ struct ceph_monmap monmap; }; +extern int ceph_monmap_decode(struct ceph_monmap *m, void *p, void *end); + extern void ceph_monc_init(struct ceph_mon_client *monc); -extern void ceph_monc_handle_monmap(struct ceph_mon_client *monc, struct ceph_msg 
*m); extern void ceph_monc_request_mdsmap(struct ceph_mon_client *monc, __u64 have); extern void ceph_monc_request_osdmap(struct ceph_mon_client *monc, __u64 have); diff --git a/trunk/ceph/kernel/monmap.c b/trunk/ceph/kernel/monmap.c deleted file mode 100644 index 05765efe37b2a..0000000000000 --- a/trunk/ceph/kernel/monmap.c +++ /dev/null @@ -1,26 +0,0 @@ -#include -#include "monmap.h" -#include "messenger.h" - -int ceph_monmap_decode(struct ceph_monmap *m, void **p, void *end) -{ - int err; - - if ((err = ceph_decode_64(p, end, &m->epoch)) < 0) - return err; - if ((err = ceph_decode_32(p, end, &m->num_mon)) < 0) - return err; - - m->mon_inst = kmalloc(m->num_mon*sizeof(*m->mon_inst), GFP_KERNEL); - if (m->mon_inst == NULL) - return -ENOMEM; - if ((err = ceph_decode_copy(p, end, m->mon_inst, m->num_mon*sizeof(m->mon_inst[0]))) < 0) - goto bad; - - return 0; - -bad: - kfree(m->mon_inst); - m->mon_inst = 0; - return err; -} diff --git a/trunk/ceph/kernel/monmap.h b/trunk/ceph/kernel/monmap.h deleted file mode 100644 index 5e623b10352d2..0000000000000 --- a/trunk/ceph/kernel/monmap.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _FS_CEPH_MONMAP_H -#define _FS_CEPH_MONMAP_H - -#include - -/* - * monitor map - */ -struct ceph_monmap { - __u64 epoch; - __u32 num_mon; - struct ceph_entity_inst *mon_inst; -}; - -extern int ceph_monmap_decode(struct ceph_monmap *m, void **p, void *end); - -#endif diff --git a/trunk/ceph/kernel/osd_client.c b/trunk/ceph/kernel/osd_client.c index fc1c6a610dab2..0a3081c53cf22 100644 --- a/trunk/ceph/kernel/osd_client.c +++ b/trunk/ceph/kernel/osd_client.c @@ -295,7 +295,8 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max) static struct ceph_osdmap *osdmap_decode(void **p, void *end) { struct ceph_osdmap *map; - __u32 crushlen, max; + __u32 len, max; + int i; int err; void *start = *p; @@ -345,10 +346,28 @@ static struct ceph_osdmap *osdmap_decode(void **p, void *end) if ((err = ceph_decode_copy(p, end, map->osd_addr, 
map->max_osd*sizeof(*map->osd_addr))) < 0) goto bad; + /* pg primary swapping */ + if ((err = ceph_decode_32(p, end, &len)) < 0) + goto bad; + if (len) { + map->pg_swap_primary = kmalloc(len * sizeof(*map->pg_swap_primary), GFP_KERNEL); + if (map->pg_swap_primary == NULL) { + err = -ENOMEM; + goto bad; + } + map->num_pg_swap_primary = len; + for (i=0; i<len; i++) { + if ((err = ceph_decode_64(p, end, &map->pg_swap_primary[i].pg.pg64)) < 0) + goto bad; + if ((err = ceph_decode_32(p, end, &map->pg_swap_primary[i].osd)) < 0) + goto bad; + } + } + /* crush */ - if ((err = ceph_decode_32(p, end, &crushlen)) < 0) + if ((err = ceph_decode_32(p, end, &len)) < 0) goto bad; - dout(30, "osdmap_decode crush len %d from off %x\n", crushlen, (int)(*p - start)); + dout(30, "osdmap_decode crush len %d from off %x\n", len, (int)(*p - start)); map->crush = crush_decode(p, end); if (IS_ERR(map->crush)) { err = PTR_ERR(map->crush); @@ -550,13 +569,13 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) osdc->osdmap = newmap; } } - dout(1, "done\n"); + dout(1, "osdc handle_map done\n"); out: return; bad: - derr(1, "corrupt osd map message\n"); + derr(1, "osdc handle_map corrupt msg\n"); goto out; } diff --git a/trunk/ceph/kernel/osd_client.h b/trunk/ceph/kernel/osd_client.h index 598d58bae0ac1..3277ee15e1987 100644 --- a/trunk/ceph/kernel/osd_client.h +++ b/trunk/ceph/kernel/osd_client.h @@ -23,6 +23,12 @@ struct ceph_osdmap { __u32 *osd_offload; /* 0 = normal, 0x10000 = 100% offload (failed) */ struct ceph_entity_addr *osd_addr; struct crush_map *crush; + + __u32 num_pg_swap_primary; + struct { + ceph_pg_t pg; + __u32 osd; + } *pg_swap_primary; }; enum { diff --git a/trunk/ceph/mkmonmap.cc b/trunk/ceph/mkmonmap.cc index 1e2dd2050c4fa..32d88a0f3b2f1 100644 --- a/trunk/ceph/mkmonmap.cc +++ b/trunk/ceph/mkmonmap.cc @@ -60,7 +60,7 @@ int main(int argc, char **argv) } // write it out - cout << "mkmonmap: writing monmap to " << outfn << " (" << monmap.size() << " monitors)" << std::endl; + cout << "mkmonmap: writing 
monmap epoch " << monmap.epoch << " to " << outfn << " (" << monmap.size() << " monitors)" << std::endl; int r = monmap.write(outfn); assert(r >= 0); diff --git a/trunk/ceph/mon/ClientMonitor.cc b/trunk/ceph/mon/ClientMonitor.cc index 9082db0b2db8f..80d0c24cec2dc 100644 --- a/trunk/ceph/mon/ClientMonitor.cc +++ b/trunk/ceph/mon/ClientMonitor.cc @@ -19,6 +19,7 @@ #include "OSDMonitor.h" #include "MonitorStore.h" +#include "messages/MMonMap.h" #include "messages/MClientMount.h" #include "messages/MClientUnmount.h" @@ -222,6 +223,11 @@ void ClientMonitor::_mounted(int client, MClientMount *m) mon->mdsmon->send_latest(to); mon->osdmon->send_latest(to); + dout(10) << "sending monmap to " << to << dendl; + bufferlist bl; + mon->monmap->encode(bl); + mon->messenger->send_message(new MMonMap(bl), to); + delete m; } diff --git a/trunk/ceph/mon/MonMap.h b/trunk/ceph/mon/MonMap.h index 3b6bad339716d..f8adebd3f405d 100644 --- a/trunk/ceph/mon/MonMap.h +++ b/trunk/ceph/mon/MonMap.h @@ -30,7 +30,7 @@ class MonMap { int last_mon; // last mon i talked to - MonMap(int s=0) : epoch(0), mon_inst(s), last_mon(-1) { + MonMap(int s=0) : epoch(s?1:0), mon_inst(s), last_mon(-1) { generate_fsid(); } @@ -39,6 +39,7 @@ class MonMap { } void add_mon(entity_inst_t inst) { + if (!epoch) epoch = 1; mon_inst.push_back(inst); } -- 2.39.5