bufferlist.o ktcp.o messenger.o \
client.o \
mds_client.o mdsmap.o \
- mon_client.o monmap.o \
+ mon_client.o \
osd_client.o crush/crush.o crush/mapper.o
int which;
char r;
- client->mounting = 07; /* wait for mon+mds+osd */
+ client->mounting = 0; /* wait for mon+mds+osd */
/* send mount request */
trymount:
/* wait */
dout(10, "mount waiting\n");
err = wait_event_interruptible_timeout(client->mount_wq,
- (client->mounting == 0),
+ (find_first_zero_bit(&client->mounting, 4) == 4),
6*HZ);
if (err == -EINTR)
return err;
- if (client->mounting) {
+ if (client->mounting < 7) {
dout(10, "mount still waiting for mount, attempts=%d\n", attempts);
if (--attempts)
goto trymount;
}
/* get handle for mount path */
+ dout(10, "mount got all maps; opening root directory\n");
err = ceph_mdsc_do(&client->mdsc, CEPH_MDS_OP_OPEN,
CEPH_INO_ROOT, args->path, 0, 0);
if (err < 0)
* the monitor responds to monmap to indicate mount success.
* (or, someday, to indicate a change in the monitor cluster?)
*/
-static void handle_mon_map(struct ceph_client *client, struct ceph_msg *msg)
+static void handle_monmap(struct ceph_client *client, struct ceph_msg *msg)
{
int err;
- dout(1, "handle_mon_map");
+ dout(1, "handle_monmap had epoch %d\n", client->monc.monmap.epoch);
/* parse */
- err = ceph_monmap_decode(&client->monc.monmap, msg->front.iov_base,
+ err = ceph_monmap_decode(&client->monc.monmap,
+ msg->front.iov_base,
msg->front.iov_base + msg->front.iov_len);
- if (err != 0)
+ if (err != 0)
return;
if (client->whoami < 0) {
}
-void got_first_map(struct ceph_client *client, int type)
+/*
+ * Record that the first map of one kind has arrived during mount.
+ *
+ * num is the bit number to set in client->mounting; the message dispatch
+ * code passes 0 for the monmap, 1 for the mdsmap and 2 for the osdmap.
+ * Once all three of those bits are set, wake whoever sleeps on mount_wq.
+ */
+void got_first_map(struct ceph_client *client, int num)
{
- dout(10, "got_first_map type %d\n", type);
- clear_bit(type, &client->mounting);
- if (client->mounting == 0)
+ set_bit(num, &client->mounting);
+ dout(10, "got_first_map num %d mounting now %lu\n", num, client->mounting);
+ /* no zero bit among bits 0..2 means every map has been seen */
+ if (find_first_zero_bit(&client->mounting, 3) == 3)
wake_up(&client->mount_wq);
}
/* me */
case CEPH_MSG_MON_MAP:
had = client->monc.monmap.epoch ? 1:0;
- handle_mon_map(client, msg);
+ handle_monmap(client, msg);
if (!had && client->monc.monmap.epoch)
- got_first_map(client, 4);
+ got_first_map(client, 0);
break;
/* mds client */
had = client->mdsc.mdsmap ? 1:0;
ceph_mdsc_handle_map(&client->mdsc, msg);
if (!had && client->mdsc.mdsmap)
- got_first_map(client, 2);
+ got_first_map(client, 1);
break;
case CEPH_MSG_CLIENT_REPLY:
ceph_mdsc_handle_reply(&client->mdsc, msg);
had = client->osdc.osdmap ? 1:0;
ceph_osdc_handle_map(&client->osdc, msg);
if (!had && client->osdc.osdmap)
- got_first_map(client, 1);
+ got_first_map(client, 2);
break;
case CEPH_MSG_OSD_OPREPLY:
ceph_osdc_handle_reply(&client->osdc, msg);
#include <linux/ceph_fs.h>
#include <linux/wait.h>
+#include <linux/completion.h>
#include "messenger.h"
-#include "monmap.h"
#include "mon_client.h"
#include "mds_client.h"
struct ceph_fsid fsid;
atomic_t nref;
- int mounting; /* map bitset; 4=mon, 2=mds, 1=osd map */
+ unsigned long mounting; /* map bitset; bit 0=mon, bit 1=mds, bit 2=osd map */
wait_queue_head_t mount_wq;
struct ceph_messenger *msgr; /* messenger instance */
/* register */
if (mds >= mdsc->max_sessions) {
- /* realloc */
struct ceph_mds_session **sa;
+ /* realloc */
+ dout(50, "mdsc register_session realloc to %d\n", mds);
sa = kzalloc(mds * sizeof(struct ceph_mds_session), GFP_KERNEL);
BUG_ON(sa == NULL); /* i am lazy */
if (mdsc->sessions) {
if (mds < 0) {
/* wait for new mdsmap */
spin_unlock(&mdsc->lock);
+ dout(30, "mdsc_do_request waiting for new mdsmap\n");
wait_for_new_map(mdsc);
spin_lock(&mdsc->lock);
goto retry;
}
+ dout(30, "mdsc_do_request chose mds%d\n", mds);
/* get session */
session = get_session(mdsc, mds);
+ dout(30, "mdsc_do_request got session %p\n", session);
+
/* open? */
if (mdsc->sessions[mds]->s_state == CEPH_MDS_SESSION_IDLE)
open_session(mdsc, session, mds);
struct ceph_messenger *msgr;
int ret = 1;
+ dout(30, "try_write start\n");
con = container_of(work, struct ceph_connection, swork);
+ spin_lock(&con->lock);
msgr = con->msgr;
more:
clear_bit(WRITE_PENDING, &con->state);
done:
+ dout(30, "try_write done\n");
+ spin_unlock(&con->lock);
return;
}
prepare_write_connect(msgr, con);
dout(5, "ceph_msg_send initiating connect on %p new state %u\n", con, con->state);
ret = ceph_tcp_connect(con);
+ dout(5, "ceph_msg_send done initiating connect on %p new state %u\n", con, con->state);
if (ret < 0) {
derr(1, "connection failure to peer %x:%d\n",
ntohl(msg->hdr.dst.addr.ipaddr.sin_addr.s_addr),
m->front.iov_base = kmalloc(front_len, GFP_KERNEL);
if (m->front.iov_base == NULL)
goto out2;
- dout(50, "ceph_msg_new front is %p len %d\n", m->front.iov_base, front_len);
} else {
m->front.iov_base = 0;
}
#include "mon_client.h"
+/*
+ * Decode an encoded monitor map found in [p, end) into m.
+ *
+ * Everything is decoded into a scratch copy first and installed in m only
+ * after the whole message has been read, so a truncated or corrupt message
+ * leaves the previous map (epoch, fsid, num_mon and mon_inst) untouched.
+ * On success the previous mon_inst array is freed and replaced.
+ *
+ * Returns 0 on success, -ENOMEM if the monitor array cannot be allocated,
+ * or the negative error reported by a ceph_decode_* helper.
+ */
+int ceph_monmap_decode(struct ceph_monmap *m, void *p, void *end)
+{
+	struct ceph_monmap tmp;
+	int err;
+
+	dout(30, "monmap_decode %p %p\n", p, end);
+
+	tmp.mon_inst = NULL;	/* so the error path can kfree unconditionally */
+
+	err = ceph_decode_32(&p, end, &tmp.epoch);
+	if (err < 0)
+		goto bad;
+	err = ceph_decode_64(&p, end, &tmp.fsid.major);
+	if (err < 0)
+		goto bad;
+	err = ceph_decode_64(&p, end, &tmp.fsid.minor);
+	if (err < 0)
+		goto bad;
+	err = ceph_decode_32(&p, end, &tmp.num_mon);
+	if (err < 0)
+		goto bad;
+
+	/* num_mon comes off the wire; kcalloc rejects a count*size overflow */
+	tmp.mon_inst = kcalloc(tmp.num_mon, sizeof(*tmp.mon_inst), GFP_KERNEL);
+	if (tmp.mon_inst == NULL) {
+		err = -ENOMEM;
+		goto bad;
+	}
+
+	err = ceph_decode_copy(&p, end, tmp.mon_inst,
+			       tmp.num_mon*sizeof(tmp.mon_inst[0]));
+	if (err < 0)
+		goto bad;
+
+	/* fully decoded: drop the old monitor array and install the new map */
+	kfree(m->mon_inst);
+	*m = tmp;
+
+	dout(30, "monmap_decode got epoch %d, num_mon %d\n", m->epoch, m->num_mon);
+	return 0;
+
+bad:
+	kfree(tmp.mon_inst);
+	dout(30, "monmap_decode failed with %d\n", err);
+	return err;
+}
+
static int pick_mon(struct ceph_mon_client *monc, int notmon)
{
char r;
}
+
void ceph_monc_request_mdsmap(struct ceph_mon_client *monc, __u64 have)
{
dout(5, "ceph_monc_request_mdsmap\n");
#ifndef _FS_CEPH_MON_CLIENT_H
#define _FS_CEPH_MON_CLIENT_H
-#include "monmap.h"
#include "messenger.h"
struct ceph_mount_args;
+/*
+ * monitor map, filled in by ceph_monmap_decode()
+ */
+struct ceph_monmap {
+ ceph_epoch_t epoch; /* map version; the client treats 0 as "no map received yet" */
+ struct ceph_fsid fsid; /* file system id; major and minor are decoded as 64-bit values */
+ __u32 num_mon; /* number of entries in mon_inst */
+ struct ceph_entity_inst *mon_inst; /* heap-allocated array of num_mon entries; released with kfree */
+};
+
+
struct ceph_mon_client {
int last_mon; /* last monitor i contacted */
struct ceph_monmap monmap;
};
+extern int ceph_monmap_decode(struct ceph_monmap *m, void *p, void *end);
+
extern void ceph_monc_init(struct ceph_mon_client *monc);
-extern void ceph_monc_handle_monmap(struct ceph_mon_client *monc, struct ceph_msg *m);
extern void ceph_monc_request_mdsmap(struct ceph_mon_client *monc, __u64 have);
extern void ceph_monc_request_osdmap(struct ceph_mon_client *monc, __u64 have);
+++ /dev/null
-#include <linux/slab.h>
-#include "monmap.h"
-#include "messenger.h"
-
-int ceph_monmap_decode(struct ceph_monmap *m, void **p, void *end)
-{
- int err;
-
- if ((err = ceph_decode_64(p, end, &m->epoch)) < 0)
- return err;
- if ((err = ceph_decode_32(p, end, &m->num_mon)) < 0)
- return err;
-
- m->mon_inst = kmalloc(m->num_mon*sizeof(*m->mon_inst), GFP_KERNEL);
- if (m->mon_inst == NULL)
- return -ENOMEM;
- if ((err = ceph_decode_copy(p, end, m->mon_inst, m->num_mon*sizeof(m->mon_inst[0]))) < 0)
- goto bad;
-
- return 0;
-
-bad:
- kfree(m->mon_inst);
- m->mon_inst = 0;
- return err;
-}
+++ /dev/null
-#ifndef _FS_CEPH_MONMAP_H
-#define _FS_CEPH_MONMAP_H
-
-#include <linux/ceph_fs.h>
-
-/*
- * monitor map
- */
-struct ceph_monmap {
- __u64 epoch;
- __u32 num_mon;
- struct ceph_entity_inst *mon_inst;
-};
-
-extern int ceph_monmap_decode(struct ceph_monmap *m, void **p, void *end);
-
-#endif
static struct ceph_osdmap *osdmap_decode(void **p, void *end)
{
struct ceph_osdmap *map;
- __u32 crushlen, max;
+ __u32 len, max;
+ int i;
int err;
void *start = *p;
if ((err = ceph_decode_copy(p, end, map->osd_addr, map->max_osd*sizeof(*map->osd_addr))) < 0)
goto bad;
+ /* pg primary swapping */
+ if ((err = ceph_decode_32(p, end, &len)) < 0)
+ goto bad;
+ if (len) {
+ map->pg_swap_primary = kmalloc(len * sizeof(*map->pg_swap_primary), GFP_KERNEL);
+ if (map->pg_swap_primary == NULL) {
+ err = -ENOMEM;
+ goto bad;
+ }
+ map->num_pg_swap_primary = len;
+ for (i=0; i<len; i++) {
+ if ((err = ceph_decode_64(p, end, &map->pg_swap_primary[i].pg.pg64)) < 0)
+ goto bad;
+ if ((err = ceph_decode_32(p, end, &map->pg_swap_primary[i].osd)) < 0)
+ goto bad;
+ }
+ }
+
/* crush */
- if ((err = ceph_decode_32(p, end, &crushlen)) < 0)
+ if ((err = ceph_decode_32(p, end, &len)) < 0)
goto bad;
- dout(30, "osdmap_decode crush len %d from off %x\n", crushlen, (int)(*p - start));
+ dout(30, "osdmap_decode crush len %d from off %x\n", len, (int)(*p - start));
map->crush = crush_decode(p, end);
if (IS_ERR(map->crush)) {
err = PTR_ERR(map->crush);
osdc->osdmap = newmap;
}
}
- dout(1, "done\n");
+ dout(1, "osdc handle_map done\n");
out:
return;
bad:
- derr(1, "corrupt osd map message\n");
+ derr(1, "osdc handle_map corrupt msg\n");
goto out;
}
__u32 *osd_offload; /* 0 = normal, 0x10000 = 100% offload (failed) */
struct ceph_entity_addr *osd_addr;
struct crush_map *crush;
+
+ __u32 num_pg_swap_primary;
+ struct {
+ ceph_pg_t pg;
+ __u32 osd;
+ } *pg_swap_primary;
};
enum {
}
// write it out
- cout << "mkmonmap: writing monmap to " << outfn << " (" << monmap.size() << " monitors)" << std::endl;
+ cout << "mkmonmap: writing monmap epoch " << monmap.epoch << " to " << outfn << " (" << monmap.size() << " monitors)" << std::endl;
int r = monmap.write(outfn);
assert(r >= 0);
#include "OSDMonitor.h"
#include "MonitorStore.h"
+#include "messages/MMonMap.h"
#include "messages/MClientMount.h"
#include "messages/MClientUnmount.h"
mon->mdsmon->send_latest(to);
mon->osdmon->send_latest(to);
+ dout(10) << "sending monmap to " << to << dendl;
+ bufferlist bl;
+ mon->monmap->encode(bl);
+ mon->messenger->send_message(new MMonMap(bl), to);
+
delete m;
}
int last_mon; // last mon i talked to
- MonMap(int s=0) : epoch(0), mon_inst(s), last_mon(-1) {
+ // a map created with a nonzero size starts at epoch 1; an empty one stays at epoch 0
+ MonMap(int s=0) : epoch(s != 0 ? 1 : 0), mon_inst(s), last_mon(-1) {
generate_fsid();
}
}
void add_mon(entity_inst_t inst) {
+ // adding a monitor to a map still at epoch 0 moves it to epoch 1
+ if (epoch == 0)
+ epoch = 1;
mon_inst.push_back(inst);
}