From 5687ac2312609f783e225ba8d0905418587b4791 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 8 May 2008 14:07:27 -0700 Subject: [PATCH] fix up mds map requests, kclient mds timeouts --- src/include/ceph_fs.h | 2 +- src/kernel/mds_client.c | 26 ++++++++++------ src/kernel/mds_client.h | 2 +- src/kernel/mon_client.c | 65 ++++++++++++++++++++++----------------- src/kernel/mon_client.h | 5 +-- src/kernel/super.c | 1 + src/messages/MMDSGetMap.h | 13 +++++--- src/mon/MDSMonitor.cc | 2 +- 8 files changed, 67 insertions(+), 49 deletions(-) diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 08c4188642792..eb8e03c3dd78e 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -308,7 +308,7 @@ struct ceph_osd_getmap { struct ceph_mds_getmap { struct ceph_fsid fsid; - __le32 have; + __le32 want; } __attribute__ ((packed)); diff --git a/src/kernel/mds_client.c b/src/kernel/mds_client.c index b99cdbc69e1ea..ae2510971ba4c 100644 --- a/src/kernel/mds_client.c +++ b/src/kernel/mds_client.c @@ -289,6 +289,7 @@ __register_session(struct ceph_mds_client *mdsc, int mds) s = kmalloc(sizeof(struct ceph_mds_session), GFP_NOFS); s->s_mds = mds; + s->s_ttl = 0; s->s_state = CEPH_MDS_SESSION_NEW; s->s_seq = 0; mutex_init(&s->s_mutex); @@ -494,12 +495,8 @@ static void wait_for_new_map(struct ceph_mds_client *mdsc) __u32 have; dout(30, "wait_for_new_map enter\n"); have = mdsc->mdsmap->m_epoch; - if (mdsc->last_requested_map < mdsc->mdsmap->m_epoch) { - mdsc->last_requested_map = have; - spin_unlock(&mdsc->lock); - ceph_monc_request_mdsmap(&mdsc->client->monc, have); - } else - spin_unlock(&mdsc->lock); + spin_unlock(&mdsc->lock); + ceph_monc_request_mdsmap(&mdsc->client->monc, have+1); wait_for_completion(&mdsc->map_waiters); spin_lock(&mdsc->lock); dout(30, "wait_for_new_map exit\n"); @@ -777,6 +774,8 @@ void ceph_mdsc_handle_session(struct ceph_mds_client *mdsc, /* handle */ spin_lock(&mdsc->lock); session = __get_session(mdsc, mds); + if (session && mdsc->mdsmap) + session->s_ttl = jiffies + mdsc->mdsmap->m_session_autoclose; spin_unlock(&mdsc->lock); mutex_lock(&session->s_mutex); @@ -1812,6 +1811,7 @@ static void delayed_work(struct work_struct *work) int renew_interval = mdsc->mdsmap->m_cap_bit_timeout >> 2; int renew_caps = time_after_eq(jiffies, HZ*renew_interval + mdsc->last_renew_caps); + u32 want_map = 0; dout(10, "delayed_work on %p renew_caps=%d\n", mdsc, renew_caps); @@ -1825,6 +1825,11 @@ static void delayed_work(struct work_struct *work) struct ceph_mds_session *session = __get_session(mdsc, i); if (session == 0) continue; + if (session->s_ttl && time_after(jiffies, session->s_ttl)) { + derr(1, "mds%d session probably timed out, " + "requesting mds map\n", session->s_mds); + want_map = mdsc->mdsmap->m_epoch; + } if (session->s_state < CEPH_MDS_SESSION_OPEN) { put_session(session); continue; @@ -1841,6 +1846,10 @@ static void delayed_work(struct work_struct *work) spin_lock(&mdsc->lock); } spin_unlock(&mdsc->lock); + + if (want_map) + ceph_monc_request_mdsmap(&mdsc->client->monc, want_map); + schedule_delayed(mdsc); } @@ -1854,7 +1863,6 @@ void ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) mdsc->max_sessions = 0; mdsc->last_tid = 0; INIT_RADIX_TREE(&mdsc->request_tree, GFP_ATOMIC); - mdsc->last_requested_map = 0; init_completion(&mdsc->map_waiters); init_completion(&mdsc->session_close_waiters); INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work); @@ -1976,6 +1984,7 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg) /* do we need it? */ spin_lock(&mdsc->lock); + ceph_monc_got_mdsmap(&mdsc->client->monc, epoch); if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) { dout(2, "ceph_mdsc_handle_map epoch %u < our %u\n", epoch, mdsc->mdsmap->m_epoch); @@ -2016,9 +2025,6 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg) mdsc->mdsmap = newmap; } - /* stop asking */ - ceph_monc_got_mdsmap(&mdsc->client->monc, newmap->m_epoch); - spin_unlock(&mdsc->lock); /* (re)schedule work */ diff --git a/src/kernel/mds_client.h b/src/kernel/mds_client.h index 9b62a30282b1b..7f2a3ece624c5 100644 --- a/src/kernel/mds_client.h +++ b/src/kernel/mds_client.h @@ -54,6 +54,7 @@ enum { struct ceph_mds_session { int s_mds; int s_state; + unsigned long s_ttl; /* time until mds kills us */ __u64 s_seq; /* incoming msg seq # */ struct mutex s_mutex; spinlock_t s_cap_lock; /* protects s_cap_gen, s_cap_ttl */ @@ -103,7 +104,6 @@ struct ceph_mds_client { int max_sessions; /* len of s_mds_sessions */ __u64 last_tid; /* most recent mds request */ struct radix_tree_root request_tree; /* pending mds requests */ - __u64 last_requested_map; struct completion map_waiters, session_close_waiters; struct delayed_work delayed_work; /* delayed work */ unsigned long last_renew_caps; diff --git a/src/kernel/mon_client.c b/src/kernel/mon_client.c index e47849b29dfc0..7b7143a75b3bb 100644 --- a/src/kernel/mon_client.c +++ b/src/kernel/mon_client.c @@ -95,48 +95,48 @@ static void do_request_mdsmap(struct work_struct *work) mds_delayed_work.work); int mon = pick_mon(monc, -1); - dout(5, "request_mdsmap from mon%d have %u\n", mon, monc->have_mdsmap); + dout(5, "request_mdsmap from mon%d want %u\n", mon, monc->want_mdsmap); msg = ceph_msg_new(CEPH_MSG_MDS_GETMAP, sizeof(*h), 0, 0, 0); if (IS_ERR(msg)) return; h = msg->front.iov_base; h->fsid = monc->monmap->fsid; - h->have = cpu_to_le32(monc->have_mdsmap); + h->want = cpu_to_le32(monc->want_mdsmap); msg->hdr.dst = monc->monmap->mon_inst[mon]; ceph_msg_send(monc->client->msgr, msg, 0); /* keep sending request until we receive mds map */ - if (monc->have_mdsmap) + if (monc->want_mdsmap) delayed_work(&monc->mds_delayed_work, &monc->mds_delay); } -void ceph_monc_request_mdsmap(struct ceph_mon_client *monc, __u32 have) +void ceph_monc_request_mdsmap(struct ceph_mon_client *monc, __u32 want) { - monc->mds_delay = BASE_DELAY_INTERVAL; - monc->have_mdsmap = have; - do_request_mdsmap(&monc->mds_delayed_work.work); + if (want > monc->want_mdsmap) { + monc->mds_delay = BASE_DELAY_INTERVAL; + monc->want_mdsmap = want; + do_request_mdsmap(&monc->mds_delayed_work.work); + } } -int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, __u32 have) +int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, __u32 got) { - dout(5, "ceph_monc_got_mdsmap calling cancel_delayed_work_sync\n"); - + if (got < monc->want_mdsmap) { + dout(5, "got_mdsmap got %u <= wanted %u\n", + got, monc->want_mdsmap); + return -EAGAIN; + } + + dout(5, "got_mdsmap have %u > wanted %u\n", + got, monc->want_mdsmap); + monc->want_mdsmap = 0; + /* we got map so take map request out of queue */ cancel_delayed_work_sync(&monc->mds_delayed_work); monc->mds_delay = BASE_DELAY_INTERVAL; - - if (have > monc->have_mdsmap) { - monc->have_mdsmap = 0; - dout(5, "ceph_monc_got_mdsmap have %u > wanted %u\n", - have, monc->have_mdsmap); - return 0; - } else { - dout(5, "ceph_monc_got_mdsmap have %u <= wanted %u *****\n", - have, monc->have_mdsmap); - return -EAGAIN; - } + return 0; } @@ -169,7 +169,7 @@ static void do_request_osdmap(struct work_struct *work) void ceph_monc_request_osdmap(struct ceph_mon_client *monc, __u32 have) { - dout(5, "ceph_monc_request_osdmap have %u\n", have); + dout(5, "request_osdmap have %u\n", have); monc->osd_delay = BASE_DELAY_INTERVAL; monc->have_osdmap = have; do_request_osdmap(&monc->osd_delayed_work.work); @@ -178,17 +178,16 @@ void ceph_monc_request_osdmap(struct ceph_mon_client *monc, __u32 have) int ceph_monc_got_osdmap(struct ceph_mon_client *monc, __u32 got) { if (got <= monc->have_osdmap) { - dout(5, "ceph_monc_got_osdmap got %u <= had %u, will retry\n", + dout(5, "got_osdmap got %u <= had %u, will retry\n", got, monc->have_osdmap); return -EAGAIN; } /* we got map so take map request out of queue */ - dout(5, "ceph_monc_got_osdmap got %u > had %u\n", - got, monc->have_osdmap); + dout(5, "got_osdmap got %u > had %u\n", got, monc->have_osdmap); + monc->have_osdmap = 0; cancel_delayed_work_sync(&monc->osd_delayed_work); monc->osd_delay = BASE_DELAY_INTERVAL; - monc->have_osdmap = 0; return 0; } @@ -223,7 +222,7 @@ void ceph_monc_request_umount(struct ceph_mon_client *monc) void ceph_monc_handle_umount(struct ceph_mon_client *monc, struct ceph_msg *msg) { - dout(5, "ceph_monc_handle_umount\n"); + dout(5, "handle_umount\n"); cancel_delayed_work_sync(&monc->umount_delayed_work); monc->client->mount_state = CEPH_MOUNT_UNMOUNTED; wake_up(&monc->client->mount_wq); @@ -314,7 +313,7 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) { - dout(5, "ceph_monc_init\n"); + dout(5, "init\n"); memset(monc, 0, sizeof(*monc)); monc->client = cl; monc->monmap = kzalloc(sizeof(struct ceph_monmap), GFP_KERNEL); @@ -326,7 +325,15 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) INIT_DELAYED_WORK(&monc->osd_delayed_work, do_request_osdmap); INIT_DELAYED_WORK(&monc->umount_delayed_work, do_request_umount); monc->last_tid = 0; - monc->have_mdsmap = 0; + monc->want_mdsmap = 0; monc->have_osdmap = 0; return 0; } + +void ceph_monc_stop(struct ceph_mon_client *monc) +{ + dout(5, "stop\n"); + cancel_delayed_work_sync(&monc->mds_delayed_work); + cancel_delayed_work_sync(&monc->osd_delayed_work); + cancel_delayed_work_sync(&monc->umount_delayed_work); +} diff --git a/src/kernel/mon_client.h b/src/kernel/mon_client.h index a0ddce5864bdc..6db226e5e49da 100644 --- a/src/kernel/mon_client.h +++ b/src/kernel/mon_client.h @@ -39,7 +39,7 @@ struct ceph_mon_client { unsigned long osd_delay; unsigned long umount_delay; - u32 have_mdsmap; /* protected by caller's lock */ + u32 want_mdsmap; /* protected by caller's lock */ u32 have_osdmap; /* protected by caller's lock */ }; @@ -48,8 +48,9 @@ extern int ceph_monmap_contains(struct ceph_monmap *m, struct ceph_entity_addr *addr); extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl); +extern void ceph_monc_stop(struct ceph_mon_client *monc); -extern void ceph_monc_request_mdsmap(struct ceph_mon_client *monc, __u32 have); +extern void ceph_monc_request_mdsmap(struct ceph_mon_client *monc, __u32 want); extern int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, __u32 have); extern void ceph_monc_request_osdmap(struct ceph_mon_client *monc, __u32 have); diff --git a/src/kernel/super.c b/src/kernel/super.c index fc8b147bfeb91..260e7cc513848 100644 --- a/src/kernel/super.c +++ b/src/kernel/super.c @@ -599,6 +599,7 @@ void ceph_destroy_client(struct ceph_client *cl) /* unmount */ /* ... */ + ceph_monc_stop(&cl->monc); ceph_osdc_stop(&cl->osdc); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25) diff --git a/src/messages/MMDSGetMap.h b/src/messages/MMDSGetMap.h index da34e4128ccff..ae47b4ce6bf32 100644 --- a/src/messages/MMDSGetMap.h +++ b/src/messages/MMDSGetMap.h @@ -23,24 +23,27 @@ class MMDSGetMap : public Message { public: ceph_fsid fsid; - epoch_t have; + epoch_t want; MMDSGetMap() {} - MMDSGetMap(ceph_fsid &f, epoch_t h=0) : + MMDSGetMap(ceph_fsid &f, epoch_t w=0) : Message(CEPH_MSG_MDS_GETMAP), fsid(f), - have(h) { } + want(w) { } const char *get_type_name() { return "mds_getmap"; } + void print(ostream& out) { + out << "mds_getmap(want " << want << ")"; + } void encode_payload() { ::_encode_simple(fsid, payload); - ::_encode_simple(have, payload); + ::_encode_simple(want, payload); } void decode_payload() { bufferlist::iterator p = payload.begin(); ::_decode_simple(fsid, p); - ::_decode_simple(have, p); + ::_decode_simple(want, p); } }; diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc index 4de75ce3f011d..ae697cd8722f8 100644 --- a/src/mon/MDSMonitor.cc +++ b/src/mon/MDSMonitor.cc @@ -199,7 +199,7 @@ bool MDSMonitor::preprocess_query(Message *m) void MDSMonitor::handle_mds_getmap(MMDSGetMap *m) { - if (m->have < mdsmap.get_epoch()) + if (m->want <= mdsmap.get_epoch()) send_full(m->get_source_inst()); else waiting_for_map.push_back(m->get_source_inst()); -- 2.39.5