git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
fix up mds map requests, kclient mds timeouts
author Sage Weil <sage@newdream.net>
Thu, 8 May 2008 21:07:27 +0000 (14:07 -0700)
committer Sage Weil <sage@newdream.net>
Thu, 8 May 2008 21:07:27 +0000 (14:07 -0700)
src/include/ceph_fs.h
src/kernel/mds_client.c
src/kernel/mds_client.h
src/kernel/mon_client.c
src/kernel/mon_client.h
src/kernel/super.c
src/messages/MMDSGetMap.h
src/mon/MDSMonitor.cc

index 08c41886427924bcdfb55a114221e2b5f39e5463..eb8e03c3dd78e2c105753c9c37328367261df7cd 100644 (file)
@@ -308,7 +308,7 @@ struct ceph_osd_getmap {
 
 struct ceph_mds_getmap {
        struct ceph_fsid fsid;
-       __le32 have;
+       __le32 want;
 } __attribute__ ((packed));
 
 
index b99cdbc69e1eaf8a71989da40caf69563ae51ece..ae2510971ba4caacd6cc45044e6951e88dc0fe11 100644 (file)
@@ -289,6 +289,7 @@ __register_session(struct ceph_mds_client *mdsc, int mds)
 
        s = kmalloc(sizeof(struct ceph_mds_session), GFP_NOFS);
        s->s_mds = mds;
+       s->s_ttl = 0;
        s->s_state = CEPH_MDS_SESSION_NEW;
        s->s_seq = 0;
        mutex_init(&s->s_mutex);
@@ -494,12 +495,8 @@ static void wait_for_new_map(struct ceph_mds_client *mdsc)
        __u32 have;
        dout(30, "wait_for_new_map enter\n");
        have = mdsc->mdsmap->m_epoch;
-       if (mdsc->last_requested_map < mdsc->mdsmap->m_epoch) {
-               mdsc->last_requested_map = have;
-               spin_unlock(&mdsc->lock);
-               ceph_monc_request_mdsmap(&mdsc->client->monc, have);
-       } else
-               spin_unlock(&mdsc->lock);
+       spin_unlock(&mdsc->lock);
+       ceph_monc_request_mdsmap(&mdsc->client->monc, have+1);
        wait_for_completion(&mdsc->map_waiters);
        spin_lock(&mdsc->lock);
        dout(30, "wait_for_new_map exit\n");
@@ -777,6 +774,8 @@ void ceph_mdsc_handle_session(struct ceph_mds_client *mdsc,
        /* handle */
        spin_lock(&mdsc->lock);
        session = __get_session(mdsc, mds);
+       if (session && mdsc->mdsmap)
+               session->s_ttl = jiffies + mdsc->mdsmap->m_session_autoclose;
        spin_unlock(&mdsc->lock);
 
        mutex_lock(&session->s_mutex);
@@ -1812,6 +1811,7 @@ static void delayed_work(struct work_struct *work)
        int renew_interval = mdsc->mdsmap->m_cap_bit_timeout >> 2;
        int renew_caps = time_after_eq(jiffies, HZ*renew_interval + 
                                       mdsc->last_renew_caps);
+       u32 want_map = 0;
 
        dout(10, "delayed_work on %p renew_caps=%d\n", mdsc, renew_caps);
 
@@ -1825,6 +1825,11 @@ static void delayed_work(struct work_struct *work)
                struct ceph_mds_session *session = __get_session(mdsc, i);
                if (session == 0)
                        continue;
+               if (session->s_ttl && time_after(jiffies, session->s_ttl)) {
+                       derr(1, "mds%d session probably timed out, "
+                            "requesting mds map\n", session->s_mds);
+                       want_map = mdsc->mdsmap->m_epoch;
+               }
                if (session->s_state < CEPH_MDS_SESSION_OPEN) {
                        put_session(session);
                        continue;
@@ -1841,6 +1846,10 @@ static void delayed_work(struct work_struct *work)
                spin_lock(&mdsc->lock);
        }
        spin_unlock(&mdsc->lock);
+
+       if (want_map)
+               ceph_monc_request_mdsmap(&mdsc->client->monc, want_map);
+
        schedule_delayed(mdsc);
 }
 
@@ -1854,7 +1863,6 @@ void ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client)
        mdsc->max_sessions = 0;
        mdsc->last_tid = 0;
        INIT_RADIX_TREE(&mdsc->request_tree, GFP_ATOMIC);
-       mdsc->last_requested_map = 0;
        init_completion(&mdsc->map_waiters);
        init_completion(&mdsc->session_close_waiters);
        INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work);
@@ -1976,6 +1984,7 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
 
        /* do we need it? */
        spin_lock(&mdsc->lock);
+       ceph_monc_got_mdsmap(&mdsc->client->monc, epoch);
        if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) {
                dout(2, "ceph_mdsc_handle_map epoch %u < our %u\n",
                     epoch, mdsc->mdsmap->m_epoch);
@@ -2016,9 +2025,6 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
                mdsc->mdsmap = newmap;
        }
 
-       /* stop asking */
-       ceph_monc_got_mdsmap(&mdsc->client->monc, newmap->m_epoch);
-
        spin_unlock(&mdsc->lock);
 
        /* (re)schedule work */
index 9b62a30282b1ba45c09c6715b9b528845870fb69..7f2a3ece624c5134c7197b0f9e50d55fad988164 100644 (file)
@@ -54,6 +54,7 @@ enum {
 struct ceph_mds_session {
        int               s_mds;
        int               s_state;
+       unsigned long     s_ttl;      /* time until mds kills us */
        __u64             s_seq;      /* incoming msg seq # */
        struct mutex      s_mutex;
        spinlock_t        s_cap_lock; /* protects s_cap_gen, s_cap_ttl */
@@ -103,7 +104,6 @@ struct ceph_mds_client {
        int                     max_sessions;  /* len of s_mds_sessions */
        __u64                   last_tid;      /* most recent mds request */
        struct radix_tree_root  request_tree;  /* pending mds requests */
-       __u64                   last_requested_map;
        struct completion       map_waiters, session_close_waiters;
        struct delayed_work     delayed_work;  /* delayed work */
        unsigned long last_renew_caps;
index e47849b29dfc0e52c706c4e92f22fdc7473b5b2e..7b7143a75b3bb44d0fa3246f1a86d72ec8f8363f 100644 (file)
@@ -95,48 +95,48 @@ static void do_request_mdsmap(struct work_struct *work)
                             mds_delayed_work.work);
        int mon = pick_mon(monc, -1);
 
-       dout(5, "request_mdsmap from mon%d have %u\n", mon, monc->have_mdsmap);
+       dout(5, "request_mdsmap from mon%d want %u\n", mon, monc->want_mdsmap);
 
        msg = ceph_msg_new(CEPH_MSG_MDS_GETMAP, sizeof(*h), 0, 0, 0);
        if (IS_ERR(msg))
                return;
        h = msg->front.iov_base;
        h->fsid = monc->monmap->fsid;
-       h->have = cpu_to_le32(monc->have_mdsmap);
+       h->want = cpu_to_le32(monc->want_mdsmap);
        msg->hdr.dst = monc->monmap->mon_inst[mon];
 
        ceph_msg_send(monc->client->msgr, msg, 0);
 
        /* keep sending request until we receive mds map */
-       if (monc->have_mdsmap)
+       if (monc->want_mdsmap)
                delayed_work(&monc->mds_delayed_work, &monc->mds_delay);
 }
 
-void ceph_monc_request_mdsmap(struct ceph_mon_client *monc, __u32 have)
+void ceph_monc_request_mdsmap(struct ceph_mon_client *monc, __u32 want)
 {
-       monc->mds_delay = BASE_DELAY_INTERVAL;
-       monc->have_mdsmap = have;
-       do_request_mdsmap(&monc->mds_delayed_work.work);
+       if (want > monc->want_mdsmap) {
+               monc->mds_delay = BASE_DELAY_INTERVAL;
+               monc->want_mdsmap = want;
+               do_request_mdsmap(&monc->mds_delayed_work.work);
+       }
 }
 
-int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, __u32 have)
+int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, __u32 got)
 {
-       dout(5, "ceph_monc_got_mdsmap calling cancel_delayed_work_sync\n");
-
+       if (got < monc->want_mdsmap) {
+               dout(5, "got_mdsmap got %u <= wanted %u\n",
+                    got, monc->want_mdsmap);
+               return -EAGAIN;
+       }
+       
+       dout(5, "got_mdsmap have %u > wanted %u\n",
+            got, monc->want_mdsmap);
+       monc->want_mdsmap = 0;
+       
        /* we got map so take map request out of queue */
        cancel_delayed_work_sync(&monc->mds_delayed_work);
        monc->mds_delay = BASE_DELAY_INTERVAL;
-
-       if (have > monc->have_mdsmap) {
-               monc->have_mdsmap = 0;
-               dout(5, "ceph_monc_got_mdsmap have %u > wanted %u\n",
-                    have, monc->have_mdsmap);
-               return 0;
-       } else {
-               dout(5, "ceph_monc_got_mdsmap have %u <= wanted %u *****\n",
-                    have, monc->have_mdsmap);
-               return -EAGAIN;
-       }
+       return 0;
 }
 
 
@@ -169,7 +169,7 @@ static void do_request_osdmap(struct work_struct *work)
 
 void ceph_monc_request_osdmap(struct ceph_mon_client *monc, __u32 have)
 {
-       dout(5, "ceph_monc_request_osdmap have %u\n", have);
+       dout(5, "request_osdmap have %u\n", have);
        monc->osd_delay = BASE_DELAY_INTERVAL;
        monc->have_osdmap = have;
        do_request_osdmap(&monc->osd_delayed_work.work);
@@ -178,17 +178,16 @@ void ceph_monc_request_osdmap(struct ceph_mon_client *monc, __u32 have)
 int ceph_monc_got_osdmap(struct ceph_mon_client *monc, __u32 got)
 {
        if (got <= monc->have_osdmap) {
-               dout(5, "ceph_monc_got_osdmap got %u <= had %u, will retry\n",
+               dout(5, "got_osdmap got %u <= had %u, will retry\n",
                     got, monc->have_osdmap);
                return -EAGAIN;
        }
 
        /* we got map so take map request out of queue */
-       dout(5, "ceph_monc_got_osdmap got %u > had %u\n",
-            got, monc->have_osdmap);
+       dout(5, "got_osdmap got %u > had %u\n", got, monc->have_osdmap);
+       monc->have_osdmap = 0;
        cancel_delayed_work_sync(&monc->osd_delayed_work);
        monc->osd_delay = BASE_DELAY_INTERVAL;
-       monc->have_osdmap = 0;
        return 0;
 }
 
@@ -223,7 +222,7 @@ void ceph_monc_request_umount(struct ceph_mon_client *monc)
 void ceph_monc_handle_umount(struct ceph_mon_client *monc,
                             struct ceph_msg *msg)
 {
-       dout(5, "ceph_monc_handle_umount\n");
+       dout(5, "handle_umount\n");
        cancel_delayed_work_sync(&monc->umount_delayed_work);
        monc->client->mount_state = CEPH_MOUNT_UNMOUNTED;
        wake_up(&monc->client->mount_wq);
@@ -314,7 +313,7 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf)
 
 int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
 {
-       dout(5, "ceph_monc_init\n");
+       dout(5, "init\n");
        memset(monc, 0, sizeof(*monc));
        monc->client = cl;
        monc->monmap = kzalloc(sizeof(struct ceph_monmap), GFP_KERNEL);
@@ -326,7 +325,15 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
        INIT_DELAYED_WORK(&monc->osd_delayed_work, do_request_osdmap);
        INIT_DELAYED_WORK(&monc->umount_delayed_work, do_request_umount);
        monc->last_tid = 0;
-       monc->have_mdsmap = 0;
+       monc->want_mdsmap = 0;
        monc->have_osdmap = 0;
        return 0;
 }
+
+void ceph_monc_stop(struct ceph_mon_client *monc)
+{
+       dout(5, "stop\n");
+       cancel_delayed_work_sync(&monc->mds_delayed_work);
+       cancel_delayed_work_sync(&monc->osd_delayed_work);
+       cancel_delayed_work_sync(&monc->umount_delayed_work);
+}
index a0ddce5864bdc7232064203830f999465b129339..6db226e5e49daefeebc5789ebdb9f2c9d17912c1 100644 (file)
@@ -39,7 +39,7 @@ struct ceph_mon_client {
        unsigned long osd_delay;
        unsigned long umount_delay;
 
-       u32 have_mdsmap;  /* protected by caller's lock */
+       u32 want_mdsmap;  /* protected by caller's lock */
        u32 have_osdmap;  /* protected by caller's lock */
 };
 
@@ -48,8 +48,9 @@ extern int ceph_monmap_contains(struct ceph_monmap *m,
                                struct ceph_entity_addr *addr);
 
 extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl);
+extern void ceph_monc_stop(struct ceph_mon_client *monc);
 
-extern void ceph_monc_request_mdsmap(struct ceph_mon_client *monc, __u32 have);
+extern void ceph_monc_request_mdsmap(struct ceph_mon_client *monc, __u32 want);
 extern int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, __u32 have);
 
 extern void ceph_monc_request_osdmap(struct ceph_mon_client *monc, __u32 have);
index fc8b147bfeb91e1d19e92c7efa7b53ccbe282694..260e7cc513848f409a0a0ce15cb7d76240175425 100644 (file)
@@ -599,6 +599,7 @@ void ceph_destroy_client(struct ceph_client *cl)
        /* unmount */
        /* ... */
 
+       ceph_monc_stop(&cl->monc);
        ceph_osdc_stop(&cl->osdc);
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25)
index da34e4128ccffef5f5f58ea8dfdd7d14cb070ed6..ae47b4ce6bf32545317822bd580dbd051c5dd6f8 100644 (file)
 class MMDSGetMap : public Message {
  public:
   ceph_fsid fsid;
-  epoch_t have;
+  epoch_t want;
 
   MMDSGetMap() {}
-  MMDSGetMap(ceph_fsid &f, epoch_t h=0) : 
+  MMDSGetMap(ceph_fsid &f, epoch_t w=0) : 
     Message(CEPH_MSG_MDS_GETMAP), 
     fsid(f),
-    have(h) { }
+    want(w) { }
 
   const char *get_type_name() { return "mds_getmap"; }
+  void print(ostream& out) {
+    out << "mds_getmap(want " << want << ")";
+  }
   
   void encode_payload() {
     ::_encode_simple(fsid, payload);
-    ::_encode_simple(have, payload);
+    ::_encode_simple(want, payload);
   }
   void decode_payload() {
     bufferlist::iterator p = payload.begin();
     ::_decode_simple(fsid, p);
-    ::_decode_simple(have, p);
+    ::_decode_simple(want, p);
   }
 };
 
index 4de75ce3f011dc1e3e5ddd1ed72e068c20816087..ae697cd8722f83508dbfde8f3fa1223569a8238f 100644 (file)
@@ -199,7 +199,7 @@ bool MDSMonitor::preprocess_query(Message *m)
 
 void MDSMonitor::handle_mds_getmap(MMDSGetMap *m)
 {
-  if (m->have < mdsmap.get_epoch())
+  if (m->want <= mdsmap.get_epoch())
     send_full(m->get_source_inst());
   else
     waiting_for_map.push_back(m->get_source_inst());