git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
kclient: unify mount timeouts
author Sage Weil <sage@newdream.net>
Sat, 20 Sep 2008 20:34:11 +0000 (13:34 -0700)
committer Sage Weil <sage@newdream.net>
Sat, 20 Sep 2008 20:34:11 +0000 (13:34 -0700)
Make the mon map and mount message exchange, the mds session open, and the root inode open all time out.  Previously the mon interaction would time out, but we could hang on a non-responsive (e.g., not active) mds.

src/TODO
src/kernel/mds_client.c
src/kernel/mds_client.h
src/kernel/super.c
src/kernel/super.h

index e9f613ee9535d1ba2d30fb1b0acd5bdd1396834c..0cc1d038885c13f31ed0447440f931d6e180509f 100644 (file)
--- a/src/TODO
+++ b/src/TODO
@@ -51,7 +51,6 @@ kernel client
 - set mapping bits for ENOSPC, EIO?
 - flush caps on sync, fsync, etc.
   - do we need to block?
-- timeout root inode open on mount
 - timeout session close on umount
 - forced unmount?
 - deal with CAP_RDCACHE properly: are we invalidating cache pages correctly?
index 8f1b33d4c50d9f2e47b10345445da1788b25f0c2..6d6afcaf9d6067769305f37334262db4bfc079f5 100644 (file)
@@ -386,6 +386,8 @@ static struct ceph_mds_request *new_request(struct ceph_msg *msg)
        req = kzalloc(sizeof(*req), GFP_NOFS);
        req->r_request = msg;
        req->r_reply = 0;
+       req->r_timeout = 0;
+       req->r_started = jiffies;
        req->r_err = 0;
        req->r_direct_dentry = 0;
        req->r_direct_mode = USE_ANY_MDS;
@@ -549,30 +551,44 @@ static struct ceph_msg *create_session_msg(__u32 op, __u64 seq)
        return msg;
 }
 
-static void wait_for_new_map(struct ceph_mds_client *mdsc)
+static int wait_for_new_map(struct ceph_mds_client *mdsc,
+                            unsigned long timeout)
 {
        __u32 have;
+       int err = 0;
+
        dout(30, "wait_for_new_map enter\n");
        have = mdsc->mdsmap->m_epoch;
        mutex_unlock(&mdsc->mutex);
        ceph_monc_request_mdsmap(&mdsc->client->monc, have+1);
-       wait_for_completion(&mdsc->map_waiters);
+       if (timeout) {
+               err = wait_for_completion_timeout(&mdsc->map_waiters, timeout);
+               if (err > 0)
+                       err = 0;
+               else if (err == 0)
+                       err = -EIO;
+       } else
+               wait_for_completion(&mdsc->map_waiters);
        mutex_lock(&mdsc->mutex);
-       dout(30, "wait_for_new_map exit\n");
+       dout(30, "wait_for_new_map err %d\n", err);
+       return err;
 }
 
 static int open_session(struct ceph_mds_client *mdsc,
-                       struct ceph_mds_session *session)
+                       struct ceph_mds_session *session, unsigned long timeout)
 {
        struct ceph_msg *msg;
        int mstate;
        int mds = session->s_mds;
+       int err = 0;
 
        /* mds active? */
        mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds);
        dout(10, "open_session to mds%d, state %d\n", mds, mstate);
        if (mstate < CEPH_MDS_STATE_ACTIVE) {
-               wait_for_new_map(mdsc);
+               err = wait_for_new_map(mdsc, timeout);
+               if (err)
+                       return err;
                mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds);
                if (mstate < CEPH_MDS_STATE_ACTIVE) {
                        dout(30, "open_session mds%d now %d still not active\n",
@@ -593,12 +609,20 @@ static int open_session(struct ceph_mds_client *mdsc,
 
        /* wait for session to open (or fail, or close) */
        dout(30, "open_session waiting on session %p\n", session);
-       wait_for_completion(&session->s_completion);
+       if (timeout) {
+               err = wait_for_completion_timeout(&session->s_completion,
+                                                 timeout);
+               if (err > 0)
+                       err = 0;
+               else if (err == 0)
+                       err = -EIO;
+       } else
+               wait_for_completion(&session->s_completion);
        dout(30, "open_session done waiting on session %p, state %d\n",
             session, session->s_state);
 
        mutex_lock(&mdsc->mutex);
-       return 0;
+       return err;
 }
 
 /*
@@ -1026,10 +1050,19 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
        mutex_lock(&mdsc->mutex);
        __register_request(mdsc, req);
 retry:
+       if (req->r_timeout &&
+           time_after_eq(jiffies, req->r_started + req->r_timeout)) {
+               dout(10, "do_request timed out\n");
+               err = -EIO;
+               goto finish;
+       }
+
        mds = choose_mds(mdsc, req);
        if (mds < 0) {
                dout(30, "do_request waiting for new mdsmap\n");
-               wait_for_new_map(mdsc);
+               err = wait_for_new_map(mdsc, req->r_timeout);
+               if (err)
+                       return err;
                goto retry;
        }
 
@@ -1044,9 +1077,8 @@ retry:
        err = 0;
        if (session->s_state == CEPH_MDS_SESSION_NEW ||
            session->s_state == CEPH_MDS_SESSION_CLOSING) {
-               err = open_session(mdsc, session);
+               err = open_session(mdsc, session, req->r_timeout);
                dout(30, "do_request open_session err=%d\n", err);
-               BUG_ON(err && err != -EAGAIN);
        }
        if (session->s_state != CEPH_MDS_SESSION_OPEN ||
                err == -EAGAIN) {
@@ -1073,29 +1105,37 @@ retry:
        req->r_request = ceph_msg_maybe_dup(req->r_request);
        ceph_msg_get(req->r_request);
        ceph_send_msg_mds(mdsc, req->r_request, mds);
-       wait_for_completion(&req->r_completion);
+       if (req->r_timeout) {
+               err = wait_for_completion_timeout(&req->r_completion,
+                                                 req->r_timeout);
+               if (err > 0)
+                       err = 0;
+               else if (err == 0)
+                       err = -EIO;
+       } else {
+               err = 0;
+               wait_for_completion(&req->r_completion);
+       }
        mutex_lock(&mdsc->mutex);
-       if (req->r_reply == NULL) {
+       if (req->r_reply == NULL && !err) {
                put_request_sessions(req);
                goto retry;
        }
-
-       /* clean up request, parse reply */
-       __unregister_request(mdsc, req);
-       mutex_unlock(&mdsc->mutex);
-
        if (IS_ERR(req->r_reply)) {
                err = PTR_ERR(req->r_reply);
                req->r_reply = 0;
-               dout(10, "do_request returning err %d from reply handler\n",
-                    err);
-               return err;
        }
+       if (!err)
+               err = le32_to_cpu(req->r_reply_info.head->result);
+
+       /* clean up request, parse reply */
+finish:
+       __unregister_request(mdsc, req);
+       mutex_unlock(&mdsc->mutex);
 
        ceph_msg_put(req->r_request);
        req->r_request = 0;
 
-       err = le32_to_cpu(req->r_reply_info.head->result);
        dout(30, "do_request done on %p result %d\n", req, err);
        return err;
 }
index 36d4b5a1060a42c363a84c89aa046ee3b9156857..0cdab3b13abbc67f36a279c18243599c1addbdad 100644 (file)
@@ -91,6 +91,8 @@ struct ceph_mds_request {
        struct ceph_msg  *r_reply;
        struct ceph_mds_reply_info r_reply_info;
        int r_err;
+       unsigned long r_timeout;  /* optional.  jiffies */
+       unsigned long r_started;  /* this must be set if r_timeout */
 
        /* to direct request */
        struct dentry *r_direct_dentry;
index 598dd24b7144c4ed565662cd68dac2c3aa544f07..f96caab3ec755b1923e592116bf35d6d2c9fca1f 100644 (file)
@@ -367,7 +367,7 @@ enum {
        Opt_port,
        Opt_wsize,
        Opt_osdtimeout,
-       Opt_mount_attempts,
+       Opt_mount_timeout,
        /* int args above */
        Opt_ip,
        Opt_unsafewrites,
@@ -393,7 +393,7 @@ static match_table_t arg_tokens = {
        {Opt_port, "port=%d"},
        {Opt_wsize, "wsize=%d"},
        {Opt_osdtimeout, "osdtimeout=%d"},
-       {Opt_mount_attempts, "mount_attempts=%d"},
+       {Opt_mount_timeout, "mount_timeout=%d"},
        /* int args above */
        {Opt_ip, "ip=%s"},
        {Opt_debug_console, "debug_console"},
@@ -456,8 +456,8 @@ static int parse_mount_args(int flags, char *options, const char *dev_name,
        /* defaults */
        args->sb_flags = flags;
        args->flags = CEPH_MOUNT_DEFAULT;
-       args->osd_timeout = 5;  /* seconds */
-       args->mount_attempts = 2;  /* 2 attempts */
+       args->osd_timeout = 5;    /* seconds */
+       args->mount_timeout = 30; /* seconds */
        args->snapdir_name = ".snap";
 
        /* ip1[,ip2...]:/server/path */
@@ -567,8 +567,8 @@ static int parse_mount_args(int flags, char *options, const char *dev_name,
                case Opt_osdtimeout:
                        args->osd_timeout = intval;
                        break;
-               case Opt_mount_attempts:
-                       args->mount_attempts = intval;
+               case Opt_mount_timeout:
+                       args->mount_timeout = intval;
                        break;
                case Opt_unsafewrites:
                        args->flags |= CEPH_MOUNT_UNSAFE_WRITES;
@@ -671,7 +671,8 @@ static int have_all_maps(struct ceph_client *client)
 }
 
 static struct dentry *open_root_dentry(struct ceph_client *client,
-                                      const char *path)
+                                      const char *path,
+                                      unsigned long started)
 {
        struct ceph_mds_client *mdsc = &client->mdsc;
        struct ceph_mds_request *req = 0;
@@ -686,6 +687,8 @@ static struct dentry *open_root_dentry(struct ceph_client *client,
                                       NULL, USE_ANY_MDS);
        if (IS_ERR(req))
                return ERR_PTR(PTR_ERR(req));
+       req->r_started = started;
+       req->r_timeout = client->mount_args.mount_timeout * HZ;
        req->r_expects_cap = 1;
        reqhead = req->r_request->front.iov_base;
        reqhead->args.open.flags = O_DIRECTORY;
@@ -711,7 +714,8 @@ int ceph_mount(struct ceph_client *client, struct vfsmount *mnt,
        struct ceph_msg *mount_msg;
        struct dentry *root;
        int err;
-       int attempts = client->mount_args.mount_attempts;
+       unsigned long timeout = client->mount_args.mount_timeout * HZ;
+       unsigned long started = jiffies;
        int which;
        char r;
 
@@ -736,10 +740,9 @@ int ceph_mount(struct ceph_client *client, struct vfsmount *mnt,
        
        while (!have_all_maps(client)) {
                err = -EIO;
-               if (attempts == 0)
+               if (timeout && time_after_eq(jiffies, started + timeout))
                        goto out;
-               dout(10, "mount sending mount request, %d attempts left\n",
-                    attempts--);
+               dout(10, "mount sending mount request\n");
                get_random_bytes(&r, 1);
                which = r % client->mount_args.num_mon;
                mount_msg = ceph_msg_new(CEPH_MSG_CLIENT_MOUNT, 0, 0, 0, 0);
@@ -753,21 +756,20 @@ int ceph_mount(struct ceph_client *client, struct vfsmount *mnt,
                mount_msg->hdr.dst.addr = client->mount_args.mon_addr[which];
 
                ceph_msg_send(client->msgr, mount_msg, 0);
-               dout(10, "mount from mon%d, %d attempts left\n",
-                    which, attempts);
+               dout(10, "mount from mon%d\n", which);
 
                /* wait */
                dout(10, "mount sent mount request, waiting for maps\n");
                err = wait_event_interruptible_timeout(client->mount_wq,
                                                       have_all_maps(client),
-                                                      6*HZ);
+                                                      5*HZ);
                dout(10, "mount wait got %d\n", err);
                if (err == -EINTR)
                        goto out;
        }
 
        dout(30, "mount opening base mountpoint\n");
-       root = open_root_dentry(client, path);
+       root = open_root_dentry(client, path, started);
        if (IS_ERR(root)) {
                err = PTR_ERR(root);
                goto out;
index e4d56cc53ac75a9fbd97a46674298586470975d5..c4613c0e99ced9ea4d14a7a01dd5f162938498bd 100644 (file)
@@ -101,7 +101,7 @@ static inline unsigned long time_sub(unsigned long a, unsigned long b)
 struct ceph_mount_args {
        int sb_flags;
        int flags;
-       int mount_attempts;
+       int mount_timeout;
        struct ceph_fsid fsid;
        struct ceph_entity_addr my_addr;
        int num_mon;