git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
kclient: initiate a sync when doing writepages for O_SYNC writers
author Sage Weil <sage@newdream.net>
Fri, 23 Jan 2009 22:18:16 +0000 (14:18 -0800)
committer Sage Weil <sage@newdream.net>
Fri, 23 Jan 2009 22:18:16 +0000 (14:18 -0800)
Include 'startsync' osd op in write if there are any O_SYNC writers
pending on the inode.

We should eventually do the same if our WRBUFFER cap is being
revoked.

src/include/ceph_fs.h
src/kernel/addr.c
src/kernel/file.c
src/kernel/inode.c
src/kernel/osd_client.c
src/kernel/osd_client.h
src/kernel/super.h

index 5c6c456a1ab486a224d5f67db6ab9a64fee9d7d3..3d17a7a83088734cce53fcd1355cee053789d92f 100644 (file)
@@ -1254,6 +1254,10 @@ static inline const char *ceph_osd_op_name(int op)
        case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads";
        case CEPH_OSD_OP_SCRUB: return "scrub";
 
+       case CEPH_OSD_OP_GREP: return "grep";
+       case CEPH_OSD_OP_APPEND: return "append";
+       case CEPH_OSD_OP_STARTSYNC: return "startsync";
+
        default: return "???";
        }
 }
index 5d3cdb56c9fa3c09f289675db5dc2bfc60ce9a6c..eeb3cbf07d5d9fba69f0ebc102405cb78f8cd8cf 100644 (file)
@@ -543,6 +543,7 @@ static int ceph_writepages_start(struct address_space *mapping,
        int rc = 0;
        unsigned wsize = 1 << inode->i_blkbits;
        struct ceph_osd_request *req = NULL;
+       int do_sync = atomic_read(&ci->i_want_sync_writeout);
 
        client = ceph_inode_to_client(inode);
        if (client->mount_state == CEPH_MOUNT_SHUTDOWN) {
@@ -700,7 +701,8 @@ get_more_pages:
                                                            ceph_vino(inode),
                                                            offset, &len,
                                                            CEPH_OSD_OP_WRITE,
-                                                           snapc);
+                                                           snapc,
+                                                           do_sync);
                                max_pages = req->r_num_pages;
                                pages = req->r_pages;
                                req->r_callback = writepages_finish;
index 018618da8e4f63ebb202c94a62e573d887becb6f..857b3d4f69e15dc9cca4460cfde4fba91fc80ff8 100644 (file)
@@ -370,6 +370,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
        loff_t endoff = pos + iov->iov_len;
        int got = 0;
        int ret;
+       int do_sync = (file->f_flags & O_SYNC) || IS_SYNC(inode);
 
        if (ceph_snap(inode) != CEPH_NOSNAP)
                return -EROFS;
@@ -396,12 +397,16 @@ retry_snap:
                ret = ceph_sync_write(file, iov->iov_base, iov->iov_len,
                        &iocb->ki_pos);
        } else {
+               if (do_sync)
+                       atomic_inc(&ci->i_want_sync_writeout);
                ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
 
                if (ret >= 0 &&
                    ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL)) {
                        ret = sync_page_range(inode, mapping, pos, ret);
                }
+               if (do_sync)
+                       atomic_dec(&ci->i_want_sync_writeout);
        }
        if (ret >= 0)
                ci->i_dirty_caps |= CEPH_CAP_FILE_WR;
index e302f709d43482a82c0a928a19121d8d7bceb3cc..c5d18f42a1854ab3befe4445f7afbde1f6cdb485 100644 (file)
@@ -260,20 +260,24 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 
        ci->i_caps = RB_ROOT;
        ci->i_dirty_caps = 0;
-       for (i = 0; i < CEPH_FILE_MODE_NUM; i++)
-               ci->i_nr_by_mode[i] = 0;
        init_waitqueue_head(&ci->i_cap_wq);
+       ci->i_hold_caps_until = 0;
+       INIT_LIST_HEAD(&ci->i_cap_delay_list);
+       ci->i_cap_exporting_mds = 0;
+       ci->i_cap_exporting_mseq = 0;
+       ci->i_cap_exporting_issued = 0;
        INIT_LIST_HEAD(&ci->i_cap_snaps);
-       ci->i_snap_caps = 0;
        ci->i_head_snapc = NULL;
+       ci->i_snap_caps = 0;
 
+       for (i = 0; i < CEPH_FILE_MODE_NUM; i++)
+               ci->i_nr_by_mode[i] = 0;
+
+       ci->i_max_size = 0;
+       ci->i_reported_size = 0;
        ci->i_wanted_max_size = 0;
        ci->i_requested_max_size = 0;
 
-       ci->i_cap_exporting_mds = 0;
-       ci->i_cap_exporting_mseq = 0;
-       ci->i_cap_exporting_issued = 0;
-
        ci->i_rd_ref = 0;
        ci->i_rdcache_ref = 0;
        ci->i_wr_ref = 0;
@@ -281,8 +285,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
        ci->i_wrbuffer_ref_head = 0;
        ci->i_rdcache_gen = 0;
        ci->i_rdcache_revoking = 0;
-       ci->i_hold_caps_until = 0;
-       INIT_LIST_HEAD(&ci->i_cap_delay_list);
+       atomic_set(&ci->i_want_sync_writeout, 0);
 
        ci->i_snap_realm = NULL;
        INIT_LIST_HEAD(&ci->i_snap_realm_item);
index 888a80f9b496c541a4139ad20c1fbb8b5d8de34a..acb6aadfa8702063852ced2761dfdeaa0bc85e35 100644 (file)
@@ -89,13 +89,14 @@ void ceph_osdc_put_request(struct ceph_osd_request *req)
  * build osd request message only.
  */
 static struct ceph_msg *new_request_msg(struct ceph_osd_client *osdc, short opc,
-                                       struct ceph_snap_context *snapc)
+                                       struct ceph_snap_context *snapc,
+                                       int do_sync)
 {
        struct ceph_msg *req;
        struct ceph_osd_request_head *head;
        struct ceph_osd_op *op;
        __le64 *snaps;
-       size_t size = sizeof(*head) + sizeof(*op);
+       size_t size = sizeof(*head) + (1 + do_sync)*sizeof(*op);
        int i;
 
        if (snapc)
@@ -111,9 +112,14 @@ static struct ceph_msg *new_request_msg(struct ceph_osd_client *osdc, short opc,
        /* encode head */
        head->client_inc = cpu_to_le32(1); /* always, for now. */
        head->flags = 0;
-       head->num_ops = cpu_to_le16(1);
+       head->num_ops = cpu_to_le16(1 + do_sync);
        op->op = cpu_to_le16(opc);
 
+       if (do_sync) {
+               op++;
+               op->op = cpu_to_le16(CEPH_OSD_OP_STARTSYNC);
+       }
+
        if (snapc) {
                head->snap_seq = cpu_to_le64(snapc->seq);
                head->num_snaps = cpu_to_le32(snapc->num_snaps);
@@ -131,7 +137,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
                                               struct ceph_file_layout *layout,
                                               struct ceph_vino vino,
                                               u64 off, u64 *plen, int op,
-                                              struct ceph_snap_context *snapc)
+                                              struct ceph_snap_context *snapc,
+                                              int do_sync)
 {
        struct ceph_osd_request *req;
        struct ceph_msg *msg;
@@ -143,7 +150,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
        if (req == NULL)
                return ERR_PTR(-ENOMEM);
 
-       msg = new_request_msg(osdc, op, snapc);
+       msg = new_request_msg(osdc, op, snapc, do_sync);
        if (IS_ERR(msg)) {
                kfree(req);
                return ERR_PTR(PTR_ERR(msg));
@@ -804,7 +811,7 @@ int ceph_osdc_sync_read(struct ceph_osd_client *osdc, struct ceph_vino vino,
 
 more:
        req = ceph_osdc_new_request(osdc, layout, vino, off, &len,
-                                   CEPH_OSD_OP_READ, NULL);
+                                   CEPH_OSD_OP_READ, NULL, 0);
        if (IS_ERR(req))
                return PTR_ERR(req);
 
@@ -876,7 +883,7 @@ int ceph_osdc_readpage(struct ceph_osd_client *osdc, struct ceph_vino vino,
        dout(10, "readpage on ino %llx.%llx at %lld~%lld\n", vino.ino,
             vino.snap, off, len);
        req = ceph_osdc_new_request(osdc, layout, vino, off, &len,
-                                   CEPH_OSD_OP_READ, NULL);
+                                   CEPH_OSD_OP_READ, NULL, 0);
        if (IS_ERR(req))
                return PTR_ERR(req);
        BUG_ON(len != PAGE_CACHE_SIZE);
@@ -920,7 +927,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
 
        /* alloc request, w/ optimistically-sized page vector */
        req = ceph_osdc_new_request(osdc, layout, vino, off, &len,
-                                   CEPH_OSD_OP_READ, NULL);
+                                   CEPH_OSD_OP_READ, NULL, 0);
        if (IS_ERR(req))
                return PTR_ERR(req);
 
@@ -986,7 +993,7 @@ int ceph_osdc_sync_write(struct ceph_osd_client *osdc, struct ceph_vino vino,
 
 more:
        req = ceph_osdc_new_request(osdc, layout, vino, off, &len,
-                                   CEPH_OSD_OP_WRITE, snapc);
+                                   CEPH_OSD_OP_WRITE, snapc, 0);
        if (IS_ERR(req))
                return PTR_ERR(req);
        reqm = req->r_request;
@@ -1068,7 +1075,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
        BUG_ON(vino.snap != CEPH_NOSNAP);
 
        req = ceph_osdc_new_request(osdc, layout, vino, off, &len,
-                                   CEPH_OSD_OP_WRITE, snapc);
+                                   CEPH_OSD_OP_WRITE, snapc, 0);
        if (IS_ERR(req))
                return PTR_ERR(req);
        reqm = req->r_request;
index 7e6aff8caba9c362ccdbf5ee12611c4f16479ad5..db757484bc975282a359ebe5860dc574461c8a55 100644 (file)
@@ -96,7 +96,8 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
                                      struct ceph_file_layout *layout,
                                      struct ceph_vino vino,
                                      u64 offset, u64 *len, int op,
-                                     struct ceph_snap_context *snapc);
+                                     struct ceph_snap_context *snapc,
+                                     int do_sync);
 extern void ceph_osdc_put_request(struct ceph_osd_request *req);
 
 extern int ceph_osdc_readpage(struct ceph_osd_client *osdc,
index 9f043e0fd3b5a829336d89552103d4045e233e25..b6087e99cc08886429484e86a5450b0570081f56 100644 (file)
@@ -270,6 +270,8 @@ struct ceph_inode_info {
                                   pages. */
        u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
 
+       atomic_t i_want_sync_writeout; /* non-zero if writepages should sync */
+
        struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */
        struct list_head i_snap_realm_item;
        struct list_head i_snap_flush_item;