From 311d3a49ff43ea8fa0f2f67a6e066731513278fa Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 23 Jan 2009 14:18:16 -0800 Subject: [PATCH] kclient: initiate a sync when doing writepages for O_SYNC writers Include 'startsync' osd op in write if there are any O_SYNC writers pending on the inode. We should eventually do the same if our WRBUFFER cap is being revoked. --- src/include/ceph_fs.h | 4 ++++ src/kernel/addr.c | 4 +++- src/kernel/file.c | 5 +++++ src/kernel/inode.c | 21 ++++++++++++--------- src/kernel/osd_client.c | 27 +++++++++++++++++---------- src/kernel/osd_client.h | 3 ++- src/kernel/super.h | 2 ++ 7 files changed, 45 insertions(+), 21 deletions(-) diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 5c6c456a1ab48..3d17a7a830887 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -1254,6 +1254,10 @@ static inline const char *ceph_osd_op_name(int op) case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads"; case CEPH_OSD_OP_SCRUB: return "scrub"; + case CEPH_OSD_OP_GREP: return "grep"; + case CEPH_OSD_OP_APPEND: return "append"; + case CEPH_OSD_OP_STARTSYNC: return "startsync"; + default: return "???"; } } diff --git a/src/kernel/addr.c b/src/kernel/addr.c index 5d3cdb56c9fa3..eeb3cbf07d5d9 100644 --- a/src/kernel/addr.c +++ b/src/kernel/addr.c @@ -543,6 +543,7 @@ static int ceph_writepages_start(struct address_space *mapping, int rc = 0; unsigned wsize = 1 << inode->i_blkbits; struct ceph_osd_request *req = NULL; + int do_sync = atomic_read(&ci->i_want_sync_writeout); client = ceph_inode_to_client(inode); if (client->mount_state == CEPH_MOUNT_SHUTDOWN) { @@ -700,7 +701,8 @@ get_more_pages: ceph_vino(inode), offset, &len, CEPH_OSD_OP_WRITE, - snapc); + snapc, + do_sync); max_pages = req->r_num_pages; pages = req->r_pages; req->r_callback = writepages_finish; diff --git a/src/kernel/file.c b/src/kernel/file.c index 018618da8e4f6..857b3d4f69e15 100644 --- a/src/kernel/file.c +++ b/src/kernel/file.c @@ -370,6 +370,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, loff_t endoff = pos + iov->iov_len; int got = 0; int ret; + int do_sync = (file->f_flags & O_SYNC) || IS_SYNC(inode); if (ceph_snap(inode) != CEPH_NOSNAP) return -EROFS; @@ -396,12 +397,16 @@ retry_snap: ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, &iocb->ki_pos); } else { + if (do_sync) + atomic_inc(&ci->i_want_sync_writeout); ret = generic_file_aio_write(iocb, iov, nr_segs, pos); if (ret >= 0 && ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL)) { ret = sync_page_range(inode, mapping, pos, ret); } + if (do_sync) + atomic_dec(&ci->i_want_sync_writeout); } if (ret >= 0) ci->i_dirty_caps |= CEPH_CAP_FILE_WR; diff --git a/src/kernel/inode.c b/src/kernel/inode.c index e302f709d4348..c5d18f42a1854 100644 --- a/src/kernel/inode.c +++ b/src/kernel/inode.c @@ -260,20 +260,24 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_caps = RB_ROOT; ci->i_dirty_caps = 0; - for (i = 0; i < CEPH_FILE_MODE_NUM; i++) - ci->i_nr_by_mode[i] = 0; init_waitqueue_head(&ci->i_cap_wq); + ci->i_hold_caps_until = 0; + INIT_LIST_HEAD(&ci->i_cap_delay_list); + ci->i_cap_exporting_mds = 0; + ci->i_cap_exporting_mseq = 0; + ci->i_cap_exporting_issued = 0; INIT_LIST_HEAD(&ci->i_cap_snaps); - ci->i_snap_caps = 0; ci->i_head_snapc = NULL; + ci->i_snap_caps = 0; + for (i = 0; i < CEPH_FILE_MODE_NUM; i++) + ci->i_nr_by_mode[i] = 0; + + ci->i_max_size = 0; + ci->i_reported_size = 0; ci->i_wanted_max_size = 0; ci->i_requested_max_size = 0; - ci->i_cap_exporting_mds = 0; - ci->i_cap_exporting_mseq = 0; - ci->i_cap_exporting_issued = 0; - ci->i_rd_ref = 0; ci->i_rdcache_ref = 0; ci->i_wr_ref = 0; @@ -281,8 +285,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_wrbuffer_ref_head = 0; ci->i_rdcache_gen = 0; ci->i_rdcache_revoking = 0; - ci->i_hold_caps_until = 0; - INIT_LIST_HEAD(&ci->i_cap_delay_list); + atomic_set(&ci->i_want_sync_writeout, 0); ci->i_snap_realm = NULL; INIT_LIST_HEAD(&ci->i_snap_realm_item); diff --git a/src/kernel/osd_client.c b/src/kernel/osd_client.c index 888a80f9b496c..acb6aadfa8702 100644 --- a/src/kernel/osd_client.c +++ b/src/kernel/osd_client.c @@ -89,13 +89,14 @@ void ceph_osdc_put_request(struct ceph_osd_request *req) * build osd request message only. */ static struct ceph_msg *new_request_msg(struct ceph_osd_client *osdc, short opc, - struct ceph_snap_context *snapc) + struct ceph_snap_context *snapc, + int do_sync) { struct ceph_msg *req; struct ceph_osd_request_head *head; struct ceph_osd_op *op; __le64 *snaps; - size_t size = sizeof(*head) + sizeof(*op); + size_t size = sizeof(*head) + (1 + do_sync)*sizeof(*op); int i; if (snapc) @@ -111,9 +112,14 @@ static struct ceph_msg *new_request_msg(struct ceph_osd_client *osdc, short opc, /* encode head */ head->client_inc = cpu_to_le32(1); /* always, for now. */ head->flags = 0; - head->num_ops = cpu_to_le16(1); + head->num_ops = cpu_to_le16(1 + do_sync); op->op = cpu_to_le16(opc); + if (do_sync) { + op++; + op->op = cpu_to_le16(CEPH_OSD_OP_STARTSYNC); + } + if (snapc) { head->snap_seq = cpu_to_le64(snapc->seq); head->num_snaps = cpu_to_le32(snapc->num_snaps); @@ -131,7 +137,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, struct ceph_file_layout *layout, struct ceph_vino vino, u64 off, u64 *plen, int op, - struct ceph_snap_context *snapc) + struct ceph_snap_context *snapc, + int do_sync) { struct ceph_osd_request *req; struct ceph_msg *msg; @@ -143,7 +150,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, if (req == NULL) return ERR_PTR(-ENOMEM); - msg = new_request_msg(osdc, op, snapc); + msg = new_request_msg(osdc, op, snapc, do_sync); if (IS_ERR(msg)) { kfree(req); return ERR_PTR(PTR_ERR(msg)); @@ -804,7 +811,7 @@ int ceph_osdc_sync_read(struct ceph_osd_client *osdc, struct ceph_vino vino, more: req = ceph_osdc_new_request(osdc, layout, vino, off, &len, - CEPH_OSD_OP_READ, NULL); + CEPH_OSD_OP_READ, NULL, 0); if (IS_ERR(req)) return PTR_ERR(req); @@ -876,7 +883,7 @@ int ceph_osdc_readpage(struct ceph_osd_client *osdc, struct ceph_vino vino, dout(10, "readpage on ino %llx.%llx at %lld~%lld\n", vino.ino, vino.snap, off, len); req = ceph_osdc_new_request(osdc, layout, vino, off, &len, - CEPH_OSD_OP_READ, NULL); + CEPH_OSD_OP_READ, NULL, 0); if (IS_ERR(req)) return PTR_ERR(req); BUG_ON(len != PAGE_CACHE_SIZE); @@ -920,7 +927,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, /* alloc request, w/ optimistically-sized page vector */ req = ceph_osdc_new_request(osdc, layout, vino, off, &len, - CEPH_OSD_OP_READ, NULL); + CEPH_OSD_OP_READ, NULL, 0); if (IS_ERR(req)) return PTR_ERR(req); @@ -986,7 +993,7 @@ int ceph_osdc_sync_write(struct ceph_osd_client *osdc, struct ceph_vino vino, more: req = ceph_osdc_new_request(osdc, layout, vino, off, &len, - CEPH_OSD_OP_WRITE, snapc); + CEPH_OSD_OP_WRITE, snapc, 0); if (IS_ERR(req)) return PTR_ERR(req); reqm = req->r_request; @@ -1068,7 +1075,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, BUG_ON(vino.snap != CEPH_NOSNAP); req = ceph_osdc_new_request(osdc, layout, vino, off, &len, - CEPH_OSD_OP_WRITE, snapc); + CEPH_OSD_OP_WRITE, snapc, 0); if (IS_ERR(req)) return PTR_ERR(req); reqm = req->r_request; diff --git a/src/kernel/osd_client.h b/src/kernel/osd_client.h index 7e6aff8caba9c..db757484bc975 100644 --- a/src/kernel/osd_client.h +++ b/src/kernel/osd_client.h @@ -96,7 +96,8 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, struct ceph_file_layout *layout, struct ceph_vino vino, u64 offset, u64 *len, int op, - struct ceph_snap_context *snapc); + struct ceph_snap_context *snapc, + int do_sync); extern void ceph_osdc_put_request(struct ceph_osd_request *req); extern int ceph_osdc_readpage(struct ceph_osd_client *osdc, diff --git a/src/kernel/super.h b/src/kernel/super.h index 9f043e0fd3b5a..b6087e99cc088 100644 --- a/src/kernel/super.h +++ b/src/kernel/super.h @@ -270,6 +270,8 @@ struct ceph_inode_info { pages. */ u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ + atomic_t i_want_sync_writeout; /* non-zero if writepages should sync */ + struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */ struct list_head i_snap_realm_item; struct list_head i_snap_flush_item; -- 2.39.5