From 4679c7dc12c4b962c0d7f799e79bb343cf4b9545 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 19 May 2022 14:26:11 -0400 Subject: [PATCH] libceph: add new ceph_msg_data_type and ceph_osd_data_type ...that uses an iov_iter. Signed-off-by: Jeff Layton --- include/linux/ceph/messenger.h | 7 +++++++ include/linux/ceph/osd_client.h | 6 ++++++ net/ceph/messenger.c | 26 ++++++++++++++++++++++++++ net/ceph/osd_client.c | 27 +++++++++++++++++++++++++++ 4 files changed, 66 insertions(+) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index e7f2fb2fc207..edd27e12e1d9 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -98,6 +98,7 @@ enum ceph_msg_data_type { CEPH_MSG_DATA_BIO, /* data source/destination is a bio list */ #endif /* CONFIG_BLOCK */ CEPH_MSG_DATA_BVECS, /* data source/destination is a bio_vec array */ + CEPH_MSG_DATA_ITER, /* data source/destination is an iov_iter */ }; #ifdef CONFIG_BLOCK @@ -199,6 +200,10 @@ struct ceph_msg_data { bool own_pages; }; struct ceph_pagelist *pagelist; + struct { + struct iov_iter iter; + size_t iter_count; + }; }; }; @@ -575,6 +580,8 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos, #endif /* CONFIG_BLOCK */ void ceph_msg_data_add_bvecs(struct ceph_msg *msg, struct ceph_bvec_iter *bvec_pos); +void ceph_msg_data_add_iter(struct ceph_msg *msg, + struct iov_iter *iter); struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items, gfp_t flags, bool can_fail); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index cba8a6ffc329..5c1884ec88b3 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -59,6 +59,7 @@ enum ceph_osd_data_type { CEPH_OSD_DATA_TYPE_BIO, #endif /* CONFIG_BLOCK */ CEPH_OSD_DATA_TYPE_BVECS, + CEPH_OSD_DATA_TYPE_ITER, }; struct ceph_osd_data { @@ -82,6 +83,9 @@ struct ceph_osd_data { struct ceph_bvec_iter bvec_pos; u32 num_bvecs; }; + struct { + struct iov_iter iter; + }; }; }; @@ -450,6 +454,8 @@ void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req, void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, unsigned int which, struct ceph_bvec_iter *bvec_pos); +void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req, + unsigned int which, struct iov_iter *iter); extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *, unsigned int which, diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index d3bb656308b4..9caa6eaee737 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -997,6 +997,11 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor, * track of which piece is next to process and how much remains to * be processed in that piece. It also tracks whether the current * piece is the last one in the data item. + * + * Except for the CEPH_MSG_DATA_ITER type where we use an iov_iter to track the + * buffer and pass the iterator to sendmsg() with MSG_ZEROCOPY to try and + * transfer as much as the buffer as we can with as few invocations of the TCP + * driver as we can. */ static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor) { @@ -1017,6 +1022,9 @@ static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor) case CEPH_MSG_DATA_BVECS: ceph_msg_data_bvecs_cursor_init(cursor, length); break; + case CEPH_MSG_DATA_ITER: + cursor->last_piece = true; + break; case CEPH_MSG_DATA_NONE: default: /* BUG(); */ @@ -1064,6 +1072,8 @@ struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor, case CEPH_MSG_DATA_BVECS: page = ceph_msg_data_bvecs_next(cursor, page_offset, length); break; + case CEPH_MSG_DATA_ITER: + BUG(); /* Shouldn't get here */ case CEPH_MSG_DATA_NONE: default: page = NULL; @@ -1104,6 +1114,8 @@ void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes) case CEPH_MSG_DATA_BVECS: new_piece = ceph_msg_data_bvecs_advance(cursor, bytes); break; + case CEPH_MSG_DATA_ITER: + BUG(); /* Shouldn't get here */ case CEPH_MSG_DATA_NONE: default: BUG(); @@ -1903,6 +1915,20 @@ void ceph_msg_data_add_bvecs(struct ceph_msg *msg, } EXPORT_SYMBOL(ceph_msg_data_add_bvecs); +void ceph_msg_data_add_iter(struct ceph_msg *msg, + struct iov_iter *iter) +{ + struct ceph_msg_data *data; + + data = ceph_msg_data_add(msg); + data->type = CEPH_MSG_DATA_ITER; + data->iter = *iter; + data->iter_count = iov_iter_count(iter); + + msg->data_length += iov_iter_count(&data->iter); +} +EXPORT_SYMBOL(ceph_msg_data_add_iter); + /* * construct a new message with given type, size * the new msg has a ref count of 1. diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 9d82bb42e958..1743173047d3 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -171,6 +171,13 @@ static void ceph_osd_data_bvecs_init(struct ceph_osd_data *osd_data, osd_data->num_bvecs = num_bvecs; } +static void ceph_osd_iter_init(struct ceph_osd_data *osd_data, + struct iov_iter *iter) +{ + osd_data->type = CEPH_OSD_DATA_TYPE_ITER; + osd_data->iter = *iter; +} + static struct ceph_osd_data * osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which) { @@ -264,6 +271,22 @@ void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, } EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvec_pos); +/** + * osd_req_op_extent_osd_iter - Set up an operation with an iterator buffer + * @osd_req: The request to set up + * @which: ? + * @iter: The buffer iterator + */ +void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req, + unsigned int which, struct iov_iter *iter) +{ + struct ceph_osd_data *osd_data; + + osd_data = osd_req_op_data(osd_req, which, extent, osd_data); + ceph_osd_iter_init(osd_data, iter); +} +EXPORT_SYMBOL(osd_req_op_extent_osd_iter); + static void osd_req_op_cls_request_info_pagelist( struct ceph_osd_request *osd_req, unsigned int which, struct ceph_pagelist *pagelist) @@ -346,6 +369,8 @@ static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data) #endif /* CONFIG_BLOCK */ case CEPH_OSD_DATA_TYPE_BVECS: return osd_data->bvec_pos.iter.bi_size; + case CEPH_OSD_DATA_TYPE_ITER: + return iov_iter_count(&osd_data->iter); default: WARN(true, "unrecognized data type %d\n", (int)osd_data->type); return 0; @@ -951,6 +976,8 @@ static void ceph_osdc_msg_data_add(struct ceph_msg *msg, #endif } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BVECS) { ceph_msg_data_add_bvecs(msg, &osd_data->bvec_pos); + } else if (osd_data->type == CEPH_OSD_DATA_TYPE_ITER) { + ceph_msg_data_add_iter(msg, &osd_data->iter); } else { BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE); } -- 2.47.3