From: changtao Date: Fri, 27 Nov 2015 13:54:27 +0000 (+0800) Subject: FileStore:Use pwritev instead of writev X-Git-Tag: v10.1.0~418^2~3 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f1113512d154683dd069409d3f2e11cf93fb334e;p=ceph.git FileStore:Use pwritev instead of writev Signed-off-by: Tao Chang --- diff --git a/src/common/buffer.cc b/src/common/buffer.cc index e2de936d8c43..f3c6f6dfe62a 100644 --- a/src/common/buffer.cc +++ b/src/common/buffer.cc @@ -2007,6 +2007,59 @@ int buffer::list::write_fd(int fd) const return 0; } +int buffer::list::write_fd(int fd, uint64_t offset) const +{ + // use writev! + iovec iov[IOV_MAX]; + int iovlen = 0; + ssize_t bytes = 0; + + std::list::const_iterator p = _buffers.begin(); + while (p != _buffers.end()) { + if (p->length() > 0) { + iov[iovlen].iov_base = (void *)p->c_str(); + iov[iovlen].iov_len = p->length(); + bytes += p->length(); + iovlen++; + } + ++p; + + if (iovlen == IOV_MAX-1 || + p == _buffers.end()) { + iovec *start = iov; + int num = iovlen; + ssize_t wrote; + retry: + wrote = ::pwritev(fd, start, num, offset); + if (wrote < 0) { + int err = errno; + if (err == EINTR) + goto retry; + return -err; + } + offset += wrote; + if (wrote < bytes) { + // partial write, recover! + while ((size_t)wrote >= start[0].iov_len) { + wrote -= start[0].iov_len; + bytes -= start[0].iov_len; + start++; + num--; + } + if (wrote > 0) { + start[0].iov_len -= wrote; + start[0].iov_base = (char *)start[0].iov_base + wrote; + bytes -= wrote; + } + goto retry; + } + iovlen = 0; + bytes = 0; + } + } + return 0; +} + void buffer::list::prepare_iov(std::vector *piov) const { piov->resize(_buffers.size()); diff --git a/src/include/buffer.h b/src/include/buffer.h index 5a8b05f5125f..428f0e3fcacb 100644 --- a/src/include/buffer.h +++ b/src/include/buffer.h @@ -528,6 +528,7 @@ namespace buffer CEPH_BUFFER_API { int read_fd_zero_copy(int fd, size_t len); int write_file(const char *fn, int mode=0644); int write_fd(int fd) const; + int write_fd(int fd, uint64_t offset) const; int write_fd_zero_copy(int fd) const; void prepare_iov(std::vector *piov) const; uint32_t crc32c(uint32_t crc) const; diff --git a/src/os/filestore/FileStore.cc b/src/os/filestore/FileStore.cc index 5cb73e7b01a3..89d728e34a7a 100644 --- a/src/os/filestore/FileStore.cc +++ b/src/os/filestore/FileStore.cc @@ -3188,8 +3188,6 @@ int FileStore::_write(coll_t cid, const ghobject_t& oid, dout(15) << "write " << cid << "/" << oid << " " << offset << "~" << len << dendl; int r; - int64_t actual; - FDRef fd; r = lfn_open(cid, oid, true, &fd); if (r < 0) { @@ -3199,23 +3197,8 @@ int FileStore::_write(coll_t cid, const ghobject_t& oid, goto out; } - // seek - actual = ::lseek64(**fd, offset, SEEK_SET); - if (actual < 0) { - r = -errno; - dout(0) << "write lseek64 to " << offset << " failed: " << cpp_strerror(r) << dendl; - lfn_close(fd); - goto out; - } - if (actual != (int64_t)offset) { - dout(0) << "write lseek64 to " << offset << " gave bad offset " << actual << dendl; - r = -EIO; - lfn_close(fd); - goto out; - } - // write - r = bl.write_fd(**fd); + r = bl.write_fd(**fd, offset); if (r == 0) r = bl.length(); diff --git a/src/test/bufferlist.cc b/src/test/bufferlist.cc index 77ca3ec2750a..06e100748771 100644 --- a/src/test/bufferlist.cc +++ b/src/test/bufferlist.cc @@ -2143,6 +2143,24 @@ TEST(BufferList, write_fd) { ::unlink(FILENAME); } +TEST(BufferList, write_fd_offset) { + ::unlink(FILENAME); + int fd = ::open(FILENAME, O_WRONLY|O_CREAT|O_TRUNC, 0600); + bufferlist bl; + for (unsigned i = 0; i < IOV_MAX * 2; i++) { + bufferptr ptr("A", 1); + bl.push_back(ptr); + } + uint64_t offset = 200; + EXPECT_EQ(0, bl.write_fd(fd, offset)); + ::close(fd); + struct stat st; + memset(&st, 0, sizeof(st)); + ::stat(FILENAME, &st); + EXPECT_EQ(IOV_MAX * 2 + offset, st.st_size); + ::unlink(FILENAME); +} + TEST(BufferList, crc32c) { bufferlist bl; __u32 crc = 0;