#ifdef AIO
#include <libaio.h>
#endif
+#ifdef URING
+#include <liburing.h>
+#endif
#include <sys/syscall.h>
#ifndef MAP_FILE
blksize_t block_size = 0;
off_t file_size = 0;
off_t biggest = 0;
-unsigned long testcalls = 0; /* calls to function "test" */
+long long testcalls = 0; /* calls to function "test" */
-unsigned long simulatedopcount = 0; /* -b flag */
+long long simulatedopcount = 0; /* -b flag */
int closeprob = 0; /* -c flag */
int debug = 0; /* -d flag */
-unsigned long debugstart = 0; /* -D flag */
+long long debugstart = 0; /* -D flag */
char filldata = 0; /* -g flag */
int flush = 0; /* -f flag */
int do_fsync = 0; /* -y flag */
int sizechecks = 1; /* -n flag disables them */
int maxoplen = 64 * 1024; /* -o flag */
int quiet = 0; /* -q flag */
-unsigned long progressinterval = 0; /* -p flag */
+long long progressinterval = 0; /* -p flag */
int readbdy = 1; /* -r flag */
int style = 0; /* -s flag */
int prealloc = 0; /* -x flag */
long monitorstart = -1; /* -m flag */
long monitorend = -1; /* -m flag */
int lite = 0; /* -L flag */
-long numops = -1; /* -N flag */
+long long numops = -1; /* -N flag */
int randomoplen = 1; /* -O flag disables it */
int seed = 1; /* -S flag */
int mapped_writes = 1; /* -W flag disables */
int fsxgoodfd = 0;
int o_direct; /* -Z */
int aio = 0;
+int uring = 0;
int mark_nr = 0;
int page_size;
int page_mask;
int mmap_mask;
-#ifdef AIO
-int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
+int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
#define READ 0
#define WRITE 1
-#define fsxread(a,b,c,d) aio_rw(READ, a,b,c,d)
-#define fsxwrite(a,b,c,d) aio_rw(WRITE, a,b,c,d)
-#else
-#define fsxread(a,b,c,d) read(a,b,c)
-#define fsxwrite(a,b,c,d) write(a,b,c)
-#endif
+#define fsxread(a,b,c,d) fsx_rw(READ, a,b,c,d)
+#define fsxwrite(a,b,c,d) fsx_rw(WRITE, a,b,c,d)
const char *replayops = NULL;
const char *recordops = NULL;
(monitorstart == -1 ||
(offset + size > monitorstart &&
(monitorend == -1 || offset <= monitorend))))))
- prt("%lu read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
+ prt("%lld read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
offset, offset + size - 1, size);
ret = lseek(fd, (off_t)offset, SEEK_SET);
if (ret == (off_t)-1) {
(monitorstart == -1 ||
(offset + size > monitorstart &&
(monitorend == -1 || offset <= monitorend))))))
- prt("%lu mapread\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
+ prt("%lld mapread\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
offset, offset + size - 1, size);
pg_offset = offset & PAGE_MASK;
(monitorstart == -1 ||
(offset + size > monitorstart &&
(monitorend == -1 || offset <= monitorend))))))
- prt("%lu write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
+ prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
offset, offset + size - 1, size);
ret = lseek(fd, (off_t)offset, SEEK_SET);
if (ret == (off_t)-1) {
(monitorstart == -1 ||
(offset + size > monitorstart &&
(monitorend == -1 || offset <= monitorend))))))
- prt("%lu mapwrite\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
+ prt("%lld mapwrite\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
offset, offset + size - 1, size);
if (file_size > cur_filesize) {
if (testcalls <= simulatedopcount)
return;
-
+
if ((progressinterval && testcalls % progressinterval == 0) ||
(debug && (monitorstart == -1 || monitorend == -1 ||
size <= monitorend)))
- prt("%lu trunc\tfrom 0x%x to 0x%x\n", testcalls, oldsize, size);
+ prt("%lld trunc\tfrom 0x%x to 0x%x\n", testcalls, oldsize,
+ size);
if (ftruncate(fd, (off_t)size) == -1) {
prt("ftruncate1: %x\n", size);
prterr("dotruncate: ftruncate");
if ((progressinterval && testcalls % progressinterval == 0) ||
(debug && (monitorstart == -1 || monitorend == -1 ||
end_offset <= monitorend))) {
- prt("%lu punch\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
+ prt("%lld punch\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
offset, offset+length, length);
}
if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
unsigned end_offset;
int mode = FALLOC_FL_ZERO_RANGE;
+ if (keep_size)
+ mode |= FALLOC_FL_KEEP_SIZE;
+
if (length == 0) {
if (!quiet && testcalls > simulatedopcount)
prt("skipping zero length zero range\n");
if ((progressinterval && testcalls % progressinterval == 0) ||
(debug && (monitorstart == -1 || monitorend == -1 ||
end_offset <= monitorend))) {
- prt("%lu zero\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
+ prt("%lld zero\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
offset, offset+length, length);
}
if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
}
memset(good_buf + offset, '\0', length);
+
+ if (!keep_size && end_offset > file_size) {
+ /*
+ * If there's a gap between the old file size and the offset of
+ * the zero range operation, fill the gap with zeroes.
+ */
+ if (offset > file_size)
+ memset(good_buf + file_size, '\0', offset - file_size);
+
+ file_size = end_offset;
+ }
}
#else
if ((progressinterval && testcalls % progressinterval == 0) ||
(debug && (monitorstart == -1 || monitorend == -1 ||
end_offset <= monitorend))) {
- prt("%lu collapse\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
- offset, offset+length, length);
+ prt("%lld collapse\tfrom 0x%x to 0x%x, (0x%x bytes)\n",
+ testcalls, offset, offset+length, length);
}
if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
prt("collapse range: 0x%x to 0x%x\n", offset, offset + length);
if ((progressinterval && testcalls % progressinterval == 0) ||
(debug && (monitorstart == -1 || monitorend == -1 ||
end_offset <= monitorend))) {
- prt("%lu insert\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
+ prt("%lld insert\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
offset, offset+length, length);
}
if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
if ((progressinterval && testcalls % progressinterval == 0) ||
(debug && (monitorstart == -1 || monitorend == -1 ||
end_offset <= monitorend)))
- prt("%lu falloc\tfrom 0x%x to 0x%x (0x%x bytes)\n", testcalls,
+ prt("%lld falloc\tfrom 0x%x to 0x%x (0x%x bytes)\n", testcalls,
offset, offset + length, length);
if (fallocate(fd, keep_size ? FALLOC_FL_KEEP_SIZE : 0, (loff_t)offset, (loff_t)length) == -1) {
prt("fallocate: 0x%x to 0x%x\n", offset, offset + length);
return;
if (debug)
- prt("%lu close/open\n", testcalls);
+ prt("%lld close/open\n", testcalls);
if (close(fd)) {
prterr("docloseopen: close");
report_failure(180);
if (testcalls <= simulatedopcount)
return;
if (debug)
- prt("%lu fsync\n", testcalls);
+ prt("%lld fsync\n", testcalls);
log4(OP_FSYNC, 0, 0, 0);
ret = fsync(fd);
if (ret < 0) {
{
if (sig)
prt("signal %d\n", sig);
- prt("testcalls = %lu\n", testcalls);
+ prt("testcalls = %lld\n", testcalls);
exit(sig);
}
return llabs((unsigned long long)off1 - off0) < size;
}
+/*
+ * Prepare the source range and pick a random destination offset for a
+ * range operation that works on two regions of the file.
+ *
+ * The source range is first trimmed against file_size.  When bdy_align is
+ * true the source offset is rounded down to a multiple of readbdy (and the
+ * size too, if o_direct is set) and the destination is rounded down to a
+ * multiple of writebdy; otherwise everything is masked down with
+ * block_size - 1.
+ *
+ * A destination is drawn from random() up to 30 times, until it neither
+ * overlaps the source range nor extends past max_range_end.  If no
+ * candidate is accepted, *size is set to 0 and *dst_offset is left at the
+ * last rejected candidate.
+ */
+static void generate_dest_range(bool bdy_align,
+				unsigned long max_range_end,
+				unsigned long *src_offset,
+				unsigned long *size,
+				unsigned long *dst_offset)
+{
+	int attempts;
+
+	TRIM_OFF_LEN(*src_offset, *size, file_size);
+	if (!bdy_align) {
+		*src_offset &= ~(block_size - 1);
+		*size &= ~(block_size - 1);
+	} else {
+		*src_offset -= *src_offset % readbdy;
+		if (o_direct)
+			*size -= *size % readbdy;
+	}
+
+	for (attempts = 0; ; attempts++) {
+		/* Give up after 30 candidates; a zero size marks failure. */
+		if (attempts >= 30) {
+			*size = 0;
+			return;
+		}
+
+		*dst_offset = random();
+		TRIM_OFF(*dst_offset, max_range_end);
+		if (bdy_align)
+			*dst_offset -= *dst_offset % writebdy;
+		else
+			*dst_offset &= ~(block_size - 1);
+
+		/* Accept a destination that is disjoint and fits the limit. */
+		if (!range_overlaps(*src_offset, *dst_offset, *size) &&
+		    *dst_offset + *size <= max_range_end)
+			return;
+	}
+}
+
+
int
test(void)
{
debug = 1;
if (!quiet && testcalls < simulatedopcount && testcalls % 100000 == 0)
- prt("%lu...\n", testcalls);
+ prt("%lld...\n", testcalls);
if (replayopsf) {
struct log_entry log_entry;
keep_size = random() % 2;
break;
case OP_CLONE_RANGE:
- TRIM_OFF_LEN(offset, size, file_size);
- offset = offset & ~(block_size - 1);
- size = size & ~(block_size - 1);
- do {
- offset2 = random();
- TRIM_OFF(offset2, maxfilelen);
- offset2 = offset2 & ~(block_size - 1);
- } while (range_overlaps(offset, offset2, size) ||
- offset2 + size > maxfilelen);
+ generate_dest_range(false, maxfilelen, &offset, &size, &offset2);
break;
case OP_DEDUPE_RANGE:
- {
- int tries = 0;
-
- TRIM_OFF_LEN(offset, size, file_size);
- offset = offset & ~(block_size - 1);
- size = size & ~(block_size - 1);
- do {
- if (tries++ >= 30) {
- size = 0;
- break;
- }
- offset2 = random();
- TRIM_OFF(offset2, file_size);
- offset2 = offset2 & ~(block_size - 1);
- } while (range_overlaps(offset, offset2, size) ||
- offset2 + size > file_size);
- break;
- }
+ generate_dest_range(false, file_size, &offset, &size, &offset2);
+ break;
case OP_COPY_RANGE:
- TRIM_OFF_LEN(offset, size, file_size);
- offset -= offset % readbdy;
- if (o_direct)
- size -= size % readbdy;
- do {
- offset2 = random();
- TRIM_OFF(offset2, maxfilelen);
- offset2 -= offset2 % writebdy;
- } while (range_overlaps(offset, offset2, size) ||
- offset2 + size > maxfilelen);
+ generate_dest_range(true, maxfilelen, &offset, &size, &offset2);
break;
}
do_punch_hole(offset, size);
break;
case OP_ZERO_RANGE:
- TRIM_OFF_LEN(offset, size, file_size);
+ TRIM_OFF_LEN(offset, size, maxfilelen);
do_zero_range(offset, size, keep_size);
break;
case OP_COLLAPSE_RANGE:
usage(void)
{
fprintf(stdout, "usage: %s",
- "fsx [-dknqxABEFJLOWZ] [-b opnum] [-c Prob] [-g filldata] [-i logdev] [-j logid] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] fname\n\
+ "fsx [-dknqxBEFJLOWZ][-A|-U] [-b opnum] [-c Prob] [-g filldata] [-i logdev] [-j logid] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] fname\n\
-b opnum: beginning operation number (default 1)\n\
-c P: 1 in P chance of file close+open at each op (default infinity)\n\
-d: debug output for all operations\n\
-y synchronize changes to a file\n"
#ifdef AIO
-" -A: Use the AIO system calls\n"
+" -A: Use the AIO system calls, -A excludes -U\n"
#endif
+#ifdef URING
+" -U: Use the IO_URING system calls, -U excludes -A\n"
+ #endif
" -D startingop: debug output starting at specified operation\n"
#ifdef HAVE_LINUX_FALLOC_H
" -F: Do not use fallocate (preallocation) calls\n"
}
-int
+long long
getnum(char *s, char **e)
{
- int ret;
+ long long ret;
*e = (char *) 0;
- ret = strtol(s, e, 0);
+ ret = strtoll(s, e, 0);
if (*e)
switch (**e) {
case 'b':
io_context_t io_ctx;
struct iocb iocb;
-int aio_setup()
+int
+aio_setup()
{
int ret;
ret = io_queue_init(QSZ, &io_ctx);
}
int
-__aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
{
struct io_event event;
static struct timespec ts;
errno = -ret;
return -1;
}
+#else
+/*
+ * Stub used when fsx is built without AIO: report the missing support on
+ * stderr and terminate with exit status 111.  It never returns; the int
+ * return type is spelled out because implicit int is not valid since C99.
+ */
+int
+aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+{
+	fprintf(stderr, "io_rw: need AIO support!\n");
+	exit(111);
+}
+#endif
+
+#ifdef URING
+
+struct io_uring ring;
+#define URING_ENTRIES 1024
+
+/*
+ * Initialise the global io_uring instance "ring" with URING_ENTRIES
+ * submission queue entries.
+ *
+ * Returns 0 on success, or -1 after printing a diagnostic to stderr.
+ */
+int
+uring_setup()
+{
+	int ret;
+
+	ret = io_uring_queue_init(URING_ENTRIES, &ring, 0);
+	if (ret != 0) {
+		/*
+		 * liburing reports failure as a negative errno value, so it
+		 * has to be negated before being handed to strerror().
+		 */
+		fprintf(stderr, "uring_setup: io_uring_queue_init failed: %s\n",
+			strerror(-ret));
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Read or write "len" bytes at file offset "offset" through the global
+ * io_uring instance "ring", one request at a time.
+ *
+ * rw selects the direction (READ or WRITE).  Returns the number of bytes
+ * transferred, which is less than len only if a completion reported zero
+ * bytes.  On failure returns -1 with errno set, following the libc
+ * read()/write() convention.
+ */
+int
+uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct iovec iovec;
+	int ret;
+	int res = 0;
+	char *p = buf;
+	unsigned l = len;
+	unsigned o = offset;
+
+	/*
+	 * io_uring tries non-blocking I/O, so short transfers (especially
+	 * short reads) are normal.  To keep them from being treated as
+	 * failures, keep resubmitting the remainder of the request.
+	 */
+	while (l > 0) {
+		sqe = io_uring_get_sqe(&ring);
+		if (!sqe) {
+			/*
+			 * A NULL sqe means the submission queue is full;
+			 * io_uring_get_sqe() does not set errno, so report
+			 * the condition itself and give the caller a
+			 * meaningful errno.
+			 */
+			fprintf(stderr, "uring_rw: io_uring_get_sqe failed: submission queue full\n");
+			errno = EBUSY;
+			return -1;
+		}
+
+		iovec.iov_base = p;
+		iovec.iov_len = l;
+		if (rw == READ) {
+			io_uring_prep_readv(sqe, fd, &iovec, 1, o);
+		} else {
+			io_uring_prep_writev(sqe, fd, &iovec, 1, o);
+		}
+
+		/* Exactly one request is queued, so exactly one must be submitted. */
+		ret = io_uring_submit_and_wait(&ring, 1);
+		if (ret != 1) {
+			fprintf(stderr, "errcode=%d\n", -ret);
+			fprintf(stderr, "uring %s: io_uring_submit failed: %s\n",
+				rw == READ ? "read":"write", strerror(-ret));
+			goto uring_error;
+		}
+
+		ret = io_uring_wait_cqe(&ring, &cqe);
+		if (ret != 0) {
+			fprintf(stderr, "errcode=%d\n", -ret);
+			fprintf(stderr, "uring %s: io_uring_wait_cqe failed: %s\n",
+				rw == READ ? "read":"write", strerror(-ret));
+			goto uring_error;
+		}
+
+		/* Copy the result out before the cqe is released. */
+		ret = cqe->res;
+		io_uring_cqe_seen(&ring, cqe);
+
+		if (ret > 0) {
+			/* Partial or full progress: advance past what was done. */
+			o += ret;
+			l -= ret;
+			p += ret;
+			res += ret;
+		} else if (ret < 0) {
+			fprintf(stderr, "errcode=%d\n", -ret);
+			fprintf(stderr, "uring %s: io_uring failed: %s\n",
+				rw == READ ? "read":"write", strerror(-ret));
+			goto uring_error;
+		} else {
+			/* Zero-byte completion: stop and return the short count. */
+			fprintf(stderr, "uring %s bad io length: %d instead of %u\n",
+				rw == READ ? "read":"write", res, len);
+			break;
+		}
+	}
+	return res;
+
+ uring_error:
+	/*
+	 * The caller expects error return in traditional libc
+	 * convention, i.e. -1 and the errno set to error.
+	 */
+	errno = -ret;
+	return -1;
+}
+#else
+/*
+ * Stub used when fsx is built without io_uring: print a notice on stderr
+ * and terminate with exit status 111.  It never returns.
+ */
+int
+uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+{
+	fputs("io_rw: need IO_URING support!\n", stderr);
+	exit(111);
+}
+#endif
-int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+int
+fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
{
int ret;
if (aio) {
- ret = __aio_rw(rw, fd, buf, len, offset);
+ ret = aio_rw(rw, fd, buf, len, offset);
+ } else if (uring) {
+ ret = uring_rw(rw, fd, buf, len, offset);
} else {
if (rw == READ)
ret = read(fd, buf, len);
return ret;
}
-#endif
-
#define test_fallocate(mode) __test_fallocate(mode, #mode)
int
setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
while ((ch = getopt_long(argc, argv,
- "b:c:dfg:i:j:kl:m:no:p:qr:s:t:w:xyABD:EFJKHzCILN:OP:RS:WXZ",
+ "b:c:dfg:i:j:kl:m:no:p:qr:s:t:w:xyABD:EFJKHzCILN:OP:RS:UWXZ",
longopts, NULL)) != EOF)
switch (ch) {
case 'b':
simulatedopcount = getnum(optarg, &endp);
if (!quiet)
- prt("Will begin at operation %ld\n", simulatedopcount);
+ prt("Will begin at operation %lld\n",
+ simulatedopcount);
if (simulatedopcount == 0)
usage();
simulatedopcount -= 1;
do_fsync = 1;
break;
case 'A':
- aio = 1;
+ aio = 1;
+ break;
+ case 'U':
+ uring = 1;
break;
case 'D':
debugstart = getnum(optarg, &endp);
randomoplen = 0;
break;
case 'P':
- strncpy(dname, optarg, sizeof(dname));
- strcat(dname, "/");
+ snprintf(dname, sizeof(dname), "%s/", optarg);
dirpath = strlen(dname);
break;
case 'R':
break;
case 255: /* --record-ops */
if (optarg)
- strncpy(opsfile, optarg, sizeof(opsfile));
+ snprintf(opsfile, sizeof(opsfile), "%s", optarg);
recordops = opsfile;
break;
case 256: /* --replay-ops */
if (argc != 1)
usage();
+ if (aio && uring) {
+ fprintf(stderr, "-A and -U shouldn't be used together\n");
+ usage();
+ }
+
if (integrity && !dirpath) {
fprintf(stderr, "option -i <logdev> requires -P <dirpath>\n");
usage();
if (aio)
aio_setup();
#endif
+#ifdef URING
+ if (uring)
+ uring_setup();
+#endif
if (!(o_flags & O_TRUNC)) {
off_t ret;
prterr("close");
report_failure(99);
}
- prt("All %lu operations completed A-OK!\n", testcalls);
+ prt("All %lld operations completed A-OK!\n", testcalls);
if (recordops)
logdump();