#ifdef AIO
#include <libaio.h>
#endif
+#ifdef URING
+#include <liburing.h>
+#endif
+#include <sys/syscall.h>
#ifndef MAP_FILE
# define MAP_FILE 0
OP_ZERO_RANGE,
OP_COLLAPSE_RANGE,
OP_INSERT_RANGE,
+ OP_CLONE_RANGE,
+ OP_DEDUPE_RANGE,
+ OP_COPY_RANGE,
OP_MAX_FULL,
/* integrity operations */
blksize_t block_size = 0;
off_t file_size = 0;
off_t biggest = 0;
-unsigned long testcalls = 0; /* calls to function "test" */
+long long testcalls = 0; /* calls to function "test" */
-unsigned long simulatedopcount = 0; /* -b flag */
+long long simulatedopcount = 0; /* -b flag */
int closeprob = 0; /* -c flag */
int debug = 0; /* -d flag */
-unsigned long debugstart = 0; /* -D flag */
+long long debugstart = 0; /* -D flag */
char filldata = 0; /* -g flag */
int flush = 0; /* -f flag */
int do_fsync = 0; /* -y flag */
int sizechecks = 1; /* -n flag disables them */
int maxoplen = 64 * 1024; /* -o flag */
int quiet = 0; /* -q flag */
-unsigned long progressinterval = 0; /* -p flag */
+long long progressinterval = 0; /* -p flag */
int readbdy = 1; /* -r flag */
int style = 0; /* -s flag */
int prealloc = 0; /* -x flag */
long monitorstart = -1; /* -m flag */
long monitorend = -1; /* -m flag */
int lite = 0; /* -L flag */
-long numops = -1; /* -N flag */
+long long numops = -1; /* -N flag */
int randomoplen = 1; /* -O flag disables it */
int seed = 1; /* -S flag */
int mapped_writes = 1; /* -W flag disables */
int insert_range_calls = 1; /* -I flag disables */
int mapped_reads = 1; /* -R flag disables it */
int check_file = 0; /* -X flag enables */
+int clone_range_calls = 1; /* -J flag disables */
+int dedupe_range_calls = 1; /* -B flag disables */
+int copy_range_calls = 1; /* -E flag disables */
int integrity = 0; /* -i flag */
int fsxgoodfd = 0;
int o_direct; /* -Z */
int aio = 0;
+int uring = 0;
int mark_nr = 0;
int page_size;
int page_mask;
int mmap_mask;
-#ifdef AIO
-int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
+int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
#define READ 0
#define WRITE 1
-#define fsxread(a,b,c,d) aio_rw(READ, a,b,c,d)
-#define fsxwrite(a,b,c,d) aio_rw(WRITE, a,b,c,d)
-#else
-#define fsxread(a,b,c,d) read(a,b,c)
-#define fsxwrite(a,b,c,d) write(a,b,c)
-#endif
+#define fsxread(a,b,c,d) fsx_rw(READ, a,b,c,d)
+#define fsxwrite(a,b,c,d) fsx_rw(WRITE, a,b,c,d)
const char *replayops = NULL;
const char *recordops = NULL;
{
unsigned long ret = (unsigned long)ptr;
- ret = ((ret + align - 1) & ~(align - 1));
+ ret = roundup_64(ret, align);
ret += offset;
return (void *)ret;
}
[OP_ZERO_RANGE] = "zero_range",
[OP_COLLAPSE_RANGE] = "collapse_range",
[OP_INSERT_RANGE] = "insert_range",
+ [OP_CLONE_RANGE] = "clone_range",
+ [OP_DEDUPE_RANGE] = "dedupe_range",
+ [OP_COPY_RANGE] = "copy_range",
[OP_FSYNC] = "fsync",
};
logptr = 0;
}
-
void
logdump(void)
{
count = LOGSIZE;
}
for ( ; count > 0; count--) {
- bool overlap;
+ bool overlap, overlap2;
int opnum;
opnum = i+1 + (logcount/LOGSIZE)*LOGSIZE;
if (overlap)
prt("\t******IIII");
break;
+ case OP_CLONE_RANGE:
+ prt("CLONE 0x%x thru 0x%x\t(0x%x bytes) to 0x%x thru 0x%x",
+ lp->args[0], lp->args[0] + lp->args[1] - 1,
+ lp->args[1],
+ lp->args[2], lp->args[2] + lp->args[1] - 1);
+ overlap2 = badoff >= lp->args[2] &&
+ badoff < lp->args[2] + lp->args[1];
+ if (overlap && overlap2)
+ prt("\tJJJJ**JJJJ");
+ else if (overlap)
+ prt("\tJJJJ******");
+ else if (overlap2)
+ prt("\t******JJJJ");
+ break;
+ case OP_DEDUPE_RANGE:
+ prt("DEDUPE 0x%x thru 0x%x\t(0x%x bytes) to 0x%x thru 0x%x",
+ lp->args[0], lp->args[0] + lp->args[1] - 1,
+ lp->args[1],
+ lp->args[2], lp->args[2] + lp->args[1] - 1);
+ overlap2 = badoff >= lp->args[2] &&
+ badoff < lp->args[2] + lp->args[1];
+ if (overlap && overlap2)
+ prt("\tBBBB**BBBB");
+ else if (overlap)
+ prt("\tBBBB******");
+ else if (overlap2)
+ prt("\t******BBBB");
+ break;
+ case OP_COPY_RANGE:
+ prt("COPY 0x%x thru 0x%x\t(0x%x bytes) to 0x%x thru 0x%x",
+ lp->args[0], lp->args[0] + lp->args[1] - 1,
+ lp->args[1],
+ lp->args[2], lp->args[2] + lp->args[1] - 1);
+ overlap2 = badoff >= lp->args[2] &&
+ badoff < lp->args[2] + lp->args[1];
+ if (overlap && overlap2)
+ prt("\tEEEE**EEEE");
+ else if (overlap)
+ prt("\tEEEE******");
+ else if (overlap2)
+ prt("\t******EEEE");
+ break;
case OP_FSYNC:
prt("FSYNC");
break;
(monitorstart == -1 ||
(offset + size > monitorstart &&
(monitorend == -1 || offset <= monitorend))))))
- prt("%lu read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
+ prt("%lld read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
offset, offset + size - 1, size);
ret = lseek(fd, (off_t)offset, SEEK_SET);
if (ret == (off_t)-1) {
(monitorstart == -1 ||
(offset + size > monitorstart &&
(monitorend == -1 || offset <= monitorend))))))
- prt("%lu mapread\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
+ prt("%lld mapread\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
offset, offset + size - 1, size);
pg_offset = offset & PAGE_MASK;
(monitorstart == -1 ||
(offset + size > monitorstart &&
(monitorend == -1 || offset <= monitorend))))))
- prt("%lu write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
+ prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
offset, offset + size - 1, size);
ret = lseek(fd, (off_t)offset, SEEK_SET);
if (ret == (off_t)-1) {
(monitorstart == -1 ||
(offset + size > monitorstart &&
(monitorend == -1 || offset <= monitorend))))))
- prt("%lu mapwrite\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
+ prt("%lld mapwrite\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
offset, offset + size - 1, size);
if (file_size > cur_filesize) {
if (testcalls <= simulatedopcount)
return;
-
+
if ((progressinterval && testcalls % progressinterval == 0) ||
(debug && (monitorstart == -1 || monitorend == -1 ||
size <= monitorend)))
- prt("%lu trunc\tfrom 0x%x to 0x%x\n", testcalls, oldsize, size);
+ prt("%lld trunc\tfrom 0x%x to 0x%x\n", testcalls, oldsize,
+ size);
if (ftruncate(fd, (off_t)size) == -1) {
prt("ftruncate1: %x\n", size);
prterr("dotruncate: ftruncate");
if ((progressinterval && testcalls % progressinterval == 0) ||
(debug && (monitorstart == -1 || monitorend == -1 ||
end_offset <= monitorend))) {
- prt("%lu punch\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
+ prt("%lld punch\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
offset, offset+length, length);
}
if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
unsigned end_offset;
int mode = FALLOC_FL_ZERO_RANGE;
+ if (keep_size)
+ mode |= FALLOC_FL_KEEP_SIZE;
+
if (length == 0) {
if (!quiet && testcalls > simulatedopcount)
prt("skipping zero length zero range\n");
if ((progressinterval && testcalls % progressinterval == 0) ||
(debug && (monitorstart == -1 || monitorend == -1 ||
end_offset <= monitorend))) {
- prt("%lu zero\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
+ prt("%lld zero\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
offset, offset+length, length);
}
if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
}
memset(good_buf + offset, '\0', length);
+
+ if (!keep_size && end_offset > file_size) {
+ /*
+ * If there's a gap between the old file size and the offset of
+ * the zero range operation, fill the gap with zeroes.
+ */
+ if (offset > file_size)
+ memset(good_buf + file_size, '\0', offset - file_size);
+
+ file_size = end_offset;
+ }
}
#else
if ((progressinterval && testcalls % progressinterval == 0) ||
(debug && (monitorstart == -1 || monitorend == -1 ||
end_offset <= monitorend))) {
- prt("%lu collapse\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
- offset, offset+length, length);
+ prt("%lld collapse\tfrom 0x%x to 0x%x, (0x%x bytes)\n",
+ testcalls, offset, offset+length, length);
}
if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
prt("collapse range: 0x%x to 0x%x\n", offset, offset + length);
if ((progressinterval && testcalls % progressinterval == 0) ||
(debug && (monitorstart == -1 || monitorend == -1 ||
end_offset <= monitorend))) {
- prt("%lu insert\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
+ prt("%lld insert\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
offset, offset+length, length);
}
if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
}
#endif
+#ifdef FICLONERANGE
+/*
+ * Probe whether FICLONERANGE can be used on the test file.
+ *
+ * Issues the ioctl on fd with a request whose only non-zero field is
+ * src_fd.  Returns 0 (after printing a notice unless quiet is set) when
+ * the call fails with EOPNOTSUPP or ENOTTY, and 1 in every other case.
+ */
+int
+test_clone_range(void)
+{
+	struct file_clone_range probe = {
+		.src_fd = fd,
+	};
+	int unsupported;
+
+	unsupported = ioctl(fd, FICLONERANGE, &probe) != 0 &&
+		      (errno == EOPNOTSUPP || errno == ENOTTY);
+	if (!unsupported)
+		return 1;
+
+	if (!quiet)
+		fprintf(stderr,
+			"main: filesystem does not support "
+			"clone range, disabling!\n");
+	return 0;
+}
+
+/*
+ * Clone `length` bytes starting at `offset` to `dest` within the test
+ * file using FICLONERANGE, and mirror the result in good_buf.
+ *
+ * The operation is logged as skipped and nothing else happens when
+ * `length` is zero or `offset` is at or beyond file_size.  Otherwise
+ * `biggest` is raised if the destination end exceeds it and the
+ * operation is logged; the ioctl itself is only issued once testcalls
+ * has passed simulatedopcount.  An ioctl failure is reported through
+ * report_failure(161).
+ */
+void
+do_clone_range(unsigned offset, unsigned length, unsigned dest)
+{
+	struct file_clone_range fcr = {
+		.src_fd = fd,
+		.src_offset = offset,
+		.src_length = length,
+		.dest_offset = dest,
+	};
+
+	if (length == 0) {
+		if (!quiet && testcalls > simulatedopcount)
+			prt("skipping zero length clone range\n");
+		log5(OP_CLONE_RANGE, offset, length, dest, FL_SKIPPED);
+		return;
+	}
+
+	if ((loff_t)offset >= file_size) {
+		if (!quiet && testcalls > simulatedopcount)
+			prt("skipping clone range behind EOF\n");
+		log5(OP_CLONE_RANGE, offset, length, dest, FL_SKIPPED);
+		return;
+	}
+
+	if (dest + length > biggest) {
+		biggest = dest + length;
+		if (!quiet && testcalls > simulatedopcount)
+			prt("cloning to largest ever: 0x%x\n", dest + length);
+	}
+
+	log5(OP_CLONE_RANGE, offset, length, dest, FL_NONE);
+
+	if (testcalls <= simulatedopcount)
+		return;
+
+	if ((progressinterval && testcalls % progressinterval == 0) ||
+	    (debug && (monitorstart == -1 || monitorend == -1 ||
+		       dest <= monitorstart || dest + length <= monitorend))) {
+		/* testcalls is a long long, so it must be printed with %lld */
+		prt("%lld clone\tfrom 0x%x to 0x%x, (0x%x bytes) at 0x%x\n",
+		    testcalls, offset, offset+length, length, dest);
+	}
+
+	if (ioctl(fd, FICLONERANGE, &fcr) == -1) {
+		prt("clone range: 0x%x to 0x%x at 0x%x\n", offset,
+		    offset + length, dest);
+		prterr("do_clone_range: FICLONERANGE");
+		report_failure(161);
+	}
+
+	/* Update the expected image: copy the data, zero any gap past old EOF */
+	memcpy(good_buf + dest, good_buf + offset, length);
+	if (dest > file_size)
+		memset(good_buf + file_size, '\0', dest - file_size);
+	if (dest + length > file_size)
+		file_size = dest + length;
+}
+
+#else
+/* Variant built when FICLONERANGE is not defined: always returns 0. */
+int
+test_clone_range(void)
+{
+ return 0;
+}
+
+/* Variant built when FICLONERANGE is not defined: does nothing. */
+void
+do_clone_range(unsigned offset, unsigned length, unsigned dest)
+{
+ return;
+}
+#endif
+
+#ifdef FIDEDUPERANGE
+/*
+ * Probe whether FIDEDUPERANGE can be used on the test file.
+ *
+ * Temporarily extends the file to two blocks if it is shorter, asks the
+ * kernel to dedupe the first block against the second, and truncates the
+ * file back afterwards.  Returns 0 (after printing a notice unless quiet
+ * is set) when the ioctl, or the per-destination status, reports
+ * EOPNOTSUPP, ENOTTY or EINVAL; returns 1 otherwise.  ftruncate failures
+ * exit with status 132.
+ */
+int
+test_dedupe_range(void)
+{
+	struct file_dedupe_range *req;
+	off_t probe_len;
+	int err = 0;
+	int supported = 1;
+
+	/* One request header followed by a single destination record */
+	req = calloc(sizeof(struct file_dedupe_range_info) +
+		     sizeof(struct file_dedupe_range), 1);
+	if (!req) {
+		prterr("do_dedupe_range: malloc");
+		report_failure(161);
+	}
+
+	/* The probe needs at least two blocks in the file */
+	probe_len = block_size * 2;
+	if (file_size < probe_len && ftruncate(fd, probe_len)) {
+		warn("main: ftruncate");
+		exit(132);
+	}
+
+	/* Dedupe block 0 against block 1 */
+	req->src_length = block_size;
+	req->dest_count = 1;
+	req->info[0].dest_fd = fd;
+	req->info[0].dest_offset = block_size;
+
+	if (ioctl(fd, FIDEDUPERANGE, req))
+		err = errno;
+	else if (req->info[0].status < 0)
+		err = -req->info[0].status;
+
+	switch (err) {
+	case EOPNOTSUPP:
+	case ENOTTY:
+	case EINVAL:	/* Older kernels may return EINVAL... */
+		if (!quiet)
+			fprintf(stderr,
+				"main: filesystem does not support "
+				"dedupe range, disabling!\n");
+		supported = 0;
+		break;
+	default:
+		break;
+	}
+
+	/* Undo the temporary extension, if there was one */
+	if (file_size < probe_len && ftruncate(fd, file_size)) {
+		warn("main: ftruncate");
+		exit(132);
+	}
+
+	free(req);
+	return supported;
+}
+
+/*
+ * Ask the kernel to dedupe `length` bytes at `offset` against the range
+ * starting at `dest` in the same file, using FIDEDUPERANGE.
+ *
+ * The operation is logged as skipped and nothing else happens when
+ * `length` is zero or `offset` is at or beyond file_size.  Otherwise it
+ * is logged, and the ioctl is only issued once testcalls has passed
+ * simulatedopcount.  A failing ioctl, or a negative per-destination
+ * status, is reported through report_failure(161).  good_buf and
+ * file_size are not modified here.
+ */
+void
+do_dedupe_range(unsigned offset, unsigned length, unsigned dest)
+{
+	struct file_dedupe_range *fdr;
+
+	if (length == 0) {
+		if (!quiet && testcalls > simulatedopcount)
+			prt("skipping zero length dedupe range\n");
+		log5(OP_DEDUPE_RANGE, offset, length, dest, FL_SKIPPED);
+		return;
+	}
+
+	if ((loff_t)offset >= file_size) {
+		if (!quiet && testcalls > simulatedopcount)
+			prt("skipping dedupe range behind EOF\n");
+		log5(OP_DEDUPE_RANGE, offset, length, dest, FL_SKIPPED);
+		return;
+	}
+
+	log5(OP_DEDUPE_RANGE, offset, length, dest, FL_NONE);
+
+	if (testcalls <= simulatedopcount)
+		return;
+
+	if ((progressinterval && testcalls % progressinterval == 0) ||
+	    (debug && (monitorstart == -1 || monitorend == -1 ||
+		       dest <= monitorstart || dest + length <= monitorend))) {
+		/* testcalls is a long long, so it must be printed with %lld */
+		prt("%lld dedupe\tfrom 0x%x to 0x%x, (0x%x bytes) at 0x%x\n",
+		    testcalls, offset, offset+length, length, dest);
+	}
+
+	/* One request header followed by a single destination record */
+	fdr = calloc(sizeof(struct file_dedupe_range_info) +
+		     sizeof(struct file_dedupe_range), 1);
+	if (!fdr) {
+		prterr("do_dedupe_range: malloc");
+		report_failure(161);
+	}
+
+	/* Dedupe data blocks */
+	fdr->src_offset = offset;
+	fdr->src_length = length;
+	fdr->dest_count = 1;
+	fdr->info[0].dest_fd = fd;
+	fdr->info[0].dest_offset = dest;
+
+	if (ioctl(fd, FIDEDUPERANGE, fdr) == -1) {
+		prt("dedupe range: 0x%x to 0x%x at 0x%x\n", offset,
+		    offset + length, dest);
+		prterr("do_dedupe_range(0): FIDEDUPERANGE");
+		report_failure(161);
+	} else if (fdr->info[0].status < 0) {
+		/* The ioctl succeeded but this destination carries an error */
+		errno = -fdr->info[0].status;
+		prt("dedupe range: 0x%x to 0x%x at 0x%x\n", offset,
+		    offset + length, dest);
+		prterr("do_dedupe_range(1): FIDEDUPERANGE");
+		report_failure(161);
+	}
+
+	free(fdr);
+}
+
+#else
+/* Variant built when FIDEDUPERANGE is not defined: always returns 0. */
+int
+test_dedupe_range(void)
+{
+ return 0;
+}
+
+/* Variant built when FIDEDUPERANGE is not defined: does nothing. */
+void
+do_dedupe_range(unsigned offset, unsigned length, unsigned dest)
+{
+ return;
+}
+#endif
+
+#ifdef HAVE_COPY_FILE_RANGE
+/*
+ * Probe whether copy_file_range can be used on the test file.
+ *
+ * Issues a one-byte copy from offset 0 to offset 1 of fd through the raw
+ * system call.  Returns 0 (after printing a notice unless quiet is set)
+ * when the call returns -1 with ENOSYS, EOPNOTSUPP or ENOTTY; returns 1
+ * in every other case.
+ */
+int
+test_copy_range(void)
+{
+	loff_t src_pos = 0, dst_pos = 1;
+	long rc;
+
+	rc = syscall(__NR_copy_file_range, fd, &src_pos, fd, &dst_pos, 1, 0);
+	if (rc != -1)
+		return 1;
+	if (errno != ENOSYS && errno != EOPNOTSUPP && errno != ENOTTY)
+		return 1;
+
+	if (!quiet)
+		fprintf(stderr,
+			"main: filesystem does not support "
+			"copy range, disabling!\n");
+	return 0;
+}
+
+/*
+ * Copy `length` bytes starting at `offset` to `dest` within the test
+ * file using the copy_file_range system call, and mirror the result in
+ * good_buf.
+ *
+ * The operation is logged as skipped and nothing else happens when
+ * `length` is zero or `offset` is at or beyond file_size.  Otherwise
+ * `biggest` is raised if the destination end exceeds it and the
+ * operation is logged; the copy is only performed once testcalls has
+ * passed simulatedopcount.  The call is repeated until all bytes are
+ * copied; EAGAIN is retried up to 300 times, and any other error, or a
+ * copy larger than requested, is reported through report_failure(161).
+ */
+void
+do_copy_range(unsigned offset, unsigned length, unsigned dest)
+{
+	loff_t o1, o2;
+	size_t olen;
+	ssize_t nr;
+	int tries = 0;
+
+	if (length == 0) {
+		if (!quiet && testcalls > simulatedopcount)
+			prt("skipping zero length copy range\n");
+		log5(OP_COPY_RANGE, offset, length, dest, FL_SKIPPED);
+		return;
+	}
+
+	if ((loff_t)offset >= file_size) {
+		if (!quiet && testcalls > simulatedopcount)
+			prt("skipping copy range behind EOF\n");
+		log5(OP_COPY_RANGE, offset, length, dest, FL_SKIPPED);
+		return;
+	}
+
+	if (dest + length > biggest) {
+		biggest = dest + length;
+		if (!quiet && testcalls > simulatedopcount)
+			prt("copying to largest ever: 0x%x\n", dest + length);
+	}
+
+	log5(OP_COPY_RANGE, offset, length, dest, FL_NONE);
+
+	if (testcalls <= simulatedopcount)
+		return;
+
+	if ((progressinterval && testcalls % progressinterval == 0) ||
+	    (debug && (monitorstart == -1 || monitorend == -1 ||
+		       dest <= monitorstart || dest + length <= monitorend))) {
+		/* testcalls is a long long, so it must be printed with %lld */
+		prt("%lld copy\tfrom 0x%x to 0x%x, (0x%x bytes) at 0x%x\n",
+		    testcalls, offset, offset+length, length, dest);
+	}
+
+	o1 = offset;
+	o2 = dest;
+	olen = length;
+
+	/*
+	 * NOTE(review): a return value of 0 leaves olen unchanged, so the
+	 * loop simply issues the call again.
+	 */
+	while (olen > 0) {
+		nr = syscall(__NR_copy_file_range, fd, &o1, fd, &o2, olen, 0);
+		if (nr < 0) {
+			if (errno != EAGAIN || tries++ >= 300)
+				break;
+		} else if ((size_t)nr > olen) {
+			prt("copy range: 0x%x to 0x%x at 0x%x\n", offset,
+			    offset + length, dest);
+			/* olen is a size_t and nr an ssize_t: use %zu / %zd */
+			prt("do_copy_range: asked %zu, copied %zd??\n",
+			    olen, nr);
+			report_failure(161);
+		} else if (nr > 0)
+			olen -= nr;
+	}
+	if (nr < 0) {
+		prt("copy range: 0x%x to 0x%x at 0x%x\n", offset,
+		    offset + length, dest);
+		prterr("do_copy_range:");
+		report_failure(161);
+	}
+
+	/* Update the expected image: copy the data, zero any gap past old EOF */
+	memcpy(good_buf + dest, good_buf + offset, length);
+	if (dest > file_size)
+		memset(good_buf + file_size, '\0', dest - file_size);
+	if (dest + length > file_size)
+		file_size = dest + length;
+}
+
+#else
+/* Variant built when HAVE_COPY_FILE_RANGE is not defined: always returns 0. */
+int
+test_copy_range(void)
+{
+ return 0;
+}
+
+/* Variant built when HAVE_COPY_FILE_RANGE is not defined: does nothing. */
+void
+do_copy_range(unsigned offset, unsigned length, unsigned dest)
+{
+ return;
+}
+#endif
+
#ifdef HAVE_LINUX_FALLOC_H
/* fallocate is basically a no-op unless extending, then a lot like a truncate */
void
if ((progressinterval && testcalls % progressinterval == 0) ||
(debug && (monitorstart == -1 || monitorend == -1 ||
end_offset <= monitorend)))
- prt("%lu falloc\tfrom 0x%x to 0x%x (0x%x bytes)\n", testcalls,
+ prt("%lld falloc\tfrom 0x%x to 0x%x (0x%x bytes)\n", testcalls,
offset, offset + length, length);
if (fallocate(fd, keep_size ? FALLOC_FL_KEEP_SIZE : 0, (loff_t)offset, (loff_t)length) == -1) {
prt("fallocate: 0x%x to 0x%x\n", offset, offset + length);
return;
if (debug)
- prt("%lu close/open\n", testcalls);
+ prt("%lld close/open\n", testcalls);
if (close(fd)) {
prterr("docloseopen: close");
report_failure(180);
}
+ if (system("echo 3 > /proc/sys/vm/drop_caches")) {
+ prterr("docloseopen: drop_caches");
+ report_failure(181);
+ }
fd = open(fname, O_RDWR|o_direct, 0);
if (fd < 0) {
prterr("docloseopen: open");
- report_failure(181);
+ report_failure(182);
}
}
if (testcalls <= simulatedopcount)
return;
if (debug)
- prt("%lu fsync\n", testcalls);
+ prt("%lld fsync\n", testcalls);
log4(OP_FSYNC, 0, 0, 0);
ret = fsync(fd);
if (ret < 0) {
{
if (sig)
prt("signal %d\n", sig);
- prt("testcalls = %lu\n", testcalls);
+ prt("testcalls = %lld\n", testcalls);
exit(sig);
}
op_args_count(int operation)
{
switch (operation) {
+ case OP_CLONE_RANGE:
+ case OP_DEDUPE_RANGE:
+ case OP_COPY_RANGE:
+ return 4;
default:
return 3;
}
return 0;
}
+/*
+ * Return true when two ranges of the same length `size`, starting at
+ * `off0` and `off1`, overlap, i.e. when the distance between the two
+ * start offsets is smaller than `size`.
+ */
+static inline bool
+range_overlaps(
+	unsigned long	off0,
+	unsigned long	off1,
+	unsigned long	size)
+{
+	unsigned long long delta = (unsigned long long)off1 - off0;
+
+	/* llabs() receives the difference converted to a signed value */
+	return llabs(delta) < size;
+}
+
+/*
+ * Pick a destination offset for a two-range operation.
+ *
+ * The source range (*src_offset, *size) is first trimmed against
+ * file_size and rounded down: to readbdy when bdy_align is set (the size
+ * only if o_direct is set), otherwise to block_size.  Then up to 30
+ * random destination offsets are tried, each trimmed against
+ * max_range_end and rounded down to writebdy (bdy_align) or block_size.
+ * A candidate is accepted when it does not overlap the source range and
+ * the destination range ends at or before max_range_end.  If no
+ * candidate is accepted, *size is set to 0.
+ */
+static void generate_dest_range(bool bdy_align,
+				unsigned long max_range_end,
+				unsigned long *src_offset,
+				unsigned long *size,
+				unsigned long *dst_offset)
+{
+	int attempt;
+
+	TRIM_OFF_LEN(*src_offset, *size, file_size);
+	if (!bdy_align) {
+		*src_offset = rounddown_64(*src_offset, block_size);
+		*size = rounddown_64(*size, block_size);
+	} else {
+		*src_offset = rounddown_64(*src_offset, readbdy);
+		if (o_direct)
+			*size = rounddown_64(*size, readbdy);
+	}
+
+	for (attempt = 0; ; attempt++) {
+		/* Out of attempts: signal failure with an empty range */
+		if (attempt >= 30) {
+			*size = 0;
+			return;
+		}
+
+		*dst_offset = random();
+		TRIM_OFF(*dst_offset, max_range_end);
+		if (!bdy_align)
+			*dst_offset = rounddown_64(*dst_offset, block_size);
+		else
+			*dst_offset = rounddown_64(*dst_offset, writebdy);
+
+		if (range_overlaps(*src_offset, *dst_offset, *size))
+			continue;
+		if (*dst_offset + *size > max_range_end)
+			continue;
+		return;
+	}
+}
int
test(void)
{
- unsigned long offset;
+ unsigned long offset, offset2;
unsigned long size;
unsigned long rv;
unsigned long op;
debug = 1;
if (!quiet && testcalls < simulatedopcount && testcalls % 100000 == 0)
- prt("%lu...\n", testcalls);
+ prt("%lld...\n", testcalls);
if (replayopsf) {
struct log_entry log_entry;
op = log_entry.operation;
offset = log_entry.args[0];
size = log_entry.args[1];
+ offset2 = log_entry.args[2];
closeopen = !!(log_entry.flags & FL_CLOSE_OPEN);
keep_size = !!(log_entry.flags & FL_KEEP_SIZE);
goto have_op;
closeopen = (rv >> 3) < (1 << 28) / closeprob;
offset = random();
+ offset2 = 0;
size = maxoplen;
if (randomoplen)
size = random() % (maxoplen + 1);
if (zero_range_calls && size && keep_size_calls)
keep_size = random() % 2;
break;
+ case OP_CLONE_RANGE:
+ generate_dest_range(false, maxfilelen, &offset, &size, &offset2);
+ break;
+ case OP_DEDUPE_RANGE:
+ generate_dest_range(false, file_size, &offset, &size, &offset2);
+ break;
+ case OP_COPY_RANGE:
+ generate_dest_range(true, maxfilelen, &offset, &size, &offset2);
+ break;
}
have_op:
goto out;
}
break;
+ case OP_CLONE_RANGE:
+ if (!clone_range_calls) {
+ log5(op, offset, size, offset2, FL_SKIPPED);
+ goto out;
+ }
+ break;
+ case OP_DEDUPE_RANGE:
+ if (!dedupe_range_calls) {
+ log5(op, offset, size, offset2, FL_SKIPPED);
+ goto out;
+ }
+ break;
+ case OP_COPY_RANGE:
+ if (!copy_range_calls) {
+ log5(op, offset, size, offset2, FL_SKIPPED);
+ goto out;
+ }
+ break;
}
switch (op) {
do_punch_hole(offset, size);
break;
case OP_ZERO_RANGE:
- TRIM_OFF_LEN(offset, size, file_size);
+ TRIM_OFF_LEN(offset, size, maxfilelen);
do_zero_range(offset, size, keep_size);
break;
case OP_COLLAPSE_RANGE:
TRIM_OFF_LEN(offset, size, file_size - 1);
- offset = offset & ~(block_size - 1);
- size = size & ~(block_size - 1);
+ offset = rounddown_64(offset, block_size);
+ size = rounddown_64(size, block_size);
if (size == 0) {
log4(OP_COLLAPSE_RANGE, offset, size, FL_SKIPPED);
goto out;
case OP_INSERT_RANGE:
TRIM_OFF(offset, file_size);
TRIM_LEN(file_size, size, maxfilelen);
- offset = offset & ~(block_size - 1);
- size = size & ~(block_size - 1);
+ offset = rounddown_64(offset, block_size);
+ size = rounddown_64(size, block_size);
if (size == 0) {
log4(OP_INSERT_RANGE, offset, size, FL_SKIPPED);
goto out;
do_insert_range(offset, size);
break;
+ case OP_CLONE_RANGE:
+ if (size == 0) {
+ log5(OP_CLONE_RANGE, offset, size, offset2, FL_SKIPPED);
+ goto out;
+ }
+ if (offset2 + size > maxfilelen) {
+ log5(OP_CLONE_RANGE, offset, size, offset2, FL_SKIPPED);
+ goto out;
+ }
+
+ do_clone_range(offset, size, offset2);
+ break;
+ case OP_DEDUPE_RANGE:
+ if (size == 0) {
+ log5(OP_DEDUPE_RANGE, offset, size, offset2, FL_SKIPPED);
+ goto out;
+ }
+ if (offset2 + size > maxfilelen) {
+ log5(OP_DEDUPE_RANGE, offset, size, offset2, FL_SKIPPED);
+ goto out;
+ }
+
+ do_dedupe_range(offset, size, offset2);
+ break;
+ case OP_COPY_RANGE:
+ if (size == 0) {
+ log5(OP_COPY_RANGE, offset, size, offset2, FL_SKIPPED);
+ goto out;
+ }
+ if (offset2 + size > maxfilelen) {
+ log5(OP_COPY_RANGE, offset, size, offset2, FL_SKIPPED);
+ goto out;
+ }
+
+ do_copy_range(offset, size, offset2);
+ break;
case OP_FSYNC:
dofsync();
break;
check_contents();
out:
- if (sizechecks && testcalls > simulatedopcount)
- check_size();
if (closeopen)
docloseopen();
+ if (sizechecks && testcalls > simulatedopcount)
+ check_size();
return 1;
}
usage(void)
{
fprintf(stdout, "usage: %s",
- "fsx [-dknqxAFLOWZ] [-b opnum] [-c Prob] [-g filldata] [-i logdev] [-j logid] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] fname\n\
+ "fsx [-dknqxBEFJLOWZ][-A|-U] [-b opnum] [-c Prob] [-g filldata] [-i logdev] [-j logid] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] fname\n\
-b opnum: beginning operation number (default 1)\n\
-c P: 1 in P chance of file close+open at each op (default infinity)\n\
-d: debug output for all operations\n\
-y synchronize changes to a file\n"
#ifdef AIO
-" -A: Use the AIO system calls\n"
+" -A: Use the AIO system calls, -A excludes -U\n"
#endif
+#ifdef URING
+" -U: Use the IO_URING system calls, -U excludes -A\n"
+ #endif
" -D startingop: debug output starting at specified operation\n"
#ifdef HAVE_LINUX_FALLOC_H
" -F: Do not use fallocate (preallocation) calls\n"
#ifdef FALLOC_FL_INSERT_RANGE
" -I: Do not use insert range calls\n"
#endif
+#ifdef FICLONERANGE
+" -J: Do not use clone range calls\n"
+#endif
+#ifdef FIDEDUPERANGE
+" -B: Do not use dedupe range calls\n"
+#endif
+#ifdef HAVE_COPY_FILE_RANGE
+" -E: Do not use copy range calls\n"
+#endif
" -L: fsxLite - no file creations & no file size changes\n\
-N numops: total # operations to do (default infinity)\n\
-O: use oplen (see -o flag) for every op (default random)\n\
}
-int
+long long
getnum(char *s, char **e)
{
- int ret;
+ long long ret;
*e = (char *) 0;
- ret = strtol(s, e, 0);
+ ret = strtoll(s, e, 0);
if (*e)
switch (**e) {
case 'b':
io_context_t io_ctx;
struct iocb iocb;
-int aio_setup()
+int
+aio_setup()
{
int ret;
ret = io_queue_init(QSZ, &io_ctx);
}
int
-__aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
{
struct io_event event;
static struct timespec ts;
errno = -ret;
return -1;
}
+#else
+/*
+ * Variant used on the #else side of the AIO conditional: prints a
+ * message to stderr and exits with status 111.  It never returns.
+ */
+int
+aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+{
+	fprintf(stderr, "io_rw: need AIO support!\n");
+	exit(111);
+}
+#endif
+
+#ifdef URING
+
+struct io_uring ring;
+#define URING_ENTRIES 1024
+
+/*
+ * Initialise the global io_uring instance `ring` with URING_ENTRIES
+ * entries.
+ *
+ * Returns 0 on success.  On failure, prints the reason to stderr and
+ * returns -1.
+ */
+int
+uring_setup()
+{
+	int ret;
+
+	ret = io_uring_queue_init(URING_ENTRIES, &ring, 0);
+	if (ret != 0) {
+		/* liburing reports failure as -errno; negate for strerror() */
+		fprintf(stderr, "uring_setup: io_uring_queue_init failed: %s\n",
+			strerror(-ret));
+		return -1;
+	}
+	return 0;
+}
-int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+/*
+ * Read (rw == READ) or write (otherwise) `len` bytes between `buf` and
+ * file `fd` at `offset`, submitting one request at a time on the global
+ * `ring` and waiting for each completion.
+ *
+ * Returns the total number of bytes transferred, which is less than
+ * `len` if a completion reports 0 bytes.  Returns -1 on error; on the
+ * uring_error path errno is set to the negated failing return value.
+ */
+int
+uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+{
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe;
+ struct iovec iovec;
+ int ret;
+ int res = 0;
+ char *p = buf;
+ unsigned l = len;
+ unsigned o = offset;
+
+ /*
+ * io_uring attempts non-blocking I/O (especially for reads), which
+ * routinely produces 'normal' short reads. To keep such short
+ * transfers from being treated as failures, loop until all of the
+ * data has been read or written.
+ */
+ while (l > 0) {
+ sqe = io_uring_get_sqe(&ring);
+ if (!sqe) {
+ fprintf(stderr, "uring_rw: io_uring_get_sqe failed: %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ /* Describe the part of the buffer that is still outstanding */
+ iovec.iov_base = p;
+ iovec.iov_len = l;
+ if (rw == READ) {
+ io_uring_prep_readv(sqe, fd, &iovec, 1, o);
+ } else {
+ io_uring_prep_writev(sqe, fd, &iovec, 1, o);
+ }
+
+ /* Exactly one request is expected to be submitted */
+ ret = io_uring_submit_and_wait(&ring, 1);
+ if (ret != 1) {
+ fprintf(stderr, "errcode=%d\n", -ret);
+ fprintf(stderr, "uring %s: io_uring_submit failed: %s\n",
+ rw == READ ? "read":"write", strerror(-ret));
+ goto uring_error;
+ }
+
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ if (ret != 0) {
+ fprintf(stderr, "errcode=%d\n", -ret);
+ fprintf(stderr, "uring %s: io_uring_wait_cqe failed: %s\n",
+ rw == READ ? "read":"write", strerror(-ret));
+ goto uring_error;
+ }
+
+ /* Save the result before marking the completion as seen */
+ ret = cqe->res;
+ io_uring_cqe_seen(&ring, cqe);
+
+ if (ret > 0) {
+ /* Partial or full progress: advance past the bytes transferred */
+ o += ret;
+ l -= ret;
+ p += ret;
+ res += ret;
+ } else if (ret < 0) {
+ fprintf(stderr, "errcode=%d\n", -ret);
+ fprintf(stderr, "uring %s: io_uring failed: %s\n",
+ rw == READ ? "read":"write", strerror(-ret));
+ goto uring_error;
+ } else {
+ /* Zero bytes transferred: stop and return what we have so far */
+ fprintf(stderr, "uring %s bad io length: %d instead of %u\n",
+ rw == READ ? "read":"write", res, len);
+ break;
+ }
+ }
+ return res;
+
+ uring_error:
+ /*
+ * The caller expects errors in the traditional libc convention,
+ * i.e. a return value of -1 with errno set to the error code.
+ */
+ errno = -ret;
+ return -1;
+}
+#else
+/*
+ * Variant built when URING is not defined: prints a message to stderr
+ * and exits with status 111.
+ */
+int
+uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+{
+ fprintf(stderr, "io_rw: need IO_URING support!\n");
+ exit(111);
+}
+#endif
+
+int
+fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
{
int ret;
if (aio) {
- ret = __aio_rw(rw, fd, buf, len, offset);
+ ret = aio_rw(rw, fd, buf, len, offset);
+ } else if (uring) {
+ ret = uring_rw(rw, fd, buf, len, offset);
} else {
if (rw == READ)
ret = read(fd, buf, len);
return ret;
}
-#endif
-
#define test_fallocate(mode) __test_fallocate(mode, #mode)
int
#ifdef HAVE_LINUX_FALLOC_H
int ret = 0;
if (!lite) {
- if (fallocate(fd, mode, file_size, 1) && errno == EOPNOTSUPP) {
+ if (fallocate(fd, mode, file_size, 1) &&
+ (errno == ENOSYS || errno == EOPNOTSUPP)) {
if(!quiet)
fprintf(stderr,
"main: filesystem does not support "
setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
while ((ch = getopt_long(argc, argv,
- "b:c:dfg:i:j:kl:m:no:p:qr:s:t:w:xyAD:FKHzCILN:OP:RS:WXZ",
+ "b:c:dfg:i:j:kl:m:no:p:qr:s:t:w:xyABD:EFJKHzCILN:OP:RS:UWXZ",
longopts, NULL)) != EOF)
switch (ch) {
case 'b':
simulatedopcount = getnum(optarg, &endp);
if (!quiet)
- prt("Will begin at operation %ld\n", simulatedopcount);
+ prt("Will begin at operation %lld\n",
+ simulatedopcount);
if (simulatedopcount == 0)
usage();
simulatedopcount -= 1;
do_fsync = 1;
break;
case 'A':
- aio = 1;
+ aio = 1;
+ break;
+ case 'U':
+ uring = 1;
break;
case 'D':
debugstart = getnum(optarg, &endp);
case 'I':
insert_range_calls = 0;
break;
+ case 'J':
+ clone_range_calls = 0;
+ break;
+ case 'B':
+ dedupe_range_calls = 0;
+ break;
+ case 'E':
+ copy_range_calls = 0;
+ break;
case 'L':
lite = 1;
o_flags &= ~(O_CREAT|O_TRUNC);
randomoplen = 0;
break;
case 'P':
- strncpy(dname, optarg, sizeof(dname));
- strcat(dname, "/");
+ snprintf(dname, sizeof(dname), "%s/", optarg);
dirpath = strlen(dname);
break;
case 'R':
break;
case 255: /* --record-ops */
if (optarg)
- strncpy(opsfile, optarg, sizeof(opsfile));
+ snprintf(opsfile, sizeof(opsfile), "%s", optarg);
recordops = opsfile;
break;
case 256: /* --replay-ops */
if (argc != 1)
usage();
+ if (aio && uring) {
+ fprintf(stderr, "-A and -U shouldn't be used together\n");
+ usage();
+ }
+
if (integrity && !dirpath) {
fprintf(stderr, "option -i <logdev> requires -P <dirpath>\n");
usage();
if (aio)
aio_setup();
#endif
+#ifdef URING
+ if (uring)
+ uring_setup();
+#endif
if (!(o_flags & O_TRUNC)) {
off_t ret;
collapse_range_calls = test_fallocate(FALLOC_FL_COLLAPSE_RANGE);
if (insert_range_calls)
insert_range_calls = test_fallocate(FALLOC_FL_INSERT_RANGE);
+ if (clone_range_calls)
+ clone_range_calls = test_clone_range();
+ if (dedupe_range_calls)
+ dedupe_range_calls = test_dedupe_range();
+ if (copy_range_calls)
+ copy_range_calls = test_copy_range();
while (numops == -1 || numops--)
if (!test())
prterr("close");
report_failure(99);
}
- prt("All %lu operations completed A-OK!\n", testcalls);
+ prt("All %lld operations completed A-OK!\n", testcalls);
if (recordops)
logdump();