fsx/fsstress: round blocksize properly
[xfstests-dev.git] / ltp / fsx.c
1 /*
2  *      Copyright (C) 1991, NeXT Computer, Inc.  All Rights Reserverd.
3  *
4  *      File:   fsx.c
5  *      Author: Avadis Tevanian, Jr.
6  *
7  *      File system exerciser. 
8  *
9  *      Rewritten 8/98 by Conrad Minshall.
10  *
11  *      Small changes to work under Linux -- davej.
12  *
13  *      Checks for mmap last-page zero fill.
14  */
15
16 #include "global.h"
17
18 #include <limits.h>
19 #include <time.h>
20 #include <strings.h>
21 #include <sys/file.h>
22 #include <sys/mman.h>
23 #include <stdbool.h>
24 #ifdef HAVE_ERR_H
25 #include <err.h>
26 #endif
27 #include <signal.h>
28 #include <stdio.h>
29 #include <stddef.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <stdarg.h>
33 #include <errno.h>
34 #ifdef AIO
35 #include <libaio.h>
36 #endif
37 #ifdef URING
38 #include <liburing.h>
39 #endif
40 #include <sys/syscall.h>
41
42 #ifndef MAP_FILE
43 # define MAP_FILE 0
44 #endif
45
46 #define NUMPRINTCOLUMNS 32      /* # columns of data to print on each line */
47
48 /* Operation flags */
49
50 enum opflags { FL_NONE = 0, FL_SKIPPED = 1, FL_CLOSE_OPEN = 2, FL_KEEP_SIZE = 4 };
51
52 /*
53  *      A log entry is an operation and a bunch of arguments.
54  */
55
56 struct log_entry {
57         int     operation;
58         int     nr_args;
59         int     args[4];
60         enum opflags flags;
61 };
62
63 #define LOGSIZE 10000
64
65 struct log_entry        oplog[LOGSIZE]; /* the log */
66 int                     logptr = 0;     /* current position in log */
67 int                     logcount = 0;   /* total ops */
68
69 /*
70  * The operation matrix is complex due to conditional execution of different
71  * features. Hence when we come to deciding what operation to run, we need to
72  * be careful in how we select the different operations. The active operations
73  * are mapped to numbers as follows:
74  *
75  *                      lite    !lite   integrity
76  * READ:                0       0       0
77  * WRITE:               1       1       1
78  * MAPREAD:             2       2       2
79  * MAPWRITE:            3       3       3
80  * TRUNCATE:            -       4       4
81  * FALLOCATE:           -       5       5
82  * PUNCH HOLE:          -       6       6
83  * ZERO RANGE:          -       7       7
84  * COLLAPSE RANGE:      -       8       8
85  * FSYNC:               -       -       9
86  *
87  * When mapped read/writes are disabled, they are simply converted to normal
88  * reads and writes. When fallocate/fpunch calls are disabled, they are
89  * skipped.
90  *
91  * Because of the "lite" version, we also need to have different "maximum
92  * operation" defines to allow the ops to be selected correctly based on the
93  * mode being run.
94  */
95
96 enum {
97         /* common operations */
98         OP_READ = 0,
99         OP_WRITE,
100         OP_MAPREAD,
101         OP_MAPWRITE,
102         OP_MAX_LITE,
103
104         /* !lite operations */
105         OP_TRUNCATE = OP_MAX_LITE,
106         OP_FALLOCATE,
107         OP_PUNCH_HOLE,
108         OP_ZERO_RANGE,
109         OP_COLLAPSE_RANGE,
110         OP_INSERT_RANGE,
111         OP_CLONE_RANGE,
112         OP_DEDUPE_RANGE,
113         OP_COPY_RANGE,
114         OP_MAX_FULL,
115
116         /* integrity operations */
117         OP_FSYNC = OP_MAX_FULL,
118         OP_MAX_INTEGRITY,
119 };
120
121 #undef PAGE_SIZE
122 #define PAGE_SIZE       getpagesize()
123 #undef PAGE_MASK
124 #define PAGE_MASK       (PAGE_SIZE - 1)
125
126 char    *original_buf;                  /* a pointer to the original data */
127 char    *good_buf;                      /* a pointer to the correct data */
128 char    *temp_buf;                      /* a pointer to the current data */
129 char    *fname;                         /* name of our test file */
130 char    *bname;                         /* basename of our test file */
131 char    *logdev;                        /* -i flag */
132 char    *logid;                         /* -j flag */
133 char    dname[1024];                    /* -P flag */
134 char    goodfile[PATH_MAX];
135 int     dirpath = 0;                    /* -P flag */
136 int     fd;                             /* fd for our test file */
137
138 blksize_t       block_size = 0;
139 off_t           file_size = 0;
140 off_t           biggest = 0;
141 long long       testcalls = 0;          /* calls to function "test" */
142
143 long long       simulatedopcount = 0;   /* -b flag */
144 int     closeprob = 0;                  /* -c flag */
145 int     debug = 0;                      /* -d flag */
146 long long       debugstart = 0;         /* -D flag */
147 char    filldata = 0;                   /* -g flag */
148 int     flush = 0;                      /* -f flag */
149 int     do_fsync = 0;                   /* -y flag */
150 unsigned long   maxfilelen = 256 * 1024;        /* -l flag */
151 int     sizechecks = 1;                 /* -n flag disables them */
152 int     maxoplen = 64 * 1024;           /* -o flag */
153 int     quiet = 0;                      /* -q flag */
154 long long       progressinterval = 0;   /* -p flag */
155 int     readbdy = 1;                    /* -r flag */
156 int     style = 0;                      /* -s flag */
157 int     prealloc = 0;                   /* -x flag */
158 int     truncbdy = 1;                   /* -t flag */
159 int     writebdy = 1;                   /* -w flag */
160 long    monitorstart = -1;              /* -m flag */
161 long    monitorend = -1;                /* -m flag */
162 int     lite = 0;                       /* -L flag */
163 long long numops = -1;                  /* -N flag */
164 int     randomoplen = 1;                /* -O flag disables it */
165 int     seed = 1;                       /* -S flag */
166 int     mapped_writes = 1;              /* -W flag disables */
167 int     fallocate_calls = 1;            /* -F flag disables */
168 int     keep_size_calls = 1;            /* -K flag disables */
169 int     punch_hole_calls = 1;           /* -H flag disables */
170 int     zero_range_calls = 1;           /* -z flag disables */
171 int     collapse_range_calls = 1;       /* -C flag disables */
172 int     insert_range_calls = 1;         /* -I flag disables */
173 int     mapped_reads = 1;               /* -R flag disables it */
174 int     check_file = 0;                 /* -X flag enables */
175 int     clone_range_calls = 1;          /* -J flag disables */
176 int     dedupe_range_calls = 1;         /* -B flag disables */
177 int     copy_range_calls = 1;           /* -E flag disables */
178 int     integrity = 0;                  /* -i flag */
179 int     fsxgoodfd = 0;
180 int     o_direct;                       /* -Z */
181 int     aio = 0;
182 int     uring = 0;
183 int     mark_nr = 0;
184
185 int page_size;
186 int page_mask;
187 int mmap_mask;
188 int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
189 #define READ 0
190 #define WRITE 1
191 #define fsxread(a,b,c,d)        fsx_rw(READ, a,b,c,d)
192 #define fsxwrite(a,b,c,d)       fsx_rw(WRITE, a,b,c,d)
193
194 const char *replayops = NULL;
195 const char *recordops = NULL;
196 FILE *  fsxlogf = NULL;
197 FILE *  replayopsf = NULL;
198 char opsfile[PATH_MAX];
199 int badoff = -1;
200 int closeopen = 0;
201
202 static void *round_ptr_up(void *ptr, unsigned long align, unsigned long offset)
203 {
204         unsigned long ret = (unsigned long)ptr;
205
206         ret = roundup_64(ret, align);
207         ret += offset;
208         return (void *)ret;
209 }
210
211 void
212 vwarnc(int code, const char *fmt, va_list ap)
213 {
214         if (logid)
215                 fprintf(stderr, "%s: ", logid);
216         fprintf(stderr, "fsx: ");
217         if (fmt != NULL) {
218                 vfprintf(stderr, fmt, ap);
219                 fprintf(stderr, ": ");
220         }
221         fprintf(stderr, "%s\n", strerror(code));
222 }
223
224 void
225 warn(const char * fmt, ...)  {
226         va_list ap;
227         va_start(ap, fmt);
228         vwarnc(errno, fmt, ap);
229         va_end(ap);
230 }
231
232 void
233 prt(const char *fmt, ...)
234 {
235         va_list args;
236
237         if (logid)
238                 fprintf(stdout, "%s: ", logid);
239         va_start(args, fmt);
240         vfprintf(stdout, fmt, args);
241         va_end(args);
242         if (fsxlogf) {
243                 va_start(args, fmt);
244                 vfprintf(fsxlogf, fmt, args);
245                 va_end(args);
246         }
247 }
248
249 void
250 prterr(const char *prefix)
251 {
252         prt("%s%s%s\n", prefix, prefix ? ": " : "", strerror(errno));
253 }
254
255
256 static const char *op_names[] = {
257         [OP_READ] = "read",
258         [OP_WRITE] = "write",
259         [OP_MAPREAD] = "mapread",
260         [OP_MAPWRITE] = "mapwrite",
261         [OP_TRUNCATE] = "truncate",
262         [OP_FALLOCATE] = "fallocate",
263         [OP_PUNCH_HOLE] = "punch_hole",
264         [OP_ZERO_RANGE] = "zero_range",
265         [OP_COLLAPSE_RANGE] = "collapse_range",
266         [OP_INSERT_RANGE] = "insert_range",
267         [OP_CLONE_RANGE] = "clone_range",
268         [OP_DEDUPE_RANGE] = "dedupe_range",
269         [OP_COPY_RANGE] = "copy_range",
270         [OP_FSYNC] = "fsync",
271 };
272
273 static const char *op_name(int operation)
274 {
275         if (operation >= 0 &&
276             operation < sizeof(op_names) / sizeof(op_names[0]))
277                 return op_names[operation];
278         return NULL;
279 }
280
281 static int op_code(const char *name)
282 {
283         int i;
284
285         for (i = 0; i < sizeof(op_names) / sizeof(op_names[0]); i++)
286                 if (op_names[i] && strcmp(name, op_names[i]) == 0)
287                         return i;
288         return -1;
289 }
290
291 void
292 log5(int operation, int arg0, int arg1, int arg2, enum opflags flags)
293 {
294         struct log_entry *le;
295
296         le = &oplog[logptr];
297         le->operation = operation;
298         if (closeopen)
299                 flags |= FL_CLOSE_OPEN;
300         le->args[0] = arg0;
301         le->args[1] = arg1;
302         le->args[2] = arg2;
303         le->args[3] = file_size;
304         le->nr_args = 4;
305         le->flags = flags;
306         logptr++;
307         logcount++;
308         if (logptr >= LOGSIZE)
309                 logptr = 0;
310 }
311
312 void
313 log4(int operation, int arg0, int arg1, enum opflags flags)
314 {
315         struct log_entry *le;
316
317         le = &oplog[logptr];
318         le->operation = operation;
319         if (closeopen)
320                 flags |= FL_CLOSE_OPEN;
321         le->args[0] = arg0;
322         le->args[1] = arg1;
323         le->args[2] = file_size;
324         le->nr_args = 3;
325         le->flags = flags;
326         logptr++;
327         logcount++;
328         if (logptr >= LOGSIZE)
329                 logptr = 0;
330 }
331
332 void
333 logdump(void)
334 {
335         FILE    *logopsf;
336         int     i, count, down;
337         struct log_entry        *lp;
338
339         prt("LOG DUMP (%d total operations):\n", logcount);
340
341         logopsf = fopen(opsfile, "w");
342         if (!logopsf)
343                 prterr(opsfile);
344
345         if (logcount < LOGSIZE) {
346                 i = 0;
347                 count = logcount;
348         } else {
349                 i = logptr;
350                 count = LOGSIZE;
351         }
352         for ( ; count > 0; count--) {
353                 bool overlap, overlap2;
354                 int opnum;
355
356                 opnum = i+1 + (logcount/LOGSIZE)*LOGSIZE;
357                 prt("%d(%3d mod 256): ", opnum, opnum%256);
358                 lp = &oplog[i];
359
360                 overlap = badoff >= lp->args[0] &&
361                           badoff < lp->args[0] + lp->args[1];
362
363                 if (lp->flags & FL_SKIPPED) {
364                         prt("SKIPPED (no operation)");
365                         goto skipped;
366                 }
367
368                 switch (lp->operation) {
369                 case OP_MAPREAD:
370                         prt("MAPREAD  0x%x thru 0x%x\t(0x%x bytes)",
371                             lp->args[0], lp->args[0] + lp->args[1] - 1,
372                             lp->args[1]);
373                         if (overlap)
374                                 prt("\t***RRRR***");
375                         break;
376                 case OP_MAPWRITE:
377                         prt("MAPWRITE 0x%x thru 0x%x\t(0x%x bytes)",
378                             lp->args[0], lp->args[0] + lp->args[1] - 1,
379                             lp->args[1]);
380                         if (overlap)
381                                 prt("\t******WWWW");
382                         break;
383                 case OP_READ:
384                         prt("READ     0x%x thru 0x%x\t(0x%x bytes)",
385                             lp->args[0], lp->args[0] + lp->args[1] - 1,
386                             lp->args[1]);
387                         if (overlap)
388                                 prt("\t***RRRR***");
389                         break;
390                 case OP_WRITE:
391                         prt("WRITE    0x%x thru 0x%x\t(0x%x bytes)",
392                             lp->args[0], lp->args[0] + lp->args[1] - 1,
393                             lp->args[1]);
394                         if (lp->args[0] > lp->args[2])
395                                 prt(" HOLE");
396                         else if (lp->args[0] + lp->args[1] > lp->args[2])
397                                 prt(" EXTEND");
398                         overlap = (badoff >= lp->args[0] ||
399                                    badoff >=lp->args[2]) &&
400                                   badoff < lp->args[0] + lp->args[1];
401                         if (overlap)
402                                 prt("\t***WWWW");
403                         break;
404                 case OP_TRUNCATE:
405                         down = lp->args[1] < lp->args[2];
406                         prt("TRUNCATE %s\tfrom 0x%x to 0x%x",
407                             down ? "DOWN" : "UP", lp->args[2], lp->args[1]);
408                         overlap = badoff >= lp->args[1 + !down] &&
409                                   badoff < lp->args[1 + !!down];
410                         if (overlap)
411                                 prt("\t******WWWW");
412                         break;
413                 case OP_FALLOCATE:
414                         /* 0: offset 1: length 2: where alloced */
415                         prt("FALLOC   0x%x thru 0x%x\t(0x%x bytes) ",
416                                 lp->args[0], lp->args[0] + lp->args[1],
417                                 lp->args[1]);
418                         if (lp->args[0] + lp->args[1] <= lp->args[2])
419                                 prt("INTERIOR");
420                         else if (lp->flags & FL_KEEP_SIZE)
421                                 prt("PAST_EOF");
422                         else
423                                 prt("EXTENDING");
424                         if (overlap)
425                                 prt("\t******FFFF");
426                         break;
427                 case OP_PUNCH_HOLE:
428                         prt("PUNCH    0x%x thru 0x%x\t(0x%x bytes)",
429                             lp->args[0], lp->args[0] + lp->args[1] - 1,
430                             lp->args[1]);
431                         if (overlap)
432                                 prt("\t******PPPP");
433                         break;
434                 case OP_ZERO_RANGE:
435                         prt("ZERO     0x%x thru 0x%x\t(0x%x bytes)",
436                             lp->args[0], lp->args[0] + lp->args[1] - 1,
437                             lp->args[1]);
438                         if (overlap)
439                                 prt("\t******ZZZZ");
440                         break;
441                 case OP_COLLAPSE_RANGE:
442                         prt("COLLAPSE 0x%x thru 0x%x\t(0x%x bytes)",
443                             lp->args[0], lp->args[0] + lp->args[1] - 1,
444                             lp->args[1]);
445                         if (overlap)
446                                 prt("\t******CCCC");
447                         break;
448                 case OP_INSERT_RANGE:
449                         prt("INSERT 0x%x thru 0x%x\t(0x%x bytes)",
450                             lp->args[0], lp->args[0] + lp->args[1] - 1,
451                             lp->args[1]);
452                         if (overlap)
453                                 prt("\t******IIII");
454                         break;
455                 case OP_CLONE_RANGE:
456                         prt("CLONE 0x%x thru 0x%x\t(0x%x bytes) to 0x%x thru 0x%x",
457                             lp->args[0], lp->args[0] + lp->args[1] - 1,
458                             lp->args[1],
459                             lp->args[2], lp->args[2] + lp->args[1] - 1);
460                         overlap2 = badoff >= lp->args[2] &&
461                                   badoff < lp->args[2] + lp->args[1];
462                         if (overlap && overlap2)
463                                 prt("\tJJJJ**JJJJ");
464                         else if (overlap)
465                                 prt("\tJJJJ******");
466                         else if (overlap2)
467                                 prt("\t******JJJJ");
468                         break;
469                 case OP_DEDUPE_RANGE:
470                         prt("DEDUPE 0x%x thru 0x%x\t(0x%x bytes) to 0x%x thru 0x%x",
471                             lp->args[0], lp->args[0] + lp->args[1] - 1,
472                             lp->args[1],
473                             lp->args[2], lp->args[2] + lp->args[1] - 1);
474                         overlap2 = badoff >= lp->args[2] &&
475                                   badoff < lp->args[2] + lp->args[1];
476                         if (overlap && overlap2)
477                                 prt("\tBBBB**BBBB");
478                         else if (overlap)
479                                 prt("\tBBBB******");
480                         else if (overlap2)
481                                 prt("\t******BBBB");
482                         break;
483                 case OP_COPY_RANGE:
484                         prt("COPY 0x%x thru 0x%x\t(0x%x bytes) to 0x%x thru 0x%x",
485                             lp->args[0], lp->args[0] + lp->args[1] - 1,
486                             lp->args[1],
487                             lp->args[2], lp->args[2] + lp->args[1] - 1);
488                         overlap2 = badoff >= lp->args[2] &&
489                                   badoff < lp->args[2] + lp->args[1];
490                         if (overlap && overlap2)
491                                 prt("\tEEEE**EEEE");
492                         else if (overlap)
493                                 prt("\tEEEE******");
494                         else if (overlap2)
495                                 prt("\t******EEEE");
496                         break;
497                 case OP_FSYNC:
498                         prt("FSYNC");
499                         break;
500                 default:
501                         prt("BOGUS LOG ENTRY (operation code = %d)!",
502                             lp->operation);
503                         continue;
504                 }
505
506             skipped:
507                 if (lp->flags & FL_CLOSE_OPEN)
508                         prt("\n\t\tCLOSE/OPEN");
509                 prt("\n");
510                 i++;
511                 if (i == LOGSIZE)
512                         i = 0;
513
514                 if (logopsf) {
515                         int j;
516
517                         if (lp->flags & FL_SKIPPED)
518                                 fprintf(logopsf, "skip ");
519                         fprintf(logopsf, "%s", op_name(lp->operation));
520                         for (j = 0; j < lp->nr_args; j++)
521                                 fprintf(logopsf, " 0x%x", lp->args[j]);
522                         if (lp->flags & FL_KEEP_SIZE)
523                                 fprintf(logopsf, " keep_size");
524                         if (lp->flags & FL_CLOSE_OPEN)
525                                 fprintf(logopsf, " close_open");
526                         if (overlap)
527                                 fprintf(logopsf, " *");
528                         fprintf(logopsf, "\n");
529                 }
530         }
531
532         if (logopsf) {
533                 if (fclose(logopsf) != 0)
534                         prterr(opsfile);
535                 else
536                         prt("Log of operations saved to \"%s\"; "
537                             "replay with --replay-ops\n",
538                             opsfile);
539         }
540 }
541
542
543 void
544 save_buffer(char *buffer, off_t bufferlength, int fd)
545 {
546         off_t ret;
547         ssize_t byteswritten;
548
549         if (fd <= 0 || bufferlength == 0)
550                 return;
551
552         if (bufferlength > SSIZE_MAX) {
553                 prt("fsx flaw: overflow in save_buffer\n");
554                 exit(67);
555         }
556         if (lite) {
557                 off_t size_by_seek = lseek(fd, (off_t)0, SEEK_END);
558                 if (size_by_seek == (off_t)-1)
559                         prterr("save_buffer: lseek eof");
560                 else if (bufferlength > size_by_seek) {
561                         warn("save_buffer: .fsxgood file too short... will save 0x%llx bytes instead of 0x%llx\n", (unsigned long long)size_by_seek,
562                              (unsigned long long)bufferlength);
563                         bufferlength = size_by_seek;
564                 }
565         }
566
567         ret = lseek(fd, (off_t)0, SEEK_SET);
568         if (ret == (off_t)-1)
569                 prterr("save_buffer: lseek 0");
570         
571         byteswritten = write(fd, buffer, (size_t)bufferlength);
572         if (byteswritten != bufferlength) {
573                 if (byteswritten == -1)
574                         prterr("save_buffer write");
575                 else
576                         warn("save_buffer: short write, 0x%x bytes instead of 0x%llx\n",
577                              (unsigned)byteswritten,
578                              (unsigned long long)bufferlength);
579         }
580 }
581
582
583 void
584 report_failure(int status)
585 {
586         logdump();
587         
588         if (fsxgoodfd) {
589                 if (good_buf) {
590                         save_buffer(good_buf, file_size, fsxgoodfd);
591                         prt("Correct content saved for comparison\n");
592                         prt("(maybe hexdump \"%s\" vs \"%s\")\n",
593                             fname, goodfile);
594                 }
595                 close(fsxgoodfd);
596         }
597         exit(status);
598 }
599
600
601 #define short_at(cp) ((unsigned short)((*((unsigned char *)(cp)) << 8) | \
602                                         *(((unsigned char *)(cp)) + 1)))
603
604 void
605 mark_log(void)
606 {
607         char command[256];
608         int ret;
609
610         snprintf(command, 256, "dmsetup message %s 0 mark %s.mark%d", logdev,
611                  bname, mark_nr);
612         ret = system(command);
613         if (ret) {
614                 prterr("dmsetup mark failed");
615                 exit(211);
616         }
617 }
618
619 void
620 dump_fsync_buffer(void)
621 {
622         char fname_buffer[PATH_MAX];
623         int good_fd;
624
625         if (!good_buf)
626                 return;
627
628         snprintf(fname_buffer, sizeof(fname_buffer), "%s%s.mark%d", dname,
629                  bname, mark_nr);
630         good_fd = open(fname_buffer, O_WRONLY|O_CREAT|O_TRUNC, 0666);
631         if (good_fd < 0) {
632                 prterr(fname_buffer);
633                 exit(212);
634         }
635
636         save_buffer(good_buf, file_size, good_fd);
637         close(good_fd);
638         prt("Dumped fsync buffer to %s\n", fname_buffer + dirpath);
639 }
640
641 void
642 check_buffers(char *buf, unsigned offset, unsigned size)
643 {
644         unsigned char c, t;
645         unsigned i = 0;
646         unsigned n = 0;
647         unsigned op = 0;
648         unsigned bad = 0;
649
650         if (memcmp(good_buf + offset, buf, size) != 0) {
651                 prt("READ BAD DATA: offset = 0x%x, size = 0x%x, fname = %s\n",
652                     offset, size, fname);
653                 prt("OFFSET\tGOOD\tBAD\tRANGE\n");
654                 while (size > 0) {
655                         c = good_buf[offset];
656                         t = buf[i];
657                         if (c != t) {
658                                 if (n < 16) {
659                                         bad = short_at(&buf[i]);
660                                         prt("0x%05x\t0x%04x\t0x%04x", offset,
661                                             short_at(&good_buf[offset]), bad);
662                                         op = buf[offset & 1 ? i+1 : i];
663                                         prt("\t0x%05x\n", n);
664                                         if (op)
665                                                 prt("operation# (mod 256) for "
666                                                   "the bad data may be %u\n",
667                                                 ((unsigned)op & 0xff));
668                                         else
669                                                 prt("operation# (mod 256) for "
670                                                   "the bad data unknown, check"
671                                                   " HOLE and EXTEND ops\n");
672                                 }
673                                 n++;
674                                 badoff = offset;
675                         }
676                         offset++;
677                         i++;
678                         size--;
679                 }
680                 report_failure(110);
681         }
682 }
683
684
685 void
686 check_size(void)
687 {
688         struct stat     statbuf;
689         off_t   size_by_seek;
690
691         if (fstat(fd, &statbuf)) {
692                 prterr("check_size: fstat");
693                 statbuf.st_size = -1;
694         }
695         size_by_seek = lseek(fd, (off_t)0, SEEK_END);
696         if (file_size != statbuf.st_size || file_size != size_by_seek) {
697                 prt("Size error: expected 0x%llx stat 0x%llx seek 0x%llx\n",
698                     (unsigned long long)file_size,
699                     (unsigned long long)statbuf.st_size,
700                     (unsigned long long)size_by_seek);
701                 report_failure(120);
702         }
703 }
704
705
706 void
707 check_trunc_hack(void)
708 {
709         struct stat statbuf;
710         off_t offset = file_size + (off_t)100000;
711
712         if (ftruncate(fd, file_size))
713                 goto ftruncate_err;
714         if (ftruncate(fd, offset))
715                 goto ftruncate_err;
716         fstat(fd, &statbuf);
717         if (statbuf.st_size != offset) {
718                 prt("no extend on truncate! not posix!\n");
719                 exit(130);
720         }
721         if (ftruncate(fd, file_size)) {
722 ftruncate_err:
723                 prterr("check_trunc_hack: ftruncate");
724                 exit(131);
725         }
726 }
727
728 void
729 doflush(unsigned offset, unsigned size)
730 {
731         unsigned pg_offset;
732         unsigned map_size;
733         char    *p;
734
735         if (o_direct == O_DIRECT)
736                 return;
737
738         pg_offset = offset & mmap_mask;
739         map_size  = pg_offset + size;
740
741         if ((p = (char *)mmap(0, map_size, PROT_READ | PROT_WRITE,
742                               MAP_FILE | MAP_SHARED, fd,
743                               (off_t)(offset - pg_offset))) == (char *)-1) {
744                 prterr("doflush: mmap");
745                 report_failure(202);
746         }
747         if (msync(p, map_size, MS_INVALIDATE) != 0) {
748                 prterr("doflush: msync");
749                 report_failure(203);
750         }
751         if (munmap(p, map_size) != 0) {
752                 prterr("doflush: munmap");
753                 report_failure(204);
754         }
755 }
756
757 void
758 doread(unsigned offset, unsigned size)
759 {
760         off_t ret;
761         unsigned iret;
762
763         offset -= offset % readbdy;
764         if (o_direct)
765                 size -= size % readbdy;
766         if (size == 0) {
767                 if (!quiet && testcalls > simulatedopcount && !o_direct)
768                         prt("skipping zero size read\n");
769                 log4(OP_READ, offset, size, FL_SKIPPED);
770                 return;
771         }
772         if (size + offset > file_size) {
773                 if (!quiet && testcalls > simulatedopcount)
774                         prt("skipping seek/read past end of file\n");
775                 log4(OP_READ, offset, size, FL_SKIPPED);
776                 return;
777         }
778
779         log4(OP_READ, offset, size, FL_NONE);
780
781         if (testcalls <= simulatedopcount)
782                 return;
783
784         if (!quiet &&
785                 ((progressinterval && testcalls % progressinterval == 0)  ||
786                 (debug &&
787                        (monitorstart == -1 ||
788                         (offset + size > monitorstart &&
789                         (monitorend == -1 || offset <= monitorend))))))
790                 prt("%lld read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
791                     offset, offset + size - 1, size);
792         ret = lseek(fd, (off_t)offset, SEEK_SET);
793         if (ret == (off_t)-1) {
794                 prterr("doread: lseek");
795                 report_failure(140);
796         }
797         iret = fsxread(fd, temp_buf, size, offset);
798         if (iret != size) {
799                 if (iret == -1)
800                         prterr("doread: read");
801                 else
802                         prt("short read: 0x%x bytes instead of 0x%x\n",
803                             iret, size);
804                 report_failure(141);
805         }
806         check_buffers(temp_buf, offset, size);
807 }
808
809 void
810 check_eofpage(char *s, unsigned offset, char *p, int size)
811 {
812         unsigned long last_page, should_be_zero;
813
814         if (offset + size <= (file_size & ~page_mask))
815                 return;
816         /*
817          * we landed in the last page of the file
818          * test to make sure the VM system provided 0's 
819          * beyond the true end of the file mapping
820          * (as required by mmap def in 1996 posix 1003.1)
821          */
822         last_page = ((unsigned long)p + (offset & page_mask) + size) & ~page_mask;
823
824         for (should_be_zero = last_page + (file_size & page_mask);
825              should_be_zero < last_page + page_size;
826              should_be_zero++)
827                 if (*(char *)should_be_zero) {
828                         prt("Mapped %s: non-zero data past EOF (0x%llx) page offset 0x%x is 0x%04x\n",
829                             s, file_size - 1, should_be_zero & page_mask,
830                             short_at(should_be_zero));
831                         report_failure(205);
832                 }
833 }
834
835 void
836 check_contents(void)
837 {
838         static char *check_buf;
839         unsigned offset = 0;
840         unsigned size = file_size;
841         unsigned map_offset;
842         unsigned map_size;
843         char *p;
844         off_t ret;
845         unsigned iret;
846
847         if (!check_buf) {
848                 check_buf = (char *) malloc(maxfilelen + writebdy);
849                 assert(check_buf != NULL);
850                 check_buf = round_ptr_up(check_buf, writebdy, 0);
851                 memset(check_buf, '\0', maxfilelen);
852         }
853
854         if (o_direct)
855                 size -= size % readbdy;
856         if (size == 0)
857                 return;
858
859         ret = lseek(fd, (off_t)offset, SEEK_SET);
860         if (ret == (off_t)-1) {
861                 prterr("doread: lseek");
862                 report_failure(140);
863         }
864
865         iret = fsxread(fd, check_buf, size, offset);
866         if (iret != size) {
867                 if (iret == -1)
868                         prterr("check_contents: read");
869                 else
870                         prt("short check read: 0x%x bytes instead of 0x%x\n",
871                             iret, size);
872                 report_failure(141);
873         }
874         check_buffers(check_buf, offset, size);
875
876         /* Map eof page, check it */
877         map_offset = size - (size & PAGE_MASK);
878         if (map_offset == size)
879                 map_offset -= PAGE_SIZE;
880         map_size  = size - map_offset;
881
882         p = mmap(0, map_size, PROT_READ, MAP_SHARED, fd, map_offset);
883         if (p == MAP_FAILED) {
884                 prterr("check_contents: mmap");
885                 report_failure(190);
886         }
887         check_eofpage("check_contents", map_offset, p, map_size);
888
889         if (munmap(p, map_size) != 0) {
890                 prterr("check_contents: munmap");
891                 report_failure(191);
892         }
893 }
894
895 void
896 domapread(unsigned offset, unsigned size)
897 {
898         unsigned pg_offset;
899         unsigned map_size;
900         char    *p;
901
902         offset -= offset % readbdy;
903         if (size == 0) {
904                 if (!quiet && testcalls > simulatedopcount)
905                         prt("skipping zero size read\n");
906                 log4(OP_MAPREAD, offset, size, FL_SKIPPED);
907                 return;
908         }
909         if (size + offset > file_size) {
910                 if (!quiet && testcalls > simulatedopcount)
911                         prt("skipping seek/read past end of file\n");
912                 log4(OP_MAPREAD, offset, size, FL_SKIPPED);
913                 return;
914         }
915
916         log4(OP_MAPREAD, offset, size, FL_NONE);
917
918         if (testcalls <= simulatedopcount)
919                 return;
920
921         if (!quiet &&
922                 ((progressinterval && testcalls % progressinterval == 0) ||
923                        (debug &&
924                        (monitorstart == -1 ||
925                         (offset + size > monitorstart &&
926                         (monitorend == -1 || offset <= monitorend))))))
927                 prt("%lld mapread\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
928                     offset, offset + size - 1, size);
929
930         pg_offset = offset & PAGE_MASK;
931         map_size  = pg_offset + size;
932
933         if ((p = (char *)mmap(0, map_size, PROT_READ, MAP_SHARED, fd,
934                               (off_t)(offset - pg_offset))) == (char *)-1) {
935                 prterr("domapread: mmap");
936                 report_failure(190);
937         }
938         memcpy(temp_buf, p + pg_offset, size);
939
940         check_eofpage("Read", offset, p, size);
941
942         if (munmap(p, map_size) != 0) {
943                 prterr("domapread: munmap");
944                 report_failure(191);
945         }
946
947         check_buffers(temp_buf, offset, size);
948 }
949
950
951 void
952 gendata(char *original_buf, char *good_buf, unsigned offset, unsigned size)
953 {
954         while (size--) {
955                 if (filldata) {
956                         good_buf[offset] = filldata;
957                 } else {
958                         good_buf[offset] = testcalls % 256;
959                         if (offset % 2)
960                                 good_buf[offset] += original_buf[offset];
961                 }
962                 offset++;
963         }
964 }
965
966
967 void
968 dowrite(unsigned offset, unsigned size)
969 {
970         off_t ret;
971         unsigned iret;
972
973         offset -= offset % writebdy;
974         if (o_direct)
975                 size -= size % writebdy;
976         if (size == 0) {
977                 if (!quiet && testcalls > simulatedopcount && !o_direct)
978                         prt("skipping zero size write\n");
979                 log4(OP_WRITE, offset, size, FL_SKIPPED);
980                 return;
981         }
982
983         log4(OP_WRITE, offset, size, FL_NONE);
984
985         gendata(original_buf, good_buf, offset, size);
986         if (file_size < offset + size) {
987                 if (file_size < offset)
988                         memset(good_buf + file_size, '\0', offset - file_size);
989                 file_size = offset + size;
990                 if (lite) {
991                         warn("Lite file size bug in fsx!");
992                         report_failure(149);
993                 }
994         }
995
996         if (testcalls <= simulatedopcount)
997                 return;
998
999         if (!quiet &&
1000                 ((progressinterval && testcalls % progressinterval == 0) ||
1001                        (debug &&
1002                        (monitorstart == -1 ||
1003                         (offset + size > monitorstart &&
1004                         (monitorend == -1 || offset <= monitorend))))))
1005                 prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
1006                     offset, offset + size - 1, size);
1007         ret = lseek(fd, (off_t)offset, SEEK_SET);
1008         if (ret == (off_t)-1) {
1009                 prterr("dowrite: lseek");
1010                 report_failure(150);
1011         }
1012         iret = fsxwrite(fd, good_buf + offset, size, offset);
1013         if (iret != size) {
1014                 if (iret == -1)
1015                         prterr("dowrite: write");
1016                 else
1017                         prt("short write: 0x%x bytes instead of 0x%x\n",
1018                             iret, size);
1019                 report_failure(151);
1020         }
1021         if (do_fsync) {
1022                 if (fsync(fd)) {
1023                         prt("fsync() failed: %s\n", strerror(errno));
1024                         report_failure(152);
1025                 }
1026         }
1027         if (flush) {
1028                 doflush(offset, size);
1029         }
1030 }
1031
1032
1033 void
1034 domapwrite(unsigned offset, unsigned size)
1035 {
1036         unsigned pg_offset;
1037         unsigned map_size;
1038         off_t    cur_filesize;
1039         char    *p;
1040
1041         offset -= offset % writebdy;
1042         if (size == 0) {
1043                 if (!quiet && testcalls > simulatedopcount)
1044                         prt("skipping zero size write\n");
1045                 log4(OP_MAPWRITE, offset, size, FL_SKIPPED);
1046                 return;
1047         }
1048         cur_filesize = file_size;
1049
1050         log4(OP_MAPWRITE, offset, size, FL_NONE);
1051
1052         gendata(original_buf, good_buf, offset, size);
1053         if (file_size < offset + size) {
1054                 if (file_size < offset)
1055                         memset(good_buf + file_size, '\0', offset - file_size);
1056                 file_size = offset + size;
1057                 if (lite) {
1058                         warn("Lite file size bug in fsx!");
1059                         report_failure(200);
1060                 }
1061         }
1062
1063         if (testcalls <= simulatedopcount)
1064                 return;
1065
1066         if (!quiet &&
1067                 ((progressinterval && testcalls % progressinterval == 0) ||
1068                        (debug &&
1069                        (monitorstart == -1 ||
1070                         (offset + size > monitorstart &&
1071                         (monitorend == -1 || offset <= monitorend))))))
1072                 prt("%lld mapwrite\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
1073                     offset, offset + size - 1, size);
1074
1075         if (file_size > cur_filesize) {
1076                 if (ftruncate(fd, file_size) == -1) {
1077                         prterr("domapwrite: ftruncate");
1078                         exit(201);
1079                 }
1080         }
1081         pg_offset = offset & PAGE_MASK;
1082         map_size  = pg_offset + size;
1083
1084         if ((p = (char *)mmap(0, map_size, PROT_READ | PROT_WRITE,
1085                               MAP_FILE | MAP_SHARED, fd,
1086                               (off_t)(offset - pg_offset))) == (char *)-1) {
1087                 prterr("domapwrite: mmap");
1088                 report_failure(202);
1089         }
1090         memcpy(p + pg_offset, good_buf + offset, size);
1091         if (msync(p, map_size, MS_SYNC) != 0) {
1092                 prterr("domapwrite: msync");
1093                 report_failure(203);
1094         }
1095
1096         check_eofpage("Write", offset, p, size);
1097
1098         if (munmap(p, map_size) != 0) {
1099                 prterr("domapwrite: munmap");
1100                 report_failure(204);
1101         }
1102 }
1103
1104
1105 void
1106 dotruncate(unsigned size)
1107 {
1108         int oldsize = file_size;
1109
1110         size -= size % truncbdy;
1111         if (size > biggest) {
1112                 biggest = size;
1113                 if (!quiet && testcalls > simulatedopcount)
1114                         prt("truncating to largest ever: 0x%x\n", size);
1115         }
1116
1117         log4(OP_TRUNCATE, 0, size, FL_NONE);
1118
1119         if (size > file_size)
1120                 memset(good_buf + file_size, '\0', size - file_size);
1121         file_size = size;
1122
1123         if (testcalls <= simulatedopcount)
1124                 return;
1125
1126         if ((progressinterval && testcalls % progressinterval == 0) ||
1127             (debug && (monitorstart == -1 || monitorend == -1 ||
1128                       size <= monitorend)))
1129                 prt("%lld trunc\tfrom 0x%x to 0x%x\n", testcalls, oldsize,
1130                                 size);
1131         if (ftruncate(fd, (off_t)size) == -1) {
1132                 prt("ftruncate1: %x\n", size);
1133                 prterr("dotruncate: ftruncate");
1134                 report_failure(160);
1135         }
1136 }
1137
1138 #ifdef FALLOC_FL_PUNCH_HOLE
1139 void
1140 do_punch_hole(unsigned offset, unsigned length)
1141 {
1142         unsigned end_offset;
1143         int max_offset = 0;
1144         int max_len = 0;
1145         int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
1146
1147         if (length == 0) {
1148                 if (!quiet && testcalls > simulatedopcount)
1149                         prt("skipping zero length punch hole\n");
1150                 log4(OP_PUNCH_HOLE, offset, length, FL_SKIPPED);
1151                 return;
1152         }
1153
1154         if (file_size <= (loff_t)offset) {
1155                 if (!quiet && testcalls > simulatedopcount)
1156                         prt("skipping hole punch off the end of the file\n");
1157                 log4(OP_PUNCH_HOLE, offset, length, FL_SKIPPED);
1158                 return;
1159         }
1160
1161         end_offset = offset + length;
1162
1163         log4(OP_PUNCH_HOLE, offset, length, FL_NONE);
1164
1165         if (testcalls <= simulatedopcount)
1166                 return;
1167
1168         if ((progressinterval && testcalls % progressinterval == 0) ||
1169             (debug && (monitorstart == -1 || monitorend == -1 ||
1170                       end_offset <= monitorend))) {
1171                 prt("%lld punch\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
1172                         offset, offset+length, length);
1173         }
1174         if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
1175                 prt("punch hole: 0x%x to 0x%x\n", offset, offset + length);
1176                 prterr("do_punch_hole: fallocate");
1177                 report_failure(161);
1178         }
1179
1180
1181         max_offset = offset < file_size ? offset : file_size;
1182         max_len = max_offset + length <= file_size ? length :
1183                         file_size - max_offset;
1184         memset(good_buf + max_offset, '\0', max_len);
1185 }
1186
1187 #else
1188 void
1189 do_punch_hole(unsigned offset, unsigned length)
1190 {
1191         return;
1192 }
1193 #endif
1194
1195 #ifdef FALLOC_FL_ZERO_RANGE
1196 void
1197 do_zero_range(unsigned offset, unsigned length, int keep_size)
1198 {
1199         unsigned end_offset;
1200         int mode = FALLOC_FL_ZERO_RANGE;
1201
1202         if (keep_size)
1203                 mode |= FALLOC_FL_KEEP_SIZE;
1204
1205         if (length == 0) {
1206                 if (!quiet && testcalls > simulatedopcount)
1207                         prt("skipping zero length zero range\n");
1208                 log4(OP_ZERO_RANGE, offset, length, FL_SKIPPED |
1209                      (keep_size ? FL_KEEP_SIZE : FL_NONE));
1210                 return;
1211         }
1212
1213         end_offset = keep_size ? 0 : offset + length;
1214
1215         if (end_offset > biggest) {
1216                 biggest = end_offset;
1217                 if (!quiet && testcalls > simulatedopcount)
1218                         prt("zero_range to largest ever: 0x%x\n", end_offset);
1219         }
1220
1221         /*
1222          * last arg matches fallocate string array index in logdump:
1223          *      0: allocate past EOF
1224          *      1: extending prealloc
1225          *      2: interior prealloc
1226          */
1227         log4(OP_ZERO_RANGE, offset, length,
1228              keep_size ? FL_KEEP_SIZE : FL_NONE);
1229
1230         if (testcalls <= simulatedopcount)
1231                 return;
1232
1233         if ((progressinterval && testcalls % progressinterval == 0) ||
1234             (debug && (monitorstart == -1 || monitorend == -1 ||
1235                       end_offset <= monitorend))) {
1236                 prt("%lld zero\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
1237                         offset, offset+length, length);
1238         }
1239         if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
1240                 prt("zero range: 0x%x to 0x%x\n", offset, offset + length);
1241                 prterr("do_zero_range: fallocate");
1242                 report_failure(161);
1243         }
1244
1245         memset(good_buf + offset, '\0', length);
1246
1247         if (!keep_size && end_offset > file_size) {
1248                 /*
1249                  * If there's a gap between the old file size and the offset of
1250                  * the zero range operation, fill the gap with zeroes.
1251                  */
1252                 if (offset > file_size)
1253                         memset(good_buf + file_size, '\0', offset - file_size);
1254
1255                 file_size = end_offset;
1256         }
1257 }
1258
1259 #else
1260 void
1261 do_zero_range(unsigned offset, unsigned length, int keep_size)
1262 {
1263         return;
1264 }
1265 #endif
1266
1267 #ifdef FALLOC_FL_COLLAPSE_RANGE
1268 void
1269 do_collapse_range(unsigned offset, unsigned length)
1270 {
1271         unsigned end_offset;
1272         int mode = FALLOC_FL_COLLAPSE_RANGE;
1273
1274         if (length == 0) {
1275                 if (!quiet && testcalls > simulatedopcount)
1276                         prt("skipping zero length collapse range\n");
1277                 log4(OP_COLLAPSE_RANGE, offset, length, FL_SKIPPED);
1278                 return;
1279         }
1280
1281         end_offset = offset + length;
1282         if ((loff_t)end_offset >= file_size) {
1283                 if (!quiet && testcalls > simulatedopcount)
1284                         prt("skipping collapse range behind EOF\n");
1285                 log4(OP_COLLAPSE_RANGE, offset, length, FL_SKIPPED);
1286                 return;
1287         }
1288
1289         log4(OP_COLLAPSE_RANGE, offset, length, FL_NONE);
1290
1291         if (testcalls <= simulatedopcount)
1292                 return;
1293
1294         if ((progressinterval && testcalls % progressinterval == 0) ||
1295             (debug && (monitorstart == -1 || monitorend == -1 ||
1296                       end_offset <= monitorend))) {
1297                 prt("%lld collapse\tfrom 0x%x to 0x%x, (0x%x bytes)\n",
1298                                 testcalls, offset, offset+length, length);
1299         }
1300         if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
1301                 prt("collapse range: 0x%x to 0x%x\n", offset, offset + length);
1302                 prterr("do_collapse_range: fallocate");
1303                 report_failure(161);
1304         }
1305
1306         memmove(good_buf + offset, good_buf + end_offset,
1307                 file_size - end_offset);
1308         file_size -= length;
1309 }
1310
1311 #else
1312 void
1313 do_collapse_range(unsigned offset, unsigned length)
1314 {
1315         return;
1316 }
1317 #endif
1318
1319 #ifdef FALLOC_FL_INSERT_RANGE
1320 void
1321 do_insert_range(unsigned offset, unsigned length)
1322 {
1323         unsigned end_offset;
1324         int mode = FALLOC_FL_INSERT_RANGE;
1325
1326         if (length == 0) {
1327                 if (!quiet && testcalls > simulatedopcount)
1328                         prt("skipping zero length insert range\n");
1329                 log4(OP_INSERT_RANGE, offset, length, FL_SKIPPED);
1330                 return;
1331         }
1332
1333         if ((loff_t)offset >= file_size) {
1334                 if (!quiet && testcalls > simulatedopcount)
1335                         prt("skipping insert range behind EOF\n");
1336                 log4(OP_INSERT_RANGE, offset, length, FL_SKIPPED);
1337                 return;
1338         }
1339
1340         log4(OP_INSERT_RANGE, offset, length, FL_NONE);
1341
1342         if (testcalls <= simulatedopcount)
1343                 return;
1344
1345         end_offset = offset + length;
1346         if ((progressinterval && testcalls % progressinterval == 0) ||
1347             (debug && (monitorstart == -1 || monitorend == -1 ||
1348                       end_offset <= monitorend))) {
1349                 prt("%lld insert\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
1350                         offset, offset+length, length);
1351         }
1352         if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
1353                 prt("insert range: 0x%x to 0x%x\n", offset, offset + length);
1354                 prterr("do_insert_range: fallocate");
1355                 report_failure(161);
1356         }
1357
1358         memmove(good_buf + end_offset, good_buf + offset,
1359                 file_size - offset);
1360         memset(good_buf + offset, '\0', length);
1361         file_size += length;
1362 }
1363
1364 #else
1365 void
1366 do_insert_range(unsigned offset, unsigned length)
1367 {
1368         return;
1369 }
1370 #endif
1371
1372 #ifdef FICLONERANGE
1373 int
1374 test_clone_range(void)
1375 {
1376         struct file_clone_range fcr = {
1377                 .src_fd = fd,
1378         };
1379
1380         if (ioctl(fd, FICLONERANGE, &fcr) &&
1381             (errno == EOPNOTSUPP || errno == ENOTTY)) {
1382                 if (!quiet)
1383                         fprintf(stderr,
1384                                 "main: filesystem does not support "
1385                                 "clone range, disabling!\n");
1386                 return 0;
1387         }
1388
1389         return 1;
1390 }
1391
1392 void
1393 do_clone_range(unsigned offset, unsigned length, unsigned dest)
1394 {
1395         struct file_clone_range fcr = {
1396                 .src_fd = fd,
1397                 .src_offset = offset,
1398                 .src_length = length,
1399                 .dest_offset = dest,
1400         };
1401
1402         if (length == 0) {
1403                 if (!quiet && testcalls > simulatedopcount)
1404                         prt("skipping zero length clone range\n");
1405                 log5(OP_CLONE_RANGE, offset, length, dest, FL_SKIPPED);
1406                 return;
1407         }
1408
1409         if ((loff_t)offset >= file_size) {
1410                 if (!quiet && testcalls > simulatedopcount)
1411                         prt("skipping clone range behind EOF\n");
1412                 log5(OP_CLONE_RANGE, offset, length, dest, FL_SKIPPED);
1413                 return;
1414         }
1415
1416         if (dest + length > biggest) {
1417                 biggest = dest + length;
1418                 if (!quiet && testcalls > simulatedopcount)
1419                         prt("cloning to largest ever: 0x%x\n", dest + length);
1420         }
1421
1422         log5(OP_CLONE_RANGE, offset, length, dest, FL_NONE);
1423
1424         if (testcalls <= simulatedopcount)
1425                 return;
1426
1427         if ((progressinterval && testcalls % progressinterval == 0) ||
1428             (debug && (monitorstart == -1 || monitorend == -1 ||
1429                        dest <= monitorstart || dest + length <= monitorend))) {
1430                 prt("%lu clone\tfrom 0x%x to 0x%x, (0x%x bytes) at 0x%x\n",
1431                         testcalls, offset, offset+length, length, dest);
1432         }
1433
1434         if (ioctl(fd, FICLONERANGE, &fcr) == -1) {
1435                 prt("clone range: 0x%x to 0x%x at 0x%x\n", offset,
1436                                 offset + length, dest);
1437                 prterr("do_clone_range: FICLONERANGE");
1438                 report_failure(161);
1439         }
1440
1441         memcpy(good_buf + dest, good_buf + offset, length);
1442         if (dest > file_size)
1443                 memset(good_buf + file_size, '\0', dest - file_size);
1444         if (dest + length > file_size)
1445                 file_size = dest + length;
1446 }
1447
1448 #else
1449 int
1450 test_clone_range(void)
1451 {
1452         return 0;
1453 }
1454
1455 void
1456 do_clone_range(unsigned offset, unsigned length, unsigned dest)
1457 {
1458         return;
1459 }
1460 #endif
1461
1462 #ifdef FIDEDUPERANGE
1463 int
1464 test_dedupe_range(void)
1465 {
1466         struct file_dedupe_range *fdr;
1467         off_t new_len;
1468         int error;
1469         int ret = 1;
1470
1471         /* Alloc memory */
1472         fdr = calloc(sizeof(struct file_dedupe_range_info) +
1473                      sizeof(struct file_dedupe_range), 1);
1474         if (!fdr) {
1475                 prterr("do_dedupe_range: malloc");
1476                 report_failure(161);
1477         }
1478
1479         /* Make sure we have at least two blocks */
1480         new_len = block_size * 2;
1481         if (file_size < new_len && ftruncate(fd, new_len)) {
1482                 warn("main: ftruncate");
1483                 exit(132);
1484         }
1485
1486         /* Try to dedupe them */
1487         fdr->src_length = block_size;
1488         fdr->dest_count = 1;
1489         fdr->info[0].dest_fd = fd;
1490         fdr->info[0].dest_offset = block_size;
1491
1492         if (ioctl(fd, FIDEDUPERANGE, fdr))
1493                 error = errno;
1494         else if (fdr->info[0].status < 0)
1495                 error = -fdr->info[0].status;
1496         else
1497                 error = 0;
1498
1499         /* Older kernels may return EINVAL... */
1500         if (error == EOPNOTSUPP || error == ENOTTY || error == EINVAL) {
1501                 if (!quiet)
1502                         fprintf(stderr,
1503                                 "main: filesystem does not support "
1504                                 "dedupe range, disabling!\n");
1505                 ret = 0;
1506         }
1507
1508         /* Put the file back the way it was. */
1509         if (file_size < new_len && ftruncate(fd, file_size)) {
1510                 warn("main: ftruncate");
1511                 exit(132);
1512         }
1513
1514         free(fdr);
1515         return ret;
1516 }
1517
1518 void
1519 do_dedupe_range(unsigned offset, unsigned length, unsigned dest)
1520 {
1521         struct file_dedupe_range *fdr;
1522
1523         if (length == 0) {
1524                 if (!quiet && testcalls > simulatedopcount)
1525                         prt("skipping zero length dedupe range\n");
1526                 log5(OP_DEDUPE_RANGE, offset, length, dest, FL_SKIPPED);
1527                 return;
1528         }
1529
1530         if ((loff_t)offset >= file_size) {
1531                 if (!quiet && testcalls > simulatedopcount)
1532                         prt("skipping dedupe range behind EOF\n");
1533                 log5(OP_DEDUPE_RANGE, offset, length, dest, FL_SKIPPED);
1534                 return;
1535         }
1536
1537         log5(OP_DEDUPE_RANGE, offset, length, dest, FL_NONE);
1538
1539         if (testcalls <= simulatedopcount)
1540                 return;
1541
1542         if ((progressinterval && testcalls % progressinterval == 0) ||
1543             (debug && (monitorstart == -1 || monitorend == -1 ||
1544                        dest <= monitorstart || dest + length <= monitorend))) {
1545                 prt("%lu dedupe\tfrom 0x%x to 0x%x, (0x%x bytes) at 0x%x\n",
1546                         testcalls, offset, offset+length, length, dest);
1547         }
1548
1549         /* Alloc memory */
1550         fdr = calloc(sizeof(struct file_dedupe_range_info) +
1551                      sizeof(struct file_dedupe_range), 1);
1552         if (!fdr) {
1553                 prterr("do_dedupe_range: malloc");
1554                 report_failure(161);
1555         }
1556
1557         /* Dedupe data blocks */
1558         fdr->src_offset = offset;
1559         fdr->src_length = length;
1560         fdr->dest_count = 1;
1561         fdr->info[0].dest_fd = fd;
1562         fdr->info[0].dest_offset = dest;
1563
1564         if (ioctl(fd, FIDEDUPERANGE, fdr) == -1) {
1565                 prt("dedupe range: 0x%x to 0x%x at 0x%x\n", offset,
1566                                 offset + length, dest);
1567                 prterr("do_dedupe_range(0): FIDEDUPERANGE");
1568                 report_failure(161);
1569         } else if (fdr->info[0].status < 0) {
1570                 errno = -fdr->info[0].status;
1571                 prt("dedupe range: 0x%x to 0x%x at 0x%x\n", offset,
1572                                 offset + length, dest);
1573                 prterr("do_dedupe_range(1): FIDEDUPERANGE");
1574                 report_failure(161);
1575         }
1576
1577         free(fdr);
1578 }
1579
1580 #else
1581 int
1582 test_dedupe_range(void)
1583 {
1584         return 0;
1585 }
1586
1587 void
1588 do_dedupe_range(unsigned offset, unsigned length, unsigned dest)
1589 {
1590         return;
1591 }
1592 #endif
1593
1594 #ifdef HAVE_COPY_FILE_RANGE
1595 int
1596 test_copy_range(void)
1597 {
1598         loff_t o1 = 0, o2 = 1;
1599
1600         if (syscall(__NR_copy_file_range, fd, &o1, fd, &o2, 1, 0) == -1 &&
1601             (errno == ENOSYS || errno == EOPNOTSUPP || errno == ENOTTY)) {
1602                 if (!quiet)
1603                         fprintf(stderr,
1604                                 "main: filesystem does not support "
1605                                 "copy range, disabling!\n");
1606                 return 0;
1607         }
1608
1609         return 1;
1610 }
1611
1612 void
1613 do_copy_range(unsigned offset, unsigned length, unsigned dest)
1614 {
1615         loff_t o1, o2;
1616         size_t olen;
1617         ssize_t nr;
1618         int tries = 0;
1619
1620         if (length == 0) {
1621                 if (!quiet && testcalls > simulatedopcount)
1622                         prt("skipping zero length copy range\n");
1623                 log5(OP_COPY_RANGE, offset, length, dest, FL_SKIPPED);
1624                 return;
1625         }
1626
1627         if ((loff_t)offset >= file_size) {
1628                 if (!quiet && testcalls > simulatedopcount)
1629                         prt("skipping copy range behind EOF\n");
1630                 log5(OP_COPY_RANGE, offset, length, dest, FL_SKIPPED);
1631                 return;
1632         }
1633
1634         if (dest + length > biggest) {
1635                 biggest = dest + length;
1636                 if (!quiet && testcalls > simulatedopcount)
1637                         prt("copying to largest ever: 0x%x\n", dest + length);
1638         }
1639
1640         log5(OP_COPY_RANGE, offset, length, dest, FL_NONE);
1641
1642         if (testcalls <= simulatedopcount)
1643                 return;
1644
1645         if ((progressinterval && testcalls % progressinterval == 0) ||
1646             (debug && (monitorstart == -1 || monitorend == -1 ||
1647                        dest <= monitorstart || dest + length <= monitorend))) {
1648                 prt("%lu copy\tfrom 0x%x to 0x%x, (0x%x bytes) at 0x%x\n",
1649                         testcalls, offset, offset+length, length, dest);
1650         }
1651
1652         o1 = offset;
1653         o2 = dest;
1654         olen = length;
1655
1656         while (olen > 0) {
1657                 nr = syscall(__NR_copy_file_range, fd, &o1, fd, &o2, olen, 0);
1658                 if (nr < 0) {
1659                         if (errno != EAGAIN || tries++ >= 300)
1660                                 break;
1661                 } else if (nr > olen) {
1662                         prt("copy range: 0x%x to 0x%x at 0x%x\n", offset,
1663                                         offset + length, dest);
1664                         prt("do_copy_range: asked %u, copied %u??\n",
1665                                         olen, nr);
1666                         report_failure(161);
1667                 } else if (nr > 0)
1668                         olen -= nr;
1669         }
1670         if (nr < 0) {
1671                 prt("copy range: 0x%x to 0x%x at 0x%x\n", offset,
1672                                 offset + length, dest);
1673                 prterr("do_copy_range:");
1674                 report_failure(161);
1675         }
1676
1677         memcpy(good_buf + dest, good_buf + offset, length);
1678         if (dest > file_size)
1679                 memset(good_buf + file_size, '\0', dest - file_size);
1680         if (dest + length > file_size)
1681                 file_size = dest + length;
1682 }
1683
1684 #else
1685 int
1686 test_copy_range(void)
1687 {
1688         return 0;
1689 }
1690
1691 void
1692 do_copy_range(unsigned offset, unsigned length, unsigned dest)
1693 {
1694         return;
1695 }
1696 #endif
1697
1698 #ifdef HAVE_LINUX_FALLOC_H
1699 /* fallocate is basically a no-op unless extending, then a lot like a truncate */
1700 void
1701 do_preallocate(unsigned offset, unsigned length, int keep_size)
1702 {
1703         unsigned end_offset;
1704
1705         if (length == 0) {
1706                 if (!quiet && testcalls > simulatedopcount)
1707                         prt("skipping zero length fallocate\n");
1708                 log4(OP_FALLOCATE, offset, length, FL_SKIPPED |
1709                      (keep_size ? FL_KEEP_SIZE : FL_NONE));
1710                 return;
1711         }
1712
1713         end_offset = keep_size ? 0 : offset + length;
1714
1715         if (end_offset > biggest) {
1716                 biggest = end_offset;
1717                 if (!quiet && testcalls > simulatedopcount)
1718                         prt("fallocating to largest ever: 0x%x\n", end_offset);
1719         }
1720
1721         /*
1722          * last arg matches fallocate string array index in logdump:
1723          *      0: allocate past EOF
1724          *      1: extending prealloc
1725          *      2: interior prealloc
1726          */
1727         log4(OP_FALLOCATE, offset, length,
1728              keep_size ? FL_KEEP_SIZE : FL_NONE);
1729
1730         if (end_offset > file_size) {
1731                 memset(good_buf + file_size, '\0', end_offset - file_size);
1732                 file_size = end_offset;
1733         }
1734
1735         if (testcalls <= simulatedopcount)
1736                 return;
1737         
1738         if ((progressinterval && testcalls % progressinterval == 0) ||
1739             (debug && (monitorstart == -1 || monitorend == -1 ||
1740                       end_offset <= monitorend)))
1741                 prt("%lld falloc\tfrom 0x%x to 0x%x (0x%x bytes)\n", testcalls,
1742                                 offset, offset + length, length);
1743         if (fallocate(fd, keep_size ? FALLOC_FL_KEEP_SIZE : 0, (loff_t)offset, (loff_t)length) == -1) {
1744                 prt("fallocate: 0x%x to 0x%x\n", offset, offset + length);
1745                 prterr("do_preallocate: fallocate");
1746                 report_failure(161);
1747         }
1748 }
1749 #else
1750 void
1751 do_preallocate(unsigned offset, unsigned length, int keep_size)
1752 {
1753         return;
1754 }
1755 #endif
1756
1757 void
1758 writefileimage()
1759 {
1760         ssize_t iret;
1761
1762         if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
1763                 prterr("writefileimage: lseek");
1764                 report_failure(171);
1765         }
1766         iret = write(fd, good_buf, file_size);
1767         if ((off_t)iret != file_size) {
1768                 if (iret == -1)
1769                         prterr("writefileimage: write");
1770                 else
1771                         prt("short write: 0x%x bytes instead of 0x%llx\n",
1772                             iret, (unsigned long long)file_size);
1773                 report_failure(172);
1774         }
1775         if (lite ? 0 : ftruncate(fd, file_size) == -1) {
1776                 prt("ftruncate2: %llx\n", (unsigned long long)file_size);
1777                 prterr("writefileimage: ftruncate");
1778                 report_failure(173);
1779         }
1780 }
1781
1782
1783 void
1784 docloseopen(void)
1785
1786         if (testcalls <= simulatedopcount)
1787                 return;
1788
1789         if (debug)
1790                 prt("%lld close/open\n", testcalls);
1791         if (close(fd)) {
1792                 prterr("docloseopen: close");
1793                 report_failure(180);
1794         }
1795         if (system("echo 3 > /proc/sys/vm/drop_caches")) {
1796                 prterr("docloseopen: drop_caches");
1797                 report_failure(181);
1798         }
1799         fd = open(fname, O_RDWR|o_direct, 0);
1800         if (fd < 0) {
1801                 prterr("docloseopen: open");
1802                 report_failure(182);
1803         }
1804 }
1805
1806 void
1807 dofsync(void)
1808 {
1809         int ret;
1810
1811         if (testcalls <= simulatedopcount)
1812                 return;
1813         if (debug)
1814                 prt("%lld fsync\n", testcalls);
1815         log4(OP_FSYNC, 0, 0, 0);
1816         ret = fsync(fd);
1817         if (ret < 0) {
1818                 prterr("dofsync");
1819                 report_failure(210);
1820         }
1821         mark_log();
1822         dump_fsync_buffer();
1823         mark_nr++;
1824 }
1825
1826 #define TRIM_OFF(off, size)                     \
1827 do {                                            \
1828         if (size)                               \
1829                 (off) %= (size);                \
1830         else                                    \
1831                 (off) = 0;                      \
1832 } while (0)
1833
1834 #define TRIM_LEN(off, len, size)                \
1835 do {                                            \
1836         if ((off) + (len) > (size))             \
1837                 (len) = (size) - (off);         \
1838 } while (0)
1839
1840 #define TRIM_OFF_LEN(off, len, size)            \
1841 do {                                            \
1842         TRIM_OFF(off, size);                    \
1843         TRIM_LEN(off, len, size);               \
1844 } while (0)
1845
1846 void
1847 cleanup(int sig)
1848 {
1849         if (sig)
1850                 prt("signal %d\n", sig);
1851         prt("testcalls = %lld\n", testcalls);
1852         exit(sig);
1853 }
1854
1855 static int
1856 op_args_count(int operation)
1857 {
1858         switch (operation) {
1859         case OP_CLONE_RANGE:
1860         case OP_DEDUPE_RANGE:
1861         case OP_COPY_RANGE:
1862                 return 4;
1863         default:
1864                 return 3;
1865         }
1866 }
1867
1868 static int
1869 read_op(struct log_entry *log_entry)
1870 {
1871         char line[256];
1872
1873         memset(log_entry, 0, sizeof(*log_entry));
1874         log_entry->operation = -1;
1875
1876         while (log_entry->operation == -1) {
1877                 char *str;
1878                 int i;
1879
1880                 do {
1881                         if (!fgets(line, sizeof(line), replayopsf)) {
1882                                 if (feof(replayopsf)) {
1883                                         replayopsf = NULL;
1884                                         return 0;
1885                                 }
1886                                 goto fail;
1887                         }
1888                         str = strtok(line, " \t\n");
1889                 } while (!str || str[0] == '#');
1890
1891                 if (strcmp(str, "skip") == 0) {
1892                         log_entry->flags |= FL_SKIPPED;
1893                         str = strtok(NULL, " \t\n");
1894                         if (!str)
1895                                 goto fail;
1896                 }
1897                 log_entry->operation = op_code(str);
1898                 if (log_entry->operation == -1)
1899                         goto fail;
1900                 log_entry->nr_args = op_args_count(log_entry->operation);
1901                 for (i = 0; i < log_entry->nr_args; i++) {
1902                         char *end;
1903
1904                         str = strtok(NULL, " \t\n");
1905                         if (!str)
1906                                 goto fail;
1907                         log_entry->args[i] = strtoul(str, &end, 0);
1908                         if (*end)
1909                                 goto fail;
1910                 }
1911                 while ((str = strtok(NULL, " \t\n"))) {
1912                         if (strcmp(str, "keep_size") == 0)
1913                                 log_entry->flags |= FL_KEEP_SIZE;
1914                         else if (strcmp(str, "close_open") == 0)
1915                                 log_entry->flags |= FL_CLOSE_OPEN;
1916                         else if (strcmp(str, "*") == 0)
1917                                 ;  /* overlap marker; ignore */
1918                         else
1919                                 goto fail;
1920                 }
1921         }
1922         return 1;
1923
1924 fail:
1925         fprintf(stderr, "%s: parse error\n", replayops);
1926         fclose(replayopsf);
1927         replayopsf = NULL;
1928         cleanup(100);  /* doesn't return */
1929         return 0;
1930 }
1931
1932 static inline bool
1933 range_overlaps(
1934         unsigned long   off0,
1935         unsigned long   off1,
1936         unsigned long   size)
1937 {
1938         return llabs((unsigned long long)off1 - off0) < size;
1939 }
1940
1941 static void generate_dest_range(bool bdy_align,
1942                                 unsigned long max_range_end,
1943                                 unsigned long *src_offset,
1944                                 unsigned long *size,
1945                                 unsigned long *dst_offset)
1946 {
1947         int tries = 0;
1948
1949         TRIM_OFF_LEN(*src_offset, *size, file_size);
1950         if (bdy_align) {
1951                 *src_offset = rounddown_64(*src_offset, readbdy);
1952                 if (o_direct)
1953                         *size = rounddown_64(*size, readbdy);
1954         } else {
1955                 *src_offset = rounddown_64(*src_offset, block_size);
1956                 *size = rounddown_64(*size, block_size);
1957         }
1958
1959         do {
1960                 if (tries++ >= 30) {
1961                         *size = 0;
1962                         break;
1963                 }
1964                 *dst_offset = random();
1965                 TRIM_OFF(*dst_offset, max_range_end);
1966                 if (bdy_align)
1967                         *dst_offset = rounddown_64(*dst_offset, writebdy);
1968                 else
1969                         *dst_offset = rounddown_64(*dst_offset, block_size);
1970         } while (range_overlaps(*src_offset, *dst_offset, *size) ||
1971                  *dst_offset + *size > max_range_end);
1972 }
1973
1974 int
1975 test(void)
1976 {
1977         unsigned long   offset, offset2;
1978         unsigned long   size;
1979         unsigned long   rv;
1980         unsigned long   op;
1981         int             keep_size = 0;
1982
1983         if (simulatedopcount > 0 && testcalls == simulatedopcount)
1984                 writefileimage();
1985
1986         testcalls++;
1987
1988         if (debugstart > 0 && testcalls >= debugstart)
1989                 debug = 1;
1990
1991         if (!quiet && testcalls < simulatedopcount && testcalls % 100000 == 0)
1992                 prt("%lld...\n", testcalls);
1993
1994         if (replayopsf) {
1995                 struct log_entry log_entry;
1996
1997                 while (read_op(&log_entry)) {
1998                         if (log_entry.flags & FL_SKIPPED) {
1999                                 log4(log_entry.operation,
2000                                      log_entry.args[0], log_entry.args[1],
2001                                      log_entry.flags);
2002                                 continue;
2003                         }
2004
2005                         op = log_entry.operation;
2006                         offset = log_entry.args[0];
2007                         size = log_entry.args[1];
2008                         offset2 = log_entry.args[2];
2009                         closeopen = !!(log_entry.flags & FL_CLOSE_OPEN);
2010                         keep_size = !!(log_entry.flags & FL_KEEP_SIZE);
2011                         goto have_op;
2012                 }
2013                 return 0;
2014         }
2015
2016         rv = random();
2017         if (closeprob)
2018                 closeopen = (rv >> 3) < (1 << 28) / closeprob;
2019
2020         offset = random();
2021         offset2 = 0;
2022         size = maxoplen;
2023         if (randomoplen)
2024                 size = random() % (maxoplen + 1);
2025
2026         /* calculate appropriate op to run */
2027         if (lite)
2028                 op = rv % OP_MAX_LITE;
2029         else if (!integrity)
2030                 op = rv % OP_MAX_FULL;
2031         else
2032                 op = rv % OP_MAX_INTEGRITY;
2033
2034         switch(op) {
2035         case OP_TRUNCATE:
2036                 if (!style)
2037                         size = random() % maxfilelen;
2038                 break;
2039         case OP_FALLOCATE:
2040                 if (fallocate_calls && size && keep_size_calls)
2041                         keep_size = random() % 2;
2042                 break;
2043         case OP_ZERO_RANGE:
2044                 if (zero_range_calls && size && keep_size_calls)
2045                         keep_size = random() % 2;
2046                 break;
2047         case OP_CLONE_RANGE:
2048                 generate_dest_range(false, maxfilelen, &offset, &size, &offset2);
2049                 break;
2050         case OP_DEDUPE_RANGE:
2051                 generate_dest_range(false, file_size, &offset, &size, &offset2);
2052                 break;
2053         case OP_COPY_RANGE:
2054                 generate_dest_range(true, maxfilelen, &offset, &size, &offset2);
2055                 break;
2056         }
2057
2058 have_op:
2059
2060         switch (op) {
2061         case OP_MAPREAD:
2062                 if (!mapped_reads)
2063                         op = OP_READ;
2064                 break;
2065         case OP_MAPWRITE:
2066                 if (!mapped_writes)
2067                         op = OP_WRITE;
2068                 break;
2069         case OP_FALLOCATE:
2070                 if (!fallocate_calls) {
2071                         log4(OP_FALLOCATE, offset, size, FL_SKIPPED);
2072                         goto out;
2073                 }
2074                 break;
2075         case OP_PUNCH_HOLE:
2076                 if (!punch_hole_calls) {
2077                         log4(OP_PUNCH_HOLE, offset, size, FL_SKIPPED);
2078                         goto out;
2079                 }
2080                 break;
2081         case OP_ZERO_RANGE:
2082                 if (!zero_range_calls) {
2083                         log4(OP_ZERO_RANGE, offset, size, FL_SKIPPED);
2084                         goto out;
2085                 }
2086                 break;
2087         case OP_COLLAPSE_RANGE:
2088                 if (!collapse_range_calls) {
2089                         log4(OP_COLLAPSE_RANGE, offset, size, FL_SKIPPED);
2090                         goto out;
2091                 }
2092                 break;
2093         case OP_INSERT_RANGE:
2094                 if (!insert_range_calls) {
2095                         log4(OP_INSERT_RANGE, offset, size, FL_SKIPPED);
2096                         goto out;
2097                 }
2098                 break;
2099         case OP_CLONE_RANGE:
2100                 if (!clone_range_calls) {
2101                         log5(op, offset, size, offset2, FL_SKIPPED);
2102                         goto out;
2103                 }
2104                 break;
2105         case OP_DEDUPE_RANGE:
2106                 if (!dedupe_range_calls) {
2107                         log5(op, offset, size, offset2, FL_SKIPPED);
2108                         goto out;
2109                 }
2110                 break;
2111         case OP_COPY_RANGE:
2112                 if (!copy_range_calls) {
2113                         log5(op, offset, size, offset2, FL_SKIPPED);
2114                         goto out;
2115                 }
2116                 break;
2117         }
2118
2119         switch (op) {
2120         case OP_READ:
2121                 TRIM_OFF_LEN(offset, size, file_size);
2122                 doread(offset, size);
2123                 break;
2124
2125         case OP_WRITE:
2126                 TRIM_OFF_LEN(offset, size, maxfilelen);
2127                 dowrite(offset, size);
2128                 break;
2129
2130         case OP_MAPREAD:
2131                 TRIM_OFF_LEN(offset, size, file_size);
2132                 domapread(offset, size);
2133                 break;
2134
2135         case OP_MAPWRITE:
2136                 TRIM_OFF_LEN(offset, size, maxfilelen);
2137                 domapwrite(offset, size);
2138                 break;
2139
2140         case OP_TRUNCATE:
2141                 dotruncate(size);
2142                 break;
2143
2144         case OP_FALLOCATE:
2145                 TRIM_OFF_LEN(offset, size, maxfilelen);
2146                 do_preallocate(offset, size, keep_size);
2147                 break;
2148
2149         case OP_PUNCH_HOLE:
2150                 TRIM_OFF_LEN(offset, size, file_size);
2151                 do_punch_hole(offset, size);
2152                 break;
2153         case OP_ZERO_RANGE:
2154                 TRIM_OFF_LEN(offset, size, maxfilelen);
2155                 do_zero_range(offset, size, keep_size);
2156                 break;
2157         case OP_COLLAPSE_RANGE:
2158                 TRIM_OFF_LEN(offset, size, file_size - 1);
2159                 offset = rounddown_64(offset, block_size);
2160                 size = rounddown_64(size, block_size);
2161                 if (size == 0) {
2162                         log4(OP_COLLAPSE_RANGE, offset, size, FL_SKIPPED);
2163                         goto out;
2164                 }
2165                 do_collapse_range(offset, size);
2166                 break;
2167         case OP_INSERT_RANGE:
2168                 TRIM_OFF(offset, file_size);
2169                 TRIM_LEN(file_size, size, maxfilelen);
2170                 offset = rounddown_64(offset, block_size);
2171                 size = rounddown_64(size, block_size);
2172                 if (size == 0) {
2173                         log4(OP_INSERT_RANGE, offset, size, FL_SKIPPED);
2174                         goto out;
2175                 }
2176                 if (file_size + size > maxfilelen) {
2177                         log4(OP_INSERT_RANGE, offset, size, FL_SKIPPED);
2178                         goto out;
2179                 }
2180
2181                 do_insert_range(offset, size);
2182                 break;
2183         case OP_CLONE_RANGE:
2184                 if (size == 0) {
2185                         log5(OP_CLONE_RANGE, offset, size, offset2, FL_SKIPPED);
2186                         goto out;
2187                 }
2188                 if (offset2 + size > maxfilelen) {
2189                         log5(OP_CLONE_RANGE, offset, size, offset2, FL_SKIPPED);
2190                         goto out;
2191                 }
2192
2193                 do_clone_range(offset, size, offset2);
2194                 break;
2195         case OP_DEDUPE_RANGE:
2196                 if (size == 0) {
2197                         log5(OP_DEDUPE_RANGE, offset, size, offset2, FL_SKIPPED);
2198                         goto out;
2199                 }
2200                 if (offset2 + size > maxfilelen) {
2201                         log5(OP_DEDUPE_RANGE, offset, size, offset2, FL_SKIPPED);
2202                         goto out;
2203                 }
2204
2205                 do_dedupe_range(offset, size, offset2);
2206                 break;
2207         case OP_COPY_RANGE:
2208                 if (size == 0) {
2209                         log5(OP_COPY_RANGE, offset, size, offset2, FL_SKIPPED);
2210                         goto out;
2211                 }
2212                 if (offset2 + size > maxfilelen) {
2213                         log5(OP_COPY_RANGE, offset, size, offset2, FL_SKIPPED);
2214                         goto out;
2215                 }
2216
2217                 do_copy_range(offset, size, offset2);
2218                 break;
2219         case OP_FSYNC:
2220                 dofsync();
2221                 break;
2222         default:
2223                 prterr("test: unknown operation");
2224                 report_failure(42);
2225                 break;
2226         }
2227
2228         if (check_file && testcalls > simulatedopcount)
2229                 check_contents();
2230
2231 out:
2232         if (closeopen)
2233                 docloseopen();
2234         if (sizechecks && testcalls > simulatedopcount)
2235                 check_size();
2236         return 1;
2237 }
2238
2239
2240 void
2241 usage(void)
2242 {
2243         fprintf(stdout, "usage: %s",
2244                 "fsx [-dknqxBEFJLOWZ][-A|-U] [-b opnum] [-c Prob] [-g filldata] [-i logdev] [-j logid] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] fname\n\
2245         -b opnum: beginning operation number (default 1)\n\
2246         -c P: 1 in P chance of file close+open at each op (default infinity)\n\
2247         -d: debug output for all operations\n\
2248         -f flush and invalidate cache after I/O\n\
2249         -g X: write character X instead of random generated data\n\
2250         -i logdev: do integrity testing, logdev is the dm log writes device\n\
2251         -j logid: prefix debug log messsages with this id\n\
2252         -k: do not truncate existing file and use its size as upper bound on file size\n\
2253         -l flen: the upper bound on file size (default 262144)\n\
2254         -m startop:endop: monitor (print debug output) specified byte range (default 0:infinity)\n\
2255         -n: no verifications of file size\n\
2256         -o oplen: the upper bound on operation size (default 65536)\n\
2257         -p progressinterval: debug output at specified operation interval\n\
2258         -q: quieter operation\n\
2259         -r readbdy: 4096 would make reads page aligned (default 1)\n\
2260         -s style: 1 gives smaller truncates (default 0)\n\
2261         -t truncbdy: 4096 would make truncates page aligned (default 1)\n\
2262         -w writebdy: 4096 would make writes page aligned (default 1)\n\
2263         -x: preallocate file space before starting, XFS only (default 0)\n\
2264         -y synchronize changes to a file\n"
2265
2266 #ifdef AIO
2267 "       -A: Use the AIO system calls, -A excludes -U\n"
2268 #endif
2269 #ifdef URING
2270 "       -U: Use the IO_URING system calls, -U excludes -A\n"
2271  #endif
2272 "       -D startingop: debug output starting at specified operation\n"
2273 #ifdef HAVE_LINUX_FALLOC_H
2274 "       -F: Do not use fallocate (preallocation) calls\n"
2275 #endif
2276 #ifdef FALLOC_FL_PUNCH_HOLE
2277 "       -H: Do not use punch hole calls\n"
2278 #endif
2279 #ifdef FALLOC_FL_ZERO_RANGE
2280 "       -z: Do not use zero range calls\n"
2281 #endif
2282 #ifdef FALLOC_FL_COLLAPSE_RANGE
2283 "       -C: Do not use collapse range calls\n"
2284 #endif
2285 #ifdef FALLOC_FL_INSERT_RANGE
2286 "       -I: Do not use insert range calls\n"
2287 #endif
2288 #ifdef FICLONERANGE
2289 "       -J: Do not use clone range calls\n"
2290 #endif
2291 #ifdef FIDEDUPERANGE
2292 "       -B: Do not use dedupe range calls\n"
2293 #endif
2294 #ifdef HAVE_COPY_FILE_RANGE
2295 "       -E: Do not use copy range calls\n"
2296 #endif
2297 "       -L: fsxLite - no file creations & no file size changes\n\
2298         -N numops: total # operations to do (default infinity)\n\
2299         -O: use oplen (see -o flag) for every op (default random)\n\
2300         -P: save .fsxlog .fsxops and .fsxgood files in dirpath (default ./)\n\
2301         -S seed: for random # generator (default 1) 0 gets timestamp\n\
2302         -W: mapped write operations DISabled\n\
2303         -X: Read file and compare to good buffer after every operation.\n\
2304         -R: read() system calls only (mapped reads disabled)\n\
2305         -Z: O_DIRECT (use -R, -W, -r and -w too)\n\
2306         --replay-ops opsfile: replay ops from recorded .fsxops file\n\
2307         --record-ops[=opsfile]: dump ops file also on success. optionally specify ops file name\n\
2308         fname: this filename is REQUIRED (no default)\n");
2309         exit(90);
2310 }
2311
2312
2313 long long
2314 getnum(char *s, char **e)
2315 {
2316         long long ret;
2317
2318         *e = (char *) 0;
2319         ret = strtoll(s, e, 0);
2320         if (*e)
2321                 switch (**e) {
2322                 case 'b':
2323                 case 'B':
2324                         ret *= 512;
2325                         *e = *e + 1;
2326                         break;
2327                 case 'k':
2328                 case 'K':
2329                         ret *= 1024;
2330                         *e = *e + 1;
2331                         break;
2332                 case 'm':
2333                 case 'M':
2334                         ret *= 1024*1024;
2335                         *e = *e + 1;
2336                         break;
2337                 case 'w':
2338                 case 'W':
2339                         ret *= 4;
2340                         *e = *e + 1;
2341                         break;
2342                 }
2343         return (ret);
2344 }
2345
2346 #ifdef AIO
2347
2348 #define QSZ     1024
2349 io_context_t    io_ctx;
2350 struct iocb     iocb;
2351
2352 int
2353 aio_setup()
2354 {
2355         int ret;
2356         ret = io_queue_init(QSZ, &io_ctx);
2357         if (ret != 0) {
2358                 fprintf(stderr, "aio_setup: io_queue_init failed: %s\n",
2359                         strerror(ret));
2360                 return(-1);
2361         }
2362         return(0);
2363 }
2364
2365 int
2366 aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
2367 {
2368         struct io_event event;
2369         static struct timespec ts;
2370         struct iocb *iocbs[] = { &iocb };
2371         int ret;
2372         long res;
2373
2374         if (rw == READ) {
2375                 io_prep_pread(&iocb, fd, buf, len, offset);
2376         } else {
2377                 io_prep_pwrite(&iocb, fd, buf, len, offset);
2378         }
2379
2380         ts.tv_sec = 30;
2381         ts.tv_nsec = 0;
2382         ret = io_submit(io_ctx, 1, iocbs);
2383         if (ret != 1) {
2384                 fprintf(stderr, "errcode=%d\n", ret);
2385                 fprintf(stderr, "aio_rw: io_submit failed: %s\n",
2386                                 strerror(ret));
2387                 goto out_error;
2388         }
2389
2390         ret = io_getevents(io_ctx, 1, 1, &event, &ts);
2391         if (ret != 1) {
2392                 if (ret == 0)
2393                         fprintf(stderr, "aio_rw: no events available\n");
2394                 else {
2395                         fprintf(stderr, "errcode=%d\n", -ret);
2396                         fprintf(stderr, "aio_rw: io_getevents failed: %s\n",
2397                                         strerror(-ret));
2398                 }
2399                 goto out_error;
2400         }
2401         if (len != event.res) {
2402                 /*
2403                  * The b0rked libaio defines event.res as unsigned.
2404                  * However the kernel strucuture has it signed,
2405                  * and it's used to pass negated error value.
2406                  * Till the library is fixed use the temp var.
2407                  */
2408                 res = (long)event.res;
2409                 if (res >= 0)
2410                         fprintf(stderr, "bad io length: %lu instead of %u\n",
2411                                         res, len);
2412                 else {
2413                         fprintf(stderr, "errcode=%ld\n", -res);
2414                         fprintf(stderr, "aio_rw: async io failed: %s\n",
2415                                         strerror(-res));
2416                         ret = res;
2417                         goto out_error;
2418                 }
2419
2420         }
2421         return event.res;
2422
2423 out_error:
2424         /*
2425          * The caller expects error return in traditional libc
2426          * convention, i.e. -1 and the errno set to error.
2427          */
2428         errno = -ret;
2429         return -1;
2430 }
2431 #else
2432 aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
2433 {
2434         fprintf(stderr, "io_rw: need AIO support!\n");
2435         exit(111);
2436 }
2437 #endif
2438
2439 #ifdef URING
2440
2441 struct io_uring ring;
2442 #define URING_ENTRIES   1024
2443
2444 int
2445 uring_setup()
2446 {
2447         int ret;
2448
2449         ret = io_uring_queue_init(URING_ENTRIES, &ring, 0);
2450         if (ret != 0) {
2451                 fprintf(stderr, "uring_setup: io_uring_queue_init failed: %s\n",
2452                                 strerror(ret));
2453                 return -1;
2454         }
2455         return 0;
2456 }
2457
2458 int
2459 uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
2460 {
2461         struct io_uring_sqe     *sqe;
2462         struct io_uring_cqe     *cqe;
2463         struct iovec            iovec;
2464         int ret;
2465         int res = 0;
2466         char *p = buf;
2467         unsigned l = len;
2468         unsigned o = offset;
2469
2470         /*
2471          * Due to io_uring tries non-blocking IOs (especially read), that
2472          * always cause 'normal' short reading. To avoid this short read
2473          * fail, try to loop read/write (escpecilly read) data.
2474          */
2475         while (l > 0) {
2476                 sqe = io_uring_get_sqe(&ring);
2477                 if (!sqe) {
2478                         fprintf(stderr, "uring_rw: io_uring_get_sqe failed: %s\n",
2479                                         strerror(errno));
2480                         return -1;
2481                 }
2482
2483                 iovec.iov_base = p;
2484                 iovec.iov_len = l;
2485                 if (rw == READ) {
2486                         io_uring_prep_readv(sqe, fd, &iovec, 1, o);
2487                 } else {
2488                         io_uring_prep_writev(sqe, fd, &iovec, 1, o);
2489                 }
2490
2491                 ret = io_uring_submit_and_wait(&ring, 1);
2492                 if (ret != 1) {
2493                         fprintf(stderr, "errcode=%d\n", -ret);
2494                         fprintf(stderr, "uring %s: io_uring_submit failed: %s\n",
2495                                         rw == READ ? "read":"write", strerror(-ret));
2496                         goto uring_error;
2497                 }
2498
2499                 ret = io_uring_wait_cqe(&ring, &cqe);
2500                 if (ret != 0) {
2501                         fprintf(stderr, "errcode=%d\n", -ret);
2502                         fprintf(stderr, "uring %s: io_uring_wait_cqe failed: %s\n",
2503                                         rw == READ ? "read":"write", strerror(-ret));
2504                         goto uring_error;
2505                 }
2506
2507                 ret = cqe->res;
2508                 io_uring_cqe_seen(&ring, cqe);
2509
2510                 if (ret > 0) {
2511                         o += ret;
2512                         l -= ret;
2513                         p += ret;
2514                         res += ret;
2515                 } else if (ret < 0) {
2516                         fprintf(stderr, "errcode=%d\n", -ret);
2517                         fprintf(stderr, "uring %s: io_uring failed: %s\n",
2518                                         rw == READ ? "read":"write", strerror(-ret));
2519                         goto uring_error;
2520                 } else {
2521                         fprintf(stderr, "uring %s bad io length: %d instead of %u\n",
2522                                         rw == READ ? "read":"write", res, len);
2523                         break;
2524                 }
2525         }
2526         return res;
2527
2528  uring_error:
2529         /*
2530          * The caller expects error return in traditional libc
2531          * convention, i.e. -1 and the errno set to error.
2532          */
2533         errno = -ret;
2534         return -1;
2535 }
2536 #else
2537 int
2538 uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
2539 {
2540         fprintf(stderr, "io_rw: need IO_URING support!\n");
2541         exit(111);
2542 }
2543 #endif
2544
2545 int
2546 fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
2547 {
2548         int ret;
2549
2550         if (aio) {
2551                 ret = aio_rw(rw, fd, buf, len, offset);
2552         } else if (uring) {
2553                 ret = uring_rw(rw, fd, buf, len, offset);
2554         } else {
2555                 if (rw == READ)
2556                         ret = read(fd, buf, len);
2557                 else
2558                         ret = write(fd, buf, len);
2559         }
2560         return ret;
2561 }
2562
2563 #define test_fallocate(mode) __test_fallocate(mode, #mode)
2564
2565 int
2566 __test_fallocate(int mode, const char *mode_str)
2567 {
2568 #ifdef HAVE_LINUX_FALLOC_H
2569         int ret = 0;
2570         if (!lite) {
2571                 if (fallocate(fd, mode, file_size, 1) &&
2572                     (errno == ENOSYS || errno == EOPNOTSUPP)) {
2573                         if(!quiet)
2574                                 fprintf(stderr,
2575                                         "main: filesystem does not support "
2576                                         "fallocate mode %s, disabling!\n",
2577                                         mode_str);
2578                 } else {
2579                         ret = 1;
2580                         if (ftruncate(fd, file_size)) {
2581                                 warn("main: ftruncate");
2582                                 exit(132);
2583                         }
2584                 }
2585         }
2586         return ret;
2587 #endif
2588 }
2589
2590 static struct option longopts[] = {
2591         {"replay-ops", required_argument, 0, 256},
2592         {"record-ops", optional_argument, 0, 255},
2593         { }
2594 };
2595
2596 int
2597 main(int argc, char **argv)
2598 {
2599         int     i, style, ch;
2600         char    *endp, *tmp;
2601         char logfile[PATH_MAX];
2602         struct stat statbuf;
2603         int o_flags = O_RDWR|O_CREAT|O_TRUNC;
2604
2605         logfile[0] = 0;
2606         dname[0] = 0;
2607
2608         page_size = getpagesize();
2609         page_mask = page_size - 1;
2610         mmap_mask = page_mask;
2611         
2612
2613         setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
2614
2615         while ((ch = getopt_long(argc, argv,
2616                                  "b:c:dfg:i:j:kl:m:no:p:qr:s:t:w:xyABD:EFJKHzCILN:OP:RS:UWXZ",
2617                                  longopts, NULL)) != EOF)
2618                 switch (ch) {
2619                 case 'b':
2620                         simulatedopcount = getnum(optarg, &endp);
2621                         if (!quiet)
2622                                 prt("Will begin at operation %lld\n",
2623                                                 simulatedopcount);
2624                         if (simulatedopcount == 0)
2625                                 usage();
2626                         simulatedopcount -= 1;
2627                         break;
2628                 case 'c':
2629                         closeprob = getnum(optarg, &endp);
2630                         if (!quiet)
2631                                 prt("Chance of close/open is 1 in %d\n", closeprob);
2632                         if (closeprob <= 0)
2633                                 usage();
2634                         break;
2635                 case 'd':
2636                         debug = 1;
2637                         break;
2638                 case 'f':
2639                         flush = 1;
2640                         break;
2641                 case 'g':
2642                         filldata = *optarg;
2643                         break;
2644                 case 'i':
2645                         integrity = 1;
2646                         logdev = strdup(optarg);
2647                         if (!logdev) {
2648                                 prterr("strdup");
2649                                 exit(101);
2650                         }
2651                         break;
2652                 case 'j':
2653                         logid = strdup(optarg);
2654                         if (!logid) {
2655                                 prterr("strdup");
2656                                 exit(101);
2657                         }
2658                         break;
2659                 case 'k':
2660                         o_flags &= ~O_TRUNC;
2661                         break;
2662                 case 'l':
2663                         maxfilelen = getnum(optarg, &endp);
2664                         if (maxfilelen <= 0)
2665                                 usage();
2666                         break;
2667                 case 'm':
2668                         monitorstart = getnum(optarg, &endp);
2669                         if (monitorstart < 0)
2670                                 usage();
2671                         if (!endp || *endp++ != ':')
2672                                 usage();
2673                         monitorend = getnum(endp, &endp);
2674                         if (monitorend < 0)
2675                                 usage();
2676                         if (monitorend == 0)
2677                                 monitorend = -1; /* aka infinity */
2678                         debug = 1;
2679                 case 'n':
2680                         sizechecks = 0;
2681                         break;
2682                 case 'o':
2683                         maxoplen = getnum(optarg, &endp);
2684                         if (maxoplen <= 0)
2685                                 usage();
2686                         break;
2687                 case 'p':
2688                         progressinterval = getnum(optarg, &endp);
2689                         if (progressinterval == 0)
2690                                 usage();
2691                         break;
2692                 case 'q':
2693                         quiet = 1;
2694                         break;
2695                 case 'r':
2696                         readbdy = getnum(optarg, &endp);
2697                         if (readbdy <= 0)
2698                                 usage();
2699                         break;
2700                 case 's':
2701                         style = getnum(optarg, &endp);
2702                         if (style < 0 || style > 1)
2703                                 usage();
2704                         break;
2705                 case 't':
2706                         truncbdy = getnum(optarg, &endp);
2707                         if (truncbdy <= 0)
2708                                 usage();
2709                         break;
2710                 case 'w':
2711                         writebdy = getnum(optarg, &endp);
2712                         if (writebdy <= 0)
2713                                 usage();
2714                         break;
2715                 case 'x':
2716                         prealloc = 1;
2717                         break;
2718                 case 'y':
2719                         do_fsync = 1;
2720                         break;
2721                 case 'A':
2722                         aio = 1;
2723                         break;
2724                 case 'U':
2725                         uring = 1;
2726                         break;
2727                 case 'D':
2728                         debugstart = getnum(optarg, &endp);
2729                         if (debugstart < 1)
2730                                 usage();
2731                         break;
2732                 case 'F':
2733                         fallocate_calls = 0;
2734                         break;
2735                 case 'K':
2736                         keep_size_calls = 0;
2737                         break;
2738                 case 'H':
2739                         punch_hole_calls = 0;
2740                         break;
2741                 case 'z':
2742                         zero_range_calls = 0;
2743                         break;
2744                 case 'C':
2745                         collapse_range_calls = 0;
2746                         break;
2747                 case 'I':
2748                         insert_range_calls = 0;
2749                         break;
2750                 case 'J':
2751                         clone_range_calls = 0;
2752                         break;
2753                 case 'B':
2754                         dedupe_range_calls = 0;
2755                         break;
2756                 case 'E':
2757                         copy_range_calls = 0;
2758                         break;
2759                 case 'L':
2760                         lite = 1;
2761                         o_flags &= ~(O_CREAT|O_TRUNC);
2762                         break;
2763                 case 'N':
2764                         numops = getnum(optarg, &endp);
2765                         if (numops < 0)
2766                                 usage();
2767                         break;
2768                 case 'O':
2769                         randomoplen = 0;
2770                         break;
2771                 case 'P':
2772                         snprintf(dname, sizeof(dname), "%s/", optarg);
2773                         dirpath = strlen(dname);
2774                         break;
2775                 case 'R':
2776                         mapped_reads = 0;
2777                         break;
2778                 case 'S':
2779                         seed = getnum(optarg, &endp);
2780                         if (seed == 0) {
2781                                 seed = time(0) % 10000;
2782                                 seed += (int)getpid();
2783                         }
2784                         if (seed < 0)
2785                                 usage();
2786                         break;
2787                 case 'W':
2788                         mapped_writes = 0;
2789                         if (!quiet)
2790                                 prt("mapped writes DISABLED\n");
2791                         break;
2792                 case 'X':
2793                         check_file = 1;
2794                         break;
2795                 case 'Z':
2796                         o_direct = O_DIRECT;
2797                         o_flags |= O_DIRECT;
2798                         break;
2799                 case 255:  /* --record-ops */
2800                         if (optarg)
2801                                 snprintf(opsfile, sizeof(opsfile), "%s", optarg);
2802                         recordops = opsfile;
2803                         break;
2804                 case 256:  /* --replay-ops */
2805                         replayops = optarg;
2806                         break;
2807                 default:
2808                         usage();
2809                         /* NOTREACHED */
2810                 }
2811         argc -= optind;
2812         argv += optind;
2813         if (argc != 1)
2814                 usage();
2815
2816         if (aio && uring) {
2817                 fprintf(stderr, "-A and -U shouldn't be used together\n");
2818                 usage();
2819         }
2820
2821         if (integrity && !dirpath) {
2822                 fprintf(stderr, "option -i <logdev> requires -P <dirpath>\n");
2823                 usage();
2824         }
2825
2826         fname = argv[0];
2827         tmp = strdup(fname);
2828         if (!tmp) {
2829                 prterr("strdup");
2830                 exit(101);
2831         }
2832         bname = basename(tmp);
2833
2834         signal(SIGHUP,  cleanup);
2835         signal(SIGINT,  cleanup);
2836         signal(SIGPIPE, cleanup);
2837         signal(SIGALRM, cleanup);
2838         signal(SIGTERM, cleanup);
2839         signal(SIGXCPU, cleanup);
2840         signal(SIGXFSZ, cleanup);
2841         signal(SIGVTALRM,       cleanup);
2842         signal(SIGUSR1, cleanup);
2843         signal(SIGUSR2, cleanup);
2844
2845         if (!quiet && seed)
2846                 prt("Seed set to %d\n", seed);
2847         srandom(seed);
2848         fd = open(fname, o_flags, 0666);
2849         if (fd < 0) {
2850                 prterr(fname);
2851                 exit(91);
2852         }
2853         if (fstat(fd, &statbuf)) {
2854                 prterr("check_size: fstat");
2855                 exit(91);
2856         }
2857         block_size = statbuf.st_blksize;
2858 #ifdef XFS
2859         if (prealloc) {
2860                 xfs_flock64_t   resv = { 0 };
2861 #ifdef HAVE_XFS_PLATFORM_DEFS_H
2862                 if (!platform_test_xfs_fd(fd)) {
2863                         prterr(fname);
2864                         fprintf(stderr, "main: cannot prealloc, non XFS\n");
2865                         exit(96);
2866                 }
2867 #endif
2868                 resv.l_len = maxfilelen;
2869                 if ((xfsctl(fname, fd, XFS_IOC_RESVSP, &resv)) < 0) {
2870                         prterr(fname);
2871                         exit(97);
2872                 }
2873         }
2874 #endif
2875
2876         if (dirpath) {
2877                 snprintf(goodfile, sizeof(goodfile), "%s%s.fsxgood", dname, bname);
2878                 snprintf(logfile, sizeof(logfile), "%s%s.fsxlog", dname, bname);
2879                 if (!*opsfile)
2880                         snprintf(opsfile, sizeof(opsfile), "%s%s.fsxops", dname, bname);
2881         } else {
2882                 snprintf(goodfile, sizeof(goodfile), "%s.fsxgood", fname);
2883                 snprintf(logfile, sizeof(logfile), "%s.fsxlog", fname);
2884                 if (!*opsfile)
2885                         snprintf(opsfile, sizeof(opsfile), "%s.fsxops", fname);
2886         }
2887         fsxgoodfd = open(goodfile, O_RDWR|O_CREAT|O_TRUNC, 0666);
2888         if (fsxgoodfd < 0) {
2889                 prterr(goodfile);
2890                 exit(92);
2891         }
2892         fsxlogf = fopen(logfile, "w");
2893         if (fsxlogf == NULL) {
2894                 prterr(logfile);
2895                 exit(93);
2896         }
2897         unlink(opsfile);
2898
2899         if (replayops) {
2900                 replayopsf = fopen(replayops, "r");
2901                 if (!replayopsf) {
2902                         prterr(replayops);
2903                         exit(93);
2904                 }
2905         }
2906
2907 #ifdef AIO
2908         if (aio) 
2909                 aio_setup();
2910 #endif
2911 #ifdef URING
2912         if (uring)
2913                 uring_setup();
2914 #endif
2915
2916         if (!(o_flags & O_TRUNC)) {
2917                 off_t ret;
2918                 file_size = maxfilelen = biggest = lseek(fd, (off_t)0, SEEK_END);
2919                 if (file_size == (off_t)-1) {
2920                         prterr(fname);
2921                         warn("main: lseek eof");
2922                         exit(94);
2923                 }
2924                 ret = lseek(fd, (off_t)0, SEEK_SET);
2925                 if (ret == (off_t)-1) {
2926                         prterr(fname);
2927                         warn("main: lseek 0");
2928                         exit(95);
2929                 }
2930         }
2931         original_buf = (char *) malloc(maxfilelen);
2932         for (i = 0; i < maxfilelen; i++)
2933                 original_buf[i] = random() % 256;
2934         good_buf = (char *) malloc(maxfilelen + writebdy);
2935         good_buf = round_ptr_up(good_buf, writebdy, 0);
2936         memset(good_buf, '\0', maxfilelen);
2937         temp_buf = (char *) malloc(maxoplen + readbdy);
2938         temp_buf = round_ptr_up(temp_buf, readbdy, 0);
2939         memset(temp_buf, '\0', maxoplen);
2940         if (lite) {     /* zero entire existing file */
2941                 ssize_t written;
2942
2943                 written = write(fd, good_buf, (size_t)maxfilelen);
2944                 if (written != maxfilelen) {
2945                         if (written == -1) {
2946                                 prterr(fname);
2947                                 warn("main: error on write");
2948                         } else
2949                                 warn("main: short write, 0x%x bytes instead "
2950                                         "of 0x%lx\n",
2951                                         (unsigned)written,
2952                                         maxfilelen);
2953                         exit(98);
2954                 }
2955         } else {
2956                 ssize_t ret, len = file_size;
2957                 off_t off = 0;
2958
2959                 while (len > 0) {
2960                         ret = read(fd, good_buf + off, len);
2961                         if (ret == -1) {
2962                                 prterr(fname);
2963                                 warn("main: error on read");
2964                                 exit(98);
2965                         }
2966                         len -= ret;
2967                         off += ret;
2968                 }
2969
2970                 check_trunc_hack();
2971         }
2972
2973         if (fallocate_calls)
2974                 fallocate_calls = test_fallocate(0);
2975         if (keep_size_calls)
2976                 keep_size_calls = test_fallocate(FALLOC_FL_KEEP_SIZE);
2977         if (punch_hole_calls)
2978                 punch_hole_calls = test_fallocate(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE);
2979         if (zero_range_calls)
2980                 zero_range_calls = test_fallocate(FALLOC_FL_ZERO_RANGE);
2981         if (collapse_range_calls)
2982                 collapse_range_calls = test_fallocate(FALLOC_FL_COLLAPSE_RANGE);
2983         if (insert_range_calls)
2984                 insert_range_calls = test_fallocate(FALLOC_FL_INSERT_RANGE);
2985         if (clone_range_calls)
2986                 clone_range_calls = test_clone_range();
2987         if (dedupe_range_calls)
2988                 dedupe_range_calls = test_dedupe_range();
2989         if (copy_range_calls)
2990                 copy_range_calls = test_copy_range();
2991
2992         while (numops == -1 || numops--)
2993                 if (!test())
2994                         break;
2995
2996         free(tmp);
2997         if (close(fd)) {
2998                 prterr("close");
2999                 report_failure(99);
3000         }
3001         prt("All %lld operations completed A-OK!\n", testcalls);
3002         if (recordops)
3003                 logdump();
3004
3005         exit(0);
3006         return 0;
3007 }