fsx: add FIDEDUPERANGE support
[xfstests-dev.git] / ltp / fsx.c
1 /*
2  *      Copyright (C) 1991, NeXT Computer, Inc.  All Rights Reserved.
3  *
4  *      File:   fsx.c
5  *      Author: Avadis Tevanian, Jr.
6  *
7  *      File system exerciser. 
8  *
9  *      Rewritten 8/98 by Conrad Minshall.
10  *
11  *      Small changes to work under Linux -- davej.
12  *
13  *      Checks for mmap last-page zero fill.
14  */
15
16 #include "global.h"
17
18 #include <limits.h>
19 #include <time.h>
20 #include <strings.h>
21 #include <sys/file.h>
22 #include <sys/mman.h>
23 #include <stdbool.h>
24 #ifdef HAVE_ERR_H
25 #include <err.h>
26 #endif
27 #include <signal.h>
28 #include <stdio.h>
29 #include <stddef.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <stdarg.h>
33 #include <errno.h>
34 #ifdef AIO
35 #include <libaio.h>
36 #endif
37
/* Provide a no-op MAP_FILE when the platform headers do not define one. */
#if !defined(MAP_FILE)
# define MAP_FILE 0
#endif

/* Number of data columns to print on each line. */
#define NUMPRINTCOLUMNS 32
43
/*
 * Per-operation flag bits stored in each log entry.
 *
 * FL_SKIPPED    - the entry was logged but the operation was not carried out
 * FL_CLOSE_OPEN - set by log4()/log5() whenever 'closeopen' is non-zero
 * FL_KEEP_SIZE  - reported by logdump() as "PAST_EOF" for fallocate entries
 *                 and written to the ops file as "keep_size"
 */
enum opflags {
	FL_NONE		= 0,
	FL_SKIPPED	= 1,
	FL_CLOSE_OPEN	= 2,
	FL_KEEP_SIZE	= 4
};
47
48 /*
49  *      A log entry is an operation and a bunch of arguments.
50  */
51
52 struct log_entry {
53         int     operation;
54         int     nr_args;
55         int     args[4];
56         enum opflags flags;
57 };
58
59 #define LOGSIZE 10000
60
61 struct log_entry        oplog[LOGSIZE]; /* the log */
62 int                     logptr = 0;     /* current position in log */
63 int                     logcount = 0;   /* total ops */
64
/*
 * The operation matrix is complex due to conditional execution of different
 * features. Hence when we come to deciding what operation to run, we need to
 * be careful in how we select the different operations. The active operations
 * are mapped to numbers as follows:
 *
 *                      lite    !lite   integrity
 * READ:                0       0       0
 * WRITE:               1       1       1
 * MAPREAD:             2       2       2
 * MAPWRITE:            3       3       3
 * TRUNCATE:            -       4       4
 * FALLOCATE:           -       5       5
 * PUNCH HOLE:          -       6       6
 * ZERO RANGE:          -       7       7
 * COLLAPSE RANGE:      -       8       8
 * INSERT RANGE:        -       9       9
 * CLONE RANGE:         -       10      10
 * DEDUPE RANGE:        -       11      11
 * FSYNC:               -       -       12
 *
 * When mapped reads/writes are disabled, they are simply converted to normal
 * reads and writes. When fallocate/fpunch calls are disabled, they are
 * skipped.
 *
 * Because of the "lite" version, we also need to have different "maximum
 * operation" defines to allow the ops to be selected correctly based on the
 * mode being run.
 */
91
/*
 * Operation codes.  Each OP_MAX_* marker is one past the last operation
 * available in that mode and doubles as the first code of the next group.
 */
enum {
	/* operations common to every mode */
	OP_READ = 0,
	OP_WRITE,
	OP_MAPREAD,
	OP_MAPWRITE,
	OP_MAX_LITE,		/* end of the "lite" set */

	/* operations only available when not running in lite mode */
	OP_TRUNCATE = OP_MAX_LITE,
	OP_FALLOCATE,
	OP_PUNCH_HOLE,
	OP_ZERO_RANGE,
	OP_COLLAPSE_RANGE,
	OP_INSERT_RANGE,
	OP_CLONE_RANGE,
	OP_DEDUPE_RANGE,
	OP_MAX_FULL,		/* end of the full set */

	/* operations only available in integrity mode */
	OP_FSYNC = OP_MAX_FULL,
	OP_MAX_INTEGRITY,	/* end of the integrity set */
};
115
/*
 * Discard any PAGE_SIZE/PAGE_MASK already defined and derive both from
 * getpagesize() at the point of use.
 */
#undef PAGE_SIZE
#undef PAGE_MASK
#define PAGE_SIZE	getpagesize()
#define PAGE_MASK	(PAGE_SIZE - 1)
120
121 char    *original_buf;                  /* a pointer to the original data */
122 char    *good_buf;                      /* a pointer to the correct data */
123 char    *temp_buf;                      /* a pointer to the current data */
124 char    *fname;                         /* name of our test file */
125 char    *bname;                         /* basename of our test file */
126 char    *logdev;                        /* -i flag */
127 char    *logid;                         /* -j flag */
128 char    dname[1024];                    /* -P flag */
129 char    goodfile[PATH_MAX];
130 int     dirpath = 0;                    /* -P flag */
131 int     fd;                             /* fd for our test file */
132
133 blksize_t       block_size = 0;
134 off_t           file_size = 0;
135 off_t           biggest = 0;
136 unsigned long   testcalls = 0;          /* calls to function "test" */
137
/*
 * Run-time options.  The flag letter noted beside each variable is the
 * command-line switch that controls it.
 */
unsigned long	simulatedopcount = 0;		/* -b */
int		closeprob = 0;			/* -c */
int		debug = 0;			/* -d */
unsigned long	debugstart = 0;			/* -D */
char		filldata = 0;			/* -g */
int		flush = 0;			/* -f */
int		do_fsync = 0;			/* -y */
unsigned long	maxfilelen = 256 * 1024;	/* -l */
int		sizechecks = 1;			/* -n turns them off */
int		maxoplen = 64 * 1024;		/* -o */
int		quiet = 0;			/* -q */
unsigned long	progressinterval = 0;		/* -p */
int		readbdy = 1;			/* -r */
int		style = 0;			/* -s */
int		prealloc = 0;			/* -x */
int		truncbdy = 1;			/* -t */
int		writebdy = 1;			/* -w */
long		monitorstart = -1;		/* -m */
long		monitorend = -1;		/* -m */
int		lite = 0;			/* -L */
long		numops = -1;			/* -N */
int		randomoplen = 1;		/* -O turns it off */
int		seed = 1;			/* -S */
int		mapped_writes = 1;		/* -W turns them off */
int		fallocate_calls = 1;		/* -F turns them off */
int		keep_size_calls = 1;		/* -K turns them off */
int		punch_hole_calls = 1;		/* -H turns them off */
int		zero_range_calls = 1;		/* -z turns them off */
int		collapse_range_calls = 1;	/* -C turns them off */
int		insert_range_calls = 1;		/* -I turns them off */
int		mapped_reads = 1;		/* -R turns them off */
int		check_file = 0;			/* -X turns it on */
int		clone_range_calls = 1;		/* -J turns them off */
int		dedupe_range_calls = 1;		/* -B turns them off */
int		integrity = 0;			/* -i */
int		fsxgoodfd = 0;			/* fd the known-good data is saved to on failure */
int		o_direct;			/* -Z */
int		aio = 0;
int		mark_nr = 0;			/* sequence number used in ".mark%d" names */
177
/* Page geometry: page_mask is used for EOF-page checks, mmap_mask by doflush(). */
int	page_size;
int	page_mask;
int	mmap_mask;
/*
 * fsxread()/fsxwrite() take (fd, buf, len, offset).  With AIO they go
 * through aio_rw(); otherwise they expand to plain read()/write() and the
 * offset argument is dropped.
 */
#if defined(AIO)
int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
# define READ 0
# define WRITE 1
# define fsxread(a,b,c,d)	aio_rw(READ, a,b,c,d)
# define fsxwrite(a,b,c,d)	aio_rw(WRITE, a,b,c,d)
#else
# define fsxread(a,b,c,d)	read(a,b,c)
# define fsxwrite(a,b,c,d)	write(a,b,c)
#endif
191
192 const char *replayops = NULL;
193 const char *recordops = NULL;
194 FILE *  fsxlogf = NULL;
195 FILE *  replayopsf = NULL;
196 char opsfile[PATH_MAX];
197 int badoff = -1;
198 int closeopen = 0;
199
/*
 * Round @ptr up to the next multiple of @align and then add @offset bytes.
 * The mask arithmetic only gives a true multiple when @align is a power of
 * two.
 */
static void *round_ptr_up(void *ptr, unsigned long align, unsigned long offset)
{
	unsigned long mask = align - 1;
	unsigned long addr = (unsigned long)ptr;

	addr = (addr + mask) & ~mask;
	return (void *)(addr + offset);
}
208
209 void
210 vwarnc(int code, const char *fmt, va_list ap)
211 {
212         if (logid)
213                 fprintf(stderr, "%s: ", logid);
214         fprintf(stderr, "fsx: ");
215         if (fmt != NULL) {
216                 vfprintf(stderr, fmt, ap);
217                 fprintf(stderr, ": ");
218         }
219         fprintf(stderr, "%s\n", strerror(code));
220 }
221
/*
 * printf-style warning to stderr that appends the description of the
 * current errno; see vwarnc() for the exact layout.
 */
void
warn(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vwarnc(errno, fmt, args);
	va_end(args);
}
229
230 void
231 prt(const char *fmt, ...)
232 {
233         va_list args;
234
235         if (logid)
236                 fprintf(stdout, "%s: ", logid);
237         va_start(args, fmt);
238         vfprintf(stdout, fmt, args);
239         va_end(args);
240         if (fsxlogf) {
241                 va_start(args, fmt);
242                 vfprintf(fsxlogf, fmt, args);
243                 va_end(args);
244         }
245 }
246
/*
 * Print the description of the current errno through prt(), preceded by
 * "<prefix>: " when @prefix is not NULL.
 *
 * A NULL @prefix prints just the error text; it is never handed to a "%s"
 * conversion, which would be undefined behaviour.
 */
void
prterr(const char *prefix)
{
	const char *msg = strerror(errno);

	if (prefix)
		prt("%s: %s\n", prefix, msg);
	else
		prt("%s\n", msg);
}
252
253
254 static const char *op_names[] = {
255         [OP_READ] = "read",
256         [OP_WRITE] = "write",
257         [OP_MAPREAD] = "mapread",
258         [OP_MAPWRITE] = "mapwrite",
259         [OP_TRUNCATE] = "truncate",
260         [OP_FALLOCATE] = "fallocate",
261         [OP_PUNCH_HOLE] = "punch_hole",
262         [OP_ZERO_RANGE] = "zero_range",
263         [OP_COLLAPSE_RANGE] = "collapse_range",
264         [OP_INSERT_RANGE] = "insert_range",
265         [OP_CLONE_RANGE] = "clone_range",
266         [OP_DEDUPE_RANGE] = "dedupe_range",
267         [OP_FSYNC] = "fsync",
268 };
269
270 static const char *op_name(int operation)
271 {
272         if (operation >= 0 &&
273             operation < sizeof(op_names) / sizeof(op_names[0]))
274                 return op_names[operation];
275         return NULL;
276 }
277
278 static int op_code(const char *name)
279 {
280         int i;
281
282         for (i = 0; i < sizeof(op_names) / sizeof(op_names[0]); i++)
283                 if (op_names[i] && strcmp(name, op_names[i]) == 0)
284                         return i;
285         return -1;
286 }
287
288 void
289 log5(int operation, int arg0, int arg1, int arg2, enum opflags flags)
290 {
291         struct log_entry *le;
292
293         le = &oplog[logptr];
294         le->operation = operation;
295         if (closeopen)
296                 flags |= FL_CLOSE_OPEN;
297         le->args[0] = arg0;
298         le->args[1] = arg1;
299         le->args[2] = arg2;
300         le->args[3] = file_size;
301         le->nr_args = 4;
302         le->flags = flags;
303         logptr++;
304         logcount++;
305         if (logptr >= LOGSIZE)
306                 logptr = 0;
307 }
308
309 void
310 log4(int operation, int arg0, int arg1, enum opflags flags)
311 {
312         struct log_entry *le;
313
314         le = &oplog[logptr];
315         le->operation = operation;
316         if (closeopen)
317                 flags |= FL_CLOSE_OPEN;
318         le->args[0] = arg0;
319         le->args[1] = arg1;
320         le->args[2] = file_size;
321         le->nr_args = 3;
322         le->flags = flags;
323         logptr++;
324         logcount++;
325         if (logptr >= LOGSIZE)
326                 logptr = 0;
327 }
328
329 void
330 logdump(void)
331 {
332         FILE    *logopsf;
333         int     i, count, down;
334         struct log_entry        *lp;
335
336         prt("LOG DUMP (%d total operations):\n", logcount);
337
338         logopsf = fopen(opsfile, "w");
339         if (!logopsf)
340                 prterr(opsfile);
341
342         if (logcount < LOGSIZE) {
343                 i = 0;
344                 count = logcount;
345         } else {
346                 i = logptr;
347                 count = LOGSIZE;
348         }
349         for ( ; count > 0; count--) {
350                 bool overlap, overlap2;
351                 int opnum;
352
353                 opnum = i+1 + (logcount/LOGSIZE)*LOGSIZE;
354                 prt("%d(%3d mod 256): ", opnum, opnum%256);
355                 lp = &oplog[i];
356
357                 overlap = badoff >= lp->args[0] &&
358                           badoff < lp->args[0] + lp->args[1];
359
360                 if (lp->flags & FL_SKIPPED) {
361                         prt("SKIPPED (no operation)");
362                         goto skipped;
363                 }
364
365                 switch (lp->operation) {
366                 case OP_MAPREAD:
367                         prt("MAPREAD  0x%x thru 0x%x\t(0x%x bytes)",
368                             lp->args[0], lp->args[0] + lp->args[1] - 1,
369                             lp->args[1]);
370                         if (overlap)
371                                 prt("\t***RRRR***");
372                         break;
373                 case OP_MAPWRITE:
374                         prt("MAPWRITE 0x%x thru 0x%x\t(0x%x bytes)",
375                             lp->args[0], lp->args[0] + lp->args[1] - 1,
376                             lp->args[1]);
377                         if (overlap)
378                                 prt("\t******WWWW");
379                         break;
380                 case OP_READ:
381                         prt("READ     0x%x thru 0x%x\t(0x%x bytes)",
382                             lp->args[0], lp->args[0] + lp->args[1] - 1,
383                             lp->args[1]);
384                         if (overlap)
385                                 prt("\t***RRRR***");
386                         break;
387                 case OP_WRITE:
388                         prt("WRITE    0x%x thru 0x%x\t(0x%x bytes)",
389                             lp->args[0], lp->args[0] + lp->args[1] - 1,
390                             lp->args[1]);
391                         if (lp->args[0] > lp->args[2])
392                                 prt(" HOLE");
393                         else if (lp->args[0] + lp->args[1] > lp->args[2])
394                                 prt(" EXTEND");
395                         overlap = (badoff >= lp->args[0] ||
396                                    badoff >=lp->args[2]) &&
397                                   badoff < lp->args[0] + lp->args[1];
398                         if (overlap)
399                                 prt("\t***WWWW");
400                         break;
401                 case OP_TRUNCATE:
402                         down = lp->args[1] < lp->args[2];
403                         prt("TRUNCATE %s\tfrom 0x%x to 0x%x",
404                             down ? "DOWN" : "UP", lp->args[2], lp->args[1]);
405                         overlap = badoff >= lp->args[1 + !down] &&
406                                   badoff < lp->args[1 + !!down];
407                         if (overlap)
408                                 prt("\t******WWWW");
409                         break;
410                 case OP_FALLOCATE:
411                         /* 0: offset 1: length 2: where alloced */
412                         prt("FALLOC   0x%x thru 0x%x\t(0x%x bytes) ",
413                                 lp->args[0], lp->args[0] + lp->args[1],
414                                 lp->args[1]);
415                         if (lp->args[0] + lp->args[1] <= lp->args[2])
416                                 prt("INTERIOR");
417                         else if (lp->flags & FL_KEEP_SIZE)
418                                 prt("PAST_EOF");
419                         else
420                                 prt("EXTENDING");
421                         if (overlap)
422                                 prt("\t******FFFF");
423                         break;
424                 case OP_PUNCH_HOLE:
425                         prt("PUNCH    0x%x thru 0x%x\t(0x%x bytes)",
426                             lp->args[0], lp->args[0] + lp->args[1] - 1,
427                             lp->args[1]);
428                         if (overlap)
429                                 prt("\t******PPPP");
430                         break;
431                 case OP_ZERO_RANGE:
432                         prt("ZERO     0x%x thru 0x%x\t(0x%x bytes)",
433                             lp->args[0], lp->args[0] + lp->args[1] - 1,
434                             lp->args[1]);
435                         if (overlap)
436                                 prt("\t******ZZZZ");
437                         break;
438                 case OP_COLLAPSE_RANGE:
439                         prt("COLLAPSE 0x%x thru 0x%x\t(0x%x bytes)",
440                             lp->args[0], lp->args[0] + lp->args[1] - 1,
441                             lp->args[1]);
442                         if (overlap)
443                                 prt("\t******CCCC");
444                         break;
445                 case OP_INSERT_RANGE:
446                         prt("INSERT 0x%x thru 0x%x\t(0x%x bytes)",
447                             lp->args[0], lp->args[0] + lp->args[1] - 1,
448                             lp->args[1]);
449                         if (overlap)
450                                 prt("\t******IIII");
451                         break;
452                 case OP_CLONE_RANGE:
453                         prt("CLONE 0x%x thru 0x%x\t(0x%x bytes) to 0x%x thru 0x%x",
454                             lp->args[0], lp->args[0] + lp->args[1] - 1,
455                             lp->args[1],
456                             lp->args[2], lp->args[2] + lp->args[1] - 1);
457                         overlap2 = badoff >= lp->args[2] &&
458                                   badoff < lp->args[2] + lp->args[1];
459                         if (overlap && overlap2)
460                                 prt("\tJJJJ**JJJJ");
461                         else if (overlap)
462                                 prt("\tJJJJ******");
463                         else if (overlap2)
464                                 prt("\t******JJJJ");
465                         break;
466                 case OP_DEDUPE_RANGE:
467                         prt("DEDUPE 0x%x thru 0x%x\t(0x%x bytes) to 0x%x thru 0x%x",
468                             lp->args[0], lp->args[0] + lp->args[1] - 1,
469                             lp->args[1],
470                             lp->args[2], lp->args[2] + lp->args[1] - 1);
471                         overlap2 = badoff >= lp->args[2] &&
472                                   badoff < lp->args[2] + lp->args[1];
473                         if (overlap && overlap2)
474                                 prt("\tBBBB**BBBB");
475                         else if (overlap)
476                                 prt("\tBBBB******");
477                         else if (overlap2)
478                                 prt("\t******BBBB");
479                         break;
480                 case OP_FSYNC:
481                         prt("FSYNC");
482                         break;
483                 default:
484                         prt("BOGUS LOG ENTRY (operation code = %d)!",
485                             lp->operation);
486                         continue;
487                 }
488
489             skipped:
490                 if (lp->flags & FL_CLOSE_OPEN)
491                         prt("\n\t\tCLOSE/OPEN");
492                 prt("\n");
493                 i++;
494                 if (i == LOGSIZE)
495                         i = 0;
496
497                 if (logopsf) {
498                         int j;
499
500                         if (lp->flags & FL_SKIPPED)
501                                 fprintf(logopsf, "skip ");
502                         fprintf(logopsf, "%s", op_name(lp->operation));
503                         for (j = 0; j < lp->nr_args; j++)
504                                 fprintf(logopsf, " 0x%x", lp->args[j]);
505                         if (lp->flags & FL_KEEP_SIZE)
506                                 fprintf(logopsf, " keep_size");
507                         if (lp->flags & FL_CLOSE_OPEN)
508                                 fprintf(logopsf, " close_open");
509                         if (overlap)
510                                 fprintf(logopsf, " *");
511                         fprintf(logopsf, "\n");
512                 }
513         }
514
515         if (logopsf) {
516                 if (fclose(logopsf) != 0)
517                         prterr(opsfile);
518                 else
519                         prt("Log of operations saved to \"%s\"; "
520                             "replay with --replay-ops\n",
521                             opsfile);
522         }
523 }
524
525
526 void
527 save_buffer(char *buffer, off_t bufferlength, int fd)
528 {
529         off_t ret;
530         ssize_t byteswritten;
531
532         if (fd <= 0 || bufferlength == 0)
533                 return;
534
535         if (bufferlength > SSIZE_MAX) {
536                 prt("fsx flaw: overflow in save_buffer\n");
537                 exit(67);
538         }
539         if (lite) {
540                 off_t size_by_seek = lseek(fd, (off_t)0, SEEK_END);
541                 if (size_by_seek == (off_t)-1)
542                         prterr("save_buffer: lseek eof");
543                 else if (bufferlength > size_by_seek) {
544                         warn("save_buffer: .fsxgood file too short... will save 0x%llx bytes instead of 0x%llx\n", (unsigned long long)size_by_seek,
545                              (unsigned long long)bufferlength);
546                         bufferlength = size_by_seek;
547                 }
548         }
549
550         ret = lseek(fd, (off_t)0, SEEK_SET);
551         if (ret == (off_t)-1)
552                 prterr("save_buffer: lseek 0");
553         
554         byteswritten = write(fd, buffer, (size_t)bufferlength);
555         if (byteswritten != bufferlength) {
556                 if (byteswritten == -1)
557                         prterr("save_buffer write");
558                 else
559                         warn("save_buffer: short write, 0x%x bytes instead of 0x%llx\n",
560                              (unsigned)byteswritten,
561                              (unsigned long long)bufferlength);
562         }
563 }
564
565
566 void
567 report_failure(int status)
568 {
569         logdump();
570         
571         if (fsxgoodfd) {
572                 if (good_buf) {
573                         save_buffer(good_buf, file_size, fsxgoodfd);
574                         prt("Correct content saved for comparison\n");
575                         prt("(maybe hexdump \"%s\" vs \"%s\")\n",
576                             fname, goodfile);
577                 }
578                 close(fsxgoodfd);
579         }
580         exit(status);
581 }
582
583
/*
 * Read the two bytes at @cp as a big-endian 16-bit value: the first byte
 * is the high half.
 */
#define short_at(cp) \
	((unsigned short)((((unsigned char *)(cp))[0] << 8) | \
			  ((unsigned char *)(cp))[1]))
586
587 void
588 mark_log(void)
589 {
590         char command[256];
591         int ret;
592
593         snprintf(command, 256, "dmsetup message %s 0 mark %s.mark%d", logdev,
594                  bname, mark_nr);
595         ret = system(command);
596         if (ret) {
597                 prterr("dmsetup mark failed");
598                 exit(211);
599         }
600 }
601
602 void
603 dump_fsync_buffer(void)
604 {
605         char fname_buffer[PATH_MAX];
606         int good_fd;
607
608         if (!good_buf)
609                 return;
610
611         snprintf(fname_buffer, sizeof(fname_buffer), "%s%s.mark%d", dname,
612                  bname, mark_nr);
613         good_fd = open(fname_buffer, O_WRONLY|O_CREAT|O_TRUNC, 0666);
614         if (good_fd < 0) {
615                 prterr(fname_buffer);
616                 exit(212);
617         }
618
619         save_buffer(good_buf, file_size, good_fd);
620         close(good_fd);
621         prt("Dumped fsync buffer to %s\n", fname_buffer + dirpath);
622 }
623
624 void
625 check_buffers(char *buf, unsigned offset, unsigned size)
626 {
627         unsigned char c, t;
628         unsigned i = 0;
629         unsigned n = 0;
630         unsigned op = 0;
631         unsigned bad = 0;
632
633         if (memcmp(good_buf + offset, buf, size) != 0) {
634                 prt("READ BAD DATA: offset = 0x%x, size = 0x%x, fname = %s\n",
635                     offset, size, fname);
636                 prt("OFFSET\tGOOD\tBAD\tRANGE\n");
637                 while (size > 0) {
638                         c = good_buf[offset];
639                         t = buf[i];
640                         if (c != t) {
641                                 if (n < 16) {
642                                         bad = short_at(&buf[i]);
643                                         prt("0x%05x\t0x%04x\t0x%04x", offset,
644                                             short_at(&good_buf[offset]), bad);
645                                         op = buf[offset & 1 ? i+1 : i];
646                                         prt("\t0x%05x\n", n);
647                                         if (op)
648                                                 prt("operation# (mod 256) for "
649                                                   "the bad data may be %u\n",
650                                                 ((unsigned)op & 0xff));
651                                         else
652                                                 prt("operation# (mod 256) for "
653                                                   "the bad data unknown, check"
654                                                   " HOLE and EXTEND ops\n");
655                                 }
656                                 n++;
657                                 badoff = offset;
658                         }
659                         offset++;
660                         i++;
661                         size--;
662                 }
663                 report_failure(110);
664         }
665 }
666
667
668 void
669 check_size(void)
670 {
671         struct stat     statbuf;
672         off_t   size_by_seek;
673
674         if (fstat(fd, &statbuf)) {
675                 prterr("check_size: fstat");
676                 statbuf.st_size = -1;
677         }
678         size_by_seek = lseek(fd, (off_t)0, SEEK_END);
679         if (file_size != statbuf.st_size || file_size != size_by_seek) {
680                 prt("Size error: expected 0x%llx stat 0x%llx seek 0x%llx\n",
681                     (unsigned long long)file_size,
682                     (unsigned long long)statbuf.st_size,
683                     (unsigned long long)size_by_seek);
684                 report_failure(120);
685         }
686 }
687
688
689 void
690 check_trunc_hack(void)
691 {
692         struct stat statbuf;
693         off_t offset = file_size + (off_t)100000;
694
695         if (ftruncate(fd, file_size))
696                 goto ftruncate_err;
697         if (ftruncate(fd, offset))
698                 goto ftruncate_err;
699         fstat(fd, &statbuf);
700         if (statbuf.st_size != offset) {
701                 prt("no extend on truncate! not posix!\n");
702                 exit(130);
703         }
704         if (ftruncate(fd, file_size)) {
705 ftruncate_err:
706                 prterr("check_trunc_hack: ftruncate");
707                 exit(131);
708         }
709 }
710
711 void
712 doflush(unsigned offset, unsigned size)
713 {
714         unsigned pg_offset;
715         unsigned map_size;
716         char    *p;
717
718         if (o_direct == O_DIRECT)
719                 return;
720
721         pg_offset = offset & mmap_mask;
722         map_size  = pg_offset + size;
723
724         if ((p = (char *)mmap(0, map_size, PROT_READ | PROT_WRITE,
725                               MAP_FILE | MAP_SHARED, fd,
726                               (off_t)(offset - pg_offset))) == (char *)-1) {
727                 prterr("doflush: mmap");
728                 report_failure(202);
729         }
730         if (msync(p, map_size, MS_INVALIDATE) != 0) {
731                 prterr("doflush: msync");
732                 report_failure(203);
733         }
734         if (munmap(p, map_size) != 0) {
735                 prterr("doflush: munmap");
736                 report_failure(204);
737         }
738 }
739
740 void
741 doread(unsigned offset, unsigned size)
742 {
743         off_t ret;
744         unsigned iret;
745
746         offset -= offset % readbdy;
747         if (o_direct)
748                 size -= size % readbdy;
749         if (size == 0) {
750                 if (!quiet && testcalls > simulatedopcount && !o_direct)
751                         prt("skipping zero size read\n");
752                 log4(OP_READ, offset, size, FL_SKIPPED);
753                 return;
754         }
755         if (size + offset > file_size) {
756                 if (!quiet && testcalls > simulatedopcount)
757                         prt("skipping seek/read past end of file\n");
758                 log4(OP_READ, offset, size, FL_SKIPPED);
759                 return;
760         }
761
762         log4(OP_READ, offset, size, FL_NONE);
763
764         if (testcalls <= simulatedopcount)
765                 return;
766
767         if (!quiet &&
768                 ((progressinterval && testcalls % progressinterval == 0)  ||
769                 (debug &&
770                        (monitorstart == -1 ||
771                         (offset + size > monitorstart &&
772                         (monitorend == -1 || offset <= monitorend))))))
773                 prt("%lu read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
774                     offset, offset + size - 1, size);
775         ret = lseek(fd, (off_t)offset, SEEK_SET);
776         if (ret == (off_t)-1) {
777                 prterr("doread: lseek");
778                 report_failure(140);
779         }
780         iret = fsxread(fd, temp_buf, size, offset);
781         if (iret != size) {
782                 if (iret == -1)
783                         prterr("doread: read");
784                 else
785                         prt("short read: 0x%x bytes instead of 0x%x\n",
786                             iret, size);
787                 report_failure(141);
788         }
789         check_buffers(temp_buf, offset, size);
790 }
791
792 void
793 check_eofpage(char *s, unsigned offset, char *p, int size)
794 {
795         unsigned long last_page, should_be_zero;
796
797         if (offset + size <= (file_size & ~page_mask))
798                 return;
799         /*
800          * we landed in the last page of the file
801          * test to make sure the VM system provided 0's 
802          * beyond the true end of the file mapping
803          * (as required by mmap def in 1996 posix 1003.1)
804          */
805         last_page = ((unsigned long)p + (offset & page_mask) + size) & ~page_mask;
806
807         for (should_be_zero = last_page + (file_size & page_mask);
808              should_be_zero < last_page + page_size;
809              should_be_zero++)
810                 if (*(char *)should_be_zero) {
811                         prt("Mapped %s: non-zero data past EOF (0x%llx) page offset 0x%x is 0x%04x\n",
812                             s, file_size - 1, should_be_zero & page_mask,
813                             short_at(should_be_zero));
814                         report_failure(205);
815                 }
816 }
817
818 void
819 check_contents(void)
820 {
821         static char *check_buf;
822         unsigned offset = 0;
823         unsigned size = file_size;
824         unsigned map_offset;
825         unsigned map_size;
826         char *p;
827         off_t ret;
828         unsigned iret;
829
830         if (!check_buf) {
831                 check_buf = (char *) malloc(maxfilelen + writebdy);
832                 assert(check_buf != NULL);
833                 check_buf = round_ptr_up(check_buf, writebdy, 0);
834                 memset(check_buf, '\0', maxfilelen);
835         }
836
837         if (o_direct)
838                 size -= size % readbdy;
839         if (size == 0)
840                 return;
841
842         ret = lseek(fd, (off_t)offset, SEEK_SET);
843         if (ret == (off_t)-1) {
844                 prterr("doread: lseek");
845                 report_failure(140);
846         }
847
848         iret = fsxread(fd, check_buf, size, offset);
849         if (iret != size) {
850                 if (iret == -1)
851                         prterr("check_contents: read");
852                 else
853                         prt("short check read: 0x%x bytes instead of 0x%x\n",
854                             iret, size);
855                 report_failure(141);
856         }
857         check_buffers(check_buf, offset, size);
858
859         /* Map eof page, check it */
860         map_offset = size - (size & PAGE_MASK);
861         if (map_offset == size)
862                 map_offset -= PAGE_SIZE;
863         map_size  = size - map_offset;
864
865         p = mmap(0, map_size, PROT_READ, MAP_SHARED, fd, map_offset);
866         if (p == MAP_FAILED) {
867                 prterr("check_contents: mmap");
868                 report_failure(190);
869         }
870         check_eofpage("check_contents", map_offset, p, map_size);
871
872         if (munmap(p, map_size) != 0) {
873                 prterr("check_contents: munmap");
874                 report_failure(191);
875         }
876 }
877
878 void
879 domapread(unsigned offset, unsigned size)
880 {
881         unsigned pg_offset;
882         unsigned map_size;
883         char    *p;
884
885         offset -= offset % readbdy;
886         if (size == 0) {
887                 if (!quiet && testcalls > simulatedopcount)
888                         prt("skipping zero size read\n");
889                 log4(OP_MAPREAD, offset, size, FL_SKIPPED);
890                 return;
891         }
892         if (size + offset > file_size) {
893                 if (!quiet && testcalls > simulatedopcount)
894                         prt("skipping seek/read past end of file\n");
895                 log4(OP_MAPREAD, offset, size, FL_SKIPPED);
896                 return;
897         }
898
899         log4(OP_MAPREAD, offset, size, FL_NONE);
900
901         if (testcalls <= simulatedopcount)
902                 return;
903
904         if (!quiet &&
905                 ((progressinterval && testcalls % progressinterval == 0) ||
906                        (debug &&
907                        (monitorstart == -1 ||
908                         (offset + size > monitorstart &&
909                         (monitorend == -1 || offset <= monitorend))))))
910                 prt("%lu mapread\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
911                     offset, offset + size - 1, size);
912
913         pg_offset = offset & PAGE_MASK;
914         map_size  = pg_offset + size;
915
916         if ((p = (char *)mmap(0, map_size, PROT_READ, MAP_SHARED, fd,
917                               (off_t)(offset - pg_offset))) == (char *)-1) {
918                 prterr("domapread: mmap");
919                 report_failure(190);
920         }
921         memcpy(temp_buf, p + pg_offset, size);
922
923         check_eofpage("Read", offset, p, size);
924
925         if (munmap(p, map_size) != 0) {
926                 prterr("domapread: munmap");
927                 report_failure(191);
928         }
929
930         check_buffers(temp_buf, offset, size);
931 }
932
933
934 void
935 gendata(char *original_buf, char *good_buf, unsigned offset, unsigned size)
936 {
937         while (size--) {
938                 if (filldata) {
939                         good_buf[offset] = filldata;
940                 } else {
941                         good_buf[offset] = testcalls % 256;
942                         if (offset % 2)
943                                 good_buf[offset] += original_buf[offset];
944                 }
945                 offset++;
946         }
947 }
948
949
950 void
951 dowrite(unsigned offset, unsigned size)
952 {
953         off_t ret;
954         unsigned iret;
955
956         offset -= offset % writebdy;
957         if (o_direct)
958                 size -= size % writebdy;
959         if (size == 0) {
960                 if (!quiet && testcalls > simulatedopcount && !o_direct)
961                         prt("skipping zero size write\n");
962                 log4(OP_WRITE, offset, size, FL_SKIPPED);
963                 return;
964         }
965
966         log4(OP_WRITE, offset, size, FL_NONE);
967
968         gendata(original_buf, good_buf, offset, size);
969         if (file_size < offset + size) {
970                 if (file_size < offset)
971                         memset(good_buf + file_size, '\0', offset - file_size);
972                 file_size = offset + size;
973                 if (lite) {
974                         warn("Lite file size bug in fsx!");
975                         report_failure(149);
976                 }
977         }
978
979         if (testcalls <= simulatedopcount)
980                 return;
981
982         if (!quiet &&
983                 ((progressinterval && testcalls % progressinterval == 0) ||
984                        (debug &&
985                        (monitorstart == -1 ||
986                         (offset + size > monitorstart &&
987                         (monitorend == -1 || offset <= monitorend))))))
988                 prt("%lu write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
989                     offset, offset + size - 1, size);
990         ret = lseek(fd, (off_t)offset, SEEK_SET);
991         if (ret == (off_t)-1) {
992                 prterr("dowrite: lseek");
993                 report_failure(150);
994         }
995         iret = fsxwrite(fd, good_buf + offset, size, offset);
996         if (iret != size) {
997                 if (iret == -1)
998                         prterr("dowrite: write");
999                 else
1000                         prt("short write: 0x%x bytes instead of 0x%x\n",
1001                             iret, size);
1002                 report_failure(151);
1003         }
1004         if (do_fsync) {
1005                 if (fsync(fd)) {
1006                         prt("fsync() failed: %s\n", strerror(errno));
1007                         report_failure(152);
1008                 }
1009         }
1010         if (flush) {
1011                 doflush(offset, size);
1012         }
1013 }
1014
1015
1016 void
1017 domapwrite(unsigned offset, unsigned size)
1018 {
1019         unsigned pg_offset;
1020         unsigned map_size;
1021         off_t    cur_filesize;
1022         char    *p;
1023
1024         offset -= offset % writebdy;
1025         if (size == 0) {
1026                 if (!quiet && testcalls > simulatedopcount)
1027                         prt("skipping zero size write\n");
1028                 log4(OP_MAPWRITE, offset, size, FL_SKIPPED);
1029                 return;
1030         }
1031         cur_filesize = file_size;
1032
1033         log4(OP_MAPWRITE, offset, size, FL_NONE);
1034
1035         gendata(original_buf, good_buf, offset, size);
1036         if (file_size < offset + size) {
1037                 if (file_size < offset)
1038                         memset(good_buf + file_size, '\0', offset - file_size);
1039                 file_size = offset + size;
1040                 if (lite) {
1041                         warn("Lite file size bug in fsx!");
1042                         report_failure(200);
1043                 }
1044         }
1045
1046         if (testcalls <= simulatedopcount)
1047                 return;
1048
1049         if (!quiet &&
1050                 ((progressinterval && testcalls % progressinterval == 0) ||
1051                        (debug &&
1052                        (monitorstart == -1 ||
1053                         (offset + size > monitorstart &&
1054                         (monitorend == -1 || offset <= monitorend))))))
1055                 prt("%lu mapwrite\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
1056                     offset, offset + size - 1, size);
1057
1058         if (file_size > cur_filesize) {
1059                 if (ftruncate(fd, file_size) == -1) {
1060                         prterr("domapwrite: ftruncate");
1061                         exit(201);
1062                 }
1063         }
1064         pg_offset = offset & PAGE_MASK;
1065         map_size  = pg_offset + size;
1066
1067         if ((p = (char *)mmap(0, map_size, PROT_READ | PROT_WRITE,
1068                               MAP_FILE | MAP_SHARED, fd,
1069                               (off_t)(offset - pg_offset))) == (char *)-1) {
1070                 prterr("domapwrite: mmap");
1071                 report_failure(202);
1072         }
1073         memcpy(p + pg_offset, good_buf + offset, size);
1074         if (msync(p, map_size, MS_SYNC) != 0) {
1075                 prterr("domapwrite: msync");
1076                 report_failure(203);
1077         }
1078
1079         check_eofpage("Write", offset, p, size);
1080
1081         if (munmap(p, map_size) != 0) {
1082                 prterr("domapwrite: munmap");
1083                 report_failure(204);
1084         }
1085 }
1086
1087
1088 void
1089 dotruncate(unsigned size)
1090 {
1091         int oldsize = file_size;
1092
1093         size -= size % truncbdy;
1094         if (size > biggest) {
1095                 biggest = size;
1096                 if (!quiet && testcalls > simulatedopcount)
1097                         prt("truncating to largest ever: 0x%x\n", size);
1098         }
1099
1100         log4(OP_TRUNCATE, 0, size, FL_NONE);
1101
1102         if (size > file_size)
1103                 memset(good_buf + file_size, '\0', size - file_size);
1104         file_size = size;
1105
1106         if (testcalls <= simulatedopcount)
1107                 return;
1108         
1109         if ((progressinterval && testcalls % progressinterval == 0) ||
1110             (debug && (monitorstart == -1 || monitorend == -1 ||
1111                       size <= monitorend)))
1112                 prt("%lu trunc\tfrom 0x%x to 0x%x\n", testcalls, oldsize, size);
1113         if (ftruncate(fd, (off_t)size) == -1) {
1114                 prt("ftruncate1: %x\n", size);
1115                 prterr("dotruncate: ftruncate");
1116                 report_failure(160);
1117         }
1118 }
1119
#ifdef FALLOC_FL_PUNCH_HOLE
/*
 * Punch a hole over [offset, offset + length) with fallocate() using
 * FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, then zero the matching part
 * of the expected image.
 *
 * Zero-length requests and requests starting at or past EOF are logged as
 * skipped.  While testcalls <= simulatedopcount the operation is only
 * logged.
 */
void
do_punch_hole(unsigned offset, unsigned length)
{
	const int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
	unsigned end_offset;
	int zero_from;
	int zero_len;
	int announce;

	if (length == 0) {
		if (!quiet && testcalls > simulatedopcount)
			prt("skipping zero length punch hole\n");
		log4(OP_PUNCH_HOLE, offset, length, FL_SKIPPED);
		return;
	}

	if (file_size <= (loff_t)offset) {
		if (!quiet && testcalls > simulatedopcount)
			prt("skipping hole punch off the end of the file\n");
		log4(OP_PUNCH_HOLE, offset, length, FL_SKIPPED);
		return;
	}

	end_offset = offset + length;

	log4(OP_PUNCH_HOLE, offset, length, FL_NONE);

	if (testcalls <= simulatedopcount)
		return;

	/* Progress tick, or debug output for the monitored range. */
	announce = progressinterval && testcalls % progressinterval == 0;
	if (!announce && debug)
		announce = monitorstart == -1 || monitorend == -1 ||
			   end_offset <= monitorend;
	if (announce)
		prt("%lu punch\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
			offset, offset+length, length);

	if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
		prt("punch hole: 0x%x to 0x%x\n", offset, offset + length);
		prterr("do_punch_hole: fallocate");
		report_failure(161);
	}

	/* Zero the image over the hole, clamped to the current file size. */
	zero_from = offset < file_size ? offset : file_size;
	if (zero_from + length <= file_size)
		zero_len = length;
	else
		zero_len = file_size - zero_from;
	memset(good_buf + zero_from, '\0', zero_len);
}

#else
/* Built without FALLOC_FL_PUNCH_HOLE: hole punching does nothing. */
void
do_punch_hole(unsigned offset, unsigned length)
{
	(void)offset;
	(void)length;
}
#endif
1176
#ifdef FALLOC_FL_ZERO_RANGE
/*
 * Zero [offset, offset + length) with fallocate(FALLOC_FL_ZERO_RANGE) and
 * zero the same span of the expected image.
 *
 * keep_size is recorded in the log entry as FL_KEEP_SIZE and keeps the end
 * of the range out of the "largest ever" tracking; the fallocate mode is
 * FALLOC_FL_ZERO_RANGE either way.  Zero-length requests are logged as
 * skipped.  While testcalls <= simulatedopcount the operation is only
 * logged.
 */
void
do_zero_range(unsigned offset, unsigned length, int keep_size)
{
	const int mode = FALLOC_FL_ZERO_RANGE;
	const int size_flag = keep_size ? FL_KEEP_SIZE : FL_NONE;
	unsigned end_offset;
	int announce;

	if (length == 0) {
		if (!quiet && testcalls > simulatedopcount)
			prt("skipping zero length zero range\n");
		log4(OP_ZERO_RANGE, offset, length, FL_SKIPPED | size_flag);
		return;
	}

	/* With keep_size the range end takes no part in size tracking. */
	if (keep_size)
		end_offset = 0;
	else
		end_offset = offset + length;

	if (end_offset > biggest) {
		biggest = end_offset;
		if (!quiet && testcalls > simulatedopcount)
			prt("zero_range to largest ever: 0x%x\n", end_offset);
	}

	/* The flags argument records whether keep_size was requested. */
	log4(OP_ZERO_RANGE, offset, length, size_flag);

	if (testcalls <= simulatedopcount)
		return;

	/* Progress tick, or debug output for the monitored range. */
	announce = progressinterval && testcalls % progressinterval == 0;
	if (!announce && debug)
		announce = monitorstart == -1 || monitorend == -1 ||
			   end_offset <= monitorend;
	if (announce)
		prt("%lu zero\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
			offset, offset+length, length);

	if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
		prt("zero range: 0x%x to 0x%x\n", offset, offset + length);
		prterr("do_zero_range: fallocate");
		report_failure(161);
	}

	memset(good_buf + offset, '\0', length);
}

#else
/* Built without FALLOC_FL_ZERO_RANGE: zero range does nothing. */
void
do_zero_range(unsigned offset, unsigned length, int keep_size)
{
	(void)offset;
	(void)length;
	(void)keep_size;
}
#endif
1234
#ifdef FALLOC_FL_COLLAPSE_RANGE
/*
 * Remove [offset, offset + length) from the file with
 * fallocate(FALLOC_FL_COLLAPSE_RANGE) and apply the same shift to the
 * expected image, shrinking file_size by length.
 *
 * Zero-length requests and ranges that reach or pass EOF are logged as
 * skipped.  While testcalls <= simulatedopcount the operation is only
 * logged.
 */
void
do_collapse_range(unsigned offset, unsigned length)
{
	const int mode = FALLOC_FL_COLLAPSE_RANGE;
	unsigned end_offset;
	int announce;

	if (length == 0) {
		if (!quiet && testcalls > simulatedopcount)
			prt("skipping zero length collapse range\n");
		log4(OP_COLLAPSE_RANGE, offset, length, FL_SKIPPED);
		return;
	}

	end_offset = offset + length;
	if ((loff_t)end_offset >= file_size) {
		if (!quiet && testcalls > simulatedopcount)
			prt("skipping collapse range behind EOF\n");
		log4(OP_COLLAPSE_RANGE, offset, length, FL_SKIPPED);
		return;
	}

	log4(OP_COLLAPSE_RANGE, offset, length, FL_NONE);

	if (testcalls <= simulatedopcount)
		return;

	/* Progress tick, or debug output for the monitored range. */
	announce = progressinterval && testcalls % progressinterval == 0;
	if (!announce && debug)
		announce = monitorstart == -1 || monitorend == -1 ||
			   end_offset <= monitorend;
	if (announce)
		prt("%lu collapse\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
			offset, offset+length, length);

	if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
		prt("collapse range: 0x%x to 0x%x\n", offset, offset + length);
		prterr("do_collapse_range: fallocate");
		report_failure(161);
	}

	/* Slide everything after the removed range down over it. */
	memmove(good_buf + offset, good_buf + end_offset,
		file_size - end_offset);
	file_size -= length;
}

#else
/* Built without FALLOC_FL_COLLAPSE_RANGE: collapse range does nothing. */
void
do_collapse_range(unsigned offset, unsigned length)
{
	(void)offset;
	(void)length;
}
#endif
1286
#ifdef FALLOC_FL_INSERT_RANGE
/*
 * Insert length bytes at offset with fallocate(FALLOC_FL_INSERT_RANGE) and
 * apply the same shift to the expected image: the old contents from offset
 * onward move up by length, the inserted span is zeroed, and file_size
 * grows by length.
 *
 * Zero-length requests and offsets at or past EOF are logged as skipped.
 * While testcalls <= simulatedopcount the operation is only logged.
 */
void
do_insert_range(unsigned offset, unsigned length)
{
	const int mode = FALLOC_FL_INSERT_RANGE;
	const unsigned end_offset = offset + length;
	int announce;

	if (length == 0) {
		if (!quiet && testcalls > simulatedopcount)
			prt("skipping zero length insert range\n");
		log4(OP_INSERT_RANGE, offset, length, FL_SKIPPED);
		return;
	}

	if ((loff_t)offset >= file_size) {
		if (!quiet && testcalls > simulatedopcount)
			prt("skipping insert range behind EOF\n");
		log4(OP_INSERT_RANGE, offset, length, FL_SKIPPED);
		return;
	}

	log4(OP_INSERT_RANGE, offset, length, FL_NONE);

	if (testcalls <= simulatedopcount)
		return;

	/* Progress tick, or debug output for the monitored range. */
	announce = progressinterval && testcalls % progressinterval == 0;
	if (!announce && debug)
		announce = monitorstart == -1 || monitorend == -1 ||
			   end_offset <= monitorend;
	if (announce)
		prt("%lu insert\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
			offset, offset+length, length);

	if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
		prt("insert range: 0x%x to 0x%x\n", offset, offset + length);
		prterr("do_insert_range: fallocate");
		report_failure(161);
	}

	/* Move the tail up first, then clear the space it vacated. */
	memmove(good_buf + end_offset, good_buf + offset,
		file_size - offset);
	memset(good_buf + offset, '\0', length);
	file_size += length;
}

#else
/* Built without FALLOC_FL_INSERT_RANGE: insert range does nothing. */
void
do_insert_range(unsigned offset, unsigned length)
{
	(void)offset;
	(void)length;
}
#endif
1339
#ifdef FICLONERANGE
/*
 * Probe whether the filesystem supports FICLONERANGE on the test file.
 *
 * Issues the ioctl with the file as its own source and all other fields
 * zero.  Returns 0 (and prints a notice unless quiet) only when the ioctl
 * fails with EOPNOTSUPP or ENOTTY; any other outcome returns 1.
 */
int
test_clone_range(void)
{
	struct file_clone_range fcr = {
		.src_fd = fd,
	};

	/*
	 * Compare errno, do not assign it: an assignment here would make the
	 * condition true for every failure and overwrite the real error code.
	 */
	if (ioctl(fd, FICLONERANGE, &fcr) &&
	    (errno == EOPNOTSUPP || errno == ENOTTY)) {
		if (!quiet)
			fprintf(stderr,
				"main: filesystem does not support "
				"clone range, disabling!\n");
		return 0;
	}

	return 1;
}

/*
 * Clone [offset, offset + length) of the file onto itself at dest with
 * FICLONERANGE and apply the same copy to the expected image.
 *
 * Zero-length requests and source offsets at or past EOF are logged as
 * skipped.  The end of the destination is tracked in biggest.  When dest
 * lies past the old EOF the gap is zeroed in good_buf, and file_size grows
 * to cover the destination.  While testcalls <= simulatedopcount the
 * operation is only logged.
 */
void
do_clone_range(unsigned offset, unsigned length, unsigned dest)
{
	struct file_clone_range fcr = {
		.src_fd = fd,
		.src_offset = offset,
		.src_length = length,
		.dest_offset = dest,
	};

	if (length == 0) {
		if (!quiet && testcalls > simulatedopcount)
			prt("skipping zero length clone range\n");
		log5(OP_CLONE_RANGE, offset, length, dest, FL_SKIPPED);
		return;
	}

	if ((loff_t)offset >= file_size) {
		if (!quiet && testcalls > simulatedopcount)
			prt("skipping clone range behind EOF\n");
		log5(OP_CLONE_RANGE, offset, length, dest, FL_SKIPPED);
		return;
	}

	if (dest + length > biggest) {
		biggest = dest + length;
		if (!quiet && testcalls > simulatedopcount)
			prt("cloning to largest ever: 0x%x\n", dest + length);
	}

	log5(OP_CLONE_RANGE, offset, length, dest, FL_NONE);

	if (testcalls <= simulatedopcount)
		return;

	/* Progress tick, or debug output for the monitored range. */
	if ((progressinterval && testcalls % progressinterval == 0) ||
	    (debug && (monitorstart == -1 || monitorend == -1 ||
		       dest <= monitorstart || dest + length <= monitorend))) {
		prt("%lu clone\tfrom 0x%x to 0x%x, (0x%x bytes) at 0x%x\n",
			testcalls, offset, offset+length, length, dest);
	}

	if (ioctl(fd, FICLONERANGE, &fcr) == -1) {
		prt("clone range: 0x%x to 0x%x at 0x%x\n", offset,
				offset + length, dest);
		prterr("do_clone_range: FICLONERANGE");
		report_failure(161);
	}

	/* Mirror the clone in the image, zero-filling any gap before dest. */
	memcpy(good_buf + dest, good_buf + offset, length);
	if (dest > file_size)
		memset(good_buf + file_size, '\0', dest - file_size);
	if (dest + length > file_size)
		file_size = dest + length;
}

#else
/* Built without FICLONERANGE: report clone range as unsupported. */
int
test_clone_range(void)
{
	return 0;
}

/* Built without FICLONERANGE: clone range does nothing. */
void
do_clone_range(unsigned offset, unsigned length, unsigned dest)
{
	return;
}
#endif
1429
#ifdef FIDEDUPERANGE
/*
 * Probe whether the filesystem supports FIDEDUPERANGE on the test file.
 *
 * The file is temporarily extended to two blocks if it is shorter, the
 * first block is deduped against the second, and the file is truncated
 * back afterwards.  Returns 0 (and prints a notice unless quiet) when the
 * ioctl or its per-destination status reports EOPNOTSUPP or ENOTTY, and 1
 * otherwise.
 */
int
test_dedupe_range(void)
{
	struct file_dedupe_range *req;
	off_t probe_len;
	int must_grow;
	int err = 0;
	int supported = 1;

	/* One request header followed by a single destination record. */
	req = calloc(1, sizeof(struct file_dedupe_range) +
			sizeof(struct file_dedupe_range_info));
	if (!req) {
		prterr("do_dedupe_range: malloc");
		report_failure(161);
	}

	/* Make sure we have at least two blocks */
	probe_len = block_size * 2;
	must_grow = file_size < probe_len;
	if (must_grow && ftruncate(fd, probe_len)) {
		warn("main: ftruncate");
		exit(132);
	}

	/* Try to dedupe block 0 against block 1 of the same file. */
	req->src_length = block_size;
	req->dest_count = 1;
	req->info[0].dest_fd = fd;
	req->info[0].dest_offset = block_size;

	/* A failure shows up either in errno or as a negative status. */
	if (ioctl(fd, FIDEDUPERANGE, req))
		err = errno;
	else if (req->info[0].status < 0)
		err = -req->info[0].status;

	if (err == EOPNOTSUPP || err == ENOTTY) {
		if (!quiet)
			fprintf(stderr,
				"main: filesystem does not support "
				"dedupe range, disabling!\n");
		supported = 0;
	}

	/* Put the file back the way it was. */
	if (must_grow && ftruncate(fd, file_size)) {
		warn("main: ftruncate");
		exit(132);
	}

	free(req);
	return supported;
}

/*
 * Ask the filesystem to dedupe [offset, offset + length) of the file
 * against the same file at dest with FIDEDUPERANGE.
 *
 * The expected image and file_size are left untouched.  Zero-length
 * requests and source offsets at or past EOF are logged as skipped.  Both a
 * failing ioctl and a negative per-destination status are reported as
 * failures.  While testcalls <= simulatedopcount the operation is only
 * logged.
 */
void
do_dedupe_range(unsigned offset, unsigned length, unsigned dest)
{
	struct file_dedupe_range *req;
	int announce;

	if (length == 0) {
		if (!quiet && testcalls > simulatedopcount)
			prt("skipping zero length dedupe range\n");
		log5(OP_DEDUPE_RANGE, offset, length, dest, FL_SKIPPED);
		return;
	}

	if ((loff_t)offset >= file_size) {
		if (!quiet && testcalls > simulatedopcount)
			prt("skipping dedupe range behind EOF\n");
		log5(OP_DEDUPE_RANGE, offset, length, dest, FL_SKIPPED);
		return;
	}

	log5(OP_DEDUPE_RANGE, offset, length, dest, FL_NONE);

	if (testcalls <= simulatedopcount)
		return;

	/* Progress tick, or debug output for the monitored range. */
	announce = progressinterval && testcalls % progressinterval == 0;
	if (!announce && debug)
		announce = monitorstart == -1 || monitorend == -1 ||
			   dest <= monitorstart ||
			   dest + length <= monitorend;
	if (announce)
		prt("%lu dedupe\tfrom 0x%x to 0x%x, (0x%x bytes) at 0x%x\n",
			testcalls, offset, offset+length, length, dest);

	/* One request header followed by a single destination record. */
	req = calloc(1, sizeof(struct file_dedupe_range) +
			sizeof(struct file_dedupe_range_info));
	if (!req) {
		prterr("do_dedupe_range: malloc");
		report_failure(161);
	}

	/* Dedupe data blocks */
	req->src_offset = offset;
	req->src_length = length;
	req->dest_count = 1;
	req->info[0].dest_fd = fd;
	req->info[0].dest_offset = dest;

	if (ioctl(fd, FIDEDUPERANGE, req) == -1) {
		prt("dedupe range: 0x%x to 0x%x at 0x%x\n", offset,
				offset + length, dest);
		prterr("do_dedupe_range(0): FIDEDUPERANGE");
		report_failure(161);
	} else if (req->info[0].status < 0) {
		/* Move the status into errno so prterr() can describe it. */
		errno = -req->info[0].status;
		prt("dedupe range: 0x%x to 0x%x at 0x%x\n", offset,
				offset + length, dest);
		prterr("do_dedupe_range(1): FIDEDUPERANGE");
		report_failure(161);
	}

	free(req);
}

#else
/* Built without FIDEDUPERANGE: report dedupe range as unsupported. */
int
test_dedupe_range(void)
{
	return 0;
}

/* Built without FIDEDUPERANGE: dedupe range does nothing. */
void
do_dedupe_range(unsigned offset, unsigned length, unsigned dest)
{
	(void)offset;
	(void)length;
	(void)dest;
}
#endif
1560
#ifdef HAVE_LINUX_FALLOC_H
/*
 * Preallocate [offset, offset + length) with fallocate().
 *
 * fallocate is basically a no-op unless extending, then a lot like a
 * truncate: without keep_size a range ending past EOF zero-fills the new
 * tail of the expected image and raises file_size.  With keep_size the
 * call uses FALLOC_FL_KEEP_SIZE and neither file_size nor biggest changes.
 * Zero-length requests are logged as skipped.  While
 * testcalls <= simulatedopcount only the bookkeeping is done.
 */
void
do_preallocate(unsigned offset, unsigned length, int keep_size)
{
	const int mode = keep_size ? FALLOC_FL_KEEP_SIZE : 0;
	const int size_flag = keep_size ? FL_KEEP_SIZE : FL_NONE;
	unsigned end_offset;
	int announce;

	if (length == 0) {
		if (!quiet && testcalls > simulatedopcount)
			prt("skipping zero length fallocate\n");
		log4(OP_FALLOCATE, offset, length, FL_SKIPPED | size_flag);
		return;
	}

	/* With keep_size the range end takes no part in size tracking. */
	if (keep_size)
		end_offset = 0;
	else
		end_offset = offset + length;

	if (end_offset > biggest) {
		biggest = end_offset;
		if (!quiet && testcalls > simulatedopcount)
			prt("fallocating to largest ever: 0x%x\n", end_offset);
	}

	/* The flags argument records whether keep_size was requested. */
	log4(OP_FALLOCATE, offset, length, size_flag);

	if (end_offset > file_size) {
		memset(good_buf + file_size, '\0', end_offset - file_size);
		file_size = end_offset;
	}

	if (testcalls <= simulatedopcount)
		return;

	/* Progress tick, or debug output for the monitored range. */
	announce = progressinterval && testcalls % progressinterval == 0;
	if (!announce && debug)
		announce = monitorstart == -1 || monitorend == -1 ||
			   end_offset <= monitorend;
	if (announce)
		prt("%lu falloc\tfrom 0x%x to 0x%x (0x%x bytes)\n", testcalls,
				offset, offset + length, length);

	if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
		prt("fallocate: 0x%x to 0x%x\n", offset, offset + length);
		prterr("do_preallocate: fallocate");
		report_failure(161);
	}
}
#else
/* Built without HAVE_LINUX_FALLOC_H: preallocation does nothing. */
void
do_preallocate(unsigned offset, unsigned length, int keep_size)
{
	(void)offset;
	(void)length;
	(void)keep_size;
}
#endif
1619
1620 void
1621 writefileimage()
1622 {
1623         ssize_t iret;
1624
1625         if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
1626                 prterr("writefileimage: lseek");
1627                 report_failure(171);
1628         }
1629         iret = write(fd, good_buf, file_size);
1630         if ((off_t)iret != file_size) {
1631                 if (iret == -1)
1632                         prterr("writefileimage: write");
1633                 else
1634                         prt("short write: 0x%x bytes instead of 0x%llx\n",
1635                             iret, (unsigned long long)file_size);
1636                 report_failure(172);
1637         }
1638         if (lite ? 0 : ftruncate(fd, file_size) == -1) {
1639                 prt("ftruncate2: %llx\n", (unsigned long long)file_size);
1640                 prterr("writefileimage: ftruncate");
1641                 report_failure(173);
1642         }
1643 }
1644
1645
1646 void
1647 docloseopen(void)
1648
1649         if (testcalls <= simulatedopcount)
1650                 return;
1651
1652         if (debug)
1653                 prt("%lu close/open\n", testcalls);
1654         if (close(fd)) {
1655                 prterr("docloseopen: close");
1656                 report_failure(180);
1657         }
1658         fd = open(fname, O_RDWR|o_direct, 0);
1659         if (fd < 0) {
1660                 prterr("docloseopen: open");
1661                 report_failure(181);
1662         }
1663 }
1664
1665 void
1666 dofsync(void)
1667 {
1668         int ret;
1669
1670         if (testcalls <= simulatedopcount)
1671                 return;
1672         if (debug)
1673                 prt("%lu fsync\n", testcalls);
1674         log4(OP_FSYNC, 0, 0, 0);
1675         ret = fsync(fd);
1676         if (ret < 0) {
1677                 prterr("dofsync");
1678                 report_failure(210);
1679         }
1680         mark_log();
1681         dump_fsync_buffer();
1682         mark_nr++;
1683 }
1684
/*
 * TRIM_OFF: reduce off modulo size so it lands inside [0, size); a size of
 * zero forces off to 0.  off must be a modifiable lvalue.
 */
#define TRIM_OFF(off, size)			\
do {						\
	if (!(size)) {				\
		(off) = 0;			\
	} else {				\
		(off) %= (size);		\
	}					\
} while (0)

/*
 * TRIM_LEN: shorten len so that off + len does not go past size.  len must
 * be a modifiable lvalue.
 */
#define TRIM_LEN(off, len, size)		\
do {						\
	if ((size) < (off) + (len)) {		\
		(len) = (size) - (off);		\
	}					\
} while (0)

/* TRIM_OFF_LEN: apply TRIM_OFF to off, then TRIM_LEN to len. */
#define TRIM_OFF_LEN(off, len, size)		\
do {						\
	TRIM_OFF(off, size);			\
	TRIM_LEN(off, len, size);		\
} while (0)
1704
1705 void
1706 cleanup(int sig)
1707 {
1708         if (sig)
1709                 prt("signal %d\n", sig);
1710         prt("testcalls = %lu\n", testcalls);
1711         exit(sig);
1712 }
1713
1714 static int
1715 op_args_count(int operation)
1716 {
1717         switch (operation) {
1718         case OP_CLONE_RANGE:
1719         case OP_DEDUPE_RANGE:
1720                 return 4;
1721         default:
1722                 return 3;
1723         }
1724 }
1725
1726 static int
1727 read_op(struct log_entry *log_entry)
1728 {
1729         char line[256];
1730
1731         memset(log_entry, 0, sizeof(*log_entry));
1732         log_entry->operation = -1;
1733
1734         while (log_entry->operation == -1) {
1735                 char *str;
1736                 int i;
1737
1738                 do {
1739                         if (!fgets(line, sizeof(line), replayopsf)) {
1740                                 if (feof(replayopsf)) {
1741                                         replayopsf = NULL;
1742                                         return 0;
1743                                 }
1744                                 goto fail;
1745                         }
1746                         str = strtok(line, " \t\n");
1747                 } while (!str || str[0] == '#');
1748
1749                 if (strcmp(str, "skip") == 0) {
1750                         log_entry->flags |= FL_SKIPPED;
1751                         str = strtok(NULL, " \t\n");
1752                         if (!str)
1753                                 goto fail;
1754                 }
1755                 log_entry->operation = op_code(str);
1756                 if (log_entry->operation == -1)
1757                         goto fail;
1758                 log_entry->nr_args = op_args_count(log_entry->operation);
1759                 for (i = 0; i < log_entry->nr_args; i++) {
1760                         char *end;
1761
1762                         str = strtok(NULL, " \t\n");
1763                         if (!str)
1764                                 goto fail;
1765                         log_entry->args[i] = strtoul(str, &end, 0);
1766                         if (*end)
1767                                 goto fail;
1768                 }
1769                 while ((str = strtok(NULL, " \t\n"))) {
1770                         if (strcmp(str, "keep_size") == 0)
1771                                 log_entry->flags |= FL_KEEP_SIZE;
1772                         else if (strcmp(str, "close_open") == 0)
1773                                 log_entry->flags |= FL_CLOSE_OPEN;
1774                         else if (strcmp(str, "*") == 0)
1775                                 ;  /* overlap marker; ignore */
1776                         else
1777                                 goto fail;
1778                 }
1779         }
1780         return 1;
1781
1782 fail:
1783         fprintf(stderr, "%s: parse error\n", replayops);
1784         fclose(replayopsf);
1785         replayopsf = NULL;
1786         cleanup(100);  /* doesn't return */
1787         return 0;
1788 }
1789
1790
1791 int
1792 test(void)
1793 {
1794         unsigned long   offset, offset2;
1795         unsigned long   size;
1796         unsigned long   rv;
1797         unsigned long   op;
1798         int             keep_size = 0;
1799
1800         if (simulatedopcount > 0 && testcalls == simulatedopcount)
1801                 writefileimage();
1802
1803         testcalls++;
1804
1805         if (debugstart > 0 && testcalls >= debugstart)
1806                 debug = 1;
1807
1808         if (!quiet && testcalls < simulatedopcount && testcalls % 100000 == 0)
1809                 prt("%lu...\n", testcalls);
1810
1811         if (replayopsf) {
1812                 struct log_entry log_entry;
1813
1814                 while (read_op(&log_entry)) {
1815                         if (log_entry.flags & FL_SKIPPED) {
1816                                 log4(log_entry.operation,
1817                                      log_entry.args[0], log_entry.args[1],
1818                                      log_entry.flags);
1819                                 continue;
1820                         }
1821
1822                         op = log_entry.operation;
1823                         offset = log_entry.args[0];
1824                         size = log_entry.args[1];
1825                         offset2 = log_entry.args[2];
1826                         closeopen = !!(log_entry.flags & FL_CLOSE_OPEN);
1827                         keep_size = !!(log_entry.flags & FL_KEEP_SIZE);
1828                         goto have_op;
1829                 }
1830                 return 0;
1831         }
1832
1833         rv = random();
1834         if (closeprob)
1835                 closeopen = (rv >> 3) < (1 << 28) / closeprob;
1836
1837         offset = random();
1838         offset2 = 0;
1839         size = maxoplen;
1840         if (randomoplen)
1841                 size = random() % (maxoplen + 1);
1842
1843         /* calculate appropriate op to run */
1844         if (lite)
1845                 op = rv % OP_MAX_LITE;
1846         else if (!integrity)
1847                 op = rv % OP_MAX_FULL;
1848         else
1849                 op = rv % OP_MAX_INTEGRITY;
1850
1851         switch(op) {
1852         case OP_TRUNCATE:
1853                 if (!style)
1854                         size = random() % maxfilelen;
1855                 break;
1856         case OP_FALLOCATE:
1857                 if (fallocate_calls && size && keep_size_calls)
1858                         keep_size = random() % 2;
1859                 break;
1860         case OP_ZERO_RANGE:
1861                 if (zero_range_calls && size && keep_size_calls)
1862                         keep_size = random() % 2;
1863                 break;
1864         case OP_CLONE_RANGE:
1865                 TRIM_OFF_LEN(offset, size, file_size);
1866                 offset = offset & ~(block_size - 1);
1867                 size = size & ~(block_size - 1);
1868                 do {
1869                         offset2 = random();
1870                         TRIM_OFF(offset2, maxfilelen);
1871                         offset2 = offset2 & ~(block_size - 1);
1872                 } while (llabs(offset2 - offset) < size ||
1873                          offset2 + size > maxfilelen);
1874                 break;
1875         case OP_DEDUPE_RANGE:
1876                 {
1877                         int tries = 0;
1878
1879                         TRIM_OFF_LEN(offset, size, file_size);
1880                         offset = offset & ~(block_size - 1);
1881                         size = size & ~(block_size - 1);
1882                         do {
1883                                 if (tries++ >= 30) {
1884                                         size = 0;
1885                                         break;
1886                                 }
1887                                 offset2 = random();
1888                                 TRIM_OFF(offset2, file_size);
1889                                 offset2 = offset2 & ~(block_size - 1);
1890                         } while (llabs(offset2 - offset) < size ||
1891                                  offset2 + size > file_size);
1892                         break;
1893                 }
1894         }
1895
1896 have_op:
1897
1898         switch (op) {
1899         case OP_MAPREAD:
1900                 if (!mapped_reads)
1901                         op = OP_READ;
1902                 break;
1903         case OP_MAPWRITE:
1904                 if (!mapped_writes)
1905                         op = OP_WRITE;
1906                 break;
1907         case OP_FALLOCATE:
1908                 if (!fallocate_calls) {
1909                         log4(OP_FALLOCATE, offset, size, FL_SKIPPED);
1910                         goto out;
1911                 }
1912                 break;
1913         case OP_PUNCH_HOLE:
1914                 if (!punch_hole_calls) {
1915                         log4(OP_PUNCH_HOLE, offset, size, FL_SKIPPED);
1916                         goto out;
1917                 }
1918                 break;
1919         case OP_ZERO_RANGE:
1920                 if (!zero_range_calls) {
1921                         log4(OP_ZERO_RANGE, offset, size, FL_SKIPPED);
1922                         goto out;
1923                 }
1924                 break;
1925         case OP_COLLAPSE_RANGE:
1926                 if (!collapse_range_calls) {
1927                         log4(OP_COLLAPSE_RANGE, offset, size, FL_SKIPPED);
1928                         goto out;
1929                 }
1930                 break;
1931         case OP_INSERT_RANGE:
1932                 if (!insert_range_calls) {
1933                         log4(OP_INSERT_RANGE, offset, size, FL_SKIPPED);
1934                         goto out;
1935                 }
1936                 break;
1937         case OP_CLONE_RANGE:
1938                 if (!clone_range_calls) {
1939                         log5(op, offset, size, offset2, FL_SKIPPED);
1940                         goto out;
1941                 }
1942                 break;
1943         case OP_DEDUPE_RANGE:
1944                 if (!dedupe_range_calls) {
1945                         log5(op, offset, size, offset2, FL_SKIPPED);
1946                         goto out;
1947                 }
1948                 break;
1949         }
1950
1951         switch (op) {
1952         case OP_READ:
1953                 TRIM_OFF_LEN(offset, size, file_size);
1954                 doread(offset, size);
1955                 break;
1956
1957         case OP_WRITE:
1958                 TRIM_OFF_LEN(offset, size, maxfilelen);
1959                 dowrite(offset, size);
1960                 break;
1961
1962         case OP_MAPREAD:
1963                 TRIM_OFF_LEN(offset, size, file_size);
1964                 domapread(offset, size);
1965                 break;
1966
1967         case OP_MAPWRITE:
1968                 TRIM_OFF_LEN(offset, size, maxfilelen);
1969                 domapwrite(offset, size);
1970                 break;
1971
1972         case OP_TRUNCATE:
1973                 dotruncate(size);
1974                 break;
1975
1976         case OP_FALLOCATE:
1977                 TRIM_OFF_LEN(offset, size, maxfilelen);
1978                 do_preallocate(offset, size, keep_size);
1979                 break;
1980
1981         case OP_PUNCH_HOLE:
1982                 TRIM_OFF_LEN(offset, size, file_size);
1983                 do_punch_hole(offset, size);
1984                 break;
1985         case OP_ZERO_RANGE:
1986                 TRIM_OFF_LEN(offset, size, file_size);
1987                 do_zero_range(offset, size, keep_size);
1988                 break;
1989         case OP_COLLAPSE_RANGE:
1990                 TRIM_OFF_LEN(offset, size, file_size - 1);
1991                 offset = offset & ~(block_size - 1);
1992                 size = size & ~(block_size - 1);
1993                 if (size == 0) {
1994                         log4(OP_COLLAPSE_RANGE, offset, size, FL_SKIPPED);
1995                         goto out;
1996                 }
1997                 do_collapse_range(offset, size);
1998                 break;
1999         case OP_INSERT_RANGE:
2000                 TRIM_OFF(offset, file_size);
2001                 TRIM_LEN(file_size, size, maxfilelen);
2002                 offset = offset & ~(block_size - 1);
2003                 size = size & ~(block_size - 1);
2004                 if (size == 0) {
2005                         log4(OP_INSERT_RANGE, offset, size, FL_SKIPPED);
2006                         goto out;
2007                 }
2008                 if (file_size + size > maxfilelen) {
2009                         log4(OP_INSERT_RANGE, offset, size, FL_SKIPPED);
2010                         goto out;
2011                 }
2012
2013                 do_insert_range(offset, size);
2014                 break;
2015         case OP_CLONE_RANGE:
2016                 if (size == 0) {
2017                         log5(OP_CLONE_RANGE, offset, size, offset2, FL_SKIPPED);
2018                         goto out;
2019                 }
2020                 if (offset2 + size > maxfilelen) {
2021                         log5(OP_CLONE_RANGE, offset, size, offset2, FL_SKIPPED);
2022                         goto out;
2023                 }
2024
2025                 do_clone_range(offset, size, offset2);
2026                 break;
2027         case OP_DEDUPE_RANGE:
2028                 if (size == 0) {
2029                         log5(OP_DEDUPE_RANGE, offset, size, offset2, FL_SKIPPED);
2030                         goto out;
2031                 }
2032                 if (offset2 + size > maxfilelen) {
2033                         log5(OP_DEDUPE_RANGE, offset, size, offset2, FL_SKIPPED);
2034                         goto out;
2035                 }
2036
2037                 do_dedupe_range(offset, size, offset2);
2038                 break;
2039         case OP_FSYNC:
2040                 dofsync();
2041                 break;
2042         default:
2043                 prterr("test: unknown operation");
2044                 report_failure(42);
2045                 break;
2046         }
2047
2048         if (check_file && testcalls > simulatedopcount)
2049                 check_contents();
2050
2051 out:
2052         if (sizechecks && testcalls > simulatedopcount)
2053                 check_size();
2054         if (closeopen)
2055                 docloseopen();
2056         return 1;
2057 }
2058
2059
/*
 * Print the command-line help text to stdout and exit with status 90.
 * Lines for optional features are only included when the corresponding
 * macro is defined at build time.
 */
void
usage(void)
{
        fprintf(stdout, "usage: %s",
                "fsx [-dknqxABFJLOWZ] [-b opnum] [-c Prob] [-g filldata] [-i logdev] [-j logid] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] fname\n\
        -b opnum: beginning operation number (default 1)\n\
        -c P: 1 in P chance of file close+open at each op (default infinity)\n\
        -d: debug output for all operations\n\
        -f flush and invalidate cache after I/O\n\
        -g X: write character X instead of random generated data\n\
        -i logdev: do integrity testing, logdev is the dm log writes device\n\
        -j logid: prefix debug log messages with this id\n\
        -k: do not truncate existing file and use its size as upper bound on file size\n\
        -l flen: the upper bound on file size (default 262144)\n\
        -m startop:endop: monitor (print debug output) specified byte range (default 0:infinity)\n\
        -n: no verifications of file size\n\
        -o oplen: the upper bound on operation size (default 65536)\n\
        -p progressinterval: debug output at specified operation interval\n\
        -q: quieter operation\n\
        -r readbdy: 4096 would make reads page aligned (default 1)\n\
        -s style: 1 gives smaller truncates (default 0)\n\
        -t truncbdy: 4096 would make truncates page aligned (default 1)\n\
        -w writebdy: 4096 would make writes page aligned (default 1)\n\
        -x: preallocate file space before starting, XFS only (default 0)\n\
        -y synchronize changes to a file\n"

#ifdef AIO
"       -A: Use the AIO system calls\n"
#endif
"       -D startingop: debug output starting at specified operation\n"
#ifdef HAVE_LINUX_FALLOC_H
"       -F: Do not use fallocate (preallocation) calls\n"
#endif
/* -K is accepted unconditionally by main(), so it is listed unconditionally */
"       -K: Do not use keep size\n"
#ifdef FALLOC_FL_PUNCH_HOLE
"       -H: Do not use punch hole calls\n"
#endif
#ifdef FALLOC_FL_ZERO_RANGE
"       -z: Do not use zero range calls\n"
#endif
#ifdef FALLOC_FL_COLLAPSE_RANGE
"       -C: Do not use collapse range calls\n"
#endif
#ifdef FALLOC_FL_INSERT_RANGE
"       -I: Do not use insert range calls\n"
#endif
#ifdef FICLONERANGE
"       -J: Do not use clone range calls\n"
#endif
#ifdef FIDEDUPERANGE
"       -B: Do not use dedupe range calls\n"
#endif
"       -L: fsxLite - no file creations & no file size changes\n\
        -N numops: total # operations to do (default infinity)\n\
        -O: use oplen (see -o flag) for every op (default random)\n\
        -P: save .fsxlog .fsxops and .fsxgood files in dirpath (default ./)\n\
        -S seed: for random # generator (default 1) 0 gets timestamp\n\
        -W: mapped write operations DISabled\n\
        -X: Read file and compare to good buffer after every operation.\n\
        -R: read() system calls only (mapped reads disabled)\n\
        -Z: O_DIRECT (use -R, -W, -r and -w too)\n\
        --replay-ops opsfile: replay ops from recorded .fsxops file\n\
        --record-ops[=opsfile]: dump ops file also on success. optionally specify ops file name\n\
        fname: this filename is REQUIRED (no default)\n");
        exit(90);
}
2125
2126
/*
 * Parse an integer (base auto-detected by strtol) with an optional
 * one-letter multiplier suffix:
 *
 *      b/B  x 512      k/K  x 1024      m/M  x 1024*1024      w/W  x 4
 *
 * s: string to parse.
 * e: out parameter; receives a pointer to the first character that was not
 *    consumed (past the suffix when one was recognised).
 *
 * Returns the parsed, scaled value.
 */
int
getnum(char *s, char **e)
{
        int value;
        int scale;

        *e = (char *) 0;
        value = strtol(s, e, 0);
        if (!*e)
                return (value);

        if (**e == 'b' || **e == 'B')
                scale = 512;
        else if (**e == 'k' || **e == 'K')
                scale = 1024;
        else if (**e == 'm' || **e == 'M')
                scale = 1024*1024;
        else if (**e == 'w' || **e == 'W')
                scale = 4;
        else
                return (value);         /* no recognised suffix */

        value *= scale;
        *e = *e + 1;                    /* step over the suffix letter */
        return (value);
}
2159
2160 #ifdef AIO
2161
2162 #define QSZ     1024
2163 io_context_t    io_ctx;
2164 struct iocb     iocb;
2165
2166 int aio_setup()
2167 {
2168         int ret;
2169         ret = io_queue_init(QSZ, &io_ctx);
2170         if (ret != 0) {
2171                 fprintf(stderr, "aio_setup: io_queue_init failed: %s\n",
2172                         strerror(ret));
2173                 return(-1);
2174         }
2175         return(0);
2176 }
2177
2178 int
2179 __aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
2180 {
2181         struct io_event event;
2182         static struct timespec ts;
2183         struct iocb *iocbs[] = { &iocb };
2184         int ret;
2185         long res;
2186
2187         if (rw == READ) {
2188                 io_prep_pread(&iocb, fd, buf, len, offset);
2189         } else {
2190                 io_prep_pwrite(&iocb, fd, buf, len, offset);
2191         }
2192
2193         ts.tv_sec = 30;
2194         ts.tv_nsec = 0;
2195         ret = io_submit(io_ctx, 1, iocbs);
2196         if (ret != 1) {
2197                 fprintf(stderr, "errcode=%d\n", ret);
2198                 fprintf(stderr, "aio_rw: io_submit failed: %s\n",
2199                                 strerror(ret));
2200                 goto out_error;
2201         }
2202
2203         ret = io_getevents(io_ctx, 1, 1, &event, &ts);
2204         if (ret != 1) {
2205                 if (ret == 0)
2206                         fprintf(stderr, "aio_rw: no events available\n");
2207                 else {
2208                         fprintf(stderr, "errcode=%d\n", -ret);
2209                         fprintf(stderr, "aio_rw: io_getevents failed: %s\n",
2210                                         strerror(-ret));
2211                 }
2212                 goto out_error;
2213         }
2214         if (len != event.res) {
2215                 /*
2216                  * The b0rked libaio defines event.res as unsigned.
2217                  * However the kernel strucuture has it signed,
2218                  * and it's used to pass negated error value.
2219                  * Till the library is fixed use the temp var.
2220                  */
2221                 res = (long)event.res;
2222                 if (res >= 0)
2223                         fprintf(stderr, "bad io length: %lu instead of %u\n",
2224                                         res, len);
2225                 else {
2226                         fprintf(stderr, "errcode=%ld\n", -res);
2227                         fprintf(stderr, "aio_rw: async io failed: %s\n",
2228                                         strerror(-res));
2229                         ret = res;
2230                         goto out_error;
2231                 }
2232
2233         }
2234         return event.res;
2235
2236 out_error:
2237         /*
2238          * The caller expects error return in traditional libc
2239          * convention, i.e. -1 and the errno set to error.
2240          */
2241         errno = -ret;
2242         return -1;
2243 }
2244
2245 int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
2246 {
2247         int ret;
2248
2249         if (aio) {
2250                 ret = __aio_rw(rw, fd, buf, len, offset);
2251         } else {
2252                 if (rw == READ)
2253                         ret = read(fd, buf, len);
2254                 else
2255                         ret = write(fd, buf, len);
2256         }
2257         return ret;
2258 }
2259
2260 #endif
2261
/* Probe a fallocate mode, passing its spelling along for the diagnostic. */
#define test_fallocate(mode) __test_fallocate(mode, #mode)

/*
 * Check whether fallocate() with the given mode works on the test file.
 *
 * mode:     fallocate mode flags to probe.
 * mode_str: text used for the mode in the "does not support" message.
 *
 * Returns 1 when the mode is considered usable, 0 otherwise.  The result is
 * 0 in lite mode (no probe is made), when the probe fails with EOPNOTSUPP,
 * and when the build has no fallocate support at all.  After a probe that
 * was not rejected with EOPNOTSUPP the file is truncated back to file_size;
 * a failing ftruncate() terminates the process with status 132.
 */
int
__test_fallocate(int mode, const char *mode_str)
{
#ifdef HAVE_LINUX_FALLOC_H
        int ret = 0;
        if (!lite) {
                if (fallocate(fd, mode, file_size, 1) && errno == EOPNOTSUPP) {
                        if(!quiet)
                                fprintf(stderr,
                                        "main: filesystem does not support "
                                        "fallocate mode %s, disabling!\n",
                                        mode_str);
                } else {
                        ret = 1;
                        /* undo any size change made by the one-byte probe */
                        if (ftruncate(fd, file_size)) {
                                warn("main: ftruncate");
                                exit(132);
                        }
                }
        }
        return ret;
#else
        /* No fallocate() in this build: every mode is unsupported. */
        (void)mode;
        (void)mode_str;
        return 0;
#endif
}
2287
/*
 * Long-only command line options.  The val field is the code getopt_long()
 * hands back for the option; the list ends with an all-zero entry.
 */
static struct option longopts[] = {
        { .name = "replay-ops", .has_arg = required_argument,
          .flag = NULL, .val = 256 },
        { .name = "record-ops", .has_arg = optional_argument,
          .flag = NULL, .val = 255 },
        { .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 }
};
2293
2294 int
2295 main(int argc, char **argv)
2296 {
2297         int     i, style, ch;
2298         char    *endp, *tmp;
2299         char logfile[PATH_MAX];
2300         struct stat statbuf;
2301         int o_flags = O_RDWR|O_CREAT|O_TRUNC;
2302
2303         logfile[0] = 0;
2304         dname[0] = 0;
2305
2306         page_size = getpagesize();
2307         page_mask = page_size - 1;
2308         mmap_mask = page_mask;
2309         
2310
2311         setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
2312
2313         while ((ch = getopt_long(argc, argv,
2314                                  "b:c:dfg:i:j:kl:m:no:p:qr:s:t:w:xyABD:FJKHzCILN:OP:RS:WXZ",
2315                                  longopts, NULL)) != EOF)
2316                 switch (ch) {
2317                 case 'b':
2318                         simulatedopcount = getnum(optarg, &endp);
2319                         if (!quiet)
2320                                 prt("Will begin at operation %ld\n", simulatedopcount);
2321                         if (simulatedopcount == 0)
2322                                 usage();
2323                         simulatedopcount -= 1;
2324                         break;
2325                 case 'c':
2326                         closeprob = getnum(optarg, &endp);
2327                         if (!quiet)
2328                                 prt("Chance of close/open is 1 in %d\n", closeprob);
2329                         if (closeprob <= 0)
2330                                 usage();
2331                         break;
2332                 case 'd':
2333                         debug = 1;
2334                         break;
2335                 case 'f':
2336                         flush = 1;
2337                         break;
2338                 case 'g':
2339                         filldata = *optarg;
2340                         break;
2341                 case 'i':
2342                         integrity = 1;
2343                         logdev = strdup(optarg);
2344                         if (!logdev) {
2345                                 prterr("strdup");
2346                                 exit(101);
2347                         }
2348                         break;
2349                 case 'j':
2350                         logid = strdup(optarg);
2351                         if (!logid) {
2352                                 prterr("strdup");
2353                                 exit(101);
2354                         }
2355                         break;
2356                 case 'k':
2357                         o_flags &= ~O_TRUNC;
2358                         break;
2359                 case 'l':
2360                         maxfilelen = getnum(optarg, &endp);
2361                         if (maxfilelen <= 0)
2362                                 usage();
2363                         break;
2364                 case 'm':
2365                         monitorstart = getnum(optarg, &endp);
2366                         if (monitorstart < 0)
2367                                 usage();
2368                         if (!endp || *endp++ != ':')
2369                                 usage();
2370                         monitorend = getnum(endp, &endp);
2371                         if (monitorend < 0)
2372                                 usage();
2373                         if (monitorend == 0)
2374                                 monitorend = -1; /* aka infinity */
2375                         debug = 1;
2376                 case 'n':
2377                         sizechecks = 0;
2378                         break;
2379                 case 'o':
2380                         maxoplen = getnum(optarg, &endp);
2381                         if (maxoplen <= 0)
2382                                 usage();
2383                         break;
2384                 case 'p':
2385                         progressinterval = getnum(optarg, &endp);
2386                         if (progressinterval == 0)
2387                                 usage();
2388                         break;
2389                 case 'q':
2390                         quiet = 1;
2391                         break;
2392                 case 'r':
2393                         readbdy = getnum(optarg, &endp);
2394                         if (readbdy <= 0)
2395                                 usage();
2396                         break;
2397                 case 's':
2398                         style = getnum(optarg, &endp);
2399                         if (style < 0 || style > 1)
2400                                 usage();
2401                         break;
2402                 case 't':
2403                         truncbdy = getnum(optarg, &endp);
2404                         if (truncbdy <= 0)
2405                                 usage();
2406                         break;
2407                 case 'w':
2408                         writebdy = getnum(optarg, &endp);
2409                         if (writebdy <= 0)
2410                                 usage();
2411                         break;
2412                 case 'x':
2413                         prealloc = 1;
2414                         break;
2415                 case 'y':
2416                         do_fsync = 1;
2417                         break;
2418                 case 'A':
2419                         aio = 1;
2420                         break;
2421                 case 'D':
2422                         debugstart = getnum(optarg, &endp);
2423                         if (debugstart < 1)
2424                                 usage();
2425                         break;
2426                 case 'F':
2427                         fallocate_calls = 0;
2428                         break;
2429                 case 'K':
2430                         keep_size_calls = 0;
2431                         break;
2432                 case 'H':
2433                         punch_hole_calls = 0;
2434                         break;
2435                 case 'z':
2436                         zero_range_calls = 0;
2437                         break;
2438                 case 'C':
2439                         collapse_range_calls = 0;
2440                         break;
2441                 case 'I':
2442                         insert_range_calls = 0;
2443                         break;
2444                 case 'J':
2445                         clone_range_calls = 0;
2446                         break;
2447                 case 'B':
2448                         dedupe_range_calls = 0;
2449                         break;
2450                 case 'L':
2451                         lite = 1;
2452                         o_flags &= ~(O_CREAT|O_TRUNC);
2453                         break;
2454                 case 'N':
2455                         numops = getnum(optarg, &endp);
2456                         if (numops < 0)
2457                                 usage();
2458                         break;
2459                 case 'O':
2460                         randomoplen = 0;
2461                         break;
2462                 case 'P':
2463                         strncpy(dname, optarg, sizeof(dname));
2464                         strcat(dname, "/");
2465                         dirpath = strlen(dname);
2466                         break;
2467                 case 'R':
2468                         mapped_reads = 0;
2469                         break;
2470                 case 'S':
2471                         seed = getnum(optarg, &endp);
2472                         if (seed == 0) {
2473                                 seed = time(0) % 10000;
2474                                 seed += (int)getpid();
2475                         }
2476                         if (seed < 0)
2477                                 usage();
2478                         break;
2479                 case 'W':
2480                         mapped_writes = 0;
2481                         if (!quiet)
2482                                 prt("mapped writes DISABLED\n");
2483                         break;
2484                 case 'X':
2485                         check_file = 1;
2486                         break;
2487                 case 'Z':
2488                         o_direct = O_DIRECT;
2489                         o_flags |= O_DIRECT;
2490                         break;
2491                 case 255:  /* --record-ops */
2492                         if (optarg)
2493                                 strncpy(opsfile, optarg, sizeof(opsfile));
2494                         recordops = opsfile;
2495                         break;
2496                 case 256:  /* --replay-ops */
2497                         replayops = optarg;
2498                         break;
2499                 default:
2500                         usage();
2501                         /* NOTREACHED */
2502                 }
2503         argc -= optind;
2504         argv += optind;
2505         if (argc != 1)
2506                 usage();
2507
2508         if (integrity && !dirpath) {
2509                 fprintf(stderr, "option -i <logdev> requires -P <dirpath>\n");
2510                 usage();
2511         }
2512
2513         fname = argv[0];
2514         tmp = strdup(fname);
2515         if (!tmp) {
2516                 prterr("strdup");
2517                 exit(101);
2518         }
2519         bname = basename(tmp);
2520
2521         signal(SIGHUP,  cleanup);
2522         signal(SIGINT,  cleanup);
2523         signal(SIGPIPE, cleanup);
2524         signal(SIGALRM, cleanup);
2525         signal(SIGTERM, cleanup);
2526         signal(SIGXCPU, cleanup);
2527         signal(SIGXFSZ, cleanup);
2528         signal(SIGVTALRM,       cleanup);
2529         signal(SIGUSR1, cleanup);
2530         signal(SIGUSR2, cleanup);
2531
2532         if (!quiet && seed)
2533                 prt("Seed set to %d\n", seed);
2534         srandom(seed);
2535         fd = open(fname, o_flags, 0666);
2536         if (fd < 0) {
2537                 prterr(fname);
2538                 exit(91);
2539         }
2540         if (fstat(fd, &statbuf)) {
2541                 prterr("check_size: fstat");
2542                 exit(91);
2543         }
2544         block_size = statbuf.st_blksize;
2545 #ifdef XFS
2546         if (prealloc) {
2547                 xfs_flock64_t   resv = { 0 };
2548 #ifdef HAVE_XFS_PLATFORM_DEFS_H
2549                 if (!platform_test_xfs_fd(fd)) {
2550                         prterr(fname);
2551                         fprintf(stderr, "main: cannot prealloc, non XFS\n");
2552                         exit(96);
2553                 }
2554 #endif
2555                 resv.l_len = maxfilelen;
2556                 if ((xfsctl(fname, fd, XFS_IOC_RESVSP, &resv)) < 0) {
2557                         prterr(fname);
2558                         exit(97);
2559                 }
2560         }
2561 #endif
2562
2563         if (dirpath) {
2564                 snprintf(goodfile, sizeof(goodfile), "%s%s.fsxgood", dname, bname);
2565                 snprintf(logfile, sizeof(logfile), "%s%s.fsxlog", dname, bname);
2566                 if (!*opsfile)
2567                         snprintf(opsfile, sizeof(opsfile), "%s%s.fsxops", dname, bname);
2568         } else {
2569                 snprintf(goodfile, sizeof(goodfile), "%s.fsxgood", fname);
2570                 snprintf(logfile, sizeof(logfile), "%s.fsxlog", fname);
2571                 if (!*opsfile)
2572                         snprintf(opsfile, sizeof(opsfile), "%s.fsxops", fname);
2573         }
2574         fsxgoodfd = open(goodfile, O_RDWR|O_CREAT|O_TRUNC, 0666);
2575         if (fsxgoodfd < 0) {
2576                 prterr(goodfile);
2577                 exit(92);
2578         }
2579         fsxlogf = fopen(logfile, "w");
2580         if (fsxlogf == NULL) {
2581                 prterr(logfile);
2582                 exit(93);
2583         }
2584         unlink(opsfile);
2585
2586         if (replayops) {
2587                 replayopsf = fopen(replayops, "r");
2588                 if (!replayopsf) {
2589                         prterr(replayops);
2590                         exit(93);
2591                 }
2592         }
2593
2594 #ifdef AIO
2595         if (aio) 
2596                 aio_setup();
2597 #endif
2598
2599         if (!(o_flags & O_TRUNC)) {
2600                 off_t ret;
2601                 file_size = maxfilelen = biggest = lseek(fd, (off_t)0, SEEK_END);
2602                 if (file_size == (off_t)-1) {
2603                         prterr(fname);
2604                         warn("main: lseek eof");
2605                         exit(94);
2606                 }
2607                 ret = lseek(fd, (off_t)0, SEEK_SET);
2608                 if (ret == (off_t)-1) {
2609                         prterr(fname);
2610                         warn("main: lseek 0");
2611                         exit(95);
2612                 }
2613         }
2614         original_buf = (char *) malloc(maxfilelen);
2615         for (i = 0; i < maxfilelen; i++)
2616                 original_buf[i] = random() % 256;
2617         good_buf = (char *) malloc(maxfilelen + writebdy);
2618         good_buf = round_ptr_up(good_buf, writebdy, 0);
2619         memset(good_buf, '\0', maxfilelen);
2620         temp_buf = (char *) malloc(maxoplen + readbdy);
2621         temp_buf = round_ptr_up(temp_buf, readbdy, 0);
2622         memset(temp_buf, '\0', maxoplen);
2623         if (lite) {     /* zero entire existing file */
2624                 ssize_t written;
2625
2626                 written = write(fd, good_buf, (size_t)maxfilelen);
2627                 if (written != maxfilelen) {
2628                         if (written == -1) {
2629                                 prterr(fname);
2630                                 warn("main: error on write");
2631                         } else
2632                                 warn("main: short write, 0x%x bytes instead "
2633                                         "of 0x%lx\n",
2634                                         (unsigned)written,
2635                                         maxfilelen);
2636                         exit(98);
2637                 }
2638         } else {
2639                 ssize_t ret, len = file_size;
2640                 off_t off = 0;
2641
2642                 while (len > 0) {
2643                         ret = read(fd, good_buf + off, len);
2644                         if (ret == -1) {
2645                                 prterr(fname);
2646                                 warn("main: error on read");
2647                                 exit(98);
2648                         }
2649                         len -= ret;
2650                         off += ret;
2651                 }
2652
2653                 check_trunc_hack();
2654         }
2655
2656         if (fallocate_calls)
2657                 fallocate_calls = test_fallocate(0);
2658         if (keep_size_calls)
2659                 keep_size_calls = test_fallocate(FALLOC_FL_KEEP_SIZE);
2660         if (punch_hole_calls)
2661                 punch_hole_calls = test_fallocate(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE);
2662         if (zero_range_calls)
2663                 zero_range_calls = test_fallocate(FALLOC_FL_ZERO_RANGE);
2664         if (collapse_range_calls)
2665                 collapse_range_calls = test_fallocate(FALLOC_FL_COLLAPSE_RANGE);
2666         if (insert_range_calls)
2667                 insert_range_calls = test_fallocate(FALLOC_FL_INSERT_RANGE);
2668         if (clone_range_calls)
2669                 clone_range_calls = test_clone_range();
2670         if (dedupe_range_calls)
2671                 dedupe_range_calls = test_dedupe_range();
2672
2673         while (numops == -1 || numops--)
2674                 if (!test())
2675                         break;
2676
2677         free(tmp);
2678         if (close(fd)) {
2679                 prterr("close");
2680                 report_failure(99);
2681         }
2682         prt("All %lu operations completed A-OK!\n", testcalls);
2683         if (recordops)
2684                 logdump();
2685
2686         exit(0);
2687         return 0;
2688 }