fsx: check for filesystem support of FALLOCATE_FL_KEEP_SIZE
[xfstests-dev.git] / ltp / fsx.c
1 /*
2  *      Copyright (C) 1991, NeXT Computer, Inc.  All Rights Reserverd.
3  *
4  *      File:   fsx.c
5  *      Author: Avadis Tevanian, Jr.
6  *
7  *      File system exerciser. 
8  *
9  *      Rewritten 8/98 by Conrad Minshall.
10  *
11  *      Small changes to work under Linux -- davej.
12  *
13  *      Checks for mmap last-page zero fill.
14  */
15
16 #include "global.h"
17
18 #include <limits.h>
19 #include <time.h>
20 #include <strings.h>
21 #include <sys/file.h>
22 #include <sys/mman.h>
23 #ifdef HAVE_ERR_H
24 #include <err.h>
25 #endif
26 #include <signal.h>
27 #include <stdio.h>
28 #include <stddef.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdarg.h>
32 #include <errno.h>
33 #ifdef AIO
34 #include <libaio.h>
35 #endif
36
37 #ifndef MAP_FILE
38 # define MAP_FILE 0
39 #endif
40
41 #define NUMPRINTCOLUMNS 32      /* # columns of data to print on each line */
42
43 /*
44  *      A log entry is an operation and a bunch of arguments.
45  */
46
47 struct log_entry {
48         int     operation;      /* OP_* code; log4() stores it bit-inverted when closeopen is set */
49         int     args[3];        /* per-operation arguments; their meaning depends on operation */
50 };
51
52 #define LOGSIZE 10000   /* number of entries in oplog[]; logptr wraps to 0 on reaching it */
53
54 struct log_entry        oplog[LOGSIZE]; /* the log, reused circularly once full */
55 int                     logptr = 0;     /* current position in log (next slot log4() writes) */
56 int                     logcount = 0;   /* total ops logged; keeps counting after the log wraps */
57
58 /*
59  * The operation matrix is complex due to conditional execution of different
60  * features. Hence when we come to deciding what operation to run, we need to
61  * be careful in how we select the different operations. The active operations
62  * are mapped to numbers as follows:
63  *
64  *              lite    !lite
65  * READ:        0       0
66  * WRITE:       1       1
67  * MAPREAD:     2       2
68  * MAPWRITE:    3       3
69  * TRUNCATE:    -       4
70  * FALLOCATE:   -       5
71  * PUNCH HOLE:  -       6
72  * ZERO RANGE:  -       7
73  *
74  * When mapped read/writes are disabled, they are simply converted to normal
75  * reads and writes. When fallocate/fpunch calls are disabled, they are
76  * converted to OP_SKIPPED. Hence OP_SKIPPED needs to have a number higher than
77  * the operation selection matrix, as does the OP_CLOSEOPEN which is an
78  * operation modifier rather than an operation in itself.
79  *
80  * Because of the "lite" version, we also need to have different "maximum
81  * operation" defines to allow the ops to be selected correctly based on the
82  * mode being run.
83  */
84
85 /* common operations */
86 #define OP_READ         0
87 #define OP_WRITE        1
88 #define OP_MAPREAD      2
89 #define OP_MAPWRITE     3
90 #define OP_MAX_LITE     4       /* one more than the highest common operation number */
91
92 /* !lite operations */
93 #define OP_TRUNCATE             4
94 #define OP_FALLOCATE            5
95 #define OP_PUNCH_HOLE           6
96 #define OP_ZERO_RANGE           7
97 #define OP_COLLAPSE_RANGE       8
98 #define OP_MAX_FULL             9       /* one more than the highest !lite operation number */
99
100 /* operation modifiers */
101 #define OP_CLOSEOPEN    100
102 #define OP_SKIPPED      101     /* logged in place of an operation that was not performed */
103
104 #undef PAGE_SIZE        /* drop any definition inherited from included headers */
105 #define PAGE_SIZE       getpagesize()   /* evaluated at run time on every use */
106 #undef PAGE_MASK
107 #define PAGE_MASK       (PAGE_SIZE - 1) /* PAGE_SIZE minus one; ANDed with an offset to get its in-page part */
108
109 char    *original_buf;                  /* a pointer to the original data */
110 char    *good_buf;                      /* a pointer to the correct data */
111 char    *temp_buf;                      /* a pointer to the current data */
112 char    *fname;                         /* name of our test file */
113 int     fd;                             /* fd for our test file */
114
115 blksize_t       block_size = 0;         /* NOTE(review): presumably the file's I/O block size; set outside this view */
116 off_t           file_size = 0;          /* expected size of the test file; check_size() compares it with fstat/lseek */
117 off_t           biggest = 0;            /* largest size dotruncate() has been asked to truncate to */
118 char            state[256];             /* NOTE(review): presumably random-number generator state; confirm at its use */
119 unsigned long   testcalls = 0;          /* calls to function "test" */
120
121 unsigned long   simulatedopcount = 0;   /* -b flag */
122 int     closeprob = 0;                  /* -c flag */
123 int     debug = 0;                      /* -d flag */
124 unsigned long   debugstart = 0;         /* -D flag */
125 int     flush = 0;                      /* -f flag */
126 int     do_fsync = 0;                   /* -y flag */
127 unsigned long   maxfilelen = 256 * 1024;        /* -l flag */
128 int     sizechecks = 1;                 /* -n flag disables them */
129 int     maxoplen = 64 * 1024;           /* -o flag */
130 int     quiet = 0;                      /* -q flag */
131 unsigned long progressinterval = 0;     /* -p flag */
132 int     readbdy = 1;                    /* -r flag */
133 int     style = 0;                      /* -s flag */
134 int     prealloc = 0;                   /* -x flag */
135 int     truncbdy = 1;                   /* -t flag */
136 int     writebdy = 1;                   /* -w flag */
137 long    monitorstart = -1;              /* -m flag */
138 long    monitorend = -1;                /* -m flag */
139 int     lite = 0;                       /* -L flag */
140 long    numops = -1;                    /* -N flag */
141 int     randomoplen = 1;                /* -O flag disables it */
142 int     seed = 1;                       /* -S flag */
143 int     mapped_writes = 1;              /* -W flag disables */
144 int     fallocate_calls = 1;            /* -F flag disables */
145 int     keep_size_calls = 1;            /* -K flag disables */
146 int     punch_hole_calls = 1;           /* -H flag disables */
147 int     zero_range_calls = 1;           /* -z flag disables */
148 int     collapse_range_calls = 1;       /* -C flag disables */
149 int     mapped_reads = 1;               /* -R flag disables it */
150 int     fsxgoodfd = 0;                  /* when nonzero, report_failure() saves good_buf to this fd */
151 int     o_direct;                       /* -Z */
152 int     aio = 0;                        /* NOTE(review): presumably selects AIO; set outside this view */
153
154 int page_size;                          /* used by check_eofpage() as the length of the page it scans */
155 int page_mask;                          /* used by check_eofpage() to split addresses/offsets at page bounds */
156 int mmap_mask;                          /* used by doflush() to align the offset it passes to mmap() */
157 #ifdef AIO      /* with AIO, fsxread/fsxwrite go through aio_rw(); otherwise plain read()/write() */
158 int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
159 #define READ 0
160 #define WRITE 1
161 #define fsxread(a,b,c,d)        aio_rw(READ, a,b,c,d)
162 #define fsxwrite(a,b,c,d)       aio_rw(WRITE, a,b,c,d)
163 #else
164 #define fsxread(a,b,c,d)        read(a,b,c)     /* fourth argument (offset) is unused here */
165 #define fsxwrite(a,b,c,d)       write(a,b,c)    /* fourth argument (offset) is unused here */
166 #endif
167
168 FILE *  fsxlogf = NULL;         /* when non-NULL, prt() also writes every message here */
169 int badoff = -1;                /* file offset of the last mismatching byte seen by check_buffers() */
170 int closeopen = 0;              /* when nonzero, log4() bit-inverts the op code it records */
171
/*
 * Round ptr up to the next multiple of align, then add offset bytes.
 * The mask arithmetic only produces a multiple of align when align is a
 * power of two.
 */
static void *round_ptr_up(void *ptr, unsigned long align, unsigned long offset)
{
	const unsigned long mask = align - 1;
	unsigned long addr = (unsigned long)ptr;

	addr += mask;
	addr &= ~mask;
	return (void *)(addr + offset);
}
180
/*
 * Write "fsx: <formatted message>: <strerror(code)>\n" to stderr.
 * When fmt is NULL only "fsx: <strerror(code)>\n" is written.
 */
void
vwarnc(int code, const char *fmt, va_list ap)
{
	fputs("fsx: ", stderr);
	if (fmt) {
		vfprintf(stderr, fmt, ap);
		fputs(": ", stderr);
	}
	fprintf(stderr, "%s\n", strerror(code));
}
190
/*
 * printf-style warning on stderr; vwarnc() appends the text for the
 * errno value current at the time of the call.
 */
void
warn(const char * fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vwarnc(errno, fmt, args);
	va_end(args);
}
198
199 #define BUF_SIZE 1024
200
201 void
202 prt(char *fmt, ...)
203 {
204         va_list args;
205         char buffer[BUF_SIZE];
206
207         va_start(args, fmt);
208         vsnprintf(buffer, BUF_SIZE, fmt, args);
209         va_end(args);
210         fprintf(stdout, buffer);
211         if (fsxlogf)
212                 fprintf(fsxlogf, buffer);
213 }
214
/*
 * Report the current errno through prt() as "<prefix>: <strerror>\n".
 * A NULL prefix prints just the error text; it is never handed to %s,
 * which would be undefined behaviour.
 */
void
prterr(char *prefix)
{
	prt("%s%s%s\n", prefix ? prefix : "", prefix ? ": " : "",
	    strerror(errno));
}
220
221
222 void
223 log4(int operation, int arg0, int arg1, int arg2)
224 {
225         struct log_entry *le;
226
227         le = &oplog[logptr];
228         le->operation = operation;
229         if (closeopen)
230                 le->operation = ~ le->operation;
231         le->args[0] = arg0;
232         le->args[1] = arg1;
233         le->args[2] = arg2;
234         logptr++;
235         logcount++;
236         if (logptr >= LOGSIZE)
237                 logptr = 0;
238 }
239
240
241 void
242 logdump(void)
243 {
244         int     i, count, down;
245         struct log_entry        *lp;
246         char *falloc_type[3] = {"PAST_EOF", "EXTENDING", "INTERIOR"};
247
248         prt("LOG DUMP (%d total operations):\n", logcount);
249         if (logcount < LOGSIZE) {
250                 i = 0;
251                 count = logcount;
252         } else {
253                 i = logptr;
254                 count = LOGSIZE;
255         }
256         for ( ; count > 0; count--) {
257                 int opnum;
258
259                 opnum = i+1 + (logcount/LOGSIZE)*LOGSIZE;
260                 prt("%d(%3d mod 256): ", opnum, opnum%256);
261                 lp = &oplog[i];
262                 if ((closeopen = lp->operation < 0))
263                         lp->operation = ~ lp->operation;
264                         
265                 switch (lp->operation) {
266                 case OP_MAPREAD:
267                         prt("MAPREAD  0x%x thru 0x%x\t(0x%x bytes)",
268                             lp->args[0], lp->args[0] + lp->args[1] - 1,
269                             lp->args[1]);
270                         if (badoff >= lp->args[0] && badoff <
271                                                      lp->args[0] + lp->args[1])
272                                 prt("\t***RRRR***");
273                         break;
274                 case OP_MAPWRITE:
275                         prt("MAPWRITE 0x%x thru 0x%x\t(0x%x bytes)",
276                             lp->args[0], lp->args[0] + lp->args[1] - 1,
277                             lp->args[1]);
278                         if (badoff >= lp->args[0] && badoff <
279                                                      lp->args[0] + lp->args[1])
280                                 prt("\t******WWWW");
281                         break;
282                 case OP_READ:
283                         prt("READ     0x%x thru 0x%x\t(0x%x bytes)",
284                             lp->args[0], lp->args[0] + lp->args[1] - 1,
285                             lp->args[1]);
286                         if (badoff >= lp->args[0] &&
287                             badoff < lp->args[0] + lp->args[1])
288                                 prt("\t***RRRR***");
289                         break;
290                 case OP_WRITE:
291                         prt("WRITE    0x%x thru 0x%x\t(0x%x bytes)",
292                             lp->args[0], lp->args[0] + lp->args[1] - 1,
293                             lp->args[1]);
294                         if (lp->args[0] > lp->args[2])
295                                 prt(" HOLE");
296                         else if (lp->args[0] + lp->args[1] > lp->args[2])
297                                 prt(" EXTEND");
298                         if ((badoff >= lp->args[0] || badoff >=lp->args[2]) &&
299                             badoff < lp->args[0] + lp->args[1])
300                                 prt("\t***WWWW");
301                         break;
302                 case OP_TRUNCATE:
303                         down = lp->args[0] < lp->args[1];
304                         prt("TRUNCATE %s\tfrom 0x%x to 0x%x",
305                             down ? "DOWN" : "UP", lp->args[1], lp->args[0]);
306                         if (badoff >= lp->args[!down] &&
307                             badoff < lp->args[!!down])
308                                 prt("\t******WWWW");
309                         break;
310                 case OP_FALLOCATE:
311                         /* 0: offset 1: length 2: where alloced */
312                         prt("FALLOC   0x%x thru 0x%x\t(0x%x bytes) %s",
313                                 lp->args[0], lp->args[0] + lp->args[1],
314                                 lp->args[1], falloc_type[lp->args[2]]);
315                         if (badoff >= lp->args[0] &&
316                             badoff < lp->args[0] + lp->args[1])
317                                 prt("\t******FFFF");
318                         break;
319                 case OP_PUNCH_HOLE:
320                         prt("PUNCH    0x%x thru 0x%x\t(0x%x bytes)",
321                             lp->args[0], lp->args[0] + lp->args[1] - 1,
322                             lp->args[1]);
323                         if (badoff >= lp->args[0] && badoff <
324                                                      lp->args[0] + lp->args[1])
325                                 prt("\t******PPPP");
326                         break;
327                 case OP_ZERO_RANGE:
328                         prt("ZERO     0x%x thru 0x%x\t(0x%x bytes)",
329                             lp->args[0], lp->args[0] + lp->args[1] - 1,
330                             lp->args[1]);
331                         if (badoff >= lp->args[0] && badoff <
332                                                      lp->args[0] + lp->args[1])
333                                 prt("\t******ZZZZ");
334                         break;
335                 case OP_COLLAPSE_RANGE:
336                         prt("COLLAPSE 0x%x thru 0x%x\t(0x%x bytes)",
337                             lp->args[0], lp->args[0] + lp->args[1] - 1,
338                             lp->args[1]);
339                         if (badoff >= lp->args[0] && badoff <
340                                                      lp->args[0] + lp->args[1])
341                                 prt("\t******CCCC");
342                         break;
343                 case OP_SKIPPED:
344                         prt("SKIPPED (no operation)");
345                         break;
346                 default:
347                         prt("BOGUS LOG ENTRY (operation code = %d)!",
348                             lp->operation);
349                 }
350                 if (closeopen)
351                         prt("\n\t\tCLOSE/OPEN");
352                 prt("\n");
353                 i++;
354                 if (i == LOGSIZE)
355                         i = 0;
356         }
357 }
358
359
360 void
361 save_buffer(char *buffer, off_t bufferlength, int fd)
362 {
363         off_t ret;
364         ssize_t byteswritten;
365
366         if (fd <= 0 || bufferlength == 0)
367                 return;
368
369         if (bufferlength > SSIZE_MAX) {
370                 prt("fsx flaw: overflow in save_buffer\n");
371                 exit(67);
372         }
373         if (lite) {
374                 off_t size_by_seek = lseek(fd, (off_t)0, SEEK_END);
375                 if (size_by_seek == (off_t)-1)
376                         prterr("save_buffer: lseek eof");
377                 else if (bufferlength > size_by_seek) {
378                         warn("save_buffer: .fsxgood file too short... will save 0x%llx bytes instead of 0x%llx\n", (unsigned long long)size_by_seek,
379                              (unsigned long long)bufferlength);
380                         bufferlength = size_by_seek;
381                 }
382         }
383
384         ret = lseek(fd, (off_t)0, SEEK_SET);
385         if (ret == (off_t)-1)
386                 prterr("save_buffer: lseek 0");
387         
388         byteswritten = write(fd, buffer, (size_t)bufferlength);
389         if (byteswritten != bufferlength) {
390                 if (byteswritten == -1)
391                         prterr("save_buffer write");
392                 else
393                         warn("save_buffer: short write, 0x%x bytes instead of 0x%llx\n",
394                              (unsigned)byteswritten,
395                              (unsigned long long)bufferlength);
396         }
397 }
398
399
400 void
401 report_failure(int status)
402 {
403         logdump();
404         
405         if (fsxgoodfd) {
406                 if (good_buf) {
407                         save_buffer(good_buf, file_size, fsxgoodfd);
408                         prt("Correct content saved for comparison\n");
409                         prt("(maybe hexdump \"%s\" vs \"%s.fsxgood\")\n",
410                             fname, fname);
411                 }
412                 close(fsxgoodfd);
413         }
414         exit(status);
415 }
416
417
418 #define short_at(cp) ((unsigned short)((*((unsigned char *)(cp)) << 8) | \
419                                         *(((unsigned char *)(cp)) + 1)))
420
421 void
422 check_buffers(unsigned offset, unsigned size)
423 {
424         unsigned char c, t;
425         unsigned i = 0;
426         unsigned n = 0;
427         unsigned op = 0;
428         unsigned bad = 0;
429
430         if (memcmp(good_buf + offset, temp_buf, size) != 0) {
431                 prt("READ BAD DATA: offset = 0x%x, size = 0x%x, fname = %s\n",
432                     offset, size, fname);
433                 prt("OFFSET\tGOOD\tBAD\tRANGE\n");
434                 while (size > 0) {
435                         c = good_buf[offset];
436                         t = temp_buf[i];
437                         if (c != t) {
438                                 if (n < 16) {
439                                         bad = short_at(&temp_buf[i]);
440                                         prt("0x%5x\t0x%04x\t0x%04x", offset,
441                                             short_at(&good_buf[offset]), bad);
442                                         op = temp_buf[offset & 1 ? i+1 : i];
443                                         prt("\t0x%5x\n", n);
444                                         if (op)
445                                                 prt("operation# (mod 256) for "
446                                                   "the bad data may be %u\n",
447                                                 ((unsigned)op & 0xff));
448                                         else
449                                                 prt("operation# (mod 256) for "
450                                                   "the bad data unknown, check"
451                                                   " HOLE and EXTEND ops\n");
452                                 }
453                                 n++;
454                                 badoff = offset;
455                         }
456                         offset++;
457                         i++;
458                         size--;
459                 }
460                 report_failure(110);
461         }
462 }
463
464
465 void
466 check_size(void)
467 {
468         struct stat     statbuf;
469         off_t   size_by_seek;
470
471         if (fstat(fd, &statbuf)) {
472                 prterr("check_size: fstat");
473                 statbuf.st_size = -1;
474         }
475         size_by_seek = lseek(fd, (off_t)0, SEEK_END);
476         if (file_size != statbuf.st_size || file_size != size_by_seek) {
477                 prt("Size error: expected 0x%llx stat 0x%llx seek 0x%llx\n",
478                     (unsigned long long)file_size,
479                     (unsigned long long)statbuf.st_size,
480                     (unsigned long long)size_by_seek);
481                 report_failure(120);
482         }
483 }
484
485
486 void
487 check_trunc_hack(void)
488 {
489         struct stat statbuf;
490
491         ftruncate(fd, (off_t)0);
492         ftruncate(fd, (off_t)100000);
493         fstat(fd, &statbuf);
494         if (statbuf.st_size != (off_t)100000) {
495                 prt("no extend on truncate! not posix!\n");
496                 exit(130);
497         }
498         ftruncate(fd, 0);
499 }
500
501 void
502 doflush(unsigned offset, unsigned size)
503 {
504         unsigned pg_offset;
505         unsigned map_size;
506         char    *p;
507
508         if (o_direct == O_DIRECT)
509                 return;
510
511         pg_offset = offset & mmap_mask;
512         map_size  = pg_offset + size;
513
514         if ((p = (char *)mmap(0, map_size, PROT_READ | PROT_WRITE,
515                               MAP_FILE | MAP_SHARED, fd,
516                               (off_t)(offset - pg_offset))) == (char *)-1) {
517                 prterr("doflush: mmap");
518                 report_failure(202);
519         }
520         if (msync(p, map_size, MS_INVALIDATE) != 0) {
521                 prterr("doflush: msync");
522                 report_failure(203);
523         }
524         if (munmap(p, map_size) != 0) {
525                 prterr("doflush: munmap");
526                 report_failure(204);
527         }
528 }
529
530 void
531 doread(unsigned offset, unsigned size)
532 {
533         off_t ret;
534         unsigned iret;
535
536         offset -= offset % readbdy;
537         if (o_direct)
538                 size -= size % readbdy;
539         if (size == 0) {
540                 if (!quiet && testcalls > simulatedopcount && !o_direct)
541                         prt("skipping zero size read\n");
542                 log4(OP_SKIPPED, OP_READ, offset, size);
543                 return;
544         }
545         if (size + offset > file_size) {
546                 if (!quiet && testcalls > simulatedopcount)
547                         prt("skipping seek/read past end of file\n");
548                 log4(OP_SKIPPED, OP_READ, offset, size);
549                 return;
550         }
551
552         log4(OP_READ, offset, size, 0);
553
554         if (testcalls <= simulatedopcount)
555                 return;
556
557         if (!quiet &&
558                 ((progressinterval && testcalls % progressinterval == 0)  ||
559                 (debug &&
560                        (monitorstart == -1 ||
561                         (offset + size > monitorstart &&
562                         (monitorend == -1 || offset <= monitorend))))))
563                 prt("%lu read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
564                     offset, offset + size - 1, size);
565         ret = lseek(fd, (off_t)offset, SEEK_SET);
566         if (ret == (off_t)-1) {
567                 prterr("doread: lseek");
568                 report_failure(140);
569         }
570         iret = fsxread(fd, temp_buf, size, offset);
571         if (iret != size) {
572                 if (iret == -1)
573                         prterr("doread: read");
574                 else
575                         prt("short read: 0x%x bytes instead of 0x%x\n",
576                             iret, size);
577                 report_failure(141);
578         }
579         check_buffers(offset, size);
580 }
581
582
583 void
584 check_eofpage(char *s, unsigned offset, char *p, int size)
585 {
586         unsigned long last_page, should_be_zero;
587
588         if (offset + size <= (file_size & ~page_mask))
589                 return;
590         /*
591          * we landed in the last page of the file
592          * test to make sure the VM system provided 0's 
593          * beyond the true end of the file mapping
594          * (as required by mmap def in 1996 posix 1003.1)
595          */
596         last_page = ((unsigned long)p + (offset & page_mask) + size) & ~page_mask;
597
598         for (should_be_zero = last_page + (file_size & page_mask);
599              should_be_zero < last_page + page_size;
600              should_be_zero++)
601                 if (*(char *)should_be_zero) {
602                         prt("Mapped %s: non-zero data past EOF (0x%llx) page offset 0x%x is 0x%04x\n",
603                             s, file_size - 1, should_be_zero & page_mask,
604                             short_at(should_be_zero));
605                         report_failure(205);
606                 }
607 }
608
609
610 void
611 domapread(unsigned offset, unsigned size)
612 {
613         unsigned pg_offset;
614         unsigned map_size;
615         char    *p;
616
617         offset -= offset % readbdy;
618         if (size == 0) {
619                 if (!quiet && testcalls > simulatedopcount)
620                         prt("skipping zero size read\n");
621                 log4(OP_SKIPPED, OP_MAPREAD, offset, size);
622                 return;
623         }
624         if (size + offset > file_size) {
625                 if (!quiet && testcalls > simulatedopcount)
626                         prt("skipping seek/read past end of file\n");
627                 log4(OP_SKIPPED, OP_MAPREAD, offset, size);
628                 return;
629         }
630
631         log4(OP_MAPREAD, offset, size, 0);
632
633         if (testcalls <= simulatedopcount)
634                 return;
635
636         if (!quiet &&
637                 ((progressinterval && testcalls % progressinterval == 0) ||
638                        (debug &&
639                        (monitorstart == -1 ||
640                         (offset + size > monitorstart &&
641                         (monitorend == -1 || offset <= monitorend))))))
642                 prt("%lu mapread\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
643                     offset, offset + size - 1, size);
644
645         pg_offset = offset & PAGE_MASK;
646         map_size  = pg_offset + size;
647
648         if ((p = (char *)mmap(0, map_size, PROT_READ, MAP_SHARED, fd,
649                               (off_t)(offset - pg_offset))) == (char *)-1) {
650                 prterr("domapread: mmap");
651                 report_failure(190);
652         }
653         memcpy(temp_buf, p + pg_offset, size);
654
655         check_eofpage("Read", offset, p, size);
656
657         if (munmap(p, map_size) != 0) {
658                 prterr("domapread: munmap");
659                 report_failure(191);
660         }
661
662         check_buffers(offset, size);
663 }
664
665
666 void
667 gendata(char *original_buf, char *good_buf, unsigned offset, unsigned size)
668 {
669         while (size--) {
670                 good_buf[offset] = testcalls % 256; 
671                 if (offset % 2)
672                         good_buf[offset] += original_buf[offset];
673                 offset++;
674         }
675 }
676
677
678 void
679 dowrite(unsigned offset, unsigned size)
680 {
681         off_t ret;
682         unsigned iret;
683
684         offset -= offset % writebdy;
685         if (o_direct)
686                 size -= size % writebdy;
687         if (size == 0) {
688                 if (!quiet && testcalls > simulatedopcount && !o_direct)
689                         prt("skipping zero size write\n");
690                 log4(OP_SKIPPED, OP_WRITE, offset, size);
691                 return;
692         }
693
694         log4(OP_WRITE, offset, size, file_size);
695
696         gendata(original_buf, good_buf, offset, size);
697         if (file_size < offset + size) {
698                 if (file_size < offset)
699                         memset(good_buf + file_size, '\0', offset - file_size);
700                 file_size = offset + size;
701                 if (lite) {
702                         warn("Lite file size bug in fsx!");
703                         report_failure(149);
704                 }
705         }
706
707         if (testcalls <= simulatedopcount)
708                 return;
709
710         if (!quiet &&
711                 ((progressinterval && testcalls % progressinterval == 0) ||
712                        (debug &&
713                        (monitorstart == -1 ||
714                         (offset + size > monitorstart &&
715                         (monitorend == -1 || offset <= monitorend))))))
716                 prt("%lu write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
717                     offset, offset + size - 1, size);
718         ret = lseek(fd, (off_t)offset, SEEK_SET);
719         if (ret == (off_t)-1) {
720                 prterr("dowrite: lseek");
721                 report_failure(150);
722         }
723         iret = fsxwrite(fd, good_buf + offset, size, offset);
724         if (iret != size) {
725                 if (iret == -1)
726                         prterr("dowrite: write");
727                 else
728                         prt("short write: 0x%x bytes instead of 0x%x\n",
729                             iret, size);
730                 report_failure(151);
731         }
732         if (do_fsync) {
733                 if (fsync(fd)) {
734                         prt("fsync() failed: %s\n", strerror(errno));
735                         report_failure(152);
736                 }
737         }
738         if (flush) {
739                 doflush(offset, size);
740         }
741 }
742
743
744 void
745 domapwrite(unsigned offset, unsigned size)
746 {
747         unsigned pg_offset;
748         unsigned map_size;
749         off_t    cur_filesize;
750         char    *p;
751
752         offset -= offset % writebdy;
753         if (size == 0) {
754                 if (!quiet && testcalls > simulatedopcount)
755                         prt("skipping zero size write\n");
756                 log4(OP_SKIPPED, OP_MAPWRITE, offset, size);
757                 return;
758         }
759         cur_filesize = file_size;
760
761         log4(OP_MAPWRITE, offset, size, 0);
762
763         gendata(original_buf, good_buf, offset, size);
764         if (file_size < offset + size) {
765                 if (file_size < offset)
766                         memset(good_buf + file_size, '\0', offset - file_size);
767                 file_size = offset + size;
768                 if (lite) {
769                         warn("Lite file size bug in fsx!");
770                         report_failure(200);
771                 }
772         }
773
774         if (testcalls <= simulatedopcount)
775                 return;
776
777         if (!quiet &&
778                 ((progressinterval && testcalls % progressinterval == 0) ||
779                        (debug &&
780                        (monitorstart == -1 ||
781                         (offset + size > monitorstart &&
782                         (monitorend == -1 || offset <= monitorend))))))
783                 prt("%lu mapwrite\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
784                     offset, offset + size - 1, size);
785
786         if (file_size > cur_filesize) {
787                 if (ftruncate(fd, file_size) == -1) {
788                         prterr("domapwrite: ftruncate");
789                         exit(201);
790                 }
791         }
792         pg_offset = offset & PAGE_MASK;
793         map_size  = pg_offset + size;
794
795         if ((p = (char *)mmap(0, map_size, PROT_READ | PROT_WRITE,
796                               MAP_FILE | MAP_SHARED, fd,
797                               (off_t)(offset - pg_offset))) == (char *)-1) {
798                 prterr("domapwrite: mmap");
799                 report_failure(202);
800         }
801         memcpy(p + pg_offset, good_buf + offset, size);
802         if (msync(p, map_size, MS_SYNC) != 0) {
803                 prterr("domapwrite: msync");
804                 report_failure(203);
805         }
806
807         check_eofpage("Write", offset, p, size);
808
809         if (munmap(p, map_size) != 0) {
810                 prterr("domapwrite: munmap");
811                 report_failure(204);
812         }
813 }
814
815
816 void
817 dotruncate(unsigned size)
818 {
819         int oldsize = file_size;
820
821         size -= size % truncbdy;
822         if (size > biggest) {
823                 biggest = size;
824                 if (!quiet && testcalls > simulatedopcount)
825                         prt("truncating to largest ever: 0x%x\n", size);
826         }
827
828         log4(OP_TRUNCATE, size, (unsigned)file_size, 0);
829
830         if (size > file_size)
831                 memset(good_buf + file_size, '\0', size - file_size);
832         file_size = size;
833
834         if (testcalls <= simulatedopcount)
835                 return;
836         
837         if ((progressinterval && testcalls % progressinterval == 0) ||
838             (debug && (monitorstart == -1 || monitorend == -1 ||
839                       size <= monitorend)))
840                 prt("%lu trunc\tfrom 0x%x to 0x%x\n", testcalls, oldsize, size);
841         if (ftruncate(fd, (off_t)size) == -1) {
842                 prt("ftruncate1: %x\n", size);
843                 prterr("dotruncate: ftruncate");
844                 report_failure(160);
845         }
846 }
847
#ifdef FALLOC_FL_PUNCH_HOLE
/*
 * Punch a hole of length bytes at offset with
 * fallocate(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE) and zero the
 * matching part of good_buf, clipped to file_size.
 *
 * Zero-length requests and requests starting at or past file_size are
 * logged as OP_SKIPPED.  While testcalls has not passed
 * simulatedopcount the operation is logged but not performed.  A
 * failing fallocate() is fatal (status 161).
 */
void
do_punch_hole(unsigned offset, unsigned length)
{
	unsigned end_offset;
	int max_offset = 0;
	int max_len = 0;
	int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;

	if (length == 0) {
		if (!quiet && testcalls > simulatedopcount)
			prt("skipping zero length punch hole\n");
		/* the skip is logged whether or not it was announced */
		log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, length);
		return;
	}

	if (file_size <= (loff_t)offset) {
		if (!quiet && testcalls > simulatedopcount)
			prt("skipping hole punch off the end of the file\n");
		/* the skip is logged whether or not it was announced */
		log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, length);
		return;
	}

	end_offset = offset + length;

	log4(OP_PUNCH_HOLE, offset, length, 0);

	if (testcalls <= simulatedopcount)
		return;

	if ((progressinterval && testcalls % progressinterval == 0) ||
	    (debug && (monitorstart == -1 || monitorend == -1 ||
		      end_offset <= monitorend))) {
		prt("%lu punch\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
			offset, offset+length, length);
	}
	if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
		/*
		 * No stray '%' before "punch": it formed a %p conversion
		 * that consumed the first argument.  The second value is
		 * the end offset, matching the "to" in the message.
		 */
		prt("punch hole: %x to %x\n", offset, end_offset);
		prterr("do_punch_hole: fallocate");
		report_failure(161);
	}

	/* zero only the part of the hole that lies inside the file */
	max_offset = offset < file_size ? offset : file_size;
	max_len = max_offset + length <= file_size ? length :
			file_size - max_offset;
	memset(good_buf + max_offset, '\0', max_len);
}

#else
/* Hole punching is not available at build time: do nothing. */
void
do_punch_hole(unsigned offset, unsigned length)
{
	return;
}
#endif
904
#ifdef FALLOC_FL_ZERO_RANGE
/*
 * Zero `length` bytes at `offset` with fallocate(FALLOC_FL_ZERO_RANGE) and
 * clear the same range of good_buf.
 *
 * Zero-length requests are logged as OP_SKIPPED and ignored.  While
 * testcalls is still within simulatedopcount the operation is only logged.
 *
 * keep_size is drawn with random() when keep_size_calls is set.  In this
 * function it only affects end_offset (and through it the `biggest`
 * bookkeeping, the logged classification and the debug filter); the mode
 * passed to fallocate() is always plain FALLOC_FL_ZERO_RANGE.
 */
void
do_zero_range(unsigned offset, unsigned length)
{
        unsigned end_offset;
        int mode = FALLOC_FL_ZERO_RANGE;
        int keep_size = 0;

        if (length == 0) {
                if (!quiet && testcalls > simulatedopcount)
                        prt("skipping zero length zero range\n");
                /* the skip is logged even when the message is suppressed */
                log4(OP_SKIPPED, OP_ZERO_RANGE, offset, length);
                return;
        }

        if (keep_size_calls)
                keep_size = random() % 2;

        end_offset = keep_size ? 0 : offset + length;

        if (end_offset > biggest) {
                biggest = end_offset;
                if (!quiet && testcalls > simulatedopcount)
                        prt("zero_range to largest ever: 0x%x\n", end_offset);
        }

        /*
         * last arg matches fallocate string array index in logdump:
         *      0: allocate past EOF
         *      1: extending prealloc
         *      2: interior prealloc
         */
        log4(OP_ZERO_RANGE, offset, length, (end_offset > file_size) ? (keep_size ? 0 : 1) : 2);

        if (testcalls <= simulatedopcount)
                return;

        if ((progressinterval && testcalls % progressinterval == 0) ||
            (debug && (monitorstart == -1 || monitorend == -1 ||
                      end_offset <= monitorend))) {
                prt("%lu zero\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
                        offset, offset+length, length);
        }
        if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
                /*
                 * No stray '%' before "zero": "%p" would consume `offset`
                 * as a pointer and leave the final %x without an argument.
                 */
                prt("zero range: %x to %x\n", offset, length);
                prterr("do_zero_range: fallocate");
                report_failure(161);
        }

        memset(good_buf + offset, '\0', length);
}

#else
/* Built without FALLOC_FL_ZERO_RANGE: zero range is a no-op. */
void
do_zero_range(unsigned offset, unsigned length)
{
        return;
}
#endif
964
#ifdef FALLOC_FL_COLLAPSE_RANGE
/*
 * Remove `length` bytes at `offset` from the file with
 * fallocate(FALLOC_FL_COLLAPSE_RANGE), then apply the same shift to
 * good_buf and shrink file_size by `length`.
 *
 * Empty ranges, and ranges whose end is at or beyond file_size, are logged
 * as OP_SKIPPED and ignored.  While testcalls is still within
 * simulatedopcount the operation is only logged.
 */
void
do_collapse_range(unsigned offset, unsigned length)
{
        int mode = FALLOC_FL_COLLAPSE_RANGE;
        unsigned end_offset = offset + length;
        int show;

        if (length == 0 || (loff_t)end_offset >= file_size) {
                if (!quiet && testcalls > simulatedopcount) {
                        if (length == 0)
                                prt("skipping zero length collapse range\n");
                        else
                                prt("skipping collapse range behind EOF\n");
                }
                log4(OP_SKIPPED, OP_COLLAPSE_RANGE, offset, length);
                return;
        }

        log4(OP_COLLAPSE_RANGE, offset, length, 0);

        if (testcalls <= simulatedopcount)
                return;

        show = (progressinterval && testcalls % progressinterval == 0) ||
               (debug && (monitorstart == -1 || monitorend == -1 ||
                          end_offset <= monitorend));
        if (show) {
                prt("%lu collapse\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
                        offset, offset+length, length);
        }

        if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
                prt("collapse range: %x to %x\n", offset, length);
                prterr("do_collapse_range: fallocate");
                report_failure(161);
        }

        /* Slide everything after the collapsed range down over it. */
        memmove(good_buf + offset, good_buf + end_offset,
                file_size - end_offset);
        file_size -= length;
}

#else
/* Built without FALLOC_FL_COLLAPSE_RANGE: collapse range is a no-op. */
void
do_collapse_range(unsigned offset, unsigned length)
{
        return;
}
#endif
1016
#ifdef HAVE_LINUX_FALLOC_H
/*
 * fallocate is basically a no-op unless extending, then a lot like a truncate
 *
 * Preallocate `length` bytes at `offset`.  When keep_size_calls is set, a
 * coin flip decides whether FALLOC_FL_KEEP_SIZE is used; with it the
 * expected file size is left alone, without it a range ending past
 * file_size zero-fills the new tail of good_buf and advances file_size.
 *
 * Zero-length requests are logged as OP_SKIPPED and ignored.  While
 * testcalls is still within simulatedopcount only the bookkeeping is done
 * and fallocate() is not called.
 */
void
do_preallocate(unsigned offset, unsigned length)
{
        unsigned end_offset;
        unsigned alloc_end;
        int keep_size = 0;

        if (length == 0) {
                if (!quiet && testcalls > simulatedopcount)
                        prt("skipping zero length fallocate\n");
                log4(OP_SKIPPED, OP_FALLOCATE, offset, length);
                return;
        }

        if (keep_size_calls)
                keep_size = random() % 2;

        /* real end of the allocated range, regardless of keep_size */
        alloc_end = offset + length;
        /* end of the file-size change: none when the size is kept */
        end_offset = keep_size ? 0 : alloc_end;

        if (end_offset > biggest) {
                biggest = end_offset;
                if (!quiet && testcalls > simulatedopcount)
                        prt("fallocating to largest ever: 0x%x\n", end_offset);
        }

        /*
         * last arg matches fallocate string array index in logdump:
         *      0: allocate past EOF
         *      1: extending prealloc
         *      2: interior prealloc
         *
         * Classify on the real end of the range: end_offset is forced to 0
         * for keep_size calls, which would make case 0 unreachable.
         */
        log4(OP_FALLOCATE, offset, length,
             (alloc_end > file_size) ? (keep_size ? 0 : 1) : 2);

        if (end_offset > file_size) {
                memset(good_buf + file_size, '\0', end_offset - file_size);
                file_size = end_offset;
        }

        if (testcalls <= simulatedopcount)
                return;

        if ((progressinterval && testcalls % progressinterval == 0) ||
            (debug && (monitorstart == -1 || monitorend == -1 ||
                      end_offset <= monitorend)))
                prt("%lu falloc\tfrom 0x%x to 0x%x (0x%x bytes)\n", testcalls,
                                offset, offset + length, length);
        if (fallocate(fd, keep_size ? FALLOC_FL_KEEP_SIZE : 0, (loff_t)offset, (loff_t)length) == -1) {
                prt("fallocate: %x to %x\n", offset, length);
                prterr("do_preallocate: fallocate");
                report_failure(161);
        }
}
#else
/* Built without <linux/falloc.h>: preallocation is a no-op. */
void
do_preallocate(unsigned offset, unsigned length)
{
        return;
}
#endif
1077
1078 void
1079 writefileimage()
1080 {
1081         ssize_t iret;
1082
1083         if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
1084                 prterr("writefileimage: lseek");
1085                 report_failure(171);
1086         }
1087         iret = write(fd, good_buf, file_size);
1088         if ((off_t)iret != file_size) {
1089                 if (iret == -1)
1090                         prterr("writefileimage: write");
1091                 else
1092                         prt("short write: 0x%x bytes instead of 0x%llx\n",
1093                             iret, (unsigned long long)file_size);
1094                 report_failure(172);
1095         }
1096         if (lite ? 0 : ftruncate(fd, file_size) == -1) {
1097                 prt("ftruncate2: %llx\n", (unsigned long long)file_size);
1098                 prterr("writefileimage: ftruncate");
1099                 report_failure(173);
1100         }
1101 }
1102
1103
1104 void
1105 docloseopen(void)
1106
1107         if (testcalls <= simulatedopcount)
1108                 return;
1109
1110         if (debug)
1111                 prt("%lu close/open\n", testcalls);
1112         if (close(fd)) {
1113                 prterr("docloseopen: close");
1114                 report_failure(180);
1115         }
1116         fd = open(fname, O_RDWR|o_direct, 0);
1117         if (fd < 0) {
1118                 prterr("docloseopen: open");
1119                 report_failure(181);
1120         }
1121 }
1122
/*
 * Fit an (off, len) pair to a region of `size` bytes: off is reduced
 * modulo size (or set to 0 when size is 0), then len is shortened so that
 * off + len does not exceed size.  off and len must be modifiable lvalues;
 * every argument is evaluated more than once.
 */
#define TRIM_OFF_LEN(off, len, size)                    \
do {                                                    \
        (off) = (size) ? (off) % (size) : 0;            \
        if ((off) + (len) > (size))                     \
                (len) = (size) - (off);                 \
} while (0)
1132
1133 void
1134 test(void)
1135 {
1136         unsigned long   offset;
1137         unsigned long   size = maxoplen;
1138         unsigned long   rv = random();
1139         unsigned long   op;
1140
1141         if (simulatedopcount > 0 && testcalls == simulatedopcount)
1142                 writefileimage();
1143
1144         testcalls++;
1145
1146         if (closeprob)
1147                 closeopen = (rv >> 3) < (1 << 28) / closeprob;
1148
1149         if (debugstart > 0 && testcalls >= debugstart)
1150                 debug = 1;
1151
1152         if (!quiet && testcalls < simulatedopcount && testcalls % 100000 == 0)
1153                 prt("%lu...\n", testcalls);
1154
1155         offset = random();
1156         if (randomoplen)
1157                 size = random() % (maxoplen + 1);
1158
1159         /* calculate appropriate op to run */
1160         if (lite)
1161                 op = rv % OP_MAX_LITE;
1162         else
1163                 op = rv % OP_MAX_FULL;
1164
1165         switch (op) {
1166         case OP_MAPREAD:
1167                 if (!mapped_reads)
1168                         op = OP_READ;
1169                 break;
1170         case OP_MAPWRITE:
1171                 if (!mapped_writes)
1172                         op = OP_WRITE;
1173                 break;
1174         case OP_FALLOCATE:
1175                 if (!fallocate_calls) {
1176                         log4(OP_SKIPPED, OP_FALLOCATE, offset, size);
1177                         goto out;
1178                 }
1179                 break;
1180         case OP_PUNCH_HOLE:
1181                 if (!punch_hole_calls) {
1182                         log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, size);
1183                         goto out;
1184                 }
1185                 break;
1186         case OP_ZERO_RANGE:
1187                 if (!zero_range_calls) {
1188                         log4(OP_SKIPPED, OP_ZERO_RANGE, offset, size);
1189                         goto out;
1190                 }
1191                 break;
1192         case OP_COLLAPSE_RANGE:
1193                 if (!collapse_range_calls) {
1194                         log4(OP_SKIPPED, OP_COLLAPSE_RANGE, offset, size);
1195                         goto out;
1196                 }
1197                 break;
1198         }
1199
1200         switch (op) {
1201         case OP_READ:
1202                 TRIM_OFF_LEN(offset, size, file_size);
1203                 doread(offset, size);
1204                 break;
1205
1206         case OP_WRITE:
1207                 TRIM_OFF_LEN(offset, size, maxfilelen);
1208                 dowrite(offset, size);
1209                 break;
1210
1211         case OP_MAPREAD:
1212                 TRIM_OFF_LEN(offset, size, file_size);
1213                 domapread(offset, size);
1214                 break;
1215
1216         case OP_MAPWRITE:
1217                 TRIM_OFF_LEN(offset, size, maxfilelen);
1218                 domapwrite(offset, size);
1219                 break;
1220
1221         case OP_TRUNCATE:
1222                 if (!style)
1223                         size = random() % maxfilelen;
1224                 dotruncate(size);
1225                 break;
1226
1227         case OP_FALLOCATE:
1228                 TRIM_OFF_LEN(offset, size, maxfilelen);
1229                 do_preallocate(offset, size);
1230                 break;
1231
1232         case OP_PUNCH_HOLE:
1233                 TRIM_OFF_LEN(offset, size, file_size);
1234                 do_punch_hole(offset, size);
1235                 break;
1236         case OP_ZERO_RANGE:
1237                 TRIM_OFF_LEN(offset, size, file_size);
1238                 do_zero_range(offset, size);
1239                 break;
1240         case OP_COLLAPSE_RANGE:
1241                 TRIM_OFF_LEN(offset, size, file_size - 1);
1242                 offset = offset & ~(block_size - 1);
1243                 size = size & ~(block_size - 1);
1244                 if (size == 0) {
1245                         log4(OP_SKIPPED, OP_COLLAPSE_RANGE, offset, size);
1246                         goto out;
1247                 }
1248                 do_collapse_range(offset, size);
1249                 break;
1250         default:
1251                 prterr("test: unknown operation");
1252                 report_failure(42);
1253                 break;
1254         }
1255
1256 out:
1257         if (sizechecks && testcalls > simulatedopcount)
1258                 check_size();
1259         if (closeopen)
1260                 docloseopen();
1261 }
1262
1263
1264 void
1265 cleanup(sig)
1266         int     sig;
1267 {
1268         if (sig)
1269                 prt("signal %d\n", sig);
1270         prt("testcalls = %lu\n", testcalls);
1271         exit(sig);
1272 }
1273
1274
/*
 * Print the command-line help to stdout and exit with status 90.
 * Options that depend on build-time features are only listed when the
 * corresponding feature macro is defined.
 */
void
usage(void)
{
        fprintf(stdout, "usage: %s",
                "fsx [-dfnqxyACFHKLORWZz] [-b opnum] [-c Prob] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] fname\n"
"        -b opnum: beginning operation number (default 1)\n"
"        -c P: 1 in P chance of file close+open at each op (default infinity)\n"
"        -d: debug output for all operations\n"
"        -f flush and invalidate cache after I/O\n"
"        -l flen: the upper bound on file size (default 262144)\n"
"        -m startop:endop: monitor (print debug output) specified byte range (default 0:infinity)\n"
"        -n: no verifications of file size\n"
"        -o oplen: the upper bound on operation size (default 65536)\n"
"        -p progressinterval: debug output at specified operation interval\n"
"        -q: quieter operation\n"
"        -r readbdy: 4096 would make reads page aligned (default 1)\n"
"        -s style: 1 gives smaller truncates (default 0)\n"
"        -t truncbdy: 4096 would make truncates page aligned (default 1)\n"
"        -w writebdy: 4096 would make writes page aligned (default 1)\n"
"        -x: preallocate file space before starting, XFS only (default 0)\n"
"        -y synchronize changes to a file\n"

#ifdef AIO
"       -A: Use the AIO system calls\n"
#endif
"       -D startingop: debug output starting at specified operation\n"
#ifdef HAVE_LINUX_FALLOC_H
"       -F: Do not use fallocate (preallocation) calls\n"
"       -K: Do not use FALLOC_FL_KEEP_SIZE fallocate calls\n"
#endif
#ifdef FALLOC_FL_PUNCH_HOLE
"       -H: Do not use punch hole calls\n"
#endif
#ifdef FALLOC_FL_ZERO_RANGE
"       -z: Do not use zero range calls\n"
#endif
#ifdef FALLOC_FL_COLLAPSE_RANGE
"       -C: Do not use collapse range calls\n"
#endif
"       -L: fsxLite - no file creations & no file size changes\n"
"        -N numops: total # operations to do (default infinity)\n"
"        -O: use oplen (see -o flag) for every op (default random)\n"
"        -P: save .fsxlog and .fsxgood files in dirpath (default ./)\n"
"        -S seed: for random # generator (default 1) 0 gets timestamp\n"
"        -W: mapped write operations DISabled\n"
"        -R: read() system calls only (mapped reads disabled)\n"
"        -Z: O_DIRECT (use -R, -W, -r and -w too)\n"
"        fname: this filename is REQUIRED (no default)\n");
        exit(90);
}
1324
1325
/*
 * Parse an integer from `s` with strtol() (base auto-detected) and apply
 * an optional one-letter multiplier that directly follows the digits:
 *      b/B  x 512
 *      k/K  x 1024
 *      m/M  x 1024*1024
 *      w/W  x 4
 * On return *e points just past the number and any suffix consumed.
 */
int
getnum(char *s, char **e)
{
        int value;
        int scale;

        *e = (char *) 0;
        value = strtol(s, e, 0);
        if (!*e)
                return (value);

        switch (**e) {
        case 'b':
        case 'B':
                scale = 512;
                break;
        case 'k':
        case 'K':
                scale = 1024;
                break;
        case 'm':
        case 'M':
                scale = 1024*1024;
                break;
        case 'w':
        case 'W':
                scale = 4;
                break;
        default:
                /* no recognised suffix: leave *e on that character */
                return (value);
        }

        *e = *e + 1;
        return (value * scale);
}
1358
1359 #ifdef AIO
1360
1361 #define QSZ     1024
1362 io_context_t    io_ctx;
1363 struct iocb     iocb;
1364
1365 int aio_setup()
1366 {
1367         int ret;
1368         ret = io_queue_init(QSZ, &io_ctx);
1369         if (ret != 0) {
1370                 fprintf(stderr, "aio_setup: io_queue_init failed: %s\n",
1371                         strerror(ret));
1372                 return(-1);
1373         }
1374         return(0);
1375 }
1376
1377 int
1378 __aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
1379 {
1380         struct io_event event;
1381         static struct timespec ts;
1382         struct iocb *iocbs[] = { &iocb };
1383         int ret;
1384         long res;
1385
1386         if (rw == READ) {
1387                 io_prep_pread(&iocb, fd, buf, len, offset);
1388         } else {
1389                 io_prep_pwrite(&iocb, fd, buf, len, offset);
1390         }
1391
1392         ts.tv_sec = 30;
1393         ts.tv_nsec = 0;
1394         ret = io_submit(io_ctx, 1, iocbs);
1395         if (ret != 1) {
1396                 fprintf(stderr, "errcode=%d\n", ret);
1397                 fprintf(stderr, "aio_rw: io_submit failed: %s\n",
1398                                 strerror(ret));
1399                 goto out_error;
1400         }
1401
1402         ret = io_getevents(io_ctx, 1, 1, &event, &ts);
1403         if (ret != 1) {
1404                 if (ret == 0)
1405                         fprintf(stderr, "aio_rw: no events available\n");
1406                 else {
1407                         fprintf(stderr, "errcode=%d\n", -ret);
1408                         fprintf(stderr, "aio_rw: io_getevents failed: %s\n",
1409                                         strerror(-ret));
1410                 }
1411                 goto out_error;
1412         }
1413         if (len != event.res) {
1414                 /*
1415                  * The b0rked libaio defines event.res as unsigned.
1416                  * However the kernel strucuture has it signed,
1417                  * and it's used to pass negated error value.
1418                  * Till the library is fixed use the temp var.
1419                  */
1420                 res = (long)event.res;
1421                 if (res >= 0)
1422                         fprintf(stderr, "bad io length: %lu instead of %u\n",
1423                                         res, len);
1424                 else {
1425                         fprintf(stderr, "errcode=%ld\n", -res);
1426                         fprintf(stderr, "aio_rw: async io failed: %s\n",
1427                                         strerror(-res));
1428                         ret = res;
1429                         goto out_error;
1430                 }
1431
1432         }
1433         return event.res;
1434
1435 out_error:
1436         /*
1437          * The caller expects error return in traditional libc
1438          * convention, i.e. -1 and the errno set to error.
1439          */
1440         errno = -ret;
1441         return -1;
1442 }
1443
1444 int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
1445 {
1446         int ret;
1447
1448         if (aio) {
1449                 ret = __aio_rw(rw, fd, buf, len, offset);
1450         } else {
1451                 if (rw == READ)
1452                         ret = read(fd, buf, len);
1453                 else
1454                         ret = write(fd, buf, len);
1455         }
1456         return ret;
1457 }
1458
1459 #endif
1460
/*
 * Probe whether the filesystem accepts fallocate() with `mode` by
 * allocating one byte at offset 0 of the test file.
 *
 * Returns 0 when the probe fails with EOPNOTSUPP (a warning is printed
 * unless quiet), when running in lite mode, or when built without
 * <linux/falloc.h>.  Otherwise returns 1 after truncating the file back
 * to length 0.
 */
int
test_fallocate(int mode)
{
#ifdef HAVE_LINUX_FALLOC_H
        int ret = 0;

        if (!lite) {
                if (fallocate(fd, mode, 0, 1) && errno == EOPNOTSUPP) {
                        if(!quiet)
                                warn("main: filesystem does not support "
                                     "fallocate mode 0x%x, disabling!\n", mode);
                } else {
                        ret = 1;
                        /* undo whatever the probe did to the file */
                        if (ftruncate(fd, 0) == -1)
                                warn("test_fallocate: ftruncate");
                }
        }
        return ret;
#else
        /* no fallocate support compiled in: always report "unsupported" */
        (void)mode;
        return 0;
#endif
}
1479
1480 int
1481 main(int argc, char **argv)
1482 {
1483         int     i, style, ch;
1484         char    *endp;
1485         char goodfile[1024];
1486         char logfile[1024];
1487         struct stat statbuf;
1488
1489         goodfile[0] = 0;
1490         logfile[0] = 0;
1491
1492         page_size = getpagesize();
1493         page_mask = page_size - 1;
1494         mmap_mask = page_mask;
1495         
1496
1497         setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
1498
1499         while ((ch = getopt(argc, argv, "b:c:dfl:m:no:p:qr:s:t:w:xyAD:FKHzCLN:OP:RS:WZ"))
1500                != EOF)
1501                 switch (ch) {
1502                 case 'b':
1503                         simulatedopcount = getnum(optarg, &endp);
1504                         if (!quiet)
1505                                 fprintf(stdout, "Will begin at operation %ld\n",
1506                                         simulatedopcount);
1507                         if (simulatedopcount == 0)
1508                                 usage();
1509                         simulatedopcount -= 1;
1510                         break;
1511                 case 'c':
1512                         closeprob = getnum(optarg, &endp);
1513                         if (!quiet)
1514                                 fprintf(stdout,
1515                                         "Chance of close/open is 1 in %d\n",
1516                                         closeprob);
1517                         if (closeprob <= 0)
1518                                 usage();
1519                         break;
1520                 case 'd':
1521                         debug = 1;
1522                         break;
1523                 case 'f':
1524                         flush = 1;
1525                         break;
1526                 case 'l':
1527                         maxfilelen = getnum(optarg, &endp);
1528                         if (maxfilelen <= 0)
1529                                 usage();
1530                         break;
1531                 case 'm':
1532                         monitorstart = getnum(optarg, &endp);
1533                         if (monitorstart < 0)
1534                                 usage();
1535                         if (!endp || *endp++ != ':')
1536                                 usage();
1537                         monitorend = getnum(endp, &endp);
1538                         if (monitorend < 0)
1539                                 usage();
1540                         if (monitorend == 0)
1541                                 monitorend = -1; /* aka infinity */
1542                         debug = 1;
1543                 case 'n':
1544                         sizechecks = 0;
1545                         break;
1546                 case 'o':
1547                         maxoplen = getnum(optarg, &endp);
1548                         if (maxoplen <= 0)
1549                                 usage();
1550                         break;
1551                 case 'p':
1552                         progressinterval = getnum(optarg, &endp);
1553                         if (progressinterval == 0)
1554                                 usage();
1555                         break;
1556                 case 'q':
1557                         quiet = 1;
1558                         break;
1559                 case 'r':
1560                         readbdy = getnum(optarg, &endp);
1561                         if (readbdy <= 0)
1562                                 usage();
1563                         break;
1564                 case 's':
1565                         style = getnum(optarg, &endp);
1566                         if (style < 0 || style > 1)
1567                                 usage();
1568                         break;
1569                 case 't':
1570                         truncbdy = getnum(optarg, &endp);
1571                         if (truncbdy <= 0)
1572                                 usage();
1573                         break;
1574                 case 'w':
1575                         writebdy = getnum(optarg, &endp);
1576                         if (writebdy <= 0)
1577                                 usage();
1578                         break;
1579                 case 'x':
1580                         prealloc = 1;
1581                         break;
1582                 case 'y':
1583                         do_fsync = 1;
1584                         break;
1585                 case 'A':
1586                         aio = 1;
1587                         break;
1588                 case 'D':
1589                         debugstart = getnum(optarg, &endp);
1590                         if (debugstart < 1)
1591                                 usage();
1592                         break;
1593                 case 'F':
1594                         fallocate_calls = 0;
1595                         break;
1596                 case 'K':
1597                         keep_size_calls = 0;
1598                         break;
1599                 case 'H':
1600                         punch_hole_calls = 0;
1601                         break;
1602                 case 'z':
1603                         zero_range_calls = 0;
1604                         break;
1605                 case 'C':
1606                         collapse_range_calls = 0;
1607                         break;
1608                 case 'L':
1609                         lite = 1;
1610                         break;
1611                 case 'N':
1612                         numops = getnum(optarg, &endp);
1613                         if (numops < 0)
1614                                 usage();
1615                         break;
1616                 case 'O':
1617                         randomoplen = 0;
1618                         break;
1619                 case 'P':
1620                         strncpy(goodfile, optarg, sizeof(goodfile));
1621                         strcat(goodfile, "/");
1622                         strncpy(logfile, optarg, sizeof(logfile));
1623                         strcat(logfile, "/");
1624                         break;
1625                 case 'R':
1626                         mapped_reads = 0;
1627                         break;
1628                 case 'S':
1629                         seed = getnum(optarg, &endp);
1630                         if (seed == 0)
1631                                 seed = time(0) % 10000;
1632                         if (!quiet)
1633                                 fprintf(stdout, "Seed set to %d\n", seed);
1634                         if (seed < 0)
1635                                 usage();
1636                         break;
1637                 case 'W':
1638                         mapped_writes = 0;
1639                         if (!quiet)
1640                                 fprintf(stdout, "mapped writes DISABLED\n");
1641                         break;
1642                 case 'Z':
1643                         o_direct = O_DIRECT;
1644                         break;
1645                 default:
1646                         usage();
1647                         /* NOTREACHED */
1648                 }
1649         argc -= optind;
1650         argv += optind;
1651         if (argc != 1)
1652                 usage();
1653         fname = argv[0];
1654
1655         signal(SIGHUP,  cleanup);
1656         signal(SIGINT,  cleanup);
1657         signal(SIGPIPE, cleanup);
1658         signal(SIGALRM, cleanup);
1659         signal(SIGTERM, cleanup);
1660         signal(SIGXCPU, cleanup);
1661         signal(SIGXFSZ, cleanup);
1662         signal(SIGVTALRM,       cleanup);
1663         signal(SIGUSR1, cleanup);
1664         signal(SIGUSR2, cleanup);
1665
1666         initstate(seed, state, 256);
1667         setstate(state);
1668         fd = open(fname,
1669                 O_RDWR|(lite ? 0 : O_CREAT|O_TRUNC)|o_direct, 0666);
1670         if (fd < 0) {
1671                 prterr(fname);
1672                 exit(91);
1673         }
1674         if (fstat(fd, &statbuf)) {
1675                 prterr("check_size: fstat");
1676                 exit(91);
1677         }
1678         block_size = statbuf.st_blksize;
1679 #ifdef XFS
1680         if (prealloc) {
1681                 xfs_flock64_t   resv = { 0 };
1682 #ifdef HAVE_XFS_PLATFORM_DEFS_H
1683                 if (!platform_test_xfs_fd(fd)) {
1684                         prterr(fname);
1685                         fprintf(stderr, "main: cannot prealloc, non XFS\n");
1686                         exit(96);
1687                 }
1688 #endif
1689                 resv.l_len = maxfilelen;
1690                 if ((xfsctl(fname, fd, XFS_IOC_RESVSP, &resv)) < 0) {
1691                         prterr(fname);
1692                         exit(97);
1693                 }
1694         }
1695 #endif
1696         strncat(goodfile, fname, 256);
1697         strcat (goodfile, ".fsxgood");
1698         fsxgoodfd = open(goodfile, O_RDWR|O_CREAT|O_TRUNC, 0666);
1699         if (fsxgoodfd < 0) {
1700                 prterr(goodfile);
1701                 exit(92);
1702         }
1703         strncat(logfile, fname, 256);
1704         strcat (logfile, ".fsxlog");
1705         fsxlogf = fopen(logfile, "w");
1706         if (fsxlogf == NULL) {
1707                 prterr(logfile);
1708                 exit(93);
1709         }
1710
1711 #ifdef AIO
1712         if (aio) 
1713                 aio_setup();
1714 #endif
1715
1716         if (lite) {
1717                 off_t ret;
1718                 file_size = maxfilelen = lseek(fd, (off_t)0, SEEK_END);
1719                 if (file_size == (off_t)-1) {
1720                         prterr(fname);
1721                         warn("main: lseek eof");
1722                         exit(94);
1723                 }
1724                 ret = lseek(fd, (off_t)0, SEEK_SET);
1725                 if (ret == (off_t)-1) {
1726                         prterr(fname);
1727                         warn("main: lseek 0");
1728                         exit(95);
1729                 }
1730         }
1731         original_buf = (char *) malloc(maxfilelen);
1732         for (i = 0; i < maxfilelen; i++)
1733                 original_buf[i] = random() % 256;
1734         good_buf = (char *) malloc(maxfilelen + writebdy);
1735         good_buf = round_ptr_up(good_buf, writebdy, 0);
1736         memset(good_buf, '\0', maxfilelen);
1737         temp_buf = (char *) malloc(maxoplen + readbdy);
1738         temp_buf = round_ptr_up(temp_buf, readbdy, 0);
1739         memset(temp_buf, '\0', maxoplen);
1740         if (lite) {     /* zero entire existing file */
1741                 ssize_t written;
1742
1743                 written = write(fd, good_buf, (size_t)maxfilelen);
1744                 if (written != maxfilelen) {
1745                         if (written == -1) {
1746                                 prterr(fname);
1747                                 warn("main: error on write");
1748                         } else
1749                                 warn("main: short write, 0x%x bytes instead "
1750                                         "of 0x%lx\n",
1751                                         (unsigned)written,
1752                                         maxfilelen);
1753                         exit(98);
1754                 }
1755         } else 
1756                 check_trunc_hack();
1757
1758         if (fallocate_calls)
1759                 fallocate_calls = test_fallocate(0);
1760         if (keep_size_calls)
1761                 keep_size_calls = test_fallocate(FALLOC_FL_KEEP_SIZE);
1762         if (punch_hole_calls)
1763                 punch_hole_calls = test_fallocate(FALLOC_FL_PUNCH_HOLE |
1764                                                   FALLOC_FL_KEEP_SIZE);
1765         if (zero_range_calls)
1766                 zero_range_calls = test_fallocate(FALLOC_FL_ZERO_RANGE);
1767         if (collapse_range_calls)
1768                 collapse_range_calls = test_fallocate(FALLOC_FL_COLLAPSE_RANGE);
1769
1770         while (numops == -1 || numops--)
1771                 test();
1772
1773         if (close(fd)) {
1774                 prterr("close");
1775                 report_failure(99);
1776         }
1777         prt("All operations completed A-OK!\n");
1778
1779         exit(0);
1780         return 0;
1781 }