generic/033: add xfs delalloc indirect block depletion reproducer
[xfstests-dev.git] / ltp / fsx.c
1 /*
 *      Copyright (C) 1991, NeXT Computer, Inc.  All Rights Reserved.
3  *
4  *      File:   fsx.c
5  *      Author: Avadis Tevanian, Jr.
6  *
7  *      File system exerciser. 
8  *
9  *      Rewritten 8/98 by Conrad Minshall.
10  *
11  *      Small changes to work under Linux -- davej.
12  *
13  *      Checks for mmap last-page zero fill.
14  */
15
16 #include "global.h"
17
18 #include <limits.h>
19 #include <time.h>
20 #include <strings.h>
21 #include <sys/file.h>
22 #include <sys/mman.h>
23 #ifdef HAVE_ERR_H
24 #include <err.h>
25 #endif
26 #include <signal.h>
27 #include <stdio.h>
28 #include <stddef.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdarg.h>
32 #include <errno.h>
33 #ifdef AIO
34 #include <libaio.h>
35 #endif
36
37 #ifndef MAP_FILE
38 # define MAP_FILE 0
39 #endif
40
41 #define NUMPRINTCOLUMNS 32      /* # columns of data to print on each line */
42
43 /*
44  *      A log entry is an operation and a bunch of arguments.
45  */
46
/*
 * One record in the circular operation log.  log4() stores the opcode
 * bitwise-complemented when the global "closeopen" is set; logdump()
 * detects that by the negative value and undoes it.
 */
struct log_entry {
        int     operation;      /* OP_* code, or its complement (see above) */
        int     args[3];        /* per-operation arguments; meaning depends on the opcode */
};
51
#define LOGSIZE 1000            /* number of entries kept; log4() wraps logptr at this value */

struct log_entry        oplog[LOGSIZE]; /* the log */
int                     logptr = 0;     /* current position in log */
int                     logcount = 0;   /* total ops */
57
58 /*
59  * The operation matrix is complex due to conditional execution of different
60  * features. Hence when we come to deciding what operation to run, we need to
61  * be careful in how we select the different operations. The active operations
62  * are mapped to numbers as follows:
63  *
64  *              lite    !lite
65  * READ:        0       0
66  * WRITE:       1       1
67  * MAPREAD:     2       2
68  * MAPWRITE:    3       3
69  * TRUNCATE:    -       4
70  * FALLOCATE:   -       5
71  * PUNCH HOLE:  -       6
 * ZERO RANGE:  -       7
 * COLLAPSE:    -       8
73  *
74  * When mapped read/writes are disabled, they are simply converted to normal
75  * reads and writes. When fallocate/fpunch calls are disabled, they are
76  * converted to OP_SKIPPED. Hence OP_SKIPPED needs to have a number higher than
 * the operation selection matrix, as does the OP_CLOSEOPEN which is an
78  * operation modifier rather than an operation in itself.
79  *
80  * Because of the "lite" version, we also need to have different "maximum
81  * operation" defines to allow the ops to be selected correctly based on the
82  * mode being run.
83  */
84
/* common operations */
#define OP_READ         0
#define OP_WRITE        1
#define OP_MAPREAD      2
#define OP_MAPWRITE     3
#define OP_MAX_LITE     4       /* one greater than the highest common opcode */

/* !lite operations */
#define OP_TRUNCATE             4
#define OP_FALLOCATE            5
#define OP_PUNCH_HOLE           6
#define OP_ZERO_RANGE           7
#define OP_COLLAPSE_RANGE       8
#define OP_MAX_FULL             9       /* one greater than the highest !lite opcode */

/* operation modifiers */
#define OP_CLOSEOPEN    100
#define OP_SKIPPED      101     /* logged in place of an operation that was not performed */
103
/*
 * Override any system-provided PAGE_SIZE/PAGE_MASK.  Note that PAGE_SIZE
 * expands to a getpagesize() call, so each use is evaluated at run time,
 * and that PAGE_MASK here selects the offset-within-page bits.
 */
#undef PAGE_SIZE
#define PAGE_SIZE       getpagesize()
#undef PAGE_MASK
#define PAGE_MASK       (PAGE_SIZE - 1)
108
char    *original_buf;                  /* a pointer to the original data */
char    *good_buf;                      /* a pointer to the correct data */
char    *temp_buf;                      /* a pointer to the current data */
char    *fname;                         /* name of our test file */
int     fd;                             /* fd for our test file */

blksize_t       block_size = 0;         /* set outside this part of the file */
off_t           file_size = 0;          /* size the test file is expected to have; verified by check_size() */
off_t           biggest = 0;            /* largest size dotruncate() has been asked for so far */
char            state[256];             /* presumably random-number generator state -- TODO confirm */
unsigned long   testcalls = 0;          /* calls to function "test" */

unsigned long   simulatedopcount = 0;   /* -b flag */
int     closeprob = 0;                  /* -c flag */
int     debug = 0;                      /* -d flag */
unsigned long   debugstart = 0;         /* -D flag */
int     flush = 0;                      /* -f flag */
int     do_fsync = 0;                   /* -y flag */
unsigned long   maxfilelen = 256 * 1024;        /* -l flag */
int     sizechecks = 1;                 /* -n flag disables them */
int     maxoplen = 64 * 1024;           /* -o flag */
int     quiet = 0;                      /* -q flag */
unsigned long progressinterval = 0;     /* -p flag */
int     readbdy = 1;                    /* -r flag */
int     style = 0;                      /* -s flag */
int     prealloc = 0;                   /* -x flag */
int     truncbdy = 1;                   /* -t flag */
int     writebdy = 1;                   /* -w flag */
long    monitorstart = -1;              /* -m flag */
long    monitorend = -1;                /* -m flag */
int     lite = 0;                       /* -L flag */
long    numops = -1;                    /* -N flag */
int     randomoplen = 1;                /* -O flag disables it */
int     seed = 1;                       /* -S flag */
int     mapped_writes = 1;              /* -W flag disables */
int     fallocate_calls = 1;            /* -F flag disables */
int     punch_hole_calls = 1;           /* -H flag disables */
int     zero_range_calls = 1;           /* -z flag disables */
int     collapse_range_calls = 1;       /* -C flag disables */
int     mapped_reads = 1;               /* -R flag disables it */
int     fsxgoodfd = 0;                  /* fd report_failure() saves good_buf to; 0 means none */
int     o_direct;                       /* -Z */
int     aio = 0;

/* used by check_eofpage() and doflush(); assigned outside this part of the file */
int page_size;
int page_mask;
int mmap_mask;
#ifdef AIO
int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
#define READ 0
#define WRITE 1
#define fsxread(a,b,c,d)        aio_rw(READ, a,b,c,d)
#define fsxwrite(a,b,c,d)       aio_rw(WRITE, a,b,c,d)
#else
/* without AIO the offset argument (d) is dropped; callers lseek() first */
#define fsxread(a,b,c,d)        read(a,b,c)
#define fsxwrite(a,b,c,d)       write(a,b,c)
#endif

FILE *  fsxlogf = NULL;                 /* when non-NULL, prt() copies its output here */
int badoff = -1;                        /* offset of the last mismatching byte seen by check_buffers() */
int closeopen = 0;                      /* when set, log4() stores the complemented opcode */
170
/*
 * Round ptr up to the next multiple of align, then add offset bytes.
 * The mask arithmetic only gives a multiple of align when align is a
 * power of two.
 */
static void *round_ptr_up(void *ptr, unsigned long align, unsigned long offset)
{
        const unsigned long low_bits = align - 1;
        unsigned long addr = (unsigned long)ptr;

        addr += low_bits;
        addr &= ~low_bits;
        return (void *)(addr + offset);
}
179
/*
 * Print "fsx: ", then the formatted message followed by ": " (if fmt is
 * non-NULL), then the strerror() text for code and a newline, to stderr.
 */
void
vwarnc(int code, const char *fmt, va_list ap)
{
        fputs("fsx: ", stderr);
        if (fmt) {
                vfprintf(stderr, fmt, ap);
                fputs(": ", stderr);
        }
        fprintf(stderr, "%s\n", strerror(code));
}
189
/*
 * printf-style warning to stderr; the message is followed by the
 * description of the errno value current at the time of the call.
 */
void
warn(const char * fmt, ...)
{
        va_list params;
        int err = errno;

        va_start(params, fmt);
        vwarnc(err, fmt, params);
        va_end(params);
}
197
198 #define BUF_SIZE 1024
199
200 void
201 prt(char *fmt, ...)
202 {
203         va_list args;
204         char buffer[BUF_SIZE];
205
206         va_start(args, fmt);
207         vsnprintf(buffer, BUF_SIZE, fmt, args);
208         va_end(args);
209         fprintf(stdout, buffer);
210         if (fsxlogf)
211                 fprintf(fsxlogf, buffer);
212 }
213
/*
 * Report the current errno through prt() as "<prefix>: <strerror>".
 * A NULL prefix prints just the error text; NULL is never handed to
 * a %s conversion.
 */
void
prterr(char *prefix)
{
        prt("%s%s%s\n", prefix ? prefix : "", prefix ? ": " : "",
            strerror(errno));
}
219
220
221 void
222 log4(int operation, int arg0, int arg1, int arg2)
223 {
224         struct log_entry *le;
225
226         le = &oplog[logptr];
227         le->operation = operation;
228         if (closeopen)
229                 le->operation = ~ le->operation;
230         le->args[0] = arg0;
231         le->args[1] = arg1;
232         le->args[2] = arg2;
233         logptr++;
234         logcount++;
235         if (logptr >= LOGSIZE)
236                 logptr = 0;
237 }
238
239
/*
 * Print the operation log through prt(), oldest retained entry first.
 *
 * If fewer than LOGSIZE operations were logged the dump starts at slot 0;
 * otherwise it starts at logptr (the slot that would be overwritten next)
 * and covers all LOGSIZE slots.  Entries whose range contains the global
 * badoff get a "***" marker appended.
 *
 * Side effects: complemented opcodes are restored in place in oplog[],
 * and the global closeopen is overwritten for every entry printed.
 */
void
logdump(void)
{
        int     i, count, down;
        struct log_entry        *lp;
        char *falloc_type[3] = {"PAST_EOF", "EXTENDING", "INTERIOR"};

        prt("LOG DUMP (%d total operations):\n", logcount);
        if (logcount < LOGSIZE) {
                i = 0;
                count = logcount;
        } else {
                i = logptr;
                count = LOGSIZE;
        }
        for ( ; count > 0; count--) {
                int opnum;

                opnum = i+1 + (logcount/LOGSIZE)*LOGSIZE;
                prt("%d(%3d mod 256): ", opnum, opnum%256);
                lp = &oplog[i];
                /* a negative opcode is the complemented form stored by log4() */
                if ((closeopen = lp->operation < 0))
                        lp->operation = ~ lp->operation;
                        
                switch (lp->operation) {
                case OP_MAPREAD:
                        prt("MAPREAD  0x%x thru 0x%x\t(0x%x bytes)",
                            lp->args[0], lp->args[0] + lp->args[1] - 1,
                            lp->args[1]);
                        if (badoff >= lp->args[0] && badoff <
                                                     lp->args[0] + lp->args[1])
                                prt("\t***RRRR***");
                        break;
                case OP_MAPWRITE:
                        prt("MAPWRITE 0x%x thru 0x%x\t(0x%x bytes)",
                            lp->args[0], lp->args[0] + lp->args[1] - 1,
                            lp->args[1]);
                        if (badoff >= lp->args[0] && badoff <
                                                     lp->args[0] + lp->args[1])
                                prt("\t******WWWW");
                        break;
                case OP_READ:
                        prt("READ     0x%x thru 0x%x\t(0x%x bytes)",
                            lp->args[0], lp->args[0] + lp->args[1] - 1,
                            lp->args[1]);
                        if (badoff >= lp->args[0] &&
                            badoff < lp->args[0] + lp->args[1])
                                prt("\t***RRRR***");
                        break;
                case OP_WRITE:
                        /* args: 0 = offset, 1 = size, 2 = file size before the write */
                        prt("WRITE    0x%x thru 0x%x\t(0x%x bytes)",
                            lp->args[0], lp->args[0] + lp->args[1] - 1,
                            lp->args[1]);
                        if (lp->args[0] > lp->args[2])
                                prt(" HOLE");
                        else if (lp->args[0] + lp->args[1] > lp->args[2])
                                prt(" EXTEND");
                        if ((badoff >= lp->args[0] || badoff >=lp->args[2]) &&
                            badoff < lp->args[0] + lp->args[1])
                                prt("\t***WWWW");
                        break;
                case OP_TRUNCATE:
                        /* args: 0 = new size, 1 = old size */
                        down = lp->args[0] < lp->args[1];
                        prt("TRUNCATE %s\tfrom 0x%x to 0x%x",
                            down ? "DOWN" : "UP", lp->args[1], lp->args[0]);
                        /* badoff between the smaller and the larger of the two sizes */
                        if (badoff >= lp->args[!down] &&
                            badoff < lp->args[!!down])
                                prt("\t******WWWW");
                        break;
                case OP_FALLOCATE:
                        /* 0: offset 1: length 2: where alloced */
                        prt("FALLOC   0x%x thru 0x%x\t(0x%x bytes) %s",
                                lp->args[0], lp->args[0] + lp->args[1],
                                lp->args[1], falloc_type[lp->args[2]]);
                        if (badoff >= lp->args[0] &&
                            badoff < lp->args[0] + lp->args[1])
                                prt("\t******FFFF");
                        break;
                case OP_PUNCH_HOLE:
                        prt("PUNCH    0x%x thru 0x%x\t(0x%x bytes)",
                            lp->args[0], lp->args[0] + lp->args[1] - 1,
                            lp->args[1]);
                        if (badoff >= lp->args[0] && badoff <
                                                     lp->args[0] + lp->args[1])
                                prt("\t******PPPP");
                        break;
                case OP_ZERO_RANGE:
                        prt("ZERO     0x%x thru 0x%x\t(0x%x bytes)",
                            lp->args[0], lp->args[0] + lp->args[1] - 1,
                            lp->args[1]);
                        if (badoff >= lp->args[0] && badoff <
                                                     lp->args[0] + lp->args[1])
                                prt("\t******ZZZZ");
                        break;
                case OP_COLLAPSE_RANGE:
                        prt("COLLAPSE 0x%x thru 0x%x\t(0x%x bytes)",
                            lp->args[0], lp->args[0] + lp->args[1] - 1,
                            lp->args[1]);
                        if (badoff >= lp->args[0] && badoff <
                                                     lp->args[0] + lp->args[1])
                                prt("\t******CCCC");
                        break;
                case OP_SKIPPED:
                        prt("SKIPPED (no operation)");
                        break;
                default:
                        prt("BOGUS LOG ENTRY (operation code = %d)!",
                            lp->operation);
                }
                if (closeopen)
                        prt("\n\t\tCLOSE/OPEN");
                prt("\n");
                /* step to the next slot, wrapping around the circular log */
                i++;
                if (i == LOGSIZE)
                        i = 0;
        }
}
357
358
359 void
360 save_buffer(char *buffer, off_t bufferlength, int fd)
361 {
362         off_t ret;
363         ssize_t byteswritten;
364
365         if (fd <= 0 || bufferlength == 0)
366                 return;
367
368         if (bufferlength > SSIZE_MAX) {
369                 prt("fsx flaw: overflow in save_buffer\n");
370                 exit(67);
371         }
372         if (lite) {
373                 off_t size_by_seek = lseek(fd, (off_t)0, SEEK_END);
374                 if (size_by_seek == (off_t)-1)
375                         prterr("save_buffer: lseek eof");
376                 else if (bufferlength > size_by_seek) {
377                         warn("save_buffer: .fsxgood file too short... will save 0x%llx bytes instead of 0x%llx\n", (unsigned long long)size_by_seek,
378                              (unsigned long long)bufferlength);
379                         bufferlength = size_by_seek;
380                 }
381         }
382
383         ret = lseek(fd, (off_t)0, SEEK_SET);
384         if (ret == (off_t)-1)
385                 prterr("save_buffer: lseek 0");
386         
387         byteswritten = write(fd, buffer, (size_t)bufferlength);
388         if (byteswritten != bufferlength) {
389                 if (byteswritten == -1)
390                         prterr("save_buffer write");
391                 else
392                         warn("save_buffer: short write, 0x%x bytes instead of 0x%llx\n",
393                              (unsigned)byteswritten,
394                              (unsigned long long)bufferlength);
395         }
396 }
397
398
399 void
400 report_failure(int status)
401 {
402         logdump();
403         
404         if (fsxgoodfd) {
405                 if (good_buf) {
406                         save_buffer(good_buf, file_size, fsxgoodfd);
407                         prt("Correct content saved for comparison\n");
408                         prt("(maybe hexdump \"%s\" vs \"%s.fsxgood\")\n",
409                             fname, fname);
410                 }
411                 close(fsxgoodfd);
412         }
413         exit(status);
414 }
415
416
/*
 * Build a 16-bit value from the byte at cp (high half) and the byte
 * at cp + 1 (low half).  Note that two bytes are read, so cp + 1 must
 * be readable as well.
 */
#define short_at(cp) ((unsigned short)((*((unsigned char *)(cp)) << 8) | \
                                        *(((unsigned char *)(cp)) + 1)))
419
420 void
421 check_buffers(unsigned offset, unsigned size)
422 {
423         unsigned char c, t;
424         unsigned i = 0;
425         unsigned n = 0;
426         unsigned op = 0;
427         unsigned bad = 0;
428
429         if (memcmp(good_buf + offset, temp_buf, size) != 0) {
430                 prt("READ BAD DATA: offset = 0x%x, size = 0x%x, fname = %s\n",
431                     offset, size, fname);
432                 prt("OFFSET\tGOOD\tBAD\tRANGE\n");
433                 while (size > 0) {
434                         c = good_buf[offset];
435                         t = temp_buf[i];
436                         if (c != t) {
437                                 if (n < 16) {
438                                         bad = short_at(&temp_buf[i]);
439                                         prt("0x%5x\t0x%04x\t0x%04x", offset,
440                                             short_at(&good_buf[offset]), bad);
441                                         op = temp_buf[offset & 1 ? i+1 : i];
442                                         prt("\t0x%5x\n", n);
443                                         if (op)
444                                                 prt("operation# (mod 256) for "
445                                                   "the bad data may be %u\n",
446                                                 ((unsigned)op & 0xff));
447                                         else
448                                                 prt("operation# (mod 256) for "
449                                                   "the bad data unknown, check"
450                                                   " HOLE and EXTEND ops\n");
451                                 }
452                                 n++;
453                                 badoff = offset;
454                         }
455                         offset++;
456                         i++;
457                         size--;
458                 }
459                 report_failure(110);
460         }
461 }
462
463
464 void
465 check_size(void)
466 {
467         struct stat     statbuf;
468         off_t   size_by_seek;
469
470         if (fstat(fd, &statbuf)) {
471                 prterr("check_size: fstat");
472                 statbuf.st_size = -1;
473         }
474         size_by_seek = lseek(fd, (off_t)0, SEEK_END);
475         if (file_size != statbuf.st_size || file_size != size_by_seek) {
476                 prt("Size error: expected 0x%llx stat 0x%llx seek 0x%llx\n",
477                     (unsigned long long)file_size,
478                     (unsigned long long)statbuf.st_size,
479                     (unsigned long long)size_by_seek);
480                 report_failure(120);
481         }
482 }
483
484
485 void
486 check_trunc_hack(void)
487 {
488         struct stat statbuf;
489
490         ftruncate(fd, (off_t)0);
491         ftruncate(fd, (off_t)100000);
492         fstat(fd, &statbuf);
493         if (statbuf.st_size != (off_t)100000) {
494                 prt("no extend on truncate! not posix!\n");
495                 exit(130);
496         }
497         ftruncate(fd, 0);
498 }
499
500 void
501 doflush(unsigned offset, unsigned size)
502 {
503         unsigned pg_offset;
504         unsigned map_size;
505         char    *p;
506
507         if (o_direct == O_DIRECT)
508                 return;
509
510         pg_offset = offset & mmap_mask;
511         map_size  = pg_offset + size;
512
513         if ((p = (char *)mmap(0, map_size, PROT_READ | PROT_WRITE,
514                               MAP_FILE | MAP_SHARED, fd,
515                               (off_t)(offset - pg_offset))) == (char *)-1) {
516                 prterr("doflush: mmap");
517                 report_failure(202);
518         }
519         if (msync(p, map_size, MS_INVALIDATE) != 0) {
520                 prterr("doflush: msync");
521                 report_failure(203);
522         }
523         if (munmap(p, map_size) != 0) {
524                 prterr("doflush: munmap");
525                 report_failure(204);
526         }
527 }
528
529 void
530 doread(unsigned offset, unsigned size)
531 {
532         off_t ret;
533         unsigned iret;
534
535         offset -= offset % readbdy;
536         if (o_direct)
537                 size -= size % readbdy;
538         if (size == 0) {
539                 if (!quiet && testcalls > simulatedopcount && !o_direct)
540                         prt("skipping zero size read\n");
541                 log4(OP_SKIPPED, OP_READ, offset, size);
542                 return;
543         }
544         if (size + offset > file_size) {
545                 if (!quiet && testcalls > simulatedopcount)
546                         prt("skipping seek/read past end of file\n");
547                 log4(OP_SKIPPED, OP_READ, offset, size);
548                 return;
549         }
550
551         log4(OP_READ, offset, size, 0);
552
553         if (testcalls <= simulatedopcount)
554                 return;
555
556         if (!quiet &&
557                 ((progressinterval && testcalls % progressinterval == 0)  ||
558                 (debug &&
559                        (monitorstart == -1 ||
560                         (offset + size > monitorstart &&
561                         (monitorend == -1 || offset <= monitorend))))))
562                 prt("%lu read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
563                     offset, offset + size - 1, size);
564         ret = lseek(fd, (off_t)offset, SEEK_SET);
565         if (ret == (off_t)-1) {
566                 prterr("doread: lseek");
567                 report_failure(140);
568         }
569         iret = fsxread(fd, temp_buf, size, offset);
570         if (iret != size) {
571                 if (iret == -1)
572                         prterr("doread: read");
573                 else
574                         prt("short read: 0x%x bytes instead of 0x%x\n",
575                             iret, size);
576                 report_failure(141);
577         }
578         check_buffers(offset, size);
579 }
580
581
582 void
583 check_eofpage(char *s, unsigned offset, char *p, int size)
584 {
585         unsigned long last_page, should_be_zero;
586
587         if (offset + size <= (file_size & ~page_mask))
588                 return;
589         /*
590          * we landed in the last page of the file
591          * test to make sure the VM system provided 0's 
592          * beyond the true end of the file mapping
593          * (as required by mmap def in 1996 posix 1003.1)
594          */
595         last_page = ((unsigned long)p + (offset & page_mask) + size) & ~page_mask;
596
597         for (should_be_zero = last_page + (file_size & page_mask);
598              should_be_zero < last_page + page_size;
599              should_be_zero++)
600                 if (*(char *)should_be_zero) {
601                         prt("Mapped %s: non-zero data past EOF (0x%llx) page offset 0x%x is 0x%04x\n",
602                             s, file_size - 1, should_be_zero & page_mask,
603                             short_at(should_be_zero));
604                         report_failure(205);
605                 }
606 }
607
608
609 void
610 domapread(unsigned offset, unsigned size)
611 {
612         unsigned pg_offset;
613         unsigned map_size;
614         char    *p;
615
616         offset -= offset % readbdy;
617         if (size == 0) {
618                 if (!quiet && testcalls > simulatedopcount)
619                         prt("skipping zero size read\n");
620                 log4(OP_SKIPPED, OP_MAPREAD, offset, size);
621                 return;
622         }
623         if (size + offset > file_size) {
624                 if (!quiet && testcalls > simulatedopcount)
625                         prt("skipping seek/read past end of file\n");
626                 log4(OP_SKIPPED, OP_MAPREAD, offset, size);
627                 return;
628         }
629
630         log4(OP_MAPREAD, offset, size, 0);
631
632         if (testcalls <= simulatedopcount)
633                 return;
634
635         if (!quiet &&
636                 ((progressinterval && testcalls % progressinterval == 0) ||
637                        (debug &&
638                        (monitorstart == -1 ||
639                         (offset + size > monitorstart &&
640                         (monitorend == -1 || offset <= monitorend))))))
641                 prt("%lu mapread\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
642                     offset, offset + size - 1, size);
643
644         pg_offset = offset & PAGE_MASK;
645         map_size  = pg_offset + size;
646
647         if ((p = (char *)mmap(0, map_size, PROT_READ, MAP_SHARED, fd,
648                               (off_t)(offset - pg_offset))) == (char *)-1) {
649                 prterr("domapread: mmap");
650                 report_failure(190);
651         }
652         memcpy(temp_buf, p + pg_offset, size);
653
654         check_eofpage("Read", offset, p, size);
655
656         if (munmap(p, map_size) != 0) {
657                 prterr("domapread: munmap");
658                 report_failure(191);
659         }
660
661         check_buffers(offset, size);
662 }
663
664
665 void
666 gendata(char *original_buf, char *good_buf, unsigned offset, unsigned size)
667 {
668         while (size--) {
669                 good_buf[offset] = testcalls % 256; 
670                 if (offset % 2)
671                         good_buf[offset] += original_buf[offset];
672                 offset++;
673         }
674 }
675
676
/*
 * Perform one write() test.
 *
 * offset is rounded down to a multiple of writebdy; with o_direct set
 * the size is rounded down as well.  A zero-length request is logged as
 * OP_SKIPPED.  Otherwise the operation is logged, the expected contents
 * in good_buf and the expected file_size are updated, and (unless the
 * call is still within the simulated-op range) the data is written to
 * the file, optionally followed by fsync() and doflush().
 * Failures end the run via report_failure(149-152).
 */
void
dowrite(unsigned offset, unsigned size)
{
        off_t ret;
        unsigned iret;

        offset -= offset % writebdy;
        if (o_direct)
                size -= size % writebdy;
        if (size == 0) {
                if (!quiet && testcalls > simulatedopcount && !o_direct)
                        prt("skipping zero size write\n");
                log4(OP_SKIPPED, OP_WRITE, offset, size);
                return;
        }

        /* third argument records the file size before this write */
        log4(OP_WRITE, offset, size, file_size);

        gendata(original_buf, good_buf, offset, size);
        if (file_size < offset + size) {
                /* writing past EOF: the gap between old EOF and offset reads as zeroes */
                if (file_size < offset)
                        memset(good_buf + file_size, '\0', offset - file_size);
                file_size = offset + size;
                /* a write that grows the file is treated as an fsx bug in lite mode */
                if (lite) {
                        warn("Lite file size bug in fsx!");
                        report_failure(149);
                }
        }

        /* simulated operations only update the in-memory model */
        if (testcalls <= simulatedopcount)
                return;

        if (!quiet &&
                ((progressinterval && testcalls % progressinterval == 0) ||
                       (debug &&
                       (monitorstart == -1 ||
                        (offset + size > monitorstart &&
                        (monitorend == -1 || offset <= monitorend))))))
                prt("%lu write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
                    offset, offset + size - 1, size);
        ret = lseek(fd, (off_t)offset, SEEK_SET);
        if (ret == (off_t)-1) {
                prterr("dowrite: lseek");
                report_failure(150);
        }
        iret = fsxwrite(fd, good_buf + offset, size, offset);
        if (iret != size) {
                /* iret is unsigned; -1 converts to UINT_MAX for this comparison */
                if (iret == -1)
                        prterr("dowrite: write");
                else
                        prt("short write: 0x%x bytes instead of 0x%x\n",
                            iret, size);
                report_failure(151);
        }
        if (do_fsync) {
                if (fsync(fd)) {
                        prt("fsync() failed: %s\n", strerror(errno));
                        report_failure(152);
                }
        }
        if (flush) {
                doflush(offset, size);
        }
}
741
742
/*
 * Perform one mmap-based write test.
 *
 * offset is rounded down to a multiple of writebdy.  A zero-length
 * request is logged as OP_SKIPPED.  Otherwise the operation is logged,
 * the expected contents in good_buf and the expected file_size are
 * updated, and (unless the call is still within the simulated-op range)
 * the file is extended with ftruncate() if needed, mapped shared,
 * written through the mapping, msync()ed and unmapped.
 *
 * mmap/msync/munmap failures end the run via report_failure(202-204);
 * an ftruncate() failure calls exit(201) directly, without a log dump.
 */
void
domapwrite(unsigned offset, unsigned size)
{
        unsigned pg_offset;
        unsigned map_size;
        off_t    cur_filesize;
        char    *p;

        offset -= offset % writebdy;
        if (size == 0) {
                if (!quiet && testcalls > simulatedopcount)
                        prt("skipping zero size write\n");
                log4(OP_SKIPPED, OP_MAPWRITE, offset, size);
                return;
        }
        /* remember the size before the model is updated below */
        cur_filesize = file_size;

        log4(OP_MAPWRITE, offset, size, 0);

        gendata(original_buf, good_buf, offset, size);
        if (file_size < offset + size) {
                /* writing past EOF: the gap between old EOF and offset reads as zeroes */
                if (file_size < offset)
                        memset(good_buf + file_size, '\0', offset - file_size);
                file_size = offset + size;
                /* a write that grows the file is treated as an fsx bug in lite mode */
                if (lite) {
                        warn("Lite file size bug in fsx!");
                        report_failure(200);
                }
        }

        /* simulated operations only update the in-memory model */
        if (testcalls <= simulatedopcount)
                return;

        if (!quiet &&
                ((progressinterval && testcalls % progressinterval == 0) ||
                       (debug &&
                       (monitorstart == -1 ||
                        (offset + size > monitorstart &&
                        (monitorend == -1 || offset <= monitorend))))))
                prt("%lu mapwrite\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
                    offset, offset + size - 1, size);

        /* grow the file first so the mapping covers existing file space */
        if (file_size > cur_filesize) {
                if (ftruncate(fd, file_size) == -1) {
                        prterr("domapwrite: ftruncate");
                        exit(201);
                }
        }
        /* the mapping must start on a page boundary */
        pg_offset = offset & PAGE_MASK;
        map_size  = pg_offset + size;

        if ((p = (char *)mmap(0, map_size, PROT_READ | PROT_WRITE,
                              MAP_FILE | MAP_SHARED, fd,
                              (off_t)(offset - pg_offset))) == (char *)-1) {
                prterr("domapwrite: mmap");
                report_failure(202);
        }
        memcpy(p + pg_offset, good_buf + offset, size);
        if (msync(p, map_size, MS_SYNC) != 0) {
                prterr("domapwrite: msync");
                report_failure(203);
        }

        check_eofpage("Write", offset, p, size);

        if (munmap(p, map_size) != 0) {
                prterr("domapwrite: munmap");
                report_failure(204);
        }
}
813
814
815 void
816 dotruncate(unsigned size)
817 {
818         int oldsize = file_size;
819
820         size -= size % truncbdy;
821         if (size > biggest) {
822                 biggest = size;
823                 if (!quiet && testcalls > simulatedopcount)
824                         prt("truncating to largest ever: 0x%x\n", size);
825         }
826
827         log4(OP_TRUNCATE, size, (unsigned)file_size, 0);
828
829         if (size > file_size)
830                 memset(good_buf + file_size, '\0', size - file_size);
831         file_size = size;
832
833         if (testcalls <= simulatedopcount)
834                 return;
835         
836         if ((progressinterval && testcalls % progressinterval == 0) ||
837             (debug && (monitorstart == -1 || monitorend == -1 ||
838                       size <= monitorend)))
839                 prt("%lu trunc\tfrom 0x%x to 0x%x\n", testcalls, oldsize, size);
840         if (ftruncate(fd, (off_t)size) == -1) {
841                 prt("ftruncate1: %x\n", size);
842                 prterr("dotruncate: ftruncate");
843                 report_failure(160);
844         }
845 }
846
#ifdef FALLOC_FL_PUNCH_HOLE
/*
 * Perform one hole-punch test with
 * fallocate(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE).
 *
 * Zero-length requests and requests starting at or beyond the expected
 * end of file are logged as OP_SKIPPED.  Otherwise the operation is
 * logged and, unless the call is still within the simulated-op range,
 * the hole is punched and the matching part of good_buf (clamped to
 * file_size) is zeroed.  A fallocate() failure ends the run via
 * report_failure(161).
 */
void
do_punch_hole(unsigned offset, unsigned length)
{
        unsigned end_offset;
        int max_offset = 0;
        int max_len = 0;
        int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;

        if (length == 0) {
                if (!quiet && testcalls > simulatedopcount)
                        prt("skipping zero length punch hole\n");
                /* the skip is always logged, even when the message is suppressed */
                log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, length);
                return;
        }

        if (file_size <= (loff_t)offset) {
                if (!quiet && testcalls > simulatedopcount)
                        prt("skipping hole punch off the end of the file\n");
                /* the skip is always logged, even when the message is suppressed */
                log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, length);
                return;
        }

        end_offset = offset + length;

        log4(OP_PUNCH_HOLE, offset, length, 0);

        if (testcalls <= simulatedopcount)
                return;

        if ((progressinterval && testcalls % progressinterval == 0) ||
            (debug && (monitorstart == -1 || monitorend == -1 ||
                      end_offset <= monitorend))) {
                prt("%lu punch\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
                        offset, offset+length, length);
        }
        if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
                /* no stray '%' before "punch": it would parse as a %p conversion */
                prt("punch hole: %x to %x\n", offset, length);
                prterr("do_punch_hole: fallocate");
                report_failure(161);
        }

        /* zero the punched range in the model, clamped to the file size */
        max_offset = offset < file_size ? offset : file_size;
        max_len = max_offset + length <= file_size ? length :
                        file_size - max_offset;
        memset(good_buf + max_offset, '\0', max_len);
}

#else
/* Hole punching is not available in this build: do nothing. */
void
do_punch_hole(unsigned offset, unsigned length)
{
        return;
}
#endif
903
#ifdef FALLOC_FL_ZERO_RANGE
/*
 * Zero "length" bytes at "offset" with fallocate(FALLOC_FL_ZERO_RANGE) and
 * zero the same range of good_buf.  Zero-length requests are logged as
 * OP_SKIPPED.  While operations are only being simulated
 * (testcalls <= simulatedopcount) the operation is logged but neither the
 * file nor good_buf is modified.
 */
void
do_zero_range(unsigned offset, unsigned length)
{
        unsigned end_offset;
        int mode = FALLOC_FL_ZERO_RANGE;
        int keep_size;

        if (length == 0) {
                if (!quiet && testcalls > simulatedopcount)
                        prt("skipping zero length zero range\n");
                /* the skip is logged even when the message is suppressed */
                log4(OP_SKIPPED, OP_ZERO_RANGE, offset, length);
                return;
        }

        /*
         * NOTE(review): keep_size only feeds the bookkeeping and the log
         * entry below; the mode handed to fallocate() is always plain
         * FALLOC_FL_ZERO_RANGE.  The random() call is kept so the sequence
         * of random numbers stays the same.
         */
        keep_size = random() % 2;

        end_offset = keep_size ? 0 : offset + length;

        if (end_offset > biggest) {
                biggest = end_offset;
                if (!quiet && testcalls > simulatedopcount)
                        prt("zero_range to largest ever: 0x%x\n", end_offset);
        }

        /*
         * last arg matches fallocate string array index in logdump:
         *      0: allocate past EOF
         *      1: extending prealloc
         *      2: interior prealloc
         */
        log4(OP_ZERO_RANGE, offset, length, (end_offset > file_size) ? (keep_size ? 0 : 1) : 2);

        if (testcalls <= simulatedopcount)
                return;

        if ((progressinterval && testcalls % progressinterval == 0) ||
            (debug && (monitorstart == -1 || monitorend == -1 ||
                      end_offset <= monitorend))) {
                prt("%lu zero\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
                        offset, offset+length, length);
        }
        if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
                /*
                 * The format used to start with "%p", which consumed
                 * "offset" as a pointer argument and shifted the others.
                 */
                prt("zero range: %x to %x\n", offset, length);
                prterr("do_zero_range: fallocate");
                report_failure(161);
        }

        memset(good_buf + offset, '\0', length);
}

#else
/* Zero range is not available in this build: nothing to do. */
void
do_zero_range(unsigned offset, unsigned length)
{
        return;
}
#endif
962
#ifdef FALLOC_FL_COLLAPSE_RANGE
/*
 * Remove "length" bytes at "offset" from the file with
 * fallocate(FALLOC_FL_COLLAPSE_RANGE) and mirror the change in good_buf and
 * file_size.  Zero-length requests and ranges that reach EOF are logged as
 * OP_SKIPPED.  While operations are only being simulated the operation is
 * logged and nothing else happens.
 */
void
do_collapse_range(unsigned offset, unsigned length)
{
        const int mode = FALLOC_FL_COLLAPSE_RANGE;
        const int live = testcalls > simulatedopcount;
        unsigned end_offset;

        if (length == 0) {
                if (!quiet && live)
                        prt("skipping zero length collapse range\n");
                log4(OP_SKIPPED, OP_COLLAPSE_RANGE, offset, length);
                return;
        }

        end_offset = offset + length;
        if (file_size <= (loff_t)end_offset) {
                if (!quiet && live)
                        prt("skipping collapse range behind EOF\n");
                log4(OP_SKIPPED, OP_COLLAPSE_RANGE, offset, length);
                return;
        }

        log4(OP_COLLAPSE_RANGE, offset, length, 0);

        if (!live)
                return;

        if ((progressinterval && testcalls % progressinterval == 0) ||
            (debug && (monitorstart == -1 || monitorend == -1 ||
                       end_offset <= monitorend)))
                prt("%lu collapse\tfrom 0x%x to 0x%x, (0x%x bytes)\n",
                    testcalls, offset, offset + length, length);

        if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
                prt("collapse range: %x to %x\n", offset, length);
                prterr("do_collapse_range: fallocate");
                report_failure(161);
        }

        /* slide everything behind the collapsed range down on top of it */
        memmove(good_buf + offset, good_buf + end_offset,
                file_size - end_offset);
        file_size -= length;
}

#else
void
do_collapse_range(unsigned offset, unsigned length)
{
        /* collapse range is not available in this build: nothing to do */
}
#endif
1014
#ifdef HAVE_LINUX_FALLOC_H
/*
 * Preallocate "length" bytes at "offset" with fallocate().  A random() draw
 * decides whether FALLOC_FL_KEEP_SIZE is used; without it, a range ending
 * beyond the current EOF grows the file, which is mirrored in good_buf and
 * file_size.  Zero-length requests are logged as OP_SKIPPED.
 */
void
do_preallocate(unsigned offset, unsigned length)
{
        unsigned end_offset = 0;
        int keep_size;
        int how;

        if (length == 0) {
                if (!quiet && testcalls > simulatedopcount)
                        prt("skipping zero length fallocate\n");
                log4(OP_SKIPPED, OP_FALLOCATE, offset, length);
                return;
        }

        keep_size = random() % 2;

        /* with keep_size set, the end offset stays 0 for the checks below */
        if (!keep_size)
                end_offset = offset + length;

        if (end_offset > biggest) {
                biggest = end_offset;
                if (!quiet && testcalls > simulatedopcount)
                        prt("fallocating to largest ever: 0x%x\n", end_offset);
        }

        /*
         * last arg matches fallocate string array index in logdump:
         *      0: allocate past EOF
         *      1: extending prealloc
         *      2: interior prealloc
         */
        if (end_offset > file_size)
                how = keep_size ? 0 : 1;
        else
                how = 2;
        log4(OP_FALLOCATE, offset, length, how);

        /* an extending preallocation exposes zeroes up to the new EOF */
        if (end_offset > file_size) {
                memset(good_buf + file_size, '\0', end_offset - file_size);
                file_size = end_offset;
        }

        if (testcalls <= simulatedopcount)
                return;

        if ((progressinterval && testcalls % progressinterval == 0) ||
            (debug && (monitorstart == -1 || monitorend == -1 ||
                       end_offset <= monitorend)))
                prt("%lu falloc\tfrom 0x%x to 0x%x (0x%x bytes)\n", testcalls,
                    offset, offset + length, length);

        if (fallocate(fd, keep_size ? FALLOC_FL_KEEP_SIZE : 0,
                      (loff_t)offset, (loff_t)length) == -1) {
                prt("fallocate: %x to %x\n", offset, length);
                prterr("do_preallocate: fallocate");
                report_failure(161);
        }
}
#else
void
do_preallocate(unsigned offset, unsigned length)
{
        /* fallocate is not available in this build: nothing to do */
}
#endif
1074
1075 void
1076 writefileimage()
1077 {
1078         ssize_t iret;
1079
1080         if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
1081                 prterr("writefileimage: lseek");
1082                 report_failure(171);
1083         }
1084         iret = write(fd, good_buf, file_size);
1085         if ((off_t)iret != file_size) {
1086                 if (iret == -1)
1087                         prterr("writefileimage: write");
1088                 else
1089                         prt("short write: 0x%x bytes instead of 0x%llx\n",
1090                             iret, (unsigned long long)file_size);
1091                 report_failure(172);
1092         }
1093         if (lite ? 0 : ftruncate(fd, file_size) == -1) {
1094                 prt("ftruncate2: %llx\n", (unsigned long long)file_size);
1095                 prterr("writefileimage: ftruncate");
1096                 report_failure(173);
1097         }
1098 }
1099
1100
1101 void
1102 docloseopen(void)
1103
1104         if (testcalls <= simulatedopcount)
1105                 return;
1106
1107         if (debug)
1108                 prt("%lu close/open\n", testcalls);
1109         if (close(fd)) {
1110                 prterr("docloseopen: close");
1111                 report_failure(180);
1112         }
1113         fd = open(fname, O_RDWR|o_direct, 0);
1114         if (fd < 0) {
1115                 prterr("docloseopen: open");
1116                 report_failure(181);
1117         }
1118 }
1119
/*
 * Fold "off" into [0, size) (or force it to 0 when size is 0) and shorten
 * "len" so that off + len does not run past size.  "off" and "len" must be
 * modifiable lvalues; every argument is evaluated more than once.
 */
#define TRIM_OFF_LEN(off, len, size)            \
do {                                            \
        if (!(size))                            \
                (off) = 0;                      \
        else                                    \
                (off) %= (size);                \
        if ((size) < (off) + (len))             \
                (len) = (size) - (off);         \
} while (0)
1129
1130 void
1131 test(void)
1132 {
1133         unsigned long   offset;
1134         unsigned long   size = maxoplen;
1135         unsigned long   rv = random();
1136         unsigned long   op;
1137
1138         if (simulatedopcount > 0 && testcalls == simulatedopcount)
1139                 writefileimage();
1140
1141         testcalls++;
1142
1143         if (closeprob)
1144                 closeopen = (rv >> 3) < (1 << 28) / closeprob;
1145
1146         if (debugstart > 0 && testcalls >= debugstart)
1147                 debug = 1;
1148
1149         if (!quiet && testcalls < simulatedopcount && testcalls % 100000 == 0)
1150                 prt("%lu...\n", testcalls);
1151
1152         offset = random();
1153         if (randomoplen)
1154                 size = random() % (maxoplen + 1);
1155
1156         /* calculate appropriate op to run */
1157         if (lite)
1158                 op = rv % OP_MAX_LITE;
1159         else
1160                 op = rv % OP_MAX_FULL;
1161
1162         switch (op) {
1163         case OP_MAPREAD:
1164                 if (!mapped_reads)
1165                         op = OP_READ;
1166                 break;
1167         case OP_MAPWRITE:
1168                 if (!mapped_writes)
1169                         op = OP_WRITE;
1170                 break;
1171         case OP_FALLOCATE:
1172                 if (!fallocate_calls) {
1173                         log4(OP_SKIPPED, OP_FALLOCATE, offset, size);
1174                         goto out;
1175                 }
1176                 break;
1177         case OP_PUNCH_HOLE:
1178                 if (!punch_hole_calls) {
1179                         log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, size);
1180                         goto out;
1181                 }
1182                 break;
1183         case OP_ZERO_RANGE:
1184                 if (!zero_range_calls) {
1185                         log4(OP_SKIPPED, OP_ZERO_RANGE, offset, size);
1186                         goto out;
1187                 }
1188                 break;
1189         case OP_COLLAPSE_RANGE:
1190                 if (!collapse_range_calls) {
1191                         log4(OP_SKIPPED, OP_COLLAPSE_RANGE, offset, size);
1192                         goto out;
1193                 }
1194                 break;
1195         }
1196
1197         switch (op) {
1198         case OP_READ:
1199                 TRIM_OFF_LEN(offset, size, file_size);
1200                 doread(offset, size);
1201                 break;
1202
1203         case OP_WRITE:
1204                 TRIM_OFF_LEN(offset, size, maxfilelen);
1205                 dowrite(offset, size);
1206                 break;
1207
1208         case OP_MAPREAD:
1209                 TRIM_OFF_LEN(offset, size, file_size);
1210                 domapread(offset, size);
1211                 break;
1212
1213         case OP_MAPWRITE:
1214                 TRIM_OFF_LEN(offset, size, maxfilelen);
1215                 domapwrite(offset, size);
1216                 break;
1217
1218         case OP_TRUNCATE:
1219                 if (!style)
1220                         size = random() % maxfilelen;
1221                 dotruncate(size);
1222                 break;
1223
1224         case OP_FALLOCATE:
1225                 TRIM_OFF_LEN(offset, size, maxfilelen);
1226                 do_preallocate(offset, size);
1227                 break;
1228
1229         case OP_PUNCH_HOLE:
1230                 TRIM_OFF_LEN(offset, size, file_size);
1231                 do_punch_hole(offset, size);
1232                 break;
1233         case OP_ZERO_RANGE:
1234                 TRIM_OFF_LEN(offset, size, file_size);
1235                 do_zero_range(offset, size);
1236                 break;
1237         case OP_COLLAPSE_RANGE:
1238                 TRIM_OFF_LEN(offset, size, file_size - 1);
1239                 offset = offset & ~(block_size - 1);
1240                 size = size & ~(block_size - 1);
1241                 if (size == 0) {
1242                         log4(OP_SKIPPED, OP_COLLAPSE_RANGE, offset, size);
1243                         goto out;
1244                 }
1245                 do_collapse_range(offset, size);
1246                 break;
1247         default:
1248                 prterr("test: unknown operation");
1249                 report_failure(42);
1250                 break;
1251         }
1252
1253 out:
1254         if (sizechecks && testcalls > simulatedopcount)
1255                 check_size();
1256         if (closeopen)
1257                 docloseopen();
1258 }
1259
1260
1261 void
1262 cleanup(sig)
1263         int     sig;
1264 {
1265         if (sig)
1266                 prt("signal %d\n", sig);
1267         prt("testcalls = %lu\n", testcalls);
1268         exit(sig);
1269 }
1270
1271
/*
 * Print the command line synopsis and the option summary to stdout, then
 * exit with status 90.
 */
void
usage(void)
{
        /*
         * Built from adjacent string literals so that the lines for
         * build-dependent options can be compiled in or out.
         */
        static const char help[] =
                "fsx [-dnqxAFLOWZ] [-b opnum] [-c Prob] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] fname\n"
                "        -b opnum: beginning operation number (default 1)\n"
                "        -c P: 1 in P chance of file close+open at each op (default infinity)\n"
                "        -d: debug output for all operations\n"
                "        -f flush and invalidate cache after I/O\n"
                "        -l flen: the upper bound on file size (default 262144)\n"
                "        -m startop:endop: monitor (print debug output) specified byte range (default 0:infinity)\n"
                "        -n: no verifications of file size\n"
                "        -o oplen: the upper bound on operation size (default 65536)\n"
                "        -p progressinterval: debug output at specified operation interval\n"
                "        -q: quieter operation\n"
                "        -r readbdy: 4096 would make reads page aligned (default 1)\n"
                "        -s style: 1 gives smaller truncates (default 0)\n"
                "        -t truncbdy: 4096 would make truncates page aligned (default 1)\n"
                "        -w writebdy: 4096 would make writes page aligned (default 1)\n"
                "        -x: preallocate file space before starting, XFS only (default 0)\n"
                "        -y synchronize changes to a file\n"
#ifdef AIO
                "       -A: Use the AIO system calls\n"
#endif
                "       -D startingop: debug output starting at specified operation\n"
#ifdef HAVE_LINUX_FALLOC_H
                "       -F: Do not use fallocate (preallocation) calls\n"
#endif
#ifdef FALLOC_FL_PUNCH_HOLE
                "       -H: Do not use punch hole calls\n"
#endif
#ifdef FALLOC_FL_ZERO_RANGE
                "       -z: Do not use zero range calls\n"
#endif
#ifdef FALLOC_FL_COLLAPSE_RANGE
                "       -C: Do not use collapse range calls\n"
#endif
                "       -L: fsxLite - no file creations & no file size changes\n"
                "        -N numops: total # operations to do (default infinity)\n"
                "        -O: use oplen (see -o flag) for every op (default random)\n"
                "        -P: save .fsxlog and .fsxgood files in dirpath (default ./)\n"
                "        -S seed: for random # generator (default 1) 0 gets timestamp\n"
                "        -W: mapped write operations DISabled\n"
                "        -R: read() system calls only (mapped reads disabled)\n"
                "        -Z: O_DIRECT (use -R, -W, -r and -w too)\n"
                "        fname: this filename is REQUIRED (no default)\n";

        fprintf(stdout, "usage: %s", help);
        exit(90);
}
1321
1322
/*
 * Parse a number from "s" with strtol() (base 0, so decimal, octal and hex
 * are accepted) and apply an optional one-letter multiplier that directly
 * follows it:
 *      b/B: 512        k/K: 1024       m/M: 1024*1024  w/W: 4
 * On return *e points at the first character that was not consumed, i.e.
 * past the multiplier letter when one was recognised.
 */
int
getnum(char *s, char **e)
{
        int value;
        int scale = 0;

        *e = NULL;
        value = strtol(s, e, 0);
        if (*e == NULL)
                return value;

        switch (**e) {
        case 'b': case 'B':
                scale = 512;
                break;
        case 'k': case 'K':
                scale = 1024;
                break;
        case 'm': case 'M':
                scale = 1024 * 1024;
                break;
        case 'w': case 'W':
                scale = 4;
                break;
        default:
                break;
        }

        /* a recognised suffix scales the value and is consumed */
        if (scale) {
                value *= scale;
                (*e)++;
        }
        return value;
}
1355
1356 #ifdef AIO
1357
1358 #define QSZ     1024
1359 io_context_t    io_ctx;
1360 struct iocb     iocb;
1361
1362 int aio_setup()
1363 {
1364         int ret;
1365         ret = io_queue_init(QSZ, &io_ctx);
1366         if (ret != 0) {
1367                 fprintf(stderr, "aio_setup: io_queue_init failed: %s\n",
1368                         strerror(ret));
1369                 return(-1);
1370         }
1371         return(0);
1372 }
1373
1374 int
1375 __aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
1376 {
1377         struct io_event event;
1378         static struct timespec ts;
1379         struct iocb *iocbs[] = { &iocb };
1380         int ret;
1381         long res;
1382
1383         if (rw == READ) {
1384                 io_prep_pread(&iocb, fd, buf, len, offset);
1385         } else {
1386                 io_prep_pwrite(&iocb, fd, buf, len, offset);
1387         }
1388
1389         ts.tv_sec = 30;
1390         ts.tv_nsec = 0;
1391         ret = io_submit(io_ctx, 1, iocbs);
1392         if (ret != 1) {
1393                 fprintf(stderr, "errcode=%d\n", ret);
1394                 fprintf(stderr, "aio_rw: io_submit failed: %s\n",
1395                                 strerror(ret));
1396                 goto out_error;
1397         }
1398
1399         ret = io_getevents(io_ctx, 1, 1, &event, &ts);
1400         if (ret != 1) {
1401                 if (ret == 0)
1402                         fprintf(stderr, "aio_rw: no events available\n");
1403                 else {
1404                         fprintf(stderr, "errcode=%d\n", -ret);
1405                         fprintf(stderr, "aio_rw: io_getevents failed: %s\n",
1406                                         strerror(-ret));
1407                 }
1408                 goto out_error;
1409         }
1410         if (len != event.res) {
1411                 /*
1412                  * The b0rked libaio defines event.res as unsigned.
1413                  * However the kernel strucuture has it signed,
1414                  * and it's used to pass negated error value.
1415                  * Till the library is fixed use the temp var.
1416                  */
1417                 res = (long)event.res;
1418                 if (res >= 0)
1419                         fprintf(stderr, "bad io length: %lu instead of %u\n",
1420                                         res, len);
1421                 else {
1422                         fprintf(stderr, "errcode=%ld\n", -res);
1423                         fprintf(stderr, "aio_rw: async io failed: %s\n",
1424                                         strerror(-res));
1425                         ret = res;
1426                         goto out_error;
1427                 }
1428
1429         }
1430         return event.res;
1431
1432 out_error:
1433         /*
1434          * The caller expects error return in traditional libc
1435          * convention, i.e. -1 and the errno set to error.
1436          */
1437         errno = -ret;
1438         return -1;
1439 }
1440
1441 int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
1442 {
1443         int ret;
1444
1445         if (aio) {
1446                 ret = __aio_rw(rw, fd, buf, len, offset);
1447         } else {
1448                 if (rw == READ)
1449                         ret = read(fd, buf, len);
1450                 else
1451                         ret = write(fd, buf, len);
1452         }
1453         return ret;
1454 }
1455
1456 #endif
1457
/*
 * Probe whether fallocate() with the given mode can be used on the test
 * file by allocating one byte at offset 0.
 *
 * Returns 1 when the mode should be used and 0 when it must be disabled:
 * in lite mode, when the probe fails with EOPNOTSUPP, or when the program
 * was built without HAVE_LINUX_FALLOC_H.
 */
int
test_fallocate(int mode)
{
#ifdef HAVE_LINUX_FALLOC_H
        int ret = 0;
        if (!lite) {
                if (fallocate(fd, mode, 0, 1) && errno == EOPNOTSUPP) {
                        if(!quiet)
                                warn("main: filesystem does not support "
                                     "fallocate mode 0x%x, disabling!\n", mode);
                } else {
                        ret = 1;
                        /* undo whatever the one-byte probe did to the file */
                        if (ftruncate(fd, 0) == -1)
                                warn("main: ftruncate after fallocate probe");
                }
        }
        return ret;
#else
        /* no fallocate support compiled in: every mode is disabled */
        (void)mode;
        return 0;
#endif
}
1476
1477 int
1478 main(int argc, char **argv)
1479 {
1480         int     i, style, ch;
1481         char    *endp;
1482         char goodfile[1024];
1483         char logfile[1024];
1484         struct stat statbuf;
1485
1486         goodfile[0] = 0;
1487         logfile[0] = 0;
1488
1489         page_size = getpagesize();
1490         page_mask = page_size - 1;
1491         mmap_mask = page_mask;
1492         
1493
1494         setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
1495
1496         while ((ch = getopt(argc, argv, "b:c:dfl:m:no:p:qr:s:t:w:xyAD:FHzCLN:OP:RS:WZ"))
1497                != EOF)
1498                 switch (ch) {
1499                 case 'b':
1500                         simulatedopcount = getnum(optarg, &endp);
1501                         if (!quiet)
1502                                 fprintf(stdout, "Will begin at operation %ld\n",
1503                                         simulatedopcount);
1504                         if (simulatedopcount == 0)
1505                                 usage();
1506                         simulatedopcount -= 1;
1507                         break;
1508                 case 'c':
1509                         closeprob = getnum(optarg, &endp);
1510                         if (!quiet)
1511                                 fprintf(stdout,
1512                                         "Chance of close/open is 1 in %d\n",
1513                                         closeprob);
1514                         if (closeprob <= 0)
1515                                 usage();
1516                         break;
1517                 case 'd':
1518                         debug = 1;
1519                         break;
1520                 case 'f':
1521                         flush = 1;
1522                         break;
1523                 case 'l':
1524                         maxfilelen = getnum(optarg, &endp);
1525                         if (maxfilelen <= 0)
1526                                 usage();
1527                         break;
1528                 case 'm':
1529                         monitorstart = getnum(optarg, &endp);
1530                         if (monitorstart < 0)
1531                                 usage();
1532                         if (!endp || *endp++ != ':')
1533                                 usage();
1534                         monitorend = getnum(endp, &endp);
1535                         if (monitorend < 0)
1536                                 usage();
1537                         if (monitorend == 0)
1538                                 monitorend = -1; /* aka infinity */
1539                         debug = 1;
1540                 case 'n':
1541                         sizechecks = 0;
1542                         break;
1543                 case 'o':
1544                         maxoplen = getnum(optarg, &endp);
1545                         if (maxoplen <= 0)
1546                                 usage();
1547                         break;
1548                 case 'p':
1549                         progressinterval = getnum(optarg, &endp);
1550                         if (progressinterval == 0)
1551                                 usage();
1552                         break;
1553                 case 'q':
1554                         quiet = 1;
1555                         break;
1556                 case 'r':
1557                         readbdy = getnum(optarg, &endp);
1558                         if (readbdy <= 0)
1559                                 usage();
1560                         break;
1561                 case 's':
1562                         style = getnum(optarg, &endp);
1563                         if (style < 0 || style > 1)
1564                                 usage();
1565                         break;
1566                 case 't':
1567                         truncbdy = getnum(optarg, &endp);
1568                         if (truncbdy <= 0)
1569                                 usage();
1570                         break;
1571                 case 'w':
1572                         writebdy = getnum(optarg, &endp);
1573                         if (writebdy <= 0)
1574                                 usage();
1575                         break;
1576                 case 'x':
1577                         prealloc = 1;
1578                         break;
1579                 case 'y':
1580                         do_fsync = 1;
1581                         break;
1582                 case 'A':
1583                         aio = 1;
1584                         break;
1585                 case 'D':
1586                         debugstart = getnum(optarg, &endp);
1587                         if (debugstart < 1)
1588                                 usage();
1589                         break;
1590                 case 'F':
1591                         fallocate_calls = 0;
1592                         break;
1593                 case 'H':
1594                         punch_hole_calls = 0;
1595                         break;
1596                 case 'z':
1597                         zero_range_calls = 0;
1598                         break;
1599                 case 'C':
1600                         collapse_range_calls = 0;
1601                         break;
1602                 case 'L':
1603                         lite = 1;
1604                         break;
1605                 case 'N':
1606                         numops = getnum(optarg, &endp);
1607                         if (numops < 0)
1608                                 usage();
1609                         break;
1610                 case 'O':
1611                         randomoplen = 0;
1612                         break;
1613                 case 'P':
1614                         strncpy(goodfile, optarg, sizeof(goodfile));
1615                         strcat(goodfile, "/");
1616                         strncpy(logfile, optarg, sizeof(logfile));
1617                         strcat(logfile, "/");
1618                         break;
1619                 case 'R':
1620                         mapped_reads = 0;
1621                         break;
1622                 case 'S':
1623                         seed = getnum(optarg, &endp);
1624                         if (seed == 0)
1625                                 seed = time(0) % 10000;
1626                         if (!quiet)
1627                                 fprintf(stdout, "Seed set to %d\n", seed);
1628                         if (seed < 0)
1629                                 usage();
1630                         break;
1631                 case 'W':
1632                         mapped_writes = 0;
1633                         if (!quiet)
1634                                 fprintf(stdout, "mapped writes DISABLED\n");
1635                         break;
1636                 case 'Z':
1637                         o_direct = O_DIRECT;
1638                         break;
1639                 default:
1640                         usage();
1641                         /* NOTREACHED */
1642                 }
1643         argc -= optind;
1644         argv += optind;
1645         if (argc != 1)
1646                 usage();
1647         fname = argv[0];
1648
1649         signal(SIGHUP,  cleanup);
1650         signal(SIGINT,  cleanup);
1651         signal(SIGPIPE, cleanup);
1652         signal(SIGALRM, cleanup);
1653         signal(SIGTERM, cleanup);
1654         signal(SIGXCPU, cleanup);
1655         signal(SIGXFSZ, cleanup);
1656         signal(SIGVTALRM,       cleanup);
1657         signal(SIGUSR1, cleanup);
1658         signal(SIGUSR2, cleanup);
1659
1660         initstate(seed, state, 256);
1661         setstate(state);
1662         fd = open(fname,
1663                 O_RDWR|(lite ? 0 : O_CREAT|O_TRUNC)|o_direct, 0666);
1664         if (fd < 0) {
1665                 prterr(fname);
1666                 exit(91);
1667         }
1668         if (fstat(fd, &statbuf)) {
1669                 prterr("check_size: fstat");
1670                 exit(91);
1671         }
1672         block_size = statbuf.st_blksize;
1673 #ifdef XFS
1674         if (prealloc) {
1675                 xfs_flock64_t   resv = { 0 };
1676 #ifdef HAVE_XFS_PLATFORM_DEFS_H
1677                 if (!platform_test_xfs_fd(fd)) {
1678                         prterr(fname);
1679                         fprintf(stderr, "main: cannot prealloc, non XFS\n");
1680                         exit(96);
1681                 }
1682 #endif
1683                 resv.l_len = maxfilelen;
1684                 if ((xfsctl(fname, fd, XFS_IOC_RESVSP, &resv)) < 0) {
1685                         prterr(fname);
1686                         exit(97);
1687                 }
1688         }
1689 #endif
1690         strncat(goodfile, fname, 256);
1691         strcat (goodfile, ".fsxgood");
1692         fsxgoodfd = open(goodfile, O_RDWR|O_CREAT|O_TRUNC, 0666);
1693         if (fsxgoodfd < 0) {
1694                 prterr(goodfile);
1695                 exit(92);
1696         }
1697         strncat(logfile, fname, 256);
1698         strcat (logfile, ".fsxlog");
1699         fsxlogf = fopen(logfile, "w");
1700         if (fsxlogf == NULL) {
1701                 prterr(logfile);
1702                 exit(93);
1703         }
1704
1705 #ifdef AIO
1706         if (aio) 
1707                 aio_setup();
1708 #endif
1709
1710         if (lite) {
1711                 off_t ret;
1712                 file_size = maxfilelen = lseek(fd, (off_t)0, SEEK_END);
1713                 if (file_size == (off_t)-1) {
1714                         prterr(fname);
1715                         warn("main: lseek eof");
1716                         exit(94);
1717                 }
1718                 ret = lseek(fd, (off_t)0, SEEK_SET);
1719                 if (ret == (off_t)-1) {
1720                         prterr(fname);
1721                         warn("main: lseek 0");
1722                         exit(95);
1723                 }
1724         }
1725         original_buf = (char *) malloc(maxfilelen);
1726         for (i = 0; i < maxfilelen; i++)
1727                 original_buf[i] = random() % 256;
1728         good_buf = (char *) malloc(maxfilelen + writebdy);
1729         good_buf = round_ptr_up(good_buf, writebdy, 0);
1730         memset(good_buf, '\0', maxfilelen);
1731         temp_buf = (char *) malloc(maxoplen + readbdy);
1732         temp_buf = round_ptr_up(temp_buf, readbdy, 0);
1733         memset(temp_buf, '\0', maxoplen);
1734         if (lite) {     /* zero entire existing file */
1735                 ssize_t written;
1736
1737                 written = write(fd, good_buf, (size_t)maxfilelen);
1738                 if (written != maxfilelen) {
1739                         if (written == -1) {
1740                                 prterr(fname);
1741                                 warn("main: error on write");
1742                         } else
1743                                 warn("main: short write, 0x%x bytes instead "
1744                                         "of 0x%lx\n",
1745                                         (unsigned)written,
1746                                         maxfilelen);
1747                         exit(98);
1748                 }
1749         } else 
1750                 check_trunc_hack();
1751
1752         if (fallocate_calls)
1753                 fallocate_calls = test_fallocate(0);
1754         if (punch_hole_calls)
1755                 punch_hole_calls = test_fallocate(FALLOC_FL_PUNCH_HOLE |
1756                                                   FALLOC_FL_KEEP_SIZE);
1757         if (zero_range_calls)
1758                 zero_range_calls = test_fallocate(FALLOC_FL_ZERO_RANGE);
1759         if (collapse_range_calls)
1760                 collapse_range_calls = test_fallocate(FALLOC_FL_COLLAPSE_RANGE);
1761
1762         while (numops == -1 || numops--)
1763                 test();
1764
1765         if (close(fd)) {
1766                 prterr("close");
1767                 report_failure(99);
1768         }
1769         prt("All operations completed A-OK!\n");
1770
1771         exit(0);
1772         return 0;
1773 }