Add fallocate calls to fsx
[xfstests-dev.git] / ltp / fsx.c
1 /*
2  *      Copyright (C) 1991, NeXT Computer, Inc.  All Rights Reserverd.
3  *
4  *      File:   fsx.c
5  *      Author: Avadis Tevanian, Jr.
6  *
7  *      File system exerciser. 
8  *
9  *      Rewritten 8/98 by Conrad Minshall.
10  *
11  *      Small changes to work under Linux -- davej.
12  *
13  *      Checks for mmap last-page zero fill.
14  */
15
16 #include "global.h"
17
18 #include <limits.h>
19 #include <time.h>
20 #include <strings.h>
21 #include <sys/file.h>
22 #include <sys/mman.h>
23 #ifdef HAVE_ERR_H
24 #include <err.h>
25 #endif
26 #include <signal.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <stdarg.h>
31 #include <errno.h>
32 #ifdef AIO
33 #include <libaio.h>
34 #endif
35 #ifdef FALLOCATE
36 #include <linux/falloc.h>
37 #endif
38
39 #ifndef MAP_FILE
40 # define MAP_FILE 0
41 #endif
42
43 #define NUMPRINTCOLUMNS 32      /* # columns of data to print on each line */
44
45 /*
46  *      A log entry is an operation and a bunch of arguments.
47  */
48
/*
 * One record of the circular operation log: an OP_* code plus three
 * integer arguments whose meaning depends on the operation.
 * log4() stores the operation bit-complemented (i.e. negative) when the
 * global closeopen flag is set; logdump() undoes that when printing.
 */
struct log_entry {
        int     operation;      /* OP_* code, or its complement (see above) */
        int     args[3];        /* operation-specific arguments */
};
53
54 #define LOGSIZE 1000
55
56 struct log_entry        oplog[LOGSIZE]; /* the log */
57 int                     logptr = 0;     /* current position in log */
58 int                     logcount = 0;   /* total ops */
59
60 /*
61  *      Define operations
62  */
63
64 #define OP_READ         1
65 #define OP_WRITE        2
66 #define OP_TRUNCATE     3
67 #define OP_CLOSEOPEN    4
68 #define OP_MAPREAD      5
69 #define OP_MAPWRITE     6
70 #define OP_SKIPPED      7
71 #define OP_FALLOCATE    8
72
73 #undef PAGE_SIZE
74 #define PAGE_SIZE       getpagesize()
75 #undef PAGE_MASK
76 #define PAGE_MASK       (PAGE_SIZE - 1)
77
78 char    *original_buf;                  /* a pointer to the original data */
79 char    *good_buf;                      /* a pointer to the correct data */
80 char    *temp_buf;                      /* a pointer to the current data */
81 char    *fname;                         /* name of our test file */
82 int     fd;                             /* fd for our test file */
83
84 off_t           file_size = 0;
85 off_t           biggest = 0;
86 char            state[256];
87 unsigned long   testcalls = 0;          /* calls to function "test" */
88
89 unsigned long   simulatedopcount = 0;   /* -b flag */
90 int     closeprob = 0;                  /* -c flag */
91 int     debug = 0;                      /* -d flag */
92 unsigned long   debugstart = 0;         /* -D flag */
93 int     flush = 0;                      /* -f flag */
94 int     do_fsync = 0;                   /* -y flag */
95 unsigned long   maxfilelen = 256 * 1024;        /* -l flag */
96 int     sizechecks = 1;                 /* -n flag disables them */
97 int     maxoplen = 64 * 1024;           /* -o flag */
98 int     quiet = 0;                      /* -q flag */
99 unsigned long progressinterval = 0;     /* -p flag */
100 int     readbdy = 1;                    /* -r flag */
101 int     style = 0;                      /* -s flag */
102 int     prealloc = 0;                   /* -x flag */
103 int     truncbdy = 1;                   /* -t flag */
104 int     writebdy = 1;                   /* -w flag */
105 long    monitorstart = -1;              /* -m flag */
106 long    monitorend = -1;                /* -m flag */
107 int     lite = 0;                       /* -L flag */
108 long    numops = -1;                    /* -N flag */
109 int     randomoplen = 1;                /* -O flag disables it */
110 int     seed = 1;                       /* -S flag */
111 int     mapped_writes = 1;              /* -W flag disables */
112 int     fallocate_calls = 1;            /* -F flag disables */
113 int     mapped_reads = 1;               /* -R flag disables it */
114 int     fsxgoodfd = 0;
115 int     o_direct;                       /* -Z */
116 int     aio = 0;
117
118 int page_size;
119 int page_mask;
120 int mmap_mask;
121 #ifdef AIO
122 int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
123 #define READ 0
124 #define WRITE 1
125 #define fsxread(a,b,c,d)        aio_rw(READ, a,b,c,d)
126 #define fsxwrite(a,b,c,d)       aio_rw(WRITE, a,b,c,d)
127 #else
128 #define fsxread(a,b,c,d)        read(a,b,c)
129 #define fsxwrite(a,b,c,d)       write(a,b,c)
130 #endif
131
132 FILE *  fsxlogf = NULL;
133 int badoff = -1;
134 int closeopen = 0;
135
/*
 * Round the address `ptr` up to the next multiple of `align` and then
 * add `offset` bytes to the result.
 *
 * The rounding uses a bit mask, so it only yields a multiple of `align`
 * when `align` is a power of two.
 */
static void *round_up(void *ptr, unsigned long align, unsigned long offset)
{
        unsigned long mask = align - 1;
        unsigned long addr = (unsigned long)ptr;

        addr += mask;           /* bump past the next boundary ... */
        addr &= ~mask;          /* ... then clear the low bits */

        return (void *)(addr + offset);
}
144
/*
 * Write a diagnostic to stderr in the form
 *      fsx: <formatted message>: <strerror(code)>
 * The "<formatted message>: " part is omitted when fmt is NULL.
 */
void
vwarnc(int code, const char *fmt, va_list ap)
{
        fprintf(stderr, "fsx: ");

        if (fmt) {
                vfprintf(stderr, fmt, ap);
                fprintf(stderr, ": ");
        }

        fprintf(stderr, "%s\n", strerror(code));
}
154
/*
 * printf-style warning to stderr; the text for the current errno is
 * appended by vwarnc().
 */
void
warn(const char * fmt, ...)
{
        va_list params;

        va_start(params, fmt);
        vwarnc(errno, fmt, params);
        va_end(params);
}
162
163 #define BUF_SIZE 1024
164
165 void
166 prt(char *fmt, ...)
167 {
168         va_list args;
169         char buffer[BUF_SIZE];
170
171         va_start(args, fmt);
172         vsnprintf(buffer, BUF_SIZE, fmt, args);
173         va_end(args);
174         fprintf(stdout, buffer);
175         if (fsxlogf)
176                 fprintf(fsxlogf, buffer);
177 }
178
/*
 * Print "<prefix>: <strerror(errno)>" through prt().
 *
 * prefix may be NULL, in which case only the error text is printed.
 * NULL is replaced by an empty string before formatting because passing
 * a null pointer to "%s" is undefined behaviour.
 */
void
prterr(char *prefix)
{
        prt("%s%s%s\n", prefix ? prefix : "", prefix ? ": " : "",
            strerror(errno));
}
184
185
186 void
187 log4(int operation, int arg0, int arg1, int arg2)
188 {
189         struct log_entry *le;
190
191         le = &oplog[logptr];
192         le->operation = operation;
193         if (closeopen)
194                 le->operation = ~ le->operation;
195         le->args[0] = arg0;
196         le->args[1] = arg1;
197         le->args[2] = arg2;
198         logptr++;
199         logcount++;
200         if (logptr >= LOGSIZE)
201                 logptr = 0;
202 }
203
204
205 void
206 logdump(void)
207 {
208         int     i, count, down;
209         struct log_entry        *lp;
210         char *falloc_type[3] = {"PAST_EOF", "EXTENDING", "INTERIOR"};
211
212         prt("LOG DUMP (%d total operations):\n", logcount);
213         if (logcount < LOGSIZE) {
214                 i = 0;
215                 count = logcount;
216         } else {
217                 i = logptr;
218                 count = LOGSIZE;
219         }
220         for ( ; count > 0; count--) {
221                 int opnum;
222
223                 opnum = i+1 + (logcount/LOGSIZE)*LOGSIZE;
224                 prt("%d(%d mod 256): ", opnum, opnum%256);
225                 lp = &oplog[i];
226                 if ((closeopen = lp->operation < 0))
227                         lp->operation = ~ lp->operation;
228                         
229                 switch (lp->operation) {
230                 case OP_MAPREAD:
231                         prt("MAPREAD\t0x%x thru 0x%x\t(0x%x bytes)",
232                             lp->args[0], lp->args[0] + lp->args[1] - 1,
233                             lp->args[1]);
234                         if (badoff >= lp->args[0] && badoff <
235                                                      lp->args[0] + lp->args[1])
236                                 prt("\t***RRRR***");
237                         break;
238                 case OP_MAPWRITE:
239                         prt("MAPWRITE 0x%x thru 0x%x\t(0x%x bytes)",
240                             lp->args[0], lp->args[0] + lp->args[1] - 1,
241                             lp->args[1]);
242                         if (badoff >= lp->args[0] && badoff <
243                                                      lp->args[0] + lp->args[1])
244                                 prt("\t******WWWW");
245                         break;
246                 case OP_READ:
247                         prt("READ\t0x%x thru 0x%x\t(0x%x bytes)",
248                             lp->args[0], lp->args[0] + lp->args[1] - 1,
249                             lp->args[1]);
250                         if (badoff >= lp->args[0] &&
251                             badoff < lp->args[0] + lp->args[1])
252                                 prt("\t***RRRR***");
253                         break;
254                 case OP_WRITE:
255                         prt("WRITE\t0x%x thru 0x%x\t(0x%x bytes)",
256                             lp->args[0], lp->args[0] + lp->args[1] - 1,
257                             lp->args[1]);
258                         if (lp->args[0] > lp->args[2])
259                                 prt(" HOLE");
260                         else if (lp->args[0] + lp->args[1] > lp->args[2])
261                                 prt(" EXTEND");
262                         if ((badoff >= lp->args[0] || badoff >=lp->args[2]) &&
263                             badoff < lp->args[0] + lp->args[1])
264                                 prt("\t***WWWW");
265                         break;
266                 case OP_TRUNCATE:
267                         down = lp->args[0] < lp->args[1];
268                         prt("TRUNCATE %s\tfrom 0x%x to 0x%x",
269                             down ? "DOWN" : "UP", lp->args[1], lp->args[0]);
270                         if (badoff >= lp->args[!down] &&
271                             badoff < lp->args[!!down])
272                                 prt("\t******WWWW");
273                         break;
274                 case OP_FALLOCATE:
275                         /* 0: offset 1: length 2: where alloced */
276                         prt("FALLOCATE %s\tfrom 0x%x to 0x%x",
277                             falloc_type[lp->args[2]], lp->args[0], lp->args[0] + lp->args[1]);
278                         if (badoff >= lp->args[0] &&
279                             badoff < lp->args[0] + lp->args[1])
280                                 prt("\t******FFFF");
281                         break;
282                 case OP_SKIPPED:
283                         prt("SKIPPED (no operation)");
284                         break;
285                 default:
286                         prt("BOGUS LOG ENTRY (operation code = %d)!",
287                             lp->operation);
288                 }
289                 if (closeopen)
290                         prt("\n\t\tCLOSE/OPEN");
291                 prt("\n");
292                 i++;
293                 if (i == LOGSIZE)
294                         i = 0;
295         }
296 }
297
298
299 void
300 save_buffer(char *buffer, off_t bufferlength, int fd)
301 {
302         off_t ret;
303         ssize_t byteswritten;
304
305         if (fd <= 0 || bufferlength == 0)
306                 return;
307
308         if (bufferlength > SSIZE_MAX) {
309                 prt("fsx flaw: overflow in save_buffer\n");
310                 exit(67);
311         }
312         if (lite) {
313                 off_t size_by_seek = lseek(fd, (off_t)0, SEEK_END);
314                 if (size_by_seek == (off_t)-1)
315                         prterr("save_buffer: lseek eof");
316                 else if (bufferlength > size_by_seek) {
317                         warn("save_buffer: .fsxgood file too short... will save 0x%llx bytes instead of 0x%llx\n", (unsigned long long)size_by_seek,
318                              (unsigned long long)bufferlength);
319                         bufferlength = size_by_seek;
320                 }
321         }
322
323         ret = lseek(fd, (off_t)0, SEEK_SET);
324         if (ret == (off_t)-1)
325                 prterr("save_buffer: lseek 0");
326         
327         byteswritten = write(fd, buffer, (size_t)bufferlength);
328         if (byteswritten != bufferlength) {
329                 if (byteswritten == -1)
330                         prterr("save_buffer write");
331                 else
332                         warn("save_buffer: short write, 0x%x bytes instead of 0x%llx\n",
333                              (unsigned)byteswritten,
334                              (unsigned long long)bufferlength);
335         }
336 }
337
338
339 void
340 report_failure(int status)
341 {
342         logdump();
343         
344         if (fsxgoodfd) {
345                 if (good_buf) {
346                         save_buffer(good_buf, file_size, fsxgoodfd);
347                         prt("Correct content saved for comparison\n");
348                         prt("(maybe hexdump \"%s\" vs \"%s.fsxgood\")\n",
349                             fname, fname);
350                 }
351                 close(fsxgoodfd);
352         }
353         exit(status);
354 }
355
356
357 #define short_at(cp) ((unsigned short)((*((unsigned char *)(cp)) << 8) | \
358                                         *(((unsigned char *)(cp)) + 1)))
359
360 void
361 check_buffers(unsigned offset, unsigned size)
362 {
363         unsigned char c, t;
364         unsigned i = 0;
365         unsigned n = 0;
366         unsigned op = 0;
367         unsigned bad = 0;
368
369         if (memcmp(good_buf + offset, temp_buf, size) != 0) {
370                 prt("READ BAD DATA: offset = 0x%x, size = 0x%x, fname = %s\n",
371                     offset, size, fname);
372                 prt("OFFSET\tGOOD\tBAD\tRANGE\n");
373                 while (size > 0) {
374                         c = good_buf[offset];
375                         t = temp_buf[i];
376                         if (c != t) {
377                                 if (n < 16) {
378                                         bad = short_at(&temp_buf[i]);
379                                         prt("0x%5x\t0x%04x\t0x%04x", offset,
380                                             short_at(&good_buf[offset]), bad);
381                                         op = temp_buf[offset & 1 ? i+1 : i];
382                                         prt("\t0x%5x\n", n);
383                                         if (op)
384                                                 prt("operation# (mod 256) for "
385                                                   "the bad data may be %u\n",
386                                                 ((unsigned)op & 0xff));
387                                         else
388                                                 prt("operation# (mod 256) for "
389                                                   "the bad data unknown, check"
390                                                   " HOLE and EXTEND ops\n");
391                                 }
392                                 n++;
393                                 badoff = offset;
394                         }
395                         offset++;
396                         i++;
397                         size--;
398                 }
399                 report_failure(110);
400         }
401 }
402
403
404 void
405 check_size(void)
406 {
407         struct stat     statbuf;
408         off_t   size_by_seek;
409
410         if (fstat(fd, &statbuf)) {
411                 prterr("check_size: fstat");
412                 statbuf.st_size = -1;
413         }
414         size_by_seek = lseek(fd, (off_t)0, SEEK_END);
415         if (file_size != statbuf.st_size || file_size != size_by_seek) {
416                 prt("Size error: expected 0x%llx stat 0x%llx seek 0x%llx\n",
417                     (unsigned long long)file_size,
418                     (unsigned long long)statbuf.st_size,
419                     (unsigned long long)size_by_seek);
420                 report_failure(120);
421         }
422 }
423
424
425 void
426 check_trunc_hack(void)
427 {
428         struct stat statbuf;
429
430         ftruncate(fd, (off_t)0);
431         ftruncate(fd, (off_t)100000);
432         fstat(fd, &statbuf);
433         if (statbuf.st_size != (off_t)100000) {
434                 prt("no extend on truncate! not posix!\n");
435                 exit(130);
436         }
437         ftruncate(fd, 0);
438 }
439
440 void
441 doflush(unsigned offset, unsigned size)
442 {
443         unsigned pg_offset;
444         unsigned map_size;
445         char    *p;
446
447         if (o_direct == O_DIRECT)
448                 return;
449
450         pg_offset = offset & mmap_mask;
451         map_size  = pg_offset + size;
452
453         if ((p = (char *)mmap(0, map_size, PROT_READ | PROT_WRITE,
454                               MAP_FILE | MAP_SHARED, fd,
455                               (off_t)(offset - pg_offset))) == (char *)-1) {
456                 prterr("doflush: mmap");
457                 report_failure(202);
458         }
459         if (msync(p, map_size, MS_INVALIDATE) != 0) {
460                 prterr("doflush: msync");
461                 report_failure(203);
462         }
463         if (munmap(p, map_size) != 0) {
464                 prterr("doflush: munmap");
465                 report_failure(204);
466         }
467 }
468
469 void
470 doread(unsigned offset, unsigned size)
471 {
472         off_t ret;
473         unsigned iret;
474
475         offset -= offset % readbdy;
476         if (o_direct)
477                 size -= size % readbdy;
478         if (size == 0) {
479                 if (!quiet && testcalls > simulatedopcount && !o_direct)
480                         prt("skipping zero size read\n");
481                 log4(OP_SKIPPED, OP_READ, offset, size);
482                 return;
483         }
484         if (size + offset > file_size) {
485                 if (!quiet && testcalls > simulatedopcount)
486                         prt("skipping seek/read past end of file\n");
487                 log4(OP_SKIPPED, OP_READ, offset, size);
488                 return;
489         }
490
491         log4(OP_READ, offset, size, 0);
492
493         if (testcalls <= simulatedopcount)
494                 return;
495
496         if (!quiet &&
497                 ((progressinterval && testcalls % progressinterval == 0)  ||
498                 (debug &&
499                        (monitorstart == -1 ||
500                         (offset + size > monitorstart &&
501                         (monitorend == -1 || offset <= monitorend))))))
502                 prt("%lu read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
503                     offset, offset + size - 1, size);
504         ret = lseek(fd, (off_t)offset, SEEK_SET);
505         if (ret == (off_t)-1) {
506                 prterr("doread: lseek");
507                 report_failure(140);
508         }
509         iret = fsxread(fd, temp_buf, size, offset);
510         if (iret != size) {
511                 if (iret == -1)
512                         prterr("doread: read");
513                 else
514                         prt("short read: 0x%x bytes instead of 0x%x\n",
515                             iret, size);
516                 report_failure(141);
517         }
518         check_buffers(offset, size);
519 }
520
521
522 void
523 check_eofpage(char *s, unsigned offset, char *p, int size)
524 {
525         unsigned long last_page, should_be_zero;
526
527         if (offset + size <= (file_size & ~page_mask))
528                 return;
529         /*
530          * we landed in the last page of the file
531          * test to make sure the VM system provided 0's 
532          * beyond the true end of the file mapping
533          * (as required by mmap def in 1996 posix 1003.1)
534          */
535         last_page = ((unsigned long)p + (offset & page_mask) + size) & ~page_mask;
536
537         for (should_be_zero = last_page + (file_size & page_mask);
538              should_be_zero < last_page + page_size;
539              should_be_zero++)
540                 if (*(char *)should_be_zero) {
541                         prt("Mapped %s: non-zero data past EOF (0x%llx) page offset 0x%x is 0x%04x\n",
542                             s, file_size - 1, should_be_zero & page_mask,
543                             short_at(should_be_zero));
544                         report_failure(205);
545                 }
546 }
547
548
549 void
550 domapread(unsigned offset, unsigned size)
551 {
552         unsigned pg_offset;
553         unsigned map_size;
554         char    *p;
555
556         offset -= offset % readbdy;
557         if (size == 0) {
558                 if (!quiet && testcalls > simulatedopcount)
559                         prt("skipping zero size read\n");
560                 log4(OP_SKIPPED, OP_MAPREAD, offset, size);
561                 return;
562         }
563         if (size + offset > file_size) {
564                 if (!quiet && testcalls > simulatedopcount)
565                         prt("skipping seek/read past end of file\n");
566                 log4(OP_SKIPPED, OP_MAPREAD, offset, size);
567                 return;
568         }
569
570         log4(OP_MAPREAD, offset, size, 0);
571
572         if (testcalls <= simulatedopcount)
573                 return;
574
575         if (!quiet &&
576                 ((progressinterval && testcalls % progressinterval == 0) ||
577                        (debug &&
578                        (monitorstart == -1 ||
579                         (offset + size > monitorstart &&
580                         (monitorend == -1 || offset <= monitorend))))))
581                 prt("%lu mapread\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
582                     offset, offset + size - 1, size);
583
584         pg_offset = offset & PAGE_MASK;
585         map_size  = pg_offset + size;
586
587         if ((p = (char *)mmap(0, map_size, PROT_READ, MAP_SHARED, fd,
588                               (off_t)(offset - pg_offset))) == (char *)-1) {
589                 prterr("domapread: mmap");
590                 report_failure(190);
591         }
592         memcpy(temp_buf, p + pg_offset, size);
593
594         check_eofpage("Read", offset, p, size);
595
596         if (munmap(p, map_size) != 0) {
597                 prterr("domapread: munmap");
598                 report_failure(191);
599         }
600
601         check_buffers(offset, size);
602 }
603
604
605 void
606 gendata(char *original_buf, char *good_buf, unsigned offset, unsigned size)
607 {
608         while (size--) {
609                 good_buf[offset] = testcalls % 256; 
610                 if (offset % 2)
611                         good_buf[offset] += original_buf[offset];
612                 offset++;
613         }
614 }
615
616
617 void
618 dowrite(unsigned offset, unsigned size)
619 {
620         off_t ret;
621         unsigned iret;
622
623         offset -= offset % writebdy;
624         if (o_direct)
625                 size -= size % writebdy;
626         if (size == 0) {
627                 if (!quiet && testcalls > simulatedopcount && !o_direct)
628                         prt("skipping zero size write\n");
629                 log4(OP_SKIPPED, OP_WRITE, offset, size);
630                 return;
631         }
632
633         log4(OP_WRITE, offset, size, file_size);
634
635         gendata(original_buf, good_buf, offset, size);
636         if (file_size < offset + size) {
637                 if (file_size < offset)
638                         memset(good_buf + file_size, '\0', offset - file_size);
639                 file_size = offset + size;
640                 if (lite) {
641                         warn("Lite file size bug in fsx!");
642                         report_failure(149);
643                 }
644         }
645
646         if (testcalls <= simulatedopcount)
647                 return;
648
649         if (!quiet &&
650                 ((progressinterval && testcalls % progressinterval == 0) ||
651                        (debug &&
652                        (monitorstart == -1 ||
653                         (offset + size > monitorstart &&
654                         (monitorend == -1 || offset <= monitorend))))))
655                 prt("%lu write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
656                     offset, offset + size - 1, size);
657         ret = lseek(fd, (off_t)offset, SEEK_SET);
658         if (ret == (off_t)-1) {
659                 prterr("dowrite: lseek");
660                 report_failure(150);
661         }
662         iret = fsxwrite(fd, good_buf + offset, size, offset);
663         if (iret != size) {
664                 if (iret == -1)
665                         prterr("dowrite: write");
666                 else
667                         prt("short write: 0x%x bytes instead of 0x%x\n",
668                             iret, size);
669                 report_failure(151);
670         }
671         if (do_fsync) {
672                 if (fsync(fd)) {
673                         prt("fsync() failed: %s\n", strerror(errno));
674                         report_failure(152);
675                 }
676         }
677         if (flush) {
678                 doflush(offset, size);
679         }
680 }
681
682
683 void
684 domapwrite(unsigned offset, unsigned size)
685 {
686         unsigned pg_offset;
687         unsigned map_size;
688         off_t    cur_filesize;
689         char    *p;
690
691         offset -= offset % writebdy;
692         if (size == 0) {
693                 if (!quiet && testcalls > simulatedopcount)
694                         prt("skipping zero size write\n");
695                 log4(OP_SKIPPED, OP_MAPWRITE, offset, size);
696                 return;
697         }
698         cur_filesize = file_size;
699
700         log4(OP_MAPWRITE, offset, size, 0);
701
702         gendata(original_buf, good_buf, offset, size);
703         if (file_size < offset + size) {
704                 if (file_size < offset)
705                         memset(good_buf + file_size, '\0', offset - file_size);
706                 file_size = offset + size;
707                 if (lite) {
708                         warn("Lite file size bug in fsx!");
709                         report_failure(200);
710                 }
711         }
712
713         if (testcalls <= simulatedopcount)
714                 return;
715
716         if (!quiet &&
717                 ((progressinterval && testcalls % progressinterval == 0) ||
718                        (debug &&
719                        (monitorstart == -1 ||
720                         (offset + size > monitorstart &&
721                         (monitorend == -1 || offset <= monitorend))))))
722                 prt("%lu mapwrite\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
723                     offset, offset + size - 1, size);
724
725         if (file_size > cur_filesize) {
726                 if (ftruncate(fd, file_size) == -1) {
727                         prterr("domapwrite: ftruncate");
728                         exit(201);
729                 }
730         }
731         pg_offset = offset & PAGE_MASK;
732         map_size  = pg_offset + size;
733
734         if ((p = (char *)mmap(0, map_size, PROT_READ | PROT_WRITE,
735                               MAP_FILE | MAP_SHARED, fd,
736                               (off_t)(offset - pg_offset))) == (char *)-1) {
737                 prterr("domapwrite: mmap");
738                 report_failure(202);
739         }
740         memcpy(p + pg_offset, good_buf + offset, size);
741         if (msync(p, map_size, 0) != 0) {
742                 prterr("domapwrite: msync");
743                 report_failure(203);
744         }
745
746         check_eofpage("Write", offset, p, size);
747
748         if (munmap(p, map_size) != 0) {
749                 prterr("domapwrite: munmap");
750                 report_failure(204);
751         }
752 }
753
754
755 void
756 dotruncate(unsigned size)
757 {
758         int oldsize = file_size;
759
760         size -= size % truncbdy;
761         if (size > biggest) {
762                 biggest = size;
763                 if (!quiet && testcalls > simulatedopcount)
764                         prt("truncating to largest ever: 0x%x\n", size);
765         }
766
767         log4(OP_TRUNCATE, size, (unsigned)file_size, 0);
768
769         if (size > file_size)
770                 memset(good_buf + file_size, '\0', size - file_size);
771         file_size = size;
772
773         if (testcalls <= simulatedopcount)
774                 return;
775         
776         if ((progressinterval && testcalls % progressinterval == 0) ||
777             (debug && (monitorstart == -1 || monitorend == -1 ||
778                       size <= monitorend)))
779                 prt("%lu trunc\tfrom 0x%x to 0x%x\n", testcalls, oldsize, size);
780         if (ftruncate(fd, (off_t)size) == -1) {
781                 prt("ftruncate1: %x\n", size);
782                 prterr("dotruncate: ftruncate");
783                 report_failure(160);
784         }
785 }
786
#ifdef FALLOCATE
/*
 * Perform one fallocate test on [offset, offset + length).
 *
 * fallocate is basically a no-op unless extending, then a lot like a
 * truncate: half of the calls (chosen with random()) use
 * FALLOC_FL_KEEP_SIZE and never change the file size; the others extend
 * the file when the range ends past the current end, in which case the
 * new part of good_buf is zero-filled.
 *
 * While testcalls <= simulatedopcount only the in-memory state and the
 * log are updated.  A fallocate() failure ends the run through
 * report_failure(161).
 */
void
dofallocate(unsigned offset, unsigned length)
{
        unsigned end_offset;
        unsigned range_end;
        int keep_size;
        int where;

        if (length == 0) {
                if (!quiet && testcalls > simulatedopcount)
                        prt("skipping zero length fallocate\n");
                log4(OP_SKIPPED, OP_FALLOCATE, offset, length);
                return;
        }

        keep_size = random() % 2;

        /* end of the allocated range, regardless of keep_size */
        range_end = offset + length;
        /* new end of file; 0 means "size unchanged" for keep-size calls */
        end_offset = keep_size ? 0 : range_end;

        if (end_offset > biggest) {
                biggest = end_offset;
                if (!quiet && testcalls > simulatedopcount)
                        prt("fallocating to largest ever: 0x%x\n", end_offset);
        }

        /*
         * last arg:
         *      1: allocate past EOF (keep-size range reaching beyond EOF)
         *      2: extending prealloc
         *      3: interior prealloc
         *
         * The keep-size case has to be classified from the range itself:
         * end_offset is 0 for those calls and can never exceed file_size.
         */
        if (keep_size && range_end > file_size)
                where = 1;
        else if (end_offset > file_size)
                where = 2;
        else
                where = 3;
        log4(OP_FALLOCATE, offset, length, where);

        if (end_offset > file_size) {
                memset(good_buf + file_size, '\0', end_offset - file_size);
                file_size = end_offset;
        }

        if (testcalls <= simulatedopcount)
                return;

        if ((progressinterval && testcalls % progressinterval == 0) ||
            (debug && (monitorstart == -1 || monitorend == -1 ||
                      end_offset <= monitorend)))
                /* print the end of the range, as the "to" in the text says */
                prt("%lu falloc\tfrom 0x%x to 0x%x\n", testcalls, offset, range_end);
        if (fallocate(fd, keep_size ? FALLOC_FL_KEEP_SIZE : 0, (loff_t)offset, (loff_t)length) == -1) {
                prt("fallocate: %x to %x\n", offset, range_end);
                prterr("dofallocate: fallocate");
                report_failure(161);
        }
}
#else
/* Built without FALLOCATE support: the operation is a no-op. */
void
dofallocate(unsigned offset, unsigned length)
{
        return;
}
#endif
845
846 void
847 writefileimage()
848 {
849         ssize_t iret;
850
851         if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
852                 prterr("writefileimage: lseek");
853                 report_failure(171);
854         }
855         iret = write(fd, good_buf, file_size);
856         if ((off_t)iret != file_size) {
857                 if (iret == -1)
858                         prterr("writefileimage: write");
859                 else
860                         prt("short write: 0x%x bytes instead of 0x%llx\n",
861                             iret, (unsigned long long)file_size);
862                 report_failure(172);
863         }
864         if (lite ? 0 : ftruncate(fd, file_size) == -1) {
865                 prt("ftruncate2: %llx\n", (unsigned long long)file_size);
866                 prterr("writefileimage: ftruncate");
867                 report_failure(173);
868         }
869 }
870
871
872 void
873 docloseopen(void)
874
875         if (testcalls <= simulatedopcount)
876                 return;
877
878         if (debug)
879                 prt("%lu close/open\n", testcalls);
880         if (close(fd)) {
881                 prterr("docloseopen: close");
882                 report_failure(180);
883         }
884         fd = open(fname, O_RDWR|o_direct, 0);
885         if (fd < 0) {
886                 prterr("docloseopen: open");
887                 report_failure(181);
888         }
889 }
890
891
892 void
893 test(void)
894 {
895         unsigned long   offset;
896         unsigned long   size = maxoplen;
897         unsigned long   rv = random();
898         unsigned long   op = rv % (3 + !lite + mapped_writes + fallocate_calls);
899
900         /* turn off the map read if necessary */
901
902         if (op == 2 && !mapped_reads)
903             op = 0;
904
905         if (simulatedopcount > 0 && testcalls == simulatedopcount)
906                 writefileimage();
907
908         testcalls++;
909
910         if (closeprob)
911                 closeopen = (rv >> 3) < (1 << 28) / closeprob;
912
913         if (debugstart > 0 && testcalls >= debugstart)
914                 debug = 1;
915
916         if (!quiet && testcalls < simulatedopcount && testcalls % 100000 == 0)
917                 prt("%lu...\n", testcalls);
918
919         /*
920          *                 lite  !lite
921          * READ:        op = 0     0
922          * WRITE:       op = 1     1
923          * MAPREAD:     op = 2     2
924          * TRUNCATE:    op = -     3
925          * MAPWRITE:    op = 3     4
926          * FALLOCATE:   op = -     5
927          */
928         if (lite ? 0 : op == 3 && (style & 1) == 0) /* vanilla truncate? */
929                 dotruncate(random() % maxfilelen);
930         else {
931                 if (randomoplen)
932                         size = random() % (maxoplen+1);
933
934                 if (lite ? 0 : op == 3) {
935                         /* truncate */
936                         dotruncate(size);
937                 } else {
938                         offset = random();
939                         if (op == 5) {
940                                 /* fallocate */
941                                 offset %= maxfilelen;
942                                 if (offset + size > maxfilelen)
943                                         size = maxfilelen - offset;
944                                 dofallocate(offset, size);
945                         } else if (op == 1 || op == (lite ? 3 : 4)) {
946                                 /* write / mapwrite */
947                                 offset %= maxfilelen;
948                                 if (offset + size > maxfilelen)
949                                         size = maxfilelen - offset;
950                                 if (op != 1)
951                                         domapwrite(offset, size);
952                                 else
953                                         dowrite(offset, size);
954                         } else {
955                                 /* read / mapread */
956                                 if (file_size)
957                                         offset %= file_size;
958                                 else
959                                         offset = 0;
960                                 if (offset + size > file_size)
961                                         size = file_size - offset;
962                                 if (op != 0)
963                                         domapread(offset, size);
964                                 else
965                                         doread(offset, size);
966                         }
967                 }
968         }
969         if (sizechecks && testcalls > simulatedopcount)
970                 check_size();
971         if (closeopen)
972                 docloseopen();
973 }
974
975
976 void
977 cleanup(sig)
978         int     sig;
979 {
980         if (sig)
981                 prt("signal %d\n", sig);
982         prt("testcalls = %lu\n", testcalls);
983         exit(sig);
984 }
985
986
/*
 * Print the command-line summary on stdout and exit with status 90.
 *
 * The synopsis lists every flag accepted by the getopt() string in
 * main().  The -A and -F detail lines are only compiled in when AIO and
 * FALLOCATE, respectively, are defined.
 */
void
usage(void)
{
        /*
         * Built as a separate object so that the preprocessor
         * conditionals do not sit inside a function-call argument list.
         */
        static const char help[] =
                "fsx [-dfnqxyAFLORWZ] [-b opnum] [-c Prob] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] fname\n"
                "        -b opnum: beginning operation number (default 1)\n"
                "        -c P: 1 in P chance of file close+open at each op (default infinity)\n"
                "        -d: debug output for all operations\n"
                "        -f: flush and invalidate cache after I/O\n"
                "        -l flen: the upper bound on file size (default 262144)\n"
                "        -m start:end: monitor (print debug output) specified byte range (default 0:infinity)\n"
                "        -n: no verifications of file size\n"
                "        -o oplen: the upper bound on operation size (default 65536)\n"
                "        -p progressinterval: debug output at specified operation interval\n"
                "        -q: quieter operation\n"
                "        -r readbdy: 4096 would make reads page aligned (default 1)\n"
                "        -s style: 1 gives smaller truncates (default 0)\n"
                "        -t truncbdy: 4096 would make truncates page aligned (default 1)\n"
                "        -w writebdy: 4096 would make writes page aligned (default 1)\n"
                "        -x: preallocate file space before starting, XFS only (default 0)\n"
                "        -y: synchronize changes to a file\n"
#ifdef AIO
                "        -A: Use the AIO system calls\n"
#endif
                "        -D startingop: debug output starting at specified operation\n"
#ifdef FALLOCATE
                "        -F: Do not use fallocate (preallocation) calls\n"
#endif
                "        -L: fsxLite - no file creations & no file size changes\n"
                "        -N numops: total # operations to do (default infinity)\n"
                "        -O: use oplen (see -o flag) for every op (default random)\n"
                "        -P dirpath: save .fsxlog and .fsxgood files in dirpath (default ./)\n"
                "        -S seed: for random # generator (default 1) 0 gets timestamp\n"
                "        -W: mapped write operations DISabled\n"
                "        -R: read() system calls only (mapped reads disabled)\n"
                "        -Z: O_DIRECT (use -R, -W, -r and -w too)\n"
                "        fname: this filename is REQUIRED (no default)\n";

        fprintf(stdout, "usage: %s", help);
        exit(90);
}
1027
1028
/*
 * Parse an integer with an optional one-letter size suffix.
 *
 * s is converted with strtol() using base 0, so decimal, octal ("0...")
 * and hexadecimal ("0x...") input is accepted.  A single suffix letter
 * directly after the number scales the value:
 *
 *      b/B  x 512      k/K  x 1024     m/M  x 1024*1024     w/W  x 4
 *
 * On return *e points at the first character that was not consumed
 * (past the suffix when there was one), so callers can go on parsing.
 *
 * Returns the scaled value.  If the number or the scaled result does
 * not fit in an int, returns -1 and sets errno to ERANGE instead of
 * overflowing; errno is left untouched otherwise.
 */
int
getnum(char *s, char **e)
{
        long    val;
        long    mult = 1;
        int     saved_errno = errno;
        int     out_of_range;

        errno = 0;
        val = strtol(s, e, 0);          /* always stores into *e */
        out_of_range = (errno == ERANGE);

        switch (**e) {
        case 'b':
        case 'B':
                mult = 512;
                break;
        case 'k':
        case 'K':
                mult = 1024;
                break;
        case 'm':
        case 'M':
                mult = 1024*1024;
                break;
        case 'w':
        case 'W':
                mult = 4;
                break;
        default:
                break;
        }
        if (mult != 1)
                *e = *e + 1;            /* step over the suffix letter */

        /* check before multiplying: signed overflow is undefined */
        if (out_of_range || val > INT_MAX / mult || val < INT_MIN / mult) {
                errno = ERANGE;
                return (-1);
        }

        errno = saved_errno;
        return ((int)(val * mult));
}
1061
1062 #ifdef AIO
1063
1064 #define QSZ     1024
1065 io_context_t    io_ctx;
1066 struct iocb     iocb;
1067
1068 int aio_setup()
1069 {
1070         int ret;
1071         ret = io_queue_init(QSZ, &io_ctx);
1072         if (ret != 0) {
1073                 fprintf(stderr, "aio_setup: io_queue_init failed: %s\n",
1074                         strerror(ret));
1075                 return(-1);
1076         }
1077         return(0);
1078 }
1079
1080 int
1081 __aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
1082 {
1083         struct io_event event;
1084         static struct timespec ts;
1085         struct iocb *iocbs[] = { &iocb };
1086         int ret;
1087         long res;
1088
1089         if (rw == READ) {
1090                 io_prep_pread(&iocb, fd, buf, len, offset);
1091         } else {
1092                 io_prep_pwrite(&iocb, fd, buf, len, offset);
1093         }
1094
1095         ts.tv_sec = 30;
1096         ts.tv_nsec = 0;
1097         ret = io_submit(io_ctx, 1, iocbs);
1098         if (ret != 1) {
1099                 fprintf(stderr, "errcode=%d\n", ret);
1100                 fprintf(stderr, "aio_rw: io_submit failed: %s\n",
1101                                 strerror(ret));
1102                 goto out_error;
1103         }
1104
1105         ret = io_getevents(io_ctx, 1, 1, &event, &ts);
1106         if (ret != 1) {
1107                 if (ret == 0)
1108                         fprintf(stderr, "aio_rw: no events available\n");
1109                 else {
1110                         fprintf(stderr, "errcode=%d\n", -ret);
1111                         fprintf(stderr, "aio_rw: io_getevents failed: %s\n",
1112                                         strerror(-ret));
1113                 }
1114                 goto out_error;
1115         }
1116         if (len != event.res) {
1117                 /*
1118                  * The b0rked libaio defines event.res as unsigned.
1119                  * However the kernel strucuture has it signed,
1120                  * and it's used to pass negated error value.
1121                  * Till the library is fixed use the temp var.
1122                  */
1123                 res = (long)event.res;
1124                 if (res >= 0)
1125                         fprintf(stderr, "bad io length: %lu instead of %u\n",
1126                                         res, len);
1127                 else {
1128                         fprintf(stderr, "errcode=%ld\n", -res);
1129                         fprintf(stderr, "aio_rw: async io failed: %s\n",
1130                                         strerror(-res));
1131                         ret = res;
1132                         goto out_error;
1133                 }
1134
1135         }
1136         return event.res;
1137
1138 out_error:
1139         /*
1140          * The caller expects error return in traditional libc
1141          * convention, i.e. -1 and the errno set to error.
1142          */
1143         errno = -ret;
1144         return -1;
1145 }
1146
1147 int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
1148 {
1149         int ret;
1150
1151         if (aio) {
1152                 ret = __aio_rw(rw, fd, buf, len, offset);
1153         } else {
1154                 if (rw == READ)
1155                         ret = read(fd, buf, len);
1156                 else
1157                         ret = write(fd, buf, len);
1158         }
1159         return ret;
1160 }
1161
1162 #endif
1163
1164 int
1165 main(int argc, char **argv)
1166 {
1167         int     i, style, ch;
1168         char    *endp;
1169         char goodfile[1024];
1170         char logfile[1024];
1171
1172         goodfile[0] = 0;
1173         logfile[0] = 0;
1174
1175         page_size = getpagesize();
1176         page_mask = page_size - 1;
1177         mmap_mask = page_mask;
1178         
1179
1180         setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
1181
1182         while ((ch = getopt(argc, argv, "b:c:dfl:m:no:p:qr:s:t:w:xyAD:FLN:OP:RS:WZ"))
1183                != EOF)
1184                 switch (ch) {
1185                 case 'b':
1186                         simulatedopcount = getnum(optarg, &endp);
1187                         if (!quiet)
1188                                 fprintf(stdout, "Will begin at operation %ld\n",
1189                                         simulatedopcount);
1190                         if (simulatedopcount == 0)
1191                                 usage();
1192                         simulatedopcount -= 1;
1193                         break;
1194                 case 'c':
1195                         closeprob = getnum(optarg, &endp);
1196                         if (!quiet)
1197                                 fprintf(stdout,
1198                                         "Chance of close/open is 1 in %d\n",
1199                                         closeprob);
1200                         if (closeprob <= 0)
1201                                 usage();
1202                         break;
1203                 case 'd':
1204                         debug = 1;
1205                         break;
1206                 case 'f':
1207                         flush = 1;
1208                         break;
1209                 case 'l':
1210                         maxfilelen = getnum(optarg, &endp);
1211                         if (maxfilelen <= 0)
1212                                 usage();
1213                         break;
1214                 case 'm':
1215                         monitorstart = getnum(optarg, &endp);
1216                         if (monitorstart < 0)
1217                                 usage();
1218                         if (!endp || *endp++ != ':')
1219                                 usage();
1220                         monitorend = getnum(endp, &endp);
1221                         if (monitorend < 0)
1222                                 usage();
1223                         if (monitorend == 0)
1224                                 monitorend = -1; /* aka infinity */
1225                         debug = 1;
1226                 case 'n':
1227                         sizechecks = 0;
1228                         break;
1229                 case 'o':
1230                         maxoplen = getnum(optarg, &endp);
1231                         if (maxoplen <= 0)
1232                                 usage();
1233                         break;
1234                 case 'p':
1235                         progressinterval = getnum(optarg, &endp);
1236                         if (progressinterval == 0)
1237                                 usage();
1238                         break;
1239                 case 'q':
1240                         quiet = 1;
1241                         break;
1242                 case 'r':
1243                         readbdy = getnum(optarg, &endp);
1244                         if (readbdy <= 0)
1245                                 usage();
1246                         break;
1247                 case 's':
1248                         style = getnum(optarg, &endp);
1249                         if (style < 0 || style > 1)
1250                                 usage();
1251                         break;
1252                 case 't':
1253                         truncbdy = getnum(optarg, &endp);
1254                         if (truncbdy <= 0)
1255                                 usage();
1256                         break;
1257                 case 'w':
1258                         writebdy = getnum(optarg, &endp);
1259                         if (writebdy <= 0)
1260                                 usage();
1261                         break;
1262                 case 'x':
1263                         prealloc = 1;
1264                         break;
1265                 case 'y':
1266                         do_fsync = 1;
1267                         break;
1268                 case 'A':
1269                         aio = 1;
1270                         break;
1271                 case 'D':
1272                         debugstart = getnum(optarg, &endp);
1273                         if (debugstart < 1)
1274                                 usage();
1275                         break;
1276                 case 'F':
1277                         fallocate_calls = 0;
1278                         break;
1279                 case 'L':
1280                         lite = 1;
1281                         break;
1282                 case 'N':
1283                         numops = getnum(optarg, &endp);
1284                         if (numops < 0)
1285                                 usage();
1286                         break;
1287                 case 'O':
1288                         randomoplen = 0;
1289                         break;
1290                 case 'P':
1291                         strncpy(goodfile, optarg, sizeof(goodfile));
1292                         strcat(goodfile, "/");
1293                         strncpy(logfile, optarg, sizeof(logfile));
1294                         strcat(logfile, "/");
1295                         break;
1296                 case 'R':
1297                         mapped_reads = 0;
1298                         break;
1299                 case 'S':
1300                         seed = getnum(optarg, &endp);
1301                         if (seed == 0)
1302                                 seed = time(0) % 10000;
1303                         if (!quiet)
1304                                 fprintf(stdout, "Seed set to %d\n", seed);
1305                         if (seed < 0)
1306                                 usage();
1307                         break;
1308                 case 'W':
1309                         mapped_writes = 0;
1310                         if (!quiet)
1311                                 fprintf(stdout, "mapped writes DISABLED\n");
1312                         break;
1313                 case 'Z':
1314                         o_direct = O_DIRECT;
1315                         break;
1316                 default:
1317                         usage();
1318                         /* NOTREACHED */
1319                 }
1320         argc -= optind;
1321         argv += optind;
1322         if (argc != 1)
1323                 usage();
1324         fname = argv[0];
1325
1326         signal(SIGHUP,  cleanup);
1327         signal(SIGINT,  cleanup);
1328         signal(SIGPIPE, cleanup);
1329         signal(SIGALRM, cleanup);
1330         signal(SIGTERM, cleanup);
1331         signal(SIGXCPU, cleanup);
1332         signal(SIGXFSZ, cleanup);
1333         signal(SIGVTALRM,       cleanup);
1334         signal(SIGUSR1, cleanup);
1335         signal(SIGUSR2, cleanup);
1336
1337         initstate(seed, state, 256);
1338         setstate(state);
1339         fd = open(fname,
1340                 O_RDWR|(lite ? 0 : O_CREAT|O_TRUNC)|o_direct, 0666);
1341         if (fd < 0) {
1342                 prterr(fname);
1343                 exit(91);
1344         }
1345 #ifdef XFS
1346         if (prealloc) {
1347                 xfs_flock64_t   resv = { 0 };
1348 #ifdef HAVE_XFS_PLATFORM_DEFS_H
1349                 if (!platform_test_xfs_fd(fd)) {
1350                         prterr(fname);
1351                         fprintf(stderr, "main: cannot prealloc, non XFS\n");
1352                         exit(96);
1353                 }
1354 #endif
1355                 resv.l_len = maxfilelen;
1356                 if ((xfsctl(fname, fd, XFS_IOC_RESVSP, &resv)) < 0) {
1357                         prterr(fname);
1358                         exit(97);
1359                 }
1360         }
1361 #endif
1362         strncat(goodfile, fname, 256);
1363         strcat (goodfile, ".fsxgood");
1364         fsxgoodfd = open(goodfile, O_RDWR|O_CREAT|O_TRUNC, 0666);
1365         if (fsxgoodfd < 0) {
1366                 prterr(goodfile);
1367                 exit(92);
1368         }
1369         strncat(logfile, fname, 256);
1370         strcat (logfile, ".fsxlog");
1371         fsxlogf = fopen(logfile, "w");
1372         if (fsxlogf == NULL) {
1373                 prterr(logfile);
1374                 exit(93);
1375         }
1376
1377 #ifdef AIO
1378         if (aio) 
1379                 aio_setup();
1380 #endif
1381
1382         if (lite) {
1383                 off_t ret;
1384                 file_size = maxfilelen = lseek(fd, (off_t)0, SEEK_END);
1385                 if (file_size == (off_t)-1) {
1386                         prterr(fname);
1387                         warn("main: lseek eof");
1388                         exit(94);
1389                 }
1390                 ret = lseek(fd, (off_t)0, SEEK_SET);
1391                 if (ret == (off_t)-1) {
1392                         prterr(fname);
1393                         warn("main: lseek 0");
1394                         exit(95);
1395                 }
1396         }
1397         original_buf = (char *) malloc(maxfilelen);
1398         for (i = 0; i < maxfilelen; i++)
1399                 original_buf[i] = random() % 256;
1400         good_buf = (char *) malloc(maxfilelen + writebdy);
1401         good_buf = round_up(good_buf, writebdy, 0);
1402         memset(good_buf, '\0', maxfilelen);
1403         temp_buf = (char *) malloc(maxoplen + readbdy);
1404         temp_buf = round_up(temp_buf, readbdy, 0);
1405         memset(temp_buf, '\0', maxoplen);
1406         if (lite) {     /* zero entire existing file */
1407                 ssize_t written;
1408
1409                 written = write(fd, good_buf, (size_t)maxfilelen);
1410                 if (written != maxfilelen) {
1411                         if (written == -1) {
1412                                 prterr(fname);
1413                                 warn("main: error on write");
1414                         } else
1415                                 warn("main: short write, 0x%x bytes instead "
1416                                         "of 0x%lx\n",
1417                                         (unsigned)written,
1418                                         maxfilelen);
1419                         exit(98);
1420                 }
1421         } else 
1422                 check_trunc_hack();
1423
1424 #ifdef FALLOCATE
1425         if (!lite && fallocate_calls) {
1426                 if (fallocate(fd, 0, 0, 1) && errno == EOPNOTSUPP) {
1427                         warn("main: filesystem does not support fallocate, disabling");
1428                         fallocate_calls = 0;
1429                 } else
1430                         ftruncate(fd, 0);
1431         }
1432 #else /* ! FALLOCATE */
1433         fallocate_calls = 0;
1434 #endif
1435
1436         while (numops == -1 || numops--)
1437                 test();
1438
1439         if (close(fd)) {
1440                 prterr("close");
1441                 report_failure(99);
1442         }
1443         prt("All operations completed A-OK!\n");
1444
1445         exit(0);
1446         return 0;
1447 }