fsx: Add check for mmap last-page zero fill
[xfstests-dev.git] / ltp / fsx.c
1 /*
2  *      Copyright (C) 1991, NeXT Computer, Inc.  All Rights Reserved.
3  *
4  *      File:   fsx.c
5  *      Author: Avadis Tevanian, Jr.
6  *
7  *      File system exerciser. 
8  *
9  *      Rewritten 8/98 by Conrad Minshall.
10  *
11  *      Small changes to work under Linux -- davej.
12  *
13  *      Checks for mmap last-page zero fill.
14  */
15
16 #include "global.h"
17
18 #include <limits.h>
19 #include <time.h>
20 #include <strings.h>
21 #include <sys/file.h>
22 #include <sys/mman.h>
23 #ifdef HAVE_ERR_H
24 #include <err.h>
25 #endif
26 #include <signal.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <stdarg.h>
31 #include <errno.h>
32 #ifdef AIO
33 #include <libaio.h>
34 #endif
35
36 #ifndef MAP_FILE
37 # define MAP_FILE 0
38 #endif
39
40 #define NUMPRINTCOLUMNS 32      /* # columns of data to print on each line */
41
42 /*
43  *      A log entry is an operation and a bunch of arguments.
44  */
45
46 struct log_entry {
47         int     operation;
48         int     args[3];
49 };
50
51 #define LOGSIZE 1000
52
53 struct log_entry        oplog[LOGSIZE]; /* the log */
54 int                     logptr = 0;     /* current position in log */
55 int                     logcount = 0;   /* total ops */
56
57 /*
58  *      Define operations
59  */
60
61 #define OP_READ         1
62 #define OP_WRITE        2
63 #define OP_TRUNCATE     3
64 #define OP_CLOSEOPEN    4
65 #define OP_MAPREAD      5
66 #define OP_MAPWRITE     6
67 #define OP_SKIPPED      7
68
69 #undef PAGE_SIZE
70 #define PAGE_SIZE       getpagesize()
71 #undef PAGE_MASK
72 #define PAGE_MASK       (PAGE_SIZE - 1)
73
74 char    *original_buf;                  /* a pointer to the original data */
75 char    *good_buf;                      /* a pointer to the correct data */
76 char    *temp_buf;                      /* a pointer to the current data */
77 char    *fname;                         /* name of our test file */
78 int     fd;                             /* fd for our test file */
79
80 off_t           file_size = 0;
81 off_t           biggest = 0;
82 char            state[256];
83 unsigned long   testcalls = 0;          /* calls to function "test" */
84
85 unsigned long   simulatedopcount = 0;   /* -b flag */
86 int     closeprob = 0;                  /* -c flag */
87 int     debug = 0;                      /* -d flag */
88 unsigned long   debugstart = 0;         /* -D flag */
89 int     flush = 0;                      /* -f flag */
90 int     do_fsync = 0;                   /* -y flag */
91 unsigned long   maxfilelen = 256 * 1024;        /* -l flag */
92 int     sizechecks = 1;                 /* -n flag disables them */
93 int     maxoplen = 64 * 1024;           /* -o flag */
94 int     quiet = 0;                      /* -q flag */
95 unsigned long progressinterval = 0;     /* -p flag */
96 int     readbdy = 1;                    /* -r flag */
97 int     style = 0;                      /* -s flag */
98 int     prealloc = 0;                   /* -x flag */
99 int     truncbdy = 1;                   /* -t flag */
100 int     writebdy = 1;                   /* -w flag */
101 long    monitorstart = -1;              /* -m flag */
102 long    monitorend = -1;                /* -m flag */
103 int     lite = 0;                       /* -L flag */
104 long    numops = -1;                    /* -N flag */
105 int     randomoplen = 1;                /* -O flag disables it */
106 int     seed = 1;                       /* -S flag */
107 int     mapped_writes = 1;              /* -W flag disables */
108 int     mapped_reads = 1;               /* -R flag disables it */
109 int     fsxgoodfd = 0;
110 int     o_direct;                       /* -Z */
111 int     aio = 0;
112
113 int page_size;
114 int page_mask;
115 int mmap_mask;
116 #ifdef AIO
117 int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
118 #define READ 0
119 #define WRITE 1
120 #define fsxread(a,b,c,d)        aio_rw(READ, a,b,c,d)
121 #define fsxwrite(a,b,c,d)       aio_rw(WRITE, a,b,c,d)
122 #else
123 #define fsxread(a,b,c,d)        read(a,b,c)
124 #define fsxwrite(a,b,c,d)       write(a,b,c)
125 #endif
126
127 FILE *  fsxlogf = NULL;
128 int badoff = -1;
129 int closeopen = 0;
130
/*
 * Round ptr up to the next multiple of align and then add offset bytes.
 * The rounding is done by masking with (align - 1), so it only yields a
 * true multiple when align is a power of two.
 */
static void *round_up(void *ptr, unsigned long align, unsigned long offset)
{
        unsigned long mask = align - 1;
        unsigned long addr = (unsigned long)ptr;

        addr = (addr + mask) & ~mask;
        return (void *)(addr + offset);
}
139
/*
 * Write a diagnostic to stderr: the "fsx: " tag, then (when fmt is not
 * NULL) the printf-style message built from fmt/ap followed by ": ",
 * and finally the strerror() text for code and a newline.
 */
void
vwarnc(int code, const char *fmt, va_list ap)
{
        fputs("fsx: ", stderr);
        if (fmt) {
                vfprintf(stderr, fmt, ap);
                fputs(": ", stderr);
        }
        fprintf(stderr, "%s\n", strerror(code));
}
149
/*
 * printf-style wrapper around vwarnc() that reports the current errno.
 */
void
warn(const char * fmt, ...)
{
        va_list args;

        va_start(args, fmt);
        vwarnc(errno, fmt, args);
        va_end(args);
}
157
158 #define BUF_SIZE 1024
159
160 void
161 prt(char *fmt, ...)
162 {
163         va_list args;
164         char buffer[BUF_SIZE];
165
166         va_start(args, fmt);
167         vsnprintf(buffer, BUF_SIZE, fmt, args);
168         va_end(args);
169         fprintf(stdout, buffer);
170         if (fsxlogf)
171                 fprintf(fsxlogf, buffer);
172 }
173
/*
 * Report the current errno through prt() as "<prefix>: <strerror>\n".
 * A NULL prefix is allowed and prints just the error text; it is never
 * passed to a %s conversion.
 */
void
prterr(char *prefix)
{
        /* Fetch the text first so nothing below can disturb errno. */
        const char *errtext = strerror(errno);

        if (prefix)
                prt("%s: %s\n", prefix, errtext);
        else
                prt("%s\n", errtext);
}
179
180
181 void
182 log4(int operation, int arg0, int arg1, int arg2)
183 {
184         struct log_entry *le;
185
186         le = &oplog[logptr];
187         le->operation = operation;
188         if (closeopen)
189                 le->operation = ~ le->operation;
190         le->args[0] = arg0;
191         le->args[1] = arg1;
192         le->args[2] = arg2;
193         logptr++;
194         logcount++;
195         if (logptr >= LOGSIZE)
196                 logptr = 0;
197 }
198
199
200 void
201 logdump(void)
202 {
203         int     i, count, down;
204         struct log_entry        *lp;
205
206         prt("LOG DUMP (%d total operations):\n", logcount);
207         if (logcount < LOGSIZE) {
208                 i = 0;
209                 count = logcount;
210         } else {
211                 i = logptr;
212                 count = LOGSIZE;
213         }
214         for ( ; count > 0; count--) {
215                 int opnum;
216
217                 opnum = i+1 + (logcount/LOGSIZE)*LOGSIZE;
218                 prt("%d(%d mod 256): ", opnum, opnum%256);
219                 lp = &oplog[i];
220                 if ((closeopen = lp->operation < 0))
221                         lp->operation = ~ lp->operation;
222                         
223                 switch (lp->operation) {
224                 case OP_MAPREAD:
225                         prt("MAPREAD\t0x%x thru 0x%x\t(0x%x bytes)",
226                             lp->args[0], lp->args[0] + lp->args[1] - 1,
227                             lp->args[1]);
228                         if (badoff >= lp->args[0] && badoff <
229                                                      lp->args[0] + lp->args[1])
230                                 prt("\t***RRRR***");
231                         break;
232                 case OP_MAPWRITE:
233                         prt("MAPWRITE 0x%x thru 0x%x\t(0x%x bytes)",
234                             lp->args[0], lp->args[0] + lp->args[1] - 1,
235                             lp->args[1]);
236                         if (badoff >= lp->args[0] && badoff <
237                                                      lp->args[0] + lp->args[1])
238                                 prt("\t******WWWW");
239                         break;
240                 case OP_READ:
241                         prt("READ\t0x%x thru 0x%x\t(0x%x bytes)",
242                             lp->args[0], lp->args[0] + lp->args[1] - 1,
243                             lp->args[1]);
244                         if (badoff >= lp->args[0] &&
245                             badoff < lp->args[0] + lp->args[1])
246                                 prt("\t***RRRR***");
247                         break;
248                 case OP_WRITE:
249                         prt("WRITE\t0x%x thru 0x%x\t(0x%x bytes)",
250                             lp->args[0], lp->args[0] + lp->args[1] - 1,
251                             lp->args[1]);
252                         if (lp->args[0] > lp->args[2])
253                                 prt(" HOLE");
254                         else if (lp->args[0] + lp->args[1] > lp->args[2])
255                                 prt(" EXTEND");
256                         if ((badoff >= lp->args[0] || badoff >=lp->args[2]) &&
257                             badoff < lp->args[0] + lp->args[1])
258                                 prt("\t***WWWW");
259                         break;
260                 case OP_TRUNCATE:
261                         down = lp->args[0] < lp->args[1];
262                         prt("TRUNCATE %s\tfrom 0x%x to 0x%x",
263                             down ? "DOWN" : "UP", lp->args[1], lp->args[0]);
264                         if (badoff >= lp->args[!down] &&
265                             badoff < lp->args[!!down])
266                                 prt("\t******WWWW");
267                         break;
268                 case OP_SKIPPED:
269                         prt("SKIPPED (no operation)");
270                         break;
271                 default:
272                         prt("BOGUS LOG ENTRY (operation code = %d)!",
273                             lp->operation);
274                 }
275                 if (closeopen)
276                         prt("\n\t\tCLOSE/OPEN");
277                 prt("\n");
278                 i++;
279                 if (i == LOGSIZE)
280                         i = 0;
281         }
282 }
283
284
285 void
286 save_buffer(char *buffer, off_t bufferlength, int fd)
287 {
288         off_t ret;
289         ssize_t byteswritten;
290
291         if (fd <= 0 || bufferlength == 0)
292                 return;
293
294         if (bufferlength > SSIZE_MAX) {
295                 prt("fsx flaw: overflow in save_buffer\n");
296                 exit(67);
297         }
298         if (lite) {
299                 off_t size_by_seek = lseek(fd, (off_t)0, SEEK_END);
300                 if (size_by_seek == (off_t)-1)
301                         prterr("save_buffer: lseek eof");
302                 else if (bufferlength > size_by_seek) {
303                         warn("save_buffer: .fsxgood file too short... will save 0x%llx bytes instead of 0x%llx\n", (unsigned long long)size_by_seek,
304                              (unsigned long long)bufferlength);
305                         bufferlength = size_by_seek;
306                 }
307         }
308
309         ret = lseek(fd, (off_t)0, SEEK_SET);
310         if (ret == (off_t)-1)
311                 prterr("save_buffer: lseek 0");
312         
313         byteswritten = write(fd, buffer, (size_t)bufferlength);
314         if (byteswritten != bufferlength) {
315                 if (byteswritten == -1)
316                         prterr("save_buffer write");
317                 else
318                         warn("save_buffer: short write, 0x%x bytes instead of 0x%llx\n",
319                              (unsigned)byteswritten,
320                              (unsigned long long)bufferlength);
321         }
322 }
323
324
325 void
326 report_failure(int status)
327 {
328         logdump();
329         
330         if (fsxgoodfd) {
331                 if (good_buf) {
332                         save_buffer(good_buf, file_size, fsxgoodfd);
333                         prt("Correct content saved for comparison\n");
334                         prt("(maybe hexdump \"%s\" vs \"%s.fsxgood\")\n",
335                             fname, fname);
336                 }
337                 close(fsxgoodfd);
338         }
339         exit(status);
340 }
341
342
343 #define short_at(cp) ((unsigned short)((*((unsigned char *)(cp)) << 8) | \
344                                         *(((unsigned char *)(cp)) + 1)))
345
346 void
347 check_buffers(unsigned offset, unsigned size)
348 {
349         unsigned char c, t;
350         unsigned i = 0;
351         unsigned n = 0;
352         unsigned op = 0;
353         unsigned bad = 0;
354
355         if (memcmp(good_buf + offset, temp_buf, size) != 0) {
356                 prt("READ BAD DATA: offset = 0x%x, size = 0x%x, fname = %s\n",
357                     offset, size, fname);
358                 prt("OFFSET\tGOOD\tBAD\tRANGE\n");
359                 while (size > 0) {
360                         c = good_buf[offset];
361                         t = temp_buf[i];
362                         if (c != t) {
363                                 if (n < 16) {
364                                         bad = short_at(&temp_buf[i]);
365                                         prt("0x%5x\t0x%04x\t0x%04x", offset,
366                                             short_at(&good_buf[offset]), bad);
367                                         op = temp_buf[offset & 1 ? i+1 : i];
368                                         prt("\t0x%5x\n", n);
369                                         if (op)
370                                                 prt("operation# (mod 256) for "
371                                                   "the bad data may be %u\n",
372                                                 ((unsigned)op & 0xff));
373                                         else
374                                                 prt("operation# (mod 256) for "
375                                                   "the bad data unknown, check"
376                                                   " HOLE and EXTEND ops\n");
377                                 }
378                                 n++;
379                                 badoff = offset;
380                         }
381                         offset++;
382                         i++;
383                         size--;
384                 }
385                 report_failure(110);
386         }
387 }
388
389
390 void
391 check_size(void)
392 {
393         struct stat     statbuf;
394         off_t   size_by_seek;
395
396         if (fstat(fd, &statbuf)) {
397                 prterr("check_size: fstat");
398                 statbuf.st_size = -1;
399         }
400         size_by_seek = lseek(fd, (off_t)0, SEEK_END);
401         if (file_size != statbuf.st_size || file_size != size_by_seek) {
402                 prt("Size error: expected 0x%llx stat 0x%llx seek 0x%llx\n",
403                     (unsigned long long)file_size,
404                     (unsigned long long)statbuf.st_size,
405                     (unsigned long long)size_by_seek);
406                 report_failure(120);
407         }
408 }
409
410
411 void
412 check_trunc_hack(void)
413 {
414         struct stat statbuf;
415
416         ftruncate(fd, (off_t)0);
417         ftruncate(fd, (off_t)100000);
418         fstat(fd, &statbuf);
419         if (statbuf.st_size != (off_t)100000) {
420                 prt("no extend on truncate! not posix!\n");
421                 exit(130);
422         }
423         ftruncate(fd, 0);
424 }
425
426 void
427 doflush(unsigned offset, unsigned size)
428 {
429         unsigned pg_offset;
430         unsigned map_size;
431         char    *p;
432
433         if (o_direct == O_DIRECT)
434                 return;
435
436         pg_offset = offset & mmap_mask;
437         map_size  = pg_offset + size;
438
439         if ((p = (char *)mmap(0, map_size, PROT_READ | PROT_WRITE,
440                               MAP_FILE | MAP_SHARED, fd,
441                               (off_t)(offset - pg_offset))) == (char *)-1) {
442                 prterr("doflush: mmap");
443                 report_failure(202);
444         }
445         if (msync(p, map_size, MS_INVALIDATE) != 0) {
446                 prterr("doflush: msync");
447                 report_failure(203);
448         }
449         if (munmap(p, map_size) != 0) {
450                 prterr("doflush: munmap");
451                 report_failure(204);
452         }
453 }
454
455 void
456 doread(unsigned offset, unsigned size)
457 {
458         off_t ret;
459         unsigned iret;
460
461         offset -= offset % readbdy;
462         if (o_direct)
463                 size -= size % readbdy;
464         if (size == 0) {
465                 if (!quiet && testcalls > simulatedopcount && !o_direct)
466                         prt("skipping zero size read\n");
467                 log4(OP_SKIPPED, OP_READ, offset, size);
468                 return;
469         }
470         if (size + offset > file_size) {
471                 if (!quiet && testcalls > simulatedopcount)
472                         prt("skipping seek/read past end of file\n");
473                 log4(OP_SKIPPED, OP_READ, offset, size);
474                 return;
475         }
476
477         log4(OP_READ, offset, size, 0);
478
479         if (testcalls <= simulatedopcount)
480                 return;
481
482         if (!quiet &&
483                 ((progressinterval && testcalls % progressinterval == 0)  ||
484                 (debug &&
485                        (monitorstart == -1 ||
486                         (offset + size > monitorstart &&
487                         (monitorend == -1 || offset <= monitorend))))))
488                 prt("%lu read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
489                     offset, offset + size - 1, size);
490         ret = lseek(fd, (off_t)offset, SEEK_SET);
491         if (ret == (off_t)-1) {
492                 prterr("doread: lseek");
493                 report_failure(140);
494         }
495         iret = fsxread(fd, temp_buf, size, offset);
496         if (iret != size) {
497                 if (iret == -1)
498                         prterr("doread: read");
499                 else
500                         prt("short read: 0x%x bytes instead of 0x%x\n",
501                             iret, size);
502                 report_failure(141);
503         }
504         check_buffers(offset, size);
505 }
506
507
508 void
509 check_eofpage(char *s, unsigned offset, char *p, int size)
510 {
511         unsigned long last_page, should_be_zero;
512
513         if (offset + size <= (file_size & ~page_mask))
514                 return;
515         /*
516          * we landed in the last page of the file
517          * test to make sure the VM system provided 0's 
518          * beyond the true end of the file mapping
519          * (as required by mmap def in 1996 posix 1003.1)
520          */
521         last_page = ((unsigned long)p + (offset & page_mask) + size) & ~page_mask;
522
523         for (should_be_zero = last_page + (file_size & page_mask);
524              should_be_zero < last_page + page_size;
525              should_be_zero++)
526                 if (*(char *)should_be_zero) {
527                         prt("Mapped %s: non-zero data past EOF (0x%llx) page offset 0x%x is 0x%04x\n",
528                             s, file_size - 1, should_be_zero & page_mask,
529                             short_at(should_be_zero));
530                         report_failure(205);
531                 }
532 }
533
534
535 void
536 domapread(unsigned offset, unsigned size)
537 {
538         unsigned pg_offset;
539         unsigned map_size;
540         char    *p;
541
542         offset -= offset % readbdy;
543         if (size == 0) {
544                 if (!quiet && testcalls > simulatedopcount)
545                         prt("skipping zero size read\n");
546                 log4(OP_SKIPPED, OP_MAPREAD, offset, size);
547                 return;
548         }
549         if (size + offset > file_size) {
550                 if (!quiet && testcalls > simulatedopcount)
551                         prt("skipping seek/read past end of file\n");
552                 log4(OP_SKIPPED, OP_MAPREAD, offset, size);
553                 return;
554         }
555
556         log4(OP_MAPREAD, offset, size, 0);
557
558         if (testcalls <= simulatedopcount)
559                 return;
560
561         if (!quiet &&
562                 ((progressinterval && testcalls % progressinterval == 0) ||
563                        (debug &&
564                        (monitorstart == -1 ||
565                         (offset + size > monitorstart &&
566                         (monitorend == -1 || offset <= monitorend))))))
567                 prt("%lu mapread\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
568                     offset, offset + size - 1, size);
569
570         pg_offset = offset & PAGE_MASK;
571         map_size  = pg_offset + size;
572
573         if ((p = (char *)mmap(0, map_size, PROT_READ, MAP_SHARED, fd,
574                               (off_t)(offset - pg_offset))) == (char *)-1) {
575                 prterr("domapread: mmap");
576                 report_failure(190);
577         }
578         memcpy(temp_buf, p + pg_offset, size);
579
580         check_eofpage("Read", offset, p, size);
581
582         if (munmap(p, map_size) != 0) {
583                 prterr("domapread: munmap");
584                 report_failure(191);
585         }
586
587         check_buffers(offset, size);
588 }
589
590
591 void
592 gendata(char *original_buf, char *good_buf, unsigned offset, unsigned size)
593 {
594         while (size--) {
595                 good_buf[offset] = testcalls % 256; 
596                 if (offset % 2)
597                         good_buf[offset] += original_buf[offset];
598                 offset++;
599         }
600 }
601
602
603 void
604 dowrite(unsigned offset, unsigned size)
605 {
606         off_t ret;
607         unsigned iret;
608
609         offset -= offset % writebdy;
610         if (o_direct)
611                 size -= size % writebdy;
612         if (size == 0) {
613                 if (!quiet && testcalls > simulatedopcount && !o_direct)
614                         prt("skipping zero size write\n");
615                 log4(OP_SKIPPED, OP_WRITE, offset, size);
616                 return;
617         }
618
619         log4(OP_WRITE, offset, size, file_size);
620
621         gendata(original_buf, good_buf, offset, size);
622         if (file_size < offset + size) {
623                 if (file_size < offset)
624                         memset(good_buf + file_size, '\0', offset - file_size);
625                 file_size = offset + size;
626                 if (lite) {
627                         warn("Lite file size bug in fsx!");
628                         report_failure(149);
629                 }
630         }
631
632         if (testcalls <= simulatedopcount)
633                 return;
634
635         if (!quiet &&
636                 ((progressinterval && testcalls % progressinterval == 0) ||
637                        (debug &&
638                        (monitorstart == -1 ||
639                         (offset + size > monitorstart &&
640                         (monitorend == -1 || offset <= monitorend))))))
641                 prt("%lu write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
642                     offset, offset + size - 1, size);
643         ret = lseek(fd, (off_t)offset, SEEK_SET);
644         if (ret == (off_t)-1) {
645                 prterr("dowrite: lseek");
646                 report_failure(150);
647         }
648         iret = fsxwrite(fd, good_buf + offset, size, offset);
649         if (iret != size) {
650                 if (iret == -1)
651                         prterr("dowrite: write");
652                 else
653                         prt("short write: 0x%x bytes instead of 0x%x\n",
654                             iret, size);
655                 report_failure(151);
656         }
657         if (do_fsync) {
658                 if (fsync(fd)) {
659                         prt("fsync() failed: %s\n", strerror(errno));
660                         report_failure(152);
661                 }
662         }
663         if (flush) {
664                 doflush(offset, size);
665         }
666 }
667
668
669 void
670 domapwrite(unsigned offset, unsigned size)
671 {
672         unsigned pg_offset;
673         unsigned map_size;
674         off_t    cur_filesize;
675         char    *p;
676
677         offset -= offset % writebdy;
678         if (size == 0) {
679                 if (!quiet && testcalls > simulatedopcount)
680                         prt("skipping zero size write\n");
681                 log4(OP_SKIPPED, OP_MAPWRITE, offset, size);
682                 return;
683         }
684         cur_filesize = file_size;
685
686         log4(OP_MAPWRITE, offset, size, 0);
687
688         gendata(original_buf, good_buf, offset, size);
689         if (file_size < offset + size) {
690                 if (file_size < offset)
691                         memset(good_buf + file_size, '\0', offset - file_size);
692                 file_size = offset + size;
693                 if (lite) {
694                         warn("Lite file size bug in fsx!");
695                         report_failure(200);
696                 }
697         }
698
699         if (testcalls <= simulatedopcount)
700                 return;
701
702         if (!quiet &&
703                 ((progressinterval && testcalls % progressinterval == 0) ||
704                        (debug &&
705                        (monitorstart == -1 ||
706                         (offset + size > monitorstart &&
707                         (monitorend == -1 || offset <= monitorend))))))
708                 prt("%lu mapwrite\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
709                     offset, offset + size - 1, size);
710
711         if (file_size > cur_filesize) {
712                 if (ftruncate(fd, file_size) == -1) {
713                         prterr("domapwrite: ftruncate");
714                         exit(201);
715                 }
716         }
717         pg_offset = offset & PAGE_MASK;
718         map_size  = pg_offset + size;
719
720         if ((p = (char *)mmap(0, map_size, PROT_READ | PROT_WRITE,
721                               MAP_FILE | MAP_SHARED, fd,
722                               (off_t)(offset - pg_offset))) == (char *)-1) {
723                 prterr("domapwrite: mmap");
724                 report_failure(202);
725         }
726         memcpy(p + pg_offset, good_buf + offset, size);
727         if (msync(p, map_size, 0) != 0) {
728                 prterr("domapwrite: msync");
729                 report_failure(203);
730         }
731
732         check_eofpage("Write", offset, p, size);
733
734         if (munmap(p, map_size) != 0) {
735                 prterr("domapwrite: munmap");
736                 report_failure(204);
737         }
738 }
739
740
741 void
742 dotruncate(unsigned size)
743 {
744         int oldsize = file_size;
745
746         size -= size % truncbdy;
747         if (size > biggest) {
748                 biggest = size;
749                 if (!quiet && testcalls > simulatedopcount)
750                         prt("truncating to largest ever: 0x%x\n", size);
751         }
752
753         log4(OP_TRUNCATE, size, (unsigned)file_size, 0);
754
755         if (size > file_size)
756                 memset(good_buf + file_size, '\0', size - file_size);
757         file_size = size;
758
759         if (testcalls <= simulatedopcount)
760                 return;
761         
762         if ((progressinterval && testcalls % progressinterval == 0) ||
763             (debug && (monitorstart == -1 || monitorend == -1 ||
764                       size <= monitorend)))
765                 prt("%lu trunc\tfrom 0x%x to 0x%x\n", testcalls, oldsize, size);
766         if (ftruncate(fd, (off_t)size) == -1) {
767                 prt("ftruncate1: %x\n", size);
768                 prterr("dotruncate: ftruncate");
769                 report_failure(160);
770         }
771 }
772
773
774 void
775 writefileimage()
776 {
777         ssize_t iret;
778
779         if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
780                 prterr("writefileimage: lseek");
781                 report_failure(171);
782         }
783         iret = write(fd, good_buf, file_size);
784         if ((off_t)iret != file_size) {
785                 if (iret == -1)
786                         prterr("writefileimage: write");
787                 else
788                         prt("short write: 0x%x bytes instead of 0x%llx\n",
789                             iret, (unsigned long long)file_size);
790                 report_failure(172);
791         }
792         if (lite ? 0 : ftruncate(fd, file_size) == -1) {
793                 prt("ftruncate2: %llx\n", (unsigned long long)file_size);
794                 prterr("writefileimage: ftruncate");
795                 report_failure(173);
796         }
797 }
798
799
800 void
801 docloseopen(void)
802
803         if (testcalls <= simulatedopcount)
804                 return;
805
806         if (debug)
807                 prt("%lu close/open\n", testcalls);
808         if (close(fd)) {
809                 prterr("docloseopen: close");
810                 report_failure(180);
811         }
812         fd = open(fname, O_RDWR|o_direct, 0);
813         if (fd < 0) {
814                 prterr("docloseopen: open");
815                 report_failure(181);
816         }
817 }
818
819
820 void
821 test(void)
822 {
823         unsigned long   offset;
824         unsigned long   size = maxoplen;
825         unsigned long   rv = random();
826         unsigned long   op = rv % (3 + !lite + mapped_writes);
827
828         /* turn off the map read if necessary */
829
830         if (op == 2 && !mapped_reads)
831             op = 0;
832
833         if (simulatedopcount > 0 && testcalls == simulatedopcount)
834                 writefileimage();
835
836         testcalls++;
837
838         if (closeprob)
839                 closeopen = (rv >> 3) < (1 << 28) / closeprob;
840
841         if (debugstart > 0 && testcalls >= debugstart)
842                 debug = 1;
843
844         if (!quiet && testcalls < simulatedopcount && testcalls % 100000 == 0)
845                 prt("%lu...\n", testcalls);
846
847         /*
848          * READ:        op = 0
849          * WRITE:       op = 1
850          * MAPREAD:     op = 2
851          * TRUNCATE:    op = 3
852          * MAPWRITE:    op = 3 or 4
853          */
854         if (lite ? 0 : op == 3 && (style & 1) == 0) /* vanilla truncate? */
855                 dotruncate(random() % maxfilelen);
856         else {
857                 if (randomoplen)
858                         size = random() % (maxoplen+1);
859                 if (lite ? 0 : op == 3)
860                         dotruncate(size);
861                 else {
862                         offset = random();
863                         if (op == 1 || op == (lite ? 3 : 4)) {
864                                 offset %= maxfilelen;
865                                 if (offset + size > maxfilelen)
866                                         size = maxfilelen - offset;
867                                 if (op != 1)
868                                         domapwrite(offset, size);
869                                 else
870                                         dowrite(offset, size);
871                         } else {
872                                 if (file_size)
873                                         offset %= file_size;
874                                 else
875                                         offset = 0;
876                                 if (offset + size > file_size)
877                                         size = file_size - offset;
878                                 if (op != 0)
879                                         domapread(offset, size);
880                                 else
881                                         doread(offset, size);
882                         }
883                 }
884         }
885         if (sizechecks && testcalls > simulatedopcount)
886                 check_size();
887         if (closeopen)
888                 docloseopen();
889 }
890
891
892 void
893 cleanup(sig)
894         int     sig;
895 {
896         if (sig)
897                 prt("signal %d\n", sig);
898         prt("testcalls = %lu\n", testcalls);
899         exit(sig);
900 }
901
902
/*
 * Print the command-line synopsis and the per-option help text to
 * stdout, then terminate with exit status 90.  Never returns.
 *
 * The flag cluster in the synopsis lists every argument-less option
 * that main()'s getopt string accepts (including -f, -y and -R).
 * The -A help line is only compiled in when AIO is defined.
 */
void
usage(void)
{
	fprintf(stdout, "usage: %s",
		"fsx [-dfnqxyALORWZ] [-b opnum] [-c Prob] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] fname\n\
        -b opnum: beginning operation number (default 1)\n\
        -c P: 1 in P chance of file close+open at each op (default infinity)\n\
        -d: debug output for all operations\n\
        -f flush and invalidate cache after I/O\n\
        -l flen: the upper bound on file size (default 262144)\n\
        -m startop:endop: monitor (print debug output) specified byte range (default 0:infinity)\n\
        -n: no verifications of file size\n\
        -o oplen: the upper bound on operation size (default 65536)\n\
        -p progressinterval: debug output at specified operation interval\n\
        -q: quieter operation\n\
        -r readbdy: 4096 would make reads page aligned (default 1)\n\
        -s style: 1 gives smaller truncates (default 0)\n\
        -t truncbdy: 4096 would make truncates page aligned (default 1)\n\
        -w writebdy: 4096 would make writes page aligned (default 1)\n\
        -x: preallocate file space before starting, XFS only (default 0)\n\
        -y synchronize changes to a file\n"

#ifdef AIO
"       -A: Use the AIO system calls\n"
#endif
"       -D startingop: debug output starting at specified operation\n\
        -L: fsxLite - no file creations & no file size changes\n\
        -N numops: total # operations to do (default infinity)\n\
        -O: use oplen (see -o flag) for every op (default random)\n\
        -P: save .fsxlog and .fsxgood files in dirpath (default ./)\n\
        -S seed: for random # generator (default 1) 0 gets timestamp\n\
        -W: mapped write operations DISabled\n\
        -R: read() system calls only (mapped reads disabled)\n\
        -Z: O_DIRECT (use -R, -W, -r and -w too)\n\
        fname: this filename is REQUIRED (no default)\n");
	exit(90);
}
940
941
/*
 * Parse a number with an optional one-letter size suffix.
 *
 *	s	text to parse; the numeric part is read by strtol() with
 *		base 0, so decimal, octal (0...) and hex (0x...) work
 *	e	out: set to the first character that was not consumed
 *		(just past the suffix when one was recognized)
 *
 * Recognized suffixes (either case) and their multipliers:
 *	b = 512, k = 1024, m = 1024*1024, w = 4
 *
 * Returns the scaled value.  A result that does not fit in an int is
 * clamped to INT_MAX / INT_MIN and errno is set to ERANGE, instead of
 * being silently truncated or overflowing a signed multiply.
 */
int
getnum(char *s, char **e)
{
	long	val;
	long	scale = 1;

	*e = (char *) 0;
	val = strtol(s, e, 0);
	if (*e) {
		switch (**e) {
		case 'b':
		case 'B':
			scale = 512;
			break;
		case 'k':
		case 'K':
			scale = 1024;
			break;
		case 'm':
		case 'M':
			scale = 1024*1024;
			break;
		case 'w':
		case 'W':
			scale = 4;
			break;
		}
		/* a recognized suffix is part of the number: step over it */
		if (scale != 1)
			*e = *e + 1;
	}

	/* range-check before multiplying so the product cannot overflow */
	if (val > INT_MAX / scale) {
		errno = ERANGE;
		return (INT_MAX);
	}
	if (val < INT_MIN / scale) {
		errno = ERANGE;
		return (INT_MIN);
	}
	return ((int)(val * scale));
}
974
975 #ifdef AIO
976
977 #define QSZ     1024
978 io_context_t    io_ctx;
979 struct iocb     iocb;
980
981 int aio_setup()
982 {
983         int ret;
984         ret = io_queue_init(QSZ, &io_ctx);
985         if (ret != 0) {
986                 fprintf(stderr, "aio_setup: io_queue_init failed: %s\n",
987                         strerror(ret));
988                 return(-1);
989         }
990         return(0);
991 }
992
993 int
994 __aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
995 {
996         struct io_event event;
997         static struct timespec ts;
998         struct iocb *iocbs[] = { &iocb };
999         int ret;
1000         long res;
1001
1002         if (rw == READ) {
1003                 io_prep_pread(&iocb, fd, buf, len, offset);
1004         } else {
1005                 io_prep_pwrite(&iocb, fd, buf, len, offset);
1006         }
1007
1008         ts.tv_sec = 30;
1009         ts.tv_nsec = 0;
1010         ret = io_submit(io_ctx, 1, iocbs);
1011         if (ret != 1) {
1012                 fprintf(stderr, "errcode=%d\n", ret);
1013                 fprintf(stderr, "aio_rw: io_submit failed: %s\n",
1014                                 strerror(ret));
1015                 goto out_error;
1016         }
1017
1018         ret = io_getevents(io_ctx, 1, 1, &event, &ts);
1019         if (ret != 1) {
1020                 if (ret == 0)
1021                         fprintf(stderr, "aio_rw: no events available\n");
1022                 else {
1023                         fprintf(stderr, "errcode=%d\n", -ret);
1024                         fprintf(stderr, "aio_rw: io_getevents failed: %s\n",
1025                                         strerror(-ret));
1026                 }
1027                 goto out_error;
1028         }
1029         if (len != event.res) {
1030                 /*
1031                  * The b0rked libaio defines event.res as unsigned.
1032                  * However the kernel strucuture has it signed,
1033                  * and it's used to pass negated error value.
1034                  * Till the library is fixed use the temp var.
1035                  */
1036                 res = (long)event.res;
1037                 if (res >= 0)
1038                         fprintf(stderr, "bad io length: %lu instead of %u\n",
1039                                         res, len);
1040                 else {
1041                         fprintf(stderr, "errcode=%d\n", -res);
1042                         fprintf(stderr, "aio_rw: async io failed: %s\n",
1043                                         strerror(-res));
1044                         ret = res;
1045                         goto out_error;
1046                 }
1047
1048         }
1049         return event.res;
1050
1051 out_error:
1052         /*
1053          * The caller expects error return in traditional libc
1054          * convention, i.e. -1 and the errno set to error.
1055          */
1056         errno = -ret;
1057         return -1;
1058 }
1059
1060 int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
1061 {
1062         int ret;
1063
1064         if (aio) {
1065                 ret = __aio_rw(rw, fd, buf, len, offset);
1066         } else {
1067                 if (rw == READ)
1068                         ret = read(fd, buf, len);
1069                 else
1070                         ret = write(fd, buf, len);
1071         }
1072         return ret;
1073 }
1074
1075 #endif
1076
1077 int
1078 main(int argc, char **argv)
1079 {
1080         int     i, style, ch;
1081         char    *endp;
1082         char goodfile[1024];
1083         char logfile[1024];
1084
1085         goodfile[0] = 0;
1086         logfile[0] = 0;
1087
1088         page_size = getpagesize();
1089         page_mask = page_size - 1;
1090         mmap_mask = page_mask;
1091         
1092
1093         setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
1094
1095         while ((ch = getopt(argc, argv, "b:c:dfl:m:no:p:qr:s:t:w:xyAD:LN:OP:RS:WZ"))
1096                != EOF)
1097                 switch (ch) {
1098                 case 'b':
1099                         simulatedopcount = getnum(optarg, &endp);
1100                         if (!quiet)
1101                                 fprintf(stdout, "Will begin at operation %ld\n",
1102                                         simulatedopcount);
1103                         if (simulatedopcount == 0)
1104                                 usage();
1105                         simulatedopcount -= 1;
1106                         break;
1107                 case 'c':
1108                         closeprob = getnum(optarg, &endp);
1109                         if (!quiet)
1110                                 fprintf(stdout,
1111                                         "Chance of close/open is 1 in %d\n",
1112                                         closeprob);
1113                         if (closeprob <= 0)
1114                                 usage();
1115                         break;
1116                 case 'd':
1117                         debug = 1;
1118                         break;
1119                 case 'f':
1120                         flush = 1;
1121                         break;
1122                 case 'l':
1123                         maxfilelen = getnum(optarg, &endp);
1124                         if (maxfilelen <= 0)
1125                                 usage();
1126                         break;
1127                 case 'm':
1128                         monitorstart = getnum(optarg, &endp);
1129                         if (monitorstart < 0)
1130                                 usage();
1131                         if (!endp || *endp++ != ':')
1132                                 usage();
1133                         monitorend = getnum(endp, &endp);
1134                         if (monitorend < 0)
1135                                 usage();
1136                         if (monitorend == 0)
1137                                 monitorend = -1; /* aka infinity */
1138                         debug = 1;
1139                 case 'n':
1140                         sizechecks = 0;
1141                         break;
1142                 case 'o':
1143                         maxoplen = getnum(optarg, &endp);
1144                         if (maxoplen <= 0)
1145                                 usage();
1146                         break;
1147                 case 'p':
1148                         progressinterval = getnum(optarg, &endp);
1149                         if (progressinterval == 0)
1150                                 usage();
1151                         break;
1152                 case 'q':
1153                         quiet = 1;
1154                         break;
1155                 case 'r':
1156                         readbdy = getnum(optarg, &endp);
1157                         if (readbdy <= 0)
1158                                 usage();
1159                         break;
1160                 case 's':
1161                         style = getnum(optarg, &endp);
1162                         if (style < 0 || style > 1)
1163                                 usage();
1164                         break;
1165                 case 't':
1166                         truncbdy = getnum(optarg, &endp);
1167                         if (truncbdy <= 0)
1168                                 usage();
1169                         break;
1170                 case 'w':
1171                         writebdy = getnum(optarg, &endp);
1172                         if (writebdy <= 0)
1173                                 usage();
1174                         break;
1175                 case 'x':
1176                         prealloc = 1;
1177                         break;
1178                 case 'y':
1179                         do_fsync = 1;
1180                         break;
1181                 case 'A':
1182                         aio = 1;
1183                         break;
1184                 case 'D':
1185                         debugstart = getnum(optarg, &endp);
1186                         if (debugstart < 1)
1187                                 usage();
1188                         break;
1189                 case 'L':
1190                         lite = 1;
1191                         break;
1192                 case 'N':
1193                         numops = getnum(optarg, &endp);
1194                         if (numops < 0)
1195                                 usage();
1196                         break;
1197                 case 'O':
1198                         randomoplen = 0;
1199                         break;
1200                 case 'P':
1201                         strncpy(goodfile, optarg, sizeof(goodfile));
1202                         strcat(goodfile, "/");
1203                         strncpy(logfile, optarg, sizeof(logfile));
1204                         strcat(logfile, "/");
1205                         break;
1206                 case 'R':
1207                         mapped_reads = 0;
1208                         break;
1209                 case 'S':
1210                         seed = getnum(optarg, &endp);
1211                         if (seed == 0)
1212                                 seed = time(0) % 10000;
1213                         if (!quiet)
1214                                 fprintf(stdout, "Seed set to %d\n", seed);
1215                         if (seed < 0)
1216                                 usage();
1217                         break;
1218                 case 'W':
1219                         mapped_writes = 0;
1220                         if (!quiet)
1221                                 fprintf(stdout, "mapped writes DISABLED\n");
1222                         break;
1223                 case 'Z':
1224                         o_direct = O_DIRECT;
1225                         break;
1226                 default:
1227                         usage();
1228                         /* NOTREACHED */
1229                 }
1230         argc -= optind;
1231         argv += optind;
1232         if (argc != 1)
1233                 usage();
1234         fname = argv[0];
1235
1236         signal(SIGHUP,  cleanup);
1237         signal(SIGINT,  cleanup);
1238         signal(SIGPIPE, cleanup);
1239         signal(SIGALRM, cleanup);
1240         signal(SIGTERM, cleanup);
1241         signal(SIGXCPU, cleanup);
1242         signal(SIGXFSZ, cleanup);
1243         signal(SIGVTALRM,       cleanup);
1244         signal(SIGUSR1, cleanup);
1245         signal(SIGUSR2, cleanup);
1246
1247         initstate(seed, state, 256);
1248         setstate(state);
1249         fd = open(fname,
1250                 O_RDWR|(lite ? 0 : O_CREAT|O_TRUNC)|o_direct, 0666);
1251         if (fd < 0) {
1252                 prterr(fname);
1253                 exit(91);
1254         }
1255 #ifdef XFS
1256         if (prealloc) {
1257                 xfs_flock64_t   resv = { 0 };
1258 #ifdef HAVE_XFS_PLATFORM_DEFS_H
1259                 if (!platform_test_xfs_fd(fd)) {
1260                         prterr(fname);
1261                         fprintf(stderr, "main: cannot prealloc, non XFS\n");
1262                         exit(96);
1263                 }
1264 #endif
1265                 resv.l_len = maxfilelen;
1266                 if ((xfsctl(fname, fd, XFS_IOC_RESVSP, &resv)) < 0) {
1267                         prterr(fname);
1268                         exit(97);
1269                 }
1270         }
1271 #endif
1272         strncat(goodfile, fname, 256);
1273         strcat (goodfile, ".fsxgood");
1274         fsxgoodfd = open(goodfile, O_RDWR|O_CREAT|O_TRUNC, 0666);
1275         if (fsxgoodfd < 0) {
1276                 prterr(goodfile);
1277                 exit(92);
1278         }
1279         strncat(logfile, fname, 256);
1280         strcat (logfile, ".fsxlog");
1281         fsxlogf = fopen(logfile, "w");
1282         if (fsxlogf == NULL) {
1283                 prterr(logfile);
1284                 exit(93);
1285         }
1286
1287 #ifdef AIO
1288         if (aio) 
1289                 aio_setup();
1290 #endif
1291
1292         if (lite) {
1293                 off_t ret;
1294                 file_size = maxfilelen = lseek(fd, (off_t)0, SEEK_END);
1295                 if (file_size == (off_t)-1) {
1296                         prterr(fname);
1297                         warn("main: lseek eof");
1298                         exit(94);
1299                 }
1300                 ret = lseek(fd, (off_t)0, SEEK_SET);
1301                 if (ret == (off_t)-1) {
1302                         prterr(fname);
1303                         warn("main: lseek 0");
1304                         exit(95);
1305                 }
1306         }
1307         original_buf = (char *) malloc(maxfilelen);
1308         for (i = 0; i < maxfilelen; i++)
1309                 original_buf[i] = random() % 256;
1310         good_buf = (char *) malloc(maxfilelen + writebdy);
1311         good_buf = round_up(good_buf, writebdy, 0);
1312         memset(good_buf, '\0', maxfilelen);
1313         temp_buf = (char *) malloc(maxoplen + readbdy);
1314         temp_buf = round_up(temp_buf, readbdy, 0);
1315         memset(temp_buf, '\0', maxoplen);
1316         if (lite) {     /* zero entire existing file */
1317                 ssize_t written;
1318
1319                 written = write(fd, good_buf, (size_t)maxfilelen);
1320                 if (written != maxfilelen) {
1321                         if (written == -1) {
1322                                 prterr(fname);
1323                                 warn("main: error on write");
1324                         } else
1325                                 warn("main: short write, 0x%x bytes instead "
1326                                         "of 0x%lx\n",
1327                                         (unsigned)written,
1328                                         maxfilelen);
1329                         exit(98);
1330                 }
1331         } else 
1332                 check_trunc_hack();
1333
1334         while (numops == -1 || numops--)
1335                 test();
1336
1337         if (close(fd)) {
1338                 prterr("close");
1339                 report_failure(99);
1340         }
1341         prt("All operations completed A-OK!\n");
1342
1343         exit(0);
1344         return 0;
1345 }