xfstests: fix some warnings
[xfstests-dev.git] / ltp / fsx.c
index db5fcffbc2bd8431261e5ad43f15a9fd49c3aa00..1167d728922c12feeb4cafc0bd70835359add00b 100644 (file)
--- a/ltp/fsx.c
+++ b/ltp/fsx.c
@@ -1,24 +1,5 @@
 /*
- * Copyright (C) 1991, NeXT Computer, Inc.  All Rights Reserverd.
- *
- * @APPLE_LICENSE_HEADER_START@
- * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
- * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
- * 
- * @APPLE_LICENSE_HEADER_END@
- *
+ *     Copyright (C) 1991, NeXT Computer, Inc.  All Rights Reserverd.
  *
  *     File:   fsx.c
  *     Author: Avadis Tevanian, Jr.
  *
  *     Small changes to work under Linux -- davej.
  *
- *     XFS space preallocation changes -- nathans.
+ *     Checks for mmap last-page zero fill.
  */
 
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/param.h>
+#include "global.h"
+
 #include <limits.h>
 #include <time.h>
 #include <strings.h>
 #include <sys/file.h>
 #include <sys/mman.h>
-#include <limits.h>
+#ifdef HAVE_ERR_H
 #include <err.h>
+#endif
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
 #include <stdarg.h>
 #include <errno.h>
-
-#include <xfs/libxfs.h>
+#ifdef AIO
+#include <libaio.h>
+#endif
 
 #ifndef MAP_FILE
 # define MAP_FILE 0
@@ -85,9 +66,9 @@ int                   logcount = 0;   /* total ops */
 #define OP_MAPWRITE    6
 #define OP_SKIPPED     7
 
-#ifndef PAGE_SIZE
+#undef PAGE_SIZE
 #define PAGE_SIZE       getpagesize()
-#endif
+#undef PAGE_MASK
 #define PAGE_MASK       (PAGE_SIZE - 1)
 
 char   *original_buf;                  /* a pointer to the original data */
@@ -105,6 +86,8 @@ unsigned long        simulatedopcount = 0;   /* -b flag */
 int    closeprob = 0;                  /* -c flag */
 int    debug = 0;                      /* -d flag */
 unsigned long  debugstart = 0;         /* -D flag */
+int    flush = 0;                      /* -f flag */
+int    do_fsync = 0;                   /* -y flag */
 unsigned long  maxfilelen = 256 * 1024;        /* -l flag */
 int    sizechecks = 1;                 /* -n flag disables them */
 int    maxoplen = 64 * 1024;           /* -o flag */
@@ -112,7 +95,7 @@ int  quiet = 0;                      /* -q flag */
 unsigned long progressinterval = 0;    /* -p flag */
 int    readbdy = 1;                    /* -r flag */
 int    style = 0;                      /* -s flag */
-int    prealloc = 0;                   /* -u flag */
+int    prealloc = 0;                   /* -x flag */
 int    truncbdy = 1;                   /* -t flag */
 int    writebdy = 1;                   /* -w flag */
 long   monitorstart = -1;              /* -m flag */
@@ -124,21 +107,68 @@ int       seed = 1;                       /* -S flag */
 int     mapped_writes = 1;              /* -W flag disables */
 int    mapped_reads = 1;               /* -R flag disables it */
 int    fsxgoodfd = 0;
+int    o_direct;                       /* -Z */
+int    aio = 0;
+
+int page_size;
+int page_mask;
+int mmap_mask;
+#ifdef AIO
+int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
+#define READ 0
+#define WRITE 1
+#define fsxread(a,b,c,d)       aio_rw(READ, a,b,c,d)
+#define fsxwrite(a,b,c,d)      aio_rw(WRITE, a,b,c,d)
+#else
+#define fsxread(a,b,c,d)       read(a,b,c)
+#define fsxwrite(a,b,c,d)      write(a,b,c)
+#endif
+
 FILE * fsxlogf = NULL;
-int    badoff = -1;
-int    closeopen = 0;
+int badoff = -1;
+int closeopen = 0;
+
+/*
+ * Advance ptr to the next multiple of align (a pointer that is already a
+ * multiple is left where it is), then add offset bytes to the result.
+ * The mask arithmetic is only meaningful when align is a power of two.
+ */
+static void *round_up(void *ptr, unsigned long align, unsigned long offset)
+{
+       unsigned long addr = (unsigned long)ptr;
+       unsigned long low_bits = align - 1;
+
+       addr = (addr + low_bits) & ~low_bits;
+       return (void *)(addr + offset);
+}
 
+/*
+ * Write a diagnostic to stderr: the "fsx: " prefix, then (only when fmt is
+ * non-NULL) the formatted message followed by ": ", then the strerror()
+ * text for code and a newline.
+ */
+void
+vwarnc(int code, const char *fmt, va_list ap)
+{
+       fputs("fsx: ", stderr);
+       if (fmt) {
+               vfprintf(stderr, fmt, ap);
+               fputs(": ", stderr);
+       }
+       fprintf(stderr, "%s\n", strerror(code));
+}
+
+/*
+ * printf-style front end to vwarnc(): reports the formatted message
+ * together with the text for the errno value current at the time of the
+ * call.
+ */
+void
+warn(const char * fmt, ...)
+{
+       int code = errno;       /* value handed on to vwarnc() */
+       va_list varargs;
+
+       va_start(varargs, fmt);
+       vwarnc(code, fmt, varargs);
+       va_end(varargs);
+}
+
+#define BUF_SIZE 1024
 
+/*
+ * printf-style message to stdout and, when fsxlogf is non-NULL, to that
+ * log stream as well.  The message is formatted once into a BUF_SIZE-byte
+ * buffer (longer output is truncated by vsnprintf) and then written out
+ * verbatim to each destination.
+ */
 void
 prt(char *fmt, ...)
 {
        va_list args;
+       char buffer[BUF_SIZE];
 
        va_start(args, fmt);
-       vfprintf(stdout, fmt, args);
-       if (fsxlogf)
-               vfprintf(fsxlogf, fmt, args);
+       vsnprintf(buffer, BUF_SIZE, fmt, args);
        va_end(args);
+       /*
+        * fputs, not fprintf: buffer is already-expanded text and may
+        * itself contain '%' (e.g. from a %s argument), which must not
+        * be interpreted as a conversion a second time.
+        */
+       fputs(buffer, stdout);
+       if (fsxlogf)
+               fputs(buffer, fsxlogf);
 }
 
 void
@@ -266,11 +296,11 @@ save_buffer(char *buffer, off_t bufferlength, int fd)
                exit(67);
        }
        if (lite) {
-               off_t size_by_seek = lseek(fd, (off_t)0, L_XTND);
+               off_t size_by_seek = lseek(fd, (off_t)0, SEEK_END);
                if (size_by_seek == (off_t)-1)
                        prterr("save_buffer: lseek eof");
                else if (bufferlength > size_by_seek) {
-                       fprintf(stderr, "save_buffer: .fsxgood file too short... will save 0x%qx bytes instead of 0x%qx\n", (unsigned long long)size_by_seek,
+                       warn("save_buffer: .fsxgood file too short... will save 0x%llx bytes instead of 0x%llx\n", (unsigned long long)size_by_seek,
                             (unsigned long long)bufferlength);
                        bufferlength = size_by_seek;
                }
@@ -285,7 +315,7 @@ save_buffer(char *buffer, off_t bufferlength, int fd)
                if (byteswritten == -1)
                        prterr("save_buffer write");
                else
-                       fprintf(stderr, "save_buffer: short write, 0x%x bytes instead of 0x%qx\n",
+                       warn("save_buffer: short write, 0x%x bytes instead of 0x%llx\n",
                             (unsigned)byteswritten,
                             (unsigned long long)bufferlength);
        }
@@ -322,19 +352,28 @@ check_buffers(unsigned offset, unsigned size)
        unsigned op = 0;
        unsigned bad = 0;
 
-       if (bcmp(good_buf + offset, temp_buf, size) != 0) {
-               prt("READ BAD DATA: offset = 0x%x, size = 0x%x\n",
-                   offset, size);
+       if (memcmp(good_buf + offset, temp_buf, size) != 0) {
+               prt("READ BAD DATA: offset = 0x%x, size = 0x%x, fname = %s\n",
+                   offset, size, fname);
                prt("OFFSET\tGOOD\tBAD\tRANGE\n");
                while (size > 0) {
                        c = good_buf[offset];
                        t = temp_buf[i];
                        if (c != t) {
-                               if (n == 0) {
+                               if (n < 16) {
                                        bad = short_at(&temp_buf[i]);
                                        prt("0x%5x\t0x%04x\t0x%04x", offset,
                                            short_at(&good_buf[offset]), bad);
                                        op = temp_buf[offset & 1 ? i+1 : i];
+                                       prt("\t0x%5x\n", n);
+                                       if (op)
+                                               prt("operation# (mod 256) for "
+                                                 "the bad data may be %u\n",
+                                               ((unsigned)op & 0xff));
+                                       else
+                                               prt("operation# (mod 256) for "
+                                                 "the bad data unknown, check"
+                                                 " HOLE and EXTEND ops\n");
                                }
                                n++;
                                badoff = offset;
@@ -343,14 +382,6 @@ check_buffers(unsigned offset, unsigned size)
                        i++;
                        size--;
                }
-               if (n) {
-                       prt("\t0x%5x\n", n);
-                       if (bad)
-                               prt("operation# (mod 256) for the bad data may be %u\n", ((unsigned)op & 0xff));
-                       else
-                               prt("operation# (mod 256) for the bad data unknown, check HOLE and EXTEND ops\n");
-               } else
-                       prt("????????????????\n");
                report_failure(110);
        }
 }
@@ -366,9 +397,9 @@ check_size(void)
                prterr("check_size: fstat");
                statbuf.st_size = -1;
        }
-       size_by_seek = lseek(fd, (off_t)0, L_XTND);
+       size_by_seek = lseek(fd, (off_t)0, SEEK_END);
        if (file_size != statbuf.st_size || file_size != size_by_seek) {
-               prt("Size error: expected 0x%qx stat 0x%qx seek 0x%qx\n",
+               prt("Size error: expected 0x%llx stat 0x%llx seek 0x%llx\n",
                    (unsigned long long)file_size,
                    (unsigned long long)statbuf.st_size,
                    (unsigned long long)size_by_seek);
@@ -392,6 +423,34 @@ check_trunc_hack(void)
        ftruncate(fd, 0);
 }
 
+/*
+ * Map the file range [offset, offset + size) shared and read/write, starting
+ * at offset with the mmap_mask bits cleared, msync() it with
+ * MS_INVALIDATE, and unmap it again.  Returns immediately when o_direct is
+ * O_DIRECT.  A failing mmap/msync/munmap is logged with prterr() and passed
+ * to report_failure() with code 202/203/204 respectively.
+ */
+void
+doflush(unsigned offset, unsigned size)
+{
+       unsigned start_in_page;
+       unsigned length;
+       char    *mapping;
+
+       if (o_direct == O_DIRECT)
+               return;
+
+       /* bytes between the mapping start and the requested offset */
+       start_in_page = offset & mmap_mask;
+       length = start_in_page + size;
+
+       mapping = (char *)mmap(0, length, PROT_READ | PROT_WRITE,
+                              MAP_FILE | MAP_SHARED, fd,
+                              (off_t)(offset - start_in_page));
+       if (mapping == (char *)-1) {
+               prterr("doflush: mmap");
+               report_failure(202);
+       }
+
+       if (msync(mapping, length, MS_INVALIDATE) != 0) {
+               prterr("doflush: msync");
+               report_failure(203);
+       }
+
+       if (munmap(mapping, length) != 0) {
+               prterr("doflush: munmap");
+               report_failure(204);
+       }
+}
 
 void
 doread(unsigned offset, unsigned size)
@@ -400,8 +459,10 @@ doread(unsigned offset, unsigned size)
        unsigned iret;
 
        offset -= offset % readbdy;
+       if (o_direct)
+               size -= size % readbdy;
        if (size == 0) {
-               if (!quiet && testcalls > simulatedopcount)
+               if (!quiet && testcalls > simulatedopcount && !o_direct)
                        prt("skipping zero size read\n");
                log4(OP_SKIPPED, OP_READ, offset, size);
                return;
@@ -418,11 +479,12 @@ doread(unsigned offset, unsigned size)
        if (testcalls <= simulatedopcount)
                return;
 
-       if (!quiet && ((progressinterval && !(testcalls % progressinterval)) ||
-                      (debug &&
-                       (monitorstart == -1 ||
-                        (offset + size > monitorstart &&
-                         (monitorend == -1 || offset <= monitorend))))))
+       if (!quiet &&
+               ((progressinterval && testcalls % progressinterval == 0)  ||
+               (debug &&
+                      (monitorstart == -1 ||
+                       (offset + size > monitorstart &&
+                       (monitorend == -1 || offset <= monitorend))))))
                prt("%lu read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
                    offset, offset + size - 1, size);
        ret = lseek(fd, (off_t)offset, SEEK_SET);
@@ -430,7 +492,7 @@ doread(unsigned offset, unsigned size)
                prterr("doread: lseek");
                report_failure(140);
        }
-       iret = read(fd, temp_buf, size);
+       iret = fsxread(fd, temp_buf, size, offset);
        if (iret != size) {
                if (iret == -1)
                        prterr("doread: read");
@@ -443,6 +505,33 @@ doread(unsigned offset, unsigned size)
 }
 
 
+/*
+ * After a mapped read or write, verify that the bytes between end-of-file
+ * and the end of the file's last page read back as zero.
+ *
+ *   s      - label used in the failure message ("Read" / "Write" at the
+ *            call sites)
+ *   offset - file offset of the operation
+ *   p      - start of the mapping used for the operation
+ *   size   - length of the operation in bytes
+ *
+ * Returns without checking when the operation ends at or before the start
+ * of the page containing file_size.  Every non-zero byte found past EOF is
+ * reported through prt() and report_failure(205).
+ */
+void
+check_eofpage(char *s, unsigned offset, char *p, int size)
+{
+       unsigned long last_page, should_be_zero;
+
+       if (offset + size <= (file_size & ~page_mask))
+               return;
+       /*
+        * we landed in the last page of the file
+        * test to make sure the VM system provided 0's
+        * beyond the true end of the file mapping
+        * (as required by mmap def in 1996 posix 1003.1)
+        */
+       last_page = ((unsigned long)p + (offset & page_mask) + size) & ~page_mask;
+
+       for (should_be_zero = last_page + (file_size & page_mask);
+            should_be_zero < last_page + page_size;
+            should_be_zero++) {
+               if (*(char *)should_be_zero) {
+                       /*
+                        * Cast every argument to the type its conversion
+                        * expects: off_t and unsigned long do not
+                        * necessarily match %llx / %x, and short_at() is
+                        * handed a pointer, not an integer address.
+                        */
+                       prt("Mapped %s: non-zero data past EOF (0x%llx) page offset 0x%x is 0x%04x\n",
+                           s, (unsigned long long)(file_size - 1),
+                           (unsigned)(should_be_zero & page_mask),
+                           short_at((char *)should_be_zero));
+                       report_failure(205);
+               }
+       }
+}
+
+
 void
 domapread(unsigned offset, unsigned size)
 {
@@ -469,11 +558,12 @@ domapread(unsigned offset, unsigned size)
        if (testcalls <= simulatedopcount)
                return;
 
-       if (!quiet && ((progressinterval && !(testcalls % progressinterval)) ||
+       if (!quiet &&
+               ((progressinterval && testcalls % progressinterval == 0) ||
                       (debug &&
-                       (monitorstart == -1 ||
-                        (offset + size > monitorstart &&
-                         ((monitorend == -1 || offset <= monitorend)))))))
+                      (monitorstart == -1 ||
+                       (offset + size > monitorstart &&
+                       (monitorend == -1 || offset <= monitorend))))))
                prt("%lu mapread\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
                    offset, offset + size - 1, size);
 
@@ -486,6 +576,9 @@ domapread(unsigned offset, unsigned size)
                report_failure(190);
        }
        memcpy(temp_buf, p + pg_offset, size);
+
+       check_eofpage("Read", offset, p, size);
+
        if (munmap(p, map_size) != 0) {
                prterr("domapread: munmap");
                report_failure(191);
@@ -514,8 +607,10 @@ dowrite(unsigned offset, unsigned size)
        unsigned iret;
 
        offset -= offset % writebdy;
+       if (o_direct)
+               size -= size % writebdy;
        if (size == 0) {
-               if (!quiet && testcalls > simulatedopcount)
+               if (!quiet && testcalls > simulatedopcount && !o_direct)
                        prt("skipping zero size write\n");
                log4(OP_SKIPPED, OP_WRITE, offset, size);
                return;
@@ -526,21 +621,23 @@ dowrite(unsigned offset, unsigned size)
        gendata(original_buf, good_buf, offset, size);
        if (file_size < offset + size) {
                if (file_size < offset)
-                       bzero(good_buf + file_size, offset - file_size);
+                       memset(good_buf + file_size, '\0', offset - file_size);
                file_size = offset + size;
                if (lite) {
-                       fprintf(stderr, "Lite file size bug in fsx!\n");
+                       warn("Lite file size bug in fsx!");
                        report_failure(149);
                }
        }
 
        if (testcalls <= simulatedopcount)
                return;
-       if (!quiet && ((progressinterval && !(testcalls % progressinterval)) ||
+
+       if (!quiet &&
+               ((progressinterval && testcalls % progressinterval == 0) ||
                       (debug &&
-                       (monitorstart == -1 ||
-                        (offset + size > monitorstart &&
-                         ((monitorend == -1 || offset <= monitorend)))))))
+                      (monitorstart == -1 ||
+                       (offset + size > monitorstart &&
+                       (monitorend == -1 || offset <= monitorend))))))
                prt("%lu write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
                    offset, offset + size - 1, size);
        ret = lseek(fd, (off_t)offset, SEEK_SET);
@@ -548,7 +645,7 @@ dowrite(unsigned offset, unsigned size)
                prterr("dowrite: lseek");
                report_failure(150);
        }
-       iret = write(fd, good_buf + offset, size);
+       iret = fsxwrite(fd, good_buf + offset, size, offset);
        if (iret != size) {
                if (iret == -1)
                        prterr("dowrite: write");
@@ -557,6 +654,15 @@ dowrite(unsigned offset, unsigned size)
                            iret, size);
                report_failure(151);
        }
+       if (do_fsync) {
+               if (fsync(fd)) {
+                       prt("fsync() failed: %s\n", strerror(errno));
+                       report_failure(152);
+               }
+       }
+       if (flush) {
+               doflush(offset, size);
+       }
 }
 
 
@@ -582,10 +688,10 @@ domapwrite(unsigned offset, unsigned size)
        gendata(original_buf, good_buf, offset, size);
        if (file_size < offset + size) {
                if (file_size < offset)
-                       bzero(good_buf + file_size, offset - file_size);
+                       memset(good_buf + file_size, '\0', offset - file_size);
                file_size = offset + size;
                if (lite) {
-                       fprintf(stderr, "Lite file size bug in fsx!\n");
+                       warn("Lite file size bug in fsx!");
                        report_failure(200);
                }
        }
@@ -593,11 +699,12 @@ domapwrite(unsigned offset, unsigned size)
        if (testcalls <= simulatedopcount)
                return;
 
-       if (!quiet && ((progressinterval && !(testcalls % progressinterval)) ||
+       if (!quiet &&
+               ((progressinterval && testcalls % progressinterval == 0) ||
                       (debug &&
-                       (monitorstart == -1 ||
-                        (offset + size > monitorstart &&
-                         (monitorend == -1 || offset <= monitorend))))))
+                      (monitorstart == -1 ||
+                       (offset + size > monitorstart &&
+                       (monitorend == -1 || offset <= monitorend))))))
                prt("%lu mapwrite\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
                    offset, offset + size - 1, size);
 
@@ -621,6 +728,9 @@ domapwrite(unsigned offset, unsigned size)
                prterr("domapwrite: msync");
                report_failure(203);
        }
+
+       check_eofpage("Write", offset, p, size);
+
        if (munmap(p, map_size) != 0) {
                prterr("domapwrite: munmap");
                report_failure(204);
@@ -643,13 +753,13 @@ dotruncate(unsigned size)
        log4(OP_TRUNCATE, size, (unsigned)file_size, 0);
 
        if (size > file_size)
-               bzero(good_buf + file_size, size - file_size);
+               memset(good_buf + file_size, '\0', size - file_size);
        file_size = size;
 
        if (testcalls <= simulatedopcount)
                return;
        
-       if ((progressinterval && !(testcalls % progressinterval)) ||
+       if ((progressinterval && testcalls % progressinterval == 0) ||
            (debug && (monitorstart == -1 || monitorend == -1 ||
                      size <= monitorend)))
                prt("%lu trunc\tfrom 0x%x to 0x%x\n", testcalls, oldsize, size);
@@ -675,12 +785,12 @@ writefileimage()
                if (iret == -1)
                        prterr("writefileimage: write");
                else
-                       prt("short write: 0x%x bytes instead of 0x%qx\n",
+                       prt("short write: 0x%x bytes instead of 0x%llx\n",
                            iret, (unsigned long long)file_size);
                report_failure(172);
        }
        if (lite ? 0 : ftruncate(fd, file_size) == -1) {
-               prt("ftruncate2: %qx\n", (unsigned long long)file_size);
+               prt("ftruncate2: %llx\n", (unsigned long long)file_size);
                prterr("writefileimage: ftruncate");
                report_failure(173);
        }
@@ -699,7 +809,7 @@ docloseopen(void)
                prterr("docloseopen: close");
                report_failure(180);
        }
-       fd = open(fname, O_RDWR, 0);
+       fd = open(fname, O_RDWR|o_direct, 0);
        if (fd < 0) {
                prterr("docloseopen: open");
                report_failure(181);
@@ -794,10 +904,11 @@ void
 usage(void)
 {
        fprintf(stdout, "usage: %s",
-               "fsx [-dnqLOW] [-b opnum] [-c Prob] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] fname\n\
+               "fsx [-dnqxALOWZ] [-b opnum] [-c Prob] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] fname\n\
        -b opnum: beginning operation number (default 1)\n\
        -c P: 1 in P chance of file close+open at each op (default infinity)\n\
        -d: debug output for all operations\n\
+       -f flush and invalidate cache after I/O\n\
        -l flen: the upper bound on file size (default 262144)\n\
        -m startop:endop: monitor (print debug output) specified byte range (default 0:infinity)\n\
        -n: no verifications of file size\n\
@@ -809,7 +920,12 @@ usage(void)
        -t truncbdy: 4096 would make truncates page aligned (default 1)\n\
        -w writebdy: 4096 would make writes page aligned (default 1)\n\
        -x: preallocate file space before starting, XFS only (default 0)\n\
-       -D startingop: debug output starting at specified operation\n\
+       -y synchronize changes to a file\n"
+
+#ifdef AIO
+"      -A: Use the AIO system calls\n"
+#endif
+"      -D startingop: debug output starting at specified operation\n\
        -L: fsxLite - no file creations & no file size changes\n\
        -N numops: total # operations to do (default infinity)\n\
        -O: use oplen (see -o flag) for every op (default random)\n\
@@ -817,6 +933,7 @@ usage(void)
        -S seed: for random # generator (default 1) 0 gets timestamp\n\
        -W: mapped write operations DISabled\n\
         -R: read() system calls only (mapped reads disabled)\n\
+        -Z: O_DIRECT (use -R, -W, -r and -w too)\n\
        fname: this filename is REQUIRED (no default)\n");
        exit(90);
 }
@@ -825,7 +942,7 @@ usage(void)
 int
 getnum(char *s, char **e)
 {
-       int ret = -1;
+       int ret;
 
        *e = (char *) 0;
        ret = strtol(s, e, 0);
@@ -855,6 +972,107 @@ getnum(char *s, char **e)
        return (ret);
 }
 
+#ifdef AIO
+
+#define QSZ     1024
+io_context_t   io_ctx;
+struct iocb    iocb;
+
+/*
+ * Initialise the global AIO context io_ctx with room for QSZ requests.
+ * Returns 0 on success; on failure prints a diagnostic to stderr and
+ * returns -1.
+ */
+int aio_setup()
+{
+       int ret;
+
+       ret = io_queue_init(QSZ, &io_ctx);
+       if (ret != 0) {
+               /* libaio reports failure as a negated errno value */
+               fprintf(stderr, "aio_setup: io_queue_init failed: %s\n",
+                       strerror(-ret));
+               return(-1);
+       }
+       return(0);
+}
+
+/*
+ * Submit a single asynchronous pread (rw == READ) or pwrite (otherwise)
+ * of len bytes at offset through the global io_ctx, then wait up to 30
+ * seconds for its completion event.
+ *
+ * Returns the byte count from the completion event; a non-negative count
+ * that differs from len is logged to stderr and still returned so the
+ * caller can detect the short transfer.  On submit failure, timeout, wait
+ * failure or an I/O error, returns -1 with errno set to the positive
+ * error code.
+ */
+int
+__aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+{
+       struct io_event event;
+       static struct timespec ts;
+       struct iocb *iocbs[] = { &iocb };
+       int ret;
+       long res;
+
+       if (rw == READ) {
+               io_prep_pread(&iocb, fd, buf, len, offset);
+       } else {
+               io_prep_pwrite(&iocb, fd, buf, len, offset);
+       }
+
+       ts.tv_sec = 30;
+       ts.tv_nsec = 0;
+       ret = io_submit(io_ctx, 1, iocbs);
+       if (ret != 1) {
+               /* failure is a negated errno: flip the sign before use */
+               fprintf(stderr, "errcode=%d\n", -ret);
+               fprintf(stderr, "aio_rw: io_submit failed: %s\n",
+                               strerror(-ret));
+               goto out_error;
+       }
+
+       ret = io_getevents(io_ctx, 1, 1, &event, &ts);
+       if (ret != 1) {
+               if (ret == 0) {
+                       fprintf(stderr, "aio_rw: no events available\n");
+                       /*
+                        * Nothing completed within the timeout; without
+                        * this the caller would see -1 with errno 0.
+                        */
+                       ret = -ETIMEDOUT;
+               } else {
+                       fprintf(stderr, "errcode=%d\n", -ret);
+                       fprintf(stderr, "aio_rw: io_getevents failed: %s\n",
+                                       strerror(-ret));
+               }
+               goto out_error;
+       }
+       if (len != event.res) {
+               /*
+                * The b0rked libaio defines event.res as unsigned.
+                * However the kernel structure has it signed,
+                * and it's used to pass negated error value.
+                * Till the library is fixed use the temp var.
+                */
+               res = (long)event.res;
+               if (res >= 0)
+                       fprintf(stderr, "bad io length: %ld instead of %u\n",
+                                       res, len);
+               else {
+                       fprintf(stderr, "errcode=%ld\n", -res);
+                       fprintf(stderr, "aio_rw: async io failed: %s\n",
+                                       strerror(-res));
+                       ret = res;
+                       goto out_error;
+               }
+
+       }
+       return event.res;
+
+out_error:
+       /*
+        * The caller expects error return in traditional libc
+        * convention, i.e. -1 and the errno set to error.
+        */
+       errno = -ret;
+       return -1;
+}
+
+/*
+ * I/O entry point behind the fsxread/fsxwrite macros in AIO builds.
+ * With the aio flag set the request goes through __aio_rw(); otherwise it
+ * is a plain read() (rw == READ) or write() on fd at the current file
+ * position, and offset is not used.
+ */
+int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+{
+       if (aio)
+               return __aio_rw(rw, fd, buf, len, offset);
+
+       if (rw == READ)
+               return read(fd, buf, len);
+
+       return write(fd, buf, len);
+}
+
+#endif
 
 int
 main(int argc, char **argv)
@@ -867,9 +1085,14 @@ main(int argc, char **argv)
        goodfile[0] = 0;
        logfile[0] = 0;
 
+       page_size = getpagesize();
+       page_mask = page_size - 1;
+       mmap_mask = page_mask;
+       
+
        setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
 
-       while ((ch = getopt(argc, argv, "b:c:dl:m:no:p:qr:s:t:w:xD:LN:OP:RS:W"))
+       while ((ch = getopt(argc, argv, "b:c:dfl:m:no:p:qr:s:t:w:xyAD:LN:OP:RS:WZ"))
               != EOF)
                switch (ch) {
                case 'b':
@@ -893,6 +1116,9 @@ main(int argc, char **argv)
                case 'd':
                        debug = 1;
                        break;
+               case 'f':
+                       flush = 1;
+                       break;
                case 'l':
                        maxfilelen = getnum(optarg, &endp);
                        if (maxfilelen <= 0)
@@ -920,7 +1146,7 @@ main(int argc, char **argv)
                        break;
                case 'p':
                        progressinterval = getnum(optarg, &endp);
-                       if (progressinterval < 0)
+                       if (progressinterval == 0)
                                usage();
                        break;
                case 'q':
@@ -949,6 +1175,12 @@ main(int argc, char **argv)
                case 'x':
                        prealloc = 1;
                        break;
+               case 'y':
+                       do_fsync = 1;
+                       break;
+               case 'A':
+                       aio = 1;
+                       break;
                case 'D':
                        debugstart = getnum(optarg, &endp);
                        if (debugstart < 1)
@@ -988,7 +1220,9 @@ main(int argc, char **argv)
                        if (!quiet)
                                fprintf(stdout, "mapped writes DISABLED\n");
                        break;
-              
+               case 'Z':
+                       o_direct = O_DIRECT;
+                       break;
                default:
                        usage();
                        /* NOTREACHED */
@@ -999,7 +1233,6 @@ main(int argc, char **argv)
                usage();
        fname = argv[0];
 
-
        signal(SIGHUP,  cleanup);
        signal(SIGINT,  cleanup);
        signal(SIGPIPE, cleanup);
@@ -1013,26 +1246,29 @@ main(int argc, char **argv)
 
        initstate(seed, state, 256);
        setstate(state);
-       fd = open(fname, O_RDWR|(lite ? 0 : O_CREAT|O_TRUNC), 0666);
+       fd = open(fname,
+               O_RDWR|(lite ? 0 : O_CREAT|O_TRUNC)|o_direct, 0666);
        if (fd < 0) {
                prterr(fname);
                exit(91);
        }
+#ifdef XFS
        if (prealloc) {
                xfs_flock64_t   resv = { 0 };
-
+#ifdef HAVE_XFS_PLATFORM_DEFS_H
                if (!platform_test_xfs_fd(fd)) {
                        prterr(fname);
                        fprintf(stderr, "main: cannot prealloc, non XFS\n");
                        exit(96);
                }
-
+#endif
                resv.l_len = maxfilelen;
                if ((xfsctl(fname, fd, XFS_IOC_RESVSP, &resv)) < 0) {
                        prterr(fname);
                        exit(97);
                }
        }
+#endif
        strncat(goodfile, fname, 256);
        strcat (goodfile, ".fsxgood");
        fsxgoodfd = open(goodfile, O_RDWR|O_CREAT|O_TRUNC, 0666);
@@ -1047,28 +1283,36 @@ main(int argc, char **argv)
                prterr(logfile);
                exit(93);
        }
+
+#ifdef AIO
+       if (aio) 
+               aio_setup();
+#endif
+
        if (lite) {
                off_t ret;
-               file_size = maxfilelen = lseek(fd, (off_t)0, L_XTND);
+               file_size = maxfilelen = lseek(fd, (off_t)0, SEEK_END);
                if (file_size == (off_t)-1) {
                        prterr(fname);
-                       fprintf(stderr, "main: lseek eof\n");
+                       warn("main: lseek eof");
                        exit(94);
                }
                ret = lseek(fd, (off_t)0, SEEK_SET);
                if (ret == (off_t)-1) {
                        prterr(fname);
-                       fprintf(stderr, "main: lseek 0\n");
+                       warn("main: lseek 0");
                        exit(95);
                }
        }
        original_buf = (char *) malloc(maxfilelen);
        for (i = 0; i < maxfilelen; i++)
                original_buf[i] = random() % 256;
-       good_buf = (char *) malloc(maxfilelen);
-       bzero(good_buf, maxfilelen);
-       temp_buf = (char *) malloc(maxoplen);
-       bzero(temp_buf, maxoplen);
+       good_buf = (char *) malloc(maxfilelen + writebdy);
+       good_buf = round_up(good_buf, writebdy, 0);
+       memset(good_buf, '\0', maxfilelen);
+       temp_buf = (char *) malloc(maxoplen + readbdy);
+       temp_buf = round_up(temp_buf, readbdy, 0);
+       memset(temp_buf, '\0', maxoplen);
        if (lite) {     /* zero entire existing file */
                ssize_t written;
 
@@ -1076,10 +1320,12 @@ main(int argc, char **argv)
                if (written != maxfilelen) {
                        if (written == -1) {
                                prterr(fname);
-                               fprintf(stderr, "main: error on write\n");
+                               warn("main: error on write");
                        } else
-                               fprintf(stderr, "main: short write, 0x%x bytes instead of 0x%lx\n",
-                                    (unsigned)written, maxfilelen);
+                               warn("main: short write, 0x%x bytes instead "
+                                       "of 0x%lx\n",
+                                       (unsigned)written,
+                                       maxfilelen);
                        exit(98);
                }
        } else