]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
fsx: cleanup crypto library at exit
authorJason Dillaman <dillaman@redhat.com>
Tue, 28 Apr 2015 14:54:47 +0000 (10:54 -0400)
committerJason Dillaman <dillaman@redhat.com>
Fri, 17 Jul 2015 18:17:04 +0000 (14:17 -0400)
Also made small tweaks so that it can be compiled under
a C++ compiler.

Signed-off-by: Jason Dillaman <dillaman@redhat.com>
(cherry picked from commit c44f8e7fbc19924a9453d8c032c624ebb6c0296f)

src/test/Makefile-client.am
src/test/librbd/fsx.c [deleted file]
src/test/librbd/fsx.cc [new file with mode: 0644]

index e83a13107b996f5f61a2b0ac8a08d729df709b31..ca5ce803ea679c48ed3713e9029af002b5533e1b 100644 (file)
@@ -333,12 +333,11 @@ ceph_test_librbd_LDADD += $(LIBRBD_TP)
 endif
 
 if LINUX
-# Force use of C++ linker with dummy.cc - LIBKRBD is a C++ library
-ceph_test_librbd_fsx_SOURCES = test/librbd/fsx.c common/dummy.cc
+ceph_test_librbd_fsx_SOURCES = test/librbd/fsx.cc
 ceph_test_librbd_fsx_LDADD = \
        $(LIBKRBD) $(LIBRBD) $(LIBRADOS) \
        $(CRYPTO_LIBS) $(PTHREAD_LIBS) -luuid
-ceph_test_librbd_fsx_CFLAGS = ${AM_CFLAGS}
+ceph_test_librbd_fsx_CXXFLAGS = $(UNITTEST_CXXFLAGS)
 bin_DEBUGPROGRAMS += ceph_test_librbd_fsx
 endif
 endif # WITH_RBD
diff --git a/src/test/librbd/fsx.c b/src/test/librbd/fsx.c
deleted file mode 100644 (file)
index e2ad45c..0000000
+++ /dev/null
@@ -1,2321 +0,0 @@
-// -*- mode:C; tab-width:8; c-basic-offset:8; indent-tabs-mode:t -*- 
-/*
- *     Copyright (C) 1991, NeXT Computer, Inc.  All Rights Reserverd.
- *
- *     File:   fsx.c
- *     Author: Avadis Tevanian, Jr.
- *
- *     File system exerciser. 
- *
- *     Rewritten 8/98 by Conrad Minshall.
- *
- *     Small changes to work under Linux -- davej.
- *
- *     Checks for mmap last-page zero fill.
- */
-
-#include <sys/types.h>
-#include <unistd.h>
-#include <limits.h>
-#include <time.h>
-#include <strings.h>
-#include <sys/file.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <linux/fs.h>
-#include <sys/ioctl.h>
-#ifdef HAVE_ERR_H
-#include <err.h>
-#endif
-#include <signal.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-#include <assert.h>
-#include <errno.h>
-#include <math.h>
-
-#include "include/intarith.h"
-#include "include/krbd.h"
-#include "include/rados/librados.h"
-#include "include/rbd/librbd.h"
-
-#define NUMPRINTCOLUMNS 32     /* # columns of data to print on each line */
-
-/*
- *     A log entry is an operation and a bunch of arguments.
- */
-
-struct log_entry {
-       int     operation;
-       int     args[3];
-};
-
-#define        LOGSIZE 1000
-
-struct log_entry       oplog[LOGSIZE]; /* the log */
-int                    logptr = 0;     /* current position in log */
-int                    logcount = 0;   /* total ops */
-
-/*
- * The operation matrix is complex due to conditional execution of different
- * features. Hence when we come to deciding what operation to run, we need to
- * be careful in how we select the different operations. The active operations
- * are mapped to numbers as follows:
- *
- *             lite    !lite
- * READ:       0       0
- * WRITE:      1       1
- * MAPREAD:    2       2
- * MAPWRITE:   3       3
- * TRUNCATE:   -       4
- * FALLOCATE:  -       5
- * PUNCH HOLE: -       6
- *
- * When mapped read/writes are disabled, they are simply converted to normal
- * reads and writes. When fallocate/fpunch calls are disabled, they are
- * converted to OP_SKIPPED. Hence OP_SKIPPED needs to have a number higher than
- * the operation selction matrix, as does the OP_CLOSEOPEN which is an
- * operation modifier rather than an operation in itself.
- *
- * Because of the "lite" version, we also need to have different "maximum
- * operation" defines to allow the ops to be selected correctly based on the
- * mode being run.
- */
-
-/* common operations */
-#define        OP_READ         0
-#define OP_WRITE       1
-#define OP_MAPREAD     2
-#define OP_MAPWRITE    3
-#define OP_MAX_LITE    4
-
-/* !lite operations */
-#define OP_TRUNCATE    4
-#define OP_FALLOCATE   5
-#define OP_PUNCH_HOLE  6
-/* rbd-specific operations */
-#define OP_CLONE        7
-#define OP_FLATTEN     8
-#define OP_MAX_FULL    9
-
-/* operation modifiers */
-#define OP_CLOSEOPEN   100
-#define OP_SKIPPED     101
-
-#undef PAGE_SIZE
-#define PAGE_SIZE       getpagesize()
-#undef PAGE_MASK
-#define PAGE_MASK       (PAGE_SIZE - 1)
-
-char   *original_buf;                  /* a pointer to the original data */
-char   *good_buf;                      /* a pointer to the correct data */
-char   *temp_buf;                      /* a pointer to the current data */
-
-char   dirpath[1024];
-
-off_t          file_size = 0;
-off_t          biggest = 0;
-unsigned long  testcalls = 0;          /* calls to function "test" */
-
-unsigned long  simulatedopcount = 0;   /* -b flag */
-int    closeprob = 0;                  /* -c flag */
-int    debug = 0;                      /* -d flag */
-unsigned long  debugstart = 0;         /* -D flag */
-int    flush = 0;                      /* -f flag */
-int    holebdy = 1;                    /* -h flag */
-int    do_fsync = 0;                   /* -y flag */
-unsigned long  maxfilelen = 256 * 1024;        /* -l flag */
-int    sizechecks = 1;                 /* -n flag disables them */
-int    maxoplen = 64 * 1024;           /* -o flag */
-int    quiet = 0;                      /* -q flag */
-unsigned long progressinterval = 0;    /* -p flag */
-int    readbdy = 1;                    /* -r flag */
-int    style = 0;                      /* -s flag */
-int    prealloc = 0;                   /* -x flag */
-int    truncbdy = 1;                   /* -t flag */
-int    writebdy = 1;                   /* -w flag */
-long   monitorstart = -1;              /* -m flag */
-long   monitorend = -1;                /* -m flag */
-int    lite = 0;                       /* -L flag */
-long   numops = -1;                    /* -N flag */
-int    randomoplen = 1;                /* -O flag disables it */
-int    seed = 1;                       /* -S flag */
-int     mapped_writes = 0;              /* -W flag disables */
-int     fallocate_calls = 0;            /* -F flag disables */
-int     punch_hole_calls = 1;           /* -H flag disables */
-int    clone_calls = 1;                /* -C flag disables */
-int    randomize_striping = 1;         /* -U flag disables */
-int    randomize_parent_overlap = 1;
-int    mapped_reads = 0;               /* -R flag disables it */
-int    fsxgoodfd = 0;
-int    o_direct = 0;                   /* -Z flag */
-int    aio = 0;
-
-int num_clones = 0;
-
-int page_size;
-int page_mask;
-int mmap_mask;
-#ifdef AIO
-int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
-#define READ 0
-#define WRITE 1
-#define fsxread(a,b,c,d)       aio_rw(READ, a,b,c,d)
-#define fsxwrite(a,b,c,d)      aio_rw(WRITE, a,b,c,d)
-#else
-#define fsxread(a,b,c,d)       read(a,b,c)
-#define fsxwrite(a,b,c,d)      write(a,b,c)
-#endif
-
-FILE * fsxlogf = NULL;
-int badoff = -1;
-int closeopen = 0;
-
-void
-vwarnc(int code, const char *fmt, va_list ap) {
-  fprintf(stderr, "fsx: ");
-  if (fmt != NULL) {
-       vfprintf(stderr, fmt, ap);
-       fprintf(stderr, ": ");
-  }
-  fprintf(stderr, "%s\n", strerror(code));
-}
-
-void
-warn(const char * fmt, ...)  {
-       va_list ap;
-       va_start(ap, fmt);
-       vwarnc(errno, fmt, ap);
-       va_end(ap);
-}
-
-#define BUF_SIZE 1024
-
-void
-prt(char *fmt, ...)
-{
-       va_list args;
-       char buffer[BUF_SIZE];
-
-       va_start(args, fmt);
-       vsnprintf(buffer, BUF_SIZE, fmt, args);
-       va_end(args);
-       fprintf(stdout, "%s", buffer);
-       if (fsxlogf)
-               fprintf(fsxlogf, "%s", buffer);
-}
-
-void
-prterr(char *prefix)
-{
-       prt("%s%s%s\n", prefix, prefix ? ": " : "", strerror(errno));
-}
-
-void
-prterrcode(char *prefix, int code)
-{
-       prt("%s%s%s\n", prefix, prefix ? ": " : "", strerror(-code));
-}
-
-void
-simple_err(const char *msg, int err)
-{
-    fprintf(stderr, "%s: %s\n", msg, strerror(-err));
-}
-
-/*
- * random
- */
-
-#define RND_STATE_LEN  256
-char   rnd_state[RND_STATE_LEN];
-struct random_data rnd_data;
-
-int32_t
-get_random(void)
-{
-       int32_t val;
-
-       if (random_r(&rnd_data, &val) < 0) {
-               prterr("random_r");
-               exit(1);
-       }
-
-       return val;
-}
-
-/*
- * rbd
- */
-
-struct rbd_ctx {
-       const char *name;       /* image name */
-       rbd_image_t image;      /* image handle */
-       const char *krbd_name;  /* image /dev/rbd<id> name */
-       int krbd_fd;            /* image /dev/rbd<id> fd */
-};
-
-#define RBD_CTX_INIT   (struct rbd_ctx) { NULL, NULL, NULL, -1 }
-
-struct rbd_operations {
-       int (*open)(const char *name, struct rbd_ctx *ctx);
-       int (*close)(struct rbd_ctx *ctx);
-       ssize_t (*read)(struct rbd_ctx *ctx, uint64_t off, size_t len, void *buf);
-       ssize_t (*write)(struct rbd_ctx *ctx, uint64_t off, size_t len, void *buf);
-       int (*flush)(struct rbd_ctx *ctx);
-       int (*discard)(struct rbd_ctx *ctx, uint64_t off, uint64_t len);
-       int (*get_size)(struct rbd_ctx *ctx, uint64_t *size);
-       int (*resize)(struct rbd_ctx *ctx, uint64_t size);
-       int (*clone)(struct rbd_ctx *ctx, const char *src_snapname,
-                    const char *dst_imagename, int *order, int stripe_unit,
-                    int stripe_count);
-       int (*flatten)(struct rbd_ctx *ctx);
-};
-
-char *pool;                    /* name of the pool our test image is in */
-char *iname;                   /* name of our test image */
-rados_t cluster;               /* handle for our test cluster */
-rados_ioctx_t ioctx;           /* handle for our test pool */
-struct krbd_ctx *krbd;         /* handle for libkrbd */
-
-/*
- * librbd/krbd rbd_operations handlers.  Given the rest of fsx.c, no
- * attempt to do error handling is made in these handlers.
- */
-
-int
-__librbd_open(const char *name, struct rbd_ctx *ctx)
-{
-       rbd_image_t image;
-       int ret;
-
-       assert(!ctx->name && !ctx->image &&
-              !ctx->krbd_name && ctx->krbd_fd < 0);
-
-       ret = rbd_open(ioctx, name, &image, NULL);
-       if (ret < 0) {
-               prt("rbd_open(%s) failed\n", name);
-               return ret;
-       }
-
-       ctx->name = strdup(name);
-       ctx->image = image;
-       ctx->krbd_name = NULL;
-       ctx->krbd_fd = -1;
-
-       return 0;
-}
-
-int
-librbd_open(const char *name, struct rbd_ctx *ctx)
-{
-       return __librbd_open(name, ctx);
-}
-
-int
-__librbd_close(struct rbd_ctx *ctx)
-{
-       int ret;
-
-       assert(ctx->name && ctx->image);
-
-       ret = rbd_close(ctx->image);
-       if (ret < 0) {
-               prt("rbd_close(%s) failed\n", ctx->name);
-               return ret;
-       }
-
-       free((void *)ctx->name);
-
-       ctx->name = NULL;
-       ctx->image = NULL;
-
-       return 0;
-}
-
-int
-librbd_close(struct rbd_ctx *ctx)
-{
-       return __librbd_close(ctx);
-}
-
-int
-librbd_verify_object_map(struct rbd_ctx *ctx)
-{
-       int n;
-       uint64_t flags;
-       n = rbd_get_flags(ctx->image, &flags);
-       if (n < 0) {
-               prt("rbd_get_flags() failed\n");
-               return n;
-       }
-
-       if ((flags & RBD_FLAG_OBJECT_MAP_INVALID) != 0) {
-               prt("rbd_get_flags() indicates object map is invalid\n");
-               return -EINVAL;
-       }
-       return 0;
-}
-
-ssize_t
-librbd_read(struct rbd_ctx *ctx, uint64_t off, size_t len, void *buf)
-{
-       ssize_t n;
-
-       n = rbd_read(ctx->image, off, len, buf);
-       if (n < 0)
-               prt("rbd_read(%llu, %zu) failed\n", off, len);
-
-       return n;
-}
-
-ssize_t
-librbd_write(struct rbd_ctx *ctx, uint64_t off, size_t len, void *buf)
-{
-       ssize_t n;
-       int ret;
-
-       n = rbd_write(ctx->image, off, len, buf);
-       if (n < 0) {
-               prt("rbd_write(%llu, %zu) failed\n", off, len);
-               return n;
-       }
-
-       ret = librbd_verify_object_map(ctx);
-       if (ret < 0) {
-               return ret;
-       }
-       return n;
-}
-
-int
-librbd_flush(struct rbd_ctx *ctx)
-{
-       int ret;
-
-       ret = rbd_flush(ctx->image);
-       if (ret < 0) {
-               prt("rbd_flush failed\n");
-               return ret;
-       }
-
-       return librbd_verify_object_map(ctx);
-}
-
-int
-librbd_discard(struct rbd_ctx *ctx, uint64_t off, uint64_t len)
-{
-       int ret;
-
-       ret = rbd_discard(ctx->image, off, len);
-       if (ret < 0) {
-               prt("rbd_discard(%llu, %llu) failed\n", off, len);
-               return ret;
-       }
-
-       return librbd_verify_object_map(ctx);
-}
-
-int
-librbd_get_size(struct rbd_ctx *ctx, uint64_t *size)
-{
-       rbd_image_info_t info;
-       int ret;
-
-       ret = rbd_stat(ctx->image, &info, sizeof(info));
-       if (ret < 0) {
-               prt("rbd_stat failed\n");
-               return ret;
-       }
-
-       *size = info.size;
-
-       return 0;
-}
-
-int
-__librbd_resize(struct rbd_ctx *ctx, uint64_t size)
-{
-       int ret;
-
-       ret = rbd_resize(ctx->image, size);
-       if (ret < 0) {
-               prt("rbd_resize(%llu) failed\n", size);
-               return ret;
-       }
-
-       return librbd_verify_object_map(ctx);
-}
-
-int
-librbd_resize(struct rbd_ctx *ctx, uint64_t size)
-{
-       return __librbd_resize(ctx, size);
-}
-
-int
-__librbd_clone(struct rbd_ctx *ctx, const char *src_snapname,
-              const char *dst_imagename, int *order, int stripe_unit,
-              int stripe_count, bool krbd)
-{
-       int ret;
-
-       ret = rbd_snap_create(ctx->image, src_snapname);
-       if (ret < 0) {
-               prt("rbd_snap_create(%s@%s) failed\n", ctx->name,
-                   src_snapname);
-               return ret;
-       }
-
-       ret = rbd_snap_protect(ctx->image, src_snapname);
-       if (ret < 0) {
-               prt("rbd_snap_protect(%s@%s) failed\n", ctx->name,
-                   src_snapname);
-               return ret;
-       }
-
-       uint64_t features = RBD_FEATURES_ALL;
-       if (krbd) {
-               features &= ~(RBD_FEATURE_EXCLUSIVE_LOCK |
-                             RBD_FEATURE_OBJECT_MAP);
-       }
-       ret = rbd_clone2(ioctx, ctx->name, src_snapname, ioctx,
-                        dst_imagename, features, order,
-                        stripe_unit, stripe_count);
-       if (ret < 0) {
-               prt("rbd_clone2(%s@%s -> %s) failed\n", ctx->name,
-                   src_snapname, dst_imagename);
-               return ret;
-       }
-
-       return 0;
-}
-
-int
-librbd_clone(struct rbd_ctx *ctx, const char *src_snapname,
-            const char *dst_imagename, int *order, int stripe_unit,
-            int stripe_count)
-{
-       return __librbd_clone(ctx, src_snapname, dst_imagename, order,
-                             stripe_unit, stripe_count, false);
-}
-
-int
-__librbd_flatten(struct rbd_ctx *ctx)
-{
-       int ret;
-
-       ret = rbd_flatten(ctx->image);
-       if (ret < 0) {
-               prt("rbd_flatten failed\n");
-               return ret;
-       }
-
-       return librbd_verify_object_map(ctx);
-}
-
-int
-librbd_flatten(struct rbd_ctx *ctx)
-{
-       return __librbd_flatten(ctx);
-}
-
-const struct rbd_operations librbd_operations = {
-       .open   = librbd_open,
-       .close  = librbd_close,
-       .read   = librbd_read,
-       .write  = librbd_write,
-       .flush  = librbd_flush,
-       .discard = librbd_discard,
-       .get_size = librbd_get_size,
-       .resize = librbd_resize,
-       .clone  = librbd_clone,
-       .flatten = librbd_flatten,
-};
-
-int
-krbd_open(const char *name, struct rbd_ctx *ctx)
-{
-       char *devnode;
-       int fd;
-       int ret;
-
-       ret = __librbd_open(name, ctx);
-       if (ret < 0)
-               return ret;
-
-       ret = krbd_map(krbd, pool, name, NULL, NULL, &devnode);
-       if (ret < 0) {
-               prt("krbd_map(%s) failed\n", name);
-               return ret;
-       }
-
-       fd = open(devnode, O_RDWR | o_direct);
-       if (fd < 0) {
-               ret = -errno;
-               prt("open(%s) failed\n", devnode);
-               return ret;
-       }
-
-       ctx->krbd_name = devnode;
-       ctx->krbd_fd = fd;
-
-       return 0;
-}
-
-int
-krbd_close(struct rbd_ctx *ctx)
-{
-       int ret;
-
-       assert(ctx->krbd_name && ctx->krbd_fd >= 0);
-
-       if (close(ctx->krbd_fd) < 0) {
-               ret = -errno;
-               prt("close(%s) failed\n", ctx->krbd_name);
-               return ret;
-       }
-
-       ret = krbd_unmap(krbd, ctx->krbd_name);
-       if (ret < 0) {
-               prt("krbd_unmap(%s) failed\n", ctx->krbd_name);
-               return ret;
-       }
-
-       free((void *)ctx->krbd_name);
-
-       ctx->krbd_name = NULL;
-       ctx->krbd_fd = -1;
-
-       return __librbd_close(ctx);
-}
-
-ssize_t
-krbd_read(struct rbd_ctx *ctx, uint64_t off, size_t len, void *buf)
-{
-       ssize_t n;
-
-       n = pread(ctx->krbd_fd, buf, len, off);
-       if (n < 0) {
-               n = -errno;
-               prt("pread(%llu, %zu) failed\n", off, len);
-               return n;
-       }
-
-       return n;
-}
-
-ssize_t
-krbd_write(struct rbd_ctx *ctx, uint64_t off, size_t len, void *buf)
-{
-       ssize_t n;
-
-       n = pwrite(ctx->krbd_fd, buf, len, off);
-       if (n < 0) {
-               n = -errno;
-               prt("pwrite(%llu, %zu) failed\n", off, len);
-               return n;
-       }
-
-       return n;
-}
-
-int
-__krbd_flush(struct rbd_ctx *ctx)
-{
-       int ret;
-
-       if (o_direct)
-               return 0;
-
-       /*
-        * fsync(2) on the block device does not sync the filesystem
-        * mounted on top of it, but that's OK - we control the entire
-        * lifetime of the block device and write directly to it.
-        */
-       if (fsync(ctx->krbd_fd) < 0) {
-               ret = -errno;
-               prt("fsync failed\n");
-               return ret;
-       }
-
-       return 0;
-}
-
-int
-krbd_flush(struct rbd_ctx *ctx)
-{
-       return __krbd_flush(ctx);
-}
-
-int
-krbd_discard(struct rbd_ctx *ctx, uint64_t off, uint64_t len)
-{
-       uint64_t range[2] = { off, len };
-       int ret;
-
-       /*
-        * off and len must be 512-byte aligned, otherwise BLKDISCARD
-        * will fail with -EINVAL.  This means that -K (enable krbd
-        * mode) requires -h 512 or similar.
-        */
-       if (ioctl(ctx->krbd_fd, BLKDISCARD, &range) < 0) {
-               ret = -errno;
-               prt("BLKDISCARD(%llu, %llu) failed\n", off, len);
-               return ret;
-       }
-
-       return 0;
-}
-
-int
-krbd_get_size(struct rbd_ctx *ctx, uint64_t *size)
-{
-       uint64_t bytes;
-       int ret;
-
-       if (ioctl(ctx->krbd_fd, BLKGETSIZE64, &bytes) < 0) {
-               ret = -errno;
-               prt("BLKGETSIZE64 failed\n");
-               return ret;
-       }
-
-       *size = bytes;
-
-       return 0;
-}
-
-int
-krbd_resize(struct rbd_ctx *ctx, uint64_t size)
-{
-       int ret;
-
-       assert(size % truncbdy == 0);
-
-       /*
-        * This is essential: when krbd detects a size change, it calls
-        * revalidate_disk(), which ends up calling invalidate_bdev(),
-        * which invalidates only clean buffers.  The cache flush makes
-        * it invalidate everything, which is what we need if we are
-        * shrinking.
-        */
-       ret = __krbd_flush(ctx);
-       if (ret < 0)
-               return ret;
-
-       return __librbd_resize(ctx, size);
-}
-
-int
-krbd_clone(struct rbd_ctx *ctx, const char *src_snapname,
-          const char *dst_imagename, int *order, int stripe_unit,
-          int stripe_count)
-{
-       int ret;
-
-       ret = __krbd_flush(ctx);
-       if (ret < 0)
-               return ret;
-
-       return __librbd_clone(ctx, src_snapname, dst_imagename, order,
-                             stripe_unit, stripe_count, true);
-}
-
-int
-krbd_flatten(struct rbd_ctx *ctx)
-{
-       int ret;
-
-       ret = __krbd_flush(ctx);
-       if (ret < 0)
-               return ret;
-
-       return __librbd_flatten(ctx);
-}
-
-const struct rbd_operations krbd_operations = {
-       .open   = krbd_open,
-       .close  = krbd_close,
-       .read   = krbd_read,
-       .write  = krbd_write,
-       .flush  = krbd_flush,
-       .discard = krbd_discard,
-       .get_size = krbd_get_size,
-       .resize = krbd_resize,
-       .clone  = krbd_clone,
-       .flatten = krbd_flatten,
-};
-
-struct rbd_ctx ctx = RBD_CTX_INIT;
-const struct rbd_operations *ops = &librbd_operations;
-
-static bool rbd_image_has_parent(struct rbd_ctx *ctx)
-{
-       int ret;
-
-       ret = rbd_get_parent_info(ctx->image, NULL, 0, NULL, 0, NULL, 0);
-       if (ret < 0 && ret != -ENOENT) {
-               prterrcode("rbd_get_parent_info", ret);
-               exit(1);
-       }
-
-       return !ret;
-}
-
-/*
- * fsx
- */
-
-void
-log4(int operation, int arg0, int arg1, int arg2)
-{
-       struct log_entry *le;
-
-       le = &oplog[logptr];
-       le->operation = operation;
-       if (closeopen)
-               le->operation = ~ le->operation;
-       le->args[0] = arg0;
-       le->args[1] = arg1;
-       le->args[2] = arg2;
-       logptr++;
-       logcount++;
-       if (logptr >= LOGSIZE)
-               logptr = 0;
-}
-
-void
-logdump(void)
-{
-       int     i, count, down;
-       struct log_entry        *lp;
-       char *falloc_type[3] = {"PAST_EOF", "EXTENDING", "INTERIOR"};
-
-       prt("LOG DUMP (%d total operations):\n", logcount);
-       if (logcount < LOGSIZE) {
-               i = 0;
-               count = logcount;
-       } else {
-               i = logptr;
-               count = LOGSIZE;
-       }
-       for ( ; count > 0; count--) {
-               int opnum;
-
-               opnum = i+1 + (logcount/LOGSIZE)*LOGSIZE;
-               prt("%d(%3d mod 256): ", opnum, opnum%256);
-               lp = &oplog[i];
-               if ((closeopen = lp->operation < 0))
-                       lp->operation = ~ lp->operation;
-                       
-               switch (lp->operation) {
-               case OP_MAPREAD:
-                       prt("MAPREAD  0x%x thru 0x%x\t(0x%x bytes)",
-                           lp->args[0], lp->args[0] + lp->args[1] - 1,
-                           lp->args[1]);
-                       if (badoff >= lp->args[0] && badoff <
-                                                    lp->args[0] + lp->args[1])
-                               prt("\t***RRRR***");
-                       break;
-               case OP_MAPWRITE:
-                       prt("MAPWRITE 0x%x thru 0x%x\t(0x%x bytes)",
-                           lp->args[0], lp->args[0] + lp->args[1] - 1,
-                           lp->args[1]);
-                       if (badoff >= lp->args[0] && badoff <
-                                                    lp->args[0] + lp->args[1])
-                               prt("\t******WWWW");
-                       break;
-               case OP_READ:
-                       prt("READ     0x%x thru 0x%x\t(0x%x bytes)",
-                           lp->args[0], lp->args[0] + lp->args[1] - 1,
-                           lp->args[1]);
-                       if (badoff >= lp->args[0] &&
-                           badoff < lp->args[0] + lp->args[1])
-                               prt("\t***RRRR***");
-                       break;
-               case OP_WRITE:
-                       prt("WRITE    0x%x thru 0x%x\t(0x%x bytes)",
-                           lp->args[0], lp->args[0] + lp->args[1] - 1,
-                           lp->args[1]);
-                       if (lp->args[0] > lp->args[2])
-                               prt(" HOLE");
-                       else if (lp->args[0] + lp->args[1] > lp->args[2])
-                               prt(" EXTEND");
-                       if ((badoff >= lp->args[0] || badoff >=lp->args[2]) &&
-                           badoff < lp->args[0] + lp->args[1])
-                               prt("\t***WWWW");
-                       break;
-               case OP_TRUNCATE:
-                       down = lp->args[0] < lp->args[1];
-                       prt("TRUNCATE %s\tfrom 0x%x to 0x%x",
-                           down ? "DOWN" : "UP", lp->args[1], lp->args[0]);
-                       if (badoff >= lp->args[!down] &&
-                           badoff < lp->args[!!down])
-                               prt("\t******WWWW");
-                       break;
-               case OP_FALLOCATE:
-                       /* 0: offset 1: length 2: where alloced */
-                       prt("FALLOC   0x%x thru 0x%x\t(0x%x bytes) %s",
-                               lp->args[0], lp->args[0] + lp->args[1],
-                               lp->args[1], falloc_type[lp->args[2]]);
-                       if (badoff >= lp->args[0] &&
-                           badoff < lp->args[0] + lp->args[1])
-                               prt("\t******FFFF");
-                       break;
-               case OP_PUNCH_HOLE:
-                       prt("PUNCH    0x%x thru 0x%x\t(0x%x bytes)",
-                           lp->args[0], lp->args[0] + lp->args[1] - 1,
-                           lp->args[1]);
-                       if (badoff >= lp->args[0] && badoff <
-                                                    lp->args[0] + lp->args[1])
-                               prt("\t******PPPP");
-                       break;
-               case OP_CLONE:
-                       prt("CLONE");
-                       break;
-               case OP_FLATTEN:
-                       prt("FLATTEN");
-                       break;
-               case OP_SKIPPED:
-                       prt("SKIPPED (no operation)");
-                       break;
-               default:
-                       prt("BOGUS LOG ENTRY (operation code = %d)!",
-                           lp->operation);
-               }
-               if (closeopen)
-                       prt("\n\t\tCLOSE/OPEN");
-               prt("\n");
-               i++;
-               if (i == LOGSIZE)
-                       i = 0;
-       }
-}
-
-void
-save_buffer(char *buffer, off_t bufferlength, int fd)
-{
-       off_t ret;
-       ssize_t byteswritten;
-
-       if (fd <= 0 || bufferlength == 0)
-               return;
-
-       if (bufferlength > SSIZE_MAX) {
-               prt("fsx flaw: overflow in save_buffer\n");
-               exit(67);
-       }
-
-       ret = lseek(fd, (off_t)0, SEEK_SET);
-       if (ret == (off_t)-1)
-               prterr("save_buffer: lseek 0");
-       
-       byteswritten = write(fd, buffer, (size_t)bufferlength);
-       if (byteswritten != bufferlength) {
-               if (byteswritten == -1)
-                       prterr("save_buffer write");
-               else
-                       warn("save_buffer: short write, 0x%x bytes instead of 0x%llx\n",
-                            (unsigned)byteswritten,
-                            (unsigned long long)bufferlength);
-       }
-}
-
-
-void
-report_failure(int status)
-{
-       logdump();
-       
-       if (fsxgoodfd) {
-               if (good_buf) {
-                       save_buffer(good_buf, file_size, fsxgoodfd);
-                       prt("Correct content saved for comparison\n");
-                       prt("(maybe hexdump \"%s\" vs \"%s.fsxgood\")\n",
-                           iname, iname);
-               }
-               close(fsxgoodfd);
-       }
-       sleep(3);   // so the log can flush to disk.  KLUDGEY!
-       exit(status);
-}
-
-#define short_at(cp) ((unsigned short)((*((unsigned char *)(cp)) << 8) | \
-                                       *(((unsigned char *)(cp)) + 1)))
-
-void
-check_buffers(char *good_buf, char *temp_buf, unsigned offset, unsigned size)
-{
-       unsigned char c, t;
-       unsigned i = 0;
-       unsigned n = 0;
-       unsigned op = 0;
-       unsigned bad = 0;
-
-       if (memcmp(good_buf + offset, temp_buf, size) != 0) {
-               prt("READ BAD DATA: offset = 0x%x, size = 0x%x, fname = %s\n",
-                   offset, size, iname);
-               prt("OFFSET\tGOOD\tBAD\tRANGE\n");
-               while (size > 0) {
-                       c = good_buf[offset];
-                       t = temp_buf[i];
-                       if (c != t) {
-                               if (n < 16) {
-                                       bad = short_at(&temp_buf[i]);
-                                       prt("0x%5x\t0x%04x\t0x%04x", offset,
-                                           short_at(&good_buf[offset]), bad);
-                                       op = temp_buf[offset & 1 ? i+1 : i];
-                                       prt("\t0x%5x\n", n);
-                                       if (op)
-                                               prt("operation# (mod 256) for "
-                                                 "the bad data may be %u\n",
-                                               ((unsigned)op & 0xff));
-                                       else
-                                               prt("operation# (mod 256) for "
-                                                 "the bad data unknown, check"
-                                                 " HOLE and EXTEND ops\n");
-                               }
-                               n++;
-                               badoff = offset;
-                       }
-                       offset++;
-                       i++;
-                       size--;
-               }
-               report_failure(110);
-       }
-}
-
-
-void
-check_size(void)
-{
-       uint64_t size;
-       int ret;
-
-       ret = ops->get_size(&ctx, &size);
-       if (ret < 0)
-               prterrcode("check_size: ops->get_size", ret);
-
-       if ((uint64_t)file_size != size) {
-               prt("Size error: expected 0x%llx stat 0x%llx\n",
-                   (unsigned long long)file_size,
-                   (unsigned long long)size);
-               report_failure(120);
-       }
-}
-
-#define TRUNC_HACK_SIZE        (200ULL << 9)   /* 512-byte aligned for krbd */
-
-void
-check_trunc_hack(void)
-{
-       uint64_t size;
-       int ret;
-
-       ret = ops->resize(&ctx, 0ULL);
-       if (ret < 0)
-               prterrcode("check_trunc_hack: ops->resize pre", ret);
-
-       ret = ops->resize(&ctx, TRUNC_HACK_SIZE);
-       if (ret < 0)
-               prterrcode("check_trunc_hack: ops->resize actual", ret);
-
-       ret = ops->get_size(&ctx, &size);
-       if (ret < 0)
-               prterrcode("check_trunc_hack: ops->get_size", ret);
-
-       if (size != TRUNC_HACK_SIZE) {
-               prt("no extend on truncate! not posix!\n");
-               exit(130);
-       }
-
-       ret = ops->resize(&ctx, 0ULL);
-       if (ret < 0)
-               prterrcode("check_trunc_hack: ops->resize post", ret);
-}
-
-int
-create_image()
-{
-       int r;
-       int order = 0;
-
-       r = rados_create(&cluster, NULL);
-       if (r < 0) {
-               simple_err("Could not create cluster handle", r);
-               return r;
-       }
-       rados_conf_parse_env(cluster, NULL);
-       r = rados_conf_read_file(cluster, NULL);
-       if (r < 0) {
-               simple_err("Error reading ceph config file", r);
-               goto failed_shutdown;
-       }
-       r = rados_connect(cluster);
-       if (r < 0) {
-               simple_err("Error connecting to cluster", r);
-               goto failed_shutdown;
-       }
-       r = krbd_create_from_context(rados_cct(cluster), &krbd);
-       if (r < 0) {
-               simple_err("Could not create libkrbd handle", r);
-               goto failed_shutdown;
-       }
-
-       r = rados_pool_create(cluster, pool);
-       if (r < 0 && r != -EEXIST) {
-               simple_err("Error creating pool", r);
-               goto failed_krbd;
-       }
-       r = rados_ioctx_create(cluster, pool, &ioctx);
-       if (r < 0) {
-               simple_err("Error creating ioctx", r);
-               goto failed_krbd;
-       }
-       if (clone_calls) {
-               r = rbd_create2(ioctx, iname, 0, RBD_FEATURE_LAYERING, &order);
-       } else {
-               r = rbd_create(ioctx, iname, 0, &order);
-       }
-       if (r < 0) {
-               simple_err("Error creating image", r);
-               goto failed_open;
-       }
-
-       return 0;
-
- failed_open:
-       rados_ioctx_destroy(ioctx);
- failed_krbd:
-       krbd_destroy(krbd);
- failed_shutdown:
-       rados_shutdown(cluster);
-       return r;
-}
-
-void
-doflush(unsigned offset, unsigned size)
-{
-       int ret;
-
-       if (o_direct)
-               return;
-
-       ret = ops->flush(&ctx);
-       if (ret < 0)
-               prterrcode("doflush: ops->flush", ret);
-}
-
-void
-doread(unsigned offset, unsigned size)
-{
-       int ret;
-
-       offset -= offset % readbdy;
-       if (o_direct)
-               size -= size % readbdy;
-       if (size == 0) {
-               if (!quiet && testcalls > simulatedopcount && !o_direct)
-                       prt("skipping zero size read\n");
-               log4(OP_SKIPPED, OP_READ, offset, size);
-               return;
-       }
-       if (size + offset > file_size) {
-               if (!quiet && testcalls > simulatedopcount)
-                       prt("skipping seek/read past end of file\n");
-               log4(OP_SKIPPED, OP_READ, offset, size);
-               return;
-       }
-
-       log4(OP_READ, offset, size, 0);
-
-       if (testcalls <= simulatedopcount)
-               return;
-
-       if (!quiet &&
-               ((progressinterval && testcalls % progressinterval == 0)  ||
-               (debug &&
-                      (monitorstart == -1 ||
-                       (offset + size > monitorstart &&
-                       (monitorend == -1 || offset <= monitorend))))))
-               prt("%lu read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
-                   offset, offset + size - 1, size);
-
-       ret = ops->read(&ctx, offset, size, temp_buf);
-       if (ret != (int)size) {
-               if (ret < 0)
-                       prterrcode("doread: ops->read", ret);
-               else
-                       prt("short read: 0x%x bytes instead of 0x%x\n",
-                           ret, size);
-               report_failure(141);
-       }
-
-       check_buffers(good_buf, temp_buf, offset, size);
-}
-
-
-void
-check_eofpage(char *s, unsigned offset, char *p, int size)
-{
-       unsigned long last_page, should_be_zero;
-
-       if (offset + size <= (file_size & ~page_mask))
-               return;
-       /*
-        * we landed in the last page of the file
-        * test to make sure the VM system provided 0's 
-        * beyond the true end of the file mapping
-        * (as required by mmap def in 1996 posix 1003.1)
-        */
-       last_page = ((unsigned long)p + (offset & page_mask) + size) & ~page_mask;
-
-       for (should_be_zero = last_page + (file_size & page_mask);
-            should_be_zero < last_page + page_size;
-            should_be_zero++)
-               if (*(char *)should_be_zero) {
-                       prt("Mapped %s: non-zero data past EOF (0x%llx) page offset 0x%x is 0x%04x\n",
-                           s, file_size - 1, should_be_zero & page_mask,
-                           short_at(should_be_zero));
-                       report_failure(205);
-               }
-}
-
-
-void
-gendata(char *original_buf, char *good_buf, unsigned offset, unsigned size)
-{
-       while (size--) {
-               good_buf[offset] = testcalls % 256; 
-               if (offset % 2)
-                       good_buf[offset] += original_buf[offset];
-               offset++;
-       }
-}
-
-
-void
-dowrite(unsigned offset, unsigned size)
-{
-       ssize_t ret;
-       off_t newsize;
-
-       offset -= offset % writebdy;
-       if (o_direct)
-               size -= size % writebdy;
-       if (size == 0) {
-               if (!quiet && testcalls > simulatedopcount && !o_direct)
-                       prt("skipping zero size write\n");
-               log4(OP_SKIPPED, OP_WRITE, offset, size);
-               return;
-       }
-
-       log4(OP_WRITE, offset, size, file_size);
-
-       gendata(original_buf, good_buf, offset, size);
-       if (file_size < offset + size) {
-               newsize = ceil(((double)offset + size) / truncbdy) * truncbdy;
-               if (file_size < newsize)
-                       memset(good_buf + file_size, '\0', newsize - file_size);
-               file_size = newsize;
-               if (lite) {
-                       warn("Lite file size bug in fsx!");
-                       report_failure(149);
-               }
-               ret = ops->resize(&ctx, newsize);
-               if (ret < 0) {
-                       prterrcode("dowrite: ops->resize", ret);
-                       report_failure(150);
-               }
-       }
-
-       if (testcalls <= simulatedopcount)
-               return;
-
-       if (!quiet &&
-               ((progressinterval && testcalls % progressinterval == 0) ||
-                      (debug &&
-                      (monitorstart == -1 ||
-                       (offset + size > monitorstart &&
-                       (monitorend == -1 || offset <= monitorend))))))
-               prt("%lu write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
-                   offset, offset + size - 1, size);
-
-       ret = ops->write(&ctx, offset, size, good_buf + offset);
-       if (ret != size) {
-               if (ret < 0)
-                       prterrcode("dowrite: ops->write", ret);
-               else
-                       prt("short write: 0x%x bytes instead of 0x%x\n",
-                           ret, size);
-               report_failure(151);
-       }
-
-       if (flush)
-               doflush(offset, size);
-}
-
-
-void
-dotruncate(unsigned size)
-{
-       int oldsize = file_size;
-       int ret;
-
-       size -= size % truncbdy;
-       if (size > biggest) {
-               biggest = size;
-               if (!quiet && testcalls > simulatedopcount)
-                       prt("truncating to largest ever: 0x%x\n", size);
-       }
-
-       log4(OP_TRUNCATE, size, (unsigned)file_size, 0);
-
-       if (size > file_size)
-               memset(good_buf + file_size, '\0', size - file_size);
-       else if (size < file_size)
-               memset(good_buf + size, '\0', file_size - size);
-       file_size = size;
-
-       if (testcalls <= simulatedopcount)
-               return;
-
-       if ((progressinterval && testcalls % progressinterval == 0) ||
-           (debug && (monitorstart == -1 || monitorend == -1 ||
-                     size <= monitorend)))
-               prt("%lu trunc\tfrom 0x%x to 0x%x\n", testcalls, oldsize, size);
-
-       ret = ops->resize(&ctx, size);
-       if (ret < 0) {
-               prterrcode("dotruncate: ops->resize", ret);
-               report_failure(160);
-       }
-}
-
-void
-do_punch_hole(unsigned offset, unsigned length)
-{
-       unsigned end_offset;
-       int max_offset = 0;
-       int max_len = 0;
-       int ret;
-
-       offset -= offset % holebdy;
-       length -= length % holebdy;
-       if (length == 0) {
-               if (!quiet && testcalls > simulatedopcount)
-                       prt("skipping zero length punch hole\n");
-               log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, length);
-               return;
-       }
-
-       if (file_size <= (loff_t)offset) {
-               if (!quiet && testcalls > simulatedopcount)
-                       prt("skipping hole punch off the end of the file\n");
-               log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, length);
-               return;
-       }
-
-       end_offset = offset + length;
-
-       log4(OP_PUNCH_HOLE, offset, length, 0);
-
-       if (testcalls <= simulatedopcount)
-               return;
-
-       if ((progressinterval && testcalls % progressinterval == 0) ||
-           (debug && (monitorstart == -1 || monitorend == -1 ||
-                     end_offset <= monitorend))) {
-               prt("%lu punch\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
-                       offset, offset+length, length);
-       }
-
-       ret = ops->discard(&ctx, (unsigned long long)offset,
-                          (unsigned long long)length);
-       if (ret < 0) {
-               prterrcode("do_punch_hole: ops->discard", ret);
-               report_failure(161);
-       }
-
-       max_offset = offset < file_size ? offset : file_size;
-       max_len = max_offset + length <= file_size ? length :
-                       file_size - max_offset;
-       memset(good_buf + max_offset, '\0', max_len);
-}
-
-void clone_filename(char *buf, size_t len, int clones)
-{
-       snprintf(buf, len, "%s/fsx-%s-parent%d",
-                dirpath, iname, clones);
-}
-
-void clone_imagename(char *buf, size_t len, int clones)
-{
-       if (clones > 0)
-               snprintf(buf, len, "%s-clone%d", iname, clones);
-       else
-               strncpy(buf, iname, len);
-}
-
-void check_clone(int clonenum);
-
-void
-do_clone()
-{
-       char filename[1024];
-       char imagename[1024];
-       char lastimagename[1024];
-       int ret, fd;
-       int order = 0, stripe_unit = 0, stripe_count = 0;
-       uint64_t newsize = file_size;
-
-       log4(OP_CLONE, 0, 0, 0);
-       ++num_clones;
-
-       if (randomize_striping) {
-               order = 18 + get_random() % 8;
-               stripe_unit = 1ull << (order - 1 - (get_random() % 8));
-               stripe_count = 2 + get_random() % 14;
-       }
-
-       prt("%lu clone\t%d order %d su %d sc %d\n", testcalls, num_clones,
-           order, stripe_unit, stripe_count);
-
-       clone_imagename(imagename, sizeof(imagename), num_clones);
-       clone_imagename(lastimagename, sizeof(lastimagename),
-                       num_clones - 1);
-       assert(strcmp(lastimagename, ctx.name) == 0);
-
-       ret = ops->clone(&ctx, "snap", imagename, &order, stripe_unit,
-                        stripe_count);
-       if (ret < 0) {
-               prterrcode("do_clone: ops->clone", ret);
-               exit(165);
-       }
-
-       if (randomize_parent_overlap && rbd_image_has_parent(&ctx)) {
-               int rand = get_random() % 16 + 1; // [1..16]
-
-               if (rand < 13) {
-                       uint64_t overlap;
-
-                       ret = rbd_get_overlap(ctx.image, &overlap);
-                       if (ret < 0) {
-                               prterrcode("do_clone: rbd_get_overlap", ret);
-                               exit(1);
-                       }
-
-                       if (rand < 10) {        // 9/16
-                               newsize = overlap * ((double)rand / 10);
-                               newsize -= newsize % truncbdy;
-                       } else {                // 3/16
-                               newsize = 0;
-                       }
-
-                       assert(newsize != (uint64_t)file_size);
-                       prt("truncating image %s from 0x%llx (overlap 0x%llx) to 0x%llx\n",
-                           ctx.name, file_size, overlap, newsize);
-
-                       ret = ops->resize(&ctx, newsize);
-                       if (ret < 0) {
-                               prterrcode("do_clone: ops->resize", ret);
-                               exit(1);
-                       }
-               } else if (rand < 15) {         // 2/16
-                       prt("flattening image %s\n", ctx.name);
-
-                       ret = ops->flatten(&ctx);
-                       if (ret < 0) {
-                               prterrcode("do_clone: ops->flatten", ret);
-                               exit(1);
-                       }
-               } else {                        // 2/16
-                       prt("leaving image %s intact\n", ctx.name);
-               }
-       }
-
-       clone_filename(filename, sizeof(filename), num_clones);
-       if ((fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0666)) < 0) {
-               simple_err("do_clone: open", -errno);
-               exit(162);
-       }
-       save_buffer(good_buf, newsize, fd);
-       if ((ret = close(fd)) < 0) {
-               simple_err("do_clone: close", -errno);
-               exit(163);
-       }
-
-       /*
-        * Close parent.
-        */
-       if ((ret = ops->close(&ctx)) < 0) {
-               prterrcode("do_clone: ops->close", ret);
-               exit(174);
-       }
-
-       /*
-        * Open freshly made clone.
-        */
-       if ((ret = ops->open(imagename, &ctx)) < 0) {
-               prterrcode("do_clone: ops->open", ret);
-               exit(166);
-       }
-
-       if (num_clones > 1)
-               check_clone(num_clones - 2);
-}
-
-void
-check_clone(int clonenum)
-{
-       char filename[128];
-       char imagename[128];
-       int ret, fd;
-       struct rbd_ctx cur_ctx = RBD_CTX_INIT;
-       struct stat file_info;
-       char *good_buf, *temp_buf;
-
-       clone_imagename(imagename, sizeof(imagename), clonenum);
-       if ((ret = ops->open(imagename, &cur_ctx)) < 0) {
-               prterrcode("check_clone: ops->open", ret);
-               exit(167);
-       }
-
-       clone_filename(filename, sizeof(filename), clonenum + 1);
-       if ((fd = open(filename, O_RDONLY)) < 0) {
-               simple_err("check_clone: open", -errno);
-               exit(168);
-       }
-
-       prt("checking clone #%d, image %s against file %s\n",
-           clonenum, imagename, filename);
-       if ((ret = fstat(fd, &file_info)) < 0) {
-               simple_err("check_clone: fstat", -errno);
-               exit(169);
-       }
-
-       good_buf = NULL;
-       ret = posix_memalign((void **)&good_buf,
-                            MAX(writebdy, (int)sizeof(void *)),
-                            file_info.st_size);
-       if (ret > 0) {
-               prterrcode("check_clone: posix_memalign(good_buf)", -ret);
-               exit(96);
-       }
-
-       temp_buf = NULL;
-       ret = posix_memalign((void **)&temp_buf,
-                            MAX(readbdy, (int)sizeof(void *)),
-                            file_info.st_size);
-       if (ret > 0) {
-               prterrcode("check_clone: posix_memalign(temp_buf)", -ret);
-               exit(97);
-       }
-
-       if ((ret = pread(fd, good_buf, file_info.st_size, 0)) < 0) {
-               simple_err("check_clone: pread", -errno);
-               exit(170);
-       }
-       if ((ret = ops->read(&cur_ctx, 0, file_info.st_size, temp_buf)) < 0) {
-               prterrcode("check_clone: ops->read", ret);
-               exit(171);
-       }
-       close(fd);
-       if ((ret = ops->close(&cur_ctx)) < 0) {
-               prterrcode("check_clone: ops->close", ret);
-               exit(174);
-       }
-       check_buffers(good_buf, temp_buf, 0, file_info.st_size);
-
-       unlink(filename);
-
-       free(good_buf);
-       free(temp_buf);
-}
-
-void
-writefileimage()
-{
-       ssize_t ret;
-
-       ret = ops->write(&ctx, 0, file_size, good_buf);
-       if (ret != file_size) {
-               if (ret < 0)
-                       prterrcode("writefileimage: ops->write", ret);
-               else
-                       prt("short write: 0x%x bytes instead of 0x%llx\n",
-                           ret, (unsigned long long)file_size);
-               report_failure(172);
-       }
-
-       if (!lite) {
-               ret = ops->resize(&ctx, file_size);
-               if (ret < 0) {
-                       prterrcode("writefileimage: ops->resize", ret);
-                       report_failure(173);
-               }
-       }
-}
-
-void
-do_flatten()
-{
-       int ret;
-
-       if (!rbd_image_has_parent(&ctx)) {
-               log4(OP_SKIPPED, OP_FLATTEN, 0, 0);
-               return;
-       }
-       log4(OP_FLATTEN, 0, 0, 0);
-       prt("%lu flatten\n", testcalls);
-
-       ret = ops->flatten(&ctx);
-       if (ret < 0) {
-               prterrcode("writefileimage: ops->flatten", ret);
-               exit(177);
-       }
-}
-
-void
-docloseopen(void)
-{
-       char *name;
-       int ret;
-
-       if (testcalls <= simulatedopcount)
-               return;
-
-       name = strdup(ctx.name);
-
-       if (debug)
-               prt("%lu close/open\n", testcalls);
-
-       ret = ops->close(&ctx);
-       if (ret < 0) {
-               prterrcode("docloseopen: ops->close", ret);
-               report_failure(180);
-       }
-
-       ret = ops->open(name, &ctx);
-       if (ret < 0) {
-               prterrcode("docloseopen: ops->open", ret);
-               report_failure(181);
-       }
-
-       free(name);
-}
-
-#define TRIM_OFF_LEN(off, len, size)   \
-do {                                   \
-       if (size)                       \
-               (off) %= (size);        \
-       else                            \
-               (off) = 0;              \
-       if ((unsigned)(off) + (unsigned)(len) > (unsigned)(size))       \
-               (len) = (size) - (off); \
-} while (0)
-
-void
-test(void)
-{
-       unsigned long   offset;
-       unsigned long   size = maxoplen;
-       unsigned long   rv = get_random();
-       unsigned long   op;
-
-       if (simulatedopcount > 0 && testcalls == simulatedopcount)
-               writefileimage();
-
-       testcalls++;
-
-       if (closeprob)
-               closeopen = (rv >> 3) < (1u << 28) / (unsigned)closeprob;
-
-       if (debugstart > 0 && testcalls >= debugstart)
-               debug = 1;
-
-       if (!quiet && testcalls < simulatedopcount && testcalls % 100000 == 0)
-               prt("%lu...\n", testcalls);
-
-       offset = get_random();
-       if (randomoplen)
-               size = get_random() % (maxoplen + 1);
-
-       /* calculate appropriate op to run */
-       if (lite)
-               op = rv % OP_MAX_LITE;
-       else
-               op = rv % OP_MAX_FULL;
-
-       switch (op) {
-       case OP_MAPREAD:
-               if (!mapped_reads)
-                       op = OP_READ;
-               break;
-       case OP_MAPWRITE:
-               if (!mapped_writes)
-                       op = OP_WRITE;
-               break;
-       case OP_FALLOCATE:
-               if (!fallocate_calls) {
-                       log4(OP_SKIPPED, OP_FALLOCATE, offset, size);
-                       goto out;
-               }
-               break;
-       case OP_PUNCH_HOLE:
-               if (!punch_hole_calls) {
-                       log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, size);
-                       goto out;
-               }
-               break;
-       case OP_CLONE:
-               /* clone, 8% chance */
-               if (!clone_calls || file_size == 0 || get_random() % 100 >= 8) {
-                       log4(OP_SKIPPED, OP_CLONE, 0, 0);
-                       goto out;
-               }
-               break;
-       case OP_FLATTEN:
-               /* flatten four times as rarely as clone, 2% chance */
-               if (get_random() % 100 >= 2) {
-                       log4(OP_SKIPPED, OP_FLATTEN, 0, 0);
-                       goto out;
-               }
-               break;
-       }
-
-       switch (op) {
-       case OP_READ:
-               TRIM_OFF_LEN(offset, size, file_size);
-               doread(offset, size);
-               break;
-
-       case OP_WRITE:
-               TRIM_OFF_LEN(offset, size, maxfilelen);
-               dowrite(offset, size);
-               break;
-
-       case OP_MAPREAD:
-               TRIM_OFF_LEN(offset, size, file_size);
-               exit(183);
-               break;
-
-       case OP_MAPWRITE:
-               TRIM_OFF_LEN(offset, size, maxfilelen);
-               exit(182);
-               break;
-
-       case OP_TRUNCATE:
-               if (!style)
-                       size = get_random() % maxfilelen;
-               dotruncate(size);
-               break;
-
-       case OP_PUNCH_HOLE:
-               TRIM_OFF_LEN(offset, size, file_size);
-               do_punch_hole(offset, size);
-               break;
-
-       case OP_CLONE:
-               do_clone();
-               break;
-
-       case OP_FLATTEN:
-               do_flatten();
-               break;
-
-       default:
-               prterr("test: unknown operation");
-               report_failure(42);
-               break;
-       }
-
-out:
-       if (sizechecks && testcalls > simulatedopcount)
-               check_size();
-       if (closeopen)
-               docloseopen();
-}
-
-
-void
-cleanup(sig)
-       int     sig;
-{
-       if (sig)
-               prt("signal %d\n", sig);
-       prt("testcalls = %lu\n", testcalls);
-       exit(sig);
-}
-
-
-void
-usage(void)
-{
-       fprintf(stdout, "usage: %s",
-               "fsx [-dfnqxyACFHKLORUWZ] [-b opnum] [-c Prob] [-h holebdy] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] pname iname\n\
-       -b opnum: beginning operation number (default 1)\n\
-       -c P: 1 in P chance of file close+open at each op (default infinity)\n\
-       -d: debug output for all operations\n\
-       -f: flush and invalidate cache after I/O\n\
-       -h holebdy: 4096 would make discards page aligned (default 1)\n\
-       -l flen: the upper bound on file size (default 262144)\n\
-       -m startop:endop: monitor (print debug output) specified byte range (default 0:infinity)\n\
-       -n: no verifications of file size\n\
-       -o oplen: the upper bound on operation size (default 65536)\n\
-       -p progressinterval: debug output at specified operation interval\n\
-       -q: quieter operation\n\
-       -r readbdy: 4096 would make reads page aligned (default 1)\n\
-       -s style: 1 gives smaller truncates (default 0)\n\
-       -t truncbdy: 4096 would make truncates page aligned (default 1)\n\
-       -w writebdy: 4096 would make writes page aligned (default 1)\n\
-       -x: preallocate file space before starting, XFS only (default 0)\n\
-       -y: synchronize changes to a file\n"
-
-#ifdef AIO
-"      -A: Use the AIO system calls\n"
-#endif
-"      -C: do not use clone calls\n\
-       -D startingop: debug output starting at specified operation\n"
-#ifdef FALLOCATE
-"      -F: Do not use fallocate (preallocation) calls\n"
-#endif
-"      -H: do not use punch hole calls\n\
-       -K: enable krbd mode (use -t and -h too)\n\
-       -L: fsxLite - no file creations & no file size changes\n\
-       -N numops: total # operations to do (default infinity)\n\
-       -O: use oplen (see -o flag) for every op (default random)\n\
-       -P dirpath: save .fsxlog and .fsxgood files in dirpath (default ./)\n\
-       -R: read() system calls only (mapped reads disabled)\n\
-       -S seed: for random # generator (default 1) 0 gets timestamp\n\
-       -U: disable randomized striping\n\
-       -W: mapped write operations DISabled\n\
-       -Z: O_DIRECT (use -R, -W, -r and -w too)\n\
-       poolname: this is REQUIRED (no default)\n\
-       imagename: this is REQUIRED (no default)\n");
-       exit(89);
-}
-
-
-int
-getnum(char *s, char **e)
-{
-       int ret;
-
-       *e = (char *) 0;
-       ret = strtol(s, e, 0);
-       if (*e)
-               switch (**e) {
-               case 'b':
-               case 'B':
-                       ret *= 512;
-                       *e = *e + 1;
-                       break;
-               case 'k':
-               case 'K':
-                       ret *= 1024;
-                       *e = *e + 1;
-                       break;
-               case 'm':
-               case 'M':
-                       ret *= 1024*1024;
-                       *e = *e + 1;
-                       break;
-               case 'w':
-               case 'W':
-                       ret *= 4;
-                       *e = *e + 1;
-                       break;
-               }
-       return (ret);
-}
-
-#ifdef AIO
-
-#define QSZ     1024
-io_context_t   io_ctx;
-struct iocb    iocb;
-
-int aio_setup()
-{
-       int ret;
-       ret = io_queue_init(QSZ, &io_ctx);
-       if (ret != 0) {
-               fprintf(stderr, "aio_setup: io_queue_init failed: %s\n",
-                        strerror(ret));
-                return(-1);
-        }
-        return(0);
-}
-
-int
-__aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
-{
-       struct io_event event;
-       static struct timespec ts;
-       struct iocb *iocbs[] = { &iocb };
-       int ret;
-       long res;
-
-       if (rw == READ) {
-               io_prep_pread(&iocb, fd, buf, len, offset);
-       } else {
-               io_prep_pwrite(&iocb, fd, buf, len, offset);
-       }
-
-       ts.tv_sec = 30;
-       ts.tv_nsec = 0;
-       ret = io_submit(io_ctx, 1, iocbs);
-       if (ret != 1) {
-               fprintf(stderr, "errcode=%d\n", ret);
-               fprintf(stderr, "aio_rw: io_submit failed: %s\n",
-                               strerror(ret));
-               goto out_error;
-       }
-
-       ret = io_getevents(io_ctx, 1, 1, &event, &ts);
-       if (ret != 1) {
-               if (ret == 0)
-                       fprintf(stderr, "aio_rw: no events available\n");
-               else {
-                       fprintf(stderr, "errcode=%d\n", -ret);
-                       fprintf(stderr, "aio_rw: io_getevents failed: %s\n",
-                                       strerror(-ret));
-               }
-               goto out_error;
-       }
-       if (len != event.res) {
-               /*
-                * The b0rked libaio defines event.res as unsigned.
-                * However the kernel strucuture has it signed,
-                * and it's used to pass negated error value.
-                * Till the library is fixed use the temp var.
-                */
-               res = (long)event.res;
-               if (res >= 0)
-                       fprintf(stderr, "bad io length: %lu instead of %u\n",
-                                       res, len);
-               else {
-                       fprintf(stderr, "errcode=%ld\n", -res);
-                       fprintf(stderr, "aio_rw: async io failed: %s\n",
-                                       strerror(-res));
-                       ret = res;
-                       goto out_error;
-               }
-
-       }
-       return event.res;
-
-out_error:
-       /*
-        * The caller expects error return in traditional libc
-        * convention, i.e. -1 and the errno set to error.
-        */
-       errno = -ret;
-       return -1;
-}
-
-int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
-{
-       int ret;
-
-       if (aio) {
-               ret = __aio_rw(rw, fd, buf, len, offset);
-       } else {
-               if (rw == READ)
-                       ret = read(fd, buf, len);
-               else
-                       ret = write(fd, buf, len);
-       }
-       return ret;
-}
-
-#endif
-
-void
-test_fallocate()
-{
-#ifdef FALLOCATE
-       if (!lite && fallocate_calls) {
-               if (fallocate(fd, 0, 0, 1) && errno == EOPNOTSUPP) {
-                       if(!quiet)
-                               warn("main: filesystem does not support fallocate, disabling\n");
-                       fallocate_calls = 0;
-               } else {
-                       ftruncate(fd, 0);
-               }
-       }
-#else /* ! FALLOCATE */
-       fallocate_calls = 0;
-#endif
-
-}
-
-int
-main(int argc, char **argv)
-{
-       int     i, style, ch, ret;
-       char    *endp;
-       char goodfile[1024];
-       char logfile[1024];
-
-       goodfile[0] = 0;
-       logfile[0] = 0;
-
-       page_size = getpagesize();
-       page_mask = page_size - 1;
-       mmap_mask = page_mask;
-
-       setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
-
-       while ((ch = getopt(argc, argv, "b:c:dfh:l:m:no:p:qr:s:t:w:xyACD:FHKLN:OP:RS:UWZ"))
-              != EOF)
-               switch (ch) {
-               case 'b':
-                       simulatedopcount = getnum(optarg, &endp);
-                       if (!quiet)
-                               fprintf(stdout, "Will begin at operation %ld\n",
-                                       simulatedopcount);
-                       if (simulatedopcount == 0)
-                               usage();
-                       simulatedopcount -= 1;
-                       break;
-               case 'c':
-                       closeprob = getnum(optarg, &endp);
-                       if (!quiet)
-                               fprintf(stdout,
-                                       "Chance of close/open is 1 in %d\n",
-                                       closeprob);
-                       if (closeprob <= 0)
-                               usage();
-                       break;
-               case 'd':
-                       debug = 1;
-                       break;
-               case 'f':
-                       flush = 1;
-                       break;
-               case 'h':
-                       holebdy = getnum(optarg, &endp);
-                       if (holebdy <= 0)
-                               usage();
-                       break;
-               case 'l':
-                       maxfilelen = getnum(optarg, &endp);
-                       if (maxfilelen <= 0)
-                               usage();
-                       break;
-               case 'm':
-                       monitorstart = getnum(optarg, &endp);
-                       if (monitorstart < 0)
-                               usage();
-                       if (!endp || *endp++ != ':')
-                               usage();
-                       monitorend = getnum(endp, &endp);
-                       if (monitorend < 0)
-                               usage();
-                       if (monitorend == 0)
-                               monitorend = -1; /* aka infinity */
-                       debug = 1;
-                       break;
-               case 'n':
-                       sizechecks = 0;
-                       break;
-               case 'o':
-                       maxoplen = getnum(optarg, &endp);
-                       if (maxoplen <= 0)
-                               usage();
-                       break;
-               case 'p':
-                       progressinterval = getnum(optarg, &endp);
-                       if (progressinterval == 0)
-                               usage();
-                       break;
-               case 'q':
-                       quiet = 1;
-                       break;
-               case 'r':
-                       readbdy = getnum(optarg, &endp);
-                       if (readbdy <= 0)
-                               usage();
-                       break;
-               case 's':
-                       style = getnum(optarg, &endp);
-                       if (style < 0 || style > 1)
-                               usage();
-                       break;
-               case 't':
-                       truncbdy = getnum(optarg, &endp);
-                       if (truncbdy <= 0)
-                               usage();
-                       break;
-               case 'w':
-                       writebdy = getnum(optarg, &endp);
-                       if (writebdy <= 0)
-                               usage();
-                       break;
-               case 'x':
-                       prealloc = 1;
-                       break;
-               case 'y':
-                       do_fsync = 1;
-                       break;
-               case 'A':
-                       aio = 1;
-                       break;
-               case 'C':
-                       clone_calls = 0;
-                       break;
-               case 'D':
-                       debugstart = getnum(optarg, &endp);
-                       if (debugstart < 1)
-                               usage();
-                       break;
-               case 'F':
-                       fallocate_calls = 0;
-                       break;
-               case 'H':
-                       punch_hole_calls = 0;
-                       break;
-               case 'K':
-                       prt("krbd mode enabled\n");
-                       ops = &krbd_operations;
-                       break;
-               case 'L':
-                       prt("lite mode not supported for rbd\n");
-                       exit(1);
-                       break;
-               case 'N':
-                       numops = getnum(optarg, &endp);
-                       if (numops < 0)
-                               usage();
-                       break;
-               case 'O':
-                       randomoplen = 0;
-                       break;
-               case 'P':
-                       strncpy(dirpath, optarg, sizeof(dirpath)-1);
-                       dirpath[sizeof(dirpath)-1] = '\0';
-                       strncpy(goodfile, dirpath, sizeof(goodfile)-1);
-                       goodfile[sizeof(goodfile)-1] = '\0';
-                       if (strlen(goodfile) < sizeof(goodfile)-2) {
-                               strcat(goodfile, "/");
-                       } else {
-                               prt("file name to long\n");
-                               exit(1);
-                       }
-                       strncpy(logfile, dirpath, sizeof(logfile));
-                       if (strlen(logfile) < sizeof(logfile)-2) {
-                               strcat(logfile, "/");
-                       } else {
-                               prt("file path to long\n");
-                               exit(1);
-                       }
-                       break;
-                case 'R':
-                        mapped_reads = 0;
-                       if (!quiet)
-                               fprintf(stdout, "mapped reads DISABLED\n");
-                        break;
-               case 'S':
-                        seed = getnum(optarg, &endp);
-                       if (seed == 0)
-                               seed = time(0) % 10000;
-                       if (!quiet)
-                               fprintf(stdout, "Seed set to %d\n", seed);
-                       if (seed < 0)
-                               usage();
-                       break;
-               case 'U':
-                       randomize_striping = 0;
-                       break;
-               case 'W':
-                       mapped_writes = 0;
-                       if (!quiet)
-                               fprintf(stdout, "mapped writes DISABLED\n");
-                       break;
-               case 'Z':
-                       o_direct = O_DIRECT;
-                       break;
-               default:
-                       usage();
-                       /* NOTREACHED */
-               }
-       argc -= optind;
-       argv += optind;
-       if (argc != 2)
-               usage();
-       pool = argv[0];
-       iname = argv[1];
-
-       signal(SIGHUP,  cleanup);
-       signal(SIGINT,  cleanup);
-       signal(SIGPIPE, cleanup);
-       signal(SIGALRM, cleanup);
-       signal(SIGTERM, cleanup);
-       signal(SIGXCPU, cleanup);
-       signal(SIGXFSZ, cleanup);
-       signal(SIGVTALRM,       cleanup);
-       signal(SIGUSR1, cleanup);
-       signal(SIGUSR2, cleanup);
-
-       if (initstate_r(seed, rnd_state, RND_STATE_LEN, &rnd_data) < 0) {
-               prterr("initstate_r");
-               exit(1);
-       }
-       if (setstate_r(rnd_state, &rnd_data) < 0) {
-               prterr("setstate_r");
-               exit(1);
-       }
-
-       ret = create_image();
-       if (ret < 0) {
-               prterrcode(iname, ret);
-               exit(90);
-       }
-       ret = ops->open(iname, &ctx);
-       if (ret < 0) {
-               simple_err("Error opening image", ret);
-               exit(91);
-       }
-       if (!dirpath[0])
-               strcat(dirpath, ".");
-       strncat(goodfile, iname, 256);
-       strcat (goodfile, ".fsxgood");
-       fsxgoodfd = open(goodfile, O_RDWR|O_CREAT|O_TRUNC, 0666);
-       if (fsxgoodfd < 0) {
-               prterr(goodfile);
-               exit(92);
-       }
-       strncat(logfile, iname, 256);
-       strcat (logfile, ".fsxlog");
-       fsxlogf = fopen(logfile, "w");
-       if (fsxlogf == NULL) {
-               prterr(logfile);
-               exit(93);
-       }
-
-#ifdef AIO
-       if (aio) 
-               aio_setup();
-#endif
-
-       original_buf = (char *) malloc(maxfilelen);
-       for (i = 0; i < (int)maxfilelen; i++)
-               original_buf[i] = get_random() % 256;
-
-       ret = posix_memalign((void **)&good_buf,
-                            MAX(writebdy, (int)sizeof(void *)), maxfilelen);
-       if (ret > 0) {
-               if (ret == EINVAL)
-                       prt("writebdy is not a suitable power of two\n");
-               else
-                       prterrcode("main: posix_memalign(good_buf)", -ret);
-               exit(94);
-       }
-       memset(good_buf, '\0', maxfilelen);
-
-       ret = posix_memalign((void **)&temp_buf,
-                            MAX(readbdy, (int)sizeof(void *)), maxfilelen);
-       if (ret > 0) {
-               if (ret == EINVAL)
-                       prt("readbdy is not a suitable power of two\n");
-               else
-                       prterrcode("main: posix_memalign(temp_buf)", -ret);
-               exit(95);
-       }
-       memset(temp_buf, '\0', maxfilelen);
-
-       if (lite) {     /* zero entire existing file */
-               ssize_t written;
-
-               written = ops->write(&ctx, 0, (size_t)maxfilelen, good_buf);
-               if (written != (ssize_t)maxfilelen) {
-                       if (written < 0) {
-                               prterrcode(iname, written);
-                               warn("main: error on write");
-                       } else
-                               warn("main: short write, 0x%x bytes instead "
-                                       "of 0x%lx\n",
-                                       (unsigned)written,
-                                       maxfilelen);
-                       exit(98);
-               }
-       } else
-               check_trunc_hack();
-
-       //test_fallocate();
-
-       while (numops == -1 || numops--)
-               test();
-
-       ret = ops->close(&ctx);
-       if (ret < 0) {
-               prterrcode("ops->close", ret);
-               report_failure(99);
-       }
-
-       if (num_clones > 0)
-               check_clone(num_clones - 1);
-
-       while (num_clones >= 0) {
-               static int first = 1;
-               rbd_image_t image;
-               char clonename[128];
-               char errmsg[128];
-
-               clone_imagename(clonename, 128, num_clones);
-               if ((ret = rbd_open(ioctx, clonename, &image, NULL)) < 0) {
-                       sprintf(errmsg, "rbd_open %s", clonename);
-                       prterrcode(errmsg, ret);
-                       report_failure(101);
-               }
-               if (!first) {
-                       if ((ret = rbd_snap_unprotect(image, "snap")) < 0) {
-                               sprintf(errmsg, "rbd_snap_unprotect %s@snap",
-                                       clonename);
-                               prterrcode(errmsg, ret);
-                               report_failure(102);
-                       }
-                       if ((ret = rbd_snap_remove(image, "snap")) < 0) {
-                               sprintf(errmsg, "rbd_snap_remove %s@snap",
-                                       clonename);
-                               prterrcode(errmsg, ret);
-                               report_failure(103);
-                       }
-               }
-               if ((ret = rbd_close(image)) < 0) {
-                       sprintf(errmsg, "rbd_close %s", clonename);
-                       prterrcode(errmsg, ret);
-                       report_failure(104);
-               }
-
-               if ((ret = rbd_remove(ioctx, clonename)) < 0) {
-                       sprintf(errmsg, "rbd_remove %s", clonename);
-                       prterrcode(errmsg, ret);
-                       report_failure(105);
-               }
-
-               first = 0;
-               num_clones--;
-       }
-
-       prt("All operations completed A-OK!\n");
-       fclose(fsxlogf);
-
-       rados_ioctx_destroy(ioctx);
-       krbd_destroy(krbd);
-       rados_shutdown(cluster);
-
-       free(original_buf);
-       free(good_buf);
-       free(temp_buf);
-
-       exit(0);
-       return 0;
-}
diff --git a/src/test/librbd/fsx.cc b/src/test/librbd/fsx.cc
new file mode 100644 (file)
index 0000000..c5ed1e6
--- /dev/null
@@ -0,0 +1,2322 @@
+// -*- mode:C; tab-width:8; c-basic-offset:8; indent-tabs-mode:t -*- 
+/*
+ *     Copyright (C) 1991, NeXT Computer, Inc.  All Rights Reserverd.
+ *
+ *     File:   fsx.c
+ *     Author: Avadis Tevanian, Jr.
+ *
+ *     File system exerciser. 
+ *
+ *     Rewritten 8/98 by Conrad Minshall.
+ *
+ *     Small changes to work under Linux -- davej.
+ *
+ *     Checks for mmap last-page zero fill.
+ */
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <limits.h>
+#include <time.h>
+#include <strings.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#ifdef HAVE_ERR_H
+#include <err.h>
+#endif
+#include <signal.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <assert.h>
+#include <errno.h>
+#include <math.h>
+
+#include "include/intarith.h"
+#include "include/krbd.h"
+#include "include/rados/librados.h"
+#include "include/rbd/librbd.h"
+#include "common/ceph_crypto.h"
+
+#define NUMPRINTCOLUMNS 32     /* # columns of data to print on each line */
+
+/*
+ *     A log entry is an operation and a bunch of arguments.
+ */
+
+struct log_entry {
+       int     operation;
+       int     args[3];
+};
+
+#define        LOGSIZE 1000
+
+struct log_entry       oplog[LOGSIZE]; /* the log */
+int                    logptr = 0;     /* current position in log */
+int                    logcount = 0;   /* total ops */
+
+/*
+ * The operation matrix is complex due to conditional execution of different
+ * features. Hence when we come to deciding what operation to run, we need to
+ * be careful in how we select the different operations. The active operations
+ * are mapped to numbers as follows:
+ *
+ *             lite    !lite
+ * READ:       0       0
+ * WRITE:      1       1
+ * MAPREAD:    2       2
+ * MAPWRITE:   3       3
+ * TRUNCATE:   -       4
+ * FALLOCATE:  -       5
+ * PUNCH HOLE: -       6
+ *
+ * When mapped read/writes are disabled, they are simply converted to normal
+ * reads and writes. When fallocate/fpunch calls are disabled, they are
+ * converted to OP_SKIPPED. Hence OP_SKIPPED needs to have a number higher than
+ * the operation selction matrix, as does the OP_CLOSEOPEN which is an
+ * operation modifier rather than an operation in itself.
+ *
+ * Because of the "lite" version, we also need to have different "maximum
+ * operation" defines to allow the ops to be selected correctly based on the
+ * mode being run.
+ */
+
+/* common operations */
+#define        OP_READ         0
+#define OP_WRITE       1
+#define OP_MAPREAD     2
+#define OP_MAPWRITE    3
+#define OP_MAX_LITE    4
+
+/* !lite operations */
+#define OP_TRUNCATE    4
+#define OP_FALLOCATE   5
+#define OP_PUNCH_HOLE  6
+/* rbd-specific operations */
+#define OP_CLONE        7
+#define OP_FLATTEN     8
+#define OP_MAX_FULL    9
+
+/* operation modifiers */
+#define OP_CLOSEOPEN   100
+#define OP_SKIPPED     101
+
+#undef PAGE_SIZE
+#define PAGE_SIZE       getpagesize()
+#undef PAGE_MASK
+#define PAGE_MASK       (PAGE_SIZE - 1)
+
+char   *original_buf;                  /* a pointer to the original data */
+char   *good_buf;                      /* a pointer to the correct data */
+char   *temp_buf;                      /* a pointer to the current data */
+
+char   dirpath[1024];
+
+off_t          file_size = 0;
+off_t          biggest = 0;
+unsigned long  testcalls = 0;          /* calls to function "test" */
+
+unsigned long  simulatedopcount = 0;   /* -b flag */
+int    closeprob = 0;                  /* -c flag */
+int    debug = 0;                      /* -d flag */
+unsigned long  debugstart = 0;         /* -D flag */
+int    flush = 0;                      /* -f flag */
+int    holebdy = 1;                    /* -h flag */
+int    do_fsync = 0;                   /* -y flag */
+unsigned long  maxfilelen = 256 * 1024;        /* -l flag */
+int    sizechecks = 1;                 /* -n flag disables them */
+int    maxoplen = 64 * 1024;           /* -o flag */
+int    quiet = 0;                      /* -q flag */
+unsigned long progressinterval = 0;    /* -p flag */
+int    readbdy = 1;                    /* -r flag */
+int    style = 0;                      /* -s flag */
+int    prealloc = 0;                   /* -x flag */
+int    truncbdy = 1;                   /* -t flag */
+int    writebdy = 1;                   /* -w flag */
+long   monitorstart = -1;              /* -m flag */
+long   monitorend = -1;                /* -m flag */
+int    lite = 0;                       /* -L flag */
+long   numops = -1;                    /* -N flag */
+int    randomoplen = 1;                /* -O flag disables it */
+int    seed = 1;                       /* -S flag */
+int     mapped_writes = 0;              /* -W flag disables */
+int     fallocate_calls = 0;            /* -F flag disables */
+int     punch_hole_calls = 1;           /* -H flag disables */
+int    clone_calls = 1;                /* -C flag disables */
+int    randomize_striping = 1;         /* -U flag disables */
+int    randomize_parent_overlap = 1;
+int    mapped_reads = 0;               /* -R flag disables it */
+int    fsxgoodfd = 0;
+int    o_direct = 0;                   /* -Z flag */
+int    aio = 0;
+
+int num_clones = 0;
+
+int page_size;
+int page_mask;
+int mmap_mask;
+#ifdef AIO
+int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
+#define READ 0
+#define WRITE 1
+#define fsxread(a,b,c,d)       aio_rw(READ, a,b,c,d)
+#define fsxwrite(a,b,c,d)      aio_rw(WRITE, a,b,c,d)
+#else
+#define fsxread(a,b,c,d)       read(a,b,c)
+#define fsxwrite(a,b,c,d)      write(a,b,c)
+#endif
+
+FILE * fsxlogf = NULL;
+int badoff = -1;
+int closeopen = 0;
+
+void
+vwarnc(int code, const char *fmt, va_list ap) {
+  fprintf(stderr, "fsx: ");
+  if (fmt != NULL) {
+       vfprintf(stderr, fmt, ap);
+       fprintf(stderr, ": ");
+  }
+  fprintf(stderr, "%s\n", strerror(code));
+}
+
+void
+warn(const char * fmt, ...)  {
+       va_list ap;
+       va_start(ap, fmt);
+       vwarnc(errno, fmt, ap);
+       va_end(ap);
+}
+
+#define BUF_SIZE 1024
+
+void
+prt(const char *fmt, ...)
+{
+       va_list args;
+       char buffer[BUF_SIZE];
+
+       va_start(args, fmt);
+       vsnprintf(buffer, BUF_SIZE, fmt, args);
+       va_end(args);
+       fprintf(stdout, "%s", buffer);
+       if (fsxlogf)
+               fprintf(fsxlogf, "%s", buffer);
+}
+
+void
+prterr(const char *prefix)
+{
+       prt("%s%s%s\n", prefix, prefix ? ": " : "", strerror(errno));
+}
+
+void
+prterrcode(const char *prefix, int code)
+{
+       prt("%s%s%s\n", prefix, prefix ? ": " : "", strerror(-code));
+}
+
+void
+simple_err(const char *msg, int err)
+{
+    fprintf(stderr, "%s: %s\n", msg, strerror(-err));
+}
+
+/*
+ * random
+ */
+
+#define RND_STATE_LEN  256
+char   rnd_state[RND_STATE_LEN];
+struct random_data rnd_data;
+
+int32_t
+get_random(void)
+{
+       int32_t val;
+
+       if (random_r(&rnd_data, &val) < 0) {
+               prterr("random_r");
+               exit(1);
+       }
+
+       return val;
+}
+
+/*
+ * rbd
+ */
+
+struct rbd_ctx {
+       const char *name;       /* image name */
+       rbd_image_t image;      /* image handle */
+       const char *krbd_name;  /* image /dev/rbd<id> name */
+       int krbd_fd;            /* image /dev/rbd<id> fd */
+};
+
+#define RBD_CTX_INIT   (struct rbd_ctx) { NULL, NULL, NULL, -1 }
+
+struct rbd_operations {
+       int (*open)(const char *name, struct rbd_ctx *ctx);
+       int (*close)(struct rbd_ctx *ctx);
+       ssize_t (*read)(struct rbd_ctx *ctx, uint64_t off, size_t len, char *buf);
+       ssize_t (*write)(struct rbd_ctx *ctx, uint64_t off, size_t len, const char *buf);
+       int (*flush)(struct rbd_ctx *ctx);
+       int (*discard)(struct rbd_ctx *ctx, uint64_t off, uint64_t len);
+       int (*get_size)(struct rbd_ctx *ctx, uint64_t *size);
+       int (*resize)(struct rbd_ctx *ctx, uint64_t size);
+       int (*clone)(struct rbd_ctx *ctx, const char *src_snapname,
+                    const char *dst_imagename, int *order, int stripe_unit,
+                    int stripe_count);
+       int (*flatten)(struct rbd_ctx *ctx);
+};
+
+char *pool;                    /* name of the pool our test image is in */
+char *iname;                   /* name of our test image */
+rados_t cluster;               /* handle for our test cluster */
+rados_ioctx_t ioctx;           /* handle for our test pool */
+struct krbd_ctx *krbd;         /* handle for libkrbd */
+
+/*
+ * librbd/krbd rbd_operations handlers.  Given the rest of fsx.c, no
+ * attempt to do error handling is made in these handlers.
+ */
+
+int
+__librbd_open(const char *name, struct rbd_ctx *ctx)
+{
+       rbd_image_t image;
+       int ret;
+
+       assert(!ctx->name && !ctx->image &&
+              !ctx->krbd_name && ctx->krbd_fd < 0);
+
+       ret = rbd_open(ioctx, name, &image, NULL);
+       if (ret < 0) {
+               prt("rbd_open(%s) failed\n", name);
+               return ret;
+       }
+
+       ctx->name = strdup(name);
+       ctx->image = image;
+       ctx->krbd_name = NULL;
+       ctx->krbd_fd = -1;
+
+       return 0;
+}
+
+int
+librbd_open(const char *name, struct rbd_ctx *ctx)
+{
+       return __librbd_open(name, ctx);
+}
+
+int
+__librbd_close(struct rbd_ctx *ctx)
+{
+       int ret;
+
+       assert(ctx->name && ctx->image);
+
+       ret = rbd_close(ctx->image);
+       if (ret < 0) {
+               prt("rbd_close(%s) failed\n", ctx->name);
+               return ret;
+       }
+
+       free((void *)ctx->name);
+
+       ctx->name = NULL;
+       ctx->image = NULL;
+
+       return 0;
+}
+
+int
+librbd_close(struct rbd_ctx *ctx)
+{
+       return __librbd_close(ctx);
+}
+
+int
+librbd_verify_object_map(struct rbd_ctx *ctx)
+{
+       int n;
+       uint64_t flags;
+       n = rbd_get_flags(ctx->image, &flags);
+       if (n < 0) {
+               prt("rbd_get_flags() failed\n");
+               return n;
+       }
+
+       if ((flags & RBD_FLAG_OBJECT_MAP_INVALID) != 0) {
+               prt("rbd_get_flags() indicates object map is invalid\n");
+               return -EINVAL;
+       }
+       return 0;
+}
+
+ssize_t
+librbd_read(struct rbd_ctx *ctx, uint64_t off, size_t len, char *buf)
+{
+       ssize_t n;
+
+       n = rbd_read(ctx->image, off, len, buf);
+       if (n < 0)
+               prt("rbd_read(%llu, %zu) failed\n", off, len);
+
+       return n;
+}
+
+ssize_t
+librbd_write(struct rbd_ctx *ctx, uint64_t off, size_t len, const char *buf)
+{
+       ssize_t n;
+       int ret;
+
+       n = rbd_write(ctx->image, off, len, buf);
+       if (n < 0) {
+               prt("rbd_write(%llu, %zu) failed\n", off, len);
+               return n;
+       }
+
+       ret = librbd_verify_object_map(ctx);
+       if (ret < 0) {
+               return ret;
+       }
+       return n;
+}
+
+int
+librbd_flush(struct rbd_ctx *ctx)
+{
+       int ret;
+
+       ret = rbd_flush(ctx->image);
+       if (ret < 0) {
+               prt("rbd_flush failed\n");
+               return ret;
+       }
+
+       return librbd_verify_object_map(ctx);
+}
+
+int
+librbd_discard(struct rbd_ctx *ctx, uint64_t off, uint64_t len)
+{
+       int ret;
+
+       ret = rbd_discard(ctx->image, off, len);
+       if (ret < 0) {
+               prt("rbd_discard(%llu, %llu) failed\n", off, len);
+               return ret;
+       }
+
+       return librbd_verify_object_map(ctx);
+}
+
+int
+librbd_get_size(struct rbd_ctx *ctx, uint64_t *size)
+{
+       rbd_image_info_t info;
+       int ret;
+
+       ret = rbd_stat(ctx->image, &info, sizeof(info));
+       if (ret < 0) {
+               prt("rbd_stat failed\n");
+               return ret;
+       }
+
+       *size = info.size;
+
+       return 0;
+}
+
+int
+__librbd_resize(struct rbd_ctx *ctx, uint64_t size)
+{
+       int ret;
+
+       ret = rbd_resize(ctx->image, size);
+       if (ret < 0) {
+               prt("rbd_resize(%llu) failed\n", size);
+               return ret;
+       }
+
+       return librbd_verify_object_map(ctx);
+}
+
+int
+librbd_resize(struct rbd_ctx *ctx, uint64_t size)
+{
+       return __librbd_resize(ctx, size);
+}
+
+int
+__librbd_clone(struct rbd_ctx *ctx, const char *src_snapname,
+              const char *dst_imagename, int *order, int stripe_unit,
+              int stripe_count, bool krbd)
+{
+       int ret;
+
+       ret = rbd_snap_create(ctx->image, src_snapname);
+       if (ret < 0) {
+               prt("rbd_snap_create(%s@%s) failed\n", ctx->name,
+                   src_snapname);
+               return ret;
+       }
+
+       ret = rbd_snap_protect(ctx->image, src_snapname);
+       if (ret < 0) {
+               prt("rbd_snap_protect(%s@%s) failed\n", ctx->name,
+                   src_snapname);
+               return ret;
+       }
+
+       uint64_t features = RBD_FEATURES_ALL;
+       if (krbd) {
+               features &= ~(RBD_FEATURE_EXCLUSIVE_LOCK |
+                             RBD_FEATURE_OBJECT_MAP);
+       }
+       ret = rbd_clone2(ioctx, ctx->name, src_snapname, ioctx,
+                        dst_imagename, features, order,
+                        stripe_unit, stripe_count);
+       if (ret < 0) {
+               prt("rbd_clone2(%s@%s -> %s) failed\n", ctx->name,
+                   src_snapname, dst_imagename);
+               return ret;
+       }
+
+       return 0;
+}
+
+int
+librbd_clone(struct rbd_ctx *ctx, const char *src_snapname,
+            const char *dst_imagename, int *order, int stripe_unit,
+            int stripe_count)
+{
+       return __librbd_clone(ctx, src_snapname, dst_imagename, order,
+                             stripe_unit, stripe_count, false);
+}
+
+int
+__librbd_flatten(struct rbd_ctx *ctx)
+{
+       int ret;
+
+       ret = rbd_flatten(ctx->image);
+       if (ret < 0) {
+               prt("rbd_flatten failed\n");
+               return ret;
+       }
+
+       return librbd_verify_object_map(ctx);
+}
+
+int
+librbd_flatten(struct rbd_ctx *ctx)
+{
+       return __librbd_flatten(ctx);
+}
+
+const struct rbd_operations librbd_operations = {
+       librbd_open,
+       librbd_close,
+       librbd_read,
+       librbd_write,
+       librbd_flush,
+       librbd_discard,
+       librbd_get_size,
+       librbd_resize,
+       librbd_clone,
+       librbd_flatten
+};
+
+int
+krbd_open(const char *name, struct rbd_ctx *ctx)
+{
+       char *devnode;
+       int fd;
+       int ret;
+
+       ret = __librbd_open(name, ctx);
+       if (ret < 0)
+               return ret;
+
+       ret = krbd_map(krbd, pool, name, NULL, NULL, &devnode);
+       if (ret < 0) {
+               prt("krbd_map(%s) failed\n", name);
+               return ret;
+       }
+
+       fd = open(devnode, O_RDWR | o_direct);
+       if (fd < 0) {
+               ret = -errno;
+               prt("open(%s) failed\n", devnode);
+               return ret;
+       }
+
+       ctx->krbd_name = devnode;
+       ctx->krbd_fd = fd;
+
+       return 0;
+}
+
+int
+krbd_close(struct rbd_ctx *ctx)
+{
+       int ret;
+
+       assert(ctx->krbd_name && ctx->krbd_fd >= 0);
+
+       if (close(ctx->krbd_fd) < 0) {
+               ret = -errno;
+               prt("close(%s) failed\n", ctx->krbd_name);
+               return ret;
+       }
+
+       ret = krbd_unmap(krbd, ctx->krbd_name);
+       if (ret < 0) {
+               prt("krbd_unmap(%s) failed\n", ctx->krbd_name);
+               return ret;
+       }
+
+       free((void *)ctx->krbd_name);
+
+       ctx->krbd_name = NULL;
+       ctx->krbd_fd = -1;
+
+       return __librbd_close(ctx);
+}
+
+ssize_t
+krbd_read(struct rbd_ctx *ctx, uint64_t off, size_t len, char *buf)
+{
+       ssize_t n;
+
+       n = pread(ctx->krbd_fd, buf, len, off);
+       if (n < 0) {
+               n = -errno;
+               prt("pread(%llu, %zu) failed\n", off, len);
+               return n;
+       }
+
+       return n;
+}
+
+ssize_t
+krbd_write(struct rbd_ctx *ctx, uint64_t off, size_t len, const char *buf)
+{
+       ssize_t n;
+
+       n = pwrite(ctx->krbd_fd, buf, len, off);
+       if (n < 0) {
+               n = -errno;
+               prt("pwrite(%llu, %zu) failed\n", off, len);
+               return n;
+       }
+
+       return n;
+}
+
+int
+__krbd_flush(struct rbd_ctx *ctx)
+{
+       int ret;
+
+       if (o_direct)
+               return 0;
+
+       /*
+        * fsync(2) on the block device does not sync the filesystem
+        * mounted on top of it, but that's OK - we control the entire
+        * lifetime of the block device and write directly to it.
+        */
+       if (fsync(ctx->krbd_fd) < 0) {
+               ret = -errno;
+               prt("fsync failed\n");
+               return ret;
+       }
+
+       return 0;
+}
+
+int
+krbd_flush(struct rbd_ctx *ctx)
+{
+       return __krbd_flush(ctx);
+}
+
+int
+krbd_discard(struct rbd_ctx *ctx, uint64_t off, uint64_t len)
+{
+       uint64_t range[2] = { off, len };
+       int ret;
+
+       /*
+        * off and len must be 512-byte aligned, otherwise BLKDISCARD
+        * will fail with -EINVAL.  This means that -K (enable krbd
+        * mode) requires -h 512 or similar.
+        */
+       if (ioctl(ctx->krbd_fd, BLKDISCARD, &range) < 0) {
+               ret = -errno;
+               prt("BLKDISCARD(%llu, %llu) failed\n", off, len);
+               return ret;
+       }
+
+       return 0;
+}
+
+int
+krbd_get_size(struct rbd_ctx *ctx, uint64_t *size)
+{
+       uint64_t bytes;
+       int ret;
+
+       if (ioctl(ctx->krbd_fd, BLKGETSIZE64, &bytes) < 0) {
+               ret = -errno;
+               prt("BLKGETSIZE64 failed\n");
+               return ret;
+       }
+
+       *size = bytes;
+
+       return 0;
+}
+
+int
+krbd_resize(struct rbd_ctx *ctx, uint64_t size)
+{
+       int ret;
+
+       assert(size % truncbdy == 0);
+
+       /*
+        * This is essential: when krbd detects a size change, it calls
+        * revalidate_disk(), which ends up calling invalidate_bdev(),
+        * which invalidates only clean buffers.  The cache flush makes
+        * it invalidate everything, which is what we need if we are
+        * shrinking.
+        */
+       ret = __krbd_flush(ctx);
+       if (ret < 0)
+               return ret;
+
+       return __librbd_resize(ctx, size);
+}
+
+int
+krbd_clone(struct rbd_ctx *ctx, const char *src_snapname,
+          const char *dst_imagename, int *order, int stripe_unit,
+          int stripe_count)
+{
+       int ret;
+
+       ret = __krbd_flush(ctx);
+       if (ret < 0)
+               return ret;
+
+       return __librbd_clone(ctx, src_snapname, dst_imagename, order,
+                             stripe_unit, stripe_count, true);
+}
+
+int
+krbd_flatten(struct rbd_ctx *ctx)
+{
+       int ret;
+
+       ret = __krbd_flush(ctx);
+       if (ret < 0)
+               return ret;
+
+       return __librbd_flatten(ctx);
+}
+
+const struct rbd_operations krbd_operations = {
+       krbd_open,
+       krbd_close,
+       krbd_read,
+       krbd_write,
+       krbd_flush,
+       krbd_discard,
+       krbd_get_size,
+       krbd_resize,
+       krbd_clone,
+       krbd_flatten,
+};
+
+struct rbd_ctx ctx = RBD_CTX_INIT;
+const struct rbd_operations *ops = &librbd_operations;
+
+static bool rbd_image_has_parent(struct rbd_ctx *ctx)
+{
+       int ret;
+
+       ret = rbd_get_parent_info(ctx->image, NULL, 0, NULL, 0, NULL, 0);
+       if (ret < 0 && ret != -ENOENT) {
+               prterrcode("rbd_get_parent_info", ret);
+               exit(1);
+       }
+
+       return !ret;
+}
+
+/*
+ * fsx
+ */
+
+void
+log4(int operation, int arg0, int arg1, int arg2)
+{
+       struct log_entry *le;
+
+       le = &oplog[logptr];
+       le->operation = operation;
+       if (closeopen)
+               le->operation = ~ le->operation;
+       le->args[0] = arg0;
+       le->args[1] = arg1;
+       le->args[2] = arg2;
+       logptr++;
+       logcount++;
+       if (logptr >= LOGSIZE)
+               logptr = 0;
+}
+
+void
+logdump(void)
+{
+       int     i, count, down;
+       struct log_entry        *lp;
+       const char *falloc_type[3] = {"PAST_EOF", "EXTENDING", "INTERIOR"};
+
+       prt("LOG DUMP (%d total operations):\n", logcount);
+       if (logcount < LOGSIZE) {
+               i = 0;
+               count = logcount;
+       } else {
+               i = logptr;
+               count = LOGSIZE;
+       }
+       for ( ; count > 0; count--) {
+               int opnum;
+
+               opnum = i+1 + (logcount/LOGSIZE)*LOGSIZE;
+               prt("%d(%3d mod 256): ", opnum, opnum%256);
+               lp = &oplog[i];
+               if ((closeopen = lp->operation < 0))
+                       lp->operation = ~ lp->operation;
+                       
+               switch (lp->operation) {
+               case OP_MAPREAD:
+                       prt("MAPREAD  0x%x thru 0x%x\t(0x%x bytes)",
+                           lp->args[0], lp->args[0] + lp->args[1] - 1,
+                           lp->args[1]);
+                       if (badoff >= lp->args[0] && badoff <
+                                                    lp->args[0] + lp->args[1])
+                               prt("\t***RRRR***");
+                       break;
+               case OP_MAPWRITE:
+                       prt("MAPWRITE 0x%x thru 0x%x\t(0x%x bytes)",
+                           lp->args[0], lp->args[0] + lp->args[1] - 1,
+                           lp->args[1]);
+                       if (badoff >= lp->args[0] && badoff <
+                                                    lp->args[0] + lp->args[1])
+                               prt("\t******WWWW");
+                       break;
+               case OP_READ:
+                       prt("READ     0x%x thru 0x%x\t(0x%x bytes)",
+                           lp->args[0], lp->args[0] + lp->args[1] - 1,
+                           lp->args[1]);
+                       if (badoff >= lp->args[0] &&
+                           badoff < lp->args[0] + lp->args[1])
+                               prt("\t***RRRR***");
+                       break;
+               case OP_WRITE:
+                       prt("WRITE    0x%x thru 0x%x\t(0x%x bytes)",
+                           lp->args[0], lp->args[0] + lp->args[1] - 1,
+                           lp->args[1]);
+                       if (lp->args[0] > lp->args[2])
+                               prt(" HOLE");
+                       else if (lp->args[0] + lp->args[1] > lp->args[2])
+                               prt(" EXTEND");
+                       if ((badoff >= lp->args[0] || badoff >=lp->args[2]) &&
+                           badoff < lp->args[0] + lp->args[1])
+                               prt("\t***WWWW");
+                       break;
+               case OP_TRUNCATE:
+                       down = lp->args[0] < lp->args[1];
+                       prt("TRUNCATE %s\tfrom 0x%x to 0x%x",
+                           down ? "DOWN" : "UP", lp->args[1], lp->args[0]);
+                       if (badoff >= lp->args[!down] &&
+                           badoff < lp->args[!!down])
+                               prt("\t******WWWW");
+                       break;
+               case OP_FALLOCATE:
+                       /* 0: offset 1: length 2: where alloced */
+                       prt("FALLOC   0x%x thru 0x%x\t(0x%x bytes) %s",
+                               lp->args[0], lp->args[0] + lp->args[1],
+                               lp->args[1], falloc_type[lp->args[2]]);
+                       if (badoff >= lp->args[0] &&
+                           badoff < lp->args[0] + lp->args[1])
+                               prt("\t******FFFF");
+                       break;
+               case OP_PUNCH_HOLE:
+                       prt("PUNCH    0x%x thru 0x%x\t(0x%x bytes)",
+                           lp->args[0], lp->args[0] + lp->args[1] - 1,
+                           lp->args[1]);
+                       if (badoff >= lp->args[0] && badoff <
+                                                    lp->args[0] + lp->args[1])
+                               prt("\t******PPPP");
+                       break;
+               case OP_CLONE:
+                       prt("CLONE");
+                       break;
+               case OP_FLATTEN:
+                       prt("FLATTEN");
+                       break;
+               case OP_SKIPPED:
+                       prt("SKIPPED (no operation)");
+                       break;
+               default:
+                       prt("BOGUS LOG ENTRY (operation code = %d)!",
+                           lp->operation);
+               }
+               if (closeopen)
+                       prt("\n\t\tCLOSE/OPEN");
+               prt("\n");
+               i++;
+               if (i == LOGSIZE)
+                       i = 0;
+       }
+}
+
+void
+save_buffer(char *buffer, off_t bufferlength, int fd)
+{
+       off_t ret;
+       ssize_t byteswritten;
+
+       if (fd <= 0 || bufferlength == 0)
+               return;
+
+       if (bufferlength > SSIZE_MAX) {
+               prt("fsx flaw: overflow in save_buffer\n");
+               exit(67);
+       }
+
+       ret = lseek(fd, (off_t)0, SEEK_SET);
+       if (ret == (off_t)-1)
+               prterr("save_buffer: lseek 0");
+       
+       byteswritten = write(fd, buffer, (size_t)bufferlength);
+       if (byteswritten != bufferlength) {
+               if (byteswritten == -1)
+                       prterr("save_buffer write");
+               else
+                       warn("save_buffer: short write, 0x%x bytes instead of 0x%llx\n",
+                            (unsigned)byteswritten,
+                            (unsigned long long)bufferlength);
+       }
+}
+
+
+void
+report_failure(int status)
+{
+       logdump();
+       
+       if (fsxgoodfd) {
+               if (good_buf) {
+                       save_buffer(good_buf, file_size, fsxgoodfd);
+                       prt("Correct content saved for comparison\n");
+                       prt("(maybe hexdump \"%s\" vs \"%s.fsxgood\")\n",
+                           iname, iname);
+               }
+               close(fsxgoodfd);
+       }
+       sleep(3);   // so the log can flush to disk.  KLUDGEY!
+       exit(status);
+}
+
+#define short_at(cp) ((unsigned short)((*((unsigned char *)(cp)) << 8) | \
+                                       *(((unsigned char *)(cp)) + 1)))
+
+void
+check_buffers(char *good_buf, char *temp_buf, unsigned offset, unsigned size)
+{
+       unsigned char c, t;
+       unsigned i = 0;
+       unsigned n = 0;
+       unsigned op = 0;
+       unsigned bad = 0;
+
+       if (memcmp(good_buf + offset, temp_buf, size) != 0) {
+               prt("READ BAD DATA: offset = 0x%x, size = 0x%x, fname = %s\n",
+                   offset, size, iname);
+               prt("OFFSET\tGOOD\tBAD\tRANGE\n");
+               while (size > 0) {
+                       c = good_buf[offset];
+                       t = temp_buf[i];
+                       if (c != t) {
+                               if (n < 16) {
+                                       bad = short_at(&temp_buf[i]);
+                                       prt("0x%5x\t0x%04x\t0x%04x", offset,
+                                           short_at(&good_buf[offset]), bad);
+                                       op = temp_buf[offset & 1 ? i+1 : i];
+                                       prt("\t0x%5x\n", n);
+                                       if (op)
+                                               prt("operation# (mod 256) for "
+                                                 "the bad data may be %u\n",
+                                               ((unsigned)op & 0xff));
+                                       else
+                                               prt("operation# (mod 256) for "
+                                                 "the bad data unknown, check"
+                                                 " HOLE and EXTEND ops\n");
+                               }
+                               n++;
+                               badoff = offset;
+                       }
+                       offset++;
+                       i++;
+                       size--;
+               }
+               report_failure(110);
+       }
+}
+
+
+void
+check_size(void)
+{
+       uint64_t size;
+       int ret;
+
+       ret = ops->get_size(&ctx, &size);
+       if (ret < 0)
+               prterrcode("check_size: ops->get_size", ret);
+
+       if ((uint64_t)file_size != size) {
+               prt("Size error: expected 0x%llx stat 0x%llx\n",
+                   (unsigned long long)file_size,
+                   (unsigned long long)size);
+               report_failure(120);
+       }
+}
+
+#define TRUNC_HACK_SIZE        (200ULL << 9)   /* 512-byte aligned for krbd */
+
+void
+check_trunc_hack(void)
+{
+       uint64_t size;
+       int ret;
+
+       ret = ops->resize(&ctx, 0ULL);
+       if (ret < 0)
+               prterrcode("check_trunc_hack: ops->resize pre", ret);
+
+       ret = ops->resize(&ctx, TRUNC_HACK_SIZE);
+       if (ret < 0)
+               prterrcode("check_trunc_hack: ops->resize actual", ret);
+
+       ret = ops->get_size(&ctx, &size);
+       if (ret < 0)
+               prterrcode("check_trunc_hack: ops->get_size", ret);
+
+       if (size != TRUNC_HACK_SIZE) {
+               prt("no extend on truncate! not posix!\n");
+               exit(130);
+       }
+
+       ret = ops->resize(&ctx, 0ULL);
+       if (ret < 0)
+               prterrcode("check_trunc_hack: ops->resize post", ret);
+}
+
+int
+create_image()
+{
+       int r;
+       int order = 0;
+
+       r = rados_create(&cluster, NULL);
+       if (r < 0) {
+               simple_err("Could not create cluster handle", r);
+               return r;
+       }
+       rados_conf_parse_env(cluster, NULL);
+       r = rados_conf_read_file(cluster, NULL);
+       if (r < 0) {
+               simple_err("Error reading ceph config file", r);
+               goto failed_shutdown;
+       }
+       r = rados_connect(cluster);
+       if (r < 0) {
+               simple_err("Error connecting to cluster", r);
+               goto failed_shutdown;
+       }
+       r = krbd_create_from_context(rados_cct(cluster), &krbd);
+       if (r < 0) {
+               simple_err("Could not create libkrbd handle", r);
+               goto failed_shutdown;
+       }
+
+       r = rados_pool_create(cluster, pool);
+       if (r < 0 && r != -EEXIST) {
+               simple_err("Error creating pool", r);
+               goto failed_krbd;
+       }
+       r = rados_ioctx_create(cluster, pool, &ioctx);
+       if (r < 0) {
+               simple_err("Error creating ioctx", r);
+               goto failed_krbd;
+       }
+       if (clone_calls) {
+               r = rbd_create2(ioctx, iname, 0, RBD_FEATURE_LAYERING, &order);
+       } else {
+               r = rbd_create(ioctx, iname, 0, &order);
+       }
+       if (r < 0) {
+               simple_err("Error creating image", r);
+               goto failed_open;
+       }
+
+       return 0;
+
+ failed_open:
+       rados_ioctx_destroy(ioctx);
+ failed_krbd:
+       krbd_destroy(krbd);
+ failed_shutdown:
+       rados_shutdown(cluster);
+       return r;
+}
+
+void
+doflush(unsigned offset, unsigned size)
+{
+       int ret;
+
+       if (o_direct)
+               return;
+
+       ret = ops->flush(&ctx);
+       if (ret < 0)
+               prterrcode("doflush: ops->flush", ret);
+}
+
+void
+doread(unsigned offset, unsigned size)
+{
+       int ret;
+
+       offset -= offset % readbdy;
+       if (o_direct)
+               size -= size % readbdy;
+       if (size == 0) {
+               if (!quiet && testcalls > simulatedopcount && !o_direct)
+                       prt("skipping zero size read\n");
+               log4(OP_SKIPPED, OP_READ, offset, size);
+               return;
+       }
+       if (size + offset > file_size) {
+               if (!quiet && testcalls > simulatedopcount)
+                       prt("skipping seek/read past end of file\n");
+               log4(OP_SKIPPED, OP_READ, offset, size);
+               return;
+       }
+
+       log4(OP_READ, offset, size, 0);
+
+       if (testcalls <= simulatedopcount)
+               return;
+
+       if (!quiet &&
+               ((progressinterval && testcalls % progressinterval == 0)  ||
+               (debug &&
+                      (monitorstart == -1 ||
+                       (offset + size > monitorstart &&
+                       (monitorend == -1 || offset <= monitorend))))))
+               prt("%lu read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
+                   offset, offset + size - 1, size);
+
+       ret = ops->read(&ctx, offset, size, temp_buf);
+       if (ret != (int)size) {
+               if (ret < 0)
+                       prterrcode("doread: ops->read", ret);
+               else
+                       prt("short read: 0x%x bytes instead of 0x%x\n",
+                           ret, size);
+               report_failure(141);
+       }
+
+       check_buffers(good_buf, temp_buf, offset, size);
+}
+
+
+void
+check_eofpage(char *s, unsigned offset, char *p, int size)
+{
+       unsigned long last_page, should_be_zero;
+
+       if (offset + size <= (file_size & ~page_mask))
+               return;
+       /*
+        * we landed in the last page of the file
+        * test to make sure the VM system provided 0's 
+        * beyond the true end of the file mapping
+        * (as required by mmap def in 1996 posix 1003.1)
+        */
+       last_page = ((unsigned long)p + (offset & page_mask) + size) & ~page_mask;
+
+       for (should_be_zero = last_page + (file_size & page_mask);
+            should_be_zero < last_page + page_size;
+            should_be_zero++)
+               if (*(char *)should_be_zero) {
+                       prt("Mapped %s: non-zero data past EOF (0x%llx) page offset 0x%x is 0x%04x\n",
+                           s, file_size - 1, should_be_zero & page_mask,
+                           short_at(should_be_zero));
+                       report_failure(205);
+               }
+}
+
+
+void
+gendata(char *original_buf, char *good_buf, unsigned offset, unsigned size)
+{
+       while (size--) {
+               good_buf[offset] = testcalls % 256; 
+               if (offset % 2)
+                       good_buf[offset] += original_buf[offset];
+               offset++;
+       }
+}
+
+
+void
+dowrite(unsigned offset, unsigned size)
+{
+       ssize_t ret;
+       off_t newsize;
+
+       offset -= offset % writebdy;
+       if (o_direct)
+               size -= size % writebdy;
+       if (size == 0) {
+               if (!quiet && testcalls > simulatedopcount && !o_direct)
+                       prt("skipping zero size write\n");
+               log4(OP_SKIPPED, OP_WRITE, offset, size);
+               return;
+       }
+
+       log4(OP_WRITE, offset, size, file_size);
+
+       gendata(original_buf, good_buf, offset, size);
+       if (file_size < offset + size) {
+               newsize = ceil(((double)offset + size) / truncbdy) * truncbdy;
+               if (file_size < newsize)
+                       memset(good_buf + file_size, '\0', newsize - file_size);
+               file_size = newsize;
+               if (lite) {
+                       warn("Lite file size bug in fsx!");
+                       report_failure(149);
+               }
+               ret = ops->resize(&ctx, newsize);
+               if (ret < 0) {
+                       prterrcode("dowrite: ops->resize", ret);
+                       report_failure(150);
+               }
+       }
+
+       if (testcalls <= simulatedopcount)
+               return;
+
+       if (!quiet &&
+               ((progressinterval && testcalls % progressinterval == 0) ||
+                      (debug &&
+                      (monitorstart == -1 ||
+                       (offset + size > monitorstart &&
+                       (monitorend == -1 || offset <= monitorend))))))
+               prt("%lu write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
+                   offset, offset + size - 1, size);
+
+       ret = ops->write(&ctx, offset, size, good_buf + offset);
+       if (ret != size) {
+               if (ret < 0)
+                       prterrcode("dowrite: ops->write", ret);
+               else
+                       prt("short write: 0x%x bytes instead of 0x%x\n",
+                           ret, size);
+               report_failure(151);
+       }
+
+       if (flush)
+               doflush(offset, size);
+}
+
+
+void
+dotruncate(unsigned size)
+{
+       int oldsize = file_size;
+       int ret;
+
+       size -= size % truncbdy;
+       if (size > biggest) {
+               biggest = size;
+               if (!quiet && testcalls > simulatedopcount)
+                       prt("truncating to largest ever: 0x%x\n", size);
+       }
+
+       log4(OP_TRUNCATE, size, (unsigned)file_size, 0);
+
+       if (size > file_size)
+               memset(good_buf + file_size, '\0', size - file_size);
+       else if (size < file_size)
+               memset(good_buf + size, '\0', file_size - size);
+       file_size = size;
+
+       if (testcalls <= simulatedopcount)
+               return;
+
+       if ((progressinterval && testcalls % progressinterval == 0) ||
+           (debug && (monitorstart == -1 || monitorend == -1 ||
+                     size <= monitorend)))
+               prt("%lu trunc\tfrom 0x%x to 0x%x\n", testcalls, oldsize, size);
+
+       ret = ops->resize(&ctx, size);
+       if (ret < 0) {
+               prterrcode("dotruncate: ops->resize", ret);
+               report_failure(160);
+       }
+}
+
+void
+do_punch_hole(unsigned offset, unsigned length)
+{
+       unsigned end_offset;
+       int max_offset = 0;
+       int max_len = 0;
+       int ret;
+
+       offset -= offset % holebdy;
+       length -= length % holebdy;
+       if (length == 0) {
+               if (!quiet && testcalls > simulatedopcount)
+                       prt("skipping zero length punch hole\n");
+               log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, length);
+               return;
+       }
+
+       if (file_size <= (loff_t)offset) {
+               if (!quiet && testcalls > simulatedopcount)
+                       prt("skipping hole punch off the end of the file\n");
+               log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, length);
+               return;
+       }
+
+       end_offset = offset + length;
+
+       log4(OP_PUNCH_HOLE, offset, length, 0);
+
+       if (testcalls <= simulatedopcount)
+               return;
+
+       if ((progressinterval && testcalls % progressinterval == 0) ||
+           (debug && (monitorstart == -1 || monitorend == -1 ||
+                     end_offset <= monitorend))) {
+               prt("%lu punch\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
+                       offset, offset+length, length);
+       }
+
+       ret = ops->discard(&ctx, (unsigned long long)offset,
+                          (unsigned long long)length);
+       if (ret < 0) {
+               prterrcode("do_punch_hole: ops->discard", ret);
+               report_failure(161);
+       }
+
+       max_offset = offset < file_size ? offset : file_size;
+       max_len = max_offset + length <= file_size ? length :
+                       file_size - max_offset;
+       memset(good_buf + max_offset, '\0', max_len);
+}
+
+void clone_filename(char *buf, size_t len, int clones)
+{
+       snprintf(buf, len, "%s/fsx-%s-parent%d",
+                dirpath, iname, clones);
+}
+
+void clone_imagename(char *buf, size_t len, int clones)
+{
+       if (clones > 0)
+               snprintf(buf, len, "%s-clone%d", iname, clones);
+       else
+               strncpy(buf, iname, len);
+}
+
+void check_clone(int clonenum);
+
+void
+do_clone()
+{
+       char filename[1024];
+       char imagename[1024];
+       char lastimagename[1024];
+       int ret, fd;
+       int order = 0, stripe_unit = 0, stripe_count = 0;
+       uint64_t newsize = file_size;
+
+       log4(OP_CLONE, 0, 0, 0);
+       ++num_clones;
+
+       if (randomize_striping) {
+               order = 18 + get_random() % 8;
+               stripe_unit = 1ull << (order - 1 - (get_random() % 8));
+               stripe_count = 2 + get_random() % 14;
+       }
+
+       prt("%lu clone\t%d order %d su %d sc %d\n", testcalls, num_clones,
+           order, stripe_unit, stripe_count);
+
+       clone_imagename(imagename, sizeof(imagename), num_clones);
+       clone_imagename(lastimagename, sizeof(lastimagename),
+                       num_clones - 1);
+       assert(strcmp(lastimagename, ctx.name) == 0);
+
+       ret = ops->clone(&ctx, "snap", imagename, &order, stripe_unit,
+                        stripe_count);
+       if (ret < 0) {
+               prterrcode("do_clone: ops->clone", ret);
+               exit(165);
+       }
+
+       if (randomize_parent_overlap && rbd_image_has_parent(&ctx)) {
+               int rand = get_random() % 16 + 1; // [1..16]
+
+               if (rand < 13) {
+                       uint64_t overlap;
+
+                       ret = rbd_get_overlap(ctx.image, &overlap);
+                       if (ret < 0) {
+                               prterrcode("do_clone: rbd_get_overlap", ret);
+                               exit(1);
+                       }
+
+                       if (rand < 10) {        // 9/16
+                               newsize = overlap * ((double)rand / 10);
+                               newsize -= newsize % truncbdy;
+                       } else {                // 3/16
+                               newsize = 0;
+                       }
+
+                       assert(newsize != (uint64_t)file_size);
+                       prt("truncating image %s from 0x%llx (overlap 0x%llx) to 0x%llx\n",
+                           ctx.name, file_size, overlap, newsize);
+
+                       ret = ops->resize(&ctx, newsize);
+                       if (ret < 0) {
+                               prterrcode("do_clone: ops->resize", ret);
+                               exit(1);
+                       }
+               } else if (rand < 15) {         // 2/16
+                       prt("flattening image %s\n", ctx.name);
+
+                       ret = ops->flatten(&ctx);
+                       if (ret < 0) {
+                               prterrcode("do_clone: ops->flatten", ret);
+                               exit(1);
+                       }
+               } else {                        // 2/16
+                       prt("leaving image %s intact\n", ctx.name);
+               }
+       }
+
+       clone_filename(filename, sizeof(filename), num_clones);
+       if ((fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0666)) < 0) {
+               simple_err("do_clone: open", -errno);
+               exit(162);
+       }
+       save_buffer(good_buf, newsize, fd);
+       if ((ret = close(fd)) < 0) {
+               simple_err("do_clone: close", -errno);
+               exit(163);
+       }
+
+       /*
+        * Close parent.
+        */
+       if ((ret = ops->close(&ctx)) < 0) {
+               prterrcode("do_clone: ops->close", ret);
+               exit(174);
+       }
+
+       /*
+        * Open freshly made clone.
+        */
+       if ((ret = ops->open(imagename, &ctx)) < 0) {
+               prterrcode("do_clone: ops->open", ret);
+               exit(166);
+       }
+
+       if (num_clones > 1)
+               check_clone(num_clones - 2);
+}
+
+void
+check_clone(int clonenum)
+{
+       char filename[128];
+       char imagename[128];
+       int ret, fd;
+       struct rbd_ctx cur_ctx = RBD_CTX_INIT;
+       struct stat file_info;
+       char *good_buf, *temp_buf;
+
+       clone_imagename(imagename, sizeof(imagename), clonenum);
+       if ((ret = ops->open(imagename, &cur_ctx)) < 0) {
+               prterrcode("check_clone: ops->open", ret);
+               exit(167);
+       }
+
+       clone_filename(filename, sizeof(filename), clonenum + 1);
+       if ((fd = open(filename, O_RDONLY)) < 0) {
+               simple_err("check_clone: open", -errno);
+               exit(168);
+       }
+
+       prt("checking clone #%d, image %s against file %s\n",
+           clonenum, imagename, filename);
+       if ((ret = fstat(fd, &file_info)) < 0) {
+               simple_err("check_clone: fstat", -errno);
+               exit(169);
+       }
+
+       good_buf = NULL;
+       ret = posix_memalign((void **)&good_buf,
+                            MAX(writebdy, (int)sizeof(void *)),
+                            file_info.st_size);
+       if (ret > 0) {
+               prterrcode("check_clone: posix_memalign(good_buf)", -ret);
+               exit(96);
+       }
+
+       temp_buf = NULL;
+       ret = posix_memalign((void **)&temp_buf,
+                            MAX(readbdy, (int)sizeof(void *)),
+                            file_info.st_size);
+       if (ret > 0) {
+               prterrcode("check_clone: posix_memalign(temp_buf)", -ret);
+               exit(97);
+       }
+
+       if ((ret = pread(fd, good_buf, file_info.st_size, 0)) < 0) {
+               simple_err("check_clone: pread", -errno);
+               exit(170);
+       }
+       if ((ret = ops->read(&cur_ctx, 0, file_info.st_size, temp_buf)) < 0) {
+               prterrcode("check_clone: ops->read", ret);
+               exit(171);
+       }
+       close(fd);
+       if ((ret = ops->close(&cur_ctx)) < 0) {
+               prterrcode("check_clone: ops->close", ret);
+               exit(174);
+       }
+       check_buffers(good_buf, temp_buf, 0, file_info.st_size);
+
+       unlink(filename);
+
+       free(good_buf);
+       free(temp_buf);
+}
+
+void
+writefileimage()
+{
+       ssize_t ret;
+
+       ret = ops->write(&ctx, 0, file_size, good_buf);
+       if (ret != file_size) {
+               if (ret < 0)
+                       prterrcode("writefileimage: ops->write", ret);
+               else
+                       prt("short write: 0x%x bytes instead of 0x%llx\n",
+                           ret, (unsigned long long)file_size);
+               report_failure(172);
+       }
+
+       if (!lite) {
+               ret = ops->resize(&ctx, file_size);
+               if (ret < 0) {
+                       prterrcode("writefileimage: ops->resize", ret);
+                       report_failure(173);
+               }
+       }
+}
+
+void
+do_flatten()
+{
+       int ret;
+
+       if (!rbd_image_has_parent(&ctx)) {
+               log4(OP_SKIPPED, OP_FLATTEN, 0, 0);
+               return;
+       }
+       log4(OP_FLATTEN, 0, 0, 0);
+       prt("%lu flatten\n", testcalls);
+
+       ret = ops->flatten(&ctx);
+       if (ret < 0) {
+               prterrcode("writefileimage: ops->flatten", ret);
+               exit(177);
+       }
+}
+
+void
+docloseopen(void)
+{
+       char *name;
+       int ret;
+
+       if (testcalls <= simulatedopcount)
+               return;
+
+       name = strdup(ctx.name);
+
+       if (debug)
+               prt("%lu close/open\n", testcalls);
+
+       ret = ops->close(&ctx);
+       if (ret < 0) {
+               prterrcode("docloseopen: ops->close", ret);
+               report_failure(180);
+       }
+
+       ret = ops->open(name, &ctx);
+       if (ret < 0) {
+               prterrcode("docloseopen: ops->open", ret);
+               report_failure(181);
+       }
+
+       free(name);
+}
+
+#define TRIM_OFF_LEN(off, len, size)   \
+do {                                   \
+       if (size)                       \
+               (off) %= (size);        \
+       else                            \
+               (off) = 0;              \
+       if ((unsigned)(off) + (unsigned)(len) > (unsigned)(size))       \
+               (len) = (size) - (off); \
+} while (0)
+
+void
+test(void)
+{
+       unsigned long   offset;
+       unsigned long   size = maxoplen;
+       unsigned long   rv = get_random();
+       unsigned long   op;
+
+       if (simulatedopcount > 0 && testcalls == simulatedopcount)
+               writefileimage();
+
+       testcalls++;
+
+       if (closeprob)
+               closeopen = (rv >> 3) < (1u << 28) / (unsigned)closeprob;
+
+       if (debugstart > 0 && testcalls >= debugstart)
+               debug = 1;
+
+       if (!quiet && testcalls < simulatedopcount && testcalls % 100000 == 0)
+               prt("%lu...\n", testcalls);
+
+       offset = get_random();
+       if (randomoplen)
+               size = get_random() % (maxoplen + 1);
+
+       /* calculate appropriate op to run */
+       if (lite)
+               op = rv % OP_MAX_LITE;
+       else
+               op = rv % OP_MAX_FULL;
+
+       switch (op) {
+       case OP_MAPREAD:
+               if (!mapped_reads)
+                       op = OP_READ;
+               break;
+       case OP_MAPWRITE:
+               if (!mapped_writes)
+                       op = OP_WRITE;
+               break;
+       case OP_FALLOCATE:
+               if (!fallocate_calls) {
+                       log4(OP_SKIPPED, OP_FALLOCATE, offset, size);
+                       goto out;
+               }
+               break;
+       case OP_PUNCH_HOLE:
+               if (!punch_hole_calls) {
+                       log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, size);
+                       goto out;
+               }
+               break;
+       case OP_CLONE:
+               /* clone, 8% chance */
+               if (!clone_calls || file_size == 0 || get_random() % 100 >= 8) {
+                       log4(OP_SKIPPED, OP_CLONE, 0, 0);
+                       goto out;
+               }
+               break;
+       case OP_FLATTEN:
+               /* flatten four times as rarely as clone, 2% chance */
+               if (get_random() % 100 >= 2) {
+                       log4(OP_SKIPPED, OP_FLATTEN, 0, 0);
+                       goto out;
+               }
+               break;
+       }
+
+       switch (op) {
+       case OP_READ:
+               TRIM_OFF_LEN(offset, size, file_size);
+               doread(offset, size);
+               break;
+
+       case OP_WRITE:
+               TRIM_OFF_LEN(offset, size, maxfilelen);
+               dowrite(offset, size);
+               break;
+
+       case OP_MAPREAD:
+               TRIM_OFF_LEN(offset, size, file_size);
+               exit(183);
+               break;
+
+       case OP_MAPWRITE:
+               TRIM_OFF_LEN(offset, size, maxfilelen);
+               exit(182);
+               break;
+
+       case OP_TRUNCATE:
+               if (!style)
+                       size = get_random() % maxfilelen;
+               dotruncate(size);
+               break;
+
+       case OP_PUNCH_HOLE:
+               TRIM_OFF_LEN(offset, size, file_size);
+               do_punch_hole(offset, size);
+               break;
+
+       case OP_CLONE:
+               do_clone();
+               break;
+
+       case OP_FLATTEN:
+               do_flatten();
+               break;
+
+       default:
+               prterr("test: unknown operation");
+               report_failure(42);
+               break;
+       }
+
+out:
+       if (sizechecks && testcalls > simulatedopcount)
+               check_size();
+       if (closeopen)
+               docloseopen();
+}
+
+
+void
+cleanup(int sig)
+{
+       if (sig)
+               prt("signal %d\n", sig);
+       prt("testcalls = %lu\n", testcalls);
+       exit(sig);
+}
+
+
+void
+usage(void)
+{
+       fprintf(stdout, "usage: %s",
+               "fsx [-dfnqxyACFHKLORUWZ] [-b opnum] [-c Prob] [-h holebdy] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] pname iname\n\
+       -b opnum: beginning operation number (default 1)\n\
+       -c P: 1 in P chance of file close+open at each op (default infinity)\n\
+       -d: debug output for all operations\n\
+       -f: flush and invalidate cache after I/O\n\
+       -h holebdy: 4096 would make discards page aligned (default 1)\n\
+       -l flen: the upper bound on file size (default 262144)\n\
+       -m startop:endop: monitor (print debug output) specified byte range (default 0:infinity)\n\
+       -n: no verifications of file size\n\
+       -o oplen: the upper bound on operation size (default 65536)\n\
+       -p progressinterval: debug output at specified operation interval\n\
+       -q: quieter operation\n\
+       -r readbdy: 4096 would make reads page aligned (default 1)\n\
+       -s style: 1 gives smaller truncates (default 0)\n\
+       -t truncbdy: 4096 would make truncates page aligned (default 1)\n\
+       -w writebdy: 4096 would make writes page aligned (default 1)\n\
+       -x: preallocate file space before starting, XFS only (default 0)\n\
+       -y: synchronize changes to a file\n"
+
+#ifdef AIO
+"      -A: Use the AIO system calls\n"
+#endif
+"      -C: do not use clone calls\n\
+       -D startingop: debug output starting at specified operation\n"
+#ifdef FALLOCATE
+"      -F: Do not use fallocate (preallocation) calls\n"
+#endif
+"      -H: do not use punch hole calls\n\
+       -K: enable krbd mode (use -t and -h too)\n\
+       -L: fsxLite - no file creations & no file size changes\n\
+       -N numops: total # operations to do (default infinity)\n\
+       -O: use oplen (see -o flag) for every op (default random)\n\
+       -P dirpath: save .fsxlog and .fsxgood files in dirpath (default ./)\n\
+       -R: read() system calls only (mapped reads disabled)\n\
+       -S seed: for random # generator (default 1) 0 gets timestamp\n\
+       -U: disable randomized striping\n\
+       -W: mapped write operations DISabled\n\
+       -Z: O_DIRECT (use -R, -W, -r and -w too)\n\
+       poolname: this is REQUIRED (no default)\n\
+       imagename: this is REQUIRED (no default)\n");
+       exit(89);
+}
+
+
+int
+getnum(char *s, char **e)
+{
+       int ret;
+
+       *e = (char *) 0;
+       ret = strtol(s, e, 0);
+       if (*e)
+               switch (**e) {
+               case 'b':
+               case 'B':
+                       ret *= 512;
+                       *e = *e + 1;
+                       break;
+               case 'k':
+               case 'K':
+                       ret *= 1024;
+                       *e = *e + 1;
+                       break;
+               case 'm':
+               case 'M':
+                       ret *= 1024*1024;
+                       *e = *e + 1;
+                       break;
+               case 'w':
+               case 'W':
+                       ret *= 4;
+                       *e = *e + 1;
+                       break;
+               }
+       return (ret);
+}
+
+#ifdef AIO
+
+#define QSZ     1024
+io_context_t   io_ctx;
+struct iocb    iocb;
+
+int aio_setup()
+{
+       int ret;
+       ret = io_queue_init(QSZ, &io_ctx);
+       if (ret != 0) {
+               fprintf(stderr, "aio_setup: io_queue_init failed: %s\n",
+                        strerror(ret));
+                return(-1);
+        }
+        return(0);
+}
+
+int
+__aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+{
+       struct io_event event;
+       static struct timespec ts;
+       struct iocb *iocbs[] = { &iocb };
+       int ret;
+       long res;
+
+       if (rw == READ) {
+               io_prep_pread(&iocb, fd, buf, len, offset);
+       } else {
+               io_prep_pwrite(&iocb, fd, buf, len, offset);
+       }
+
+       ts.tv_sec = 30;
+       ts.tv_nsec = 0;
+       ret = io_submit(io_ctx, 1, iocbs);
+       if (ret != 1) {
+               fprintf(stderr, "errcode=%d\n", ret);
+               fprintf(stderr, "aio_rw: io_submit failed: %s\n",
+                               strerror(ret));
+               goto out_error;
+       }
+
+       ret = io_getevents(io_ctx, 1, 1, &event, &ts);
+       if (ret != 1) {
+               if (ret == 0)
+                       fprintf(stderr, "aio_rw: no events available\n");
+               else {
+                       fprintf(stderr, "errcode=%d\n", -ret);
+                       fprintf(stderr, "aio_rw: io_getevents failed: %s\n",
+                                       strerror(-ret));
+               }
+               goto out_error;
+       }
+       if (len != event.res) {
+               /*
+                * The b0rked libaio defines event.res as unsigned.
+                * However the kernel strucuture has it signed,
+                * and it's used to pass negated error value.
+                * Till the library is fixed use the temp var.
+                */
+               res = (long)event.res;
+               if (res >= 0)
+                       fprintf(stderr, "bad io length: %lu instead of %u\n",
+                                       res, len);
+               else {
+                       fprintf(stderr, "errcode=%ld\n", -res);
+                       fprintf(stderr, "aio_rw: async io failed: %s\n",
+                                       strerror(-res));
+                       ret = res;
+                       goto out_error;
+               }
+
+       }
+       return event.res;
+
+out_error:
+       /*
+        * The caller expects error return in traditional libc
+        * convention, i.e. -1 and the errno set to error.
+        */
+       errno = -ret;
+       return -1;
+}
+
+int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+{
+       int ret;
+
+       if (aio) {
+               ret = __aio_rw(rw, fd, buf, len, offset);
+       } else {
+               if (rw == READ)
+                       ret = read(fd, buf, len);
+               else
+                       ret = write(fd, buf, len);
+       }
+       return ret;
+}
+
+#endif
+
+void
+test_fallocate()
+{
+#ifdef FALLOCATE
+       if (!lite && fallocate_calls) {
+               if (fallocate(fd, 0, 0, 1) && errno == EOPNOTSUPP) {
+                       if(!quiet)
+                               warn("main: filesystem does not support fallocate, disabling\n");
+                       fallocate_calls = 0;
+               } else {
+                       ftruncate(fd, 0);
+               }
+       }
+#else /* ! FALLOCATE */
+       fallocate_calls = 0;
+#endif
+
+}
+
+int
+main(int argc, char **argv)
+{
+       int     i, style, ch, ret;
+       char    *endp;
+       char goodfile[1024];
+       char logfile[1024];
+
+       goodfile[0] = 0;
+       logfile[0] = 0;
+
+       page_size = getpagesize();
+       page_mask = page_size - 1;
+       mmap_mask = page_mask;
+
+       setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
+
+       while ((ch = getopt(argc, argv, "b:c:dfh:l:m:no:p:qr:s:t:w:xyACD:FHKLN:OP:RS:UWZ"))
+              != EOF)
+               switch (ch) {
+               case 'b':
+                       simulatedopcount = getnum(optarg, &endp);
+                       if (!quiet)
+                               fprintf(stdout, "Will begin at operation %ld\n",
+                                       simulatedopcount);
+                       if (simulatedopcount == 0)
+                               usage();
+                       simulatedopcount -= 1;
+                       break;
+               case 'c':
+                       closeprob = getnum(optarg, &endp);
+                       if (!quiet)
+                               fprintf(stdout,
+                                       "Chance of close/open is 1 in %d\n",
+                                       closeprob);
+                       if (closeprob <= 0)
+                               usage();
+                       break;
+               case 'd':
+                       debug = 1;
+                       break;
+               case 'f':
+                       flush = 1;
+                       break;
+               case 'h':
+                       holebdy = getnum(optarg, &endp);
+                       if (holebdy <= 0)
+                               usage();
+                       break;
+               case 'l':
+                       maxfilelen = getnum(optarg, &endp);
+                       if (maxfilelen <= 0)
+                               usage();
+                       break;
+               case 'm':
+                       monitorstart = getnum(optarg, &endp);
+                       if (monitorstart < 0)
+                               usage();
+                       if (!endp || *endp++ != ':')
+                               usage();
+                       monitorend = getnum(endp, &endp);
+                       if (monitorend < 0)
+                               usage();
+                       if (monitorend == 0)
+                               monitorend = -1; /* aka infinity */
+                       debug = 1;
+                       break;
+               case 'n':
+                       sizechecks = 0;
+                       break;
+               case 'o':
+                       maxoplen = getnum(optarg, &endp);
+                       if (maxoplen <= 0)
+                               usage();
+                       break;
+               case 'p':
+                       progressinterval = getnum(optarg, &endp);
+                       if (progressinterval == 0)
+                               usage();
+                       break;
+               case 'q':
+                       quiet = 1;
+                       break;
+               case 'r':
+                       readbdy = getnum(optarg, &endp);
+                       if (readbdy <= 0)
+                               usage();
+                       break;
+               case 's':
+                       style = getnum(optarg, &endp);
+                       if (style < 0 || style > 1)
+                               usage();
+                       break;
+               case 't':
+                       truncbdy = getnum(optarg, &endp);
+                       if (truncbdy <= 0)
+                               usage();
+                       break;
+               case 'w':
+                       writebdy = getnum(optarg, &endp);
+                       if (writebdy <= 0)
+                               usage();
+                       break;
+               case 'x':
+                       prealloc = 1;
+                       break;
+               case 'y':
+                       do_fsync = 1;
+                       break;
+               case 'A':
+                       aio = 1;
+                       break;
+               case 'C':
+                       clone_calls = 0;
+                       break;
+               case 'D':
+                       debugstart = getnum(optarg, &endp);
+                       if (debugstart < 1)
+                               usage();
+                       break;
+               case 'F':
+                       fallocate_calls = 0;
+                       break;
+               case 'H':
+                       punch_hole_calls = 0;
+                       break;
+               case 'K':
+                       prt("krbd mode enabled\n");
+                       ops = &krbd_operations;
+                       break;
+               case 'L':
+                       prt("lite mode not supported for rbd\n");
+                       exit(1);
+                       break;
+               case 'N':
+                       numops = getnum(optarg, &endp);
+                       if (numops < 0)
+                               usage();
+                       break;
+               case 'O':
+                       randomoplen = 0;
+                       break;
+               case 'P':
+                       strncpy(dirpath, optarg, sizeof(dirpath)-1);
+                       dirpath[sizeof(dirpath)-1] = '\0';
+                       strncpy(goodfile, dirpath, sizeof(goodfile)-1);
+                       goodfile[sizeof(goodfile)-1] = '\0';
+                       if (strlen(goodfile) < sizeof(goodfile)-2) {
+                               strcat(goodfile, "/");
+                       } else {
+                               prt("file name to long\n");
+                               exit(1);
+                       }
+                       strncpy(logfile, dirpath, sizeof(logfile));
+                       if (strlen(logfile) < sizeof(logfile)-2) {
+                               strcat(logfile, "/");
+                       } else {
+                               prt("file path to long\n");
+                               exit(1);
+                       }
+                       break;
+                case 'R':
+                        mapped_reads = 0;
+                       if (!quiet)
+                               fprintf(stdout, "mapped reads DISABLED\n");
+                        break;
+               case 'S':
+                        seed = getnum(optarg, &endp);
+                       if (seed == 0)
+                               seed = time(0) % 10000;
+                       if (!quiet)
+                               fprintf(stdout, "Seed set to %d\n", seed);
+                       if (seed < 0)
+                               usage();
+                       break;
+               case 'U':
+                       randomize_striping = 0;
+                       break;
+               case 'W':
+                       mapped_writes = 0;
+                       if (!quiet)
+                               fprintf(stdout, "mapped writes DISABLED\n");
+                       break;
+               case 'Z':
+                       o_direct = O_DIRECT;
+                       break;
+               default:
+                       usage();
+                       /* NOTREACHED */
+               }
+       argc -= optind;
+       argv += optind;
+       if (argc != 2)
+               usage();
+       pool = argv[0];
+       iname = argv[1];
+
+       signal(SIGHUP,  cleanup);
+       signal(SIGINT,  cleanup);
+       signal(SIGPIPE, cleanup);
+       signal(SIGALRM, cleanup);
+       signal(SIGTERM, cleanup);
+       signal(SIGXCPU, cleanup);
+       signal(SIGXFSZ, cleanup);
+       signal(SIGVTALRM,       cleanup);
+       signal(SIGUSR1, cleanup);
+       signal(SIGUSR2, cleanup);
+
+       if (initstate_r(seed, rnd_state, RND_STATE_LEN, &rnd_data) < 0) {
+               prterr("initstate_r");
+               exit(1);
+       }
+       if (setstate_r(rnd_state, &rnd_data) < 0) {
+               prterr("setstate_r");
+               exit(1);
+       }
+
+       ret = create_image();
+       if (ret < 0) {
+               prterrcode(iname, ret);
+               exit(90);
+       }
+       ret = ops->open(iname, &ctx);
+       if (ret < 0) {
+               simple_err("Error opening image", ret);
+               exit(91);
+       }
+       if (!dirpath[0])
+               strcat(dirpath, ".");
+       strncat(goodfile, iname, 256);
+       strcat (goodfile, ".fsxgood");
+       fsxgoodfd = open(goodfile, O_RDWR|O_CREAT|O_TRUNC, 0666);
+       if (fsxgoodfd < 0) {
+               prterr(goodfile);
+               exit(92);
+       }
+       strncat(logfile, iname, 256);
+       strcat (logfile, ".fsxlog");
+       fsxlogf = fopen(logfile, "w");
+       if (fsxlogf == NULL) {
+               prterr(logfile);
+               exit(93);
+       }
+
+#ifdef AIO
+       if (aio) 
+               aio_setup();
+#endif
+
+       original_buf = (char *) malloc(maxfilelen);
+       for (i = 0; i < (int)maxfilelen; i++)
+               original_buf[i] = get_random() % 256;
+
+       ret = posix_memalign((void **)&good_buf,
+                            MAX(writebdy, (int)sizeof(void *)), maxfilelen);
+       if (ret > 0) {
+               if (ret == EINVAL)
+                       prt("writebdy is not a suitable power of two\n");
+               else
+                       prterrcode("main: posix_memalign(good_buf)", -ret);
+               exit(94);
+       }
+       memset(good_buf, '\0', maxfilelen);
+
+       ret = posix_memalign((void **)&temp_buf,
+                            MAX(readbdy, (int)sizeof(void *)), maxfilelen);
+       if (ret > 0) {
+               if (ret == EINVAL)
+                       prt("readbdy is not a suitable power of two\n");
+               else
+                       prterrcode("main: posix_memalign(temp_buf)", -ret);
+               exit(95);
+       }
+       memset(temp_buf, '\0', maxfilelen);
+
+       if (lite) {     /* zero entire existing file */
+               ssize_t written;
+
+               written = ops->write(&ctx, 0, (size_t)maxfilelen, good_buf);
+               if (written != (ssize_t)maxfilelen) {
+                       if (written < 0) {
+                               prterrcode(iname, written);
+                               warn("main: error on write");
+                       } else
+                               warn("main: short write, 0x%x bytes instead "
+                                       "of 0x%lx\n",
+                                       (unsigned)written,
+                                       maxfilelen);
+                       exit(98);
+               }
+       } else
+               check_trunc_hack();
+
+       //test_fallocate();
+
+       while (numops == -1 || numops--)
+               test();
+
+       ret = ops->close(&ctx);
+       if (ret < 0) {
+               prterrcode("ops->close", ret);
+               report_failure(99);
+       }
+
+       if (num_clones > 0)
+               check_clone(num_clones - 1);
+
+       while (num_clones >= 0) {
+               static int first = 1;
+               rbd_image_t image;
+               char clonename[128];
+               char errmsg[128];
+
+               clone_imagename(clonename, 128, num_clones);
+               if ((ret = rbd_open(ioctx, clonename, &image, NULL)) < 0) {
+                       sprintf(errmsg, "rbd_open %s", clonename);
+                       prterrcode(errmsg, ret);
+                       report_failure(101);
+               }
+               if (!first) {
+                       if ((ret = rbd_snap_unprotect(image, "snap")) < 0) {
+                               sprintf(errmsg, "rbd_snap_unprotect %s@snap",
+                                       clonename);
+                               prterrcode(errmsg, ret);
+                               report_failure(102);
+                       }
+                       if ((ret = rbd_snap_remove(image, "snap")) < 0) {
+                               sprintf(errmsg, "rbd_snap_remove %s@snap",
+                                       clonename);
+                               prterrcode(errmsg, ret);
+                               report_failure(103);
+                       }
+               }
+               if ((ret = rbd_close(image)) < 0) {
+                       sprintf(errmsg, "rbd_close %s", clonename);
+                       prterrcode(errmsg, ret);
+                       report_failure(104);
+               }
+
+               if ((ret = rbd_remove(ioctx, clonename)) < 0) {
+                       sprintf(errmsg, "rbd_remove %s", clonename);
+                       prterrcode(errmsg, ret);
+                       report_failure(105);
+               }
+
+               first = 0;
+               num_clones--;
+       }
+
+       prt("All operations completed A-OK!\n");
+       fclose(fsxlogf);
+
+       rados_ioctx_destroy(ioctx);
+       krbd_destroy(krbd);
+       rados_shutdown(cluster);
+
+        ceph::crypto::shutdown();
+       free(original_buf);
+       free(good_buf);
+       free(temp_buf);
+
+       exit(0);
+       return 0;
+}