-# aclocal.m4 generated automatically by aclocal 1.6.3 -*- Autoconf -*-
+# generated automatically by aclocal 1.8.3 -*- Autoconf -*-
-# Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002
+# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004
# Free Software Foundation, Inc.
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
-#
-# Generic macro, sets up all of the global packaging variables.
-# The following environment variables may be set to override defaults:
-# DEBUG OPTIMIZER MALLOCLIB PLATFORM DISTRIBUTION INSTALL_USER INSTALL_GROUP
-# BUILD_VERSION
-#
-AC_DEFUN([AC_PACKAGE_GLOBALS],
- [ pkg_name="$1"
- AC_SUBST(pkg_name)
-
- . ./VERSION
- pkg_version=${PKG_MAJOR}.${PKG_MINOR}.${PKG_REVISION}
- AC_SUBST(pkg_version)
- pkg_release=$PKG_BUILD
- test -z "$BUILD_VERSION" || pkg_release="$BUILD_VERSION"
- AC_SUBST(pkg_release)
-
- DEBUG=${DEBUG:-'-DDEBUG'} dnl -DNDEBUG
- debug_build="$DEBUG"
- AC_SUBST(debug_build)
-
- OPTIMIZER=${OPTIMIZER:-'-g'} dnl -O2
- opt_build="$OPTIMIZER"
- AC_SUBST(opt_build)
-
- MALLOCLIB=${MALLOCLIB:-''} dnl /usr/lib/libefence.a
- malloc_lib="$MALLOCLIB"
- AC_SUBST(malloc_lib)
-
- PKG_USER=${INSTALL_USER:-'root'}
- pkg_user="$PKG_USER"
- AC_SUBST(pkg_user)
-
- PKG_GROUP=${INSTALL_GROUP:-'root'}
- pkg_group="$PKG_GROUP"
- AC_SUBST(pkg_group)
-
- pkg_distribution=`uname -s`
- test -z "$DISTRIBUTION" || pkg_distribution="$DISTRIBUTION"
- AC_SUBST(pkg_distribution)
-
- pkg_platform=`uname -s | tr 'A-Z' 'a-z' | sed -e 's/irix64/irix/'`
- test -z "$PLATFORM" || pkg_platform="$PLATFORM"
- AC_SUBST(pkg_platform)
- ])
-
-#
-# Check for specified utility (env var) - if unset, fail.
-#
-AC_DEFUN([AC_PACKAGE_NEED_UTILITY],
- [ if test -z "$2"; then
- echo
- echo FATAL ERROR: $3 does not seem to be installed.
- echo $1 cannot be built without a working $4 installation.
- exit 1
- fi
- ])
-
-#
-# Generic macro, sets up all of the global build variables.
-# The following environment variables may be set to override defaults:
-# CC MAKE LIBTOOL TAR ZIP MAKEDEPEND AWK SED ECHO SORT
-# MSGFMT MSGMERGE RPM
-#
-AC_DEFUN([AC_PACKAGE_UTILITIES],
- [ AC_PROG_CC
- cc="$CC"
- AC_SUBST(cc)
- AC_PACKAGE_NEED_UTILITY($1, "$cc", cc, [C compiler])
-
- if test -z "$MAKE"; then
- AC_PATH_PROG(MAKE, gmake,, /usr/bin:/usr/freeware/bin)
- fi
- if test -z "$MAKE"; then
- AC_PATH_PROG(MAKE, make,, /usr/bin)
- fi
- make=$MAKE
- AC_SUBST(make)
- AC_PACKAGE_NEED_UTILITY($1, "$make", make, [GNU make])
-
- if test -z "$LIBTOOL"; then
- AC_PATH_PROG(LIBTOOL, glibtool,, /usr/bin)
- fi
- if test -z "$LIBTOOL"; then
- AC_PATH_PROG(LIBTOOL, libtool,, /usr/bin:/usr/local/bin:/usr/freeware/bin)
- fi
- libtool=$LIBTOOL
- AC_SUBST(libtool)
- AC_PACKAGE_NEED_UTILITY($1, "$libtool", libtool, [GNU libtool])
-
- if test -z "$TAR"; then
- AC_PATH_PROG(TAR, tar,, /usr/freeware/bin:/bin:/usr/local/bin:/usr/bin)
- fi
- tar=$TAR
- AC_SUBST(tar)
- if test -z "$ZIP"; then
- AC_PATH_PROG(ZIP, gzip,, /bin:/usr/local/bin:/usr/freeware/bin)
- fi
-
- zip=$ZIP
- AC_SUBST(zip)
-
- if test -z "$MAKEDEPEND"; then
- AC_PATH_PROG(MAKEDEPEND, makedepend, /bin/true)
- fi
- makedepend=$MAKEDEPEND
- AC_SUBST(makedepend)
-
- if test -z "$AWK"; then
- AC_PATH_PROG(AWK, awk,, /bin:/usr/bin)
- fi
- awk=$AWK
- AC_SUBST(awk)
-
- if test -z "$SED"; then
- AC_PATH_PROG(SED, sed,, /bin:/usr/bin)
- fi
- sed=$SED
- AC_SUBST(sed)
-
- if test -z "$ECHO"; then
- AC_PATH_PROG(ECHO, echo,, /bin:/usr/bin)
- fi
- echo=$ECHO
- AC_SUBST(echo)
-
- if test -z "$SORT"; then
- AC_PATH_PROG(SORT, sort,, /bin:/usr/bin)
- fi
- sort=$SORT
- AC_SUBST(sort)
-
- dnl check if symbolic links are supported
- AC_PROG_LN_S
-
- if test "$enable_gettext" = yes; then
- if test -z "$MSGFMT"; then
- AC_PATH_PROG(MSGFMT, msgfmt,, /usr/bin:/usr/freeware/bin)
- fi
- msgfmt=$MSGFMT
- AC_SUBST(msgfmt)
- AC_PACKAGE_NEED_UTILITY($1, "$msgfmt", msgfmt, gettext)
-
- if test -z "$MSGMERGE"; then
- AC_PATH_PROG(MSGMERGE, msgmerge,, /usr/bin:/usr/freeware/bin)
- fi
- msgmerge=$MSGMERGE
- AC_SUBST(msgmerge)
- AC_PACKAGE_NEED_UTILITY($1, "$msgmerge", msgmerge, gettext)
- fi
-
- if test -z "$RPM"; then
- AC_PATH_PROG(RPM, rpm,, /bin:/usr/bin:/usr/freeware/bin)
- fi
- rpm=$RPM
- AC_SUBST(rpm)
-
- dnl .. and what version is rpm
- rpm_version=0
- test -x "$RPM" && rpm_version=`$RPM --version \
- | awk '{print $NF}' | awk -F. '{V=1; print $V}'`
- AC_SUBST(rpm_version)
- dnl At some point in rpm 4.0, rpm can no longer build rpms, and
- dnl rpmbuild is needed (rpmbuild may go way back; not sure)
- dnl So, if rpm version >= 4.0, look for rpmbuild. Otherwise build w/ rpm
- if test $rpm_version -ge 4; then
- AC_PATH_PROG(RPMBUILD, rpmbuild)
- rpmbuild=$RPMBUILD
- else
- rpmbuild=$RPM
- fi
- AC_SUBST(rpmbuild)
- ])
-
-AC_DEFUN([AC_PACKAGE_NEED_UUID_H],
- [ AC_CHECK_HEADERS([uuid.h sys/uuid.h uuid/uuid.h])
- if test $ac_cv_header_uuid_h = no -a \
- $ac_cv_header_sys_uuid_h = no -a \
- $ac_cv_header_uuid_uuid_h = no; then
- echo
- echo 'FATAL ERROR: could not find a valid UUID header.'
- echo 'Install the Universally Unique Identifiers development package.'
- exit 1
- fi
- ])
-
-AC_DEFUN([AC_PACKAGE_NEED_UUIDCOMPARE],
- [ AC_CHECK_FUNCS(uuid_compare)
- if test $ac_cv_func_uuid_compare = yes; then
- libuuid=""
- else
- AC_CHECK_LIB(uuid, uuid_compare,, [
- echo
- echo 'FATAL ERROR: could not find a valid UUID library.'
- echo 'Install the Universally Unique Identifiers library package.'
- exit 1])
- libuuid="-luuid"
- fi
- AC_SUBST(libuuid)
- ])
-
-AC_DEFUN([AC_PACKAGE_NEED_SYS_ACL_H],
- [ AC_CHECK_HEADERS([sys/acl.h])
- if test "$ac_cv_header_sys_acl_h" != "yes"; then
- echo
- echo 'FATAL ERROR: sys/acl.h does not exist.'
- echo 'Install the access control lists (acl) development package.'
- echo 'Alternatively, run "make install-lib" from the acl source.'
- exit 1
- fi
- ])
-
-AC_DEFUN([AC_PACKAGE_NEED_ACL_LIBACL_H],
- [ AC_CHECK_HEADERS([acl/libacl.h])
- if test "$ac_cv_header_acl_libacl_h" != "yes"; then
- echo
- echo 'FATAL ERROR: acl/libacl.h does not exist.'
- echo 'Install the access control lists (acl) development package.'
- echo 'Alternatively, run "make install-lib" from the acl source.'
- exit 1
- fi
- ])
-
-
-AC_DEFUN([AC_PACKAGE_NEED_ACLINIT_LIBACL],
- [ AC_CHECK_LIB(acl, acl_init,, [
- echo
- echo 'FATAL ERROR: could not find a valid Access Control List library.'
- echo 'Install either the libacl (rpm) or the libacl1 (deb) package.'
- echo 'Alternatively, run "make install-lib" from the acl source.'
- exit 1
- ])
- libacl="-lacl"
- test -f `pwd`/../acl/libacl/libacl.la && \
- libacl="`pwd`/../acl/libacl/libacl.la"
- test -f /usr/lib/libacl.la && libacl="/usr/lib/libacl.la"
- AC_SUBST(libacl)
- ])
-
-AC_DEFUN([AC_PACKAGE_NEED_ATTR_XATTR_H],
- [ AC_CHECK_HEADERS([attr/xattr.h])
- if test "$ac_cv_header_attr_xattr_h" != "yes"; then
- echo
- echo 'FATAL ERROR: attr/xattr.h does not exist.'
- echo 'Install the extended attributes (attr) development package.'
- echo 'Alternatively, run "make install-lib" from the attr source.'
- exit 1
- fi
- ])
-
-AC_DEFUN([AC_PACKAGE_NEED_ATTR_ERROR_H],
- [ AC_CHECK_HEADERS([attr/error_context.h])
- if test "$ac_cv_header_attr_error_context_h" != "yes"; then
- echo
- echo 'FATAL ERROR: attr/error_context.h does not exist.'
- echo 'Install the extended attributes (attr) development package.'
- echo 'Alternatively, run "make install-lib" from the attr source.'
- exit 1
- fi
- ])
-
-AC_DEFUN([AC_PACKAGE_NEED_ATTRIBUTES_H],
- [ have_attributes_h=false
- AC_CHECK_HEADERS([attr/attributes.h sys/attributes.h], [have_attributes_h=true], )
- if test "$have_attributes_h" = "false"; then
- echo
- echo 'FATAL ERROR: attributes.h does not exist.'
- echo 'Install the extended attributes (attr) development package.'
- echo 'Alternatively, run "make install-lib" from the attr source.'
- exit 1
- fi
- ])
-
-AC_DEFUN([AC_PACKAGE_NEED_GETXATTR_LIBATTR],
- [ AC_CHECK_LIB(attr, getxattr,, [
- echo
- echo 'FATAL ERROR: could not find a valid Extended Attributes library.'
- echo 'Install the extended attributes (attr) development package.'
- echo 'Alternatively, run "make install-lib" from the attr source.'
- exit 1
- ])
- libattr="-lattr"
- test -f `pwd`/../attr/libattr/libattr.la && \
- libattr="`pwd`/../attr/libattr/libattr.la"
- test -f /usr/lib/libattr.la && libattr="/usr/lib/libattr.la"
- AC_SUBST(libattr)
- ])
-
-AC_DEFUN([AC_PACKAGE_NEED_ATTRGET_LIBATTR],
- [ AC_CHECK_LIB(attr, attr_get,, [
- echo
- echo 'FATAL ERROR: could not find a valid Extended Attributes library.'
- echo 'Install the extended attributes (attr) development package.'
- echo 'Alternatively, run "make install-lib" from the attr source.'
- exit 1
- ])
- libattr="-lattr"
- test -f `pwd`/../attr/libattr/libattr.la && \
- libattr="`pwd`/../attr/libattr/libattr.la"
- test -f /usr/lib/libattr.la && libattr="/usr/lib/libattr.la"
- AC_SUBST(libattr)
- ])
-
-AC_DEFUN([AC_PACKAGE_NEED_ATTRIBUTES_MACROS],
- [ AC_MSG_CHECKING([macros in attr/attributes.h])
- AC_TRY_LINK([
-#include <sys/types.h>
-#include <attr/attributes.h>],
- [ int x = ATTR_SECURE; ], [ echo ok ], [
- echo
- echo 'FATAL ERROR: could not find a current attributes header.'
- echo 'Upgrade the extended attributes (attr) development package.'
- echo 'Alternatively, run "make install-dev" from the attr source.'
- exit 1 ])
- ])
-
-AC_DEFUN([AC_PACKAGE_WANT_NDBM],
- [ AC_CHECK_HEADERS(ndbm.h, [ have_db=true ], [ have_db=false ])
- libgdbm=""
- AC_SUBST(libgdbm)
- AC_SUBST(have_db)
- ])
-
-AC_DEFUN([AC_PACKAGE_WANT_GDBM],
- [ AC_CHECK_HEADERS([gdbm/ndbm.h], [ have_db=true ], [ have_db=false ])
- if test $have_db = true -a -f /usr/lib/libgdbm.a; then
- libgdbm="/usr/lib/libgdbm.a"
- fi
- AC_SUBST(libgdbm)
- AC_SUBST(have_db)
- ])
-
-AC_DEFUN([AC_PACKAGE_NEED_XFS_LIBXFS_H],
- [ AC_CHECK_HEADERS([xfs/libxfs.h])
- if test "$ac_cv_header_xfs_libxfs_h" != "yes"; then
- echo
- echo 'FATAL ERROR: cannot find a valid <xfs/libxfs.h> header file.'
- echo 'Install or upgrade the XFS development package.'
- echo 'Alternatively, run "make install-dev" from the xfsprogs source.'
- exit 1
- fi
- ])
-
-AC_DEFUN([AC_PACKAGE_NEED_XFS_XQM_H],
- [ AC_CHECK_HEADERS([xfs/xqm.h])
- if test "$ac_cv_header_xfs_xqm_h" != "yes"; then
- echo
- echo 'FATAL ERROR: cannot find a valid <xfs/xqm.h> header file.'
- echo 'Install or upgrade the XFS development package.'
- echo 'Alternatively, run "make install-dev" from the xfsprogs source.'
- exit 1
- fi
- ])
-
-AC_DEFUN([AC_PACKAGE_NEED_XFS_HANDLE_H],
- [ AC_CHECK_HEADERS([xfs/handle.h])
- if test "$ac_cv_header_xfs_handle_h" != "yes"; then
- echo
- echo 'FATAL ERROR: cannot find a valid <xfs/handle.h> header file.'
- echo 'Install or upgrade the XFS development package.'
- echo 'Alternatively, run "make install-dev" from the xfsprogs source.'
- exit 1
- fi
- ])
-
-AC_DEFUN([AC_PACKAGE_NEED_LIBXFSINIT_LIBXFS],
- [ AC_CHECK_LIB(xfs, libxfs_init,, [
- echo
- echo 'FATAL ERROR: could not find a valid XFS base library.'
- echo 'Install or upgrade the XFS library package.'
- echo 'Alternatively, run "make install-dev" from the xfsprogs source.'
- exit 1
- ])
- libxfs="-lxfs"
- test -f `pwd`/../xfsprogs/libxfs/libxfs.la && \
- libxfs="`pwd`/../xfsprogs/libxfs/libxfs.la"
- test -f /usr/lib/libxfs.la && libxfs="/usr/lib/libxfs.la"
- AC_SUBST(libxfs)
- ])
-
-AC_DEFUN([AC_PACKAGE_NEED_OPEN_BY_FSHANDLE],
- [ AC_CHECK_LIB(handle, open_by_fshandle,, [
- echo
- echo 'FATAL ERROR: could not find a current XFS handle library.'
- echo 'Install or upgrade the XFS library package.'
- echo 'Alternatively, run "make install-dev" from the xfsprogs source.'
- exit 1
- ])
- libhdl="-lhandle"
- test -f `pwd`/../xfsprogs/libhandle/libhandle.la && \
- libhdl="`pwd`/../xfsprogs/libhandle/libhandle.la"
- test -f /usr/lib/libhandle.la && libhdl="/usr/lib/libhandle.la"
- AC_SUBST(libhdl)
- ])
-
-AC_DEFUN([AC_PACKAGE_NEED_ATTRLIST_LIBHANDLE],
- [ AC_CHECK_LIB(handle, attr_list_by_handle,, [
- echo
- echo 'FATAL ERROR: could not find a current XFS handle library.'
- echo 'Install or upgrade the XFS library package.'
- echo 'Alternatively, run "make install-lib" from the xfsprogs source.'
- exit 1
- ])
- libhdl="-lhandle"
- test -f `pwd`/../xfsprogs/libhandle/libhandle.la && \
- libhdl="`pwd`/../xfsprogs/libhandle/libhandle.la"
- test -f /usr/lib/libhandle.la && libhdl="/usr/lib/libhandle.la"
- AC_SUBST(libhdl)
- ])
-
-AC_DEFUN([AC_PACKAGE_NEED_XFSCTL_MACRO],
- [ AC_MSG_CHECKING([xfsctl from xfs/libxfs.h])
- AC_TRY_LINK([#include <xfs/libxfs.h>], [ int x = xfsctl(0, 0, 0, 0); ],
- [ echo ok ],
- [ echo
- echo 'FATAL ERROR: cannot find required macros in the XFS headers.'
- echo 'Upgrade your XFS programs (xfsprogs) development package.'
- echo 'Alternatively, run "make install-dev" from the xfsprogs source.'
- exit 1
- ])
- ])
-
+m4_include([./m4/package_acldev.m4])
+m4_include([./m4/package_aiodev.m4])
+m4_include([./m4/package_attrdev.m4])
+m4_include([./m4/package_gdbmdev.m4])
+m4_include([./m4/package_globals.m4])
+m4_include([./m4/package_utilies.m4])
+m4_include([./m4/package_uuiddev.m4])
+m4_include([./m4/package_xfslibs.m4])
--- /dev/null
+/*
+ * Copyright (c) 2004 SuSE, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ *
+ * aio-stress
+ *
+ * will open or create each file on the command line, and start a series
+ * of aio to it.
+ *
+ * aio is done in a rotating loop. first file1 gets 8 requests, then
+ * file2, then file3 etc. As each file finishes writing, it is switched
+ * to reads
+ *
+ * io buffers are aligned in case you want to do raw io
+ *
+ * compile with gcc -Wall -laio -lpthread -o aio-stress aio-stress.c
+ *
+ * run aio-stress -h to see the options
+ *
+ * Please mail Chris Mason (mason@suse.com) with bug reports or patches
+ */
+#define _FILE_OFFSET_BITS 64
+#define PROG_VERSION "0.18"
+#define NEW_GETEVENTS
+
+#include <stdio.h>
+#include <errno.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <libaio.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <pthread.h>
+
+#define IO_FREE 0
+#define IO_PENDING 1
+#define RUN_FOREVER -1
+
+#ifndef O_DIRECT
+#define O_DIRECT 040000 /* direct disk access hint */
+#endif
+
+enum {
+ WRITE,
+ READ,
+ RWRITE,
+ RREAD,
+ LAST_STAGE,
+};
+
+#define USE_MALLOC 0
+#define USE_SHM 1
+#define USE_SHMFS 2
+
+/*
+ * various globals, these are effectively read only by the time the threads
+ * are started
+ */
+long stages = 0;
+unsigned long page_size_mask;
+int o_direct = 0;
+int o_sync = 0;
+int latency_stats = 0;
+int io_iter = 8;
+int iterations = RUN_FOREVER;
+int max_io_submit = 0;
+long rec_len = 64 * 1024;
+int depth = 64;
+int num_threads = 1;
+int num_contexts = 1;
+off_t context_offset = 2 * 1024 * 1024;
+int fsync_stages = 1;
+int use_shm = 0;
+int shm_id;
+char *unaligned_buffer = NULL;
+char *aligned_buffer = NULL;
+int padded_reclen = 0;
+int stonewall = 1;
+int verify = 0;
+char *verify_buf = NULL;
+
+struct io_unit;
+struct thread_info;
+
+/* pthread mutexes and other globals for keeping the threads in sync */
+pthread_cond_t stage_cond = PTHREAD_COND_INITIALIZER;
+pthread_mutex_t stage_mutex = PTHREAD_MUTEX_INITIALIZER;
+int threads_ending = 0;
+int threads_starting = 0;
+struct timeval global_stage_start_time;
+struct thread_info *global_thread_info;
+
+/*
+ * latencies during io_submit are measured, these are the
+ * granularities for deviations
+ */
+#define DEVIATIONS 6
+int deviations[DEVIATIONS] = { 100, 250, 500, 1000, 5000, 10000 };
+struct io_latency {
+ double max;
+ double min;
+ double total_io;
+ double total_lat;
+ double deviations[DEVIATIONS];
+};
+
+/* container for a series of operations to a file */
+struct io_oper {
+ /* already open file descriptor, valid for whatever operation you want */
+ int fd;
+
+ /* starting byte of the operation */
+ off_t start;
+
+ /* ending byte of the operation */
+ off_t end;
+
+ /* size of the read/write buffer */
+ int reclen;
+
+ /* max number of pending requests before a wait is triggered */
+ int depth;
+
+ /* current number of pending requests */
+ int num_pending;
+
+ /* last error, zero if there were none */
+ int last_err;
+
+ /* total number of errors hit. */
+ int num_err;
+
+ /* read,write, random, etc */
+ int rw;
+
+ /* number of ios that will get sent to aio */
+ int total_ios;
+
+ /* number of ios we've already sent */
+ int started_ios;
+
+ /* last offset used in an io operation */
+ off_t last_offset;
+
+ /* stonewalled = 1 when we got cut off before submitting all our ios */
+ int stonewalled;
+
+ /* list management */
+ struct io_oper *next;
+ struct io_oper *prev;
+
+ struct timeval start_time;
+
+ char *file_name;
+};
+
+/* a single io, and all the tracking needed for it */
+struct io_unit {
+ /* note, iocb must go first! */
+ struct iocb iocb;
+
+ /* pointer to parent io operation struct */
+ struct io_oper *io_oper;
+
+ /* aligned buffer */
+ char *buf;
+
+ /* size of the aligned buffer (record size) */
+ int buf_size;
+
+ /* state of this io unit (free, pending, done) */
+ int busy;
+
+ /* result of last operation */
+ long res;
+
+ struct io_unit *next;
+};
+
+struct thread_info {
+ io_context_t io_ctx;
+ pthread_t tid;
+
+ /* allocated array of io_unit structs */
+ struct io_unit *ios;
+
+ /* list of io units available for io */
+ struct io_unit *free_ious;
+
+ /* number of io units in the ios array */
+ int num_global_ios;
+
+ /* number of io units in flight */
+ int num_global_pending;
+
+ /* preallocated array of iocb pointers, only used in run_active */
+ struct iocb **iocbs;
+
+ /* preallocated array of events */
+ struct io_event *events;
+
+ /* size of the events array */
+ int num_global_events;
+
+ /* latency stats for io_submit */
+ struct io_latency io_submit_latency;
+
+ /* list of operations still in progress, and of those finished */
+ struct io_oper *active_opers;
+ struct io_oper *finished_opers;
+
+ /* number of files this thread is doing io on */
+ int num_files;
+
+ /* how much io this thread did in the last stage */
+ double stage_mb_trans;
+};
+
+static double time_since(struct timeval *tv) {
+ double sec, usec;
+ double ret;
+ struct timeval stop;
+ gettimeofday(&stop, NULL);
+ sec = stop.tv_sec - tv->tv_sec;
+ usec = stop.tv_usec - tv->tv_usec;
+ if (sec > 0 && usec < 0) {
+ sec--;
+ usec += 1000000;
+ }
+ ret = sec + usec / (double)1000000;
+ if (ret < 0)
+ ret = 0;
+ return ret;
+}
+
+static void calc_latency(struct timeval *tv, struct io_latency *lat)
+{
+ double delta;
+ int i;
+ delta = time_since(tv);
+ delta = delta * 1000;
+
+ if (delta > lat->max)
+ lat->max = delta;
+ if (!lat->min || delta < lat->min)
+ lat->min = delta;
+ lat->total_io++;
+ lat->total_lat += delta;
+ for (i = 0 ; i < DEVIATIONS ; i++) {
+ if (delta < deviations[i]) {
+ lat->deviations[i]++;
+ break;
+ }
+ }
+}
+
+static void oper_list_add(struct io_oper *oper, struct io_oper **list)
+{
+ if (!*list) {
+ *list = oper;
+ oper->prev = oper->next = oper;
+ return;
+ }
+ oper->prev = (*list)->prev;
+ oper->next = *list;
+ (*list)->prev->next = oper;
+ (*list)->prev = oper;
+ return;
+}
+
+static void oper_list_del(struct io_oper *oper, struct io_oper **list)
+{
+ if ((*list)->next == (*list)->prev && *list == (*list)->next) {
+ *list = NULL;
+ return;
+ }
+ oper->prev->next = oper->next;
+ oper->next->prev = oper->prev;
+ if (*list == oper)
+ *list = oper->next;
+}
+
+/* worker func to check error fields in the io unit */
+static int check_finished_io(struct io_unit *io) {
+ int i;
+ if (io->res != io->buf_size) {
+
+ struct stat s;
+ fstat(io->io_oper->fd, &s);
+
+ /*
+ * If file size is large enough for the read, then this short
+ * read is an error.
+ */
+ if ((io->io_oper->rw == READ || io->io_oper->rw == RREAD) &&
+ s.st_size > (io->iocb.u.c.offset + io->res)) {
+
+ fprintf(stderr, "io err %lu (%s) op %d, off %Lu size %d\n",
+ io->res, strerror(-io->res), io->iocb.aio_lio_opcode,
+ io->iocb.u.c.offset, io->buf_size);
+ io->io_oper->last_err = io->res;
+ io->io_oper->num_err++;
+ return -1;
+ }
+ }
+ if (verify && io->io_oper->rw == READ) {
+ if (memcmp(io->buf, verify_buf, io->io_oper->reclen)) {
+ fprintf(stderr, "verify error, file %s offset %Lu contents (offset:bad:good):\n",
+ io->io_oper->file_name, io->iocb.u.c.offset);
+
+ for (i = 0 ; i < io->io_oper->reclen ; i++) {
+ if (io->buf[i] != verify_buf[i]) {
+ fprintf(stderr, "%d:%c:%c ", i, io->buf[i], verify_buf[i]);
+ }
+ }
+ fprintf(stderr, "\n");
+ }
+
+ }
+ return 0;
+}
+
+/* worker func to check the busy bits and get an io unit ready for use */
+static int grab_iou(struct io_unit *io, struct io_oper *oper) {
+ if (io->busy == IO_PENDING)
+ return -1;
+
+ io->busy = IO_PENDING;
+ io->res = 0;
+ io->io_oper = oper;
+ return 0;
+}
+
+char *stage_name(int rw) {
+ switch(rw) {
+ case WRITE:
+ return "write";
+ case READ:
+ return "read";
+ case RWRITE:
+ return "random write";
+ case RREAD:
+ return "random read";
+ }
+ return "unknown";
+}
+
+static inline double oper_mb_trans(struct io_oper *oper) {
+ return ((double)oper->started_ios * (double)oper->reclen) /
+ (double)(1024 * 1024);
+}
+
+static void print_time(struct io_oper *oper) {
+ double runtime;
+ double tput;
+ double mb;
+
+ runtime = time_since(&oper->start_time);
+ mb = oper_mb_trans(oper);
+ tput = mb / runtime;
+ fprintf(stderr, "%s on %s (%.2f MB/s) %.2f MB in %.2fs\n",
+ stage_name(oper->rw), oper->file_name, tput, mb, runtime);
+}
+
+static void print_latency(struct thread_info *t) {
+ struct io_latency *lat = &t->io_submit_latency;
+ double avg = lat->total_lat / lat->total_io;
+ int i;
+ double total_counted = 0;
+ fprintf(stderr, "latency min %.2f avg %.2f max %.2f\n\t",
+ lat->min, avg, lat->max);
+
+ for (i = 0 ; i < DEVIATIONS ; i++) {
+ fprintf(stderr, " %.0f < %d", lat->deviations[i], deviations[i]);
+ total_counted += lat->deviations[i];
+ }
+ if (total_counted && lat->total_io - total_counted)
+ fprintf(stderr, " < %.0f", lat->total_io - total_counted);
+ fprintf(stderr, "\n");
+ memset(&t->io_submit_latency, 0, sizeof(t->io_submit_latency));
+}
+
+/*
+ * updates the fields in the io operation struct that belongs to this
+ * io unit, and make the io unit reusable again
+ */
+void finish_io(struct thread_info *t, struct io_unit *io, long result) {
+ struct io_oper *oper = io->io_oper;
+
+ io->res = result;
+ io->busy = IO_FREE;
+ io->next = t->free_ious;
+ t->free_ious = io;
+ oper->num_pending--;
+ t->num_global_pending--;
+ check_finished_io(io);
+ if (oper->num_pending == 0 &&
+ (oper->started_ios == oper->total_ios || oper->stonewalled))
+ {
+ print_time(oper);
+ }
+}
+
+int read_some_events(struct thread_info *t) {
+ struct io_unit *event_io;
+ struct io_event *event;
+ int nr;
+ int i;
+ int min_nr = io_iter;
+
+ if (t->num_global_pending < io_iter)
+ min_nr = t->num_global_pending;
+
+#ifdef NEW_GETEVENTS
+ nr = io_getevents(t->io_ctx, min_nr, t->num_global_events, t->events,NULL);
+#else
+ nr = io_getevents(t->io_ctx, t->num_global_events, t->events, NULL);
+#endif
+ if (nr <= 0)
+ return nr;
+
+ for (i = 0 ; i < nr ; i++) {
+ event = t->events + i;
+ event_io = (struct io_unit *)((unsigned long)event->obj);
+ finish_io(t, event_io, event->res);
+ }
+ return nr;
+}
+
+/*
+ * finds a free io unit, waiting for pending requests if required. returns
+ * null if none could be found
+ */
+static struct io_unit *find_iou(struct thread_info *t, struct io_oper *oper)
+{
+ struct io_unit *event_io;
+ int nr;
+
+retry:
+ if (t->free_ious) {
+ event_io = t->free_ious;
+ t->free_ious = t->free_ious->next;
+ if (grab_iou(event_io, oper)) {
+ fprintf(stderr, "io unit on free list but not free\n");
+ abort();
+ }
+ return event_io;
+ }
+ nr = read_some_events(t);
+ if (nr > 0)
+ goto retry;
+ else
+ fprintf(stderr, "no free ious after read_some_events\n");
+ return NULL;
+}
+
+/*
+ * wait for all pending requests for this io operation to finish
+ */
+static int io_oper_wait(struct thread_info *t, struct io_oper *oper) {
+ struct io_event event;
+ struct io_unit *event_io;
+
+ if (oper == NULL) {
+ return 0;
+ }
+
+ if (oper->num_pending == 0)
+ goto done;
+
+ /* this func is not speed sensitive, no need to go wild reading
+ * more than one event at a time
+ */
+#ifdef NEW_GETEVENTS
+ while(io_getevents(t->io_ctx, 1, 1, &event, NULL) > 0) {
+#else
+ while(io_getevents(t->io_ctx, 1, &event, NULL) > 0) {
+#endif
+ event_io = (struct io_unit *)((unsigned long)event.obj);
+
+ finish_io(t, event_io, event.res);
+
+ if (oper->num_pending == 0)
+ break;
+ }
+done:
+ if (oper->num_err) {
+ fprintf(stderr, "%u errors on oper, last %u\n",
+ oper->num_err, oper->last_err);
+ }
+ return 0;
+}
+
+off_t random_byte_offset(struct io_oper *oper) {
+ off_t num;
+ off_t rand_byte = oper->start;
+ off_t range;
+ off_t offset = 1;
+
+ range = (oper->end - oper->start) / (1024 * 1024);
+ if ((page_size_mask+1) > (1024 * 1024))
+ offset = (page_size_mask+1) / (1024 * 1024);
+ if (range < offset)
+ range = 0;
+ else
+ range -= offset;
+
+ /* find a random mb offset */
+ num = 1 + (int)((double)range * rand() / (RAND_MAX + 1.0 ));
+ rand_byte += num * 1024 * 1024;
+
+ /* find a random byte offset */
+ num = 1 + (int)((double)(1024 * 1024) * rand() / (RAND_MAX + 1.0));
+
+ /* page align */
+ num = (num + page_size_mask) & ~page_size_mask;
+ rand_byte += num;
+
+ if (rand_byte + oper->reclen > oper->end) {
+ rand_byte -= oper->reclen;
+ }
+ return rand_byte;
+}
+
+/*
+ * build an aio iocb for an operation, based on oper->rw and the
+ * last offset used. This finds the struct io_unit that will be attached
+ * to the iocb, and things are ready for submission to aio after this
+ * is called.
+ *
+ * returns null on error
+ */
+static struct io_unit *build_iocb(struct thread_info *t, struct io_oper *oper)
+{
+ struct io_unit *io;
+ off_t rand_byte;
+
+ io = find_iou(t, oper);
+ if (!io) {
+ fprintf(stderr, "unable to find io unit\n");
+ return NULL;
+ }
+
+ switch(oper->rw) {
+ case WRITE:
+ io_prep_pwrite(&io->iocb,oper->fd, io->buf, oper->reclen,
+ oper->last_offset);
+ oper->last_offset += oper->reclen;
+ break;
+ case READ:
+ io_prep_pread(&io->iocb,oper->fd, io->buf, oper->reclen,
+ oper->last_offset);
+ oper->last_offset += oper->reclen;
+ break;
+ case RREAD:
+ rand_byte = random_byte_offset(oper);
+ oper->last_offset = rand_byte;
+ io_prep_pread(&io->iocb,oper->fd, io->buf, oper->reclen,
+ rand_byte);
+ break;
+ case RWRITE:
+ rand_byte = random_byte_offset(oper);
+ oper->last_offset = rand_byte;
+ io_prep_pwrite(&io->iocb,oper->fd, io->buf, oper->reclen,
+ rand_byte);
+
+ break;
+ }
+
+ return io;
+}
+
+/*
+ * wait for any pending requests, and then free all ram associated with
+ * an operation. returns the last error the operation hit (zero means none)
+ */
+static int
+finish_oper(struct thread_info *t, struct io_oper *oper)
+{
+ unsigned long last_err;
+
+ io_oper_wait(t, oper);
+ last_err = oper->last_err;
+ if (oper->num_pending > 0) {
+ fprintf(stderr, "oper num_pending is %d\n", oper->num_pending);
+ }
+ close(oper->fd);
+ free(oper);
+ return last_err;
+}
+
+/*
+ * allocates an io operation and fills in all the fields. returns
+ * null on error
+ */
+static struct io_oper *
+create_oper(int fd, int rw, off_t start, off_t end, int reclen, int depth,
+ int iter, char *file_name)
+{
+ struct io_oper *oper;
+
+ oper = malloc (sizeof(*oper));
+ if (!oper) {
+ fprintf(stderr, "unable to allocate io oper\n");
+ return NULL;
+ }
+ memset(oper, 0, sizeof(*oper));
+
+ oper->depth = depth;
+ oper->start = start;
+ oper->end = end;
+ oper->last_offset = oper->start;
+ oper->fd = fd;
+ oper->reclen = reclen;
+ oper->rw = rw;
+ oper->total_ios = (oper->end - oper->start) / oper->reclen;
+ oper->file_name = file_name;
+
+ return oper;
+}
+
+/*
+ * does setup on num_ios worth of iocbs, but does not actually
+ * start any io
+ */
+int build_oper(struct thread_info *t, struct io_oper *oper, int num_ios,
+ struct iocb **my_iocbs)
+{
+ int i;
+ struct io_unit *io;
+
+ if (oper->started_ios == 0)
+ gettimeofday(&oper->start_time, NULL);
+
+ if (num_ios == 0)
+ num_ios = oper->total_ios;
+
+ if ((oper->started_ios + num_ios) > oper->total_ios)
+ num_ios = oper->total_ios - oper->started_ios;
+
+ for( i = 0 ; i < num_ios ; i++) {
+ io = build_iocb(t, oper);
+ if (!io) {
+ return -1;
+ }
+ my_iocbs[i] = &io->iocb;
+ }
+ return num_ios;
+}
+
+/*
+ * runs through the iocbs in the array provided and updates
+ * counters in the associated oper struct
+ */
+static void update_iou_counters(struct iocb **my_iocbs, int nr)
+{
+ struct io_unit *io;
+ int i;
+ for (i = 0 ; i < nr ; i++) {
+ io = (struct io_unit *)(my_iocbs[i]);
+ io->io_oper->num_pending++;
+ io->io_oper->started_ios++;
+ }
+}
+
+/* starts some io for a given file, returns zero if all went well */
+int run_built(struct thread_info *t, int num_ios, struct iocb **my_iocbs)
+{
+ int ret;
+ struct timeval start_time;
+
+resubmit:
+ gettimeofday(&start_time, NULL);
+ ret = io_submit(t->io_ctx, num_ios, my_iocbs);
+ calc_latency(&start_time, &t->io_submit_latency);
+ if (ret != num_ios) {
+ /* some ios got through */
+ if (ret > 0) {
+ update_iou_counters(my_iocbs, ret);
+ my_iocbs += ret;
+ t->num_global_pending += ret;
+ num_ios -= ret;
+ }
+ /*
+ * we've used all the requests allocated in aio_init, wait and
+ * retry
+ */
+ if (ret > 0 || ret == -EAGAIN) {
+ if ((ret = read_some_events(t) > 0)) {
+ goto resubmit;
+ }
+ }
+
+ fprintf(stderr, "ret %d (%s) on io_submit\n", ret, strerror(-ret));
+ return -1;
+ }
+ update_iou_counters(my_iocbs, ret);
+ t->num_global_pending += ret;
+ return 0;
+}
+
+/*
+ * changes oper->rw to the next in a command sequence, or returns zero
+ * to say this operation is really, completely done for
+ */
+static int restart_oper(struct io_oper *oper) {
+ int new_rw = 0;
+ if (oper->last_err)
+ return 0;
+
+ /* this switch falls through */
+ switch(oper->rw) {
+ case WRITE:
+ if (stages & (1 << READ))
+ new_rw = READ;
+ case READ:
+ if (!new_rw && stages & (1 << RWRITE))
+ new_rw = RWRITE;
+ case RWRITE:
+ if (!new_rw && stages & (1 << RREAD))
+ new_rw = RREAD;
+ }
+
+ if (new_rw) {
+ oper->started_ios = 0;
+ oper->last_offset = oper->start;
+ oper->stonewalled = 0;
+
+ /*
+ * we're restarting an operation with pending requests, so the
+ * timing info won't be printed by finish_io. Printing it here
+ */
+ if (oper->num_pending)
+ print_time(oper);
+
+ oper->rw = new_rw;
+ return 1;
+ }
+ return 0;
+}
+
+static int oper_runnable(struct io_oper *oper) {
+ struct stat buf;
+ int ret;
+
+ /* first context is always runnable, if started_ios > 0, no need to
+ * redo the calculations
+ */
+ if (oper->started_ios || oper->start == 0)
+ return 1;
+ /*
+ * only the sequential phases force delays in starting */
+ if (oper->rw >= RWRITE)
+ return 1;
+ ret = fstat(oper->fd, &buf);
+ if (ret < 0) {
+ perror("fstat");
+ exit(1);
+ }
+ if (S_ISREG(buf.st_mode) && buf.st_size < oper->start)
+ return 0;
+ return 1;
+}
+
+/*
+ * runs through all the io operations on the active list, and starts
+ * a chunk of io on each. If any io operations are completely finished,
+ * it either switches them to the next stage or puts them on the
+ * finished list.
+ *
+ * this function stops after max_io_submit iocbs are sent down the
+ * pipe, even if it has not yet touched all the operations on the
+ * active list. Any operations that have finished are moved onto
+ * the finished_opers list.
+ */
+static int run_active_list(struct thread_info *t,
+ int io_iter,
+ int max_io_submit)
+{
+ struct io_oper *oper;
+ struct io_oper *built_opers = NULL;
+ struct iocb **my_iocbs = t->iocbs;
+ int ret = 0;
+ int num_built = 0;
+
+ oper = t->active_opers;
+ while(oper) {
+ if (!oper_runnable(oper)) {
+ oper = oper->next;
+ if (oper == t->active_opers)
+ break;
+ continue;
+ }
+ ret = build_oper(t, oper, io_iter, my_iocbs);
+ if (ret >= 0) {
+ my_iocbs += ret;
+ num_built += ret;
+ oper_list_del(oper, &t->active_opers);
+ oper_list_add(oper, &built_opers);
+ oper = t->active_opers;
+ if (num_built + io_iter > max_io_submit)
+ break;
+ } else
+ break;
+ }
+ if (num_built) {
+ ret = run_built(t, num_built, t->iocbs);
+ if (ret < 0) {
+ fprintf(stderr, "error %d on run_built\n", ret);
+ exit(1);
+ }
+ while(built_opers) {
+ oper = built_opers;
+ oper_list_del(oper, &built_opers);
+ oper_list_add(oper, &t->active_opers);
+ if (oper->started_ios == oper->total_ios) {
+ oper_list_del(oper, &t->active_opers);
+ oper_list_add(oper, &t->finished_opers);
+ }
+ }
+ }
+ return 0;
+}
+
+void drop_shm() {
+ int ret;
+ struct shmid_ds ds;
+ if (use_shm != USE_SHM)
+ return;
+
+ ret = shmctl(shm_id, IPC_RMID, &ds);
+ if (ret) {
+ perror("shmctl IPC_RMID");
+ }
+}
+
+void aio_setup(io_context_t *io_ctx, int n)
+{
+ int res = io_queue_init(n, io_ctx);
+ if (res != 0) {
+ fprintf(stderr, "io_queue_setup(%d) returned %d (%s)\n",
+ n, res, strerror(-res));
+ exit(3);
+ }
+}
+
+/*
+ * allocate io operation and event arrays for a given thread
+ */
+int setup_ious(struct thread_info *t,
+ int num_files, int depth,
+ int reclen, int max_io_submit) {
+ int i;
+ size_t bytes = num_files * depth * sizeof(*t->ios);
+
+ t->ios = malloc(bytes);
+ if (!t->ios) {
+ fprintf(stderr, "unable to allocate io units\n");
+ return -1;
+ }
+ memset(t->ios, 0, bytes);
+
+ for (i = 0 ; i < depth * num_files; i++) {
+ t->ios[i].buf = aligned_buffer;
+ aligned_buffer += padded_reclen;
+ t->ios[i].buf_size = reclen;
+ if (verify)
+ memset(t->ios[i].buf, 'b', reclen);
+ else
+ memset(t->ios[i].buf, 0, reclen);
+ t->ios[i].next = t->free_ious;
+ t->free_ious = t->ios + i;
+ }
+ if (verify) {
+ verify_buf = aligned_buffer;
+ memset(verify_buf, 'b', reclen);
+ }
+
+ t->iocbs = malloc(sizeof(struct iocb *) * max_io_submit);
+ if (!t->iocbs) {
+ fprintf(stderr, "unable to allocate iocbs\n");
+ goto free_buffers;
+ }
+
+ memset(t->iocbs, 0, max_io_submit * sizeof(struct iocb *));
+
+ t->events = malloc(sizeof(struct io_event) * depth * num_files);
+ if (!t->events) {
+ fprintf(stderr, "unable to allocate ram for events\n");
+ goto free_buffers;
+ }
+ memset(t->events, 0, num_files * sizeof(struct io_event)*depth);
+
+ t->num_global_ios = num_files * depth;
+ t->num_global_events = t->num_global_ios;
+ return 0;
+
+free_buffers:
+ if (t->ios)
+ free(t->ios);
+ if (t->iocbs)
+ free(t->iocbs);
+ if (t->events)
+ free(t->events);
+ return -1;
+}
+
+/*
+ * The buffers used for file data are allocated as a single big
+ * malloc, and then each thread and operation takes a piece and uses
+ * that for file data. This lets us do a large shm or bigpages alloc
+ * and without trying to find a special place in each thread to map the
+ * buffers to
+ */
+int setup_shared_mem(int num_threads, int num_files, int depth,
+ int reclen, int max_io_submit)
+{
+ char *p = NULL;
+ size_t total_ram;
+
+ padded_reclen = (reclen + page_size_mask) / (page_size_mask+1);
+ padded_reclen = padded_reclen * (page_size_mask+1);
+ total_ram = num_files * depth * padded_reclen + num_threads;
+ if (verify)
+ total_ram += padded_reclen;
+
+ if (use_shm == USE_MALLOC) {
+ p = malloc(total_ram + page_size_mask);
+ } else if (use_shm == USE_SHM) {
+ shm_id = shmget(IPC_PRIVATE, total_ram, IPC_CREAT | 0700);
+ if (shm_id < 0) {
+ perror("shmget");
+ drop_shm();
+ goto free_buffers;
+ }
+ p = shmat(shm_id, (char *)0x50000000, 0);
+ if ((long)p == -1) {
+ perror("shmat");
+ goto free_buffers;
+ }
+ /* won't really be dropped until we shmdt */
+ drop_shm();
+ } else if (use_shm == USE_SHMFS) {
+ char mmap_name[16]; /* /dev/shm/ + null + XXXXXX */
+ int fd;
+
+ strcpy(mmap_name, "/dev/shm/XXXXXX");
+ fd = mkstemp(mmap_name);
+ if (fd < 0) {
+ perror("mkstemp");
+ goto free_buffers;
+ }
+ unlink(mmap_name);
+ ftruncate(fd, total_ram);
+ shm_id = fd;
+ p = mmap((char *)0x50000000, total_ram,
+ PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+ if (p == MAP_FAILED) {
+ perror("mmap");
+ goto free_buffers;
+ }
+ }
+ if (!p) {
+ fprintf(stderr, "unable to allocate buffers\n");
+ goto free_buffers;
+ }
+ unaligned_buffer = p;
+ p = (char*)((intptr_t) (p + page_size_mask) & ~page_size_mask);
+ aligned_buffer = p;
+ return 0;
+
+free_buffers:
+ drop_shm();
+ if (unaligned_buffer)
+ free(unaligned_buffer);
+ return -1;
+}
+
+/*
+ * runs through all the thread_info structs and calculates a combined
+ * throughput
+ */
+void global_thread_throughput(struct thread_info *t, char *this_stage) {
+ int i;
+ double runtime = time_since(&global_stage_start_time);
+ double total_mb = 0;
+ double min_trans = 0;
+
+ for (i = 0 ; i < num_threads ; i++) {
+ total_mb += global_thread_info[i].stage_mb_trans;
+ if (!min_trans || t->stage_mb_trans < min_trans)
+ min_trans = t->stage_mb_trans;
+ }
+ if (total_mb) {
+ fprintf(stderr, "%s throughput (%.2f MB/s) ", this_stage,
+ total_mb / runtime);
+ fprintf(stderr, "%.2f MB in %.2fs", total_mb, runtime);
+ if (stonewall)
+ fprintf(stderr, " min transfer %.2fMB", min_trans);
+ fprintf(stderr, "\n");
+ }
+}
+
+
+/* this is the meat of the state machine. There is a list of
+ * active operations structs, and as each one finishes the required
+ * io it is moved to a list of finished operations. Once they have
+ * all finished whatever stage they were in, they are given the chance
+ * to restart and pick a different stage (read/write/random read etc)
+ *
+ * various timings are printed in between the stages, along with
+ * thread synchronization if there are more than one threads.
+ */
+int worker(struct thread_info *t)
+{
+ struct io_oper *oper;
+ char *this_stage = NULL;
+ struct timeval stage_time;
+ int status = 0;
+ int iteration = 0;
+ int cnt;
+
+ aio_setup(&t->io_ctx, 512);
+
+restart:
+ printf("Starting %s iter:%d \n", __FUNCTION__,iteration);
+ if (num_threads > 1) {
+ printf("num_threads %d \n", num_threads);
+ pthread_mutex_lock(&stage_mutex);
+ threads_starting++;
+ if (threads_starting == num_threads) {
+ threads_ending = 0;
+ gettimeofday(&global_stage_start_time, NULL);
+ pthread_cond_broadcast(&stage_cond);
+ }
+ while (threads_starting != num_threads)
+ pthread_cond_wait(&stage_cond, &stage_mutex);
+ pthread_mutex_unlock(&stage_mutex);
+ }
+ if (t->active_opers) {
+// printf("active_opers %p line:%d\n", t->active_opers, __LINE__);
+ this_stage = stage_name(t->active_opers->rw);
+ gettimeofday(&stage_time, NULL);
+ t->stage_mb_trans = 0;
+ }
+ cnt = 0;
+ /* first we send everything through aio */
+// printf("cnt:%d max_iterations:%d oper:%p\n",cnt, iterations,oper);
+
+ while (t->active_opers && (cnt < iterations || iterations == RUN_FOREVER)) {
+// printf("active_opers %p line:%d cnt:%d ", t->active_opers,__LINE__,cnt);
+ if (stonewall && threads_ending) {
+ oper = t->active_opers;
+ oper->stonewalled = 1;
+ oper_list_del(oper, &t->active_opers);
+ oper_list_add(oper, &t->finished_opers);
+// printf(" if branch\n");
+ } else {
+ run_active_list(t, io_iter, max_io_submit);
+// printf(" else branch\n");
+ }
+ cnt++;
+ }
+
+ if (latency_stats)
+ print_latency(t);
+
+ /* then we wait for all the operations to finish */
+ oper = t->finished_opers;
+// printf("line:%d oper:%p\n", __LINE__, oper);
+ do {
+ io_oper_wait(t, oper);
+ if (oper != NULL) {
+ oper = oper->next;
+ }
+ } while (oper != t->finished_opers);
+// printf("finished_opers %p line:%d\n", t->finished_opers,__LINE__);
+
+ /* then we do an fsync to get the timing for any future operations
+ * right, and check to see if any of these need to get restarted
+ */
+ oper = t->finished_opers;
+// printf("oper %p line:%d\n", oper,__LINE__);
+ while (oper) {
+ if (fsync_stages)
+ fsync(oper->fd);
+ t->stage_mb_trans += oper_mb_trans(oper);
+ if (restart_oper(oper)) {
+ oper_list_del(oper, &t->finished_opers);
+ oper_list_add(oper, &t->active_opers);
+ oper = t->finished_opers;
+ continue;
+ }
+ oper = oper->next;
+ if (oper == t->finished_opers)
+ break;
+ }
+
+ if (t->stage_mb_trans && t->num_files > 0) {
+// printf("num_files %d line:%d\n", t->num_files,__LINE__);
+ double seconds = time_since(&stage_time);
+ fprintf(stderr, "thread %d %s totals (%.2f MB/s) %.2f MB in %.2fs\n",
+ t - global_thread_info, this_stage, t->stage_mb_trans/seconds,
+ t->stage_mb_trans, seconds);
+ }
+
+ if (num_threads > 1) {
+// printf("num_threads %d line:%d\n", num_threads,__LINE__);
+ pthread_mutex_lock(&stage_mutex);
+ threads_ending++;
+ if (threads_ending == num_threads) {
+ threads_starting = 0;
+ pthread_cond_broadcast(&stage_cond);
+ global_thread_throughput(t, this_stage);
+ }
+// printf("threads_ending %d line:%d\n", threads_ending,__LINE__);
+ while (threads_ending != num_threads)
+ pthread_cond_wait(&stage_cond, &stage_mutex);
+ pthread_mutex_unlock(&stage_mutex);
+ }
+
+ /* someone got restarted, go back to the beginning */
+ if (t->active_opers && (cnt < iterations || iterations == RUN_FOREVER)) {
+ iteration++;
+ goto restart;
+ }
+
+ /* finally, free all the ram */
+// printf("finished_opers %p line:%d\n", t->finished_opers,__LINE__);
+ while (t->finished_opers) {
+ oper = t->finished_opers;
+ oper_list_del(oper, &t->finished_opers);
+ status = finish_oper(t, oper);
+ }
+
+ if (t->num_global_pending) {
+ fprintf(stderr, "global num pending is %d\n", t->num_global_pending);
+ }
+ io_queue_release(t->io_ctx);
+
+ return status;
+}
+
+typedef void * (*start_routine)(void *);
+int run_workers(struct thread_info *t, int num_threads)
+{
+ int ret;
+ int thread_ret;
+ int i;
+
+// printf("%s num_threads %d line:%d\n", __FUNCTION__,num_threads,__LINE__);
+ for(i = 0 ; i < num_threads ; i++) {
+ ret = pthread_create(&t[i].tid, NULL, (start_routine)worker, t + i);
+ if (ret) {
+ perror("pthread_create");
+ exit(1);
+ }
+ }
+ for(i = 0 ; i < num_threads ; i++) {
+ ret = pthread_join(t[i].tid, (void *)&thread_ret);
+ if (ret) {
+ perror("pthread_join");
+ exit(1);
+ }
+ }
+ return 0;
+}
+
+off_t parse_size(char *size_arg, off_t mult) {
+ char c;
+ int num;
+ off_t ret;
+ c = size_arg[strlen(size_arg) - 1];
+ if (c > '9') {
+ size_arg[strlen(size_arg) - 1] = '\0';
+ }
+ num = atoi(size_arg);
+ switch(c) {
+ case 'g':
+ case 'G':
+ mult = 1024 * 1024 * 1024;
+ break;
+ case 'm':
+ case 'M':
+ mult = 1024 * 1024;
+ break;
+ case 'k':
+ case 'K':
+ mult = 1024;
+ break;
+ }
+ ret = mult * num;
+ return ret;
+}
+
+void print_usage(void) {
+ printf("usage: aio-stress [-s size] [-r size] [-a size] [-d num] [-b num]\n");
+ printf(" [-i num] [-t num] [-c num] [-C size] [-nxhOS ]\n");
+ printf(" file1 [file2 ...]\n");
+ printf("\t-a size in KB at which to align buffers\n");
+ printf("\t-b max number of iocbs to give io_submit at once\n");
+ printf("\t-c number of io contexts per file\n");
+ printf("\t-C offset between contexts, default 2MB\n");
+ printf("\t-s size in MB of the test file(s), default 1024MB\n");
+ printf("\t-r record size in KB used for each io, default 64KB\n");
+ printf("\t-d number of pending aio requests for each file, default 64\n");
+ printf("\t-i number of ios per file sent before switching\n\t to the next file, default 8\n");
+ printf("\t-I total number of ayncs IOs the program will run, default is run until Cntl-C\n");
+ printf("\t-O Use O_DIRECT (not available in 2.4 kernels),\n");
+ printf("\t-S Use O_SYNC for writes\n");
+ printf("\t-o add an operation to the list: write=0, read=1,\n");
+ printf("\t random write=2, random read=3.\n");
+ printf("\t repeat -o to specify multiple ops: -o 0 -o 1 etc.\n");
+ printf("\t-m shm use ipc shared memory for io buffers instead of malloc\n");
+ printf("\t-m shmfs mmap a file in /dev/shm for io buffers\n");
+ printf("\t-n no fsyncs between write stage and read stage\n");
+ printf("\t-l print io_submit latencies after each stage\n");
+ printf("\t-t number of threads to run\n");
+ printf("\t-v verification of bytes written\n");
+ printf("\t-x turn off thread stonewalling\n");
+ printf("\t-h this message\n");
+ printf("\n\t the size options (-a -s and -r) allow modifiers -s 400{k,m,g}\n");
+ printf("\t translate to 400KB, 400MB and 400GB\n");
+ printf("version %s\n", PROG_VERSION);
+}
+
+int main(int ac, char **av)
+{
+ int rwfd;
+ int i;
+ int j;
+ int c;
+
+ off_t file_size = 1 * 1024 * 1024 * 1024;
+ int first_stage = WRITE;
+ struct io_oper *oper;
+ int status = 0;
+ int num_files = 0;
+ int open_fds = 0;
+ struct thread_info *t;
+
+ page_size_mask = getpagesize() - 1;
+
+ while(1) {
+ c = getopt(ac, av, "a:b:c:C:m:s:r:d:i:I:o:t:lnhOSxv");
+ if (c < 0)
+ break;
+
+ switch(c) {
+ case 'a':
+ page_size_mask = parse_size(optarg, 1024);
+ page_size_mask--;
+ break;
+ case 'c':
+ num_contexts = atoi(optarg);
+ break;
+ case 'C':
+ context_offset = parse_size(optarg, 1024 * 1024);
+ case 'b':
+ max_io_submit = atoi(optarg);
+ break;
+ case 's':
+ file_size = parse_size(optarg, 1024 * 1024);
+ break;
+ case 'd':
+ depth = atoi(optarg);
+ break;
+ case 'r':
+ rec_len = parse_size(optarg, 1024);
+ break;
+ case 'i':
+ io_iter = atoi(optarg);
+ break;
+ case 'I':
+ iterations = atoi(optarg);
+ break;
+ case 'n':
+ fsync_stages = 0;
+ break;
+ case 'l':
+ latency_stats = 1;
+ break;
+ case 'm':
+ if (!strcmp(optarg, "shm")) {
+ fprintf(stderr, "using ipc shm\n");
+ use_shm = USE_SHM;
+ } else if (!strcmp(optarg, "shmfs")) {
+ fprintf(stderr, "using /dev/shm for buffers\n");
+ use_shm = USE_SHMFS;
+ }
+ break;
+ case 'o':
+ i = atoi(optarg);
+ stages |= 1 << i;
+ fprintf(stderr, "adding stage %s\n", stage_name(i));
+ break;
+ case 'O':
+ o_direct = O_DIRECT;
+ break;
+ case 'S':
+ o_sync = O_SYNC;
+ break;
+ case 't':
+ num_threads = atoi(optarg);
+ break;
+ case 'x':
+ stonewall = 0;
+ break;
+ case 'v':
+ verify = 1;
+ break;
+ case 'h':
+ default:
+ print_usage();
+ exit(1);
+ }
+ }
+
+ /*
+ * make sure we don't try to submit more ios than we have allocated
+ * memory for
+ */
+ if (depth < io_iter) {
+ io_iter = depth;
+ fprintf(stderr, "dropping io_iter to %d\n", io_iter);
+ }
+
+ if (optind >= ac) {
+ print_usage();
+ exit(1);
+ }
+
+ num_files = ac - optind;
+
+ if (num_threads > (num_files * num_contexts)) {
+ num_threads = num_files * num_contexts;
+ fprintf(stderr, "dropping thread count to the number of contexts %d\n",
+ num_threads);
+ }
+
+ t = malloc(num_threads * sizeof(*t));
+ if (!t) {
+ perror("malloc");
+ exit(1);
+ }
+ global_thread_info = t;
+
+ /* by default, allow a huge number of iocbs to be sent towards
+ * io_submit
+ */
+ if (!max_io_submit)
+ max_io_submit = num_files * io_iter * num_contexts;
+
+ /*
+ * make sure we don't try to submit more ios than max_io_submit allows
+ */
+ if (max_io_submit < io_iter) {
+ io_iter = max_io_submit;
+ fprintf(stderr, "dropping io_iter to %d\n", io_iter);
+ }
+
+ if (!stages) {
+ stages = (1 << WRITE) | (1 << READ) | (1 << RREAD) | (1 << RWRITE);
+ } else {
+ for (i = 0 ; i < LAST_STAGE; i++) {
+ if (stages & (1 << i)) {
+ first_stage = i;
+ fprintf(stderr, "starting with %s\n", stage_name(i));
+ break;
+ }
+ }
+ }
+
+ if (file_size < num_contexts * context_offset) {
+ fprintf(stderr, "file size %Lu too small for %d contexts\n",
+ file_size, num_contexts);
+ exit(1);
+ }
+
+ fprintf(stderr, "file size %LuMB, record size %luKB, depth %d, ios per iteration %d\n", file_size / (1024 * 1024), rec_len / 1024, depth, io_iter);
+ fprintf(stderr, "max io_submit %d, buffer alignment set to %luKB\n",
+ max_io_submit, (page_size_mask + 1)/1024);
+ fprintf(stderr, "threads %d files %d contexts %d context offset %LuMB verification %s\n",
+ num_threads, num_files, num_contexts,
+ context_offset / (1024 * 1024), verify ? "on" : "off");
+ /* open all the files and do any required setup for them */
+ for (i = optind ; i < ac ; i++) {
+ int thread_index;
+ for (j = 0 ; j < num_contexts ; j++) {
+ thread_index = open_fds % num_threads;
+ open_fds++;
+// fprintf(stderr, "adding file %s thread %d\n", av[i], thread_index);
+
+ rwfd = open(av[i], O_CREAT | O_RDWR | o_direct | o_sync, 0600);
+ assert(rwfd != -1);
+
+ oper = create_oper(rwfd, first_stage, j * context_offset,
+ file_size - j * context_offset, rec_len,
+ depth, io_iter, av[i]);
+ if (!oper) {
+ fprintf(stderr, "error in create_oper\n");
+ exit(-1);
+ }
+ oper_list_add(oper, &t[thread_index].active_opers);
+ t[thread_index].num_files++;
+ }
+ }
+ if (setup_shared_mem(num_threads, num_files * num_contexts,
+ depth, rec_len, max_io_submit))
+ {
+ exit(1);
+ }
+ for (i = 0 ; i < num_threads ; i++) {
+ if (setup_ious(&t[i], t[i].num_files, depth, rec_len, max_io_submit))
+ exit(1);
+ }
+ if (num_threads > 1){
+ printf("Running multi thread version num_threads:%d\n", num_threads);
+ run_workers(t, num_threads);
+ }
+ else {
+ printf("Running single thread version \n");
+ status = worker(t);
+ }
+
+
+ for (i = optind ; i < ac ; i++) {
+ printf("Cleaning up file %s \n", av[i]);
+ unlink(av[i]);
+ }
+
+ if (status) {
+ printf("non zero return %d \n", status);
+ }
+ else{
+ printf("aio-stress Completed successfully %d \n", status);
+ }
+
+ exit(0);
+}
+