git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
Import teuthology tasks (firefly branch)
author Zack Cerza <zack.cerza@inktank.com>
Thu, 7 Aug 2014 14:24:52 +0000 (08:24 -0600)
committer Zack Cerza <zack.cerza@inktank.com>
Thu, 7 Aug 2014 14:24:52 +0000 (08:24 -0600)
Signed-off-by: Zack Cerza <zack.cerza@inktank.com>
277 files changed:
.gitignore [deleted file]
README.rst [deleted file]
bootstrap [deleted file]
build_qemu_image.sh [deleted file]
check-syntax.sh [deleted file]
cleanup-and-unlock.sh [deleted file]
cleanup-run.sh [deleted file]
cleanup-user.sh [deleted file]
coverage/cov-analyze.sh [deleted file]
coverage/cov-init.sh [deleted file]
examples/3node_ceph.yaml [deleted file]
examples/3node_rgw.yaml [deleted file]
examples/parallel_example.yaml [deleted file]
hammer.sh [deleted file]
jenkins/jenkins-pull-requests-build [deleted file]
pytest.ini [deleted file]
requirements.txt [deleted file]
roles/3-simple.yaml [deleted file]
roles/overrides.yaml [deleted file]
schedule_suite.sh [deleted file]
scripts/__init__.py [deleted file]
scripts/coverage.py [deleted file]
scripts/kill.py [deleted file]
scripts/lock.py [deleted file]
scripts/ls.py [deleted file]
scripts/nuke.py [deleted file]
scripts/queue.py [deleted file]
scripts/report.py [deleted file]
scripts/results.py [deleted file]
scripts/run.py [deleted file]
scripts/schedule.py [deleted file]
scripts/suite.py [deleted file]
scripts/test/script.py [deleted file]
scripts/test/test_coverage.py [deleted file]
scripts/test/test_lock.py [deleted file]
scripts/test/test_ls.py [deleted file]
scripts/test/test_nuke.py [deleted file]
scripts/test/test_report.py [deleted file]
scripts/test/test_results.py [deleted file]
scripts/test/test_run.py [deleted file]
scripts/test/test_schedule.py [deleted file]
scripts/test/test_suite.py [deleted file]
scripts/test/test_updatekeys.py [deleted file]
scripts/test/test_worker.py [deleted file]
scripts/updatekeys.py [deleted file]
scripts/worker.py [deleted file]
setup.py [deleted file]
tasks/__init__.py [new file with mode: 0644]
tasks/admin_socket.py [new file with mode: 0644]
tasks/apache.conf.template [new file with mode: 0644]
tasks/autotest.py [new file with mode: 0644]
tasks/blktrace.py [new file with mode: 0644]
tasks/calamari/http_client.py [new file with mode: 0755]
tasks/calamari/servertest_1_0.py [new file with mode: 0755]
tasks/ceph.py [new file with mode: 0644]
tasks/ceph_client.py [new file with mode: 0644]
tasks/ceph_deploy.py [new file with mode: 0644]
tasks/ceph_fuse.py [new file with mode: 0644]
tasks/ceph_manager.py [new file with mode: 0644]
tasks/chef.py [new file with mode: 0644]
tasks/cifs_mount.py [new file with mode: 0644]
tasks/cram.py [new file with mode: 0644]
tasks/devstack.py [new file with mode: 0644]
tasks/die_on_err.py [new file with mode: 0644]
tasks/divergent_priors.py [new file with mode: 0644]
tasks/dump_stuck.py [new file with mode: 0644]
tasks/ec_lost_unfound.py [new file with mode: 0644]
tasks/filestore_idempotent.py [new file with mode: 0644]
tasks/kclient.py [new file with mode: 0644]
tasks/locktest.py [new file with mode: 0755]
tasks/lost_unfound.py [new file with mode: 0644]
tasks/manypools.py [new file with mode: 0644]
tasks/mds_creation_failure.py [new file with mode: 0644]
tasks/mds_thrash.py [new file with mode: 0644]
tasks/metadata.yaml [new file with mode: 0644]
tasks/mon_clock_skew_check.py [new file with mode: 0644]
tasks/mon_recovery.py [new file with mode: 0644]
tasks/mon_thrash.py [new file with mode: 0644]
tasks/multibench.py [new file with mode: 0644]
tasks/object_source_down.py [new file with mode: 0644]
tasks/omapbench.py [new file with mode: 0644]
tasks/osd_backfill.py [new file with mode: 0644]
tasks/osd_failsafe_enospc.py [new file with mode: 0644]
tasks/osd_recovery.py [new file with mode: 0644]
tasks/peer.py [new file with mode: 0644]
tasks/peering_speed_test.py [new file with mode: 0644]
tasks/qemu.py [new file with mode: 0644]
tasks/rados.py [new file with mode: 0644]
tasks/radosbench.py [new file with mode: 0644]
tasks/radosgw_admin.py [new file with mode: 0644]
tasks/radosgw_admin_rest.py [new file with mode: 0644]
tasks/radosgw_agent.py [new file with mode: 0644]
tasks/rbd.py [new file with mode: 0644]
tasks/rbd_fsx.py [new file with mode: 0644]
tasks/recovery_bench.py [new file with mode: 0644]
tasks/rep_lost_unfound_delete.py [new file with mode: 0644]
tasks/repair_test.py [new file with mode: 0644]
tasks/rest_api.py [new file with mode: 0644]
tasks/restart.py [new file with mode: 0644]
tasks/rgw.py [new file with mode: 0644]
tasks/rgw_logsocket.py [new file with mode: 0644]
tasks/s3readwrite.py [new file with mode: 0644]
tasks/s3roundtrip.py [new file with mode: 0644]
tasks/s3tests.py [new file with mode: 0644]
tasks/samba.py [new file with mode: 0644]
tasks/scrub.py [new file with mode: 0644]
tasks/scrub_test.py [new file with mode: 0644]
tasks/test/__init__.py [new file with mode: 0644]
tasks/test/test_devstack.py [new file with mode: 0644]
tasks/tgt.py [new file with mode: 0644]
tasks/thrashosds.py [new file with mode: 0644]
tasks/userdata_setup.yaml [new file with mode: 0644]
tasks/userdata_teardown.yaml [new file with mode: 0644]
tasks/util/__init__.py [new file with mode: 0644]
tasks/util/kclient.py [new file with mode: 0644]
tasks/util/rados.py [new file with mode: 0644]
tasks/util/rgw.py [new file with mode: 0644]
tasks/watch_notify_stress.py [new file with mode: 0644]
tasks/workunit.py [new file with mode: 0644]
teuthology/__init__.py [deleted file]
teuthology/beanstalk.py [deleted file]
teuthology/ceph.conf.template [deleted file]
teuthology/config.py [deleted file]
teuthology/contextutil.py [deleted file]
teuthology/coverage.py [deleted file]
teuthology/kill.py [deleted file]
teuthology/lock.py [deleted file]
teuthology/locker/__init__.py [deleted file]
teuthology/locker/api.py [deleted file]
teuthology/locker/config.py [deleted file]
teuthology/locker/locker.py [deleted file]
teuthology/lockstatus.py [deleted file]
teuthology/misc.py [deleted file]
teuthology/nuke.py [deleted file]
teuthology/orchestra/__init__.py [deleted file]
teuthology/orchestra/cluster.py [deleted file]
teuthology/orchestra/connection.py [deleted file]
teuthology/orchestra/monkey.py [deleted file]
teuthology/orchestra/remote.py [deleted file]
teuthology/orchestra/run.py [deleted file]
teuthology/orchestra/test/__init__.py [deleted file]
teuthology/orchestra/test/test_cluster.py [deleted file]
teuthology/orchestra/test/test_connection.py [deleted file]
teuthology/orchestra/test/test_integration.py [deleted file]
teuthology/orchestra/test/test_remote.py [deleted file]
teuthology/orchestra/test/test_run.py [deleted file]
teuthology/orchestra/test/util.py [deleted file]
teuthology/packaging.py [deleted file]
teuthology/parallel.py [deleted file]
teuthology/report.py [deleted file]
teuthology/results.py [deleted file]
teuthology/run.py [deleted file]
teuthology/run_tasks.py [deleted file]
teuthology/safepath.py [deleted file]
teuthology/schedule.py [deleted file]
teuthology/sentry.py [deleted file]
teuthology/suite.py [deleted file]
teuthology/task/__init__.py [deleted file]
teuthology/task/adjust-ulimits [deleted file]
teuthology/task/admin_socket.py [deleted file]
teuthology/task/apache.conf.template [deleted file]
teuthology/task/args.py [deleted file]
teuthology/task/autotest.py [deleted file]
teuthology/task/blktrace.py [deleted file]
teuthology/task/calamari.py [deleted file]
teuthology/task/calamari/http_client.py [deleted file]
teuthology/task/calamari/servertest_1_0.py [deleted file]
teuthology/task/ceph.py [deleted file]
teuthology/task/ceph_client.py [deleted file]
teuthology/task/ceph_deploy.py [deleted file]
teuthology/task/ceph_fuse.py [deleted file]
teuthology/task/ceph_manager.py [deleted file]
teuthology/task/chef.py [deleted file]
teuthology/task/cifs_mount.py [deleted file]
teuthology/task/clock.py [deleted file]
teuthology/task/common_fs_utils.py [deleted file]
teuthology/task/cram.py [deleted file]
teuthology/task/daemon-helper [deleted file]
teuthology/task/devstack.py [deleted file]
teuthology/task/die_on_err.py [deleted file]
teuthology/task/divergent_priors.py [deleted file]
teuthology/task/dump_stuck.py [deleted file]
teuthology/task/ec_lost_unfound.py [deleted file]
teuthology/task/edit_sudoers.sh [deleted file]
teuthology/task/exec.py [deleted file]
teuthology/task/filestore_idempotent.py [deleted file]
teuthology/task/hadoop.py [deleted file]
teuthology/task/install.py [deleted file]
teuthology/task/interactive.py [deleted file]
teuthology/task/internal.py [deleted file]
teuthology/task/iscsi.py [deleted file]
teuthology/task/kclient.py [deleted file]
teuthology/task/kcon_most [deleted file]
teuthology/task/kcon_most.py [deleted file]
teuthology/task/kernel.py [deleted file]
teuthology/task/knfsd.py [deleted file]
teuthology/task/localdir.py [deleted file]
teuthology/task/lockfile.py [deleted file]
teuthology/task/locktest.py [deleted file]
teuthology/task/lost_unfound.py [deleted file]
teuthology/task/manypools.py [deleted file]
teuthology/task/mds_creation_failure.py [deleted file]
teuthology/task/mds_thrash.py [deleted file]
teuthology/task/metadata.yaml [deleted file]
teuthology/task/mon_clock_skew_check.py [deleted file]
teuthology/task/mon_recovery.py [deleted file]
teuthology/task/mon_thrash.py [deleted file]
teuthology/task/mpi.py [deleted file]
teuthology/task/multibench.py [deleted file]
teuthology/task/nfs.py [deleted file]
teuthology/task/nop.py [deleted file]
teuthology/task/object_source_down.py [deleted file]
teuthology/task/omapbench.py [deleted file]
teuthology/task/osd_backfill.py [deleted file]
teuthology/task/osd_failsafe_enospc.py [deleted file]
teuthology/task/osd_recovery.py [deleted file]
teuthology/task/parallel.py [deleted file]
teuthology/task/parallel_example.py [deleted file]
teuthology/task/peer.py [deleted file]
teuthology/task/peering_speed_test.py [deleted file]
teuthology/task/pexec.py [deleted file]
teuthology/task/print.py [deleted file]
teuthology/task/proc_thrasher.py [deleted file]
teuthology/task/qemu.py [deleted file]
teuthology/task/rados.py [deleted file]
teuthology/task/radosbench.py [deleted file]
teuthology/task/radosgw_admin.py [deleted file]
teuthology/task/radosgw_admin_rest.py [deleted file]
teuthology/task/radosgw_agent.py [deleted file]
teuthology/task/rbd.py [deleted file]
teuthology/task/rbd_fsx.py [deleted file]
teuthology/task/recovery_bench.py [deleted file]
teuthology/task/rep_lost_unfound_delete.py [deleted file]
teuthology/task/repair_test.py [deleted file]
teuthology/task/rest_api.py [deleted file]
teuthology/task/restart.py [deleted file]
teuthology/task/rgw.py [deleted file]
teuthology/task/rgw_logsocket.py [deleted file]
teuthology/task/s3readwrite.py [deleted file]
teuthology/task/s3roundtrip.py [deleted file]
teuthology/task/s3tests.py [deleted file]
teuthology/task/samba.py [deleted file]
teuthology/task/scrub.py [deleted file]
teuthology/task/scrub_test.py [deleted file]
teuthology/task/sequential.py [deleted file]
teuthology/task/sleep.py [deleted file]
teuthology/task/ssh_keys.py [deleted file]
teuthology/task/swift.py [deleted file]
teuthology/task/tasktest.py [deleted file]
teuthology/task/test/__init__.py [deleted file]
teuthology/task/test/test_devstack.py [deleted file]
teuthology/task/tgt.py [deleted file]
teuthology/task/thrashosds.py [deleted file]
teuthology/task/timer.py [deleted file]
teuthology/task/userdata_setup.yaml [deleted file]
teuthology/task/userdata_teardown.yaml [deleted file]
teuthology/task/valgrind.supp [deleted file]
teuthology/task/watch_notify_stress.py [deleted file]
teuthology/task/workunit.py [deleted file]
teuthology/task_util/__init__.py [deleted file]
teuthology/task_util/kclient.py [deleted file]
teuthology/task_util/rados.py [deleted file]
teuthology/task_util/rgw.py [deleted file]
teuthology/test/__init__.py [deleted file]
teuthology/test/fake_archive.py [deleted file]
teuthology/test/test_config.py [deleted file]
teuthology/test/test_contextutil.py [deleted file]
teuthology/test/test_get_distro.py [deleted file]
teuthology/test/test_get_distro_version.py [deleted file]
teuthology/test/test_get_multi_machine_types.py [deleted file]
teuthology/test/test_misc.py [deleted file]
teuthology/test/test_report.py [deleted file]
teuthology/test/test_results.py [deleted file]
teuthology/test/test_safepath.py [deleted file]
teuthology/worker.py [deleted file]
tox.ini [deleted file]
watch-suite.sh [deleted file]

diff --git a/.gitignore b/.gitignore
deleted file mode 100644 (file)
index 09e16a9..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-*~
-.#*
-## the next line needs to start with a backslash to avoid looking like
-## a comment
-\#*#
-.*.swp
-
-*.pyc
-*.pyo
-.tox
-
-/*.egg-info
-/virtualenv
-/build
-/*.yaml
diff --git a/README.rst b/README.rst
deleted file mode 100644 (file)
index dac71bb..0000000
+++ /dev/null
@@ -1,519 +0,0 @@
-==================================================
- `Teuthology` -- The Ceph integration test runner
-==================================================
-
-The Ceph project needs automated tests. Because Ceph is a highly
-distributed system, and has active kernel development, its testing
-requirements are quite different from e.g. typical LAMP web
-applications. Nothing out there seemed to handle our requirements,
-so we wrote our own framework, called `Teuthology`.
-
-
-Overview
-========
-
-Teuthology runs a given set of Python functions (`tasks`), with an SSH
-connection to every host participating in the test. The SSH connection
-uses `Paramiko <http://www.lag.net/paramiko/>`__, a native Python
-client for the SSH2 protocol, and this allows us to e.g. run multiple
-commands inside a single SSH connection, to speed up test
-execution. Tests can use `gevent <http://www.gevent.org/>`__ to
-perform actions concurrently or in the background.
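-
-For a rough sense of the underlying mechanism (an illustrative sketch only,
-not teuthology's actual API), running a command on one of the test hosts
-over SSH with Paramiko looks something like this::
-
-    import paramiko
-
-    # Illustrative only: teuthology wraps this kind of call in its own
-    # orchestration layer and reuses a single connection per host.
-    client = paramiko.SSHClient()
-    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
-    client.connect('host07.example.com', username='ubuntu')
-    stdin, stdout, stderr = client.exec_command('uname -r')
-    print(stdout.read())
-    client.close()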
-
-
-Build
-=====
-Teuthology is not meant to be distributed as a library, so we depend on the
-pinned dependencies listed in ``requirements.txt``; ``setup.py`` does not list
-any dependencies and is only there to install the package entry points
-(a.k.a. teuthology's scripts). To get the source::
-
-    git clone https://github.com/ceph/teuthology.git
-    cd teuthology
-
-
-Bootstrap for Ubuntu Systems
-----------------------------
-A ``bootstrap`` script is provided for automated builds/execution of teuthology
-itself. You can run it directly **only if you are using Ubuntu**.
-
-Teuthology uses several Python packages that are not in the standard
-library. To make the dependencies easier to get right, we use a
-`virtualenv` to manage them. To get started, ensure you have the
-``virtualenv`` and ``pip`` programs installed; e.g. on Debian/Ubuntu::
-
-    sudo apt-get install python-dev python-virtualenv python-pip libevent-dev libmysqlclient-dev python-libvirt
-
-and then run::
-
-    ./bootstrap
-
-
-MacOS X
--------
-
-.. note:: These instructions assume you are using `homebrew <http://brew.sh/>`_
-
-As always, create a ``virtualenv`` specific to teuthology and make sure it
-is activated before proceeding (the location doesn't matter; we use an example
-location here)::
-
-    mkdir ~/.virtualenvs
-    virtualenv --system-site-packages ~/.virtualenvs/teuthology
-    source ~/.virtualenvs/teuthology/bin/activate
-
-Install the system dependencies::
-
-    brew install libvirt mysql libevent
-
-Make sure you are able to import ``libvirt`` without error::
-
-    python -c "import libvirt"
-
-If Python can't find libvirt yet, you may need to do the following::
-
-    cd /Library/Python/{pyversion}/site-packages
-    sudo ln -s /usr/local/Cellar/libvirt/{version}/lib/python{pyversion}/site-packages/* .
-
-Finally, install the teuthology package and ``requirements.txt``::
-
-    python setup.py develop
-    pip install -r requirements.txt
-
-
-Generic install
----------------
-These instructions should help get teuthology installed properly on
-a system that is not OS X or Debian-based.
-
-Install all the system dependencies needed:
-
-* mysql client
-* libevent
-* libvirt (with the Python bindings)
-
-Install Python packaging tools:
-
-* pip
-* virtualenv
-
-In some cases, depending on the OS, you will need a python development package
-with some build helpers that are required to build packages. In Ubuntu, this is
-the ``python-dev`` package.
-
-With a dedicated ``virtualenv`` activated, install the teuthology package and
-``requirements.txt``::
-
-    python setup.py develop
-    pip install -r requirements.txt
-
-
-Test configuration
-==================
-
-An integration test run takes three items of configuration:
-
-- ``targets``: what hosts to run on; this is a dictionary mapping
-  hosts to ssh host keys, like:
-  "username@hostname.example.com: ssh-rsa long_hostkey_here"
-- ``roles``: how to use the hosts; this is a list of lists, where each
-  entry lists all the roles to be run on a single host; for example, a
-  single entry might say ``[mon.1, osd.1]``
-- ``tasks``: how to set up the cluster and what tests to run on it;
-  see below for examples
-
-The format for this configuration is `YAML <http://yaml.org/>`__, a
-structured data format that is still human-readable and editable.
-
-For example, a full config for a test run that sets up a three-machine
-cluster, mounts Ceph via ``ceph-fuse``, and leaves you at an interactive
-Python prompt for manual exploration (letting you SSH in to the nodes and
-use the live cluster ad hoc) might look like this::
-
-    roles:
-    - [mon.0, mds.0, osd.0]
-    - [mon.1, osd.1]
-    - [mon.2, client.0]
-    targets:
-        ubuntu@host07.example.com: ssh-rsa host07_ssh_key
-        ubuntu@host08.example.com: ssh-rsa host08_ssh_key
-        ubuntu@host09.example.com: ssh-rsa host09_ssh_key
-    tasks:
-    - install:
-    - ceph:
-    - ceph-fuse: [client.0]
-    - interactive:
-
-The number of entries under ``roles`` and ``targets`` must match.
-
-Note the colon after every task name in the ``tasks`` section.
-
-The ``install`` task needs to precede all other tasks.
-
-The listed targets need resolvable hostnames. If you do not have a DNS server
-running, you can add entries to ``/etc/hosts``. You also need to be able to SSH
-in to the listed targets without passphrases, and the remote user needs to have
-passwordless `sudo` access. Note that the ssh keys at the end of the
-``targets`` entries are the public ssh keys for the hosts.  On Ubuntu, these
-are located at ``/etc/ssh/ssh_host_rsa_key.pub``.
-
-If you save the above file as ``example.yaml``, you can run
-teuthology on it with::
-
-    ./virtualenv/bin/teuthology example.yaml
-
-You can also pass the ``-v`` option, for more verbose execution. See
-``teuthology --help`` for more.
-
-
-Multiple config files
----------------------
-
-You can pass multiple files as arguments to teuthology. Each one
-will be read as a config file, and their contents will be merged. This
-allows you to e.g. share definitions of what a "simple 3 node cluster"
-is. The source tree comes with ``roles/3-simple.yaml``, so we could
-skip the ``roles`` section in the above ``example.yaml`` and then
-run::
-
-    ./virtualenv/bin/teuthology roles/3-simple.yaml example.yaml
-
-
-Reserving target machines
--------------------------
-
-Before locking machines will work, you must create a .teuthology.yaml
-file in your home directory that sets a lock_server, i.e.::
-
-    lock_server: http://host.example.com:8080/lock
-
-Teuthology automatically locks nodes for you if you specify the
-``--lock`` option. Without this option, you must specify machines to
-run on in a ``targets.yaml`` file, and lock them using
-teuthology-lock.
-
-Note that the default owner of a machine is ``USER@HOST``.
-You can override this with the ``--owner`` option when running
-teuthology or teuthology-lock.
-
-With teuthology-lock, you can also add a description, so you can
-remember which tests you were running on them. This can be done when
-locking or unlocking machines, or as a separate action with the
-``--update`` option. To lock 3 machines and set a description, run::
-
-    ./virtualenv/bin/teuthology-lock --lock-many 3 --desc 'test foo'
-
-If machines become unusable for some reason, you can mark them down::
-
-    ./virtualenv/bin/teuthology-lock --update --status down machine1 machine2
-
-To see the status of all machines, use the ``--list`` option. This can
-be restricted to particular machines as well::
-
-    ./virtualenv/bin/teuthology-lock --list machine1 machine2
-
-
-Tasks
-=====
-
-A task is a Python module in the ``teuthology.task`` package, with a
-callable named ``task``. It gets the following arguments:
-
-- ``ctx``: a context that is available through the lifetime of the
-  test run, and has useful attributes such as ``cluster``, letting the
-  task access the remote hosts. Tasks can also store their internal
-  state here. (TODO beware namespace collisions.)
-- ``config``: the data structure after the colon in the config file,
-  e.g. for the above ``ceph-fuse`` example, it would be a list like
-  ``["client.0"]``.
-
-Tasks can be simple functions, called once in the order they are
-listed in ``tasks``. But sometimes, it makes sense for a task to be
-able to clean up after itself; for example, unmounting the filesystem
-after a test run. A task callable that returns a Python `context
-manager
-<http://docs.python.org/library/stdtypes.html#typecontextmanager>`__
-will have the manager added to a stack, and the stack will be unwound
-at the end of the run. This means the cleanup actions are run in
-reverse order, both on success and failure. A nice way of writing
-context managers is the ``contextlib.contextmanager`` decorator; look
-for that string in the existing tasks to see examples, and note where
-they use ``yield``.
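-
-As an illustrative sketch (the module and log messages below are
-hypothetical, not an existing task), a minimal task written as a context
-manager might look like this::
-
-    import contextlib
-    import logging
-
-    log = logging.getLogger(__name__)
-
-    @contextlib.contextmanager
-    def task(ctx, config):
-        """
-        Hypothetical example task. ``config`` is whatever follows the task
-        name in the yaml file, e.g. a list like ["client.0"].
-        """
-        log.info('setting up with config: %s', config)
-        # ... perform setup here, e.g. using ctx.cluster ...
-        try:
-            yield
-        finally:
-            # Runs when the task stack unwinds, on success or failure.
-            log.info('cleaning up')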
-
-Further details on some of the more complex tasks such as install or workunit
-can be obtained via python help. For example::
-
-    >>> import teuthology.task.workunit
-    >>> help(teuthology.task.workunit)
-
-displays a page of more documentation and more concrete examples.
-
-Some of the more important / commonly used tasks include:
-
-* ``chef``: run chef on each target machine to prepare it for testing.
-* ``install``: by default, the install task goes to gitbuilder and installs the
-  results of the latest build. You can, however, add additional parameters to
-  the test configuration to cause it to install any branch, SHA, archive or
-  URL. The following are valid parameters.
-
-- ``branch``: specify a branch (bobtail, cuttlefish...)
-- ``flavor``: specify a flavor (next, unstable...). Flavors can be thought of
-  as subsets of branches.  Sometimes (unstable, for example) they may have a
-  predefined meaning.
-- ``project``: specify a project (ceph, samba...)
-- ``sha1``: install the build with this sha1 value.
-- ``tag``: specify a tag/identifying text for this build (v47.2, v48.1...)
-* ``ceph``: Bring up Ceph
-
-* ``overrides``: override behavior. Typically, this includes sub-tasks being
-  overridden. Sub-tasks can nest further information.  For example, overrides
-  of install tasks are project specific, so the following section of a yaml
-  file would cause all ceph installation to default into using the cuttlefish
-  branch::
-
-    overrides:
-      install:
-        ceph:
-          branch: cuttlefish
-
-* ``workunit``: workunits are a way of grouping tasks and behavior on targets.
-* ``sequential``: group the sub-tasks into a unit where the sub-tasks run
-  sequentially as listed.
-* ``parallel``: group the sub-tasks into a unit where the sub-tasks all run in
-  parallel.
-
-Sequential and parallel tasks can be nested.  Tasks run sequentially if not
-specified.
-
-The above list is a very incomplete description of the tasks available in
-teuthology. The teuthology/task subdirectory contains all the Python files
-that implement tasks. Many of these tasks are used to run shell scripts that
-are defined in the ceph/ceph-qa-suite repository.
-
-If machines were locked as part of the run (with the ``--lock`` switch),
-teuthology normally leaves them locked when there is any task failure
-for investigation of the machine state.  When developing new teuthology
-tasks, sometimes this behavior is not useful.  The ``unlock_on_failure``
-global option can be set to True to make the unlocking happen unconditionally.
-
-Troubleshooting
-===============
-
-Sometimes when a bug triggers, instead of automatic cleanup, you want
-to explore the system as is. Adding a top-level::
-
-    interactive-on-error: true
-
-entry to a teuthology config file will make that possible. With that
-option, any *task* that fails will have the ``interactive`` task
-called after it. This means that before any cleanup happens, you get a
-chance to inspect the system -- both through Teuthology and via extra
-SSH connections -- and the cleanup completes only when you choose to
-let it. Just exit the interactive Python session to continue the cleanup.
-
-Note that this only catches exceptions *between* the tasks. If a task
-calls multiple subtasks, e.g. with ``contextutil.nested``, those
-cleanups *will* be performed. Later on, we can let tasks communicate
-the subtasks they wish to invoke to the top-level runner, avoiding
-this issue.
-
-Interactive task facilities
-===========================
-The ``interactive`` task presents a prompt for you to interact with the
-teuthology configuration.  The ``ctx`` variable is available to explore,
-and a ``pprint.PrettyPrinter().pprint`` object is added for convenience as
-``pp``, so you can do things like ``pp(dict-of-interest)`` to see a formatted
-view of the dict.
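-
-For example (the ``remotes`` attribute here is just an illustration of the
-kind of object you might inspect)::
-
-    >>> pp(ctx.cluster.remotes)  # any object of interest works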
-
-Test Sandbox Directory
-======================
-
-Teuthology currently places most test files and mount points in a
-sandbox directory, defaulting to ``/home/$USER/cephtest``.  To change
-the location of the sandbox directory, the following option can be
-specified in ``$HOME/.teuthology.yaml``::
-
-    test_path: <directory>
-
-
-VIRTUAL MACHINE SUPPORT
-=======================
-
-Teuthology also supports virtual machines, which can function like
-physical machines but differ in the following ways:
-
-VPSHOST:
---------
-
-A new entry, vpshost, has been added to the teuthology database of
-available machines.  For physical machines, this value is null. For
-virtual machines, this entry is the name of the physical machine hosting
-that virtual machine.
-
-There are fixed "slots" for virtual machines that appear in the teuthology
-database.  These slots have a machine type of vps and can be locked like
-any other machine.  The existence of a vpshost field is how teuthology
-knows whether or not a database entry represents a physical or a virtual
-machine.
-
-The following needs to be set in ~/.config/libvirt/libvirt.conf in order to get the
-right virtual machine associations for the Inktank lab::
-
-    uri_aliases = [
-        'mira001=qemu+ssh://ubuntu@mira001.front.sepia.ceph.com/system?no_tty=1',
-        'mira003=qemu+ssh://ubuntu@mira003.front.sepia.ceph.com/system?no_tty=1',
-        'mira004=qemu+ssh://ubuntu@mira004.front.sepia.ceph.com/system?no_tty=1',
-        'mira006=qemu+ssh://ubuntu@mira006.front.sepia.ceph.com/system?no_tty=1',
-        'mira007=qemu+ssh://ubuntu@mira007.front.sepia.ceph.com/system?no_tty=1',
-        'mira008=qemu+ssh://ubuntu@mira008.front.sepia.ceph.com/system?no_tty=1',
-        'mira009=qemu+ssh://ubuntu@mira009.front.sepia.ceph.com/system?no_tty=1',
-        'mira010=qemu+ssh://ubuntu@mira010.front.sepia.ceph.com/system?no_tty=1',
-        'mira011=qemu+ssh://ubuntu@mira011.front.sepia.ceph.com/system?no_tty=1',
-        'mira013=qemu+ssh://ubuntu@mira013.front.sepia.ceph.com/system?no_tty=1',
-        'mira014=qemu+ssh://ubuntu@mira014.front.sepia.ceph.com/system?no_tty=1',
-        'mira015=qemu+ssh://ubuntu@mira015.front.sepia.ceph.com/system?no_tty=1',
-        'mira017=qemu+ssh://ubuntu@mira017.front.sepia.ceph.com/system?no_tty=1',
-        'mira018=qemu+ssh://ubuntu@mira018.front.sepia.ceph.com/system?no_tty=1',
-        'mira020=qemu+ssh://ubuntu@mira020.front.sepia.ceph.com/system?no_tty=1',
-        'vercoi01=qemu+ssh://ubuntu@vercoi01.front.sepia.ceph.com/system?no_tty=1',
-        'vercoi02=qemu+ssh://ubuntu@vercoi02.front.sepia.ceph.com/system?no_tty=1',
-        'vercoi03=qemu+ssh://ubuntu@vercoi03.front.sepia.ceph.com/system?no_tty=1',
-        'vercoi04=qemu+ssh://ubuntu@vercoi04.front.sepia.ceph.com/system?no_tty=1',
-        'vercoi05=qemu+ssh://ubuntu@vercoi05.front.sepia.ceph.com/system?no_tty=1',
-        'vercoi06=qemu+ssh://ubuntu@vercoi06.front.sepia.ceph.com/system?no_tty=1',
-        'vercoi07=qemu+ssh://ubuntu@vercoi07.front.sepia.ceph.com/system?no_tty=1',
-        'vercoi08=qemu+ssh://ubuntu@vercoi08.front.sepia.ceph.com/system?no_tty=1',
-        'senta01=qemu+ssh://ubuntu@senta01.front.sepia.ceph.com/system?no_tty=1',
-        'senta02=qemu+ssh://ubuntu@senta02.front.sepia.ceph.com/system?no_tty=1',
-        'senta03=qemu+ssh://ubuntu@senta03.front.sepia.ceph.com/system?no_tty=1',
-        'senta04=qemu+ssh://ubuntu@senta04.front.sepia.ceph.com/system?no_tty=1',
-    ]
-
-DOWNBURST:
-----------
-
-When a virtual machine is locked, downburst is run on that machine to install a
-new image.  This allows the user to choose which OS is installed on the newly
-created virtual machine.  Currently the default is Ubuntu (precise).  A
-different OS can be selected with the ``--os-type`` option of
-``teuthology-lock``.
-
-When a virtual machine is unlocked, downburst destroys the image on the
-machine.
-
-Temporary yaml files are used to downburst a virtual machine.  A typical
-yaml file will look like this::
-
-    downburst:
-      cpus: 1
-      disk-size: 30G
-      distro: centos
-      networks:
-      - {source: front}
-      ram: 4G
-
-These values are used by downburst to create the virtual machine.
-
-When locking a machine, a downburst meta-data yaml file can be specified by
-using the ``--downburst-conf`` option on the command line.
-
-HOST KEYS:
-----------
-
-Because teuthology reinstalls a new machine, a new hostkey is generated.  After
-locking, once a connection is established to the new machine,
-``teuthology-lock`` with the ``--list`` or ``--list-targets`` options will
-display the new keys.  When vps machines are locked using the ``--lock-many``
-option, a message is displayed indicating that ``--list-targets`` should be run
-later.
-
-CEPH-QA-CHEF:
--------------
-
-Once teuthology starts after a new vm is installed, it checks for the
-existence of ``/ceph-qa-ready``.  If this file is not present,
-``ceph-qa-chef`` is run when teuthology first comes up.
-
-ASSUMPTIONS:
-------------
-
-It is assumed that downburst is on the user's ``$PATH``.
-
-
-Test Suites
-===========
-
-Most of the current teuthology test suite execution scripts automatically
-download their tests from the master branch of the appropriate github
-repository.  People who want to run experimental test suites usually modify the
-download method in the ``teuthology/task`` script to use some other branch or
-repository. This should be generalized in later teuthology releases.
-Teuthology QA suites can be found in ``src/ceph-qa-suite``. Make sure that this
-directory exists in your source tree before running the test suites.
-
-Each suite name is determined by the name of the directory in ``ceph-qa-suite``
-that contains that suite. The directory contains subdirectories and yaml files,
-which, when assembled, produce valid tests that can be run. The test suite
-application generates combinations of these files and thus ends up running a
-set of tests based on the data in the directory for the suite.
-
-To run a suite, enter::
-
-    ./schedule_suite.sh <suite> <ceph> <kernel> <email> <flavor> <teuth> <mtype> <template>
-
-where:
-
-* ``suite``: the name of the suite (the directory in ceph-qa-suite).
-* ``ceph``: ceph branch to be used.
-* ``kernel``: version of the kernel to be used.
-* ``email``: email address to send the results to.
-* ``flavor``: flavor of the test.
-* ``teuth``: version of teuthology to run.
-* ``mtype``: machine type of the run.
-* ``template``: template file used for further modifying the suite (optional).
-
-For example, consider::
-
-     schedule_suite.sh rbd wip-fix cuttlefish bob.smith@foo.com master cuttlefish plana
-
-The above command runs the rbd suite using wip-fix as the ceph branch, a
-straight cuttlefish kernel, and the master flavor of cuttlefish teuthology.  It
-will run on plana machines.
-
-In order for a queued task to be run, a teuthworker thread on
-``teuthology.front.sepia.ceph.com`` needs to remove the task from the queue.
-On ``teuthology.front.sepia.ceph.com``, run ``ps aux | grep teuthology-worker``
-to view currently running tasks. If no processes are reading from the test
-version that you are running, additional teuthworker tasks need to be started.
-To start these tasks:
-
-* copy your build tree to ``/home/teuthworker`` on ``teuthology.front.sepia.ceph.com``.
-* Give it a unique name (in this example, xxx)
-* start up some number of worker threads (as many as the number of machines you are testing with; there are 60 running for the default queue)::
-
-    /home/virtualenv/bin/python \
-        /var/lib/teuthworker/xxx/virtualenv/bin/teuthworker \
-        /var/lib/teuthworker/archive --tube xxx \
-        --log-dir /var/lib/teuthworker/archive/worker_logs
-
-    Note: The threads on teuthology.front.sepia.ceph.com are started via
-    ~/teuthworker/start.sh.  You can use that file as a model for your
-    own threads, or add to this file if you want your threads to be
-    more permanent.
-
-Once the suite completes, an email message is sent to the users specified, and
-a large amount of information is left on ``teuthology.front.sepia.ceph.com`` in
-``/var/lib/teuthworker/archive``.
-
-This is symbolically linked to /a for convenience. A new directory is created
-whose name consists of a concatenation of the date and time that the suite was
-started, the name of the suite, the ceph branch tested, the kernel used, and
-the flavor. For every test run there is a directory whose name is the pid
-of that test.  Each of these directories contains a copy of the
-``teuthology.log`` for that process.  Other information from the suite is
-stored in files in the directory, and task-specific yaml files and other logs
-are saved in the subdirectories.
-
-These logs are also publicly available at
-``http://qa-proxy.ceph.com/teuthology/``.
diff --git a/bootstrap b/bootstrap
deleted file mode 100755 (executable)
index 46590ca..0000000
--- a/bootstrap
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/bin/sh
-set -e
-
-for package in python-dev python-pip python-virtualenv libevent-dev python-libvirt libmysqlclient-dev; do
-    if [ "$(dpkg --status -- $package|sed -n 's/^Status: //p')" != "install ok installed" ]; then
-       # add a space after old values
-       missing="${missing:+$missing }$package"
-    fi
-done
-if [ -n "$missing" ]; then
-    echo "$0: missing required packages, please install them:" 1>&2
-    echo "sudo apt-get install $missing"
-    exit 1
-fi
-
-if [ -z "$NO_CLOBBER" ] || [ ! -e ./virtualenv ]; then
-    # site packages needed because libvirt python bindings are not nicely
-    # packaged
-    virtualenv --system-site-packages --distribute virtualenv
-
-    # avoid pip bugs
-    ./virtualenv/bin/pip install --upgrade pip
-
-    # work-around change in pip 1.5
-    ./virtualenv/bin/pip install setuptools --no-use-wheel --upgrade
-fi
-
-./virtualenv/bin/pip install -r requirements.txt
-
-# forbid setuptools from using the network because it'll try to use
-# easy_install, and we really wanted pip; next line will fail if pip
-# requirements.txt does not match setup.py requirements -- sucky but
-# good enough for now
-./virtualenv/bin/python setup.py develop \
-    --allow-hosts None
diff --git a/build_qemu_image.sh b/build_qemu_image.sh
deleted file mode 100755 (executable)
index 614f519..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/bin/sh -x
-set -e
-
-IMAGE_URL=http://cloud-images.ubuntu.com/releases/precise/release/ubuntu-12.04-server-cloudimg-amd64-disk1.img
-
-wget -O base.qcow2 $IMAGE_URL
-
-image=base.raw
-qemu-img convert -O raw base.qcow2 $image
-rm -f base.qcow2
-
-# Note: this assumes that sector size is 512, and that there's only one
-# partition. very brittle.
-START_SECT=$(fdisk -lu $image | grep ^$image | awk '{print $3}')
-START_BYTE=$(echo "$START_SECT * 512" | bc)
-
-root=/tmp/$$
-
-cleanup() {
-    sudo chroot $root rm -f /etc/resolv.conf || true
-    sudo chroot $root ln -s ../run/resolvconf/resolv.conf /etc/resolv.conf || true
-       sudo umount $root/proc || true
-       sudo umount $root/sys || true
-       sudo umount $root/dev/pts || true
-       sudo umount $root
-    sudo rmdir $root
-}
-trap cleanup INT TERM EXIT
-
-sudo mkdir $root
-sudo mount -o loop,offset=$START_BYTE $image $root
-
-# set up chroot
-sudo mount -t proc proc $root/proc
-sudo mount -t sysfs sysfs $root/sys
-sudo mount -t devpts devpts $root/dev/pts
-
-# set up network access
-sudo chroot $root rm /etc/resolv.conf
-sudo cp /etc/resolv.conf $root/etc/resolv.conf
-
-# packages
-# These should be kept in sync with ceph-qa-chef.git/cookbooks/ceph-qa/default.rb
-sudo chroot $root apt-get -y --force-yes install iozone3 bonnie++ dbench \
-    tiobench build-essential attr libtool automake gettext uuid-dev      \
-    libacl1-dev bc xfsdump dmapi xfslibs-dev
-
-# install ltp without ltp-network-test, so we don't pull in xinetd and
-# a bunch of other unnecessary stuff
-sudo chroot $root apt-get -y --force-yes --no-install-recommends install ltp-kernel-test
-
-# add 9p fs support
-sudo chroot $root apt-get -y --force-yes install linux-image-extra-virtual
-
-cleanup
-trap - INT TERM EXIT
-
-qemu-img convert -O qcow2 $image output.qcow2
-rm -f $image
-
-exit 0
diff --git a/check-syntax.sh b/check-syntax.sh
deleted file mode 100755 (executable)
index f63586f..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash
-
-which pyflakes > /dev/null
-if test $? != 0; then
-    echo "$0 requires pyflakes (sudo apt-get install pyflakes)"
-    exit 1
-fi
-
-d=$(dirname $0)
-for f in $(find ${d}/teuthology | grep py$); do
-    if test -n "${V}"; then
-       echo "checking ${f}"
-    fi
-    pyflakes ${f} > >( \
-       grep -v "'Lock' imported but unused" | \
-       grep -v "'MachineLock' imported but unused" \
-       )
-done
diff --git a/cleanup-and-unlock.sh b/cleanup-and-unlock.sh
deleted file mode 100755 (executable)
index 7b30a2b..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh -ex
-
-bin/teuthology-nuke -t $1 -r --owner $2
-bin/teuthology-lock --unlock -t $1 --owner $2
diff --git a/cleanup-run.sh b/cleanup-run.sh
deleted file mode 100755 (executable)
index 50bc74d..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/sh -ex
-
-owner=`teuthology-lock -a --list --desc-pattern /$1/ --status up | grep locked_by | head -1 | awk '{print $2}' | sed 's/"//g' | sed 's/,//'`
-teuthology-lock --list-targets --desc-pattern /$1/ --status up --owner $owner > /tmp/$$
-teuthology-nuke --unlock -t /tmp/$$ -r --owner $owner
-rm /tmp/$$
-
diff --git a/cleanup-user.sh b/cleanup-user.sh
deleted file mode 100755 (executable)
index 91d12b1..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/sh -ex
-
-teuthology-lock --list-targets --owner $1 --status up > /tmp/$$
-teuthology-nuke --unlock -t /tmp/$$ -r --owner $1
-rm /tmp/$$
-
diff --git a/coverage/cov-analyze.sh b/coverage/cov-analyze.sh
deleted file mode 100755 (executable)
index 9b309e2..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/bin/bash
-set -e
-
-usage () {
-       printf '%s: usage: %s -d WORKING_DIR -o OUT_BASENAME -t TEST_DIR\n' "$(basename "$0")" "$(basename "$0")" 1>&2
-       cat <<EOF
-WORKING_DIR should contain the source, .gcno, and initial lcov files (as created by cov-init.sh)
-TEST_DIR should contain the data archived from a teuthology test.
-
-Example:
-    mkdir coverage
-    ./cov-init.sh ~/teuthology_output/foo coverage
-    $0 -t ~/teuthology_output/foo -d coverage -o foo
-EOF
-       exit 1
-}
-
-OUTPUT_BASENAME=
-TEST_DIR=
-COV_DIR=
-
-while getopts  "d:o:t:" flag
-do
-       case $flag in
-               d) COV_DIR=$OPTARG;;
-               o) OUTPUT_BASENAME=$OPTARG;;
-               t) TEST_DIR=$OPTARG;;
-               *) usage;;
-       esac
-done
-
-shift $(($OPTIND - 1))
-
-echo $OUTPUT_BASENAME
-if [ -z "$OUTPUT_BASENAME" ] || [ -z "$TEST_DIR" ] || [ -z "$COV_DIR" ]; then
-       usage
-fi
-
-cp $COV_DIR/base.lcov "$COV_DIR/${OUTPUT_BASENAME}.lcov"
-
-for remote in `ls $TEST_DIR/remote`; do
-       echo "processing coverage for $remote..."
-       cp $TEST_DIR/remote/$remote/coverage/*.gcda $COV_DIR/ceph/src
-       cp $TEST_DIR/remote/$remote/coverage/_libs/*.gcda $COV_DIR/ceph/src/.libs
-       lcov -d $COV_DIR/ceph/src -c -o "$COV_DIR/${remote}_full.lcov"
-       lcov -r "$COV_DIR/${remote}_full.lcov" /usr/include\* -o "$COV_DIR/${remote}.lcov"
-       lcov -a "$COV_DIR/${remote}.lcov" -a "$COV_DIR/${OUTPUT_BASENAME}.lcov" -o "$COV_DIR/${OUTPUT_BASENAME}_tmp.lcov"
-       mv "$COV_DIR/${OUTPUT_BASENAME}_tmp.lcov" "$COV_DIR/${OUTPUT_BASENAME}.lcov"
-       rm "$COV_DIR/${remote}_full.lcov"
-       find $COV_DIR/ceph/src -name '*.gcda' -type f -delete
-done
diff --git a/coverage/cov-init.sh b/coverage/cov-init.sh
deleted file mode 100755 (executable)
index 2cb1ebd..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/bin/bash
-set -e
-
-usage () {
-       printf '%s: usage: %s TEST_DIR OUTPUT_DIR CEPH_BUILD_TARBALL\n' "$(basename "$0")" "$(basename "$0")" 1>&2
-       exit 1
-}
-
-TEST_DIR=$1
-OUTPUT_DIR=$2
-CEPH_TARBALL=$3
-
-if [ -z "$TEST_DIR" ] || [ -z "$OUTPUT_DIR" ] || [ -z "$CEPH_TARBALL" ]; then
-       usage
-fi
-
-SHA1=`cat $TEST_DIR/ceph-sha1`
-
-mkdir -p $OUTPUT_DIR/ceph
-
-echo "Retrieving source and .gcno files..."
-wget -q -O- "https://github.com/ceph/ceph/tarball/$SHA1" | tar xzf - --strip-components=1 -C $OUTPUT_DIR/ceph
-tar zxf $CEPH_TARBALL -C $OUTPUT_DIR
-cp $OUTPUT_DIR/usr/local/lib/ceph/coverage/*.gcno $OUTPUT_DIR/ceph/src
-mkdir $OUTPUT_DIR/ceph/src/.libs
-cp $OUTPUT_DIR/usr/local/lib/ceph/coverage/.libs/*.gcno $OUTPUT_DIR/ceph/src/.libs
-rm -rf $OUTPUT_DIR/usr
-# leave ceph tarball around in case we need to inspect core files
-
-echo "Initializing lcov files..."
-lcov -d $OUTPUT_DIR/ceph/src -z
-lcov -d $OUTPUT_DIR/ceph/src -c -i -o $OUTPUT_DIR/base_full.lcov
-lcov -r $OUTPUT_DIR/base_full.lcov /usr/include\* -o $OUTPUT_DIR/base.lcov
-rm $OUTPUT_DIR/base_full.lcov
-echo "Done."
diff --git a/examples/3node_ceph.yaml b/examples/3node_ceph.yaml
deleted file mode 100644 (file)
index 16544f3..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-roles:
-- [mon.0, mds.0, osd.0]
-- [mon.1, osd.1]
-- [mon.2, client.0]
-
-tasks:
-- install:
-- ceph:
-- kclient: [client.0]
-- interactive:
-
-targets:
-  ubuntu@<host1>: ssh-rsa <host1 key>
-  ubuntu@<host2>: ssh-rsa <host2 key>
-  ubuntu@<host3>: ssh-rsa <host3 key>
diff --git a/examples/3node_rgw.yaml b/examples/3node_rgw.yaml
deleted file mode 100644 (file)
index a21dab0..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-interactive-on-error: true
-overrides:
-  ceph:
-    branch: master
-    fs: xfs
-roles:
-- - mon.a
-  - mon.c
-  - osd.0
-- - mon.b
-  - mds.a
-  - osd.1
-- - client.0
-tasks:
-- install:
-- ceph: null
-- rgw:
-  - client.0
-- interactive:
-
-targets:
-  ubuntu@<host1>: ssh-rsa <host1 key>
-  ubuntu@<host2>: ssh-rsa <host2 key>
-  ubuntu@<host3>: ssh-rsa <host3 key>
diff --git a/examples/parallel_example.yaml b/examples/parallel_example.yaml
deleted file mode 100644 (file)
index d149135..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-interactive-on-error: true
-overrides:
-roles:
-- - test0
-  - test1
-- - test0
-  - test1
-- - test0
-tasks:
-- install:
-- parallel_example:
-  - test0
-  - test1  
-
-targets:
-  ubuntu@<host1>: ssh-rsa <host1 key>
-  ubuntu@<host2>: ssh-rsa <host2 key>
-  ubuntu@<host3>: ssh-rsa <host3 key>
-
-
diff --git a/hammer.sh b/hammer.sh
deleted file mode 100755 (executable)
index 30af3f3..0000000
--- a/hammer.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/sh -ex
-
-if [ "$1" = "-a" ]; then
-    shift
-    job=$1
-    log="--archive $job.out"
-else
-    job=$1
-    log=""
-fi
-
-test -e $1
-
-teuthology-nuke -t $job
-
-title() {
-       echo '\[\033]0;hammer '$job' '$N' passes\007\]'
-}
-
-N=0
-title
-[ -n "$log" ] && [ -d $job.out ] && rm -rf $job.out
-while teuthology $log $job $2 $3 $4 
-do
-       date
-       N=$(($N+1))
-       echo "$job: $N passes"
-       [ -n "$log" ] && rm -rf $job.out
-       title
-done
-echo "$job: $N passes, then failure."
diff --git a/jenkins/jenkins-pull-requests-build b/jenkins/jenkins-pull-requests-build
deleted file mode 100755 (executable)
index 36203d2..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/sh
-
-# This is the script that runs inside Jenkins.
-# http://jenkins.ceph.com/job/teuthology-pull-requests/
-
-set -x
-set -e
-
-virtualenv --version
-virtualenv --system-site-packages --distribute venv
-. venv/bin/activate
-venv/bin/pip install tox
-tox -rv
diff --git a/pytest.ini b/pytest.ini
deleted file mode 100644 (file)
index 847360a..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-[pytest]
-norecursedirs = .git build virtualenv teuthology.egg-info .tox
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644 (file)
index 665866a..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-# 0.14 switches to libev, that means bootstrap needs to change too
-gevent == 0.13.6
-# 1.7.7 has AES-128-CBC support for SSH keys, that's needed
-MySQL-python == 1.2.3
-PyYAML
-argparse >= 1.2.1
-beanstalkc >= 0.2.0
-boto >= 2.0b4
-bunch >= 1.0.0
-configobj
-httplib2
-paramiko < 1.8
-pexpect
-requests >= 1.0
-raven
-web.py
-docopt
-psutil >= 2.1.0
-configparser
-
-# Test Dependencies
-# nose >=1.0.0
-# fudge >=1.0.3
diff --git a/roles/3-simple.yaml b/roles/3-simple.yaml
deleted file mode 100644 (file)
index ac2b391..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-roles:
-- [mon.a, mds.a, osd.0]
-- [mon.b, mds.a-s, osd.1]
-- [mon.c, client.0]
diff --git a/roles/overrides.yaml b/roles/overrides.yaml
deleted file mode 100644 (file)
index 2a28b33..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-nuke-on-error: true
-kernel:
-  branch: master
-overrides:
-  ceph:
-    branch: BRANCH_NAME
-    log-whitelist:
-    - 'clocks not synchronized'
-tasks:
-- chef:
diff --git a/schedule_suite.sh b/schedule_suite.sh
deleted file mode 100755 (executable)
index 94955c2..0000000
+++ /dev/null
@@ -1,214 +0,0 @@
-#!/bin/bash
-
-suite=$1
-ceph=$2
-kernel=$3
-email=$4
-flavor=$5
-teuthology_branch=$6
-mtype=$7
-template=$8
-distro=$9
-limit=${10}
-extra_args=""
-
-if [ -z "$limit" ]
-then
-    limitline=''
-else
-    limitline="--limit $limit"
-fi
-
-if [ -z "$kernel" ]; then
-    echo "usage: $0 <suite> <ceph branch> <kernel branch> [email] [flavor] [teuthology-branch] [machinetype] [template] [distro]"
-    echo "  flavor can be 'basic', 'gcov', 'notcmalloc'."
-    echo "  PRIO=123 in the environment sets priority (default 1000, smaller=higher)"
-    exit 1
-fi
-
-if [ -z "$email" ]
-then
-    email='ceph-qa@ceph.com'
-    email_specified=0
-else
-    email_specified=1
-fi
-[ -z "$flavor" ] && flavor='basic'
-[ -z "$distro" ] && distro='ubuntu'
-[ -z "$mtype" ] && mtype='plana'
-
-multi=`echo $mtype | awk -F' |,|-|\t' '{print NF}'`
-if [ $multi -gt 1 ]
-then
-    tube=multi
-else
-    tube=$mtype
-fi
-
-
-if [ "$PRIO" != "" ]; then
-    extra_args="$extra_args --priority $PRIO"
-fi
-
-stamp=`date +%Y-%m-%d_%H:%M:%S`
-nicesuite=`echo $suite | sed 's/\//:/g'`
-name=`whoami`"-$stamp-$nicesuite-$ceph-$kernel-$flavor-$tube"
-
-function schedule_fail {
-    SUBJECT="Failed to schedule $name"
-    MESSAGE="$@"
-    echo $SUBJECT:
-    echo $MESSAGE
-    if [ ! -z "$email" ] && [ "$email_specified" -eq 1 ]
-    then
-        echo "$MESSAGE" | mail -s "$SUBJECT" $email
-    fi
-    exit 1
-}
-
-if [ "$kernel" = "-" ]
-then
-    kernelvalue=""
-else
-    if [ "$kernel" = "distro" ]
-    then
-        KERNEL_SHA1=distro
-    else
-        KERNEL_SHA1=`wget http://gitbuilder.ceph.com/kernel-deb-precise-x86_64-basic/ref/$kernel/sha1 -O- 2>/dev/null`
-    fi
-        [ -z "$KERNEL_SHA1" ] && schedule_fail "Couldn't find kernel branch $kernel"
-        kernelvalue="kernel:
-  kdb: true
-  sha1: $KERNEL_SHA1"
-fi
-##
-[ ! -d ~/src/ceph-qa-suite ] && schedule_fail "error: expects to find ~/src/ceph-qa-suite"
-[ ! -d ~/src/teuthology/virtualenv/bin ] && schedule_fail "error: expects to find ~/src/teuthology/virtualenv/bin"
-
-echo kernel sha1 $KERNEL_SHA1
-
-## get sha1
-if [ "$distro" = "ubuntu" ]
-then
-    if [ "$mtype" = "saya" ]
-    then
-        CEPH_SHA1=`wget http://gitbuilder.ceph.com/ceph-deb-saucy-armv7l-$flavor/ref/$ceph/sha1 -O- 2>/dev/null`
-    else
-        CEPH_SHA1=`wget http://gitbuilder.ceph.com/ceph-deb-precise-x86_64-$flavor/ref/$ceph/sha1 -O- 2>/dev/null`
-    fi
-else
-    CEPH_SHA1=`wget http://gitbuilder.ceph.com/ceph-rpm-centos6-x86_64-$flavor/ref/$ceph/sha1 -O- 2>/dev/null`
-fi
-
-[ -z "$CEPH_SHA1" ] && schedule_fail "Can't find ceph branch $ceph"
-
-echo ceph sha1 $CEPH_SHA1
-
-# Are there packages for this sha1?
-if [ "$distro" = "ubuntu" ]
-then
-    if [ "$mtype" = "saya" ]
-    then
-        CEPH_VER=`wget http://gitbuilder.ceph.com/ceph-deb-saucy-armv7l-$flavor/sha1/$CEPH_SHA1/version -O- 2>/dev/null`
-    else
-        CEPH_VER=`wget http://gitbuilder.ceph.com/ceph-deb-precise-x86_64-$flavor/sha1/$CEPH_SHA1/version -O- 2>/dev/null`
-    fi
-else
-    CEPH_VER=`wget http://gitbuilder.ceph.com/ceph-rpm-centos6-x86_64-$flavor/sha1/$CEPH_SHA1/version -O- 2>/dev/null`
-fi
-
-[ -z "$CEPH_VER" ] && schedule_fail "Can't find packages for ceph branch $ceph sha1 $CEPH_SHA1"
-
-if [ -n "$teuthology_branch" ] && wget http://github.com/ceph/s3-tests/tree/$teuthology_branch -O- 2>/dev/null >/dev/null ; then
-    s3branch=$teuthology_branch
-elif wget http://github.com/ceph/s3-tests/tree/$ceph -O- 2>/dev/null >/dev/null ; then
-    s3branch=$ceph
-else
-    echo "branch $ceph not in s3-tests.git; will use master for s3tests"
-    s3branch='master'
-fi
-echo "s3branch $s3branch"
-
-if [ -z "$teuthology_branch" ]; then
-    if wget http://github.com/ceph/teuthology/tree/$ceph -O- 2>/dev/null >/dev/null ; then
-        teuthology_branch=$ceph
-    else
-        echo "branch $ceph not in teuthology.git; will use master for teuthology"
-        teuthology_branch='master'
-    fi
-fi
-echo "teuthology branch $teuthology_branch"
-
-## always include this
-fn="/tmp/schedule.suite.$$"
-trap "rm $fn" EXIT
-cat <<EOF > $fn
-teuthology_branch: $teuthology_branch
-$kernelvalue
-nuke-on-error: true
-machine_type: $mtype
-os_type: $distro
-branch: $ceph
-suite: $nicesuite
-tasks:
-- chef:
-- clock.check:
-overrides:
-  workunit:
-    sha1: $CEPH_SHA1
-  s3tests:
-    branch: $s3branch
-  install:
-    ceph:
-      sha1: $CEPH_SHA1
-  ceph:
-    sha1: $CEPH_SHA1
-    conf:
-      mon:
-        debug ms: 1
-        debug mon: 20
-        debug paxos: 20
-      osd:
-        debug ms: 1
-        debug osd: 20
-        debug filestore: 20
-        debug journal: 20
-    log-whitelist:
-    - slow request
-  ceph-deploy:
-    branch:
-      dev: $ceph
-    conf:
-      mon:
-        osd default pool size: 2
-        debug mon: 1
-        debug paxos: 20
-        debug ms: 20
-      client:
-        log file: /var/log/ceph/ceph-\$name.\$pid.log
-  admin_socket:
-    branch: $ceph
-EOF
-
-if [ "$flavor" = "gcov" ]; then
-    cat <<EOF >> $fn
-    coverage: yes
-EOF
-fi
-
-## template, too?
-if [ -n "$template" ]; then
-    sed s/CEPH_SHA1/$CEPH_SHA1/ $template | sed s/KERNEL_SHA1/$KERNEL_SHA1/ >> $fn
-fi
-
-echo "name $name"
-
-./virtualenv/bin/teuthology-suite -v $fn \
-    --base ~/src/ceph-qa-suite/suites \
-    --collections $suite \
-    --email $email \
-    --timeout 36000 \
-    $limitline \
-    --name $name \
-    --worker $tube \
-    $extra_args
diff --git a/scripts/__init__.py b/scripts/__init__.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/scripts/coverage.py b/scripts/coverage.py
deleted file mode 100644 (file)
index 3aef9f5..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-import argparse
-
-import teuthology.coverage
-
-
-def main():
-    teuthology.coverage.main(parse_args())
-
-
-def parse_args():
-    parser = argparse.ArgumentParser(description="""
-Analyze the coverage of a suite of test runs, generating html output with lcov.
-""")
-    parser.add_argument(
-        '-o', '--lcov-output',
-        help='the directory in which to store results',
-        required=True,
-    )
-    parser.add_argument(
-        '--html-output',
-        help='the directory in which to store html output',
-    )
-    parser.add_argument(
-        '--cov-tools-dir',
-        help='the location of coverage scripts (cov-init and cov-analyze)',
-        default='../../coverage',
-    )
-    parser.add_argument(
-        '--skip-init',
-        help='skip initialization (useful if a run stopped partway through)',
-        action='store_true',
-        default=False,
-    )
-    parser.add_argument(
-        '-v', '--verbose',
-        help='be more verbose',
-        action='store_true',
-        default=False,
-    )
-    parser.add_argument(
-        'test_dir',
-        help='the location of the test results',
-    )
-    return parser.parse_args()
diff --git a/scripts/kill.py b/scripts/kill.py
deleted file mode 100644 (file)
index cef3e0c..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-import docopt
-
-import teuthology.config
-import teuthology.kill
-
-doc = """
-usage: teuthology-kill -h
-       teuthology-kill [-a ARCHIVE] [-p] -r RUN
-       teuthology-kill [-a ARCHIVE] [-p] -m MACHINE_TYPE -r RUN
-       teuthology-kill [-a ARCHIVE] -r RUN -j JOB ...
-       teuthology-kill [-p] -o OWNER -m MACHINE_TYPE -r RUN
-
-Kill running teuthology jobs:
-1. Removes any queued jobs from the beanstalk queue
-2. Kills any running jobs
-3. Nukes any machines involved
-
-optional arguments:
-  -h, --help            show this help message and exit
-  -a ARCHIVE, --archive ARCHIVE
-                        The base archive directory
-                        [default: {archive_base}]
-  -p, --preserve-queue  Preserve the queue - do not delete queued jobs
-  -r, --run RUN         The name(s) of the run(s) to kill
-  -j, --job JOB         The job_id of the job to kill
-  -o, --owner OWNER     The owner of the job(s)
-  -m, --machine_type MACHINE_TYPE
-                        The type of machine the job(s) are running on.
-                        This is required if killing a job that is still
-                        entirely in the queue.
-""".format(archive_base=teuthology.config.config.archive_base)
-
-
-def main():
-    args = docopt.docopt(doc)
-    teuthology.kill.main(args)
diff --git a/scripts/lock.py b/scripts/lock.py
deleted file mode 100644 (file)
index 0596f58..0000000
+++ /dev/null
@@ -1,166 +0,0 @@
-import argparse
-import textwrap
-
-import teuthology.lock
-
-
-def _positive_int(string):
-    value = int(string)
-    if value < 1:
-        raise argparse.ArgumentTypeError(
-            '{string} is not positive'.format(string=string))
-    return value
-
-
-def main():
-    teuthology.lock.main(parse_args())
-
-
-def parse_args():
-    parser = argparse.ArgumentParser(
-        description='Lock, unlock, or query lock status of machines',
-        epilog=textwrap.dedent('''
-            Examples:
-            teuthology-lock --summary
-            teuthology-lock --lock-many 1 --machine-type vps
-            teuthology-lock --lock -t target.yaml
-            teuthology-lock --list-targets plana01
-            teuthology-lock --list --brief --owner user@host
-            teuthology-lock --brief
-            teuthology-lock --update --status down --desc testing plana01
-        '''),
-        formatter_class=argparse.RawTextHelpFormatter)
-    parser.add_argument(
-        '-v', '--verbose',
-        action='store_true',
-        default=False,
-        help='be more verbose',
-    )
-    group = parser.add_mutually_exclusive_group(required=True)
-    group.add_argument(
-        '--list',
-        action='store_true',
-        default=False,
-        help='Show lock info for machines owned by you, or only machines ' +
-        'specified. Can be restricted by --owner, --status, and --locked.',
-    )
-    group.add_argument(
-        '--brief',
-        action='store_true',
-        default=False,
-        help='Like --list, but with summary instead of detail',
-    )
-    group.add_argument(
-        '--list-targets',
-        action='store_true',
-        default=False,
-        help='Show lock info for all machines, or only machines specified, ' +
-        'in targets: yaml format. Can be restricted by --owner, --status, ' +
-        'and --locked.',
-    )
-    group.add_argument(
-        '--lock',
-        action='store_true',
-        default=False,
-        help='lock particular machines',
-    )
-    group.add_argument(
-        '--unlock',
-        action='store_true',
-        default=False,
-        help='unlock particular machines',
-    )
-    group.add_argument(
-        '--lock-many',
-        dest='num_to_lock',
-        type=_positive_int,
-        help='lock this many machines',
-    )
-    group.add_argument(
-        '--update',
-        action='store_true',
-        default=False,
-        help='update the description or status of some machines',
-    )
-    group.add_argument(
-        '--summary',
-        action='store_true',
-        default=False,
-        help='summarize locked-machine counts by owner',
-    )
-    parser.add_argument(
-        '-a', '--all',
-        action='store_true',
-        default=False,
-        help='list all machines, not just those owned by you',
-    )
-    parser.add_argument(
-        '--owner',
-        default=None,
-        help='owner of the lock(s) (must match to unlock a machine)',
-    )
-    parser.add_argument(
-        '-f',
-        action='store_true',
-        default=False,
-        help="don't exit after the first error, continue locking or " +
-        "unlocking other machines",
-    )
-    parser.add_argument(
-        '--desc',
-        default=None,
-        help='lock description',
-    )
-    parser.add_argument(
-        '--desc-pattern',
-        default=None,
-        help='pattern to match in lock descriptions',
-    )
-    parser.add_argument(
-        '--machine-type',
-        default=None,
-        help='Type of machine to lock, valid choices: mira | plana | ' +
-        'burnupi | vps | saya | tala',
-    )
-    parser.add_argument(
-        '--status',
-        default=None,
-        choices=['up', 'down'],
-        help='whether a machine is usable for testing',
-    )
-    parser.add_argument(
-        '--locked',
-        default=None,
-        choices=['true', 'false'],
-        help='whether a machine is locked',
-    )
-    parser.add_argument(
-        '-t', '--targets',
-        dest='targets',
-        default=None,
-        help='input yaml containing targets',
-    )
-    parser.add_argument(
-        'machines',
-        metavar='MACHINE',
-        default=[],
-        nargs='*',
-        help='machines to operate on',
-    )
-    parser.add_argument(
-        '--os-type',
-        default='ubuntu',
-        help='OS type (distro)',
-    )
-    parser.add_argument(
-        '--os-version',
-        default=None,
-        help='OS (distro) version such as "12.10"',
-    )
-    parser.add_argument(
-        '--downburst-conf',
-        default=None,
-        help='Downburst meta-data yaml file to be used for vps machines',
-    )
-
-    return parser.parse_args()
diff --git a/scripts/ls.py b/scripts/ls.py
deleted file mode 100644 (file)
index f3109b4..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-import argparse
-from teuthology.suite import ls
-
-
-def main():
-    args = parse_args()
-    ls(args.archive_dir, args.verbose)
-
-
-def parse_args():
-    parser = argparse.ArgumentParser(description='List teuthology job results')
-    parser.add_argument(
-        '--archive-dir',
-        metavar='DIR',
-        help='path under which to archive results',
-        required=True,
-    )
-    parser.add_argument(
-        '-v', '--verbose',
-        action='store_true', default=False,
-        help='show reasons tests failed',
-    )
-    return parser.parse_args()
diff --git a/scripts/nuke.py b/scripts/nuke.py
deleted file mode 100644 (file)
index aa9f91d..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-import argparse
-from argparse import RawTextHelpFormatter
-import textwrap
-
-import teuthology.misc
-import teuthology.nuke
-
-
-def main():
-    teuthology.nuke.main(parse_args())
-
-
-def parse_args():
-    parser = argparse.ArgumentParser(
-        description='Reset test machines',
-        epilog=textwrap.dedent('''
-        Examples:
-        teuthology-nuke -t target.yaml --unlock --owner user@host
-        teuthology-nuke -t target.yaml --pid 1234 --unlock --owner user@host \n
-        '''),
-        formatter_class=RawTextHelpFormatter)
-    parser.add_argument(
-        '-v', '--verbose',
-        action='store_true', default=None,
-        help='be more verbose'
-    )
-    parser.add_argument(
-        '-t', '--targets',
-        nargs='+',
-        type=teuthology.misc.config_file,
-        action=teuthology.misc.MergeConfig,
-        default={},
-        dest='config',
-        help='yaml config containing machines to nuke',
-    )
-    parser.add_argument(
-        '-a', '--archive',
-        metavar='DIR',
-        help='archive path for a job to kill and nuke',
-    )
-    parser.add_argument(
-        '--owner',
-        help='job owner',
-    )
-    parser.add_argument(
-        '-p',
-        '--pid',
-        type=int,
-        default=False,
-        help='pid of the process to be killed',
-    )
-    parser.add_argument(
-        '-r', '--reboot-all',
-        action='store_true',
-        default=False,
-        help='reboot all machines',
-    )
-    parser.add_argument(
-        '-s', '--synch-clocks',
-        action='store_true',
-        default=False,
-        help='synchronize clocks on all machines',
-    )
-    parser.add_argument(
-        '-u', '--unlock',
-        action='store_true',
-        default=False,
-        help='Unlock each successfully nuked machine, and output targets '
-        'that could not be nuked.'
-    )
-    parser.add_argument(
-        '-n', '--name',
-        metavar='NAME',
-        help='Name of run to cleanup'
-    )
-    parser.add_argument(
-        '-i', '--noipmi',
-        action='store_true', default=False,
-        help='Skip ipmi checking'
-    )
-    return parser.parse_args()
diff --git a/scripts/queue.py b/scripts/queue.py
deleted file mode 100644 (file)
index 324aade..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-import docopt
-
-import teuthology.config
-import teuthology.beanstalk
-
-doc = """
-usage: teuthology-queue -h
-       teuthology-queue [-d] -m MACHINE_TYPE
-       teuthology-queue [-r] -m MACHINE_TYPE
-       teuthology-queue -m MACHINE_TYPE -D PATTERN
-
-List Jobs in queue:
-  If -D PATTERN is given, jobs with PATTERN in their name are deleted from the queue.
-
-Arguments:
-  -m, --machine_type MACHINE_TYPE
-                        Which machine type queue to work on.
-
-optional arguments:
-  -h, --help            Show this help message and exit
-  -D, --delete PATTERN  Delete Jobs with PATTERN in their name
-  -d, --description     Show job descriptions
-  -r, --runs            Only show run names
-
-
-""".format(archive_base=teuthology.config.config.archive_base)
-
-
-def main():
-    args = docopt.docopt(doc)
-    teuthology.beanstalk.main(args)
diff --git a/scripts/report.py b/scripts/report.py
deleted file mode 100644 (file)
index d2b39d3..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-import docopt
-
-import teuthology.report
-
-doc = """
-usage:
-    teuthology-report -h
-    teuthology-report [-v] [-R] [-n] [-s SERVER] [-a ARCHIVE] [-D] -r RUN ...
-    teuthology-report [-v] [-s SERVER] [-a ARCHIVE] [-D] -r RUN -j JOB ...
-    teuthology-report [-v] [-R] [-n] [-s SERVER] [-a ARCHIVE] --all-runs
-
-Submit test results to a web service
-
-optional arguments:
-  -h, --help            show this help message and exit
-  -a ARCHIVE, --archive ARCHIVE
-                        The base archive directory
-                        [default: {archive_base}]
-  -r [RUN ...], --run [RUN ...]
-                        A run (or list of runs) to submit
-  -j [JOB ...], --job [JOB ...]
-                        A job (or list of jobs) to submit
-  --all-runs            Submit all runs in the archive
-  -R, --refresh         Re-push any runs already stored on the server. Note
-                        that this may be slow.
-  -s SERVER, --server SERVER
-                        The server to post results to, e.g.
-                        http://localhost:8080/ . May also be specified in
-                        ~/.teuthology.yaml as 'results_server'
-  -n, --no-save         By default, when submitting all runs, we remember the
-                        last successful submission in a file called
-                        'last_successful_run'. Pass this flag to disable that
-                        behavior.
-  -D, --dead            Mark all given jobs (or entire runs) with status
-                        'dead'. Implies --refresh.
-  -v, --verbose         be more verbose
-""".format(archive_base=teuthology.config.config.archive_base)
-
-
-def main():
-    args = docopt.docopt(doc)
-    teuthology.report.main(args)
diff --git a/scripts/results.py b/scripts/results.py
deleted file mode 100644 (file)
index b857c53..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-import argparse
-
-import teuthology.results
-
-
-def main():
-    teuthology.results.main(parse_args())
-
-
-def parse_args():
-    parser = argparse.ArgumentParser(
-        description='Email teuthology suite results')
-    parser.add_argument(
-        '--email',
-        help='address to email test failures to',
-    )
-    parser.add_argument(
-        '--timeout',
-        help='how many seconds to wait for all tests to finish (default no ' +
-        'wait)',
-        type=int,
-        default=0,
-    )
-    parser.add_argument(
-        '--archive-dir',
-        metavar='DIR',
-        help='path under which results for the suite are stored',
-        required=True,
-    )
-    parser.add_argument(
-        '--name',
-        help='name of the suite',
-        required=True,
-    )
-    parser.add_argument(
-        '-v', '--verbose',
-        action='store_true', default=False,
-        help='be more verbose',
-    )
-    return parser.parse_args()
diff --git a/scripts/run.py b/scripts/run.py
deleted file mode 100644 (file)
index 5eee377..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-import argparse
-
-import teuthology.misc
-import teuthology.run
-
-
-def main():
-    teuthology.run.main(parse_args())
-
-
-def parse_args():
-    parser = argparse.ArgumentParser(description='Run ceph integration tests')
-    parser.add_argument(
-        '-v', '--verbose',
-        action='store_true', default=None,
-        help='be more verbose',
-    )
-    parser.add_argument(
-        'config',
-        metavar='CONFFILE',
-        nargs='+',
-        type=teuthology.misc.config_file,
-        action=teuthology.misc.MergeConfig,
-        default={},
-        help='config file to read',
-    )
-    parser.add_argument(
-        '-a', '--archive',
-        metavar='DIR',
-        help='path to archive results in',
-    )
-    parser.add_argument(
-        '--description',
-        help='job description',
-    )
-    parser.add_argument(
-        '--owner',
-        help='job owner',
-    )
-    parser.add_argument(
-        '--lock',
-        action='store_true',
-        default=False,
-        help='lock machines for the duration of the run',
-    )
-    parser.add_argument(
-        '--machine-type',
-        default=None,
-        help='Type of machine to lock/run tests on.',
-    )
-    parser.add_argument(
-        '--os-type',
-        default='ubuntu',
-        help='Distro/OS of machine to run test on.',
-    )
-    parser.add_argument(
-        '--os-version',
-        default=None,
-        help='Distro/OS version of machine to run test on.',
-    )
-    parser.add_argument(
-        '--block',
-        action='store_true',
-        default=False,
-        help='block until locking machines succeeds (use with --lock)',
-    )
-    parser.add_argument(
-        '--name',
-        metavar='NAME',
-        help='name for this teuthology run',
-    )
-
-    return parser.parse_args()
diff --git a/scripts/schedule.py b/scripts/schedule.py
deleted file mode 100644 (file)
index 62732f2..0000000
+++ /dev/null
@@ -1,99 +0,0 @@
-import argparse
-
-import teuthology.misc
-import teuthology.schedule
-
-
-def main():
-    teuthology.schedule.main(parse_args())
-
-
-def parse_args():
-    parser = argparse.ArgumentParser(
-        description='Schedule ceph integration tests')
-    parser.add_argument(
-        'config',
-        metavar='CONFFILE',
-        nargs='*',
-        type=teuthology.misc.config_file,
-        action=teuthology.misc.MergeConfig,
-        default={},
-        help='config file to read',
-    )
-    parser.add_argument(
-        '--name',
-        help='name of suite run the job is part of',
-    )
-    parser.add_argument(
-        '--last-in-suite',
-        action='store_true',
-        default=False,
-        help='mark the last job in a suite so suite post-processing can be ' +
-        'run',
-    )
-    parser.add_argument(
-        '--email',
-        help='where to send the results of a suite (only applies to the ' +
-        'last job in a suite)',
-    )
-    parser.add_argument(
-        '--timeout',
-        help='how many seconds to wait for jobs to finish before emailing ' +
-        'results (only applies to the last job in a suite)',
-        type=int,
-    )
-    parser.add_argument(
-        '--description',
-        help='job description',
-    )
-    parser.add_argument(
-        '--owner',
-        help='job owner',
-    )
-    parser.add_argument(
-        '--delete',
-        metavar='JOBID',
-        type=int,
-        nargs='*',
-        help='list of jobs to remove from the queue',
-    )
-    parser.add_argument(
-        '-n', '--num',
-        default=1,
-        type=int,
-        help='number of times to run/queue the job'
-    )
-    parser.add_argument(
-        '-p', '--priority',
-        default=1000,
-        type=int,
-        help='beanstalk priority (lower is sooner)'
-    )
-    parser.add_argument(
-        '-v', '--verbose',
-        action='store_true',
-        default=False,
-        help='be more verbose',
-    )
-    parser.add_argument(
-        '-w', '--worker',
-        default='plana',
-        help='which worker to use (type of machine)',
-    )
-    parser.add_argument(
-        '-s', '--show',
-        metavar='JOBID',
-        type=int,
-        nargs='*',
-        help='output the contents of specified jobs in the queue',
-    )
-
-    args = parser.parse_args()
-
-    if not args.last_in_suite:
-        msg = '--email is only applicable to the last job in a suite'
-        assert not args.email, msg
-        msg = '--timeout is only applicable to the last job in a suite'
-        assert not args.timeout, msg
-
-    return args
diff --git a/scripts/suite.py b/scripts/suite.py
deleted file mode 100644 (file)
index 6a7336a..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-import argparse
-
-import teuthology.suite
-
-
-def main():
-    teuthology.suite.main(parse_args())
-
-
-def parse_args():
-    parser = argparse.ArgumentParser(description="""
-Run a suite of ceph integration tests.
-
-A suite is a set of collections.
-
-A collection is a directory containing facets.
-
-A facet is a directory containing config snippets.
-
-Running a collection means running teuthology for every configuration
-combination generated by taking one config snippet from each facet.
-
-Any config files passed on the command line will be used for every
-combination, and will override anything in the suite.
-""")
-    parser.add_argument(
-        '-v', '--verbose',
-        action='store_true', default=None,
-        help='be more verbose',
-    )
-    parser.add_argument(
-        '--dry-run',
-        action='store_true', default=None,
-        help='do a dry run; do not schedule anything',
-    )
-    parser.add_argument(
-        '--name',
-        help='name for this suite',
-        required=True,
-    )
-    parser.add_argument(
-        '--base',
-        default=None,
-        help='base directory for the collection(s)'
-    )
-    parser.add_argument(
-        '--collections',
-        metavar='DIR',
-        nargs='+',
-        required=True,
-        help='the collections to run',
-    )
-    parser.add_argument(
-        '--owner',
-        help='job owner',
-    )
-    parser.add_argument(
-        '--email',
-        help='address to email test failures to',
-    )
-    parser.add_argument(
-        '--timeout',
-        help='how many seconds to wait for jobs to finish before emailing ' +
-        'results',
-    )
-    parser.add_argument(
-        '-n', '--num',
-        default=1,
-        type=int,
-        help='number of times to run/queue each job'
-    )
-    parser.add_argument(
-        '-p', '--priority',
-        default=1000,
-        type=int,
-        help='queue priority (lower value is higher priority)'
-    )
-    parser.add_argument(
-        '-l', '--limit',
-        default=0,
-        type=int,
-        help='limit number of jobs in loop to N'
-    )
-    parser.add_argument(
-        '-w', '--worker',
-        default='plana',
-        help='which worker to use (type of machine)',
-    )
-    parser.add_argument(
-        'config',
-        metavar='CONFFILE',
-        nargs='*',
-        default=[],
-        help='config file to read',
-    )
-
-    return parser.parse_args()
diff --git a/scripts/test/script.py b/scripts/test/script.py
deleted file mode 100644 (file)
index 4f5aa7a..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-import subprocess
-from pytest import raises
-
-
-class Script(object):
-    script_name = 'teuthology'
-
-    def test_help(self):
-        args = (self.script_name, '--help')
-        out = subprocess.check_output(args)
-        assert out.startswith('usage')
-
-    def test_invalid(self):
-        args = (self.script_name, 'INVALID')
-        with raises(subprocess.CalledProcessError):
-            subprocess.check_call(args)
diff --git a/scripts/test/test_coverage.py b/scripts/test/test_coverage.py
deleted file mode 100644 (file)
index a04b9e8..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-from script import Script
-
-
-class TestCoverage(Script):
-    script_name = 'teuthology-coverage'
diff --git a/scripts/test/test_lock.py b/scripts/test/test_lock.py
deleted file mode 100644 (file)
index 3fc803a..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-from script import Script
-
-
-class TestLock(Script):
-    script_name = 'teuthology-lock'
diff --git a/scripts/test/test_ls.py b/scripts/test/test_ls.py
deleted file mode 100644 (file)
index 6de10ce..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-from script import Script
-
-
-class TestLs(Script):
-    script_name = 'teuthology-ls'
diff --git a/scripts/test/test_nuke.py b/scripts/test/test_nuke.py
deleted file mode 100644 (file)
index fa615c4..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-from script import Script
-
-
-class TestNuke(Script):
-    script_name = 'teuthology-nuke'
diff --git a/scripts/test/test_report.py b/scripts/test/test_report.py
deleted file mode 100644 (file)
index c8065fd..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-from script import Script
-
-
-class TestReport(Script):
-    script_name = 'teuthology-report'
diff --git a/scripts/test/test_results.py b/scripts/test/test_results.py
deleted file mode 100644 (file)
index a97981c..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-from script import Script
-
-
-class TestResults(Script):
-    script_name = 'teuthology-results'
diff --git a/scripts/test/test_run.py b/scripts/test/test_run.py
deleted file mode 100644 (file)
index 36d42c3..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-from script import Script
-
-
-class TestRun(Script):
-    script_name = 'teuthology'
diff --git a/scripts/test/test_schedule.py b/scripts/test/test_schedule.py
deleted file mode 100644 (file)
index e89f983..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-from script import Script
-
-
-class TestSchedule(Script):
-    script_name = 'teuthology-schedule'
diff --git a/scripts/test/test_suite.py b/scripts/test/test_suite.py
deleted file mode 100644 (file)
index 062aba4..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-from script import Script
-
-
-class TestSuite(Script):
-    script_name = 'teuthology-suite'
diff --git a/scripts/test/test_updatekeys.py b/scripts/test/test_updatekeys.py
deleted file mode 100644 (file)
index c4122b0..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-from script import Script
-import subprocess
-from pytest import raises
-from pytest import skip
-
-
-class TestUpdatekeys(Script):
-    script_name = 'teuthology-updatekeys'
-
-    def test_invalid(self):
-        skip("teuthology.lock needs to be partially refactored to allow " +
-             "teuthology-updatekeys to return nonzero in all error cases")
-
-    def test_all_and_targets(self):
-        args = (self.script_name, '-a', '-t', 'foo')
-        with raises(subprocess.CalledProcessError):
-            subprocess.check_call(args)
-
-    def test_no_args(self):
-        with raises(subprocess.CalledProcessError):
-            subprocess.check_call(self.script_name)
diff --git a/scripts/test/test_worker.py b/scripts/test/test_worker.py
deleted file mode 100644 (file)
index 8e76c43..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-from script import Script
-
-
-class TestWorker(Script):
-    script_name = 'teuthology-worker'
diff --git a/scripts/updatekeys.py b/scripts/updatekeys.py
deleted file mode 100644 (file)
index d2ac6cc..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-import argparse
-import sys
-
-import teuthology.lock
-
-
-def main():
-    status = teuthology.lock.updatekeys(parse_args())
-    sys.exit(status)
-
-
-def parse_args():
-    parser = argparse.ArgumentParser(description="""
-Update any hostkeys that have changed. You can list specific machines
-to run on, or use -a to check all of them automatically.
-""")
-    parser.add_argument(
-        '-v', '--verbose',
-        action='store_true',
-        default=False,
-        help='be more verbose',
-    )
-    group = parser.add_mutually_exclusive_group()
-    group.add_argument(
-        '-t', '--targets',
-        default=None,
-        help='input yaml containing targets to check',
-    )
-    group.add_argument(
-        '-a', '--all',
-        action='store_true',
-        default=False,
-        help='update hostkeys of all machines in the db',
-    )
-    group.add_argument(
-        'machines',
-        metavar='MACHINES',
-        default=[],
-        nargs='*',
-        help='hosts to check for updated keys',
-    )
-
-    args = parser.parse_args()
-
-    if not (args.all or args.targets or args.machines):
-        parser.print_usage()
-        print "{name}: error: You must specify machines to update".format(
-            name='teuthology-updatekeys')
-        sys.exit(2)
-
-    return args
diff --git a/scripts/worker.py b/scripts/worker.py
deleted file mode 100644 (file)
index a3e12c2..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-import argparse
-
-import teuthology.worker
-
-
-def main():
-    teuthology.worker.main(parse_args())
-
-
-def parse_args():
-    parser = argparse.ArgumentParser(description="""
-Grab jobs from a beanstalk queue and run the teuthology tests they
-describe. One job is run at a time.
-""")
-    parser.add_argument(
-        '-v', '--verbose',
-        action='store_true', default=None,
-        help='be more verbose',
-    )
-    parser.add_argument(
-        '--archive-dir',
-        metavar='DIR',
-        help='path under which to archive results',
-        required=True,
-    )
-    parser.add_argument(
-        '-l', '--log-dir',
-        help='path in which to store logs',
-        required=True,
-    )
-    parser.add_argument(
-        '-t', '--tube',
-        help='which beanstalk tube to read jobs from',
-        required=True,
-    )
-
-    return parser.parse_args()
diff --git a/setup.py b/setup.py
deleted file mode 100644 (file)
index 2354e68..0000000
--- a/setup.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from setuptools import setup, find_packages
-
-setup(
-    name='teuthology',
-    version='0.0.1',
-    packages=find_packages(),
-
-    author='Tommi Virtanen',
-    author_email='tommi.virtanen@dreamhost.com',
-    description='Ceph test runner',
-    license='MIT',
-    keywords='ceph testing ssh cluster',
-
-    # an entry point of the form 'A.B:foo' maps to the function foo()
-    # defined in module A.B (i.e. the file A/B.py); setuptools generates
-    # a console script that imports that module and calls the function
-    entry_points={
-        'console_scripts': [
-            'teuthology = scripts.run:main',
-            'teuthology-nuke = scripts.nuke:main',
-            'teuthology-suite = scripts.suite:main',
-            'teuthology-ls = scripts.ls:main',
-            'teuthology-worker = scripts.worker:main',
-            'teuthology-lock = scripts.lock:main',
-            'teuthology-schedule = scripts.schedule:main',
-            'teuthology-updatekeys = scripts.updatekeys:main',
-            'teuthology-coverage = scripts.coverage:main',
-            'teuthology-results = scripts.results:main',
-            'teuthology-report = scripts.report:main',
-            'teuthology-kill = scripts.kill:main',
-            'teuthology-queue = scripts.queue:main',
-            ],
-        },
-
-    )
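
Each console_scripts entry of the form 'name = package.module:func' makes setuptools generate a wrapper executable that imports the module and calls the function. The teuthology command above resolves roughly to the following sketch of what the generated wrapper does::

    import sys
    from scripts.run import main   # 'teuthology = scripts.run:main'

    if __name__ == '__main__':
        sys.exit(main())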
diff --git a/tasks/__init__.py b/tasks/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tasks/admin_socket.py b/tasks/admin_socket.py
new file mode 100644 (file)
index 0000000..20a6701
--- /dev/null
@@ -0,0 +1,192 @@
+"""
+Admin Socket task -- used in rados, powercycle, and smoke testing
+"""
+from cStringIO import StringIO
+
+import json
+import logging
+import os
+import time
+
+from ..orchestra import run
+from teuthology import misc as teuthology
+from teuthology.parallel import parallel
+
+log = logging.getLogger(__name__)
+
+
+def task(ctx, config):
+    """
+    Run an admin socket command, make sure the output is json, and run
+    a test program on it. The test program should read json from
+    stdin. This task succeeds if the test program exits with status 0.
+
+    To run the same test on all clients::
+
+        tasks:
+        - ceph:
+        - rados:
+        - admin_socket:
+            all:
+              dump_requests:
+                test: http://example.com/script
+
+    To restrict it to certain clients::
+
+        tasks:
+        - ceph:
+        - rados: [client.1]
+        - admin_socket:
+            client.1:
+              dump_requests:
+                test: http://example.com/script
+
+    If an admin socket command has arguments, they can be specified as
+    a list::
+
+        tasks:
+        - ceph:
+        - rados: [client.0]
+        - admin_socket:
+            client.0:
+              dump_requests:
+                test: http://example.com/script
+              help:
+                test: http://example.com/test_help_version
+                args: [version]
+
+    Note that there must be a ceph client with an admin socket running
+    before this task is run. The tests are parallelized at the client
+    level. Tests for a single client are run serially.
+
+    :param ctx: Context
+    :param config: Configuration
+    """
+    assert isinstance(config, dict), \
+        'admin_socket task requires a dict for configuration'
+    teuthology.replace_all_with_clients(ctx.cluster, config)
+
+    with parallel() as ptask:
+        for client, tests in config.iteritems():
+            ptask.spawn(_run_tests, ctx, client, tests)
+
+
+def _socket_command(ctx, remote, socket_path, command, args):
+    """
+    Run an admin socket command and return the result as a string.
+
+    :param ctx: Context
+    :param remote: Remote site
+    :param socket_path: path to socket
+    :param command: command to be run remotely
+    :param args: command arguments
+
+    :returns: output of command in json format
+    """
+    json_fp = StringIO()
+    testdir = teuthology.get_testdir(ctx)
+    max_tries = 60
+    while True:
+        proc = remote.run(
+            args=[
+                'sudo',
+                'adjust-ulimits',
+                'ceph-coverage',
+                '{tdir}/archive/coverage'.format(tdir=testdir),
+                'ceph',
+                '--admin-daemon', socket_path,
+                ] + command.split(' ') + args,
+            stdout=json_fp,
+            check_status=False,
+            )
+        if proc.exitstatus == 0:
+            break
+        assert max_tries > 0
+        max_tries -= 1
+        log.info('ceph cli returned an error, command not registered yet?')
+        log.info('sleeping and retrying ...')
+        time.sleep(1)
+    out = json_fp.getvalue()
+    json_fp.close()
+    log.debug('admin socket command %s returned %s', command, out)
+    return json.loads(out)
+
+def _run_tests(ctx, client, tests):
+    """
+    Create a temp directory and wait for a client socket to be created.
+    For each test, copy the executable locally and run the test.
+    Remove temp directory when finished.
+
+    :param ctx: Context
+    :param client: client machine to run the test
+    :param tests: list of tests to run
+    """
+    testdir = teuthology.get_testdir(ctx)
+    log.debug('Running admin socket tests on %s', client)
+    (remote,) = ctx.cluster.only(client).remotes.iterkeys()
+    socket_path = '/var/run/ceph/ceph-{name}.asok'.format(name=client)
+    overrides = ctx.config.get('overrides', {}).get('admin_socket', {})
+
+    try:
+        tmp_dir = os.path.join(
+            testdir,
+            'admin_socket_{client}'.format(client=client),
+            )
+        remote.run(
+            args=[
+                'mkdir',
+                '--',
+                tmp_dir,
+                run.Raw('&&'),
+                # wait for client process to create the socket
+                'while', 'test', '!', '-e', socket_path, run.Raw(';'),
+                'do', 'sleep', '1', run.Raw(';'), 'done',
+                ],
+            )
+
+        for command, config in tests.iteritems():
+            if config is None:
+                config = {}
+            teuthology.deep_merge(config, overrides)
+            log.debug('Testing %s with config %s', command, str(config))
+
+            test_path = None
+            if 'test' in config:
+                url = config['test'].format(
+                    branch=config.get('branch', 'master')
+                    )
+                test_path = os.path.join(tmp_dir, command)
+                remote.run(
+                    args=[
+                        'wget',
+                        '-q',
+                        '-O',
+                        test_path,
+                        '--',
+                        url,
+                        run.Raw('&&'),
+                        'chmod',
+                        'u=rx',
+                        '--',
+                        test_path,
+                        ],
+                    )
+
+            args = config.get('args', [])
+            assert isinstance(args, list), \
+                'admin socket command args must be a list'
+            sock_out = _socket_command(ctx, remote, socket_path, command, args)
+            if test_path is not None:
+                remote.run(
+                    args=[
+                        test_path,
+                        ],
+                    stdin=json.dumps(sock_out),
+                    )
+
+    finally:
+        remote.run(
+            args=[
+                'rm', '-rf', '--', tmp_dir,
+                ],
+            )
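
The task docstring above only requires that the fetched test program read the admin socket's JSON output from stdin and signal success by exiting 0. A hypothetical minimal test script (the checks are left as a placeholder) could look like::

    #!/usr/bin/env python
    # hypothetical program served at the 'test' URL in the task config
    import json
    import sys

    data = json.load(sys.stdin)   # JSON emitted by the admin socket command
    # run whatever checks the test needs on 'data'; an uncaught exception or a
    # nonzero exit status makes the admin_socket task fail
    sys.exit(0)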
diff --git a/tasks/apache.conf.template b/tasks/apache.conf.template
new file mode 100644 (file)
index 0000000..c6fc662
--- /dev/null
@@ -0,0 +1,42 @@
+<IfModule !env_module>
+  LoadModule env_module {mod_path}/mod_env.so
+</IfModule>
+<IfModule !rewrite_module>
+  LoadModule rewrite_module {mod_path}/mod_rewrite.so
+</IfModule>
+<IfModule !fastcgi_module>
+  LoadModule fastcgi_module {mod_path}/mod_fastcgi.so
+</IfModule>
+<IfModule !log_config_module>
+  LoadModule log_config_module {mod_path}/mod_log_config.so
+</IfModule>
+
+Listen {port}
+ServerName {host}
+
+ServerRoot {testdir}/apache
+ErrorLog {testdir}/archive/apache.{client}/error.log
+LogFormat "%h l %u %t \"%r\" %>s %b \"{{Referer}}i\" \"%{{User-agent}}i\"" combined
+CustomLog {testdir}/archive/apache.{client}/access.log combined
+PidFile {testdir}/apache/tmp.{client}/apache.pid
+DocumentRoot {testdir}/apache/htdocs.{client}
+FastCgiIPCDir {testdir}/apache/tmp.{client}/fastcgi_sock
+FastCgiExternalServer {testdir}/apache/htdocs.{client}/rgw.fcgi -socket rgw_sock -idle-timeout {idle_timeout}
+RewriteEngine On
+
+RewriteRule ^/([a-zA-Z0-9-_.]*)([/]?.*) /rgw.fcgi?page=$1&params=$2&%{{QUERY_STRING}} [E=HTTP_AUTHORIZATION:%{{HTTP:Authorization}},L]
+
+# Set fastcgi environment variables.
+# Note that this is separate from Unix environment variables!
+SetEnv RGW_LOG_LEVEL 20
+SetEnv RGW_SHOULD_LOG yes
+SetEnv RGW_PRINT_CONTINUE {print_continue}
+
+<Directory {testdir}/apache/htdocs.{client}>
+  Options +ExecCGI
+  AllowOverride All
+  SetHandler fastcgi-script
+</Directory>
+
+AllowEncodedSlashes On
+ServerSignature Off
diff --git a/tasks/autotest.py b/tasks/autotest.py
new file mode 100644 (file)
index 0000000..24a7675
--- /dev/null
@@ -0,0 +1,166 @@
+""" 
+Run an autotest test on the ceph cluster.
+"""
+import json
+import logging
+import os
+
+from teuthology import misc as teuthology
+from teuthology.parallel import parallel
+from ..orchestra import run
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+    """
+    Run an autotest test on the ceph cluster.
+
+    Only autotest client tests are supported.
+
+    The config is a mapping from role name to list of tests to run on
+    that client.
+
+    For example::
+
+        tasks:
+        - ceph:
+        - ceph-fuse: [client.0, client.1]
+        - autotest:
+            client.0: [dbench]
+            client.1: [bonnie]
+
+    You can also specify a list of tests to run on all clients::
+
+        tasks:
+        - ceph:
+        - ceph-fuse:
+        - autotest:
+            all: [dbench]
+    """
+    assert isinstance(config, dict)
+    config = teuthology.replace_all_with_clients(ctx.cluster, config)
+    log.info('Setting up autotest...')
+    testdir = teuthology.get_testdir(ctx)
+    with parallel() as p:
+        for role in config.iterkeys():
+            (remote,) = ctx.cluster.only(role).remotes.keys()
+            p.spawn(_download, testdir, remote)
+
+    log.info('Making a separate scratch dir for every client...')
+    for role in config.iterkeys():
+        assert isinstance(role, basestring)
+        PREFIX = 'client.'
+        assert role.startswith(PREFIX)
+        id_ = role[len(PREFIX):]
+        (remote,) = ctx.cluster.only(role).remotes.iterkeys()
+        mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
+        scratch = os.path.join(mnt, 'client.{id}'.format(id=id_))
+        remote.run(
+            args=[
+                'sudo',
+                'install',
+                '-d',
+                '-m', '0755',
+                '--owner={user}'.format(user='ubuntu'), #TODO
+                '--',
+                scratch,
+                ],
+            )
+
+    with parallel() as p:
+        for role, tests in config.iteritems():
+            (remote,) = ctx.cluster.only(role).remotes.keys()
+            p.spawn(_run_tests, testdir, remote, role, tests)
+
+def _download(testdir, remote):
+    """
+    Download autotest onto the remote host.  Does not explicitly support multiple tasks in a single run.
+    """
+    remote.run(
+        args=[
+            # explicitly does not support multiple autotest tasks
+            # in a single run; the result archival would conflict
+            'mkdir', '{tdir}/archive/autotest'.format(tdir=testdir),
+            run.Raw('&&'),
+            'mkdir', '{tdir}/autotest'.format(tdir=testdir),
+            run.Raw('&&'),
+            'wget',
+            '-nv',
+            '--no-check-certificate',
+            'https://github.com/ceph/autotest/tarball/ceph',
+            '-O-',
+            run.Raw('|'),
+            'tar',
+            '-C', '{tdir}/autotest'.format(tdir=testdir),
+            '-x',
+            '-z',
+            '-f-',
+            '--strip-components=1',
+            ],
+        )
+
+def _run_tests(testdir, remote, role, tests):
+    """
+    Spawned to run test on remote site
+    """
+    assert isinstance(role, basestring)
+    PREFIX = 'client.'
+    assert role.startswith(PREFIX)
+    id_ = role[len(PREFIX):]
+    mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
+    scratch = os.path.join(mnt, 'client.{id}'.format(id=id_))
+
+    assert isinstance(tests, list)
+    for idx, testname in enumerate(tests):
+        log.info('Running autotest client test #%d: %s...', idx, testname)
+
+        tag = 'client.{id}.num{idx}.{testname}'.format(
+            idx=idx,
+            testname=testname,
+            id=id_,
+            )
+        control = '{tdir}/control.{tag}'.format(tdir=testdir, tag=tag)
+        teuthology.write_file(
+            remote=remote,
+            path=control,
+            data='import json; data=json.loads({data!r}); job.run_test(**data)'.format(
+                data=json.dumps(dict(
+                        url=testname,
+                        dir=scratch,
+                        # TODO perhaps tag
+                        # results will be in {testdir}/autotest/client/results/dbench
+                        # or {testdir}/autotest/client/results/dbench.{tag}
+                        )),
+                ),
+            )
+        remote.run(
+            args=[
+                '{tdir}/autotest/client/bin/autotest'.format(tdir=testdir),
+                '--verbose',
+                '--harness=simple',
+                '--tag={tag}'.format(tag=tag),
+                control,
+                run.Raw('3>&1'),
+                ],
+            )
+
+        remote.run(
+            args=[
+                'rm', '-rf', '--', control,
+                ],
+            )
+
+        remote.run(
+            args=[
+                'mv',
+                '--',
+                '{tdir}/autotest/client/results/{tag}'.format(tdir=testdir, tag=tag),
+                '{tdir}/archive/autotest/{tag}'.format(tdir=testdir, tag=tag),
+                ],
+            )
+
+    remote.run(
+        args=[
+            'rm', '-rf', '--', '{tdir}/autotest'.format(tdir=testdir),
+            ],
+        )
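
For reference, the control file that _run_tests() writes is itself a one-line Python snippet executed by the autotest harness. For client.0 running 'dbench' it would look roughly like this (the scratch path is a placeholder)::

    # generated control file; 'job' is supplied by the autotest harness
    import json; data=json.loads('{"url": "dbench", "dir": "<testdir>/mnt.0/client.0"}'); job.run_test(**data)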
diff --git a/tasks/blktrace.py b/tasks/blktrace.py
new file mode 100644 (file)
index 0000000..208bfd5
--- /dev/null
@@ -0,0 +1,93 @@
+"""
+Run blktrace program through teuthology
+"""
+import contextlib
+import logging
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+from ..orchestra import run 
+
+log = logging.getLogger(__name__)
+blktrace = '/usr/sbin/blktrace'
+daemon_signal = 'term'
+
+@contextlib.contextmanager
+def setup(ctx, config):
+    """
+    Set up the blktrace log directory on each OSD remote.
+    """
+    osds = ctx.cluster.only(teuthology.is_type('osd'))
+    log_dir = '{tdir}/archive/performance/blktrace'.format(tdir=teuthology.get_testdir(ctx))
+
+    for remote, roles_for_host in osds.remotes.iteritems():
+        log.info('Creating %s on %s' % (log_dir, remote.name))
+        remote.run(
+            args=['mkdir', '-p', '-m0755', '--', log_dir],
+            wait=False,
+            )
+    yield
+
+@contextlib.contextmanager
+def execute(ctx, config):
+    """
+    Run the blktrace program on remote machines.
+    """
+    procs = []
+    testdir = teuthology.get_testdir(ctx)
+    log_dir = '{tdir}/archive/performance/blktrace'.format(tdir=testdir)
+
+    osds = ctx.cluster.only(teuthology.is_type('osd'))
+    for remote, roles_for_host in osds.remotes.iteritems():
+        roles_to_devs = ctx.disk_config.remote_to_roles_to_dev[remote]
+        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
+            if roles_to_devs.get(id_):
+                dev = roles_to_devs[id_]
+                log.info("running blktrace on %s: %s" % (remote.name, dev))
+
+                proc = remote.run(
+                    args=[
+                        'cd',
+                        log_dir,
+                        run.Raw(';'),
+                        'daemon-helper',
+                        daemon_signal,
+                        'sudo',
+                        blktrace,
+                        '-o',
+                        dev.rsplit("/", 1)[1],
+                        '-d',
+                        dev,
+                        ],
+                    wait=False,   
+                    stdin=run.PIPE,
+                    )
+                procs.append(proc)
+    try:
+        yield
+    finally:
+        osds = ctx.cluster.only(teuthology.is_type('osd'))
+        log.info('stopping blktrace processes')
+        for proc in procs:
+            proc.stdin.close()
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Usage:
+        blktrace:
+
+    Runs blktrace against each OSD's data device.
+    """
+    if config is None:
+        config = dict(('client.{id}'.format(id=id_), None)
+                  for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client'))
+    elif isinstance(config, list):
+        config = dict.fromkeys(config)
+
+    with contextutil.nested(
+        lambda: setup(ctx=ctx, config=config),
+        lambda: execute(ctx=ctx, config=config),
+        ):
+        yield
+
diff --git a/tasks/calamari/http_client.py b/tasks/calamari/http_client.py
new file mode 100755 (executable)
index 0000000..84a03c7
--- /dev/null
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+
+import json
+import logging
+import requests
+
+log = logging.getLogger(__name__)
+
+
+class AuthenticatedHttpClient(requests.Session):
+    """
+    Client for the calamari REST API, principally exists to do
+    authentication, but also helpfully prefixes
+    URLs in requests with the API base URL and JSONizes
+    POST data.
+    """
+    def __init__(self, api_url, username, password):
+        super(AuthenticatedHttpClient, self).__init__()
+        self._username = username
+        self._password = password
+        self._api_url = api_url
+        self.headers = {
+            'Content-type': "application/json; charset=UTF-8"
+        }
+
+    def request(self, method, url, **kwargs):
+        if not url.startswith('/'):
+            url = self._api_url + url
+        response = super(AuthenticatedHttpClient, self).request(method, url, **kwargs)
+        if response.status_code >= 400:
+            # For the benefit of test logs
+            print "%s: %s" % (response.status_code, response.content)
+        return response
+
+    def post(self, url, data=None, **kwargs):
+        if isinstance(data, dict):
+            data = json.dumps(data)
+        return super(AuthenticatedHttpClient, self).post(url, data, **kwargs)
+
+    def patch(self, url, data=None, **kwargs):
+        if isinstance(data, dict):
+            data = json.dumps(data)
+        return super(AuthenticatedHttpClient, self).patch(url, data, **kwargs)
+
+    def login(self):
+        """
+        Authenticate with the Django auth system as
+        it is exposed in the Calamari REST API.
+        """
+        log.info("Logging in as %s" % self._username)
+        response = self.get("auth/login/")
+        response.raise_for_status()
+        self.headers['X-XSRF-TOKEN'] = response.cookies['XSRF-TOKEN']
+
+        response = self.post("auth/login/", {
+            'next': "/",
+            'username': self._username,
+            'password': self._password
+        })
+        response.raise_for_status()
+
+        # Check we're allowed in now.
+        response = self.get("cluster")
+        response.raise_for_status()
+
+if __name__ == "__main__":
+
+    import argparse
+
+    p = argparse.ArgumentParser()
+    p.add_argument('-u', '--uri', default='http://mira035/api/v1/')
+    p.add_argument('--user', default='admin')
+    p.add_argument('--pass', dest='password', default='admin')
+    args, remainder = p.parse_known_args()
+
+    c = AuthenticatedHttpClient(args.uri, args.user, args.password)
+    c.login()
+    response = c.request('GET', ''.join(remainder)).json()
+    print json.dumps(response, indent=2)
diff --git a/tasks/calamari/servertest_1_0.py b/tasks/calamari/servertest_1_0.py
new file mode 100755 (executable)
index 0000000..b9b07a3
--- /dev/null
@@ -0,0 +1,269 @@
+#!/usr/bin/env python
+
+import datetime
+import os
+import logging
+import logging.handlers
+import requests
+import uuid
+import unittest
+from http_client import AuthenticatedHttpClient
+
+log = logging.getLogger(__name__)
+log.addHandler(logging.StreamHandler())
+log.setLevel(logging.INFO)
+
+global base_uri
+global client
+base_uri = None
+server_uri = None
+client = None
+
+def setUpModule():
+    global base_uri
+    global server_uri
+    global client
+    try:
+        base_uri = os.environ['CALAMARI_BASE_URI']
+    except KeyError:
+        log.error('Must define CALAMARI_BASE_URI')
+        os._exit(1)
+    if not base_uri.endswith('/'):
+        base_uri += '/'
+    if not base_uri.endswith('api/v1/'):
+        base_uri += 'api/v1/'
+    client = AuthenticatedHttpClient(base_uri, 'admin', 'admin')
+    server_uri = base_uri.replace('api/v1/', '')
+    client.login()
+
+class RestTest(unittest.TestCase):
+    'Base class for all tests here; get class\'s data'
+
+    def setUp(self):
+        # Called once for each test_* case.  A bit wasteful, but we
+        # really like using the simple class variable self.uri
+        # to customize each derived TestCase
+        method = getattr(self, 'method', 'GET')
+        raw = self.uri.startswith('/')
+        self.response = self.get_object(method, self.uri, raw=raw)
+
+    def get_object(self, method, url, raw=False):
+        'Return Python object decoded from JSON response to method/url'
+        global server_uri
+        if not raw:
+            return client.request(method, url).json()
+        else:
+            return requests.request(method, server_uri + url).json()
+
+class TestUserMe(RestTest):
+
+    uri = 'user/me'
+
+    def test_me(self):
+        self.assertEqual(self.response['username'], 'admin')
+
+class TestCluster(RestTest):
+
+    uri = 'cluster'
+
+    def test_id(self):
+        self.assertEqual(self.response[0]['id'], 1)
+
+    def test_times(self):
+        for time in (
+            self.response[0]['cluster_update_time'],
+            self.response[0]['cluster_update_attempt_time'],
+        ):
+            self.assertTrue(is_datetime(time))
+
+    def test_api_base_url(self):
+        api_base_url = self.response[0]['api_base_url']
+        self.assertTrue(api_base_url.startswith('http'))
+        self.assertIn('api/v0.1', api_base_url)
+
+class TestHealth(RestTest):
+
+    uri = 'cluster/1/health'
+
+    def test_cluster(self):
+        self.assertEqual(self.response['cluster'], 1)
+
+    def test_times(self):
+        for time in (
+            self.response['cluster_update_time'],
+            self.response['added'],
+        ):
+            self.assertTrue(is_datetime(time))
+
+    def test_report_and_overall_status(self):
+        self.assertIn('report', self.response)
+        self.assertIn('overall_status', self.response['report'])
+
+class TestHealthCounters(RestTest):
+
+    uri = 'cluster/1/health_counters'
+
+    def test_cluster(self):
+        self.assertEqual(self.response['cluster'], 1)
+
+    def test_time(self):
+        self.assertTrue(is_datetime(self.response['cluster_update_time']))
+
+    def test_existence(self):
+        for section in ('pg', 'mon', 'osd'):
+            for counter in ('warn', 'critical', 'ok'):
+                count = self.response[section][counter]['count']
+                self.assertIsInstance(count, int)
+        self.assertIsInstance(self.response['pool']['total'], int)
+
+    def test_mds_sum(self):
+        count = self.response['mds']
+        self.assertEqual(
+            count['up_not_in'] + count['not_up_not_in'] + count['up_in'],
+            count['total']
+        )
+
+class TestSpace(RestTest):
+
+    uri = 'cluster/1/space'
+
+    def test_cluster(self):
+        self.assertEqual(self.response['cluster'], 1)
+
+    def test_times(self):
+        for time in (
+            self.response['cluster_update_time'],
+            self.response['added'],
+        ):
+            self.assertTrue(is_datetime(time))
+
+    def test_space(self):
+        for size in ('free_bytes', 'used_bytes', 'capacity_bytes'):
+            self.assertIsInstance(self.response['space'][size], int)
+            self.assertGreater(self.response['space'][size], 0)
+
+    def test_report(self):
+        for size in ('total_used', 'total_space', 'total_avail'):
+            self.assertIsInstance(self.response['report'][size], int)
+            self.assertGreater(self.response['report'][size], 0)
+
+class TestOSD(RestTest):
+
+    uri = 'cluster/1/osd'
+
+    def test_cluster(self):
+        self.assertEqual(self.response['cluster'], 1)
+
+    def test_times(self):
+        for time in (
+            self.response['cluster_update_time'],
+            self.response['added'],
+        ):
+            self.assertTrue(is_datetime(time))
+
+    def test_osd_uuid(self):
+        for osd in self.response['osds']:
+            uuidobj = uuid.UUID(osd['uuid'])
+            self.assertEqual(str(uuidobj), osd['uuid'])
+
+    def test_osd_pools(self):
+        for osd in self.response['osds']:
+            if osd['up'] != 1:
+                continue
+            self.assertIsInstance(osd['pools'], list)
+            self.assertIsInstance(osd['pools'][0], basestring)
+
+    def test_osd_up_in(self):
+        for osd in self.response['osds']:
+            for flag in ('up', 'in'):
+                self.assertIn(osd[flag], (0, 1))
+
+    def test_osd_0(self):
+        osd0 = self.get_object('GET', 'cluster/1/osd/0')['osd']
+        for field in osd0.keys():
+            if not field.startswith('cluster_update_time'):
+                self.assertEqual(self.response['osds'][0][field], osd0[field])
+
+class TestPool(RestTest):
+
+    uri = 'cluster/1/pool'
+
+    def test_cluster(self):
+        for pool in self.response:
+            self.assertEqual(pool['cluster'], 1)
+
+    def test_fields_are_ints(self):
+        for pool in self.response:
+            for field in ('id', 'used_objects', 'used_bytes'):
+                self.assertIsInstance(pool[field], int)
+
+    def test_name_is_str(self):
+        for pool in self.response:
+            self.assertIsInstance(pool['name'], basestring)
+
+    def test_pool_0(self):
+        poolid = self.response[0]['id']
+        pool = self.get_object('GET', 'cluster/1/pool/{id}'.format(id=poolid))
+        self.assertEqual(self.response[0], pool)
+
+class TestServer(RestTest):
+
+    uri = 'cluster/1/server'
+
+    def test_ipaddr(self):
+        for server in self.response:
+            octets = server['addr'].split('.')
+            self.assertEqual(len(octets), 4)
+            for octetstr in octets:
+                octet = int(octetstr)
+                self.assertIsInstance(octet, int)
+                self.assertGreaterEqual(octet, 0)
+                self.assertLessEqual(octet, 255)
+
+    def test_hostname_name_strings(self):
+        for server in self.response:
+            for field in ('name', 'hostname'):
+                self.assertIsInstance(server[field], basestring)
+
+    def test_services(self):
+        for server in self.response:
+            self.assertIsInstance(server['services'], list)
+            for service in server['services']:
+                self.assertIn(service['type'], ('osd', 'mon', 'mds'))
+
+class TestGraphitePoolIOPS(RestTest):
+
+    uri = '/graphite/render?format=json-array&' \
+          'target=ceph.cluster.ceph.pool.0.num_read&' \
+          'target=ceph.cluster.ceph.pool.0.num_write'
+
+    def test_targets_contain_request(self):
+        self.assertIn('targets', self.response)
+        self.assertIn('ceph.cluster.ceph.pool.0.num_read',
+                      self.response['targets'])
+        self.assertIn('ceph.cluster.ceph.pool.0.num_write',
+                      self.response['targets'])
+
+    def test_datapoints(self):
+        self.assertIn('datapoints', self.response)
+        self.assertGreater(len(self.response['datapoints']), 0)
+        data = self.response['datapoints'][0]
+        self.assertEqual(len(data), 3)
+        self.assertIsInstance(data[0], int)
+        if data[1]:
+            self.assertIsInstance(data[1], float)
+        if data[2]:
+            self.assertIsInstance(data[2], float)
+
+#
+# Utility functions
+#
+
+DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%fZ'
+
+def is_datetime(time):
+    datetime.datetime.strptime(time, DATETIME_FORMAT)
+    return True
+
+if __name__ == '__main__':
+    unittest.main()
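
Since servertest_1_0.py is a plain unittest module configured through the CALAMARI_BASE_URI environment variable, it can also be driven programmatically; a sketch, with a hypothetical server URL::

    import os
    import unittest

    os.environ['CALAMARI_BASE_URI'] = 'http://calamari.example.com/'  # hypothetical
    import servertest_1_0

    unittest.main(module=servertest_1_0, argv=['servertest_1_0'], exit=False)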
diff --git a/tasks/ceph.py b/tasks/ceph.py
new file mode 100644 (file)
index 0000000..39e3a56
--- /dev/null
@@ -0,0 +1,1389 @@
+"""
+Ceph cluster task.
+
+Handle the setup, starting, and clean-up of a Ceph cluster.
+"""
+from cStringIO import StringIO
+
+import argparse
+import contextlib
+import logging
+import os
+import struct
+import json
+import time
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+from ..orchestra import run
+import ceph_client as cclient
+
+log = logging.getLogger(__name__)
+
+class DaemonState(object):
+    """
+    Daemon State.  A daemon exists for each instance of each role.
+    """
+    def __init__(self, remote, role, id_, *command_args, **command_kwargs):
+        """
+        Pass remote command information as parameters to remote site
+
+        :param remote: Remote site
+        :param role: Role (osd, rgw, mon, mds)
+        :param id_: Id within role (osd.1, osd.2, for example)
+        :param command_args: positional arguments (used in restart commands)
+        :param command_kwargs: keyword arguments (used in restart commands)
+        """
+        self.remote = remote
+        self.command_args = command_args
+        self.command_kwargs = command_kwargs
+        self.role = role
+        self.id_ = id_
+        self.log = command_kwargs.get('logger', log)
+        self.proc = None
+
+    def stop(self):
+        """
+        Stop this daemon instance.
+
+        Note: this can raise a run.CommandFailedError,
+        run.CommandCrashedError, or run.ConnectionLostError.
+        """
+        if not self.running():
+            self.log.error('tried to stop a non-running daemon')
+            return
+        self.proc.stdin.close()
+        self.log.debug('waiting for process to exit')
+        run.wait([self.proc])
+        self.proc = None
+        self.log.info('Stopped')
+
+    def restart(self, *args, **kwargs):
+        """
+        Restart with a new command passed in the arguments
+
+        :param args: positional arguments passed to remote.run
+        :param kwargs: keyword arguments passed to remote.run
+        """
+        self.log.info('Restarting')
+        if self.proc is not None:
+            self.log.debug('stopping old one...')
+            self.stop()
+        cmd_args = list(self.command_args)
+        cmd_args.extend(args)
+        cmd_kwargs = self.command_kwargs
+        cmd_kwargs.update(kwargs)
+        self.proc = self.remote.run(*cmd_args, **cmd_kwargs)
+        self.log.info('Started')
+
+    def restart_with_args(self, extra_args):
+        """
+        Restart, adding new parameters to the current command.
+
+        :param extra_args: Extra keyword arguments to be added.
+        """
+        self.log.info('Restarting')
+        if self.proc is not None:
+            self.log.debug('stopping old one...')
+            self.stop()
+        cmd_args = list(self.command_args)
+        # we only want to make a temporary mod of the args list
+        # so we shallow copy the dict, and deepcopy the args list
+        cmd_kwargs = self.command_kwargs.copy()
+        from copy import deepcopy
+        cmd_kwargs['args'] = deepcopy(self.command_kwargs['args'])
+        cmd_kwargs['args'].extend(extra_args)
+        self.proc = self.remote.run(*cmd_args, **cmd_kwargs)
+        self.log.info('Started')
+
+    def signal(self, sig):
+        """
+        Send a signal to the associated remote command.
+
+        :param sig: signal to send
+        """
+        self.proc.stdin.write(struct.pack('!b', sig))
+        self.log.info('Sent signal %d', sig)
+
+    def running(self):
+        """
+        Are we running?
+        :return: True if remote run command value is set, False otherwise.
+        """
+        return self.proc is not None
+
+    def reset(self):
+        """
+        clear remote run command value.
+        """
+        self.proc = None
+
+    def wait_for_exit(self):
+        """
+        clear remote run command value after waiting for exit.
+        """
+        if self.proc:
+            try:
+                run.wait([self.proc])
+            finally:
+                self.proc = None
+
+class CephState(object):
+    """
+    Collection of daemon state instances
+    """
+    def __init__(self):
+        """
+        self.daemons is a dictionary indexed by role.  Each entry is a dictionary of
+        DaemonState values indexed by an id parameter.
+        """
+        self.daemons = {}
+
+    def add_daemon(self, remote, role, id_, *args, **kwargs):
+        """
+        Add a daemon.  If there already is a daemon for this id_ and role, stop
+        that daemon first.  The new daemon is started once the new value is set.
+
+        :param remote: Remote site
+        :param role: Role (osd, mds, mon, rgw,  for example)
+        :param id_: Id (index into role dictionary)
+        :param args: Daemonstate positional parameters
+        :param kwargs: Daemonstate keyword parameters
+        """
+        if role not in self.daemons:
+            self.daemons[role] = {}
+        if id_ in self.daemons[role]:
+            self.daemons[role][id_].stop()
+            self.daemons[role][id_] = None
+        self.daemons[role][id_] = DaemonState(remote, role, id_, *args, **kwargs)
+        self.daemons[role][id_].restart()
+
+    def get_daemon(self, role, id_):
+        """
+        Get the daemon associated with this id_ for this role.
+
+        :param role: Role (osd, mds, mon, rgw,  for example)
+        :param id_: Id (index into role dictionary)
+        """
+        if role not in self.daemons:
+            return None
+        return self.daemons[role].get(str(id_), None)
+
+    def iter_daemons_of_role(self, role):
+        """
+        Iterate through all daemon instances for this role.  Returns a list of
+        DaemonState values.
+
+        :param role: Role (osd, mds, mon, rgw,  for example)
+        """
+        return self.daemons.get(role, {}).values()
+
+
+@contextlib.contextmanager
+def ceph_log(ctx, config):
+    """
+    Make the /var/log/ceph log directory writeable by everyone, disable
+    ceph logrotate, and create the valgrind and profiling-logger
+    subdirectories.
+
+    :param ctx: Context
+    :param config: Configuration
+    """
+    log.info('Making ceph log dir writeable by non-root...')
+    run.wait(
+        ctx.cluster.run(
+            args=[
+                'sudo',
+                'chmod',
+                '777',
+                '/var/log/ceph',
+                ],
+            wait=False,
+            )
+        )
+    log.info('Disabling ceph logrotate...')
+    run.wait(
+        ctx.cluster.run(
+            args=[
+                'sudo',
+                'rm', '-f', '--',
+                '/etc/logrotate.d/ceph',
+                ],
+            wait=False,
+            )
+        )
+    log.info('Creating extra log directories...')
+    run.wait(
+        ctx.cluster.run(
+            args=[
+                'sudo',
+                'install', '-d', '-m0755', '--',
+                '/var/log/ceph/valgrind',
+                '/var/log/ceph/profiling-logger',
+                ],
+            wait=False,
+            )
+        )
+
+    try:
+        yield
+
+    finally:
+        pass
+
+
+def assign_devs(roles, devs):
+    """
+    Create a dictionary of devs indexed by roles
+
+    :param roles: List of roles
+    :param devs: Corresponding list of devices.
+    :returns: Dictionary of devs indexed by roles.
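+
+    For example (illustrative), assign_devs(['0', '1'], ['/dev/sdb', '/dev/sdc'])
+    returns {'0': '/dev/sdb', '1': '/dev/sdc'}.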
+    """
+    return dict(zip(roles, devs))
+
+@contextlib.contextmanager
+def valgrind_post(ctx, config):
+    """
+    After the tests run, look through all the valgrind logs.  An exception is
+    raised if valgrind issues were found in any of the logs.
+
+    :param ctx: Context
+    :param config: Configuration
+    """
+    try:
+        yield
+    finally:
+        lookup_procs = list()
+        log.info('Checking for errors in any valgrind logs...')
+        for remote in ctx.cluster.remotes.iterkeys():
+            #look at valgrind logs for each node
+            proc = remote.run(
+                args=[
+                    'sudo',
+                    'zgrep',
+                    '<kind>',
+                    run.Raw('/var/log/ceph/valgrind/*'),
+                    '/dev/null', # include a second file so that we always get a filename prefix on the output
+                    run.Raw('|'),
+                    'sort',
+                    run.Raw('|'),
+                    'uniq',
+                    ],
+                wait=False,
+                check_status=False,
+                stdout=StringIO(),
+                )
+            lookup_procs.append((proc, remote))
+
+        valgrind_exception = None
+        for (proc, remote) in lookup_procs:
+            proc.exitstatus.get()
+            out = proc.stdout.getvalue()
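+            # Each matching line should look roughly like
+            #   /var/log/ceph/valgrind/osd.0.log.gz:<kind>Leak_DefinitelyLost</kind>
+            # (file name and error kind are illustrative), i.e. 'file:kind'.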
+            for line in out.split('\n'):
+                if line == '':
+                    continue
+                try:
+                    (file, kind) = line.split(':')
+                except Exception:
+                    log.error('failed to split line %s', line)
+                    raise
+                log.debug('file %s kind %s', file, kind)
+                if (file.find('mds') >= 0) and kind.find('Lost') > 0:
+                    continue
+                log.error('saw valgrind issue %s in %s', kind, file)
+                valgrind_exception = Exception('saw valgrind issues')
+
+        if valgrind_exception is not None:
+            raise valgrind_exception
+
+
+def mount_osd_data(ctx, remote, osd):
+    """
+    Mount a remote OSD
+
+    :param ctx: Context
+    :param remote: Remote site
+    :param osd: Osd name
+    """
+    log.debug('Mounting data for osd.{o} on {r}'.format(o=osd, r=remote))
+    if remote in ctx.disk_config.remote_to_roles_to_dev and osd in ctx.disk_config.remote_to_roles_to_dev[remote]:
+        dev = ctx.disk_config.remote_to_roles_to_dev[remote][osd]
+        mount_options = ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][osd]
+        fstype = ctx.disk_config.remote_to_roles_to_dev_fstype[remote][osd]
+        mnt = os.path.join('/var/lib/ceph/osd', 'ceph-{id}'.format(id=osd))
+
+        log.info('Mounting osd.{o} on {n}: dev: {d}, mountpoint: {p}, type: {t}, options: {v}'.format(
+                 o=osd, n=remote.name, d=dev, p=mnt, t=fstype, v=mount_options))
+
+        remote.run(
+            args=[
+                'sudo',
+                'mount',
+                '-t', fstype,
+                '-o', ','.join(mount_options),
+                dev,
+                mnt,
+            ]
+            )
+
+def make_admin_daemon_dir(ctx, remote):
+    """
+    Create /var/run/ceph directory on remote site.
+
+    :param ctx: Context
+    :param remote: Remote site
+    """
+    remote.run(
+            args=[
+                'sudo',
+                'install', '-d', '-m0777', '--', '/var/run/ceph',
+                ],
+            )
+
+@contextlib.contextmanager
+def cluster(ctx, config):
+    """
+    Handle the creation and removal of a ceph cluster.
+
+    On startup:
+        Create directories needed for the cluster.
+        Create remote journals for all osds.
+        Create and set keyring.
+        Copy the monmap to the test systems.
+        Setup mon nodes.
+        Setup mds nodes.
+        Mkfs osd nodes.
+        Add keyring information to monmaps
+        Mkfs mon nodes.
+
+    On exit:
+        If errors occurred, extract a failure message and store in ctx.summary.
+        Unmount all test files and temporary journaling files.
+        Save the monitor information and archive all ceph logs.
+        Cleanup the keyring setup, and remove all monitor map and data files left over.
+
+    :param ctx: Context
+    :param config: Configuration
+    """
+    if ctx.config.get('use_existing_cluster', False) is True:
+        log.info("'use_existing_cluster' is true; skipping cluster creation")
+        yield
+        return
+
+    testdir = teuthology.get_testdir(ctx)
+    log.info('Creating ceph cluster...')
+    run.wait(
+        ctx.cluster.run(
+            args=[
+                'install', '-d', '-m0755', '--',
+                '{tdir}/data'.format(tdir=testdir),
+                ],
+            wait=False,
+            )
+        )
+
+    run.wait(
+        ctx.cluster.run(
+            args=[
+                'sudo',
+                'install', '-d', '-m0777', '--', '/var/run/ceph',
+                ],
+            wait=False,
+            )
+        )
+
+
+    devs_to_clean = {}
+    remote_to_roles_to_devs = {}
+    remote_to_roles_to_journals = {}
+    osds = ctx.cluster.only(teuthology.is_type('osd'))
+    for remote, roles_for_host in osds.remotes.iteritems():
+        devs = teuthology.get_scratch_devices(remote)
+        roles_to_devs = {}
+        roles_to_journals = {}
+        if config.get('fs'):
+            log.info('fs option selected, checking for scratch devs')
+            log.info('found devs: %s' % (str(devs),))
+            devs_id_map = teuthology.get_wwn_id_map(remote, devs)
+            iddevs = devs_id_map.values()
+            roles_to_devs = assign_devs(
+                teuthology.roles_of_type(roles_for_host, 'osd'), iddevs
+                )
+            if len(roles_to_devs) < len(iddevs):
+                iddevs = iddevs[len(roles_to_devs):]
+            devs_to_clean[remote] = []
+
+        if config.get('block_journal'):
+            log.info('block journal enabled')
+            roles_to_journals = assign_devs(
+                teuthology.roles_of_type(roles_for_host, 'osd'), iddevs
+                )
+            log.info('journal map: %s', roles_to_journals)
+
+        if config.get('tmpfs_journal'):
+            log.info('tmpfs journal enabled')
+            roles_to_journals = {}
+            remote.run( args=[ 'sudo', 'mount', '-t', 'tmpfs', 'tmpfs', '/mnt' ] )
+            for osd in teuthology.roles_of_type(roles_for_host, 'osd'):
+                tmpfs = '/mnt/osd.%s' % osd
+                roles_to_journals[osd] = tmpfs
+                remote.run( args=[ 'truncate', '-s', '1500M', tmpfs ] )
+            log.info('journal map: %s', roles_to_journals)
+
+        log.info('dev map: %s' % (str(roles_to_devs),))
+        remote_to_roles_to_devs[remote] = roles_to_devs
+        remote_to_roles_to_journals[remote] = roles_to_journals
+
+
+    log.info('Generating config...')
+    remotes_and_roles = ctx.cluster.remotes.items()
+    roles = [role_list for (remote, role_list) in remotes_and_roles]
+    ips = [host for (host, port) in (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)]
+    conf = teuthology.skeleton_config(ctx, roles=roles, ips=ips)
+    for remote, roles_to_journals in remote_to_roles_to_journals.iteritems():
+        for role, journal in roles_to_journals.iteritems():
+            key = "osd." + str(role)
+            if key not in conf:
+                conf[key] = {}
+            conf[key]['osd journal'] = journal
+    for section, keys in config['conf'].iteritems():
+        for key, value in keys.iteritems():
+            log.info("[%s] %s = %s" % (section, key, value))
+            if section not in conf:
+                conf[section] = {}
+            conf[section][key] = value
+
+    if config.get('tmpfs_journal'):
+        conf['journal dio'] = False
+
+    ctx.ceph = argparse.Namespace()
+    ctx.ceph.conf = conf
+
+    keyring_path = config.get('keyring_path', '/etc/ceph/ceph.keyring')
+
+    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
+
+    firstmon = teuthology.get_first_mon(ctx, config)
+
+    log.info('Setting up %s...' % firstmon)
+    ctx.cluster.only(firstmon).run(
+        args=[
+            'sudo',
+            'adjust-ulimits',
+            'ceph-coverage',
+            coverage_dir,
+            'ceph-authtool',
+            '--create-keyring',
+            keyring_path,
+            ],
+        )
+    ctx.cluster.only(firstmon).run(
+        args=[
+            'sudo',
+            'adjust-ulimits',
+            'ceph-coverage',
+            coverage_dir,
+            'ceph-authtool',
+            '--gen-key',
+            '--name=mon.',
+            keyring_path,
+            ],
+        )
+    ctx.cluster.only(firstmon).run(
+        args=[
+            'sudo',
+            'chmod',
+            '0644',
+            keyring_path,
+            ],
+        )
+    (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
+    fsid = teuthology.create_simple_monmap(
+        ctx,
+        remote=mon0_remote,
+        conf=conf,
+        )
+    if 'global' not in conf:
+        conf['global'] = {}
+    conf['global']['fsid'] = fsid
+
+    log.info('Writing ceph.conf for FSID %s...' % fsid)
+    conf_path = config.get('conf_path', '/etc/ceph/ceph.conf')
+    conf_fp = StringIO()
+    conf.write(conf_fp)
+    conf_fp.seek(0)
+    writes = ctx.cluster.run(
+        args=[
+            'sudo', 'mkdir', '-p', '/etc/ceph', run.Raw('&&'),
+            'sudo', 'chmod', '0755', '/etc/ceph', run.Raw('&&'),
+            'sudo', 'python',
+            '-c',
+            'import shutil, sys; shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))',
+            conf_path,
+            run.Raw('&&'),
+            'sudo', 'chmod', '0644', conf_path,
+            ],
+        stdin=run.PIPE,
+        wait=False,
+        )
+    teuthology.feed_many_stdins_and_close(conf_fp, writes)
+    run.wait(writes)
+
+    log.info('Creating admin key on %s...' % firstmon)
+    ctx.cluster.only(firstmon).run(
+        args=[
+            'sudo',
+            'adjust-ulimits',
+            'ceph-coverage',
+            coverage_dir,
+            'ceph-authtool',
+            '--gen-key',
+            '--name=client.admin',
+            '--set-uid=0',
+            '--cap', 'mon', 'allow *',
+            '--cap', 'osd', 'allow *',
+            '--cap', 'mds', 'allow',
+            keyring_path,
+            ],
+        )
+
+    log.info('Copying monmap to all nodes...')
+    keyring = teuthology.get_file(
+        remote=mon0_remote,
+        path=keyring_path,
+        )
+    monmap = teuthology.get_file(
+        remote=mon0_remote,
+        path='{tdir}/monmap'.format(tdir=testdir),
+        )
+
+    for rem in ctx.cluster.remotes.iterkeys():
+        # copy mon key and initial monmap
+        log.info('Sending monmap to node {remote}'.format(remote=rem))
+        teuthology.sudo_write_file(
+            remote=rem,
+            path=keyring_path,
+            data=keyring,
+            perms='0644'
+            )
+        teuthology.write_file(
+            remote=rem,
+            path='{tdir}/monmap'.format(tdir=testdir),
+            data=monmap,
+            )
+
+    log.info('Setting up mon nodes...')
+    mons = ctx.cluster.only(teuthology.is_type('mon'))
+    run.wait(
+        mons.run(
+            args=[
+                'adjust-ulimits',
+                'ceph-coverage',
+                coverage_dir,
+                'osdmaptool',
+                '-c', conf_path,
+                '--clobber',
+                '--createsimple', '{num:d}'.format(
+                    num=teuthology.num_instances_of_type(ctx.cluster, 'osd'),
+                    ),
+                '{tdir}/osdmap'.format(tdir=testdir),
+                '--pg_bits', '2',
+                '--pgp_bits', '4',
+                ],
+            wait=False,
+            ),
+        )
+
+    log.info('Setting up mds nodes...')
+    mdss = ctx.cluster.only(teuthology.is_type('mds'))
+    for remote, roles_for_host in mdss.remotes.iteritems():
+        for id_ in teuthology.roles_of_type(roles_for_host, 'mds'):
+            remote.run(
+                args=[
+                    'sudo',
+                    'mkdir',
+                    '-p',
+                    '/var/lib/ceph/mds/ceph-{id}'.format(id=id_),
+                    run.Raw('&&'),
+                    'sudo',
+                    'adjust-ulimits',
+                    'ceph-coverage',
+                    coverage_dir,
+                    'ceph-authtool',
+                    '--create-keyring',
+                    '--gen-key',
+                    '--name=mds.{id}'.format(id=id_),
+                    '/var/lib/ceph/mds/ceph-{id}/keyring'.format(id=id_),
+                    ],
+                )
+
+    cclient.create_keyring(ctx)
+    log.info('Running mkfs on osd nodes...')
+
+    ctx.disk_config = argparse.Namespace()
+    ctx.disk_config.remote_to_roles_to_dev = remote_to_roles_to_devs
+    ctx.disk_config.remote_to_roles_to_journals = remote_to_roles_to_journals
+    ctx.disk_config.remote_to_roles_to_dev_mount_options = {}
+    ctx.disk_config.remote_to_roles_to_dev_fstype = {}
+
+    log.info("ctx.disk_config.remote_to_roles_to_dev: {r}".format(r=str(ctx.disk_config.remote_to_roles_to_dev)))
+    for remote, roles_for_host in osds.remotes.iteritems():
+        roles_to_devs = remote_to_roles_to_devs[remote]
+        roles_to_journals = remote_to_roles_to_journals[remote]
+
+
+        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
+            remote.run(
+                args=[
+                    'sudo',
+                    'mkdir',
+                    '-p',
+                    '/var/lib/ceph/osd/ceph-{id}'.format(id=id_),
+                    ])
+            log.info(str(roles_to_journals))
+            log.info(id_)
+            if roles_to_devs.get(id_):
+                dev = roles_to_devs[id_]
+                fs = config.get('fs')
+                package = None
+                mkfs_options = config.get('mkfs_options')
+                mount_options = config.get('mount_options')
+                if fs == 'btrfs':
+                    #package = 'btrfs-tools'
+                    if mount_options is None:
+                        mount_options = ['noatime','user_subvol_rm_allowed']
+                    if mkfs_options is None:
+                        mkfs_options = ['-m', 'single',
+                                        '-l', '32768',
+                                        '-n', '32768']
+                if fs == 'xfs':
+                    #package = 'xfsprogs'
+                    if mount_options is None:
+                        mount_options = ['noatime']
+                    if mkfs_options is None:
+                        mkfs_options = ['-f', '-i', 'size=2048']
+                if fs == 'ext4' or fs == 'ext3':
+                    if mount_options is None:
+                        mount_options = ['noatime','user_xattr']
+
+                if mount_options is None:
+                    mount_options = []
+                if mkfs_options is None:
+                    mkfs_options = []
+                mkfs = ['mkfs.%s' % fs] + mkfs_options
+                log.info('%s on %s on %s' % (mkfs, dev, remote))
+                if package is not None:
+                    remote.run(
+                        args=[
+                            'sudo',
+                            'apt-get', 'install', '-y', package
+                            ],
+                        stdout=StringIO(),
+                        )
+
+                try:
+                    remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev])
+                except run.CommandFailedError:
+                    # Newer btrfs-tools doesn't prompt for overwrite, use -f
+                    if '-f' not in mkfs_options:
+                        mkfs_options.append('-f')
+                        mkfs = ['mkfs.%s' % fs] + mkfs_options
+                        log.info('%s on %s on %s' % (mkfs, dev, remote))
+                    remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev])
+
+                log.info('mount %s on %s -o %s' % (dev, remote,
+                                                   ','.join(mount_options)))
+                remote.run(
+                    args=[
+                        'sudo',
+                        'mount',
+                        '-t', fs,
+                        '-o', ','.join(mount_options),
+                        dev,
+                        os.path.join('/var/lib/ceph/osd', 'ceph-{id}'.format(id=id_)),
+                        ]
+                    )
+                if remote not in ctx.disk_config.remote_to_roles_to_dev_mount_options:
+                    ctx.disk_config.remote_to_roles_to_dev_mount_options[remote] = {}
+                ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][id_] = mount_options
+                if remote not in ctx.disk_config.remote_to_roles_to_dev_fstype:
+                    ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {}
+                ctx.disk_config.remote_to_roles_to_dev_fstype[remote][id_] = fs
+                devs_to_clean[remote].append(
+                    os.path.join(
+                        os.path.join('/var/lib/ceph/osd', 'ceph-{id}'.format(id=id_)),
+                        )
+                    )
+
+        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
+            remote.run(
+                args=[
+                    'sudo',
+                    'MALLOC_CHECK_=3',
+                    'adjust-ulimits',
+                    'ceph-coverage',
+                    coverage_dir,
+                    'ceph-osd',
+                    '--mkfs',
+                    '--mkkey',
+                    '-i', id_,
+                    '--monmap', '{tdir}/monmap'.format(tdir=testdir),
+                    ],
+                )
+
+
+    log.info('Reading keys from all nodes...')
+    keys_fp = StringIO()
+    keys = []
+    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
+        for type_ in ['mds','osd']:
+            for id_ in teuthology.roles_of_type(roles_for_host, type_):
+                data = teuthology.get_file(
+                    remote=remote,
+                    path='/var/lib/ceph/{type}/ceph-{id}/keyring'.format(
+                        type=type_,
+                        id=id_,
+                        ),
+                    sudo=True,
+                    )
+                keys.append((type_, id_, data))
+                keys_fp.write(data)
+    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
+        for type_ in ['client']:
+            for id_ in teuthology.roles_of_type(roles_for_host, type_):
+                data = teuthology.get_file(
+                    remote=remote,
+                    path='/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
+                    )
+                keys.append((type_, id_, data))
+                keys_fp.write(data)
+
+    log.info('Adding keys to all mons...')
+    writes = mons.run(
+        args=[
+            'sudo', 'tee', '-a',
+            keyring_path,
+            ],
+        stdin=run.PIPE,
+        wait=False,
+        stdout=StringIO(),
+        )
+    keys_fp.seek(0)
+    teuthology.feed_many_stdins_and_close(keys_fp, writes)
+    run.wait(writes)
+    for type_, id_, data in keys:
+        run.wait(
+            mons.run(
+                args=[
+                    'sudo',
+                    'adjust-ulimits',
+                    'ceph-coverage',
+                    coverage_dir,
+                    'ceph-authtool',
+                    keyring_path,
+                    '--name={type}.{id}'.format(
+                        type=type_,
+                        id=id_,
+                        ),
+                    ] + list(teuthology.generate_caps(type_)),
+                wait=False,
+                ),
+            )
+
+    log.info('Running mkfs on mon nodes...')
+    for remote, roles_for_host in mons.remotes.iteritems():
+        for id_ in teuthology.roles_of_type(roles_for_host, 'mon'):
+            remote.run(
+                args=[
+                  'sudo',
+                  'mkdir',
+                  '-p',
+                  '/var/lib/ceph/mon/ceph-{id}'.format(id=id_),
+                  ],
+                )
+            remote.run(
+                args=[
+                    'sudo',
+                    'adjust-ulimits',
+                    'ceph-coverage',
+                    coverage_dir,
+                    'ceph-mon',
+                    '--mkfs',
+                    '-i', id_,
+                    '--monmap={tdir}/monmap'.format(tdir=testdir),
+                    '--osdmap={tdir}/osdmap'.format(tdir=testdir),
+                    '--keyring={kpath}'.format(kpath=keyring_path),
+                    ],
+                )
+
+
+    run.wait(
+        mons.run(
+            args=[
+                'rm',
+                '--',
+                '{tdir}/monmap'.format(tdir=testdir),
+                '{tdir}/osdmap'.format(tdir=testdir),
+                ],
+            wait=False,
+            ),
+        )
+
+    try:
+        yield
+    except Exception:
+        # we need to know this below
+        ctx.summary['success'] = False
+        raise
+    finally:
+        (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
+
+        log.info('Checking cluster log for badness...')
+        def first_in_ceph_log(pattern, excludes):
+            """
+            Find the first occurrence of the specified pattern in the Ceph log.
+            Returns None if none found.
+
+            :param pattern: Pattern scanned for.
+            :param excludes: Patterns to ignore.
+            :return: First line of text (or None if not found)
+            """
+            args = [
+                'sudo',
+                'egrep', pattern,
+                '/var/log/ceph/ceph.log',
+                ]
+            for exclude in excludes:
+                args.extend([run.Raw('|'), 'egrep', '-v', exclude])
+            args.extend([
+                    run.Raw('|'), 'head', '-n', '1',
+                    ])
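+            # The assembled pipeline is roughly:
+            #   sudo egrep <pattern> /var/log/ceph/ceph.log | egrep -v <exclude> ... | head -n 1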
+            r = mon0_remote.run(
+                stdout=StringIO(),
+                args=args,
+                )
+            stdout = r.stdout.getvalue()
+            if stdout != '':
+                return stdout
+            return None
+
+        if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]',
+                             config['log_whitelist']) is not None:
+            log.warning('Found errors (ERR|WRN|SEC) in cluster log')
+            ctx.summary['success'] = False
+            # use the most severe problem as the failure reason
+            if 'failure_reason' not in ctx.summary:
+                for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']:
+                    match = first_in_ceph_log(pattern, config['log_whitelist'])
+                    if match is not None:
+                        ctx.summary['failure_reason'] = \
+                            '"{match}" in cluster log'.format(
+                            match=match.rstrip('\n'),
+                            )
+                        break
+
+        for remote, dirs in devs_to_clean.iteritems():
+            for dir_ in dirs:
+                log.info('Unmounting %s on %s' % (dir_, remote))
+                remote.run(
+                    args=[
+                        'sync',
+                        run.Raw('&&'),
+                        'sudo',
+                        'umount',
+                        '-f',
+                        dir_
+                        ]
+                    )
+
+        if config.get('tmpfs_journal'):
+            log.info('tmpfs journal enabled - unmounting tmpfs at /mnt')
+            for remote, roles_for_host in osds.remotes.iteritems():
+                remote.run(
+                    args=[ 'sudo', 'umount', '-f', '/mnt' ],
+                    check_status=False,
+                )
+
+        if ctx.archive is not None and \
+                not (ctx.config.get('archive-on-error') and ctx.summary['success']):
+            # archive mon data, too
+            log.info('Archiving mon data...')
+            path = os.path.join(ctx.archive, 'data')
+            os.makedirs(path)
+            for remote, roles in mons.remotes.iteritems():
+                for role in roles:
+                    if role.startswith('mon.'):
+                        teuthology.pull_directory_tarball(
+                            remote,
+                            '/var/lib/ceph/mon',
+                            path + '/' + role + '.tgz')
+
+            # and logs
+            log.info('Compressing logs...')
+            run.wait(
+                ctx.cluster.run(
+                    args=[
+                        'sudo',
+                        'find',
+                        '/var/log/ceph',
+                        '-name',
+                        '*.log',
+                        '-print0',
+                        run.Raw('|'),
+                        'sudo',
+                        'xargs',
+                        '-0',
+                        '--no-run-if-empty',
+                        '--',
+                        'gzip',
+                        '--',
+                        ],
+                    wait=False,
+                    ),
+                )
+
+            log.info('Archiving logs...')
+            path = os.path.join(ctx.archive, 'remote')
+            os.makedirs(path)
+            for remote in ctx.cluster.remotes.iterkeys():
+                sub = os.path.join(path, remote.shortname)
+                os.makedirs(sub)
+                teuthology.pull_directory(remote, '/var/log/ceph',
+                                          os.path.join(sub, 'log'))
+
+
+        log.info('Cleaning ceph cluster...')
+        run.wait(
+            ctx.cluster.run(
+                args=[
+                    'sudo',
+                    'rm',
+                    '-rf',
+                    '--',
+                    conf_path,
+                    keyring_path,
+                    '{tdir}/data'.format(tdir=testdir),
+                    '{tdir}/monmap'.format(tdir=testdir),
+                    ],
+                wait=False,
+                ),
+            )
+
+def get_all_pg_info(rem_site, testdir):
+    """
+    Get the results of a ceph pg dump
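+
+    Returns the 'pg_stats' list from the dump; each entry includes fields
+    such as 'state' and 'last_scrub_stamp' that osd_scrub_pgs uses below.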
+    """
+    info = rem_site.run(args=[
+                        'adjust-ulimits',
+                        'ceph-coverage',
+                        '{tdir}/archive/coverage'.format(tdir=testdir),
+                        'ceph', 'pg', 'dump',
+                        '--format', 'json'], stdout=StringIO())
+    all_info = json.loads(info.stdout.getvalue())
+    return all_info['pg_stats']
+    
+def osd_scrub_pgs(ctx, config):
+    """
+    Scrub pgs when we exit.
+    
+    First make sure all pgs are active and clean.
+    Next scrub all osds.
+    Then periodically check until all pgs have scrub time stamps that
+    indicate the last scrub completed.  Time out if no progress is made
+    here after two minutes.
+    """
+    retries = 12
+    delays = 10
+    vlist = ctx.cluster.remotes.values()
+    testdir = teuthology.get_testdir(ctx)
+    rem_site = ctx.cluster.remotes.keys()[0]
+    all_clean = False
+    for _ in range(0, retries):
+        stats = get_all_pg_info(rem_site, testdir)
+        states = [stat['state'] for stat in stats]
+        if len(set(states)) == 1 and states[0] == 'active+clean':
+            all_clean = True
+            break
+        log.info("Waiting for all osds to be active and clean.")
+        time.sleep(delays)
+    if not all_clean:
+        log.info("Scrubbing terminated -- not all pgs were active and clean.")
+        return
+    check_time_now = time.localtime()
+    time.sleep(1)
+    for slists in vlist:
+        for role in slists:
+            if role.startswith('osd.'):
+                log.info("Scrubbing osd {osd}".format(osd=role))
+                rem_site.run(args=[
+                            'adjust-ulimits',
+                            'ceph-coverage',
+                            '{tdir}/archive/coverage'.format(tdir=testdir),
+                            'ceph', 'osd', 'scrub', role])
+    prev_good = 0
+    gap_cnt = 0
+    loop = True
+    while loop:
+        stats = get_all_pg_info(rem_site, testdir)
+        timez = [stat['last_scrub_stamp'] for stat in stats]
+        loop = False
+        thiscnt = 0
+        for tmval in timez:
+            pgtm = time.strptime(tmval[0:tmval.find('.')], '%Y-%m-%d %H:%M:%S')
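+            # A last_scrub_stamp like '2014-08-07 14:24:52.123456' (illustrative)
+            # is truncated at the '.' and parsed, then compared against the time
+            # the scrubs were requested.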
+            if pgtm > check_time_now:
+                thiscnt += 1
+            else:
+                loop = True
+        if thiscnt > prev_good:
+            prev_good = thiscnt
+            gap_cnt = 0
+        else:
+            gap_cnt += 1
+            if gap_cnt > retries:
+                log.info('Exiting scrub checking -- not all pgs scrubbed.')
+                return
+        if loop:
+            log.info('Still waiting for all pgs to be scrubbed.')
+            time.sleep(delays)
+
+@contextlib.contextmanager
+def run_daemon(ctx, config, type_):
+    """
+    Run daemons for a role type.  Handle the startup and termination of a daemon.
+    On startup -- set coverages, cpu_profile, valgrind values for all remotes,
+    and a max_mds value for one mds.
+    On cleanup -- Stop all existing daemons of this type.
+
+    :param ctx: Context
+    :param config: Configuration
+    :param type_: Role type
+    """
+    log.info('Starting %s daemons...' % type_)
+    testdir = teuthology.get_testdir(ctx)
+    daemons = ctx.cluster.only(teuthology.is_type(type_))
+
+    # check whether any daemons of this type are configured
+    if daemons is None:
+        return
+    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
+
+    daemon_signal = 'kill'
+    if config.get('coverage') or config.get('valgrind') is not None:
+        daemon_signal = 'term'
+
+    num_active = 0
+    for remote, roles_for_host in daemons.remotes.iteritems():
+        for id_ in teuthology.roles_of_type(roles_for_host, type_):
+            name = '%s.%s' % (type_, id_)
+
+            if not (id_.endswith('-s')) and (id_.find('-s-') == -1):
+                num_active += 1
+
+            run_cmd = [
+                'sudo',
+                'adjust-ulimits',
+                'ceph-coverage',
+                coverage_dir,
+                'daemon-helper',
+                daemon_signal,
+                ]
+            run_cmd_tail = [
+                'ceph-%s' % (type_),
+                '-f',
+                '-i', id_]
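+            # Before any cpu_profile/valgrind wrapping below, the assembled
+            # command is roughly (type, id and signal shown are illustrative):
+            #   sudo adjust-ulimits ceph-coverage <coverage_dir> daemon-helper kill ceph-osd -f -i 0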
+
+            if type_ in config.get('cpu_profile', []):
+                profile_path = '/var/log/ceph/profiling-logger/%s.%s.prof' % (type_, id_)
+                run_cmd.extend([ 'env', 'CPUPROFILE=%s' % profile_path ])
+
+            if config.get('valgrind') is not None:
+                valgrind_args = None
+                if type_ in config['valgrind']:
+                    valgrind_args = config['valgrind'][type_]
+                if name in config['valgrind']:
+                    valgrind_args = config['valgrind'][name]
+                run_cmd = teuthology.get_valgrind_args(testdir, name,
+                                                       run_cmd,
+                                                       valgrind_args)
+
+            run_cmd.extend(run_cmd_tail)
+
+            ctx.daemons.add_daemon(remote, type_, id_,
+                                   args=run_cmd,
+                                   logger=log.getChild(name),
+                                   stdin=run.PIPE,
+                                   wait=False,
+                                   )
+
+    if type_ == 'mds':
+        firstmon = teuthology.get_first_mon(ctx, config)
+        (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
+
+        mon0_remote.run(args=[
+            'adjust-ulimits',
+            'ceph-coverage',
+            coverage_dir,
+            'ceph',
+            'mds', 'set_max_mds', str(num_active)])
+
+    try:
+        yield
+    finally:
+        teuthology.stop_daemons_of_type(ctx, type_)
+
+def healthy(ctx, config):
+    """
+    Wait for all OSDs to be up, and for ceph health to report HEALTH_OK.
+
+    :param ctx: Context
+    :param config: Configuration
+    """
+    log.info('Waiting until ceph is healthy...')
+    firstmon = teuthology.get_first_mon(ctx, config)
+    (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
+    teuthology.wait_until_osds_up(
+        ctx,
+        cluster=ctx.cluster,
+        remote=mon0_remote
+        )
+    teuthology.wait_until_healthy(
+        ctx,
+        remote=mon0_remote,
+        )
+
+def wait_for_osds_up(ctx, config):
+    """
+    Wait for all OSDs to come up.
+
+    :param ctx: Context
+    :param config: Configuration
+    """
+    log.info('Waiting until ceph osds are all up...')
+    firstmon = teuthology.get_first_mon(ctx, config)
+    (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
+    teuthology.wait_until_osds_up(
+        ctx,
+        cluster=ctx.cluster,
+        remote=mon0_remote
+        )
+
+def wait_for_mon_quorum(ctx, config):
+    """
+    Check remote ceph status until the monitor quorum matches the given list.
+
+    :param ctx: Context
+    :param config: Configuration
+    """
+
+    assert isinstance(config, list)
+    firstmon = teuthology.get_first_mon(ctx, config)
+    (remote,) = ctx.cluster.only(firstmon).remotes.keys()
+    while True:
+        r = remote.run(
+            args=[
+                'ceph',
+                'quorum_status',
+                ],
+            stdout=StringIO(),
+            logger=log.getChild('quorum_status'),
+            )
+        j = json.loads(r.stdout.getvalue())
+        q = j.get('quorum_names', [])
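+        # 'quorum_names' is the list of mon names currently in quorum, e.g.
+        # ['a', 'b', 'c'] (names are illustrative); we wait until it matches
+        # the requested list exactly.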
+        log.debug('Quorum: %s', q)
+        if sorted(q) == sorted(config):
+            break
+        time.sleep(1)
+
+
+@contextlib.contextmanager
+def restart(ctx, config):
+    """
+    Restart ceph daemons
+
+    For example::
+
+        tasks:
+        - ceph.restart: [all]
+
+    For example::
+
+        tasks:
+        - ceph.restart: [osd.0, mon.1]
+
+    or::
+
+        tasks:
+        - ceph.restart:
+            daemons: [osd.0, mon.1]
+            wait-for-healthy: false
+            wait-for-osds-up: true
+
+    :param ctx: Context
+    :param config: Configuration
+    """
+    if config is None:
+        config = {}
+    if isinstance(config, list):
+        config = { 'daemons': config }
+    if 'daemons' not in config:
+        config['daemons'] = []
+        type_daemon = ['mon', 'osd', 'mds', 'rgw']
+        for d in type_daemon:
+            type_ = d
+            for daemon in ctx.daemons.iter_daemons_of_role(type_):
+                config['daemons'].append(type_ + '.' + daemon.id_)
+
+    assert isinstance(config['daemons'], list)
+    daemons = dict.fromkeys(config['daemons'])
+    for i in daemons.keys():
+        type_ = i.split('.', 1)[0]
+        id_ = i.split('.', 1)[1]
+        ctx.daemons.get_daemon(type_, id_).stop()
+        ctx.daemons.get_daemon(type_, id_).restart()
+
+    if config.get('wait-for-healthy', True):
+        healthy(ctx=ctx, config=None)
+    if config.get('wait-for-osds-up', False):
+        wait_for_osds_up(ctx=ctx, config=None)
+    yield
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Set up and tear down a Ceph cluster.
+
+    For example::
+
+        tasks:
+        - ceph:
+        - interactive:
+
+    You can also specify what branch to run::
+
+        tasks:
+        - ceph:
+            branch: foo
+
+    Or a tag::
+
+        tasks:
+        - ceph:
+            tag: v0.42.13
+
+    Or a sha1::
+
+        tasks:
+        - ceph:
+            sha1: 1376a5ab0c89780eab39ffbbe436f6a6092314ed
+
+    Or a local source dir::
+
+        tasks:
+        - ceph:
+            path: /home/sage/ceph
+
+    To capture code coverage data, use::
+
+        tasks:
+        - ceph:
+            coverage: true
+
+    To use btrfs, ext4, or xfs on the target's scratch disks, use::
+
+        tasks:
+        - ceph:
+            fs: xfs
+            mkfs_options: [-b,size=65536,-l,logdev=/dev/sdc1]
+            mount_options: [nobarrier, inode64]
+
+    Note, this will cause the task to check the /scratch_devs file on each node
+    for available devices.  If no such file is found, /dev/sdb will be used.
+
+    To run some daemons under valgrind, include their names
+    and the tool/args to use in a valgrind section::
+
+        tasks:
+        - ceph:
+            valgrind:
+              mds.1: --tool=memcheck
+              osd.1: [--tool=memcheck, --leak-check=no]
+
+    Those nodes which are running daemons under valgrind will get
+    checked for bad results.
+
+    To adjust or modify config options, use::
+
+        tasks:
+        - ceph:
+            conf:
+              section:
+                key: value
+
+    For example::
+
+        tasks:
+        - ceph:
+            conf:
+              mds.0:
+                some option: value
+                other key: other value
+              client.0:
+                debug client: 10
+                debug ms: 1
+
+    By default, the cluster log is checked for errors and warnings,
+    and the run marked failed if any appear. You can ignore log
+    entries by giving a list of egrep-compatible regexes, for example::
+
+        tasks:
+        - ceph:
+            log-whitelist: ['foo.*bar', 'bad message']
+
+    :param ctx: Context
+    :param config: Configuration
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        "task ceph only supports a dictionary for configuration"
+
+    overrides = ctx.config.get('overrides', {})
+    teuthology.deep_merge(config, overrides.get('ceph', {}))
+
+    ctx.daemons = CephState()
+
+    testdir = teuthology.get_testdir(ctx)
+    if config.get('coverage'):
+        coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
+        log.info('Creating coverage directory...')
+        run.wait(
+            ctx.cluster.run(
+                args=[
+                    'install', '-d', '-m0755', '--',
+                    coverage_dir,
+                    ],
+                wait=False,
+                )
+            )
+
+    with contextutil.nested(
+        lambda: ceph_log(ctx=ctx, config=None),
+        lambda: valgrind_post(ctx=ctx, config=config),
+        lambda: cluster(ctx=ctx, config=dict(
+                conf=config.get('conf', {}),
+                fs=config.get('fs', None),
+                mkfs_options=config.get('mkfs_options', None),
+                mount_options=config.get('mount_options',None),
+                block_journal=config.get('block_journal', None),
+                tmpfs_journal=config.get('tmpfs_journal', None),
+                log_whitelist=config.get('log-whitelist', []),
+                cpu_profile=set(config.get('cpu_profile', [])),
+                )),
+        lambda: run_daemon(ctx=ctx, config=config, type_='mon'),
+        lambda: run_daemon(ctx=ctx, config=config, type_='osd'),
+        lambda: run_daemon(ctx=ctx, config=config, type_='mds'),
+        ):
+        try:
+            if config.get('wait-for-healthy', True):
+                healthy(ctx=ctx, config=None)
+            yield
+        finally:
+            osd_scrub_pgs(ctx, config)
diff --git a/tasks/ceph_client.py b/tasks/ceph_client.py
new file mode 100644 (file)
index 0000000..8935fc8
--- /dev/null
@@ -0,0 +1,40 @@
+"""
+Set up client keyring
+"""
+import logging
+
+from teuthology import misc as teuthology
+from ..orchestra import run
+
+log = logging.getLogger(__name__)
+
+def create_keyring(ctx):
+    """
+    Set up key ring on remote sites
+    """
+    log.info('Setting up client nodes...')
+    clients = ctx.cluster.only(teuthology.is_type('client'))
+    testdir = teuthology.get_testdir(ctx)
+    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
+    for remote, roles_for_host in clients.remotes.iteritems():
+        for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
+            client_keyring = '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
+            remote.run(
+                args=[
+                    'sudo',
+                    'adjust-ulimits',
+                    'ceph-coverage',
+                    coverage_dir,
+                    'ceph-authtool',
+                    '--create-keyring',
+                    '--gen-key',
+                    # TODO this --name= is not really obeyed, all unknown "types" are munged to "client"
+                    '--name=client.{id}'.format(id=id_),
+                    client_keyring,
+                    run.Raw('&&'),
+                    'sudo',
+                    'chmod',
+                    '0644',
+                    client_keyring,
+                    ],
+                )
diff --git a/tasks/ceph_deploy.py b/tasks/ceph_deploy.py
new file mode 100644 (file)
index 0000000..9964bab
--- /dev/null
@@ -0,0 +1,478 @@
+"""
+Execute ceph-deploy as a task
+"""
+from cStringIO import StringIO
+
+import contextlib
+import os
+import time
+import logging
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+from ..config import config as teuth_config
+import install as install_fn
+from ..orchestra import run
+
+log = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def download_ceph_deploy(ctx, config):
+    """
+    Downloads ceph-deploy from the ceph.com git mirror and (by default)
+    switches to the master branch. If the `ceph-deploy-branch` is specified, it
+    will use that instead.
+    """
+    log.info('Downloading ceph-deploy...')
+    testdir = teuthology.get_testdir(ctx)
+    ceph_admin = teuthology.get_first_mon(ctx, config)
+    default_cd_branch = {'ceph-deploy-branch': 'master'}
+    ceph_deploy_branch = config.get(
+        'ceph-deploy',
+        default_cd_branch).get('ceph-deploy-branch')
+
+    ctx.cluster.only(ceph_admin).run(
+        args=[
+            'git', 'clone', '-b', ceph_deploy_branch,
+            teuth_config.ceph_git_base_url + 'ceph-deploy.git',
+            '{tdir}/ceph-deploy'.format(tdir=testdir),
+            ],
+        )
+    ctx.cluster.only(ceph_admin).run(
+        args=[
+            'cd',
+            '{tdir}/ceph-deploy'.format(tdir=testdir),
+            run.Raw('&&'),
+            './bootstrap',
+            ],
+        )
+
+    try:
+        yield
+    finally:
+        log.info('Removing ceph-deploy ...')
+        ctx.cluster.only(ceph_admin).run(
+            args=[
+                'rm',
+                '-rf',
+                '{tdir}/ceph-deploy'.format(tdir=testdir),
+                ],
+            )
+
+
+def is_healthy(ctx, config):
+    """Wait until a Ceph cluster is healthy."""
+    testdir = teuthology.get_testdir(ctx)
+    ceph_admin = teuthology.get_first_mon(ctx, config)
+    (remote,) = ctx.cluster.only(ceph_admin).remotes.keys()
+    max_tries = 90  # 90 tries * 10 secs --> 15 minutes
+    tries = 0
+    while True:
+        tries += 1
+        if tries >= max_tries:
+            msg = "ceph health was unable to get 'HEALTH_OK' after waiting 15 minutes"
+            raise RuntimeError(msg)
+
+        r = remote.run(
+            args=[
+                'cd',
+                '{tdir}'.format(tdir=testdir),
+                run.Raw('&&'),
+                'sudo', 'ceph',
+                'health',
+                ],
+            stdout=StringIO(),
+            logger=log.getChild('health'),
+            )
+        out = r.stdout.getvalue()
+        log.debug('Ceph health: %s', out.rstrip('\n'))
+        if out.split(None, 1)[0] == 'HEALTH_OK':
+            break
+        time.sleep(10)
+
+def get_nodes_using_roles(ctx, config, role):
+    """Extract the names of nodes that match a given role from a cluster"""
+    newl = []
+    for _remote, roles_for_host in ctx.cluster.remotes.iteritems():
+        for id_ in teuthology.roles_of_type(roles_for_host, role):
+            rem = _remote
+            if role == 'mon':
+                req1 = str(rem).split('@')[-1]
+            else:
+                req = str(rem).split('.')[0]
+                req1 = str(req).split('@')[1]
+            newl.append(req1)
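+            # e.g. (illustrative) a remote named 'ubuntu@plana01.front.sepia.ceph.com'
+            # yields 'plana01.front.sepia.ceph.com' for mon roles and 'plana01'
+            # for everything else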
+    return newl
+
+def get_dev_for_osd(ctx, config):
+    """Get a list of all osd device names."""
+    osd_devs = []
+    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
+        host = remote.name.split('@')[-1]
+        shortname = host.split('.')[0]
+        devs = teuthology.get_scratch_devices(remote)
+        num_osd_per_host = list(teuthology.roles_of_type(roles_for_host, 'osd'))
+        num_osds = len(num_osd_per_host)
+        assert num_osds <= len(devs), 'fewer disks than osds on ' + shortname
+        for dev in devs[:num_osds]:
+            dev_short = dev.split('/')[-1]
+            osd_devs.append('{host}:{dev}'.format(host=shortname, dev=dev_short))
+    return osd_devs
+
+def get_all_nodes(ctx, config):
+    """Return a string of node names separated by blanks"""
+    nodelist = []
+    for t, k in ctx.config['targets'].iteritems():
+        host = t.split('@')[-1]
+        simple_host = host.split('.')[0]
+        nodelist.append(simple_host)
+    nodelist = " ".join(nodelist)
+    return nodelist
+
+def execute_ceph_deploy(ctx, config, cmd):
+    """Remotely execute a ceph_deploy command"""
+    testdir = teuthology.get_testdir(ctx)
+    ceph_admin = teuthology.get_first_mon(ctx, config)
+    exec_cmd = cmd
+    (remote,) = ctx.cluster.only(ceph_admin).remotes.iterkeys()
+    proc = remote.run(
+        args = [
+            'cd',
+            '{tdir}/ceph-deploy'.format(tdir=testdir),
+            run.Raw('&&'),
+            run.Raw(exec_cmd),
+            ],
+            check_status=False,
+        )
+    exitstatus = proc.exitstatus
+    return exitstatus
+
+
+@contextlib.contextmanager
+def build_ceph_cluster(ctx, config):
+    """Build a ceph cluster"""
+
+    try:
+        log.info('Building ceph cluster using ceph-deploy...')
+        testdir = teuthology.get_testdir(ctx)
+        ceph_branch = None
+        if config.get('branch') is not None:
+            cbranch = config.get('branch')
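+            # e.g. (illustrative) a config of ``branch: {stable: firefly}`` turns
+            # into the ceph-deploy flag '--stable=firefly'; 'testing' maps to the
+            # bare '--testing' flag.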
+            for var, val in cbranch.iteritems():
+                if var == 'testing':
+                    ceph_branch = '--{var}'.format(var=var)
+                else:
+                    ceph_branch = '--{var}={val}'.format(var=var, val=val)
+        node_dev_list = []
+        all_nodes = get_all_nodes(ctx, config)
+        mds_nodes = get_nodes_using_roles(ctx, config, 'mds')
+        mds_nodes = " ".join(mds_nodes)
+        mon_node = get_nodes_using_roles(ctx, config, 'mon')
+        mon_nodes = " ".join(mon_node)
+        new_mon = './ceph-deploy new'+" "+mon_nodes
+        install_nodes = './ceph-deploy install '+ceph_branch+" "+all_nodes
+        purge_nodes = './ceph-deploy purge'+" "+all_nodes
+        purgedata_nodes = './ceph-deploy purgedata'+" "+all_nodes
+        mon_hostname = mon_nodes.split(' ')[0]
+        mon_hostname = str(mon_hostname)
+        gather_keys = './ceph-deploy gatherkeys'+" "+mon_hostname
+        deploy_mds = './ceph-deploy mds create'+" "+mds_nodes
+        no_of_osds = 0
+
+        if mon_nodes is None:
+            raise RuntimeError("no monitor nodes in the config file")
+
+        estatus_new = execute_ceph_deploy(ctx, config, new_mon)
+        if estatus_new != 0:
+            raise RuntimeError("ceph-deploy: new command failed")
+
+        log.info('adding config inputs...')
+        testdir = teuthology.get_testdir(ctx)
+        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)
+        first_mon = teuthology.get_first_mon(ctx, config)
+        (remote,) = ctx.cluster.only(first_mon).remotes.keys()
+
+        lines = None
+        if config.get('conf') is not None:
+            confp = config.get('conf')
+            for section, keys in confp.iteritems():
+                lines = '[{section}]\n'.format(section=section)
+                teuthology.append_lines_to_file(remote, conf_path, lines,
+                                                sudo=True)
+                for key, value in keys.iteritems():
+                    log.info("[%s] %s = %s" % (section, key, value))
+                    lines = '{key} = {value}\n'.format(key=key, value=value)
+                    teuthology.append_lines_to_file(remote, conf_path, lines,
+                                                    sudo=True)
+
+        estatus_install = execute_ceph_deploy(ctx, config, install_nodes)
+        if estatus_install != 0:
+            raise RuntimeError("ceph-deploy: Failed to install ceph")
+
+        mon_no = None
+        mon_no = config.get('mon_initial_members')
+        if mon_no is not None:
+            i = 0
+            mon1 = []
+            while i < mon_no:
+                mon1.append(mon_node[i])
+                i = i + 1
+            initial_mons = " ".join(mon1)
+            for k in range(mon_no, len(mon_node)):
+                mon_create_nodes = './ceph-deploy mon create' + " " + \
+                    initial_mons + " " + mon_node[k]
+                estatus_mon = execute_ceph_deploy(ctx, config,
+                                                  mon_create_nodes)
+                if estatus_mon != 0:
+                    raise RuntimeError("ceph-deploy: Failed to create monitor")
+        else:
+            mon_create_nodes = './ceph-deploy mon create-initial'
+            estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes)
+            if estatus_mon != 0:
+                raise RuntimeError("ceph-deploy: Failed to create monitors")
+
+        estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
+        max_gather_tries = 90
+        gather_tries = 0
+        while (estatus_gather != 0):
+            gather_tries += 1
+            if gather_tries >= max_gather_tries:
+                msg = 'ceph-deploy was not able to gatherkeys after 15 minutes'
+                raise RuntimeError(msg)
+            estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
+            time.sleep(10)
+
+        if mds_nodes:
+            estatus_mds = execute_ceph_deploy(ctx, config, deploy_mds)
+            if estatus_mds != 0:
+                raise RuntimeError("ceph-deploy: Failed to deploy mds")
+
+        if config.get('test_mon_destroy') is not None:
+            for d in range(1, len(mon_node)):
+                mon_destroy_nodes = './ceph-deploy mon destroy'+" "+mon_node[d]
+                estatus_mon_d = execute_ceph_deploy(ctx, config,
+                                                    mon_destroy_nodes)
+                if estatus_mon_d != 0:
+                    raise RuntimeError("ceph-deploy: Failed to delete monitor")
+
+        node_dev_list = get_dev_for_osd(ctx, config)
+        for d in node_dev_list:
+            osd_create_cmds = './ceph-deploy osd create --zap-disk'+" "+d
+            estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds)
+            if estatus_osd == 0:
+                log.info('successfully created osd')
+                no_of_osds += 1
+            else:
+                zap_disk = './ceph-deploy disk zap'+" "+d
+                execute_ceph_deploy(ctx, config, zap_disk)
+                estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds)
+                if estatus_osd == 0:
+                    log.info('successfully created osd')
+                    no_of_osds += 1
+                else:
+                    raise RuntimeError("ceph-deploy: Failed to create osds")
+
+        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
+            is_healthy(ctx=ctx, config=None)
+
+            log.info('Setting up client nodes...')
+            conf_path = '/etc/ceph/ceph.conf'
+            admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
+            first_mon = teuthology.get_first_mon(ctx, config)
+            (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
+            conf_data = teuthology.get_file(
+                remote=mon0_remote,
+                path=conf_path,
+                sudo=True,
+                )
+            admin_keyring = teuthology.get_file(
+                remote=mon0_remote,
+                path=admin_keyring_path,
+                sudo=True,
+                )
+
+            clients = ctx.cluster.only(teuthology.is_type('client'))
+            for remot, roles_for_host in clients.remotes.iteritems():
+                for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
+                    client_keyring = \
+                        '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
+                    mon0_remote.run(
+                        args=[
+                            'cd',
+                            '{tdir}'.format(tdir=testdir),
+                            run.Raw('&&'),
+                            'sudo', 'bash', '-c',
+                            run.Raw('"'), 'ceph',
+                            'auth',
+                            'get-or-create',
+                            'client.{id}'.format(id=id_),
+                            'mds', 'allow',
+                            'mon', 'allow *',
+                            'osd', 'allow *',
+                            run.Raw('>'),
+                            client_keyring,
+                            run.Raw('"'),
+                            ],
+                        )
+                    key_data = teuthology.get_file(
+                        remote=mon0_remote,
+                        path=client_keyring,
+                        sudo=True,
+                        )
+                    teuthology.sudo_write_file(
+                        remote=remot,
+                        path=client_keyring,
+                        data=key_data,
+                        perms='0644'
+                    )
+                    teuthology.sudo_write_file(
+                        remote=remot,
+                        path=admin_keyring_path,
+                        data=admin_keyring,
+                        perms='0644'
+                    )
+                    teuthology.sudo_write_file(
+                        remote=remot,
+                        path=conf_path,
+                        data=conf_data,
+                        perms='0644'
+                    )
+        else:
+            raise RuntimeError(
+                "The cluster is NOT operational due to insufficient OSDs")
+        yield
+
+    finally:
+        log.info('Stopping ceph...')
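+        # try the upstart job first, then fall back to sysvinit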
+        ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'),
+                              'sudo', 'service', 'ceph', 'stop' ])
+
+        # Are you really not running anymore?
+        # try first with the init tooling
+        ctx.cluster.run(args=['sudo', 'status', 'ceph-all', run.Raw('||'),
+                              'sudo', 'service',  'status', 'ceph-all'])
+
+        # and now just check for the processes themselves, as if upstart/sysvinit
+        # is lying to us. Ignore errors if the grep fails
+        ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'),
+                              'grep', '-v', 'grep', run.Raw('|'),
+                              'grep', 'ceph'], check_status=False)
+
+        if ctx.archive is not None:
+            # archive mon data, too
+            log.info('Archiving mon data...')
+            path = os.path.join(ctx.archive, 'data')
+            os.makedirs(path)
+            mons = ctx.cluster.only(teuthology.is_type('mon'))
+            for remote, roles in mons.remotes.iteritems():
+                for role in roles:
+                    if role.startswith('mon.'):
+                        teuthology.pull_directory_tarball(
+                            remote,
+                            '/var/lib/ceph/mon',
+                            path + '/' + role + '.tgz')
+
+            log.info('Compressing logs...')
+            run.wait(
+                ctx.cluster.run(
+                    args=[
+                        'sudo',
+                        'find',
+                        '/var/log/ceph',
+                        '-name',
+                        '*.log',
+                        '-print0',
+                        run.Raw('|'),
+                        'sudo',
+                        'xargs',
+                        '-0',
+                        '--no-run-if-empty',
+                        '--',
+                        'gzip',
+                        '--',
+                        ],
+                    wait=False,
+                    ),
+                )
+
+            log.info('Archiving logs...')
+            path = os.path.join(ctx.archive, 'remote')
+            os.makedirs(path)
+            for remote in ctx.cluster.remotes.iterkeys():
+                sub = os.path.join(path, remote.shortname)
+                os.makedirs(sub)
+                teuthology.pull_directory(remote, '/var/log/ceph',
+                                          os.path.join(sub, 'log'))
+
+        # Prevent these from being undefined if the try block fails
+        all_nodes = get_all_nodes(ctx, config)
+        purge_nodes = './ceph-deploy purge'+" "+all_nodes
+        purgedata_nodes = './ceph-deploy purgedata'+" "+all_nodes
+
+        log.info('Purging package...')
+        execute_ceph_deploy(ctx, config, purge_nodes)
+        log.info('Purging data...')
+        execute_ceph_deploy(ctx, config, purgedata_nodes)
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Set up and tear down a Ceph cluster.
+
+    For example::
+
+        tasks:
+        - install:
+             extras: yes
+        - ssh_keys:
+        - ceph-deploy:
+             branch:
+                stable: bobtail
+             mon_initial_members: 1
+
+        tasks:
+        - install:
+             extras: yes
+        - ssh_keys:
+        - ceph-deploy:
+             branch:
+                dev: master
+             conf:
+                mon:
+                   debug mon = 20
+
+        tasks:
+        - install:
+             extras: yes
+        - ssh_keys:
+        - ceph-deploy:
+             branch:
+                testing:
+    """
+    if config is None:
+        config = {}
+
+    overrides = ctx.config.get('overrides', {})
+    teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))
+
+    assert isinstance(config, dict), \
+        "task ceph-deploy only supports a dictionary for configuration"
+
+    if config.get('branch') is not None:
+        assert isinstance(config['branch'], dict), 'branch must be a dictionary'
+
+    with contextutil.nested(
+         lambda: install_fn.ship_utilities(ctx=ctx, config=None),
+         lambda: download_ceph_deploy(ctx=ctx, config=config),
+         lambda: build_ceph_cluster(ctx=ctx, config=dict(
+                 conf=config.get('conf', {}),
+                 branch=config.get('branch',{}),
+                 mon_initial_members=config.get('mon_initial_members', None),
+                 test_mon_destroy=config.get('test_mon_destroy', None),
+                 )),
+        ):
+        yield
diff --git a/tasks/ceph_fuse.py b/tasks/ceph_fuse.py
new file mode 100644 (file)
index 0000000..ef3998b
--- /dev/null
@@ -0,0 +1,207 @@
+"""
+Ceph FUSE client task
+"""
+import contextlib
+import logging
+import os
+import time
+from cStringIO import StringIO
+
+from teuthology import misc
+from ..orchestra import run
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Mount/unmount a ``ceph-fuse`` client.
+
+    The config is optional and defaults to mounting on all clients. If
+    a config is given, it is expected to be a list of clients to do
+    this operation on. This lets you e.g. set up one client with
+    ``ceph-fuse`` and another with ``kclient``.
+
+    Example that mounts all clients::
+
+        tasks:
+        - ceph:
+        - ceph-fuse:
+        - interactive:
+
+    Example that uses both ``kclient`` and ``ceph-fuse``::
+
+        tasks:
+        - ceph:
+        - ceph-fuse: [client.0]
+        - kclient: [client.1]
+        - interactive:
+
+    Example that enables valgrind::
+
+        tasks:
+        - ceph:
+        - ceph-fuse:
+            client.0:
+              valgrind: [--tool=memcheck, --leak-check=full, --show-reachable=yes]
+        - interactive:
+
+    :param ctx: Context
+    :param config: Configuration
+    """
+    log.info('Mounting ceph-fuse clients...')
+    fuse_daemons = {}
+
+    testdir = misc.get_testdir(ctx)
+
+    if config is None:
+        config = dict(('client.{id}'.format(id=id_), None)
+                  for id_ in misc.all_roles_of_type(ctx.cluster, 'client'))
+    elif isinstance(config, list):
+        config = dict((name, None) for name in config)
+
+    overrides = ctx.config.get('overrides', {})
+    misc.deep_merge(config, overrides.get('ceph-fuse', {}))
+
+    clients = list(misc.get_clients(ctx=ctx, roles=config.keys()))
+
+    for id_, remote in clients:
+        client_config = config.get("client.%s" % id_)
+        if client_config is None:
+            client_config = {}
+        log.info("Client client.%s config is %s" % (id_, client_config))
+
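+        # daemon-helper delivers this signal on teardown; coverage and valgrind
+        # runs get SIGTERM so ceph-fuse can exit cleanly and flush its output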
+        daemon_signal = 'kill'
+        if client_config.get('coverage') or client_config.get('valgrind') is not None:
+            daemon_signal = 'term'
+
+        mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
+        log.info('Mounting ceph-fuse client.{id} at {remote} {mnt}...'.format(
+                id=id_, remote=remote, mnt=mnt))
+
+        remote.run(
+            args=[
+                'mkdir',
+                '--',
+                mnt,
+                ],
+            )
+
+        run_cmd = [
+            'sudo',
+            'adjust-ulimits',
+            'ceph-coverage',
+            '{tdir}/archive/coverage'.format(tdir=testdir),
+            'daemon-helper',
+            daemon_signal,
+            ]
+        run_cmd_tail = [
+            'ceph-fuse',
+            '-f',
+            '--name', 'client.{id}'.format(id=id_),
+            # TODO ceph-fuse doesn't understand dash dash '--',
+            mnt,
+            ]
+
+        if client_config.get('valgrind') is not None:
+            run_cmd = misc.get_valgrind_args(
+                testdir,
+                'client.{id}'.format(id=id_),
+                run_cmd,
+                client_config.get('valgrind'),
+                )
+
+        run_cmd.extend(run_cmd_tail)
+
+        proc = remote.run(
+            args=run_cmd,
+            logger=log.getChild('ceph-fuse.{id}'.format(id=id_)),
+            stdin=run.PIPE,
+            wait=False,
+            )
+        fuse_daemons[id_] = proc
+
+    for id_, remote in clients:
+        mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
+        wait_until_fuse_mounted(
+            remote=remote,
+            fuse=fuse_daemons[id_],
+            mountpoint=mnt,
+            )
+        remote.run(args=['sudo', 'chmod', '1777', '{tdir}/mnt.{id}'.format(tdir=testdir, id=id_)],)
+
+    try:
+        yield
+    finally:
+        log.info('Unmounting ceph-fuse clients...')
+        for id_, remote in clients:
+            mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
+            try:
+                remote.run(
+                    args=[
+                        'sudo',
+                        'fusermount',
+                        '-u',
+                        mnt,
+                        ],
+                    )
+            except run.CommandFailedError:
+                log.info('Failed to unmount ceph-fuse on {name}, aborting...'.format(name=remote.name))
+                # abort the fuse mount, killing all hung processes
+                remote.run(
+                    args=[
+                        'if', 'test', '-e', '/sys/fs/fuse/connections/*/abort',
+                        run.Raw(';'), 'then',
+                        'echo',
+                        '1',
+                        run.Raw('>'),
+                        run.Raw('/sys/fs/fuse/connections/*/abort'),
+                        run.Raw(';'), 'fi',
+                        ],
+                    )
+                # make sure it's unmounted
+                remote.run(
+                    args=[
+                        'sudo',
+                        'umount',
+                        '-l',
+                        '-f',
+                        mnt,
+                        ],
+                    )
+
+        run.wait(fuse_daemons.itervalues())
+
+        for id_, remote in clients:
+            mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
+            remote.run(
+                args=[
+                    'rmdir',
+                    '--',
+                    mnt,
+                    ],
+                )
+
+
+def wait_until_fuse_mounted(remote, fuse, mountpoint):
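+    """
+    Poll the mountpoint with ``stat --file-system`` until it reports the
+    'fuseblk' filesystem type, i.e. until ceph-fuse has finished mounting.
+    """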
+    while True:
+        proc = remote.run(
+            args=[
+                'stat',
+                '--file-system',
+                '--printf=%T\n',
+                '--',
+                mountpoint,
+                ],
+            stdout=StringIO(),
+            )
+        fstype = proc.stdout.getvalue().rstrip('\n')
+        if fstype == 'fuseblk':
+            break
+        log.debug('ceph-fuse not yet mounted, got fs type {fstype!r}'.format(fstype=fstype))
+
+        # it shouldn't have exited yet; exposes some trivial problems
+        assert not fuse.exitstatus.ready()
+
+        time.sleep(5)
+    log.info('ceph-fuse is mounted on %s', mountpoint)
diff --git a/tasks/ceph_manager.py b/tasks/ceph_manager.py
new file mode 100644 (file)
index 0000000..39d2466
--- /dev/null
@@ -0,0 +1,1421 @@
+"""
+ceph manager -- Thrasher and CephManager objects
+"""
+from cStringIO import StringIO
+import random
+import time
+import gevent
+import json
+import threading
+from teuthology import misc as teuthology
+from teuthology.task import ceph as ceph_task
+from teuthology.task.scrub import Scrubber
+
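+# A minimal usage sketch (illustrative only; ``mon_remote`` and ``logger`` are
+# assumed to come from the calling task, e.g. via ctx.cluster.only(first_mon)):
+#
+#     manager = CephManager(mon_remote, ctx=ctx, logger=logger)
+#     manager.wait_for_clean()
+#     thrasher = Thrasher(manager, config={'chance_down': 0.3}, logger=logger)
+#     ...                   # run a workload while osds are being thrashed
+#     thrasher.do_join()    # stop thrashing and wait for the greenlet
+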
+class Thrasher:
+    """
+    Object used to thrash Ceph
+    """
+    def __init__(self, manager, config, logger=None):
+        self.ceph_manager = manager
+        self.ceph_manager.wait_for_clean()
+        osd_status = self.ceph_manager.get_osd_status()
+        self.in_osds = osd_status['in']
+        self.live_osds = osd_status['live']
+        self.out_osds = osd_status['out']
+        self.dead_osds = osd_status['dead']
+        self.stopping = False
+        self.logger = logger
+        self.config = config
+        self.revive_timeout = self.config.get("revive_timeout", 75)
+        if self.config.get('powercycle'):
+            self.revive_timeout += 120
+        self.clean_wait = self.config.get('clean_wait', 0)
+        self.minin = self.config.get("min_in", 3)
+
+        num_osds = len(self.in_osds) + len(self.out_osds)
+        self.max_pgs = self.config.get("max_pgs_per_pool_osd", 1200) * num_osds
+        if self.logger is not None:
+            self.log = lambda x: self.logger.info(x)
+        else:
+            def tmp(x):
+                """
+                Implement log behavior
+                """
+                print x
+            self.log = tmp
+        if self.config is None:
+            self.config = dict()
+        # prevent monitor from auto-marking things out while thrasher runs
+        # try both old and new tell syntax, in case we are testing old code
+        try:
+            manager.raw_cluster_cmd('--', 'tell', 'mon.*', 'injectargs',
+                                    '--mon-osd-down-out-interval 0')
+        except Exception:
+            manager.raw_cluster_cmd('--', 'mon', 'tell', '*', 'injectargs',
+                                    '--mon-osd-down-out-interval 0')
+        self.thread = gevent.spawn(self.do_thrash)
+
+    def kill_osd(self, osd=None, mark_down=False, mark_out=False):
+        """
+        :param osd: Osd to be killed.
+        :mark_down: Mark down if true.
+        :mark_out: Mark out if true.
+        """
+        if osd is None:
+            osd = random.choice(self.live_osds)
+        self.log("Killing osd %s, live_osds are %s" % (str(osd), str(self.live_osds)))
+        self.live_osds.remove(osd)
+        self.dead_osds.append(osd)
+        self.ceph_manager.kill_osd(osd)
+        if mark_down:
+            self.ceph_manager.mark_down_osd(osd)
+        if mark_out and osd in self.in_osds:
+            self.out_osd(osd)
+
+    def blackhole_kill_osd(self, osd=None):
+        """
+        If all else fails, kill the osd.
+        :param osd: Osd to be killed.
+        """
+        if osd is None:
+            osd = random.choice(self.live_osds)
+        self.log("Blackholing and then killing osd %s, live_osds are %s" % (str(osd), str(self.live_osds)))
+        self.live_osds.remove(osd)
+        self.dead_osds.append(osd)
+        self.ceph_manager.blackhole_kill_osd(osd)
+
+    def revive_osd(self, osd=None):
+        """
+        Revive the osd.
+        :param osd: Osd to be revived.
+        """
+        if osd is None:
+            osd = random.choice(self.dead_osds)
+        self.log("Reviving osd %s" % (str(osd),))
+        self.live_osds.append(osd)
+        self.dead_osds.remove(osd)
+        self.ceph_manager.revive_osd(osd, self.revive_timeout)
+
+    def out_osd(self, osd=None):
+        """
+        Mark the osd out
+        :param osd: Osd to be marked.
+        """
+        if osd is None:
+            osd = random.choice(self.in_osds)
+        self.log("Removing osd %s, in_osds are: %s" % (str(osd), str(self.in_osds)))
+        self.ceph_manager.mark_out_osd(osd)
+        self.in_osds.remove(osd)
+        self.out_osds.append(osd)
+
+    def in_osd(self, osd=None):
+        """
+        Mark the osd in (reviving it first if it is dead)
+        :param osd: Osd to be marked.
+        """
+        if osd is None:
+            osd = random.choice(self.out_osds)
+        if osd in self.dead_osds:
+            return self.revive_osd(osd)
+        self.log("Adding osd %s" % (str(osd),))
+        self.out_osds.remove(osd)
+        self.in_osds.append(osd)
+        self.ceph_manager.mark_in_osd(osd)
+        self.log("Added osd %s"%(str(osd),))
+
+    def reweight_osd(self, osd=None):
+        """
+        Reweight an osd that is in
+        :param osd: Osd to be marked.
+        """
+        if osd is None:
+            osd = random.choice(self.in_osds)
+        val = random.uniform(.1, 1.0)
+        self.log("Reweighting osd %s to %s" % (str(osd), str(val)))
+        self.ceph_manager.raw_cluster_cmd('osd', 'reweight', str(osd), str(val))
+
+    def primary_affinity(self, osd=None):
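+        """
+        Set the primary-affinity of a random (or given) in osd to a random
+        value, to 1, or to 0.
+        :param osd: Osd to be set (random in osd if None).
+        """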
+        if osd is None:
+            osd = random.choice(self.in_osds)
+        if random.random() >= .5:
+            pa = random.random()
+        elif random.random() >= .5:
+            pa = 1
+        else:
+            pa = 0
+        self.log('Setting osd %s primary_affinity to %f' % (str(osd), pa))
+        self.ceph_manager.raw_cluster_cmd('osd', 'primary-affinity', str(osd), str(pa))
+
+    def all_up(self):
+        """
+        Make sure all osds are up and not out.
+        """
+        while len(self.dead_osds) > 0:
+            self.log("reviving osd")
+            self.revive_osd()
+        while len(self.out_osds) > 0:
+            self.log("inning osd")
+            self.in_osd()
+
+    def do_join(self):
+        """
+        Stop the thrashing loop and wait for its thread to finish.
+        """
+        self.stopping = True
+        self.thread.get()
+
+    def grow_pool(self):
+        """
+        Increase the size of the pool
+        """
+        pool = self.ceph_manager.get_pool()
+        self.log("Growing pool %s"%(pool,))
+        self.ceph_manager.expand_pool(pool, self.config.get('pool_grow_by', 10), self.max_pgs)
+
+    def fix_pgp_num(self):
+        """
+        Fix number of pgs in pool.
+        """
+        pool = self.ceph_manager.get_pool()
+        self.log("fixing pg num pool %s"%(pool,))
+        self.ceph_manager.set_pool_pgpnum(pool)
+
+    def test_pool_min_size(self):
+        """
+        Kill and revive all osds except one.
+        """
+        self.log("test_pool_min_size")
+        self.all_up()
+        self.ceph_manager.wait_for_recovery(
+            timeout=self.config.get('timeout')
+            )
+        the_one = random.choice(self.in_osds)
+        self.log("Killing everyone but %s" % (the_one,))
+        to_kill = filter(lambda x: x != the_one, self.in_osds)
+        [self.kill_osd(i) for i in to_kill]
+        [self.out_osd(i) for i in to_kill]
+        time.sleep(self.config.get("test_pool_min_size_time", 10))
+        self.log("Killing %s" % (the_one,))
+        self.kill_osd(the_one)
+        self.out_osd(the_one)
+        self.log("Reviving everyone but %s" % (the_one,))
+        [self.revive_osd(i) for i in to_kill]
+        [self.in_osd(i) for i in to_kill]
+        self.log("Revived everyone but %s" % (the_one,))
+        self.log("Waiting for clean")
+        self.ceph_manager.wait_for_recovery(
+            timeout=self.config.get('timeout')
+            )
+
+    def inject_pause(self, conf_key, duration, check_after, should_be_down):
+        """
+        Pause injection testing. Check for osd being down when finished.
+        """
+        the_one = random.choice(self.live_osds)
+        self.log("inject_pause on {osd}".format(osd = the_one))
+        self.log(
+            "Testing {key} pause injection for duration {duration}".format(
+                key = conf_key,
+                duration = duration
+                ))
+        self.log(
+            "Checking after {after}, should_be_down={shouldbedown}".format(
+                after = check_after,
+                shouldbedown = should_be_down
+                ))
+        self.ceph_manager.set_config(the_one, **{conf_key:duration})
+        if not should_be_down:
+            return
+        time.sleep(check_after)
+        status = self.ceph_manager.get_osd_status()
+        assert the_one in status['down']
+        time.sleep(duration - check_after + 20)
+        status = self.ceph_manager.get_osd_status()
+        assert the_one not in status['down']
+
+    def test_backfill_full(self):
+        """
+        Test backfills stopping when the replica fills up.
+
+        First, use osd_backfill_full_ratio to simulate a now full
+        osd by setting it to 0 on all of the OSDs.
+
+        Second, on a random subset, set
+        osd_debug_skip_full_check_in_backfill_reservation to force
+        the more complicated check in do_scan to be exercised.
+
+        Then, verify that all backfills stop.
+        """
+        self.log("injecting osd_backfill_full_ratio = 0")
+        for i in self.live_osds:
+            self.ceph_manager.set_config(
+                i,
+                osd_debug_skip_full_check_in_backfill_reservation = random.choice(
+                    ['false', 'true']),
+                osd_backfill_full_ratio = 0)
+        for i in range(30):
+            status = self.ceph_manager.compile_pg_status()
+            if 'backfill' not in status.keys():
+                break
+            self.log(
+                "waiting for {still_going} backfills".format(
+                    still_going=status.get('backfill')))
+            time.sleep(1)
+        assert('backfill' not in self.ceph_manager.compile_pg_status().keys())
+        for i in self.live_osds:
+            self.ceph_manager.set_config(
+                i,
+                osd_debug_skip_full_check_in_backfill_reservation = \
+                    'false',
+                osd_backfill_full_ratio = 0.85)
+
+    def test_map_discontinuity(self):
+        """
+        1) Allows the osds to recover
+        2) kills an osd
+        3) allows the remaining osds to recover
+        4) waits for some time
+        5) revives the osd
+        This sequence should cause the revived osd to have to handle
+        a map gap since the mons would have trimmed
+        """
+        while len(self.in_osds) < (self.minin + 1):
+            self.in_osd()
+        self.log("Waiting for recovery")
+        self.ceph_manager.wait_for_all_up(
+            timeout=self.config.get('timeout')
+            )
+        # now we wait 20s for the pg status to change, if it takes longer,
+        # the test *should* fail!
+        time.sleep(20)
+        self.ceph_manager.wait_for_clean(
+            timeout=self.config.get('timeout')
+            )
+
+        # now we wait 20s for the backfill replicas to hear about the clean
+        time.sleep(20)
+        self.log("Recovered, killing an osd")
+        self.kill_osd(mark_down=True, mark_out=True)
+        self.log("Waiting for clean again")
+        self.ceph_manager.wait_for_clean(
+            timeout=self.config.get('timeout')
+            )
+        self.log("Waiting for trim")
+        time.sleep(int(self.config.get("map_discontinuity_sleep_time", 40)))
+        self.revive_osd()
+
+    def choose_action(self):
+        """
+        Random action selector.
+        """
+        chance_down = self.config.get('chance_down', 0.4)
+        chance_test_min_size = self.config.get('chance_test_min_size', 0)
+        chance_test_backfill_full = self.config.get('chance_test_backfill_full', 0)
+        if isinstance(chance_down, int):
+            chance_down = float(chance_down) / 100
+        minin = self.minin
+        minout = self.config.get("min_out", 0)
+        minlive = self.config.get("min_live", 2)
+        mindead = self.config.get("min_dead", 0)
+
+        self.log('choose_action: min_in %d min_out %d min_live %d min_dead %d' %
+                 (minin, minout, minlive, mindead))
+        actions = []
+        if len(self.in_osds) > minin:
+            actions.append((self.out_osd, 1.0,))
+        if len(self.live_osds) > minlive and chance_down > 0:
+            actions.append((self.kill_osd, chance_down,))
+        if len(self.out_osds) > minout:
+            actions.append((self.in_osd, 1.7,))
+        if len(self.dead_osds) > mindead:
+            actions.append((self.revive_osd, 1.0,))
+        if self.config.get('thrash_primary_affinity', True):
+            actions.append((self.primary_affinity, 1.0,))
+        actions.append((self.reweight_osd, self.config.get('reweight_osd',.5),))
+        actions.append((self.grow_pool, self.config.get('chance_pgnum_grow', 0),))
+        actions.append((self.fix_pgp_num, self.config.get('chance_pgpnum_fix', 0),))
+        actions.append((self.test_pool_min_size, chance_test_min_size,))
+        actions.append((self.test_backfill_full, chance_test_backfill_full,))
+        for key in ['heartbeat_inject_failure', 'filestore_inject_stall']:
+            for scenario in [
+                (lambda: self.inject_pause(key,
+                                           self.config.get('pause_short', 3),
+                                           0,
+                                           False),
+                 self.config.get('chance_inject_pause_short', 1),),
+                (lambda: self.inject_pause(key,
+                                           self.config.get('pause_long', 80),
+                                           self.config.get('pause_check_after', 70),
+                                           True),
+                 self.config.get('chance_inject_pause_long', 0),)]:
+                actions.append(scenario)
+
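+        # weighted random selection: draw a point in [0, total) and walk the
+        # action list, subtracting each weight until the draw lands inside one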
+        total = sum([y for (x, y) in actions])
+        val = random.uniform(0, total)
+        for (action, prob) in actions:
+            if val < prob:
+                return action
+            val -= prob
+        return None
+
+    def do_thrash(self):
+        """
+        Loop to select random actions to thrash ceph manager with.
+        """
+        cleanint = self.config.get("clean_interval", 60)
+        scrubint = self.config.get("scrub_interval", -1)
+        maxdead = self.config.get("max_dead", 0)
+        delay = self.config.get("op_delay", 5)
+        self.log("starting do_thrash")
+        while not self.stopping:
+            self.log(" ".join([str(x) for x in ["in_osds: ", self.in_osds, " out_osds: ", self.out_osds,
+                                                "dead_osds: ", self.dead_osds, "live_osds: ",
+                                                self.live_osds]]))
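+            # roughly once every clean_interval seconds, let the cluster heal:
+            # revive excess dead osds, reset weights, wait for recovery and
+            # optionally scrub or exercise a map discontinuity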
+            if random.uniform(0, 1) < (float(delay) / cleanint):
+                while len(self.dead_osds) > maxdead:
+                    self.revive_osd()
+                for osd in self.in_osds:
+                    self.ceph_manager.raw_cluster_cmd('osd', 'reweight',
+                                                      str(osd), str(1))
+                if random.uniform(0, 1) < float(
+                    self.config.get('chance_test_map_discontinuity', 0)):
+                    self.test_map_discontinuity()
+                else:
+                    self.ceph_manager.wait_for_recovery(
+                        timeout=self.config.get('timeout')
+                        )
+                time.sleep(self.clean_wait)
+                if scrubint > 0:
+                    if random.uniform(0, 1) < (float(delay) / scrubint):
+                        self.log('Scrubbing while thrashing being performed')
+                        Scrubber(self.ceph_manager, self.config)
+            self.choose_action()()
+            time.sleep(delay)
+        self.all_up()
+
+class CephManager:
+    """
+    Ceph manager object.
+    Contains several local functions that form a bulk of this module.
+    """
+    def __init__(self, controller, ctx=None, config=None, logger=None):
+        self.lock = threading.RLock()
+        self.ctx = ctx
+        self.config = config
+        self.controller = controller
+        self.next_pool_id = 0
+        self.created_erasure_pool = False
+        if (logger):
+            self.log = lambda x: logger.info(x)
+        else:
+            def tmp(x):
+                """
+                implement log behavior.
+                """
+                print x
+            self.log = tmp
+        if self.config is None:
+            self.config = dict()
+        pools = self.list_pools()
+        self.pools = {}
+        for pool in pools:
+            self.pools[pool] = self.get_pool_property(pool, 'pg_num')
+
+    def raw_cluster_cmd(self, *args):
+        """
+        Run a 'ceph' command against the cluster and return its stdout.
+        """
+        testdir = teuthology.get_testdir(self.ctx)
+        ceph_args = [
+                'adjust-ulimits',
+                'ceph-coverage',
+                '{tdir}/archive/coverage'.format(tdir=testdir),
+                'ceph',
+                ]
+        ceph_args.extend(args)
+        proc = self.controller.run(
+            args=ceph_args,
+            stdout=StringIO(),
+            )
+        return proc.stdout.getvalue()
+
+    def raw_cluster_cmd_result(self, *args):
+        """
+        Run a 'ceph' command against the cluster and return its exit status.
+        """
+        testdir = teuthology.get_testdir(self.ctx)
+        ceph_args = [
+                'adjust-ulimits',
+                'ceph-coverage',
+                '{tdir}/archive/coverage'.format(tdir=testdir),
+                'ceph',
+                ]
+        ceph_args.extend(args)
+        proc = self.controller.run(
+            args=ceph_args,
+            check_status=False,
+            )
+        return proc.exitstatus
+
+    def do_rados(self, remote, cmd):
+        """
+        Execute a remote rados command.
+        """
+        testdir = teuthology.get_testdir(self.ctx)
+        pre = [
+            'adjust-ulimits',
+            'ceph-coverage',
+            '{tdir}/archive/coverage'.format(tdir=testdir),
+            'rados',
+            ]
+        pre.extend(cmd)
+        proc = remote.run(
+            args=pre,
+            wait=True,
+            )
+        return proc
+
+    def rados_write_objects(
+        self, pool, num_objects, size, timelimit, threads, cleanup=False):
+        """
+        Write rados objects
+        Threads not used yet.
+        """
+        args = [
+            '-p', pool,
+            '--num-objects', num_objects,
+            '-b', size,
+            'bench', timelimit,
+            'write'
+            ]
+        if not cleanup:
+            args.append('--no-cleanup')
+        return self.do_rados(self.controller, map(str, args))
+
+    def do_put(self, pool, obj, fname):
+        """
+        Implement rados put operation
+        """
+        return self.do_rados(
+            self.controller,
+            [
+                '-p',
+                pool,
+                'put',
+                obj,
+                fname
+                ]
+            )
+
+    def do_get(self, pool, obj, fname='/dev/null'):
+        """
+        Implement rados get operation
+        """
+        return self.do_rados(
+            self.controller,
+            [
+                '-p',
+                pool,
+                'get',
+                obj,
+                fname
+                ]
+            )
+
+    def osd_admin_socket(self, osdnum, command, check_status=True):
+        """
+        Run a ceph command against the given osd's admin socket on its host.
+        """
+        testdir = teuthology.get_testdir(self.ctx)
+        remote = None
+        for _remote, roles_for_host in self.ctx.cluster.remotes.iteritems():
+            for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
+                if int(id_) == int(osdnum):
+                    remote = _remote
+        assert remote is not None
+        args = [
+            'sudo',
+            'adjust-ulimits',
+            'ceph-coverage',
+            '{tdir}/archive/coverage'.format(tdir=testdir),
+            'ceph',
+            '--admin-daemon',
+            '/var/run/ceph/ceph-osd.{id}.asok'.format(id=osdnum),
+            ]
+        args.extend(command)
+        return remote.run(
+            args=args,
+            stdout=StringIO(),
+            wait=True,
+            check_status=check_status
+            )
+
+    def get_pgid(self, pool, pgnum):
+        """
+        :param pool: pool number
+        :param pgnum: pg number
+        :returns: a string representing this pg.
+        """
+        poolnum = self.get_pool_num(pool)
+        pg_str = "{poolnum}.{pgnum}".format(
+            poolnum=poolnum,
+            pgnum=pgnum)
+        return pg_str
+
+    def get_pg_replica(self, pool, pgnum):
+        """
+        get replica for pool, pgnum (e.g. (data, 0) -> 0)
+        """
+        output = self.raw_cluster_cmd("pg", "dump", '--format=json')
+        j = json.loads('\n'.join(output.split('\n')[1:]))
+        pg_str = self.get_pgid(pool, pgnum)
+        for pg in j['pg_stats']:
+            if pg['pgid'] == pg_str:
+                return int(pg['acting'][-1])
+        assert False
+
+    def get_pg_primary(self, pool, pgnum):
+        """
+        get primary for pool, pgnum (e.g. (data, 0) -> 0)
+        """
+        output = self.raw_cluster_cmd("pg", "dump", '--format=json')
+        j = json.loads('\n'.join(output.split('\n')[1:]))
+        pg_str = self.get_pgid(pool, pgnum)
+        for pg in j['pg_stats']:
+            if pg['pgid'] == pg_str:
+                return int(pg['acting'][0])
+        assert False
+
+    def get_pool_num(self, pool):
+        """
+        get number for pool (e.g., data -> 2)
+        """
+        out = self.raw_cluster_cmd('osd', 'dump', '--format=json')
+        j = json.loads('\n'.join(out.split('\n')[1:]))
+        for i in j['pools']:
+            if i['pool_name'] == pool:
+                return int(i['pool'])
+        assert False
+
+    def list_pools(self):
+        """
+        list all pool names
+        """
+        out = self.raw_cluster_cmd('osd', 'dump', '--format=json')
+        j = json.loads('\n'.join(out.split('\n')[1:]))
+        self.log(j['pools'])
+        return [str(i['pool_name']) for i in j['pools']]
+
+    def clear_pools(self):
+        """
+        remove all pools
+        """
+        [self.remove_pool(i) for i in self.list_pools()]
+
+    def kick_recovery_wq(self, osdnum):
+        """
+        Run kick_recovery_wq on cluster.
+        """
+        return self.raw_cluster_cmd(
+            'tell', "osd.%d" % (int(osdnum),),
+            'debug',
+            'kick_recovery_wq',
+            '0')
+
+    def wait_run_admin_socket(self, osdnum, args=['version'], timeout=75):
+        """
+        If the osd_admin_socket call succeeds, return.  Otherwise wait
+        five seconds and try again.
+        """
+        tries = 0
+        while True:
+            proc = self.osd_admin_socket(
+                osdnum, args,
+                check_status=False)
+            if proc.exitstatus == 0:
+                break
+            else:
+                tries += 1
+                if (tries * 5) > timeout:
+                    raise Exception('timed out waiting for admin_socket to appear after osd.{o} restart'.format(o=osdnum))
+                self.log(
+                    "waiting on admin_socket for {osdnum}, {command}".format(
+                        osdnum=osdnum,
+                        command=args))
+                time.sleep(5)
+
+    def set_config(self, osdnum, **argdict):
+        """
+        :param osdnum: osd number
+        :param argdict: dictionary containing values to set.
+        """
+        for k, v in argdict.iteritems():
+            self.wait_run_admin_socket(
+                osdnum,
+                ['config', 'set', str(k), str(v)])
+
+    def raw_cluster_status(self):
+        """
+        Get status from cluster
+        """
+        status = self.raw_cluster_cmd('status', '--format=json-pretty')
+        return json.loads(status)
+
+    def raw_osd_status(self):
+        """
+        Get osd status from cluster
+        """
+        return self.raw_cluster_cmd('osd', 'dump')
+
+    def get_osd_status(self):
+        """
+        Get osd statuses sorted by states that the osds are in.
+        """
+        osd_lines = filter(
+            lambda x: x.startswith('osd.') and (("up" in x) or ("down" in x)),
+            self.raw_osd_status().split('\n'))
+        self.log(osd_lines)
+        in_osds = [int(i[4:].split()[0]) for i in filter(
+                lambda x: " in " in x,
+                osd_lines)]
+        out_osds = [int(i[4:].split()[0]) for i in filter(
+                lambda x: " out " in x,
+                osd_lines)]
+        up_osds = [int(i[4:].split()[0]) for i in filter(
+                lambda x: " up " in x,
+                osd_lines)]
+        down_osds = [int(i[4:].split()[0]) for i in filter(
+                lambda x: " down " in x,
+                osd_lines)]
+        dead_osds = [int(x.id_) for x in
+                     filter(lambda x: not x.running(), self.ctx.daemons.iter_daemons_of_role('osd'))]
+        live_osds = [int(x.id_) for x in
+                     filter(lambda x: x.running(), self.ctx.daemons.iter_daemons_of_role('osd'))]
+        return { 'in' : in_osds, 'out' : out_osds, 'up' : up_osds,
+                 'down' : down_osds, 'dead' : dead_osds, 'live' : live_osds,
+                 'raw' : osd_lines}
+
+    def get_num_pgs(self):
+        """
+        Check cluster status for the number of pgs
+        """
+        status = self.raw_cluster_status()
+        self.log(status)
+        return status['pgmap']['num_pgs']
+
+    def create_pool_with_unique_name(self, pg_num=16, ec_pool=False, ec_m=1, ec_k=2):
+        """
+        Create a pool named unique_pool_X where X is unique.
+        """
+        name = ""
+        with self.lock:
+            name = "unique_pool_%s" % (str(self.next_pool_id),)
+            self.next_pool_id += 1
+            self.create_pool(
+                name,
+                pg_num,
+                ec_pool=ec_pool,
+                ec_m=ec_m,
+                ec_k=ec_k)
+        return name
+
+    def create_pool(self, pool_name, pg_num=16, ec_pool=False, ec_m=1, ec_k=2):
+        """
+        Create a pool named from the pool_name parameter.
+        :param pool_name: name of the pool being created.
+        :param pg_num: initial number of pgs.
+        """
+        with self.lock:
+            assert isinstance(pool_name, str)
+            assert isinstance(pg_num, int)
+            assert pool_name not in self.pools
+            self.log("creating pool_name %s"%(pool_name,))
+            if ec_pool and not self.created_erasure_pool:
+                self.created_erasure_pool = True
+                self.raw_cluster_cmd('osd', 'erasure-code-profile', 'set', 'teuthologyprofile', 'ruleset-failure-domain=osd', 'm='+str(ec_m), 'k='+str(ec_k))
+
+            if ec_pool:
+                self.raw_cluster_cmd('osd', 'pool', 'create', pool_name, str(pg_num), str(pg_num), 'erasure', 'teuthologyprofile')
+            else:
+                self.raw_cluster_cmd('osd', 'pool', 'create', pool_name, str(pg_num))
+            self.pools[pool_name] = pg_num
+
+    def remove_pool(self, pool_name):
+        """
+        Remove the indicated pool
+        :param pool_name: Pool to be removed
+        """
+        with self.lock:
+            assert isinstance(pool_name, str)
+            assert pool_name in self.pools
+            self.log("removing pool_name %s" % (pool_name,))
+            del self.pools[pool_name]
+            self.do_rados(
+                self.controller,
+                ['rmpool', pool_name, pool_name, "--yes-i-really-really-mean-it"]
+                )
+
+    def get_pool(self):
+        """
+        Pick a random pool
+        """
+        with self.lock:
+            return random.choice(self.pools.keys())
+
+    def get_pool_pg_num(self, pool_name):
+        """
+        Return the number of pgs in the pool specified.
+        """
+        with self.lock:
+            assert isinstance(pool_name, str)
+            if pool_name in self.pools:
+                return self.pools[pool_name]
+            return 0
+
+    def get_pool_property(self, pool_name, prop):
+        """
+        :param pool_name: pool
+        :param prop: property to be checked.
+        :returns: property as an int value.
+        """
+        with self.lock:
+            assert isinstance(pool_name, str)
+            assert isinstance(prop, str)
+            output = self.raw_cluster_cmd(
+                'osd',
+                'pool',
+                'get',
+                pool_name,
+                prop)
+            return int(output.split()[1])
+
+    def set_pool_property(self, pool_name, prop, val):
+        """
+        :param pool_name: pool
+        :param prop: property to be set.
+        :param val: value to set.
+
+        This routine retries if set operation fails.
+        """
+        with self.lock:
+            assert isinstance(pool_name, str)
+            assert isinstance(prop, str)
+            assert isinstance(val, int)
+            tries = 0
+            while True:
+                r = self.raw_cluster_cmd_result(
+                    'osd',
+                    'pool',
+                    'set',
+                    pool_name,
+                    prop,
+                    str(val))
+                if r != 11: # EAGAIN
+                    break
+                tries += 1
+                if tries > 50:
+                    raise Exception('timed out getting EAGAIN when setting pool property %s %s = %s' % (pool_name, prop, val))
+                self.log('got EAGAIN setting pool property, waiting a few seconds...')
+                time.sleep(2)
+
+    def expand_pool(self, pool_name, by, max_pgs):
+        """
+        Increase the number of pgs in a pool
+        """
+        with self.lock:
+            assert isinstance(pool_name, str)
+            assert isinstance(by, int)
+            assert pool_name in self.pools
+            if self.get_num_creating() > 0:
+                return
+            if (self.pools[pool_name] + by) > max_pgs:
+                return
+            self.log("increase pool size by %d"%(by,))
+            new_pg_num = self.pools[pool_name] + by
+            self.set_pool_property(pool_name, "pg_num", new_pg_num)
+            self.pools[pool_name] = new_pg_num
+
+    def set_pool_pgpnum(self, pool_name):
+        """
+        Set pgpnum property of pool_name pool.
+        """
+        with self.lock:
+            assert isinstance(pool_name, str)
+            assert pool_name in self.pools
+            if self.get_num_creating() > 0:
+                return
+            self.set_pool_property(pool_name, 'pgp_num', self.pools[pool_name])
+
+    def list_pg_missing(self, pgid):
+        """
+        return list of missing pgs with the id specified
+        """
+        r = None
+        offset = {}
+        while True:
+            out = self.raw_cluster_cmd('--', 'pg', pgid, 'list_missing',
+                                       json.dumps(offset))
+            j = json.loads(out)
+            if r is None:
+                r = j
+            else:
+                r['objects'].extend(j['objects'])
+            if 'more' not in j:
+                break
+            if j['more'] == 0:
+                break
+            offset = j['objects'][-1]['oid']
+        if 'more' in r:
+            del r['more']
+        return r
+
+    def get_pg_stats(self):
+        """
+        Dump the cluster and get pg stats
+        """
+        out = self.raw_cluster_cmd('pg', 'dump', '--format=json')
+        j = json.loads('\n'.join(out.split('\n')[1:]))
+        return j['pg_stats']
+
+    def compile_pg_status(self):
+        """
+        Return a histogram of pg state values
+        """
+        ret = {}
+        j = self.get_pg_stats()
+        for pg in j:
+            for status in pg['state'].split('+'):
+                if status not in ret:
+                    ret[status] = 0
+                ret[status] += 1
+        return ret
+
+    def pg_scrubbing(self, pool, pgnum):
+        """
+        pg scrubbing wrapper
+        """
+        pgstr = self.get_pgid(pool, pgnum)
+        stats = self.get_single_pg_stats(pgstr)
+        return 'scrub' in stats['state']
+
+    def pg_repairing(self, pool, pgnum):
+        """
+        pg repairing wrapper
+        """
+        pgstr = self.get_pgid(pool, pgnum)
+        stats = self.get_single_pg_stats(pgstr)
+        return 'repair' in stats['state']
+
+    def pg_inconsistent(self, pool, pgnum):
+        """
+        pg inconsistent wrapper
+        """
+        pgstr = self.get_pgid(pool, pgnum)
+        stats = self.get_single_pg_stats(pgstr)
+        return 'inconsistent' in stats['state']
+
+    def get_last_scrub_stamp(self, pool, pgnum):
+        """
+        Get the timestamp of the last scrub.
+        """
+        stats = self.get_single_pg_stats(self.get_pgid(pool, pgnum))
+        return stats["last_scrub_stamp"]
+
+    def do_pg_scrub(self, pool, pgnum, stype):
+        """
+        Scrub pg and wait for scrubbing to finish
+        """
+        init = self.get_last_scrub_stamp(pool, pgnum)
+        self.raw_cluster_cmd('pg', stype, self.get_pgid(pool, pgnum))
+        while init == self.get_last_scrub_stamp(pool, pgnum):
+            self.log("waiting for scrub type %s"%(stype,))
+            time.sleep(10)
+
+    def get_single_pg_stats(self, pgid):
+        """
+        Return pg for the pgid specified.
+        """
+        all_stats = self.get_pg_stats()
+
+        for pg in all_stats:
+            if pg['pgid'] == pgid:
+                return pg
+
+        return None
+
+    def get_osd_dump(self):
+        """
+        Dump osds
+        :returns: all osds
+        """
+        out = self.raw_cluster_cmd('osd', 'dump', '--format=json')
+        j = json.loads('\n'.join(out.split('\n')[1:]))
+        return j['osds']
+
+    def get_stuck_pgs(self, type_, threshold):
+        """
+        :returns: stuck pg information from the cluster
+        """
+        out = self.raw_cluster_cmd('pg', 'dump_stuck', type_, str(threshold),
+                                   '--format=json')
+        return json.loads(out)
+
+    def get_num_unfound_objects(self):
+        """
+        Check cluster status to get the number of unfound objects
+        """
+        status = self.raw_cluster_status()
+        self.log(status)
+        return status['pgmap'].get('unfound_objects', 0)
+
+    def get_num_creating(self):
+        """
+        Find the number of pgs in creating mode.
+        """
+        pgs = self.get_pg_stats()
+        num = 0
+        for pg in pgs:
+            if 'creating' in pg['state']:
+                num += 1
+        return num
+
+    def get_num_active_clean(self):
+        """
+        Find the number of active and clean pgs.
+        """
+        pgs = self.get_pg_stats()
+        num = 0
+        for pg in pgs:
+            if pg['state'].count('active') and pg['state'].count('clean') and not pg['state'].count('stale'):
+                num += 1
+        return num
+
+    def get_num_active_recovered(self):
+        """
+        Find the number of active and recovered pgs.
+        """
+        pgs = self.get_pg_stats()
+        num = 0
+        for pg in pgs:
+            if pg['state'].count('active') and not pg['state'].count('recover') and not pg['state'].count('backfill') and not pg['state'].count('stale'):
+                num += 1
+        return num
+
+    def get_is_making_recovery_progress(self):
+        """
+        Return whether there is recovery progress discernable in the
+        raw cluster status
+        """
+        status = self.raw_cluster_status()
+        kps = status['pgmap'].get('recovering_keys_per_sec', 0)
+        bps = status['pgmap'].get('recovering_bytes_per_sec', 0)
+        ops = status['pgmap'].get('recovering_objects_per_sec', 0)
+        return kps > 0 or bps > 0 or ops > 0
+
+    def get_num_active(self):
+        """
+        Find the number of active pgs.
+        """
+        pgs = self.get_pg_stats()
+        num = 0
+        for pg in pgs:
+            if pg['state'].count('active') and not pg['state'].count('stale'):
+                num += 1
+        return num
+
+    def get_num_down(self):
+        """
+        Find the number of pgs that are down.
+        """
+        pgs = self.get_pg_stats()
+        num = 0
+        for pg in pgs:
+            if (pg['state'].count('down') and not pg['state'].count('stale')) or \
+                    (pg['state'].count('incomplete') and not pg['state'].count('stale')):
+                num += 1
+        return num
+
+    def get_num_active_down(self):
+        """
+        Find the number of pgs that are either active or down.
+        """
+        pgs = self.get_pg_stats()
+        num = 0
+        for pg in pgs:
+            if (pg['state'].count('active') and not pg['state'].count('stale')) or \
+                    (pg['state'].count('down') and not pg['state'].count('stale')) or \
+                    (pg['state'].count('incomplete') and not pg['state'].count('stale')):
+                num += 1
+        return num
+
+    def is_clean(self):
+        """
+        True if all pgs are clean
+        """
+        return self.get_num_active_clean() == self.get_num_pgs()
+
+    def is_recovered(self):
+        """
+        True if all pgs have recovered
+        """
+        return self.get_num_active_recovered() == self.get_num_pgs()
+
+    def is_active_or_down(self):
+        """
+        True if all pgs are active or down
+        """
+        return self.get_num_active_down() == self.get_num_pgs()
+
+    def wait_for_clean(self, timeout=None):
+        """
+        Wait until all pgs are active and clean.
+        """
+        self.log("waiting for clean")
+        start = time.time()
+        num_active_clean = self.get_num_active_clean()
+        while not self.is_clean():
+            if timeout is not None:
+                if self.get_is_making_recovery_progress():
+                    self.log("making progress, resetting timeout")
+                    start = time.time()
+                else:
+                    self.log("no progress seen, keeping timeout for now")
+                    assert time.time() - start < timeout, \
+                        'failed to become clean before timeout expired'
+            cur_active_clean = self.get_num_active_clean()
+            if cur_active_clean != num_active_clean:
+                start = time.time()
+                num_active_clean = cur_active_clean
+            time.sleep(3)
+        self.log("clean!")
+
+    def are_all_osds_up(self):
+        """
+        Returns true if all osds are up.
+        """
+        x = self.get_osd_dump()
+        return len(x) == sum([(y['up'] > 0) for y in x])
+
+    def wait_for_all_up(self, timeout=None):
+        """
+        When this exits, either the timeout has expired, or all
+        osds are up.
+        """
+        self.log("waiting for all up")
+        start = time.time()
+        while not self.are_all_osds_up():
+            if timeout is not None:
+                assert time.time() - start < timeout, \
+                    'timeout expired in wait_for_all_up'
+            time.sleep(3)
+        self.log("all up!")
+
+    def wait_for_recovery(self, timeout=None):
+        """
+        Check peering. When this exits, we have recovered.
+        """
+        self.log("waiting for recovery to complete")
+        start = time.time()
+        num_active_recovered = self.get_num_active_recovered()
+        while not self.is_recovered():
+            if timeout is not None:
+                assert time.time() - start < timeout, \
+                    'failed to recover before timeout expired'
+            cur_active_recovered = self.get_num_active_recovered()
+            if cur_active_recovered != num_active_recovered:
+                start = time.time()
+                num_active_recovered = cur_active_recovered
+            time.sleep(3)
+        self.log("recovered!")
+
+    def wait_for_active(self, timeout=None):
+        """
+        Check peering. When this exits, we are definitely active
+        """
+        self.log("waiting for peering to complete")
+        start = time.time()
+        num_active = self.get_num_active()
+        while not self.is_active():
+            if timeout is not None:
+                assert time.time() - start < timeout, \
+                    'failed to recover before timeout expired'
+            cur_active = self.get_num_active()
+            if cur_active != num_active:
+                start = time.time()
+                num_active = cur_active
+            time.sleep(3)
+        self.log("active!")
+
+    def wait_for_active_or_down(self, timeout=None):
+        """
+        Check peering. When this exits, we are definitely either
+        active or down
+        """
+        self.log("waiting for peering to complete or become blocked")
+        start = time.time()
+        num_active_down = self.get_num_active_down()
+        while not self.is_active_or_down():
+            if timeout is not None:
+                assert time.time() - start < timeout, \
+                    'failed to recover before timeout expired'
+            cur_active_down = self.get_num_active_down()
+            if cur_active_down != num_active_down:
+                start = time.time()
+                num_active_down = cur_active_down
+            time.sleep(3)
+        self.log("active or down!")
+
+    def osd_is_up(self, osd):
+        """
+        Wrapper for osd check
+        """
+        osds = self.get_osd_dump()
+        return osds[osd]['up'] > 0
+
+    def wait_till_osd_is_up(self, osd, timeout=None):
+        """
+        Loop waiting for osd.
+        """
+        self.log('waiting for osd.%d to be up' % osd)
+        start = time.time()
+        while not self.osd_is_up(osd):
+            if timeout is not None:
+                assert time.time() - start < timeout, \
+                    'osd.%d failed to come up before timeout expired' % osd
+            time.sleep(3)
+        self.log('osd.%d is up' % osd)
+
+    def is_active(self):
+        """
+        Wrapper to check if active
+        """
+        return self.get_num_active() == self.get_num_pgs()
+
+    def wait_till_active(self, timeout=None):
+        """
+        Wait until all pgs are active.
+        """
+        self.log("waiting till active")
+        start = time.time()
+        while not self.is_active():
+            if timeout is not None:
+                assert time.time() - start < timeout, \
+                    'failed to become active before timeout expired'
+            time.sleep(3)
+        self.log("active!")
+
+    def mark_out_osd(self, osd):
+        """
+        Wrapper to mark osd out.
+        """
+        self.raw_cluster_cmd('osd', 'out', str(osd))
+
+    def kill_osd(self, osd):
+        """
+        Kill osds by either power cycling (if indicated by the config)
+        or by stopping.
+        """
+        if self.config.get('powercycle'):
+            (remote,) = self.ctx.cluster.only('osd.{o}'.format(o=osd)).remotes.iterkeys()
+            self.log('kill_osd on osd.{o} doing powercycle of {s}'.format(o=osd, s=remote.name))
+            assert remote.console is not None, "powercycling requested but RemoteConsole is not initialized.  Check ipmi config."
+            remote.console.power_off()
+        else:
+            self.ctx.daemons.get_daemon('osd', osd).stop()
+
+    def blackhole_kill_osd(self, osd):
+        """
+        Inject filestore-blackhole into the osd, then stop the daemon.
+        """
+        self.raw_cluster_cmd('--', 'tell', 'osd.%d' % osd,
+                             'injectargs', '--filestore-blackhole')
+        time.sleep(2)
+        self.ctx.daemons.get_daemon('osd', osd).stop()
+
+    def revive_osd(self, osd, timeout=150):
+        """
+        Revive osds by either power cycling (if indicated by the config)
+        or by restarting.
+        """
+        if self.config.get('powercycle'):
+            (remote,) = self.ctx.cluster.only('osd.{o}'.format(o=osd)).remotes.iterkeys()
+            self.log('revive_osd on osd.{o} doing powercycle of {s}'.format(o=osd, s=remote.name))
+            assert remote.console is not None, "powercycling requested but RemoteConsole is not initialized.  Check ipmi config."
+            remote.console.power_on()
+            if not remote.console.check_status(300):
+                raise Exception('Failed to revive osd.{o} via ipmi'.format(o=osd))
+            teuthology.reconnect(self.ctx, 60, [remote])
+            ceph_task.mount_osd_data(self.ctx, remote, str(osd))
+            ceph_task.make_admin_daemon_dir(self.ctx, remote)
+            self.ctx.daemons.get_daemon('osd', osd).reset()
+        self.ctx.daemons.get_daemon('osd', osd).restart()
+        # wait for dump_ops_in_flight; this command doesn't appear
+        # until after the signal handler is installed and it is safe
+        # to stop the osd again without making valgrind leak checks
+        # unhappy.  see #5924.
+        self.wait_run_admin_socket(osd,
+                                   args=['dump_ops_in_flight'],
+                                   timeout=timeout)
+
+    def mark_down_osd(self, osd):
+        """
+        Cluster command wrapper
+        """
+        self.raw_cluster_cmd('osd', 'down', str(osd))
+
+    def mark_in_osd(self, osd):
+        """
+        Cluster command wrapper
+        """
+        self.raw_cluster_cmd('osd', 'in', str(osd))
+
+
+    ## monitors
+
+    def signal_mon(self, mon, sig):
+        """
+        Wrapper around the local get_daemon call.
+        """
+        self.ctx.daemons.get_daemon('mon', mon).signal(sig)
+
+    def kill_mon(self, mon):
+        """
+        Kill the monitor by either power cycling (if the config says so),
+        or by doing a stop.
+        """
+        if self.config.get('powercycle'):
+            (remote,) = self.ctx.cluster.only('mon.{m}'.format(m=mon)).remotes.iterkeys()
+            self.log('kill_mon on mon.{m} doing powercycle of {s}'.format(m=mon, s=remote.name))
+            assert remote.console is not None, "powercycling requested but RemoteConsole is not initialized.  Check ipmi config."
+            remote.console.power_off()
+        else:
+            self.ctx.daemons.get_daemon('mon', mon).stop()
+
+    def revive_mon(self, mon):
+        """
+        Restart by either power cycling (if the config says so),
+        or by doing a normal restart.
+        """
+        if self.config.get('powercycle'):
+            (remote,) = self.ctx.cluster.only('mon.{m}'.format(m=mon)).remotes.iterkeys()
+            self.log('revive_mon on mon.{m} doing powercycle of {s}'.format(m=mon, s=remote.name))
+            assert remote.console is not None, "powercycling requested but RemoteConsole is not initialized.  Check ipmi config."
+            remote.console.power_on()
+            ceph_task.make_admin_daemon_dir(self.ctx, remote)
+        self.ctx.daemons.get_daemon('mon', mon).restart()
+
+    def get_mon_status(self, mon):
+        """
+        Extract all the monitor status information from the cluster
+        """
+        addr = self.ctx.ceph.conf['mon.%s' % mon]['mon addr']
+        out = self.raw_cluster_cmd('-m', addr, 'mon_status')
+        return json.loads(out)
+
+    def get_mon_quorum(self):
+        """
+        Extract monitor quorum information from the cluster
+        """
+        out = self.raw_cluster_cmd('quorum_status')
+        j = json.loads(out)
+        self.log('quorum_status is %s' % out)
+        return j['quorum']
+
+    def wait_for_mon_quorum_size(self, size, timeout=300):
+        """
+        Loop until quorum size is reached.
+        """
+        self.log('waiting for quorum size %d' % size)
+        start = time.time()
+        while not len(self.get_mon_quorum()) == size:
+            if timeout is not None:
+                assert time.time() - start < timeout, \
+                    'failed to reach quorum size %d before timeout expired' % size
+            time.sleep(3)
+        self.log("quorum is size %d" % size)
+
+    def get_mon_health(self, debug=False):
+        """
+        Extract all the monitor health information.
+        """
+        out = self.raw_cluster_cmd('health', '--format=json')
+        if debug:
+            self.log('health:\n{h}'.format(h=out))
+        return json.loads(out)
+
+    ## metadata servers
+
+    def kill_mds(self, mds):
+        """
+        Powercycle if set in config, otherwise just stop.
+        """
+        if self.config.get('powercycle'):
+            (remote,) = self.ctx.cluster.only('mds.{m}'.format(m=mds)).remotes.iterkeys()
+            self.log('kill_mds on mds.{m} doing powercycle of {s}'.format(m=mds, s=remote.name))
+            assert remote.console is not None, "powercycling requested but RemoteConsole is not initialized.  Check ipmi config."
+            remote.console.power_off()
+        else:
+            self.ctx.daemons.get_daemon('mds', mds).stop()
+
+    def kill_mds_by_rank(self, rank):
+        """
+        kill_mds wrapper to kill based on rank passed.
+        """
+        status = self.get_mds_status_by_rank(rank)
+        self.kill_mds(status['name'])
+
+    def revive_mds(self, mds, standby_for_rank=None):
+        """
+        Revive mds -- do an ipmi powercycle (if indicated by the config)
+        and then restart (using --hot-standby if specified).
+        """
+        if self.config.get('powercycle'):
+            (remote,) = self.ctx.cluster.only('mds.{m}'.format(m=mds)).remotes.iterkeys()
+            self.log('revive_mds on mds.{m} doing powercycle of {s}'.format(m=mds, s=remote.name))
+            assert remote.console is not None, "powercycling requested but RemoteConsole is not initialized.  Check ipmi config."
+            remote.console.power_on()
+            ceph_task.make_admin_daemon_dir(self.ctx, remote)
+        args = []
+        if standby_for_rank:
+            args.extend(['--hot-standby', standby_for_rank])
+        self.ctx.daemons.get_daemon('mds', mds).restart(*args)
+
+    def revive_mds_by_rank(self, rank, standby_for_rank=None):
+        """
+        revive_mds wrapper to revive based on rank passed.
+        """
+        status = self.get_mds_status_by_rank(rank)
+        self.revive_mds(status['name'], standby_for_rank)
+
+    def get_mds_status(self, mds):
+        """
+        Run cluster commands for the mds in order to get mds information
+        """
+        out = self.raw_cluster_cmd('mds', 'dump', '--format=json')
+        j = json.loads(' '.join(out.splitlines()[1:]))
+        # collate; for dup ids, larger gid wins.
+        for info in j['info'].itervalues():
+            if info['name'] == mds:
+                return info
+        return None
+
+    def get_mds_status_by_rank(self, rank):
+        """
+        Run cluster commands for the mds in order to get mds information
+        check rank.
+        """
+        out = self.raw_cluster_cmd('mds', 'dump', '--format=json')
+        j = json.loads(' '.join(out.splitlines()[1:]))
+        # collate; for dup ids, larger gid wins.
+        for info in j['info'].itervalues():
+            if info['rank'] == rank:
+                return info
+        return None
+
+    def get_mds_status_all(self):
+        """
+        Run cluster command to extract all the mds status.
+        """
+        out = self.raw_cluster_cmd('mds', 'dump', '--format=json')
+        j = json.loads(' '.join(out.splitlines()[1:]))
+        return j
diff --git a/tasks/chef.py b/tasks/chef.py
new file mode 100644 (file)
index 0000000..db793c3
--- /dev/null
@@ -0,0 +1,35 @@
+"""
+Chef-solo task
+"""
+import logging
+
+from ..orchestra import run
+from .. import misc
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+    """
+    Run chef-solo on all nodes.
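+
+    A minimal usage sketch; the ordering below is illustrative only and may
+    differ between suites::
+
+        tasks:
+        - chef:        # runs before the ceph task in this sketch
+        - ceph:
+        - interactive: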
+    """
+    log.info('Running chef-solo...')
+
+    run.wait(
+        ctx.cluster.run(
+            args=[
+                'wget',
+#                '-q',
+                '-O-',
+#                'https://raw.github.com/ceph/ceph-qa-chef/master/solo/solo-from-scratch',
+                'http://ceph.com/git/?p=ceph-qa-chef.git;a=blob_plain;f=solo/solo-from-scratch;hb=HEAD',
+                run.Raw('|'),
+                'sh',
+                '-x',
+                ],
+            wait=False,
+            )
+        )
+
+    log.info('Reconnecting after ceph-qa-chef run')
+    misc.reconnect(ctx, 10)     #Reconnect for ulimit and other ceph-qa-chef changes
+
diff --git a/tasks/cifs_mount.py b/tasks/cifs_mount.py
new file mode 100644 (file)
index 0000000..ac58f31
--- /dev/null
@@ -0,0 +1,137 @@
+"""
+Mount cifs clients.  Unmount when finished.
+"""
+import contextlib
+import logging
+import os
+
+from teuthology import misc as teuthology
+from ..orchestra import run
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Mount/unmount a cifs client.
+
+    The config is optional and defaults to mounting on all clients. If
+    a config is given, it is expected to be a list of clients to do
+    this operation on.
+
+    Example that starts smbd and mounts cifs on all nodes::
+
+        tasks:
+        - ceph:
+        - samba:
+        - cifs-mount:
+        - interactive:
+
+    Example that splits smbd and cifs::
+
+        tasks:
+        - ceph:
+        - samba: [samba.0]
+        - cifs-mount: [client.0]
+        - ceph-fuse: [client.1]
+        - interactive:
+
+    Example that specifies the share name::
+
+        tasks:
+        - ceph:
+        - ceph-fuse:
+        - samba:
+            samba.0:
+                cephfuse: "{testdir}/mnt.0"
+        - cifs-mount:
+            client.0:
+                share: cephfuse
+
+    :param ctx: Context
+    :param config: Configuration
+    """
+    log.info('Mounting cifs clients...')
+
+    if config is None:
+        config = dict(('client.{id}'.format(id=id_), None)
+                  for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client'))
+    elif isinstance(config, list):
+        config = dict((name, None) for name in config)
+
+    clients = list(teuthology.get_clients(ctx=ctx, roles=config.keys()))
+
+    from teuthology.task.samba import get_sambas
+    samba_roles = ['samba.{id_}'.format(id_=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'samba')]
+    sambas = list(get_sambas(ctx=ctx, roles=samba_roles))
+    (ip, _) = sambas[0][1].ssh.get_transport().getpeername()
+    log.info('samba ip: {ip}'.format(ip=ip))
+
+    for id_, remote in clients:
+        mnt = os.path.join(teuthology.get_testdir(ctx), 'mnt.{id}'.format(id=id_))
+        log.info('Mounting cifs client.{id} at {remote} {mnt}...'.format(
+                id=id_, remote=remote, mnt=mnt))
+
+        remote.run(
+            args=[
+                'mkdir',
+                '--',
+                mnt,
+                ],
+            )
+
+        rolestr = 'client.{id_}'.format(id_=id_)
+        unc = "ceph"
+        log.info("config: {c}".format(c=config))
+        if config[rolestr] is not None and 'share' in config[rolestr]:
+            unc = config[rolestr]['share']
+
+        remote.run(
+            args=[
+                'sudo',
+                'mount',
+                '-t',
+                'cifs',
+                '//{sambaip}/{unc}'.format(sambaip=ip, unc=unc),
+                '-o',
+                'username=ubuntu,password=ubuntu',
+                mnt,
+                ],
+            )
+
+        remote.run(
+            args=[
+                'sudo',
+                'chown',
+                'ubuntu:ubuntu',
+                '{m}/'.format(m=mnt),
+                ],
+            )
+
+    try:
+        yield
+    finally:
+        log.info('Unmounting cifs clients...')
+        for id_, remote in clients:
+            remote.run(
+                args=[
+                    'sudo',
+                    'umount',
+                    mnt,
+                    ],
+                )
+        for id_, remote in clients:
+            while True:
+                try:
+                    remote.run(
+                        args=[
+                            'rmdir', '--', mnt,
+                            run.Raw('2>&1'),
+                            run.Raw('|'),
+                            'grep', 'Device or resource busy',
+                            ],
+                        )
+                    import time
+                    time.sleep(1)
+                except Exception:
+                    break
diff --git a/tasks/cram.py b/tasks/cram.py
new file mode 100644 (file)
index 0000000..05824d2
--- /dev/null
@@ -0,0 +1,135 @@
+"""
+Cram tests
+"""
+import logging
+import os
+
+from teuthology import misc as teuthology
+from teuthology.parallel import parallel
+from ..orchestra import run
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+    """
+    Run all cram tests from the specified urls on the specified
+    clients. Each client runs tests in parallel.
+
+    Limitations:
+    Tests must have a .t suffix. Tests with duplicate names will
+    overwrite each other, so only the last one will run.
+
+    For example::
+
+        tasks:
+        - ceph:
+        - cram:
+            clients:
+              client.0:
+              - http://ceph.com/qa/test.t
+              - http://ceph.com/qa/test2.t
+              client.1: [http://ceph.com/qa/test.t]
+
+    You can also run a list of cram tests on all clients::
+
+        tasks:
+        - ceph:
+        - cram:
+            clients:
+              all: [http://ceph.com/qa/test.t]
+
+    :param ctx: Context
+    :param config: Configuration
+    """
+    assert isinstance(config, dict)
+    assert 'clients' in config and isinstance(config['clients'], dict), \
+           'configuration must contain a dictionary of clients'
+
+    clients = teuthology.replace_all_with_clients(ctx.cluster,
+                                                  config['clients'])
+    testdir = teuthology.get_testdir(ctx)
+
+    try:
+        for client, tests in clients.iteritems():
+            (remote,) = ctx.cluster.only(client).remotes.iterkeys()
+            client_dir = '{tdir}/archive/cram.{role}'.format(tdir=testdir, role=client)
+            remote.run(
+                args=[
+                    'mkdir', '--', client_dir,
+                    run.Raw('&&'),
+                    'virtualenv', '{tdir}/virtualenv'.format(tdir=testdir),
+                    run.Raw('&&'),
+                    '{tdir}/virtualenv/bin/pip'.format(tdir=testdir),
+                    'install', 'cram',
+                    ],
+                )
+            for test in tests:
+                log.info('fetching test %s for %s', test, client)
+                assert test.endswith('.t'), 'tests must end in .t'
+                remote.run(
+                    args=[
+                        'wget', '-nc', '-nv', '-P', client_dir, '--', test,
+                        ],
+                    )
+
+        with parallel() as p:
+            for role in clients.iterkeys():
+                p.spawn(_run_tests, ctx, role)
+    finally:
+        for client, tests in clients.iteritems():
+            (remote,) = ctx.cluster.only(client).remotes.iterkeys()
+            client_dir = '{tdir}/archive/cram.{role}'.format(tdir=testdir, role=client)
+            test_files = set([test.rsplit('/', 1)[1] for test in tests])
+
+            # remove test files unless they failed
+            for test_file in test_files:
+                abs_file = os.path.join(client_dir, test_file)
+                remote.run(
+                    args=[
+                        'test', '-f', abs_file + '.err',
+                        run.Raw('||'),
+                        'rm', '-f', '--', abs_file,
+                        ],
+                    )
+
+            # ignore failure since more than one client may
+            # be run on a host, and the client dir should be
+            # non-empty if the test failed
+            remote.run(
+                args=[
+                    'rm', '-rf', '--',
+                    '{tdir}/virtualenv'.format(tdir=testdir),
+                    run.Raw(';'),
+                    'rmdir', '--ignore-fail-on-non-empty', client_dir,
+                    ],
+                )
+
+def _run_tests(ctx, role):
+    """
+    For each role, check to make sure it's a client, then run the cram on that client
+
+    :param ctx: Context
+    :param role: Roles
+    """
+    assert isinstance(role, basestring)
+    PREFIX = 'client.'
+    assert role.startswith(PREFIX)
+    id_ = role[len(PREFIX):]
+    (remote,) = ctx.cluster.only(role).remotes.iterkeys()
+    ceph_ref = ctx.summary.get('ceph-sha1', 'master')
+
+    testdir = teuthology.get_testdir(ctx)
+    log.info('Running tests for %s...', role)
+    remote.run(
+        args=[
+            run.Raw('CEPH_REF={ref}'.format(ref=ceph_ref)),
+            run.Raw('CEPH_ID="{id}"'.format(id=id_)),
+            'adjust-ulimits',
+            'ceph-coverage',
+            '{tdir}/archive/coverage'.format(tdir=testdir),
+            '{tdir}/virtualenv/bin/cram'.format(tdir=testdir),
+            '-v', '--',
+            run.Raw('{tdir}/archive/cram.{role}/*.t'.format(tdir=testdir, role=role)),
+            ],
+        logger=log.getChild(role),
+        )
diff --git a/tasks/devstack.py b/tasks/devstack.py
new file mode 100644 (file)
index 0000000..c676ace
--- /dev/null
@@ -0,0 +1,382 @@
+#!/usr/bin/env python
+import contextlib
+import logging
+from cStringIO import StringIO
+import textwrap
+from ConfigParser import ConfigParser
+import time
+
+from ..orchestra import run
+from .. import misc
+from ..contextutil import nested
+
+log = logging.getLogger(__name__)
+
+DEVSTACK_GIT_REPO = 'https://github.com/openstack-dev/devstack.git'
+DS_STABLE_BRANCHES = ("havana", "grizzly")
+
+is_devstack_node = lambda role: role.startswith('devstack')
+is_osd_node = lambda role: role.startswith('osd')
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+    if config is None:
+        config = {}
+    if not isinstance(config, dict):
+        raise TypeError("config must be a dict")
+    with nested(lambda: install(ctx=ctx, config=config),
+                lambda: smoke(ctx=ctx, config=config),
+                ):
+        yield
+
+
+@contextlib.contextmanager
+def install(ctx, config):
+    """
+    Install OpenStack DevStack and configure it to use a Ceph cluster for
+    Glance and Cinder.
+
+    Requires one node with a role 'devstack'
+
+    Since devstack runs rampant on the system it's used on, typically you will
+    want to reprovision that machine after using devstack on it.
+
+    Also, the default 2GB of RAM that is given to vps nodes is insufficient. I
+    recommend 4GB. Downburst can be instructed to give 4GB to a vps node by
+    adding this to the yaml::
+
+    downburst:
+        ram: 4G
+
+    This was created using documentation found here:
+        https://github.com/openstack-dev/devstack/blob/master/README.md
+        http://ceph.com/docs/master/rbd/rbd-openstack/
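+
+    A hypothetical suite fragment; the role layout and branch value are
+    examples only, not taken from an existing suite::
+
+        roles:
+        - [mon.0, osd.0, osd.1, osd.2]   # ceph cluster
+        - [devstack.0]                   # dedicated devstack node
+        tasks:
+        - ceph:
+        - devstack:
+            branch: havana               # optional; defaults to master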
+    """
+    if config is None:
+        config = {}
+    if not isinstance(config, dict):
+        raise TypeError("config must be a dict")
+
+    devstack_node = ctx.cluster.only(is_devstack_node).remotes.keys()[0]
+    an_osd_node = ctx.cluster.only(is_osd_node).remotes.keys()[0]
+
+    devstack_branch = config.get("branch", "master")
+    install_devstack(devstack_node, devstack_branch)
+    try:
+        configure_devstack_and_ceph(ctx, config, devstack_node, an_osd_node)
+        yield
+    finally:
+        pass
+
+
+def install_devstack(devstack_node, branch="master"):
+    log.info("Cloning DevStack repo...")
+
+    args = ['git', 'clone', DEVSTACK_GIT_REPO]
+    devstack_node.run(args=args)
+
+    if branch != "master":
+        if branch in DS_STABLE_BRANCHES and not branch.startswith("stable"):
+            branch = "stable/" + branch
+        log.info("Checking out {branch} branch...".format(branch=branch))
+        cmd = "cd devstack && git checkout " + branch
+        devstack_node.run(args=cmd)
+
+    log.info("Installing DevStack...")
+    args = ['cd', 'devstack', run.Raw('&&'), './stack.sh']
+    devstack_node.run(args=args)
+
+
+def configure_devstack_and_ceph(ctx, config, devstack_node, ceph_node):
+    pool_size = config.get('pool_size', '128')
+    create_pools(ceph_node, pool_size)
+    distribute_ceph_conf(devstack_node, ceph_node)
+    # This is where we would install python-ceph and ceph-common but it appears
+    # the ceph task does that for us.
+    generate_ceph_keys(ceph_node)
+    distribute_ceph_keys(devstack_node, ceph_node)
+    secret_uuid = set_libvirt_secret(devstack_node, ceph_node)
+    update_devstack_config_files(devstack_node, secret_uuid)
+    set_apache_servername(devstack_node)
+    # Rebooting is the most-often-used method of restarting devstack services
+    misc.reboot(devstack_node)
+    start_devstack(devstack_node)
+    restart_apache(devstack_node)
+
+
+def create_pools(ceph_node, pool_size):
+    log.info("Creating pools on Ceph cluster...")
+
+    for pool_name in ['volumes', 'images', 'backups']:
+        args = ['ceph', 'osd', 'pool', 'create', pool_name, pool_size]
+        ceph_node.run(args=args)
+
+
+def distribute_ceph_conf(devstack_node, ceph_node):
+    log.info("Copying ceph.conf to DevStack node...")
+
+    ceph_conf_path = '/etc/ceph/ceph.conf'
+    ceph_conf = misc.get_file(ceph_node, ceph_conf_path, sudo=True)
+    misc.sudo_write_file(devstack_node, ceph_conf_path, ceph_conf)
+
+
+def generate_ceph_keys(ceph_node):
+    log.info("Generating Ceph keys...")
+
+    ceph_auth_cmds = [
+        ['ceph', 'auth', 'get-or-create', 'client.cinder', 'mon',
+            'allow r', 'osd', 'allow class-read object_prefix rbd_children, allow rwx pool=volumes, allow rx pool=images'],  # noqa
+        ['ceph', 'auth', 'get-or-create', 'client.glance', 'mon',
+            'allow r', 'osd', 'allow class-read object_prefix rbd_children, allow rwx pool=images'],  # noqa
+        ['ceph', 'auth', 'get-or-create', 'client.cinder-backup', 'mon',
+            'allow r', 'osd', 'allow class-read object_prefix rbd_children, allow rwx pool=backups'],  # noqa
+    ]
+    for cmd in ceph_auth_cmds:
+        ceph_node.run(args=cmd)
+
+
+def distribute_ceph_keys(devstack_node, ceph_node):
+    log.info("Copying Ceph keys to DevStack node...")
+
+    def copy_key(from_remote, key_name, to_remote, dest_path, owner):
+        key_stringio = StringIO()
+        from_remote.run(
+            args=['ceph', 'auth', 'get-or-create', key_name],
+            stdout=key_stringio)
+        key_stringio.seek(0)
+        misc.sudo_write_file(to_remote, dest_path,
+                             key_stringio, owner=owner)
+    keys = [
+        dict(name='client.glance',
+             path='/etc/ceph/ceph.client.glance.keyring',
+             # devstack appears to just want root:root
+             #owner='glance:glance',
+             ),
+        dict(name='client.cinder',
+             path='/etc/ceph/ceph.client.cinder.keyring',
+             # devstack appears to just want root:root
+             #owner='cinder:cinder',
+             ),
+        dict(name='client.cinder-backup',
+             path='/etc/ceph/ceph.client.cinder-backup.keyring',
+             # devstack appears to just want root:root
+             #owner='cinder:cinder',
+             ),
+    ]
+    for key_dict in keys:
+        copy_key(ceph_node, key_dict['name'], devstack_node,
+                 key_dict['path'], key_dict.get('owner'))
+
+
+def set_libvirt_secret(devstack_node, ceph_node):
+    log.info("Setting libvirt secret...")
+
+    cinder_key_stringio = StringIO()
+    ceph_node.run(args=['ceph', 'auth', 'get-key', 'client.cinder'],
+                  stdout=cinder_key_stringio)
+    cinder_key = cinder_key_stringio.getvalue().strip()
+
+    uuid_stringio = StringIO()
+    devstack_node.run(args=['uuidgen'], stdout=uuid_stringio)
+    uuid = uuid_stringio.getvalue().strip()
+
+    secret_path = '/tmp/secret.xml'
+    secret_template = textwrap.dedent("""
+    <secret ephemeral='no' private='no'>
+        <uuid>{uuid}</uuid>
+        <usage type='ceph'>
+            <name>client.cinder secret</name>
+        </usage>
+    </secret>""")
+    misc.sudo_write_file(devstack_node, secret_path,
+                         secret_template.format(uuid=uuid))
+    devstack_node.run(args=['sudo', 'virsh', 'secret-define', '--file',
+                            secret_path])
+    devstack_node.run(args=['sudo', 'virsh', 'secret-set-value', '--secret',
+                            uuid, '--base64', cinder_key])
+    return uuid
+
+
+def update_devstack_config_files(devstack_node, secret_uuid):
+    log.info("Updating DevStack config files to use Ceph...")
+
+    def backup_config(node, file_name, backup_ext='.orig.teuth'):
+        node.run(args=['cp', '-f', file_name, file_name + backup_ext])
+
+    def update_config(config_name, config_stream, update_dict,
+                      section='DEFAULT'):
+        parser = ConfigParser()
+        parser.readfp(config_stream)
+        for (key, value) in update_dict.items():
+            parser.set(section, key, value)
+        out_stream = StringIO()
+        parser.write(out_stream)
+        out_stream.seek(0)
+        return out_stream
+
+    updates = [
+        dict(name='/etc/glance/glance-api.conf', options=dict(
+            default_store='rbd',
+            rbd_store_user='glance',
+            rbd_store_pool='images',
+            show_image_direct_url='True',)),
+        dict(name='/etc/cinder/cinder.conf', options=dict(
+            volume_driver='cinder.volume.drivers.rbd.RBDDriver',
+            rbd_pool='volumes',
+            rbd_ceph_conf='/etc/ceph/ceph.conf',
+            rbd_flatten_volume_from_snapshot='false',
+            rbd_max_clone_depth='5',
+            glance_api_version='2',
+            rbd_user='cinder',
+            rbd_secret_uuid=secret_uuid,
+            backup_driver='cinder.backup.drivers.ceph',
+            backup_ceph_conf='/etc/ceph/ceph.conf',
+            backup_ceph_user='cinder-backup',
+            backup_ceph_chunk_size='134217728',
+            backup_ceph_pool='backups',
+            backup_ceph_stripe_unit='0',
+            backup_ceph_stripe_count='0',
+            restore_discard_excess_bytes='true',
+            )),
+        dict(name='/etc/nova/nova.conf', options=dict(
+            libvirt_images_type='rbd',
+            libvirt_images_rbd_pool='volumes',
+            libvirt_images_rbd_ceph_conf='/etc/ceph/ceph.conf',
+            rbd_user='cinder',
+            rbd_secret_uuid=secret_uuid,
+            libvirt_inject_password='false',
+            libvirt_inject_key='false',
+            libvirt_inject_partition='-2',
+            )),
+    ]
+
+    for update in updates:
+        file_name = update['name']
+        options = update['options']
+        config_str = misc.get_file(devstack_node, file_name, sudo=True)
+        config_stream = StringIO(config_str)
+        backup_config(devstack_node, file_name)
+        new_config_stream = update_config(file_name, config_stream, options)
+        misc.sudo_write_file(devstack_node, file_name, new_config_stream)
+
+
+def set_apache_servername(node):
+    # Apache complains: "Could not reliably determine the server's fully
+    # qualified domain name, using 127.0.0.1 for ServerName"
+    # So, let's make sure it knows its name.
+    log.info("Setting Apache ServerName...")
+
+    hostname = node.hostname
+    config_file = '/etc/apache2/conf.d/servername'
+    misc.sudo_write_file(node, config_file,
+                         "ServerName {name}".format(name=hostname))
+
+
+def start_devstack(devstack_node):
+    log.info("Patching devstack start script...")
+    # This causes screen to start headless - otherwise rejoin-stack.sh fails
+    # because there is no terminal attached.
+    cmd = "cd devstack && sed -ie 's/screen -c/screen -dm -c/' rejoin-stack.sh"
+    devstack_node.run(args=cmd)
+
+    log.info("Starting devstack...")
+    cmd = "cd devstack && ./rejoin-stack.sh"
+    devstack_node.run(args=cmd)
+
+    # This was added because I was getting timeouts on Cinder requests - which
+    # were trying to access Keystone on port 5000. A more robust way to handle
+    # this would be to introduce a wait-loop on devstack_node that checks to
+    # see if a service is listening on port 5000.
+    log.info("Waiting 30s for devstack to start...")
+    time.sleep(30)
+
+
+def restart_apache(node):
+    node.run(args=['sudo', '/etc/init.d/apache2', 'restart'], wait=True)
+
+
+@contextlib.contextmanager
+def exercise(ctx, config):
+    log.info("Running devstack exercises...")
+
+    if config is None:
+        config = {}
+    if not isinstance(config, dict):
+        raise TypeError("config must be a dict")
+
+    devstack_node = ctx.cluster.only(is_devstack_node).remotes.keys()[0]
+
+    # TODO: save the log *and* preserve failures
+    #devstack_archive_dir = create_devstack_archive(ctx, devstack_node)
+
+    try:
+        #cmd = "cd devstack && ./exercise.sh 2>&1 | tee {dir}/exercise.log".format(  # noqa
+        #    dir=devstack_archive_dir)
+        cmd = "cd devstack && ./exercise.sh"
+        devstack_node.run(args=cmd, wait=True)
+        yield
+    finally:
+        pass
+
+
+def create_devstack_archive(ctx, devstack_node):
+    test_dir = misc.get_testdir(ctx)
+    devstack_archive_dir = "{test_dir}/archive/devstack".format(
+        test_dir=test_dir)
+    devstack_node.run(args="mkdir -p " + devstack_archive_dir)
+    return devstack_archive_dir
+
+
+@contextlib.contextmanager
+def smoke(ctx, config):
+    log.info("Running a basic smoketest...")
+
+    devstack_node = ctx.cluster.only(is_devstack_node).remotes.keys()[0]
+    an_osd_node = ctx.cluster.only(is_osd_node).remotes.keys()[0]
+
+    try:
+        create_volume(devstack_node, an_osd_node, 'smoke0', 1)
+        yield
+    finally:
+        pass
+
+
+def create_volume(devstack_node, ceph_node, vol_name, size):
+    """
+    :param size: The size of the volume, in GB
+    """
+    size = str(size)
+    log.info("Creating a {size}GB volume named {name}...".format(
+        name=vol_name,
+        size=size))
+    args = ['source', 'devstack/openrc', run.Raw('&&'), 'cinder', 'create',
+            '--display-name', vol_name, size]
+    out_stream = StringIO()
+    devstack_node.run(args=args, stdout=out_stream, wait=True)
+    vol_info = parse_os_table(out_stream.getvalue())
+    log.debug("Volume info: %s", str(vol_info))
+
+    out_stream = StringIO()
+    try:
+        ceph_node.run(args="rbd --id cinder ls -l volumes", stdout=out_stream,
+                      wait=True)
+    except run.CommandFailedError:
+        log.debug("Original rbd call failed; retrying without '--id cinder'")
+        ceph_node.run(args="rbd ls -l volumes", stdout=out_stream,
+                      wait=True)
+
+    assert vol_info['id'] in out_stream.getvalue(), \
+        "Volume not found on Ceph cluster"
+    assert vol_info['size'] == size, \
+        "Volume size on Ceph cluster is different than specified"
+    return vol_info['id']
+
+
+def parse_os_table(table_str):
+    """
+    Parse the ascii table printed by OpenStack CLI tools into a dict,
+    mapping each row's field name to its value.
+    """
+    out_dict = dict()
+    for line in table_str.split('\n'):
+        if line.startswith('|'):
+            items = line.split()
+            out_dict[items[1]] = items[3]
+    return out_dict
diff --git a/tasks/die_on_err.py b/tasks/die_on_err.py
new file mode 100644 (file)
index 0000000..1dfd370
--- /dev/null
@@ -0,0 +1,70 @@
+"""
+Raise exceptions on osd coredumps or test err directories
+"""
+import contextlib
+import logging
+import time
+from ..orchestra import run
+
+import ceph_manager
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Die if {testdir}/err exists or if an OSD dumps core
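+
+    Illustrative usage (the fragment below is an example only)::
+
+        tasks:
+        - ceph:
+        - die_on_err:    # polls the osds until an error or coredump is seen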
+    """
+    if config is None:
+        config = {}
+
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+
+    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
+    log.info('num_osds is %s' % num_osds)
+
+    manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'),
+        )
+
+    while len(manager.get_osd_status()['up']) < num_osds:
+        time.sleep(10)
+
+    testdir = teuthology.get_testdir(ctx)
+
+    while True:
+        for i in range(num_osds):
+            (osd_remote,) = ctx.cluster.only('osd.%d' % i).remotes.iterkeys()
+            p = osd_remote.run(
+                args = [ 'test', '-e', '{tdir}/err'.format(tdir=testdir) ],
+                wait=True,
+                check_status=False,
+            )
+            exit_status = p.exitstatus
+
+            if exit_status == 0:
+                log.info("osd %d has an error" % i)
+                raise Exception("osd %d error" % i)
+
+            log_path = '/var/log/ceph/osd.%d.log' % (i)
+
+            p = osd_remote.run(
+                args = [
+                         'tail', '-1', log_path,
+                         run.Raw('|'),
+                         'grep', '-q', 'end dump'
+                       ],
+                wait=True,
+                check_status=False,
+            )
+            exit_status = p.exitstatus
+
+            if exit_status == 0:
+                log.info("osd %d dumped core" % i)
+                raise Exception("osd %d dumped core" % i)
+
+        time.sleep(5)
diff --git a/tasks/divergent_priors.py b/tasks/divergent_priors.py
new file mode 100644 (file)
index 0000000..432614f
--- /dev/null
@@ -0,0 +1,148 @@
+"""
+Special case divergence test
+"""
+import logging
+import time
+
+import ceph_manager
+from teuthology import misc as teuthology
+from teuthology.task_util.rados import rados
+
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+    """
+    Test handling of divergent entries with prior_version
+    prior to log_tail
+
+    config: none
+
+    Requires 3 osds.
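+
+    An illustrative fragment satisfying that requirement (role layout is an
+    example only)::
+
+        roles:
+        - [mon.0, osd.0, osd.1, osd.2]   # three osds, as required
+        tasks:
+        - ceph:
+        - divergent_priors: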
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'divergent_priors task only accepts a dict for configuration'
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+
+    manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'),
+        )
+    ctx.manager = manager
+
+    while len(manager.get_osd_status()['up']) < 3:
+        time.sleep(10)
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.raw_cluster_cmd('osd', 'set', 'noout')
+    manager.raw_cluster_cmd('osd', 'set', 'noin')
+    manager.raw_cluster_cmd('osd', 'set', 'nodown')
+    manager.wait_for_clean()
+
+    # something that is always there
+    dummyfile = '/etc/fstab'
+    dummyfile2 = '/etc/resolv.conf'
+
+    # create 1 pg pool
+    log.info('creating foo')
+    manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1')
+
+    osds = [0, 1, 2]
+    for i in osds:
+        manager.set_config(i, osd_min_pg_log_entries=1)
+
+    # determine primary
+    divergent = manager.get_pg_primary('foo', 0)
+    log.info("primary and soon to be divergent is %d", divergent)
+    non_divergent = [0,1,2]
+    non_divergent.remove(divergent)
+
+    log.info('writing initial objects')
+    # write 1000 objects
+    for i in range(1000):
+        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile])
+
+    manager.wait_for_clean()
+
+    # blackhole non_divergent
+    log.info("blackholing osds %s", str(non_divergent))
+    for i in non_divergent:
+        manager.set_config(i, filestore_blackhole='')
+
+    # write 1 (divergent) object
+    log.info('writing divergent object existing_0')
+    rados(
+        ctx, mon, ['-p', 'foo', 'put', 'existing_0', dummyfile2],
+        wait=False)
+    time.sleep(10)
+    mon.run(
+        args=['killall', '-9', 'rados'],
+        wait=True,
+        check_status=False)
+
+    # kill all the osds
+    log.info('killing all the osds')
+    for i in osds:
+        manager.kill_osd(i)
+    for i in osds:
+        manager.mark_down_osd(i)
+    for i in osds:
+        manager.mark_out_osd(i)
+
+    # bring up non-divergent
+    log.info("bringing up non_divergent %s", str(non_divergent))
+    for i in non_divergent:
+        manager.revive_osd(i)
+    for i in non_divergent:
+        manager.mark_in_osd(i)
+
+    log.info('making log long to prevent backfill')
+    for i in non_divergent:
+        manager.set_config(i, osd_min_pg_log_entries=100000)
+
+    # write 1 non-divergent object (ensure that old divergent one is divergent)
+    log.info('writing non-divergent object existing_1')
+    rados(ctx, mon, ['-p', 'foo', 'put', 'existing_1', dummyfile2])
+
+    manager.wait_for_recovery()
+
+    # ensure no recovery
+    log.info('delay recovery')
+    for i in non_divergent:
+        manager.set_config(i, osd_recovery_delay_start=100000)
+
+    # bring in our divergent friend
+    log.info("revive divergent %d", divergent)
+    manager.revive_osd(divergent)
+
+    while len(manager.get_osd_status()['up']) < 3:
+        time.sleep(10)
+
+    log.info('delay recovery divergent')
+    manager.set_config(divergent, osd_recovery_delay_start=100000)
+    log.info('mark divergent in')
+    manager.mark_in_osd(divergent)
+
+    log.info('wait for peering')
+    rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile])
+
+    log.info("killing divergent %d", divergent)
+    manager.kill_osd(divergent)
+    log.info("reviving divergent %d", divergent)
+    manager.revive_osd(divergent)
+
+    log.info('allowing recovery')
+    for i in non_divergent:
+        manager.set_config(i, osd_recovery_delay_start=0)
+
+    log.info('reading existing_0')
+    exit_status = rados(ctx, mon,
+                        ['-p', 'foo', 'get', 'existing_0',
+                         '-o', '/tmp/existing'])
+    assert exit_status == 0
+    log.info("success")
diff --git a/tasks/dump_stuck.py b/tasks/dump_stuck.py
new file mode 100644 (file)
index 0000000..9e1780f
--- /dev/null
@@ -0,0 +1,146 @@
+"""
+Dump_stuck command
+"""
+import logging
+import re
+import time
+
+import ceph_manager
+from teuthology import misc as teuthology
+
+
+log = logging.getLogger(__name__)
+
+def check_stuck(manager, num_inactive, num_unclean, num_stale, timeout=10):
+    """
+    Do checks.  Make sure get_stuck_pgs returns the right amount of information, then
+    extract health information from the raw_cluster_cmd and compare the results with
+    values passed in.  This passes if all asserts pass.
+    :param manager: Ceph manager
+    :param num_inactive: number of inactive pgs that are stuck
+    :param num_unclean: number of unclean pgs that are stuck
+    :param num_stale: number of stale pgs that are stuck
+    :param timeout: timeout value for get_stuck_pgs calls
+    """
+    inactive = manager.get_stuck_pgs('inactive', timeout)
+    assert len(inactive) == num_inactive
+    unclean = manager.get_stuck_pgs('unclean', timeout)
+    assert len(unclean) == num_unclean
+    stale = manager.get_stuck_pgs('stale', timeout)
+    assert len(stale) == num_stale
+
+    # check health output as well
+    health = manager.raw_cluster_cmd('health')
+    log.debug('ceph health is: %s', health)
+    if num_inactive > 0:
+        m = re.search('(\d+) pgs stuck inactive', health)
+        assert int(m.group(1)) == num_inactive
+    if num_unclean > 0:
+        m = re.search('(\d+) pgs stuck unclean', health)
+        assert int(m.group(1)) == num_unclean
+    if num_stale > 0:
+        m = re.search('(\d+) pgs stuck stale', health)
+        assert int(m.group(1)) == num_stale
+
+def task(ctx, config):
+    """
+    Test the dump_stuck command.
+
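+    The task takes no configuration and asserts that the cluster has exactly
+    two osds; an illustrative fragment (example only)::
+
+        tasks:
+        - ceph:
+        - dump_stuck:
+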
+    :param ctx: Context
+    :param config: Configuration
+    """
+    assert config is None, \
+        'dump_stuck requires no configuration'
+    assert teuthology.num_instances_of_type(ctx.cluster, 'osd') == 2, \
+        'dump_stuck requires exactly 2 osds'
+
+    timeout = 60
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+
+    manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'),
+        )
+
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.wait_for_clean(timeout)
+
+    manager.raw_cluster_cmd('tell', 'mon.0', 'injectargs', '--',
+#                            '--mon-osd-report-timeout 90',
+                            '--mon-pg-stuck-threshold 10')
+
+    check_stuck(
+        manager,
+        num_inactive=0,
+        num_unclean=0,
+        num_stale=0,
+        )
+    num_pgs = manager.get_num_pgs()
+
+    manager.mark_out_osd(0)
+    time.sleep(timeout)
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.wait_for_recovery(timeout)
+
+    check_stuck(
+        manager,
+        num_inactive=0,
+        num_unclean=num_pgs,
+        num_stale=0,
+        )
+
+    manager.mark_in_osd(0)
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.wait_for_clean(timeout)
+
+    check_stuck(
+        manager,
+        num_inactive=0,
+        num_unclean=0,
+        num_stale=0,
+        )
+
+    for id_ in teuthology.all_roles_of_type(ctx.cluster, 'osd'):
+        manager.kill_osd(id_)
+        manager.mark_down_osd(id_)
+
+    starttime = time.time()
+    done = False
+    while not done:
+        try:
+            check_stuck(
+                manager,
+                num_inactive=0,
+                num_unclean=0,
+                num_stale=num_pgs,
+                )
+            done = True
+        except AssertionError:
+            # wait up to 15 minutes to become stale
+            if time.time() - starttime > 900:
+                raise
+
+    for id_ in teuthology.all_roles_of_type(ctx.cluster, 'osd'):
+        manager.revive_osd(id_)
+        manager.mark_in_osd(id_)
+    while True:
+        try:
+            manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+            manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+            break
+        except Exception:
+            log.exception('osds must not be started yet, waiting...')
+            time.sleep(1)
+    manager.wait_for_clean(timeout)
+
+    check_stuck(
+        manager,
+        num_inactive=0,
+        num_unclean=0,
+        num_stale=0,
+        )
diff --git a/tasks/ec_lost_unfound.py b/tasks/ec_lost_unfound.py
new file mode 100644 (file)
index 0000000..25bac6c
--- /dev/null
@@ -0,0 +1,134 @@
+"""
+Lost_unfound
+"""
+import logging
+import ceph_manager
+from teuthology import misc as teuthology
+from teuthology.task_util.rados import rados
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+    """
+    Test handling of lost objects on an ec pool.
+
+    A pretty rigid cluster is brought up and tested by this task.
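+
+    An illustrative fragment (role layout is an example only; the task itself
+    creates a k=2, m=2 erasure-coded pool, so at least four osds are needed)::
+
+        roles:
+        - [mon.0, osd.0, osd.1, osd.2, osd.3]
+        tasks:
+        - ceph:
+        - ec_lost_unfound: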
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'lost_unfound task only accepts a dict for configuration'
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+
+    manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'),
+        )
+
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
+    manager.wait_for_clean()
+
+
+    pool = manager.create_pool_with_unique_name(
+        ec_pool=True,
+        ec_m=2,
+        ec_k=2)
+
+    # something that is always there
+    dummyfile = '/etc/fstab'
+
+    # kludge to make sure they get a map
+    rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile])
+
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.wait_for_recovery()
+
+    # create old objects
+    for f in range(1, 10):
+        rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile])
+        rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile])
+        rados(ctx, mon, ['-p', pool, 'rm', 'existed_%d' % f])
+
+    # delay recovery, and make the pg log very long (to prevent backfill)
+    manager.raw_cluster_cmd(
+            'tell', 'osd.1',
+            'injectargs',
+            '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000'
+            )
+
+    manager.kill_osd(0)
+    manager.mark_down_osd(0)
+    manager.kill_osd(3)
+    manager.mark_down_osd(3)
+    
+    for f in range(1, 10):
+        rados(ctx, mon, ['-p', pool, 'put', 'new_%d' % f, dummyfile])
+        rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile])
+        rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile])
+
+    # take out osd.1 and a necessary shard of those objects.
+    manager.kill_osd(1)
+    manager.mark_down_osd(1)
+    manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')
+    manager.revive_osd(0)
+    manager.wait_till_osd_is_up(0)
+    manager.revive_osd(3)
+    manager.wait_till_osd_is_up(3)
+
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
+    manager.wait_till_active()
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
+
+    # verify that there are unfound objects
+    unfound = manager.get_num_unfound_objects()
+    log.info("there are %d unfound objects" % unfound)
+    assert unfound
+
+    # mark stuff lost
+    pgs = manager.get_pg_stats()
+    for pg in pgs:
+        if pg['stat_sum']['num_objects_unfound'] > 0:
+            # verify that i can list them direct from the osd
+            log.info('listing missing/lost in %s state %s', pg['pgid'],
+                     pg['state']);
+            m = manager.list_pg_missing(pg['pgid'])
+            log.info('%s' % m)
+            assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']
+
+            log.info("reverting unfound in %s", pg['pgid'])
+            manager.raw_cluster_cmd('pg', pg['pgid'],
+                                    'mark_unfound_lost', 'delete')
+        else:
+            log.info("no unfound in %s", pg['pgid'])
+
+    manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')
+    manager.raw_cluster_cmd('tell', 'osd.3', 'debug', 'kick_recovery_wq', '5')
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
+    manager.wait_for_recovery()
+
+    # verify result
+    for f in range(1, 10):
+        err = rados(ctx, mon, ['-p', pool, 'get', 'new_%d' % f, '-'])
+        assert err
+        err = rados(ctx, mon, ['-p', pool, 'get', 'existed_%d' % f, '-'])
+        assert err
+        err = rados(ctx, mon, ['-p', pool, 'get', 'existing_%d' % f, '-'])
+        assert err
+
+    # see if osd.1 can cope
+    manager.revive_osd(1)
+    manager.wait_till_osd_is_up(1)
+    manager.wait_for_clean()
diff --git a/tasks/filestore_idempotent.py b/tasks/filestore_idempotent.py
new file mode 100644 (file)
index 0000000..d33ad64
--- /dev/null
@@ -0,0 +1,81 @@
+"""
+Filestore/filejournal handler
+"""
+import logging
+from ..orchestra import run
+import random
+
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+    """
+    Test filestore/filejournal handling of non-idempotent events.
+
+    Currently this is a kludge; we require that the ceph task precede us just
+    so that we get the tarball installed to run the test binary.
+
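+    Because of that ordering requirement, a typical fragment looks like the
+    following (example only; the client list is optional and defaults to all
+    clients)::
+
+        tasks:
+        - ceph:
+        - filestore_idempotent: [client.0]
+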
+    :param ctx: Context
+    :param config: Configuration
+    """
+    assert config is None or isinstance(config, list) \
+        or isinstance(config, dict), \
+        "task only supports a list or dictionary for configuration"
+    all_clients = ['client.{id}'.format(id=id_)
+                   for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+    if config is None:
+        config = all_clients
+    if isinstance(config, list):
+        config = dict.fromkeys(config)
+    clients = config.keys()
+
+    # just use the first client...
+    client = clients[0]
+    (remote,) = ctx.cluster.only(client).remotes.iterkeys()
+
+    testdir = teuthology.get_testdir(ctx)
+
+    dir = '%s/data/test.%s' % (testdir, client)
+
+    seed = str(int(random.uniform(1,100)))
+
+    try:
+        log.info('creating a working dir')
+        remote.run(args=['mkdir', dir])
+        remote.run(
+            args=[
+                'cd', dir,
+                run.Raw('&&'),
+                'wget','-q', '-Orun_seed_to.sh',
+                'http://ceph.com/git/?p=ceph.git;a=blob_plain;f=src/test/objectstore/run_seed_to.sh;hb=HEAD',
+                run.Raw('&&'),
+                'wget','-q', '-Orun_seed_to_range.sh',
+                'http://ceph.com/git/?p=ceph.git;a=blob_plain;f=src/test/objectstore/run_seed_to_range.sh;hb=HEAD',
+                run.Raw('&&'),
+                'chmod', '+x', 'run_seed_to.sh', 'run_seed_to_range.sh',
+                ]);
+
+        log.info('running a series of tests')
+        proc = remote.run(
+            args=[
+                'cd', dir,
+                run.Raw('&&'),
+                './run_seed_to_range.sh', seed, '50', '300',
+                ],
+            wait=False,
+            check_status=False)
+        result = proc.exitstatus.get()
+    
+        if result != 0:
+            remote.run(
+                args=[
+                    'cp', '-a', dir, '{tdir}/archive/idempotent_failure'.format(tdir=testdir),
+                    ])
+            raise Exception("./run_seed_to_range.sh errored out")
+
+    finally:
+        remote.run(args=[
+                'rm', '-rf', '--', dir
+                ])
+        
diff --git a/tasks/kclient.py b/tasks/kclient.py
new file mode 100644 (file)
index 0000000..34595e8
--- /dev/null
@@ -0,0 +1,112 @@
+"""
+Mount/unmount a ``kernel`` client.
+"""
+import contextlib
+import logging
+import os
+
+from teuthology import misc as teuthology
+from teuthology.task_util.kclient import write_secret_file
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Mount/unmount a ``kernel`` client.
+
+    The config is optional and defaults to mounting on all clients. If
+    a config is given, it is expected to be a list of clients to do
+    this operation on. This lets you e.g. set up one client with
+    ``ceph-fuse`` and another with ``kclient``.
+
+    Example that mounts all clients::
+
+        tasks:
+        - ceph:
+        - kclient:
+        - interactive:
+
+    Example that uses both ``kclient`` and ``ceph-fuse``::
+
+        tasks:
+        - ceph:
+        - ceph-fuse: [client.0]
+        - kclient: [client.1]
+        - interactive:
+
+    :param ctx: Context
+    :param config: Configuration
+    """
+    log.info('Mounting kernel clients...')
+    assert config is None or isinstance(config, list), \
+        "task kclient got invalid config"
+
+    if config is None:
+        config = ['client.{id}'.format(id=id_)
+                  for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+    clients = list(teuthology.get_clients(ctx=ctx, roles=config))
+
+    testdir = teuthology.get_testdir(ctx)
+
+    for id_, remote in clients:
+        mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
+        log.info('Mounting kclient client.{id} at {remote} {mnt}...'.format(
+                id=id_, remote=remote, mnt=mnt))
+
+        # figure mon ips
+        remotes_and_roles = ctx.cluster.remotes.items()
+        roles = [roles for (remote_, roles) in remotes_and_roles]
+        ips = [host for (host, port) in (remote_.ssh.get_transport().getpeername() for (remote_, roles) in remotes_and_roles)]
+        mons = teuthology.get_mons(roles, ips).values()
+
+        keyring = '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
+        secret = '{tdir}/data/client.{id}.secret'.format(tdir=testdir, id=id_)
+        write_secret_file(ctx, remote, 'client.{id}'.format(id=id_),
+                                     keyring, secret)
+
+        remote.run(
+            args=[
+                'mkdir',
+                '--',
+                mnt,
+                ],
+            )
+
+        remote.run(
+            args=[
+                'sudo',
+                'adjust-ulimits',
+                'ceph-coverage',
+                '{tdir}/archive/coverage'.format(tdir=testdir),
+                '/sbin/mount.ceph',
+                '{mons}:/'.format(mons=','.join(mons)),
+                mnt,
+                '-v',
+                '-o',
+                'name={id},secretfile={secret}'.format(id=id_,
+                                                       secret=secret),
+                ],
+            )
+
+    try:
+        yield
+    finally:
+        log.info('Unmounting kernel clients...')
+        for id_, remote in clients:
+            log.debug('Unmounting client client.{id}...'.format(id=id_))
+            mnt = os.path.join(testdir,  'mnt.{id}'.format(id=id_))
+            remote.run(
+                args=[
+                    'sudo',
+                    'umount',
+                    mnt,
+                    ],
+                )
+            remote.run(
+                args=[
+                    'rmdir',
+                    '--',
+                    mnt,
+                    ],
+                )
diff --git a/tasks/locktest.py b/tasks/locktest.py
new file mode 100755 (executable)
index 0000000..7832369
--- /dev/null
@@ -0,0 +1,134 @@
+"""
+locktests
+"""
+import logging
+
+from ..orchestra import run
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+    """
+    Run locktests, from the xfstests suite, on the given
+    clients. Whether the clients are ceph-fuse or kernel does not
+    matter, and the two clients can refer to the same mount.
+
+    The config is a list of two clients to run the locktest on. The
+    first client will be the host.
+
+    For example::
+       tasks:
+       - ceph:
+       - ceph-fuse: [client.0, client.1]
+       - locktest:
+           [client.0, client.1]
+
+    This task does not yield; there would be little point.
+
+    :param ctx: Context
+    :param config: Configuration
+    """
+
+    assert isinstance(config, list)
+    log.info('fetching and building locktests...')
+    (host,) = ctx.cluster.only(config[0]).remotes
+    (client,) = ctx.cluster.only(config[1]).remotes
+    ( _, _, host_id) = config[0].partition('.')
+    ( _, _, client_id) = config[1].partition('.')
+    testdir = teuthology.get_testdir(ctx)
+    hostmnt = '{tdir}/mnt.{id}'.format(tdir=testdir, id=host_id)
+    clientmnt = '{tdir}/mnt.{id}'.format(tdir=testdir, id=client_id)
+
+    try:
+        for client_name in config:
+            log.info('building on {client_}'.format(client_=client_name))
+            ctx.cluster.only(client_name).run(
+                args=[
+                    # explicitly does not support multiple locktest tasks
+                    # in a single run; the result archival would conflict
+                    'mkdir', '{tdir}/archive/locktest'.format(tdir=testdir),
+                    run.Raw('&&'),
+                    'mkdir', '{tdir}/locktest'.format(tdir=testdir),
+                    run.Raw('&&'),
+                    'wget',
+                    '-nv',
+                    'https://raw.github.com/gregsfortytwo/xfstests-ceph/master/src/locktest.c',
+                    '-O', '{tdir}/locktest/locktest.c'.format(tdir=testdir),
+                    run.Raw('&&'),
+                    'g++', '{tdir}/locktest/locktest.c'.format(tdir=testdir),
+                    '-o', '{tdir}/locktest/locktest'.format(tdir=testdir)
+                    ],
+                logger=log.getChild('locktest_client.{id}'.format(id=client_name)),
+                )
+
+        log.info('built locktest on each client')
+        
+        host.run(args=['sudo', 'touch',
+                       '{mnt}/locktestfile'.format(mnt=hostmnt),
+                       run.Raw('&&'),
+                       'sudo', 'chown', 'ubuntu.ubuntu',
+                       '{mnt}/locktestfile'.format(mnt=hostmnt)
+                       ]
+                 )
+
+        log.info('starting on host')
+        hostproc = host.run(
+            args=[
+                '{tdir}/locktest/locktest'.format(tdir=testdir),
+                '-p', '6788',
+                '-d',
+                '{mnt}/locktestfile'.format(mnt=hostmnt),
+                ],
+            wait=False,
+            logger=log.getChild('locktest.host'),
+            )
+        log.info('starting on client')
+        (_,_,hostaddr) = host.name.partition('@')
+        clientproc = client.run(
+            args=[
+                '{tdir}/locktest/locktest'.format(tdir=testdir),
+                '-p', '6788',
+                '-d',
+                '-h', hostaddr,
+                '{mnt}/locktestfile'.format(mnt=clientmnt),
+                ],
+            logger=log.getChild('locktest.client'),
+            wait=False
+            )
+        
+        hostresult = hostproc.exitstatus.get()
+        clientresult = clientproc.exitstatus.get()
+        if (hostresult != 0) or (clientresult != 0):
+            raise Exception("Did not pass locking test!")
+        log.info('finished locktest executable with results {r} and {s}'. \
+                     format(r=hostresult, s=clientresult))
+
+    finally:
+        log.info('cleaning up host dir')
+        host.run(
+            args=[
+                'mkdir', '-p', '{tdir}/locktest'.format(tdir=testdir),
+                run.Raw('&&'),
+                'rm', '-f', '{tdir}/locktest/locktest.c'.format(tdir=testdir),
+                run.Raw('&&'),
+                'rm', '-f', '{tdir}/locktest/locktest'.format(tdir=testdir),
+                run.Raw('&&'),
+                'rmdir', '{tdir}/locktest'.format(tdir=testdir)
+                ],
+            logger=log.getChild('.{id}'.format(id=config[0])),
+            )
+        log.info('cleaning up client dir')
+        client.run(
+            args=[
+                'mkdir', '-p', '{tdir}/locktest'.format(tdir=testdir),
+                run.Raw('&&'),
+                'rm', '-f', '{tdir}/locktest/locktest.c'.format(tdir=testdir),
+                run.Raw('&&'),
+                'rm', '-f', '{tdir}/locktest/locktest'.format(tdir=testdir),
+                run.Raw('&&'),
+                'rmdir', '{tdir}/locktest'.format(tdir=testdir)
+                ],
+            logger=log.getChild('.{id}'.format(\
+                    id=config[1])),
+            )
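The locktest task above leans on teuthology's non-blocking run: both ends are started with wait=False and joined later through exitstatus.get(). A minimal sketch of that idiom, assuming remote_a and remote_b are already-connected remote objects (placeholder names):

    # Sketch of the non-blocking run idiom used by locktest above.
    # remote_a / remote_b stand in for teuthology remote objects.
    proc_a = remote_a.run(args=['true'], wait=False)
    proc_b = remote_b.run(args=['true'], wait=False)
    # exitstatus.get() blocks until the remote command exits and returns its code.
    assert proc_a.exitstatus.get() == 0
    assert proc_b.exitstatus.get() == 0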
diff --git a/tasks/lost_unfound.py b/tasks/lost_unfound.py
new file mode 100644 (file)
index 0000000..700a300
--- /dev/null
@@ -0,0 +1,153 @@
+"""
+Lost_unfound
+"""
+import logging
+import ceph_manager
+from teuthology import misc as teuthology
+from teuthology.task_util.rados import rados
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+    """
+    Test handling of lost objects.
+
+    A pretty rigid cluster is brought up and tested by this task
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'lost_unfound task only accepts a dict for configuration'
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+
+    manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'),
+        )
+
+    while len(manager.get_osd_status()['up']) < 3:
+        manager.sleep(10)
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.wait_for_clean()
+
+    # something that is always there
+    dummyfile = '/etc/fstab'
+
+    # take an osd out until the very end
+    manager.kill_osd(2)
+    manager.mark_down_osd(2)
+    manager.mark_out_osd(2)
+
+    # kludge to make sure they get a map
+    rados(ctx, mon, ['-p', 'data', 'put', 'dummy', dummyfile])
+
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.wait_for_recovery()
+
+    # create old objects
+    for f in range(1, 10):
+        rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])
+        rados(ctx, mon, ['-p', 'data', 'put', 'existed_%d' % f, dummyfile])
+        rados(ctx, mon, ['-p', 'data', 'rm', 'existed_%d' % f])
+
+    # delay recovery, and make the pg log very long (to prevent backfill)
+    manager.raw_cluster_cmd(
+            'tell', 'osd.1',
+            'injectargs',
+            '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000'
+            )
+
+    manager.kill_osd(0)
+    manager.mark_down_osd(0)
+    
+    for f in range(1, 10):
+        rados(ctx, mon, ['-p', 'data', 'put', 'new_%d' % f, dummyfile])
+        rados(ctx, mon, ['-p', 'data', 'put', 'existed_%d' % f, dummyfile])
+        rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])
+
+    # bring osd.0 back up, let it peer, but don't replicate the new
+    # objects...
+    log.info('osd.0 command_args is %s' % 'foo')
+    log.info(ctx.daemons.get_daemon('osd', 0).command_args)
+    ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend([
+            '--osd-recovery-delay-start', '1000'
+            ])
+    manager.revive_osd(0)
+    manager.mark_in_osd(0)
+    manager.wait_till_osd_is_up(0)
+
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.wait_till_active()
+
+    # take out osd.1 and the only copy of those objects.
+    manager.kill_osd(1)
+    manager.mark_down_osd(1)
+    manager.mark_out_osd(1)
+    manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')
+
+    # bring up osd.2 so that things would otherwise, in theory, recover fully
+    manager.revive_osd(2)
+    manager.mark_in_osd(2)
+    manager.wait_till_osd_is_up(2)
+
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.wait_till_active()
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+
+    # verify that there are unfound objects
+    unfound = manager.get_num_unfound_objects()
+    log.info("there are %d unfound objects" % unfound)
+    assert unfound
+
+    # mark stuff lost
+    pgs = manager.get_pg_stats()
+    for pg in pgs:
+        if pg['stat_sum']['num_objects_unfound'] > 0:
+            primary = 'osd.%d' % pg['acting'][0]
+
+            # verify that i can list them direct from the osd
+            log.info('listing missing/lost in %s state %s', pg['pgid'],
+                     pg['state']);
+            m = manager.list_pg_missing(pg['pgid'])
+            #log.info('%s' % m)
+            assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']
+            num_unfound=0
+            for o in m['objects']:
+                if len(o['locations']) == 0:
+                    num_unfound += 1
+            assert m['num_unfound'] == num_unfound
+
+            log.info("reverting unfound in %s on %s", pg['pgid'], primary)
+            manager.raw_cluster_cmd('pg', pg['pgid'],
+                                    'mark_unfound_lost', 'revert')
+        else:
+            log.info("no unfound in %s", pg['pgid'])
+
+    manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.wait_for_recovery()
+
+    # verify result
+    for f in range(1, 10):
+        err = rados(ctx, mon, ['-p', 'data', 'get', 'new_%d' % f, '-'])
+        assert err
+        err = rados(ctx, mon, ['-p', 'data', 'get', 'existed_%d' % f, '-'])
+        assert err
+        err = rados(ctx, mon, ['-p', 'data', 'get', 'existing_%d' % f, '-'])
+        assert not err
+
+    # see if osd.1 can cope
+    manager.revive_osd(1)
+    manager.mark_in_osd(1)
+    manager.wait_till_osd_is_up(1)
+    manager.wait_for_clean()
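The per-PG assertions above amount to counting objects with no known locations in the list_missing dump and cross-checking the reported total. A hedged sketch of that check as a helper, assuming a dict shaped like the one list_pg_missing() returns:

    # Sketch: count location-less objects in a list_missing-style dump and
    # cross-check against the reported num_unfound (field names as used above).
    def count_unfound(missing):
        unfound = sum(1 for o in missing['objects'] if len(o['locations']) == 0)
        assert unfound == missing['num_unfound']
        return unfound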
diff --git a/tasks/manypools.py b/tasks/manypools.py
new file mode 100644 (file)
index 0000000..32b9d56
--- /dev/null
@@ -0,0 +1,73 @@
+"""
+Force pg creation on all osds
+"""
+from teuthology import misc as teuthology
+from ..orchestra import run
+import logging
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+    """
+    Create the specified number of pools and write 16 objects to them (thereby forcing
+    the PG creation on each OSD). This task creates pools from all the clients,
+    in parallel. It is easy to add other daemon types which have the appropriate
+    permissions, but I don't think anything else does.
+    The config is just the number of pools to create. I recommend setting
+    "mon create pg interval" to a very low value in your ceph config to speed
+    this up.
+    
+    You probably want to do this to look at memory consumption, and
+    maybe to test how performance changes with the number of PGs. For example:
+    
+    tasks:
+    - ceph:
+        config:
+          mon:
+            mon create pg interval: 1
+    - manypools: 3000
+    - radosbench:
+        clients: [client.0]
+        time: 360
+    """
+    
+    log.info('creating {n} pools'.format(n=config))
+    
+    poolnum = int(config)
+    creator_remotes = []
+    client_roles = teuthology.all_roles_of_type(ctx.cluster, 'client')
+    log.info('got client_roles={client_roles_}'.format(client_roles_=client_roles))
+    for role in client_roles:
+        log.info('role={role_}'.format(role_=role))
+        (creator_remote, ) = ctx.cluster.only('client.{id}'.format(id=role)).remotes.iterkeys()
+        creator_remotes.append((creator_remote, 'client.{id}'.format(id=role)))
+
+    remaining_pools = poolnum
+    poolprocs=dict()
+    while (remaining_pools > 0):
+        log.info('{n} pools remaining to create'.format(n=remaining_pools))
+        for remote, role_ in creator_remotes:
+            poolnum = remaining_pools
+            remaining_pools -= 1
+            if remaining_pools < 0:
+                continue
+            log.info('creating pool{num} on {role}'.format(num=poolnum, role=role_))
+            proc = remote.run(
+                args=[
+                    'rados',
+                    '--name', role_,
+                    'mkpool', 'pool{num}'.format(num=poolnum), '-1',
+                    run.Raw('&&'),
+                    'rados',
+                    '--name', role_,
+                    '--pool', 'pool{num}'.format(num=poolnum),
+                    'bench', '0', 'write', '-t', '16', '--block-size', '1'
+                    ],
+                wait=False,
+                )
+            log.info('waiting for pool and object creates')
+            poolprocs[remote] = proc
+        
+        run.wait(poolprocs.itervalues())
+    
+    log.info('created all {n} pools and wrote 16 objects to each'.format(n=poolnum))
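Each round handed to a client is one mkpool followed by a tiny bench write. A hedged sketch of that single round, reusing the run helper imported at the top of the file (the make_pool name is illustrative):

    # Sketch: one pool-create-and-prime round, as issued per client above.
    def make_pool(remote, role, num):
        return remote.run(
            args=[
                'rados', '--name', role, 'mkpool', 'pool{n}'.format(n=num), '-1',
                run.Raw('&&'),
                'rados', '--name', role, '--pool', 'pool{n}'.format(n=num),
                'bench', '0', 'write', '-t', '16', '--block-size', '1',
                ],
            wait=False,
            )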
diff --git a/tasks/mds_creation_failure.py b/tasks/mds_creation_failure.py
new file mode 100644 (file)
index 0000000..a3d052f
--- /dev/null
@@ -0,0 +1,83 @@
+
+import logging
+import contextlib
+import time
+import ceph_manager
+from teuthology import misc
+from teuthology.orchestra.run import CommandFailedError, Raw
+
+log = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Go through filesystem creation with a synthetic failure in an MDS
+    in its 'up:creating' state, to exercise the retry behaviour.
+    """
+    # Grab handles to the teuthology objects of interest
+    mdslist = list(misc.all_roles_of_type(ctx.cluster, 'mds'))
+    if len(mdslist) != 1:
+        # Require exactly one MDS, the code path for creation failure when
+        # a standby is available is different
+        raise RuntimeError("This task requires exactly one MDS")
+
+    mds_id = mdslist[0]
+    (mds_remote,) = ctx.cluster.only('mds.{_id}'.format(_id=mds_id)).remotes.iterkeys()
+    manager = ceph_manager.CephManager(
+        mds_remote, ctx=ctx, logger=log.getChild('ceph_manager'),
+    )
+
+    # Stop the MDS and reset the filesystem so that next start will go into CREATING
+    mds = ctx.daemons.get_daemon('mds', mds_id)
+    mds.stop()
+    data_pool_id = manager.get_pool_num("data")
+    md_pool_id = manager.get_pool_num("metadata")
+    manager.raw_cluster_cmd_result('mds', 'newfs', md_pool_id.__str__(), data_pool_id.__str__(),
+                                   '--yes-i-really-mean-it')
+
+    # Start the MDS with mds_kill_create_at set, it will crash during creation
+    mds.restart_with_args(["--mds_kill_create_at=1"])
+    try:
+        mds.wait_for_exit()
+    except CommandFailedError as e:
+        if e.exitstatus == 1:
+            log.info("MDS creation killed as expected")
+        else:
+            log.error("Unexpected status code %s" % e.exitstatus)
+            raise
+
+    # Since I have intentionally caused a crash, I will clean up the resulting core
+    # file to avoid task.internal.coredump seeing it as a failure.
+    log.info("Removing core file from synthetic MDS failure")
+    mds_remote.run(args=['rm', '-f', Raw("{archive}/coredump/*.core".format(archive=misc.get_archive_dir(ctx)))])
+
+    # It should have left the MDS map state still in CREATING
+    status = manager.get_mds_status(mds_id)
+    assert status['state'] == 'up:creating'
+
+    # Start the MDS again without the kill flag set, it should proceed with creation successfully
+    mds.restart()
+
+    # Wait for state ACTIVE
+    t = 0
+    create_timeout = 120
+    while True:
+        status = manager.get_mds_status(mds_id)
+        if status['state'] == 'up:active':
+            log.info("MDS creation completed successfully")
+            break
+        elif status['state'] == 'up:creating':
+            log.info("MDS still in creating state")
+            if t > create_timeout:
+                log.error("Creating did not complete within %ss" % create_timeout)
+                raise RuntimeError("Creating did not complete within %ss" % create_timeout)
+            t += 1
+            time.sleep(1)
+        else:
+            log.error("Unexpected MDS state: %s" % status['state'])
+            assert(status['state'] in ['up:active', 'up:creating'])
+
+    # The system should be back up in a happy healthy state, go ahead and run any further tasks
+    # inside this context.
+    yield
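The wait-for-active loop is the usual poll-until-state pattern. A hedged sketch of it factored into a helper (timeout and interval are illustrative; manager is the CephManager built in the task):

    # Sketch: poll the MDS map until mds_id reaches the wanted state or time out.
    import time

    def wait_for_mds_state(manager, mds_id, want, timeout=120, interval=1):
        waited = 0
        while True:
            state = manager.get_mds_status(mds_id)['state']
            if state == want:
                return
            if waited > timeout:
                raise RuntimeError('mds.%s stuck in %s, wanted %s' % (mds_id, state, want))
            waited += interval
            time.sleep(interval)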
diff --git a/tasks/mds_thrash.py b/tasks/mds_thrash.py
new file mode 100644 (file)
index 0000000..c60b741
--- /dev/null
@@ -0,0 +1,352 @@
+"""
+Thrash mds by simulating failures
+"""
+import logging
+import contextlib
+import ceph_manager
+import random
+import time
+from gevent.greenlet import Greenlet
+from gevent.event import Event
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+
+class MDSThrasher(Greenlet):
+    """
+    MDSThrasher::
+
+    The MDSThrasher thrashes MDSs during execution of other tasks (workunits, etc).
+
+    The config is optional.  Many of the config parameters are a maximum value
+    to use when selecting a random value from a range.  To always use the maximum
+    value, set randomize to false.  The config is a dict containing some or all of:
+
+    seed: [no default] seed the random number generator
+
+    randomize: [default: true] enables randomization; when false, the max/min values are always used
+
+    max_thrash: [default: 1] the maximum number of MDSs that will be thrashed at
+      any given time.
+
+    max_thrash_delay: [default: 30] maximum number of seconds to delay before
+      thrashing again.
+
+    max_revive_delay: [default: 10] maximum number of seconds to delay before
+      bringing back a thrashed MDS
+
+    thrash_in_replay: [default: 0.0] likelihood that the MDS will be thrashed
+      during replay.  Value should be between 0.0 and 1.0
+
+    max_replay_thrash_delay: [default: 4] maximum number of seconds to delay while in
+      the replay state before thrashing
+
+    thrash_weights: allows specific MDSs to be thrashed more/less frequently.  This option
+      overrides anything specified by max_thrash.  This option is a dict containing
+      mds.x: weight pairs.  For example, [mds.a: 0.7, mds.b: 0.3, mds.c: 0.0].  Each weight
+      is a value from 0.0 to 1.0.  Any MDSs not specified will be automatically
+      given a weight of 0.0.  For a given MDS, by default the trasher delays for up
+      to max_thrash_delay, trashes, waits for the MDS to recover, and iterates.  If a non-zero
+      weight is specified for an MDS, for each iteration the thrasher chooses whether to thrash
+      during that iteration based on a random value [0-1] not exceeding the weight of that MDS.
+
+    Examples::
+
+
+      The following example sets the likelihood that mds.a will be thrashed
+      to 80%, mds.b to 20%, and other MDSs will not be thrashed.  It also sets the
+      likelihood that an MDS will be thrashed in replay to 40%.
+      Thrash weights do not have to sum to 1.
+
+      tasks:
+      - ceph:
+      - mds_thrash:
+          thrash_weights:
+            - mds.a: 0.8
+            - mds.b: 0.2
+          thrash_in_replay: 0.4
+      - ceph-fuse:
+      - workunit:
+          clients:
+            all: [suites/fsx.sh]
+
+      The following example disables randomization, and uses the max delay values:
+
+      tasks:
+      - ceph:
+      - mds_thrash:
+          max_thrash_delay: 10
+          max_revive_delay: 1
+          max_replay_thrash_delay: 4
+
+    """
+
+    def __init__(self, ctx, manager, config, logger, failure_group, weight):
+        super(MDSThrasher, self).__init__()
+
+        self.ctx = ctx
+        self.manager = manager
+        assert self.manager.is_clean()
+
+        self.stopping = Event()
+        self.logger = logger
+        self.config = config
+
+        self.randomize = bool(self.config.get('randomize', True))
+        self.max_thrash_delay = float(self.config.get('max_thrash_delay', 30.0))
+        self.thrash_in_replay = float(self.config.get('thrash_in_replay', 0.0))
+        assert self.thrash_in_replay >= 0.0 and self.thrash_in_replay <= 1.0, 'thrash_in_replay ({v}) must be between [0.0, 1.0]'.format(
+            v=self.thrash_in_replay)
+
+        self.max_replay_thrash_delay = float(self.config.get('max_replay_thrash_delay', 4.0))
+
+        self.max_revive_delay = float(self.config.get('max_revive_delay', 10.0))
+
+        self.failure_group = failure_group
+        self.weight = weight
+
+    def _run(self):
+        try:
+            self.do_thrash()
+        except:
+            # Log exceptions here so we get the full backtrace (it's lost
+            # by the time someone does a .get() on this greenlet)
+            self.logger.exception("Exception in do_thrash:")
+            raise
+
+    def log(self, x):
+        """Write data to logger assigned to this MDThrasher"""
+        self.logger.info(x)
+
+    def stop(self):
+        self.stopping.set()
+
+    def do_thrash(self):
+        """
+        Perform the random thrashing action
+        """
+        self.log('starting mds_do_thrash for failure group: ' + ', '.join(
+            ['mds.{_id}'.format(_id=_f) for _f in self.failure_group]))
+        while not self.stopping.is_set():
+            delay = self.max_thrash_delay
+            if self.randomize:
+                delay = random.randrange(0.0, self.max_thrash_delay)
+
+            if delay > 0.0:
+                self.log('waiting for {delay} secs before thrashing'.format(delay=delay))
+                self.stopping.wait(delay)
+                if self.stopping.is_set():
+                    continue
+
+            skip = random.randrange(0.0, 1.0)
+            if self.weight < 1.0 and skip > self.weight:
+                self.log('skipping thrash iteration with skip ({skip}) > weight ({weight})'.format(skip=skip,
+                                                                                                   weight=self.weight))
+                continue
+
+            # find the active mds in the failure group
+            statuses = [self.manager.get_mds_status(m) for m in self.failure_group]
+            actives = filter(lambda s: s and s['state'] == 'up:active', statuses)
+            assert len(actives) == 1, 'Can only have one active in a failure group'
+
+            active_mds = actives[0]['name']
+            active_rank = actives[0]['rank']
+
+            self.log('kill mds.{id} (rank={r})'.format(id=active_mds, r=active_rank))
+            self.manager.kill_mds_by_rank(active_rank)
+
+            # wait for mon to report killed mds as crashed
+            last_laggy_since = None
+            itercount = 0
+            while True:
+                failed = self.manager.get_mds_status_all()['failed']
+                status = self.manager.get_mds_status(active_mds)
+                if not status:
+                    break
+                if 'laggy_since' in status:
+                    last_laggy_since = status['laggy_since']
+                    break
+                if any([(f == active_mds) for f in failed]):
+                    break
+                self.log(
+                    'waiting till mds map indicates mds.{_id} is laggy/crashed, in failed state, or mds.{_id} is removed from mdsmap'.format(
+                        _id=active_mds))
+                itercount = itercount + 1
+                if itercount > 10:
+                    self.log('mds map: {status}'.format(status=self.manager.get_mds_status_all()))
+                time.sleep(2)
+            if last_laggy_since:
+                self.log(
+                    'mds.{_id} reported laggy/crashed since: {since}'.format(_id=active_mds, since=last_laggy_since))
+            else:
+                self.log('mds.{_id} down, removed from mdsmap'.format(_id=active_mds, since=last_laggy_since))
+
+            # wait for a standby mds to takeover and become active
+            takeover_mds = None
+            takeover_rank = None
+            itercount = 0
+            while True:
+                statuses = [self.manager.get_mds_status(m) for m in self.failure_group]
+                actives = filter(lambda s: s and s['state'] == 'up:active', statuses)
+                if len(actives) > 0:
+                    assert len(actives) == 1, 'Can only have one active in failure group'
+                    takeover_mds = actives[0]['name']
+                    takeover_rank = actives[0]['rank']
+                    break
+                itercount = itercount + 1
+                if itercount > 10:
+                    self.log('mds map: {status}'.format(status=self.manager.get_mds_status_all()))
+
+            self.log('New active mds is mds.{_id}'.format(_id=takeover_mds))
+
+            # wait for a while before restarting old active to become new
+            # standby
+            delay = self.max_revive_delay
+            if self.randomize:
+                delay = random.randrange(0.0, self.max_revive_delay)
+
+            self.log('waiting for {delay} secs before reviving mds.{id}'.format(
+                delay=delay, id=active_mds))
+            time.sleep(delay)
+
+            self.log('reviving mds.{id}'.format(id=active_mds))
+            self.manager.revive_mds(active_mds, standby_for_rank=takeover_rank)
+
+            status = {}
+            while True:
+                status = self.manager.get_mds_status(active_mds)
+                if status and (status['state'] == 'up:standby' or status['state'] == 'up:standby-replay'):
+                    break
+                self.log(
+                    'waiting till mds map indicates mds.{_id} is in standby or standby-replay'.format(_id=active_mds))
+                time.sleep(2)
+            self.log('mds.{_id} reported in {state} state'.format(_id=active_mds, state=status['state']))
+
+            # don't do replay thrashing right now
+            continue
+            # this might race with replay -> active transition...
+            if status['state'] == 'up:replay' and random.randrange(0.0, 1.0) < self.thrash_in_replay:
+
+                delay = self.max_replay_thrash_delay
+                if self.randomize:
+                    delay = random.randrange(0.0, self.max_replay_thrash_delay)
+                time.sleep(delay)
+                self.log('kill replaying mds.{id}'.format(id=self.to_kill))
+                self.manager.kill_mds(self.to_kill)
+
+                delay = self.max_revive_delay
+                if self.randomize:
+                    delay = random.randrange(0.0, self.max_revive_delay)
+
+                self.log('waiting for {delay} secs before reviving mds.{id}'.format(
+                    delay=delay, id=self.to_kill))
+                time.sleep(delay)
+
+                self.log('revive mds.{id}'.format(id=self.to_kill))
+                self.manager.revive_mds(self.to_kill)
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Stress test the mds by thrashing while another task/workunit
+    is running.
+
+    Please refer to MDSThrasher class for further information on the
+    available options.
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'mds_thrash task only accepts a dict for configuration'
+    mdslist = list(teuthology.all_roles_of_type(ctx.cluster, 'mds'))
+    assert len(mdslist) > 1, \
+        'mds_thrash task requires at least 2 metadata servers'
+
+    # choose random seed
+    seed = None
+    if 'seed' in config:
+        seed = int(config['seed'])
+    else:
+        seed = int(time.time())
+    log.info('mds thrasher using random seed: {seed}'.format(seed=seed))
+    random.seed(seed)
+
+    max_thrashers = config.get('max_thrash', 1)
+    thrashers = {}
+
+    (first,) = ctx.cluster.only('mds.{_id}'.format(_id=mdslist[0])).remotes.iterkeys()
+    manager = ceph_manager.CephManager(
+        first, ctx=ctx, logger=log.getChild('ceph_manager'),
+    )
+
+    # make sure everyone is in active, standby, or standby-replay
+    log.info('Wait for all MDSs to reach steady state...')
+    statuses = None
+    statuses_by_rank = None
+    while True:
+        statuses = {m: manager.get_mds_status(m) for m in mdslist}
+        statuses_by_rank = {}
+        for _, s in statuses.iteritems():
+            if isinstance(s, dict):
+                statuses_by_rank[s['rank']] = s
+
+        ready = filter(lambda (_, s): s is not None and (s['state'] == 'up:active'
+                                                         or s['state'] == 'up:standby'
+                                                         or s['state'] == 'up:standby-replay'),
+                       statuses.items())
+        if len(ready) == len(statuses):
+            break
+        time.sleep(2)
+    log.info('Ready to start thrashing')
+
+    # setup failure groups
+    failure_groups = {}
+    actives = {s['name']: s for (_, s) in statuses.iteritems() if s['state'] == 'up:active'}
+    log.info('Actives is: {d}'.format(d=actives))
+    log.info('Statuses is: {d}'.format(d=statuses_by_rank))
+    for active in actives:
+        for (r, s) in statuses.iteritems():
+            if s['standby_for_name'] == active:
+                if not active in failure_groups:
+                    failure_groups[active] = []
+                log.info('Assigning mds rank {r} to failure group {g}'.format(r=r, g=active))
+                failure_groups[active].append(r)
+
+    manager.wait_for_clean()
+    for (active, standbys) in failure_groups.iteritems():
+        weight = 1.0
+        if 'thrash_weights' in config:
+            weight = float(config['thrash_weights'].get('mds.{_id}'.format(_id=active), '0.0'))
+
+        failure_group = [active]
+        failure_group.extend(standbys)
+
+        thrasher = MDSThrasher(
+            ctx, manager, config,
+            logger=log.getChild('mds_thrasher.failure_group.[{a}, {sbs}]'.format(
+                a=active,
+                sbs=', '.join(standbys)
+            )
+            ),
+            failure_group=failure_group,
+            weight=weight)
+        thrasher.start()
+        thrashers[active] = thrasher
+
+        # if thrash_weights isn't specified and we've reached max_thrash,
+        # we're done
+        if not 'thrash_weights' in config and len(thrashers) == max_thrashers:
+            break
+
+    try:
+        log.debug('Yielding')
+        yield
+    finally:
+        log.info('joining mds_thrashers')
+        for t in thrashers:
+            log.info('join thrasher for failure group [{fg}]'.format(fg=', '.join(failure_group)))
+            thrashers[t].stop()
+            thrashers[t].join()
+        log.info('done joining')
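One note on the per-iteration weight check in do_thrash(): random.randrange(0.0, 1.0) evaluates to 0 on every call, so the skip branch effectively never fires. A hedged sketch of the check as the docstring describes it, using random.random() instead (function name illustrative):

    # Sketch: thrash this iteration with probability 'weight' (0.0 - 1.0).
    import random

    def should_thrash(weight):
        return weight >= 1.0 or random.random() <= weight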
diff --git a/tasks/metadata.yaml b/tasks/metadata.yaml
new file mode 100644 (file)
index 0000000..ccdc3b0
--- /dev/null
@@ -0,0 +1,2 @@
+instance-id: test
+local-hostname: test
diff --git a/tasks/mon_clock_skew_check.py b/tasks/mon_clock_skew_check.py
new file mode 100644 (file)
index 0000000..891e6ec
--- /dev/null
@@ -0,0 +1,261 @@
+"""
+Handle clock skews in monitors.
+"""
+import logging
+import contextlib
+import ceph_manager
+import time
+import gevent
+from StringIO import StringIO
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+class ClockSkewCheck:
+    """
+    Periodically check if there are any clock skews among the monitors in the
+    quorum. By default, assume no skews are supposed to exist; that can be
+    changed using the 'expect-skew' option. If 'fail-on-skew' is set to false,
+    then we will always succeed and only report skews if any are found.
+
+    This class does not spawn a thread. It assumes that, if that is indeed
+    wanted, it should be done by a third party (for instance, the task using
+    this class). We intend it as such in order to reuse this class if need be.
+
+    This task accepts the following options:
+
+    interval     amount of seconds to wait in-between checks. (default: 30.0)
+    max-skew     maximum skew, in seconds, that is considered tolerable before
+                 issuing a warning. (default: 0.05)
+    expect-skew  'true' or 'false', to indicate whether to expect a skew during
+                 the run or not. If 'true', the test will fail if no skew is
+                 found, and succeed if a skew is indeed found; if 'false', it's
+                 the other way around. (default: false)
+    never-fail   Don't fail the run if a skew is detected and we weren't
+                 expecting it, or if no skew is detected and we were expecting
+                 it. (default: False)
+
+    at-least-once          Runs at least once, even if we are told to stop.
+                           (default: True)
+    at-least-once-timeout  If we were told to stop but we are attempting to
+                           run at least once, timeout after this many seconds.
+                           (default: 600)
+
+    Example:
+        Expect a skew higher than 0.05 seconds, but only report it without
+        failing the teuthology run.
+
+    - mon_clock_skew_check:
+        interval: 30
+        max-skew: 0.05
+        expect-skew: true
+        never-fail: true
+    """
+
+    def __init__(self, ctx, manager, config, logger):
+        self.ctx = ctx
+        self.manager = manager
+
+        self.stopping = False
+        self.logger = logger
+        self.config = config
+
+        if self.config is None:
+            self.config = dict()
+
+        self.check_interval = float(self.config.get('interval', 30.0))
+
+        first_mon = teuthology.get_first_mon(ctx, config)
+        remote = ctx.cluster.only(first_mon).remotes.keys()[0]
+        proc = remote.run(
+            args=[
+                'sudo',
+                'ceph-mon',
+                '-i', first_mon[4:],
+                '--show-config-value', 'mon_clock_drift_allowed'
+                ], stdout=StringIO(), wait=True
+                )
+        self.max_skew = self.config.get('max-skew', float(proc.stdout.getvalue()))
+
+        self.expect_skew = self.config.get('expect-skew', False)
+        self.never_fail = self.config.get('never-fail', False)
+        self.at_least_once = self.config.get('at-least-once', True)
+        self.at_least_once_timeout = self.config.get('at-least-once-timeout', 600.0)
+
+    def info(self, x):
+        """
+        locally define logger for info messages
+        """
+        self.logger.info(x)
+
+    def warn(self, x):
+        """
+        locally define logger for warnings
+        """
+        self.logger.warn(x)
+
+    def debug(self, x):
+        """
+        locally define logger for debug messages
+        """
+        self.logger.info(x)
+        self.logger.debug(x)
+
+    def finish(self):
+        """
+        Break out of the do_check loop.
+        """
+        self.stopping = True
+
+    def sleep_interval(self):
+        """
+        If a sleep interval is set, sleep for that amount of time.
+        """
+        if self.check_interval > 0.0:
+            self.debug('sleeping for {s} seconds'.format(
+                s=self.check_interval))
+            time.sleep(self.check_interval)
+
+    def print_skews(self, skews):
+        """
+        Display skew values.
+        """
+        total = len(skews)
+        if total > 0:
+            self.info('---------- found {n} skews ----------'.format(n=total))
+            for mon_id, values in skews.iteritems():
+                self.info('mon.{id}: {v}'.format(id=mon_id, v=values))
+            self.info('-------------------------------------')
+        else:
+            self.info('---------- no skews were found ----------')
+
+    def do_check(self):
+        """
+        Clock skew checker.  Loops until finish() is called.
+        """
+        self.info('start checking for clock skews')
+        skews = dict()
+        ran_once = False
+        
+        started_on = None
+
+        while not self.stopping or (self.at_least_once and not ran_once):
+
+            if self.at_least_once and not ran_once and self.stopping:
+                if started_on is None:
+                    self.info('kicking-off timeout (if any)')
+                    started_on = time.time()
+                elif self.at_least_once_timeout > 0.0:
+                    assert time.time() - started_on < self.at_least_once_timeout, \
+                        'failed to obtain a timecheck before timeout expired'
+
+            quorum_size = len(teuthology.get_mon_names(self.ctx))
+            self.manager.wait_for_mon_quorum_size(quorum_size)
+
+            health = self.manager.get_mon_health(True)
+            timechecks = health['timechecks']
+
+            clean_check = False
+
+            if timechecks['round_status'] == 'finished':
+                assert (timechecks['round'] % 2) == 0, \
+                    'timecheck marked as finished but round ' \
+                    'disagrees (r {r})'.format(
+                        r=timechecks['round'])
+                clean_check = True
+            else:
+                assert timechecks['round_status'] == 'on-going', \
+                        'timecheck status expected \'on-going\' ' \
+                        'but found \'{s}\' instead'.format(
+                            s=timechecks['round_status'])
+                if 'mons' in timechecks.keys() and len(timechecks['mons']) > 1:
+                    self.info('round still on-going, but there are available reports')
+                else:
+                    self.info('no timechecks available just yet')
+                    self.sleep_interval()
+                    continue
+
+            assert len(timechecks['mons']) > 1, \
+                'there are not enough reported timechecks; ' \
+                'expected > 1 found {n}'.format(n=len(timechecks['mons']))
+
+            for check in timechecks['mons']:
+                mon_skew = float(check['skew'])
+                mon_health = check['health']
+                mon_id = check['name']
+                if abs(mon_skew) > self.max_skew:
+                    assert mon_health == 'HEALTH_WARN', \
+                        'mon.{id} health is \'{health}\' but skew {s} > max {ms}'.format(
+                            id=mon_id,health=mon_health,s=abs(mon_skew),ms=self.max_skew)
+
+                    log_str = 'mon.{id} with skew {s} > max {ms}'.format(
+                        id=mon_id,s=abs(mon_skew),ms=self.max_skew)
+
+                    """ add to skew list """
+                    details = check['details']
+                    skews[mon_id] = {'skew': mon_skew, 'details': details}
+
+                    if self.expect_skew:
+                        self.info('expected skew: {str}'.format(str=log_str))
+                    else:
+                        self.warn('unexpected skew: {str}'.format(str=log_str))
+
+            if clean_check or (self.expect_skew and len(skews) > 0):
+                ran_once = True
+                self.print_skews(skews)
+            self.sleep_interval()
+
+        total = len(skews)
+        self.print_skews(skews)
+
+        error_str = ''
+        found_error = False
+
+        if self.expect_skew:
+            if total == 0:
+                error_str = 'We were expecting a skew, but none was found!'
+                found_error = True
+        else:
+            if total > 0:
+                error_str = 'We were not expecting a skew, but we did find it!'
+                found_error = True
+
+        if found_error:
+            self.info(error_str)
+            if not self.never_fail:
+                assert False, error_str
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Use the ClockSkewCheck class to check for clock skews on the monitors.
+    This task will spawn a thread running ClockSkewCheck's do_check().
+
+    All the configuration will be directly handled by ClockSkewCheck,
+    so please refer to the class documentation for further information.
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'mon_clock_skew_check task only accepts a dict for configuration'
+    log.info('Beginning mon_clock_skew_check...')
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+    manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'),
+        )
+
+    skew_check = ClockSkewCheck(ctx,
+        manager, config,
+        logger=log.getChild('mon_clock_skew_check'))
+    skew_check_thread = gevent.spawn(skew_check.do_check)
+    try:
+        yield
+    finally:
+        log.info('joining mon_clock_skew_check')
+        skew_check.finish()
+        skew_check_thread.get()
+
+
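Most of do_check() reduces the 'timechecks' section of the health report to a per-monitor skew map. A hedged sketch of that reduction, assuming the dict layout used above (mons / skew / name / details):

    # Sketch: collect per-mon skews exceeding max_skew from a timechecks dict.
    def summarize_skews(timechecks, max_skew):
        skews = {}
        for check in timechecks.get('mons', []):
            skew = float(check['skew'])
            if abs(skew) > max_skew:
                skews[check['name']] = {'skew': skew, 'details': check['details']}
        return skews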
diff --git a/tasks/mon_recovery.py b/tasks/mon_recovery.py
new file mode 100644 (file)
index 0000000..bfa2cdf
--- /dev/null
@@ -0,0 +1,80 @@
+"""
+Monitor recovery
+"""
+import logging
+import ceph_manager
+from teuthology import misc as teuthology
+
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+    """
+    Test monitor recovery.
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'task only accepts a dict for configuration'
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+
+    manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'),
+        )
+
+    mons = [f.split('.')[1] for f in teuthology.get_mon_names(ctx)]
+    log.info("mon ids = %s" % mons)
+
+    manager.wait_for_mon_quorum_size(len(mons))
+
+    log.info('verifying all monitors are in the quorum')
+    for m in mons:
+        s = manager.get_mon_status(m)
+        assert s['state'] == 'leader' or s['state'] == 'peon'
+        assert len(s['quorum']) == len(mons)
+
+    log.info('restarting each monitor in turn')
+    for m in mons:
+        # stop a monitor
+        manager.kill_mon(m)
+        manager.wait_for_mon_quorum_size(len(mons) - 1)
+
+        # restart
+        manager.revive_mon(m)
+        manager.wait_for_mon_quorum_size(len(mons))
+
+    # in forward and reverse order,
+    rmons = list(mons)
+    rmons.reverse()
+    for mons in mons, rmons:
+        log.info('stopping all monitors')
+        for m in mons:
+            manager.kill_mon(m)
+
+        log.info('forming a minimal quorum for %s, then adding monitors' % mons)
+        qnum = (len(mons) / 2) + 1
+        num = 0
+        for m in mons:
+            manager.revive_mon(m)
+            num += 1
+            if num >= qnum:
+                manager.wait_for_mon_quorum_size(num)
+
+    # on both leader and non-leader ranks...
+    for rank in [0, 1]:
+        # take one out
+        log.info('removing mon %s' % mons[rank])
+        manager.kill_mon(mons[rank])
+        manager.wait_for_mon_quorum_size(len(mons) - 1)
+
+        log.info('causing some monitor log activity')
+        m = 30
+        for n in range(1, m):
+            manager.raw_cluster_cmd('log', '%d of %d' % (n, m))
+
+        log.info('adding mon %s back in' % mons[rank])
+        manager.revive_mon(mons[rank])
+        manager.wait_for_mon_quorum_size(len(mons))
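The minimal quorum built up in the loop above is simply a majority of the configured monitors. A quick sketch of the same arithmetic with a couple of worked values:

    # Sketch: minimal quorum size, matching qnum = (len(mons) / 2) + 1 above.
    def min_quorum(n_mons):
        return n_mons // 2 + 1

    assert min_quorum(3) == 2
    assert min_quorum(5) == 3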
diff --git a/tasks/mon_thrash.py b/tasks/mon_thrash.py
new file mode 100644 (file)
index 0000000..b45aaa9
--- /dev/null
@@ -0,0 +1,343 @@
+"""
+Monitor thrash
+"""
+import logging
+import contextlib
+import ceph_manager
+import random
+import time
+import gevent
+import json
+import math
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+def _get_mons(ctx):
+    """
+    Get monitor names from the context value.
+    """
+    mons = [f[len('mon.'):] for f in teuthology.get_mon_names(ctx)]
+    return mons
+
+class MonitorThrasher:
+    """
+    How it works::
+
+    - pick a monitor
+    - kill it
+    - wait for quorum to be formed
+    - sleep for 'revive_delay' seconds
+    - revive monitor
+    - wait for quorum to be formed
+    - sleep for 'thrash_delay' seconds
+
+    Options::
+
+    seed                Seed to use on the RNG to reproduce a previous
+                        behaviour (default: None; i.e., not set)
+    revive_delay        Number of seconds to wait before reviving
+                        the monitor (default: 10)
+    thrash_delay        Number of seconds to wait in-between
+                        test iterations (default: 0)
+    store_thrash        Thrash the monitor store before killing the monitor being thrashed (default: False)
+    store_thrash_probability  Probability of thrashing a monitor's store, in %
+                              (default: 50)
+    thrash_many         Thrash multiple monitors instead of just one. If
+                        'maintain_quorum' is set to False, then we will
+                        thrash up to as many monitors as there are
+                        available. (default: False)
+    maintain_quorum     Always maintain quorum, being careful about how many
+                        monitors we kill during the thrashing. If only one or
+                        two monitors are configured and this option is set to
+                        True, the task won't run, as quorum cannot be
+                        guaranteed. Setting it to false, however, allows the
+                        task to run with as few as a single monitor.
+                        (default: True)
+    freeze_mon_probability: how often to freeze the mon instead of killing it,
+                        in % (default: 0)
+    freeze_mon_duration: how many seconds to freeze the mon (default: 15)
+    scrub               Scrub after each iteration (default: True)
+
+    Note: if 'store_thrash' is set to True, then 'maintain_quorum' must also
+          be set to True.
+
+    For example::
+
+    tasks:
+    - ceph:
+    - mon_thrash:
+        revive_delay: 20
+        thrash_delay: 1
+        store_thrash: true
+        store_thrash_probability: 40
+        seed: 31337
+        maintain_quorum: true
+        thrash_many: true
+    - ceph-fuse:
+    - workunit:
+        clients:
+          all:
+            - mon/workloadgen.sh
+    """
+    def __init__(self, ctx, manager, config, logger):
+        self.ctx = ctx
+        self.manager = manager
+        self.manager.wait_for_clean()
+
+        self.stopping = False
+        self.logger = logger
+        self.config = config
+
+        if self.config is None:
+            self.config = dict()
+
+        """ Test reproducibility """
+        self.random_seed = self.config.get('seed', None)
+
+        if self.random_seed is None:
+            self.random_seed = int(time.time())
+
+        self.rng = random.Random()
+        self.rng.seed(int(self.random_seed))
+
+        """ Monitor thrashing """
+        self.revive_delay = float(self.config.get('revive_delay', 10.0))
+        self.thrash_delay = float(self.config.get('thrash_delay', 0.0))
+
+        self.thrash_many = self.config.get('thrash_many', False)
+        self.maintain_quorum = self.config.get('maintain_quorum', True)
+
+        self.scrub = self.config.get('scrub', True)
+
+        self.freeze_mon_probability = float(self.config.get('freeze_mon_probability', 10))
+        self.freeze_mon_duration = float(self.config.get('freeze_mon_duration', 15.0))
+
+        assert self.max_killable() > 0, \
+            'Unable to kill at least one monitor with the current config.'
+
+        """ Store thrashing """
+        self.store_thrash = self.config.get('store_thrash', False)
+        self.store_thrash_probability = int(
+            self.config.get('store_thrash_probability', 50))
+        if self.store_thrash:
+            assert self.store_thrash_probability > 0, \
+                'store_thrash is set, probability must be > 0'
+            assert self.maintain_quorum, \
+                'store_thrash = true must imply maintain_quorum = true'
+
+        self.thread = gevent.spawn(self.do_thrash)
+
+    def log(self, x):
+        """
+        locally log info messages
+        """
+        self.logger.info(x)
+
+    def do_join(self):
+        """
+        Break out of this processes thrashing loop.
+        """
+        self.stopping = True
+        self.thread.get()
+
+    def should_thrash_store(self):
+        """
+        If allowed, indicate that we should thrash a certain percentage of
+        the time as determined by the store_thrash_probability value.
+        """
+        if not self.store_thrash:
+            return False
+        return self.rng.randrange(0, 101) < self.store_thrash_probability
+
+    def thrash_store(self, mon):
+        """
+        Thrash the monitor specified.
+        :param mon: monitor to thrash
+        """
+        addr = self.ctx.ceph.conf['mon.%s' % mon]['mon addr']
+        self.log('thrashing mon.{id}@{addr} store'.format(id=mon, addr=addr))
+        out = self.manager.raw_cluster_cmd('-m', addr, 'sync', 'force')
+        j = json.loads(out)
+        assert j['ret'] == 0, \
+            'error forcing store sync on mon.{id}:\n{ret}'.format(
+                id=mon,ret=out)
+
+    def should_freeze_mon(self):
+        """
+        Indicate that we should freeze a certain percentage of the time
+        as determined by the freeze_mon_probability value.
+        """
+        return self.rng.randrange(0, 101) < self.freeze_mon_probability
+
+    def freeze_mon(self, mon):
+        """
+        Send STOP signal to freeze the monitor.
+        """
+        log.info('Sending STOP to mon %s', mon)
+        self.manager.signal_mon(mon, 19)  # STOP
+
+    def unfreeze_mon(self, mon):
+        """
+        Send CONT signal to unfreeze the monitor.
+        """
+        log.info('Sending CONT to mon %s', mon)
+        self.manager.signal_mon(mon, 18)  # CONT
+
+    def kill_mon(self, mon):
+        """
+        Kill the monitor specified
+        """
+        self.log('killing mon.{id}'.format(id=mon))
+        self.manager.kill_mon(mon)
+
+    def revive_mon(self, mon):
+        """
+        Revive the monitor specified
+        """
+        self.log('reviving mon.{id}'.format(id=mon))
+        self.manager.revive_mon(mon)
+
+    def max_killable(self):
+        """
+        Return the maximum number of monitors we can kill.
+        """
+        m = len(_get_mons(self.ctx))
+        if self.maintain_quorum:
+            return max(math.ceil(m/2.0)-1, 0)
+        else:
+            return m
+
+    def do_thrash(self):
+        """
+        Continuously loop and thrash the monitors.
+        """
+        self.log('start thrashing')
+        self.log('seed: {s}, revive delay: {r}, thrash delay: {t} '\
+                   'thrash many: {tm}, maintain quorum: {mq} '\
+                   'store thrash: {st}, probability: {stp} '\
+                   'freeze mon: prob {fp} duration {fd}'.format(
+                s=self.random_seed,r=self.revive_delay,t=self.thrash_delay,
+                tm=self.thrash_many, mq=self.maintain_quorum,
+                st=self.store_thrash,stp=self.store_thrash_probability,
+                fp=self.freeze_mon_probability,fd=self.freeze_mon_duration,
+                ))
+
+        while not self.stopping:
+            mons = _get_mons(self.ctx)
+            self.manager.wait_for_mon_quorum_size(len(mons))
+            self.log('making sure all monitors are in the quorum')
+            for m in mons:
+                s = self.manager.get_mon_status(m)
+                assert s['state'] == 'leader' or s['state'] == 'peon'
+                assert len(s['quorum']) == len(mons)
+
+            kill_up_to = self.rng.randrange(1, self.max_killable()+1)
+            mons_to_kill = self.rng.sample(mons, kill_up_to)
+            self.log('monitors to thrash: {m}'.format(m=mons_to_kill))
+
+            mons_to_freeze = []
+            for mon in mons:
+                if mon in mons_to_kill:
+                    continue
+                if self.should_freeze_mon():
+                    mons_to_freeze.append(mon)
+            self.log('monitors to freeze: {m}'.format(m=mons_to_freeze))
+
+            for mon in mons_to_kill:
+                self.log('thrashing mon.{m}'.format(m=mon))
+
+                """ we only thrash stores if we are maintaining quorum """
+                if self.should_thrash_store() and self.maintain_quorum:
+                    self.thrash_store(mon)
+
+                self.kill_mon(mon)
+
+            if mons_to_freeze:
+                for mon in mons_to_freeze:
+                    self.freeze_mon(mon)
+                self.log('waiting for {delay} secs to unfreeze mons'.format(
+                    delay=self.freeze_mon_duration))
+                time.sleep(self.freeze_mon_duration)
+                for mon in mons_to_freeze:
+                    self.unfreeze_mon(mon)
+
+            if self.maintain_quorum:
+                self.manager.wait_for_mon_quorum_size(len(mons)-len(mons_to_kill))
+                for m in mons:
+                    if m in mons_to_kill:
+                        continue
+                    s = self.manager.get_mon_status(m)
+                    assert s['state'] == 'leader' or s['state'] == 'peon'
+                    assert len(s['quorum']) == len(mons)-len(mons_to_kill)
+
+            self.log('waiting for {delay} secs before reviving monitors'.format(
+                delay=self.revive_delay))
+            time.sleep(self.revive_delay)
+
+            for mon in mons_to_kill:
+                self.revive_mon(mon)
+            # do more freezes
+            if mons_to_freeze:
+                for mon in mons_to_freeze:
+                    self.freeze_mon(mon)
+                self.log('waiting for {delay} secs to unfreeze mons'.format(
+                    delay=self.freeze_mon_duration))
+                time.sleep(self.freeze_mon_duration)
+                for mon in mons_to_freeze:
+                    self.unfreeze_mon(mon)
+
+            self.manager.wait_for_mon_quorum_size(len(mons))
+            for m in mons:
+                s = self.manager.get_mon_status(m)
+                assert s['state'] == 'leader' or s['state'] == 'peon'
+                assert len(s['quorum']) == len(mons)
+
+            if self.scrub:
+                self.log('triggering scrub')
+                try:
+                    self.manager.raw_cluster_cmd('scrub')
+                except Exception:
+                    log.exception("Saw exception while triggering scrub")
+
+            if self.thrash_delay > 0.0:
+                self.log('waiting for {delay} secs before continuing thrashing'.format(
+                    delay=self.thrash_delay))
+                time.sleep(self.thrash_delay)
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Stress test the monitor by thrashing them while another task/workunit
+    is running.
+
+    Please refer to MonitorThrasher class for further information on the
+    available options.
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'mon_thrash task only accepts a dict for configuration'
+    assert len(_get_mons(ctx)) > 2, \
+        'mon_thrash task requires at least 3 monitors'
+    log.info('Beginning mon_thrash...')
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+    manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'),
+        )
+    thrash_proc = MonitorThrasher(ctx,
+        manager, config,
+        logger=log.getChild('mon_thrasher'))
+    try:
+        log.debug('Yielding')
+        yield
+    finally:
+        log.info('joining mon_thrasher')
+        thrash_proc.do_join()
+        mons = _get_mons(ctx)
+        manager.wait_for_mon_quorum_size(len(mons))
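freeze_mon() and unfreeze_mon() send the raw signal numbers 19 and 18, which on Linux are SIGSTOP and SIGCONT. A hedged sketch of the same calls written symbolically (helper names illustrative):

    # Sketch: the freeze/unfreeze signals spelled out via the signal module.
    import signal

    def freeze(manager, mon):
        manager.signal_mon(mon, signal.SIGSTOP)   # 19 on Linux

    def unfreeze(manager, mon):
        manager.signal_mon(mon, signal.SIGCONT)   # 18 on Linux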
diff --git a/tasks/multibench.py b/tasks/multibench.py
new file mode 100644 (file)
index 0000000..bc22b47
--- /dev/null
@@ -0,0 +1,57 @@
+"""
+Multibench testing
+"""
+import contextlib
+import logging
+import radosbench
+import time
+import copy
+import gevent
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Run multibench
+
+    The config should be as follows:
+
+    multibench:
+        time: <seconds to run total>
+        segments: <number of concurrent benches>
+        radosbench: <config for radosbench>
+
+    example:
+
+    tasks:
+    - ceph:
+    - multibench:
+        clients: [client.0]
+        time: 360
+    - interactive:
+    """
+    log.info('Beginning multibench...')
+    assert isinstance(config, dict), \
+        "please list clients to run on"
+
+    def run_one(num):
+        """Run one bench segment, spawned from gevent"""
+        start = time.time()
+        benchcontext = copy.copy(config.get('radosbench'))
+        iterations = 0
+        while time.time() - start < int(config.get('time', 600)):
+            log.info("Starting iteration %s of segment %s"%(iterations, num))
+            benchcontext['pool'] = str(num) + "-" + str(iterations)
+            with radosbench.task(ctx, benchcontext):
+                time.sleep()
+            iterations += 1
+    log.info("Starting %s threads"%(str(config.get('segments', 3)),))
+    segments = [
+        gevent.spawn(run_one, i) 
+        for i in range(0, int(config.get('segments', 3)))]
+
+    try:
+        yield
+    finally:
+        [i.get() for i in segments]
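The segment handling is the standard gevent spawn-then-collect pattern, where .get() re-raises any exception raised inside the worker. A trivial hedged sketch:

    # Sketch: spawn N workers and join them, propagating worker exceptions.
    import gevent

    def worker(n):
        return n * n

    greenlets = [gevent.spawn(worker, i) for i in range(3)]
    results = [g.get() for g in greenlets]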
diff --git a/tasks/object_source_down.py b/tasks/object_source_down.py
new file mode 100644 (file)
index 0000000..1696c55
--- /dev/null
@@ -0,0 +1,103 @@
+"""
+Test Object locations going down
+"""
+import logging
+import ceph_manager
+from teuthology import misc as teuthology
+from teuthology.task_util.rados import rados
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+    """
+    Test handling of object location going down
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'object_source_down task only accepts a dict for configuration'
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+
+    manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'),
+        )
+
+    while len(manager.get_osd_status()['up']) < 3:
+        manager.sleep(10)
+    manager.wait_for_clean()
+
+    # something that is always there
+    dummyfile = '/etc/fstab'
+
+    # take 0, 1 out
+    manager.mark_out_osd(0)
+    manager.mark_out_osd(1)
+    manager.wait_for_clean()
+
+    # delay recovery, and make the pg log very long (to prevent backfill)
+    manager.raw_cluster_cmd(
+            'tell', 'osd.0',
+            'injectargs',
+            '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000'
+            )
+    # delay recovery, and make the pg log very long (to prevent backfill)
+    manager.raw_cluster_cmd(
+            'tell', 'osd.1',
+            'injectargs',
+            '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000'
+            )
+    # delay recovery, and make the pg log very long (to prevent backfill)
+    manager.raw_cluster_cmd(
+            'tell', 'osd.2',
+            'injectargs',
+            '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000'
+            )
+    # delay recovery, and make the pg log very long (to prevent backfill)
+    manager.raw_cluster_cmd(
+            'tell', 'osd.3',
+            'injectargs',
+            '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000'
+            )
+
+    # kludge to make sure they get a map
+    rados(ctx, mon, ['-p', 'data', 'put', 'dummy', dummyfile])
+
+    # create old objects
+    for f in range(1, 10):
+        rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])
+
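+    # Cycle OSDs out/in so that the only current copies of the test objects end
+    # up on OSDs that are later killed, which should leave the objects unfound.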
+    manager.mark_out_osd(3)
+    manager.wait_till_active()
+
+    manager.mark_in_osd(0)
+    manager.wait_till_active()
+
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+
+    manager.mark_out_osd(2)
+    manager.wait_till_active()
+
+    # bring up 1
+    manager.mark_in_osd(1)
+    manager.wait_till_active()
+
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    log.info("Getting unfound objects")
+    unfound = manager.get_num_unfound_objects()
+    assert not unfound
+
+    manager.kill_osd(2)
+    manager.mark_down_osd(2)
+    manager.kill_osd(3)
+    manager.mark_down_osd(3)
+
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    log.info("Getting unfound objects")
+    unfound = manager.get_num_unfound_objects()
+    assert unfound
diff --git a/tasks/omapbench.py b/tasks/omapbench.py
new file mode 100644 (file)
index 0000000..7d25354
--- /dev/null
@@ -0,0 +1,83 @@
+"""
+Run omapbench executable within teuthology
+"""
+import contextlib
+import logging
+
+from ..orchestra import run
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Run omapbench
+
+    The config should be as follows::
+
+                 omapbench:
+                     clients: [client list]
+                     threads: <threads at once>
+                     objects: <number of objects to write>
+                     entries: <number of entries per object map>
+                     keysize: <number of characters per object map key>
+                     valsize: <number of characters per object map val>
+                     increment: <interval to show in histogram (in ms)>
+                     omaptype: <how the omaps should be generated>
+
+    example::
+
+                 tasks:
+                 - ceph:
+                 - omapbench:
+                     clients: [client.0]
+                     threads: 30
+                     objects: 1000
+                     entries: 10
+                     keysize: 10
+                     valsize: 100
+                     increment: 100
+                     omaptype: uniform
+                 - interactive:
+    """
+    log.info('Beginning omapbench...')
+    assert isinstance(config, dict), \
+        "please list clients to run on"
+    omapbench = {}
+    testdir = teuthology.get_testdir(ctx)
+    log.debug("increment: %s", config.get('increment', -1))
+    for role in config.get('clients', ['client.0']):
+        assert isinstance(role, basestring)
+        PREFIX = 'client.'
+        assert role.startswith(PREFIX)
+        id_ = role[len(PREFIX):]
+        (remote,) = ctx.cluster.only(role).remotes.iterkeys()
+        proc = remote.run(
+            args=[
+                "/bin/sh", "-c",
+                " ".join(['adjust-ulimits',
+                          'ceph-coverage',
+                          '{tdir}/archive/coverage',
+                          'omapbench',
+                          '--name', role[len(PREFIX):],
+                          '-t', str(config.get('threads', 30)),
+                          '-o', str(config.get('objects', 1000)),
+                          '--entries', str(config.get('entries',10)),
+                          '--keysize', str(config.get('keysize',10)),
+                          '--valsize', str(config.get('valsize',1000)),
+                          '--inc', str(config.get('increment',10)),
+                          '--omaptype', str(config.get('omaptype','uniform'))
+                          ]).format(tdir=testdir),
+                ],
+            logger=log.getChild('omapbench.{id}'.format(id=id_)),
+            stdin=run.PIPE,
+            wait=False
+            )
+        omapbench[id_] = proc
+
+    try:
+        yield
+    finally:
+        log.info('joining omapbench')
+        run.wait(omapbench.itervalues())
diff --git a/tasks/osd_backfill.py b/tasks/osd_backfill.py
new file mode 100644 (file)
index 0000000..d80ea22
--- /dev/null
@@ -0,0 +1,105 @@
+"""
+Osd backfill test
+"""
+import logging
+import ceph_manager
+import time
+from teuthology import misc as teuthology
+
+
+log = logging.getLogger(__name__)
+
+
+def rados_start(ctx, remote, cmd):
+    """
+    Run a remote rados command (currently used to only write data)
+    """
+    log.info("rados %s" % ' '.join(cmd))
+    testdir = teuthology.get_testdir(ctx)
+    pre = [
+        'adjust-ulimits',
+        'ceph-coverage',
+        '{tdir}/archive/coverage'.format(tdir=testdir),
+        'rados',
+        ]
+    pre.extend(cmd)
+    proc = remote.run(
+        args=pre,
+        wait=False,
+        )
+    return proc
+
+def task(ctx, config):
+    """
+    Test backfill
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'osd_backfill task only accepts a dict for configuration'
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+
+    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
+    log.info('num_osds is %s' % num_osds)
+    assert num_osds == 3
+
+    manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'),
+        )
+
+    while len(manager.get_osd_status()['up']) < 3:
+        manager.sleep(10)
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.wait_for_clean()
+
+    # write some data
+    p = rados_start(ctx, mon, ['-p', 'rbd', 'bench', '15', 'write', '-b', '4096',
+                          '--no-cleanup'])
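+    # block until the bench process finishes; get() returns its exit status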
+    err = p.exitstatus.get()
+    log.info('err is %d' % err)
+
+    # mark osd.0 out to trigger a rebalance/backfill
+    manager.mark_out_osd(0)
+
+    # also mark it down so it won't be included in pg_temps
+    manager.kill_osd(0)
+    manager.mark_down_osd(0)
+
+    # wait for everything to peer and be happy...
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.wait_for_recovery()
+
+    # write some new data
+    p = rados_start(ctx, mon, ['-p', 'data', 'bench', '30', 'write', '-b', '4096',
+                          '--no-cleanup'])
+
+    time.sleep(15)
+
+    # blackhole + restart osd.1
+    # this triggers a divergent backfill target
+    manager.blackhole_kill_osd(1)
+    time.sleep(2)
+    manager.revive_osd(1)
+
+    # wait for our writes to complete + succeed
+    err = p.exitstatus.get()
+    log.info('err is %d' % err)
+
+    # cluster must recover
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.wait_for_recovery()
+
+    # re-add osd.0
+    manager.revive_osd(0)
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.wait_for_clean()
+
+
diff --git a/tasks/osd_failsafe_enospc.py b/tasks/osd_failsafe_enospc.py
new file mode 100644 (file)
index 0000000..39b5b5c
--- /dev/null
@@ -0,0 +1,218 @@
+"""
+Handle osdfailsafe configuration settings (nearfull ratio and full ratio)
+"""
+from cStringIO import StringIO
+import logging
+import time
+
+import ceph_manager
+from ..orchestra import run
+from teuthology.task_util.rados import rados
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+    """
+    Test handling of osd_failsafe_nearfull_ratio and osd_failsafe_full_ratio
+    configuration settings
+
+    In order for the test to pass, you must use a log-whitelist as follows::
+
+        tasks:
+            - chef:
+            - install:
+            - ceph:
+                log-whitelist: ['OSD near full', 'OSD full dropping all updates']
+            - osd_failsafe_enospc:
+
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'osd_failsafe_enospc task only accepts a dict for configuration'
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+
+    manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'),
+        )
+    ctx.manager = manager
+
+    # Give 2 seconds for injectargs + osd_op_complaint_time (30) + 2 * osd_heartbeat_interval (6) + 6 padding
+    sleep_time = 50
+
+    # something that is always there
+    dummyfile = '/etc/fstab'
+    dummyfile2 = '/etc/resolv.conf'
+
+    # create 1 pg pool with 1 rep which can only be on osd.0
+    osds = manager.get_osd_dump()
+    for osd in osds:
+        if osd['osd'] != 0:
+            manager.mark_out_osd(osd['osd'])
+
+    log.info('creating pool foo')
+    manager.create_pool("foo")
+    manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'size', '1')
+
+    # State NONE -> NEAR
+    log.info('1. Verify warning messages when exceeding nearfull_ratio')
+
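+    # Watch the cluster log with 'ceph -w' while the ratio is tripped, then stop
+    # the watcher and count the warning/error lines it captured.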
+    proc = mon.run(
+             args=[
+                'daemon-helper',
+                'kill',
+                'ceph', '-w'
+             ],
+             stdin=run.PIPE,
+             stdout=StringIO(),
+             wait=False,
+        )
+
+    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .00001')
+
+    time.sleep(sleep_time)
+    proc.stdin.close() # causes daemon-helper to send SIGKILL to ceph -w
+    proc.exitstatus.get()
+
+    lines = proc.stdout.getvalue().split('\n')
+
+    count = len(filter(lambda line: '[WRN] OSD near full' in line, lines))
+    assert count == 2, 'Incorrect number of warning messages expected 2 got %d' % count
+    count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines))
+    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count
+
+    # State NEAR -> FULL
+    log.info('2. Verify error messages when exceeding full_ratio')
+
+    proc = mon.run(
+             args=[
+                'daemon-helper',
+                'kill',
+                'ceph', '-w'
+             ],
+             stdin=run.PIPE,
+             stdout=StringIO(),
+             wait=False,
+        )
+
+    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .00001')
+
+    time.sleep(sleep_time)
+    proc.stdin.close() # causes daemon-helper to send SIGKILL to ceph -w
+    proc.exitstatus.get()
+
+    lines = proc.stdout.getvalue().split('\n')
+
+    count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines))
+    assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count
+
+    log.info('3. Verify write failure when exceeding full_ratio')
+
+    # Write data should fail
+    ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile1', dummyfile])
+    assert ret != 0, 'Expected write failure but it succeeded with exit status 0'
+
+    # Put back default
+    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97')
+    time.sleep(10)
+
+    # State FULL -> NEAR
+    log.info('4. Verify write success when NOT exceeding full_ratio')
+
+    # Write should succeed
+    ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile2', dummyfile2])
+    assert ret == 0, 'Expected write to succeed, but got exit status %d' % ret
+
+    log.info('5. Verify warning messages again when exceeding nearfull_ratio')
+
+    proc = mon.run(
+             args=[
+                'daemon-helper',
+                'kill',
+                'ceph', '-w'
+             ],
+             stdin=run.PIPE,
+             stdout=StringIO(),
+             wait=False,
+        )
+
+    time.sleep(sleep_time)
+    proc.stdin.close() # causes daemon-helper to send SIGKILL to ceph -w
+    proc.exitstatus.get()
+
+    lines = proc.stdout.getvalue().split('\n')
+
+    count = len(filter(lambda line: '[WRN] OSD near full' in line, lines))
+    assert count == 1 or count == 2, 'Incorrect number of warning messages expected 1 or 2 got %d' % count
+    count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines))
+    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count
+
+    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .90')
+    time.sleep(10)
+
+    # State NONE -> FULL
+    log.info('6. Verify error messages again when exceeding full_ratio')
+
+    proc = mon.run(
+             args=[
+                'daemon-helper',
+                'kill',
+                'ceph', '-w'
+             ],
+             stdin=run.PIPE,
+             stdout=StringIO(),
+             wait=False,
+        )
+
+    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .00001')
+
+    time.sleep(sleep_time)
+    proc.stdin.close() # causes daemon-helper to send SIGKILL to ceph -w
+    proc.exitstatus.get()
+
+    lines = proc.stdout.getvalue().split('\n')
+
+    count = len(filter(lambda line: '[WRN] OSD near full' in line, lines))
+    assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count
+    count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines))
+    assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count
+
+    # State FULL -> NONE
+    log.info('7. Verify no messages when settings are back to default')
+
+    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97')
+    time.sleep(10)
+
+    proc = mon.run(
+             args=[
+                'daemon-helper',
+                'kill',
+                'ceph', '-w'
+             ],
+             stdin=run.PIPE,
+             stdout=StringIO(),
+             wait=False,
+        )
+
+    time.sleep(sleep_time)
+    proc.stdin.close() # causes daemon-helper to send SIGKILL to ceph -w
+    proc.exitstatus.get()
+
+    lines = proc.stdout.getvalue().split('\n')
+
+    count = len(filter(lambda line: '[WRN] OSD near full' in line, lines))
+    assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count
+    count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines))
+    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count
+
+    log.info('Test Passed')
+
+    # Bring all OSDs back in
+    manager.remove_pool("foo")
+    for osd in osds:
+        if osd['osd'] != 0:
+            manager.mark_in_osd(osd['osd'])
diff --git a/tasks/osd_recovery.py b/tasks/osd_recovery.py
new file mode 100644 (file)
index 0000000..1ff1733
--- /dev/null
@@ -0,0 +1,206 @@
+"""
+osd recovery
+"""
+import logging
+import ceph_manager
+import time
+from teuthology import misc as teuthology
+
+
+log = logging.getLogger(__name__)
+
+
+def rados_start(testdir, remote, cmd):
+    """
+    Run a remote rados command (currently used to only write data)
+    """
+    log.info("rados %s" % ' '.join(cmd))
+    pre = [
+        'adjust-ulimits',
+        'ceph-coverage',
+        '{tdir}/archive/coverage'.format(tdir=testdir),
+        'rados',
+        ]
+    pre.extend(cmd)
+    proc = remote.run(
+        args=pre,
+        wait=False,
+        )
+    return proc
+
+def task(ctx, config):
+    """
+    Test (non-backfill) recovery
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'task only accepts a dict for configuration'
+    testdir = teuthology.get_testdir(ctx)
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+
+    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
+    log.info('num_osds is %s' % num_osds)
+    assert num_osds == 3
+
+    manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'),
+        )
+
+    while len(manager.get_osd_status()['up']) < 3:
+        manager.sleep(10)
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.wait_for_clean()
+
+    # test some osdmap flags
+    manager.raw_cluster_cmd('osd', 'set', 'noin')
+    manager.raw_cluster_cmd('osd', 'set', 'noout')
+    manager.raw_cluster_cmd('osd', 'set', 'noup')
+    manager.raw_cluster_cmd('osd', 'set', 'nodown')
+    manager.raw_cluster_cmd('osd', 'unset', 'noin')
+    manager.raw_cluster_cmd('osd', 'unset', 'noout')
+    manager.raw_cluster_cmd('osd', 'unset', 'noup')
+    manager.raw_cluster_cmd('osd', 'unset', 'nodown')
+
+    # write some new data
+    p = rados_start(testdir, mon, ['-p', 'rbd', 'bench', '60', 'write', '-b', '4096',
+                          '--no-cleanup'])
+
+    time.sleep(15)
+
+    # trigger a divergent target:
+    #  blackhole + restart osd.1 (shorter log)
+    manager.blackhole_kill_osd(1)
+    #  kill osd.2 (longer log... we'll make it divergent below)
+    manager.kill_osd(2)
+    time.sleep(2)
+    manager.revive_osd(1)
+
+    # wait for our writes to complete + succeed
+    err = p.exitstatus.get()
+    log.info('err is %d' % err)
+
+    # cluster must repeer
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.wait_for_active_or_down()
+
+    # write some more (make sure osd.2 really is divergent)
+    p = rados_start(testdir, mon, ['-p', 'rbd', 'bench', '15', 'write', '-b', '4096'])
+    p.exitstatus.get()
+
+    # revive divergent osd
+    manager.revive_osd(2)
+
+    while len(manager.get_osd_status()['up']) < 3:
+        log.info('waiting a bit...')
+        time.sleep(2)
+    log.info('3 are up!')
+
+    # cluster must recover
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.wait_for_clean()
+
+
+def test_incomplete_pgs(ctx, config):
+    """
+    Test handling of incomplete pgs.  Requires 4 osds.
+    """
+    testdir = teuthology.get_testdir(ctx)
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'task only accepts a dict for configuration'
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+
+    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
+    log.info('num_osds is %s' % num_osds)
+    assert num_osds == 4
+
+    manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'),
+        )
+
+    while len(manager.get_osd_status()['up']) < 4:
+        time.sleep(10)
+
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
+    manager.wait_for_clean()
+
+    log.info('Testing incomplete pgs...')
+
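+    # Strategy: push data onto osd.2/3 only, then stop both so the pgs whose
+    # newest copies live there are left down/incomplete.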
+    for i in range(4):
+        manager.set_config(
+            i,
+            osd_recovery_delay_start=1000)
+
+    # move data off of osd.0, osd.1
+    manager.raw_cluster_cmd('osd', 'out', '0', '1')
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
+    manager.wait_for_clean()
+
+    # lots of objects in rbd (no pg log, will backfill)
+    p = rados_start(testdir, mon,
+                    ['-p', 'rbd', 'bench', '60', 'write', '-b', '1',
+                     '--no-cleanup'])
+    p.exitstatus.get()
+
+    # few objects in metadata pool (with pg log, normal recovery)
+    for f in range(1, 20):
+        p = rados_start(testdir, mon, ['-p', 'metadata', 'put',
+                              'foo.%d' % f, '/etc/passwd'])
+        p.exitstatus.get()
+
+    # move it back
+    manager.raw_cluster_cmd('osd', 'in', '0', '1')
+    manager.raw_cluster_cmd('osd', 'out', '2', '3')
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
+    manager.wait_for_active()
+
+    assert not manager.is_clean()
+    assert not manager.is_recovered()
+
+    # kill 2 + 3
+    log.info('stopping 2,3')
+    manager.kill_osd(2)
+    manager.kill_osd(3)
+    log.info('...')
+    manager.raw_cluster_cmd('osd', 'down', '2', '3')
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.wait_for_active_or_down()
+
+    assert manager.get_num_down() > 0
+
+    # revive 2 + 3
+    manager.revive_osd(2)
+    manager.revive_osd(3)
+    while len(manager.get_osd_status()['up']) < 4:
+        log.info('waiting a bit...')
+        time.sleep(2)
+    log.info('all are up!')
+
+    for i in range(4):
+        manager.kick_recovery_wq(i)
+
+    # cluster must recover
+    manager.wait_for_clean()
diff --git a/tasks/peer.py b/tasks/peer.py
new file mode 100644 (file)
index 0000000..8006c38
--- /dev/null
@@ -0,0 +1,96 @@
+"""
+Peer test (Single test, not much configurable here)
+"""
+import logging
+import json
+
+import ceph_manager
+from teuthology import misc as teuthology
+from teuthology.task_util.rados import rados
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+    """
+    Test peering.
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'peer task only accepts a dict for configuration'
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+
+    manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'),
+        )
+
+    while len(manager.get_osd_status()['up']) < 3:
+        manager.sleep(10)
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.wait_for_clean()
+
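+    # Delay recovery so pgs stay in a peering/down state long enough to inspect.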
+    for i in range(3):
+        manager.set_config(
+            i,
+            osd_recovery_delay_start=120)
+
+    # take one osd down
+    manager.kill_osd(2)
+    manager.mark_down_osd(2)
+
+    # kludge to make sure they get a map
+    rados(ctx, mon, ['-p', 'data', 'get', 'dummy', '-'])
+
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.wait_for_recovery()
+
+    # kill another and revive 2, so that some pgs can't peer.
+    manager.kill_osd(1)
+    manager.mark_down_osd(1)
+    manager.revive_osd(2)
+    manager.wait_till_osd_is_up(2)
+
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+
+    manager.wait_for_active_or_down()
+
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+
+    # look for down pgs
+    num_down_pgs = 0
+    pgs = manager.get_pg_stats()
+    for pg in pgs:
+        out = manager.raw_cluster_cmd('pg', pg['pgid'], 'query')
+       log.debug("out string %s",out)
+        j = json.loads(out)
+        log.info("pg is %s, query json is %s", pg, j)
+
+        if pg['state'].count('down'):
+            num_down_pgs += 1
+            # verify that it is blocked on osd.1
+            rs = j['recovery_state']
+            assert len(rs) > 0
+            assert rs[0]['name'] == 'Started/Primary/Peering/GetInfo'
+            assert rs[1]['name'] == 'Started/Primary/Peering'
+            assert rs[1]['blocked']
+            assert rs[1]['down_osds_we_would_probe'] == [1]
+            assert len(rs[1]['peering_blocked_by']) == 1
+            assert rs[1]['peering_blocked_by'][0]['osd'] == 1
+
+    assert num_down_pgs > 0
+
+    # bring it all back
+    manager.revive_osd(1)
+    manager.wait_till_osd_is_up(1)
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.wait_for_clean()
diff --git a/tasks/peering_speed_test.py b/tasks/peering_speed_test.py
new file mode 100644 (file)
index 0000000..6c885f1
--- /dev/null
@@ -0,0 +1,93 @@
+"""
+Remotely run peering tests.
+"""
+import logging
+import time
+from teuthology import misc as teuthology
+import ceph_manager
+
+log = logging.getLogger(__name__)
+
+from args import argify
+
+POOLNAME = "POOLNAME"
+ARGS = [
+    ('num_pgs', 'number of pgs to create', 256, int),
+    ('max_time', 'seconds to complete peering', 0, int),
+    ('runs', 'trials to run', 10, int),
+    ('num_objects', 'objects to create', 256 * 1024, int),
+    ('object_size', 'size in bytes for objects', 64, int),
+    ('creation_time_limit', 'time limit for pool population', 60*60, int),
+    ('create_threads', 'concurrent writes for create', 256, int)
+    ]
+
+def setup(ctx, config):
+    """
+    Setup peering test on remotes.
+    """
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+    ctx.manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'),
+        )
+    ctx.manager.clear_pools()
+    ctx.manager.create_pool(POOLNAME, config.num_pgs)
+    log.info("populating pool")
+    ctx.manager.rados_write_objects(
+        POOLNAME,
+        config.num_objects,
+        config.object_size,
+        config.creation_time_limit,
+        config.create_threads)
+    log.info("done populating pool")
+
+def do_run(ctx, config):
+    """
+    Perform the test.
+    """
+    start = time.time()
+    # mark in osd
+    ctx.manager.mark_in_osd(0)
+    log.info("writing out objects")
+    ctx.manager.rados_write_objects(
+        POOLNAME,
+        config.num_pgs, # write 1 object per pg or so
+        1,
+        config.creation_time_limit,
+        config.num_pgs, # lots of concurrency
+        cleanup = True)
+    peering_end = time.time()
+
+    log.info("peering done, waiting on recovery")
+    ctx.manager.wait_for_clean()
+
+    log.info("recovery done")
+    recovery_end = time.time()
+    if config.max_time:
+        assert(peering_end - start < config.max_time)
+    ctx.manager.mark_out_osd(0)
+    ctx.manager.wait_for_clean()
+    return {
+        'time_to_active': peering_end - start,
+        'time_to_clean': recovery_end - start
+        }
+
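+# argify maps each ARGS entry above (name, description, default, type) onto an
+# attribute of config (e.g. config.num_pgs), applying the listed defaults.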
+@argify("peering_speed_test", ARGS)
+def task(ctx, config):
+    """
+    Peering speed test
+    """
+    setup(ctx, config)
+    ctx.manager.mark_out_osd(0)
+    ctx.manager.wait_for_clean()
+    ret = []
+    for i in range(config.runs):
+        log.info("Run {i}".format(i = i))
+        ret.append(do_run(ctx, config))
+
+    ctx.manager.mark_in_osd(0)
+    ctx.summary['recovery_times'] = {
+        'runs': ret
+        }
diff --git a/tasks/qemu.py b/tasks/qemu.py
new file mode 100644 (file)
index 0000000..a05b4db
--- /dev/null
@@ -0,0 +1,327 @@
+"""
+Qemu task
+"""
+from cStringIO import StringIO
+
+import contextlib
+import logging
+import os
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+from teuthology.task import rbd
+from ..orchestra import run
+
+log = logging.getLogger(__name__)
+
+DEFAULT_NUM_RBD = 1
+DEFAULT_IMAGE_URL = 'http://ceph.com/qa/ubuntu-12.04.qcow2'
+DEFAULT_MEM = 4096 # in megabytes
+
+@contextlib.contextmanager
+def create_dirs(ctx, config):
+    """
+    Handle directory creation and cleanup
+    """
+    testdir = teuthology.get_testdir(ctx)
+    for client, client_config in config.iteritems():
+        assert 'test' in client_config, 'You must specify a test to run'
+        (remote,) = ctx.cluster.only(client).remotes.keys()
+        remote.run(
+            args=[
+                'install', '-d', '-m0755', '--',
+                '{tdir}/qemu'.format(tdir=testdir),
+                '{tdir}/archive/qemu'.format(tdir=testdir),
+                ]
+            )
+    try:
+        yield
+    finally:
+        for client, client_config in config.iteritems():
+            assert 'test' in client_config, 'You must specify a test to run'
+            (remote,) = ctx.cluster.only(client).remotes.keys()
+            remote.run(
+                args=[
+                    'rmdir', '{tdir}/qemu'.format(tdir=testdir), run.Raw('||'), 'true',
+                    ]
+                )
+
+@contextlib.contextmanager
+def generate_iso(ctx, config):
+    """Execute system commands to generate iso"""
+    log.info('generating iso...')
+    testdir = teuthology.get_testdir(ctx)
+    for client, client_config in config.iteritems():
+        assert 'test' in client_config, 'You must specify a test to run'
+        src_dir = os.path.dirname(__file__)
+        userdata_path = os.path.join(testdir, 'qemu', 'userdata.' + client)
+        metadata_path = os.path.join(testdir, 'qemu', 'metadata.' + client)
+
+        with file(os.path.join(src_dir, 'userdata_setup.yaml'), 'rb') as f:
+            test_setup = ''.join(f.readlines())
+
+        with file(os.path.join(src_dir, 'userdata_teardown.yaml'), 'rb') as f:
+            test_teardown = ''.join(f.readlines())
+
+        user_data = test_setup
+        if client_config.get('type', 'filesystem') == 'filesystem':
+            for i in xrange(0, client_config.get('num_rbd', DEFAULT_NUM_RBD)):
+                dev_letter = chr(ord('b') + i)
+                user_data += """
+- |
+  #!/bin/bash
+  mkdir /mnt/test_{dev_letter}
+  mkfs -t xfs /dev/vd{dev_letter}
+  mount -t xfs /dev/vd{dev_letter} /mnt/test_{dev_letter}
+""".format(dev_letter=dev_letter)
+
+        # this may change later to pass the directories as args to the
+        # script or something. xfstests needs that.
+        user_data += """
+- |
+  #!/bin/bash
+  test -d /mnt/test_b && cd /mnt/test_b
+  /mnt/cdrom/test.sh > /mnt/log/test.log 2>&1 && touch /mnt/log/success
+""" + test_teardown
+
+        (remote,) = ctx.cluster.only(client).remotes.keys()
+        teuthology.write_file(remote, userdata_path, StringIO(user_data))
+
+        with file(os.path.join(src_dir, 'metadata.yaml'), 'rb') as f:
+            teuthology.write_file(remote, metadata_path, f)
+
+        test_file = '{tdir}/qemu/{client}.test.sh'.format(tdir=testdir, client=client)
+        remote.run(
+            args=[
+                'wget', '-nv', '-O', test_file,
+                client_config['test'],
+                run.Raw('&&'),
+                'chmod', '755', test_file,
+                ],
+            )
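+        # Build a cloud-init "cidata" ISO containing user-data, meta-data and the
+        # downloaded test script; run_qemu attaches it to the VM as a cdrom.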
+        remote.run(
+            args=[
+                'genisoimage', '-quiet', '-input-charset', 'utf-8',
+                '-volid', 'cidata', '-joliet', '-rock',
+                '-o', '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client),
+                '-graft-points',
+                'user-data={userdata}'.format(userdata=userdata_path),
+                'meta-data={metadata}'.format(metadata=metadata_path),
+                'test.sh={file}'.format(file=test_file),
+                ],
+            )
+    try:
+        yield
+    finally:
+        for client in config.iterkeys():
+            (remote,) = ctx.cluster.only(client).remotes.keys()
+            remote.run(
+                args=[
+                    'rm', '-f',
+                    '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client),
+                    os.path.join(testdir, 'qemu', 'userdata.' + client),
+                    os.path.join(testdir, 'qemu', 'metadata.' + client),
+                    '{tdir}/qemu/{client}.test.sh'.format(tdir=testdir, client=client),
+                    ],
+                )
+
+@contextlib.contextmanager
+def download_image(ctx, config):
+    """Downland base image, remove image file when done"""
+    log.info('downloading base image')
+    testdir = teuthology.get_testdir(ctx)
+    for client, client_config in config.iteritems():
+        (remote,) = ctx.cluster.only(client).remotes.keys()
+        base_file = '{tdir}/qemu/base.{client}.qcow2'.format(tdir=testdir, client=client)
+        remote.run(
+            args=[
+                'wget', '-nv', '-O', base_file, DEFAULT_IMAGE_URL,
+                ]
+            )
+    try:
+        yield
+    finally:
+        log.debug('cleaning up base image files')
+        for client in config.iterkeys():
+            base_file = '{tdir}/qemu/base.{client}.qcow2'.format(
+                tdir=testdir,
+                client=client,
+                )
+            (remote,) = ctx.cluster.only(client).remotes.keys()
+            remote.run(
+                args=[
+                    'rm', '-f', base_file,
+                    ],
+                )
+
+@contextlib.contextmanager
+def run_qemu(ctx, config):
+    """Setup kvm environment and start qemu"""
+    procs = []
+    testdir = teuthology.get_testdir(ctx)
+    for client, client_config in config.iteritems():
+        (remote,) = ctx.cluster.only(client).remotes.keys()
+        log_dir = '{tdir}/archive/qemu/{client}'.format(tdir=testdir, client=client)
+        remote.run(
+            args=[
+                'mkdir', log_dir, run.Raw('&&'),
+                'sudo', 'modprobe', 'kvm',
+                ]
+            )
+
+        base_file = '{tdir}/qemu/base.{client}.qcow2'.format(tdir=testdir, client=client)
+        args=[
+            'adjust-ulimits',
+            'ceph-coverage',
+            '{tdir}/archive/coverage'.format(tdir=testdir),
+            'daemon-helper',
+            'term',
+            'qemu-system-x86_64', '-enable-kvm', '-nographic',
+            '-m', str(client_config.get('memory', DEFAULT_MEM)),
+            # base OS device
+            '-drive',
+            'file={base},format=qcow2,if=virtio'.format(base=base_file),
+            # cd holding metadata for cloud-init
+            '-cdrom', '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client),
+            # virtio 9p fs for logging
+            '-fsdev',
+            'local,id=log,path={log},security_model=none'.format(log=log_dir),
+            '-device',
+            'virtio-9p-pci,fsdev=log,mount_tag=test_log',
+            ]
+
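+        # Map the rbd cache settings onto a qemu cache mode: writeback when the
+        # rbd cache may hold dirty data, writethrough when it may not, and no
+        # host caching when the rbd cache is disabled.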
+        cachemode = 'none'
+        ceph_config = ctx.ceph.conf.get('global', {})
+        ceph_config.update(ctx.ceph.conf.get('client', {}))
+        ceph_config.update(ctx.ceph.conf.get(client, {}))
+        if ceph_config.get('rbd cache'):
+            if ceph_config.get('rbd cache max dirty', 1) > 0:
+                cachemode = 'writeback'
+            else:
+                cachemode = 'writethrough'
+
+        for i in xrange(client_config.get('num_rbd', DEFAULT_NUM_RBD)):
+            args.extend([
+                '-drive',
+                'file=rbd:rbd/{img}:id={id},format=raw,if=virtio,cache={cachemode}'.format(
+                    img='{client}.{num}'.format(client=client, num=i),
+                    id=client[len('client.'):],
+                    cachemode=cachemode,
+                    ),
+                ])
+
+        log.info('starting qemu...')
+        procs.append(
+            remote.run(
+                args=args,
+                logger=log.getChild(client),
+                stdin=run.PIPE,
+                wait=False,
+                )
+            )
+
+    try:
+        yield
+    finally:
+        log.info('waiting for qemu tests to finish...')
+        run.wait(procs)
+
+        log.debug('checking that qemu tests succeeded...')
+        for client in config.iterkeys():
+            (remote,) = ctx.cluster.only(client).remotes.keys()
+            remote.run(
+                args=[
+                    'test', '-f',
+                    '{tdir}/archive/qemu/{client}/success'.format(
+                        tdir=testdir,
+                        client=client
+                        ),
+                    ],
+                )
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Run a test inside of QEMU on top of rbd. Only one test
+    is supported per client.
+
+    For example, you can specify which clients to run on::
+
+        tasks:
+        - ceph:
+        - qemu:
+            client.0:
+              test: http://ceph.com/qa/test.sh
+            client.1:
+              test: http://ceph.com/qa/test2.sh
+
+    Or use the same settings on all clients::
+
+        tasks:
+        - ceph:
+        - qemu:
+            all:
+              test: http://ceph.com/qa/test.sh
+
+    For tests that don't need a filesystem, set type to block::
+
+        tasks:
+        - ceph:
+        - qemu:
+            client.0:
+              test: http://ceph.com/qa/test.sh
+              type: block
+
+    The test should be configured to run on /dev/vdb and later
+    devices.
+
+    If you want to run a test that uses more than one rbd image,
+    specify how many images to use::
+
+        tasks:
+        - ceph:
+        - qemu:
+            client.0:
+              test: http://ceph.com/qa/test.sh
+              type: block
+              num_rbd: 2
+
+    You can set the amount of memory the VM has (default is 4096 MB)::
+
+        tasks:
+        - ceph:
+        - qemu:
+            client.0:
+              test: http://ceph.com/qa/test.sh
+              memory: 512 # megabytes
+    """
+    assert isinstance(config, dict), \
+           "task qemu only supports a dictionary for configuration"
+
+    config = teuthology.replace_all_with_clients(ctx.cluster, config)
+
+    managers = []
+    for client, client_config in config.iteritems():
+        num_rbd = client_config.get('num_rbd', 1)
+        assert num_rbd > 0, 'at least one rbd device must be used'
+        for i in xrange(num_rbd):
+            create_config = {
+                client: {
+                    'image_name':
+                    '{client}.{num}'.format(client=client, num=i),
+                    }
+                }
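+            # Bind create_config as a default argument so each lambda captures
+            # its own per-image config rather than the final loop value.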
+            managers.append(
+                lambda create_config=create_config:
+                rbd.create_image(ctx=ctx, config=create_config)
+                )
+
+    managers.extend([
+        lambda: create_dirs(ctx=ctx, config=config),
+        lambda: generate_iso(ctx=ctx, config=config),
+        lambda: download_image(ctx=ctx, config=config),
+        lambda: run_qemu(ctx=ctx, config=config),
+        ])
+
+    with contextutil.nested(*managers):
+        yield
diff --git a/tasks/rados.py b/tasks/rados.py
new file mode 100644 (file)
index 0000000..0897726
--- /dev/null
@@ -0,0 +1,170 @@
+"""
+Rados model-based integration tests
+"""
+import contextlib
+import logging
+import gevent
+from ceph_manager import CephManager
+from teuthology import misc as teuthology
+
+from ..orchestra import run
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Run RadosModel-based integration tests.
+
+    The config should be as follows::
+
+        rados:
+          clients: [client list]
+          ops: <number of ops>
+          objects: <number of objects to use>
+          max_in_flight: <max number of operations in flight>
+          object_size: <size of objects in bytes>
+          min_stride_size: <minimum write stride size in bytes>
+          max_stride_size: <maximum write stride size in bytes>
+          op_weights: <dictionary mapping operation type to integer weight>
+          runs: <number of times to run> - the pool is remade between runs
+          ec_pool: use an ec pool
+
+    For example::
+
+        tasks:
+        - ceph:
+        - rados:
+            clients: [client.0]
+            ops: 1000
+            max_seconds: 0   # 0 for no limit
+            objects: 25
+            max_in_flight: 16
+            object_size: 4000000
+            min_stride_size: 1024
+            max_stride_size: 4096
+            op_weights:
+              read: 20
+              write: 10
+              delete: 2
+              snap_create: 3
+              rollback: 2
+              snap_remove: 0
+            ec_pool: true
+            runs: 10
+        - interactive:
+
+    Optionally, you can provide the pool name to run against::
+
+        tasks:
+        - ceph:
+        - exec:
+            client.0:
+              - ceph osd pool create foo
+        - rados:
+            clients: [client.0]
+            pools: [foo]
+            ...
+
+    Alternatively, you can provide a pool prefix::
+
+        tasks:
+        - ceph:
+        - exec:
+            client.0:
+              - ceph osd pool create foo.client.0
+        - rados:
+            clients: [client.0]
+            pool_prefix: foo
+            ...
+
+    """
+    log.info('Beginning rados...')
+    assert isinstance(config, dict), \
+        "please list clients to run on"
+
+    object_size = int(config.get('object_size', 4000000))
+    op_weights = config.get('op_weights', {})
+    testdir = teuthology.get_testdir(ctx)
+    args = [
+        'adjust-ulimits',
+        'ceph-coverage',
+        '{tdir}/archive/coverage'.format(tdir=testdir),
+        'ceph_test_rados']
+    if config.get('ec_pool', False):
+        args.extend(['--ec-pool'])
+    args.extend([
+        '--op', 'read', str(op_weights.get('read', 100)),
+        '--op', 'write', str(op_weights.get('write', 100)),
+        '--op', 'delete', str(op_weights.get('delete', 10)),
+        '--max-ops', str(config.get('ops', 10000)),
+        '--objects', str(config.get('objects', 500)),
+        '--max-in-flight', str(config.get('max_in_flight', 16)),
+        '--size', str(object_size),
+        '--min-stride-size', str(config.get('min_stride_size', object_size / 10)),
+        '--max-stride-size', str(config.get('max_stride_size', object_size / 5)),
+        '--max-seconds', str(config.get('max_seconds', 0))
+        ])
+    for field in [
+        'copy_from', 'is_dirty', 'undirty', 'cache_flush',
+        'cache_try_flush', 'cache_evict',
+        'snap_create', 'snap_remove', 'rollback', 'setattr', 'rmattr',
+        'watch', 'append',
+        ]:
+        if field in op_weights:
+            args.extend([
+                    '--op', field, str(op_weights[field]),
+                    ])
+
+    def thread():
+        """Thread spawned by gevent"""
+        if not hasattr(ctx, 'manager'):
+            first_mon = teuthology.get_first_mon(ctx, config)
+            (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+            ctx.manager = CephManager(
+                mon,
+                ctx=ctx,
+                logger=log.getChild('ceph_manager'),
+                )
+
+        clients = ['client.{id}'.format(id=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+        log.info('clients are %s' % clients)
+        for i in range(int(config.get('runs', '1'))):
+            log.info("starting run %s out of %s", str(i), config.get('runs', '1'))
+            tests = {}
+            existing_pools = config.get('pools', [])
+            created_pools = []
+            for role in config.get('clients', clients):
+                assert isinstance(role, basestring)
+                PREFIX = 'client.'
+                assert role.startswith(PREFIX)
+                id_ = role[len(PREFIX):]
+
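+                # Use an explicitly provided pool if one is available, otherwise
+                # create a uniquely named pool and remove it after the run.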
+                pool = config.get('pool', None)
+                if not pool and existing_pools:
+                    pool = existing_pools.pop()
+                else:
+                    pool = ctx.manager.create_pool_with_unique_name(ec_pool=config.get('ec_pool', False))
+                    created_pools.append(pool)
+
+                (remote,) = ctx.cluster.only(role).remotes.iterkeys()
+                proc = remote.run(
+                    args=["CEPH_CLIENT_ID={id_}".format(id_=id_)] + args +
+                    ["--pool", pool],
+                    logger=log.getChild("rados.{id}".format(id=id_)),
+                    stdin=run.PIPE,
+                    wait=False
+                    )
+                tests[id_] = proc
+            run.wait(tests.itervalues())
+
+            for pool in created_pools:
+                ctx.manager.remove_pool(pool)
+
+    running = gevent.spawn(thread)
+
+    try:
+        yield
+    finally:
+        log.info('joining rados')
+        running.get()
diff --git a/tasks/radosbench.py b/tasks/radosbench.py
new file mode 100644 (file)
index 0000000..d2e7571
--- /dev/null
@@ -0,0 +1,83 @@
+"""
+Rados benchmarking
+"""
+import contextlib
+import logging
+
+from ..orchestra import run
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Run radosbench
+
+    The config should be as follows:
+
+    radosbench:
+        clients: [client list]
+        time: <seconds to run>
+        pool: <pool to use>
+        unique_pool: use a unique pool, defaults to False
+        ec_pool: create ec pool, defaults to False
+
+    example:
+
+    tasks:
+    - ceph:
+    - radosbench:
+        clients: [client.0]
+        time: 360
+    - interactive:
+    """
+    log.info('Beginning radosbench...')
+    assert isinstance(config, dict), \
+        "please list clients to run on"
+    radosbench = {}
+
+    testdir = teuthology.get_testdir(ctx)
+
+    for role in config.get('clients', ['client.0']):
+        assert isinstance(role, basestring)
+        PREFIX = 'client.'
+        assert role.startswith(PREFIX)
+        id_ = role[len(PREFIX):]
+        (remote,) = ctx.cluster.only(role).remotes.iterkeys()
+
+        pool = 'data'
+        if config.get('pool'):
+            pool = config.get('pool')
+            if pool != 'data':
+                ctx.manager.create_pool(pool, ec_pool=config.get('ec_pool', False))
+        else:
+            pool = ctx.manager.create_pool_with_unique_name(ec_pool=config.get('ec_pool', False))
+
+        proc = remote.run(
+            args=[
+                "/bin/sh", "-c",
+                " ".join(['adjust-ulimits',
+                          'ceph-coverage',
+                          '{tdir}/archive/coverage',
+                          'rados',
+                          '--name', role,
+                          '-p' , pool,
+                          'bench', str(config.get('time', 360)), 'write',
+                          ]).format(tdir=testdir),
+                ],
+            logger=log.getChild('radosbench.{id}'.format(id=id_)),
+            stdin=run.PIPE,
+            wait=False
+            )
+        radosbench[id_] = proc
+
+    try:
+        yield
+    finally:
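+        # Allow up to five times the configured bench duration before giving up
+        # on the join.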
+        timeout = config.get('time', 360) * 5
+        log.info('joining radosbench (timing out after %ss)', timeout)
+        run.wait(radosbench.itervalues(), timeout=timeout)
+
+        if pool != 'data':
+            ctx.manager.remove_pool(pool)
diff --git a/tasks/radosgw_admin.py b/tasks/radosgw_admin.py
new file mode 100644 (file)
index 0000000..6936b79
--- /dev/null
@@ -0,0 +1,974 @@
+"""
+Rgw admin testing against a running instance
+"""
+# The test cases in this file have been annotated for inventory.
+# To extract the inventory (in csv format) use the command:
+#
+#   grep '^ *# TESTCASE' | sed 's/^ *# TESTCASE //'
+#
+
+import copy
+import json
+import logging
+import time
+
+from cStringIO import StringIO
+
+import boto.exception
+import boto.s3.connection
+import boto.s3.acl
+
+import teuthology.task_util.rgw as rgw_utils
+
+from teuthology import misc as teuthology
+from teuthology.task_util.rgw import rgwadmin
+
+log = logging.getLogger(__name__)
+
+
+def successful_ops(out):
+    """Extract total from the first summary entry (presumed to be only one)"""
+    summary = out['summary']
+    if len(summary) == 0:
+        return 0
+    entry = summary[0]
+    return entry['total']['successful_ops']
+
+
+def task(ctx, config):
+    """
+    Test radosgw-admin functionality against a running rgw instance.
+    """
+    global log
+    assert config is None or isinstance(config, list) \
+        or isinstance(config, dict), \
+        "task s3tests only supports a list or dictionary for configuration"
+    all_clients = ['client.{id}'.format(id=id_)
+                   for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+    if config is None:
+        config = all_clients
+    if isinstance(config, list):
+        config = dict.fromkeys(config)
+    clients = config.keys()
+
+    multi_region_run = rgw_utils.multi_region_enabled(ctx)
+
+    client = clients[0]  # default choice; multi-region code may overwrite this
+    if multi_region_run:
+        client = rgw_utils.get_master_client(ctx, clients)
+
+    # once the client is chosen, pull the host name and  assigned port out of
+    # the role_endpoints that were assigned by the rgw task
+    (remote_host, remote_port) = ctx.rgw.role_endpoints[client]
+
+    ##
+    user1='foo'
+    user2='fud'
+    subuser1='foo:foo1'
+    subuser2='foo:foo2'
+    display_name1='Foo'
+    display_name2='Fud'
+    email='foo@foo.com'
+    email2='bar@bar.com'
+    access_key='9te6NH5mcdcq0Tc5i8i1'
+    secret_key='Ny4IOauQoL18Gp2zM7lC1vLmoawgqcYP/YGcWfXu'
+    access_key2='p5YnriCv1nAtykxBrupQ'
+    secret_key2='Q8Tk6Q/27hfbFSYdSkPtUqhqx1GgzvpXa4WARozh'
+    swift_secret1='gpS2G9RREMrnbqlp29PP2D36kgPR1tm72n5fPYfL'
+    swift_secret2='ri2VJQcKSYATOY6uaDUX7pxgkW+W1YmC6OCxPHwy'
+
+    bucket_name='myfoo'
+    bucket_name2='mybar'
+
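+    # The boto connections below use the keys for user1 and user2; the users
+    # themselves are created later via radosgw-admin.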
+    # connect to rgw
+    connection = boto.s3.connection.S3Connection(
+        aws_access_key_id=access_key,
+        aws_secret_access_key=secret_key,
+        is_secure=False,
+        port=remote_port,
+        host=remote_host,
+        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
+        )
+    connection2 = boto.s3.connection.S3Connection(
+        aws_access_key_id=access_key2,
+        aws_secret_access_key=secret_key2,
+        is_secure=False,
+        port=remote_port,
+        host=remote_host,
+        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
+        )
+
+    # legend (test cases can be easily grep-ed out)
+    # TESTCASE 'testname','object','method','operation','assertion'
+    # TESTCASE 'info-nosuch','user','info','non-existent user','fails'
+    (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1])
+    assert err
+
+    # TESTCASE 'create-ok','user','create','w/all valid info','succeeds'
+    (err, out) = rgwadmin(ctx, client, [
+            'user', 'create',
+            '--uid', user1,
+            '--display-name', display_name1,
+            '--email', email,
+            '--access-key', access_key,
+            '--secret', secret_key,
+            '--max-buckets', '4'
+            ],
+            check_status=True)
+
+    # TESTCASE 'duplicate email','user','create','existing user email','fails'
+    (err, out) = rgwadmin(ctx, client, [
+            'user', 'create',
+            '--uid', user2,
+            '--display-name', display_name2,
+            '--email', email,
+            ])
+    assert err
+
+    # TESTCASE 'info-existing','user','info','existing user','returns correct info'
+    (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True)
+    assert out['user_id'] == user1
+    assert out['email'] == email
+    assert out['display_name'] == display_name1
+    assert len(out['keys']) == 1
+    assert out['keys'][0]['access_key'] == access_key
+    assert out['keys'][0]['secret_key'] == secret_key
+    assert not out['suspended']
+
+    # this whole block should only be run if regions have been configured
+    if multi_region_run:
+        rgw_utils.radosgw_agent_sync_all(ctx)
+        # post-sync, validate that user1 exists on the sync destination host
+        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
+            dest_client = c_config['dest']
+            (err, out) = rgwadmin(ctx, dest_client, ['metadata', 'list', 'user'])
+            (err, out) = rgwadmin(ctx, dest_client, ['user', 'info', '--uid', user1], check_status=True)
+            assert out['user_id'] == user1
+            assert out['email'] == email
+            assert out['display_name'] == display_name1
+            assert len(out['keys']) == 1
+            assert out['keys'][0]['access_key'] == access_key
+            assert out['keys'][0]['secret_key'] == secret_key
+            assert not out['suspended']
+
+        # compare the metadata between different regions, make sure it matches
+        log.debug('compare the metadata between different regions, make sure it matches')
+        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
+            source_client = c_config['src']
+            dest_client = c_config['dest']
+            (err1, out1) = rgwadmin(ctx, source_client,
+                ['metadata', 'get', 'user:{uid}'.format(uid=user1)], check_status=True)
+            (err2, out2) = rgwadmin(ctx, dest_client,
+                ['metadata', 'get', 'user:{uid}'.format(uid=user1)], check_status=True)
+            assert out1 == out2
+
+        # suspend a user on the master, then check the status on the destination
+        log.debug('suspend a user on the master, then check the status on the destination')
+        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
+            source_client = c_config['src']
+            dest_client = c_config['dest']
+            (err, out) = rgwadmin(ctx, source_client, ['user', 'suspend', '--uid', user1])
+            rgw_utils.radosgw_agent_sync_all(ctx)
+            (err, out) = rgwadmin(ctx, dest_client, ['user', 'info', '--uid', user1], check_status=True)
+            assert out['suspended']
+
+        # delete a user on the master, then check that it's gone on the destination
+        log.debug('delete a user on the master, then check that it\'s gone on the destination')
+        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
+            source_client = c_config['src']
+            dest_client = c_config['dest']
+            (err, out) = rgwadmin(ctx, source_client, ['user', 'rm', '--uid', user1], check_status=True)
+            rgw_utils.radosgw_agent_sync_all(ctx)
+            (err, out) = rgwadmin(ctx, source_client, ['user', 'info', '--uid', user1])
+            assert out is None
+            (err, out) = rgwadmin(ctx, dest_client, ['user', 'info', '--uid', user1])
+            assert out is None
+
+            # then recreate it so later tests pass
+            (err, out) = rgwadmin(ctx, client, [
+                'user', 'create',
+                '--uid', user1,
+                '--display-name', display_name1,
+                '--email', email,
+                '--access-key', access_key,
+                '--secret', secret_key,
+                '--max-buckets', '4'
+                ],
+                check_status=True)
+
+        # now do the multi-region bucket tests
+        log.debug('now do the multi-region bucket tests')
+
+        # Create a second user for the following tests
+        log.debug('Create a second user for the following tests')
+        (err, out) = rgwadmin(ctx, client, [
+            'user', 'create',
+            '--uid', user2,
+            '--display-name', display_name2,
+            '--email', email2,
+            '--access-key', access_key2,
+            '--secret', secret_key2,
+            '--max-buckets', '4'
+            ],
+            check_status=True)
+        (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user2], check_status=True)
+        assert out is not None
+
+        # create a bucket and do a sync
+        log.debug('create a bucket and do a sync')
+        bucket = connection.create_bucket(bucket_name2)
+        rgw_utils.radosgw_agent_sync_all(ctx)
+
+        # compare the metadata for the bucket between different regions, make sure it matches
+        log.debug('compare the metadata for the bucket between different regions, make sure it matches')
+        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
+            source_client = c_config['src']
+            dest_client = c_config['dest']
+            (err1, out1) = rgwadmin(ctx, source_client,
+                ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)],
+                check_status=True)
+            (err2, out2) = rgwadmin(ctx, dest_client,
+                ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)],
+                check_status=True)
+            assert out1 == out2
+
+            # get the bucket.instance info and compare that
+            src_bucket_id = out1['data']['bucket']['bucket_id']
+            dest_bucket_id = out2['data']['bucket']['bucket_id']
+            (err1, out1) = rgwadmin(ctx, source_client, ['metadata', 'get',
+                'bucket.instance:{bucket_name}:{bucket_instance}'.format(
+                bucket_name=bucket_name2,bucket_instance=src_bucket_id)],
+                check_status=True)
+            (err2, out2) = rgwadmin(ctx, dest_client, ['metadata', 'get',
+                'bucket.instance:{bucket_name}:{bucket_instance}'.format(
+                bucket_name=bucket_name2,bucket_instance=dest_bucket_id)],
+                check_status=True)
+            del out1['data']['bucket_info']['bucket']['pool']
+            del out1['data']['bucket_info']['bucket']['index_pool']
+            del out2['data']['bucket_info']['bucket']['pool']
+            del out2['data']['bucket_info']['bucket']['index_pool']
+            assert out1 == out2
+
+        same_region = 0
+        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
+            source_client = c_config['src']
+            dest_client = c_config['dest']
+
+            source_region = rgw_utils.region_for_client(ctx, source_client)
+            dest_region = rgw_utils.region_for_client(ctx, dest_client)
+
+            # 301 is only returned for requests to something in a different region
+            if source_region == dest_region:
+                log.debug('301 is only returned for requests to something in a different region')
+                same_region += 1
+                continue
+
+            # Attempt to create a new connection with user1 to the destination RGW
+            log.debug('Attempt to create a new connection with user1 to the destination RGW')
+            # and use that to attempt a delete (that should fail)
+            exception_encountered = False
+            try:
+                (dest_remote_host, dest_remote_port) = ctx.rgw.role_endpoints[dest_client]
+                connection_dest = boto.s3.connection.S3Connection(
+                    aws_access_key_id=access_key,
+                    aws_secret_access_key=secret_key,
+                    is_secure=False,
+                    port=dest_remote_port,
+                    host=dest_remote_host,
+                    calling_format=boto.s3.connection.OrdinaryCallingFormat(),
+                    )
+
+                # this should fail
+                connection_dest.delete_bucket(bucket_name2)
+            except boto.exception.S3ResponseError as e:
+                assert e.status == 301
+                exception_encountered = True
+
+            # confirm that the expected exception was seen
+            assert exception_encountered
+
+            # now delete the bucket on the source RGW and do another sync
+            log.debug('now delete the bucket on the source RGW and do another sync')
+            bucket.delete()
+            rgw_utils.radosgw_agent_sync_all(ctx)
+
+        if same_region == len(ctx.radosgw_agent.config):
+            bucket.delete()
+            rgw_utils.radosgw_agent_sync_all(ctx)
+
+        # make sure that the bucket no longer exists in either region
+        log.debug('make sure that the bucket no longer exists in either region')
+        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
+            source_client = c_config['src']
+            dest_client = c_config['dest']
+            (err1, out1) = rgwadmin(ctx, source_client, ['metadata', 'get',
+                'bucket:{bucket_name}'.format(bucket_name=bucket_name2)])
+            (err2, out2) = rgwadmin(ctx, dest_client, ['metadata', 'get',
+                'bucket:{bucket_name}'.format(bucket_name=bucket_name2)])
+            # Both of the previous calls should have errors due to requesting
+            # metadata for non-existent buckets
+            assert err1
+            assert err2
+
+        # create a bucket and then sync it
+        log.debug('create a bucket and then sync it')
+        bucket = connection.create_bucket(bucket_name2)
+        rgw_utils.radosgw_agent_sync_all(ctx)
+
+        # compare the metadata for the bucket between different regions, make sure it matches
+        log.debug('compare the metadata for the bucket between different regions, make sure it matches')
+        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
+            source_client = c_config['src']
+            dest_client = c_config['dest']
+            (err1, out1) = rgwadmin(ctx, source_client,
+                ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)],
+                check_status=True)
+            (err2, out2) = rgwadmin(ctx, dest_client,
+                ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)],
+                check_status=True)
+            assert out1 == out2
+
+        # Now delete the bucket and recreate it with a different user
+        log.debug('Now delete the bucket and recreate it with a different user')
+        # within the same window of time and then sync.
+        bucket.delete()
+        bucket = connection2.create_bucket(bucket_name2)
+        rgw_utils.radosgw_agent_sync_all(ctx)
+
+        # compare the metadata for the bucket between different regions, make sure it matches
+        log.debug('compare the metadata for the bucket between different regions, make sure it matches')
+        # user2 should own the bucket in both regions
+        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
+            source_client = c_config['src']
+            dest_client = c_config['dest']
+            (err1, out1) = rgwadmin(ctx, source_client,
+                ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)],
+                check_status=True)
+            (err2, out2) = rgwadmin(ctx, dest_client,
+                ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)],
+                check_status=True)
+            assert out1 == out2
+            assert out1['data']['owner'] == user2
+            assert out1['data']['owner'] != user1
+
+        # now we're going to use this bucket to test meta-data update propagation
+        log.debug('now we\'re going to use this bucket to test meta-data update propagation')
+        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
+            source_client = c_config['src']
+            dest_client = c_config['dest']
+
+            # get the metadata so we can tweak it
+            log.debug('get the metadata so we can tweak it')
+            (err, orig_data) = rgwadmin(ctx, source_client,
+                ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)],
+                check_status=True)
+
+            # manually edit mtime for this bucket to be 300 seconds in the past
+            log.debug('manually edit mtime for this bucket to be 300 seconds in the past')
+            new_data = copy.deepcopy(orig_data)
+            new_data['mtime'] =  orig_data['mtime'] - 300
+            assert new_data != orig_data
+            (err, out) = rgwadmin(ctx, source_client,
+                ['metadata', 'put', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)],
+                stdin=StringIO(json.dumps(new_data)),
+                check_status=True)
+
+            # get the metadata and make sure that the 'put' worked
+            log.debug('get the metadata and make sure that the \'put\' worked')
+            (err, out) = rgwadmin(ctx, source_client,
+                ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)],
+                check_status=True)
+            assert out == new_data
+
+            # sync to propagate the new metadata
+            log.debug('sync to propagate the new metadata')
+            rgw_utils.radosgw_agent_sync_all(ctx)
+
+            # get the metadata from the dest and compare it to what we just set
+            log.debug('get the metadata from the dest and compare it to what we just set')
+            # and what the source region has.
+            (err1, out1) = rgwadmin(ctx, source_client,
+                ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)],
+                check_status=True)
+            (err2, out2) = rgwadmin(ctx, dest_client,
+                ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)],
+                check_status=True)
+            # by the transitive property, the dest metadata now matches new_data as well
+            assert out1 == out2
+            assert out1 == new_data
+
+        # now we delete the bucket
+        log.debug('now we delete the bucket')
+        bucket.delete()
+
+        log.debug('sync to propagate the deleted bucket')
+        rgw_utils.radosgw_agent_sync_all(ctx)
+
+        # Delete user2 as later tests do not expect it to exist.
+        # Verify that it is gone on both regions
+        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
+            source_client = c_config['src']
+            dest_client = c_config['dest']
+            (err, out) = rgwadmin(ctx, source_client,
+                ['user', 'rm', '--uid', user2], check_status=True)
+            rgw_utils.radosgw_agent_sync_all(ctx)
+            # The two 'user info' calls should fail and not return any data
+            # since we just deleted this user.
+            (err, out) = rgwadmin(ctx, source_client, ['user', 'info', '--uid', user2])
+            assert out is None
+            (err, out) = rgwadmin(ctx, dest_client, ['user', 'info', '--uid', user2])
+            assert out is None
+
+        # Test data sync
+
+        # First create a bucket for data sync test purpose
+        bucket = connection.create_bucket(bucket_name + 'data')
+
+        # Create a tiny file and check if in sync
+        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
+            if c_config.get('metadata-only'):
+                continue
+
+            source_client = c_config['src']
+            dest_client = c_config['dest']
+            k = boto.s3.key.Key(bucket)
+            k.key = 'tiny_file'
+            k.set_contents_from_string("123456789")
+            time.sleep(rgw_utils.radosgw_data_log_window(ctx, source_client))
+            rgw_utils.radosgw_agent_sync_all(ctx, data=True)
+            (dest_host, dest_port) = ctx.rgw.role_endpoints[dest_client]
+            dest_connection = boto.s3.connection.S3Connection(
+                aws_access_key_id=access_key,
+                aws_secret_access_key=secret_key,
+                is_secure=False,
+                port=dest_port,
+                host=dest_host,
+                calling_format=boto.s3.connection.OrdinaryCallingFormat(),
+                )
+            dest_k = dest_connection.get_bucket(bucket_name + 'data').get_key('tiny_file')
+            assert k.get_contents_as_string() == dest_k.get_contents_as_string()
+
+            # check that deleting it removes it from the dest zone
+            k.delete()
+            time.sleep(rgw_utils.radosgw_data_log_window(ctx, source_client))
+            rgw_utils.radosgw_agent_sync_all(ctx, data=True)
+
+            dest_bucket = dest_connection.get_bucket(bucket_name + 'data')
+            dest_k = dest_bucket.get_key('tiny_file')
+            assert dest_k is None, 'object not deleted from destination zone'
+
+        # finally we delete the bucket
+        bucket.delete()
+
+        bucket = connection.create_bucket(bucket_name + 'data2')
+        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
+            if c_config.get('metadata-only'):
+                continue
+
+            source_client = c_config['src']
+            dest_client = c_config['dest']
+            (dest_host, dest_port) = ctx.rgw.role_endpoints[dest_client]
+            dest_connection = boto.s3.connection.S3Connection(
+                aws_access_key_id=access_key,
+                aws_secret_access_key=secret_key,
+                is_secure=False,
+                port=dest_port,
+                host=dest_host,
+                calling_format=boto.s3.connection.OrdinaryCallingFormat(),
+                )
+            for i in range(20):
+                k = boto.s3.key.Key(bucket)
+                k.key = 'tiny_file_' + str(i)
+                k.set_contents_from_string(str(i) * 100)
+
+            time.sleep(rgw_utils.radosgw_data_log_window(ctx, source_client))
+            rgw_utils.radosgw_agent_sync_all(ctx, data=True)
+
+            for i in range(20):
+                dest_k = dest_connection.get_bucket(bucket_name + 'data2').get_key('tiny_file_' + str(i))
+                assert (str(i) * 100) == dest_k.get_contents_as_string()
+                k = boto.s3.key.Key(bucket)
+                k.key = 'tiny_file_' + str(i)
+                k.delete()
+
+            # check that deleting removes the objects from the dest zone
+            time.sleep(rgw_utils.radosgw_data_log_window(ctx, source_client))
+            rgw_utils.radosgw_agent_sync_all(ctx, data=True)
+
+            for i in range(20):
+                dest_bucket = dest_connection.get_bucket(bucket_name + 'data2')
+                dest_k = dest_bucket.get_key('tiny_file_' + str(i))
+                assert dest_k is None, 'object %d not deleted from destination zone' % i
+        bucket.delete()
+
+    # end of 'if multi_region_run:'
+
+    # TESTCASE 'suspend-ok','user','suspend','active user','succeeds'
+    (err, out) = rgwadmin(ctx, client, ['user', 'suspend', '--uid', user1],
+        check_status=True)
+
+    # TESTCASE 'suspend-suspended','user','suspend','suspended user','succeeds w/advisory'
+    (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True)
+    assert out['suspended']
+
+    # TESTCASE 're-enable','user','enable','suspended user','succeeds'
+    (err, out) = rgwadmin(ctx, client, ['user', 'enable', '--uid', user1], check_status=True)
+
+    # TESTCASE 'info-re-enabled','user','info','re-enabled user','no longer suspended'
+    (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True)
+    assert not out['suspended']
+
+    # TESTCASE 'add-keys','key','create','w/valid info','succeeds'
+    (err, out) = rgwadmin(ctx, client, [
+            'key', 'create', '--uid', user1,
+            '--access-key', access_key2, '--secret', secret_key2,
+            ], check_status=True)
+
+    # TESTCASE 'info-new-key','user','info','after key addition','returns all keys'
+    (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1],
+        check_status=True)
+    assert len(out['keys']) == 2
+    assert out['keys'][0]['access_key'] == access_key2 or out['keys'][1]['access_key'] == access_key2
+    assert out['keys'][0]['secret_key'] == secret_key2 or out['keys'][1]['secret_key'] == secret_key2
+
+    # TESTCASE 'rm-key','key','rm','newly added key','succeeds, key is removed'
+    (err, out) = rgwadmin(ctx, client, [
+            'key', 'rm', '--uid', user1,
+            '--access-key', access_key2,
+            ], check_status=True)
+    assert len(out['keys']) == 1
+    assert out['keys'][0]['access_key'] == access_key
+    assert out['keys'][0]['secret_key'] == secret_key
+
+    # TESTCASE 'add-swift-key','key','create','swift key','succeeds'
+    subuser_access = 'full'
+    subuser_perm = 'full-control'
+
+    (err, out) = rgwadmin(ctx, client, [
+            'subuser', 'create', '--subuser', subuser1,
+            '--access', subuser_access
+            ], check_status=True)
+
+    # TESTCASE 'add-swift-key','key','create','swift key','succeeds'
+    (err, out) = rgwadmin(ctx, client, [
+            'subuser', 'modify', '--subuser', subuser1,
+            '--secret', swift_secret1,
+            '--key-type', 'swift',
+            ], check_status=True)
+
+    # TESTCASE 'subuser-perm-mask', 'subuser', 'info', 'test subuser perm mask durability', 'succeeds'
+    (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1])
+
+    assert out['subusers'][0]['permissions'] == subuser_perm
+
+    # TESTCASE 'info-swift-key','user','info','after key addition','returns all keys'
+    (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True)
+    assert len(out['swift_keys']) == 1
+    assert out['swift_keys'][0]['user'] == subuser1
+    assert out['swift_keys'][0]['secret_key'] == swift_secret1
+
+    # TESTCASE 'add-swift-subuser','key','create','swift sub-user key','succeeds'
+    (err, out) = rgwadmin(ctx, client, [
+            'subuser', 'create', '--subuser', subuser2,
+            '--secret', swift_secret2,
+            '--key-type', 'swift',
+            ], check_status=True)
+
+    # TESTCASE 'info-swift-subuser','user','info','after key addition','returns all sub-users/keys'
+    (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True)
+    assert len(out['swift_keys']) == 2
+    assert out['swift_keys'][0]['user'] == subuser2 or out['swift_keys'][1]['user'] == subuser2
+    assert out['swift_keys'][0]['secret_key'] == swift_secret2 or out['swift_keys'][1]['secret_key'] == swift_secret2
+
+    # TESTCASE 'rm-swift-key1','key','rm','subuser','succeeds, one key is removed'
+    (err, out) = rgwadmin(ctx, client, [
+            'key', 'rm', '--subuser', subuser1,
+            '--key-type', 'swift',
+            ], check_status=True)
+    assert len(out['swift_keys']) == 1
+
+    # TESTCASE 'rm-subuser','subuser','rm','subuser','success, subuser is removed'
+    (err, out) = rgwadmin(ctx, client, [
+            'subuser', 'rm', '--subuser', subuser1,
+            ], check_status=True)
+    assert len(out['subusers']) == 1
+
+    # TESTCASE 'rm-subuser-with-keys','subuser','rm','subuser','succeeds, second subuser and key is removed'
+    (err, out) = rgwadmin(ctx, client, [
+            'subuser', 'rm', '--subuser', subuser2,
+            '--key-type', 'swift', '--purge-keys',
+            ], check_status=True)
+    assert len(out['swift_keys']) == 0
+    assert len(out['subusers']) == 0
+
+    # TESTCASE 'bucket-stats','bucket','stats','no session/buckets','succeeds, empty list'
+    (err, out) = rgwadmin(ctx, client, ['bucket', 'stats', '--uid', user1],
+        check_status=True)
+    assert len(out) == 0
+
+    if multi_region_run:
+        rgw_utils.radosgw_agent_sync_all(ctx)
+
+    # TESTCASE 'bucket-stats2','bucket','stats','no buckets','succeeds, empty list'
+    (err, out) = rgwadmin(ctx, client, ['bucket', 'list', '--uid', user1], check_status=True)
+    assert len(out) == 0
+
+    # create a first bucket
+    bucket = connection.create_bucket(bucket_name)
+
+    # TESTCASE 'bucket-list','bucket','list','one bucket','succeeds, expected list'
+    (err, out) = rgwadmin(ctx, client, ['bucket', 'list', '--uid', user1], check_status=True)
+    assert len(out) == 1
+    assert out[0] == bucket_name
+
+    # TESTCASE 'bucket-list-all','bucket','list','all buckets','succeeds, expected list'
+    (err, out) = rgwadmin(ctx, client, ['bucket', 'list'], check_status=True)
+    assert len(out) >= 1
+    assert bucket_name in out
+
+    # TESTCASE 'max-bucket-limit','bucket','create','4 buckets','5th bucket fails due to max buckets == 4'
+    bucket2 = connection.create_bucket(bucket_name + '2')
+    bucket3 = connection.create_bucket(bucket_name + '3')
+    bucket4 = connection.create_bucket(bucket_name + '4')
+    # the 5th should fail.
+    failed = False
+    try:
+        connection.create_bucket(bucket_name + '5')
+    except Exception:
+        failed = True
+    assert failed
+
+    # delete the buckets
+    bucket2.delete()
+    bucket3.delete()
+    bucket4.delete()
+
+    # TESTCASE 'bucket-stats3','bucket','stats','new empty bucket','succeeds, empty list'
+    (err, out) = rgwadmin(ctx, client, [
+            'bucket', 'stats', '--bucket', bucket_name], check_status=True)
+    assert out['owner'] == user1
+    bucket_id = out['id']
+
+    # TESTCASE 'bucket-stats4','bucket','stats','new empty bucket','succeeds, expected bucket ID'
+    (err, out) = rgwadmin(ctx, client, ['bucket', 'stats', '--uid', user1], check_status=True)
+    assert len(out) == 1
+    assert out[0]['id'] == bucket_id    # does it return the same ID twice in a row?
+
+    # use some space
+    key = boto.s3.key.Key(bucket)
+    key.set_contents_from_string('one')
+
+    # TESTCASE 'bucket-stats5','bucket','stats','after creating key','succeeds, lists one non-empty object'
+    (err, out) = rgwadmin(ctx, client, [
+            'bucket', 'stats', '--bucket', bucket_name], check_status=True)
+    assert out['id'] == bucket_id
+    assert out['usage']['rgw.main']['num_objects'] == 1
+    assert out['usage']['rgw.main']['size_kb'] > 0
+
+    # reclaim it
+    key.delete()
+
+    # TESTCASE 'bucket unlink', 'bucket', 'unlink', 'unlink bucket from user', 'succeeds'
+    (err, out) = rgwadmin(ctx, client,
+        ['bucket', 'unlink', '--uid', user1, '--bucket', bucket_name],
+        check_status=True)
+
+    # create a second user to link the bucket to
+    (err, out) = rgwadmin(ctx, client, [
+            'user', 'create',
+            '--uid', user2,
+            '--display-name', display_name2,
+            '--access-key', access_key2,
+            '--secret', secret_key2,
+            '--max-buckets', '1',
+            ],
+            check_status=True)
+
+    # try creating an object with the first user before the bucket is relinked
+    denied = False
+    key = boto.s3.key.Key(bucket)
+
+    try:
+        key.set_contents_from_string('two')
+    except boto.exception.S3ResponseError:
+        denied = True
+
+    assert not denied
+
+    # delete the object
+    key.delete()
+
+    # link the bucket to another user
+    (err, out) = rgwadmin(ctx, client, ['bucket', 'link', '--uid', user2, '--bucket', bucket_name],
+        check_status=True)
+
+    # try to remove user, should fail (has a linked bucket)
+    (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user2])
+    assert err
+
+    # TESTCASE 'bucket unlink', 'bucket', 'unlink', 'unlink bucket from user', 'succeeds, bucket unlinked'
+    (err, out) = rgwadmin(ctx, client, ['bucket', 'unlink', '--uid', user2, '--bucket', bucket_name],
+        check_status=True)
+
+    # relink the bucket to the first user and delete the second user
+    (err, out) = rgwadmin(ctx, client,
+        ['bucket', 'link', '--uid', user1, '--bucket', bucket_name],
+        check_status=True)
+
+    (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user2],
+        check_status=True)
+
+    # TESTCASE 'object-rm', 'object', 'rm', 'remove object', 'succeeds, object is removed'
+
+    # upload an object
+    object_name = 'four'
+    key = boto.s3.key.Key(bucket, object_name)
+    key.set_contents_from_string(object_name)
+
+    # now delete it
+    (err, out) = rgwadmin(ctx, client,
+        ['object', 'rm', '--bucket', bucket_name, '--object', object_name],
+        check_status=True)
+
+    # TESTCASE 'bucket-stats6','bucket','stats','after deleting key','succeeds, lists no objects'
+    (err, out) = rgwadmin(ctx, client, [
+            'bucket', 'stats', '--bucket', bucket_name],
+            check_status=True)
+    assert out['id'] == bucket_id
+    assert out['usage']['rgw.main']['num_objects'] == 0
+
+    # list log objects
+    # TESTCASE 'log-list','log','list','after activity','succeeds, lists at least one log object'
+    (err, out) = rgwadmin(ctx, client, ['log', 'list'], check_status=True)
+    assert len(out) > 0
+
+    for obj in out:
+        # TESTCASE 'log-show','log','show','after activity','returns expected info'
+        if obj[:4] == 'meta' or obj[:4] == 'data':
+            continue
+
+        (err, rgwlog) = rgwadmin(ctx, client, ['log', 'show', '--object', obj],
+            check_status=True)
+        assert len(rgwlog) > 0
+
+        # exempt bucket_name2 from checking as it was only used for multi-region tests
+        assert rgwlog['bucket'].find(bucket_name) == 0 or rgwlog['bucket'].find(bucket_name2) == 0
+        assert rgwlog['bucket'] != bucket_name or rgwlog['bucket_id'] == bucket_id
+        assert rgwlog['bucket_owner'] == user1 or rgwlog['bucket'] == bucket_name + '5' or rgwlog['bucket'] == bucket_name2
+        for entry in rgwlog['log_entries']:
+            log.debug('checking log entry: %s', entry)
+            assert entry['bucket'] == rgwlog['bucket']
+            possible_buckets = [bucket_name + '5', bucket_name2]
+            user = entry['user']
+            assert user == user1 or user.endswith('system-user') or \
+                rgwlog['bucket'] in possible_buckets
+
+        # TESTCASE 'log-rm','log','rm','delete log objects','succeeds'
+        (err, out) = rgwadmin(ctx, client, ['log', 'rm', '--object', obj],
+            check_status=True)
+
+    # TODO: show log by bucket+date
+
+    # need to wait for all usage data to get flushed; this normally takes about 30 seconds, but allow up to 20 minutes
+    timestamp = time.time()
+    while time.time() - timestamp <= (20 * 60):      # wait up to 20 minutes
+        (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--categories', 'delete_obj'])  # last operation we did is delete obj, wait for it to flush
+        if successful_ops(out) > 0:
+            break
+        time.sleep(1)
+
+    assert time.time() - timestamp <= (20 * 60)
+
+    # TESTCASE 'usage-show' 'usage' 'show' 'all usage' 'succeeds'
+    (err, out) = rgwadmin(ctx, client, ['usage', 'show'], check_status=True)
+    assert len(out['entries']) > 0
+    assert len(out['summary']) > 0
+    user_summary = out['summary'][0]
+    total = user_summary['total']
+    assert total['successful_ops'] > 0
+
+    # TESTCASE 'usage-show2' 'usage' 'show' 'user usage' 'succeeds'
+    (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--uid', user1],
+        check_status=True)
+    assert len(out['entries']) > 0
+    assert len(out['summary']) > 0
+    user_summary = out['summary'][0]
+    for entry in user_summary['categories']:
+        assert entry['successful_ops'] > 0
+    assert user_summary['user'] == user1
+
+    # TESTCASE 'usage-show3' 'usage' 'show' 'user usage categories' 'succeeds'
+    test_categories = ['create_bucket', 'put_obj', 'delete_obj', 'delete_bucket']
+    for cat in test_categories:
+        (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--uid', user1, '--categories', cat],
+            check_status=True)
+        assert len(out['summary']) > 0
+        user_summary = out['summary'][0]
+        assert user_summary['user'] == user1
+        assert len(user_summary['categories']) == 1
+        entry = user_summary['categories'][0]
+        assert entry['category'] == cat
+        assert entry['successful_ops'] > 0
+
+    # TESTCASE 'usage-trim' 'usage' 'trim' 'user usage' 'succeeds, usage removed'
+    (err, out) = rgwadmin(ctx, client, ['usage', 'trim', '--uid', user1],
+        check_status=True)
+    (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--uid', user1],
+        check_status=True)
+    assert len(out['entries']) == 0
+    assert len(out['summary']) == 0
+
+    # TESTCASE 'user-suspend2','user','suspend','existing user','succeeds'
+    (err, out) = rgwadmin(ctx, client, ['user', 'suspend', '--uid', user1],
+        check_status=True)
+
+    # TESTCASE 'user-suspend3','user','suspend','suspended user','cannot write objects'
+    try:
+        key = boto.s3.key.Key(bucket)
+        key.set_contents_from_string('five')
+    except boto.exception.S3ResponseError as e:
+        assert e.status == 403
+
+    # TESTCASE 'user-renable2','user','enable','suspended user','succeeds'
+    (err, out) = rgwadmin(ctx, client, ['user', 'enable', '--uid', user1],
+        check_status=True)
+
+    # TESTCASE 'user-renable3','user','enable','reenabled user','can write objects'
+    key = boto.s3.key.Key(bucket)
+    key.set_contents_from_string('six')
+
+    # TESTCASE 'gc-list', 'gc', 'list', 'get list of objects ready for garbage collection'
+
+    # create an object large enough to be split into multiple parts
+    test_string = 'foo'*10000000
+
+    big_key = boto.s3.key.Key(bucket)
+    big_key.set_contents_from_string(test_string)
+
+    # now delete the head
+    big_key.delete()
+
+    # wait a bit to give the garbage collector time to cycle
+    time.sleep(15)
+
+    (err, out) = rgwadmin(ctx, client, ['gc', 'list'])
+
+    assert len(out) > 0
+
+    # TESTCASE 'gc-process', 'gc', 'process', 'manually collect garbage'
+    (err, out) = rgwadmin(ctx, client, ['gc', 'process'], check_status=True)
+
+    # confirm that the garbage list is now empty
+    (err, out) = rgwadmin(ctx, client, ['gc', 'list'])
+
+    assert len(out) == 0
+
+    # TESTCASE 'rm-user-buckets','user','rm','existing user','fails, still has buckets'
+    (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user1])
+    assert err
+
+    # delete should fail because ``key`` still exists
+    try:
+        bucket.delete()
+    except boto.exception.S3ResponseError as e:
+        assert e.status == 409
+
+    key.delete()
+    bucket.delete()
+
+    # TESTCASE 'policy', 'bucket', 'policy', 'get bucket policy', 'returns S3 policy'
+    bucket = connection.create_bucket(bucket_name)
+
+    # create an object
+    key = boto.s3.key.Key(bucket)
+    key.set_contents_from_string('seven')
+
+    # should be private already but guarantee it
+    key.set_acl('private')
+
+    (err, out) = rgwadmin(ctx, client,
+        ['policy', '--bucket', bucket.name, '--object', key.key],
+        check_status=True)
+
+    acl = key.get_xml_acl()
+
+    assert acl == out.strip('\n')
+
+    # add another grantee by making the object public read
+    key.set_acl('public-read')
+
+    (err, out) = rgwadmin(ctx, client,
+        ['policy', '--bucket', bucket.name, '--object', key.key],
+        check_status=True)
+
+    acl = key.get_xml_acl()
+    assert acl == out.strip('\n')
+
+    # TESTCASE 'rm-bucket', 'bucket', 'rm', 'bucket with objects', 'succeeds'
+    bucket = connection.create_bucket(bucket_name)
+    key_name = ['eight', 'nine', 'ten', 'eleven']
+    for i in range(4):
+        key = boto.s3.key.Key(bucket)
+        key.set_contents_from_string(key_name[i])
+
+    (err, out) = rgwadmin(ctx, client,
+        ['bucket', 'rm', '--bucket', bucket_name, '--purge-objects'],
+        check_status=True)
+
+    # TESTCASE 'caps-add', 'caps', 'add', 'add user cap', 'succeeds'
+    caps = 'user=read'
+    (err, out) = rgwadmin(ctx, client, ['caps', 'add', '--uid', user1, '--caps', caps])
+
+    assert out['caps'][0]['perm'] == 'read'
+
+    # TESTCASE 'caps-rm', 'caps', 'rm', 'remove existing cap from user', 'succeeds'
+    (err, out) = rgwadmin(ctx, client, ['caps', 'rm', '--uid', user1, '--caps', caps])
+
+    assert not out['caps']
+
+    # TESTCASE 'rm-user','user','rm','existing user','fails, still has buckets'
+    bucket = connection.create_bucket(bucket_name)
+    key = boto.s3.key.Key(bucket)
+
+    (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user1])
+    assert err
+
+    # TESTCASE 'rm-user2', 'user', 'rm', 'user with data', 'succeeds'
+    bucket = connection.create_bucket(bucket_name)
+    key = boto.s3.key.Key(bucket)
+    key.set_contents_from_string('twelve')
+
+    (err, out) = rgwadmin(ctx, client,
+        ['user', 'rm', '--uid', user1, '--purge-data' ],
+        check_status=True)
+
+    # TESTCASE 'rm-user3','user','info','deleted user','fails'
+    (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1])
+    assert err
+
+    # TESTCASE 'zone-info', 'zone', 'get', 'get zone info', 'succeeds, has default placement rule'
+    #
+
+    (err, out) = rgwadmin(ctx, client, ['zone', 'get'])
+    orig_placement_pools = len(out['placement_pools'])
+
+    # This check was removed: it is not correct to assume that the zone has a
+    # default placement rule; that depends on how the zone was set up earlier.
+    #
+    # assert len(out) > 0
+    # assert len(out['placement_pools']) == 1
+
+    # default_rule = out['placement_pools'][0]
+    # assert default_rule['key'] == 'default-placement'
+
+    rule = {'key': 'new-placement', 'val': {'data_pool': '.rgw.buckets.2', 'index_pool': '.rgw.buckets.index.2'}}
+
+    out['placement_pools'].append(rule)
+
+    (err, out) = rgwadmin(ctx, client, ['zone', 'set'],
+        stdin=StringIO(json.dumps(out)),
+        check_status=True)
+
+    (err, out) = rgwadmin(ctx, client, ['zone', 'get'])
+    assert len(out) > 0
+    assert len(out['placement_pools']) == orig_placement_pools + 1
diff --git a/tasks/radosgw_admin_rest.py b/tasks/radosgw_admin_rest.py
new file mode 100644 (file)
index 0000000..866ff4f
--- /dev/null
@@ -0,0 +1,678 @@
+"""
+Run a series of rgw admin commands through the rest interface.
+
+The test cases in this file have been annotated for inventory.
+To extract the inventory (in csv format) use the command:
+
+   grep '^ *# TESTCASE' | sed 's/^ *# TESTCASE //'
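+
+For example (illustrative only), an annotation that appears in the code as
+"# TESTCASE 'info-nosuch','user','info','non-existent user','fails'" becomes
+the csv row 'info-nosuch','user','info','non-existent user','fails'.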
+
+"""
+from cStringIO import StringIO
+import logging
+import json
+
+import boto.exception
+import boto.s3.connection
+import boto.s3.acl
+
+import requests
+import time
+
+from boto.connection import AWSAuthConnection
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+def successful_ops(out):
+    """
+    Extract the number of successful operations from 'usage show' output.
+
+    :param out: dict parsed from the usage json (contains a 'summary' list)
+    """
+    summary = out['summary']
+    if len(summary) == 0:
+        return 0
+    entry = summary[0]
+    return entry['total']['successful_ops']
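+
+# A minimal sketch of the 'usage show' output shape this helper expects; the
+# values below are invented purely for illustration:
+#
+#   out = {
+#       'entries': [...],
+#       'summary': [
+#           {'user': 'foo',
+#            'categories': [{'category': 'put_obj', 'successful_ops': 3}],
+#            'total': {'successful_ops': 3}},
+#       ],
+#   }
+#
+#   successful_ops(out)   # -> 3; returns 0 when 'summary' is empty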
+
+def rgwadmin(ctx, client, cmd):
+    """
+    Perform rgw admin command
+
+    :param client: client
+    :param cmd: command to execute.
+    :return: command exit status, json result.
+    """
+    log.info('radosgw-admin: %s' % cmd)
+    testdir = teuthology.get_testdir(ctx)
+    pre = [
+        'adjust-ulimits',
+        'ceph-coverage',
+        '{tdir}/archive/coverage'.format(tdir=testdir),
+        'radosgw-admin',
+        '--log-to-stderr',
+        '--format', 'json',
+        ]
+    pre.extend(cmd)
+    (remote,) = ctx.cluster.only(client).remotes.iterkeys()
+    proc = remote.run(
+        args=pre,
+        check_status=False,
+        stdout=StringIO(),
+        stderr=StringIO(),
+        )
+    r = proc.exitstatus
+    out = proc.stdout.getvalue()
+    j = None
+    if not r and out != '':
+        try:
+            j = json.loads(out)
+            log.info(' json result: %s' % j)
+        except ValueError:
+            j = out
+            log.info(' raw result: %s' % j)
+    return (r, j)
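+
+# Usage sketch (the client role and uid below are placeholders matching the
+# values this task happens to use, not part of the helper itself):
+#
+#   (err, out) = rgwadmin(ctx, 'client.0', ['user', 'info', '--uid', 'foo'])
+#   if not err:
+#       assert out['user_id'] == 'foo'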
+
+
+def rgwadmin_rest(connection, cmd, params=None, headers=None, raw=False):
+    """
+    Perform a radosgw-admin command through the RGW admin REST API.
+    """
+    log.info('radosgw-admin-rest: %s %s' % (cmd, params))
+    put_cmds = ['create', 'link', 'add']
+    post_cmds = ['unlink', 'modify']
+    delete_cmds = ['trim', 'rm', 'process']
+    get_cmds = ['check', 'info', 'show', 'list']
+
+    bucket_sub_resources = ['object', 'policy', 'index']
+    user_sub_resources = ['subuser', 'key', 'caps']
+    zone_sub_resources = ['pool', 'log', 'garbage']
+
+    def get_cmd_method_and_handler(cmd):
+        """
+        Get the rest command and handler from information in cmd and
+        from the imported requests object.
+        """
+        if cmd[1] in put_cmds:
+            return 'PUT', requests.put
+        elif cmd[1] in delete_cmds:
+            return 'DELETE', requests.delete
+        elif cmd[1] in post_cmds:
+            return 'POST', requests.post
+        elif cmd[1] in get_cmds:
+            return 'GET', requests.get
+
+    def get_resource(cmd):
+        """
+        Get the name of the resource from information in cmd.
+        """
+        if cmd[0] == 'bucket' or cmd[0] in bucket_sub_resources:
+            if cmd[0] == 'bucket':
+                return 'bucket', ''
+            else:
+                return 'bucket', cmd[0]
+        elif cmd[0] == 'user' or cmd[0] in user_sub_resources:
+            if cmd[0] == 'user':
+                return 'user', ''
+            else:
+                return 'user', cmd[0]
+        elif cmd[0] == 'usage':
+            return 'usage', ''
+        elif cmd[0] == 'zone' or cmd[0] in zone_sub_resources:
+            if cmd[0] == 'zone':
+                return 'zone', ''
+            else:
+                return 'zone', cmd[0]
+
+    def build_admin_request(conn, method, resource = '', headers=None, data='',
+            query_args=None, params=None):
+        """
+        Build an administrative request adapted from the build_request()
+        method of boto.connection
+        """
+
+        path = conn.calling_format.build_path_base('admin', resource)
+        auth_path = conn.calling_format.build_auth_path('admin', resource)
+        host = conn.calling_format.build_host(conn.server_name(), 'admin')
+        if query_args:
+            path += '?' + query_args
+            boto.log.debug('path=%s' % path)
+            auth_path += '?' + query_args
+            boto.log.debug('auth_path=%s' % auth_path)
+        return AWSAuthConnection.build_base_http_request(conn, method, path,
+                auth_path, params, headers, data, host)
+
+    method, handler = get_cmd_method_and_handler(cmd)
+    resource, query_args = get_resource(cmd)
+    request = build_admin_request(connection, method, resource,
+            query_args=query_args, headers=headers)
+
+    url = '{protocol}://{host}{path}'.format(protocol=request.protocol,
+            host=request.host, path=request.path)
+
+    request.authorize(connection=connection)
+    result = handler(url, params=params, headers=request.headers)
+
+    if raw:
+        log.info(' text result: %s' % result.text)
+        return result.status_code, result.text
+    else:
+        log.info(' json result: %s' % result.json())
+        return result.status_code, result.json()
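+
+# A rough sketch of how cmd pairs map onto admin REST requests; admin_conn is
+# the S3Connection built with the admin user's keys in task() below, and the
+# uid/subuser values are placeholders. Only the resource path is shown here;
+# the entries in params are sent as request parameters:
+#
+#   rgwadmin_rest(admin_conn, ['user', 'create'], {'uid': 'foo', 'display-name': 'Foo'})
+#       -> PUT /admin/user
+#   rgwadmin_rest(admin_conn, ['subuser', 'rm'], {'subuser': 'foo:foo1'})
+#       -> DELETE /admin/user?subuser
+#   rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid': 'foo'})
+#       -> GET /admin/usage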
+
+
+def task(ctx, config):
+    """
+    Test radosgw-admin functionality through the RESTful interface
+    """
+    assert config is None or isinstance(config, list) \
+        or isinstance(config, dict), \
+        "task radosgw-admin-rest only supports a list or dictionary for configuration"
+    all_clients = ['client.{id}'.format(id=id_)
+                   for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+    if config is None:
+        config = all_clients
+    if isinstance(config, list):
+        config = dict.fromkeys(config)
+    clients = config.keys()
+
+    # just use the first client...
+    client = clients[0]
+
+    ##
+    admin_user = 'ada'
+    admin_display_name = 'Ms. Admin User'
+    admin_access_key = 'MH1WC2XQ1S8UISFDZC8W'
+    admin_secret_key = 'dQyrTPA0s248YeN5bBv4ukvKU0kh54LWWywkrpoG'
+    admin_caps = 'users=read, write; usage=read, write; buckets=read, write; zone=read, write'
+
+    user1 = 'foo'
+    user2 = 'fud'
+    subuser1 = 'foo:foo1'
+    subuser2 = 'foo:foo2'
+    display_name1 = 'Foo'
+    display_name2 = 'Fud'
+    email = 'foo@foo.com'
+    access_key = '9te6NH5mcdcq0Tc5i8i1'
+    secret_key = 'Ny4IOauQoL18Gp2zM7lC1vLmoawgqcYP/YGcWfXu'
+    access_key2 = 'p5YnriCv1nAtykxBrupQ'
+    secret_key2 = 'Q8Tk6Q/27hfbFSYdSkPtUqhqx1GgzvpXa4WARozh'
+    swift_secret1 = 'gpS2G9RREMrnbqlp29PP2D36kgPR1tm72n5fPYfL'
+    swift_secret2 = 'ri2VJQcKSYATOY6uaDUX7pxgkW+W1YmC6OCxPHwy'
+
+    bucket_name = 'myfoo'
+
+    # legend (test cases can be easily grep-ed out)
+    # TESTCASE 'testname','object','method','operation','assertion'
+    # TESTCASE 'create-admin-user','user','create','administrative user','succeeds'
+    (err, out) = rgwadmin(ctx, client, [
+            'user', 'create',
+            '--uid', admin_user,
+            '--display-name', admin_display_name,
+            '--access-key', admin_access_key,
+            '--secret', admin_secret_key,
+            '--max-buckets', '0',
+            '--caps', admin_caps
+            ])
+    logging.error(out)
+    logging.error(err)
+    assert not err
+
+    (remote,) = ctx.cluster.only(client).remotes.iterkeys()
+    remote_host = remote.name.split('@')[1]
+    admin_conn = boto.s3.connection.S3Connection(
+        aws_access_key_id=admin_access_key,
+        aws_secret_access_key=admin_secret_key,
+        is_secure=False,
+        port=7280,
+        host=remote_host,
+        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
+        )
+
+    # TESTCASE 'info-nosuch','user','info','non-existent user','fails'
+    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {"uid": user1})
+    assert ret == 404
+
+    # TESTCASE 'create-ok','user','create','w/all valid info','succeeds'
+    (ret, out) = rgwadmin_rest(admin_conn,
+            ['user', 'create'],
+            {'uid' : user1,
+             'display-name' :  display_name1,
+             'email' : email,
+             'access-key' : access_key,
+             'secret-key' : secret_key,
+             'max-buckets' : '4'
+            })
+
+    assert ret == 200
+
+    # TESTCASE 'info-existing','user','info','existing user','returns correct info'
+    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1})
+
+    assert out['user_id'] == user1
+    assert out['email'] == email
+    assert out['display_name'] == display_name1
+    assert len(out['keys']) == 1
+    assert out['keys'][0]['access_key'] == access_key
+    assert out['keys'][0]['secret_key'] == secret_key
+    assert not out['suspended']
+
+    # TESTCASE 'suspend-ok','user','suspend','active user','succeeds'
+    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : True})
+    assert ret == 200
+
+    # TESTCASE 'suspend-suspended','user','suspend','suspended user','succeeds w/advisory'
+    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1})
+    assert ret == 200
+    assert out['suspended']
+
+    # TESTCASE 're-enable','user','enable','suspended user','succeeds'
+    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : 'false'})
+    assert ret == 200
+
+    # TESTCASE 'info-re-enabled','user','info','re-enabled user','no longer suspended'
+    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1})
+    assert ret == 200
+    assert not out['suspended']
+
+    # TESTCASE 'add-keys','key','create','w/valid info','succeeds'
+    (ret, out) = rgwadmin_rest(admin_conn,
+            ['key', 'create'],
+            {'uid' : user1,
+             'access-key' : access_key2,
+             'secret-key' : secret_key2
+            })
+
+
+    assert ret == 200
+
+    # TESTCASE 'info-new-key','user','info','after key addition','returns all keys'
+    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1})
+    assert ret == 200
+    assert len(out['keys']) == 2
+    assert out['keys'][0]['access_key'] == access_key2 or out['keys'][1]['access_key'] == access_key2
+    assert out['keys'][0]['secret_key'] == secret_key2 or out['keys'][1]['secret_key'] == secret_key2
+
+    # TESTCASE 'rm-key','key','rm','newly added key','succeeds, key is removed'
+    (ret, out) = rgwadmin_rest(admin_conn,
+            ['key', 'rm'],
+            {'uid' : user1,
+             'access-key' : access_key2
+            })
+
+    assert ret == 200
+
+    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1})
+
+    assert len(out['keys']) == 1
+    assert out['keys'][0]['access_key'] == access_key
+    assert out['keys'][0]['secret_key'] == secret_key
+
+    # TESTCASE 'add-swift-key','key','create','swift key','succeeds'
+    (ret, out) = rgwadmin_rest(admin_conn,
+            ['subuser', 'create'],
+            {'subuser' : subuser1,
+             'secret-key' : swift_secret1,
+             'key-type' : 'swift'
+            })
+
+    assert ret == 200
+
+    # TESTCASE 'info-swift-key','user','info','after key addition','returns all keys'
+    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1})
+    assert ret == 200
+    assert len(out['swift_keys']) == 1
+    assert out['swift_keys'][0]['user'] == subuser1
+    assert out['swift_keys'][0]['secret_key'] == swift_secret1
+
+    # TESTCASE 'add-swift-subuser','key','create','swift sub-user key','succeeds'
+    (ret, out) = rgwadmin_rest(admin_conn,
+            ['subuser', 'create'],
+            {'subuser' : subuser2,
+             'secret-key' : swift_secret2,
+             'key-type' : 'swift'
+            })
+
+    assert ret == 200
+
+    # TESTCASE 'info-swift-subuser','user','info','after key addition','returns all sub-users/keys'
+    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' :  user1})
+    assert ret == 200
+    assert len(out['swift_keys']) == 2
+    assert out['swift_keys'][0]['user'] == subuser2 or out['swift_keys'][1]['user'] == subuser2
+    assert out['swift_keys'][0]['secret_key'] == swift_secret2 or out['swift_keys'][1]['secret_key'] == swift_secret2
+
+    # TESTCASE 'rm-swift-key1','key','rm','subuser','succeeds, one key is removed'
+    (ret, out) = rgwadmin_rest(admin_conn,
+            ['key', 'rm'],
+            {'subuser' : subuser1,
+             'key-type' :'swift'
+            })
+
+    assert ret == 200
+
+    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' :  user1})
+    assert len(out['swift_keys']) == 1
+
+    # TESTCASE 'rm-subuser','subuser','rm','subuser','success, subuser is removed'
+    (ret, out) = rgwadmin_rest(admin_conn,
+            ['subuser', 'rm'],
+            {'subuser' : subuser1
+            })
+
+    assert ret == 200
+
+    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' :  user1})
+    assert len(out['subusers']) == 1
+
+    # TESTCASE 'rm-subuser-with-keys','subuser','rm','subuser','succeeds, second subuser and key is removed'
+    (ret, out) = rgwadmin_rest(admin_conn,
+            ['subuser', 'rm'],
+            {'subuser' : subuser2,
+             'key-type' : 'swift',
+             'purge-keys' : True
+            })
+
+    assert ret == 200
+
+    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' :  user1})
+    assert len(out['swift_keys']) == 0
+    assert len(out['subusers']) == 0
+
+    # TESTCASE 'bucket-stats','bucket','info','no session/buckets','succeeds, empty list'
+    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' :  user1})
+    assert ret == 200
+    assert len(out) == 0
+
+    # connect to rgw
+    connection = boto.s3.connection.S3Connection(
+        aws_access_key_id=access_key,
+        aws_secret_access_key=secret_key,
+        is_secure=False,
+        port=7280,
+        host=remote_host,
+        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
+        )
+
+    # TESTCASE 'bucket-stats2','bucket','stats','no buckets','succeeds, empty list'
+    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1, 'stats' : True})
+    assert ret == 200
+    assert len(out) == 0
+
+    # create a first bucket
+    bucket = connection.create_bucket(bucket_name)
+
+    # TESTCASE 'bucket-list','bucket','list','one bucket','succeeds, expected list'
+    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1})
+    assert ret == 200
+    assert len(out) == 1
+    assert out[0] == bucket_name
+
+    # TESTCASE 'bucket-stats3','bucket','stats','new empty bucket','succeeds, empty list'
+    (ret, out) = rgwadmin_rest(admin_conn,
+            ['bucket', 'info'], {'bucket' : bucket_name, 'stats' : True})
+
+    assert ret == 200
+    assert out['owner'] == user1
+    bucket_id = out['id']
+
+    # TESTCASE 'bucket-stats4','bucket','stats','new empty bucket','succeeds, expected bucket ID'
+    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1, 'stats' : True})
+    assert ret == 200
+    assert len(out) == 1
+    assert out[0]['id'] == bucket_id    # does it return the same ID twice in a row?
+
+    # use some space
+    key = boto.s3.key.Key(bucket)
+    key.set_contents_from_string('one')
+
+    # TESTCASE 'bucket-stats5','bucket','stats','after creating key','succeeds, lists one non-empty object'
+    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'bucket' : bucket_name, 'stats' : True})
+    assert ret == 200
+    assert out['id'] == bucket_id
+    assert out['usage']['rgw.main']['num_objects'] == 1
+    assert out['usage']['rgw.main']['size_kb'] > 0
+
+    # reclaim it
+    key.delete()
+
+    # TESTCASE 'bucket unlink', 'bucket', 'unlink', 'unlink bucket from user', 'succeeds'
+    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'unlink'], {'uid' : user1, 'bucket' : bucket_name})
+
+    assert ret == 200
+
+    # create a second user to link the bucket to
+    (ret, out) = rgwadmin_rest(admin_conn,
+            ['user', 'create'],
+            {'uid' : user2,
+            'display-name' :  display_name2,
+            'access-key' : access_key2,
+            'secret-key' : secret_key2,
+            'max-buckets' : '1',
+            })
+
+    assert ret == 200
+
+    # try creating an object with the first user before the bucket is relinked
+    denied = False
+    key = boto.s3.key.Key(bucket)
+
+    try:
+        key.set_contents_from_string('two')
+    except boto.exception.S3ResponseError:
+        denied = True
+
+    assert not denied
+
+    # delete the object
+    key.delete()
+
+    # link the bucket to another user
+    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'link'], {'uid' : user2, 'bucket' : bucket_name})
+
+    assert ret == 200
+
+    # try creating an object with the first user which should cause an error
+    key = boto.s3.key.Key(bucket)
+
+    try:
+        key.set_contents_from_string('three')
+    except boto.exception.S3ResponseError:
+        denied = True
+
+    assert denied
+
+    # relink the bucket to the first user and delete the second user
+    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'link'], {'uid' : user1, 'bucket' : bucket_name})
+    assert ret == 200
+
+    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user2})
+    assert ret == 200
+
+    # TESTCASE 'object-rm', 'object', 'rm', 'remove object', 'succeeds, object is removed'
+
+    # upload an object
+    object_name = 'four'
+    key = boto.s3.key.Key(bucket, object_name)
+    key.set_contents_from_string(object_name)
+
+    # now delete it
+    (ret, out) = rgwadmin_rest(admin_conn, ['object', 'rm'], {'bucket' : bucket_name, 'object' : object_name})
+    assert ret == 200
+
+    # TESTCASE 'bucket-stats6','bucket','stats','after deleting key','succeeds, lists no objects'
+    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'bucket' : bucket_name, 'stats' : True})
+    assert ret == 200
+    assert out['id'] == bucket_id
+    assert out['usage']['rgw.main']['num_objects'] == 0
+
+    # create a bucket for deletion stats
+    useless_bucket = connection.create_bucket('useless_bucket')
+    useless_key = useless_bucket.new_key('useless_key')
+    useless_key.set_contents_from_string('useless string')
+
+    # delete it
+    useless_key.delete()
+    useless_bucket.delete()
+
+    # wait for the statistics to flush
+    time.sleep(60)
+
+    # need to wait for all usage data to get flushed; this normally takes about 30 seconds, but allow up to 20 minutes
+    timestamp = time.time()
+    while time.time() - timestamp <= (20 * 60):      # wait up to 20 minutes
+        (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'categories' : 'delete_obj'})  # last operation we did is delete obj, wait for it to flush
+
+        if successful_ops(out) > 0:
+            break
+        time.sleep(1)
+
+    assert time.time() - timestamp <= (20 * 60)
+
+    # TESTCASE 'usage-show' 'usage' 'show' 'all usage' 'succeeds'
+    (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'])
+    assert ret == 200
+    assert len(out['entries']) > 0
+    assert len(out['summary']) > 0
+    user_summary = out['summary'][0]
+    total = user_summary['total']
+    assert total['successful_ops'] > 0
+
+    # TESTCASE 'usage-show2' 'usage' 'show' 'user usage' 'succeeds'
+    (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid' : user1})
+    assert ret == 200
+    assert len(out['entries']) > 0
+    assert len(out['summary']) > 0
+    user_summary = out['summary'][0]
+    for entry in user_summary['categories']:
+        assert entry['successful_ops'] > 0
+    assert user_summary['user'] == user1
+
+    # TESTCASE 'usage-show3' 'usage' 'show' 'user usage categories' 'succeeds'
+    test_categories = ['create_bucket', 'put_obj', 'delete_obj', 'delete_bucket']
+    for cat in test_categories:
+        (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid' : user1, 'categories' : cat})
+        assert ret == 200
+        assert len(out['summary']) > 0
+        user_summary = out['summary'][0]
+        assert user_summary['user'] == user1
+        assert len(user_summary['categories']) == 1
+        entry = user_summary['categories'][0]
+        assert entry['category'] == cat
+        assert entry['successful_ops'] > 0
+
+    # TESTCASE 'usage-trim' 'usage' 'trim' 'user usage' 'succeeds, usage removed'
+    (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'trim'], {'uid' : user1})
+    assert ret == 200
+    (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid' : user1})
+    assert ret == 200
+    assert len(out['entries']) == 0
+    assert len(out['summary']) == 0
+
+    # TESTCASE 'user-suspend2','user','suspend','existing user','succeeds'
+    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : True})
+    assert ret == 200
+
+    # TESTCASE 'user-suspend3','user','suspend','suspended user','cannot write objects'
+    try:
+        key = boto.s3.key.Key(bucket)
+        key.set_contents_from_string('five')
+    except boto.exception.S3ResponseError as e:
+        assert e.status == 403
+
+    # TESTCASE 'user-renable2','user','enable','suspended user','succeeds'
+    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' :  user1, 'suspended' : 'false'})
+    assert ret == 200
+
+    # TESTCASE 'user-renable3','user','enable','reenabled user','can write objects'
+    key = boto.s3.key.Key(bucket)
+    key.set_contents_from_string('six')
+
+    # TESTCASE 'garbage-list', 'garbage', 'list', 'get list of objects ready for garbage collection'
+
+    # create an object large enough to be split into multiple parts
+    test_string = 'foo'*10000000
+
+    big_key = boto.s3.key.Key(bucket)
+    big_key.set_contents_from_string(test_string)
+
+    # now delete the head
+    big_key.delete()
+
+    # TESTCASE 'rm-user-buckets','user','rm','existing user','fails, still has buckets'
+    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user1})
+    assert ret == 409
+
+    # delete should fail because ``key`` still exists
+    try:
+        bucket.delete()
+    except boto.exception.S3ResponseError as e:
+        assert e.status == 409
+
+    key.delete()
+    bucket.delete()
+
+    # TESTCASE 'policy', 'bucket', 'policy', 'get bucket policy', 'returns S3 policy'
+    bucket = connection.create_bucket(bucket_name)
+
+    # create an object
+    key = boto.s3.key.Key(bucket)
+    key.set_contents_from_string('seven')
+
+    # should be private already but guarantee it
+    key.set_acl('private')
+
+    (ret, out) = rgwadmin_rest(admin_conn, ['policy', 'show'], {'bucket' : bucket.name, 'object' : key.key})
+    assert ret == 200
+
+    acl = key.get_xml_acl()
+    assert acl == out.strip('\n')
+
+    # add another grantee by making the object public read
+    key.set_acl('public-read')
+
+    (ret, out) = rgwadmin_rest(admin_conn, ['policy', 'show'], {'bucket' : bucket.name, 'object' : key.key})
+    assert ret == 200
+
+    acl = key.get_xml_acl()
+    assert acl == out.strip('\n')
+
+    # TESTCASE 'rm-bucket', 'bucket', 'rm', 'bucket with objects', 'succeeds'
+    bucket = connection.create_bucket(bucket_name)
+    key_name = ['eight', 'nine', 'ten', 'eleven']
+    for i in range(4):
+        key = boto.s3.key.Key(bucket)
+        key.set_contents_from_string(key_name[i])
+
+    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'rm'], {'bucket' : bucket_name, 'purge-objects' : True})
+    assert ret == 200
+
+    # TESTCASE 'caps-add', 'caps', 'add', 'add user cap', 'succeeds'
+    caps = 'usage=read'
+    (ret, out) = rgwadmin_rest(admin_conn, ['caps', 'add'], {'uid' :  user1, 'user-caps' : caps})
+    assert ret == 200
+    assert out[0]['perm'] == 'read'
+
+    # TESTCASE 'caps-rm', 'caps', 'rm', 'remove existing cap from user', 'succeeds'
+    (ret, out) = rgwadmin_rest(admin_conn, ['caps', 'rm'], {'uid' :  user1, 'user-caps' : caps})
+    assert ret == 200
+    assert not out
+
+    # TESTCASE 'rm-user','user','rm','existing user','fails, still has buckets'
+    bucket = connection.create_bucket(bucket_name)
+    key = boto.s3.key.Key(bucket)
+
+    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user1})
+    assert ret == 409
+
+    # TESTCASE 'rm-user2', 'user', 'rm', 'user with data', 'succeeds'
+    bucket = connection.create_bucket(bucket_name)
+    key = boto.s3.key.Key(bucket)
+    key.set_contents_from_string('twelve')
+
+    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user1, 'purge-data' : True})
+    assert ret == 200
+
+    # TESTCASE 'rm-user3','user','info','deleted user','fails'
+    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' :  user1})
+    assert ret == 404
+
diff --git a/tasks/radosgw_agent.py b/tasks/radosgw_agent.py
new file mode 100644 (file)
index 0000000..e8ffe94
--- /dev/null
@@ -0,0 +1,211 @@
+"""
+Run rados gateway agent in test mode
+"""
+import contextlib
+import logging
+import argparse
+
+from ..orchestra import run
+from teuthology import misc as teuthology
+import teuthology.task_util.rgw as rgw_utils
+
+log = logging.getLogger(__name__)
+
+def run_radosgw_agent(ctx, config):
+    """
+    Run a single radosgw-agent. See task() for config format.
+    """
+    return_list = list()
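+    # collect (client, remote process) pairs so task() can close each agent's
+    # stdin and reap it during teardown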
+    for (client, cconf) in config.items():
+        # don't process entries that are not clients
+        if not client.startswith('client.'):
+            log.debug('key {data} does not start with \'client.\', moving on'.format(
+                      data=client))
+            continue
+
+        src_client = cconf['src']
+        dest_client = cconf['dest']
+
+        src_zone = rgw_utils.zone_for_client(ctx, src_client)
+        dest_zone = rgw_utils.zone_for_client(ctx, dest_client)
+
+        log.info("source is %s", src_zone)
+        log.info("dest is %s", dest_zone)
+
+        testdir = teuthology.get_testdir(ctx)
+        (remote,) = ctx.cluster.only(client).remotes.keys()
+        # figure out which branch to pull from
+        branch = cconf.get('force-branch', None)
+        if not branch:
+            branch = cconf.get('branch', 'master')
+        sha1 = cconf.get('sha1')
+        remote.run(
+            args=[
+                'cd', testdir, run.Raw('&&'),
+                'git', 'clone',
+                '-b', branch,
+#                'https://github.com/ceph/radosgw-agent.git',
+                'git://ceph.com/git/radosgw-agent.git',
+                'radosgw-agent.{client}'.format(client=client),
+                ]
+            )
+        if sha1 is not None:
+            remote.run(
+                args=[
+                    'cd', testdir, run.Raw('&&'),
+                    'cd', 'radosgw-agent.{client}'.format(client=client),
+                    run.Raw('&&'),
+                    'git', 'reset', '--hard', sha1,
+                ]
+            )
+        remote.run(
+            args=[
+                'cd', testdir, run.Raw('&&'),
+                'cd', 'radosgw-agent.{client}'.format(client=client),
+                run.Raw('&&'),
+                './bootstrap',
+                ]
+            )
+
+        src_host, src_port = rgw_utils.get_zone_host_and_port(ctx, src_client,
+                                                              src_zone)
+        dest_host, dest_port = rgw_utils.get_zone_host_and_port(ctx, dest_client,
+                                                                 dest_zone)
+        src_access, src_secret = rgw_utils.get_zone_system_keys(ctx, src_client,
+                                                               src_zone)
+        dest_access, dest_secret = rgw_utils.get_zone_system_keys(ctx, dest_client,
+                                                                 dest_zone)
+        sync_scope = cconf.get('sync-scope', None)
+        port = cconf.get('port', 8000)
+        daemon_name = '{host}.{port}.syncdaemon'.format(host=remote.name, port=port)
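+        # daemon-helper kills the agent when its stdin is closed, which is how
+        # task() shuts the agents down in its finally block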
+        in_args=[
+            'daemon-helper',
+            'kill',
+            '{tdir}/radosgw-agent.{client}/radosgw-agent'.format(tdir=testdir,
+                                                                 client=client),
+            '-v',
+            '--src-access-key', src_access,
+            '--src-secret-key', src_secret,
+            '--source', "http://{addr}:{port}".format(addr=src_host, port=src_port),
+            '--dest-access-key', dest_access,
+            '--dest-secret-key', dest_secret,
+            '--max-entries', str(cconf.get('max-entries', 1000)),
+            '--log-file', '{tdir}/archive/rgw_sync_agent.{client}.log'.format(
+                tdir=testdir,
+                client=client),
+            '--object-sync-timeout', '30',
+            ]
+
+        if cconf.get('metadata-only', False):
+            in_args.append('--metadata-only')
+
+        # the test server and full/incremental flags are mutually exclusive
+        if sync_scope is None:
+            in_args.append('--test-server-host')
+            in_args.append('0.0.0.0')
+            in_args.append('--test-server-port')
+            in_args.append(str(port))
+            log.debug('Starting a sync test server on {client}'.format(client=client))
+            # Stash the radosgw-agent server / port # for use by subsequent tasks
+            ctx.radosgw_agent.endpoint = (client, str(port))
+        else:
+            in_args.append('--sync-scope')
+            in_args.append(sync_scope)
+            log.debug('Starting a {scope} sync on {client}'.format(scope=sync_scope,client=client))
+
+        # positional arg for destination must come last
+        in_args.append("http://{addr}:{port}".format(addr=dest_host,
+                                                     port=dest_port))
+
+        return_list.append((client, remote.run(
+            args=in_args,
+            wait=False,
+            stdin=run.PIPE,
+            logger=log.getChild(daemon_name),
+            )))
+    return return_list
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Run radosgw-agents in test mode.
+
+    Configuration is clients to run the agents on, with settings for
+    source client, destination client, and port to listen on.  Binds
+    to 0.0.0.0. Port defaults to 8000. This must be run on clients
+    that have the correct zone root pools and rgw zone set in
+    ceph.conf, or the task cannot read the region information from the
+    cluster.
+
+    By default, this task will start an HTTP server that will trigger full
+    or incremental syncs based on requests made to it.
+    Alternatively, a single full sync can be triggered by
+    specifying 'sync-scope: full' or a loop of incremental syncs can be triggered
+    by specifying 'sync-scope: incremental' (the loop will sleep
+    '--incremental-sync-delay' seconds between each sync, default is 30 seconds).
+
+    By default, both data and metadata are synced. To only sync
+    metadata, for example because you want to sync between regions,
+    set metadata-only: true.
+
+    An example::
+
+      tasks:
+      - ceph:
+          conf:
+            client.0:
+              rgw zone = foo
+              rgw zone root pool = .root.pool
+            client.1:
+              rgw zone = bar
+              rgw zone root pool = .root.pool2
+      - rgw: # region configuration omitted for brevity
+      - radosgw-agent:
+          client.0:
+            branch: wip-next-feature-branch
+            src: client.0
+            dest: client.1
+            sync-scope: full
+            metadata-only: true
+            # port: 8000 (default)
+          client.1:
+            src: client.1
+            dest: client.0
+            port: 8001
+    """
+    assert isinstance(config, dict), 'rgw_sync_agent requires a dictionary config'
+    log.debug("config is %s", config)
+
+    overrides = ctx.config.get('overrides', {})
+    # merge each client section, but only if it exists in config since there isn't
+    # a sensible default action for this task
+    for client in config.iterkeys():
+        if config[client]:
+            log.debug('config[{client}]: {data}'.format(client=client, data=config[client]))
+            teuthology.deep_merge(config[client], overrides.get('radosgw-agent', {}))
+
+    ctx.radosgw_agent = argparse.Namespace()
+    ctx.radosgw_agent.config = config
+
+    procs = run_radosgw_agent(ctx, config)
+
+    ctx.radosgw_agent.procs = procs
+
+    try:
+        yield
+    finally:
+        testdir = teuthology.get_testdir(ctx)
+        try:
+            for client, proc in procs:
+                log.info("shutting down sync agent on %s", client)
+                proc.stdin.close()
+                proc.exitstatus.get()
+        finally:
+            for client, proc in procs:
+                ctx.cluster.only(client).run(
+                    args=[
+                        'rm', '-rf',
+                        '{tdir}/radosgw-agent.{client}'.format(tdir=testdir,
+                                                               client=client)
+                        ]
+                    )
diff --git a/tasks/rbd.py b/tasks/rbd.py
new file mode 100644 (file)
index 0000000..7d07a61
--- /dev/null
@@ -0,0 +1,506 @@
+"""
+Rbd testing task
+"""
+import contextlib
+import logging
+import os
+
+from cStringIO import StringIO
+from ..orchestra import run
+from teuthology import misc as teuthology
+from teuthology import contextutil
+from teuthology.parallel import parallel
+from teuthology.task.common_fs_utils import generic_mkfs
+from teuthology.task.common_fs_utils import generic_mount
+from teuthology.task.common_fs_utils import default_image_name
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def create_image(ctx, config):
+    """
+    Create an rbd image.
+
+    For example::
+
+        tasks:
+        - ceph:
+        - rbd.create_image:
+            client.0:
+                image_name: testimage
+                image_size: 100
+                image_format: 1
+            client.1:
+
+    Image size is expressed as a number of megabytes; default value
+    is 10240.
+
+    Image format value must be either 1 or 2; default value is 1.
+
+    """
+    assert isinstance(config, dict) or isinstance(config, list), \
+        "task create_image only supports a list or dictionary for configuration"
+
+    if isinstance(config, dict):
+        images = config.items()
+    else:
+        images = [(role, None) for role in config]
+
+    testdir = teuthology.get_testdir(ctx)
+    for role, properties in images:
+        if properties is None:
+            properties = {}
+        name = properties.get('image_name', default_image_name(role))
+        size = properties.get('image_size', 10240)
+        fmt = properties.get('image_format', 1)
+        (remote,) = ctx.cluster.only(role).remotes.keys()
+        log.info('Creating image {name} with size {size}'.format(name=name,
+                                                                 size=size))
+        args = [
+                'adjust-ulimits',
+                'ceph-coverage',
+                '{tdir}/archive/coverage'.format(tdir=testdir),
+                'rbd',
+                '-p', 'rbd',
+                'create',
+                '--size', str(size),
+                name,
+            ]
+        # omit format option if using the default (format 1)
+        # since old versions of rbd don't support it
+        if int(fmt) != 1:
+            args += ['--format', str(fmt)]
+        remote.run(args=args)
+    try:
+        yield
+    finally:
+        log.info('Deleting rbd images...')
+        for role, properties in images:
+            if properties is None:
+                properties = {}
+            name = properties.get('image_name', default_image_name(role))
+            (remote,) = ctx.cluster.only(role).remotes.keys()
+            remote.run(
+                args=[
+                    'adjust-ulimits',
+                    'ceph-coverage',
+                    '{tdir}/archive/coverage'.format(tdir=testdir),
+                    'rbd',
+                    '-p', 'rbd',
+                    'rm',
+                    name,
+                    ],
+                )
+
+@contextlib.contextmanager
+def modprobe(ctx, config):
+    """
+    Load the rbd kernel module.
+
+    For example::
+
+        tasks:
+        - ceph:
+        - rbd.create_image: [client.0]
+        - rbd.modprobe: [client.0]
+    """
+    log.info('Loading rbd kernel module...')
+    for role in config:
+        (remote,) = ctx.cluster.only(role).remotes.keys()
+        remote.run(
+            args=[
+                'sudo',
+                'modprobe',
+                'rbd',
+                ],
+            )
+    try:
+        yield
+    finally:
+        log.info('Unloading rbd kernel module...')
+        for role in config:
+            (remote,) = ctx.cluster.only(role).remotes.keys()
+            remote.run(
+                args=[
+                    'sudo',
+                    'modprobe',
+                    '-r',
+                    'rbd',
+                    # force errors to be ignored; necessary if more
+                    # than one device was created, which may mean
+                    # the module isn't quite ready to go the first
+                    # time through.
+                    run.Raw('||'),
+                    'true',
+                    ],
+                )
+
+@contextlib.contextmanager
+def dev_create(ctx, config):
+    """
+    Map block devices to rbd images.
+
+    For example::
+
+        tasks:
+        - ceph:
+        - rbd.create_image: [client.0]
+        - rbd.modprobe: [client.0]
+        - rbd.dev_create:
+            client.0: testimage.client.0
+    """
+    assert isinstance(config, dict) or isinstance(config, list), \
+        "task dev_create only supports a list or dictionary for configuration"
+
+    if isinstance(config, dict):
+        role_images = config.items()
+    else:
+        role_images = [(role, None) for role in config]
+
+    log.info('Creating rbd block devices...')
+
+    testdir = teuthology.get_testdir(ctx)
+
+    for role, image in role_images:
+        if image is None:
+            image = default_image_name(role)
+        (remote,) = ctx.cluster.only(role).remotes.keys()
+
+        remote.run(
+            args=[
+                'sudo',
+                'adjust-ulimits',
+                'ceph-coverage',
+                '{tdir}/archive/coverage'.format(tdir=testdir),
+                'rbd',
+                '--user', role.rsplit('.')[-1],
+                '-p', 'rbd',
+                'map',
+                image,
+                run.Raw('&&'),
+                # wait for the symlink to be created by udev
+                'while', 'test', '!', '-e', '/dev/rbd/rbd/{image}'.format(image=image), run.Raw(';'), 'do',
+                'sleep', '1', run.Raw(';'),
+                'done',
+                ],
+            )
+    try:
+        yield
+    finally:
+        log.info('Unmapping rbd devices...')
+        for role, image in role_images:
+            if image is None:
+                image = default_image_name(role)
+            (remote,) = ctx.cluster.only(role).remotes.keys()
+            remote.run(
+                args=[
+                    'LD_LIBRARY_PATH={tdir}/binary/usr/local/lib'.format(tdir=testdir),
+                    'sudo',
+                    'adjust-ulimits',
+                    'ceph-coverage',
+                    '{tdir}/archive/coverage'.format(tdir=testdir),
+                    'rbd',
+                    '-p', 'rbd',
+                    'unmap',
+                    '/dev/rbd/rbd/{imgname}'.format(imgname=image),
+                    run.Raw('&&'),
+                    # wait for the symlink to be deleted by udev
+                    'while', 'test', '-e', '/dev/rbd/rbd/{image}'.format(image=image),
+                    run.Raw(';'),
+                    'do',
+                    'sleep', '1', run.Raw(';'),
+                    'done',
+                    ],
+                )
+
+
+def rbd_devname_rtn(ctx, image):
+    """Return the device path udev creates for a mapped rbd image."""
+    return '/dev/rbd/rbd/{image}'.format(image=image)
+
+def canonical_path(ctx, role, path):
+    """
+    Determine the canonical path for a given path on the host
+    representing the given role.  A canonical path contains no
+    . or .. components, and includes no symbolic links.
+    """
+    version_fp = StringIO()
+    ctx.cluster.only(role).run(
+        args=[ 'readlink', '-f', path ],
+        stdout=version_fp,
+        )
+    canonical_path = version_fp.getvalue().rstrip('\n')
+    version_fp.close()
+    return canonical_path
+
+@contextlib.contextmanager
+def run_xfstests(ctx, config):
+    """
+    Run xfstests over specified devices.
+
+    Warning: both the test and scratch devices specified will be
+    overwritten.  Normally xfstests modifies (but does not destroy)
+    the test device, but for now the run script used here re-makes
+    both filesystems.
+
+    Note: Only one instance of xfstests can run on a single host at
+    a time, although this is not enforced.
+
+    This task in its current form needs some improvement.  For
+    example, it assumes all roles provided in the config are
+    clients, and that the config provided is a list of key/value
+    pairs.  For now please use the xfstests() interface, below.
+
+    For example::
+
+        tasks:
+        - ceph:
+        - rbd.run_xfstests:
+            client.0:
+                count: 2
+                test_dev: 'test_dev'
+                scratch_dev: 'scratch_dev'
+                fs_type: 'xfs'
+                tests: '1-9 11-15 17 19-21 26-28 31-34 41 45-48'
+    """
+    with parallel() as p:
+        for role, properties in config.items():
+            p.spawn(run_xfstests_one_client, ctx, role, properties)
+    yield
+
+def run_xfstests_one_client(ctx, role, properties):
+    """
+    Spawned routine to handle xfs tests for a single client
+    """
+    testdir = teuthology.get_testdir(ctx)
+    try:
+        count = properties.get('count')
+        test_dev = properties.get('test_dev')
+        assert test_dev is not None, \
+            "task run_xfstests requires test_dev to be defined"
+        test_dev = canonical_path(ctx, role, test_dev)
+
+        scratch_dev = properties.get('scratch_dev')
+        assert scratch_dev is not None, \
+            "task run_xfstests requires scratch_dev to be defined"
+        scratch_dev = canonical_path(ctx, role, scratch_dev)
+
+        fs_type = properties.get('fs_type')
+        tests = properties.get('tests')
+
+        (remote,) = ctx.cluster.only(role).remotes.keys()
+
+        # Fetch the test script
+        test_root = teuthology.get_testdir(ctx)
+        test_script = 'run_xfstests.sh'
+        test_path = os.path.join(test_root, test_script)
+
+        git_branch = 'master'
+        test_url = 'https://raw.github.com/ceph/ceph/{branch}/qa/{script}'.format(branch=git_branch, script=test_script)
+        # test_url = 'http://ceph.newdream.net/git/?p=ceph.git;a=blob_plain;hb=refs/heads/{branch};f=qa/{script}'.format(branch=git_branch, script=test_script)
+
+        log.info('Fetching {script} for {role} from {url}'.format(script=test_script,
+                                                                role=role,
+                                                                url=test_url))
+        args = [ 'wget', '-O', test_path, '--', test_url ]
+        remote.run(args=args)
+
+        log.info('Running xfstests on {role}:'.format(role=role))
+        log.info('   iteration count: {count}:'.format(count=count))
+        log.info('       test device: {dev}'.format(dev=test_dev))
+        log.info('    scratch device: {dev}'.format(dev=scratch_dev))
+        log.info('     using fs_type: {fs_type}'.format(fs_type=fs_type))
+        log.info('      tests to run: {tests}'.format(tests=tests))
+
+        # Note that the device paths are interpreted using
+        # readlink -f <path> in order to get their canonical
+        # pathname (so it matches what the kernel remembers).
+        args = [
+            '/usr/bin/sudo',
+            'TESTDIR={tdir}'.format(tdir=testdir),
+            'adjust-ulimits',
+            'ceph-coverage',
+            '{tdir}/archive/coverage'.format(tdir=testdir),
+            '/bin/bash',
+            test_path,
+            '-c', str(count),
+            '-f', fs_type,
+            '-t', test_dev,
+            '-s', scratch_dev,
+            ]
+        if tests:
+            args.append(tests)
+        remote.run(args=args, logger=log.getChild(role))
+    finally:
+        log.info('Removing {script} on {role}'.format(script=test_script,
+                                                      role=role))
+        remote.run(args=['rm', '-f', test_path])
+
+@contextlib.contextmanager
+def xfstests(ctx, config):
+    """
+    Run xfstests over rbd devices.  This interface sets up all
+    required configuration automatically if not otherwise specified.
+    Note that only one instance of xfstests can run on a single host
+    at a time.  By default, the set of tests specified is run once.
+    If a (non-zero) count value is supplied, the complete set of
+    tests will be run that number of times.
+
+    For example::
+
+        tasks:
+        - ceph:
+        # Image sizes are in MB
+        - rbd.xfstests:
+            client.0:
+                count: 3
+                test_image: 'test_image'
+                test_size: 250
+                test_format: 2
+                scratch_image: 'scratch_image'
+                scratch_size: 250
+                scratch_format: 1
+                fs_type: 'xfs'
+                tests: '1-9 11-15 17 19-21 26-28 31-34 41 45-48'
+    """
+    if config is None:
+        config = { 'all': None }
+    assert isinstance(config, dict) or isinstance(config, list), \
+        "task xfstests only supports a list or dictionary for configuration"
+    if isinstance(config, dict):
+        config = teuthology.replace_all_with_clients(ctx.cluster, config)
+        runs = config.items()
+    else:
+        runs = [(role, None) for role in config]
+
+    running_xfstests = {}
+    for role, properties in runs:
+        assert role.startswith('client.'), \
+            "task xfstests can only run on client nodes"
+        for host, roles_for_host in ctx.cluster.remotes.items():
+            if role in roles_for_host:
+                assert host not in running_xfstests, \
+                    "task xfstests allows only one instance at a time per host"
+                running_xfstests[host] = True
+
+    images_config = {}
+    scratch_config = {}
+    modprobe_config = {}
+    image_map_config = {}
+    scratch_map_config = {}
+    xfstests_config = {}
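+    # build a per-role config for each nested helper below (image creation,
+    # modprobe, device mapping and the xfstests run itself)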
+    for role, properties in runs:
+        if properties is None:
+            properties = {}
+
+        test_image = properties.get('test_image', 'test_image.{role}'.format(role=role))
+        test_size = properties.get('test_size', 2000) # 2G
+        test_fmt = properties.get('test_format', 1)
+        scratch_image = properties.get('scratch_image', 'scratch_image.{role}'.format(role=role))
+        scratch_size = properties.get('scratch_size', 10000) # 10G
+        scratch_fmt = properties.get('scratch_format', 1)
+
+        images_config[role] = dict(
+            image_name=test_image,
+            image_size=test_size,
+            image_format=test_fmt,
+            )
+
+        scratch_config[role] = dict(
+            image_name=scratch_image,
+            image_size=scratch_size,
+            image_format=scratch_fmt,
+            )
+
+        xfstests_config[role] = dict(
+            count=properties.get('count', 1),
+            test_dev='/dev/rbd/rbd/{image}'.format(image=test_image),
+            scratch_dev='/dev/rbd/rbd/{image}'.format(image=scratch_image),
+            fs_type=properties.get('fs_type', 'xfs'),
+            tests=properties.get('tests'),
+            )
+
+        log.info('Setting up xfstests using RBD images:')
+        log.info('      test ({size} MB): {image}'.format(size=test_size,
+                                                        image=test_image))
+        log.info('   scratch ({size} MB): {image}'.format(size=scratch_size,
+                                                        image=scratch_image))
+        modprobe_config[role] = None
+        image_map_config[role] = test_image
+        scratch_map_config[role] = scratch_image
+
+    with contextutil.nested(
+        lambda: create_image(ctx=ctx, config=images_config),
+        lambda: create_image(ctx=ctx, config=scratch_config),
+        lambda: modprobe(ctx=ctx, config=modprobe_config),
+        lambda: dev_create(ctx=ctx, config=image_map_config),
+        lambda: dev_create(ctx=ctx, config=scratch_map_config),
+        lambda: run_xfstests(ctx=ctx, config=xfstests_config),
+        ):
+        yield
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Create and mount an rbd image.
+
+    For example, you can specify which clients to run on::
+
+        tasks:
+        - ceph:
+        - rbd: [client.0, client.1]
+
+    There are a few image options::
+
+        tasks:
+        - ceph:
+        - rbd:
+            client.0: # uses defaults
+            client.1:
+                image_name: foo
+                image_size: 2048
+                image_format: 2
+                fs_type: xfs
+
+    To use default options on all clients::
+
+        tasks:
+        - ceph:
+        - rbd:
+            all:
+
+    To create 20GiB images and format them with xfs on all clients::
+
+        tasks:
+        - ceph:
+        - rbd:
+            all:
+              image_size: 20480
+              fs_type: xfs
+    """
+    if config is None:
+        config = { 'all': None }
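+    # expand 'all' into individual client roles and pull out any per-client
+    # image_name overrides for the mkfs/mount helpers below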
+    norm_config = config
+    if isinstance(config, dict):
+        norm_config = teuthology.replace_all_with_clients(ctx.cluster, config)
+    if isinstance(norm_config, dict):
+        role_images = {}
+        for role, properties in norm_config.iteritems():
+            if properties is None:
+                properties = {}
+            role_images[role] = properties.get('image_name')
+    else:
+        role_images = norm_config
+
+    log.debug('rbd config is: %s', norm_config)
+
+    with contextutil.nested(
+        lambda: create_image(ctx=ctx, config=norm_config),
+        lambda: modprobe(ctx=ctx, config=norm_config),
+        lambda: dev_create(ctx=ctx, config=role_images),
+        lambda: generic_mkfs(ctx=ctx, config=norm_config,
+                devname_rtn=rbd_devname_rtn),
+        lambda: generic_mount(ctx=ctx, config=role_images,
+                devname_rtn=rbd_devname_rtn),
+        ):
+        yield
diff --git a/tasks/rbd_fsx.py b/tasks/rbd_fsx.py
new file mode 100644 (file)
index 0000000..6d55b5c
--- /dev/null
@@ -0,0 +1,64 @@
+"""
+Run fsx on an rbd image
+"""
+import contextlib
+import logging
+
+from teuthology.parallel import parallel
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Run fsx on an rbd image.
+
+    Currently this requires running as client.admin
+    to create a pool.
+
+    Specify which clients to run on as a list::
+
+      tasks:
+        ceph:
+        rbd_fsx:
+          clients: [client.0, client.1]
+
+    You can optionally change some properties of fsx:
+
+      tasks:
+        ceph:
+        rbd_fsx:
+          clients: <list of clients>
+          seed: <random seed number, or 0 to use the time>
+          ops: <number of operations to do>
+          size: <maximum image size in bytes>
+    """
+    log.info('starting rbd_fsx...')
+    with parallel() as p:
+        for role in config['clients']:
+            p.spawn(_run_one_client, ctx, config, role)
+    yield
+
+def _run_one_client(ctx, config, role):
+    """Spawned task that runs the client"""
+    testdir = teuthology.get_testdir(ctx)
+    (remote,) = ctx.cluster.only(role).remotes.iterkeys()
+    remote.run(
+        args=[
+            'adjust-ulimits',
+            'ceph-coverage',
+            '{tdir}/archive/coverage'.format(tdir=testdir),
+            'ceph_test_librbd_fsx',
+            '-d',
+            '-W', '-R', # mmap doesn't work with rbd
+            '-p', str(config.get('progress_interval', 100)),  # show progress
+            '-P', '{tdir}/archive'.format(tdir=testdir),
+            '-t', str(config.get('truncbdy', 1)),      # truncate boundary
+            '-l', str(config.get('size', 250000000)),  # maximum size in bytes
+            '-S', str(config.get('seed', 0)),          # random seed, 0 = use the time
+            '-N', str(config.get('ops', 1000)),        # number of operations
+            'pool_{pool}'.format(pool=role),
+            'image_{image}'.format(image=role),
+            ],
+        )
diff --git a/tasks/recovery_bench.py b/tasks/recovery_bench.py
new file mode 100644 (file)
index 0000000..1984b97
--- /dev/null
@@ -0,0 +1,208 @@
+"""
+Recovery system benchmarking
+"""
+from cStringIO import StringIO
+
+import contextlib
+import gevent
+import json
+import logging
+import random
+import time
+
+import ceph_manager
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Benchmark the recovery system.
+
+    Generates objects with smalliobench, runs it normally to get a
+    baseline performance measurement, then marks an OSD out and reruns
+    to measure performance during recovery.
+
+    The config should be as follows:
+
+    recovery_bench:
+        duration: <seconds for each measurement run>
+        num_objects: <number of objects>
+        io_size: <io size in bytes>
+
+    example:
+
+    tasks:
+    - ceph:
+    - recovery_bench:
+        duration: 60
+        num_objects: 500
+        io_size: 4096
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'recovery_bench task only accepts a dict for configuration'
+
+    log.info('Beginning recovery bench...')
+
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+
+    manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'),
+        )
+
+    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
+    while len(manager.get_osd_status()['up']) < num_osds:
+        manager.sleep(10)
+
+    bench_proc = RecoveryBencher(
+        manager,
+        config,
+        )
+    try:
+        yield
+    finally:
+        log.info('joining recovery bencher')
+        bench_proc.do_join()
+
+class RecoveryBencher:
+    """
+    RecoveryBencher
+    """
+    def __init__(self, manager, config):
+        self.ceph_manager = manager
+        self.ceph_manager.wait_for_clean()
+
+        osd_status = self.ceph_manager.get_osd_status()
+        self.osds = osd_status['up']
+
+        self.config = config
+        if self.config is None:
+            self.config = dict()
+
+        else:
+            def tmp(x):
+                """
+                Local wrapper to print value.
+                """
+                print x
+            self.log = tmp
+
+        log.info("spawning thread")
+
+        self.thread = gevent.spawn(self.do_bench)
+
+    def do_join(self):
+        """
+        Join the recovery bencher.  This is called after the main
+        task exits.
+        """
+        self.thread.get()
+
+    def do_bench(self):
+        """
+        Do the benchmarking.
+        """
+        duration = self.config.get("duration", 60)
+        num_objects = self.config.get("num_objects", 500)
+        io_size = self.config.get("io_size", 4096)
+
+        osd = str(random.choice(self.osds))
+        (osd_remote,) = self.ceph_manager.ctx.cluster.only('osd.%s' % osd).remotes.iterkeys()
+
+        testdir = teuthology.get_testdir(self.ceph_manager.ctx)
+
+        # create the objects
+        osd_remote.run(
+            args=[
+                'adjust-ulimits',
+                'ceph-coverage',
+                '{tdir}/archive/coverage'.format(tdir=testdir),
+                'smalliobench',
+                '--use-prefix', 'recovery_bench',
+                '--init-only', '1',
+                '--num-objects', str(num_objects),
+                '--io-size', str(io_size),
+                ],
+            wait=True,
+        )
+
+        # baseline bench
+        log.info('non-recovery (baseline)')
+        p = osd_remote.run(
+            args=[
+                'adjust-ulimits',
+                'ceph-coverage',
+                '{tdir}/archive/coverage'.format(tdir=testdir),
+                'smalliobench',
+                '--use-prefix', 'recovery_bench',
+                '--do-not-init', '1',
+                '--duration', str(duration),
+                '--io-size', str(io_size),
+                ],
+            stdout=StringIO(),
+            stderr=StringIO(),
+            wait=True,
+        )
+        self.process_samples(p.stderr.getvalue())
+
+        self.ceph_manager.raw_cluster_cmd('osd', 'out', osd)
+        time.sleep(5)
+
+        # recovery bench
+        log.info('recovery active')
+        p = osd_remote.run(
+            args=[
+                'adjust-ulimits',
+                'ceph-coverage',
+                '{tdir}/archive/coverage'.format(tdir=testdir),
+                'smalliobench',
+                '--use-prefix', 'recovery_bench',
+                '--do-not-init', '1',
+                '--duration', str(duration),
+                '--io-size', str(io_size),
+                ],
+            stdout=StringIO(),
+            stderr=StringIO(),
+            wait=True,
+        )
+        self.process_samples(p.stderr.getvalue())
+
+        self.ceph_manager.raw_cluster_cmd('osd', 'in', osd)
+
+    def process_samples(self, input):
+        """
+        Extract samples from the input and process the results
+
+        :param input: input lines in JSON format
+        """
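+        # each line is expected to be a JSON object with at least 'type' and
+        # 'latency' keys, e.g. {"type": "write", "latency": 0.004} (values
+        # illustrative); lines that fail to parse are skipped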
+        lat = {}
+        for line in input.split('\n'):
+            try:
+                sample = json.loads(line)
+                samples = lat.setdefault(sample['type'], [])
+                samples.append(float(sample['latency']))
+            except Exception:
+                pass
+
+        for type in lat:
+            samples = lat[type]
+            samples.sort()
+
+            num = len(samples)
+
+            # median
+            if num & 1 == 1: # odd number of samples
+                median = samples[num / 2]
+            else:
+                median = (samples[num / 2] + samples[num / 2 - 1]) / 2
+
+            # 99%
+            ninety_nine = samples[int(num * 0.99)]
+
+            log.info("%s: median %f, 99%% %f" % (type, median, ninety_nine))
diff --git a/tasks/rep_lost_unfound_delete.py b/tasks/rep_lost_unfound_delete.py
new file mode 100644 (file)
index 0000000..f75a4d2
--- /dev/null
@@ -0,0 +1,153 @@
+"""
+Lost_unfound
+"""
+import logging
+import ceph_manager
+from teuthology import misc as teuthology
+from teuthology.task_util.rados import rados
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+    """
+    Test handling of lost objects.
+
+    A small three-OSD cluster is brought up; OSDs are killed and revived in a
+    sequence that leaves some objects unfound, those objects are marked lost
+    with 'mark_unfound_lost delete', and the task verifies they are gone.
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'lost_unfound task only accepts a dict for configuration'
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+
+    manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'),
+        )
+
+    while len(manager.get_osd_status()['up']) < 3:
+        manager.sleep(10)
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.wait_for_clean()
+
+    # something that is always there
+    dummyfile = '/etc/fstab'
+
+    # take an osd out until the very end
+    manager.kill_osd(2)
+    manager.mark_down_osd(2)
+    manager.mark_out_osd(2)
+
+    # kludge to make sure they get a map
+    rados(ctx, mon, ['-p', 'data', 'put', 'dummy', dummyfile])
+
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.wait_for_recovery()
+
+    # create old objects
+    for f in range(1, 10):
+        rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])
+        rados(ctx, mon, ['-p', 'data', 'put', 'existed_%d' % f, dummyfile])
+        rados(ctx, mon, ['-p', 'data', 'rm', 'existed_%d' % f])
+
+    # delay recovery, and make the pg log very long (to prevent backfill)
+    manager.raw_cluster_cmd(
+            'tell', 'osd.1',
+            'injectargs',
+            '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000'
+            )
+
+    manager.kill_osd(0)
+    manager.mark_down_osd(0)
+
+    for f in range(1, 10):
+        rados(ctx, mon, ['-p', 'data', 'put', 'new_%d' % f, dummyfile])
+        rados(ctx, mon, ['-p', 'data', 'put', 'existed_%d' % f, dummyfile])
+        rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])
+
+    # bring osd.0 back up, let it peer, but don't replicate the new
+    # objects...
+    log.info('osd.0 command_args is %s' % 'foo')
+    log.info(ctx.daemons.get_daemon('osd', 0).command_args)
+    ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend([
+            '--osd-recovery-delay-start', '1000'
+            ])
+    manager.revive_osd(0)
+    manager.mark_in_osd(0)
+    manager.wait_till_osd_is_up(0)
+
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.wait_till_active()
+
+    # take out osd.1 and the only copy of those objects.
+    manager.kill_osd(1)
+    manager.mark_down_osd(1)
+    manager.mark_out_osd(1)
+    manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')
+
+    # bring up osd.2 so that things would otherwise, in theory, recover fully
+    manager.revive_osd(2)
+    manager.mark_in_osd(2)
+    manager.wait_till_osd_is_up(2)
+
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.wait_till_active()
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+
+    # verify that there are unfound objects
+    unfound = manager.get_num_unfound_objects()
+    log.info("there are %d unfound objects" % unfound)
+    assert unfound
+
+    # mark stuff lost
+    pgs = manager.get_pg_stats()
+    for pg in pgs:
+        if pg['stat_sum']['num_objects_unfound'] > 0:
+            primary = 'osd.%d' % pg['acting'][0]
+
+            # verify that i can list them direct from the osd
+            log.info('listing missing/lost in %s state %s', pg['pgid'],
+                     pg['state'])
+            m = manager.list_pg_missing(pg['pgid'])
+            #log.info('%s' % m)
+            assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']
+            num_unfound=0
+            for o in m['objects']:
+                if len(o['locations']) == 0:
+                    num_unfound += 1
+            assert m['num_unfound'] == num_unfound
+
+            log.info("reverting unfound in %s on %s", pg['pgid'], primary)
+            manager.raw_cluster_cmd('pg', pg['pgid'],
+                                    'mark_unfound_lost', 'delete')
+        else:
+            log.info("no unfound in %s", pg['pgid'])
+
+    manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.wait_for_recovery()
+
+    # verify result
+    for f in range(1, 10):
+        err = rados(ctx, mon, ['-p', 'data', 'get', 'new_%d' % f, '-'])
+        assert err
+        err = rados(ctx, mon, ['-p', 'data', 'get', 'existed_%d' % f, '-'])
+        assert err
+        err = rados(ctx, mon, ['-p', 'data', 'get', 'existing_%d' % f, '-'])
+        assert err
+
+    # see if osd.1 can cope
+    manager.revive_osd(1)
+    manager.mark_in_osd(1)
+    manager.wait_till_osd_is_up(1)
+    manager.wait_for_clean()
diff --git a/tasks/repair_test.py b/tasks/repair_test.py
new file mode 100644 (file)
index 0000000..1dd8f2f
--- /dev/null
@@ -0,0 +1,213 @@
+import logging
+import time
+
+import ceph_manager
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
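+# The helpers below return closures bound to ctx; gen_repair_test_1/2 combine
+# a corrupter, an OSD chooser and a scrub type into a test callable that
+# task() runs against a fresh pool.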
+def setup(ctx, config):
+    ctx.manager.wait_for_clean()
+    ctx.manager.create_pool("repair_test_pool", 1)
+    return "repair_test_pool"
+
+def teardown(ctx, config, pool):
+    ctx.manager.remove_pool(pool)
+    ctx.manager.wait_for_clean()
+
+def run_test(ctx, config, test):
+    s = setup(ctx, config)
+    test(ctx, config, s)
+    teardown(ctx, config, s)
+
+def choose_primary(ctx):
+    def ret(pool, num):
+        log.info("Choosing primary")
+        return ctx.manager.get_pg_primary(pool, num)
+    return ret
+
+def choose_replica(ctx):
+    def ret(pool, num):
+        log.info("Choosing replica")
+        return ctx.manager.get_pg_replica(pool, num)
+    return ret
+
+def trunc(ctx):
+    def ret(osd, pool, obj):
+        log.info("truncating object")
+        return ctx.manager.osd_admin_socket(
+            osd,
+            ['truncobj', pool, obj, '1'])
+    return ret
+
+def dataerr(ctx):
+    def ret(osd, pool, obj):
+        log.info("injecting data err on object")
+        return ctx.manager.osd_admin_socket(
+            osd,
+            ['injectdataerr', pool, obj])
+    return ret
+
+def mdataerr(ctx):
+    def ret(osd, pool, obj):
+        log.info("injecting mdata err on object")
+        return ctx.manager.osd_admin_socket(
+            osd,
+            ['injectmdataerr', pool, obj])
+    return ret
+
+def omaperr(ctx):
+    def ret(osd, pool, obj):
+        log.info("injecting omap err on object")
+        return ctx.manager.osd_admin_socket(osd, ['setomapval', pool, obj, 'badkey', 'badval'])
+    return ret
+
+def gen_repair_test_1(corrupter, chooser, scrub_type):
+    def ret(ctx, config, pool):
+        log.info("starting repair test type 1")
+        victim_osd = chooser(pool, 0)
+
+        # create object
+        log.info("doing put")
+        ctx.manager.do_put(pool, 'repair_test_obj', '/etc/hosts')
+
+        # corrupt object
+        log.info("corrupting object")
+        corrupter(victim_osd, pool, 'repair_test_obj')
+
+        # verify inconsistent
+        log.info("scrubbing")
+        ctx.manager.do_pg_scrub(pool, 0, scrub_type)
+
+        assert ctx.manager.pg_inconsistent(pool, 0)
+
+        # repair
+        log.info("repairing")
+        ctx.manager.do_pg_scrub(pool, 0, "repair")
+
+        log.info("re-scrubbing")
+        ctx.manager.do_pg_scrub(pool, 0, scrub_type)
+
+        # verify consistent
+        assert not ctx.manager.pg_inconsistent(pool, 0)
+        log.info("done")
+    return ret
+
+def gen_repair_test_2(chooser):
+    def ret(ctx, config, pool):
+        log.info("starting repair test type 2")
+        victim_osd = chooser(pool, 0)
+        first_mon = teuthology.get_first_mon(ctx, config)
+        (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+
+        # create object
+        log.info("doing put and setomapval")
+        ctx.manager.do_put(pool, 'file1', '/etc/hosts')
+        ctx.manager.do_rados(mon, ['-p', pool, 'setomapval', 'file1', 'key', 'val'])
+        ctx.manager.do_put(pool, 'file2', '/etc/hosts')
+        ctx.manager.do_put(pool, 'file3', '/etc/hosts')
+        ctx.manager.do_put(pool, 'file4', '/etc/hosts')
+        ctx.manager.do_put(pool, 'file5', '/etc/hosts')
+        ctx.manager.do_rados(mon, ['-p', pool, 'setomapval', 'file5', 'key', 'val'])
+        ctx.manager.do_put(pool, 'file6', '/etc/hosts')
+
+        # corrupt object
+        log.info("corrupting object")
+        omaperr(ctx)(victim_osd, pool, 'file1')
+
+        # verify inconsistent
+        log.info("scrubbing")
+        ctx.manager.do_pg_scrub(pool, 0, 'deep-scrub')
+
+        assert ctx.manager.pg_inconsistent(pool, 0)
+
+        # Regression test for bug #4778, should still
+        # be inconsistent after scrub
+        ctx.manager.do_pg_scrub(pool, 0, 'scrub')
+
+        assert ctx.manager.pg_inconsistent(pool, 0)
+
+        # Additional corruptions including 2 types for file1
+        log.info("corrupting more objects")
+        dataerr(ctx)(victim_osd, pool, 'file1')
+        mdataerr(ctx)(victim_osd, pool, 'file2')
+        trunc(ctx)(victim_osd, pool, 'file3')
+        omaperr(ctx)(victim_osd, pool, 'file6')
+
+        # see still inconsistent
+        log.info("scrubbing")
+        ctx.manager.do_pg_scrub(pool, 0, 'deep-scrub')
+
+        assert ctx.manager.pg_inconsistent(pool, 0)
+
+        # repair
+        log.info("repairing")
+        ctx.manager.do_pg_scrub(pool, 0, "repair")
+
+        # Let repair clear inconsistent flag
+        time.sleep(10)
+
+        # verify consistent
+        assert not ctx.manager.pg_inconsistent(pool, 0)
+
+        # In the future repair might determine state of
+        # inconsistency itself, verify with a deep-scrub
+        log.info("scrubbing")
+        ctx.manager.do_pg_scrub(pool, 0, 'deep-scrub')
+
+        # verify consistent
+        assert not ctx.manager.pg_inconsistent(pool, 0)
+
+        log.info("done")
+    return ret
+
+def task(ctx, config):
+    """
+    Test [deep] repair in several situations:
+      Repair [Truncate, Data EIO, MData EIO] on [Primary|Replica]
+
+    The config should be as follows:
+
+      Must include the log-whitelist below
+      Must enable filestore_debug_inject_read_err config
+
+    example:
+
+    tasks:
+    - chef:
+    - install:
+    - ceph:
+        log-whitelist: ['candidate had a read error', 'deep-scrub 0 missing, 1 inconsistent objects', 'deep-scrub 0 missing, 4 inconsistent objects', 'deep-scrub 1 errors', 'deep-scrub 4 errors', '!= known omap_digest', 'repair 0 missing, 1 inconsistent objects', 'repair 0 missing, 4 inconsistent objects', 'repair 1 errors, 1 fixed', 'repair 4 errors, 4 fixed', 'scrub 0 missing, 1 inconsistent', 'scrub 1 errors', 'size 1 != known size']
+        conf:
+          osd:
+            filestore debug inject read err: true
+    - repair_test:
+
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'repair_test task only accepts a dict for config'
+
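+    # attach a CephManager bound to the first monitor unless an earlier task
+    # has already set one on the context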
+    if not hasattr(ctx, 'manager'):
+        first_mon = teuthology.get_first_mon(ctx, config)
+        (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+        ctx.manager = ceph_manager.CephManager(
+            mon,
+            ctx=ctx,
+            logger=log.getChild('ceph_manager')
+            )
+
+    tests = [
+        gen_repair_test_1(mdataerr(ctx), choose_primary(ctx), "scrub"),
+        gen_repair_test_1(mdataerr(ctx), choose_replica(ctx), "scrub"),
+        gen_repair_test_1(dataerr(ctx), choose_primary(ctx), "deep-scrub"),
+        gen_repair_test_1(dataerr(ctx), choose_replica(ctx), "deep-scrub"),
+        gen_repair_test_1(trunc(ctx), choose_primary(ctx), "scrub"),
+        gen_repair_test_1(trunc(ctx), choose_replica(ctx), "scrub"),
+        gen_repair_test_2(choose_primary(ctx)),
+        gen_repair_test_2(choose_replica(ctx))
+        ]
+
+    for test in tests:
+        run_test(ctx, config, test)
diff --git a/tasks/rest_api.py b/tasks/rest_api.py
new file mode 100644 (file)
index 0000000..d34d31a
--- /dev/null
@@ -0,0 +1,183 @@
+"""
+Rest Api
+"""
+import logging
+import contextlib
+import time
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+from ..orchestra import run
+from teuthology.task.ceph import CephState
+
+log = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def run_rest_api_daemon(ctx, api_clients):
+    """
+    Wrapper starts the rest api daemons
+    """
+    if not hasattr(ctx, 'daemons'):
+        ctx.daemons = CephState()
+    remotes = ctx.cluster.only(teuthology.is_type('client')).remotes
+    for rems, roles in remotes.iteritems():
+        for whole_id_ in roles:
+            if whole_id_ in api_clients:
+                id_ = whole_id_[len('client.'):]
+                run_cmd = [
+                    'sudo',
+                    'daemon-helper',
+                    'kill',
+                    'ceph-rest-api',
+                    '-n',
+                    'client.rest{id}'.format(id=id_), ]
+                cl_rest_id = 'client.rest{id}'.format(id=id_)
+                ctx.daemons.add_daemon(rems, 'restapi',
+                    cl_rest_id,
+                    args=run_cmd,
+                    logger=log.getChild(cl_rest_id),
+                    stdin=run.PIPE,
+                    wait=False,
+                    )
+                for i in range(1, 12):
+                    log.info('testing for ceph-rest-api try {0}'.format(i))
+                    run_cmd = [
+                        'wget',
+                        '-O',
+                        '/dev/null',
+                        '-q',
+                        'http://localhost:5000/api/v0.1/status'
+                    ]
+                    proc = rems.run(
+                        args=run_cmd,
+                        check_status=False
+                    )
+                    if proc.exitstatus == 0:
+                        break
+                    time.sleep(5)
+                if proc.exitstatus != 0:
+                    raise RuntimeError('Cannot contact ceph-rest-api')
+    try:
+        yield
+
+    finally:
+        # TODO: destroy daemons started -- modify iter_daemons_of_role
+        teuthology.stop_daemons_of_type(ctx, 'restapi')
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Start up rest-api.
+
+    To start on all clients::
+
+        tasks:
+        - ceph:
+        - rest-api:
+
+    To only run on certain clients::
+
+        tasks:
+        - ceph:
+        - rest-api: [client.0, client.3]
+
+    or
+
+        tasks:
+        - ceph:
+        - rest-api:
+            client.0:
+            client.3:
+
+    The general flow of things here is:
+        1. Find clients on which rest-api is supposed to run (api_clients)
+        2. Generate keyring values
+        3. Start up ceph-rest-api daemons
+    On cleanup:
+        4. Stop the daemons
+        5. Delete keyring value files.
+    """
+    api_clients = []
+    remotes = ctx.cluster.only(teuthology.is_type('client')).remotes
+    log.info(remotes)
+    if config is None:
+        api_clients = ['client.{id}'.format(id=id_)
+            for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+    else:
+        api_clients = config
+    log.info(api_clients)
+    testdir = teuthology.get_testdir(ctx)
+    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
+    for rems, roles in remotes.iteritems():
+        for whole_id_ in roles:
+            if whole_id_ in api_clients:
+                id_ = whole_id_[len('client.'):]
+                keyring = '/etc/ceph/ceph.client.rest{id}.keyring'.format(
+                        id=id_)
+                rems.run(
+                    args=[
+                        'sudo',
+                        'adjust-ulimits',
+                        'ceph-coverage',
+                        coverage_dir,
+                        'ceph-authtool',
+                        '--create-keyring',
+                        '--gen-key',
+                        '--name=client.rest{id}'.format(id=id_),
+                        '--set-uid=0',
+                        '--cap', 'mon', 'allow *',
+                        '--cap', 'osd', 'allow *',
+                        '--cap', 'mds', 'allow',
+                        keyring,
+                        run.Raw('&&'),
+                        'sudo',
+                        'chmod',
+                        '0644',
+                        keyring,
+                        ],
+                    )
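+                # append a [client.rest{id}] section to ceph.conf and point the
+                # 'restapi keyring' option at the keyring generated above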
+                rems.run(
+                    args=[
+                        'sudo',
+                        'sh',
+                        '-c',
+                        run.Raw("'"),
+                        "echo",
+                        '[client.rest{id}]'.format(id=id_),
+                        run.Raw('>>'),
+                        "/etc/ceph/ceph.conf",
+                        run.Raw("'")
+                        ]
+                    )
+                rems.run(
+                    args=[
+                        'sudo',
+                        'sh',
+                        '-c',
+                        run.Raw("'"),
+                        'echo',
+                        'restapi',
+                        'keyring',
+                        '=',
+                        '/etc/ceph/ceph.client.rest{id}.keyring'.format(id=id_),
+                        run.Raw('>>'),
+                        '/etc/ceph/ceph.conf',
+                        run.Raw("'"),
+                        ]
+                    )
+                rems.run(
+                    args=[
+                        'ceph',
+                        'auth',
+                        'import',
+                        '-i',
+                        '/etc/ceph/ceph.client.rest{id}.keyring'.format(id=id_),
+                    ]
+                )
+    with contextutil.nested(
+            lambda: run_rest_api_daemon(ctx=ctx, api_clients=api_clients),):
+        yield
+
diff --git a/tasks/restart.py b/tasks/restart.py
new file mode 100644 (file)
index 0000000..87ca2b0
--- /dev/null
@@ -0,0 +1,163 @@
+"""
+Daemon restart
+"""
+import logging
+import pipes
+
+from teuthology import misc as teuthology
+from teuthology.orchestra import run as tor
+
+from ..orchestra import run
+log = logging.getLogger(__name__)
+
+def restart_daemon(ctx, config, role, id_, *args):
+    """
+    Handle restart (including the execution of the command parameters passed)
+    """
+    log.info('Restarting {r}.{i} daemon...'.format(r=role, i=id_))
+    daemon = ctx.daemons.get_daemon(role, id_)
+    log.debug('Waiting for exit of {r}.{i} daemon...'.format(r=role, i=id_))
+    try:
+        daemon.wait_for_exit()
+    except tor.CommandFailedError as e:
+        log.debug('Command Failed: {e}'.format(e=e))
+    if len(args) > 0:
+        confargs = ['--{k}={v}'.format(k=k, v=v) for k,v in zip(args[0::2], args[1::2])]
+        log.debug('Doing restart of {r}.{i} daemon with args: {a}...'.format(r=role, i=id_, a=confargs))
+        daemon.restart_with_args(confargs)
+    else:
+        log.debug('Doing restart of {r}.{i} daemon...'.format(r=role, i=id_))
+        daemon.restart()
+
+def get_tests(ctx, config, role, remote, testdir):
+    """Download restart tests"""
+    srcdir = '{tdir}/restart.{role}'.format(tdir=testdir, role=role)
+
+    refspec = config.get('branch')
+    if refspec is None:
+        refspec = config.get('sha1')
+    if refspec is None:
+        refspec = config.get('tag')
+    if refspec is None:
+        refspec = 'HEAD'
+    log.info('Pulling restart qa/workunits from ref %s', refspec)
+
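+    # Fetch qa/workunits from the ceph git mirror, build it if a Makefile is
+    # present, and record the relative path of every executable file in
+    # {tdir}/restarts.list so the task body can pick scripts to run.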
+    remote.run(
+        logger=log.getChild(role),
+        args=[
+            'mkdir', '--', srcdir,
+            run.Raw('&&'),
+            'git',
+            'archive',
+            '--remote=git://ceph.newdream.net/git/ceph.git',
+            '%s:qa/workunits' % refspec,
+            run.Raw('|'),
+            'tar',
+            '-C', srcdir,
+            '-x',
+            '-f-',
+            run.Raw('&&'),
+            'cd', '--', srcdir,
+            run.Raw('&&'),
+            'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi',
+            run.Raw('&&'),
+            'find', '-executable', '-type', 'f', '-printf', r'%P\0',
+            run.Raw('>{tdir}/restarts.list'.format(tdir=testdir)),
+            ],
+        )
+    restarts = sorted(teuthology.get_file(
+                        remote,
+                        '{tdir}/restarts.list'.format(tdir=testdir)).split('\0'))
+    return (srcdir, restarts)
+
+def task(ctx, config):
+    """
+    Execute commands and allow daemon restart with config options.
+    Each process executed can output to stdout restart commands of the form:
+        restart <role> <id> <conf_key1> <conf_value1> <conf_key2> <conf_value2>
+    This will restart the daemon <role>.<id> once with the specified config values,
+    by modifying the conf file with those values and then restoring the old conf
+    file once the daemon has restarted.
+    This task does not kill a running daemon; it assumes the daemon will abort on
+    an assert specified in the config.
+
+        tasks:
+        - install:
+        - ceph:
+        - restart:
+            exec:
+              client.0:
+                - test_backtraces.py
+
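+    A test script might drive this protocol roughly as follows (hypothetical
+    sketch; the daemon name and config option are made up)::
+
+        import sys
+        # ... do something that makes osd.0 hit its assert and exit ...
+        print 'restart osd 0 osd_op_thread_timeout 90'
+        sys.stdout.flush()                    # the task reads our stdout
+        assert raw_input() == 'restarted'     # task acks once osd.0 is back up
+        print 'done'                          # tell the task we are finished
+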
+    """
+    assert isinstance(config, dict), "task kill got invalid config"
+
+    testdir = teuthology.get_testdir(ctx)
+
+    try:
+        assert 'exec' in config, "config requires exec key with <role>: <command> entries"
+        for role, task in config['exec'].iteritems():
+            log.info('restart for role {r}'.format(r=role))
+            (remote,) = ctx.cluster.only(role).remotes.iterkeys()
+            srcdir, restarts = get_tests(ctx, config, role, remote, testdir)
+            log.info('Running command on role %s host %s', role, remote.name)
+            spec = '{spec}'.format(spec=task[0])
+            log.info('Restarts list: %s', restarts)
+            log.info('Spec is %s', spec)
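+            # run every discovered restart script whose name matches the configured entry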
+            to_run = [w for w in restarts if w == task or w.find(spec) != -1]
+            log.info('To run: %s', to_run)
+            for c in to_run:
+                log.info('Running restart script %s...', c)
+                args = [
+                    run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)),
+                    ]
+                env = config.get('env')
+                if env is not None:
+                    for var, val in env.iteritems():
+                        quoted_val = pipes.quote(val)
+                        env_arg = '{var}={val}'.format(var=var, val=quoted_val)
+                        args.append(run.Raw(env_arg))
+                args.extend([
+                            'adjust-ulimits',
+                            'ceph-coverage',
+                            '{tdir}/archive/coverage'.format(tdir=testdir),
+                            '{srcdir}/{c}'.format(
+                                srcdir=srcdir,
+                                c=c,
+                                ),
+                            ])
+                proc = remote.run(
+                    args=args,
+                    stdout=tor.PIPE,
+                    stdin=tor.PIPE,
+                    stderr=log,
+                    wait=False,
+                    )
+                log.info('waiting for a command from script')
+                while True:
+                    l = proc.stdout.readline()
+                    if not l or l == '':
+                        break
+                    log.debug('script command: {c}'.format(c=l))
+                    ll = l.strip()
+                    cmd = ll.split(' ')
+                    if cmd[0] == "done":
+                        break
+                    assert cmd[0] == 'restart', "script sent invalid command request to restart task"
+                    # cmd should be: restart <role> <id> <conf_key1> <conf_value1> <conf_key2> <conf_value2>
+                    # or to clear, just: restart <role> <id>
+                    restart_daemon(ctx, config, cmd[1], cmd[2], *cmd[3:])
+                    proc.stdin.writelines(['restarted\n'])
+                    proc.stdin.flush()
+                try:
+                    proc.exitstatus.get()
+                except tor.CommandFailedError:
+                    raise Exception('restart task got non-zero exit status from script: {s}'.format(s=c))
+    finally:
+        log.info('Finishing %s on %s...', task, role)
+        remote.run(
+            logger=log.getChild(role),
+            args=[
+                'rm', '-rf', '--', '{tdir}/restarts.list'.format(tdir=testdir), srcdir,
+                ],
+            )
diff --git a/tasks/rgw.py b/tasks/rgw.py
new file mode 100644 (file)
index 0000000..afa464d
--- /dev/null
@@ -0,0 +1,808 @@
+"""
+rgw routines
+"""
+import argparse
+import contextlib
+import json
+import logging
+import os
+
+from cStringIO import StringIO
+
+from ..orchestra import run
+from teuthology import misc as teuthology
+from teuthology import contextutil
+from teuthology.task_util.rgw import rgwadmin
+from teuthology.task_util.rados import (rados, create_ec_pool,
+                                        create_replicated_pool,
+                                        create_cache_pool)
+
+log = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def create_apache_dirs(ctx, config):
+    """
+    Remotely create apache directories.  Delete when finished.
+    """
+    log.info('Creating apache directories...')
+    testdir = teuthology.get_testdir(ctx)
+    for client in config.iterkeys():
+        ctx.cluster.only(client).run(
+            args=[
+                'mkdir',
+                '-p',
+                '{tdir}/apache/htdocs.{client}'.format(tdir=testdir,
+                                                       client=client),
+                '{tdir}/apache/tmp.{client}/fastcgi_sock'.format(
+                    tdir=testdir,
+                    client=client),
+                run.Raw('&&'),
+                'mkdir',
+                '{tdir}/archive/apache.{client}'.format(tdir=testdir,
+                                                        client=client),
+                ],
+            )
+    try:
+        yield
+    finally:
+        log.info('Cleaning up apache directories...')
+        for client in config.iterkeys():
+            ctx.cluster.only(client).run(
+                args=[
+                    'rm',
+                    '-rf',
+                    '{tdir}/apache/tmp.{client}'.format(tdir=testdir,
+                                                        client=client),
+                    run.Raw('&&'),
+                    'rmdir',
+                    '{tdir}/apache/htdocs.{client}'.format(tdir=testdir,
+                                                           client=client),
+                    ],
+                )
+
+        for client in config.iterkeys():
+            ctx.cluster.only(client).run(
+                args=[
+                    'rmdir',
+                    '{tdir}/apache'.format(tdir=testdir),
+                    ],
+                check_status=False,  # only need to remove once per host
+                )
+
+
+@contextlib.contextmanager
+def ship_apache_configs(ctx, config, role_endpoints):
+    """
+    Ship apache config and rgw.fcgi to all clients.  Clean up on termination.
+    """
+    assert isinstance(config, dict)
+    assert isinstance(role_endpoints, dict)
+    testdir = teuthology.get_testdir(ctx)
+    log.info('Shipping apache config and rgw.fcgi...')
+    src = os.path.join(os.path.dirname(__file__), 'apache.conf.template')
+    for client, conf in config.iteritems():
+        (remote,) = ctx.cluster.only(client).remotes.keys()
+        system_type = teuthology.get_system_type(remote)
+        if not conf:
+            conf = {}
+        idle_timeout = conf.get('idle_timeout', 30)
+        if system_type == 'deb':
+            mod_path = '/usr/lib/apache2/modules'
+            print_continue = 'on'
+        else:
+            mod_path = '/usr/lib64/httpd/modules'
+            print_continue = 'off'
+        host, port = role_endpoints[client]
+        with file(src, 'rb') as f:
+            conf = f.read().format(
+                testdir=testdir,
+                mod_path=mod_path,
+                print_continue=print_continue,
+                host=host,
+                port=port,
+                client=client,
+                idle_timeout=idle_timeout,
+                )
+            teuthology.write_file(
+                remote=remote,
+                path='{tdir}/apache/apache.{client}.conf'.format(
+                    tdir=testdir,
+                    client=client),
+                data=conf,
+                )
+        teuthology.write_file(
+            remote=remote,
+            path='{tdir}/apache/htdocs.{client}/rgw.fcgi'.format(
+                tdir=testdir,
+                client=client),
+            data="""#!/bin/sh
+ulimit -c unlimited
+exec radosgw -f -n {client} -k /etc/ceph/ceph.{client}.keyring --rgw-socket-path {tdir}/apache/tmp.{client}/fastcgi_sock/rgw_sock
+
+""".format(tdir=testdir, client=client)
+            )
+        remote.run(
+            args=[
+                'chmod',
+                'a=rx',
+                '{tdir}/apache/htdocs.{client}/rgw.fcgi'.format(tdir=testdir,
+                                                                client=client),
+                ],
+            )
+    try:
+        yield
+    finally:
+        log.info('Removing apache config...')
+        for client in config.iterkeys():
+            ctx.cluster.only(client).run(
+                args=[
+                    'rm',
+                    '-f',
+                    '{tdir}/apache/apache.{client}.conf'.format(tdir=testdir,
+                                                                client=client),
+                    run.Raw('&&'),
+                    'rm',
+                    '-f',
+                    '{tdir}/apache/htdocs.{client}/rgw.fcgi'.format(
+                        tdir=testdir,
+                        client=client),
+                    ],
+                )
+
+
+@contextlib.contextmanager
+def start_rgw(ctx, config):
+    """
+    Start rgw on remote sites.
+    """
+    log.info('Starting rgw...')
+    testdir = teuthology.get_testdir(ctx)
+    for client in config.iterkeys():
+        (remote,) = ctx.cluster.only(client).remotes.iterkeys()
+
+        client_config = config.get(client)
+        if client_config is None:
+            client_config = {}
+        log.info("rgw %s config is %s", client, client_config)
+        id_ = client.split('.', 1)[1]
+        log.info('client {client} is id {id}'.format(client=client, id=id_))
+        cmd_prefix = [
+            'sudo',
+            'adjust-ulimits',
+            'ceph-coverage',
+            '{tdir}/archive/coverage'.format(tdir=testdir),
+            'daemon-helper',
+            'term',
+            ]
+
+        rgw_cmd = ['radosgw']
+
+        if ctx.rgw.frontend == 'apache':
+            rgw_cmd.extend([
+                '--rgw-socket-path',
+                '{tdir}/apache/tmp.{client}/fastcgi_sock/rgw_sock'.format(
+                    tdir=testdir,
+                    client=client,
+                    ),
+            ])
+        elif ctx.rgw.frontend == 'civetweb':
+            host, port = ctx.rgw.role_endpoints[client]
+            rgw_cmd.extend([
+                '--rgw-frontends',
+                'civetweb port={port}'.format(port=port),
+            ])
+
+        rgw_cmd.extend([
+            '-n', client,
+            '-k', '/etc/ceph/ceph.{client}.keyring'.format(client=client),
+            '--log-file',
+            '/var/log/ceph/rgw.{client}.log'.format(client=client),
+            '--rgw_ops_log_socket_path',
+            '{tdir}/rgw.opslog.{client}.sock'.format(tdir=testdir,
+                                                     client=client),
+            '--foreground',
+            run.Raw('|'),
+            'sudo',
+            'tee',
+            '/var/log/ceph/rgw.{client}.stdout'.format(tdir=testdir,
+                                                       client=client),
+            run.Raw('2>&1'),
+            ])
+
+        if client_config.get('valgrind'):
+            cmd_prefix = teuthology.get_valgrind_args(
+                testdir,
+                client,
+                cmd_prefix,
+                client_config.get('valgrind')
+                )
+
+        run_cmd = list(cmd_prefix)
+        run_cmd.extend(rgw_cmd)
+
+        ctx.daemons.add_daemon(
+            remote, 'rgw', client,
+            args=run_cmd,
+            logger=log.getChild(client),
+            stdin=run.PIPE,
+            wait=False,
+            )
+
+    try:
+        yield
+    finally:
+        teuthology.stop_daemons_of_type(ctx, 'rgw')
+        for client in config.iterkeys():
+            ctx.cluster.only(client).run(
+                args=[
+                    'rm',
+                    '-f',
+                    '{tdir}/rgw.opslog.{client}.sock'.format(tdir=testdir,
+                                                             client=client),
+                    ],
+                )
+
+
+@contextlib.contextmanager
+def start_apache(ctx, config):
+    """
+    Start apache on remote sites.
+    """
+    log.info('Starting apache...')
+    testdir = teuthology.get_testdir(ctx)
+    apaches = {}
+    for client in config.iterkeys():
+        (remote,) = ctx.cluster.only(client).remotes.keys()
+        system_type = teuthology.get_system_type(remote)
+        if system_type == 'deb':
+            apache_name = 'apache2'
+        else:
+            apache_name = '/usr/sbin/httpd.worker'
+        proc = remote.run(
+            args=[
+                'adjust-ulimits',
+                'daemon-helper',
+                'kill',
+                apache_name,
+                '-X',
+                '-f',
+                '{tdir}/apache/apache.{client}.conf'.format(tdir=testdir,
+                                                            client=client),
+                ],
+            logger=log.getChild(client),
+            stdin=run.PIPE,
+            wait=False,
+            )
+        apaches[client] = proc
+
+    try:
+        yield
+    finally:
+        log.info('Stopping apache...')
+        for client, proc in apaches.iteritems():
+            proc.stdin.close()
+
+        run.wait(apaches.itervalues())
+
+
+def extract_user_info(client_config):
+    """
+    Extract user info from the client config specified.  Returns a dict
+    that includes system key information.
+    """
+    # if there is no system user, or no name for that user, return None
+    if ('system user' not in client_config or
+            'name' not in client_config['system user']):
+        return None
+
+    user_info = dict()
+    user_info['system_key'] = dict(
+        user=client_config['system user']['name'],
+        access_key=client_config['system user']['access key'],
+        secret_key=client_config['system user']['secret key'],
+        )
+    return user_info
+
+
+def extract_zone_info(ctx, client, client_config):
+    """
+    Get zone information.
+    :param client: dictionary of client information
+    :param client_config: dictionary of client configuration information
+    :returns: zone extracted from client and client_config information
+    """
+    ceph_config = ctx.ceph.conf.get('global', {})
+    ceph_config.update(ctx.ceph.conf.get('client', {}))
+    ceph_config.update(ctx.ceph.conf.get(client, {}))
+    for key in ['rgw zone', 'rgw region', 'rgw zone root pool']:
+        assert key in ceph_config, \
+            'ceph conf must contain {key} for {client}'.format(key=key,
+                                                               client=client)
+    region = ceph_config['rgw region']
+    zone = ceph_config['rgw zone']
+    zone_info = dict()
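+    # Pools not set explicitly in ceph.conf default to names of the form
+    # '.<region>.<zone>.<suffix>', e.g. '.foo.foo-1.gc_pool' for 'rgw gc pool'
+    # (illustrative names only).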
+    for key in ['rgw control pool', 'rgw gc pool', 'rgw log pool',
+                'rgw intent log pool', 'rgw usage log pool',
+                'rgw user keys pool', 'rgw user email pool',
+                'rgw user swift pool', 'rgw user uid pool',
+                'rgw domain root']:
+        new_key = key.split(' ', 1)[1]
+        new_key = new_key.replace(' ', '_')
+
+        if key in ceph_config:
+            value = ceph_config[key]
+            log.debug('{key} specified in ceph_config ({val})'.format(
+                key=key, val=value))
+            zone_info[new_key] = value
+        else:
+            zone_info[new_key] = '.' + region + '.' + zone + '.' + new_key
+
+    index_pool = '.' + region + '.' + zone + '.' + 'index_pool'
+    data_pool = '.' + region + '.' + zone + '.' + 'data_pool'
+    data_extra_pool = '.' + region + '.' + zone + '.' + 'data_extra_pool'
+
+    zone_info['placement_pools'] = [{'key': 'default_placement',
+                                     'val': {'index_pool': index_pool,
+                                             'data_pool': data_pool,
+                                             'data_extra_pool': data_extra_pool}
+                                     }]
+
+    # these keys are meant for the zones argument in the region info.  We
+    # insert them into zone_info with a different format and then remove them
+    # in the fill_in_endpoints() method
+    for key in ['rgw log meta', 'rgw log data']:
+        if key in ceph_config:
+            zone_info[key] = ceph_config[key]
+
+    return region, zone, zone_info
+
+
+def extract_region_info(region, region_info):
+    """
+    Extract region information from the region_info parameter, using get
+    to set default values.
+
+    :param region: name of the region
+    :param region_info: region information (in dictionary form).
+    :returns: dictionary of region information set from region_info, using
+            default values for missing fields.
+    """
+    assert isinstance(region_info['zones'], list) and region_info['zones'], \
+        'zones must be a non-empty list'
+    return dict(
+        name=region,
+        api_name=region_info.get('api name', region),
+        is_master=region_info.get('is master', False),
+        log_meta=region_info.get('log meta', False),
+        log_data=region_info.get('log data', False),
+        master_zone=region_info.get('master zone', region_info['zones'][0]),
+        placement_targets=region_info.get('placement targets',
+                                          [{'name': 'default_placement',
+                                            'tags': []}]),
+        default_placement=region_info.get('default placement',
+                                          'default_placement'),
+        )
+
+
+def assign_ports(ctx, config):
+    """
+    Assign port numbers, starting with port 7280.
+    """
+    port = 7280
+    role_endpoints = {}
+    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
+        for role in roles_for_host:
+            if role in config:
+                role_endpoints[role] = (remote.name.split('@')[1], port)
+                port += 1
+
+    return role_endpoints
+
+
+def fill_in_endpoints(region_info, role_zones, role_endpoints):
+    """
+    Iterate through the list of role_endpoints, filling in zone information
+
+    :param region_info: region data
+    :param role_zones: region and zone information.
+    :param role_endpoints: endpoints being used
+    """
+    for role, (host, port) in role_endpoints.iteritems():
+        region, zone, zone_info, _ = role_zones[role]
+        host, port = role_endpoints[role]
+        endpoint = 'http://{host}:{port}/'.format(host=host, port=port)
+        # check if the region specified under client actually exists
+        # in region_info (it should, if properly configured).
+        # If not, throw a reasonable error
+        if region not in region_info:
+            raise Exception(
+                'Region: {region} was specified but no corresponding'
+                ' entry was found under \'regions\''.format(region=region))
+
+        region_conf = region_info[region]
+        region_conf.setdefault('endpoints', [])
+        region_conf['endpoints'].append(endpoint)
+
+        # this is the payload for the 'zones' field in the region field
+        zone_payload = dict()
+        zone_payload['endpoints'] = [endpoint]
+        zone_payload['name'] = zone
+
+        # Pull the log meta and log data settings out of zone_info, if they
+        # exist, then pop them as they don't actually belong in the zone info
+        for key in ['rgw log meta', 'rgw log data']:
+            new_key = key.split(' ', 1)[1]
+            new_key = new_key.replace(' ', '_')
+
+            if key in zone_info:
+                value = zone_info.pop(key)
+            else:
+                value = 'false'
+
+            zone_payload[new_key] = value
+
+        region_conf.setdefault('zones', [])
+        region_conf['zones'].append(zone_payload)
+
+
+@contextlib.contextmanager
+def configure_users(ctx, config, everywhere=False):
+    """
+    Create users by remotely running rgwadmin commands using extracted
+    user information.
+    """
+    log.info('Configuring users...')
+
+    # extract the user info and append it to the payload tuple for the given
+    # client
+    for client, c_config in config.iteritems():
+        if not c_config:
+            continue
+        user_info = extract_user_info(c_config)
+        if not user_info:
+            continue
+
+        # For data sync the master zones and regions must have the
+        # system users of the secondary zones. To keep this simple,
+        # just create the system users on every client if regions are
+        # configured.
+        clients_to_create_as = [client]
+        if everywhere:
+            clients_to_create_as = config.keys()
+        for client_name in clients_to_create_as:
+            log.debug('Creating user {user} on {client}'.format(
+                      user=user_info['system_key']['user'], client=client))
+            rgwadmin(ctx, client_name,
+                     cmd=[
+                         'user', 'create',
+                         '--uid', user_info['system_key']['user'],
+                         '--access-key', user_info['system_key']['access_key'],
+                         '--secret', user_info['system_key']['secret_key'],
+                         '--display-name', user_info['system_key']['user'],
+                         '--system',
+                     ],
+                     check_status=True,
+                     )
+
+    yield
+
+
+@contextlib.contextmanager
+def create_nonregion_pools(ctx, config, regions):
+    """Create replicated or erasure coded data pools for rgw."""
+    if regions:
+        yield
+        return
+
+    log.info('creating data pools')
+    for client in config.keys():
+        (remote,) = ctx.cluster.only(client).remotes.iterkeys()
+        data_pool = '.rgw.buckets'
+        if ctx.rgw.ec_data_pool:
+            create_ec_pool(remote, data_pool, client, 64)
+        else:
+            create_replicated_pool(remote, data_pool, 64)
+        if ctx.rgw.cache_pools:
+            create_cache_pool(remote, data_pool, data_pool + '.cache', 64,
+                              64*1024*1024)
+    yield
+
+
+@contextlib.contextmanager
+def configure_regions_and_zones(ctx, config, regions, role_endpoints):
+    """
+    Configure regions and zones from rados and rgw.
+    """
+    if not regions:
+        log.debug(
+            'In rgw.configure_regions_and_zones() and regions is None. '
+            'Bailing')
+        yield
+        return
+
+    log.info('Configuring regions and zones...')
+
+    log.debug('config is %r', config)
+    log.debug('regions are %r', regions)
+    log.debug('role_endpoints = %r', role_endpoints)
+    # extract the zone info
+    role_zones = dict([(client, extract_zone_info(ctx, client, c_config))
+                       for client, c_config in config.iteritems()])
+    log.debug('roles_zones = %r', role_zones)
+
+    # extract the user info and append it to the payload tuple for the given
+    # client
+    for client, c_config in config.iteritems():
+        if not c_config:
+            user_info = None
+        else:
+            user_info = extract_user_info(c_config)
+
+        (region, zone, zone_info) = role_zones[client]
+        role_zones[client] = (region, zone, zone_info, user_info)
+
+    region_info = dict([
+        (region_name, extract_region_info(region_name, r_config))
+        for region_name, r_config in regions.iteritems()])
+
+    fill_in_endpoints(region_info, role_zones, role_endpoints)
+
+    # clear out the old defaults
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+    # removing these objects from .rgw.root and the per-zone root pools
+    # may or may not matter
+    rados(ctx, mon,
+          cmd=['-p', '.rgw.root', 'rm', 'region_info.default'])
+    rados(ctx, mon,
+          cmd=['-p', '.rgw.root', 'rm', 'zone_info.default'])
+
+    for client in config.iterkeys():
+        for role, (_, zone, zone_info, user_info) in role_zones.iteritems():
+            rados(ctx, mon,
+                  cmd=['-p', zone_info['domain_root'],
+                       'rm', 'region_info.default'])
+            rados(ctx, mon,
+                  cmd=['-p', zone_info['domain_root'],
+                       'rm', 'zone_info.default'])
+
+            (remote,) = ctx.cluster.only(role).remotes.keys()
+            for pool_info in zone_info['placement_pools']:
+                remote.run(args=['ceph', 'osd', 'pool', 'create',
+                                 pool_info['val']['index_pool'], '64', '64'])
+                if ctx.rgw.ec_data_pool:
+                    create_ec_pool(remote, pool_info['val']['data_pool'],
+                                   zone, 64)
+                else:
+                    create_replicated_pool(
+                        remote, pool_info['val']['data_pool'],
+                        64)
+
+            rgwadmin(ctx, client,
+                     cmd=['-n', client, 'zone', 'set', '--rgw-zone', zone],
+                     stdin=StringIO(json.dumps(dict(
+                         zone_info.items() + user_info.items()))),
+                     check_status=True)
+
+        for region, info in region_info.iteritems():
+            region_json = json.dumps(info)
+            log.debug('region info is: %s', region_json)
+            rgwadmin(ctx, client,
+                     cmd=['-n', client, 'region', 'set'],
+                     stdin=StringIO(region_json),
+                     check_status=True)
+            if info['is_master']:
+                rgwadmin(ctx, client,
+                         cmd=['-n', client,
+                              'region', 'default',
+                              '--rgw-region', region],
+                         check_status=True)
+
+        rgwadmin(ctx, client, cmd=['-n', client, 'regionmap', 'update'])
+    yield
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Either configure apache to run a rados gateway, or use the built-in
+    civetweb server.
+    Only one rgw should be run per machine, since it uses a hard-coded port
+    for now.
+
+    For example, to run rgw on all clients::
+
+        tasks:
+        - ceph:
+        - rgw:
+
+    To only run on certain clients::
+
+        tasks:
+        - ceph:
+        - rgw: [client.0, client.3]
+
+    or
+
+        tasks:
+        - ceph:
+        - rgw:
+            client.0:
+            client.3:
+
+    You can adjust the idle timeout for fastcgi (default is 30 seconds):
+
+        tasks:
+        - ceph:
+        - rgw:
+            client.0:
+              idle_timeout: 90
+
+    To run radosgw through valgrind:
+
+        tasks:
+        - ceph:
+        - rgw:
+            client.0:
+              valgrind: [--tool=memcheck]
+            client.3:
+              valgrind: [--tool=memcheck]
+
+    To use civetweb instead of apache:
+
+        tasks:
+        - ceph:
+        - rgw:
+          - client.0
+        overrides:
+          rgw:
+            frontend: civetweb
+
+    Note that without a modified fastcgi module (e.g. with the default
+    one on CentOS), you must set rgw print continue = false in ceph.conf::
+
+        tasks:
+        - ceph:
+            conf:
+              global:
+                rgw print continue: false
+        - rgw: [client.0]
+
+    To run rgws for multiple regions or zones, describe the regions
+    and their zones in a regions section. The endpoints will be
+    generated by this task. Each client must have a region, zone,
+    and pools assigned in ceph.conf::
+
+        tasks:
+        - install:
+        - ceph:
+            conf:
+              client.0:
+                rgw region: foo
+                rgw zone: foo-1
+                rgw region root pool: .rgw.rroot.foo
+                rgw zone root pool: .rgw.zroot.foo
+                rgw log meta: true
+                rgw log data: true
+              client.1:
+                rgw region: bar
+                rgw zone: bar-master
+                rgw region root pool: .rgw.rroot.bar
+                rgw zone root pool: .rgw.zroot.bar
+                rgw log meta: true
+                rgw log data: true
+              client.2:
+                rgw region: bar
+                rgw zone: bar-secondary
+                rgw region root pool: .rgw.rroot.bar
+                rgw zone root pool: .rgw.zroot.bar-secondary
+        - rgw:
+            ec-data-pool: true
+            regions:
+              foo:
+                api name: api_name # default: region name
+                is master: true    # default: false
+                master zone: foo-1 # default: first zone
+                zones: [foo-1]
+                log meta: true
+                log data: true
+                placement targets: [target1, target2] # default: []
+                default placement: target2            # default: ''
+              bar:
+                api name: bar-api
+                zones: [bar-master, bar-secondary]
+            client.0:
+              system user:
+                name: foo-system
+                access key: X2IYPSTY1072DDY1SJMC
+                secret key: YIMHICpPvT+MhLTbSsiBJ1jQF15IFvJA8tgwJEcm
+            client.1:
+              system user:
+                name: bar1
+                access key: Y2IYPSTY1072DDY1SJMC
+                secret key: XIMHICpPvT+MhLTbSsiBJ1jQF15IFvJA8tgwJEcm
+            client.2:
+              system user:
+                name: bar2
+                access key: Z2IYPSTY1072DDY1SJMC
+                secret key: ZIMHICpPvT+MhLTbSsiBJ1jQF15IFvJA8tgwJEcm
+    """
+    if config is None:
+        config = dict(('client.{id}'.format(id=id_), None)
+                      for id_ in teuthology.all_roles_of_type(
+                          ctx.cluster, 'client'))
+    elif isinstance(config, list):
+        config = dict((name, None) for name in config)
+
+    overrides = ctx.config.get('overrides', {})
+    teuthology.deep_merge(config, overrides.get('rgw', {}))
+
+    regions = {}
+    if 'regions' in config:
+        # separate region info so only clients are keys in config
+        regions = config['regions']
+        del config['regions']
+
+    role_endpoints = assign_ports(ctx, config)
+    ctx.rgw = argparse.Namespace()
+    ctx.rgw.role_endpoints = role_endpoints
+    # stash the region info for later, since it was deleted from the config
+    # structure
+    ctx.rgw.regions = regions
+
+    ctx.rgw.ec_data_pool = False
+    if 'ec-data-pool' in config:
+        ctx.rgw.ec_data_pool = bool(config['ec-data-pool'])
+        del config['ec-data-pool']
+    ctx.rgw.cache_pools = False
+    if 'cache-pools' in config:
+        ctx.rgw.cache_pools = bool(config['cache-pools'])
+        del config['cache-pools']
+
+    ctx.rgw.frontend = 'apache'
+    if 'frontend' in config:
+        ctx.rgw.frontend = config['frontend']
+        del config['frontend']
+
+    subtasks = [
+        lambda: configure_regions_and_zones(
+            ctx=ctx,
+            config=config,
+            regions=regions,
+            role_endpoints=role_endpoints,
+            ),
+        lambda: configure_users(
+            ctx=ctx,
+            config=config,
+            everywhere=bool(regions),
+            ),
+        lambda: create_nonregion_pools(
+            ctx=ctx, config=config, regions=regions),
+    ]
+    if ctx.rgw.frontend == 'apache':
+        subtasks.insert(0, lambda: create_apache_dirs(ctx=ctx, config=config))
+        subtasks.extend([
+            lambda: ship_apache_configs(ctx=ctx, config=config,
+                                        role_endpoints=role_endpoints),
+            lambda: start_rgw(ctx=ctx, config=config),
+            lambda: start_apache(ctx=ctx, config=config),
+        ])
+    elif ctx.rgw.frontend == 'civetweb':
+        subtasks.extend([
+            lambda: start_rgw(ctx=ctx, config=config),
+        ])
+    else:
+        raise ValueError("frontend must be 'apache' or 'civetweb'")
+
+    log.info("Using %s as radosgw frontend", ctx.rgw.frontend)
+    with contextutil.nested(*subtasks):
+        yield
diff --git a/tasks/rgw_logsocket.py b/tasks/rgw_logsocket.py
new file mode 100644 (file)
index 0000000..6f49b00
--- /dev/null
@@ -0,0 +1,161 @@
+"""
+rgw s3tests logging wrappers
+"""
+from cStringIO import StringIO
+from configobj import ConfigObj
+import contextlib
+import logging
+import s3tests
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+
+log = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def download(ctx, config):
+    """
+    Run s3tests download function
+    """
+    return s3tests.download(ctx, config)
+
+def _config_user(s3tests_conf, section, user):
+    """
+    Run s3tests user config function
+    """
+    return s3tests._config_user(s3tests_conf, section, user)
+
+@contextlib.contextmanager
+def create_users(ctx, config):
+    """
+    Run s3tests user create function
+    """
+    return s3tests.create_users(ctx, config)
+
+@contextlib.contextmanager
+def configure(ctx, config):
+    """
+    Run s3tests user configure function
+    """
+    return s3tests.configure(ctx, config)
+
+@contextlib.contextmanager
+def run_tests(ctx, config):
+    """
+    Run remote netcat tests
+    """
+    assert isinstance(config, dict)
+    testdir = teuthology.get_testdir(ctx)
+    for client, client_config in config.iteritems():
+        client_config['extra_args'] = [
+            's3tests.functional.test_s3:test_bucket_list_return_data',
+        ]
+#        args = [
+#                'S3TEST_CONF={tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client),
+#                '{tdir}/s3-tests/virtualenv/bin/nosetests'.format(tdir=testdir),
+#                '-w',
+#                '{tdir}/s3-tests'.format(tdir=testdir),
+#                '-v',
+#              's3tests.functional.test_s3:test_bucket_list_return_data',
+#                ]
+#        if client_config is not None and 'extra_args' in client_config:
+#            args.extend(client_config['extra_args'])
+#
+#        ctx.cluster.only(client).run(
+#            args=args,
+#            )
+
+    s3tests.run_tests(ctx, config)
+
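+    # After the suite runs, read from the rgw ops-log unix socket with netcat and
+    # require a non-trivial amount of data to show that opslog logging works.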
+    netcat_out = StringIO()
+
+    for client, client_config in config.iteritems():
+        ctx.cluster.only(client).run(
+            args = [
+                'netcat',
+                '-w', '5',
+                '-U', '{tdir}/rgw.opslog.sock'.format(tdir=testdir),
+                ],
+             stdout = netcat_out,
+        )
+
+        out = netcat_out.getvalue()
+
+        assert len(out) > 100
+
+        log.info('Received %s', out)
+
+    yield
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Run an s3-tests suite against rgw and verify that the opslog socket returns data
+
+    Must restrict testing to a particular client::
+
+        tasks:
+        - ceph:
+        - rgw: [client.0]
+        - rgw-logsocket: [client.0]
+
+    To pass extra arguments to nose (e.g. to run a certain test)::
+
+        tasks:
+        - ceph:
+        - rgw: [client.0]
+        - rgw-logsocket:
+            client.0:
+              extra_args: ['test_s3:test_object_acl_grand_public_read']
+            client.1:
+              extra_args: ['--exclude', 'test_100_continue']
+    """
+    assert config is None or isinstance(config, list) \
+        or isinstance(config, dict), \
+        "task s3tests only supports a list or dictionary for configuration"
+    all_clients = ['client.{id}'.format(id=id_)
+                   for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+    if config is None:
+        config = all_clients
+    if isinstance(config, list):
+        config = dict.fromkeys(config)
+    clients = config.keys()
+
+    overrides = ctx.config.get('overrides', {})
+    # merge each client section, not the top level.
+    for (client, cconf) in config.iteritems():
+        teuthology.deep_merge(cconf, overrides.get('rgw-logsocket', {}))
+
+    log.debug('config is %s', config)
+
+    s3tests_conf = {}
+    for client in clients:
+        s3tests_conf[client] = ConfigObj(
+            indent_type='',
+            infile={
+                'DEFAULT':
+                    {
+                    'port'      : 7280,
+                    'is_secure' : 'no',
+                    },
+                'fixtures' : {},
+                's3 main'  : {},
+                's3 alt'   : {},
+                }
+            )
+
+    with contextutil.nested(
+        lambda: download(ctx=ctx, config=config),
+        lambda: create_users(ctx=ctx, config=dict(
+                clients=clients,
+                s3tests_conf=s3tests_conf,
+                )),
+        lambda: configure(ctx=ctx, config=dict(
+                clients=config,
+                s3tests_conf=s3tests_conf,
+                )),
+        lambda: run_tests(ctx=ctx, config=config),
+        ):
+        yield
diff --git a/tasks/s3readwrite.py b/tasks/s3readwrite.py
new file mode 100644 (file)
index 0000000..476015d
--- /dev/null
@@ -0,0 +1,346 @@
+"""
+Run rgw s3 readwrite tests
+"""
+from cStringIO import StringIO
+import base64
+import contextlib
+import logging
+import os
+import random
+import string
+import yaml
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+from ..config import config as teuth_config
+from ..orchestra import run
+from ..orchestra.connection import split_user
+
+log = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def download(ctx, config):
+    """
+    Download the s3 tests from the git builder.
+    Remove the downloaded s3 tests upon exit.
+
+    The context passed in should be identical to the context
+    passed in to the main task.
+    """
+    assert isinstance(config, dict)
+    log.info('Downloading s3-tests...')
+    testdir = teuthology.get_testdir(ctx)
+    for (client, cconf) in config.items():
+        branch = cconf.get('force-branch', None)
+        if not branch:
+            branch = cconf.get('branch', 'master')
+        sha1 = cconf.get('sha1')
+        ctx.cluster.only(client).run(
+            args=[
+                'git', 'clone',
+                '-b', branch,
+                teuth_config.ceph_git_base_url + 's3-tests.git',
+                '{tdir}/s3-tests'.format(tdir=testdir),
+                ],
+            )
+        if sha1 is not None:
+            ctx.cluster.only(client).run(
+                args=[
+                    'cd', '{tdir}/s3-tests'.format(tdir=testdir),
+                    run.Raw('&&'),
+                    'git', 'reset', '--hard', sha1,
+                    ],
+                )
+    try:
+        yield
+    finally:
+        log.info('Removing s3-tests...')
+        testdir = teuthology.get_testdir(ctx)
+        for client in config:
+            ctx.cluster.only(client).run(
+                args=[
+                    'rm',
+                    '-rf',
+                    '{tdir}/s3-tests'.format(tdir=testdir),
+                    ],
+                )
+
+
+def _config_user(s3tests_conf, section, user):
+    """
+    Configure users for this section by stashing away keys, ids, and
+    email addresses.
+    """
+    s3tests_conf[section].setdefault('user_id', user)
+    s3tests_conf[section].setdefault('email', '{user}+test@test.test'.format(user=user))
+    s3tests_conf[section].setdefault('display_name', 'Mr. {user}'.format(user=user))
+    s3tests_conf[section].setdefault('access_key', ''.join(random.choice(string.uppercase) for i in xrange(20)))
+    s3tests_conf[section].setdefault('secret_key', base64.b64encode(os.urandom(40)))
+
+@contextlib.contextmanager
+def create_users(ctx, config):
+    """
+    Create a default s3 user.
+    """
+    assert isinstance(config, dict)
+    log.info('Creating rgw users...')
+    testdir = teuthology.get_testdir(ctx)
+    users = {'s3': 'foo'}
+    cached_client_user_names = dict()
+    for client in config['clients']:
+        cached_client_user_names[client] = dict()
+        s3tests_conf = config['s3tests_conf'][client]
+        s3tests_conf.setdefault('readwrite', {})
+        s3tests_conf['readwrite'].setdefault('bucket', 'rwtest-' + client + '-{random}-')
+        s3tests_conf['readwrite'].setdefault('readers', 10)
+        s3tests_conf['readwrite'].setdefault('writers', 3)
+        s3tests_conf['readwrite'].setdefault('duration', 300)
+        s3tests_conf['readwrite'].setdefault('files', {})
+        rwconf = s3tests_conf['readwrite']
+        rwconf['files'].setdefault('num', 10)
+        rwconf['files'].setdefault('size', 2000)
+        rwconf['files'].setdefault('stddev', 500)
+        for section, user in users.iteritems():
+            _config_user(s3tests_conf, section, '{user}.{client}'.format(user=user, client=client))
+            log.debug('creating user {user} on {client}'.format(user=s3tests_conf[section]['user_id'],
+                                                                client=client))
+
+            # stash the 'delete_user' flag along with user name for easier cleanup
+            delete_this_user = True
+            if 'delete_user' in s3tests_conf['s3']:
+                delete_this_user = s3tests_conf['s3']['delete_user']
+                log.debug('delete_user set to {flag} for {client}'.format(flag=delete_this_user, client=client))
+            cached_client_user_names[client][section+user] = (s3tests_conf[section]['user_id'], delete_this_user)
+
+            # skip actual user creation if the create_user flag is set to false for this client
+            if 'create_user' in s3tests_conf['s3'] and s3tests_conf['s3']['create_user'] == False:
+                log.debug('create_user set to False, skipping user creation for {client}'.format(client=client))
+                continue
+            else:
+                ctx.cluster.only(client).run(
+                    args=[
+                        'adjust-ulimits',
+                        'ceph-coverage',
+                        '{tdir}/archive/coverage'.format(tdir=testdir),
+                        'radosgw-admin',
+                        '-n', client,
+                        'user', 'create',
+                        '--uid', s3tests_conf[section]['user_id'],
+                        '--display-name', s3tests_conf[section]['display_name'],
+                        '--access-key', s3tests_conf[section]['access_key'],
+                        '--secret', s3tests_conf[section]['secret_key'],
+                        '--email', s3tests_conf[section]['email'],
+                    ],
+                )
+    try:
+        yield
+    finally:
+        for client in config['clients']:
+            for section, user in users.iteritems():
+                #uid = '{user}.{client}'.format(user=user, client=client)
+                real_uid, delete_this_user  = cached_client_user_names[client][section+user]
+                if delete_this_user:
+                    ctx.cluster.only(client).run(
+                        args=[
+                            'adjust-ulimits',
+                            'ceph-coverage',
+                            '{tdir}/archive/coverage'.format(tdir=testdir),
+                            'radosgw-admin',
+                            '-n', client,
+                            'user', 'rm',
+                            '--uid', real_uid,
+                            '--purge-data',
+                            ],
+                        )
+                else:
+                    log.debug('skipping delete for user {uid} on {client}'.format(uid=real_uid, client=client))
+
+@contextlib.contextmanager
+def configure(ctx, config):
+    """
+    Configure the s3-tests.  This includes the running of the
+    bootstrap code and the updating of local conf files.
+    """
+    assert isinstance(config, dict)
+    log.info('Configuring s3-readwrite-tests...')
+    for client, properties in config['clients'].iteritems():
+        s3tests_conf = config['s3tests_conf'][client]
+        if properties is not None and 'rgw_server' in properties:
+            host = None
+            for target, roles in zip(ctx.config['targets'].iterkeys(), ctx.config['roles']):
+                log.info('roles: ' + str(roles))
+                log.info('target: ' + str(target))
+                if properties['rgw_server'] in roles:
+                    _, host = split_user(target)
+            assert host is not None, "Invalid client specified as the rgw_server"
+            s3tests_conf['s3']['host'] = host
+        else:
+            s3tests_conf['s3']['host'] = 'localhost'
+
+        def_conf = s3tests_conf['DEFAULT']
+        s3tests_conf['s3'].setdefault('port', def_conf['port'])
+        s3tests_conf['s3'].setdefault('is_secure', def_conf['is_secure'])
+
+        (remote,) = ctx.cluster.only(client).remotes.keys()
+        remote.run(
+            args=[
+                'cd',
+                '{tdir}/s3-tests'.format(tdir=teuthology.get_testdir(ctx)),
+                run.Raw('&&'),
+                './bootstrap',
+                ],
+            )
+        conf_fp = StringIO()
+        conf = dict(
+                        s3=s3tests_conf['s3'],
+                        readwrite=s3tests_conf['readwrite'],
+                    )
+        yaml.safe_dump(conf, conf_fp, default_flow_style=False)
+        teuthology.write_file(
+            remote=remote,
+            path='{tdir}/archive/s3readwrite.{client}.config.yaml'.format(tdir=teuthology.get_testdir(ctx), client=client),
+            data=conf_fp.getvalue(),
+            )
+    yield
+
+
+@contextlib.contextmanager
+def run_tests(ctx, config):
+    """
+    Run the s3readwrite tests after everything is set up.
+
+    :param ctx: Context passed to task
+    :param config: specific configuration information
+    """
+    assert isinstance(config, dict)
+    testdir = teuthology.get_testdir(ctx)
+    for client, client_config in config.iteritems():
+        (remote,) = ctx.cluster.only(client).remotes.keys()
+        conf = teuthology.get_file(remote, '{tdir}/archive/s3readwrite.{client}.config.yaml'.format(tdir=testdir, client=client))
+        args = [
+                '{tdir}/s3-tests/virtualenv/bin/s3tests-test-readwrite'.format(tdir=testdir),
+                ]
+        if client_config is not None and 'extra_args' in client_config:
+            args.extend(client_config['extra_args'])
+
+        ctx.cluster.only(client).run(
+            args=args,
+            stdin=conf,
+            )
+    yield
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Run the s3tests-test-readwrite suite against rgw.
+
+    To run all tests on all clients::
+
+        tasks:
+        - ceph:
+        - rgw:
+        - s3readwrite:
+
+    To restrict testing to particular clients::
+
+        tasks:
+        - ceph:
+        - rgw: [client.0]
+        - s3readwrite: [client.0]
+
+    To run against a server on client.1::
+
+        tasks:
+        - ceph:
+        - rgw: [client.1]
+        - s3readwrite:
+            client.0:
+              rgw_server: client.1
+
+    To pass extra test arguments::
+
+        tasks:
+        - ceph:
+        - rgw: [client.0]
+        - s3readwrite:
+            client.0:
+              readwrite:
+                bucket: mybucket
+                readers: 10
+                writers: 3
+                duration: 600
+                files:
+                  num: 10
+                  size: 2000
+                  stddev: 500
+            client.1:
+              ...
+
+    To override s3 configuration::
+
+        tasks:
+        - ceph:
+        - rgw: [client.0]
+        - s3readwrite:
+            client.0:
+              s3:
+                user_id: myuserid
+                display_name: myname
+                email: my@email
+                access_key: myaccesskey
+                secret_key: mysecretkey
+
+    """
+    assert config is None or isinstance(config, list) \
+        or isinstance(config, dict), \
+        "task s3tests only supports a list or dictionary for configuration"
+    all_clients = ['client.{id}'.format(id=id_)
+                   for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+    if config is None:
+        config = all_clients
+    if isinstance(config, list):
+        config = dict.fromkeys(config)
+    clients = config.keys()
+
+    overrides = ctx.config.get('overrides', {})
+    # merge each client section, not the top level.
+    for client in config.iterkeys():
+        if not config[client]:
+            config[client] = {}
+        teuthology.deep_merge(config[client], overrides.get('s3readwrite', {}))
+
+    log.debug('in s3readwrite, config is %s', config)
+
+    s3tests_conf = {}
+    for client in clients:
+        if config[client] is None:
+            config[client] = {}
+        config[client].setdefault('s3', {})
+        config[client].setdefault('readwrite', {})
+
+        s3tests_conf[client] = ({
+                'DEFAULT':
+                    {
+                    'port'      : 7280,
+                    'is_secure' : False,
+                    },
+                'readwrite' : config[client]['readwrite'],
+                's3'  : config[client]['s3'],
+                })
+
+    with contextutil.nested(
+        lambda: download(ctx=ctx, config=config),
+        lambda: create_users(ctx=ctx, config=dict(
+                clients=clients,
+                s3tests_conf=s3tests_conf,
+                )),
+        lambda: configure(ctx=ctx, config=dict(
+                clients=config,
+                s3tests_conf=s3tests_conf,
+                )),
+        lambda: run_tests(ctx=ctx, config=config),
+        ):
+        pass
+    yield
diff --git a/tasks/s3roundtrip.py b/tasks/s3roundtrip.py
new file mode 100644 (file)
index 0000000..5a7093d
--- /dev/null
@@ -0,0 +1,302 @@
+"""
+Run rgw roundtrip message tests
+"""
+from cStringIO import StringIO
+import base64
+import contextlib
+import logging
+import os
+import random
+import string
+import yaml
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+from ..config import config as teuth_config
+from ..orchestra import run
+from ..orchestra.connection import split_user
+
+log = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def download(ctx, config):
+    """
+    Download the s3 tests from the git builder.
+    Remove the downloaded s3 tests upon exit.
+
+    The context passed in should be identical to the context
+    passed in to the main task.
+    """
+    assert isinstance(config, list)
+    log.info('Downloading s3-tests...')
+    testdir = teuthology.get_testdir(ctx)
+    for client in config:
+        ctx.cluster.only(client).run(
+            args=[
+                'git', 'clone',
+                teuth_config.ceph_git_base_url + 's3-tests.git',
+                '{tdir}/s3-tests'.format(tdir=testdir),
+                ],
+            )
+    try:
+        yield
+    finally:
+        log.info('Removing s3-tests...')
+        for client in config:
+            ctx.cluster.only(client).run(
+                args=[
+                    'rm',
+                    '-rf',
+                    '{tdir}/s3-tests'.format(tdir=testdir),
+                    ],
+                )
+
+def _config_user(s3tests_conf, section, user):
+    """
+    Configure users for this section by stashing away keys, ids, and
+    email addresses.
+    """
+    s3tests_conf[section].setdefault('user_id', user)
+    s3tests_conf[section].setdefault('email', '{user}+test@test.test'.format(user=user))
+    s3tests_conf[section].setdefault('display_name', 'Mr. {user}'.format(user=user))
+    s3tests_conf[section].setdefault('access_key', ''.join(random.choice(string.uppercase) for i in xrange(20)))
+    s3tests_conf[section].setdefault('secret_key', base64.b64encode(os.urandom(40)))
+
+@contextlib.contextmanager
+def create_users(ctx, config):
+    """
+    Create a default s3 user.
+    """
+    assert isinstance(config, dict)
+    log.info('Creating rgw users...')
+    testdir = teuthology.get_testdir(ctx)
+    users = {'s3': 'foo'}
+    for client in config['clients']:
+        s3tests_conf = config['s3tests_conf'][client]
+        s3tests_conf.setdefault('roundtrip', {})
+        s3tests_conf['roundtrip'].setdefault('bucket', 'rttest-' + client + '-{random}-')
+        s3tests_conf['roundtrip'].setdefault('readers', 10)
+        s3tests_conf['roundtrip'].setdefault('writers', 3)
+        s3tests_conf['roundtrip'].setdefault('duration', 300)
+        s3tests_conf['roundtrip'].setdefault('files', {})
+        rtconf = s3tests_conf['roundtrip']
+        rtconf['files'].setdefault('num', 10)
+        rtconf['files'].setdefault('size', 2000)
+        rtconf['files'].setdefault('stddev', 500)
+        for section, user in [('s3', 'foo')]:
+            _config_user(s3tests_conf, section, '{user}.{client}'.format(user=user, client=client))
+            ctx.cluster.only(client).run(
+                args=[
+                    'adjust-ulimits',
+                    'ceph-coverage',
+                    '{tdir}/archive/coverage'.format(tdir=testdir),
+                    'radosgw-admin',
+                    '-n', client,
+                    'user', 'create',
+                    '--uid', s3tests_conf[section]['user_id'],
+                    '--display-name', s3tests_conf[section]['display_name'],
+                    '--access-key', s3tests_conf[section]['access_key'],
+                    '--secret', s3tests_conf[section]['secret_key'],
+                    '--email', s3tests_conf[section]['email'],
+                ],
+            )
+    try:
+        yield
+    finally:
+        for client in config['clients']:
+            for user in users.itervalues():
+                uid = '{user}.{client}'.format(user=user, client=client)
+                ctx.cluster.only(client).run(
+                    args=[
+                        'adjust-ulimits',
+                        'ceph-coverage',
+                        '{tdir}/archive/coverage'.format(tdir=testdir),
+                        'radosgw-admin',
+                        '-n', client,
+                        'user', 'rm',
+                        '--uid', uid,
+                        '--purge-data',
+                        ],
+                    )
+
+@contextlib.contextmanager
+def configure(ctx, config):
+    """
+    Configure the s3-tests.  This includes the running of the
+    bootstrap code and the updating of local conf files.
+    """
+    assert isinstance(config, dict)
+    log.info('Configuring s3-roundtrip-tests...')
+    testdir = teuthology.get_testdir(ctx)
+    for client, properties in config['clients'].iteritems():
+        s3tests_conf = config['s3tests_conf'][client]
+        if properties is not None and 'rgw_server' in properties:
+            host = None
+            for target, roles in zip(ctx.config['targets'].iterkeys(), ctx.config['roles']):
+                log.info('roles: ' + str(roles))
+                log.info('target: ' + str(target))
+                if properties['rgw_server'] in roles:
+                    _, host = split_user(target)
+            assert host is not None, "Invalid client specified as the rgw_server"
+            s3tests_conf['s3']['host'] = host
+        else:
+            s3tests_conf['s3']['host'] = 'localhost'
+
+        def_conf = s3tests_conf['DEFAULT']
+        s3tests_conf['s3'].setdefault('port', def_conf['port'])
+        s3tests_conf['s3'].setdefault('is_secure', def_conf['is_secure'])
+
+        (remote,) = ctx.cluster.only(client).remotes.keys()
+        remote.run(
+            args=[
+                'cd',
+                '{tdir}/s3-tests'.format(tdir=testdir),
+                run.Raw('&&'),
+                './bootstrap',
+                ],
+            )
+        conf_fp = StringIO()
+        conf = dict(
+                        s3=s3tests_conf['s3'],
+                        roundtrip=s3tests_conf['roundtrip'],
+                    )
+        yaml.safe_dump(conf, conf_fp, default_flow_style=False)
+        teuthology.write_file(
+            remote=remote,
+            path='{tdir}/archive/s3roundtrip.{client}.config.yaml'.format(tdir=testdir, client=client),
+            data=conf_fp.getvalue(),
+            )
+    yield
+
+
+@contextlib.contextmanager
+def run_tests(ctx, config):
+    """
+    Run the s3 roundtrip after everything is set up.
+
+    :param ctx: Context passed to task
+    :param config: specific configuration information
+    """
+    assert isinstance(config, dict)
+    testdir = teuthology.get_testdir(ctx)
+    for client, client_config in config.iteritems():
+        (remote,) = ctx.cluster.only(client).remotes.keys()
+        conf = teuthology.get_file(remote, '{tdir}/archive/s3roundtrip.{client}.config.yaml'.format(tdir=testdir, client=client))
+        args = [
+                '{tdir}/s3-tests/virtualenv/bin/s3tests-test-roundtrip'.format(tdir=testdir),
+                ]
+        if client_config is not None and 'extra_args' in client_config:
+            args.extend(client_config['extra_args'])
+
+        ctx.cluster.only(client).run(
+            args=args,
+            stdin=conf,
+            )
+    yield
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Run the s3tests-test-roundtrip suite against rgw.
+
+    To run all tests on all clients::
+
+        tasks:
+        - ceph:
+        - rgw:
+        - s3roundtrip:
+
+    To restrict testing to particular clients::
+
+        tasks:
+        - ceph:
+        - rgw: [client.0]
+        - s3roundtrip: [client.0]
+
+    To run against a server on client.1::
+
+        tasks:
+        - ceph:
+        - rgw: [client.1]
+        - s3roundtrip:
+            client.0:
+              rgw_server: client.1
+
+    To pass extra test arguments::
+
+        tasks:
+        - ceph:
+        - rgw: [client.0]
+        - s3roundtrip:
+            client.0:
+              roundtrip:
+                bucket: mybucket
+                readers: 10
+                writers: 3
+                duration: 600
+                files:
+                  num: 10
+                  size: 2000
+                  stddev: 500
+            client.1:
+              ...
+
+    To override s3 configuration::
+
+        tasks:
+        - ceph:
+        - rgw: [client.0]
+        - s3roundtrip:
+            client.0:
+              s3:
+                user_id: myuserid
+                display_name: myname
+                email: my@email
+                access_key: myaccesskey
+                secret_key: mysecretkey
+
+    """
+    assert config is None or isinstance(config, list) \
+        or isinstance(config, dict), \
+        "task s3tests only supports a list or dictionary for configuration"
+    all_clients = ['client.{id}'.format(id=id_)
+                   for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+    if config is None:
+        config = all_clients
+    if isinstance(config, list):
+        config = dict.fromkeys(config)
+    clients = config.keys()
+
+    s3tests_conf = {}
+    for client in clients:
+        if config[client] is None:
+            config[client] = {}
+        config[client].setdefault('s3', {})
+        config[client].setdefault('roundtrip', {})
+
+        s3tests_conf[client] = ({
+                'DEFAULT':
+                    {
+                    'port'      : 7280,
+                    'is_secure' : False,
+                    },
+                'roundtrip' : config[client]['roundtrip'],
+                's3'  : config[client]['s3'],
+                })
+
+    with contextutil.nested(
+        lambda: download(ctx=ctx, config=clients),
+        lambda: create_users(ctx=ctx, config=dict(
+                clients=clients,
+                s3tests_conf=s3tests_conf,
+                )),
+        lambda: configure(ctx=ctx, config=dict(
+                clients=config,
+                s3tests_conf=s3tests_conf,
+                )),
+        lambda: run_tests(ctx=ctx, config=config),
+        ):
+        pass
+    yield
diff --git a/tasks/s3tests.py b/tasks/s3tests.py
new file mode 100644 (file)
index 0000000..abbacb9
--- /dev/null
@@ -0,0 +1,402 @@
+"""
+Run a set of s3 tests on rgw.
+"""
+from cStringIO import StringIO
+from configobj import ConfigObj
+import base64
+import contextlib
+import logging
+import os
+import random
+import string
+
+import teuthology.task_util.rgw as rgw_utils
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+from ..config import config as teuth_config
+from ..orchestra import run
+from ..orchestra.connection import split_user
+
+log = logging.getLogger(__name__)
+
+def extract_sync_client_data(ctx, client_name):
+    """
+    Extract synchronized client rgw zone and rgw region information.
+    
+    :param ctx: Context passed to the s3tests task
+    :param client_name: Name of the client we are syncing with
+    """
+    return_region_name = None
+    return_dict = None
+    client = ctx.ceph.conf.get(client_name, None)
+    if client:
+        current_client_zone = client.get('rgw zone', None)
+        if current_client_zone:
+            (endpoint_host, endpoint_port) = ctx.rgw.role_endpoints.get(client_name, (None, None))
+            # pull out the radosgw_agent stuff
+            regions = ctx.rgw.regions
+            for region in regions:
+                log.debug('region is {region}'.format(region=region))
+                region_data = ctx.rgw.regions[region]
+                log.debug('region data is {region}'.format(region=region_data))
+                zones = region_data['zones']
+                for zone in zones:
+                    if current_client_zone in zone:
+                        return_region_name = region
+                        return_dict = dict()
+                        return_dict['api_name'] = region_data['api name']
+                        return_dict['is_master'] = region_data['is master']
+                        return_dict['port'] = endpoint_port
+                        return_dict['host'] = endpoint_host
+
+                        # The s3tests expect the sync_agent_[addr|port] to be
+                        # set on the non-master node for some reason
+                        if not region_data['is master']:
+                            (rgwagent_host, rgwagent_port) = ctx.radosgw_agent.endpoint
+                            (return_dict['sync_agent_addr'], _) = ctx.rgw.role_endpoints[rgwagent_host]
+                            return_dict['sync_agent_port'] = rgwagent_port
+
+        else:  # no 'rgw zone' set for this client
+            log.debug('No zone info for {host}'.format(host=client_name))
+    else: # if client
+        log.debug('No ceph conf for {host}'.format(host=client_name))
+
+    return return_region_name, return_dict
+
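+# Sketch of the return value, with made-up names: for a non-master zone this
+# might look like
+#   ('region-a', {'api_name': 'api-a', 'is_master': False, 'host': 'gw1',
+#                 'port': 7280, 'sync_agent_addr': 'gw2', 'sync_agent_port': 8000})
+# while (None, None) is returned when no zone/region info can be found.
+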
+def update_conf_with_region_info(ctx, config, s3tests_conf):
+    """
+    Scan the clients in s3tests_conf for one that is part of a
+    radosgw-agent sync relationship, and update the local conf file
+    with the relevant region information if such a client is found.
+    """
+    for key in s3tests_conf.keys():
+        # we'll assume that there's only one sync relationship (source / destination) with client.X
+        # as the key for now
+
+        # Iterate through all of the radosgw_agent (rgwa) configs and see if a
+        # given client is involved in a relationship.
+        # If a given client isn't, skip it
+        this_client_in_rgwa_config = False
+        for rgwa in ctx.radosgw_agent.config.keys():
+            rgwa_data = ctx.radosgw_agent.config[rgwa]
+
+            if key in rgwa_data['src'] or key in rgwa_data['dest']:
+                this_client_in_rgwa_config = True
+                log.debug('{client} is in an radosgw-agent sync relationship'.format(client=key))
+                radosgw_sync_data = ctx.radosgw_agent.config[key]
+                break
+        if not this_client_in_rgwa_config:
+            log.debug('{client} is NOT in an radosgw-agent sync relationship'.format(client=key))
+            continue
+
+        source_client = radosgw_sync_data['src']
+        dest_client = radosgw_sync_data['dest']
+
+        # Extract the pertinent info for the source side
+        source_region_name, source_region_dict = extract_sync_client_data(ctx, source_client)
+        log.debug('\t{key} source_region {source_region} source_dict {source_dict}'.format(
+            key=key, source_region=source_region_name, source_dict=source_region_dict))
+
+        # The source *should* be the master region, but test anyway and then set it as the default region
+        if source_region_dict['is_master']:
+            log.debug('Setting {region} as default_region'.format(region=source_region_name))
+            s3tests_conf[key]['fixtures'].setdefault('default_region', source_region_name)
+
+        # Extract the pertinent info for the destination side
+        dest_region_name, dest_region_dict = extract_sync_client_data(ctx, dest_client)
+        log.debug('\t{key} dest_region {dest_region} dest_dict {dest_dict}'.format(
+            key=key, dest_region=dest_region_name, dest_dict=dest_region_dict))
+
+        # now add these regions to the s3tests_conf object
+        s3tests_conf[key]['region {region_name}'.format(region_name=source_region_name)] = source_region_dict
+        s3tests_conf[key]['region {region_name}'.format(region_name=dest_region_name)] = dest_region_dict
+
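+# For illustration only (hypothetical region names): after the loop above,
+# s3tests_conf['client.0'] could gain sections like 'region region-a' for the
+# source (the master region, also set as fixtures/default_region) and
+# 'region region-b' for the destination, each holding the dict built by
+# extract_sync_client_data().
+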
+@contextlib.contextmanager
+def download(ctx, config):
+    """
+    Download the s3 tests from the git builder.
+    Remove downloaded s3 file upon exit.
+    
+    The context passed in should be identical to the context
+    passed in to the main task.
+    """
+    assert isinstance(config, dict)
+    log.info('Downloading s3-tests...')
+    testdir = teuthology.get_testdir(ctx)
+    for (client, cconf) in config.items():
+        branch = cconf.get('force-branch', None)
+        if not branch:
+            branch = cconf.get('branch', 'master')
+        sha1 = cconf.get('sha1')
+        ctx.cluster.only(client).run(
+            args=[
+                'git', 'clone',
+                '-b', branch,
+                teuth_config.ceph_git_base_url + 's3-tests.git',
+                '{tdir}/s3-tests'.format(tdir=testdir),
+                ],
+            )
+        if sha1 is not None:
+            ctx.cluster.only(client).run(
+                args=[
+                    'cd', '{tdir}/s3-tests'.format(tdir=testdir),
+                    run.Raw('&&'),
+                    'git', 'reset', '--hard', sha1,
+                    ],
+                )
+    try:
+        yield
+    finally:
+        log.info('Removing s3-tests...')
+        testdir = teuthology.get_testdir(ctx)
+        for client in config:
+            ctx.cluster.only(client).run(
+                args=[
+                    'rm',
+                    '-rf',
+                    '{tdir}/s3-tests'.format(tdir=testdir),
+                    ],
+                )
+
+
+def _config_user(s3tests_conf, section, user):
+    """
+    Configure users for this section by stashing away keys, ids, and
+    email addresses.
+    """
+    s3tests_conf[section].setdefault('user_id', user)
+    s3tests_conf[section].setdefault('email', '{user}+test@test.test'.format(user=user))
+    s3tests_conf[section].setdefault('display_name', 'Mr. {user}'.format(user=user))
+    s3tests_conf[section].setdefault('access_key', ''.join(random.choice(string.uppercase) for i in xrange(20)))
+    s3tests_conf[section].setdefault('secret_key', base64.b64encode(os.urandom(40)))
+
+
+@contextlib.contextmanager
+def create_users(ctx, config):
+    """
+    Create a main and an alternate s3 user.
+    """
+    assert isinstance(config, dict)
+    log.info('Creating rgw users...')
+    testdir = teuthology.get_testdir(ctx)
+    users = {'s3 main': 'foo', 's3 alt': 'bar'}
+    for client in config['clients']:
+        s3tests_conf = config['s3tests_conf'][client]
+        s3tests_conf.setdefault('fixtures', {})
+        s3tests_conf['fixtures'].setdefault('bucket prefix', 'test-' + client + '-{random}-')
+        for section, user in users.iteritems():
+            _config_user(s3tests_conf, section, '{user}.{client}'.format(user=user, client=client))
+            log.debug('Creating user {user} on {host}'.format(user=s3tests_conf[section]['user_id'], host=client))
+            ctx.cluster.only(client).run(
+                args=[
+                    'adjust-ulimits',
+                    'ceph-coverage',
+                    '{tdir}/archive/coverage'.format(tdir=testdir),
+                    'radosgw-admin',
+                    '-n', client,
+                    'user', 'create',
+                    '--uid', s3tests_conf[section]['user_id'],
+                    '--display-name', s3tests_conf[section]['display_name'],
+                    '--access-key', s3tests_conf[section]['access_key'],
+                    '--secret', s3tests_conf[section]['secret_key'],
+                    '--email', s3tests_conf[section]['email'],
+                ],
+            )
+    try:
+        yield
+    finally:
+        for client in config['clients']:
+            for user in users.itervalues():
+                uid = '{user}.{client}'.format(user=user, client=client)
+                ctx.cluster.only(client).run(
+                    args=[
+                        'adjust-ulimits',
+                        'ceph-coverage',
+                        '{tdir}/archive/coverage'.format(tdir=testdir),
+                        'radosgw-admin',
+                        '-n', client,
+                        'user', 'rm',
+                        '--uid', uid,
+                        '--purge-data',
+                        ],
+                    )
+
+
+@contextlib.contextmanager
+def configure(ctx, config):
+    """
+    Configure the s3-tests.  This includes the running of the
+    bootstrap code and the updating of local conf files.
+    """
+    assert isinstance(config, dict)
+    log.info('Configuring s3-tests...')
+    testdir = teuthology.get_testdir(ctx)
+    for client, properties in config['clients'].iteritems():
+        s3tests_conf = config['s3tests_conf'][client]
+        if properties is not None and 'rgw_server' in properties:
+            host = None
+            for target, roles in zip(ctx.config['targets'].iterkeys(), ctx.config['roles']):
+                log.info('roles: ' + str(roles))
+                log.info('target: ' + str(target))
+                if properties['rgw_server'] in roles:
+                    _, host = split_user(target)
+            assert host is not None, "Invalid client specified as the rgw_server"
+            s3tests_conf['DEFAULT']['host'] = host
+        else:
+            s3tests_conf['DEFAULT']['host'] = 'localhost'
+
+        (remote,) = ctx.cluster.only(client).remotes.keys()
+        remote.run(
+            args=[
+                'cd',
+                '{tdir}/s3-tests'.format(tdir=testdir),
+                run.Raw('&&'),
+                './bootstrap',
+                ],
+            )
+        conf_fp = StringIO()
+        s3tests_conf.write(conf_fp)
+        teuthology.write_file(
+            remote=remote,
+            path='{tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client),
+            data=conf_fp.getvalue(),
+            )
+    yield
+
+@contextlib.contextmanager
+def sync_users(ctx, config):
+    """
+    Run a full radosgw-agent metadata sync if this is a multi-region setup.
+    """
+    assert isinstance(config, dict)
+    # do a full sync if this is a multi-region test
+    if rgw_utils.multi_region_enabled(ctx):
+        log.debug('Doing a full sync')
+        rgw_utils.radosgw_agent_sync_all(ctx)
+    else:
+        log.debug('Not a multi-region config; skipping the metadata sync')
+
+    yield
+
+@contextlib.contextmanager
+def run_tests(ctx, config):
+    """
+    Run the s3tests after everything is set up.
+
+    :param ctx: Context passed to task
+    :param config: specific configuration information
+    """
+    assert isinstance(config, dict)
+    testdir = teuthology.get_testdir(ctx)
+    for client, client_config in config.iteritems():
+        args = [
+                'S3TEST_CONF={tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client),
+                '{tdir}/s3-tests/virtualenv/bin/nosetests'.format(tdir=testdir),
+                '-w',
+                '{tdir}/s3-tests'.format(tdir=testdir),
+                '-v',
+                '-a', '!fails_on_rgw',
+                ]
+        if client_config is not None and 'extra_args' in client_config:
+            args.extend(client_config['extra_args'])
+
+        ctx.cluster.only(client).run(
+            args=args,
+            )
+    yield
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Run the s3-tests suite against rgw.
+
+    To run all tests on all clients::
+
+        tasks:
+        - ceph:
+        - rgw:
+        - s3tests:
+
+    To restrict testing to particular clients::
+
+        tasks:
+        - ceph:
+        - rgw: [client.0]
+        - s3tests: [client.0]
+
+    To run against a server on client.1::
+
+        tasks:
+        - ceph:
+        - rgw: [client.1]
+        - s3tests:
+            client.0:
+              rgw_server: client.1
+
+    To pass extra arguments to nose (e.g. to run a certain test)::
+
+        tasks:
+        - ceph:
+        - rgw: [client.0]
+        - s3tests:
+            client.0:
+              extra_args: ['test_s3:test_object_acl_grand_public_read']
+            client.1:
+              extra_args: ['--exclude', 'test_100_continue']
+    """
+    assert config is None or isinstance(config, list) \
+        or isinstance(config, dict), \
+        "task s3tests only supports a list or dictionary for configuration"
+    all_clients = ['client.{id}'.format(id=id_)
+                   for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+    if config is None:
+        config = all_clients
+    if isinstance(config, list):
+        config = dict.fromkeys(config)
+    clients = config.keys()
+
+    overrides = ctx.config.get('overrides', {})
+    # merge each client section, not the top level.
+    for client in config.iterkeys():
+        if not config[client]:
+            config[client] = {}
+        teuthology.deep_merge(config[client], overrides.get('s3tests', {}))
+
+    log.debug('s3tests config is %s', config)
+
+    s3tests_conf = {}
+    for client in clients:
+        s3tests_conf[client] = ConfigObj(
+            indent_type='',
+            infile={
+                'DEFAULT':
+                    {
+                    'port'      : 7280,
+                    'is_secure' : 'no',
+                    },
+                'fixtures' : {},
+                's3 main'  : {},
+                's3 alt'   : {},
+                }
+            )
+
+    # Only attempt to add in the region info if there's a radosgw_agent configured
+    if hasattr(ctx, 'radosgw_agent'):
+        update_conf_with_region_info(ctx, config, s3tests_conf)
+
+    with contextutil.nested(
+        lambda: download(ctx=ctx, config=config),
+        lambda: create_users(ctx=ctx, config=dict(
+                clients=clients,
+                s3tests_conf=s3tests_conf,
+                )),
+        lambda: sync_users(ctx=ctx, config=config),
+        lambda: configure(ctx=ctx, config=dict(
+                clients=config,
+                s3tests_conf=s3tests_conf,
+                )),
+        lambda: run_tests(ctx=ctx, config=config),
+        ):
+        pass
+    yield
diff --git a/tasks/samba.py b/tasks/samba.py
new file mode 100644 (file)
index 0000000..a0375c5
--- /dev/null
@@ -0,0 +1,241 @@
+"""
+Samba
+"""
+import contextlib
+import logging
+import sys
+import time
+
+from teuthology import misc as teuthology
+from ..orchestra import run
+
+log = logging.getLogger(__name__)
+
+def get_sambas(ctx, roles):
+    """
+    Scan for roles that are samba.  Yield the id of the samba role
+    (samba.0, samba.1...) and the associated remote.
+    
+    :param ctx: Context
+    :param roles: roles for this test (extracted from yaml files)
+    """
+    for role in roles:
+        assert isinstance(role, basestring)
+        PREFIX = 'samba.'
+        assert role.startswith(PREFIX)
+        id_ = role[len(PREFIX):]
+        (remote,) = ctx.cluster.only(role).remotes.iterkeys()
+        yield (id_, remote)
+
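+# Example (hypothetical roles): with roles ['samba.0', 'samba.1'], get_sambas()
+# yields ('0', <Remote for samba.0>) and ('1', <Remote for samba.1>).
+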
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Set up samba smbd with the ceph vfs module.  This task assumes the samba
+    package has already been installed via the install task.
+
+    The config is optional and defaults to starting samba on all nodes.
+    If a config is given, it is expected to be a list of
+    samba nodes to start smbd servers on.
+
+    Example that starts smbd on all samba nodes::
+
+        tasks:
+        - install:
+        - install:
+            project: samba
+            extra_packages: ['samba']
+        - ceph:
+        - samba:
+        - interactive:
+
+    Example that starts smbd on just one of the samba nodes and cifs on the other::
+
+        tasks:
+        - samba: [samba.0]
+        - cifs: [samba.1]
+
+    An optional backend can be specified; it requires a path, which smbd
+    will use as the backend storage location::
+
+        roles:
+            - [osd.0, osd.1, osd.2, mon.0, mon.1, mon.2, mds.a]
+            - [client.0, samba.0]
+
+        tasks:
+        - ceph:
+        - ceph-fuse: [client.0]
+        - samba:
+            samba.0:
+              cephfuse: "{testdir}/mnt.0"
+
+    This mounts ceph to {testdir}/mnt.0 using fuse, and starts smbd with
+    a UNC of //localhost/cephfuse.  Access through that UNC will be on
+    the ceph fuse mount point.
+
+    If no arguments are specified in the samba
+    role, the default behavior is to enable the ceph UNC //localhost/ceph
+    and use the ceph vfs module as the smbd backend.
+
+    :param ctx: Context
+    :param config: Configuration
+    """
+    log.info("Setting up smbd with ceph vfs...")
+    assert config is None or isinstance(config, list) or isinstance(config, dict), \
+        "task samba got invalid config"
+
+    if config is None:
+        config = dict(('samba.{id}'.format(id=id_), None)
+                  for id_ in teuthology.all_roles_of_type(ctx.cluster, 'samba'))
+    elif isinstance(config, list):
+        config = dict((name, None) for name in config)
+
+    samba_servers = list(get_sambas(ctx=ctx, roles=config.keys()))
+
+    testdir = teuthology.get_testdir(ctx)
+
+    from teuthology.task.ceph import CephState
+    if not hasattr(ctx, 'daemons'):
+        ctx.daemons = CephState()
+
+    for id_, remote in samba_servers:
+
+        rolestr = "samba.{id_}".format(id_=id_)
+
+        confextras = """vfs objects = ceph
+  ceph:config_file = /etc/ceph/ceph.conf"""
+
+        unc = "ceph"
+        backend = "/"
+
+        if config[rolestr] is not None:
+            # verify that there's just one parameter in role
+            if len(config[rolestr]) != 1:
+                log.error("samba config for role samba.{id_} must have only one parameter".format(id_=id_))
+                raise Exception('invalid config')
+            confextras = ""
+            (unc, backendstr) = config[rolestr].items()[0]
+            backend = backendstr.format(testdir=testdir)
+
+        # on first samba role, set ownership and permissions of ceph root
+        # so that samba tests succeed
+        if config[rolestr] is None and id_ == samba_servers[0][0]:
+            remote.run(
+                    args=[
+                        'mkdir', '-p', '/tmp/cmnt', run.Raw('&&'),
+                        'sudo', 'ceph-fuse', '/tmp/cmnt', run.Raw('&&'),
+                        'sudo', 'chown', 'ubuntu:ubuntu', '/tmp/cmnt/', run.Raw('&&'),
+                        'sudo', 'chmod', '1777', '/tmp/cmnt/', run.Raw('&&'),
+                        'sudo', 'umount', '/tmp/cmnt/', run.Raw('&&'),
+                        'rm', '-rf', '/tmp/cmnt',
+                        ],
+                    )
+        else:
+            remote.run(
+                    args=[
+                        'sudo', 'chown', 'ubuntu:ubuntu', backend, run.Raw('&&'),
+                        'sudo', 'chmod', '1777', backend,
+                        ],
+                    )
+
+        teuthology.sudo_write_file(remote, "/usr/local/samba/etc/smb.conf", """
+[global]
+  workgroup = WORKGROUP
+  netbios name = DOMAIN
+
+[{unc}]
+  path = {backend}
+  {extras}
+  writeable = yes
+  valid users = ubuntu
+""".format(extras=confextras, unc=unc, backend=backend))
+
+        # create ubuntu user
+        remote.run(
+            args=[
+                'sudo', '/usr/local/samba/bin/smbpasswd', '-e', 'ubuntu',
+                run.Raw('||'),
+                'printf', run.Raw('"ubuntu\nubuntu\n"'),
+                run.Raw('|'),
+                'sudo', '/usr/local/samba/bin/smbpasswd', '-s', '-a', 'ubuntu'
+            ])
+
+        smbd_cmd = [
+                'sudo',
+                'daemon-helper',
+                'kill',
+                'nostdin',
+                '/usr/local/samba/sbin/smbd',
+                '-F',
+                ]
+        ctx.daemons.add_daemon(remote, 'smbd', id_,
+                               args=smbd_cmd,
+                               logger=log.getChild("smbd.{id_}".format(id_=id_)),
+                               stdin=run.PIPE,
+                               wait=False,
+                               )
+
+        # let smbd initialize, probably a better way...
+        seconds_to_sleep = 100
+        log.info('Sleeping for %s seconds...' % seconds_to_sleep)
+        time.sleep(seconds_to_sleep)
+        log.info('Sleeping stopped...')
+
+    try:
+        yield
+    finally:
+        log.info('Stopping smbd processes...')
+        exc_info = (None, None, None)
+        for d in ctx.daemons.iter_daemons_of_role('smbd'):
+            try:
+                d.stop()
+            except (run.CommandFailedError,
+                    run.CommandCrashedError,
+                    run.ConnectionLostError):
+                exc_info = sys.exc_info()
+                log.exception('Saw exception from %s.%s', d.role, d.id_)
+        if exc_info != (None, None, None):
+            raise exc_info[0], exc_info[1], exc_info[2]
+
+        for id_, remote in samba_servers:
+            remote.run(
+                args=[
+                    'sudo',
+                    'rm', '-rf',
+                    '/usr/local/samba/etc/smb.conf',
+                    '/usr/local/samba/private/*',
+                    '/usr/local/samba/var/run/',
+                    '/usr/local/samba/var/locks',
+                    '/usr/local/samba/var/lock',
+                    ],
+                )
+            # make sure daemons are gone
+            try:
+                remote.run(
+                    args=[
+                        'while',
+                        'sudo', 'killall', '-9', 'smbd',
+                        run.Raw(';'),
+                        'do', 'sleep', '1',
+                        run.Raw(';'),
+                        'done',
+                        ],
+                    )
+
+                remote.run(
+                    args=[
+                        'sudo',
+                        'lsof',
+                        backend,
+                        ],
+                    )
+                remote.run(
+                    args=[
+                        'sudo',
+                        'fuser',
+                        '-M',
+                        backend,
+                        ],
+                    )
+            except Exception:
+                log.exception("Saw exception")
+                pass
diff --git a/tasks/scrub.py b/tasks/scrub.py
new file mode 100644 (file)
index 0000000..7a25300
--- /dev/null
@@ -0,0 +1,117 @@
+"""
+Scrub osds
+"""
+import contextlib
+import gevent
+import logging
+import random
+import time
+
+import ceph_manager
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Run scrub periodically. Randomly chooses an OSD to scrub.
+
+    The config should be as follows:
+
+    scrub:
+        frequency: <seconds between scrubs>
+        deep: <bool for deepness>
+
+    example:
+
+    tasks:
+    - ceph:
+    - scrub:
+        frequency: 30
+        deep: 0
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'scrub task only accepts a dict for configuration'
+
+    log.info('Beginning scrub...')
+
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+
+    manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'),
+        )
+
+    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
+    while len(manager.get_osd_status()['up']) < num_osds:
+        manager.sleep(10)
+
+    scrub_proc = Scrubber(
+        manager,
+        config,
+        )
+    try:
+        yield
+    finally:
+        log.info('joining scrub')
+        scrub_proc.do_join()
+
+class Scrubber:
+    """
+    Scrubbing is performed by a thread spawned during initialization
+    """
+    def __init__(self, manager, config):
+        """
+        Wait for the cluster to become clean, then spawn the scrubbing thread.
+        """
+        self.ceph_manager = manager
+        self.ceph_manager.wait_for_clean()
+
+        osd_status = self.ceph_manager.get_osd_status()
+        self.osds = osd_status['up']
+
+        self.config = config
+        if self.config is None:
+            self.config = dict()
+
+        else:
+            def tmp(x):
+                """Local display"""
+                print x
+            self.log = tmp
+
+        self.stopping = False
+
+        log.info("spawning thread")
+
+        self.thread = gevent.spawn(self.do_scrub)
+
+    def do_join(self):
+        """Scrubbing thread finished"""
+        self.stopping = True
+        self.thread.get()
+
+    def do_scrub(self):
+        """Perform the scrub operation"""
+        frequency = self.config.get("frequency", 30)
+        deep = self.config.get("deep", 0)
+
+        log.info("stopping %s" % self.stopping)
+
+        while not self.stopping:
+            osd = str(random.choice(self.osds))
+
+            if deep:
+                cmd = 'deep-scrub'
+            else:
+                cmd = 'scrub'
+
+            log.info('%sbing %s' % (cmd, osd))
+            self.ceph_manager.raw_cluster_cmd('osd', cmd, osd)
+
+            time.sleep(frequency)
diff --git a/tasks/scrub_test.py b/tasks/scrub_test.py
new file mode 100644 (file)
index 0000000..3443ae9
--- /dev/null
@@ -0,0 +1,199 @@
+"""Scrub testing"""
+from cStringIO import StringIO
+
+import logging
+import os
+import time
+
+import ceph_manager
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+    """
+    Test [deep] scrub
+
+    tasks:
+    - chef:
+    - install:
+    - ceph:
+        log-whitelist:
+        - '!= known digest'
+        - '!= known omap_digest'
+        - deep-scrub 0 missing, 1 inconsistent objects
+        - deep-scrub 1 errors
+        - repair 0 missing, 1 inconsistent objects
+        - repair 1 errors, 1 fixed
+    - scrub_test:
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'scrub_test task only accepts a dict for configuration'
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+    
+    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
+    log.info('num_osds is %s' % num_osds)
+
+    manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'),
+        )
+
+    while len(manager.get_osd_status()['up']) < num_osds:
+        time.sleep(10)
+
+    for i in range(num_osds):
+        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'flush_pg_stats')
+    manager.wait_for_clean()
+
+    # write some data
+    p = manager.do_rados(mon, ['-p', 'rbd', 'bench', '--no-cleanup', '1', 'write', '-b', '4096'])
+    err = p.exitstatus
+    log.info('err is %d' % err)
+
+    # wait for some PG to have data that we can mess with
+    victim = None
+    osd = None
+    while victim is None:
+        stats = manager.get_pg_stats()
+        for pg in stats:
+            size = pg['stat_sum']['num_bytes']
+            if size > 0:
+                victim = pg['pgid']
+                osd = pg['acting'][0]
+                break
+
+        if victim is None:
+            time.sleep(3)
+
+    log.info('messing with PG %s on osd %d' % (victim, osd))
+
+    (osd_remote,) = ctx.cluster.only('osd.%d' % osd).remotes.iterkeys()
+    data_path = os.path.join(
+        '/var/lib/ceph/osd',
+        'ceph-{id}'.format(id=osd),
+        'current',
+        '{pg}_head'.format(pg=victim)
+        )
+
+    # fuzz time
+    ls_fp = StringIO()
+    osd_remote.run(
+        args=[ 'ls', data_path ],
+        stdout=ls_fp,
+    )
+    ls_out = ls_fp.getvalue()
+    ls_fp.close()
+
+    # find an object file we can mess with
+    osdfilename = None
+    for line in ls_out.split('\n'):
+        if 'object' in line:
+            osdfilename = line
+            break
+    assert osdfilename is not None
+
+    # Get the actual object name from the on-disk filename; the OSD escapes
+    # '_' in object names as '\u' on disk, so undo that here
+    tmp = osdfilename.split('__')
+    objname = tmp[0]
+    objname = objname.replace('\u', '_')
+    log.info('fuzzing %s' % objname)
+
+    # put a single \0 at the beginning of the file
+    osd_remote.run(
+        args=[ 'sudo', 'dd',
+               'if=/dev/zero',
+               'of=%s' % os.path.join(data_path, osdfilename),
+               'bs=1', 'count=1', 'conv=notrunc'
+             ]
+    )
+
+    # scrub, verify inconsistent
+    manager.raw_cluster_cmd('pg', 'deep-scrub', victim)
+    # Give deep-scrub a chance to start
+    time.sleep(60)
+
+    while True:
+        stats = manager.get_single_pg_stats(victim)
+        state = stats['state']
+
+        # wait for the scrub to finish
+        if 'scrubbing' in state:
+            time.sleep(3)
+            continue
+
+        inconsistent = stats['state'].find('+inconsistent') != -1
+        assert inconsistent
+        break
+
+
+    # repair, verify no longer inconsistent
+    manager.raw_cluster_cmd('pg', 'repair', victim)
+    # Give repair a chance to start
+    time.sleep(60)
+
+    while True:
+        stats = manager.get_single_pg_stats(victim)
+        state = stats['state']
+
+        # wait for the scrub to finish
+        if 'scrubbing' in state:
+            time.sleep(3)
+            continue
+
+        inconsistent = stats['state'].find('+inconsistent') != -1
+        assert not inconsistent
+        break
+
+    # Test deep-scrub with various omap modifications
+    manager.do_rados(mon, ['-p', 'rbd', 'setomapval', objname, 'key', 'val'])
+    manager.do_rados(mon, ['-p', 'rbd', 'setomapheader', objname, 'hdr'])
+
+    # Modify omap on specific osd
+    log.info('fuzzing omap of %s' % objname)
+    manager.osd_admin_socket(osd, ['rmomapkey', 'rbd', objname, 'key'])
+    manager.osd_admin_socket(osd, ['setomapval', 'rbd', objname, 'badkey', 'badval'])
+    manager.osd_admin_socket(osd, ['setomapheader', 'rbd', objname, 'badhdr'])
+
+    # scrub, verify inconsistent
+    manager.raw_cluster_cmd('pg', 'deep-scrub', victim)
+    # Give deep-scrub a chance to start
+    time.sleep(60)
+
+    while True:
+        stats = manager.get_single_pg_stats(victim)
+        state = stats['state']
+
+        # wait for the scrub to finish
+        if 'scrubbing' in state:
+            time.sleep(3)
+            continue
+
+        inconsistent = stats['state'].find('+inconsistent') != -1
+        assert inconsistent
+        break
+
+    # repair, verify no longer inconsistent
+    manager.raw_cluster_cmd('pg', 'repair', victim)
+    # Give repair a chance to start
+    time.sleep(60)
+
+    while True:
+        stats = manager.get_single_pg_stats(victim)
+        state = stats['state']
+
+        # wait for the scrub to finish
+        if 'scrubbing' in state:
+            time.sleep(3)
+            continue
+
+        inconsistent = stats['state'].find('+inconsistent') != -1
+        assert not inconsistent
+        break
+
+    log.info('test successful!')
diff --git a/tasks/test/__init__.py b/tasks/test/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tasks/test/test_devstack.py b/tasks/test/test_devstack.py
new file mode 100644 (file)
index 0000000..117b307
--- /dev/null
@@ -0,0 +1,48 @@
+from textwrap import dedent
+
+from .. import devstack
+
+
+class TestDevstack(object):
+    def test_parse_os_table(self):
+        table_str = dedent("""
+            +---------------------+--------------------------------------+
+            |       Property      |                Value                 |
+            +---------------------+--------------------------------------+
+            |     attachments     |                  []                  |
+            |  availability_zone  |                 nova                 |
+            |       bootable      |                false                 |
+            |      created_at     |      2014-02-21T17:14:47.548361      |
+            | display_description |                 None                 |
+            |     display_name    |                 NAME                 |
+            |          id         | ffdbd1bb-60dc-4d95-acfe-88774c09ad3e |
+            |       metadata      |                  {}                  |
+            |         size        |                  1                   |
+            |     snapshot_id     |                 None                 |
+            |     source_volid    |                 None                 |
+            |        status       |               creating               |
+            |     volume_type     |                 None                 |
+            +---------------------+--------------------------------------+
+            """).strip()
+        expected = {
+            'Property': 'Value',
+            'attachments': '[]',
+            'availability_zone': 'nova',
+            'bootable': 'false',
+            'created_at': '2014-02-21T17:14:47.548361',
+            'display_description': 'None',
+            'display_name': 'NAME',
+            'id': 'ffdbd1bb-60dc-4d95-acfe-88774c09ad3e',
+            'metadata': '{}',
+            'size': '1',
+            'snapshot_id': 'None',
+            'source_volid': 'None',
+            'status': 'creating',
+            'volume_type': 'None'}
+
+        vol_info = devstack.parse_os_table(table_str)
+        assert vol_info == expected
+
+
+
+
diff --git a/tasks/tgt.py b/tasks/tgt.py
new file mode 100644 (file)
index 0000000..c2b322e
--- /dev/null
@@ -0,0 +1,177 @@
+"""
+Task to handle tgt
+
+Assumptions made:
+    The ceph-extras tgt package may need to get installed.
+    The open-iscsi package needs to get installed.
+"""
+import logging
+import contextlib
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+
+log = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def start_tgt_remotes(ctx, start_tgtd):
+    """
+    This subtask starts up a tgtd on the clients specified
+    """
+    remotes = ctx.cluster.only(teuthology.is_type('client')).remotes
+    tgtd_list = []
+    for rem, roles in remotes.iteritems():
+        for _id in roles:
+            if _id in start_tgtd:
+                if not rem in tgtd_list:
+                    tgtd_list.append(rem)
+                    size = ctx.config.get('image_size', 10240)
+                    rem.run(
+                        args=[
+                            'rbd',
+                            'create',
+                            'iscsi-image',
+                            '--size',
+                            str(size),
+                    ])
+                    rem.run(
+                        args=[
+                            'sudo',
+                            'tgtadm',
+                            '--lld',
+                            'iscsi',
+                            '--mode',
+                            'target',
+                            '--op',
+                            'new',
+                            '--tid',
+                            '1',
+                            '--targetname',
+                            'rbd',
+                        ])
+                    rem.run(
+                        args=[
+                            'sudo',
+                            'tgtadm',
+                            '--lld',
+                            'iscsi',
+                            '--mode',
+                            'logicalunit',
+                            '--op',
+                            'new',
+                            '--tid',
+                            '1',
+                            '--lun',
+                            '1',
+                            '--backing-store',
+                            'iscsi-image',
+                            '--bstype',
+                            'rbd',
+                        ])
+                    rem.run(
+                        args=[
+                            'sudo',
+                            'tgtadm',
+                            '--lld',
+                            'iscsi',
+                            '--op',
+                            'bind',
+                            '--mode',
+                            'target',
+                            '--tid',
+                            '1',
+                            '-I',
+                            'ALL',
+                        ])
+    try:
+        yield
+
+    finally:
+        for rem in tgtd_list:
+            rem.run(
+                args=[
+                    'sudo',
+                    'tgtadm',
+                    '--lld',
+                    'iscsi',
+                    '--mode',
+                    'target',
+                    '--op',
+                    'delete',
+                    '--force',
+                    '--tid',
+                    '1',
+                ])
+            rem.run(
+                args=[
+                    'rbd',
+                    'snap',
+                    'purge',
+                    'iscsi-image',
+                ])
+            rem.run(
+                args=[
+                    'sudo',
+                    'rbd',
+                    'rm',
+                    'iscsi-image',
+                ])
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Start up tgt.
+
+    To start on all clients::
+
+        tasks:
+        - ceph:
+        - tgt:
+
+    To start on certain clients::
+
+        tasks:
+        - ceph:
+        - tgt: [client.0, client.3]
+
+    or::
+
+        tasks:
+        - ceph:
+        - tgt:
+            client.0:
+            client.3:
+
+    An image size can also be specified::
+
+        tasks:
+        - ceph:
+        - tgt:
+            image_size: 20480
+
+    The general flow of things here is:
+        1. Find clients on which tgt is supposed to run (start_tgtd)
+        2. Remotely start up tgt daemon
+    On cleanup:
+        3. Stop tgt daemon
+
+    The iscsi administration is handled by the iscsi task.
+    """
+    if config:
+        config = {key : val for key, val in config.items()
+                if key.startswith('client')}
+    # config at this point should only contain keys starting with 'client'
+    start_tgtd = []
+    remotes = ctx.cluster.only(teuthology.is_type('client')).remotes
+    log.info(remotes)
+    if not config:
+        start_tgtd = ['client.{id}'.format(id=id_)
+            for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+    else:
+        start_tgtd = config
+    log.info(start_tgtd)
+    with contextutil.nested(
+            lambda: start_tgt_remotes(ctx=ctx, start_tgtd=start_tgtd),):
+        yield
diff --git a/tasks/thrashosds.py b/tasks/thrashosds.py
new file mode 100644 (file)
index 0000000..ba166ed
--- /dev/null
@@ -0,0 +1,179 @@
+"""
+Thrash -- Simulate random osd failures.
+"""
+import contextlib
+import logging
+import ceph_manager
+from teuthology import misc as teuthology
+
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    "Thrash" the OSDs by randomly marking them out/down (and then back
+    in) until the task is ended. This loops, and every op_delay
+    seconds it randomly chooses to add or remove an OSD (even odds)
+    unless there are fewer than min_out OSDs out of the cluster, or
+    more than min_in OSDs in the cluster.
+
+    All commands are run on mon0 and it stops when __exit__ is called.
+
+    The config is optional, and is a dict containing some or all of:
+
+    min_in: (default 3) the minimum number of OSDs to keep in the
+       cluster
+
+    min_out: (default 0) the minimum number of OSDs to keep out of the
+       cluster
+
+    op_delay: (5) the length of time to sleep between changing an
+       OSD's status
+
+    min_dead: (0) minimum number of osds to leave down/dead.
+
+    max_dead: (0) maximum number of osds to leave down/dead before waiting
+       for clean.  This should probably be num_replicas - 1.
+
+    clean_interval: (60) the approximate length of time to loop before
+       waiting until the cluster goes clean. (In reality this is used
+       to probabilistically choose when to wait, and the method used
+       makes it closer to -- but not identical to -- the half-life.)
+
+    scrub_interval: (-1) the approximate length of time to loop before
+       waiting until a scrub is performed while cleaning. (In reality
+       this is used to probabilistically choose when to wait, and it
+       only applies to the cases where cleaning is being performed). 
+       -1 is used to indicate that no scrubbing will be done.
+  
+    chance_down: (0.4) the probability that the thrasher will mark an
+       OSD down rather than marking it out. (The thrasher will not
+       consider that OSD out of the cluster, since presently an OSD
+       wrongly marked down will mark itself back up again.) This value
+       can be either an integer (eg, 75) or a float probability (eg
+       0.75).
+
+    chance_test_min_size: (0) chance to run test_pool_min_size,
+       which:
+       - kills all but one osd
+       - waits
+       - kills that osd
+       - revives all other osds
+       - verifies that the osds fully recover
+
+    timeout: (360) the number of seconds to wait for the cluster
+       to become clean after each cluster change. If this doesn't
+       happen within the timeout, an exception will be raised.
+
+    revive_timeout: (75) number of seconds to wait for an osd asok to
+       appear after attempting to revive the osd
+
+    thrash_primary_affinity: (true) randomly adjust primary-affinity
+
+    chance_pgnum_grow: (0) chance to increase a pool's size
+    chance_pgpnum_fix: (0) chance to adjust pgpnum to pg for a pool
+    pool_grow_by: (10) amount to increase pgnum by
+    max_pgs_per_pool_osd: (1200) don't expand pools past this size per osd
+
+    pause_short: (3) duration of short pause
+    pause_long: (80) duration of long pause
+    pause_check_after: (50) assert osd down after this long
+    chance_inject_pause_short: (1) chance of injecting short stall
+    chance_inject_pause_long: (0) chance of injecting long stall
+
+    clean_wait: (0) duration to wait before resuming thrashing once clean
+
+    powercycle: (false) whether to power cycle the node instead
+        of just the osd process. Note that this assumes that a single
+        osd is the only important process on the node.
+
+    chance_test_backfill_full: (0) chance to simulate full disks stopping
+        backfill
+
+    chance_test_map_discontinuity: (0) chance to test map discontinuity
+    map_discontinuity_sleep_time: (40) time to wait for map trims
+
+    example:
+
+    tasks:
+    - ceph:
+    - thrashosds:
+        chance_down: 10
+        op_delay: 3
+        min_in: 1
+        timeout: 600
+    - interactive:
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'thrashosds task only accepts a dict for configuration'
+
+    if 'powercycle' in config:
+
+        # sync everyone first to avoid collateral damage to / etc.
+        log.info('Doing preliminary sync to avoid collateral damage...')
+        ctx.cluster.run(args=['sync'])
+
+        if 'ipmi_user' in ctx.teuthology_config:
+            for t, key in ctx.config['targets'].iteritems():
+                host = t.split('@')[-1]
+                shortname = host.split('.')[0]
+                from ..orchestra import remote as oremote
+                console = oremote.getRemoteConsole(
+                    name=host,
+                    ipmiuser=ctx.teuthology_config['ipmi_user'],
+                    ipmipass=ctx.teuthology_config['ipmi_password'],
+                    ipmidomain=ctx.teuthology_config['ipmi_domain'])
+                cname = '{host}.{domain}'.format(
+                    host=shortname,
+                    domain=ctx.teuthology_config['ipmi_domain'])
+                log.debug('checking console status of %s' % cname)
+                if not console.check_status():
+                    log.info(
+                        'Failed to get console status for '
+                        '%s, disabling console...'
+                        % cname)
+                    console=None
+                else:
+                    # find the remote for this console and add it
+                    remotes = [
+                        r for r in ctx.cluster.remotes.keys() if r.name == t]
+                    if len(remotes) != 1:
+                        raise Exception(
+                            'Too many (or too few) remotes '
+                            'found for target {t}'.format(t=t))
+                    remotes[0].console = console
+                    log.debug('console ready on %s' % cname)
+
+            # check that all osd remotes have a valid console
+            osds = ctx.cluster.only(teuthology.is_type('osd'))
+            for remote, _ in osds.remotes.iteritems():
+                if not remote.console:
+                    raise Exception(
+                        'IPMI console required for powercycling, '
+                        'but not available on osd role: {r}'.format(
+                            r=remote.name))
+
+    log.info('Beginning thrashosds...')
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+    manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        config=config,
+        logger=log.getChild('ceph_manager'),
+        )
+    ctx.manager = manager
+    thrash_proc = ceph_manager.Thrasher(
+        manager,
+        config,
+        logger=log.getChild('thrasher')
+        )
+    try:
+        yield
+    finally:
+        log.info('joining thrashosds')
+        thrash_proc.do_join()
+        manager.wait_for_recovery(config.get('timeout', 360))
diff --git a/tasks/userdata_setup.yaml b/tasks/userdata_setup.yaml
new file mode 100644 (file)
index 0000000..eaa5f73
--- /dev/null
@@ -0,0 +1,22 @@
+#cloud-config-archive
+
+- type: text/cloud-config
+  content: |
+    output:
+      all: '| tee -a /var/log/cloud-init-output.log'
+
+# allow passwordless access for debugging
+- |
+  #!/bin/bash
+  exec passwd -d ubuntu
+
+- |
+  #!/bin/bash
+
+  # mount a 9p fs for storing logs
+  mkdir /mnt/log
+  mount -t 9p -o trans=virtio test_log /mnt/log
+
+  # mount the iso image that has the test script
+  mkdir /mnt/cdrom
+  mount -t auto /dev/cdrom /mnt/cdrom
diff --git a/tasks/userdata_teardown.yaml b/tasks/userdata_teardown.yaml
new file mode 100644 (file)
index 0000000..7f3d64f
--- /dev/null
@@ -0,0 +1,11 @@
+- |
+  #!/bin/bash
+  cp /var/log/cloud-init-output.log /mnt/log
+
+- |
+  #!/bin/bash
+  umount /mnt/log
+
+- |
+  #!/bin/bash
+  shutdown -h -P now
diff --git a/tasks/util/__init__.py b/tasks/util/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tasks/util/kclient.py b/tasks/util/kclient.py
new file mode 100644 (file)
index 0000000..c6a259f
--- /dev/null
@@ -0,0 +1,22 @@
+from teuthology.misc import get_testdir
+from teuthology.orchestra import run
+
+
+def write_secret_file(ctx, remote, role, keyring, filename):
+    """
+    Stash the key from the given keyring in the specified filename.
+    """
+    testdir = get_testdir(ctx)
+    remote.run(
+        args=[
+            'adjust-ulimits',
+            'ceph-coverage',
+            '{tdir}/archive/coverage'.format(tdir=testdir),
+            'ceph-authtool',
+            '--name={role}'.format(role=role),
+            '--print-key',
+            keyring,
+            run.Raw('>'),
+            filename,
+            ],
+        )
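+
+# Example call (hypothetical paths): write_secret_file(ctx, remote, 'client.0',
+# '/etc/ceph/ceph.keyring', '/tmp/client.0.secret') extracts client.0's key so
+# a kernel client mount can read it via 'secretfile='.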
diff --git a/tasks/util/rados.py b/tasks/util/rados.py
new file mode 100644 (file)
index 0000000..f6a806c
--- /dev/null
@@ -0,0 +1,50 @@
+import logging
+
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+def rados(ctx, remote, cmd, wait=True, check_status=False):
+    testdir = teuthology.get_testdir(ctx)
+    log.info("rados %s" % ' '.join(cmd))
+    pre = [
+        'adjust-ulimits',
+        'ceph-coverage',
+        '{tdir}/archive/coverage'.format(tdir=testdir),
+        'rados',
+        ]
+    pre.extend(cmd)
+    proc = remote.run(
+        args=pre,
+        check_status=check_status,
+        wait=wait,
+        )
+    if wait:
+        return proc.exitstatus
+    else:
+        return proc
+
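+# Illustrative usage (made-up pool/object names):
+#   err = rados(ctx, remote, ['-p', 'data', 'put', 'obj1', '/etc/hosts'])
+# returns the command's exit status when wait=True, or the in-flight
+# process object when wait=False.
+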
+def create_ec_pool(remote, name, profile_name, pgnum, m=1, k=2):
+    remote.run(args=[
+        'ceph', 'osd', 'erasure-code-profile', 'set',
+        profile_name, 'm=' + str(m), 'k=' + str(k),
+        'ruleset-failure-domain=osd',
+        ])
+    remote.run(args=[
+        'ceph', 'osd', 'pool', 'create', name,
+        str(pgnum), str(pgnum), 'erasure', profile_name,
+        ])
+
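+# Example (hypothetical names): create_ec_pool(remote, 'ecpool', 'ecprofile', 16)
+# creates a k=2/m=1 profile (two data chunks plus one coding chunk, failure
+# domain osd) and a 16-PG erasure-coded pool that uses it.
+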
+def create_replicated_pool(remote, name, pgnum):
+    remote.run(args=[
+        'ceph', 'osd', 'pool', 'create', name, str(pgnum), str(pgnum),
+        ])
+
+def create_cache_pool(remote, base_name, cache_name, pgnum, size):
+    remote.run(args=[
+            'ceph', 'osd', 'pool', 'create', cache_name, str(pgnum)
+            ])
+    remote.run(args=[
+            'ceph', 'osd', 'tier', 'add-cache', base_name, cache_name,
+            str(size),
+            ])
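+
+# Example (hypothetical names): create_cache_pool(remote, 'base', 'hot', 8, 100000)
+# creates an 8-PG pool 'hot' and attaches it as a cache tier over 'base' with
+# the given target size via 'ceph osd tier add-cache'.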
diff --git a/tasks/util/rgw.py b/tasks/util/rgw.py
new file mode 100644 (file)
index 0000000..cbe3071
--- /dev/null
@@ -0,0 +1,153 @@
+from cStringIO import StringIO
+import logging
+import json
+import requests
+from urlparse import urlparse
+
+from ..orchestra.connection import split_user
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+# simple test to indicate if multi-region testing should occur
+def multi_region_enabled(ctx):
+    # this is populated by the radosgw-agent task, seems reasonable to
+    # use that as an indicator that we're testing multi-region sync
+    return 'radosgw_agent' in ctx
+
+def rgwadmin(ctx, client, cmd, stdin=StringIO(), check_status=False):
+    log.info('rgwadmin: {client} : {cmd}'.format(client=client,cmd=cmd))
+    testdir = teuthology.get_testdir(ctx)
+    pre = [
+        'adjust-ulimits',
+        'ceph-coverage',
+        '{tdir}/archive/coverage'.format(tdir=testdir),
+        'radosgw-admin',
+        '--log-to-stderr',
+        '--format', 'json',
+        '-n',  client,
+        ]
+    pre.extend(cmd)
+    log.info('rgwadmin: cmd=%s' % pre)
+    (remote,) = ctx.cluster.only(client).remotes.iterkeys()
+    proc = remote.run(
+        args=pre,
+        check_status=check_status,
+        stdout=StringIO(),
+        stderr=StringIO(),
+        stdin=stdin,
+        )
+    r = proc.exitstatus
+    out = proc.stdout.getvalue()
+    j = None
+    if not r and out != '':
+        try:
+            j = json.loads(out)
+            log.info(' json result: %s' % j)
+        except ValueError:
+            j = out
+            log.info(' raw result: %s' % j)
+    return (r, j)
+
+def get_zone_host_and_port(ctx, client, zone):
+    _, region_map = rgwadmin(ctx, client, check_status=True,
+                             cmd=['-n', client, 'region-map', 'get'])
+    regions = region_map['regions']
+    for region in regions:
+        for zone_info in region['val']['zones']:
+            if zone_info['name'] == zone:
+                endpoint = urlparse(zone_info['endpoints'][0])
+                host, port = endpoint.hostname, endpoint.port
+                if port is None:
+                    port = 80
+                return host, port
+    assert False, 'no endpoint for zone {zone} found'.format(zone=zone)
+
+def get_master_zone(ctx, client):
+    _, region_map = rgwadmin(ctx, client, check_status=True,
+                             cmd=['-n', client, 'region-map', 'get'])
+    regions = region_map['regions']
+    for region in regions:
+        is_master = (region['val']['is_master'] == "true")
+        log.info('region={r} is_master={ism}'.format(r=region, ism=is_master))
+        if not is_master:
+            continue
+        master_zone = region['val']['master_zone']
+        log.info('master_zone=%s' % master_zone)
+        for zone_info in region['val']['zones']:
+            if zone_info['name'] == master_zone:
+                return master_zone
+    log.info('couldn\'t find master zone')
+    return None
+
+def get_master_client(ctx, clients):
+    master_zone = get_master_zone(ctx, clients[0]) # can use any client for this as long as system configured correctly
+    if not master_zone:
+        return None
+
+    for client in clients:
+        zone = zone_for_client(ctx, client)
+        if zone == master_zone:
+            return client
+
+    return None
+
+def get_zone_system_keys(ctx, client, zone):
+    _, zone_info = rgwadmin(ctx, client, check_status=True,
+                            cmd=['-n', client,
+                                 'zone', 'get', '--rgw-zone', zone])
+    system_key = zone_info['system_key']
+    return system_key['access_key'], system_key['secret_key']
+
+def zone_for_client(ctx, client):
+    ceph_config = ctx.ceph.conf.get('global', {})
+    ceph_config.update(ctx.ceph.conf.get('client', {}))
+    ceph_config.update(ctx.ceph.conf.get(client, {}))
+    return ceph_config.get('rgw zone')
+
+def region_for_client(ctx, client):
+    ceph_config = ctx.ceph.conf.get('global', {})
+    ceph_config.update(ctx.ceph.conf.get('client', {}))
+    ceph_config.update(ctx.ceph.conf.get(client, {}))
+    return ceph_config.get('rgw region')
+
+def radosgw_data_log_window(ctx, client):
+    ceph_config = ctx.ceph.conf.get('global', {})
+    ceph_config.update(ctx.ceph.conf.get('client', {}))
+    ceph_config.update(ctx.ceph.conf.get(client, {}))
+    return ceph_config.get('rgw data log window', 30)
+
+def radosgw_agent_sync_data(ctx, agent_host, agent_port, full=False):
+    log.info('sync agent {h}:{p}'.format(h=agent_host, p=agent_port))
+    method = "full" if full else "incremental"
+    return requests.post('http://{addr}:{port}/data/{method}'.format(addr=agent_host, port=agent_port, method=method))
+
+def radosgw_agent_sync_metadata(ctx, agent_host, agent_port, full=False):
+    log.info('sync agent {h}:{p}'.format(h=agent_host, p=agent_port))
+    method = "full" if full else "incremental"
+    return requests.post('http://{addr}:{port}/metadata/{method}'.format(addr=agent_host, port=agent_port, method=method))
+
+def radosgw_agent_sync_all(ctx, full=False, data=False):
+    if ctx.radosgw_agent.procs:
+        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
+            zone_for_client(ctx, agent_client)
+            sync_host, sync_port = get_sync_agent(ctx, agent_client)
+            log.debug('doing a sync via {host1}'.format(host1=sync_host))
+            radosgw_agent_sync_metadata(ctx, sync_host, sync_port, full)
+            if data:
+                radosgw_agent_sync_data(ctx, sync_host, sync_port, full)
+
+def host_for_role(ctx, role):
+    for target, roles in zip(ctx.config['targets'].iterkeys(), ctx.config['roles']):
+        if role in roles:
+            _, host = split_user(target)
+            return host
+
+def get_sync_agent(ctx, source):
+    for task in ctx.config['tasks']:
+        if 'radosgw-agent' not in task:
+            continue
+        for client, conf in task['radosgw-agent'].iteritems():
+            if conf['src'] == source:
+                return host_for_role(ctx, source), conf.get('port', 8000)
+    return None, None
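The zone/region helpers in rgw.py all key off the JSON returned by
radosgw-admin region-map get. A standalone sketch of the lookup that
get_zone_host_and_port() performs, run here against a made-up region map (the
dictionary shape mirrors what the helper expects; the hostname and port are
illustrative):

    from urlparse import urlparse  # urllib.parse on Python 3

    region_map = {
        'regions': [
            {'val': {'is_master': 'true',
                     'master_zone': 'us-east',
                     'zones': [{'name': 'us-east',
                                'endpoints': ['http://rgw1.example.com:7280/']}]}},
        ],
    }

    def zone_endpoint(region_map, zone):
        for region in region_map['regions']:
            for zone_info in region['val']['zones']:
                if zone_info['name'] == zone:
                    endpoint = urlparse(zone_info['endpoints'][0])
                    return endpoint.hostname, endpoint.port or 80
        raise LookupError('no endpoint for zone %s' % zone)

    print(zone_endpoint(region_map, 'us-east'))  # ('rgw1.example.com', 7280)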
diff --git a/tasks/watch_notify_stress.py b/tasks/watch_notify_stress.py
new file mode 100644 (file)
index 0000000..ab611c3
--- /dev/null
@@ -0,0 +1,69 @@
+"""
+test_stress_watch task
+"""
+import contextlib
+import logging
+import proc_thrasher
+
+from ..orchestra import run
+
+log = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Run test_stress_watch
+
+    The config should be as follows:
+
+    test_stress_watch:
+        clients: [client list]
+
+    example:
+
+    tasks:
+    - ceph:
+    - test_stress_watch:
+        clients: [client.0]
+    - interactive:
+    """
+    log.info('Beginning test_stress_watch...')
+    assert isinstance(config, dict), \
+        "please list clients to run on"
+    testwatch = {}
+
+    remotes = []
+
+    for role in config.get('clients', ['client.0']):
+        assert isinstance(role, basestring)
+        PREFIX = 'client.'
+        assert role.startswith(PREFIX)
+        id_ = role[len(PREFIX):]
+        (remote,) = ctx.cluster.only(role).remotes.iterkeys()
+        remotes.append(remote)
+
+        args = ['CEPH_CLIENT_ID={id_}'.format(id_=id_),
+               'CEPH_ARGS="{flags}"'.format(flags=config.get('flags', '')),
+               'daemon-helper',
+               'kill',
+               'multi_stress_watch foo foo'
+               ]
+
+        log.info("args are %s" % (args,))
+
+        proc = proc_thrasher.ProcThrasher({}, remote,
+            args=[run.Raw(i) for i in args],
+            logger=log.getChild('testwatch.{id}'.format(id=id_)),
+            stdin=run.PIPE,
+            wait=False
+            )
+        proc.start()
+        testwatch[id_] = proc
+
+    try:
+        yield
+    finally:
+        log.info('joining watch_notify_stress')
+        for i in testwatch.itervalues():
+            i.join()
diff --git a/tasks/workunit.py b/tasks/workunit.py
new file mode 100644 (file)
index 0000000..b504eeb
--- /dev/null
@@ -0,0 +1,372 @@
+"""
+Workunit task -- Run ceph workunits on specific sets of clients
+"""
+import logging
+import pipes
+import os
+
+from teuthology import misc as teuthology
+from teuthology.parallel import parallel
+from ..orchestra import run
+
+log = logging.getLogger(__name__)
+
+
+def task(ctx, config):
+    """
+    Run all ceph workunits found under the specified path.
+
+    For example::
+
+        tasks:
+        - ceph:
+        - ceph-fuse: [client.0]
+        - workunit:
+            clients:
+              client.0: [direct_io, xattrs.sh]
+              client.1: [snaps]
+            branch: foo
+
+    You can also run a list of workunits on all clients::
+        tasks:
+        - ceph:
+        - ceph-fuse:
+        - workunit:
+            tag: v0.47
+            clients:
+              all: [direct_io, xattrs.sh, snaps]
+
+    If you have an "all" section, its workunits are run on every client
+    simultaneously, AFTER any workunits specified for individual clients
+    have finished.  (This keeps the two groups from running at the same time.)
+
+    To customize tests, you can specify environment variables as a dict. You
+    can also specify a time limit for each work unit (defaults to 3h):
+
+        tasks:
+        - ceph:
+        - ceph-fuse:
+        - workunit:
+            sha1: 9b28948635b17165d17c1cf83d4a870bd138ddf6
+            clients:
+              all: [snaps]
+            env:
+              FOO: bar
+              BAZ: quux
+            timeout: 3h
+
+    :param ctx: Context
+    :param config: Configuration
+    """
+    assert isinstance(config, dict)
+    assert isinstance(config.get('clients'), dict), \
+        'configuration must contain a dictionary of clients'
+
+    overrides = ctx.config.get('overrides', {})
+    teuthology.deep_merge(config, overrides.get('workunit', {}))
+
+    refspec = config.get('branch')
+    if refspec is None:
+        refspec = config.get('sha1')
+    if refspec is None:
+        refspec = config.get('tag')
+    if refspec is None:
+        refspec = 'HEAD'
+
+    timeout = config.get('timeout', '3h')
+
+    log.info('Pulling workunits from ref %s', refspec)
+
+    created_dir_dict = {}
+
+    if config.get('env') is not None:
+        assert isinstance(config['env'], dict), 'env must be a dictionary'
+    clients = config['clients']
+    log.info('Making a separate scratch dir for every client...')
+    for role in clients.iterkeys():
+        assert isinstance(role, basestring)
+        if role == "all":
+            continue
+        PREFIX = 'client.'
+        assert role.startswith(PREFIX)
+        created_mnt_dir = _make_scratch_dir(ctx, role, config.get('subdir'))
+        created_dir_dict[role] = created_mnt_dir
+
+    all_spec = False  # is there an "all" grouping?
+    with parallel() as p:
+        for role, tests in clients.iteritems():
+            if role != "all":
+                p.spawn(_run_tests, ctx, refspec, role, tests,
+                        config.get('env'), timeout=timeout)
+            else:
+                all_spec = True
+
+    if all_spec:
+        all_tasks = clients["all"]
+        _spawn_on_all_clients(ctx, refspec, all_tasks, config.get('env'),
+                              config.get('subdir'), timeout=timeout)
+
+    for role in clients.iterkeys():
+        assert isinstance(role, basestring)
+        if role == "all":
+            continue
+        PREFIX = 'client.'
+        assert role.startswith(PREFIX)
+        if created_dir_dict[role]:
+            _delete_dir(ctx, role)
+
+
+def _delete_dir(ctx, role):
+    """
+    Delete the scratch directory used by this role, then remove the
+    mount-point directory it was created in.
+
+    :param ctx: Context
+    :param role: "client.#" where # is used for the role id.
+    """
+    PREFIX = 'client.'
+    testdir = teuthology.get_testdir(ctx)
+    id_ = role[len(PREFIX):]
+    (remote,) = ctx.cluster.only(role).remotes.iterkeys()
+    mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
+    # Is there any reason why this is not: join(mnt, role) ?
+    client = os.path.join(mnt, 'client.{id}'.format(id=id_))
+    try:
+        remote.run(
+            args=[
+                'rm',
+                '-rf',
+                '--',
+                client,
+                ],
+            )
+        log.info("Deleted dir {dir}".format(dir=client))
+    except Exception:
+        log.exception("Caught an exception deleting dir {dir}".format(dir=client))
+
+    try:
+        remote.run(
+            args=[
+                'rmdir',
+                '--',
+                mnt,
+                ],
+            )
+        log.info("Deleted dir {dir}".format(dir=mnt))
+    except Exception:
+        log.exception("Caught an exception deleting dir {dir}".format(dir=mnt))
+
+def _make_scratch_dir(ctx, role, subdir):
+    """
+    Make a scratch directory for this role.  This also creates the mount
+    point if that directory does not exist yet.
+
+    :param ctx: Context
+    :param role: "client.#" where # is used for the role id.
+    :param subdir: use this subdir (None/False if not used)
+    """
+    retVal = False
+    PREFIX = 'client.'
+    id_ = role[len(PREFIX):]
+    log.debug("getting remote for {id} role {role_}".format(id=id_, role_=role))
+    (remote,) = ctx.cluster.only(role).remotes.iterkeys()
+    dir_owner = remote.shortname.split('@', 1)[0]
+    mnt = os.path.join(teuthology.get_testdir(ctx), 'mnt.{id}'.format(id=id_))
+    # if neither kclient nor ceph-fuse are required for a workunit,
+    # mnt may not exist. Stat and create the directory if it doesn't.
+    try:
+        remote.run(
+            args=[
+                'stat',
+                '--',
+                mnt,
+                ],
+            )
+        log.info('Did not need to create dir {dir}'.format(dir=mnt))
+    except Exception:
+        remote.run(
+            args=[
+                'mkdir',
+                '--',
+                mnt,
+                ],
+            )
+        log.info('Created dir {dir}'.format(dir=mnt))
+        retVal = True
+
+    if not subdir: subdir = 'client.{id}'.format(id=id_)
+    if retVal:
+        remote.run(
+            args=[
+                'cd',
+                '--',
+                mnt,
+                run.Raw('&&'),
+                'mkdir',
+                '--',
+                subdir,
+                ],
+            )
+    else:
+        remote.run(
+            args=[
+                # cd first so this will fail if the mount point does
+                # not exist; pure install -d will silently do the
+                # wrong thing
+                'cd',
+                '--',
+                mnt,
+                run.Raw('&&'),
+                'sudo',
+                'install',
+                '-d',
+                '-m', '0755',
+                '--owner={user}'.format(user=dir_owner),
+                '--',
+                subdir,
+                ],
+            )
+
+    return retVal
+
+
+def _spawn_on_all_clients(ctx, refspec, tests, env, subdir, timeout=None):
+    """
+    Make a scratch directory for each client in the cluster, and then, for
+    each test, spawn _run_tests() for every client role.
+
+    See _run_tests() for parameter documentation.
+    """
+    client_generator = teuthology.all_roles_of_type(ctx.cluster, 'client')
+    client_remotes = list()
+    for client in client_generator:
+        (client_remote,) = ctx.cluster.only('client.{id}'.format(id=client)).remotes.iterkeys()
+        client_remotes.append((client_remote, 'client.{id}'.format(id=client)))
+        _make_scratch_dir(ctx, "client.{id}".format(id=client), subdir)
+
+    for unit in tests:
+        with parallel() as p:
+            for remote, role in client_remotes:
+                p.spawn(_run_tests, ctx, refspec, role, [unit], env, subdir,
+                        timeout=timeout)
+
+    # cleanup the generated client directories
+    client_generator = teuthology.all_roles_of_type(ctx.cluster, 'client')
+    for client in client_generator:
+        _delete_dir(ctx, 'client.{id}'.format(id=client))
+
+
+def _run_tests(ctx, refspec, role, tests, env, subdir=None, timeout=None):
+    """
+    Run the individual test. Create a scratch directory and then extract the
+    workunits from git. Make the executables, and then run the tests.
+    Clean up (remove files created) after the tests are finished.
+
+    :param ctx:     Context
+    :param refspec: branch, sha1, or version tag used to identify this
+                    build
+    :param tests:   list of workunits to run.
+    :param env:     environment set in the yaml file.  Could be None.
+    :param subdir:  subdirectory set in the yaml file.  Could be None.
+    :param timeout: If present, use the 'timeout' command on the remote host
+                    to limit execution time. Must be specified by a number
+                    followed by 's' for seconds, 'm' for minutes, 'h' for
+                    hours, or 'd' for days. If '0' or anything that evaluates
+                    to False is passed, the 'timeout' command is not used.
+    """
+    testdir = teuthology.get_testdir(ctx)
+    assert isinstance(role, basestring)
+    PREFIX = 'client.'
+    assert role.startswith(PREFIX)
+    id_ = role[len(PREFIX):]
+    (remote,) = ctx.cluster.only(role).remotes.iterkeys()
+    mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
+    # subdir so we can remove and recreate this a lot without sudo
+    if subdir is None:
+        scratch_tmp = os.path.join(mnt, 'client.{id}'.format(id=id_), 'tmp')
+    else:
+        scratch_tmp = os.path.join(mnt, subdir)
+    srcdir = '{tdir}/workunit.{role}'.format(tdir=testdir, role=role)
+
+    remote.run(
+        logger=log.getChild(role),
+        args=[
+            'mkdir', '--', srcdir,
+            run.Raw('&&'),
+            'git',
+            'archive',
+            '--remote=git://ceph.newdream.net/git/ceph.git',
+            '%s:qa/workunits' % refspec,
+            run.Raw('|'),
+            'tar',
+            '-C', srcdir,
+            '-x',
+            '-f-',
+            run.Raw('&&'),
+            'cd', '--', srcdir,
+            run.Raw('&&'),
+            'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi',
+            run.Raw('&&'),
+            'find', '-executable', '-type', 'f', '-printf', r'%P\0',
+            run.Raw('>{tdir}/workunits.list'.format(tdir=testdir)),
+            ],
+        )
+
+    workunits = sorted(teuthology.get_file(
+                            remote,
+                            '{tdir}/workunits.list'.format(tdir=testdir)).split('\0'))
+    assert workunits
+
+    try:
+        assert isinstance(tests, list)
+        for spec in tests:
+            log.info('Running workunits matching %s on %s...', spec, role)
+            prefix = '{spec}/'.format(spec=spec)
+            to_run = [w for w in workunits if w == spec or w.startswith(prefix)]
+            if not to_run:
+                raise RuntimeError('Spec did not match any workunits: {spec!r}'.format(spec=spec))
+            for workunit in to_run:
+                log.info('Running workunit %s...', workunit)
+                args = [
+                    'mkdir', '-p', '--', scratch_tmp,
+                    run.Raw('&&'),
+                    'cd', '--', scratch_tmp,
+                    run.Raw('&&'),
+                    run.Raw('CEPH_CLI_TEST_DUP_COMMAND=1'),
+                    run.Raw('CEPH_REF={ref}'.format(ref=refspec)),
+                    run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)),
+                    run.Raw('CEPH_ID="{id}"'.format(id=id_)),
+                    ]
+                if env is not None:
+                    for var, val in env.iteritems():
+                        quoted_val = pipes.quote(val)
+                        env_arg = '{var}={val}'.format(var=var, val=quoted_val)
+                        args.append(run.Raw(env_arg))
+                args.extend([
+                    'adjust-ulimits',
+                    'ceph-coverage',
+                    '{tdir}/archive/coverage'.format(tdir=testdir)])
+                if timeout and timeout != '0':
+                    args.extend(['timeout', timeout])
+                args.extend([
+                    '{srcdir}/{workunit}'.format(
+                        srcdir=srcdir,
+                        workunit=workunit,
+                        ),
+                    ])
+                remote.run(
+                    logger=log.getChild(role),
+                    args=args,
+                    )
+                remote.run(
+                    logger=log.getChild(role),
+                    args=['sudo', 'rm', '-rf', '--', scratch_tmp],
+                    )
+    finally:
+        log.info('Stopping workunits on %s...', role)
+        remote.run(
+            logger=log.getChild(role),
+            args=[
+                'rm', '-rf', '--', '{tdir}/workunits.list'.format(tdir=testdir), srcdir,
+                ],
+            )
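To make the per-workunit command construction in _run_tests() easier to
follow, here is a small local sketch that assembles the same kind of argument
list for one hypothetical workunit and prints it; the test directory, env
values, timeout, and workunit name are placeholders and nothing is executed:

    import pipes  # the task uses pipes.quote; shlex.quote on Python 3

    testdir = '/home/ubuntu/cephtest'
    srcdir = testdir + '/workunit.client.0'
    env = {'FOO': 'bar baz'}
    timeout = '3h'

    args = ['CEPH_CLI_TEST_DUP_COMMAND=1', 'CEPH_ID="0"']
    # Environment values are shell-quoted before being prepended.
    for var, val in sorted(env.items()):
        args.append('{var}={val}'.format(var=var, val=pipes.quote(val)))
    args.extend(['adjust-ulimits', 'ceph-coverage',
                 testdir + '/archive/coverage'])
    if timeout and timeout != '0':
        args.extend(['timeout', timeout])
    args.append(srcdir + '/direct_io/test_short_dio_read')

    print(' '.join(args))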
diff --git a/teuthology/__init__.py b/teuthology/__init__.py
deleted file mode 100644 (file)
index bd88d3c..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-from gevent import monkey
-monkey.patch_all(dns=False)
-from .orchestra import monkey
-monkey.patch_all()
-
-import logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s.%(msecs)03d %(levelname)s:%(name)s:%(message)s')
-log = logging.getLogger(__name__)
diff --git a/teuthology/beanstalk.py b/teuthology/beanstalk.py
deleted file mode 100644 (file)
index 34e831c..0000000
+++ /dev/null
@@ -1,122 +0,0 @@
-import beanstalkc
-import yaml
-import logging
-import sys
-from collections import OrderedDict
-
-from .config import config
-from . import report
-
-log = logging.getLogger(__name__)
-
-
-def connect():
-    host = config.queue_host
-    port = config.queue_port
-    if host is None or port is None:
-        raise RuntimeError(
-            'Beanstalk queue information not found in {conf_path}'.format(
-                conf_path=config.teuthology_yaml))
-    return beanstalkc.Connection(host=host, port=port)
-
-
-def watch_tube(connection, tube_name):
-    connection.watch(tube_name)
-    connection.ignore('default')
-
-
-def walk_jobs(connection, tube_name, callback, pattern=None):
-    """
-    def callback(jobs_dict)
-    """
-    log.info("Checking Beanstalk Queue...")
-    job_count = connection.stats_tube(tube_name)['current-jobs-ready']
-    if job_count == 0:
-        log.info('No jobs in Beanstalk Queue')
-        return
-
-    # Try to figure out a sane timeout based on how many jobs are in the queue
-    timeout = job_count / 2000.0 * 60
-    matching_jobs = OrderedDict()
-    for i in range(1, job_count + 1):
-        sys.stderr.write("{i}/{count}\r".format(i=i, count=job_count))
-        sys.stderr.flush()
-        job = connection.reserve(timeout=timeout)
-        if job is None or job.body is None:
-            continue
-        job_config = yaml.safe_load(job.body)
-        job_name = job_config['name']
-        job_id = job.stats()['id']
-        if pattern is not None and pattern not in job_name:
-            continue
-        matching_jobs[job_id] = job
-    sys.stderr.write('\n')
-    sys.stderr.flush()
-    callback(matching_jobs)
-
-
-def _print_matching_jobs(show_desc=False):
-    def print_matching_jobs(jobs_dict):
-        i = 0
-        job_count = len(jobs_dict)
-        for job_id, job in jobs_dict.iteritems():
-            i += 1
-            job_config = yaml.safe_load(job.body)
-            job_name = job_config['name']
-            job_desc = job_config['description']
-            job_id = job.stats()['id']
-            print 'Job: {i}/{count} {job_name}/{job_id}'.format(
-                i=i,
-                count=job_count,
-                job_id=job_id,
-                job_name=job_name,
-                )
-            if job_desc and show_desc:
-                for desc in job_desc.split():
-                    print '\t {desc}'.format(desc=desc)
-    return print_matching_jobs
-
-
-def delete_matching_jobs(jobs_dict):
-    for job_id, job in jobs_dict.iteritems():
-        job_config = yaml.safe_load(job.body)
-        job_name = job_config['name']
-        job_id = job.stats()['id']
-        print 'Deleting {job_id}/{job_name}'.format(
-            job_id=job_id,
-            job_name=job_name,
-            )
-        job.delete()
-        report.try_delete_jobs(job_name, job_id)
-
-
-def print_matching_runs(jobs_dict):
-    runs = set()
-    for job_id, job in jobs_dict.iteritems():
-        job_config = yaml.safe_load(job.body)
-        runs.add(job_config['name'])
-    for run in runs:
-        print run
-
-
-def main(args):
-    machine_type = args['--machine_type']
-    delete = args['--delete']
-    runs = args['--runs']
-    show_desc = args['--description']
-    try:
-        connection = connect()
-        watch_tube(connection, machine_type)
-        if delete:
-            walk_jobs(connection, machine_type,
-                      delete_matching_jobs)
-        elif runs:
-            walk_jobs(connection, machine_type,
-                      print_matching_runs)
-        else:
-            walk_jobs(connection, machine_type,
-                      _print_matching_jobs(show_desc))
-    except KeyboardInterrupt:
-        log.info("Interrupted.")
-    finally:
-        connection.close()
diff --git a/teuthology/ceph.conf.template b/teuthology/ceph.conf.template
deleted file mode 100644 (file)
index 648f1de..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-[global]
-       chdir = ""
-       pid file = $name.pid
-        auth supported = cephx
-
-       filestore xattr use omap = true
-
-       mon clock drift allowed = .500
-
-       osd crush chooseleaf type = 0
-        auth debug = true
-
-       ms die on old message = true
-
-       mon pg warn min per osd = 5
-
-       osd pool default size = 2
-
-       mon osd allow primary affinity = true
-
-        osd pool default erasure code profile = "plugin=jerasure technique=reed_sol_van k=2 m=1 ruleset-failure-domain=osd"
-
-[osd]
-        osd journal size = 100
-
-        osd scrub load threshold = 5.0
-       osd scrub max interval = 600
-
-       osd recover clone overlap = true
-       osd recovery max chunk = 1048576
-
-        osd debug op order = true
-        osd debug verify stray on activate = true
-
-       osd open classes on start = true
-        osd debug pg log writeout = true
-
-
-[mon]
-       debug ms = 1
-       debug mon = 20
-       debug paxos = 20
-       mon data avail warn = 5
-
-[mds]
-        mds debug scatterstat = true
-        mds verify scatter = true
-        mds debug frag = true
-
-[client]
-        rgw cache enabled = true
-       rgw enable ops log = true
-       rgw enable usage log = true
-       log file = /var/log/ceph/ceph-$name.$pid.log
-       admin socket = /var/run/ceph/ceph-$name.$pid.asok
-
-
-       client mount timeout = 600
\ No newline at end of file
diff --git a/teuthology/config.py b/teuthology/config.py
deleted file mode 100644 (file)
index 017bb67..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-import os
-import yaml
-import logging
-
-log = logging.getLogger(__name__)
-
-
-class Config(object):
-    """
-    This class is intended to unify teuthology's many configuration files and
-    objects. Currently it serves as a convenient interface to
-    ~/.teuthology.yaml and nothing else.
-    """
-    teuthology_yaml = os.path.join(os.environ['HOME'], '.teuthology.yaml')
-    defaults = {
-        'archive_base': '/var/lib/teuthworker/archive',
-        'ceph_git_base_url': 'https://github.com/ceph/',
-        'lock_server': 'http://teuthology.front.sepia.ceph.com/locker/lock',
-        'max_job_time': 259200,  # 3 days
-        'verify_host_keys': True,
-        'watchdog_interval': 600,
-    }
-
-    def __init__(self):
-        self.load_files()
-
-    def load_files(self):
-        if os.path.exists(self.teuthology_yaml):
-            self.__conf = yaml.safe_load(file(self.teuthology_yaml))
-        else:
-            log.debug("%s not found", self.teuthology_yaml)
-            self.__conf = {}
-
-    def __getattr__(self, name):
-        return self.__conf.get(name, self.defaults.get(name))
-
-    def __setattribute__(self, name, value):
-        if name.endswith('__conf'):
-            setattr(self, name, value)
-        else:
-            self.__conf[name] = value
-
-config = Config()
diff --git a/teuthology/contextutil.py b/teuthology/contextutil.py
deleted file mode 100644 (file)
index 99b694c..0000000
+++ /dev/null
@@ -1,139 +0,0 @@
-import contextlib
-import sys
-import logging
-import time
-import itertools
-
-log = logging.getLogger(__name__)
-
-@contextlib.contextmanager
-def nested(*managers):
-    """
-    Like contextlib.nested but takes callables returning context
-    managers, to avoid the major reason why contextlib.nested was
-    deprecated.
-
-    This version also logs any exceptions early, much like run_tasks,
-    to ease debugging. TODO combine nested and run_tasks.
-    """
-    exits = []
-    vars = []
-    exc = (None, None, None)
-    try:
-        for mgr_fn in managers:
-            mgr = mgr_fn()
-            exit = mgr.__exit__
-            enter = mgr.__enter__
-            vars.append(enter())
-            exits.append(exit)
-        yield vars
-    except Exception:
-        log.exception('Saw exception from nested tasks')
-        exc = sys.exc_info()
-    finally:
-        while exits:
-            exit = exits.pop()
-            try:
-                if exit(*exc):
-                    exc = (None, None, None)
-            except Exception:
-                exc = sys.exc_info()
-        if exc != (None, None, None):
-            # Don't rely on sys.exc_info() still containing
-            # the right information. Another exception may
-            # have been raised and caught by an exit method
-            raise exc[0], exc[1], exc[2]
-
-
-class MaxWhileTries(Exception):
-    pass
-
-
-class safe_while(object):
-    """
-    A context manager to remove boiler plate code that deals with `while` loops
-    that need a given number of tries and some seconds to sleep between each
-    one of those tries.
-
-    The most simple example possible will try 10 times sleeping for 6 seconds:
-
-        >>> from teuthology.contexutil import safe_while
-        >>> with safe_while() as proceed:
-        ...    while proceed():
-        ...        # repetitive code here
-        ...        print "hello world"
-        ...
-        Traceback (most recent call last):
-        ...
-        MaxWhileTries: reached maximum tries (5) after waiting for 75 seconds
-
-    Yes, this adds yet another level of indentation but it allows you to
-    implement while loops exactly the same as before with just 1 more
-    indentation level and one extra call. Everything else stays the same,
-    code-wise. So adding this helper to existing code is simpler.
-
-    :param sleep:     The amount of time to sleep between tries. Default 6
-    :param increment: The amount to add to the sleep value on each try.
-                      Default 0.
-    :param tries:     The amount of tries before giving up. Default 10.
-    :param action:    The name of the action being attempted. Default none.
-    :param _raise:    Whether to raise an exception (or log a warning).
-                      Default True.
-    :param _sleeper:  The function to use to sleep. Only used for testing.
-                      Default time.sleep
-    """
-
-    def __init__(self, sleep=6, increment=0, tries=10, action=None,
-                 _raise=True, _sleeper=None):
-        self.sleep = sleep
-        self.increment = increment
-        self.tries = tries
-        self.counter = 0
-        self.sleep_current = sleep
-        self.action = action
-        self._raise = _raise
-        self.sleeper = _sleeper or time.sleep
-
-    def _make_error_msg(self):
-        """
-        Sum the total number of seconds we waited while providing the number
-        of tries we attempted
-        """
-        total_seconds_waiting = sum(
-            itertools.islice(
-                itertools.count(self.sleep, self.increment),
-                self.tries
-            )
-        )
-        msg = 'reached maximum tries ({tries})' + \
-            ' after waiting for {total} seconds'
-        if self.action:
-            msg = "'{action}'" + msg
-
-        msg = msg.format(
-            action=self.action,
-            tries=self.tries,
-            total=total_seconds_waiting,
-        )
-        return msg
-
-    def __call__(self):
-        self.counter += 1
-        if self.counter == 1:
-            return True
-        if self.counter > self.tries:
-            error_msg = self._make_error_msg()
-            if self._raise:
-                raise MaxWhileTries(error_msg)
-            else:
-                log.warning(error_msg)
-                return False
-        self.sleeper(self.sleep_current)
-        self.sleep_current += self.increment
-        return True
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        return False
diff --git a/teuthology/coverage.py b/teuthology/coverage.py
deleted file mode 100644 (file)
index 3ad8aad..0000000
+++ /dev/null
@@ -1,211 +0,0 @@
-from contextlib import closing
-import logging
-import os
-import shutil
-import subprocess
-import MySQLdb
-import yaml
-
-import teuthology
-from teuthology.misc import read_config
-
-log = logging.getLogger(__name__)
-
-"""
-The coverage database can be created in mysql with:
-
-CREATE TABLE `coverage` (
-  `run_id` bigint(20) NOT NULL AUTO_INCREMENT,
-  `rev` char(40) NOT NULL,
-  `test` varchar(255) NOT NULL,
-  `suite` varchar(255) NOT NULL,
-  `lines` int(10) unsigned NOT NULL,
-  `line_cov` float unsigned NOT NULL,
-  `functions` int(10) unsigned NOT NULL,
-  `function_cov` float unsigned NOT NULL,
-  `branches` int(10) unsigned NOT NULL,
-  `branch_cov` float unsigned NOT NULL,
-  `run_date` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
-  PRIMARY KEY (`run_id`)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8
-
-"""
-
-
-def connect_to_db(ctx):
-    db = MySQLdb.connect(
-        host=ctx.teuthology_config['coverage_db_host'],
-        user=ctx.teuthology_config['coverage_db_user'],
-        db=ctx.teuthology_config['coverage_db_name'],
-        passwd=ctx.teuthology_config['coverage_db_password'],
-    )
-    db.autocommit(False)
-    return db
-
-
-def store_coverage(ctx, test_coverage, rev, suite):
-    with closing(connect_to_db(ctx)) as db:
-        rows = []
-        for test, coverage in test_coverage.iteritems():
-            flattened_cov = [item for sublist in coverage for item in sublist]
-            rows.append([rev, test, suite] + flattened_cov)
-        log.debug('inserting rows into db: %s', str(rows))
-        try:
-            cursor = db.cursor()
-            cursor.executemany(
-                'INSERT INTO `coverage`'
-                ' (`rev`, `test`, `suite`, `lines`, `line_cov`, `functions`,'
-                ' `function_cov`, `branches`, `branch_cov`)'
-                ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)',
-                rows)
-        except Exception:
-            log.exception('error updating database')
-            db.rollback()
-            raise
-        else:
-            db.commit()
-            log.info('added coverage to database')
-        finally:
-            cursor.close()
-
-
-def read_coverage(output):
-    log.debug('reading coverage from output: %s', output)
-    coverage = [None, None, None]
-    prefixes = ['  lines......: ', '  functions..: ', '  branches...: ']
-    for line in reversed(output.splitlines()):
-        for i, prefix in enumerate(prefixes):
-            if line.startswith(prefix):
-                if '%' in line:
-                    cov_num = int(line[line.find('(') + 1:line.find(' of')])
-                    cov_percent = float(line[len(prefix):line.find('%')])
-                    coverage[i] = (cov_num, cov_percent)
-                else:
-                    # may have no data for e.g. branches on the initial run
-                    coverage[i] = (None, None)
-                break
-        if None not in coverage:
-            break
-    return coverage
-
-
-def main(args):
-    if args.verbose:
-        teuthology.log.setLevel(logging.DEBUG)
-
-    log = logging.getLogger(__name__)
-
-    read_config(args)
-
-    handler = logging.FileHandler(
-        filename=os.path.join(args.test_dir, 'coverage.log'),
-    )
-    formatter = logging.Formatter(
-        fmt='%(asctime)s.%(msecs)03d %(levelname)s:%(message)s',
-        datefmt='%Y-%m-%dT%H:%M:%S',
-    )
-    handler.setFormatter(formatter)
-    logging.getLogger().addHandler(handler)
-
-    try:
-        analyze(args)
-    except Exception:
-        log.exception('error generating coverage')
-        raise
-
-
-def analyze(args):
-    tests = [
-        f for f in sorted(os.listdir(args.test_dir))
-        if not f.startswith('.')
-        and os.path.isdir(os.path.join(args.test_dir, f))
-        and os.path.exists(os.path.join(args.test_dir, f, 'summary.yaml'))
-        and os.path.exists(os.path.join(args.test_dir, f, 'ceph-sha1'))]
-
-    test_summaries = {}
-    for test in tests:
-        summary = {}
-        with file(os.path.join(args.test_dir, test, 'summary.yaml')) as f:
-            g = yaml.safe_load_all(f)
-            for new in g:
-                summary.update(new)
-
-        if summary['flavor'] != 'gcov':
-            log.debug('Skipping %s, since it does not include coverage', test)
-            continue
-        test_summaries[test] = summary
-
-    assert len(test_summaries) > 0
-
-    suite = os.path.basename(args.test_dir)
-
-    # only run cov-init once.
-    # this only works if all tests were run against the same version.
-    if not args.skip_init:
-        log.info('initializing coverage data...')
-        subprocess.check_call(
-            args=[
-                os.path.join(args.cov_tools_dir, 'cov-init.sh'),
-                os.path.join(args.test_dir, tests[0]),
-                args.lcov_output,
-                os.path.join(
-                    args.teuthology_config['ceph_build_output_dir'],
-                    '{suite}.tgz'.format(suite=suite),
-                ),
-            ])
-        shutil.copy(
-            os.path.join(args.lcov_output, 'base.lcov'),
-            os.path.join(args.lcov_output, 'total.lcov')
-        )
-
-    test_coverage = {}
-    for test, summary in test_summaries.iteritems():
-        lcov_file = '{name}.lcov'.format(name=test)
-
-        log.info('analyzing coverage for %s', test)
-        proc = subprocess.Popen(
-            args=[
-                os.path.join(args.cov_tools_dir, 'cov-analyze.sh'),
-                '-t', os.path.join(args.test_dir, test),
-                '-d', args.lcov_output,
-                '-o', test,
-            ],
-            stdout=subprocess.PIPE,
-        )
-        output, _ = proc.communicate()
-        desc = summary.get('description', test)
-        test_coverage[desc] = read_coverage(output)
-
-        log.info('adding %s data to total', test)
-        proc = subprocess.Popen(
-            args=[
-                'lcov',
-                '-a', os.path.join(args.lcov_output, lcov_file),
-                '-a', os.path.join(args.lcov_output, 'total.lcov'),
-                '-o', os.path.join(args.lcov_output, 'total_tmp.lcov'),
-            ],
-            stdout=subprocess.PIPE,
-        )
-        output, _ = proc.communicate()
-
-        os.rename(
-            os.path.join(args.lcov_output, 'total_tmp.lcov'),
-            os.path.join(args.lcov_output, 'total.lcov')
-        )
-
-    coverage = read_coverage(output)
-    test_coverage['total for {suite}'.format(suite=suite)] = coverage
-    log.debug('total coverage is %s', str(coverage))
-
-    if args.html_output:
-        subprocess.check_call(
-            args=[
-                'genhtml',
-                '-s',
-                '-o', os.path.join(args.html_output, 'total'),
-                '-t', 'Total for {suite}'.format(suite=suite),
-                '--',
-                os.path.join(args.lcov_output, 'total.lcov'),
-            ])
-
-    store_coverage(args, test_coverage, summary['ceph-sha1'], suite)
diff --git a/teuthology/kill.py b/teuthology/kill.py
deleted file mode 100755 (executable)
index d090ff4..0000000
+++ /dev/null
@@ -1,241 +0,0 @@
-#!/usr/bin/python
-import os
-import sys
-import beanstalkc
-import yaml
-import psutil
-import subprocess
-import tempfile
-import logging
-import getpass
-
-from . import report
-from .config import config
-
-log = logging.getLogger(__name__)
-
-
-def main(args):
-    run_name = args['--run']
-    job = args['--job']
-    archive_base = args['--archive']
-    owner = args['--owner']
-    machine_type = args['--machine_type']
-    preserve_queue = args['--preserve-queue']
-
-    if job:
-        for job_id in job:
-            kill_job(run_name, job_id, archive_base, owner, machine_type)
-    else:
-        kill_run(run_name, archive_base, owner, machine_type,
-                 preserve_queue=preserve_queue)
-
-
-def kill_run(run_name, archive_base=None, owner=None, machine_type=None,
-             preserve_queue=False):
-    run_info = {}
-    if archive_base:
-        run_archive_dir = os.path.join(archive_base, run_name)
-        if os.path.isdir(run_archive_dir):
-            run_info = find_run_info(run_archive_dir)
-            machine_type = run_info['machine_type']
-            owner = run_info['owner']
-        elif machine_type is None:
-            raise RuntimeError("The run is still entirely enqueued; " +
-                               "you must also pass --machine-type")
-
-    if not preserve_queue:
-        remove_beanstalk_jobs(run_name, machine_type)
-        remove_paddles_jobs(run_name)
-    kill_processes(run_name, run_info.get('pids'))
-    if owner is not None:
-        targets = find_targets(run_name, owner)
-        nuke_targets(targets, owner)
-
-
-def kill_job(run_name, job_id, archive_base=None, owner=None,
-             machine_type=None):
-    job_archive_dir = os.path.join(archive_base, run_name, job_id)
-    job_info = find_job_info(job_archive_dir)
-    owner = job_info['owner']
-    kill_processes(run_name, [job_info.get('pid')])
-    targets = dict(targets=job_info.get('targets', {}))
-    nuke_targets(targets, owner)
-
-
-def find_run_info(run_archive_dir):
-    log.info("Assembling run information...")
-    run_info_fields = [
-        'machine_type',
-        'owner',
-    ]
-
-    run_info = dict(pids=[])
-    job_info = {}
-    for job_id in os.listdir(run_archive_dir):
-        job_dir = os.path.join(run_archive_dir, job_id)
-        if not os.path.isdir(job_dir):
-            continue
-        job_info = find_job_info(job_dir)
-        for key in job_info.keys():
-            if key in run_info_fields and key not in run_info:
-                run_info[key] = job_info[key]
-        if 'pid' in job_info:
-            run_info['pids'].append(job_info['pid'])
-    return run_info
-
-
-def find_job_info(job_archive_dir):
-    job_info = {}
-
-    info_file = os.path.join(job_archive_dir, 'info.yaml')
-    if os.path.isfile(info_file):
-        job_info.update(yaml.safe_load(open(info_file, 'r')))
-
-    conf_file = os.path.join(job_archive_dir, 'config.yaml')
-    if os.path.isfile(conf_file):
-        job_info.update(yaml.safe_load(open(conf_file, 'r')))
-    else:
-        conf_file = os.path.join(job_archive_dir, 'orig.config.yaml')
-        if os.path.isfile(conf_file):
-            log.debug("config.yaml not found but orig.config.yaml found")
-            job_info.update(yaml.safe_load(open(conf_file, 'r')))
-
-    return job_info
-
-
-def remove_paddles_jobs(run_name):
-    jobs = report.ResultsReporter().get_jobs(run_name, fields=['status'])
-    job_ids = [job['job_id'] for job in jobs if job['status'] == 'queued']
-    if job_ids:
-        log.info("Deleting jobs from paddles: %s", str(job_ids))
-        report.try_delete_jobs(run_name, job_ids)
-
-
-def remove_beanstalk_jobs(run_name, tube_name):
-    qhost = config.queue_host
-    qport = config.queue_port
-    if qhost is None or qport is None:
-        raise RuntimeError(
-            'Beanstalk queue information not found in {conf_path}'.format(
-                conf_path=config.teuthology_yaml))
-    log.info("Checking Beanstalk Queue...")
-    beanstalk = beanstalkc.Connection(host=qhost, port=qport)
-    beanstalk.watch(tube_name)
-    beanstalk.ignore('default')
-
-    curjobs = beanstalk.stats_tube(tube_name)['current-jobs-ready']
-    if curjobs != 0:
-        x = 1
-        while x != curjobs:
-            x += 1
-            job = beanstalk.reserve(timeout=20)
-            if job is None:
-                continue
-            job_config = yaml.safe_load(job.body)
-            if run_name == job_config['name']:
-                job_id = job.stats()['id']
-                msg = "Deleting job from queue. ID: " + \
-                    "{id} Name: {name} Desc: {desc}".format(
-                        id=str(job_id),
-                        name=job_config['name'],
-                        desc=job_config['description'],
-                    )
-                log.info(msg)
-                job.delete()
-    else:
-        print "No jobs in Beanstalk Queue"
-    beanstalk.close()
-
-
-def kill_processes(run_name, pids=None):
-    if pids:
-        to_kill = set(pids).intersection(psutil.pids())
-    else:
-        to_kill = find_pids(run_name)
-
-    if len(to_kill) == 0:
-        log.info("No teuthology processes running")
-    else:
-        log.info("Killing Pids: " + str(to_kill))
-        for pid in to_kill:
-            args = ['kill', str(pid)]
-            # Don't attempt to use sudo if it's not necessary
-            proc_user = psutil.Process(int(pid)).username()
-            if proc_user != getpass.getuser():
-                args.insert(0, 'sudo')
-            subprocess.call(args)
-
-
-def process_matches_run(pid, run_name):
-    try:
-        p = psutil.Process(pid)
-        cmd = p.cmdline()
-        if run_name in cmd and sys.argv[0] not in cmd:
-            return True
-    except psutil.NoSuchProcess:
-        pass
-    return False
-
-
-def find_pids(run_name):
-    run_pids = []
-    for pid in psutil.pids():
-        if process_matches_run(pid, run_name):
-            run_pids.append(pid)
-    return run_pids
-
-
-def find_targets(run_name, owner):
-    lock_args = [
-        'teuthology-lock',
-        '--list-targets',
-        '--desc-pattern',
-        '/' + run_name + '/',
-        '--status',
-        'up',
-        '--owner',
-        owner
-    ]
-    proc = subprocess.Popen(lock_args, stdout=subprocess.PIPE)
-    stdout, stderr = proc.communicate()
-    out_obj = yaml.safe_load(stdout)
-    if not out_obj or 'targets' not in out_obj:
-        return {}
-
-    return out_obj
-
-
-def nuke_targets(targets_dict, owner):
-    targets = targets_dict.get('targets')
-    if not targets:
-        log.info("No locked machines. Not nuking anything")
-
-    to_nuke = []
-    for target in targets:
-        to_nuke.append(target.split('@')[1].split('.')[0])
-
-    target_file = tempfile.NamedTemporaryFile(delete=False)
-    target_file.write(yaml.safe_dump(targets_dict))
-    target_file.close()
-
-    log.info("Nuking machines: " + str(to_nuke))
-    nuke_args = [
-        'teuthology-nuke',
-        '-t',
-        target_file.name,
-        '--unlock',
-        '-r',
-        '--owner',
-        owner
-    ]
-    proc = subprocess.Popen(
-        nuke_args,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.STDOUT)
-    for line in iter(proc.stdout.readline, ''):
-        line = line.replace('\r', '').replace('\n', '')
-        log.info(line)
-        sys.stdout.flush()
-
-    os.unlink(target_file.name)
diff --git a/teuthology/lock.py b/teuthology/lock.py
deleted file mode 100644 (file)
index fafe0b8..0000000
+++ /dev/null
@@ -1,532 +0,0 @@
-import argparse
-import json
-import logging
-import subprocess
-import urllib
-import yaml
-import re
-import collections
-import tempfile
-import os
-import time
-
-import teuthology
-from .config import config
-from . import lockstatus as ls
-from . import misc
-from teuthology.misc import get_distro
-from teuthology.misc import get_distro_version
-
-log = logging.getLogger(__name__)
-
-
-def lock_many(ctx, num, machinetype, user=None, description=None):
-    machinetypes = misc.get_multi_machine_types(machinetype)
-    if user is None:
-        user = misc.get_user()
-    for machinetype in machinetypes:
-        success, content, status = ls.send_request(
-            'POST',
-            config.lock_server,
-            urllib.urlencode(
-                dict(
-                    user=user,
-                    num=num,
-                    machinetype=machinetype,
-                    desc=description,
-                )))
-        if success:
-            machines = json.loads(content)
-            log.debug('locked {machines}'.format(
-                machines=', '.join(machines.keys())))
-            if machinetype == 'vps':
-                ok_machs = {}
-                for machine in machines:
-                    if create_if_vm(ctx, machine):
-                        ok_machs[machine] = machines[machine]
-                    else:
-                        log.error('Unable to create virtual machine: %s' % machine)
-                        unlock_one(ctx, machine)
-                return ok_machs
-            return machines
-        if status == 503:
-            log.error('Insufficient nodes available to lock %d %s nodes.', num,machinetype)
-        else:
-            log.error('Could not lock %d %s nodes, reason: unknown.', num, machinetype)
-    return []
-
-
-def lock_one(ctx, name, user=None, description=None):
-    if user is None:
-        user = misc.get_user()
-    success, _, _ = ls.send_request(
-        'POST',
-        config.lock_server + '/' + name,
-        urllib.urlencode(dict(user=user, desc=description)))
-    if success:
-        log.debug('locked %s as %s', name, user)
-    else:
-        log.error('failed to lock %s', name)
-    return success
-
-
-def unlock_one(ctx, name, user=None):
-    if user is None:
-        user = misc.get_user()
-    success, _, http_ret = ls.send_request(
-        'DELETE',
-        config.lock_server + '/' + name + '?' +
-        urllib.urlencode(dict(user=user)))
-    if success:
-        log.debug('unlocked %s', name)
-        if not destroy_if_vm(ctx, name):
-            log.error('downburst destroy failed for %s', name)
-            log.info('%s is not locked' % name)
-    else:
-        log.error('failed to unlock %s', name)
-        failure_types = {403: 'You do not have %s locked',
-                         404: '%s is an invalid host name'}
-        if http_ret in failure_types:
-            log.error(failure_types[http_ret], name)
-    return success
-
-
-def list_locks():
-    success, content, _ = ls.send_request('GET', config.lock_server)
-    if success:
-        return json.loads(content)
-    return None
-
-
-def update_lock(ctx, name, description=None, status=None, sshpubkey=None):
-    status_info = ls.get_status(ctx, name)
-    phys_host = status_info['vpshost']
-    if phys_host:
-        keyscan_out = ''
-        while not keyscan_out:
-            time.sleep(10)
-            keyscan_out, _ = keyscan_check(ctx, [name])
-    updated = {}
-    if description is not None:
-        updated['desc'] = description
-    if status is not None:
-        updated['status'] = status
-    if sshpubkey is not None:
-        updated['sshpubkey'] = sshpubkey
-
-    if updated:
-        success, _, _ = ls.send_request(
-            'PUT',
-            config.lock_server + '/' + name,
-            body=urllib.urlencode(updated),
-            headers={'Content-type': 'application/x-www-form-urlencoded'})
-        return success
-    return True
-
-
-def canonicalize_hostname(s):
-    if re.match('ubuntu@.*\.front\.sepia\.ceph\.com', s) is None:
-        s = 'ubuntu@' + s + '.front.sepia.ceph.com'
-    return s
-
-
-def main(ctx):
-    if ctx.verbose:
-        teuthology.log.setLevel(logging.DEBUG)
-
-    misc.read_config(ctx)
-
-    ret = 0
-    user = ctx.owner
-    machines = [canonicalize_hostname(m) for m in ctx.machines]
-    machines_to_update = []
-
-    if ctx.targets:
-        try:
-            with file(ctx.targets) as f:
-                g = yaml.safe_load_all(f)
-                for new in g:
-                    if 'targets' in new:
-                        for t in new['targets'].iterkeys():
-                            machines.append(t)
-        except IOError as e:
-            raise argparse.ArgumentTypeError(str(e))
-
-    if ctx.f:
-        assert ctx.lock or ctx.unlock, \
-            '-f is only supported by --lock and --unlock'
-    if machines:
-        assert ctx.lock or ctx.unlock or ctx.list or ctx.list_targets \
-            or ctx.update, \
-            'machines cannot be specified with that operation'
-    else:
-        assert ctx.num_to_lock or ctx.list or ctx.list_targets or \
-            ctx.summary or ctx.brief, \
-            'machines must be specified for that operation'
-    if ctx.all:
-        assert ctx.list or ctx.list_targets or ctx.brief, \
-            '--all can only be used with --list, --list-targets, and --brief'
-        assert ctx.owner is None, \
-            '--all and --owner are mutually exclusive'
-        assert not machines, \
-            '--all and listing specific machines are incompatible'
-    if ctx.num_to_lock:
-        assert ctx.machine_type, \
-            'must specify machine type to lock'
-
-    if ctx.brief or ctx.list or ctx.list_targets:
-        assert ctx.desc is None, '--desc does nothing with --list/--brief'
-
-        if machines:
-            statuses = []
-            for machine in machines:
-                status = ls.get_status(ctx, machine)
-                if status:
-                    statuses.append(status)
-                else:
-                    log.error("Lockserver doesn't know about machine: %s" %
-                              machine)
-        else:
-            statuses = list_locks()
-        vmachines = []
-
-        for vmachine in statuses:
-            if vmachine['vpshost']:
-                if vmachine['locked']:
-                    vmachines.append(vmachine['name'])
-        if vmachines:
-            # Avoid ssh-keyscans for everybody when listing all machines
-            # Listing specific machines will update the keys.
-            if machines:
-                scan_for_locks(ctx, vmachines)
-                statuses = [ls.get_status(ctx, machine)
-                            for machine in machines]
-            else:
-                statuses = list_locks()
-        if statuses:
-            if ctx.machine_type:
-                statuses = [_status for _status in statuses
-                            if _status['type'] == ctx.machine_type]
-            if not machines and ctx.owner is None and not ctx.all:
-                ctx.owner = misc.get_user()
-            if ctx.owner is not None:
-                statuses = [_status for _status in statuses
-                            if _status['locked_by'] == ctx.owner]
-            if ctx.status is not None:
-                statuses = [_status for _status in statuses
-                            if _status['up'] == (ctx.status == 'up')]
-            if ctx.locked is not None:
-                statuses = [_status for _status in statuses
-                            if _status['locked'] == (ctx.locked == 'true')]
-            if ctx.desc is not None:
-                statuses = [_status for _status in statuses
-                            if _status['description'] == ctx.desc]
-            if ctx.desc_pattern is not None:
-                statuses = [_status for _status in statuses
-                            if _status['description'] is not None and
-                            _status['description'].find(ctx.desc_pattern) >= 0]
-            if ctx.list:
-                    print json.dumps(statuses, indent=4)
-
-            elif ctx.brief:
-                for s in statuses:
-                    locked = "un" if s['locked'] == 0 else "  "
-                    mo = re.match('\w+@(\w+?)\..*', s['name'])
-                    host = mo.group(1) if mo else s['name']
-                    print '{host} {locked}locked {owner} "{desc}"'.format(
-                        locked=locked, host=host,
-                        owner=s['locked_by'], desc=s['description'])
-
-            else:
-                frag = {'targets': {}}
-                for f in statuses:
-                    frag['targets'][f['name']] = f['sshpubkey']
-                print yaml.safe_dump(frag, default_flow_style=False)
-        else:
-            log.error('error retrieving lock statuses')
-            ret = 1
-
-    elif ctx.summary:
-        do_summary(ctx)
-        return 0
-
-    elif ctx.lock:
-        for machine in machines:
-            if not lock_one(ctx, machine, user):
-                ret = 1
-                if not ctx.f:
-                    return ret
-            else:
-                machines_to_update.append(machine)
-                create_if_vm(ctx, machine)
-    elif ctx.unlock:
-        for machine in machines:
-            if not unlock_one(ctx, machine, user):
-                ret = 1
-                if not ctx.f:
-                    return ret
-            else:
-                machines_to_update.append(machine)
-    elif ctx.num_to_lock:
-        result = lock_many(ctx, ctx.num_to_lock, ctx.machine_type, user)
-        if not result:
-            ret = 1
-        else:
-            machines_to_update = result.keys()
-            if ctx.machine_type == 'vps':
-                shortnames = ' '.join(
-                    [name.split('@')[1].split('.')[0]
-                        for name in result.keys()]
-                )
-                if len(result) < ctx.num_to_lock:
-                    log.error("Locking failed.")
-                    for machn in result:
-                        unlock_one(ctx, machn)
-                    ret = 1
-                else:
-                    log.info("Successfully Locked:\n%s\n" % shortnames)
-                    log.info(
-                        "Unable to display keys at this time (virtual " +
-                        "machines are booting).")
-                    log.info(
-                        "Please run teuthology-lock --list-targets %s once " +
-                        "these machines come up.",
-                        shortnames)
-            else:
-                print yaml.safe_dump(
-                    dict(targets=result),
-                    default_flow_style=False)
-    elif ctx.update:
-        assert ctx.desc is not None or ctx.status is not None, \
-            'you must specify description or status to update'
-        assert ctx.owner is None, 'only description and status may be updated'
-        machines_to_update = machines
-
-    if ctx.desc is not None or ctx.status is not None:
-        for machine in machines_to_update:
-            update_lock(ctx, machine, ctx.desc, ctx.status)
-
-    return ret
-
-
-def updatekeys(ctx):
-    loglevel = logging.INFO
-    if ctx.verbose:
-        loglevel = logging.DEBUG
-
-    logging.basicConfig(
-        level=loglevel,
-    )
-
-    misc.read_config(ctx)
-
-    machines = [canonicalize_hostname(m) for m in ctx.machines]
-
-    if ctx.targets:
-        try:
-            with file(ctx.targets) as f:
-                g = yaml.safe_load_all(f)
-                for new in g:
-                    if 'targets' in new:
-                        for t in new['targets'].iterkeys():
-                            machines.append(t)
-        except IOError as e:
-            raise argparse.ArgumentTypeError(str(e))
-
-    return scan_for_locks(ctx, machines)
-
-
-def keyscan_check(ctx, machines):
-    locks = list_locks()
-    current_locks = {}
-    for lock in locks:
-        current_locks[lock['name']] = lock
-
-    if hasattr(ctx, 'all'):
-        if ctx.all:
-            machines = current_locks.keys()
-
-    for i, machine in enumerate(machines):
-        if '@' in machine:
-            _, machines[i] = machine.rsplit('@')
-    args = ['ssh-keyscan', '-t', 'rsa']
-    args.extend(machines)
-    p = subprocess.Popen(
-        args=args,
-        stdout=subprocess.PIPE,
-    )
-    out, err = p.communicate()
-    return (out, current_locks)
-
-
-def update_keys(ctx, out, current_locks):
-    ret = 0
-    for key_entry in out.splitlines():
-        hostname, pubkey = key_entry.split(' ', 1)
-        # TODO: separate out user
-        full_name = 'ubuntu@{host}'.format(host=hostname)
-        log.info('Checking %s', full_name)
-        assert full_name in current_locks, 'host is not in the database!'
-        if current_locks[full_name]['sshpubkey'] != pubkey:
-            log.info('New key found. Updating...')
-            if not update_lock(ctx, full_name, sshpubkey=pubkey):
-                log.error('failed to update %s!', full_name)
-                ret = 1
-    return ret
-
-
-def scan_for_locks(ctx, machines):
-    out, current_locks = keyscan_check(ctx, machines)
-    return update_keys(ctx, out, current_locks)
-
-
-def do_summary(ctx):
-    lockd = collections.defaultdict(lambda: [0, 0, 'unknown'])
-    for l in list_locks():
-        if ctx.machine_type and l['type'] != ctx.machine_type:
-            continue
-        who = l['locked_by'] if l['locked'] == 1 else '(free)', l['type']
-        lockd[who][0] += 1
-        lockd[who][1] += l['up']         # up is 1 or 0
-        lockd[who][2] = l['type']
-
-    locks = sorted([p for p in lockd.iteritems()
-                    ], key=lambda sort: (sort[1][2], sort[1][0]))
-    total_count, total_up = 0, 0
-    print "TYPE     COUNT  UP  OWNER"
-
-    for (owner, (count, upcount, machinetype)) in locks:
-        # if machinetype == spectype:
-        print "{machinetype:8s} {count:3d}  {up:3d}  {owner}".format(
-            count=count, up=upcount, owner=owner[0],
-            machinetype=machinetype)
-        total_count += count
-        total_up += upcount
-
-    print "         ---  ---"
-    print "{cnt:12d}  {up:3d}".format(cnt=total_count, up=total_up)
-
-
-def decanonicalize_hostname(s):
-    if re.match('ubuntu@.*\.front\.sepia\.ceph\.com', s):
-        s = s[len('ubuntu@'): -len('.front.sepia.ceph.com')]
-    return s
-
-
-def _get_downburst_exec():
-    """
-    First check for downburst in the user's path.
-    Then check in ~/src, ~ubuntu/src, and ~teuthology/src.
-    Return '' if no executable downburst is found.
-    """
-    path = os.environ.get('PATH', None)
-    if path:
-        for p in os.environ.get('PATH', '').split(os.pathsep):
-            pth = os.path.join(p, 'downburst')
-            if os.access(pth, os.X_OK):
-                return pth
-    import pwd
-    little_old_me = pwd.getpwuid(os.getuid()).pw_name
-    for user in [little_old_me, 'ubuntu', 'teuthology']:
-        pth = "/home/%s/src/downburst/virtualenv/bin/downburst" % user
-        if os.access(pth, os.X_OK):
-            return pth
-    return ''
-
-#
-# Use downburst to create a virtual machine
-#
-
-
-def create_if_vm(ctx, machine_name):
-    status_info = ls.get_status(ctx, machine_name)
-    phys_host = status_info['vpshost']
-    if not phys_host:
-        return False
-    os_type = get_distro(ctx)
-    os_version = get_distro_version(ctx)
-
-    createMe = decanonicalize_hostname(machine_name)
-    with tempfile.NamedTemporaryFile() as tmp:
-        try:
-            lfile = ctx.downburst_conf
-            with open(lfile) as downb_yaml:
-                lcnfg = yaml.safe_load(downb_yaml)
-                if lcnfg.keys() == ['downburst']:
-                    lcnfg = lcnfg['downburst']
-        except (TypeError, AttributeError):
-            if hasattr(ctx, 'config') and ctx.config is not None:
-                lcnfg = ctx.config.get('downburst', dict())
-            else:
-                lcnfg = {}
-        except IOError:
-            print "Error reading %s" % lfile
-            return False
-
-        distro = lcnfg.get('distro', os_type.lower())
-        distroversion = lcnfg.get('distroversion', os_version)
-
-        file_info = {}
-        file_info['disk-size'] = lcnfg.get('disk-size', '100G')
-        file_info['ram'] = lcnfg.get('ram', '1.9G')
-        file_info['cpus'] = lcnfg.get('cpus', 1)
-        file_info['networks'] = lcnfg.get(
-            'networks', [{'source': 'front', 'mac': status_info['mac']}])
-        file_info['distro'] = distro
-        file_info['distroversion'] = distroversion
-        file_info['additional-disks'] = lcnfg.get(
-            'additional-disks', 3)
-        file_info['additional-disks-size'] = lcnfg.get(
-            'additional-disks-size', '200G')
-        file_info['arch'] = lcnfg.get('arch', 'x86_64')
-        file_out = {'downburst': file_info}
-        yaml.safe_dump(file_out, tmp)
-        metadata = "--meta-data=%s" % tmp.name
-        dbrst = _get_downburst_exec()
-        if not dbrst:
-            log.error("No downburst executable found.")
-            return False
-        p = subprocess.Popen([dbrst, '-c', phys_host,
-                              'create', metadata, createMe],
-                             stdout=subprocess.PIPE, stderr=subprocess.PIPE,)
-        owt, err = p.communicate()
-        if err:
-            log.info("Downburst completed on %s: %s" %
-                    (machine_name, err))
-        else:
-            log.info("%s created: %s" % (machine_name, owt))
-        # If the guest already exists, destroy it first and then re-create it:
-        if 'exists' in err:
-            log.info("Guest files exist. Re-creating guest: %s" %
-                    (machine_name))
-            destroy_if_vm(ctx, machine_name)
-            create_if_vm(ctx, machine_name)
-    return True
-#
-# Use downburst to destroy a virtual machine
-#
-
-
-def destroy_if_vm(ctx, machine_name):
-    """
-    Return False only on vm downburst failures.
-    """
-    status_info = ls.get_status(ctx, machine_name)
-    phys_host = status_info['vpshost']
-    if not phys_host:
-        return True
-    destroyMe = decanonicalize_hostname(machine_name)
-    dbrst = _get_downburst_exec()
-    if not dbrst:
-        log.error("No downburst executable found.")
-        return False
-    p = subprocess.Popen([dbrst, '-c', phys_host,
-                          'destroy', destroyMe],
-                         stdout=subprocess.PIPE, stderr=subprocess.PIPE,)
-    owt, err = p.communicate()
-    if err:
-        log.error(err)
-        return False
-    else:
-        log.info("%s destroyed: %s" % (machine_name, owt))
-    return True
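
For reference, create_if_vm() above boils down to handing downburst a small
YAML meta-data file built from the lock server's status record and the
optional downburst config. A minimal sketch of that file being generated,
using the defaults above and a made-up MAC address::

    import yaml

    # Hypothetical values; the real MAC comes from the lock server entry.
    file_info = {
        'disk-size': '100G',
        'ram': '1.9G',
        'cpus': 1,
        'networks': [{'source': 'front', 'mac': '52:54:00:00:00:01'}],
        'distro': 'ubuntu',
        'distroversion': '12.04',
        'additional-disks': 3,
        'additional-disks-size': '200G',
        'arch': 'x86_64',
    }
    print yaml.safe_dump({'downburst': file_info}, default_flow_style=False)
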
diff --git a/teuthology/locker/__init__.py b/teuthology/locker/__init__.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/teuthology/locker/api.py b/teuthology/locker/api.py
deleted file mode 100644 (file)
index 975a3a4..0000000
+++ /dev/null
@@ -1,200 +0,0 @@
-import json
-import web
-import subprocess
-
-from config import DB
-
-import logging
-log = logging.getLogger(__name__)
-
-def load_machine(name):
-    results = list(DB.select('machine', what='*',
-                             where='name = $name',
-                             vars=dict(name=name)))
-    if not results:
-        raise web.NotFound()
-    return results[0]
-
-def get_sshkey(name):
-    if '@' in name:
-        _, name = name.rsplit('@')
-    args = ['ssh-keyscan']
-    args.append(name)
-    p = subprocess.Popen(
-        args=args,
-        stdout=subprocess.PIPE,
-        )
-    out, _ = p.communicate()
-    pubkey = None
-    for key_entry in out.splitlines():
-        hostname, pubkey = key_entry.split(' ', 1)
-    if not pubkey:
-        status = 1
-    else:
-        status = 0
-    return (pubkey), status
-
-def update_sshkey(name, key, type):
-    if type == 'vps':
-        return
-    res = DB.update('machine', where='name = $name AND locked = false',
-                    vars=dict(name=name),
-                    sshpubkey=key,)
-    assert res == 1, 'Failed to update key of machine {name}'.format(name=name)
-    print 'Updated key on ', name
-
-class MachineLock:
-    def GET(self, name):
-        row = load_machine(name)
-        row.locked_since = row.locked_since.isoformat()
-        web.header('Content-type', 'text/json')
-        return json.dumps(row)
-
-    def DELETE(self, name):
-        user = web.input('user')['user']
-        machine = load_machine(name)
-        if not machine.locked:
-            raise web.BadRequest()
-        if machine.locked_by != user:
-            raise web.Forbidden()
-
-        res = DB.update('machine',
-                        where='locked = true AND name = $name AND locked_by = $user',
-                        vars=dict(name=name, user=user),
-                        locked=False, locked_by=None, description=None)
-        assert res == 1, 'Failed to unlock machine {name}'.format(name=name)
-        print user, 'unlocked', name
-
-    def POST(self, name):
-        user = web.input('user')['user']
-        desc = web.input(desc=None)['desc']
-        machine = load_machine(name)
-        if machine.locked:
-            raise web.Forbidden()
-
-        if machine.type == 'vps':
-            curkey = machine.sshpubkey
-        else:
-            curkey, getstatus = get_sshkey(name)
-            if getstatus != 0:
-                curkey = machine.sshpubkey
-        if machine.sshpubkey != curkey:
-            newkey = curkey
-        else:
-            newkey = machine.sshpubkey
-        res = DB.update('machine', where='name = $name AND locked = false',
-                        vars=dict(name=name),
-                        locked=True,
-                        description=desc,
-                        sshpubkey=newkey,
-                        locked_by=user,
-                        locked_since=web.db.SQLLiteral('NOW()'))
-        assert res == 1, 'Failed to lock machine {name}'.format(name=name)
-        print user, 'locked single machine', name, 'desc', desc
-
-    def PUT(self, name):
-        desc = web.input(desc=None)['desc']
-        status = web.input(status=None)['status']
-        sshpubkey = web.input(sshpubkey=None)['sshpubkey']
-
-        updated = {}
-        if desc is not None:
-            updated['description'] = desc
-        if status is not None:
-            updated['up'] = (status == 'up')
-        if sshpubkey is not None:
-            updated['sshpubkey'] = sshpubkey
-
-        if not updated:
-            raise web.BadRequest()
-        DB.update('machine', where='name = $name',
-                  vars=dict(name=name), **updated)
-        print 'updated', name, 'with', updated, 'desc', desc
-
-class Lock:
-    def GET(self):
-        rows = list(DB.select('machine', what='*'))
-        if not rows:
-            raise web.NotFound()
-        for row in rows:
-            row.locked_since = row.locked_since.isoformat()
-        web.header('Content-type', 'text/json')
-        return json.dumps(rows)
-
-    def POST(self):
-        user = web.input('user')['user']
-        desc = web.input(desc=None)['desc']
-        num = int(web.input('num')['num'])
-        machinetype = dict(machinetype=(web.input(machinetype='plana')['machinetype']))
-
-        if num < 1:
-            raise web.BadRequest()
-
-        tries = 0
-        check_existing = True
-        while True:
-            try:
-                # transaction will be rolled back if an exception is raised
-                with DB.transaction():
-                    if desc is not None and check_existing:
-                        # if a description is provided, treat it as a
-                        # key for locking in case the same run locked
-                        # machines in the db successfully before, but
-                        # the web server reported failure to it
-                        # because the request took too long. Only try
-                        # this once per request.
-                        check_existing = False
-                        results = list(DB.select('machine',
-                                                 # vars for the $-placeholders
-                                                 # in the where clause
-                                                 dict(machinetype,
-                                                      desc=desc, user=user),
-                                                 what='name, sshpubkey',
-                                                 where='locked = true AND up = true AND type = $machinetype AND description = $desc AND locked_by = $user',
-                                                 limit=num))
-                        if len(results) == num:
-                            name_keys = {}
-                            for row in results:
-                                name_keys[row.name] = row.sshpubkey
-                            print 'reusing machines', name_keys.keys()
-                            break
-
-                    results = list(DB.select('machine', machinetype,
-                                             what='name, sshpubkey, type',
-                                             where='locked = false AND up = true AND type = $machinetype',
-                                             limit=num))
-                    if len(results) < num:
-                        raise web.HTTPError(status='503 Service Unavailable')
-                    name_keys = {}
-                    for row in results:
-                        if row.type == 'vps':
-                            curkey = row.sshpubkey
-                        else:
-                            curkey, getstatus = get_sshkey(row.name)
-                            if getstatus != 0:
-                                curkey = row.sshpubkey
-                        if row.sshpubkey != curkey:
-                            newkey = curkey
-                            update_sshkey(row.name, curkey, row.type)
-                        else:
-                            newkey = row.sshpubkey
-                        name_keys[row.name] = newkey
-                    where_cond = web.db.sqlors('name = ', name_keys.keys()) \
-                        + ' AND locked = false AND up = true'
-                    num_locked = DB.update('machine',
-                                           where=where_cond,
-                                           locked=True,
-                                           locked_by=user,
-                                           description=desc,
-                                           locked_since=web.db.SQLLiteral('NOW()'))
-                    assert num_locked == num, 'Failed to lock machines'
-            except Exception:
-                log.exception("Saw exception")
-                tries += 1
-                if tries < 10:
-                    continue
-                raise
-            else:
-                break
-
-        print user, 'locked', name_keys.keys(), 'desc', desc
-
-        web.header('Content-type', 'text/json')
-        return json.dumps(name_keys)
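
The Lock and MachineLock handlers above form a small REST API: GET returns a
JSON row, POST locks, DELETE unlocks and PUT updates a machine's fields. A
rough client sketch, assuming a lock server reachable at a placeholder URL
and using the /lock routes defined in locker.py below::

    import httplib2
    import urllib

    BASE = 'http://lockserver.example.com/lock'   # placeholder URL
    MACHINE = 'ubuntu@plana01.front.sepia.ceph.com'
    http = httplib2.Http()

    # Lock one machine (MachineLock.POST); params are read via web.input().
    params = urllib.urlencode({'user': 'me@example.com', 'desc': 'testing'})
    resp, content = http.request('%s/%s?%s' % (BASE, MACHINE, params),
                                 method='POST')

    # Unlock it again (MachineLock.DELETE).
    params = urllib.urlencode({'user': 'me@example.com'})
    resp, content = http.request('%s/%s?%s' % (BASE, MACHINE, params),
                                 method='DELETE')
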
diff --git a/teuthology/locker/config.py b/teuthology/locker/config.py
deleted file mode 100644 (file)
index 090e8a0..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-"""
-This file contains database configuration.
-
-The schema can be created with::
-
-    CREATE TABLE machine (
-        name varchar(255),
-        type enum('burnupi','plana','vps') NOT NULL DEFAULT 'plana',
-        up boolean NOT NULL,
-        locked boolean NOT NULL,
-        locked_since timestamp NOT NULL DEFAULT '0000-00-00T00:00:00',
-        locked_by varchar(32),
-        description text,
-        sshpubkey text NOT NULL,
-        PRIMARY KEY (name),
-        INDEX (locked),
-        INDEX (up));
-
-If using MySQL, be sure to use an engine that supports
-transactions, like InnoDB.
-"""
-import web
-
-# Change these values to the connection info for your database.
-DB = web.database(dbn='dbms', db='db', user='user', pw='password', host='host')
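
The DB line above is a placeholder; a concrete MySQL configuration might look
like the following (credentials and host are made up, and the machine table
should use InnoDB as noted above)::

    import web

    DB = web.database(dbn='mysql', db='teuthology_locks',
                      user='locker', pw='secret', host='db.example.com')
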
diff --git a/teuthology/locker/locker.py b/teuthology/locker/locker.py
deleted file mode 100755 (executable)
index b017032..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/usr/bin/env python
-
-import os
-import sys
-import web
-
-abspath = os.path.dirname(__file__)
-if abspath not in sys.path:
-    sys.path.append(abspath)
-
-from api import Lock, MachineLock # noqa
-
-urls = (
-    '/lock', 'Lock',
-    '/lock/(.*)', 'MachineLock',
-    )
-
-app = web.application(urls, globals())
-application = app.wsgifunc()
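
locker.py only exposes a WSGI callable; for quick local testing the same app
could also be served with web.py's built-in HTTP server (a sketch, listening
on web.py's default port 8080)::

    if __name__ == '__main__':
        # web.py's built-in server; the port can be overridden via argv[1].
        app.run()
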
diff --git a/teuthology/lockstatus.py b/teuthology/lockstatus.py
deleted file mode 100644 (file)
index 44821cc..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-import json
-import httplib2
-import logging
-import os
-from .config import config
-
-log = logging.getLogger(__name__)
-
-
-def send_request(method, url, body=None, headers=None):
-    http = httplib2.Http()
-    resp, content = http.request(url, method=method, body=body, headers=headers)
-    if resp.status == 200:
-        return (True, content, resp.status)
-    log.info("%s request to '%s' with body '%s' failed with response code %d",
-             method, url, body, resp.status)
-    return (False, None, resp.status)
-
-
-def get_status(ctx, name):
-    success, content, _ = send_request('GET', os.path.join(config.lock_server, name))
-    if success:
-        return json.loads(content)
-    return None
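
get_status() simply GETs <lock_server>/<name> and decodes the JSON row served
by the locker API. A minimal usage sketch, assuming config.lock_server points
at a running locker instance and the hostname is illustrative::

    status = get_status(None, 'ubuntu@plana01.front.sepia.ceph.com')
    if status and status['locked']:
        print '%s is locked by %s' % (status['name'], status['locked_by'])
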
diff --git a/teuthology/misc.py b/teuthology/misc.py
deleted file mode 100644 (file)
index 44132ab..0000000
+++ /dev/null
@@ -1,1200 +0,0 @@
-"""
-Miscellaneous teuthology functions.
-Used by other modules, but mostly called from tasks.
-"""
-from cStringIO import StringIO
-
-import argparse
-import os
-import logging
-import configobj
-import getpass
-import socket
-import sys
-import tarfile
-import time
-import urllib2
-import urlparse
-import yaml
-import json
-import re
-
-from teuthology import safepath
-from .orchestra import run
-from .config import config
-from .contextutil import safe_while
-
-log = logging.getLogger(__name__)
-
-import datetime
-stamp = datetime.datetime.now().strftime("%y%m%d%H%M")
-is_vm = lambda x: x.startswith('vpm') or x.startswith('ubuntu@vpm')
-
-is_arm = lambda x: x.startswith('tala') or x.startswith(
-    'ubuntu@tala') or x.startswith('saya') or x.startswith('ubuntu@saya')
-
-
-def config_file(string):
-    """
-    Create a config file
-
-    :param string: name of yaml file used for config.
-    :returns: Dictionary of configuration information.
-    """
-    config_dict = {}
-    try:
-        with file(string) as f:
-            g = yaml.safe_load_all(f)
-            for new in g:
-                config_dict.update(new)
-    except IOError as e:
-        raise argparse.ArgumentTypeError(str(e))
-    return config_dict
-
-
-class MergeConfig(argparse.Action):
-    """
-    Used by scripts to merge configurations (nuke, run, and
-    schedule, for example).
-    """
-    def __call__(self, parser, namespace, values, option_string=None):
-        """
-        Perform merges of all the data in the config dictionaries.
-        """
-        config_dict = getattr(namespace, self.dest)
-        for new in values:
-            deep_merge(config_dict, new)
-
-
-def get_testdir(ctx):
-    """
-    :returns: A test directory
-    """
-    if 'test_path' in ctx.teuthology_config:
-        return ctx.teuthology_config['test_path']
-    test_user = get_test_user(ctx)
-    # FIXME this ideally should use os.path.expanduser() in the future, in case
-    # $HOME isn't /home/$USER - e.g. on a Mac. However, since we're executing
-    # this on the server side, it won't work properly.
-    return ctx.teuthology_config.get('test_path', '/home/%s/cephtest' %
-                                     test_user)
-
-
-def get_test_user(ctx):
-    """
-    :returns: str -- the user to run tests as on remote hosts
-    """
-    return ctx.teuthology_config.get('test_user', 'ubuntu')
-
-
-def get_archive_dir(ctx):
-    """
-    :returns: archive directory (a subdirectory of the test directory)
-    """
-    test_dir = get_testdir(ctx)
-    return os.path.normpath(os.path.join(test_dir, 'archive'))
-
-
-def get_http_log_path(archive_dir, job_id=None):
-    """
-    :param archive_dir: directory to be searched
-    :param job_id: id of job that terminates the name of the log path
-    :returns: http log path
-    """
-    http_base = config.archive_server
-    if not http_base:
-        return None
-
-    sep = os.path.sep
-    archive_dir = archive_dir.rstrip(sep)
-    archive_subdir = archive_dir.split(sep)[-1]
-    if archive_subdir.endswith(str(job_id)):
-        archive_subdir = archive_dir.split(sep)[-2]
-
-    if job_id is None:
-        return os.path.join(http_base, archive_subdir, '')
-    return os.path.join(http_base, archive_subdir, str(job_id), '')
-
-
-def get_ceph_binary_url(package=None,
-                        branch=None, tag=None, sha1=None, dist=None,
-                        flavor=None, format=None, arch=None):
-    """
-    Return the URL of the ceph binary found on gitbuilder.
-    """
-    BASE = 'http://gitbuilder.ceph.com/{package}-{format}-{dist}-{arch}-{flavor}/'.format(
-        package=package,
-        flavor=flavor,
-        arch=arch,
-        format=format,
-        dist=dist
-        )
-
-    if sha1 is not None:
-        assert branch is None, "cannot set both sha1 and branch"
-        assert tag is None, "cannot set both sha1 and tag"
-    else:
-        # gitbuilder uses remote-style ref names for branches, mangled to
-        # have underscores instead of slashes; e.g. origin_master
-        if tag is not None:
-            ref = tag
-            assert branch is None, "cannot set both branch and tag"
-        else:
-            if branch is None:
-                branch = 'master'
-            ref = branch
-
-        sha1_url = urlparse.urljoin(BASE, 'ref/{ref}/sha1'.format(ref=ref))
-        log.debug('Translating ref to sha1 using url %s', sha1_url)
-
-        try:
-            sha1_fp = urllib2.urlopen(sha1_url)
-            sha1 = sha1_fp.read().rstrip('\n')
-            sha1_fp.close()
-        except urllib2.HTTPError as e:
-            log.error('Failed to get url %s', sha1_url)
-            raise e
-
-    log.debug('Using %s %s sha1 %s', package, format, sha1)
-    bindir_url = urlparse.urljoin(BASE, 'sha1/{sha1}/'.format(sha1=sha1))
-    return (sha1, bindir_url)
-
-
-def feed_many_stdins(fp, processes):
-    """
-    :param fp: input file
-    :param processes: list of processes to be written to.
-    """
-    while True:
-        data = fp.read(8192)
-        if not data:
-            break
-        for proc in processes:
-            proc.stdin.write(data)
-
-
-def feed_many_stdins_and_close(fp, processes):
-    """
-    Feed many and then close processes.
-
-    :param fp: input file
-    :param processes: list of processes to be written to.
-    """
-    feed_many_stdins(fp, processes)
-    for proc in processes:
-        proc.stdin.close()
-
-
-def get_mons(roles, ips):
-    """
-    Get monitors and their associated ports
-    """
-    mons = {}
-    mon_ports = {}
-    mon_id = 0
-    for idx, roles in enumerate(roles):
-        for role in roles:
-            if not role.startswith('mon.'):
-                continue
-            if ips[idx] not in mon_ports:
-                mon_ports[ips[idx]] = 6789
-            else:
-                mon_ports[ips[idx]] += 1
-            addr = '{ip}:{port}'.format(
-                ip=ips[idx],
-                port=mon_ports[ips[idx]],
-                )
-            mon_id += 1
-            mons[role] = addr
-    assert mons
-    return mons
-
-
-def generate_caps(type_):
-    """
-    Each call will return the next capability for each system type
-    (essentially a subset of possible role values).  Valid types are osd,
-    mds and client.
-    """
-    defaults = dict(
-        osd=dict(
-            mon='allow *',
-            osd='allow *',
-            ),
-        mds=dict(
-            mon='allow *',
-            osd='allow *',
-            mds='allow',
-            ),
-        client=dict(
-            mon='allow rw',
-            osd='allow rwx',
-            mds='allow',
-            ),
-        )
-    for subsystem, capability in defaults[type_].items():
-        yield '--cap'
-        yield subsystem
-        yield capability
-
-
-def skeleton_config(ctx, roles, ips):
-    """
-    Returns a ConfigObj that is prefilled with a skeleton config.
-
-    Use conf[section][key]=value or conf.merge to change it.
-
-    Use conf.write to write it out, override .filename first if you want.
-    """
-    path = os.path.join(os.path.dirname(__file__), 'ceph.conf.template')
-    t = open(path, 'r')
-    skconf = t.read().format(testdir=get_testdir(ctx))
-    conf = configobj.ConfigObj(StringIO(skconf), file_error=True)
-    mons = get_mons(roles=roles, ips=ips)
-    for role, addr in mons.iteritems():
-        conf.setdefault(role, {})
-        conf[role]['mon addr'] = addr
-    # set up standby mds's
-    for roles_subset in roles:
-        for role in roles_subset:
-            if role.startswith('mds.'):
-                conf.setdefault(role, {})
-                if role.find('-s-') != -1:
-                    standby_mds = role[role.find('-s-')+3:]
-                    conf[role]['mds standby for name'] = standby_mds
-    return conf
-
-
-def roles_of_type(roles_for_host, type_):
-    """
-    Generator of roles.
-
-    Each call returns the next possible role of the type specified.
-    :param roles_for_host: list of roles possible
-    :param type_: type of role
-    """
-    prefix = '{type}.'.format(type=type_)
-    for name in roles_for_host:
-        if not name.startswith(prefix):
-            continue
-        id_ = name[len(prefix):]
-        yield id_
-
-
-def all_roles(cluster):
-    """
-    Generator of role values.  Each call returns another role.
-
-    :param cluster: Cluster extracted from the ctx.
-    """
-    for _, roles_for_host in cluster.remotes.iteritems():
-        for name in roles_for_host:
-            yield name
-
-
-def all_roles_of_type(cluster, type_):
-    """
-    Generator of role values.  Each call returns another role of the
-    type specified.
-
-    :param cluster: Cluster extracted from the ctx.
-    :type_: role type
-    """
-    prefix = '{type}.'.format(type=type_)
-    for _, roles_for_host in cluster.remotes.iteritems():
-        for name in roles_for_host:
-            if not name.startswith(prefix):
-                continue
-            id_ = name[len(prefix):]
-            yield id_
-
-
-def is_type(type_):
-    """
-    Returns a matcher function for whether role is of type given.
-    """
-    prefix = '{type}.'.format(type=type_)
-
-    def _is_type(role):
-        """
-        Return type based on the starting role name.  This should
-        probably be improved in the future.
-        """
-        return role.startswith(prefix)
-    return _is_type
-
-
-def num_instances_of_type(cluster, type_):
-    """
-    Total the number of instances of the role type specified in all remotes.
-
-    :param cluster: Cluster extracted from ctx.
-    :param type_: role
-    """
-    remotes_and_roles = cluster.remotes.items()
-    roles = [roles for (remote, roles) in remotes_and_roles]
-    prefix = '{type}.'.format(type=type_)
-    num = sum(sum(1 for role in hostroles if role.startswith(prefix))
-              for hostroles in roles)
-    return num
-
-
-def create_simple_monmap(ctx, remote, conf):
-    """
-    Writes a simple monmap based on current ceph.conf into <tmpdir>/monmap.
-
-    Assumes ceph_conf is up to date.
-
-    Assumes mon sections are named "mon.*", with the dot.
-
-    :return the FSID (as a string) of the newly created monmap
-    """
-    def gen_addresses():
-        """
-        Monitor address generator.
-
-        Each invocation returns the next monitor address
-        """
-        for section, data in conf.iteritems():
-            PREFIX = 'mon.'
-            if not section.startswith(PREFIX):
-                continue
-            name = section[len(PREFIX):]
-            addr = data['mon addr']
-            yield (name, addr)
-
-    addresses = list(gen_addresses())
-    assert addresses, "There are no monitors in config!"
-    log.debug('Ceph mon addresses: %s', addresses)
-
-    testdir = get_testdir(ctx)
-    args = [
-        'adjust-ulimits',
-        'ceph-coverage',
-        '{tdir}/archive/coverage'.format(tdir=testdir),
-        'monmaptool',
-        '--create',
-        '--clobber',
-        ]
-    for (name, addr) in addresses:
-        args.extend(('--add', name, addr))
-    args.extend([
-        '--print',
-        '{tdir}/monmap'.format(tdir=testdir),
-        ])
-
-    r = remote.run(
-        args=args,
-        stdout=StringIO()
-        )
-    monmap_output = r.stdout.getvalue()
-    fsid = re.search("generated fsid (.+)$",
-                     monmap_output, re.MULTILINE).group(1)
-    return fsid
-
-
-def write_file(remote, path, data):
-    """
-    Write data to a remote file
-
-    :param remote: Remote site.
-    :param path: Path on the remote being written to.
-    :param data: Data to be written.
-    """
-    remote.run(
-        args=[
-            'python',
-            '-c',
-            'import shutil, sys; shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))',
-            path,
-            ],
-        stdin=data,
-        )
-
-
-def sudo_write_file(remote, path, data, perms=None, owner=None):
-    """
-    Write data to a remote file as super user
-
-    :param remote: Remote site.
-    :param path: Path on the remote being written to.
-    :param data: Data to be written.
-    :param perms: Permissions on the file being written
-    :param owner: Owner for the file being written
-
-    Both perms and owner are passed directly to chmod.
-    """
-    permargs = []
-    if perms:
-        permargs = [run.Raw('&&'), 'sudo', 'chmod', perms, path]
-    owner_args = []
-    if owner:
-        owner_args = [run.Raw('&&'), 'sudo', 'chown', owner, path]
-    remote.run(
-        args=[
-            'sudo',
-            'python',
-            '-c',
-            'import shutil, sys; shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))',
-            path,
-            ] + owner_args + permargs,
-        stdin=data,
-        )
-
-
-def copy_file(from_remote, from_path, to_remote, to_path=None):
-    """
-    Copies a file from one remote to another.
-    """
-    if to_path is None:
-        to_path = from_path
-    from_remote.run(args=[
-        'sudo', 'scp', '-v', from_path, "{host}:{file}".format(
-            host=to_remote.name, file=to_path)
-    ])
-
-
-def move_file(remote, from_path, to_path, sudo=False):
-    """
-    Move a file from one path to another on a remote site
-
-    The file needs to be stat'ed first, to make sure we
-    maintain the same permissions
-    """
-    args = []
-    if sudo:
-        args.append('sudo')
-    args.extend([
-        'stat',
-        '-c',
-        '\"%a\"',
-        to_path
-        ])
-    proc = remote.run(
-        args=args,
-        stdout=StringIO(),
-        )
-    perms = proc.stdout.getvalue().rstrip().strip('\"')
-
-    args = []
-    if sudo:
-        args.append('sudo')
-    args.extend([
-        'mv',
-        '--',
-        from_path,
-        to_path,
-        ])
-    proc = remote.run(
-        args=args,
-        stdout=StringIO(),
-        )
-
-    # reset the file back to the original permissions
-    args = []
-    if sudo:
-        args.append('sudo')
-    args.extend([
-        'chmod',
-        perms,
-        to_path,
-        ])
-    proc = remote.run(
-        args=args,
-        stdout=StringIO(),
-        )
-
-
-def delete_file(remote, path, sudo=False, force=False):
-    """
-    rm a file on a remote site.
-    """
-    args = []
-    if sudo:
-        args.append('sudo')
-    args.extend(['rm'])
-    if force:
-        args.extend(['-f'])
-    args.extend([
-        '--',
-        path,
-    ])
-    remote.run(
-        args=args,
-        stdout=StringIO(),
-        )
-
-
-def remove_lines_from_file(remote, path, line_is_valid_test,
-                           string_to_test_for):
-    """
-    Remove lines from a file.  This involves reading the file in, removing
-    the appropriate lines, saving the file, and then replacing the original
-    file with the new file.  An intermediate file is used to prevent data
-    loss if the connection to the remote site drops mid-write.
-    """
-    # read in the specified file
-    in_data = get_file(remote, path, False)
-    out_data = ""
-
-    first_line = True
-    # use the 'line_is_valid_test' function to remove unwanted lines
-    for line in in_data.split('\n'):
-        if line_is_valid_test(line, string_to_test_for):
-            if not first_line:
-                out_data += '\n'
-            else:
-                first_line = False
-
-            out_data += '{line}'.format(line=line)
-
-        else:
-            log.info('removing line: {bad_line}'.format(bad_line=line))
-
-    # get a temp file path on the remote host to write to,
-    # we don't want to blow away the remote file and then have the
-    # network drop out
-    temp_file_path = remote_mktemp(remote)
-
-    # write out the data to a temp file
-    write_file(remote, temp_file_path, out_data)
-
-    # then do a 'mv' to the actual file location
-    move_file(remote, temp_file_path, path)
-
-
-def append_lines_to_file(remote, path, lines, sudo=False):
-    """
-    Append lines to a file.
-    An intermediate file is used in the same manner as in
-    remove_lines_from_file.
-    """
-
-    temp_file_path = remote_mktemp(remote)
-
-    data = get_file(remote, path, sudo)
-
-    # add the additional data and write it back out, using a temp file
-    # in case of loss of connectivity, and then mv it to the
-    # actual desired location
-    data += lines
-    write_file(remote, temp_file_path, data)
-
-    # then do a 'mv' to the actual file location
-    move_file(remote, temp_file_path, path)
-
-
-def remote_mktemp(remote, sudo=False):
-    """
-    Make a temporary file on a remote system
-    """
-    args = []
-    if sudo:
-        args.append('sudo')
-    args.extend([
-        'python',
-        '-c',
-        'import os; import tempfile; (fd,fname) = tempfile.mkstemp(); os.close(fd); print fname.rstrip()'
-        ])
-    proc = remote.run(
-        args=args,
-        stdout=StringIO(),
-        )
-    data = proc.stdout.getvalue()
-    return data
-
-
-def create_file(remote, path, data="", permissions=str(644), sudo=False):
-    """
-    Create a file on the remote host.
-    """
-    args = []
-    if sudo:
-        args.append('sudo')
-    args.extend([
-        'touch',
-        path,
-        run.Raw('&&'),
-        'chmod',
-        permissions,
-        '--',
-        path
-    ])
-    remote.run(
-        args=args,
-        stdout=StringIO(),
-        )
-    # now write out the data if any was passed in
-    if "" != data:
-        append_lines_to_file(remote, path, data, sudo)
-
-
-def get_file(remote, path, sudo=False):
-    """
-    Read a file from remote host into memory.
-    """
-    args = []
-    if sudo:
-        args.append('sudo')
-    args.extend([
-        'cat',
-        '--',
-        path,
-        ])
-    proc = remote.run(
-        args=args,
-        stdout=StringIO(),
-        )
-    data = proc.stdout.getvalue()
-    return data
-
-
-def pull_directory(remote, remotedir, localdir):
-    """
-    Copy a remote directory to a local directory.
-    """
-    log.debug('Transferring archived files from %s:%s to %s',
-              remote.shortname, remotedir, localdir)
-    if not os.path.exists(localdir):
-        os.mkdir(localdir)
-    proc = remote.run(
-        args=[
-            'sudo',
-            'tar',
-            'c',
-            '-f', '-',
-            '-C', remotedir,
-            '--',
-            '.',
-            ],
-        stdout=run.PIPE,
-        wait=False,
-        )
-    tar = tarfile.open(mode='r|', fileobj=proc.stdout)
-    while True:
-        ti = tar.next()
-        if ti is None:
-            break
-
-        if ti.isdir():
-            # ignore silently; easier to just create leading dirs below
-            pass
-        elif ti.isfile():
-            sub = safepath.munge(ti.name)
-            safepath.makedirs(root=localdir, path=os.path.dirname(sub))
-            tar.makefile(ti, targetpath=os.path.join(localdir, sub))
-        else:
-            if ti.isdev():
-                type_ = 'device'
-            elif ti.issym():
-                type_ = 'symlink'
-            elif ti.islnk():
-                type_ = 'hard link'
-            else:
-                type_ = 'unknown'
-            log.info('Ignoring tar entry: %r type %r', ti.name, type_)
-            continue
-    proc.exitstatus.get()
-
-
-def pull_directory_tarball(remote, remotedir, localfile):
-    """
-    Copy a remote directory to a local tarball.
-    """
-    log.debug('Transferring archived files from %s:%s to %s',
-              remote.shortname, remotedir, localfile)
-    out = open(localfile, 'w')
-    proc = remote.run(
-        args=[
-            'sudo',
-            'tar',
-            'cz',
-            '-f', '-',
-            '-C', remotedir,
-            '--',
-            '.',
-            ],
-        stdout=out,
-        wait=False,
-        )
-    proc.exitstatus.get()
-
-
-def get_wwn_id_map(remote, devs):
-    """
-    Extract ww_id_map information from ls output on the associated devs.
-
-    Sample dev information:    /dev/sdb: /dev/disk/by-id/wwn-0xf00bad
-
-    :returns: map of devices to device id links
-    """
-    stdout = None
-    try:
-        r = remote.run(
-            args=[
-                'ls',
-                '-l',
-                '/dev/disk/by-id/wwn-*',
-                ],
-            stdout=StringIO(),
-            )
-        stdout = r.stdout.getvalue()
-    except Exception:
-        log.error('Failed to get wwn devices! Using /dev/sd* devices...')
-        return dict((d, d) for d in devs)
-
-    devmap = {}
-
-    # lines will be:
-    # lrwxrwxrwx 1 root root  9 Jan 22 14:58
-    # /dev/disk/by-id/wwn-0x50014ee002ddecaf -> ../../sdb
-    for line in stdout.splitlines():
-        comps = line.split(' ')
-        # comps[-1] should be:
-        # ../../sdb
-        rdev = comps[-1]
-        # translate to /dev/sdb
-        dev = '/dev/{d}'.format(d=rdev.split('/')[-1])
-
-        # comps[-3] should be:
-        # /dev/disk/by-id/wwn-0x50014ee002ddecaf
-        iddev = comps[-3]
-
-        if dev in devs:
-            devmap[dev] = iddev
-
-    return devmap
-
-
-def get_scratch_devices(remote):
-    """
-    Read the scratch disk list from remote host
-    """
-    devs = []
-    try:
-        file_data = get_file(remote, "/scratch_devs")
-        devs = file_data.split()
-    except Exception:
-        r = remote.run(
-            args=['ls', run.Raw('/dev/[sv]d?')],
-            stdout=StringIO()
-            )
-        devs = r.stdout.getvalue().strip().split('\n')
-
-    # Remove root device (vm guests) from the disk list
-    for dev in devs:
-        if 'vda' in dev:
-            devs.remove(dev)
-            log.warn("Removing root device: %s from device list" % dev)
-
-    log.debug('devs={d}'.format(d=devs))
-
-    retval = []
-    for dev in devs:
-        try:
-            # FIXME: Split this into multiple calls.
-            remote.run(
-                args=[
-                    # node exists
-                    'stat',
-                    dev,
-                    run.Raw('&&'),
-                    # readable
-                    'sudo', 'dd', 'if=%s' % dev, 'of=/dev/null', 'count=1',
-                    run.Raw('&&'),
-                    # not mounted
-                    run.Raw('!'),
-                    'mount',
-                    run.Raw('|'),
-                    'grep', '-q', dev,
-                ]
-            )
-            retval.append(dev)
-        except run.CommandFailedError:
-            log.debug("get_scratch_devices: %s is in use" % dev)
-    return retval
-
-
-def wait_until_healthy(ctx, remote):
-    """
-    Wait until a Ceph cluster is healthy. Give up after 15min.
-    """
-    testdir = get_testdir(ctx)
-    with safe_while(tries=(900 / 6), action="wait_until_healthy") as proceed:
-        while proceed():
-            r = remote.run(
-                args=[
-                    'adjust-ulimits',
-                    'ceph-coverage',
-                    '{tdir}/archive/coverage'.format(tdir=testdir),
-                    'ceph',
-                    'health',
-                    ],
-                stdout=StringIO(),
-                logger=log.getChild('health'),
-                )
-            out = r.stdout.getvalue()
-            log.debug('Ceph health: %s', out.rstrip('\n'))
-            if out.split(None, 1)[0] == 'HEALTH_OK':
-                break
-            time.sleep(1)
-
-
-def wait_until_osds_up(ctx, cluster, remote):
-    """Wait until all Ceph OSDs are booted."""
-    num_osds = num_instances_of_type(cluster, 'osd')
-    testdir = get_testdir(ctx)
-    while True:
-        r = remote.run(
-            args=[
-                'adjust-ulimits',
-                'ceph-coverage',
-                '{tdir}/archive/coverage'.format(tdir=testdir),
-                'ceph',
-                'osd', 'dump', '--format=json'
-                ],
-            stdout=StringIO(),
-            logger=log.getChild('health'),
-            )
-        out = r.stdout.getvalue()
-        j = json.loads('\n'.join(out.split('\n')[1:]))
-        up = len(j['osds'])
-        log.debug('%d of %d OSDs are up' % (up, num_osds))
-        if up == num_osds:
-            break
-        time.sleep(1)
-
-
-def wait_until_fuse_mounted(remote, fuse, mountpoint):
-    """
-    Check to make sure that fuse is mounted on mountpoint.  If not,
-    sleep for 5 seconds and check again.
-    """
-    while True:
-        proc = remote.run(
-            args=[
-                'stat',
-                '--file-system',
-                '--printf=%T\n',
-                '--',
-                mountpoint,
-                ],
-            stdout=StringIO(),
-            )
-        fstype = proc.stdout.getvalue().rstrip('\n')
-        if fstype == 'fuseblk':
-            break
-        log.debug('ceph-fuse not yet mounted, got fs type {fstype!r}'.format(
-            fstype=fstype))
-
-        # it shouldn't have exited yet; exposes some trivial problems
-        assert not fuse.exitstatus.ready()
-
-        time.sleep(5)
-    log.info('ceph-fuse is mounted on %s', mountpoint)
-
-
-def reboot(node, timeout=300, interval=30):
-    """
-    Reboots a given system, then waits for it to come back up and
-    re-establishes the ssh connection.
-
-    :param node: The teuthology.orchestra.remote.Remote object of the node
-    :param timeout: The amount of time, in seconds, after which to give up
-                    waiting for the node to return
-    :param interval: The amount of time, in seconds, to wait between attempts
-                     to re-establish with the node. This should not be set to
-                     less than maybe 10, to make sure the node actually goes
-                     down first.
-    """
-    log.info("Rebooting {host}...".format(host=node.hostname))
-    node.run(args=['sudo', 'shutdown', '-r', 'now'])
-    reboot_start_time = time.time()
-    while time.time() - reboot_start_time < timeout:
-        time.sleep(interval)
-        if node.is_online or node.reconnect():
-            return
-    raise RuntimeError(
-        "{host} did not come up after reboot within {time}s".format(
-            host=node.hostname, time=timeout))
-
-
-def reconnect(ctx, timeout, remotes=None):
-    """
-    Connect to all the machines in ctx.cluster.
-
-    Presumably, some of them won't be up. Handle this
-    by waiting for them, unless the wait time exceeds
-    the specified timeout.
-
-    ctx needs to contain the cluster of machines you
-    wish it to try and connect to, as well as a config
-    holding the ssh keys for each of them. As long as it
-    contains this data, you can construct a context
-    that is a subset of your full cluster.
-    """
-    log.info('Re-opening connections...')
-    starttime = time.time()
-
-    if remotes:
-        need_reconnect = remotes
-    else:
-        need_reconnect = ctx.cluster.remotes.keys()
-
-    while need_reconnect:
-        for remote in need_reconnect:
-            log.info('trying to connect to %s', remote.name)
-            success = remote.reconnect()
-            if not success:
-                if time.time() - starttime > timeout:
-                    raise RuntimeError("Could not reconnect to %s" %
-                                       remote.name)
-            else:
-                need_reconnect.remove(remote)
-
-        log.debug('waited {elapsed}'.format(
-            elapsed=str(time.time() - starttime)))
-        time.sleep(1)
-
-
-def get_clients(ctx, roles):
-    """
-    return all remote roles that are clients.
-    """
-    for role in roles:
-        assert isinstance(role, basestring)
-        PREFIX = 'client.'
-        assert role.startswith(PREFIX)
-        id_ = role[len(PREFIX):]
-        (remote,) = ctx.cluster.only(role).remotes.iterkeys()
-        yield (id_, remote)
-
-
-def get_user():
-    """
-    Return the username in the format user@host.
-    """
-    return getpass.getuser() + '@' + socket.gethostname()
-
-
-def read_config(ctx):
-    """
-    read the default teuthology yaml configuration file.
-    """
-    ctx.teuthology_config = {}
-    filename = os.path.join(os.environ['HOME'], '.teuthology.yaml')
-
-    if not os.path.exists(filename):
-        log.debug("%s not found", filename)
-        return
-
-    with file(filename) as f:
-        g = yaml.safe_load_all(f)
-        for new in g:
-            ctx.teuthology_config.update(new)
-
-
-def get_mon_names(ctx):
-    """
-    :returns: a list of monitor names
-    """
-    mons = []
-    for remote, roles in ctx.cluster.remotes.items():
-        for role in roles:
-            if not role.startswith('mon.'):
-                continue
-            mons.append(role)
-    return mons
-
-
-def get_first_mon(ctx, config):
-    """
-    return the "first" mon (alphanumerically, for lack of anything better)
-    """
-    firstmon = sorted(get_mon_names(ctx))[0]
-    assert firstmon
-    return firstmon
-
-
-def replace_all_with_clients(cluster, config):
-    """
-    Converts a dict containing a key all to one
-    mapping all clients to the value of config['all']
-    """
-    assert isinstance(config, dict), 'config must be a dict'
-    if 'all' not in config:
-        return config
-    norm_config = {}
-    assert len(config) == 1, \
-        "config cannot have 'all' and specific clients listed"
-    for client in all_roles_of_type(cluster, 'client'):
-        norm_config['client.{id}'.format(id=client)] = config['all']
-    return norm_config
-
-
-def deep_merge(a, b):
-    """
-    Deep Merge.  If a and b are both lists, all elements in b are
-    added into a.  If a and b are both dictionaries, elements in b are
-    recursively added to a.
-    :param a: object items will be merged into
-    :param b: object items will be merged from
-    """
-    if a is None:
-        return b
-    if b is None:
-        return a
-    if isinstance(a, list):
-        assert isinstance(b, list)
-        a.extend(b)
-        return a
-    if isinstance(a, dict):
-        assert isinstance(b, dict)
-        for (k, v) in b.iteritems():
-            if k in a:
-                a[k] = deep_merge(a[k], v)
-            else:
-                a[k] = v
-        return a
-    return b
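
To make the merge semantics above concrete: lists are concatenated and dicts
are merged recursively, so for example::

    a = {'overrides': {'ceph': {'log-whitelist': ['wss']}, 'fs': 'xfs'}}
    b = {'overrides': {'ceph': {'log-whitelist': ['slow request']}}}
    deep_merge(a, b)
    # a == {'overrides': {'ceph': {'log-whitelist': ['wss', 'slow request']},
    #                     'fs': 'xfs'}}
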
-
-
-def get_valgrind_args(testdir, name, preamble, v):
-    """
-    Build a command line for running valgrind.
-
-    testdir - test results directory
-    name - name of daemon (for naming the log file)
-    preamble - stuff we should run before valgrind
-    v - valgrind arguments
-    """
-    if v is None:
-        return []
-    if not isinstance(v, list):
-        v = [v]
-    val_path = '/var/log/ceph/valgrind'
-    if '--tool=memcheck' in v or '--tool=helgrind' in v:
-        extra_args = [
-            'valgrind',
-            '--num-callers=50',
-            '--suppressions={tdir}/valgrind.supp'.format(tdir=testdir),
-            '--xml=yes',
-            '--xml-file={vdir}/{n}.log'.format(vdir=val_path, n=name)
-            ]
-    else:
-        extra_args = [
-            'valgrind',
-            '--suppressions={tdir}/valgrind.supp'.format(tdir=testdir),
-            '--log-file={vdir}/{n}.log'.format(vdir=val_path, n=name)
-            ]
-    args = [
-        'cd', testdir,
-        run.Raw('&&'),
-        ] + preamble + extra_args + v
-    log.debug('running %s under valgrind with args %s', name, args)
-    return args
-
-
-def stop_daemons_of_type(ctx, type_):
-    """
-    :param type_: type of daemons to be stopped.
-    """
-    log.info('Shutting down %s daemons...' % type_)
-    exc_info = (None, None, None)
-    for daemon in ctx.daemons.iter_daemons_of_role(type_):
-        try:
-            daemon.stop()
-        except (run.CommandFailedError,
-                run.CommandCrashedError,
-                run.ConnectionLostError):
-            exc_info = sys.exc_info()
-            log.exception('Saw exception from %s.%s', daemon.role, daemon.id_)
-    if exc_info != (None, None, None):
-        raise exc_info[0], exc_info[1], exc_info[2]
-
-
-def get_system_type(remote, distro=False, version=False):
-    """
-    Return this system type (deb or rpm) or Distro.
-    """
-    r = remote.run(
-        args=[
-            'sudo', 'lsb_release', '-is',
-        ],
-        stdout=StringIO(),
-    )
-    system_value = r.stdout.getvalue().strip()
-    log.debug("System to be installed: %s" % system_value)
-    if version:
-        v = remote.run(args=['sudo', 'lsb_release', '-rs'], stdout=StringIO())
-        version = v.stdout.getvalue().strip()
-    if distro and version:
-        return system_value.lower(), version
-    if distro:
-        return system_value.lower()
-    if system_value in ['Ubuntu', 'Debian']:
-        return "deb"
-    if system_value in ['CentOS', 'Fedora', 'RedHatEnterpriseServer']:
-        return "rpm"
-    if version:
-        return version
-    return system_value
-
-
-def get_distro(ctx):
-    """
-    Get the name of the distro that we are using (usually the os_type).
-    """
-    try:
-        os_type = ctx.config.get('os_type', ctx.os_type)
-    except AttributeError:
-        os_type = 'ubuntu'
-    try:
-        return ctx.config['downburst'].get('distro', os_type)
-    except KeyError:
-        return os_type
-    except AttributeError:
-        return ctx.os_type
-
-
-def get_distro_version(ctx):
-    """
-    Get the version of the distro that we are using (release number).
-    """
-    default_os_version = dict(
-        ubuntu="12.04",
-        fedora="18",
-        centos="6.4",
-        opensuse="12.2",
-        sles="11-sp2",
-        rhel="6.4",
-        debian='7.0'
-    )
-    distro = get_distro(ctx)
-    if ctx.os_version is not None:
-        return ctx.os_version
-    try:
-        os_version = ctx.config.get('os_version', default_os_version[distro])
-    except AttributeError:
-        os_version = default_os_version[distro]
-    try:
-        return ctx.config['downburst'].get('distroversion', os_version)
-    except (KeyError, AttributeError):
-        return os_version
-
-
-def get_multi_machine_types(machinetype):
-    """
-    Converts machine type string to list based on common delimiters
-    """
-    machinetypes = []
-    machine_type_deliminator = [',', ' ', '\t']
-    for deliminator in machine_type_deliminator:
-        if deliminator in machinetype:
-            machinetypes = machinetype.split(deliminator)
-            break
-    if not machinetypes:
-        machinetypes.append(machinetype)
-    return machinetypes
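
get_multi_machine_types() splits on the first delimiter it finds, so the
following all behave as expected (illustrative values)::

    get_multi_machine_types('plana,burnupi')    # ['plana', 'burnupi']
    get_multi_machine_types('plana burnupi')    # ['plana', 'burnupi']
    get_multi_machine_types('vps')              # ['vps']
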
diff --git a/teuthology/nuke.py b/teuthology/nuke.py
deleted file mode 100644 (file)
index c971cbd..0000000
+++ /dev/null
@@ -1,460 +0,0 @@
-import argparse
-import logging
-import os
-import subprocess
-import time
-import yaml
-
-import teuthology
-from . import orchestra
-import orchestra.remote
-from .orchestra import run
-from .lock import list_locks
-from .lock import unlock_one
-from .misc import config_file
-from .misc import get_testdir
-from .misc import get_user
-from .misc import read_config
-from .misc import reconnect
-from .parallel import parallel
-from .task import install as install_task
-from .task.internal import check_lock
-from .task.internal import connect
-
-log = logging.getLogger(__name__)
-
-
-def shutdown_daemons(ctx):
-    nodes = {}
-    for remote in ctx.cluster.remotes.iterkeys():
-        proc = remote.run(
-            args=[
-                'if', 'grep', '-q', 'ceph-fuse', '/etc/mtab', run.Raw(';'),
-                'then',
-                'grep', 'ceph-fuse', '/etc/mtab', run.Raw('|'),
-                'grep', '-o', " /.* fuse", run.Raw('|'),
-                'grep', '-o', "/.* ", run.Raw('|'),
-                'xargs', 'sudo', 'fusermount', '-u', run.Raw(';'),
-                'fi',
-                run.Raw(';'),
-                'sudo',
-                'killall',
-                '--quiet',
-                'ceph-mon',
-                'ceph-osd',
-                'ceph-mds',
-                'ceph-fuse',
-                'ceph-disk',
-                'radosgw',
-                'ceph_test_rados',
-                'rados',
-                'apache2',
-                run.Raw('||'),
-                'true',  # ignore errors from ceph binaries not being found
-            ],
-            wait=False,
-        )
-        nodes[remote.name] = proc
-
-    for name, proc in nodes.iteritems():
-        log.info('Waiting for %s to finish shutdowns...', name)
-        proc.exitstatus.get()
-
-
-def find_kernel_mounts(ctx):
-    nodes = {}
-    log.info('Looking for kernel mounts to handle...')
-    for remote in ctx.cluster.remotes.iterkeys():
-        proc = remote.run(
-            args=[
-                'grep', '-q', ' ceph ', '/etc/mtab',
-                run.Raw('||'),
-                'grep', '-q', '^/dev/rbd', '/etc/mtab',
-            ],
-            wait=False,
-        )
-        nodes[remote] = proc
-    kernel_mounts = list()
-    for remote, proc in nodes.iteritems():
-        try:
-            proc.exitstatus.get()
-            log.debug('kernel mount exists on %s', remote.name)
-            kernel_mounts.append(remote)
-        except run.CommandFailedError:  # no mounts!
-            log.debug('no kernel mount on %s', remote.name)
-
-    return kernel_mounts
-
-
-def remove_kernel_mounts(ctx, kernel_mounts):
-    """
-    Properly we should be able to just do a forced unmount,
-    but that doesn't seem to be working, so reboot instead.
-    """
-    nodes = {}
-    for remote in kernel_mounts:
-        log.info('clearing kernel mount from %s', remote.name)
-        proc = remote.run(
-            args=[
-                'grep', 'ceph', '/etc/mtab', run.Raw('|'),
-                'grep', '-o', "on /.* type", run.Raw('|'),
-                'grep', '-o', "/.* ", run.Raw('|'),
-                'xargs', '-r',
-                'sudo', 'umount', '-f',
-            ],
-            wait=False
-        )
-        nodes[remote] = proc
-
-    for remote, proc in nodes.iteritems():
-        proc.exitstatus.get()
-
-
-def remove_osd_mounts(ctx):
-    """
-    unmount any osd data mounts (scratch disks)
-    """
-    ctx.cluster.run(
-        args=[
-            'grep',
-            '/var/lib/ceph/osd/',
-            '/etc/mtab',
-            run.Raw('|'),
-            'awk', '{print $2}', run.Raw('|'),
-            'xargs', '-r',
-            'sudo', 'umount', run.Raw(';'),
-            'true'
-        ],
-    )
-
-
-def remove_osd_tmpfs(ctx):
-    """
-    unmount tmpfs mounts
-    """
-    ctx.cluster.run(
-        args=[
-            'egrep', 'tmpfs\s+/mnt', '/etc/mtab', run.Raw('|'),
-            'awk', '{print $2}', run.Raw('|'),
-            'xargs', '-r',
-            'sudo', 'umount', run.Raw(';'),
-            'true'
-        ],
-    )
-
-
-def reboot(ctx, remotes):
-    nodes = {}
-    for remote in remotes:
-        log.info('rebooting %s', remote.name)
-        try:
-            proc = remote.run(  # note use of -n to force a no-sync reboot
-                args=[
-                    'sync',
-                    run.Raw('&'),
-                    'sleep', '5',
-                    run.Raw(';'),
-                    'sudo', 'reboot', '-f', '-n'
-                    ],
-                wait=False
-                )
-        except Exception:
-            log.exception('ignoring exception during reboot command')
-        nodes[remote] = proc
-        # we just ignore these procs because reboot -f doesn't actually
-        # send anything back to the ssh client!
-        # for remote, proc in nodes.iteritems():
-        # proc.exitstatus.get()
-    if remotes:
-        log.info('waiting for nodes to reboot')
-        time.sleep(8)  # if we try and reconnect too quickly, it succeeds!
-        reconnect(ctx, 480)  # allow 8 minutes for the reboots
-
-
-def reset_syslog_dir(ctx):
-    nodes = {}
-    for remote in ctx.cluster.remotes.iterkeys():
-        proc = remote.run(
-            args=[
-                'if', 'test', '-e', '/etc/rsyslog.d/80-cephtest.conf',
-                run.Raw(';'),
-                'then',
-                'sudo', 'rm', '-f', '--', '/etc/rsyslog.d/80-cephtest.conf',
-                run.Raw('&&'),
-                'sudo', 'service', 'rsyslog', 'restart',
-                run.Raw(';'),
-                'fi',
-                run.Raw(';'),
-            ],
-            wait=False,
-        )
-        nodes[remote.name] = proc
-
-    for name, proc in nodes.iteritems():
-        log.info('Waiting for %s to restart syslog...', name)
-        proc.exitstatus.get()
-
-
-def dpkg_configure(ctx):
-    nodes = {}
-    for remote in ctx.cluster.remotes.iterkeys():
-        proc = remote.run(
-            args=[
-                'sudo', 'dpkg', '--configure', '-a',
-                run.Raw('&&'),
-                'sudo', 'apt-get', '-f', 'install',
-                run.Raw('||'),
-                ':',
-            ],
-            wait=False,
-        )
-        nodes[remote.name] = proc
-
-    for name, proc in nodes.iteritems():
-        log.info(
-            'Waiting for %s to dpkg --configure -a and apt-get -f install...',
-            name)
-        proc.exitstatus.get()
-
-
-def remove_installed_packages(ctx):
-
-    dpkg_configure(ctx)
-    config = {'project': 'ceph'}
-    install_task.remove_packages(
-        ctx,
-        config,
-        {"deb": install_task.deb_packages['ceph'] + ['salt-common', 'salt-minion'],
-         "rpm": install_task.rpm_packages['ceph']})
-    install_task.remove_sources(ctx, config)
-    install_task.purge_data(ctx)
-
-
-def remove_testing_tree(ctx):
-    nodes = {}
-    for remote in ctx.cluster.remotes.iterkeys():
-        proc = remote.run(
-            args=[
-                'sudo', 'rm', '-rf', get_testdir(ctx),
-                # just for old time's sake
-                run.Raw('&&'),
-                'sudo', 'rm', '-rf', '/tmp/cephtest',
-                run.Raw('&&'),
-                'sudo', 'rm', '-rf', '/home/ubuntu/cephtest',
-                run.Raw('&&'),
-                'sudo', 'rm', '-rf', '/etc/ceph',
-            ],
-            wait=False,
-        )
-        nodes[remote.name] = proc
-
-    for name, proc in nodes.iteritems():
-        log.info('Waiting for %s to clear filesystem...', name)
-        proc.exitstatus.get()
-
-
-def synch_clocks(remotes):
-    nodes = {}
-    for remote in remotes:
-        proc = remote.run(
-            args=[
-                'sudo', 'service', 'ntp', 'stop',
-                run.Raw('&&'),
-                'sudo', 'ntpdate-debian',
-                run.Raw('&&'),
-                'sudo', 'hwclock', '--systohc', '--utc',
-                run.Raw('&&'),
-                'sudo', 'service', 'ntp', 'start',
-                run.Raw('||'),
-                'true',    # ignore errors; we may be racing with ntpd startup
-            ],
-            wait=False,
-        )
-        nodes[remote.name] = proc
-    for name, proc in nodes.iteritems():
-        log.info('Waiting for clock to synchronize on %s...', name)
-        proc.exitstatus.get()
-
-
-def main(ctx):
-    if ctx.verbose:
-        teuthology.log.setLevel(logging.DEBUG)
-
-    info = {}
-    if ctx.archive:
-        ctx.config = config_file(ctx.archive + '/config.yaml')
-        ifn = os.path.join(ctx.archive, 'info.yaml')
-        if os.path.exists(ifn):
-            with file(ifn, 'r') as fd:
-                info = yaml.load(fd.read())
-        if not ctx.pid:
-            ctx.pid = info.get('pid')
-            if not ctx.pid:
-                ctx.pid = int(open(ctx.archive + '/pid').read().rstrip('\n'))
-        if not ctx.owner:
-            ctx.owner = info.get('owner')
-            if not ctx.owner:
-                ctx.owner = open(ctx.archive + '/owner').read().rstrip('\n')
-
-    read_config(ctx)
-
-    log.info(
-        '\n  '.join(
-            ['targets:', ] + yaml.safe_dump(
-                ctx.config['targets'],
-                default_flow_style=False).splitlines()))
-
-    if ctx.owner is None:
-        ctx.owner = get_user()
-
-    if ctx.pid:
-        if ctx.archive:
-            log.info('Killing teuthology process at pid %d', ctx.pid)
-            os.system('grep -q %s /proc/%d/cmdline && sudo kill %d' % (
-                ctx.archive,
-                ctx.pid,
-                ctx.pid))
-        else:
-            subprocess.check_call(["kill", "-9", str(ctx.pid)])
-
-    nuke(ctx, ctx.unlock, ctx.synch_clocks, ctx.reboot_all, ctx.noipmi)
-
-
-def nuke(ctx, should_unlock, sync_clocks=True, reboot_all=True, noipmi=False):
-    total_unnuked = {}
-    targets = dict(ctx.config['targets'])
-    if ctx.name:
-        log.info('Checking targets against current locks')
-        locks = list_locks()
-        # Remove targets whose description doesn't match the archive name.
-        for lock in locks:
-            for target in targets:
-                if target == lock['name']:
-                    if ctx.name not in lock['description']:
-                        del ctx.config['targets'][lock['name']]
-                        log.info(
-                            "Not nuking %s because description doesn't match",
-                            lock['name'])
-    with parallel() as p:
-        for target, hostkey in ctx.config['targets'].iteritems():
-            p.spawn(
-                nuke_one,
-                ctx,
-                {target: hostkey},
-                should_unlock,
-                sync_clocks,
-                reboot_all,
-                ctx.config.get('check-locks', True),
-                noipmi,
-            )
-        for unnuked in p:
-            if unnuked:
-                total_unnuked.update(unnuked)
-    if total_unnuked:
-        log.error('Could not nuke the following targets:\n' +
-                  '\n  '.join(['targets:', ] +
-                              yaml.safe_dump(
-                                  total_unnuked,
-                                  default_flow_style=False).splitlines()))
-
-
-def nuke_one(ctx, targets, should_unlock, synch_clocks, reboot_all,
-             check_locks, noipmi):
-    ret = None
-    ctx = argparse.Namespace(
-        config=dict(targets=targets),
-        owner=ctx.owner,
-        check_locks=check_locks,
-        synch_clocks=synch_clocks,
-        reboot_all=reboot_all,
-        teuthology_config=ctx.teuthology_config,
-        name=ctx.name,
-        noipmi=noipmi,
-    )
-    try:
-        nuke_helper(ctx)
-    except Exception:
-        log.exception('Could not nuke all targets in %s' % targets)
-        # not re-raising, so that parallel calls aren't killed
-        ret = targets
-    else:
-        if should_unlock:
-            for target in targets.keys():
-                unlock_one(ctx, target, ctx.owner)
-    return ret
-
-
-def nuke_helper(ctx):
-    # ensure node is up with ipmi
-
-    (target,) = ctx.config['targets'].keys()
-    host = target.split('@')[-1]
-    shortname = host.split('.')[0]
-    if 'vpm' in shortname:
-        return
-    log.debug('shortname: %s' % shortname)
-    log.debug('{ctx}'.format(ctx=ctx))
-    if not ctx.noipmi and 'ipmi_user' in ctx.teuthology_config:
-        console = orchestra.remote.getRemoteConsole(
-            name=host,
-            ipmiuser=ctx.teuthology_config['ipmi_user'],
-            ipmipass=ctx.teuthology_config['ipmi_password'],
-            ipmidomain=ctx.teuthology_config['ipmi_domain'])
-        cname = '{host}.{domain}'.format(
-            host=shortname,
-            domain=ctx.teuthology_config['ipmi_domain'])
-        log.info('checking console status of %s' % cname)
-        if not console.check_status():
-            # not powered on or can't get IPMI status.  Try to power on
-            console.power_on()
-            # try to get status again, waiting for login prompt this time
-            log.info('checking console status of %s' % cname)
-            if not console.check_status(100):
-                log.error('Failed to get console status for %s; '
-                          'disabling console...', cname)
-            else:
-                log.info('console ready on %s' % cname)
-        else:
-            log.info('console ready on %s' % cname)
-
-    if ctx.check_locks:
-        check_lock(ctx, None)
-    connect(ctx, None)
-
-    log.info('Unmounting ceph-fuse and killing daemons...')
-    shutdown_daemons(ctx)
-    log.info('All daemons killed.')
-
-    need_reboot = find_kernel_mounts(ctx)
-
-    # no need to unmount anything if we're rebooting
-    if ctx.reboot_all:
-        need_reboot = ctx.cluster.remotes.keys()
-    else:
-        log.info('Unmounting any osd data directories...')
-        remove_osd_mounts(ctx)
-        log.info('Unmounting any osd tmpfs dirs...')
-        remove_osd_tmpfs(ctx)
-        # log.info('Dealing with any kernel mounts...')
-        # remove_kernel_mounts(ctx, need_reboot)
-
-    if need_reboot:
-        reboot(ctx, need_reboot)
-    log.info('All kernel mounts gone.')
-
-    log.info('Synchronizing clocks...')
-    if ctx.synch_clocks:
-        need_reboot = ctx.cluster.remotes.keys()
-    synch_clocks(need_reboot)
-
-    log.info('Making sure firmware.git is not locked...')
-    ctx.cluster.run(args=['sudo', 'rm', '-f',
-                          '/lib/firmware/updates/.git/index.lock', ])
-
-    log.info('Resetting syslog output locations...')
-    reset_syslog_dir(ctx)
-    log.info('Clearing filesystem of test data...')
-    remove_testing_tree(ctx)
-    log.info('Filesystem Cleared.')
-    remove_installed_packages(ctx)
-    log.info('Installed packages removed.')
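A hedged sketch of driving nuke() from Python rather than the CLI; the Namespace fields mirror what main() and nuke_one() read above, and the target, host key, and owner values are placeholders:

    import argparse
    from teuthology.nuke import nuke

    ctx = argparse.Namespace(
        config={
            'targets': {'ubuntu@example-node': 'ssh-rsa AAAA-placeholder'},
            'check-locks': False,   # skip check_lock() inside nuke_helper()
        },
        owner='jdoe@example.invalid',
        name=None,                  # no archive name, so the lock-description check is skipped
        teuthology_config={},
        noipmi=True,                # skip the IPMI console handling
    )
    # Do not unlock, leave clocks alone, and avoid forcing a reboot of every node.
    nuke(ctx, should_unlock=False, sync_clocks=False, reboot_all=False, noipmi=True)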
diff --git a/teuthology/orchestra/__init__.py b/teuthology/orchestra/__init__.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/teuthology/orchestra/cluster.py b/teuthology/orchestra/cluster.py
deleted file mode 100644 (file)
index 7d96dc1..0000000
+++ /dev/null
@@ -1,135 +0,0 @@
-"""
-Cluster definition.
-
-As part of the test context, Cluster is used to save connection information.
-"""
-import teuthology.misc
-
-
-class Cluster(object):
-    """
-    Manage SSH connections to a cluster of machines.
-    """
-
-    def __init__(self, remotes=None):
-        """
-        :param remotes: A sequence of 2-tuples of this format:
-                            (Remote, [role_1, role_2 ...])
-        """
-        self.remotes = {}
-        if remotes is not None:
-            for remote, roles in remotes:
-                self.add(remote, roles)
-
-    def __repr__(self):
-        remotes = [(k, v) for k, v in self.remotes.items()]
-        remotes.sort(key=lambda tup: tup[0].name)
-        remotes = '[' + ', '.join('[{remote!r}, {roles!r}]'.format(
-            remote=k, roles=v) for k, v in remotes) + ']'
-        return '{classname}(remotes={remotes})'.format(
-            classname=self.__class__.__name__,
-            remotes=remotes,
-            )
-
-    def __str__(self):
-        remotes = list(self.remotes.items())
-        remotes.sort(key=lambda tup: tup[0].name)
-        remotes = ((k, ','.join(v)) for k, v in remotes)
-        remotes = ('{k}[{v}]'.format(k=k, v=v) for k, v in remotes)
-        return ' '.join(remotes)
-
-    def add(self, remote, roles):
-        """
-        Add roles to the list of remotes.
-        """
-        if remote in self.remotes:
-            raise RuntimeError(
-                'Remote {new!r} already found in remotes: {old!r}'.format(
-                    new=remote,
-                    old=self.remotes[remote],
-                    ),
-                )
-        self.remotes[remote] = list(roles)
-
-    def run(self, **kwargs):
-        """
-        Run a command on all the nodes in this cluster.
-
-        Goes through nodes in alphabetical order.
-
-        Unless wait=False is given, the command finishes on one node before
-        it is started on the next.
-
-        Returns a list of `RemoteProcess`.
-        """
-        remotes = sorted(self.remotes.iterkeys(), key=lambda rem: rem.name)
-        return [remote.run(**kwargs) for remote in remotes]
-
-    def write_file(self, file_name, content, sudo=False, perms=None):
-        """
-        Write text to a file on each node.
-
-        :param file_name: file name
-        :param content: file content
-        :param sudo: use sudo
-        :param perms: file permissions (passed to chmod) ONLY if sudo is True
-        """
-        remotes = sorted(self.remotes.iterkeys(), key=lambda rem: rem.name)
-        for remote in remotes:
-            if sudo:
-                teuthology.misc.sudo_write_file(remote, file_name, content, perms)
-            else:
-                if perms is not None:
-                    raise ValueError("To specify perms, sudo must be True")
-                teuthology.misc.write_file(remote, file_name, content, perms)
-
-    def only(self, *roles):
-        """
-        Return a cluster with only the remotes that have all of given roles.
-
-        For roles given as strings, they are matched against the roles
-        on a remote, and the remote passes the check only if all the
-        roles listed are present.
-
-        Argument can be callable, and will act as a match on roles of
-        the remote. The matcher will be evaluated one role at a time,
-        but a match on any role is good enough. Note that this is
-        subtly different from the behavior of string roles, but is
-        logical if you consider a callable to be similar to passing a
-        non-string object with an `__eq__` method.
-
-        For example::
-
-           web = mycluster.only(lambda role: role.startswith('web-'))
-        """
-        c = self.__class__()
-        want = frozenset(r for r in roles if not callable(r))
-        matchers = [r for r in roles if callable(r)]
-
-        for remote, has_roles in self.remotes.iteritems():
-            # strings given as roles must all match
-            if frozenset(has_roles) & want != want:
-                # not a match
-                continue
-
-            # every matcher given must match at least one role
-            if not all(
-                any(matcher(role) for role in has_roles)
-                for matcher in matchers
-                ):
-                continue
-
-            c.add(remote, has_roles)
-
-        return c
-
-    def exclude(self, *roles):
-        """
-        Return a cluster *without* remotes that have all of given roles.
-
-        This is the opposite of `only`.
-        """
-        matches = self.only(*roles)
-        c = self.__class__()
-        for remote, has_roles in self.remotes.iteritems():
-            if remote not in matches.remotes:
-                c.add(remote, has_roles)
-        return c
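A brief usage sketch of the role filtering described in only() and exclude(); the hostnames and roles here are illustrative:

    from teuthology.orchestra import connection
    from teuthology.orchestra.cluster import Cluster
    from teuthology.orchestra.remote import Remote

    def make_remote(name):
        # Hand Remote an already-open connection so it does not reconnect.
        return Remote(name, ssh=connection.connect(name))

    c = Cluster(remotes=[
        (make_remote('ubuntu@node-a.example.invalid'), ['mon.0', 'osd.0']),
        (make_remote('ubuntu@node-b.example.invalid'), ['osd.1', 'client.0']),
    ])
    c.only('mon.0').run(args=['uptime'])                  # node-a only
    osds = c.only(lambda role: role.startswith('osd.'))   # both nodes
    rest = c.exclude('mon.0')                             # node-b only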
diff --git a/teuthology/orchestra/connection.py b/teuthology/orchestra/connection.py
deleted file mode 100644 (file)
index 9317d69..0000000
+++ /dev/null
@@ -1,91 +0,0 @@
-"""
-Connection utilities
-"""
-import base64
-import paramiko
-import os
-from ..config import config
-
-
-def split_user(user_at_host):
-    """
-    break apart user@host fields into user and host.
-    """
-    try:
-        user, host = user_at_host.rsplit('@', 1)
-    except ValueError:
-        user, host = None, user_at_host
-    assert user != '', \
-        "Bad input to split_user: {user_at_host!r}".format(user_at_host=user_at_host)
-    return user, host
-
-
-def create_key(keytype, key):
-    """
-    Create an ssh-rsa or ssh-dss key.
-    """
-    if keytype == 'ssh-rsa':
-        return paramiko.rsakey.RSAKey(data=base64.decodestring(key))
-    elif keytype == 'ssh-dss':
-        return paramiko.dsskey.DSSKey(data=base64.decodestring(key))
-    else:
-        raise ValueError('keytype must be ssh-rsa or ssh-dss')
-
-
-def connect(user_at_host, host_key=None, keep_alive=False,
-            _SSHClient=None, _create_key=None):
-    """
-    ssh connection routine.
-
-    :param user_at_host: user@host
-    :param host_key: ssh key
-    :param keep_alive: keep_alive indicator
-    :param _SSHClient: client, default is paramiko ssh client
-    :param _create_key: routine to create a key (defaults to local create_key)
-    :return: ssh connection.
-    """
-    user, host = split_user(user_at_host)
-    if _SSHClient is None:
-        _SSHClient = paramiko.SSHClient
-    ssh = _SSHClient()
-
-    if _create_key is None:
-        _create_key = create_key
-
-    if host_key is None:
-        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
-        if config.verify_host_keys is True:
-            ssh.load_system_host_keys()
-
-    else:
-        keytype, key = host_key.split(' ', 1)
-        ssh.get_host_keys().add(
-            hostname=host,
-            keytype=keytype,
-            key=_create_key(keytype, key)
-            )
-
-    connect_args = dict(
-        hostname=host,
-        username=user,
-        timeout=60
-    )
-
-    ssh_config_path = os.path.expanduser("~/.ssh/config")
-    if os.path.exists(ssh_config_path):
-        ssh_config = paramiko.SSHConfig()
-        ssh_config.parse(open(ssh_config_path))
-        opts = ssh_config.lookup(host)
-        opts_to_args = {
-            'identityfile': 'key_filename',
-            'host': 'hostname',
-            'user': 'username'
-        }
-        for opt_name, arg_name in opts_to_args.items():
-            if opt_name in opts:
-                connect_args[arg_name] = opts[opt_name]
-
-    # just let the exceptions bubble up to caller
-    ssh.connect(**connect_args)
-    ssh.get_transport().set_keepalive(keep_alive)
-    return ssh
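A short sketch of connect(); the user, host, and key material are placeholders:

    from teuthology.orchestra import connection

    # With no host_key, unknown hosts are auto-added (and the system host keys
    # are loaded when config.verify_host_keys is set); matching entries from
    # ~/.ssh/config are applied to the connection as well.
    ssh = connection.connect('jdoe@node-a.example.invalid', keep_alive=True)

    # Pinning an explicit host key instead ('<keytype> <base64-key>' format):
    ssh = connection.connect(
        'jdoe@node-a.example.invalid',
        host_key='ssh-rsa AAAAB3-placeholder-key',
    )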
diff --git a/teuthology/orchestra/monkey.py b/teuthology/orchestra/monkey.py
deleted file mode 100644 (file)
index cd6104b..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-"""
-Monkey patches (paramiko support)
-"""
-import logging
-
-log = logging.getLogger(__name__)
-
-def patch_001_paramiko_deprecation():
-    """
-    Silence an unhelpful DeprecationWarning triggered by Paramiko.
-
-    Not strictly a monkeypatch.
-    """
-    import warnings
-    warnings.filterwarnings(
-        category=DeprecationWarning,
-        message='This application uses RandomPool,',
-        action='ignore',
-        )
-
-
-def patch_100_paramiko_log():
-    """
-    Silence some noise paramiko likes to log.
-
-    Not strictly a monkeypatch.
-    """
-    logging.getLogger('paramiko.transport').setLevel(logging.WARNING)
-
-
-def patch_100_logger_getChild():
-    """
-    Imitate Python 2.7 feature Logger.getChild.
-    """
-    import logging
-    if not hasattr(logging.Logger, 'getChild'):
-        def getChild(self, name):
-            return logging.getLogger('.'.join([self.name, name]))
-        logging.Logger.getChild = getChild
-
-
-def patch_all():
-    """
-    Run all the patch_* functions in this module.
-    """
-    monkeys = [(k, v) for (k, v) in globals().iteritems() if k.startswith('patch_') and k != 'patch_all']
-    monkeys.sort()
-    for k, v in monkeys:
-        log.debug('Patching %s', k)
-        v()
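Callers are expected to apply the patches once, before any paramiko activity; for instance:

    from teuthology.orchestra import monkey
    monkey.patch_all()   # runs every patch_* function above in sorted order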
diff --git a/teuthology/orchestra/remote.py b/teuthology/orchestra/remote.py
deleted file mode 100644 (file)
index 79dbc2d..0000000
+++ /dev/null
@@ -1,397 +0,0 @@
-"""
-Support for paramiko remote objects.
-"""
-from . import run
-import connection
-from teuthology import misc
-import time
-import pexpect
-import re
-import logging
-from teuthology import lockstatus as ls
-
-try:
-    import libvirt
-except ImportError:
-    libvirt = None
-
-log = logging.getLogger(__name__)
-
-
-class Remote(object):
-
-    """
-    A connection to a remote host.
-
-    This is a higher-level wrapper around Paramiko's `SSHClient`.
-    """
-
-    # for unit tests to hook into
-    _runner = staticmethod(run.run)
-
-    def __init__(self, name, ssh=None, shortname=None, console=None,
-                 host_key=None, keep_alive=True):
-        self.name = name
-        self._shortname = shortname
-        self.host_key = host_key
-        self.keep_alive = keep_alive
-        self.console = console
-        self.ssh = ssh or self.connect()
-
-    def connect(self):
-        self.ssh = connection.connect(user_at_host=self.name,
-                                      host_key=self.host_key,
-                                      keep_alive=self.keep_alive)
-        return self.ssh
-
-    def reconnect(self):
-        """
-        Attempts to re-establish connection. Returns True for success; False
-        for failure.
-        """
-        self.ssh.close()
-        try:
-            self.ssh = self.connect()
-            return self.is_online
-        except Exception as e:
-            log.debug(e)
-            return False
-
-    @property
-    def shortname(self):
-        """
-        Short name of the remote; falls back to the full name.
-        """
-        name = self._shortname
-        if name is None:
-            name = self.name
-        return name
-
-    @property
-    def hostname(self):
-        return self.name.split('@')[1]
-
-    @property
-    def is_online(self):
-        if self.ssh is None:
-            return False
-        try:
-            self.run(args="echo online")
-        except Exception:
-            return False
-        return self.ssh.get_transport().is_active()
-
-    @property
-    def system_type(self):
-        """
-        System type of the remote, as reported by misc.get_system_type.
-        """
-        return misc.get_system_type(self)
-
-    def __str__(self):
-        return self.shortname
-
-    def __repr__(self):
-        return '{classname}(name={name!r})'.format(
-            classname=self.__class__.__name__,
-            name=self.name,
-            )
-
-    def run(self, **kwargs):
-        """
-        This calls `orchestra.run.run` with our SSH client.
-
-        TODO refactor to move run.run here?
-        """
-        r = self._runner(client=self.ssh, **kwargs)
-        r.remote = self
-        return r
-
-
-def getShortName(name):
-    """
-    Extract the name portion from remote name strings.
-    """
-    hn = name.split('@')[-1]
-    p = re.compile('([^.]+)\.?.*')
-    return p.match(hn).groups()[0]
-
-
-class PhysicalConsole():
-    """
-    Physical Console (set from getRemoteConsole)
-    """
-    def __init__(self, name, ipmiuser, ipmipass, ipmidomain, logfile=None,
-                 timeout=20):
-        self.name = name
-        self.shortname = getShortName(name)
-        self.timeout = timeout
-        self.logfile = logfile
-        self.ipmiuser = ipmiuser
-        self.ipmipass = ipmipass
-        self.ipmidomain = ipmidomain
-
-    def _exec(self, cmd):
-        """
-        Run the cmd specified using ipmitool.
-        """
-        if not self.ipmiuser or not self.ipmipass or not self.ipmidomain:
-            log.error('Must set ipmi_user, ipmi_password, and ipmi_domain in .teuthology.yaml')  # noqa
-        log.debug('pexpect command: ipmitool -H {s}.{dn} -I lanplus -U {ipmiuser} -P {ipmipass} {cmd}'.format(  # noqa
-                  cmd=cmd,
-                  s=self.shortname,
-                  dn=self.ipmidomain,
-                  ipmiuser=self.ipmiuser,
-                  ipmipass=self.ipmipass))
-
-        child = pexpect.spawn('ipmitool -H {s}.{dn} -I lanplus -U {ipmiuser} -P {ipmipass} {cmd}'.format(  # noqa
-                              cmd=cmd,
-                              s=self.shortname,
-                              dn=self.ipmidomain,
-                              ipmiuser=self.ipmiuser,
-                              ipmipass=self.ipmipass))
-        if self.logfile:
-            child.logfile = self.logfile
-        return child
-
-    def _exit_session(self, child, timeout=None):
-        child.send('~.')
-        t = timeout
-        if not t:
-            t = self.timeout
-        r = child.expect(
-            ['terminated ipmitool', pexpect.TIMEOUT, pexpect.EOF], timeout=t)
-        if r != 0:
-            self._exec('sol deactivate')
-
-    def _wait_for_login(self, timeout=None, attempts=6):
-        """
-        Wait for login.  Retry if timeouts occur on commands.
-        """
-        log.debug('Waiting for login prompt on {s}'.format(s=self.shortname))
-        # wait for login prompt to indicate boot completed
-        t = timeout
-        if not t:
-            t = self.timeout
-        for i in range(0, attempts):
-            start = time.time()
-            while time.time() - start < t:
-                child = self._exec('sol activate')
-                child.send('\n')
-                log.debug('expect: {s} login'.format(s=self.shortname))
-                r = child.expect(
-                    ['{s} login: '.format(s=self.shortname),
-                     pexpect.TIMEOUT,
-                     pexpect.EOF],
-                    timeout=(t - (time.time() - start)))
-                log.debug('expect before: {b}'.format(b=child.before))
-                log.debug('expect after: {a}'.format(a=child.after))
-
-                self._exit_session(child)
-                if r == 0:
-                    return
-
-    def check_power(self, state, timeout=None):
-        """
-        Check power.  Retry if EOF encountered on power check read.
-        """
-        total_timeout = timeout
-        if not total_timeout:
-            total_timeout = self.timeout
-        t = 1
-        total = t
-        ta = time.time()
-        while total < total_timeout:
-            c = self._exec('power status')
-            r = c.expect(['Chassis Power is {s}'.format(
-                s=state), pexpect.EOF, pexpect.TIMEOUT], timeout=t)
-            tb = time.time()
-            if r == 0:
-                return True
-            elif r == 1:
-                # keep trying if EOF is reached, first sleep for remaining
-                # timeout interval
-                if tb - ta < t:
-                    time.sleep(t - (tb - ta))
-            # go around again if EOF or TIMEOUT
-            ta = tb
-            t *= 2
-            total += t
-        return False
-
-    def check_status(self, timeout=None):
-        """
-        Check status.  Returns True if console is at login prompt
-        """
-        try:
-            # check for login prompt at console
-            self._wait_for_login(timeout)
-            return True
-        except Exception as e:
-            log.info('Failed to get ipmi console status for {s}: {e}'.format(
-                s=self.shortname, e=e))
-            return False
-
-    def power_cycle(self):
-        """
-        Power cycle and wait for login.
-        """
-        log.info('Power cycling {s}'.format(s=self.shortname))
-        child = self._exec('power cycle')
-        child.expect('Chassis Power Control: Cycle', timeout=self.timeout)
-        self._wait_for_login()
-        log.info('Power cycle for {s} completed'.format(s=self.shortname))
-
-    def hard_reset(self):
-        """
-        Perform physical hard reset.  Retry if EOF returned from read
-        and wait for login when complete.
-        """
-        log.info('Performing hard reset of {s}'.format(s=self.shortname))
-        start = time.time()
-        while time.time() - start < self.timeout:
-            child = self._exec('power reset')
-            r = child.expect(['Chassis Power Control: Reset', pexpect.EOF],
-                             timeout=self.timeout)
-            if r == 0:
-                break
-        self._wait_for_login()
-        log.info('Hard reset for {s} completed'.format(s=self.shortname))
-
-    def power_on(self):
-        """
-        Physical power on.  Loop checking cmd return.
-        """
-        log.info('Power on {s}'.format(s=self.shortname))
-        start = time.time()
-        while time.time() - start < self.timeout:
-            child = self._exec('power on')
-            r = child.expect(['Chassis Power Control: Up/On', pexpect.EOF],
-                             timeout=self.timeout)
-            if r == 0:
-                break
-        if not self.check_power('on'):
-            log.error('Failed to power on {s}'.format(s=self.shortname))
-        log.info('Power on for {s} completed'.format(s=self.shortname))
-
-    def power_off(self):
-        """
-        Physical power off.  Loop checking cmd return.
-        """
-        log.info('Power off {s}'.format(s=self.shortname))
-        start = time.time()
-        while time.time() - start < self.timeout:
-            child = self._exec('power off')
-            r = child.expect(['Chassis Power Control: Down/Off', pexpect.EOF],
-                             timeout=self.timeout)
-            if r == 0:
-                break
-        if not self.check_power('off', 60):
-            log.error('Failed to power off {s}'.format(s=self.shortname))
-        log.info('Power off for {s} completed'.format(s=self.shortname))
-
-    def power_off_for_interval(self, interval=30):
-        """
-        Physical power off for an interval. Wait for login when complete.
-
-        :param interval: Length of power-off period.
-        """
-        log.info('Power off {s} for {i} seconds'.format(
-            s=self.shortname, i=interval))
-        child = self._exec('power off')
-        child.expect('Chassis Power Control: Down/Off', timeout=self.timeout)
-
-        time.sleep(interval)
-
-        child = self._exec('power on')
-        child.expect('Chassis Power Control: Up/On', timeout=self.timeout)
-        self._wait_for_login()
-        log.info('Power off for {i} seconds completed'.format(
-            s=self.shortname, i=interval))
-
-
-class VirtualConsole():
-    """
-    Virtual Console (set from getRemoteConsole)
-    """
-    def __init__(self, name, ipmiuser, ipmipass, ipmidomain, logfile=None,
-                 timeout=20):
-        if libvirt is None:
-            raise RuntimeError("libvirt not found")
-
-        self.shortname = getShortName(name)
-        status_info = ls.get_status('', self.shortname)
-        try:
-            phys_host = status_info['vpshost']
-        except TypeError:
-            return
-        self.connection = libvirt.open(phys_host)
-        for i in self.connection.listDomainsID():
-            d = self.connection.lookupByID(i)
-            if d.name() == self.shortname:
-                self.vm_domain = d
-                break
-        return
-
-    def check_power(self, state, timeout=None):
-        """
-        Return true if vm domain state indicates power is on.
-        """
-        return self.vm_domain.info()[0] in [libvirt.VIR_DOMAIN_RUNNING,
-                                          libvirt.VIR_DOMAIN_BLOCKED,
-                                          libvirt.VIR_DOMAIN_PAUSED]
-
-    def check_status(self, timeout=None):
-        """
-        Return true if running.
-        """
-        return self.vm_domain.info()[0] == libvirt.VIR_DOMAIN_RUNNING
-
-    def power_cycle(self):
-        """
-        Simulate a virtual machine power cycle.
-        """
-        self.vm_domain.destroy()
-        self.vm_domain.create()
-
-    def hard_reset(self):
-        """
-        Simulate a hard reset.
-        """
-        self.vm_domain.destroy()
-
-    def power_on(self):
-        """
-        Simulate power on.
-        """
-        self.vm_domain.create()
-
-    def power_off(self):
-        """
-        Simulate power off.
-        """
-        self.vm_domain.destroy()
-
-    def power_off_for_interval(self, interval=30):
-        """
-        Simulate power off for an interval.
-        """
-        log.info('Power off {s} for {i} seconds'.format(
-            s=self.shortname, i=interval))
-        self.vm_domain.destroy()
-        time.sleep(interval)
-        self.vm_domain.create()
-        log.info('Power off for {i} seconds completed'.format(
-            s=self.shortname, i=interval))
-
-
-def getRemoteConsole(name, ipmiuser, ipmipass, ipmidomain, logfile=None,
-                     timeout=20):
-    """
-    Return either VirtualConsole or PhysicalConsole depending on name.
-    """
-    if misc.is_vm(name):
-        return VirtualConsole(name, ipmiuser, ipmipass, ipmidomain, logfile,
-                              timeout)
-    return PhysicalConsole(name, ipmiuser, ipmipass, ipmidomain, logfile,
-                           timeout)
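A hedged sketch tying Remote and the console helpers together; the hostname and IPMI credentials are placeholders:

    from cStringIO import StringIO
    from teuthology.orchestra import remote

    # Open an SSH-backed Remote and capture a command's stdout.
    r = remote.Remote('ubuntu@node-a.example.invalid')
    out = StringIO()
    r.run(args=['uname', '-r'], stdout=out)
    print out.getvalue()

    # Pick the right console class for the host (VirtualConsole for VM targets).
    cons = remote.getRemoteConsole(
        name='ubuntu@node-a.example.invalid',
        ipmiuser='admin',
        ipmipass='placeholder-password',
        ipmidomain='ipmi.example.invalid')
    if not cons.check_status():
        cons.power_cycle()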
diff --git a/teuthology/orchestra/run.py b/teuthology/orchestra/run.py
deleted file mode 100644 (file)
index f7e4d48..0000000
+++ /dev/null
@@ -1,356 +0,0 @@
-"""
-Paramiko run support
-"""
-from cStringIO import StringIO
-
-import gevent
-import gevent.event
-import pipes
-import logging
-import shutil
-
-from ..contextutil import safe_while
-
-log = logging.getLogger(__name__)
-
-class RemoteProcess(object):
-    """
-    Remote process object used to keep track of attributes of a process.
-    """
-    __slots__ = [
-        'command', 'stdin', 'stdout', 'stderr', 'exitstatus', 'exited',
-        # for orchestra.remote.Remote to place a backreference
-        'remote',
-        ]
-    def __init__(self, command, stdin, stdout, stderr, exitstatus, exited):
-        self.command = command
-        self.stdin = stdin
-        self.stdout = stdout
-        self.stderr = stderr
-        self.exitstatus = exitstatus
-        self.exited = exited
-
-class Raw(object):
-    """
-    Raw objects are passed to remote objects and are not processed locally.
-    """
-    def __init__(self, value):
-        self.value = value
-
-    def __repr__(self):
-        return '{cls}({value!r})'.format(
-            cls=self.__class__.__name__,
-            value=self.value,
-            )
-
-def quote(args):
-    """
-    Internal quote wrapper.
-    """
-    def _quote(args):
-        """
-        Quote each argument, passing Raw values through unquoted.
-        """
-        for a in args:
-            if isinstance(a, Raw):
-                yield a.value
-            else:
-                yield pipes.quote(a)
-    return ' '.join(_quote(args))
-
-
-def execute(client, args):
-    """
-    Execute a command remotely.
-
-    Caller needs to handle stdin etc.
-
-    :param client: SSHConnection to run the command with
-    :param args: command to run
-    :type args: string or list of strings
-
-    Returns a RemoteProcess, where exitstatus is a callable that will
-    block until the exit status is available.
-    """
-    if isinstance(args, basestring):
-        cmd = args
-    else:
-        cmd = quote(args)
-    (host, port) = client.get_transport().getpeername()
-    log.debug('Running [{h}]: {cmd!r}'.format(h=host, cmd=cmd))
-    (in_, out, err) = client.exec_command(cmd)
-
-    def get_exitstatus():
-        """
-        Get exit status.
-
-        Paramiko returns -1 both on connection loss and when the command
-        dies from a signal; map that to the more pythonic None.
-        """
-        status = out.channel.recv_exit_status()
-        if status == -1:
-            status = None
-        return status
-
-    def exitstatus_ready():
-        """
-        out.channel exit wrapper.
-        """
-        return out.channel.exit_status_ready()
-
-    r = RemoteProcess(
-        command=cmd,
-        stdin=in_,
-        stdout=out,
-        stderr=err,
-        # this is a callable that will block until the status is
-        # available
-        exitstatus=get_exitstatus,
-        exited=exitstatus_ready,
-        )
-    return r
-
-def copy_to_log(f, logger, host, loglevel=logging.INFO):
-    """
-    Interface to older xreadlines api.
-    """
-    # I can't seem to get fudge to fake an iterable, so use this old
-    # API for now.
-    for line in f.xreadlines():
-        line = line.rstrip()
-        logger.log(loglevel, '[' + host + ']: ' + line)
-
-def copy_and_close(src, fdst):
-    """
-    copyfileobj call wrapper.
-    """
-    if src is not None:
-        if isinstance(src, basestring):
-            src = StringIO(src)
-        shutil.copyfileobj(src, fdst)
-    fdst.close()
-
-def copy_file_to(f, dst, host):
-    """
-    Copy file
-    :param f: file to be copied.
-    :param dst: destination
-    :param host: original host location
-    """
-    if hasattr(dst, 'log'):
-        # looks like a Logger to me; not using isinstance to make life
-        # easier for unit tests
-        handler = copy_to_log
-        return handler(f, dst, host)
-    else:
-        handler = shutil.copyfileobj
-        return handler(f, dst)
-
-
-class CommandFailedError(Exception):
-    """
-    Exception thrown on command failure
-    """
-    def __init__(self, command, exitstatus, node=None):
-        self.command = command
-        self.exitstatus = exitstatus
-        self.node = node
-
-    def __str__(self):
-        return "Command failed on {node} with status {status}: {command!r}".format(
-            node=self.node,
-            status=self.exitstatus,
-            command=self.command,
-            )
-
-
-class CommandCrashedError(Exception):
-    """
-    Exception thrown on crash
-    """
-    def __init__(self, command):
-        self.command = command
-
-    def __str__(self):
-        return "Command crashed: {command!r}".format(
-            command=self.command,
-            )
-
-
-class ConnectionLostError(Exception):
-    """
-    Exception thrown when the connection is lost
-    """
-    def __init__(self, command):
-        self.command = command
-
-    def __str__(self):
-        return "SSH connection was lost: {command!r}".format(
-            command=self.command,
-            )
-
-def spawn_asyncresult(fn, *args, **kwargs):
-    """
-    Spawn a Greenlet and pass its results to an AsyncResult.
-
-    This is useful because any Greenlet that raises an exception will have
-    its traceback printed to stderr by gevent, even when
-    ``.link_exception`` has been called; routing the result through an
-    AsyncResult avoids that.
-    """
-    r = gevent.event.AsyncResult()
-    def wrapper():
-        """
-        Internal wrapper.
-        """
-        try:
-            value = fn(*args, **kwargs)
-        except Exception as e:
-            r.set_exception(e)
-        else:
-            r.set(value)
-    gevent.spawn(wrapper)
-
-    return r
-
-class Sentinel(object):
-    """
-    Sentinel -- used to define PIPE file-like object.
-    """
-    def __init__(self, name):
-        self.name = name
-
-    def __str__(self):
-        return self.name
-
-PIPE = Sentinel('PIPE')
-
-class KludgeFile(object):
-    """
-    Wrap Paramiko's ChannelFile in a way that lets ``f.close()``
-    actually cause an EOF for the remote command.
-    """
-    def __init__(self, wrapped):
-        self._wrapped = wrapped
-
-    def __getattr__(self, name):
-        return getattr(self._wrapped, name)
-
-    def close(self):
-        """
-        Close and shutdown.
-        """
-        self._wrapped.close()
-        self._wrapped.channel.shutdown_write()
-
-def run(
-    client, args,
-    stdin=None, stdout=None, stderr=None,
-    logger=None,
-    check_status=True,
-    wait=True,
-    ):
-    """
-    Run a command remotely.
-
-    :param client: SSHConnection to run the command with
-    :param args: command to run
-    :type args: list of string
-    :param stdin: Standard input to send; either a string, a file-like object, None, or `PIPE`. `PIPE` means caller is responsible for closing stdin, or command may never exit.
-    :param stdout: What to do with standard output. Either a file-like object, a `logging.Logger`, `PIPE`, or `None` for copying to default log. `PIPE` means caller is responsible for reading, or command may never exit.
-    :param stderr: What to do with standard error. See `stdout`.
-    :param logger: If logging, write stdout/stderr to "out" and "err" children of this logger. Defaults to logger named after this module.
-    :param check_status: Whether to raise CommandFailedError on non-zero exit status. Defaults to True. All signals and connection loss are made to look like SIGHUP.
-    :param wait: Whether to wait for the process to exit. If False, the returned ``r.exitstatus`` is a `gevent.event.AsyncResult`, and the actual status is available via ``.get()``.
-    """
-    r = execute(client, args)
-
-    r.stdin = KludgeFile(wrapped=r.stdin)
-
-    g_in = None
-    if stdin is not PIPE:
-        g_in = gevent.spawn(copy_and_close, stdin, r.stdin)
-        r.stdin = None
-    else:
-        assert not wait, "Using PIPE for stdin without wait=False would deadlock."
-
-    if logger is None:
-        logger = log
-    (host, port) = client.get_transport().getpeername()
-    g_err = None
-    if stderr is not PIPE:
-        if stderr is None:
-            stderr = logger.getChild('err')
-        g_err = gevent.spawn(copy_file_to, r.stderr, stderr, host)
-        r.stderr = stderr
-    else:
-        assert not wait, "Using PIPE for stderr without wait=False would deadlock."
-
-    g_out = None
-    if stdout is not PIPE:
-        if stdout is None:
-            stdout = logger.getChild('out')
-        g_out = gevent.spawn(copy_file_to, r.stdout, stdout, host)
-        r.stdout = stdout
-    else:
-        assert not wait, "Using PIPE for stdout without wait=False would deadlock."
-
-    def _check_status(status):
-        """
-        Wait for the stdin/stdout/stderr copier greenlets, then resolve the
-        exit status, translating connection loss and signal death into
-        exceptions when check_status is set.
-        """
-        if g_err is not None:
-            g_err.get()
-        if g_out is not None:
-            g_out.get()
-        if g_in is not None:
-            g_in.get()
-
-        status = status()
-        if check_status:
-            if status is None:
-                # command either died due to a signal, or the connection
-                # was lost
-                transport = client.get_transport()
-                if not transport.is_active():
-                    # looks like we lost the connection
-                    raise ConnectionLostError(command=r.command)
-
-                # connection seems healthy still, assuming it was a
-                # signal; sadly SSH does not tell us which signal
-                raise CommandCrashedError(command=r.command)
-            if status != 0:
-                (host, port) = client.get_transport().getpeername()
-                raise CommandFailedError(command=r.command, exitstatus=status, node=host)
-        return status
-
-    if wait:
-        r.exitstatus = _check_status(r.exitstatus)
-    else:
-        r.exitstatus = spawn_asyncresult(_check_status, r.exitstatus)
-
-    return r
-
-
-def wait(processes, timeout=None):
-    """
-    Wait for all given processes to exit.
-
-    Raise if any one of them fails.
-
-    Optionally, timeout after 'timeout' seconds.
-    """
-    if timeout and timeout > 0:
-        with safe_while(tries=(timeout / 6)) as check_time:
-            not_ready = list(processes)
-            while len(not_ready) > 0:
-                check_time()
-                for proc in list(not_ready):
-                    if proc.exitstatus.ready():
-                        not_ready.remove(proc)
-
-    for proc in processes:
-        assert isinstance(proc.exitstatus, gevent.event.AsyncResult)
-        proc.exitstatus.get()
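A short sketch of the non-blocking pattern run() and wait() support together; the hosts are placeholders:

    from cStringIO import StringIO
    from teuthology.orchestra import connection, run

    clients = [connection.connect('ubuntu@node-%d.example.invalid' % i)
               for i in (1, 2)]

    # Start the command on each node without blocking; exitstatus becomes an
    # AsyncResult when wait=False.
    procs = [run.run(client=ssh,
                     args=['sleep', '2', run.Raw('&&'), 'hostname'],
                     stdout=StringIO(), wait=False)
             for ssh in clients]

    run.wait(procs)          # block until all exit; raises on any failure
    for p in procs:
        print p.stdout.getvalue()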
diff --git a/teuthology/orchestra/test/__init__.py b/teuthology/orchestra/test/__init__.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/teuthology/orchestra/test/test_cluster.py b/teuthology/orchestra/test/test_cluster.py
deleted file mode 100644 (file)
index 7596583..0000000
+++ /dev/null
@@ -1,206 +0,0 @@
-import fudge
-
-from .. import cluster, remote
-
-
-class TestCluster(object):
-    @fudge.with_fakes
-    def test_init_empty(self):
-        fudge.clear_expectations()
-        c = cluster.Cluster()
-        assert c.remotes == {}
-
-    @fudge.with_fakes
-    def test_init(self):
-        fudge.clear_expectations()
-        r1 = fudge.Fake('Remote')
-        r2 = fudge.Fake('Remote')
-        c = cluster.Cluster(
-            remotes=[
-                (r1, ['foo', 'bar']),
-                (r2, ['baz']),
-                ],
-            )
-        r3 = fudge.Fake('Remote')
-        c.add(r3, ['xyzzy', 'thud', 'foo'])
-        assert c.remotes == {
-            r1: ['foo', 'bar'],
-            r2: ['baz'],
-            r3: ['xyzzy', 'thud', 'foo'],
-        }
-
-    @fudge.with_fakes
-    def test_repr(self):
-        fudge.clear_expectations()
-        r1 = remote.Remote('r1', ssh=fudge.Fake('SSH'))
-        r2 = remote.Remote('r2', ssh=fudge.Fake('SSH'))
-        c = cluster.Cluster(
-            remotes=[
-                (r1, ['foo', 'bar']),
-                (r2, ['baz']),
-                ],
-            )
-        assert repr(c) == "Cluster(remotes=[[Remote(name='r1'), ['foo', 'bar']], [Remote(name='r2'), ['baz']]])" # noqa
-
-    @fudge.with_fakes
-    def test_str(self):
-        fudge.clear_expectations()
-        r1 = remote.Remote('r1', ssh=fudge.Fake('SSH'))
-        r2 = remote.Remote('r2', ssh=fudge.Fake('SSH'))
-        c = cluster.Cluster(
-            remotes=[
-                (r1, ['foo', 'bar']),
-                (r2, ['baz']),
-                ],
-            )
-        assert str(c) == "r1[foo,bar] r2[baz]"
-
-    @fudge.with_fakes
-    def test_run_all(self):
-        fudge.clear_expectations()
-        r1 = fudge.Fake('Remote').has_attr(name='r1')
-        ret1 = fudge.Fake('RemoteProcess')
-        r1.expects('run').with_args(args=['test']).returns(ret1)
-        r2 = fudge.Fake('Remote').has_attr(name='r2')
-        ret2 = fudge.Fake('RemoteProcess')
-        r2.expects('run').with_args(args=['test']).returns(ret2)
-        c = cluster.Cluster(
-            remotes=[
-                (r1, ['foo', 'bar']),
-                (r2, ['baz']),
-                ],
-            )
-        got = c.run(args=['test'])
-        assert len(got) == 2
-        assert got == [ret1, ret2]
-        # check identity not equality
-        assert got[0] is ret1
-        assert got[1] is ret2
-
-    @fudge.with_fakes
-    def test_only_one(self):
-        fudge.clear_expectations()
-        r1 = fudge.Fake('r1')
-        r2 = fudge.Fake('r2')
-        r3 = fudge.Fake('r3')
-        c = cluster.Cluster(
-            remotes=[
-                (r1, ['foo', 'bar']),
-                (r2, ['bar']),
-                (r3, ['foo']),
-                ],
-            )
-        c_foo = c.only('foo')
-        assert c_foo.remotes == {r1: ['foo', 'bar'], r3: ['foo']}
-
-    @fudge.with_fakes
-    def test_only_two(self):
-        fudge.clear_expectations()
-        r1 = fudge.Fake('r1')
-        r2 = fudge.Fake('r2')
-        r3 = fudge.Fake('r3')
-        c = cluster.Cluster(
-            remotes=[
-                (r1, ['foo', 'bar']),
-                (r2, ['bar']),
-                (r3, ['foo']),
-                ],
-            )
-        c_both = c.only('foo', 'bar')
-        assert c_both.remotes == {r1: ['foo', 'bar']}
-
-    @fudge.with_fakes
-    def test_only_none(self):
-        fudge.clear_expectations()
-        r1 = fudge.Fake('r1')
-        r2 = fudge.Fake('r2')
-        r3 = fudge.Fake('r3')
-        c = cluster.Cluster(
-            remotes=[
-                (r1, ['foo', 'bar']),
-                (r2, ['bar']),
-                (r3, ['foo']),
-                ],
-            )
-        c_none = c.only('impossible')
-        assert c_none.remotes == {}
-
-    @fudge.with_fakes
-    def test_only_match(self):
-        fudge.clear_expectations()
-        r1 = fudge.Fake('r1')
-        r2 = fudge.Fake('r2')
-        r3 = fudge.Fake('r3')
-        c = cluster.Cluster(
-            remotes=[
-                (r1, ['foo', 'bar']),
-                (r2, ['bar']),
-                (r3, ['foo']),
-                ],
-            )
-        c_foo = c.only('foo', lambda role: role.startswith('b'))
-        assert c_foo.remotes == {r1: ['foo', 'bar']}
-
-    @fudge.with_fakes
-    def test_exclude_one(self):
-        fudge.clear_expectations()
-        r1 = fudge.Fake('r1')
-        r2 = fudge.Fake('r2')
-        r3 = fudge.Fake('r3')
-        c = cluster.Cluster(
-            remotes=[
-                (r1, ['foo', 'bar']),
-                (r2, ['bar']),
-                (r3, ['foo']),
-                ],
-            )
-        c_foo = c.exclude('foo')
-        assert c_foo.remotes == {r2: ['bar']}
-
-    @fudge.with_fakes
-    def test_exclude_two(self):
-        fudge.clear_expectations()
-        r1 = fudge.Fake('r1')
-        r2 = fudge.Fake('r2')
-        r3 = fudge.Fake('r3')
-        c = cluster.Cluster(
-            remotes=[
-                (r1, ['foo', 'bar']),
-                (r2, ['bar']),
-                (r3, ['foo']),
-                ],
-            )
-        c_both = c.exclude('foo', 'bar')
-        assert c_both.remotes == {r2: ['bar'], r3: ['foo']}
-
-    @fudge.with_fakes
-    def test_exclude_none(self):
-        fudge.clear_expectations()
-        r1 = fudge.Fake('r1')
-        r2 = fudge.Fake('r2')
-        r3 = fudge.Fake('r3')
-        c = cluster.Cluster(
-            remotes=[
-                (r1, ['foo', 'bar']),
-                (r2, ['bar']),
-                (r3, ['foo']),
-                ],
-            )
-        c_none = c.exclude('impossible')
-        assert c_none.remotes == {r1: ['foo', 'bar'], r2: ['bar'], r3: ['foo']}
-
-    @fudge.with_fakes
-    def test_exclude_match(self):
-        fudge.clear_expectations()
-        r1 = fudge.Fake('r1')
-        r2 = fudge.Fake('r2')
-        r3 = fudge.Fake('r3')
-        c = cluster.Cluster(
-            remotes=[
-                (r1, ['foo', 'bar']),
-                (r2, ['bar']),
-                (r3, ['foo']),
-                ],
-            )
-        c_foo = c.exclude('foo', lambda role: role.startswith('b'))
-        assert c_foo.remotes == {r2: ['bar'], r3: ['foo']}
diff --git a/teuthology/orchestra/test/test_connection.py b/teuthology/orchestra/test/test_connection.py
deleted file mode 100644 (file)
index 951a674..0000000
+++ /dev/null
@@ -1,104 +0,0 @@
-from teuthology import config
-
-import fudge
-
-from .util import assert_raises
-
-from .. import connection
-
-
-class TestConnection(object):
-    def clear_config(self):
-        config.config.teuthology_yaml = ''
-        config.config.load_files()
-
-    def test_split_user_just_host(self):
-        got = connection.split_user('somehost.example.com')
-        assert got == (None, 'somehost.example.com')
-
-    def test_split_user_both(self):
-        got = connection.split_user('jdoe@somehost.example.com')
-        assert got == ('jdoe', 'somehost.example.com')
-
-    def test_split_user_empty_user(self):
-        s = '@somehost.example.com'
-        e = assert_raises(AssertionError, connection.split_user, s)
-        assert str(e) == 'Bad input to split_user: {s!r}'.format(s=s)
-
-    @fudge.with_fakes
-    def test_connect(self):
-        self.clear_config()
-        config.config.verify_host_keys = True
-        fudge.clear_expectations()
-        ssh = fudge.Fake('SSHClient')
-        ssh.expects_call().with_args().returns(ssh)
-        ssh.expects('set_missing_host_key_policy')
-        ssh.expects('load_system_host_keys').with_args()
-        ssh.expects('connect').with_args(
-            hostname='orchestra.test.newdream.net.invalid',
-            username='jdoe',
-            timeout=60,
-        )
-        transport = ssh.expects('get_transport').with_args().returns_fake()
-        transport.remember_order()
-        transport.expects('set_keepalive').with_args(False)
-        got = connection.connect(
-            'jdoe@orchestra.test.newdream.net.invalid',
-            _SSHClient=ssh,
-        )
-        assert got is ssh
-
-    @fudge.with_fakes
-    def test_connect_no_verify_host_keys(self):
-        self.clear_config()
-        config.config.verify_host_keys = False
-        fudge.clear_expectations()
-        ssh = fudge.Fake('SSHClient')
-        ssh.expects_call().with_args().returns(ssh)
-        ssh.expects('set_missing_host_key_policy')
-        ssh.expects('connect').with_args(
-            hostname='orchestra.test.newdream.net.invalid',
-            username='jdoe',
-            timeout=60,
-        )
-        transport = ssh.expects('get_transport').with_args().returns_fake()
-        transport.remember_order()
-        transport.expects('set_keepalive').with_args(False)
-        got = connection.connect(
-            'jdoe@orchestra.test.newdream.net.invalid',
-            _SSHClient=ssh,
-        )
-        assert got is ssh
-
-    @fudge.with_fakes
-    def test_connect_override_hostkeys(self):
-        self.clear_config()
-        fudge.clear_expectations()
-        sshclient = fudge.Fake('SSHClient')
-        ssh = sshclient.expects_call().with_args().returns_fake()
-        ssh.remember_order()
-        host_keys = fudge.Fake('HostKeys')
-        host_keys.expects('add').with_args(
-            hostname='orchestra.test.newdream.net.invalid',
-            keytype='ssh-rsa',
-            key='frobnitz',
-            )
-        ssh.expects('get_host_keys').with_args().returns(host_keys)
-        ssh.expects('connect').with_args(
-            hostname='orchestra.test.newdream.net.invalid',
-            username='jdoe',
-            timeout=60,
-            )
-        transport = ssh.expects('get_transport').with_args().returns_fake()
-        transport.remember_order()
-        transport.expects('set_keepalive').with_args(False)
-        create_key = fudge.Fake('create_key')
-        create_key.expects_call().with_args('ssh-rsa',
-                                            'testkey').returns('frobnitz')
-        got = connection.connect(
-            'jdoe@orchestra.test.newdream.net.invalid',
-            host_key='ssh-rsa testkey',
-            _SSHClient=sshclient,
-            _create_key=create_key,
-            )
-        assert got is ssh
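The expectations above pin down connection.split_user() and connection.connect() while faking paramiko's SSHClient with fudge. As a reading aid, a minimal split_user() sketch reconstructed purely from these assertions (an assumption, not the verbatim implementation) looks like:

    def split_user(user_at_host):
        # 'host' -> (None, 'host'); 'user@host' -> ('user', 'host');
        # an empty user triggers the AssertionError the third test expects.
        user, host = None, user_at_host
        if '@' in user_at_host:
            user, host = user_at_host.split('@', 1)
        assert user != '' and host != '', \
            'Bad input to split_user: {s!r}'.format(s=user_at_host)
        return (user, host)
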
diff --git a/teuthology/orchestra/test/test_integration.py b/teuthology/orchestra/test/test_integration.py
deleted file mode 100644 (file)
index 03aa962..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-from .. import monkey
-monkey.patch_all()
-
-from cStringIO import StringIO
-
-import os
-from .. import connection, run
-from .util import assert_raises
-
-from pytest import skip
-
-HOST = None
-
-
-class TestIntegration():
-    def setup(self):
-        try:
-            host = os.environ['ORCHESTRA_TEST_HOST']
-        except KeyError:
-            skip('To run integration tests, set environment ' +
-                 'variable ORCHESTRA_TEST_HOST to user@host to use.')
-        global HOST
-        HOST = host
-
-    def test_crash(self):
-        ssh = connection.connect(HOST)
-        e = assert_raises(
-            run.CommandCrashedError,
-            run.run,
-            client=ssh,
-            args=['sh', '-c', 'kill -ABRT $$'],
-            )
-        assert e.command == "sh -c 'kill -ABRT $$'"
-        assert str(e) == "Command crashed: \"sh -c 'kill -ABRT $$'\""
-
-    def test_lost(self):
-        ssh = connection.connect(HOST)
-        e = assert_raises(
-            run.ConnectionLostError,
-            run.run,
-            client=ssh,
-            args=['sh', '-c', 'kill -ABRT $PPID'],
-            )
-        assert e.command == "sh -c 'kill -ABRT $PPID'"
-        assert str(e) == \
-            "SSH connection was lost: \"sh -c 'kill -ABRT $PPID'\""
-
-    def test_pipe(self):
-        ssh = connection.connect(HOST)
-        r = run.run(
-            client=ssh,
-            args=['cat'],
-            stdin=run.PIPE,
-            stdout=StringIO(),
-            wait=False,
-            )
-        assert r.stdout.getvalue() == ''
-        r.stdin.write('foo\n')
-        r.stdin.write('bar\n')
-        r.stdin.close()
-
-        got = r.exitstatus.get()
-        assert got == 0
-        assert r.stdout.getvalue() == 'foo\nbar\n'
-
-    def test_and(self):
-        ssh = connection.connect(HOST)
-        r = run.run(
-            client=ssh,
-            args=['true', run.Raw('&&'), 'echo', 'yup'],
-            stdout=StringIO(),
-            )
-        assert r.stdout.getvalue() == 'yup\n'
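These tests run real commands over SSH and are skipped unless ORCHESTRA_TEST_HOST is set. A hedged sketch of the same API used directly, outside the test harness (the host name is a placeholder):

    # Python 2 sketch; 'jdoe@testnode.example.com' is a made-up host.
    from cStringIO import StringIO
    from teuthology.orchestra import connection, run

    ssh = connection.connect('jdoe@testnode.example.com')
    proc = run.run(
        client=ssh,
        args=['uname', run.Raw('&&'), 'echo', 'ok'],
        stdout=StringIO(),
    )
    print proc.stdout.getvalue()   # e.g. 'Linux\nok\n'
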
diff --git a/teuthology/orchestra/test/test_remote.py b/teuthology/orchestra/test/test_remote.py
deleted file mode 100644 (file)
index dbff6ff..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-import fudge
-import fudge.inspector
-
-from .. import remote
-from ..run import RemoteProcess
-
-
-class TestRemote(object):
-    def test_shortname(self):
-        r = remote.Remote(
-            name='jdoe@xyzzy.example.com',
-            shortname='xyz',
-            ssh=fudge.Fake('SSHConnection'),
-            )
-        assert r.shortname == 'xyz'
-        assert str(r) == 'xyz'
-
-    def test_shortname_default(self):
-        r = remote.Remote(
-            name='jdoe@xyzzy.example.com',
-            ssh=fudge.Fake('SSHConnection'),
-            )
-        assert r.shortname == 'jdoe@xyzzy.example.com'
-        assert str(r) == 'jdoe@xyzzy.example.com'
-
-    @fudge.with_fakes
-    def test_run(self):
-        fudge.clear_expectations()
-        ssh = fudge.Fake('SSHConnection')
-        run = fudge.Fake('run')
-        args = [
-            'something',
-            'more',
-            ]
-        foo = object()
-        ret = RemoteProcess(
-            command='fakey',
-            stdin=None,
-            stdout=None,
-            stderr=None,
-            exitstatus=None,
-            exited=None,
-            )
-        run.expects_call().with_args(
-            client=fudge.inspector.arg.passes_test(lambda v: v is ssh),
-            args=fudge.inspector.arg.passes_test(lambda v: v is args),
-            foo=fudge.inspector.arg.passes_test(lambda v: v is foo),
-            ).returns(ret)
-        r = remote.Remote(name='jdoe@xyzzy.example.com', ssh=ssh)
-        # monkey patch ook ook
-        r._runner = run
-        got = r.run(
-            args=args,
-            foo=foo,
-            )
-        assert got is ret
-        assert got.remote is r
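test_run above only checks that Remote forwards run() to a pluggable runner and tags the returned process with .remote. A stripped-down sketch of that contract (an assumption drawn from the test, not the deleted class itself):

    from teuthology.orchestra import run

    class RemoteSketch(object):
        def __init__(self, name, ssh, shortname=None):
            self.name = name
            self.ssh = ssh
            self.shortname = shortname if shortname is not None else name
            self._runner = run.run        # the test monkey-patches this attribute

        def __str__(self):
            return self.shortname

        def run(self, **kwargs):
            proc = self._runner(client=self.ssh, **kwargs)
            proc.remote = self            # asserted via 'got.remote is r'
            return proc
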
diff --git a/teuthology/orchestra/test/test_run.py b/teuthology/orchestra/test/test_run.py
deleted file mode 100644 (file)
index 96cee44..0000000
+++ /dev/null
@@ -1,402 +0,0 @@
-from cStringIO import StringIO
-
-import fudge
-import gevent.event
-import logging
-
-from .. import run
-
-from .util import assert_raises
-
-
-class TestRun(object):
-    @fudge.with_fakes
-    def test_run_log_simple(self):
-        fudge.clear_expectations()
-        ssh = fudge.Fake('SSHConnection')
-        transport = ssh.expects('get_transport').with_args().returns_fake()
-        transport.expects('getpeername').with_args().returns(('HOST', 22))
-        cmd = ssh.expects('exec_command')
-        cmd.with_args("foo 'bar baz'")
-        in_ = fudge.Fake('ChannelFile(stdin)')
-        out = fudge.Fake('ChannelFile(stdout)')
-        err = fudge.Fake('ChannelFile(stderr)')
-        cmd.returns((in_, out, err))
-        in_.expects('close').with_args()
-        in_chan = fudge.Fake('channel')
-        in_chan.expects('shutdown_write').with_args()
-        in_.has_attr(channel=in_chan)
-        out.expects('xreadlines').with_args().returns(['foo', 'bar'])
-        err.expects('xreadlines').with_args().returns(['bad'])
-        logger = fudge.Fake('logger')
-        log_err = fudge.Fake('log_err')
-        logger.expects('getChild').with_args('err').returns(log_err)
-        log_err.expects('log').with_args(logging.INFO, '[HOST]: bad')
-        log_out = fudge.Fake('log_out')
-        logger.expects('getChild').with_args('out').returns(log_out)
-        log_out.expects('log').with_args(logging.INFO, '[HOST]: foo')
-        log_out.expects('log').with_args(logging.INFO, '[HOST]: bar')
-        channel = fudge.Fake('channel')
-        out.has_attr(channel=channel)
-        channel.expects('recv_exit_status').with_args().returns(0)
-        r = run.run(
-            client=ssh,
-            logger=logger,
-            args=['foo', 'bar baz'],
-            )
-        assert r.exitstatus == 0
-
-    @fudge.with_fakes
-    def test_run_capture_stdout(self):
-        fudge.clear_expectations()
-        ssh = fudge.Fake('SSHConnection')
-        transport = ssh.expects('get_transport').with_args().returns_fake()
-        transport.expects('getpeername').with_args().returns(('HOST', 22))
-        cmd = ssh.expects('exec_command')
-        cmd.with_args("foo 'bar baz'")
-        in_ = fudge.Fake('ChannelFile(stdin)')
-        out = fudge.Fake('ChannelFile(stdout)')
-        err = fudge.Fake('ChannelFile(stderr)')
-        cmd.returns((in_, out, err))
-        in_.expects('close').with_args()
-        in_chan = fudge.Fake('channel')
-        in_chan.expects('shutdown_write').with_args()
-        in_.has_attr(channel=in_chan)
-        out.remember_order()
-        out.expects('read').with_args().returns('foo\nb')
-        out.expects('read').with_args().returns('ar\n')
-        out.expects('read').with_args().returns('')
-        err.expects('xreadlines').with_args().returns(['bad'])
-        logger = fudge.Fake('logger')
-        log_err = fudge.Fake('log_err')
-        logger.expects('getChild').with_args('err').returns(log_err)
-        log_err.expects('log').with_args(logging.INFO, '[HOST]: bad')
-        channel = fudge.Fake('channel')
-        out.has_attr(channel=channel)
-        channel.expects('recv_exit_status').with_args().returns(0)
-        out_f = StringIO()
-        r = run.run(
-            client=ssh,
-            logger=logger,
-            args=['foo', 'bar baz'],
-            stdout=out_f,
-            )
-        assert r.exitstatus == 0
-        assert r.stdout is out_f
-        assert r.stdout.getvalue() == 'foo\nbar\n'
-
-    @fudge.with_fakes
-    def test_run_status_bad(self):
-        fudge.clear_expectations()
-        ssh = fudge.Fake('SSHConnection')
-        transport = ssh.expects('get_transport').with_args().returns_fake()
-        transport.expects('getpeername').with_args().returns(('HOST', 22))
-        cmd = ssh.expects('exec_command')
-        cmd.with_args("foo")
-        in_ = fudge.Fake('ChannelFile').is_a_stub()
-        out = fudge.Fake('ChannelFile').is_a_stub()
-        err = fudge.Fake('ChannelFile').is_a_stub()
-        cmd.returns((in_, out, err))
-        out.expects('xreadlines').with_args().returns([])
-        err.expects('xreadlines').with_args().returns([])
-        logger = fudge.Fake('logger').is_a_stub()
-        channel = fudge.Fake('channel')
-        out.has_attr(channel=channel)
-        channel.expects('recv_exit_status').with_args().returns(42)
-        e = assert_raises(
-            run.CommandFailedError,
-            run.run,
-            client=ssh,
-            logger=logger,
-            args=['foo'],
-            )
-        assert e.command == 'foo'
-        assert e.exitstatus == 42
-        assert str(e) == "Command failed on HOST with status 42: 'foo'"
-
-    @fudge.with_fakes
-    def test_run_status_bad_nocheck(self):
-        fudge.clear_expectations()
-        ssh = fudge.Fake('SSHConnection')
-        transport = ssh.expects('get_transport').with_args().returns_fake()
-        transport.expects('getpeername').with_args().returns(('HOST', 22))
-        cmd = ssh.expects('exec_command')
-        cmd.with_args("foo")
-        in_ = fudge.Fake('ChannelFile').is_a_stub()
-        out = fudge.Fake('ChannelFile').is_a_stub()
-        err = fudge.Fake('ChannelFile').is_a_stub()
-        cmd.returns((in_, out, err))
-        out.expects('xreadlines').with_args().returns([])
-        err.expects('xreadlines').with_args().returns([])
-        logger = fudge.Fake('logger').is_a_stub()
-        channel = fudge.Fake('channel')
-        out.has_attr(channel=channel)
-        channel.expects('recv_exit_status').with_args().returns(42)
-        r = run.run(
-            client=ssh,
-            logger=logger,
-            args=['foo'],
-            check_status=False,
-            )
-        assert r.exitstatus == 42
-
-    @fudge.with_fakes
-    def test_run_status_crash(self):
-        fudge.clear_expectations()
-        ssh = fudge.Fake('SSHConnection')
-        transport = ssh.expects('get_transport').with_args().returns_fake()
-        transport.expects('getpeername').with_args().returns(('HOST', 22))
-        transport.expects('is_active').with_args().returns(True)
-        cmd = ssh.expects('exec_command')
-        cmd.with_args("foo")
-        in_ = fudge.Fake('ChannelFile').is_a_stub()
-        out = fudge.Fake('ChannelFile').is_a_stub()
-        err = fudge.Fake('ChannelFile').is_a_stub()
-        cmd.returns((in_, out, err))
-        out.expects('xreadlines').with_args().returns([])
-        err.expects('xreadlines').with_args().returns([])
-        logger = fudge.Fake('logger').is_a_stub()
-        channel = fudge.Fake('channel')
-        out.has_attr(channel=channel)
-        channel.expects('recv_exit_status').with_args().returns(-1)
-        e = assert_raises(
-            run.CommandCrashedError,
-            run.run,
-            client=ssh,
-            logger=logger,
-            args=['foo'],
-            )
-        assert e.command == 'foo'
-        assert str(e) == "Command crashed: 'foo'"
-
-    @fudge.with_fakes
-    def test_run_status_crash_nocheck(self):
-        fudge.clear_expectations()
-        ssh = fudge.Fake('SSHConnection')
-        transport = ssh.expects('get_transport').with_args().returns_fake()
-        transport.expects('getpeername').with_args().returns(('HOST', 22))
-        cmd = ssh.expects('exec_command')
-        cmd.with_args("foo")
-        in_ = fudge.Fake('ChannelFile').is_a_stub()
-        out = fudge.Fake('ChannelFile').is_a_stub()
-        err = fudge.Fake('ChannelFile').is_a_stub()
-        cmd.returns((in_, out, err))
-        out.expects('xreadlines').with_args().returns([])
-        err.expects('xreadlines').with_args().returns([])
-        logger = fudge.Fake('logger').is_a_stub()
-        channel = fudge.Fake('channel')
-        out.has_attr(channel=channel)
-        channel.expects('recv_exit_status').with_args().returns(-1)
-        r = run.run(
-            client=ssh,
-            logger=logger,
-            args=['foo'],
-            check_status=False,
-            )
-        assert r.exitstatus is None
-
-    @fudge.with_fakes
-    def test_run_status_lost(self):
-        fudge.clear_expectations()
-        ssh = fudge.Fake('SSHConnection')
-        cmd = ssh.expects('exec_command')
-        cmd.with_args("foo")
-        in_ = fudge.Fake('ChannelFile').is_a_stub()
-        out = fudge.Fake('ChannelFile').is_a_stub()
-        err = fudge.Fake('ChannelFile').is_a_stub()
-        cmd.returns((in_, out, err))
-        out.expects('xreadlines').with_args().returns([])
-        err.expects('xreadlines').with_args().returns([])
-        logger = fudge.Fake('logger').is_a_stub()
-        channel = fudge.Fake('channel')
-        out.has_attr(channel=channel)
-        channel.expects('recv_exit_status').with_args().returns(-1)
-        transport = ssh.expects('get_transport').with_args().returns_fake()
-        transport.expects('getpeername').with_args().returns(('HOST', 22))
-        transport.expects('is_active').with_args().returns(False)
-        e = assert_raises(
-            run.ConnectionLostError,
-            run.run,
-            client=ssh,
-            logger=logger,
-            args=['foo'],
-            )
-
-        assert e.command == 'foo'
-        assert str(e) == "SSH connection was lost: 'foo'"
-
-    @fudge.with_fakes
-    def test_run_status_lost_nocheck(self):
-        fudge.clear_expectations()
-        ssh = fudge.Fake('SSHConnection')
-        transport = ssh.expects('get_transport').with_args().returns_fake()
-        transport.expects('getpeername').with_args().returns(('HOST', 22))
-        cmd = ssh.expects('exec_command')
-        cmd.with_args("foo")
-        in_ = fudge.Fake('ChannelFile').is_a_stub()
-        out = fudge.Fake('ChannelFile').is_a_stub()
-        err = fudge.Fake('ChannelFile').is_a_stub()
-        cmd.returns((in_, out, err))
-        out.expects('xreadlines').with_args().returns([])
-        err.expects('xreadlines').with_args().returns([])
-        logger = fudge.Fake('logger').is_a_stub()
-        channel = fudge.Fake('channel')
-        out.has_attr(channel=channel)
-        channel.expects('recv_exit_status').with_args().returns(-1)
-        r = run.run(
-            client=ssh,
-            logger=logger,
-            args=['foo'],
-            check_status=False,
-            )
-        assert r.exitstatus is None
-
-    @fudge.with_fakes
-    def test_run_nowait(self):
-        fudge.clear_expectations()
-        ssh = fudge.Fake('SSHConnection')
-        transport = ssh.expects('get_transport').with_args().returns_fake()
-        transport.expects('getpeername').with_args().returns(('HOST', 22))
-        cmd = ssh.expects('exec_command')
-        cmd.with_args("foo")
-        in_ = fudge.Fake('ChannelFile').is_a_stub()
-        out = fudge.Fake('ChannelFile').is_a_stub()
-        err = fudge.Fake('ChannelFile').is_a_stub()
-        cmd.returns((in_, out, err))
-        out.expects('xreadlines').with_args().returns([])
-        err.expects('xreadlines').with_args().returns([])
-        logger = fudge.Fake('logger').is_a_stub()
-        channel = fudge.Fake('channel')
-        out.has_attr(channel=channel)
-        channel.expects('recv_exit_status').with_args().returns(42)
-        r = run.run(
-            client=ssh,
-            logger=logger,
-            args=['foo'],
-            wait=False,
-            )
-        assert r.command == 'foo'
-        assert isinstance(r.exitstatus, gevent.event.AsyncResult)
-        e = assert_raises(
-            run.CommandFailedError,
-            r.exitstatus.get,
-            )
-        assert e.exitstatus == 42
-        assert str(e) == "Command failed on HOST with status 42: 'foo'"
-
-    @fudge.with_fakes
-    def test_run_stdin_pipe(self):
-        fudge.clear_expectations()
-        ssh = fudge.Fake('SSHConnection')
-        transport = ssh.expects('get_transport').with_args().returns_fake()
-        transport.expects('getpeername').with_args().returns(('HOST', 22))
-        cmd = ssh.expects('exec_command')
-        cmd.with_args("foo")
-        in_ = fudge.Fake('ChannelFile').is_a_stub()
-        out = fudge.Fake('ChannelFile').is_a_stub()
-        err = fudge.Fake('ChannelFile').is_a_stub()
-        cmd.returns((in_, out, err))
-        out.expects('xreadlines').with_args().returns([])
-        err.expects('xreadlines').with_args().returns([])
-        logger = fudge.Fake('logger').is_a_stub()
-        channel = fudge.Fake('channel')
-        out.has_attr(channel=channel)
-        channel.expects('recv_exit_status').with_args().returns(0)
-        r = run.run(
-            client=ssh,
-            logger=logger,
-            args=['foo'],
-            stdin=run.PIPE,
-            wait=False,
-            )
-        r.stdin.write('bar')
-        assert r.command == 'foo'
-        assert isinstance(r.exitstatus, gevent.event.AsyncResult)
-        assert r.exitstatus.ready() == False
-        got = r.exitstatus.get()
-        assert got == 0
-
-    @fudge.with_fakes
-    def test_run_stdout_pipe(self):
-        fudge.clear_expectations()
-        ssh = fudge.Fake('SSHConnection')
-        transport = ssh.expects('get_transport').with_args().returns_fake()
-        transport.expects('getpeername').with_args().returns(('HOST', 22))
-        cmd = ssh.expects('exec_command')
-        cmd.with_args("foo")
-        in_ = fudge.Fake('ChannelFile').is_a_stub()
-        out = fudge.Fake('ChannelFile').is_a_stub()
-        err = fudge.Fake('ChannelFile').is_a_stub()
-        cmd.returns((in_, out, err))
-        out.expects('read').with_args().returns('one')
-        out.expects('read').with_args().returns('two')
-        out.expects('read').with_args().returns('')
-        err.expects('xreadlines').with_args().returns([])
-        logger = fudge.Fake('logger').is_a_stub()
-        channel = fudge.Fake('channel')
-        out.has_attr(channel=channel)
-        channel.expects('recv_exit_status').with_args().returns(0)
-        r = run.run(
-            client=ssh,
-            logger=logger,
-            args=['foo'],
-            stdout=run.PIPE,
-            wait=False,
-            )
-        assert r.command == 'foo'
-        assert isinstance(r.exitstatus, gevent.event.AsyncResult)
-        assert r.exitstatus.ready() == False
-        assert r.stdout.read() == 'one'
-        assert r.stdout.read() == 'two'
-        assert r.stdout.read() == ''
-        got = r.exitstatus.get()
-        assert got == 0
-
-    @fudge.with_fakes
-    def test_run_stderr_pipe(self):
-        fudge.clear_expectations()
-        ssh = fudge.Fake('SSHConnection')
-        transport = ssh.expects('get_transport').with_args().returns_fake()
-        transport.expects('getpeername').with_args().returns(('HOST', 22))
-        cmd = ssh.expects('exec_command')
-        cmd.with_args("foo")
-        in_ = fudge.Fake('ChannelFile').is_a_stub()
-        out = fudge.Fake('ChannelFile').is_a_stub()
-        err = fudge.Fake('ChannelFile').is_a_stub()
-        cmd.returns((in_, out, err))
-        out.expects('xreadlines').with_args().returns([])
-        err.expects('read').with_args().returns('one')
-        err.expects('read').with_args().returns('two')
-        err.expects('read').with_args().returns('')
-        logger = fudge.Fake('logger').is_a_stub()
-        channel = fudge.Fake('channel')
-        out.has_attr(channel=channel)
-        channel.expects('recv_exit_status').with_args().returns(0)
-        r = run.run(
-            client=ssh,
-            logger=logger,
-            args=['foo'],
-            stderr=run.PIPE,
-            wait=False,
-            )
-        assert r.command == 'foo'
-        assert isinstance(r.exitstatus, gevent.event.AsyncResult)
-        assert r.exitstatus.ready() is False
-        assert r.stderr.read() == 'one'
-        assert r.stderr.read() == 'two'
-        assert r.stderr.read() == ''
-        got = r.exitstatus.get()
-        assert got == 0
-
-    def test_quote_simple(self):
-        got = run.quote(['a b', ' c', 'd e '])
-        assert got == "'a b' ' c' 'd e '"
-
-    def test_quote_and_quote(self):
-        got = run.quote(['echo', 'this && is embedded', '&&',
-                         'that was standalone'])
-        assert got == "echo 'this && is embedded' '&&' 'that was standalone'"
-
-    def test_quote_and_raw(self):
-        got = run.quote(['true', run.Raw('&&'), 'echo', 'yay'])
-        assert got == "true && echo yay"
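The three quoting tests at the end fix the contract of run.quote() and run.Raw: plain arguments are shell-quoted, Raw arguments pass through verbatim. A minimal sketch that satisfies exactly these assertions (an assumption; the real implementation may differ in detail):

    import pipes

    class Raw(object):
        def __init__(self, value):
            self.value = value

    def quote(args):
        # Quote every plain string; emit Raw values untouched.
        return ' '.join(
            a.value if isinstance(a, Raw) else pipes.quote(a)
            for a in args
        )

    assert quote(['true', Raw('&&'), 'echo', 'yay']) == "true && echo yay"
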
diff --git a/teuthology/orchestra/test/util.py b/teuthology/orchestra/test/util.py
deleted file mode 100644 (file)
index f693d40..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-def assert_raises(excClass, callableObj, *args, **kwargs):
-    """
-    Like unittest.TestCase.assertRaises, but returns the exception.
-    """
-    try:
-        callableObj(*args, **kwargs)
-    except excClass, e:
-        return e
-    else:
-        if hasattr(excClass,'__name__'): excName = excClass.__name__
-        else: excName = str(excClass)
-        raise AssertionError("%s not raised" % excName)
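Unlike the unittest assertRaises of that era, this helper hands the exception object back so tests can inspect attributes such as .command and .exitstatus. Usage sketch:

    e = assert_raises(ValueError, int, 'not a number')
    assert isinstance(e, ValueError)
    assert 'invalid literal' in str(e)
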
diff --git a/teuthology/packaging.py b/teuthology/packaging.py
deleted file mode 100644 (file)
index 94fef24..0000000
+++ /dev/null
@@ -1,235 +0,0 @@
-from cStringIO import StringIO
-from .orchestra import run
-import logging
-import teuthology.misc as teuthology
-import textwrap
-
-log = logging.getLogger(__name__)
-
-'''
-Infer things about platform type with this map.
-The key comes from processing lsb_release -ics or -irs (see _get_relmap).
-'''
-_RELEASE_MAP = {
-    'Ubuntu precise': dict(flavor='deb', release='ubuntu', version='precise'),
-    'Debian wheezy': dict(flavor='deb', release='debian', version='wheezy'),
-    'CentOS 6.4': dict(flavor='rpm', release='centos', version='6.4'),
-    'RedHatEnterpriseServer 6.4': dict(flavor='rpm', release='rhel',
-                                       version='6.4'),
-}
-
-'''
-Map 'generic' package name to 'flavor-specific' package name.
-If entry is None, either the package isn't known here, or
-it's known but should not be installed on remotes of this flavor
-'''
-
-_PACKAGE_MAP = {
-    'sqlite': {'deb': 'sqlite3', 'rpm': None}
-}
-
-'''
-Map 'generic' service name to 'flavor-specific' service name.
-'''
-_SERVICE_MAP = {
-    'httpd': {'deb': 'apache2', 'rpm': 'httpd'}
-}
-
-
-def _get_relmap(rem):
-    """
-    Internal worker to get the appropriate dict from RELEASE_MAP
-    """
-    relmap = getattr(rem, 'relmap', None)
-    if relmap is not None:
-        return relmap
-    lsb_release_out = StringIO()
-    rem.run(args=['lsb_release', '-ics'], stdout=lsb_release_out)
-    release = lsb_release_out.getvalue().replace('\n', ' ').rstrip()
-    if release in _RELEASE_MAP:
-        rem.relmap = _RELEASE_MAP[release]
-        return rem.relmap
-    else:
-        lsb_release_out = StringIO()
-        rem.run(args=['lsb_release', '-irs'], stdout=lsb_release_out)
-        release = lsb_release_out.getvalue().replace('\n', ' ').rstrip()
-        if release in _RELEASE_MAP:
-            rem.relmap = _RELEASE_MAP[release]
-            return rem.relmap
-    raise RuntimeError('Can\'t get release info for {}'.format(rem))
-
-
-def get_package_name(pkg, rem):
-    """
-    Find the remote-specific name of the generic 'pkg'
-    """
-    flavor = _get_relmap(rem)['flavor']
-
-    try:
-        return _PACKAGE_MAP[pkg][flavor]
-    except KeyError:
-        return None
-
-
-def get_service_name(service, rem):
-    """
-    Find the remote-specific name of the generic 'service'
-    """
-    flavor = _get_relmap(rem)['flavor']
-    try:
-        return _SERVICE_MAP[service][flavor]
-    except KeyError:
-        return None
-
-
-def install_repo(remote, reposerver, pkgdir, username=None, password=None):
-    """
-    Install a package repo for reposerver on remote.
-    URL will be http if username and password are none, otherwise https.
-    pkgdir is the piece path between "reposerver" and "deb" or "rpm"
-     (say, 'packages', or 'packages-staging/my-branch', for example).
-    so:
-        http[s]://[<username>:<password>@]<reposerver>/<pkgdir>/{deb|rpm}
-    will be written to deb's apt inktank.list or rpm's inktank.repo
-    """
-
-    relmap = _get_relmap(remote)
-    log.info('Installing repo on %s', remote)
-    if username is None or password is None:
-        repo_uri = 'http://{reposerver}/{pkgdir}'
-    else:
-        repo_uri = 'https://{username}:{password}@{reposerver}/{pkgdir}'
-
-    if relmap['flavor'] == 'deb':
-        contents = 'deb ' + repo_uri + '/deb {codename} main'
-        contents = contents.format(username=username, password=password,
-                                   reposerver=reposerver, pkgdir=pkgdir,
-                                   codename=relmap['version'],)
-        teuthology.sudo_write_file(remote,
-                                   '/etc/apt/sources.list.d/inktank.list',
-                                   contents)
-        remote.run(args=['sudo',
-                         'apt-get',
-                         'install',
-                         'apt-transport-https',
-                         '-y'])
-        result = remote.run(args=['sudo', 'apt-get', 'update', '-y'],
-                            stdout=StringIO())
-        return result
-
-    elif relmap['flavor'] == 'rpm':
-        baseurl = repo_uri + '/rpm/{release}{version}'
-        contents = textwrap.dedent('''
-            [inktank]
-            name=Inktank Storage, Inc.
-            baseurl={baseurl}
-            gpgcheck=1
-            enabled=1
-            '''.format(baseurl=baseurl))
-        contents = contents.format(username=username,
-                                   password=password,
-                                   pkgdir=pkgdir,
-                                   release=relmap['release'],
-                                   version=relmap['version'])
-        teuthology.sudo_write_file(remote,
-                                   '/etc/yum.repos.d/inktank.repo',
-                                   contents)
-        return remote.run(args=['sudo', 'yum', 'makecache'])
-
-    else:
-        return False
-
-
-def remove_repo(remote):
-    log.info('Removing repo on %s', remote)
-    flavor = _get_relmap(remote)['flavor']
-    if flavor == 'deb':
-        teuthology.delete_file(remote, '/etc/apt/sources.list.d/inktank.list',
-                               sudo=True, force=True)
-        result = remote.run(args=['sudo', 'apt-get', 'update', '-y'],
-                            stdout=StringIO())
-        return result
-
-    elif flavor == 'rpm':
-        teuthology.delete_file(remote, '/etc/yum.repos.d/inktank.repo',
-                               sudo=True, force=True)
-        return remote.run(args=['sudo', 'yum', 'makecache'])
-
-    else:
-        return False
-
-
-def install_repokey(remote, keyurl):
-    """
-    Install a repo key from keyurl on remote.
-    Installing keys is assumed to be idempotent.
-    Example keyurl: 'http://download.inktank.com/keys/release.asc'
-    """
-    log.info('Installing repo key on %s', remote)
-    flavor = _get_relmap(remote)['flavor']
-    if flavor == 'deb':
-        return remote.run(args=['wget',
-                                '-q',
-                                '-O-',
-                                keyurl,
-                                run.Raw('|'),
-                                'sudo',
-                                'apt-key',
-                                'add',
-                                '-'])
-    elif flavor == 'rpm':
-        return remote.run(args=['sudo', 'rpm', '--import', keyurl])
-    else:
-        return False
-
-
-def install_package(package, remote):
-    """
-    Install 'package' on 'remote'
-    Assumes repo has already been set up (perhaps with install_repo)
-    """
-    log.info('Installing package %s on %s', package, remote)
-    flavor = _get_relmap(remote)['flavor']
-    if flavor == 'deb':
-        pkgcmd = ['DEBIAN_FRONTEND=noninteractive',
-                  'sudo',
-                  '-E',
-                  'apt-get',
-                  '-y',
-                  'install',
-                  '{package}'.format(package=package)]
-    elif flavor == 'rpm':
-        pkgcmd = ['sudo',
-                  'yum',
-                  '-y',
-                  'install',
-                  '{package}'.format(package=package)]
-    else:
-        log.error('install_package: bad flavor ' + flavor + '\n')
-        return False
-    return remote.run(args=pkgcmd)
-
-
-def remove_package(package, remote):
-    """
-    Remove package from remote
-    """
-    flavor = _get_relmap(remote)['flavor']
-    if flavor == 'deb':
-        pkgcmd = ['DEBIAN_FRONTEND=noninteractive',
-                  'sudo',
-                  '-E',
-                  'apt-get',
-                  '-y',
-                  'purge',
-                  '{package}'.format(package=package)]
-    elif flavor == 'rpm':
-        pkgcmd = ['sudo',
-                  'yum',
-                  '-y',
-                  'erase',
-                  '{package}'.format(package=package)]
-    else:
-        log.error('remove_package: bad flavor ' + flavor + '\n')
-        return False
-    return remote.run(args=pkgcmd)
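A hedged illustration of how the mapping helpers above fit together; 'remote' stands for a teuthology.orchestra.remote.Remote obtained elsewhere (e.g. from ctx.cluster) and is not defined here:

    from teuthology.packaging import (get_package_name, get_service_name,
                                      install_package)

    pkg = get_package_name('sqlite', remote)    # 'sqlite3' on deb, None on rpm
    if pkg is not None:
        install_package(pkg, remote)
    svc = get_service_name('httpd', remote)     # 'apache2' on deb, 'httpd' on rpm
    remote.run(args=['sudo', 'service', svc, 'status'], check_status=False)
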
diff --git a/teuthology/parallel.py b/teuthology/parallel.py
deleted file mode 100644 (file)
index 13e426e..0000000
+++ /dev/null
@@ -1,116 +0,0 @@
-import logging
-import sys
-
-import gevent.pool
-import gevent.queue
-
-log = logging.getLogger(__name__)
-
-class ExceptionHolder(object):
-    def __init__(self, exc_info):
-        self.exc_info = exc_info
-
-def capture_traceback(func, *args, **kwargs):
-    """
-    Utility function to capture tracebacks of any exception func
-    raises.
-    """
-    try:
-        return func(*args, **kwargs)
-    except Exception:
-        return ExceptionHolder(sys.exc_info())
-
-def resurrect_traceback(exc):
-    if isinstance(exc, ExceptionHolder):
-        exc_info = exc.exc_info
-    elif isinstance(exc, BaseException):
-        exc_info = (type(exc), exc, None)
-    else:
-        return
-
-    raise exc_info[0], exc_info[1], exc_info[2]
-
-class parallel(object):
-    """
-    This class is a context manager for running functions in parallel.
-
-    You add functions to be run with the spawn method::
-
-        with parallel() as p:
-            for foo in bar:
-                p.spawn(quux, foo, baz=True)
-
-    You can iterate over the results (which are in arbitrary order)::
-
-        with parallel() as p:
-            for foo in bar:
-                p.spawn(quux, foo, baz=True)
-            for result in p:
-                print result
-
-    If one of the spawned functions throws an exception, it will be thrown
-    when iterating over the results, or when the with block ends.
-
-    At the end of the with block, the main thread waits until all
-    spawned functions have completed, or, if one exited with an exception,
-    kills the rest and raises the exception.
-    """
-
-    def __init__(self):
-        self.group = gevent.pool.Group()
-        self.results = gevent.queue.Queue()
-        self.count = 0
-        self.any_spawned = False
-        self.iteration_stopped = False
-
-    def spawn(self, func, *args, **kwargs):
-        self.count += 1
-        self.any_spawned = True
-        greenlet = self.group.spawn(capture_traceback, func, *args, **kwargs)
-        greenlet.link(self._finish)
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, type_, value, traceback):
-        if value is not None:
-            self.group.kill(block=True)
-            return False
-
-        try:
-            # raises if any greenlets exited with an exception
-            for result in self:
-                log.debug('result is %s', repr(result))
-                pass
-        except Exception:
-            # Emit message here because traceback gets stomped when we re-raise
-            log.exception("Exception in parallel execution")
-            self.group.kill(block=True)
-            raise
-        return True
-
-    def __iter__(self):
-        return self
-
-    def next(self):
-        if not self.any_spawned or self.iteration_stopped:
-            raise StopIteration()
-        result = self.results.get()
-
-        try:
-            resurrect_traceback(result)
-        except StopIteration:
-            self.iteration_stopped = True
-            raise
-
-        return result
-
-    def _finish(self, greenlet):
-        if greenlet.successful():
-            self.results.put(greenlet.value)
-        else:
-            self.results.put(greenlet.exception)
-
-        self.count -= 1
-        if self.count <= 0:
-            self.results.put(StopIteration())
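A self-contained example of the context manager defined above, following its own docstring: results arrive in completion order, and a failure in any greenlet is re-raised when the with block ends.

    import gevent
    from teuthology.parallel import parallel

    def fetch(delay, value):
        gevent.sleep(delay)
        return value

    with parallel() as p:
        for delay, value in [(0.2, 'slow'), (0.0, 'fast')]:
            p.spawn(fetch, delay, value)
        for result in p:
            print result        # completion order: 'fast', then 'slow'
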
diff --git a/teuthology/report.py b/teuthology/report.py
deleted file mode 100644 (file)
index 2325fad..0000000
+++ /dev/null
@@ -1,482 +0,0 @@
-import os
-import yaml
-import json
-import re
-import requests
-import logging
-import socket
-from datetime import datetime
-
-import teuthology
-from .config import config
-
-
-def init_logging():
-    """
-    Set up logging for the module
-
-    :returns: a logger
-    """
-    # Don't need to see connection pool INFO messages
-    logging.getLogger("requests.packages.urllib3.connectionpool").setLevel(
-        logging.WARNING)
-
-    log = logging.getLogger(__name__)
-    return log
-
-
-def main(args):
-    run = args['--run']
-    job = args['--job']
-    dead = args['--dead']
-    refresh = dead or args['--refresh']
-    server = args['--server']
-    if server:
-        config.results_server = server
-    if args['--verbose']:
-        teuthology.log.setLevel(logging.DEBUG)
-
-    archive_base = os.path.abspath(os.path.expanduser(args['--archive']))
-    save = not args['--no-save']
-
-    log = init_logging()
-    reporter = ResultsReporter(archive_base, save=save, refresh=refresh,
-                               log=log)
-    if dead and not job:
-        for run_name in run:
-            reporter.report_run(run[0], dead=True)
-    elif dead and len(run) == 1 and job:
-        reporter.report_jobs(run[0], job, dead=True)
-    elif len(run) == 1 and job:
-        reporter.report_jobs(run[0], job)
-    elif run and len(run) > 1:
-        reporter.report_runs(run)
-    elif run:
-        reporter.report_run(run[0])
-    elif args['--all-runs']:
-        reporter.report_all_runs()
-
-
-class ResultsSerializer(object):
-    """
-    This class exists to poke around in the archive directory doing things like
-    assembling lists of test runs, lists of their jobs, and merging sets of job
-    YAML files together to form JSON objects.
-    """
-    yamls = ('orig.config.yaml', 'config.yaml', 'info.yaml', 'summary.yaml')
-
-    def __init__(self, archive_base, log=None):
-        self.archive_base = archive_base
-        self.log = log or init_logging()
-
-    def job_info(self, run_name, job_id, pretty=False):
-        """
-        Given a run name and job id, merge the job's YAML files together.
-
-        :param run_name: The name of the run.
-        :param job_id:   The job's id.
-        :returns:        A dict.
-        """
-        job_archive_dir = os.path.join(self.archive_base,
-                                       run_name,
-                                       job_id)
-        job_info = {}
-        for yaml_name in self.yamls:
-            yaml_path = os.path.join(job_archive_dir, yaml_name)
-            if not os.path.exists(yaml_path):
-                continue
-            with file(yaml_path) as yaml_file:
-                partial_info = yaml.safe_load(yaml_file)
-                if partial_info is not None:
-                    job_info.update(partial_info)
-
-        log_path = os.path.join(job_archive_dir, 'teuthology.log')
-        if os.path.exists(log_path):
-            mtime = int(os.path.getmtime(log_path))
-            mtime_dt = datetime.fromtimestamp(mtime)
-            job_info['updated'] = str(mtime_dt)
-
-        if 'job_id' not in job_info:
-            job_info['job_id'] = job_id
-
-        return job_info
-
-    def json_for_job(self, run_name, job_id, pretty=False):
-        """
-        Given a run name and job id, merge the job's YAML files together to
-        create a JSON object.
-
-        :param run_name: The name of the run.
-        :param job_id:   The job's id.
-        :returns:        A JSON object.
-        """
-        job_info = self.job_info(run_name, job_id, pretty)
-        if pretty:
-            job_json = json.dumps(job_info, sort_keys=True, indent=4)
-        else:
-            job_json = json.dumps(job_info)
-
-        return job_json
-
-    def jobs_for_run(self, run_name):
-        """
-        Given a run name, look on the filesystem for directories containing job
-        information, and return a dict mapping job IDs to job directories.
-
-        :param run_name: The name of the run.
-        :returns:        A dict like: {'1': '/path/to/1', '2': 'path/to/2'}
-        """
-        archive_dir = os.path.join(self.archive_base, run_name)
-        if not os.path.isdir(archive_dir):
-            return {}
-        jobs = {}
-        for item in os.listdir(archive_dir):
-            if not re.match('\d+$', item):
-                continue
-            job_id = item
-            job_dir = os.path.join(archive_dir, job_id)
-            if os.path.isdir(job_dir):
-                jobs[job_id] = job_dir
-        return jobs
-
-    def running_jobs_for_run(self, run_name):
-        """
-        Like jobs_for_run(), but only returns jobs with no summary.yaml
-
-        :param run_name: The name of the run.
-        :returns:        A dict like: {'1': '/path/to/1', '2': 'path/to/2'}
-        """
-        jobs = self.jobs_for_run(run_name)
-        for job_id in jobs.keys():
-            if os.path.exists(os.path.join(jobs[job_id], 'summary.yaml')):
-                jobs.pop(job_id)
-        return jobs
-
-    @property
-    def all_runs(self):
-        """
-        Look in the base archive directory for all test runs. Return a list of
-        their names.
-        """
-        archive_base = self.archive_base
-        if not os.path.isdir(archive_base):
-            return []
-        runs = []
-        for run_name in os.listdir(archive_base):
-            if not os.path.isdir(os.path.join(archive_base, run_name)):
-                continue
-            runs.append(run_name)
-        return runs
-
-
-class ResultsReporter(object):
-    last_run_file = 'last_successful_run'
-
-    def __init__(self, archive_base=None, base_uri=None, save=False,
-                 refresh=False, log=None):
-        self.log = log or init_logging()
-        self.archive_base = archive_base or config.archive_base
-        self.base_uri = base_uri or config.results_server
-        if self.base_uri:
-            self.base_uri = self.base_uri.rstrip('/')
-        self.serializer = ResultsSerializer(archive_base, log=self.log)
-        self.save_last_run = save
-        self.refresh = refresh
-        self.session = self._make_session()
-
-    def _make_session(self, max_retries=10):
-        session = requests.Session()
-        adapter = requests.adapters.HTTPAdapter(max_retries=max_retries)
-        session.mount('http://', adapter)
-        return session
-
-    def report_all_runs(self):
-        """
-        Report *all* runs in self.archive_dir to the results server.
-        """
-        all_runs = self.serializer.all_runs
-        last_run = self.last_run
-        if self.save_last_run and last_run and last_run in all_runs:
-            next_index = all_runs.index(last_run) + 1
-            runs = all_runs[next_index:]
-        else:
-            runs = all_runs
-        return self.report_runs(runs)
-
-    def report_runs(self, run_names):
-        """
-        Report several runs to the results server.
-
-        :param run_names: The names of the runs.
-        """
-        num_runs = len(run_names)
-        num_jobs = 0
-        self.log.info("Posting %s runs", num_runs)
-        for run in run_names:
-            job_count = self.report_run(run)
-            num_jobs += job_count
-            if self.save_last_run:
-                self.last_run = run
-        del self.last_run
-        self.log.info("Total: %s jobs in %s runs", num_jobs, len(run_names))
-
-    def report_run(self, run_name, dead=False):
-        """
-        Report a single run to the results server.
-
-        :param run_name: The name of the run.
-        :returns:        The number of jobs reported.
-        """
-        jobs = self.serializer.jobs_for_run(run_name)
-        self.log.info("{name} {jobs} jobs dead={dead}".format(
-            name=run_name,
-            jobs=len(jobs),
-            dead=str(dead),
-        ))
-        if jobs:
-            if not self.refresh:
-                response = self.session.head("{base}/runs/{name}/".format(
-                    base=self.base_uri, name=run_name))
-                if response.status_code == 200:
-                    self.log.info("    already present; skipped")
-                    return 0
-            self.report_jobs(run_name, jobs.keys(), dead=dead)
-        elif not jobs:
-            self.log.debug("    no jobs; skipped")
-        return len(jobs)
-
-    def report_jobs(self, run_name, job_ids, dead=False):
-        """
-        Report several jobs to the results server.
-
-        :param run_name: The name of the run.
-        :param job_ids:  The jobs' ids
-        """
-        for job_id in job_ids:
-            self.report_job(run_name, job_id, dead=dead)
-
-    def report_job(self, run_name, job_id, job_info=None, dead=False):
-        """
-        Report a single job to the results server.
-
-        :param run_name: The name of the run. The run must already exist.
-        :param job_id:   The job's id
-        :param job_info: The job's info dict. Optional - if not present, we
-                         look at the archive.
-        """
-        if job_info is not None and not isinstance(job_info, dict):
-            raise TypeError("job_info must be a dict")
-        run_uri = "{base}/runs/{name}/jobs/".format(
-            base=self.base_uri, name=run_name,)
-        if job_info is None:
-            job_info = self.serializer.job_info(run_name, job_id)
-        if dead and job_info.get('success') is None:
-            job_info['status'] = 'dead'
-        job_json = json.dumps(job_info)
-        headers = {'content-type': 'application/json'}
-        response = self.session.post(run_uri, data=job_json, headers=headers)
-
-        if response.status_code == 200:
-            return job_id
-
-        # This call is wrapped in a try/except because of:
-        #  http://tracker.ceph.com/issues/8166
-        try:
-            resp_json = response.json()
-        except ValueError:
-            resp_json = dict()
-
-        if resp_json:
-            msg = resp_json.get('message', '')
-        else:
-            msg = response.text
-
-        if msg and msg.endswith('already exists'):
-            job_uri = os.path.join(run_uri, job_id, '')
-            response = self.session.put(job_uri, data=job_json,
-                                        headers=headers)
-        elif msg:
-            self.log.error(
-                "POST to {uri} failed with status {status}: {msg}".format(
-                    uri=run_uri,
-                    status=response.status_code,
-                    msg=msg,
-                ))
-        response.raise_for_status()
-
-        return job_id
-
-    @property
-    def last_run(self):
-        """
-        The last run to be successfully reported.
-        """
-        if hasattr(self, '__last_run'):
-            return self.__last_run
-        elif os.path.exists(self.last_run_file):
-            with file(self.last_run_file) as f:
-                self.__last_run = f.read().strip()
-            return self.__last_run
-
-    @last_run.setter
-    def last_run(self, run_name):
-        self.__last_run = run_name
-        with file(self.last_run_file, 'w') as f:
-            f.write(run_name)
-
-    @last_run.deleter
-    def last_run(self):
-        self.__last_run = None
-        if os.path.exists(self.last_run_file):
-            os.remove(self.last_run_file)
-
-    def get_jobs(self, run_name, fields=None):
-        """
-        Query the results server for jobs in a run
-
-        :param run_name: The name of the run
-        :param fields:   Optional. A list of fields to include in the result.
-                         Defaults to returning all fields.
-        """
-        uri = "{base}/runs/{name}/jobs/".format(base=self.base_uri,
-                                                name=run_name)
-        if fields:
-            if not 'job_id' in fields:
-                fields.append('job_id')
-            uri += "?fields=" + ','.join(fields)
-        response = self.session.get(uri)
-        response.raise_for_status()
-        return response.json()
-
-    def delete_job(self, run_name, job_id):
-        """
-        Delete a job from the results server.
-
-        :param run_name: The name of the run
-        :param job_id:   The job's id
-        """
-        uri = "{base}/runs/{name}/jobs/{job_id}/".format(
-            base=self.base_uri, name=run_name, job_id=job_id)
-        response = self.session.delete(uri)
-        response.raise_for_status()
-
-    def delete_jobs(self, run_name, job_ids):
-        """
-        Delete multiple jobs from the results server.
-
-        :param run_name: The name of the run
-        :param job_ids:  A list of job ids
-        """
-        for job_id in job_ids:
-            self.delete_job(self, run_name, job_id)
-
-    def delete_run(self, run_name):
-        """
-        Delete a run from the results server.
-
-        :param run_name: The name of the run
-        """
-        uri = "{base}/runs/{name}/".format(
-            base=self.base_uri, name=run_name)
-        response = self.session.delete(uri)
-        response.raise_for_status()
-
-
-def push_job_info(run_name, job_id, job_info, base_uri=None):
-    """
-    Push a job's info (example: ctx.config) to the results server.
-
-    :param run_name: The name of the run.
-    :param job_id:   The job's id
-    :param job_info: A dict containing the job's information.
-    :param base_uri: The endpoint of the results server. If you leave it out
-                     ResultsReporter will ask teuthology.config.
-    """
-    reporter = ResultsReporter()
-    reporter.report_job(run_name, job_id, job_info)
-
-
-def try_push_job_info(job_config, extra_info=None):
-    """
-    Wrap push_job_info, gracefully doing nothing if:
-        Anything inheriting from requests.exceptions.RequestException is raised
-        A socket.error is raised
-        config.results_server is not set
-        config['job_id'] is not present or is None
-
-    :param job_config: The ctx.config object to push
-    :param extra_info: Optional second dict to push
-    """
-    log = init_logging()
-
-    if not config.results_server:
-        msg = "No results_server set in {yaml}; not attempting to push results"
-        log.debug(msg.format(yaml=config.teuthology_yaml))
-        return
-    elif job_config.get('job_id') is None:
-        log.warning('No job_id found; not reporting results')
-        return
-
-    run_name = job_config['name']
-    job_id = job_config['job_id']
-
-    if extra_info is not None:
-        job_info = extra_info.copy()
-        job_info.update(job_config)
-    else:
-        job_info = job_config
-
-    try:
-        log.debug("Pushing job info to %s", config.results_server)
-        push_job_info(run_name, job_id, job_info)
-        return
-    except (requests.exceptions.RequestException, socket.error):
-        log.exception("Could not report results to %s",
-                      config.results_server)
-
-
-def try_delete_jobs(run_name, job_ids, delete_empty_run=True):
-    """
-    Using the same error checking and retry mechanism as try_push_job_info(),
-    delete one or more jobs
-
-    :param run_name:         The name of the run.
-    :param job_ids:          Either a single job_id, or a list of job_ids
-    :param delete_empty_run: If this would empty the run, delete it.
-    """
-    log = init_logging()
-
-    if not config.results_server:
-        msg = "No results_server set in {yaml}; not attempting to delete job"
-        log.debug(msg.format(yaml=config.teuthology_yaml))
-        return
-
-    if isinstance(job_ids, int):
-        job_ids = [str(job_ids)]
-    elif isinstance(job_ids, basestring):
-        job_ids = [job_ids]
-
-    reporter = ResultsReporter()
-
-    log.debug("Deleting jobs from {server}: {jobs}".format(
-        server=config.results_server, jobs=str(job_ids)))
-
-    if delete_empty_run:
-        got_jobs = reporter.get_jobs(run_name, fields=['job_id'])
-        got_job_ids = [j['job_id'] for j in got_jobs]
-        if sorted(got_job_ids) == sorted(job_ids):
-            try:
-                reporter.delete_run(run_name)
-                return
-            except (requests.exceptions.RequestException, socket.error):
-                log.exception("Run deletion failed")
-
-    def try_delete_job(job_id):
-            try:
-                reporter.delete_job(run_name, job_id)
-                return
-            except (requests.exceptions.RequestException, socket.error):
-                log.exception("Job deletion failed")
-
-    for job_id in job_ids:
-        try_delete_job(job_id)
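A sketch of how a task might report its outcome through the helpers above; the field names mirror the YAML keys merged by ResultsSerializer, and the values here are made up for illustration:

    from teuthology.report import try_push_job_info

    job_config = {
        'name': 'teuthology-2014-08-07_example-run',   # hypothetical run name
        'job_id': '12345',
        'description': 'rados/basic example job',
    }
    try_push_job_info(job_config, extra_info={'status': 'pass', 'success': True})
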
diff --git a/teuthology/results.py b/teuthology/results.py
deleted file mode 100644 (file)
index b80684c..0000000
+++ /dev/null
@@ -1,252 +0,0 @@
-import os
-import sys
-import time
-import yaml
-import logging
-import subprocess
-from textwrap import dedent
-from textwrap import fill
-
-import teuthology
-from teuthology import misc
-from teuthology import suite
-from .report import ResultsSerializer
-
-log = logging.getLogger(__name__)
-
-
-def main(args):
-
-    log = logging.getLogger(__name__)
-    if args.verbose:
-        teuthology.log.setLevel(logging.DEBUG)
-
-    misc.read_config(args)
-
-    handler = logging.FileHandler(
-        filename=os.path.join(args.archive_dir, 'results.log'),
-    )
-    formatter = logging.Formatter(
-        fmt='%(asctime)s.%(msecs)03d %(levelname)s:%(message)s',
-        datefmt='%Y-%m-%dT%H:%M:%S',
-    )
-    handler.setFormatter(formatter)
-    logging.getLogger().addHandler(handler)
-
-    try:
-        results(args)
-    except Exception:
-        log.exception('error generating results')
-        raise
-
-
-def results(args):
-    archive_base = os.path.split(args.archive_dir)[0]
-    serializer = ResultsSerializer(archive_base)
-    starttime = time.time()
-
-    log.info('Waiting up to %d seconds for tests to finish...', args.timeout)
-    while serializer.running_jobs_for_run(args.name) and args.timeout > 0:
-        if time.time() - starttime > args.timeout:
-            log.warn('test(s) did not finish before timeout of %d seconds',
-                     args.timeout)
-            break
-        time.sleep(10)
-    log.info('Tests finished! gathering results...')
-
-    (subject, body) = build_email_body(args.name, args.archive_dir,
-                                       args.timeout)
-
-    try:
-        if args.email:
-            email_results(
-                subject=subject,
-                from_=args.teuthology_config['results_sending_email'],
-                to=args.email,
-                body=body,
-            )
-    finally:
-        generate_coverage(args)
-
-
-def generate_coverage(args):
-    log.info('starting coverage generation')
-    subprocess.Popen(
-        args=[
-            os.path.join(os.path.dirname(sys.argv[0]), 'teuthology-coverage'),
-            '-v',
-            '-o',
-            os.path.join(args.teuthology_config[
-                         'coverage_output_dir'], args.name),
-            '--html-output',
-            os.path.join(args.teuthology_config[
-                         'coverage_html_dir'], args.name),
-            '--cov-tools-dir',
-            args.teuthology_config['coverage_tools_dir'],
-            args.archive_dir,
-        ],
-    )
-
-
-def email_results(subject, from_, to, body):
-    log.info('Sending results to {to}: {body}'.format(to=to, body=body))
-    import smtplib
-    from email.mime.text import MIMEText
-    msg = MIMEText(body)
-    msg['Subject'] = subject
-    msg['From'] = from_
-    msg['To'] = to
-    log.debug('sending email %s', msg.as_string())
-    smtp = smtplib.SMTP('localhost')
-    smtp.sendmail(msg['From'], [msg['To']], msg.as_string())
-    smtp.quit()
-
-
-def build_email_body(name, archive_dir, timeout):
-    failed = {}
-    hung = {}
-    passed = {}
-
-    for job in suite.get_jobs(archive_dir):
-        job_dir = os.path.join(archive_dir, job)
-        summary_file = os.path.join(job_dir, 'summary.yaml')
-
-        # Unfinished jobs will have no summary.yaml
-        if not os.path.exists(summary_file):
-            info_file = os.path.join(job_dir, 'info.yaml')
-
-            desc = ''
-            if os.path.exists(info_file):
-                with file(info_file) as f:
-                    info = yaml.safe_load(f)
-                    desc = info['description']
-
-            hung[job] = email_templates['hung_templ'].format(
-                job_id=job,
-                desc=desc,
-            )
-            continue
-
-        with file(summary_file) as f:
-            summary = yaml.safe_load(f)
-
-        if summary['success']:
-            passed[job] = email_templates['pass_templ'].format(
-                job_id=job,
-                desc=summary.get('description'),
-                time=int(summary.get('duration', 0)),
-            )
-        else:
-            log = misc.get_http_log_path(archive_dir, job)
-            if log:
-                log_line = email_templates['fail_log_templ'].format(log=log)
-            else:
-                log_line = ''
-            # Transitioning from sentry_events -> sentry_event
-            sentry_events = summary.get('sentry_events')
-            if sentry_events:
-                sentry_event = sentry_events[0]
-            else:
-                sentry_event = summary.get('sentry_event', '')
-            if sentry_event:
-                sentry_line = email_templates['fail_sentry_templ'].format(
-                    sentry_event=sentry_event)
-            else:
-                sentry_line = ''
-
-            # 'fill' is from the textwrap module and it collapses a given
-            # string into multiple lines of a maximum width as specified. We
-            # want 75 characters here so that when we indent by 4 on the next
-            # line, we have 79-character exception paragraphs.
-            reason = fill(summary.get('failure_reason'), 75)
-            reason = '\n'.join(('    ') + line for line in reason.splitlines())
-
-            failed[job] = email_templates['fail_templ'].format(
-                job_id=job,
-                desc=summary.get('description'),
-                time=int(summary.get('duration', 0)),
-                reason=reason,
-                log_line=log_line,
-                sentry_line=sentry_line,
-            )
-
-    maybe_comma = lambda s: ', ' if s else ' '
-
-    subject = ''
-    fail_sect = ''
-    hung_sect = ''
-    pass_sect = ''
-    if failed:
-        subject += '{num_failed} failed{sep}'.format(
-            num_failed=len(failed),
-            sep=maybe_comma(hung or passed)
-        )
-        fail_sect = email_templates['sect_templ'].format(
-            title='Failed',
-            jobs=''.join(failed.values())
-        )
-    if hung:
-        subject += '{num_hung} hung{sep}'.format(
-            num_hung=len(hung),
-            sep=maybe_comma(passed),
-        )
-        hung_sect = email_templates['sect_templ'].format(
-            title='Hung',
-            jobs=''.join(hung.values()),
-        )
-    if passed:
-        subject += '%s passed ' % len(passed)
-        pass_sect = email_templates['sect_templ'].format(
-            title='Passed',
-            jobs=''.join(passed.values()),
-        )
-
-    body = email_templates['body_templ'].format(
-        name=name,
-        log_root=misc.get_http_log_path(archive_dir),
-        fail_count=len(failed),
-        hung_count=len(hung),
-        pass_count=len(passed),
-        fail_sect=fail_sect,
-        hung_sect=hung_sect,
-        pass_sect=pass_sect,
-    )
-
-    subject += 'in {suite}'.format(suite=name)
-    return (subject.strip(), body.strip())
-
-email_templates = {
-    'body_templ': dedent("""\
-        Test Run: {name}
-        =================================================================
-        logs:   {log_root}
-        failed: {fail_count}
-        hung:   {hung_count}
-        passed: {pass_count}
-
-        {fail_sect}{hung_sect}{pass_sect}
-        """),
-    'sect_templ': dedent("""\
-        {title}
-        =================================================================
-        {jobs}
-        """),
-    'fail_templ': dedent("""\
-        [{job_id}]  {desc}
-        -----------------------------------------------------------------
-        time:   {time}s{log_line}{sentry_line}
-
-        {reason}
-
-        """),
-    'fail_log_templ': "\nlog:    {log}",
-    'fail_sentry_templ': "\nsentry: {sentry_event}",
-    'hung_templ': dedent("""\
-        [{job_id}] {desc}
-        """),
-    'pass_templ': dedent("""\
-        [{job_id}] {desc}
-        time:    {time}s
-
-        """),
-}
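For orientation, build_email_body() above only emits a subject fragment and a section for the buckets that are non-empty. A minimal sketch of how the subject string comes together, with a hypothetical run name and job counts:

    # Sketch only: subject assembly for a run named 'example-suite' with
    # 1 failed, 0 hung and 2 passed jobs, mirroring the logic above.
    failed, hung, passed = {'1': '...'}, {}, {'2': '...', '3': '...'}
    maybe_comma = lambda s: ', ' if s else ' '
    subject = ''
    if failed:
        subject += '%d failed%s' % (len(failed), maybe_comma(hung or passed))
    if hung:
        subject += '%d hung%s' % (len(hung), maybe_comma(passed))
    if passed:
        subject += '%s passed ' % len(passed)
    subject += 'in example-suite'
    assert subject.strip() == '1 failed, 2 passed in example-suite'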
diff --git a/teuthology/run.py b/teuthology/run.py
deleted file mode 100644 (file)
index eba7fb3..0000000
+++ /dev/null
@@ -1,187 +0,0 @@
-import os
-import yaml
-import StringIO
-import contextlib
-import sys
-import logging
-from traceback import format_tb
-
-import teuthology
-from . import report
-from .misc import get_distro
-from .misc import get_user
-from .misc import read_config
-from .nuke import nuke
-from .run_tasks import run_tasks
-from .results import email_results
-
-
-def set_up_logging(ctx):
-    if ctx.verbose:
-        teuthology.log.setLevel(logging.DEBUG)
-
-    if ctx.archive is not None:
-        os.mkdir(ctx.archive)
-
-        handler = logging.FileHandler(
-            filename=os.path.join(ctx.archive, 'teuthology.log'),
-        )
-        formatter = logging.Formatter(
-            fmt='%(asctime)s.%(msecs)03d %(levelname)s:%(name)s:%(message)s',
-            datefmt='%Y-%m-%dT%H:%M:%S',
-        )
-        handler.setFormatter(formatter)
-        logging.getLogger().addHandler(handler)
-
-    install_except_hook()
-
-
-def install_except_hook():
-    def log_exception(exception_class, exception, traceback):
-        logging.critical(''.join(format_tb(traceback)))
-        if not exception.message:
-            logging.critical(exception_class.__name__)
-            return
-        logging.critical('{0}: {1}'.format(
-            exception_class.__name__, exception))
-
-    sys.excepthook = log_exception
-
-
-def write_initial_metadata(ctx):
-    if ctx.archive is not None:
-        with file(os.path.join(ctx.archive, 'pid'), 'w') as f:
-            f.write('%d' % os.getpid())
-
-        with file(os.path.join(ctx.archive, 'owner'), 'w') as f:
-            f.write(ctx.owner + '\n')
-
-        with file(os.path.join(ctx.archive, 'orig.config.yaml'), 'w') as f:
-            yaml.safe_dump(ctx.config, f, default_flow_style=False)
-
-        info = {
-            'name': ctx.name,
-            'description': ctx.description,
-            'owner': ctx.owner,
-            'pid': os.getpid(),
-        }
-        if 'job_id' in ctx.config:
-            info['job_id'] = ctx.config['job_id']
-
-        with file(os.path.join(ctx.archive, 'info.yaml'), 'w') as f:
-            yaml.safe_dump(info, f, default_flow_style=False)
-
-
-def main(ctx):
-    set_up_logging(ctx)
-    log = logging.getLogger(__name__)
-
-    if ctx.owner is None:
-        ctx.owner = get_user()
-
-    # Older versions of teuthology stored job_id as an int. Convert it to a str
-    # if necessary.
-    job_id = ctx.config.get('job_id')
-    if job_id is not None:
-        job_id = str(job_id)
-        ctx.config['job_id'] = job_id
-
-    write_initial_metadata(ctx)
-    report.try_push_job_info(ctx.config, dict(status='running'))
-
-    if 'targets' in ctx.config and 'roles' in ctx.config:
-        targets = len(ctx.config['targets'])
-        roles = len(ctx.config['roles'])
-        assert targets >= roles, \
-            '%d targets are needed for all roles but found %d listed.' % (
-                roles, targets)
-
-    machine_type = ctx.machine_type
-    if machine_type is None:
-        fallback_default = ctx.config.get('machine_type', 'plana')
-        machine_type = ctx.config.get('machine-type', fallback_default)
-
-    if ctx.block:
-        assert ctx.lock, \
-            'the --block option is only supported with the --lock option'
-
-    read_config(ctx)
-
-    log.debug('\n  '.join(['Config:', ] + yaml.safe_dump(
-        ctx.config, default_flow_style=False).splitlines()))
-
-    ctx.summary = dict(success=True)
-
-    ctx.summary['owner'] = ctx.owner
-
-    if ctx.description is not None:
-        ctx.summary['description'] = ctx.description
-
-    for task in ctx.config['tasks']:
-        msg = ('kernel installation should be a base-level item, not part ' +
-               'of the tasks list')
-        assert 'kernel' not in task, msg
-
-    init_tasks = []
-    if ctx.lock:
-        msg = ('You cannot specify targets in a config file when using the ' +
-               '--lock option')
-        assert 'targets' not in ctx.config, msg
-        init_tasks.append({'internal.lock_machines': (
-            len(ctx.config['roles']), machine_type)})
-
-    init_tasks.extend([
-        {'internal.save_config': None},
-        {'internal.check_lock': None},
-        {'internal.connect': None},
-        {'internal.serialize_remote_roles': None},
-        {'internal.check_conflict': None},
-    ])
-    if not ctx.config.get('use_existing_cluster', False):
-        init_tasks.extend([
-            {'internal.check_ceph_data': None},
-            {'internal.vm_setup': None},
-        ])
-    if 'kernel' in ctx.config:
-        init_tasks.append({'kernel': ctx.config['kernel']})
-    init_tasks.extend([
-        {'internal.base': None},
-        {'internal.archive': None},
-        {'internal.coredump': None},
-        {'internal.sudo': None},
-        {'internal.syslog': None},
-        {'internal.timer': None},
-    ])
-
-    ctx.config['tasks'][:0] = init_tasks
-
-    try:
-        run_tasks(tasks=ctx.config['tasks'], ctx=ctx)
-    finally:
-        if not ctx.summary.get('success') and ctx.config.get('nuke-on-error'):
-            # only unlock if we locked them in the first place
-            nuke(ctx, ctx.lock)
-        if ctx.archive is not None:
-            with file(os.path.join(ctx.archive, 'summary.yaml'), 'w') as f:
-                yaml.safe_dump(ctx.summary, f, default_flow_style=False)
-        with contextlib.closing(StringIO.StringIO()) as f:
-            yaml.safe_dump(ctx.summary, f)
-            log.info('Summary data:\n%s' % f.getvalue())
-        with contextlib.closing(StringIO.StringIO()) as f:
-            if ('email-on-error' in ctx.config
-                    and not ctx.summary.get('success', False)):
-                yaml.safe_dump(ctx.summary, f)
-                yaml.safe_dump(ctx.config, f)
-                emsg = f.getvalue()
-                subject = "Teuthology error -- %s" % ctx.summary[
-                    'failure_reason']
-                email_results(subject, "Teuthology", ctx.config[
-                              'email-on-error'], emsg)
-
-        report.try_push_job_info(ctx.config, ctx.summary)
-
-        if ctx.summary.get('success', True):
-            log.info('pass')
-        else:
-            log.info('FAIL')
-            sys.exit(1)
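For reference, main() above builds the final task list by splicing internal bootstrap tasks in front of the job's own tasks (ctx.config['tasks'][:0] = init_tasks). A sketch of the effective ordering for a locked run whose config carries a 'kernel' stanza and does not set 'use_existing_cluster'; the role count and kernel value here are hypothetical:

    effective_tasks = [
        {'internal.lock_machines': (3, 'plana')},   # only when --lock was given
        {'internal.save_config': None},
        {'internal.check_lock': None},
        {'internal.connect': None},
        {'internal.serialize_remote_roles': None},
        {'internal.check_conflict': None},
        {'internal.check_ceph_data': None},         # skipped for existing clusters
        {'internal.vm_setup': None},                # skipped for existing clusters
        {'kernel': {'branch': 'testing'}},          # hypothetical kernel config
        {'internal.base': None},
        {'internal.archive': None},
        {'internal.coredump': None},
        {'internal.sudo': None},
        {'internal.syslog': None},
        {'internal.timer': None},
        # ... the tasks from the job's own YAML follow here ...
    ]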
diff --git a/teuthology/run_tasks.py b/teuthology/run_tasks.py
deleted file mode 100644 (file)
index b709024..0000000
+++ /dev/null
@@ -1,124 +0,0 @@
-import sys
-import logging
-from .sentry import get_client as get_sentry_client
-from .misc import get_http_log_path
-from .config import config as teuth_config
-from copy import deepcopy
-
-log = logging.getLogger(__name__)
-
-
-def run_one_task(taskname, **kwargs):
-    submod = taskname
-    subtask = 'task'
-    if '.' in taskname:
-        (submod, subtask) = taskname.rsplit('.', 1)
-
-    # Teuthology configs may refer to modules like ceph_deploy as ceph-deploy
-    submod = submod.replace('-', '_')
-
-    parent = __import__('teuthology.task', globals(), locals(), [submod], 0)
-    try:
-        mod = getattr(parent, submod)
-    except AttributeError:
-        log.error("No task named %s was found", submod)
-        raise
-    try:
-        fn = getattr(mod, subtask)
-    except AttributeError:
-        log.error("No subtask of %s named %s was found", mod, subtask)
-        raise
-    return fn(**kwargs)
-
-
-def run_tasks(tasks, ctx):
-    stack = []
-    try:
-        for taskdict in tasks:
-            try:
-                ((taskname, config),) = taskdict.iteritems()
-            except ValueError:
-                raise RuntimeError('Invalid task definition: %s' % taskdict)
-            log.info('Running task %s...', taskname)
-            manager = run_one_task(taskname, ctx=ctx, config=config)
-            if hasattr(manager, '__enter__'):
-                manager.__enter__()
-                stack.append((taskname, manager))
-    except Exception as e:
-        ctx.summary['success'] = False
-        if 'failure_reason' not in ctx.summary:
-            ctx.summary['failure_reason'] = str(e)
-        log.exception('Saw exception from tasks.')
-
-        sentry = get_sentry_client()
-        if sentry:
-            config = deepcopy(ctx.config)
-
-            tags = {
-                'task': taskname,
-                'owner': ctx.owner,
-            }
-            if 'teuthology_branch' in config:
-                tags['teuthology_branch'] = config['teuthology_branch']
-
-            # Remove ssh keys from reported config
-            if 'targets' in config:
-                targets = config['targets']
-                for host in targets.keys():
-                    targets[host] = '<redacted>'
-
-            job_id = ctx.config.get('job_id')
-            archive_path = ctx.config.get('archive_path')
-            extra = {
-                'config': config,
-                'logs': get_http_log_path(archive_path, job_id),
-            }
-            exc_id = sentry.get_ident(sentry.captureException(
-                tags=tags,
-                extra=extra,
-            ))
-            event_url = "{server}/search?q={id}".format(
-                server=teuth_config.sentry_server.strip('/'), id=exc_id)
-            log.exception(" Sentry event: %s" % event_url)
-            ctx.summary['sentry_event'] = event_url
-
-        if ctx.config.get('interactive-on-error'):
-            from .task import interactive
-            log.warning('Saw failure, going into interactive mode...')
-            interactive.task(ctx=ctx, config=None)
-    finally:
-        try:
-            exc_info = sys.exc_info()
-            while stack:
-                taskname, manager = stack.pop()
-                log.debug('Unwinding manager %s', taskname)
-                try:
-                    suppress = manager.__exit__(*exc_info)
-                except Exception as e:
-                    ctx.summary['success'] = False
-                    if 'failure_reason' not in ctx.summary:
-                        ctx.summary['failure_reason'] = str(e)
-                    log.exception('Manager failed: %s', taskname)
-
-                    if exc_info == (None, None, None):
-                        # if first failure is in an __exit__, we don't
-                        # have exc_info set yet
-                        exc_info = sys.exc_info()
-
-                    if ctx.config.get('interactive-on-error'):
-                        from .task import interactive
-                        log.warning(
-                            'Saw failure, going into interactive mode...')
-                        interactive.task(ctx=ctx, config=None)
-                else:
-                    if suppress:
-                        sys.exc_clear()
-                        exc_info = (None, None, None)
-
-            if exc_info != (None, None, None):
-                log.debug('Exception was not quenched, exiting: %s: %s',
-                          exc_info[0].__name__, exc_info[1])
-                raise SystemExit(1)
-        finally:
-            # be careful about cyclic references
-            del exc_info
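run_tasks() above treats each task as an optional context manager: __enter__ is called as tasks start, and the stack is unwound in reverse through __exit__ once the run finishes or fails. A minimal sketch of a task written in that style (a hypothetical module, using the same generator-based convention as the task modules later in this diff):

    import contextlib
    import logging

    log = logging.getLogger(__name__)

    @contextlib.contextmanager
    def task(ctx, config):
        # Setup runs when run_tasks() enters the manager and pushes it onto
        # the stack.
        log.info('example setup with config %s', config)
        try:
            yield
        finally:
            # Teardown runs when run_tasks() unwinds the stack, whether the
            # run passed or a later task raised.
            log.info('example teardown')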
diff --git a/teuthology/safepath.py b/teuthology/safepath.py
deleted file mode 100644 (file)
index b8115a2..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-import errno
-import os
-
-def munge(path):
-    """
-    Munge a potentially hostile path name to be safe to use.
-
-    This very definitely changes the meaning of the path,
-    but it only does that for unsafe paths.
-    """
-    # explicitly ignoring windows as a platform
-    segments = path.split('/')
-    # filter out empty segments like foo//bar
-    segments = [s for s in segments if s!='']
-    # filter out no-op segments like foo/./bar
-    segments = [s for s in segments if s!='.']
-    # all leading dots become underscores; makes .. safe too
-    for idx, seg in enumerate(segments):
-        if seg.startswith('.'):
-            segments[idx] = '_'+seg[1:]
-    # empty string, "/", "//", etc
-    if not segments:
-        segments = ['_']
-    return '/'.join(segments)
-
-
-def makedirs(root, path):
-    """
-    os.makedirs gets confused if the path contains '..', and the given root might contain '..'.
-
-    This relies on the fact that `path` has been normalized by munge().
-    """
-    segments = path.split('/')
-    for seg in segments:
-        root = os.path.join(root, seg)
-        try:
-            os.mkdir(root)
-        except OSError as e:
-            if e.errno == errno.EEXIST:
-                pass
-            else:
-                raise
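The expected behaviour of munge() above on a few hostile inputs (the import path is illustrative; the module lives at teuthology/safepath.py in this tree):

    from teuthology.safepath import munge

    assert munge('foo//bar/./baz') == 'foo/bar/baz'         # empty and '.' segments dropped
    assert munge('../../etc/passwd') == '_./_./etc/passwd'  # leading dots neutralized
    assert munge('.hidden/log') == '_hidden/log'
    assert munge('//') == '_'                               # degenerate paths collapse to '_'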
diff --git a/teuthology/schedule.py b/teuthology/schedule.py
deleted file mode 100644 (file)
index a147e3d..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-import yaml
-
-import teuthology.beanstalk
-from teuthology.misc import get_user
-from teuthology.misc import read_config
-from teuthology import report
-
-
-def main(ctx):
-    if ctx.owner is None:
-        ctx.owner = 'scheduled_{user}'.format(user=get_user())
-    read_config(ctx)
-
-    beanstalk = teuthology.beanstalk.connect()
-
-    tube = ctx.worker
-    beanstalk.use(tube)
-
-    if ctx.show:
-        for job_id in ctx.show:
-            job = beanstalk.peek(job_id)
-            if job is None and ctx.verbose:
-                print 'job {jid} is not in the queue'.format(jid=job_id)
-            else:
-                print '--- job {jid} priority {prio} ---\n'.format(
-                    jid=job_id,
-                    prio=job.stats()['pri']), job.body
-        return
-
-    if ctx.delete:
-        for job_id in ctx.delete:
-            job = beanstalk.peek(job_id)
-            if job is None:
-                print 'job {jid} is not in the queue'.format(jid=job_id)
-            else:
-                job.delete()
-                name = yaml.safe_load(job.body).get('name')
-                if name:
-                    report.try_delete_jobs(name, job_id)
-        return
-
-    # strip out targets; the worker will allocate new ones when we run
-    # the job with --lock.
-    if ctx.config.get('targets'):
-        del ctx.config['targets']
-
-    job_config = dict(
-        name=ctx.name,
-        last_in_suite=ctx.last_in_suite,
-        email=ctx.email,
-        description=ctx.description,
-        owner=ctx.owner,
-        verbose=ctx.verbose,
-        machine_type=ctx.worker,
-    )
-    # Merge job_config and ctx.config
-    job_config.update(ctx.config)
-    if ctx.timeout is not None:
-        job_config['results_timeout'] = ctx.timeout
-
-    job = yaml.safe_dump(job_config)
-    num = ctx.num
-    while num > 0:
-        jid = beanstalk.put(
-            job,
-            ttr=60 * 60 * 24,
-            priority=ctx.priority,
-        )
-        print 'Job scheduled with name {name} and ID {jid}'.format(
-            name=ctx.name, jid=jid)
-        job_config['job_id'] = str(jid)
-        report.try_push_job_info(job_config, dict(status='queued'))
-        num -= 1
diff --git a/teuthology/sentry.py b/teuthology/sentry.py
deleted file mode 100644 (file)
index 3d83a29..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-import logging
-from raven import Client
-from .config import config
-
-log = logging.getLogger(__name__)
-
-client = None
-
-
-def get_client():
-    global client
-    if client:
-        return client
-
-    dsn = config.sentry_dsn
-    if dsn:
-        client = Client(dsn=dsn)
-        return client
diff --git a/teuthology/suite.py b/teuthology/suite.py
deleted file mode 100644 (file)
index e1fd4cb..0000000
+++ /dev/null
@@ -1,321 +0,0 @@
-# this file is responsible for submitting tests into the queue
-# by generating combinations of facets found in
-# https://github.com/ceph/ceph-qa-suite.git
-
-import copy
-import errno
-import itertools
-import logging
-import os
-import re
-import subprocess
-import sys
-import yaml
-
-import teuthology
-from teuthology import lock as lock
-
-log = logging.getLogger(__name__)
-
-
-def main(args):
-    if args.verbose:
-        teuthology.log.setLevel(logging.DEBUG)
-
-    base_arg = [
-        os.path.join(os.path.dirname(sys.argv[0]), 'teuthology-schedule'),
-        '--name', args.name,
-        '--num', str(args.num),
-        '--worker', args.worker,
-    ]
-    if args.priority:
-        base_arg.extend(['--priority', str(args.priority)])
-    if args.verbose:
-        base_arg.append('-v')
-    if args.owner:
-        base_arg.extend(['--owner', args.owner])
-
-    collections = [
-        (os.path.join(args.base, collection), collection)
-        for collection in args.collections
-    ]
-    
-    count = 1
-    num_jobs = 0
-    for collection, collection_name in sorted(collections):
-        log.debug('Collection %s in %s' % (collection_name, collection))
-        configs = [(combine_path(collection_name, item[0]), item[1])
-                   for item in build_matrix(collection)]
-        log.info('Collection %s in %s generated %d jobs' %
-                 (collection_name, collection, len(configs)))
-        num_jobs += len(configs)
-
-        arch = get_arch(args.config)
-        machine_type = get_machine_type(args.config)
-        for description, config in configs:
-            if args.limit > 0:
-                if count > args.limit:
-                    log.info('Stopped after {limit} jobs due to --limit={limit}'.format(
-                        limit=args.limit))
-                    break
-            raw_yaml = '\n'.join([file(a, 'r').read() for a in config])
-
-            parsed_yaml = yaml.load(raw_yaml)
-            os_type = parsed_yaml.get('os_type')
-            exclude_arch = parsed_yaml.get('exclude_arch')
-            exclude_os_type = parsed_yaml.get('exclude_os_type')
-
-            if exclude_arch:
-                if exclude_arch == arch:
-                    log.info('Skipping due to exclude_arch: %s facets %s',
-                             exclude_arch, description)
-                    continue
-            if exclude_os_type:
-                if exclude_os_type == os_type:
-                    log.info('Skipping due to exclude_os_type: %s facets %s',
-                             exclude_os_type, description)
-                    continue
-            # We should not run multiple tests (changing distros) unless the
-            # machine is a VPS.
-            # Re-imaging baremetal is not yet supported.
-            if machine_type != 'vps':
-                if os_type and os_type != 'ubuntu':
-                    log.info(
-                        'Skipping due to non-ubuntu on baremetal facets %s',
-                        description)
-                    continue
-
-            log.info(
-                'Scheduling %s', description
-            )
-
-            arg = copy.deepcopy(base_arg)
-            arg.extend([
-                '--description', description,
-                '--',
-            ])
-            arg.extend(args.config)
-            arg.extend(config)
-
-            if args.dry_run:
-                log.info('dry-run: %s' % ' '.join(arg))
-            else:
-                subprocess.check_call(
-                    args=arg,
-                )
-            count += 1
-
-    if num_jobs:
-        arg = copy.deepcopy(base_arg)
-        arg.append('--last-in-suite')
-        if args.email:
-            arg.extend(['--email', args.email])
-        if args.timeout:
-            arg.extend(['--timeout', args.timeout])
-        if args.dry_run:
-            log.info('dry-run: %s' % ' '.join(arg))
-        else:
-            subprocess.check_call(
-                args=arg,
-            )
-
-
-def combine_path(left, right):
-    """
-    os.path.join(a, b) doesn't like it when b is None
-    """
-    if right:
-        return os.path.join(left, right)
-    return left
-
-
-def build_matrix(path):
-    """
-    Return a list of items described by path
-
-    The input is just a path.  The output is an array of (description,
-    [file list]) tuples.
-
-    For a normal file we generate a new item for the result list.
-
-    For a directory, we (recursively) generate a new item for each
-    file/dir.
-
-    For a directory with a magic '+' file, we generate a single item
-    that concatenates all files/subdirs.
-
-    For a directory with a magic '%' file, we generate a result set
-    for each item in the directory, and then do a product to generate
-    a result list with all combinations.
-
-    The final description (after recursion) for each item will look
-    like a relative path.  If there was a % product, that path
-    component will appear as a file with braces listing the selection
-    of chosen subitems.
-    """
-    if os.path.isfile(path):
-        if path.endswith('.yaml'):
-            return [(None, [path])]
-    if os.path.isdir(path):
-        files = sorted(os.listdir(path))
-        if '+' in files:
-            # concatenate items
-            files.remove('+')
-            raw = []
-            for fn in files:
-                raw.extend(build_matrix(os.path.join(path, fn)))
-            out = [(
-                '{' + ' '.join(files) + '}',
-                [a[1][0] for a in raw]
-            )]
-            return out
-        elif '%' in files:
-            # convolve items
-            files.remove('%')
-            sublists = []
-            for fn in files:
-                raw = build_matrix(os.path.join(path, fn))
-                sublists.append([(combine_path(fn, item[0]), item[1])
-                                for item in raw])
-            out = []
-            if sublists:
-                for sublist in itertools.product(*sublists):
-                    name = '{' + ' '.join([item[0] for item in sublist]) + '}'
-                    val = []
-                    for item in sublist:
-                        val.extend(item[1])
-                    out.append((name, val))
-            return out
-        else:
-            # list items
-            out = []
-            for fn in files:
-                raw = build_matrix(os.path.join(path, fn))
-                out.extend([(combine_path(fn, item[0]), item[1])
-                           for item in raw])
-            return out
-    return []
-
-
-def ls(archive_dir, verbose):
-    for j in get_jobs(archive_dir):
-        job_dir = os.path.join(archive_dir, j)
-        summary = {}
-        try:
-            with file(os.path.join(job_dir, 'summary.yaml')) as f:
-                g = yaml.safe_load_all(f)
-                for new in g:
-                    summary.update(new)
-        except IOError as e:
-            if e.errno == errno.ENOENT:
-                print '%s      ' % j,
-
-                # pid
-                try:
-                    pidfile = os.path.join(job_dir, 'pid')
-                    found = False
-                    if os.path.isfile(pidfile):
-                        pid = open(pidfile, 'r').read()
-                        if os.path.isdir("/proc/%s" % pid):
-                            cmdline = open('/proc/%s/cmdline' % pid,
-                                           'r').read()
-                            if cmdline.find(archive_dir) >= 0:
-                                print '(pid %s)' % pid,
-                                found = True
-                    if not found:
-                        print '(no process or summary.yaml)',
-                    # tail
-                    tail = os.popen(
-                        'tail -1 %s/%s/teuthology.log' % (archive_dir, j)
-                    ).read().rstrip()
-                    print tail,
-                except IOError as e:
-                    continue
-                print ''
-                continue
-            else:
-                raise
-
-        print "{job} {success} {owner} {desc} {duration}s".format(
-            job=j,
-            owner=summary.get('owner', '-'),
-            desc=summary.get('description', '-'),
-            success='pass' if summary.get('success', False) else 'FAIL',
-            duration=int(summary.get('duration', 0)),
-        )
-        if verbose and 'failure_reason' in summary:
-            print '    {reason}'.format(reason=summary['failure_reason'])
-
-
-def get_jobs(archive_dir):
-    dir_contents = os.listdir(archive_dir)
-
-    def is_job_dir(parent, subdir):
-        if (os.path.isdir(os.path.join(parent, subdir)) and re.match(r'\d+$',
-                                                                     subdir)):
-            return True
-        return False
-
-    jobs = [job for job in dir_contents if is_job_dir(archive_dir, job)]
-    return sorted(jobs)
-
-
-def get_arch(config):
-    for yamlfile in config:
-        y = yaml.safe_load(file(yamlfile))
-        machine_type = y.get('machine_type')
-        if machine_type:
-            locks = lock.list_locks()
-            for machine in locks:
-                if machine['type'] == machine_type:
-                    arch = machine['arch']
-                    return arch
-    return None
-
-
-def get_os_type(configs):
-    for config in configs:
-        yamlfile = config[2]
-        y = yaml.safe_load(file(yamlfile))
-        if not y:
-            y = {}
-        os_type = y.get('os_type')
-        if os_type:
-            return os_type
-    return None
-
-
-def get_exclude_arch(configs):
-    for config in configs:
-        yamlfile = config[2]
-        y = yaml.safe_load(file(yamlfile))
-        if not y:
-            y = {}
-        exclude_arch = y.get('exclude_arch')
-        if exclude_arch:
-            return exclude_arch
-    return None
-
-
-def get_exclude_os_type(configs):
-    for config in configs:
-        yamlfile = config[2]
-        y = yaml.safe_load(file(yamlfile))
-        if not y:
-            y = {}
-        exclude_os_type = y.get('exclude_os_type')
-        if exclude_os_type:
-            return exclude_os_type
-    return None
-
-
-def get_machine_type(config):
-    for yamlfile in config:
-        y = yaml.safe_load(file(yamlfile))
-        if not y:
-            y = {}
-        machine_type = y.get('machine_type')
-        if machine_type:
-            return machine_type
-    return None
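As a concrete illustration of the '%' convolution described in the build_matrix() docstring above, a sketch that builds a throwaway facet tree and prints the generated combinations (the layout and fragment names are hypothetical; the import refers to the module shown above):

    import os
    import tempfile

    root = tempfile.mkdtemp()
    for rel in ('%', 'cluster/large.yaml', 'cluster/small.yaml',
                'workload/radosbench.yaml'):
        path = os.path.join(root, rel)
        if not os.path.isdir(os.path.dirname(path)):
            os.makedirs(os.path.dirname(path))
        open(path, 'w').close()

    from teuthology.suite import build_matrix
    for desc, fragments in build_matrix(root):
        print desc, fragments

    # Expected: one job per cluster/* fragment, each also pulling in the single
    # workload fragment, e.g.
    #   {cluster/large.yaml workload/radosbench.yaml} [...large.yaml, ...radosbench.yaml]
    #   {cluster/small.yaml workload/radosbench.yaml} [...small.yaml, ...radosbench.yaml]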
diff --git a/teuthology/task/__init__.py b/teuthology/task/__init__.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/teuthology/task/adjust-ulimits b/teuthology/task/adjust-ulimits
deleted file mode 100755 (executable)
index 4825049..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/bin/sh
-# If we're running as root, allow large amounts of open files.
-USER=$(whoami)
-
-# If a ulimit call fails, exit immediately.
-set -e
-
-if [ "$USER" = "root" ]
-then
-    # Enable large number of open files
-    ulimit -n 16384
-fi
-
-# Enable core dumps for everything
-ulimit -c unlimited
-exec "$@"
diff --git a/teuthology/task/admin_socket.py b/teuthology/task/admin_socket.py
deleted file mode 100644 (file)
index 20a6701..0000000
+++ /dev/null
@@ -1,192 +0,0 @@
-"""
-Admin Socket task -- used in rados, powercycle, and smoke testing
-"""
-from cStringIO import StringIO
-
-import json
-import logging
-import os
-import time
-
-from ..orchestra import run
-from teuthology import misc as teuthology
-from teuthology.parallel import parallel
-
-log = logging.getLogger(__name__)
-
-
-def task(ctx, config):
-    """
-    Run an admin socket command, make sure the output is json, and run
-    a test program on it. The test program should read json from
-    stdin. This task succeeds if the test program exits with status 0.
-
-    To run the same test on all clients::
-
-        tasks:
-        - ceph:
-        - rados:
-        - admin_socket:
-            all:
-              dump_requests:
-                test: http://example.com/script
-
-    To restrict it to certain clients::
-
-        tasks:
-        - ceph:
-        - rados: [client.1]
-        - admin_socket:
-            client.1:
-              dump_requests:
-                test: http://example.com/script
-
-    If an admin socket command has arguments, they can be specified as
-    a list::
-
-        tasks:
-        - ceph:
-        - rados: [client.0]
-        - admin_socket:
-            client.0:
-              dump_requests:
-                test: http://example.com/script
-              help:
-                test: http://example.com/test_help_version
-                args: [version]
-
-    Note that there must be a ceph client with an admin socket running
-    before this task is run. The tests are parallelized at the client
-    level. Tests for a single client are run serially.
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    assert isinstance(config, dict), \
-        'admin_socket task requires a dict for configuration'
-    teuthology.replace_all_with_clients(ctx.cluster, config)
-
-    with parallel() as ptask:
-        for client, tests in config.iteritems():
-            ptask.spawn(_run_tests, ctx, client, tests)
-
-
-def _socket_command(ctx, remote, socket_path, command, args):
-    """
-    Run an admin socket command and return the result as a string.
-
-    :param ctx: Context
-    :param remote: Remote site
-    :param socket_path: path to socket
-    :param command: command to be run remotely
-    :param args: command arguments
-
-    :returns: output of command in json format
-    """
-    json_fp = StringIO()
-    testdir = teuthology.get_testdir(ctx)
-    max_tries = 60
-    while True:
-        proc = remote.run(
-            args=[
-                'sudo',
-                'adjust-ulimits',
-                'ceph-coverage',
-                '{tdir}/archive/coverage'.format(tdir=testdir),
-                'ceph',
-                '--admin-daemon', socket_path,
-                ] + command.split(' ') + args,
-            stdout=json_fp,
-            check_status=False,
-            )
-        if proc.exitstatus == 0:
-            break
-        assert max_tries > 0
-        max_tries -= 1
-        log.info('ceph cli returned an error, command not registered yet?')
-        log.info('sleeping and retrying ...')
-        time.sleep(1)
-    out = json_fp.getvalue()
-    json_fp.close()
-    log.debug('admin socket command %s returned %s', command, out)
-    return json.loads(out)
-
-def _run_tests(ctx, client, tests):
-    """
-    Create a temp directory and wait for a client socket to be created.
-    For each test, copy the executable locally and run the test.
-    Remove temp directory when finished.
-
-    :param ctx: Context
-    :param client: client machine to run the test
-    :param tests: list of tests to run
-    """
-    testdir = teuthology.get_testdir(ctx)
-    log.debug('Running admin socket tests on %s', client)
-    (remote,) = ctx.cluster.only(client).remotes.iterkeys()
-    socket_path = '/var/run/ceph/ceph-{name}.asok'.format(name=client)
-    overrides = ctx.config.get('overrides', {}).get('admin_socket', {})
-
-    try:
-        tmp_dir = os.path.join(
-            testdir,
-            'admin_socket_{client}'.format(client=client),
-            )
-        remote.run(
-            args=[
-                'mkdir',
-                '--',
-                tmp_dir,
-                run.Raw('&&'),
-                # wait for client process to create the socket
-                'while', 'test', '!', '-e', socket_path, run.Raw(';'),
-                'do', 'sleep', '1', run.Raw(';'), 'done',
-                ],
-            )
-
-        for command, config in tests.iteritems():
-            if config is None:
-                config = {}
-            teuthology.deep_merge(config, overrides)
-            log.debug('Testing %s with config %s', command, str(config))
-
-            test_path = None
-            if 'test' in config:
-                url = config['test'].format(
-                    branch=config.get('branch', 'master')
-                    )
-                test_path = os.path.join(tmp_dir, command)
-                remote.run(
-                    args=[
-                        'wget',
-                        '-q',
-                        '-O',
-                        test_path,
-                        '--',
-                        url,
-                        run.Raw('&&'),
-                        'chmod',
-                        'u=rx',
-                        '--',
-                        test_path,
-                        ],
-                    )
-
-            args = config.get('args', [])
-            assert isinstance(args, list), \
-                'admin socket command args must be a list'
-            sock_out = _socket_command(ctx, remote, socket_path, command, args)
-            if test_path is not None:
-                remote.run(
-                    args=[
-                        test_path,
-                        ],
-                    stdin=json.dumps(sock_out),
-                    )
-
-    finally:
-        remote.run(
-            args=[
-                'rm', '-rf', '--', tmp_dir,
-                ],
-            )
diff --git a/teuthology/task/apache.conf.template b/teuthology/task/apache.conf.template
deleted file mode 100644 (file)
index c6fc662..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-<IfModule !env_module>
-  LoadModule env_module {mod_path}/mod_env.so
-</IfModule>
-<IfModule !rewrite_module>
-  LoadModule rewrite_module {mod_path}/mod_rewrite.so
-</IfModule>
-<IfModule !fastcgi_module>
-  LoadModule fastcgi_module {mod_path}/mod_fastcgi.so
-</IfModule>
-<IfModule !log_config_module>
-  LoadModule log_config_module {mod_path}/mod_log_config.so
-</IfModule>
-
-Listen {port}
-ServerName {host}
-
-ServerRoot {testdir}/apache
-ErrorLog {testdir}/archive/apache.{client}/error.log
-LogFormat "%h %l %u %t \"%r\" %>s %b \"%{{Referer}}i\" \"%{{User-agent}}i\"" combined
-CustomLog {testdir}/archive/apache.{client}/access.log combined
-PidFile {testdir}/apache/tmp.{client}/apache.pid
-DocumentRoot {testdir}/apache/htdocs.{client}
-FastCgiIPCDir {testdir}/apache/tmp.{client}/fastcgi_sock
-FastCgiExternalServer {testdir}/apache/htdocs.{client}/rgw.fcgi -socket rgw_sock -idle-timeout {idle_timeout}
-RewriteEngine On
-
-RewriteRule ^/([a-zA-Z0-9-_.]*)([/]?.*) /rgw.fcgi?page=$1&params=$2&%{{QUERY_STRING}} [E=HTTP_AUTHORIZATION:%{{HTTP:Authorization}},L]
-
-# Set fastcgi environment variables.
-# Note that this is separate from Unix environment variables!
-SetEnv RGW_LOG_LEVEL 20
-SetEnv RGW_SHOULD_LOG yes
-SetEnv RGW_PRINT_CONTINUE {print_continue}
-
-<Directory {testdir}/apache/htdocs.{client}>
-  Options +ExecCGI
-  AllowOverride All
-  SetHandler fastcgi-script
-</Directory>
-
-AllowEncodedSlashes On
-ServerSignature Off
diff --git a/teuthology/task/args.py b/teuthology/task/args.py
deleted file mode 100644 (file)
index 17e9e9d..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-"""
-These routines only appear to be used by the peering_speed tests.
-"""
-def gen_args(name, args):
-    """
-    Called from argify to generate arguments.
-    """
-    usage = [""]
-    usage += [name + ':']
-    usage += \
-        ["    {key}: <{usage}> ({default})".format(
-            key=key, usage=_usage, default=default)
-         for (key, _usage, default, _) in args]
-    usage.append('')
-    usage.append(name + ':')
-    usage += \
-        ["    {key}: {default}".format(
-                key = key, default = default)
-         for (key, _, default, _) in args]
-    usage = '\n'.join('    ' + i for i in usage)
-    def ret(config):
-        """
-        return an object with attributes set from args. 
-        """
-        class Object(object):
-            """
-            simple object
-            """
-            pass
-        obj = Object()
-        for (key, usage, default, conv) in args:
-            if key in config:
-                setattr(obj, key, conv(config[key]))
-            else:
-                setattr(obj, key, conv(default))
-        return obj
-    return usage, ret
-
-def argify(name, args):
-    """
-    Object used as a decorator for the peering speed tests.
-    See peering_speed_test.py
-    """
-    (usage, config_func) = gen_args(name, args)
-    def ret1(f):
-        """
-        Wrapper to handle doc and usage information
-        """
-        def ret2(**kwargs):
-            """
-            Call f (the parameter passed to ret1) 
-            """
-            config = kwargs.get('config', {})
-            if config is None:
-                config = {}
-            kwargs['config'] = config_func(config)
-            return f(**kwargs)
-        ret2.__doc__ = f.__doc__ + usage
-        return ret2
-    return ret1
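A hypothetical use of argify() from the module above; the spec entries are (key, usage text, default, conversion callable) tuples, as consumed by gen_args():

    from teuthology.task.args import argify

    @argify('example', [
        ('num_pgs', 'number of placement groups', 8, int),
        ('pool', 'pool to benchmark against', 'data', str),
    ])
    def task(ctx, config):
        """Example task; config arrives as an object with attributes, not a dict."""
        print config.num_pgs, config.pool

    # run_one_task() would call it with keyword arguments, e.g.:
    #   task(ctx=None, config={'num_pgs': '16'})   # prints: 16 data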
diff --git a/teuthology/task/autotest.py b/teuthology/task/autotest.py
deleted file mode 100644 (file)
index 24a7675..0000000
+++ /dev/null
@@ -1,166 +0,0 @@
-""" 
-Run an autotest test on the ceph cluster.
-"""
-import json
-import logging
-import os
-
-from teuthology import misc as teuthology
-from teuthology.parallel import parallel
-from ..orchestra import run
-
-log = logging.getLogger(__name__)
-
-def task(ctx, config):
-    """
-    Run an autotest test on the ceph cluster.
-
-    Only autotest client tests are supported.
-
-    The config is a mapping from role name to list of tests to run on
-    that client.
-
-    For example::
-
-        tasks:
-        - ceph:
-        - ceph-fuse: [client.0, client.1]
-        - autotest:
-            client.0: [dbench]
-            client.1: [bonnie]
-
-    You can also specify a list of tests to run on all clients::
-
-        tasks:
-        - ceph:
-        - ceph-fuse:
-        - autotest:
-            all: [dbench]
-    """
-    assert isinstance(config, dict)
-    config = teuthology.replace_all_with_clients(ctx.cluster, config)
-    log.info('Setting up autotest...')
-    testdir = teuthology.get_testdir(ctx)
-    with parallel() as p:
-        for role in config.iterkeys():
-            (remote,) = ctx.cluster.only(role).remotes.keys()
-            p.spawn(_download, testdir, remote)
-
-    log.info('Making a separate scratch dir for every client...')
-    for role in config.iterkeys():
-        assert isinstance(role, basestring)
-        PREFIX = 'client.'
-        assert role.startswith(PREFIX)
-        id_ = role[len(PREFIX):]
-        (remote,) = ctx.cluster.only(role).remotes.iterkeys()
-        mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
-        scratch = os.path.join(mnt, 'client.{id}'.format(id=id_))
-        remote.run(
-            args=[
-                'sudo',
-                'install',
-                '-d',
-                '-m', '0755',
-                '--owner={user}'.format(user='ubuntu'), #TODO
-                '--',
-                scratch,
-                ],
-            )
-
-    with parallel() as p:
-        for role, tests in config.iteritems():
-            (remote,) = ctx.cluster.only(role).remotes.keys()
-            p.spawn(_run_tests, testdir, remote, role, tests)
-
-def _download(testdir, remote):
-    """
-    Download.  Does not explicitly support multiple tasks in a single run.
-    """
-    remote.run(
-        args=[
-            # explicitly does not support multiple autotest tasks
-            # in a single run; the result archival would conflict
-            'mkdir', '{tdir}/archive/autotest'.format(tdir=testdir),
-            run.Raw('&&'),
-            'mkdir', '{tdir}/autotest'.format(tdir=testdir),
-            run.Raw('&&'),
-            'wget',
-            '-nv',
-            '--no-check-certificate',
-            'https://github.com/ceph/autotest/tarball/ceph',
-            '-O-',
-            run.Raw('|'),
-            'tar',
-            '-C', '{tdir}/autotest'.format(tdir=testdir),
-            '-x',
-            '-z',
-            '-f-',
-            '--strip-components=1',
-            ],
-        )
-
-def _run_tests(testdir, remote, role, tests):
-    """
-    Spawned to run test on remote site
-    """
-    assert isinstance(role, basestring)
-    PREFIX = 'client.'
-    assert role.startswith(PREFIX)
-    id_ = role[len(PREFIX):]
-    mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
-    scratch = os.path.join(mnt, 'client.{id}'.format(id=id_))
-
-    assert isinstance(tests, list)
-    for idx, testname in enumerate(tests):
-        log.info('Running autotest client test #%d: %s...', idx, testname)
-
-        tag = 'client.{id}.num{idx}.{testname}'.format(
-            idx=idx,
-            testname=testname,
-            id=id_,
-            )
-        control = '{tdir}/control.{tag}'.format(tdir=testdir, tag=tag)
-        teuthology.write_file(
-            remote=remote,
-            path=control,
-            data='import json; data=json.loads({data!r}); job.run_test(**data)'.format(
-                data=json.dumps(dict(
-                        url=testname,
-                        dir=scratch,
-                        # TODO perhaps tag
-                        # results will be in {testdir}/autotest/client/results/dbench
-                        # or {testdir}/autotest/client/results/dbench.{tag}
-                        )),
-                ),
-            )
-        remote.run(
-            args=[
-                '{tdir}/autotest/client/bin/autotest'.format(tdir=testdir),
-                '--verbose',
-                '--harness=simple',
-                '--tag={tag}'.format(tag=tag),
-                control,
-                run.Raw('3>&1'),
-                ],
-            )
-
-        remote.run(
-            args=[
-                'rm', '-rf', '--', control,
-                ],
-            )
-
-        remote.run(
-            args=[
-                'mv',
-                '--',
-                '{tdir}/autotest/client/results/{tag}'.format(tdir=testdir, tag=tag),
-                '{tdir}/archive/autotest/{tag}'.format(tdir=testdir, tag=tag),
-                ],
-            )
-
-    remote.run(
-        args=[
-            'rm', '-rf', '--', '{tdir}/autotest'.format(tdir=testdir),
-            ],
-        )
diff --git a/teuthology/task/blktrace.py b/teuthology/task/blktrace.py
deleted file mode 100644 (file)
index 208bfd5..0000000
+++ /dev/null
@@ -1,93 +0,0 @@
-"""
-Run blktrace program through teuthology
-"""
-import contextlib
-import logging
-
-from teuthology import misc as teuthology
-from teuthology import contextutil
-from ..orchestra import run 
-
-log = logging.getLogger(__name__)
-blktrace = '/usr/sbin/blktrace'
-daemon_signal = 'term'
-
-@contextlib.contextmanager
-def setup(ctx, config):
-    """
-    Setup all the remotes
-    """
-    osds = ctx.cluster.only(teuthology.is_type('osd'))
-    log_dir = '{tdir}/archive/performance/blktrace'.format(tdir=teuthology.get_testdir(ctx))
-
-    for remote, roles_for_host in osds.remotes.iteritems():
-        log.info('Creating %s on %s' % (log_dir, remote.name))
-        remote.run(
-            args=['mkdir', '-p', '-m0755', '--', log_dir],
-            wait=False,
-            )
-    yield
-
-@contextlib.contextmanager
-def execute(ctx, config):
-    """
-    Run the blktrace program on remote machines.
-    """
-    procs = []
-    testdir = teuthology.get_testdir(ctx)
-    log_dir = '{tdir}/archive/performance/blktrace'.format(tdir=testdir)
-
-    osds = ctx.cluster.only(teuthology.is_type('osd'))
-    for remote, roles_for_host in osds.remotes.iteritems():
-        roles_to_devs = ctx.disk_config.remote_to_roles_to_dev[remote]
-        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
-            if roles_to_devs.get(id_):
-                dev = roles_to_devs[id_]
-                log.info("running blktrace on %s: %s" % (remote.name, dev))
-
-                proc = remote.run(
-                    args=[
-                        'cd',
-                        log_dir,
-                        run.Raw(';'),
-                        'daemon-helper',
-                        daemon_signal,
-                        'sudo',
-                        blktrace,
-                        '-o',
-                        dev.rsplit("/", 1)[1],
-                        '-d',
-                        dev,
-                        ],
-                    wait=False,   
-                    stdin=run.PIPE,
-                    )
-                procs.append(proc)
-    try:
-        yield
-    finally:
-        osds = ctx.cluster.only(teuthology.is_type('osd'))
-        log.info('stopping blktrace processes')
-        for proc in procs:
-            proc.stdin.close()
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Usage:
-        blktrace:
-      
-    Runs blktrace on all clients.
-    """
-    if config is None:
-        config = dict(('client.{id}'.format(id=id_), None)
-                  for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client'))
-    elif isinstance(config, list):
-        config = dict.fromkeys(config)
-
-    with contextutil.nested(
-        lambda: setup(ctx=ctx, config=config),
-        lambda: execute(ctx=ctx, config=config),
-        ):
-        yield
-
diff --git a/teuthology/task/calamari.py b/teuthology/task/calamari.py
deleted file mode 100644 (file)
index f3c1495..0000000
+++ /dev/null
@@ -1,345 +0,0 @@
-"""
-calamari - set up various machines with roles for participating
-in Calamari management app testing.  Requires secret info for
-accessing authenticated package repos to install Calamari, supplied
-in an override: clause for calamari.reposetup below.  Contains
-five tasks:
-
-- calamari.reposetup: set up the calamari package repos (all targets)
-- calamari.agent: install stats collection daemons (all cluster targets)
-- calamari.restapi: cluster-management api access (one monitor target)
-- calamari.server: main webapp/gui front end (management target)
-- calamari.test: run automated test against calamari.server target (local)
-
-calamari.test runs on the local machine, as it accesses the Calamari
-server across https using requests.py, which must be present.  It uses
-several external modules in calamari_test/.
-
-Sample configuration:
-roles:
-- [osd.0, osd.1, mon.0, calamari.restapi]
-- [osd.2, osd.3, calamari.server]
-
-tasks:
-- install:
-    branch: dumpling
-- ceph:
-- calamari.reposetup:
-- calamari.agent:
-- calamari.restapi:
-- calamari.server:
-- calamari.test:
-    delay: 40
-
-calamari.reposetup will happen on all osd/mon/calamari.* remotes
-calamari.agent will run on all osd/mon
-calamari.restapi must run on a remote with a monitor
-calamari.server must be able to find calamari.restapi to talk to
-calamari.test has an optional delay to allow the webapp to settle before
- talking to it (we could make the test retry/timeout instead)
-
-"""
-
-from cStringIO import StringIO
-import contextlib
-import logging
-import os
-import subprocess
-import teuthology.misc as teuthology
-import teuthology.packaging as pkg
-import textwrap
-import time
-from ..orchestra import run
-
-log = logging.getLogger(__name__)
-
-
-def _edit_diamond_config(remote, serverhost):
-    """ Edit remote's diamond config to send stats to serverhost """
-    ret = remote.run(args=['sudo',
-                     'sed',
-                     '-i',
-                     's/calamari/{host}/'.format(host=serverhost),
-                     '/etc/diamond/diamond.conf'],
-                     stdout=StringIO())
-    if not ret:
-        return False
-    return remote.run(args=['sudo', 'service', 'diamond', 'restart'])
-
-
-def _disable_default_nginx(remote):
-    """
-    Fix up nginx values
-    """
-    script = textwrap.dedent('''
-        if [ -f /etc/nginx/conf.d/default.conf ]; then
-            mv /etc/nginx/conf.d/default.conf \
-                /etc/nginx/conf.d/default.disabled
-        fi
-        if [ -f /etc/nginx/sites-enabled/default ] ; then
-            rm /etc/nginx/sites-enabled/default
-        fi
-        service nginx restart
-        service {service} restart
-    ''')
-    service = pkg.get_service_name('httpd', remote)
-    script = script.format(service=service)
-    teuthology.sudo_write_file(remote, '/tmp/disable.nginx', script)
-    return remote.run(args=['sudo', 'bash', '/tmp/disable.nginx'],
-                      stdout=StringIO())
-
-
-def _setup_calamari_cluster(remote, restapi_remote):
-    """
-    Add restapi db entry to the server.
-    """
-    restapi_hostname = str(restapi_remote).split('@')[1]
-    sqlcmd = 'insert into ceph_cluster (name, api_base_url) ' \
-             'values ("{host}", "http://{host}:5000/api/v0.1/");'. \
-             format(host=restapi_hostname)
-    teuthology.write_file(remote, '/tmp/create.cluster.sql', sqlcmd)
-    return remote.run(args=['cat',
-                            '/tmp/create.cluster.sql',
-                            run.Raw('|'),
-                            'sudo',
-                            'sqlite3',
-                            '/opt/calamari/webapp/calamari/db.sqlite3'],
-                      stdout=StringIO())
-
-
-def _remotes(ctx, selector):
-    return ctx.cluster.only(selector).remotes.keys()
-
-"""
-Tasks
-"""
-
-
-@contextlib.contextmanager
-def agent(ctx, config):
-    """
-    task agent
-    calamari.agent: install stats collection
-       (for each role of type 'mon.' or 'osd.')
-
-    For example::
-
-        roles:
-        - [osd.0, mon.a]
-        - [osd.1]
-        tasks:
-        - calamari.agent:
-    """
-
-    log.info('calamari.agent starting')
-    overrides = ctx.config.get('overrides', {})
-    teuthology.deep_merge(config, overrides.get('calamari.agent', {}))
-
-    # agent gets installed on any remote with role mon or osd
-    def needs_agent(role):
-        for type in 'mon.', 'osd.':
-            if role.startswith(type):
-                return True
-        return False
-
-    remotes = _remotes(ctx, needs_agent)
-    if remotes is None:
-        raise RuntimeError('No role configured')
-    try:
-        for rem in remotes:
-            log.info('Installing calamari-agent on %s', rem)
-            pkg.install_package('calamari-agent', rem)
-            server_remote = _remotes(ctx,
-                lambda r: r.startswith('calamari.server'))
-            if not server_remote:
-                raise RuntimeError('No calamari.server role available')
-            server_remote = server_remote[0]
-            # why isn't shortname available by default?
-            serverhost = server_remote.name.split('@')[1]
-            log.info('configuring Diamond for {}'.format(serverhost))
-            if not _edit_diamond_config(rem, serverhost):
-                raise RuntimeError(
-                    'Diamond config edit failed on {0}'.format(rem)
-                )
-        yield
-    finally:
-            for rem in remotes:
-                pkg.remove_package('calamari-agent', rem)
-
-
-@contextlib.contextmanager
-def reposetup(ctx, config):
-    """
-    task reposetup
-    Sets up calamari repository on all 'osd', 'mon', and 'calamari.' remotes;
-     cleans up when done
-
-    calamari.reposetup:
-        pkgdir:
-        username:
-        password:
-
-    Supply the above in an override file if you need to manage the
-    secret repo credentials separately from the test definition (likely).
-
-    pkgdir encodes package directory (possibly more than one path component)
-    as in https://<username>:<password>@SERVER/<pkgdir>/{deb,rpm}{..}
-
-    """
-    overrides = ctx.config.get('overrides', {})
-    # XXX deep_merge returns the result, which matters if either is None
-    # make sure that doesn't happen
-    if config is None:
-        config = {'dummy': 'dummy'}
-    teuthology.deep_merge(config, overrides.get('calamari.reposetup', {}))
-
-    try:
-        pkgdir = config['pkgdir']
-        username = config['username']
-        password = config['password']
-    except KeyError:
-        raise RuntimeError('requires pkgdir, username, and password')
-
-    # repo gets installed on any remote with role mon, osd, or calamari
-    def needs_repo(role):
-        for type in 'mon.', 'osd.', 'calamari.':
-            if role.startswith(type):
-                return True
-        return False
-
-    remotes = _remotes(ctx, needs_repo)
-    if remotes is None:
-        raise RuntimeError('No roles configured')
-
-    try:
-        for rem in remotes:
-            log.info(rem)
-            keypath = 'http://download.inktank.com/keys/release.asc'
-            pkg.install_repokey(rem, keypath)
-            pkg.install_repo(rem, 'download.inktank.com', pkgdir,
-                             username, password)
-        yield
-
-    finally:
-        for rem in remotes:
-            pkg.remove_repo(rem)
-
-
-@contextlib.contextmanager
-def restapi(ctx, config):
-    """
-    task restapi
-
-    Calamari Rest API
-
-    For example::
-
-        roles:
-        - [mon.a, osd.0, osd.1, calamari.restapi]
-        - [osd.2, osd.3]
-        tasks:
-        - calamari.restapi:
-    """
-    overrides = ctx.config.get('overrides', {})
-    teuthology.deep_merge(config, overrides.get('calamari.restapi', {}))
-
-    remotes_and_roles = \
-        ctx.cluster.only(lambda r: r.startswith('calamari.restapi')).remotes
-    if remotes_and_roles is None:
-        raise RuntimeError('No role configured')
-
-    # check that the role selected also has at least one mon role
-    for rem, roles in remotes_and_roles.iteritems():
-        if not any([r for r in roles if r.startswith('mon.')]):
-            raise RuntimeError('no mon on remote with roles %s', roles)
-
-    try:
-        for rem in remotes_and_roles.iterkeys():
-            log.info(rem)
-            pkg.install_package('calamari-restapi', rem)
-        yield
-
-    finally:
-        for rem in remotes_and_roles.iterkeys():
-            pkg.remove_package('calamari-restapi', rem)
-
-
-@contextlib.contextmanager
-def server(ctx, config):
-    """
-    task server:
-
-    Calamari server setup.  Add role 'calamari.server' to the remote
-    that will run the webapp.  'calamari.restapi' role must be present
-    to serve as the cluster-api target for calamari-server.  Only one
-    of calamari.server and calamari.restapi roles is supported currently.
-
-    For example::
-
-        roles:
-        - [calamari.server]
-        - [mon.0, calamari.restapi]
-        - [osd.0, osd.1]
-        tasks:
-        - calamari.restapi:
-        - calamari.server:
-    """
-    overrides = ctx.config.get('overrides', {})
-    teuthology.deep_merge(config, overrides.get('calamari.server', {}))
-
-    remote = _remotes(ctx, lambda r: r.startswith('calamari.server'))
-    if not remote:
-        raise RuntimeError('No role configured')
-
-    restapi_remote = _remotes(ctx, lambda r: r.startswith('calamari.restapi'))
-    if not restapi_remote:
-        raise RuntimeError('Must supply calamari.restapi role')
-
-    remote = remote[0]
-    restapi_remote = restapi_remote[0]
-
-    try:
-        # sqlite3 command is required; on some platforms it's already
-        # there and not removable (required for, say yum)
-        sqlite_package = pkg.get_package_name('sqlite', remote)
-        if sqlite_package and not pkg.install_package(sqlite_package, remote):
-            raise RuntimeError('{} install failed'.format(sqlite_package))
-
-        if not pkg.install_package('calamari-server', remote) or \
-           not pkg.install_package('calamari-clients', remote) or \
-           not _disable_default_nginx(remote) or \
-           not _setup_calamari_cluster(remote, restapi_remote):
-            raise RuntimeError('Server installation failure')
-
-        log.info('client/server setup complete')
-        yield
-    finally:
-        pkg.remove_package('calamari-server', remote)
-        pkg.remove_package('calamari-clients', remote)
-        if sqlite_package:
-            pkg.remove_package(sqlite_package, remote)
-
-
-def test(ctx, config):
-    """
-    task test
-    Run the calamari smoketest on the teuthology host (no role required)
-    Tests to run are in calamari_testdir.
-    delay: wait this long before starting
-
-        tasks:
-        - calamari.test:
-            delay: 30
-            server: server.0
-    """
-    delay = config.get('delay', 0)
-    if delay:
-        log.info("delaying %d sec", delay)
-        time.sleep(delay)
-    testhost = _remotes(ctx, lambda r: r.startswith('calamari.server'))[0]
-    testhost = testhost.name.split('@')[1]
-    mypath = os.path.dirname(__file__)
-    cmd_list = [os.path.join(mypath, 'calamari', 'servertest_1_0.py')]
-    os.environ['CALAMARI_BASE_URI'] = 'http://{0}/api/v1/'.format(testhost)
-    log.info("testing %s", testhost)
-    return subprocess.call(cmd_list)
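-
-# A combined job sketch assembled from the docstrings above (host, role and
-# delay values are illustrative, not taken from any real run):
-#
-#     roles:
-#     - [calamari.server]
-#     - [mon.0, calamari.restapi]
-#     - [osd.0, osd.1]
-#     tasks:
-#     - calamari.restapi:
-#     - calamari.server:
-#     - calamari.test:
-#         delay: 30
-#         server: server.0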
diff --git a/teuthology/task/calamari/http_client.py b/teuthology/task/calamari/http_client.py
deleted file mode 100755 (executable)
index 84a03c7..0000000
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/usr/bin/env python
-
-import json
-import logging
-import requests
-
-log = logging.getLogger(__name__)
-
-
-class AuthenticatedHttpClient(requests.Session):
-    """
-    Client for the calamari REST API, principally exists to do
-    authentication, but also helpfully prefixes
-    URLs in requests with the API base URL and JSONizes
-    POST data.
-    """
-    def __init__(self, api_url, username, password):
-        super(AuthenticatedHttpClient, self).__init__()
-        self._username = username
-        self._password = password
-        self._api_url = api_url
-        self.headers = {
-            'Content-type': "application/json; charset=UTF-8"
-        }
-
-    def request(self, method, url, **kwargs):
-        if not url.startswith('/'):
-            url = self._api_url + url
-        response = super(AuthenticatedHttpClient, self).request(method, url, **kwargs)
-        if response.status_code >= 400:
-            # For the benefit of test logs
-            print "%s: %s" % (response.status_code, response.content)
-        return response
-
-    def post(self, url, data=None, **kwargs):
-        if isinstance(data, dict):
-            data = json.dumps(data)
-        return super(AuthenticatedHttpClient, self).post(url, data, **kwargs)
-
-    def patch(self, url, data=None, **kwargs):
-        if isinstance(data, dict):
-            data = json.dumps(data)
-        return super(AuthenticatedHttpClient, self).patch(url, data, **kwargs)
-
-    def login(self):
-        """
-        Authenticate with the Django auth system as
-        it is exposed in the Calamari REST API.
-        """
-        log.info("Logging in as %s" % self._username)
-        response = self.get("auth/login/")
-        response.raise_for_status()
-        self.headers['X-XSRF-TOKEN'] = response.cookies['XSRF-TOKEN']
-
-        self.post("auth/login/", {
-            'next': "/",
-            'username': self._username,
-            'password': self._password
-        })
-        response.raise_for_status()
-
-        # Check we're allowed in now.
-        response = self.get("cluster")
-        response.raise_for_status()
-
-if __name__ == "__main__":
-
-    import argparse
-
-    p = argparse.ArgumentParser()
-    p.add_argument('-u', '--uri', default='http://mira035/api/v1/')
-    p.add_argument('--user', default='admin')
-    p.add_argument('--pass', dest='password', default='admin')
-    args, remainder = p.parse_known_args()
-
-    c = AuthenticatedHttpClient(args.uri, args.user, args.password)
-    c.login()
-    response = c.request('GET', ''.join(remainder)).json()
-    print json.dumps(response, indent=2)
diff --git a/teuthology/task/calamari/servertest_1_0.py b/teuthology/task/calamari/servertest_1_0.py
deleted file mode 100755 (executable)
index b9b07a3..0000000
+++ /dev/null
@@ -1,269 +0,0 @@
-#!/usr/bin/env python
-
-import datetime
-import os
-import logging
-import logging.handlers
-import requests
-import uuid
-import unittest
-from http_client import AuthenticatedHttpClient
-
-log = logging.getLogger(__name__)
-log.addHandler(logging.StreamHandler())
-log.setLevel(logging.INFO)
-
-global base_uri
-global client
-base_uri = None
-server_uri = None
-client = None
-
-def setUpModule():
-    global base_uri
-    global server_uri
-    global client
-    try:
-        base_uri = os.environ['CALAMARI_BASE_URI']
-    except KeyError:
-        log.error('Must define CALAMARI_BASE_URI')
-        os._exit(1)
-    if not base_uri.endswith('/'):
-        base_uri += '/'
-    if not base_uri.endswith('api/v1/'):
-        base_uri += 'api/v1/'
-    client = AuthenticatedHttpClient(base_uri, 'admin', 'admin')
-    server_uri = base_uri.replace('api/v1/', '')
-    client.login()
-
-class RestTest(unittest.TestCase):
-    'Base class for all tests here; get class\'s data'
-
-    def setUp(self):
-        # Called once for each test_* case.  A bit wasteful, but we
-        # really like using the simple class variable self.uri
-        # to customize each derived TestCase
-        method = getattr(self, 'method', 'GET')
-        raw = self.uri.startswith('/')
-        self.response = self.get_object(method, self.uri, raw=raw)
-
-    def get_object(self, method, url, raw=False):
-        'Return Python object decoded from JSON response to method/url'
-        global server_uri
-        if not raw:
-            return client.request(method, url).json()
-        else:
-            return requests.request(method, server_uri + url).json()
-
-class TestUserMe(RestTest):
-
-    uri = 'user/me'
-
-    def test_me(self):
-        self.assertEqual(self.response['username'], 'admin')
-
-class TestCluster(RestTest):
-
-    uri = 'cluster'
-
-    def test_id(self):
-        self.assertEqual(self.response[0]['id'], 1)
-
-    def test_times(self):
-        for time in (
-            self.response[0]['cluster_update_time'],
-            self.response[0]['cluster_update_attempt_time'],
-        ):
-            self.assertTrue(is_datetime(time))
-
-    def test_api_base_url(self):
-        api_base_url = self.response[0]['api_base_url']
-        self.assertTrue(api_base_url.startswith('http'))
-        self.assertIn('api/v0.1', api_base_url)
-
-class TestHealth(RestTest):
-
-    uri = 'cluster/1/health'
-
-    def test_cluster(self):
-        self.assertEqual(self.response['cluster'], 1)
-
-    def test_times(self):
-        for time in (
-            self.response['cluster_update_time'],
-            self.response['added'],
-        ):
-            self.assertTrue(is_datetime(time))
-
-    def test_report_and_overall_status(self):
-        self.assertIn('report', self.response)
-        self.assertIn('overall_status', self.response['report'])
-
-class TestHealthCounters(RestTest):
-
-    uri = 'cluster/1/health_counters'
-
-    def test_cluster(self):
-        self.assertEqual(self.response['cluster'], 1)
-
-    def test_time(self):
-        self.assertTrue(is_datetime(self.response['cluster_update_time']))
-
-    def test_existence(self):
-        for section in ('pg', 'mon', 'osd'):
-            for counter in ('warn', 'critical', 'ok'):
-                count = self.response[section][counter]['count']
-                self.assertIsInstance(count, int)
-        self.assertIsInstance(self.response['pool']['total'], int)
-
-    def test_mds_sum(self):
-        count = self.response['mds']
-        self.assertEqual(
-            count['up_not_in'] + count['not_up_not_in'] + count['up_in'],
-            count['total']
-        )
-
-class TestSpace(RestTest):
-
-    uri = 'cluster/1/space'
-
-    def test_cluster(self):
-        self.assertEqual(self.response['cluster'], 1)
-
-    def test_times(self):
-        for time in (
-            self.response['cluster_update_time'],
-            self.response['added'],
-        ):
-            self.assertTrue(is_datetime(time))
-
-    def test_space(self):
-        for size in ('free_bytes', 'used_bytes', 'capacity_bytes'):
-            self.assertIsInstance(self.response['space'][size], int)
-            self.assertGreater(self.response['space'][size], 0)
-
-    def test_report(self):
-        for size in ('total_used', 'total_space', 'total_avail'):
-            self.assertIsInstance(self.response['report'][size], int)
-            self.assertGreater(self.response['report'][size], 0)
-
-class TestOSD(RestTest):
-
-    uri = 'cluster/1/osd'
-
-    def test_cluster(self):
-        self.assertEqual(self.response['cluster'], 1)
-
-    def test_times(self):
-        for time in (
-            self.response['cluster_update_time'],
-            self.response['added'],
-        ):
-            self.assertTrue(is_datetime(time))
-
-    def test_osd_uuid(self):
-        for osd in self.response['osds']:
-            uuidobj = uuid.UUID(osd['uuid'])
-            self.assertEqual(str(uuidobj), osd['uuid'])
-
-    def test_osd_pools(self):
-        for osd in self.response['osds']:
-            if osd['up'] != 1:
-                continue
-            self.assertIsInstance(osd['pools'], list)
-            self.assertIsInstance(osd['pools'][0], basestring)
-
-    def test_osd_up_in(self):
-        for osd in self.response['osds']:
-            for flag in ('up', 'in'):
-                self.assertIn(osd[flag], (0, 1))
-
-    def test_osd_0(self):
-        osd0 = self.get_object('GET', 'cluster/1/osd/0')['osd']
-        for field in osd0.keys():
-            if not field.startswith('cluster_update_time'):
-                self.assertEqual(self.response['osds'][0][field], osd0[field])
-
-class TestPool(RestTest):
-
-    uri = 'cluster/1/pool'
-
-    def test_cluster(self):
-        for pool in self.response:
-            self.assertEqual(pool['cluster'], 1)
-
-    def test_fields_are_ints(self):
-        for pool in self.response:
-            for field in ('id', 'used_objects', 'used_bytes'):
-                self.assertIsInstance(pool[field], int)
-
-    def test_name_is_str(self):
-        for pool in self.response:
-            self.assertIsInstance(pool['name'], basestring)
-
-    def test_pool_0(self):
-        poolid = self.response[0]['id']
-        pool = self.get_object('GET', 'cluster/1/pool/{id}'.format(id=poolid))
-        self.assertEqual(self.response[0], pool)
-
-class TestServer(RestTest):
-
-    uri = 'cluster/1/server'
-
-    def test_ipaddr(self):
-        for server in self.response:
-            octets = server['addr'].split('.')
-            self.assertEqual(len(octets), 4)
-            for octetstr in octets:
-                octet = int(octetstr)
-                self.assertIsInstance(octet, int)
-                self.assertGreaterEqual(octet, 0)
-                self.assertLessEqual(octet, 255)
-
-    def test_hostname_name_strings(self):
-        for server in self.response:
-            for field in ('name', 'hostname'):
-                self.assertIsInstance(server[field], basestring)
-
-    def test_services(self):
-        for server in self.response:
-            self.assertIsInstance(server['services'], list)
-            for service in server['services']:
-                self.assertIn(service['type'], ('osd', 'mon', 'mds'))
-
-class TestGraphitePoolIOPS(RestTest):
-
-    uri = '/graphite/render?format=json-array&' \
-          'target=ceph.cluster.ceph.pool.0.num_read&' \
-          'target=ceph.cluster.ceph.pool.0.num_write'
-
-    def test_targets_contain_request(self):
-        self.assertIn('targets', self.response)
-        self.assertIn('ceph.cluster.ceph.pool.0.num_read',
-                      self.response['targets'])
-        self.assertIn('ceph.cluster.ceph.pool.0.num_write',
-                      self.response['targets'])
-
-    def test_datapoints(self):
-        self.assertIn('datapoints', self.response)
-        self.assertGreater(len(self.response['datapoints']), 0)
-        data = self.response['datapoints'][0]
-        self.assertEqual(len(data), 3)
-        self.assertIsInstance(data[0], int)
-        if data[1]:
-            self.assertIsInstance(data[1], float)
-        if data[2]:
-            self.assertIsInstance(data[2], float)
-
-#
-# Utility functions
-#
-
-DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%fZ'
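-# e.g. '2014-08-07T14:24:52.123456Z' satisfies DATETIME_FORMAT (illustrative value)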
-
-def is_datetime(time):
-    datetime.datetime.strptime(time, DATETIME_FORMAT)
-    return True
-
-if __name__ == '__main__':
-    unittest.main()
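-
-# Typically driven by the calamari.test task in calamari.py, which exports
-# CALAMARI_BASE_URI before invoking this script, roughly (illustrative host):
-#   CALAMARI_BASE_URI=http://<calamari-server>/api/v1/ ./servertest_1_0.py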
diff --git a/teuthology/task/ceph.py b/teuthology/task/ceph.py
deleted file mode 100644 (file)
index 39e3a56..0000000
+++ /dev/null
@@ -1,1389 +0,0 @@
-"""
-Ceph cluster task.
-
-Handle the setup, starting, and clean-up of a Ceph cluster.
-"""
-from cStringIO import StringIO
-
-import argparse
-import contextlib
-import logging
-import os
-import struct
-import json
-import time
-
-from teuthology import misc as teuthology
-from teuthology import contextutil
-from ..orchestra import run
-import ceph_client as cclient
-
-log = logging.getLogger(__name__)
-
-class DaemonState(object):
-    """
-    Daemon State.  A daemon exists for each instance of each role.
-    """
-    def __init__(self, remote, role, id_, *command_args, **command_kwargs):
-        """
-        Pass remote command information as parameters to remote site
-
-        :param remote: Remote site
-        :param role: Role (osd, rgw, mon, mds)
-        :param id_: Id within role (osd.1, osd.2, for example)
-        :param command_args: positional arguments (used in restart commands)
-        :param command_kwargs: keyword arguments (used in restart commands)
-        """
-        self.remote = remote
-        self.command_args = command_args
-        self.command_kwargs = command_kwargs
-        self.role = role
-        self.id_ = id_
-        self.log = command_kwargs.get('logger', log)
-        self.proc = None
-
-    def stop(self):
-        """
-        Stop this daemon instance.
-
-        Note: this can raise a run.CommandFailedError,
-        run.CommandCrashedError, or run.ConnectionLostError.
-        """
-        if not self.running():
-            self.log.error('tried to stop a non-running daemon')
-            return
-        self.proc.stdin.close()
-        self.log.debug('waiting for process to exit')
-        run.wait([self.proc])
-        self.proc = None
-        self.log.info('Stopped')
-
-    def restart(self, *args, **kwargs):
-        """
-        Restart with a new command passed in the arguments
-
-        :param args: positional arguments passed to remote.run
-        :param kwargs: keyword arguments passed to remote.run
-        """
-        self.log.info('Restarting')
-        if self.proc is not None:
-            self.log.debug('stopping old one...')
-            self.stop()
-        cmd_args = list(self.command_args)
-        cmd_args.extend(args)
-        cmd_kwargs = self.command_kwargs
-        cmd_kwargs.update(kwargs)
-        self.proc = self.remote.run(*cmd_args, **cmd_kwargs)
-        self.log.info('Started')
-
-    def restart_with_args(self, extra_args):
-        """
-        Restart, adding new parameters to the current command.
-
-        :param extra_args: Extra keyword arguments to be added.
-        """
-        self.log.info('Restarting')
-        if self.proc is not None:
-            self.log.debug('stopping old one...')
-            self.stop()
-        cmd_args = list(self.command_args)
-        # we only want to make a temporary mod of the args list
-        # so we shallow copy the dict, and deepcopy the args list
-        cmd_kwargs = self.command_kwargs.copy()
-        from copy import deepcopy
-        cmd_kwargs['args'] = deepcopy(self.command_kwargs['args'])
-        cmd_kwargs['args'].extend(extra_args)
-        self.proc = self.remote.run(*cmd_args, **cmd_kwargs)
-        self.log.info('Started')
-
-    def signal(self, sig):
-        """
-        Send a signal to the associated remote command
-
-        :param sig: signal to send
-        """
-        self.proc.stdin.write(struct.pack('!b', sig))
-        self.log.info('Sent signal %d', sig)
-
-    def running(self):
-        """
-        Are we running?
-        :return: True if remote run command value is set, False otherwise.
-        """
-        return self.proc is not None
-
-    def reset(self):
-        """
-        clear remote run command value.
-        """
-        self.proc = None
-
-    def wait_for_exit(self):
-        """
-        clear remote run command value after waiting for exit.
-        """
-        if self.proc:
-            try:
-                run.wait([self.proc])
-            finally:
-                self.proc = None
-
-class CephState(object):
-    """
-    Collection of daemon state instances
-    """
-    def __init__(self):
-        """
-        self.daemons is a dictionary indexed by role.  Each entry is a dictionary of
-        DaemonState values indexed by an id parameter.
-        """
-        self.daemons = {}
-
-    def add_daemon(self, remote, role, id_, *args, **kwargs):
-        """
-        Add a daemon.  If there already is a daemon for this id_ and role, stop
-        that daemon and replace it.  The replacement daemon is started via
-        restart() once it has been registered.
-
-        :param remote: Remote site
-        :param role: Role (osd, mds, mon, rgw,  for example)
-        :param id_: Id (index into role dictionary)
-        :param args: Daemonstate positional parameters
-        :param kwargs: Daemonstate keyword parameters
-        """
-        if role not in self.daemons:
-            self.daemons[role] = {}
-        if id_ in self.daemons[role]:
-            self.daemons[role][id_].stop()
-            self.daemons[role][id_] = None
-        self.daemons[role][id_] = DaemonState(remote, role, id_, *args, **kwargs)
-        self.daemons[role][id_].restart()
-
-    def get_daemon(self, role, id_):
-        """
-        get the daemon associated with this id_ for this role.
-
-        :param role: Role (osd, mds, mon, rgw,  for example)
-        :param id_: Id (index into role dictionary)
-        """
-        if role not in self.daemons:
-            return None
-        return self.daemons[role].get(str(id_), None)
-
-    def iter_daemons_of_role(self, role):
-        """
-        Iterate through all daemon instances for this role.  Return dictionary of
-        daemon values.
-
-        :param role: Role (osd, mds, mon, rgw,  for example)
-        """
-        return self.daemons.get(role, {}).values()
-
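-# A minimal usage sketch of CephState/DaemonState (illustrative only; it
-# mirrors how run_daemon() and the restart task below drive daemons, and
-# names such as `remote0` are assumed rather than real):
-#
-#     daemons = CephState()
-#     daemons.add_daemon(remote0, 'osd', '0',
-#                        args=['ceph-osd', '-f', '-i', '0'],
-#                        logger=log.getChild('osd.0'),
-#                        stdin=run.PIPE, wait=False)
-#     osd0 = daemons.get_daemon('osd', '0')
-#     osd0.stop()
-#     osd0.restart()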
-
-@contextlib.contextmanager
-def ceph_log(ctx, config):
-    """
-    Create /var/log/ceph log directory that is open to everyone.
-    Add valgrind and profiling-logger directories.
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    log.info('Making ceph log dir writeable by non-root...')
-    run.wait(
-        ctx.cluster.run(
-            args=[
-                'sudo',
-                'chmod',
-                '777',
-                '/var/log/ceph',
-                ],
-            wait=False,
-            )
-        )
-    log.info('Disabling ceph logrotate...')
-    run.wait(
-        ctx.cluster.run(
-            args=[
-                'sudo',
-                'rm', '-f', '--',
-                '/etc/logrotate.d/ceph',
-                ],
-            wait=False,
-            )
-        )
-    log.info('Creating extra log directories...')
-    run.wait(
-        ctx.cluster.run(
-            args=[
-                'sudo',
-                'install', '-d', '-m0755', '--',
-                '/var/log/ceph/valgrind',
-                '/var/log/ceph/profiling-logger',
-                ],
-            wait=False,
-            )
-        )
-
-    try:
-        yield
-
-    finally:
-        pass
-
-
-def assign_devs(roles, devs):
-    """
-    Create a dictionary of devs indexed by roles
-
-    :param roles: List of roles
-    :param devs: Corresponding list of devices.
-    :returns: Dictionary of devs indexed by roles.
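-
-    For example (illustrative device names)::
-
-        assign_devs(['0', '1'], ['/dev/sdb', '/dev/sdc'])
-        # -> {'0': '/dev/sdb', '1': '/dev/sdc'}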
-    """
-    return dict(zip(roles, devs))
-
-@contextlib.contextmanager
-def valgrind_post(ctx, config):
-    """
-    After the tests run, look through all the valgrind logs.  Exceptions are raised
-    if textual errors occurred in the logs, or if valgrind exceptions were detected in
-    the logs.
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    try:
-        yield
-    finally:
-        lookup_procs = list()
-        log.info('Checking for errors in any valgrind logs...')
-        for remote in ctx.cluster.remotes.iterkeys():
-            #look at valgrind logs for each node
-            proc = remote.run(
-                args=[
-                    'sudo',
-                    'zgrep',
-                    '<kind>',
-                    run.Raw('/var/log/ceph/valgrind/*'),
-                    '/dev/null', # include a second file so that we always get a filename prefix on the output
-                    run.Raw('|'),
-                    'sort',
-                    run.Raw('|'),
-                    'uniq',
-                    ],
-                wait=False,
-                check_status=False,
-                stdout=StringIO(),
-                )
-            lookup_procs.append((proc, remote))
-
-        valgrind_exception = None
-        for (proc, remote) in lookup_procs:
-            proc.exitstatus.get()
-            out = proc.stdout.getvalue()
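-            # Each non-empty line is '<logfile>:<matched text>' thanks to the
-            # /dev/null argument to zgrep above, e.g. (illustrative):
-            #   /var/log/ceph/valgrind/osd.0.log:  <kind>Leak_DefinitelyLost</kind>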
-            for line in out.split('\n'):
-                if line == '':
-                    continue
-                try:
-                    (file, kind) = line.split(':')
-                except Exception:
-                    log.error('failed to split line %s', line)
-                    raise
-                log.debug('file %s kind %s', file, kind)
-                if (file.find('mds') >= 0) and kind.find('Lost') > 0:
-                    continue
-                log.error('saw valgrind issue %s in %s', kind, file)
-                valgrind_exception = Exception('saw valgrind issues')
-
-        if valgrind_exception is not None:
-            raise valgrind_exception
-
-
-def mount_osd_data(ctx, remote, osd):
-    """
-    Mount a remote OSD
-
-    :param ctx: Context
-    :param remote: Remote site
-    :param osd: Osd name
-    """
-    log.debug('Mounting data for osd.{o} on {r}'.format(o=osd, r=remote))
-    if remote in ctx.disk_config.remote_to_roles_to_dev and osd in ctx.disk_config.remote_to_roles_to_dev[remote]:
-        dev = ctx.disk_config.remote_to_roles_to_dev[remote][osd]
-        mount_options = ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][osd]
-        fstype = ctx.disk_config.remote_to_roles_to_dev_fstype[remote][osd]
-        mnt = os.path.join('/var/lib/ceph/osd', 'ceph-{id}'.format(id=osd))
-
-        log.info('Mounting osd.{o}: dev: {n}, mountpoint: {p}, type: {t}, options: {v}'.format(
-                 o=osd, n=remote.name, p=mnt, t=fstype, v=mount_options))
-
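-        # The command issued below is, roughly (illustrative values):
-        #   sudo mount -t xfs -o noatime /dev/sdb /var/lib/ceph/osd/ceph-0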
-        remote.run(
-            args=[
-                'sudo',
-                'mount',
-                '-t', fstype,
-                '-o', ','.join(mount_options),
-                dev,
-                mnt,
-            ]
-            )
-
-def make_admin_daemon_dir(ctx, remote):
-    """
-    Create /var/run/ceph directory on remote site.
-
-    :param ctx: Context
-    :param remote: Remote site
-    """
-    remote.run(
-            args=[
-                'sudo',
-                'install', '-d', '-m0777', '--', '/var/run/ceph',
-                ],
-            )
-
-@contextlib.contextmanager
-def cluster(ctx, config):
-    """
-    Handle the creation and removal of a ceph cluster.
-
-    On startup:
-        Create directories needed for the cluster.
-        Create remote journals for all osds.
-        Create and set keyring.
-        Copy the monmap to the test systems.
-        Setup mon nodes.
-        Setup mds nodes.
-        Mkfs osd nodes.
-        Add keyring information to monmaps
-        Mkfs mon nodes.
-
-    On exit:
-        If errors occurred, extract a failure message and store it in ctx.summary.
-        Unmount all test files and temporary journaling files.
-        Save the monitor information and archive all ceph logs.
-        Cleanup the keyring setup, and remove all monitor map and data files left over.
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    if ctx.config.get('use_existing_cluster', False) is True:
-        log.info("'use_existing_cluster' is true; skipping cluster creation")
-        yield
-        return
-
-    testdir = teuthology.get_testdir(ctx)
-    log.info('Creating ceph cluster...')
-    run.wait(
-        ctx.cluster.run(
-            args=[
-                'install', '-d', '-m0755', '--',
-                '{tdir}/data'.format(tdir=testdir),
-                ],
-            wait=False,
-            )
-        )
-
-    run.wait(
-        ctx.cluster.run(
-            args=[
-                'sudo',
-                'install', '-d', '-m0777', '--', '/var/run/ceph',
-                ],
-            wait=False,
-            )
-        )
-
-
-    devs_to_clean = {}
-    remote_to_roles_to_devs = {}
-    remote_to_roles_to_journals = {}
-    osds = ctx.cluster.only(teuthology.is_type('osd'))
-    for remote, roles_for_host in osds.remotes.iteritems():
-        devs = teuthology.get_scratch_devices(remote)
-        roles_to_devs = {}
-        roles_to_journals = {}
-        if config.get('fs'):
-            log.info('fs option selected, checking for scratch devs')
-            log.info('found devs: %s' % (str(devs),))
-            devs_id_map = teuthology.get_wwn_id_map(remote, devs)
-            iddevs = devs_id_map.values()
-            roles_to_devs = assign_devs(
-                teuthology.roles_of_type(roles_for_host, 'osd'), iddevs
-                )
-            if len(roles_to_devs) < len(iddevs):
-                iddevs = iddevs[len(roles_to_devs):]
-            devs_to_clean[remote] = []
-
-        if config.get('block_journal'):
-            log.info('block journal enabled')
-            roles_to_journals = assign_devs(
-                teuthology.roles_of_type(roles_for_host, 'osd'), iddevs
-                )
-            log.info('journal map: %s', roles_to_journals)
-
-        if config.get('tmpfs_journal'):
-            log.info('tmpfs journal enabled')
-            roles_to_journals = {}
-            remote.run( args=[ 'sudo', 'mount', '-t', 'tmpfs', 'tmpfs', '/mnt' ] )
-            for osd in teuthology.roles_of_type(roles_for_host, 'osd'):
-                tmpfs = '/mnt/osd.%s' % osd
-                roles_to_journals[osd] = tmpfs
-                remote.run( args=[ 'truncate', '-s', '1500M', tmpfs ] )
-            log.info('journal map: %s', roles_to_journals)
-
-        log.info('dev map: %s' % (str(roles_to_devs),))
-        remote_to_roles_to_devs[remote] = roles_to_devs
-        remote_to_roles_to_journals[remote] = roles_to_journals
-
-
-    log.info('Generating config...')
-    remotes_and_roles = ctx.cluster.remotes.items()
-    roles = [role_list for (remote, role_list) in remotes_and_roles]
-    ips = [host for (host, port) in (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)]
-    conf = teuthology.skeleton_config(ctx, roles=roles, ips=ips)
-    for remote, roles_to_journals in remote_to_roles_to_journals.iteritems():
-        for role, journal in roles_to_journals.iteritems():
-            key = "osd." + str(role)
-            if key not in conf:
-                conf[key] = {}
-            conf[key]['osd journal'] = journal
-    for section, keys in config['conf'].iteritems():
-        for key, value in keys.iteritems():
-            log.info("[%s] %s = %s" % (section, key, value))
-            if section not in conf:
-                conf[section] = {}
-            conf[section][key] = value
-
-    if config.get('tmpfs_journal'):
-        conf['journal dio'] = False
-
-    ctx.ceph = argparse.Namespace()
-    ctx.ceph.conf = conf
-
-    keyring_path = config.get('keyring_path', '/etc/ceph/ceph.keyring')
-
-    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
-
-    firstmon = teuthology.get_first_mon(ctx, config)
-
-    log.info('Setting up %s...' % firstmon)
-    ctx.cluster.only(firstmon).run(
-        args=[
-            'sudo',
-            'adjust-ulimits',
-            'ceph-coverage',
-            coverage_dir,
-            'ceph-authtool',
-            '--create-keyring',
-            keyring_path,
-            ],
-        )
-    ctx.cluster.only(firstmon).run(
-        args=[
-            'sudo',
-            'adjust-ulimits',
-            'ceph-coverage',
-            coverage_dir,
-            'ceph-authtool',
-            '--gen-key',
-            '--name=mon.',
-            keyring_path,
-            ],
-        )
-    ctx.cluster.only(firstmon).run(
-        args=[
-            'sudo',
-            'chmod',
-            '0644',
-            keyring_path,
-            ],
-        )
-    (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
-    fsid = teuthology.create_simple_monmap(
-        ctx,
-        remote=mon0_remote,
-        conf=conf,
-        )
-    if not 'global' in conf:
-        conf['global'] = {}
-    conf['global']['fsid'] = fsid
-
-    log.info('Writing ceph.conf for FSID %s...' % fsid)
-    conf_path = config.get('conf_path', '/etc/ceph/ceph.conf')
-    conf_fp = StringIO()
-    conf.write(conf_fp)
-    conf_fp.seek(0)
-    writes = ctx.cluster.run(
-        args=[
-            'sudo', 'mkdir', '-p', '/etc/ceph', run.Raw('&&'),
-            'sudo', 'chmod', '0755', '/etc/ceph', run.Raw('&&'),
-            'sudo', 'python',
-            '-c',
-            'import shutil, sys; shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))',
-            conf_path,
-            run.Raw('&&'),
-            'sudo', 'chmod', '0644', conf_path,
-            ],
-        stdin=run.PIPE,
-        wait=False,
-        )
-    teuthology.feed_many_stdins_and_close(conf_fp, writes)
-    run.wait(writes)
-
-    log.info('Creating admin key on %s...' % firstmon)
-    ctx.cluster.only(firstmon).run(
-        args=[
-            'sudo',
-            'adjust-ulimits',
-            'ceph-coverage',
-            coverage_dir,
-            'ceph-authtool',
-            '--gen-key',
-            '--name=client.admin',
-            '--set-uid=0',
-            '--cap', 'mon', 'allow *',
-            '--cap', 'osd', 'allow *',
-            '--cap', 'mds', 'allow',
-            keyring_path,
-            ],
-        )
-
-    log.info('Copying monmap to all nodes...')
-    keyring = teuthology.get_file(
-        remote=mon0_remote,
-        path=keyring_path,
-        )
-    monmap = teuthology.get_file(
-        remote=mon0_remote,
-        path='{tdir}/monmap'.format(tdir=testdir),
-        )
-
-    for rem in ctx.cluster.remotes.iterkeys():
-        # copy mon key and initial monmap
-        log.info('Sending monmap to node {remote}'.format(remote=rem))
-        teuthology.sudo_write_file(
-            remote=rem,
-            path=keyring_path,
-            data=keyring,
-            perms='0644'
-            )
-        teuthology.write_file(
-            remote=rem,
-            path='{tdir}/monmap'.format(tdir=testdir),
-            data=monmap,
-            )
-
-    log.info('Setting up mon nodes...')
-    mons = ctx.cluster.only(teuthology.is_type('mon'))
-    run.wait(
-        mons.run(
-            args=[
-                'adjust-ulimits',
-                'ceph-coverage',
-                coverage_dir,
-                'osdmaptool',
-                '-c', conf_path,
-                '--clobber',
-                '--createsimple', '{num:d}'.format(
-                    num=teuthology.num_instances_of_type(ctx.cluster, 'osd'),
-                    ),
-                '{tdir}/osdmap'.format(tdir=testdir),
-                '--pg_bits', '2',
-                '--pgp_bits', '4',
-                ],
-            wait=False,
-            ),
-        )
-
-    log.info('Setting up mds nodes...')
-    mdss = ctx.cluster.only(teuthology.is_type('mds'))
-    for remote, roles_for_host in mdss.remotes.iteritems():
-        for id_ in teuthology.roles_of_type(roles_for_host, 'mds'):
-            remote.run(
-                args=[
-                    'sudo',
-                    'mkdir',
-                    '-p',
-                    '/var/lib/ceph/mds/ceph-{id}'.format(id=id_),
-                    run.Raw('&&'),
-                    'sudo',
-                    'adjust-ulimits',
-                    'ceph-coverage',
-                    coverage_dir,
-                    'ceph-authtool',
-                    '--create-keyring',
-                    '--gen-key',
-                    '--name=mds.{id}'.format(id=id_),
-                    '/var/lib/ceph/mds/ceph-{id}/keyring'.format(id=id_),
-                    ],
-                )
-
-    cclient.create_keyring(ctx)
-    log.info('Running mkfs on osd nodes...')
-
-    ctx.disk_config = argparse.Namespace()
-    ctx.disk_config.remote_to_roles_to_dev = remote_to_roles_to_devs
-    ctx.disk_config.remote_to_roles_to_journals = remote_to_roles_to_journals
-    ctx.disk_config.remote_to_roles_to_dev_mount_options = {}
-    ctx.disk_config.remote_to_roles_to_dev_fstype = {}
-
-    log.info("ctx.disk_config.remote_to_roles_to_dev: {r}".format(r=str(ctx.disk_config.remote_to_roles_to_dev)))
-    for remote, roles_for_host in osds.remotes.iteritems():
-        roles_to_devs = remote_to_roles_to_devs[remote]
-        roles_to_journals = remote_to_roles_to_journals[remote]
-
-
-        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
-            remote.run(
-                args=[
-                    'sudo',
-                    'mkdir',
-                    '-p',
-                    '/var/lib/ceph/osd/ceph-{id}'.format(id=id_),
-                    ])
-            log.info(str(roles_to_journals))
-            log.info(id_)
-            if roles_to_devs.get(id_):
-                dev = roles_to_devs[id_]
-                fs = config.get('fs')
-                package = None
-                mkfs_options = config.get('mkfs_options')
-                mount_options = config.get('mount_options')
-                if fs == 'btrfs':
-                    #package = 'btrfs-tools'
-                    if mount_options is None:
-                        mount_options = ['noatime','user_subvol_rm_allowed']
-                    if mkfs_options is None:
-                        mkfs_options = ['-m', 'single',
-                                        '-l', '32768',
-                                        '-n', '32768']
-                if fs == 'xfs':
-                    #package = 'xfsprogs'
-                    if mount_options is None:
-                        mount_options = ['noatime']
-                    if mkfs_options is None:
-                        mkfs_options = ['-f', '-i', 'size=2048']
-                if fs == 'ext4' or fs == 'ext3':
-                    if mount_options is None:
-                        mount_options = ['noatime','user_xattr']
-
-                if mount_options is None:
-                    mount_options = []
-                if mkfs_options is None:
-                    mkfs_options = []
-                mkfs = ['mkfs.%s' % fs] + mkfs_options
-                log.info('%s on %s on %s' % (mkfs, dev, remote))
-                if package is not None:
-                    remote.run(
-                        args=[
-                            'sudo',
-                            'apt-get', 'install', '-y', package
-                            ],
-                        stdout=StringIO(),
-                        )
-
-                try:
-                    remote.run(args= ['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev])
-                except run.CommandFailedError:
-                    # Newer btrfs-tools doesn't prompt for overwrite, use -f
-                    if '-f' not in mkfs_options:
-                        mkfs_options.append('-f')
-                        mkfs = ['mkfs.%s' % fs] + mkfs_options
-                        log.info('%s on %s on %s' % (mkfs, dev, remote))
-                    remote.run(args= ['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev])
-                        
-                log.info('mount %s on %s -o %s' % (dev, remote,
-                                                   ','.join(mount_options)))
-                remote.run(
-                    args=[
-                        'sudo',
-                        'mount',
-                        '-t', fs,
-                        '-o', ','.join(mount_options),
-                        dev,
-                        os.path.join('/var/lib/ceph/osd', 'ceph-{id}'.format(id=id_)),
-                        ]
-                    )
-                if not remote in ctx.disk_config.remote_to_roles_to_dev_mount_options:
-                    ctx.disk_config.remote_to_roles_to_dev_mount_options[remote] = {}
-                ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][id_] = mount_options
-                if not remote in ctx.disk_config.remote_to_roles_to_dev_fstype:
-                    ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {}
-                ctx.disk_config.remote_to_roles_to_dev_fstype[remote][id_] = fs
-                devs_to_clean[remote].append(
-                    os.path.join(
-                        os.path.join('/var/lib/ceph/osd', 'ceph-{id}'.format(id=id_)),
-                        )
-                    )
-
-        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
-            remote.run(
-                args=[
-                    'sudo',
-                    'MALLOC_CHECK_=3',
-                    'adjust-ulimits',
-                    'ceph-coverage',
-                    coverage_dir,
-                    'ceph-osd',
-                    '--mkfs',
-                    '--mkkey',
-                    '-i', id_,
-                    '--monmap', '{tdir}/monmap'.format(tdir=testdir),
-                    ],
-                )
-
-
-    log.info('Reading keys from all nodes...')
-    keys_fp = StringIO()
-    keys = []
-    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
-        for type_ in ['mds','osd']:
-            for id_ in teuthology.roles_of_type(roles_for_host, type_):
-                data = teuthology.get_file(
-                    remote=remote,
-                    path='/var/lib/ceph/{type}/ceph-{id}/keyring'.format(
-                        type=type_,
-                        id=id_,
-                        ),
-                    sudo=True,
-                    )
-                keys.append((type_, id_, data))
-                keys_fp.write(data)
-    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
-        for type_ in ['client']:
-            for id_ in teuthology.roles_of_type(roles_for_host, type_):
-                data = teuthology.get_file(
-                    remote=remote,
-                    path='/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
-                    )
-                keys.append((type_, id_, data))
-                keys_fp.write(data)
-
-    log.info('Adding keys to all mons...')
-    writes = mons.run(
-        args=[
-            'sudo', 'tee', '-a',
-            keyring_path,
-            ],
-        stdin=run.PIPE,
-        wait=False,
-        stdout=StringIO(),
-        )
-    keys_fp.seek(0)
-    teuthology.feed_many_stdins_and_close(keys_fp, writes)
-    run.wait(writes)
-    for type_, id_, data in keys:
-        run.wait(
-            mons.run(
-                args=[
-                    'sudo',
-                    'adjust-ulimits',
-                    'ceph-coverage',
-                    coverage_dir,
-                    'ceph-authtool',
-                    keyring_path,
-                    '--name={type}.{id}'.format(
-                        type=type_,
-                        id=id_,
-                        ),
-                    ] + list(teuthology.generate_caps(type_)),
-                wait=False,
-                ),
-            )
-
-    log.info('Running mkfs on mon nodes...')
-    for remote, roles_for_host in mons.remotes.iteritems():
-        for id_ in teuthology.roles_of_type(roles_for_host, 'mon'):
-            remote.run(
-                args=[
-                  'sudo',
-                  'mkdir',
-                  '-p',
-                  '/var/lib/ceph/mon/ceph-{id}'.format(id=id_),
-                  ],
-                )
-            remote.run(
-                args=[
-                    'sudo',
-                    'adjust-ulimits',
-                    'ceph-coverage',
-                    coverage_dir,
-                    'ceph-mon',
-                    '--mkfs',
-                    '-i', id_,
-                    '--monmap={tdir}/monmap'.format(tdir=testdir),
-                    '--osdmap={tdir}/osdmap'.format(tdir=testdir),
-                    '--keyring={kpath}'.format(kpath=keyring_path),
-                    ],
-                )
-
-
-    run.wait(
-        mons.run(
-            args=[
-                'rm',
-                '--',
-                '{tdir}/monmap'.format(tdir=testdir),
-                '{tdir}/osdmap'.format(tdir=testdir),
-                ],
-            wait=False,
-            ),
-        )
-
-    try:
-        yield
-    except Exception:
-        # we need to know this below
-        ctx.summary['success'] = False
-        raise
-    finally:
-        (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
-
-        log.info('Checking cluster log for badness...')
-        def first_in_ceph_log(pattern, excludes):
-            """
-            Find the first occurrence of the specified pattern in the Ceph log.
-            Returns None if none found.
-
-            :param pattern: Pattern scanned for.
-            :param excludes: Patterns to ignore.
-            :return: First line of text (or None if not found)
-            """
-            args = [
-                'sudo',
-                'egrep', pattern,
-                '/var/log/ceph/ceph.log',
-                ]
-            for exclude in excludes:
-                args.extend([run.Raw('|'), 'egrep', '-v', exclude])
-            args.extend([
-                    run.Raw('|'), 'head', '-n', '1',
-                    ])
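-            # The assembled pipeline is, roughly (illustrative):
-            #   sudo egrep '<pattern>' /var/log/ceph/ceph.log \
-            #       | egrep -v '<exclude>' ... | head -n 1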
-            r = mon0_remote.run(
-                stdout=StringIO(),
-                args=args,
-                )
-            stdout = r.stdout.getvalue()
-            if stdout != '':
-                return stdout
-            return None
-
-        if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]',
-                             config['log_whitelist']) is not None:
-            log.warning('Found errors (ERR|WRN|SEC) in cluster log')
-            ctx.summary['success'] = False
-            # use the most severe problem as the failure reason
-            if 'failure_reason' not in ctx.summary:
-                for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']:
-                    match = first_in_ceph_log(pattern, config['log_whitelist'])
-                    if match is not None:
-                        ctx.summary['failure_reason'] = \
-                            '"{match}" in cluster log'.format(
-                            match=match.rstrip('\n'),
-                            )
-                        break
-
-        for remote, dirs in devs_to_clean.iteritems():
-            for dir_ in dirs:
-                log.info('Unmounting %s on %s' % (dir_, remote))
-                remote.run(
-                    args=[
-                        'sync',
-                        run.Raw('&&'),
-                        'sudo',
-                        'umount',
-                        '-f',
-                        dir_
-                        ]
-                    )
-
-        if config.get('tmpfs_journal'):
-            log.info('tmpfs journal enabled - unmounting tmpfs at /mnt')
-            for remote, roles_for_host in osds.remotes.iteritems():
-                remote.run(
-                    args=[ 'sudo', 'umount', '-f', '/mnt' ],
-                    check_status=False,
-                )
-
-        if ctx.archive is not None and \
-                not (ctx.config.get('archive-on-error') and ctx.summary['success']):
-            # archive mon data, too
-            log.info('Archiving mon data...')
-            path = os.path.join(ctx.archive, 'data')
-            os.makedirs(path)
-            for remote, roles in mons.remotes.iteritems():
-                for role in roles:
-                    if role.startswith('mon.'):
-                        teuthology.pull_directory_tarball(
-                            remote,
-                            '/var/lib/ceph/mon',
-                            path + '/' + role + '.tgz')
-
-            # and logs
-            log.info('Compressing logs...')
-            run.wait(
-                ctx.cluster.run(
-                    args=[
-                        'sudo',
-                        'find',
-                        '/var/log/ceph',
-                        '-name',
-                        '*.log',
-                        '-print0',
-                        run.Raw('|'),
-                        'sudo',
-                        'xargs',
-                        '-0',
-                        '--no-run-if-empty',
-                        '--',
-                        'gzip',
-                        '--',
-                        ],
-                    wait=False,
-                    ),
-                )
-
-            log.info('Archiving logs...')
-            path = os.path.join(ctx.archive, 'remote')
-            os.makedirs(path)
-            for remote in ctx.cluster.remotes.iterkeys():
-                sub = os.path.join(path, remote.shortname)
-                os.makedirs(sub)
-                teuthology.pull_directory(remote, '/var/log/ceph',
-                                          os.path.join(sub, 'log'))
-
-
-        log.info('Cleaning ceph cluster...')
-        run.wait(
-            ctx.cluster.run(
-                args=[
-                    'sudo',
-                    'rm',
-                    '-rf',
-                    '--',
-                    conf_path,
-                    keyring_path,
-                    '{tdir}/data'.format(tdir=testdir),
-                    '{tdir}/monmap'.format(tdir=testdir),
-                    ],
-                wait=False,
-                ),
-            )
-
-def get_all_pg_info(rem_site, testdir):
-    """
-    Get the results of a ceph pg dump
-    """
-    info = rem_site.run(args=[
-                        'adjust-ulimits',
-                        'ceph-coverage',
-                        '{tdir}/archive/coverage'.format(tdir=testdir),
-                        'ceph', 'pg', 'dump',
-                        '--format', 'json'], stdout=StringIO())
-    all_info = json.loads(info.stdout.getvalue())
-    return all_info['pg_stats']
-    
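-# Each pg_stats entry is a dict; the scrub logic below relies on at least
-# these keys (values illustrative):
-#   {'state': 'active+clean', 'last_scrub_stamp': '2014-08-07 14:24:52.000000', ...}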
-def osd_scrub_pgs(ctx, config):
-    """
-    Scrub pgs when we exit.
-    
-    First make sure all pgs are active and clean.
-    Next scrub all osds.
-    Then periodically check until all pgs have scrub time stamps that
-    indicate the last scrub completed.  Time out if no progress is made
-    here after two minutes.
-    """
-    retries = 12
-    delays = 10
-    vlist = ctx.cluster.remotes.values()
-    testdir = teuthology.get_testdir(ctx)
-    rem_site = ctx.cluster.remotes.keys()[0]
-    all_clean = False
-    for _ in range(0, retries):
-        stats = get_all_pg_info(rem_site, testdir)
-        states = [stat['state'] for stat in stats]
-        if len(set(states)) == 1 and states[0] == 'active+clean':
-            all_clean = True
-            break
-        log.info("Waiting for all osds to be active and clean.")
-        time.sleep(delays)
-    if not all_clean:
-        log.info("Scrubbing terminated -- not all pgs were active and clean.")
-        return
-    check_time_now = time.localtime()
-    time.sleep(1)
-    for slists in vlist:
-        for role in slists:
-            if role.startswith('osd.'):
-                log.info("Scrubbing osd {osd}".format(osd=role))
-                rem_site.run(args=[
-                            'adjust-ulimits',
-                            'ceph-coverage',
-                            '{tdir}/archive/coverage'.format(tdir=testdir),
-                            'ceph', 'osd', 'scrub', role])
-    prev_good = 0
-    gap_cnt = 0
-    loop = True
-    while loop:
-        stats = get_all_pg_info(rem_site, testdir)
-        timez = [stat['last_scrub_stamp'] for stat in stats]
-        loop = False
-        thiscnt = 0
-        for tmval in timez:
-            pgtm = time.strptime(tmval[0:tmval.find('.')], '%Y-%m-%d %H:%M:%S')
-            if pgtm > check_time_now:
-                thiscnt += 1
-            else:
-                loop = True
-        if thiscnt > prev_good:
-            prev_good = thiscnt
-            gap_cnt = 0
-        else:
-            gap_cnt += 1
-            if gap_cnt > retries:
-                log.info('Exiting scrub checking -- not all pgs scrubbed.')
-                return
-        if loop:
-            log.info('Still waiting for all pgs to be scrubbed.')
-            time.sleep(delays)
-
-@contextlib.contextmanager
-def run_daemon(ctx, config, type_):
-    """
-    Run daemons for a role type.  Handle the startup and termination of a daemon.
-    On startup -- set coverages, cpu_profile, valgrind values for all remotes,
-    and a max_mds value for one mds.
-    On cleanup -- Stop all existing daemons of this type.
-
-    :param ctx: Context
-    :param config: Configuration
-    :param type_: Role type
-    """
-    log.info('Starting %s daemons...' % type_)
-    testdir = teuthology.get_testdir(ctx)
-    daemons = ctx.cluster.only(teuthology.is_type(type_))
-
-    # check whether any daemons of this type are configured
-    if daemons is None:
-        return
-    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
-
-    daemon_signal = 'kill'
-    if config.get('coverage') or config.get('valgrind') is not None:
-        daemon_signal = 'term'
-
-    num_active = 0
-    for remote, roles_for_host in daemons.remotes.iteritems():
-        for id_ in teuthology.roles_of_type(roles_for_host, type_):
-            name = '%s.%s' % (type_, id_)
-
-            if not (id_.endswith('-s')) and (id_.find('-s-') == -1):
-                num_active += 1
-
-            run_cmd = [
-                'sudo',
-                'adjust-ulimits',
-                'ceph-coverage',
-                coverage_dir,
-                'daemon-helper',
-                daemon_signal,
-                ]
-            run_cmd_tail = [
-                'ceph-%s' % (type_),
-                '-f',
-                '-i', id_]
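-            # Combined (run_cmd + run_cmd_tail) this becomes, e.g. for osd.0
-            # (illustrative, before any cpu_profile/valgrind wrapping):
-            #   sudo adjust-ulimits ceph-coverage <coverage_dir> \
-            #       daemon-helper kill ceph-osd -f -i 0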
-
-            if type_ in config.get('cpu_profile', []):
-                profile_path = '/var/log/ceph/profiling-logger/%s.%s.prof' % (type_, id_)
-                run_cmd.extend([ 'env', 'CPUPROFILE=%s' % profile_path ])
-
-            if config.get('valgrind') is not None:
-                valgrind_args = None
-                if type_ in config['valgrind']:
-                    valgrind_args = config['valgrind'][type_]
-                if name in config['valgrind']:
-                    valgrind_args = config['valgrind'][name]
-                run_cmd = teuthology.get_valgrind_args(testdir, name,
-                                                       run_cmd,
-                                                       valgrind_args)
-
-            run_cmd.extend(run_cmd_tail)
-
-            ctx.daemons.add_daemon(remote, type_, id_,
-                                   args=run_cmd,
-                                   logger=log.getChild(name),
-                                   stdin=run.PIPE,
-                                   wait=False,
-                                   )
-
-    if type_ == 'mds':
-        firstmon = teuthology.get_first_mon(ctx, config)
-        (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
-
-        mon0_remote.run(args=[
-            'adjust-ulimits',
-            'ceph-coverage',
-            coverage_dir,
-            'ceph',
-            'mds', 'set_max_mds', str(num_active)])
-
-    try:
-        yield
-    finally:
-        teuthology.stop_daemons_of_type(ctx, type_)
-
-def healthy(ctx, config):
-    """
-    Wait for all osds to be up, and for 'ceph health' to report HEALTH_OK.
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    log.info('Waiting until ceph is healthy...')
-    firstmon = teuthology.get_first_mon(ctx, config)
-    (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
-    teuthology.wait_until_osds_up(
-        ctx,
-        cluster=ctx.cluster,
-        remote=mon0_remote
-        )
-    teuthology.wait_until_healthy(
-        ctx,
-        remote=mon0_remote,
-        )
-
-def wait_for_osds_up(ctx, config):
-    """
-    Wait for all osds to come up.
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    log.info('Waiting until ceph osds are all up...')
-    firstmon = teuthology.get_first_mon(ctx, config)
-    (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
-    teuthology.wait_until_osds_up(
-        ctx,
-        cluster=ctx.cluster,
-        remote=mon0_remote
-        )
-
-def wait_for_mon_quorum(ctx, config):
-    """
-    Check remote ceph status until the monitor quorum matches the given list.
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-
-    assert isinstance(config, list)
-    firstmon = teuthology.get_first_mon(ctx, config)
-    (remote,) = ctx.cluster.only(firstmon).remotes.keys()
-    while True:
-        r = remote.run(
-            args=[
-                'ceph',
-                'quorum_status',
-                ],
-            stdout=StringIO(),
-            logger=log.getChild('quorum_status'),
-            )
-        j = json.loads(r.stdout.getvalue())
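-        # quorum_status output is JSON containing (among other fields) a
-        # 'quorum_names' list, e.g. {"quorum_names": ["a", "b", "c"], ...}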
-        q = j.get('quorum_names', [])
-        log.debug('Quorum: %s', q)
-        if sorted(q) == sorted(config):
-            break
-        time.sleep(1)
-
-
-@contextlib.contextmanager
-def restart(ctx, config):
-    """
-    restart ceph daemons
-
-    For example::
-
-        tasks:
-        - ceph.restart: [all]
-
-    For example::
-
-        tasks:
-        - ceph.restart: [osd.0, mon.1]
-
-    or::
-
-        tasks:
-        - ceph.restart:
-            daemons: [osd.0, mon.1]
-            wait-for-healthy: false
-            wait-for-osds-up: true
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    if config is None:
-        config = {}
-    if isinstance(config, list):
-        config = { 'daemons': config }
-    if 'daemons' not in config:
-        config['daemons'] = []
-        type_daemon = ['mon', 'osd', 'mds', 'rgw']
-        for d in type_daemon:
-            type_ = d
-            for daemon in ctx.daemons.iter_daemons_of_role(type_):
-                config['daemons'].append(type_ + '.' + daemon.id_)
-
-    assert isinstance(config['daemons'], list)
-    daemons = dict.fromkeys(config['daemons'])
-    for i in daemons.keys():
-        type_ = i.split('.', 1)[0]
-        id_ = i.split('.', 1)[1]
-        ctx.daemons.get_daemon(type_, id_).stop()
-        ctx.daemons.get_daemon(type_, id_).restart()
-
-    if config.get('wait-for-healthy', True):
-        healthy(ctx=ctx, config=None)
-    if config.get('wait-for-osds-up', False):
-        wait_for_osds_up(ctx=ctx, config=None)
-    yield
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Set up and tear down a Ceph cluster.
-
-    For example::
-
-        tasks:
-        - ceph:
-        - interactive:
-
-    You can also specify what branch to run::
-
-        tasks:
-        - ceph:
-            branch: foo
-
-    Or a tag::
-
-        tasks:
-        - ceph:
-            tag: v0.42.13
-
-    Or a sha1::
-
-        tasks:
-        - ceph:
-            sha1: 1376a5ab0c89780eab39ffbbe436f6a6092314ed
-
-    Or a local source dir::
-
-        tasks:
-        - ceph:
-            path: /home/sage/ceph
-
-    To capture code coverage data, use::
-
-        tasks:
-        - ceph:
-            coverage: true
-
-    To use btrfs, ext4, or xfs on the target's scratch disks, use::
-
-        tasks:
-        - ceph:
-            fs: xfs
-            mkfs_options: [-b,size=65536,-l,logdev=/dev/sdc1]
-            mount_options: [nobarrier, inode64]
-
-    Note, this will cause the task to check the /scratch_devs file on each node
-    for available devices.  If no such file is found, /dev/sdb will be used.
-
-    To run some daemons under valgrind, include their names
-    and the tool/args to use in a valgrind section::
-
-        tasks:
-        - ceph:
-          valgrind:
-            mds.1: --tool=memcheck
-            osd.1: [--tool=memcheck, --leak-check=no]
-
-    Nodes running daemons under memcheck or another valgrind tool will be
-    checked for bad results.
-
-    To adjust or modify config options, use::
-
-        tasks:
-        - ceph:
-            conf:
-              section:
-                key: value
-
-    For example::
-
-        tasks:
-        - ceph:
-            conf:
-              mds.0:
-                some option: value
-                other key: other value
-              client.0:
-                debug client: 10
-                debug ms: 1
-
-    By default, the cluster log is checked for errors and warnings,
-    and the run marked failed if any appear. You can ignore log
-    entries by giving a list of egrep-compatible regexes, e.g.::
-
-        tasks:
-        - ceph:
-            log-whitelist: ['foo.*bar', 'bad message']
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    if config is None:
-        config = {}
-    assert isinstance(config, dict), \
-        "task ceph only supports a dictionary for configuration"
-
-    overrides = ctx.config.get('overrides', {})
-    teuthology.deep_merge(config, overrides.get('ceph', {}))
-
-    ctx.daemons = CephState()
-
-    testdir = teuthology.get_testdir(ctx)
-    if config.get('coverage'):
-        coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
-        log.info('Creating coverage directory...')
-        run.wait(
-            ctx.cluster.run(
-                args=[
-                    'install', '-d', '-m0755', '--',
-                    coverage_dir,
-                    ],
-                wait=False,
-                )
-            )
-
-    with contextutil.nested(
-        lambda: ceph_log(ctx=ctx, config=None),
-        lambda: valgrind_post(ctx=ctx, config=config),
-        lambda: cluster(ctx=ctx, config=dict(
-                conf=config.get('conf', {}),
-                fs=config.get('fs', None),
-                mkfs_options=config.get('mkfs_options', None),
-                mount_options=config.get('mount_options',None),
-                block_journal=config.get('block_journal', None),
-                tmpfs_journal=config.get('tmpfs_journal', None),
-                log_whitelist=config.get('log-whitelist', []),
-                cpu_profile=set(config.get('cpu_profile', [])),
-                )),
-        lambda: run_daemon(ctx=ctx, config=config, type_='mon'),
-        lambda: run_daemon(ctx=ctx, config=config, type_='osd'),
-        lambda: run_daemon(ctx=ctx, config=config, type_='mds'),
-        ):
-        try:
-            if config.get('wait-for-healthy', True):
-                healthy(ctx=ctx, config=None)
-            yield
-        finally:
-            osd_scrub_pgs(ctx, config)
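The wait_for_mon_quorum helper above simply polls `ceph quorum_status` and compares the reported quorum against the expected monitor names. A minimal standalone sketch of that polling pattern, outside the teuthology framework, assuming a working `ceph` CLI on the local host (the function name is illustrative):

import json
import subprocess
import time

def wait_for_quorum(expected_mons, interval=1):
    # Poll `ceph quorum_status` until the quorum matches the expected monitors.
    while True:
        out = subprocess.check_output(['ceph', 'quorum_status'])
        quorum = json.loads(out).get('quorum_names', [])
        if sorted(quorum) == sorted(expected_mons):
            return
        time.sleep(interval)

# e.g. wait_for_quorum(['a', 'b', 'c'])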
diff --git a/teuthology/task/ceph_client.py b/teuthology/task/ceph_client.py
deleted file mode 100644 (file)
index 8935fc8..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-"""
-Set up client keyring
-"""
-import logging
-
-from teuthology import misc as teuthology
-from ..orchestra import run
-
-log = logging.getLogger(__name__)
-
-def create_keyring(ctx):
-    """
-    Set up key ring on remote sites
-    """
-    log.info('Setting up client nodes...')
-    clients = ctx.cluster.only(teuthology.is_type('client'))
-    testdir = teuthology.get_testdir(ctx)
-    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
-    for remote, roles_for_host in clients.remotes.iteritems():
-        for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
-            client_keyring = '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
-            remote.run(
-                args=[
-                    'sudo',
-                    'adjust-ulimits',
-                    'ceph-coverage',
-                    coverage_dir,
-                    'ceph-authtool',
-                    '--create-keyring',
-                    '--gen-key',
-                    # TODO this --name= is not really obeyed, all unknown "types" are munged to "client"
-                    '--name=client.{id}'.format(id=id_),
-                    client_keyring,
-                    run.Raw('&&'),
-                    'sudo',
-                    'chmod',
-                    '0644',
-                    client_keyring,
-                    ],
-                )
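create_keyring above boils down to running ceph-authtool on each client node and then loosening the keyring permissions. A simplified local sketch of those two steps, without the adjust-ulimits/ceph-coverage wrappers (the helper name and default path template are illustrative):

import subprocess

def make_client_keyring(client_id, path_template='/etc/ceph/ceph.client.{id}.keyring'):
    # Create a keyring with a freshly generated key for client.<id>,
    # then make it world-readable as the task does.
    path = path_template.format(id=client_id)
    subprocess.check_call([
        'sudo', 'ceph-authtool', '--create-keyring', '--gen-key',
        '--name=client.{id}'.format(id=client_id), path,
    ])
    subprocess.check_call(['sudo', 'chmod', '0644', path])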
diff --git a/teuthology/task/ceph_deploy.py b/teuthology/task/ceph_deploy.py
deleted file mode 100644 (file)
index 9964bab..0000000
+++ /dev/null
@@ -1,478 +0,0 @@
-"""
-Execute ceph-deploy as a task
-"""
-from cStringIO import StringIO
-
-import contextlib
-import os
-import time
-import logging
-
-from teuthology import misc as teuthology
-from teuthology import contextutil
-from ..config import config as teuth_config
-import install as install_fn
-from ..orchestra import run
-
-log = logging.getLogger(__name__)
-
-
-@contextlib.contextmanager
-def download_ceph_deploy(ctx, config):
-    """
-    Downloads ceph-deploy from the ceph.com git mirror and (by default)
-    switches to the master branch. If the `ceph-deploy-branch` is specified, it
-    will use that instead.
-    """
-    log.info('Downloading ceph-deploy...')
-    testdir = teuthology.get_testdir(ctx)
-    ceph_admin = teuthology.get_first_mon(ctx, config)
-    default_cd_branch = {'ceph-deploy-branch': 'master'}
-    ceph_deploy_branch = config.get(
-        'ceph-deploy',
-        default_cd_branch).get('ceph-deploy-branch')
-
-    ctx.cluster.only(ceph_admin).run(
-        args=[
-            'git', 'clone', '-b', ceph_deploy_branch,
-            teuth_config.ceph_git_base_url + 'ceph-deploy.git',
-            '{tdir}/ceph-deploy'.format(tdir=testdir),
-            ],
-        )
-    ctx.cluster.only(ceph_admin).run(
-        args=[
-            'cd',
-            '{tdir}/ceph-deploy'.format(tdir=testdir),
-            run.Raw('&&'),
-            './bootstrap',
-            ],
-        )
-
-    try:
-        yield
-    finally:
-        log.info('Removing ceph-deploy ...')
-        ctx.cluster.only(ceph_admin).run(
-            args=[
-                'rm',
-                '-rf',
-                '{tdir}/ceph-deploy'.format(tdir=testdir),
-                ],
-            )
-
-
-def is_healthy(ctx, config):
-    """Wait until a Ceph cluster is healthy."""
-    testdir = teuthology.get_testdir(ctx)
-    ceph_admin = teuthology.get_first_mon(ctx, config)
-    (remote,) = ctx.cluster.only(ceph_admin).remotes.keys()
-    max_tries = 90  # 90 tries * 10 secs --> 15 minutes
-    tries = 0
-    while True:
-        tries += 1
-        if tries >= max_tries:
-            msg = "ceph health was unable to get 'HEALTH_OK' after waiting 15 minutes"
-            raise RuntimeError(msg)
-
-        r = remote.run(
-            args=[
-                'cd',
-                '{tdir}'.format(tdir=testdir),
-                run.Raw('&&'),
-                'sudo', 'ceph',
-                'health',
-                ],
-            stdout=StringIO(),
-            logger=log.getChild('health'),
-            )
-        out = r.stdout.getvalue()
-        log.debug('Ceph health: %s', out.rstrip('\n'))
-        if out.split(None, 1)[0] == 'HEALTH_OK':
-            break
-        time.sleep(10)
-
-def get_nodes_using_roles(ctx, config, role):
-    """Extract the names of nodes that match a given role from a cluster"""
-    newl = []
-    for _remote, roles_for_host in ctx.cluster.remotes.iteritems():
-        for id_ in teuthology.roles_of_type(roles_for_host, role):
-            rem = _remote
-            if role == 'mon':
-                req1 = str(rem).split('@')[-1]
-            else:
-                req = str(rem).split('.')[0]
-                req1 = str(req).split('@')[1]
-            newl.append(req1)
-    return newl
-
-def get_dev_for_osd(ctx, config):
-    """Get a list of all osd device names."""
-    osd_devs = []
-    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
-        host = remote.name.split('@')[-1]
-        shortname = host.split('.')[0]
-        devs = teuthology.get_scratch_devices(remote)
-        num_osd_per_host = list(teuthology.roles_of_type(roles_for_host, 'osd'))
-        num_osds = len(num_osd_per_host)
-        assert num_osds <= len(devs), 'fewer disks than osds on ' + shortname
-        for dev in devs[:num_osds]:
-            dev_short = dev.split('/')[-1]
-            osd_devs.append('{host}:{dev}'.format(host=shortname, dev=dev_short))
-    return osd_devs
-
-def get_all_nodes(ctx, config):
-    """Return a string of node names separated by blanks"""
-    nodelist = []
-    for t, k in ctx.config['targets'].iteritems():
-        host = t.split('@')[-1]
-        simple_host = host.split('.')[0]
-        nodelist.append(simple_host)
-    nodelist = " ".join(nodelist)
-    return nodelist
-
-def execute_ceph_deploy(ctx, config, cmd):
-    """Remotely execute a ceph_deploy command"""
-    testdir = teuthology.get_testdir(ctx)
-    ceph_admin = teuthology.get_first_mon(ctx, config)
-    exec_cmd = cmd
-    (remote,) = ctx.cluster.only(ceph_admin).remotes.iterkeys()
-    proc = remote.run(
-        args = [
-            'cd',
-            '{tdir}/ceph-deploy'.format(tdir=testdir),
-            run.Raw('&&'),
-            run.Raw(exec_cmd),
-            ],
-            check_status=False,
-        )
-    exitstatus = proc.exitstatus
-    return exitstatus
-
-
-@contextlib.contextmanager
-def build_ceph_cluster(ctx, config):
-    """Build a ceph cluster"""
-
-    try:
-        log.info('Building ceph cluster using ceph-deploy...')
-        testdir = teuthology.get_testdir(ctx)
-        ceph_branch = None
-        if config.get('branch') is not None:
-            cbranch = config.get('branch')
-            for var, val in cbranch.iteritems():
-                if var == 'testing':
-                    ceph_branch = '--{var}'.format(var=var)
-                else:
-                    ceph_branch = '--{var}={val}'.format(var=var, val=val)
-        node_dev_list = []
-        all_nodes = get_all_nodes(ctx, config)
-        mds_nodes = get_nodes_using_roles(ctx, config, 'mds')
-        mds_nodes = " ".join(mds_nodes)
-        mon_node = get_nodes_using_roles(ctx, config, 'mon')
-        mon_nodes = " ".join(mon_node)
-        new_mon = './ceph-deploy new'+" "+mon_nodes
-        install_nodes = './ceph-deploy install '+ceph_branch+" "+all_nodes
-        purge_nodes = './ceph-deploy purge'+" "+all_nodes
-        purgedata_nodes = './ceph-deploy purgedata'+" "+all_nodes
-        mon_hostname = mon_nodes.split(' ')[0]
-        mon_hostname = str(mon_hostname)
-        gather_keys = './ceph-deploy gatherkeys'+" "+mon_hostname
-        deploy_mds = './ceph-deploy mds create'+" "+mds_nodes
-        no_of_osds = 0
-
-        if not mon_nodes:
-            raise RuntimeError("no monitor nodes in the config file")
-
-        estatus_new = execute_ceph_deploy(ctx, config, new_mon)
-        if estatus_new != 0:
-            raise RuntimeError("ceph-deploy: new command failed")
-
-        log.info('adding config inputs...')
-        testdir = teuthology.get_testdir(ctx)
-        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)
-        first_mon = teuthology.get_first_mon(ctx, config)
-        (remote,) = ctx.cluster.only(first_mon).remotes.keys()
-
-        lines = None
-        if config.get('conf') is not None:
-            confp = config.get('conf')
-            for section, keys in confp.iteritems():
-                lines = '[{section}]\n'.format(section=section)
-                teuthology.append_lines_to_file(remote, conf_path, lines,
-                                                sudo=True)
-                for key, value in keys.iteritems():
-                    log.info("[%s] %s = %s" % (section, key, value))
-                    lines = '{key} = {value}\n'.format(key=key, value=value)
-                    teuthology.append_lines_to_file(remote, conf_path, lines,
-                                                    sudo=True)
-
-        estatus_install = execute_ceph_deploy(ctx, config, install_nodes)
-        if estatus_install != 0:
-            raise RuntimeError("ceph-deploy: Failed to install ceph")
-
-        mon_no = None
-        mon_no = config.get('mon_initial_members')
-        if mon_no is not None:
-            i = 0
-            mon1 = []
-            while(i < mon_no):
-                mon1.append(mon_node[i])
-                i = i + 1
-            initial_mons = " ".join(mon1)
-            for k in range(mon_no, len(mon_node)):
-                mon_create_nodes = './ceph-deploy mon create' + " " + \
-                    initial_mons + " " + mon_node[k]
-                estatus_mon = execute_ceph_deploy(ctx, config,
-                                                  mon_create_nodes)
-                if estatus_mon != 0:
-                    raise RuntimeError("ceph-deploy: Failed to create monitor")
-        else:
-            mon_create_nodes = './ceph-deploy mon create-initial'
-            estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes)
-            if estatus_mon != 0:
-                raise RuntimeError("ceph-deploy: Failed to create monitors")
-
-        estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
-        max_gather_tries = 90
-        gather_tries = 0
-        while (estatus_gather != 0):
-            gather_tries += 1
-            if gather_tries >= max_gather_tries:
-                msg = 'ceph-deploy was not able to gatherkeys after 15 minutes'
-                raise RuntimeError(msg)
-            estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
-            time.sleep(10)
-
-        if mds_nodes:
-            estatus_mds = execute_ceph_deploy(ctx, config, deploy_mds)
-            if estatus_mds != 0:
-                raise RuntimeError("ceph-deploy: Failed to deploy mds")
-
-        if config.get('test_mon_destroy') is not None:
-            for d in range(1, len(mon_node)):
-                mon_destroy_nodes = './ceph-deploy mon destroy'+" "+mon_node[d]
-                estatus_mon_d = execute_ceph_deploy(ctx, config,
-                                                    mon_destroy_nodes)
-                if estatus_mon_d != 0:
-                    raise RuntimeError("ceph-deploy: Failed to delete monitor")
-
-        node_dev_list = get_dev_for_osd(ctx, config)
-        for d in node_dev_list:
-            osd_create_cmds = './ceph-deploy osd create --zap-disk'+" "+d
-            estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds)
-            if estatus_osd == 0:
-                log.info('successfully created osd')
-                no_of_osds += 1
-            else:
-                zap_disk = './ceph-deploy disk zap'+" "+d
-                execute_ceph_deploy(ctx, config, zap_disk)
-                estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds)
-                if estatus_osd == 0:
-                    log.info('successfully created osd')
-                    no_of_osds += 1
-                else:
-                    raise RuntimeError("ceph-deploy: Failed to create osds")
-
-        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
-            is_healthy(ctx=ctx, config=None)
-
-            log.info('Setting up client nodes...')
-            conf_path = '/etc/ceph/ceph.conf'
-            admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
-            first_mon = teuthology.get_first_mon(ctx, config)
-            (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
-            conf_data = teuthology.get_file(
-                remote=mon0_remote,
-                path=conf_path,
-                sudo=True,
-                )
-            admin_keyring = teuthology.get_file(
-                remote=mon0_remote,
-                path=admin_keyring_path,
-                sudo=True,
-                )
-
-            clients = ctx.cluster.only(teuthology.is_type('client'))
-            for remot, roles_for_host in clients.remotes.iteritems():
-                for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
-                    client_keyring = \
-                        '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
-                    mon0_remote.run(
-                        args=[
-                            'cd',
-                            '{tdir}'.format(tdir=testdir),
-                            run.Raw('&&'),
-                            'sudo', 'bash', '-c',
-                            run.Raw('"'), 'ceph',
-                            'auth',
-                            'get-or-create',
-                            'client.{id}'.format(id=id_),
-                            'mds', 'allow',
-                            'mon', 'allow *',
-                            'osd', 'allow *',
-                            run.Raw('>'),
-                            client_keyring,
-                            run.Raw('"'),
-                            ],
-                        )
-                    key_data = teuthology.get_file(
-                        remote=mon0_remote,
-                        path=client_keyring,
-                        sudo=True,
-                        )
-                    teuthology.sudo_write_file(
-                        remote=remot,
-                        path=client_keyring,
-                        data=key_data,
-                        perms='0644'
-                    )
-                    teuthology.sudo_write_file(
-                        remote=remot,
-                        path=admin_keyring_path,
-                        data=admin_keyring,
-                        perms='0644'
-                    )
-                    teuthology.sudo_write_file(
-                        remote=remot,
-                        path=conf_path,
-                        data=conf_data,
-                        perms='0644'
-                    )
-        else:
-            raise RuntimeError(
-                "The cluster is NOT operational due to insufficient OSDs")
-        yield
-
-    finally:
-        log.info('Stopping ceph...')
-        ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'),
-                              'sudo', 'service', 'ceph', 'stop' ])
-
-        # Are you really not running anymore?
-        # try first with the init tooling
-        ctx.cluster.run(args=['sudo', 'status', 'ceph-all', run.Raw('||'),
-                              'sudo', 'service',  'status', 'ceph-all'])
-
-        # and now just check for the processes themselves, as if upstart/sysvinit
-        # is lying to us. Ignore errors if the grep fails
-        ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'),
-                              'grep', '-v', 'grep', run.Raw('|'),
-                              'grep', 'ceph'], check_status=False)
-
-        if ctx.archive is not None:
-            # archive mon data, too
-            log.info('Archiving mon data...')
-            path = os.path.join(ctx.archive, 'data')
-            os.makedirs(path)
-            mons = ctx.cluster.only(teuthology.is_type('mon'))
-            for remote, roles in mons.remotes.iteritems():
-                for role in roles:
-                    if role.startswith('mon.'):
-                        teuthology.pull_directory_tarball(
-                            remote,
-                            '/var/lib/ceph/mon',
-                            path + '/' + role + '.tgz')
-
-            log.info('Compressing logs...')
-            run.wait(
-                ctx.cluster.run(
-                    args=[
-                        'sudo',
-                        'find',
-                        '/var/log/ceph',
-                        '-name',
-                        '*.log',
-                        '-print0',
-                        run.Raw('|'),
-                        'sudo',
-                        'xargs',
-                        '-0',
-                        '--no-run-if-empty',
-                        '--',
-                        'gzip',
-                        '--',
-                        ],
-                    wait=False,
-                    ),
-                )
-
-            log.info('Archiving logs...')
-            path = os.path.join(ctx.archive, 'remote')
-            os.makedirs(path)
-            for remote in ctx.cluster.remotes.iterkeys():
-                sub = os.path.join(path, remote.shortname)
-                os.makedirs(sub)
-                teuthology.pull_directory(remote, '/var/log/ceph',
-                                          os.path.join(sub, 'log'))
-
-        # Prevent these from being undefined if the try block fails
-        all_nodes = get_all_nodes(ctx, config)
-        purge_nodes = './ceph-deploy purge'+" "+all_nodes
-        purgedata_nodes = './ceph-deploy purgedata'+" "+all_nodes
-
-        log.info('Purging package...')
-        execute_ceph_deploy(ctx, config, purge_nodes)
-        log.info('Purging data...')
-        execute_ceph_deploy(ctx, config, purgedata_nodes)
-
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Set up and tear down a Ceph cluster.
-
-    For example::
-
-        tasks:
-        - install:
-             extras: yes
-        - ssh_keys:
-        - ceph-deploy:
-             branch:
-                stable: bobtail
-             mon_initial_members: 1
-
-        tasks:
-        - install:
-             extras: yes
-        - ssh_keys:
-        - ceph-deploy:
-             branch:
-                dev: master
-             conf:
-                mon:
-                   debug mon = 20
-
-        tasks:
-        - install:
-             extras: yes
-        - ssh_keys:
-        - ceph-deploy:
-             branch:
-                testing:
-    """
-    if config is None:
-        config = {}
-
-    assert isinstance(config, dict), \
-        "task ceph-deploy only supports a dictionary for configuration"
-
-    overrides = ctx.config.get('overrides', {})
-    teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))
-
-    if config.get('branch') is not None:
-        assert isinstance(config['branch'], dict), 'branch must be a dictionary'
-
-    with contextutil.nested(
-         lambda: install_fn.ship_utilities(ctx=ctx, config=None),
-         lambda: download_ceph_deploy(ctx=ctx, config=config),
-         lambda: build_ceph_cluster(ctx=ctx, config=dict(
-                 conf=config.get('conf', {}),
-                 branch=config.get('branch',{}),
-                 mon_initial_members=config.get('mon_initial_members', None),
-                 test_mon_destroy=config.get('test_mon_destroy', None),
-                 )),
-        ):
-        yield
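Both is_healthy above and the gatherkeys loop follow the same bounded-retry pattern: poll a command every ten seconds and give up after 90 tries (15 minutes). A minimal sketch of that pattern for the health check, assuming a local `ceph` CLI (the helper name is illustrative):

import subprocess
import time

def wait_until_healthy(max_tries=90, interval=10):
    # Poll `ceph health` until it reports HEALTH_OK; raise if it never does.
    for _ in range(max_tries):
        out = subprocess.check_output(['sudo', 'ceph', 'health']).decode()
        if out.startswith('HEALTH_OK'):
            return
        time.sleep(interval)
    raise RuntimeError('ceph health did not reach HEALTH_OK after %d tries' % max_tries)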
diff --git a/teuthology/task/ceph_fuse.py b/teuthology/task/ceph_fuse.py
deleted file mode 100644 (file)
index ef3998b..0000000
+++ /dev/null
@@ -1,207 +0,0 @@
-"""
-Ceph FUSE client task
-"""
-import contextlib
-import logging
-import os
-import time
-from cStringIO import StringIO
-
-from teuthology import misc
-from ..orchestra import run
-
-log = logging.getLogger(__name__)
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Mount/unmount a ``ceph-fuse`` client.
-
-    The config is optional and defaults to mounting on all clients. If
-    a config is given, it is expected to be a list of clients to do
-    this operation on. This lets you e.g. set up one client with
-    ``ceph-fuse`` and another with ``kclient``.
-
-    Example that mounts all clients::
-
-        tasks:
-        - ceph:
-        - ceph-fuse:
-        - interactive:
-
-    Example that uses both ``kclient`` and ``ceph-fuse``::
-
-        tasks:
-        - ceph:
-        - ceph-fuse: [client.0]
-        - kclient: [client.1]
-        - interactive:
-
-    Example that enables valgrind::
-
-        tasks:
-        - ceph:
-        - ceph-fuse:
-            client.0:
-              valgrind: [--tool=memcheck, --leak-check=full, --show-reachable=yes]
-        - interactive:
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    log.info('Mounting ceph-fuse clients...')
-    fuse_daemons = {}
-
-    testdir = misc.get_testdir(ctx)
-
-    if config is None:
-        config = dict(('client.{id}'.format(id=id_), None)
-                  for id_ in misc.all_roles_of_type(ctx.cluster, 'client'))
-    elif isinstance(config, list):
-        config = dict((name, None) for name in config)
-
-    overrides = ctx.config.get('overrides', {})
-    misc.deep_merge(config, overrides.get('ceph-fuse', {}))
-
-    clients = list(misc.get_clients(ctx=ctx, roles=config.keys()))
-
-    for id_, remote in clients:
-        client_config = config.get("client.%s" % id_)
-        if client_config is None:
-            client_config = {}
-        log.info("Client client.%s config is %s" % (id_, client_config))
-
-        daemon_signal = 'kill'
-        if client_config.get('coverage') or client_config.get('valgrind') is not None:
-            daemon_signal = 'term'
-
-        mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
-        log.info('Mounting ceph-fuse client.{id} at {remote} {mnt}...'.format(
-                id=id_, remote=remote,mnt=mnt))
-
-        remote.run(
-            args=[
-                'mkdir',
-                '--',
-                mnt,
-                ],
-            )
-
-        run_cmd=[
-            'sudo',
-            'adjust-ulimits',
-            'ceph-coverage',
-            '{tdir}/archive/coverage'.format(tdir=testdir),
-            'daemon-helper',
-            daemon_signal,
-            ]
-        run_cmd_tail=[
-            'ceph-fuse',
-            '-f',
-            '--name', 'client.{id}'.format(id=id_),
-            # TODO ceph-fuse doesn't understand dash dash '--',
-            mnt,
-            ]
-
-        if client_config.get('valgrind') is not None:
-            run_cmd = misc.get_valgrind_args(
-                testdir,
-                'client.{id}'.format(id=id_),
-                run_cmd,
-                client_config.get('valgrind'),
-                )
-
-        run_cmd.extend(run_cmd_tail)
-
-        proc = remote.run(
-            args=run_cmd,
-            logger=log.getChild('ceph-fuse.{id}'.format(id=id_)),
-            stdin=run.PIPE,
-            wait=False,
-            )
-        fuse_daemons[id_] = proc
-
-    for id_, remote in clients:
-        mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
-        wait_until_fuse_mounted(
-            remote=remote,
-            fuse=fuse_daemons[id_],
-            mountpoint=mnt,
-            )
-        remote.run(args=['sudo', 'chmod', '1777', '{tdir}/mnt.{id}'.format(tdir=testdir, id=id_)],)
-
-    try:
-        yield
-    finally:
-        log.info('Unmounting ceph-fuse clients...')
-        for id_, remote in clients:
-            mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
-            try:
-              remote.run(
-                  args=[
-                      'sudo',
-                      'fusermount',
-                      '-u',
-                      mnt,
-                      ],
-                  )
-            except run.CommandFailedError:
-              log.info('Failed to unmount ceph-fuse on {name}, aborting...'.format(name=remote.name))
-              # abort the fuse mount, killing all hung processes
-              remote.run(
-                  args=[
-                      'if', 'test', '-e', '/sys/fs/fuse/connections/*/abort',
-                      run.Raw(';'), 'then',
-                      'echo',
-                      '1',
-                      run.Raw('>'),
-                      run.Raw('/sys/fs/fuse/connections/*/abort'),
-                      run.Raw(';'), 'fi',
-                      ],
-                 )
-              # make sure it's unmounted
-              remote.run(
-                  args=[
-                      'sudo',
-                      'umount',
-                      '-l',
-                      '-f',
-                      mnt,
-                      ],
-                  )
-
-        run.wait(fuse_daemons.itervalues())
-
-        for id_, remote in clients:
-            mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
-            remote.run(
-                args=[
-                    'rmdir',
-                    '--',
-                    mnt,
-                    ],
-                )
-
-
-def wait_until_fuse_mounted(remote, fuse, mountpoint):
-    while True:
-        proc = remote.run(
-            args=[
-                'stat',
-                '--file-system',
-                '--printf=%T\n',
-                '--',
-                mountpoint,
-                ],
-            stdout=StringIO(),
-            )
-        fstype = proc.stdout.getvalue().rstrip('\n')
-        if fstype == 'fuseblk':
-            break
-        log.debug('ceph-fuse not yet mounted, got fs type {fstype!r}'.format(fstype=fstype))
-
-        # it shouldn't have exited yet; exposes some trivial problems
-        assert not fuse.exitstatus.ready()
-
-        time.sleep(5)
-    log.info('ceph-fuse is mounted on %s', mountpoint)
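wait_until_fuse_mounted above detects the mount by asking stat for the filesystem type of the mountpoint and waiting for it to become 'fuseblk'. A standalone sketch of that check, assuming GNU stat is available (the helper name is illustrative):

import subprocess
import time

def wait_for_fuse_mount(mountpoint, interval=5):
    # Poll the filesystem type of the mountpoint until FUSE reports 'fuseblk'.
    while True:
        fstype = subprocess.check_output(
            ['stat', '--file-system', '--printf=%T', '--', mountpoint]).decode().strip()
        if fstype == 'fuseblk':
            return
        time.sleep(interval)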
diff --git a/teuthology/task/ceph_manager.py b/teuthology/task/ceph_manager.py
deleted file mode 100644 (file)
index 39d2466..0000000
+++ /dev/null
@@ -1,1421 +0,0 @@
-"""
-ceph manager -- Thrasher and CephManager objects
-"""
-from cStringIO import StringIO
-import random
-import time
-import gevent
-import json
-import threading
-from teuthology import misc as teuthology
-from teuthology.task import ceph as ceph_task
-from teuthology.task.scrub import Scrubber
-
-class Thrasher:
-    """
-    Object used to thrash Ceph
-    """
-    def __init__(self, manager, config, logger=None):
-        self.ceph_manager = manager
-        self.ceph_manager.wait_for_clean()
-        osd_status = self.ceph_manager.get_osd_status()
-        self.in_osds = osd_status['in']
-        self.live_osds = osd_status['live']
-        self.out_osds = osd_status['out']
-        self.dead_osds = osd_status['dead']
-        self.stopping = False
-        self.logger = logger
-        self.config = config
-        self.revive_timeout = self.config.get("revive_timeout", 75)
-        if self.config.get('powercycle'):
-            self.revive_timeout += 120
-        self.clean_wait = self.config.get('clean_wait', 0)
-        self.minin = self.config.get("min_in", 3)
-
-        num_osds = self.in_osds + self.out_osds
-        self.max_pgs = self.config.get("max_pgs_per_pool_osd", 1200) * num_osds
-        if self.logger is not None:
-            self.log = lambda x: self.logger.info(x)
-        else:
-            def tmp(x):
-                """
-                Implement log behavior
-                """
-                print x
-            self.log = tmp
-        if self.config is None:
-            self.config = dict()
-        # prevent monitor from auto-marking things out while thrasher runs
-        # try both old and new tell syntax, in case we are testing old code
-        try:
-            manager.raw_cluster_cmd('--', 'tell', 'mon.*', 'injectargs',
-                                    '--mon-osd-down-out-interval 0')
-        except Exception:
-            manager.raw_cluster_cmd('--', 'mon', 'tell', '*', 'injectargs',
-                                    '--mon-osd-down-out-interval 0')
-        self.thread = gevent.spawn(self.do_thrash)
-
-    def kill_osd(self, osd=None, mark_down=False, mark_out=False):
-        """
-        :param osd: Osd to be killed.
-        :param mark_down: Mark down if true.
-        :param mark_out: Mark out if true.
-        """
-        if osd is None:
-            osd = random.choice(self.live_osds)
-        self.log("Killing osd %s, live_osds are %s" % (str(osd), str(self.live_osds)))
-        self.live_osds.remove(osd)
-        self.dead_osds.append(osd)
-        self.ceph_manager.kill_osd(osd)
-        if mark_down:
-            self.ceph_manager.mark_down_osd(osd)
-        if mark_out and osd in self.in_osds:
-            self.out_osd(osd)
-
-    def blackhole_kill_osd(self, osd=None):
-        """
-        If all else fails, kill the osd.
-        :param osd: Osd to be killed.
-        """
-        if osd is None:
-            osd = random.choice(self.live_osds)
-        self.log("Blackholing and then killing osd %s, live_osds are %s" % (str(osd), str(self.live_osds)))
-        self.live_osds.remove(osd)
-        self.dead_osds.append(osd)
-        self.ceph_manager.blackhole_kill_osd(osd)
-
-    def revive_osd(self, osd=None):
-        """
-        Revive the osd.
-        :param osd: Osd to be revived.
-        """
-        if osd is None:
-            osd = random.choice(self.dead_osds)
-        self.log("Reviving osd %s" % (str(osd),))
-        self.live_osds.append(osd)
-        self.dead_osds.remove(osd)
-        self.ceph_manager.revive_osd(osd, self.revive_timeout)
-
-    def out_osd(self, osd=None):
-        """
-        Mark the osd out
-        :param osd: Osd to be marked.
-        """
-        if osd is None:
-            osd = random.choice(self.in_osds)
-        self.log("Removing osd %s, in_osds are: %s" % (str(osd), str(self.in_osds)))
-        self.ceph_manager.mark_out_osd(osd)
-        self.in_osds.remove(osd)
-        self.out_osds.append(osd)
-
-    def in_osd(self, osd=None):
-        """
-        Mark the osd in (reviving it first if it is currently dead)
-        :param osd: Osd to be marked.
-        """
-        if osd is None:
-            osd = random.choice(self.out_osds)
-        if osd in self.dead_osds:
-            return self.revive_osd(osd)
-        self.log("Adding osd %s" % (str(osd),))
-        self.out_osds.remove(osd)
-        self.in_osds.append(osd)
-        self.ceph_manager.mark_in_osd(osd)
-        self.log("Added osd %s"%(str(osd),))
-
-    def reweight_osd(self, osd=None):
-        """
-        Reweight an osd that is in
-        :param osd: Osd to be marked.
-        """
-        if osd is None:
-            osd = random.choice(self.in_osds)
-        val = random.uniform(.1, 1.0)
-        self.log("Reweighting osd %s to %s" % (str(osd), str(val)))
-        self.ceph_manager.raw_cluster_cmd('osd', 'reweight', str(osd), str(val))
-
-    def primary_affinity(self, osd=None):
-        if osd is None:
-            osd = random.choice(self.in_osds)
-        if random.random() >= .5:
-            pa = random.random()
-        elif random.random() >= .5:
-            pa = 1
-        else:
-            pa = 0
-        self.log('Setting osd %s primary_affinity to %f' % (str(osd), pa))
-        self.ceph_manager.raw_cluster_cmd('osd', 'primary-affinity', str(osd), str(pa))
-
-    def all_up(self):
-        """
-        Make sure all osds are up and not out.
-        """
-        while len(self.dead_osds) > 0:
-            self.log("reviving osd")
-            self.revive_osd()
-        while len(self.out_osds) > 0:
-            self.log("inning osd")
-            self.in_osd()
-
-    def do_join(self):
-        """
-        Stop the thrashing loop and wait for its thread to finish
-        """
-        self.stopping = True
-        self.thread.get()
-
-    def grow_pool(self):
-        """
-        Increase a pool's pg_num
-        """
-        pool = self.ceph_manager.get_pool()
-        self.log("Growing pool %s"%(pool,))
-        self.ceph_manager.expand_pool(pool, self.config.get('pool_grow_by', 10), self.max_pgs)
-
-    def fix_pgp_num(self):
-        """
-        Set a pool's pgp_num to match its pg_num.
-        """
-        pool = self.ceph_manager.get_pool()
-        self.log("fixing pg num pool %s"%(pool,))
-        self.ceph_manager.set_pool_pgpnum(pool)
-
-    def test_pool_min_size(self):
-        """
-        Kill and revive all osds except one.
-        """
-        self.log("test_pool_min_size")
-        self.all_up()
-        self.ceph_manager.wait_for_recovery(
-            timeout=self.config.get('timeout')
-            )
-        the_one = random.choice(self.in_osds)
-        self.log("Killing everyone but %s", the_one)
-        to_kill = filter(lambda x: x != the_one, self.in_osds)
-        [self.kill_osd(i) for i in to_kill]
-        [self.out_osd(i) for i in to_kill]
-        time.sleep(self.config.get("test_pool_min_size_time", 10))
-        self.log("Killing %s" % (the_one,))
-        self.kill_osd(the_one)
-        self.out_osd(the_one)
-        self.log("Reviving everyone but %s" % (the_one,))
-        [self.revive_osd(i) for i in to_kill]
-        [self.in_osd(i) for i in to_kill]
-        self.log("Revived everyone but %s" % (the_one,))
-        self.log("Waiting for clean")
-        self.ceph_manager.wait_for_recovery(
-            timeout=self.config.get('timeout')
-            )
-
-    def inject_pause(self, conf_key, duration, check_after, should_be_down):
-        """
-        Pause injection testing. Check for osd being down when finished.
-        """
-        the_one = random.choice(self.live_osds)
-        self.log("inject_pause on {osd}".format(osd = the_one))
-        self.log(
-            "Testing {key} pause injection for duration {duration}".format(
-                key = conf_key,
-                duration = duration
-                ))
-        self.log(
-            "Checking after {after}, should_be_down={shouldbedown}".format(
-                after = check_after,
-                shouldbedown = should_be_down
-                ))
-        self.ceph_manager.set_config(the_one, **{conf_key:duration})
-        if not should_be_down:
-            return
-        time.sleep(check_after)
-        status = self.ceph_manager.get_osd_status()
-        assert the_one in status['down']
-        time.sleep(duration - check_after + 20)
-        status = self.ceph_manager.get_osd_status()
-        assert the_one not in status['down']
-
-    def test_backfill_full(self):
-        """
-        Test backfills stopping when the replica fills up.
-
-        First, use osd_backfill_full_ratio to simulate a now full
-        osd by setting it to 0 on all of the OSDs.
-
-        Second, on a random subset, set
-        osd_debug_skip_full_check_in_backfill_reservation to force
-        the more complicated check in do_scan to be exercised.
-
-        Then, verify that all backfills stop.
-        """
-        self.log("injecting osd_backfill_full_ratio = 0")
-        for i in self.live_osds:
-            self.ceph_manager.set_config(
-                i,
-                osd_debug_skip_full_check_in_backfill_reservation = random.choice(
-                    ['false', 'true']),
-                osd_backfill_full_ratio = 0)
-        for i in range(30):
-            status = self.ceph_manager.compile_pg_status()
-            if 'backfill' not in status.keys():
-                break
-            self.log(
-                "waiting for {still_going} backfills".format(
-                    still_going=status.get('backfill')))
-            time.sleep(1)
-        assert('backfill' not in self.ceph_manager.compile_pg_status().keys())
-        for i in self.live_osds:
-            self.ceph_manager.set_config(
-                i,
-                osd_debug_skip_full_check_in_backfill_reservation = \
-                    'false',
-                osd_backfill_full_ratio = 0.85)
-
-    def test_map_discontinuity(self):
-        """
-        1) Allows the osds to recover
-        2) kills an osd
-        3) allows the remaining osds to recover
-        4) waits for some time
-        5) revives the osd
-        This sequence should cause the revived osd to have to handle
-        a map gap since the mons would have trimmed
-        """
-        while len(self.in_osds) < (self.minin + 1):
-            self.in_osd()
-        self.log("Waiting for recovery")
-        self.ceph_manager.wait_for_all_up(
-            timeout=self.config.get('timeout')
-            )
-        # now we wait 20s for the pg status to change, if it takes longer,
-        # the test *should* fail!
-        time.sleep(20)
-        self.ceph_manager.wait_for_clean(
-            timeout=self.config.get('timeout')
-            )
-
-        # now we wait 20s for the backfill replicas to hear about the clean
-        time.sleep(20)
-        self.log("Recovered, killing an osd")
-        self.kill_osd(mark_down=True, mark_out=True)
-        self.log("Waiting for clean again")
-        self.ceph_manager.wait_for_clean(
-            timeout=self.config.get('timeout')
-            )
-        self.log("Waiting for trim")
-        time.sleep(int(self.config.get("map_discontinuity_sleep_time", 40)))
-        self.revive_osd()
-
-    def choose_action(self):
-        """
-        Random action selector.
-        """
-        chance_down = self.config.get('chance_down', 0.4)
-        chance_test_min_size = self.config.get('chance_test_min_size', 0)
-        chance_test_backfill_full = self.config.get('chance_test_backfill_full', 0)
-        if isinstance(chance_down, int):
-            chance_down = float(chance_down) / 100
-        minin = self.minin
-        minout = self.config.get("min_out", 0)
-        minlive = self.config.get("min_live", 2)
-        mindead = self.config.get("min_dead", 0)
-
-        self.log('choose_action: min_in %d min_out %d min_live %d min_dead %d' %
-                 (minin, minout, minlive, mindead))
-        actions = []
-        if len(self.in_osds) > minin:
-            actions.append((self.out_osd, 1.0,))
-        if len(self.live_osds) > minlive and chance_down > 0:
-            actions.append((self.kill_osd, chance_down,))
-        if len(self.out_osds) > minout:
-            actions.append((self.in_osd, 1.7,))
-        if len(self.dead_osds) > mindead:
-            actions.append((self.revive_osd, 1.0,))
-        if self.config.get('thrash_primary_affinity', True):
-            actions.append((self.primary_affinity, 1.0,))
-        actions.append((self.reweight_osd, self.config.get('reweight_osd',.5),))
-        actions.append((self.grow_pool, self.config.get('chance_pgnum_grow', 0),))
-        actions.append((self.fix_pgp_num, self.config.get('chance_pgpnum_fix', 0),))
-        actions.append((self.test_pool_min_size, chance_test_min_size,))
-        actions.append((self.test_backfill_full, chance_test_backfill_full,))
-        for key in ['heartbeat_inject_failure', 'filestore_inject_stall']:
-            for scenario in [
-                (lambda: self.inject_pause(key,
-                                           self.config.get('pause_short', 3),
-                                           0,
-                                           False),
-                 self.config.get('chance_inject_pause_short', 1),),
-                (lambda: self.inject_pause(key,
-                                           self.config.get('pause_long', 80),
-                                           self.config.get('pause_check_after', 70),
-                                           True),
-                 self.config.get('chance_inject_pause_long', 0),)]:
-                actions.append(scenario)
-
-        total = sum([y for (x, y) in actions])
-        val = random.uniform(0, total)
-        for (action, prob) in actions:
-            if val < prob:
-                return action
-            val -= prob
-        return None
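choose_action builds a list of (action, weight) pairs and samples one with probability proportional to its weight. The same pattern in isolation, as a minimal sketch (weighted_choice is an illustrative name, not part of teuthology):

import random

def weighted_choice(actions):
    # actions: list of (callable, weight) pairs.  Returns one callable chosen
    # with probability proportional to its weight, or None if all weights are 0.
    total = sum(weight for _, weight in actions)
    val = random.uniform(0, total)
    for action, weight in actions:
        if val < weight:
            return action
        val -= weight
    return None

# e.g. weighted_choice([(lambda: 'noop', 0.9), (lambda: 'kill', 0.1)])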
-
-    def do_thrash(self):
-        """
-        Loop to select random actions to thrash ceph manager with.
-        """
-        cleanint = self.config.get("clean_interval", 60)
-        scrubint = self.config.get("scrub_interval", -1)
-        maxdead = self.config.get("max_dead", 0)
-        delay = self.config.get("op_delay", 5)
-        self.log("starting do_thrash")
-        while not self.stopping:
-            self.log(" ".join([str(x) for x in ["in_osds: ", self.in_osds, " out_osds: ", self.out_osds,
-                                                "dead_osds: ", self.dead_osds, "live_osds: ",
-                                                self.live_osds]]))
-            if random.uniform(0, 1) < (float(delay) / cleanint):
-                while len(self.dead_osds) > maxdead:
-                    self.revive_osd()
-                for osd in self.in_osds:
-                    self.ceph_manager.raw_cluster_cmd('osd', 'reweight',
-                                                      str(osd), str(1))
-                if random.uniform(0, 1) < float(
-                    self.config.get('chance_test_map_discontinuity', 0)):
-                    self.test_map_discontinuity()
-                else:
-                    self.ceph_manager.wait_for_recovery(
-                        timeout=self.config.get('timeout')
-                        )
-                time.sleep(self.clean_wait)
-                if scrubint > 0:
-                    if random.uniform(0, 1) < (float(delay) / scrubint):
-                        self.log('Scrubbing while thrashing being performed')
-                        Scrubber(self.ceph_manager, self.config)
-            self.choose_action()()
-            time.sleep(delay)
-        self.all_up()
-
-class CephManager:
-    """
-    Ceph manager object.
-    Contains several local functions that form a bulk of this module.
-    """
-    def __init__(self, controller, ctx=None, config=None, logger=None):
-        self.lock = threading.RLock()
-        self.ctx = ctx
-        self.config = config
-        self.controller = controller
-        self.next_pool_id = 0
-        self.created_erasure_pool = False
-        if (logger):
-            self.log = lambda x: logger.info(x)
-        else:
-            def tmp(x):
-                """
-                implement log behavior.
-                """
-                print x
-            self.log = tmp
-        if self.config is None:
-            self.config = dict()
-        pools = self.list_pools()
-        self.pools = {}
-        for pool in pools:
-            self.pools[pool] = self.get_pool_property(pool, 'pg_num')
-
-    def raw_cluster_cmd(self, *args):
-        """
-        Run a ceph command on the cluster and return its stdout.
-        """
-        testdir = teuthology.get_testdir(self.ctx)
-        ceph_args = [
-                'adjust-ulimits',
-                'ceph-coverage',
-                '{tdir}/archive/coverage'.format(tdir=testdir),
-                'ceph',
-                ]
-        ceph_args.extend(args)
-        proc = self.controller.run(
-            args=ceph_args,
-            stdout=StringIO(),
-            )
-        return proc.stdout.getvalue()
-
-    def raw_cluster_cmd_result(self, *args):
-        """
-        Run a ceph command on the cluster and return its exit status.
-        """
-        testdir = teuthology.get_testdir(self.ctx)
-        ceph_args = [
-                'adjust-ulimits',
-                'ceph-coverage',
-                '{tdir}/archive/coverage'.format(tdir=testdir),
-                'ceph',
-                ]
-        ceph_args.extend(args)
-        proc = self.controller.run(
-            args=ceph_args,
-            check_status=False,
-            )
-        return proc.exitstatus
-
-    def do_rados(self, remote, cmd):
-        """
-        Execute a remote rados command.
-        """
-        testdir = teuthology.get_testdir(self.ctx)
-        pre = [
-            'adjust-ulimits',
-            'ceph-coverage',
-            '{tdir}/archive/coverage'.format(tdir=testdir),
-            'rados',
-            ]
-        pre.extend(cmd)
-        proc = remote.run(
-            args=pre,
-            wait=True,
-            )
-        return proc
-
-    def rados_write_objects(
-        self, pool, num_objects, size, timelimit, threads, cleanup=False):
-        """
-        Write rados objects
-        Threads not used yet.
-        """
-        args = [
-            '-p', pool,
-            '--num-objects', num_objects,
-            '-b', size,
-            'bench', timelimit,
-            'write'
-            ]
-        if not cleanup: args.append('--no-cleanup')
-        return self.do_rados(self.controller, map(str, args))
-
-    def do_put(self, pool, obj, fname):
-        """
-        Implement rados put operation
-        """
-        return self.do_rados(
-            self.controller,
-            [
-                '-p',
-                pool,
-                'put',
-                obj,
-                fname
-                ]
-            )
-
-    def do_get(self, pool, obj, fname='/dev/null'):
-        """
-        Implement rados get operation
-        """
-        return self.do_rados(
-            self.controller,
-            [
-                '-p',
-                pool,
-                'stat',
-                obj,
-                fname
-                ]
-            )
-
-    def osd_admin_socket(self, osdnum, command, check_status=True):
-        """
-        Run a ceph command against the admin socket of the given osd on its remote host
-        """
-        testdir = teuthology.get_testdir(self.ctx)
-        remote = None
-        for _remote, roles_for_host in self.ctx.cluster.remotes.iteritems():
-            for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
-                if int(id_) == int(osdnum):
-                    remote = _remote
-        assert remote is not None
-        args = [
-            'sudo',
-            'adjust-ulimits',
-            'ceph-coverage',
-            '{tdir}/archive/coverage'.format(tdir=testdir),
-            'ceph',
-            '--admin-daemon',
-            '/var/run/ceph/ceph-osd.{id}.asok'.format(id=osdnum),
-            ]
-        args.extend(command)
-        return remote.run(
-            args=args,
-            stdout=StringIO(),
-            wait=True,
-            check_status=check_status
-            )
-
-    def get_pgid(self, pool, pgnum):
-        """
-        :param pool: pool name
-        :param pgnum: pg number
-        :returns: a string representing this pg.
-        """
-        poolnum = self.get_pool_num(pool)
-        pg_str = "{poolnum}.{pgnum}".format(
-            poolnum=poolnum,
-            pgnum=pgnum)
-        return pg_str
-
-    def get_pg_replica(self, pool, pgnum):
-        """
-        get replica for pool, pgnum (e.g. (data, 0) -> 0)
-        """
-        output = self.raw_cluster_cmd("pg", "dump", '--format=json')
-        j = json.loads('\n'.join(output.split('\n')[1:]))
-        pg_str = self.get_pgid(pool, pgnum)
-        for pg in j['pg_stats']:
-            if pg['pgid'] == pg_str:
-                return int(pg['acting'][-1])
-        assert False
-
-    def get_pg_primary(self, pool, pgnum):
-        """
-        get primary for pool, pgnum (e.g. (data, 0) -> 0)
-        """
-        output = self.raw_cluster_cmd("pg", "dump", '--format=json')
-        j = json.loads('\n'.join(output.split('\n')[1:]))
-        pg_str = self.get_pgid(pool, pgnum)
-        for pg in j['pg_stats']:
-            if pg['pgid'] == pg_str:
-                return int(pg['acting'][0])
-        assert False
-
-    def get_pool_num(self, pool):
-        """
-        get number for pool (e.g., data -> 2)
-        """
-        out = self.raw_cluster_cmd('osd', 'dump', '--format=json')
-        j = json.loads('\n'.join(out.split('\n')[1:]))
-        for i in j['pools']:
-            if i['pool_name'] == pool:
-                return int(i['pool'])
-        assert False
-
-    def list_pools(self):
-        """
-        list all pool names
-        """
-        out = self.raw_cluster_cmd('osd', 'dump', '--format=json')
-        j = json.loads('\n'.join(out.split('\n')[1:]))
-        self.log(j['pools'])
-        return [str(i['pool_name']) for i in j['pools']]
-
-    def clear_pools(self):
-        """
-        remove all pools
-        """
-        [self.remove_pool(i) for i in self.list_pools()]
-
-    def kick_recovery_wq(self, osdnum):
-        """
-        Run kick_recovery_wq on cluster.
-        """
-        return self.raw_cluster_cmd(
-            'tell', "osd.%d" % (int(osdnum),),
-            'debug',
-            'kick_recovery_wq',
-            '0')
-
-    def wait_run_admin_socket(self, osdnum, args=['version'], timeout=75):
-        """
-        If the osd_admin_socket call succeeds, return.  Otherwise wait
-        five seconds and try again.
-        """
-        tries = 0
-        while True:
-            proc = self.osd_admin_socket(
-                osdnum, args,
-                check_status=False)
-            if proc.exitstatus == 0:
-                break
-            else:
-                tries += 1
-                if (tries * 5) > timeout:
-                    raise Exception('timed out waiting for admin_socket to appear after osd.{o} restart'.format(o=osdnum))
-                self.log(
-                    "waiting on admin_socket for {osdnum}, {command}".format(
-                        osdnum=osdnum,
-                        command=args))
-                time.sleep(5)
-
-    def set_config(self, osdnum, **argdict):
-        """
-        :param osdnum: osd number
-        :param argdict: dictionary containing values to set.
-        """
-        for k, v in argdict.iteritems():
-            self.wait_run_admin_socket(
-                osdnum,
-                ['config', 'set', str(k), str(v)])
-
-    def raw_cluster_status(self):
-        """
-        Get status from cluster
-        """
-        status = self.raw_cluster_cmd('status', '--format=json-pretty')
-        return json.loads(status)
-
-    def raw_osd_status(self):
-        """
-        Get osd status from cluster
-        """
-        return self.raw_cluster_cmd('osd', 'dump')
-
-    def get_osd_status(self):
-        """
-        Get osd statuses sorted by states that the osds are in.
-        """
-        osd_lines = filter(
-            lambda x: x.startswith('osd.') and (("up" in x) or ("down" in x)),
-            self.raw_osd_status().split('\n'))
-        self.log(osd_lines)
-        in_osds = [int(i[4:].split()[0]) for i in filter(
-                lambda x: " in " in x,
-                osd_lines)]
-        out_osds = [int(i[4:].split()[0]) for i in filter(
-                lambda x: " out " in x,
-                osd_lines)]
-        up_osds = [int(i[4:].split()[0]) for i in filter(
-                lambda x: " up " in x,
-                osd_lines)]
-        down_osds = [int(i[4:].split()[0]) for i in filter(
-                lambda x: " down " in x,
-                osd_lines)]
-        dead_osds = [int(x.id_) for x in
-                     filter(lambda x: not x.running(), self.ctx.daemons.iter_daemons_of_role('osd'))]
-        live_osds = [int(x.id_) for x in
-                     filter(lambda x: x.running(), self.ctx.daemons.iter_daemons_of_role('osd'))]
-        return { 'in' : in_osds, 'out' : out_osds, 'up' : up_osds,
-                 'down' : down_osds, 'dead' : dead_osds, 'live' : live_osds,
-                 'raw' : osd_lines}
-
-    def get_num_pgs(self):
-        """
-        Check cluster status for the number of pgs
-        """
-        status = self.raw_cluster_status()
-        self.log(status)
-        return status['pgmap']['num_pgs']
-
-    def create_pool_with_unique_name(self, pg_num=16, ec_pool=False, ec_m=1, ec_k=2):
-        """
-        Create a pool named unique_pool_X where X is unique.
-        """
-        name = ""
-        with self.lock:
-            name = "unique_pool_%s" % (str(self.next_pool_id),)
-            self.next_pool_id += 1
-            self.create_pool(
-                name,
-                pg_num,
-                ec_pool=ec_pool,
-                ec_m=ec_m,
-                ec_k=ec_k)
-        return name
-
-    def create_pool(self, pool_name, pg_num=16, ec_pool=False, ec_m=1, ec_k=2):
-        """
-        Create a pool named from the pool_name parameter.
-        :param pool_name: name of the pool being created.
-        :param pg_num: initial number of pgs.
-        """
-        with self.lock:
-            assert isinstance(pool_name, str)
-            assert isinstance(pg_num, int)
-            assert pool_name not in self.pools
-            self.log("creating pool_name %s"%(pool_name,))
-            if ec_pool and not self.created_erasure_pool:
-                self.created_erasure_pool = True
-                self.raw_cluster_cmd('osd', 'erasure-code-profile', 'set', 'teuthologyprofile', 'ruleset-failure-domain=osd', 'm='+str(ec_m), 'k='+str(ec_k))
-
-            if ec_pool:
-                self.raw_cluster_cmd('osd', 'pool', 'create', pool_name, str(pg_num), str(pg_num), 'erasure', 'teuthologyprofile')
-            else:
-                self.raw_cluster_cmd('osd', 'pool', 'create', pool_name, str(pg_num))
-            self.pools[pool_name] = pg_num
-
-    def remove_pool(self, pool_name):
-        """
-        Remove the indicated pool
-        :param pool_name: Pool to be removed
-        """
-        with self.lock:
-            assert isinstance(pool_name, str)
-            assert pool_name in self.pools
-            self.log("removing pool_name %s" % (pool_name,))
-            del self.pools[pool_name]
-            self.do_rados(
-                self.controller,
-                ['rmpool', pool_name, pool_name, "--yes-i-really-really-mean-it"]
-                )
-
-    def get_pool(self):
-        """
-        Pick a random pool
-        """
-        with self.lock:
-            return random.choice(self.pools.keys())
-
-    def get_pool_pg_num(self, pool_name):
-        """
-        Return the number of pgs in the pool specified.
-        """
-        with self.lock:
-            assert isinstance(pool_name, str)
-            if pool_name in self.pools:
-                return self.pools[pool_name]
-            return 0
-
-    def get_pool_property(self, pool_name, prop):
-        """
-        :param pool_name: pool
-        :param prop: property to be checked.
-        :returns: property as an int value.
-        """
-        with self.lock:
-            assert isinstance(pool_name, str)
-            assert isinstance(prop, str)
-            output = self.raw_cluster_cmd(
-                'osd',
-                'pool',
-                'get',
-                pool_name,
-                prop)
-            return int(output.split()[1])
-
-    def set_pool_property(self, pool_name, prop, val):
-        """
-        :param pool_name: pool
-        :param prop: property to be set.
-        :param val: value to set.
-
-        This routine retries if set operation fails.
-        """
-        with self.lock:
-            assert isinstance(pool_name, str)
-            assert isinstance(prop, str)
-            assert isinstance(val, int)
-            tries = 0
-            while True:
-                r = self.raw_cluster_cmd_result(
-                    'osd',
-                    'pool',
-                    'set',
-                    pool_name,
-                    prop,
-                    str(val))
-                if r != 11: # EAGAIN
-                    break
-                tries += 1
-                if tries > 50:
-                    raise Exception('timed out getting EAGAIN when setting pool property %s %s = %s' % (pool_name, prop, val))
-                self.log('got EAGAIN setting pool property, waiting a few seconds...')
-                time.sleep(2)
-
-    def expand_pool(self, pool_name, by, max_pgs):
-        """
-        Increase the number of pgs in a pool
-        """
-        with self.lock:
-            assert isinstance(pool_name, str)
-            assert isinstance(by, int)
-            assert pool_name in self.pools
-            if self.get_num_creating() > 0:
-                return
-            if (self.pools[pool_name] + by) > max_pgs:
-                return
-            self.log("increasing pool %s pg_num by %d" % (pool_name, by))
-            new_pg_num = self.pools[pool_name] + by
-            self.set_pool_property(pool_name, "pg_num", new_pg_num)
-            self.pools[pool_name] = new_pg_num
-
-    def set_pool_pgpnum(self, pool_name):
-        """
-        Set pgpnum property of pool_name pool.
-        """
-        with self.lock:
-            assert isinstance(pool_name, str)
-            assert pool_name in self.pools
-            if self.get_num_creating() > 0:
-                return
-            self.set_pool_property(pool_name, 'pgp_num', self.pools[pool_name])
-
-    def list_pg_missing(self, pgid):
-        """
-        Return the list of missing objects for the pg with the id specified
-        """
-        r = None
-        offset = {}
-        while True:
-            out = self.raw_cluster_cmd('--', 'pg', pgid, 'list_missing',
-                                       json.dumps(offset))
-            j = json.loads(out)
-            if r is None:
-                r = j
-            else:
-                r['objects'].extend(j['objects'])
-            if 'more' not in j:
-                break
-            if j['more'] == 0:
-                break
-            offset = j['objects'][-1]['oid']
-        if 'more' in r:
-            del r['more']
-        return r
-
-    def get_pg_stats(self):
-        """
-        Dump the cluster and get pg stats
-        """
-        out = self.raw_cluster_cmd('pg', 'dump', '--format=json')
-        j = json.loads('\n'.join(out.split('\n')[1:]))
-        return j['pg_stats']
-
-    def compile_pg_status(self):
-        """
-        Return a histogram of pg state values
-        """
-        ret = {}
-        j = self.get_pg_stats()
-        for pg in j:
-            for status in pg['state'].split('+'):
-                if status not in ret:
-                    ret[status] = 0
-                ret[status] += 1
-        return ret
-
-    def pg_scrubbing(self, pool, pgnum):
-        """
-        pg scrubbing wrapper
-        """
-        pgstr = self.get_pgid(pool, pgnum)
-        stats = self.get_single_pg_stats(pgstr)
-        return 'scrub' in stats['state']
-
-    def pg_repairing(self, pool, pgnum):
-        """
-        pg repairing wrapper
-        """
-        pgstr = self.get_pgid(pool, pgnum)
-        stats = self.get_single_pg_stats(pgstr)
-        return 'repair' in stats['state']
-
-    def pg_inconsistent(self, pool, pgnum):
-        """
-        pg inconsistent wrapper
-        """
-        pgstr = self.get_pgid(pool, pgnum)
-        stats = self.get_single_pg_stats(pgstr)
-        return 'inconsistent' in stats['state']
-
-    def get_last_scrub_stamp(self, pool, pgnum):
-        """
-        Get the timestamp of the last scrub.
-        """
-        stats = self.get_single_pg_stats(self.get_pgid(pool, pgnum))
-        return stats["last_scrub_stamp"]
-
-    def do_pg_scrub(self, pool, pgnum, stype):
-        """
-        Scrub pg and wait for scrubbing to finish
-        """
-        init = self.get_last_scrub_stamp(pool, pgnum)
-        self.raw_cluster_cmd('pg', stype, self.get_pgid(pool, pgnum))
-        while init == self.get_last_scrub_stamp(pool, pgnum):
-            self.log("waiting for scrub type %s"%(stype,))
-            time.sleep(10)
-
-    def get_single_pg_stats(self, pgid):
-        """
-        Return pg for the pgid specified.
-        """
-        all_stats = self.get_pg_stats()
-
-        for pg in all_stats:
-            if pg['pgid'] == pgid:
-                return pg
-
-        return None
-
-    def get_osd_dump(self):
-        """
-        Dump osds
-        :returns: all osds
-        """
-        out = self.raw_cluster_cmd('osd', 'dump', '--format=json')
-        j = json.loads('\n'.join(out.split('\n')[1:]))
-        return j['osds']
-
-    def get_stuck_pgs(self, type_, threshold):
-        """
-        :returns: stuck pg information from the cluster
-        """
-        out = self.raw_cluster_cmd('pg', 'dump_stuck', type_, str(threshold),
-                                   '--format=json')
-        return json.loads(out)
-
-    def get_num_unfound_objects(self):
-        """
-        Check cluster status to get the number of unfound objects
-        """
-        status = self.raw_cluster_status()
-        self.log(status)
-        return status['pgmap'].get('unfound_objects', 0)
-
-    def get_num_creating(self):
-        """
-        Find the number of pgs in creating mode.
-        """
-        pgs = self.get_pg_stats()
-        num = 0
-        for pg in pgs:
-            if 'creating' in pg['state']:
-                num += 1
-        return num
-
-    def get_num_active_clean(self):
-        """
-        Find the number of active and clean pgs.
-        """
-        pgs = self.get_pg_stats()
-        num = 0
-        for pg in pgs:
-            if pg['state'].count('active') and pg['state'].count('clean') and not pg['state'].count('stale'):
-                num += 1
-        return num
-
-    def get_num_active_recovered(self):
-        """
-        Find the number of active and recovered pgs.
-        """
-        pgs = self.get_pg_stats()
-        num = 0
-        for pg in pgs:
-            if pg['state'].count('active') and not pg['state'].count('recover') and not pg['state'].count('backfill') and not pg['state'].count('stale'):
-                num += 1
-        return num
-
-    def get_is_making_recovery_progress(self):
-        """
-        Return whether there is recovery progress discernible in the
-        raw cluster status
-        """
-        status = self.raw_cluster_status()
-        kps = status['pgmap'].get('recovering_keys_per_sec', 0)
-        bps = status['pgmap'].get('recovering_bytes_per_sec', 0)
-        ops = status['pgmap'].get('recovering_objects_per_sec', 0)
-        return kps > 0 or bps > 0 or ops > 0
-
-    def get_num_active(self):
-        """
-        Find the number of active pgs.
-        """
-        pgs = self.get_pg_stats()
-        num = 0
-        for pg in pgs:
-            if pg['state'].count('active') and not pg['state'].count('stale'):
-                num += 1
-        return num
-
-    def get_num_down(self):
-        """
-        Find the number of pgs that are down.
-        """
-        pgs = self.get_pg_stats()
-        num = 0
-        for pg in pgs:
-            if (pg['state'].count('down') and not pg['state'].count('stale')) or \
-                    (pg['state'].count('incomplete') and not pg['state'].count('stale')):
-                num += 1
-        return num
-
-    def get_num_active_down(self):
-        """
-        Find the number of pgs that are either active or down.
-        """
-        pgs = self.get_pg_stats()
-        num = 0
-        for pg in pgs:
-            if (pg['state'].count('active') and not pg['state'].count('stale')) or \
-                    (pg['state'].count('down') and not pg['state'].count('stale')) or \
-                    (pg['state'].count('incomplete') and not pg['state'].count('stale')):
-                num += 1
-        return num
-
-    def is_clean(self):
-        """
-        True if all pgs are clean
-        """
-        return self.get_num_active_clean() == self.get_num_pgs()
-
-    def is_recovered(self):
-        """
-        True if all pgs have recovered
-        """
-        return self.get_num_active_recovered() == self.get_num_pgs()
-
-    def is_active_or_down(self):
-        """
-        True if all pgs are active or down
-        """
-        return self.get_num_active_down() == self.get_num_pgs()
-
-    def wait_for_clean(self, timeout=None):
-        """
-        Wait until all pgs are clean.
-        """
-        self.log("waiting for clean")
-        start = time.time()
-        num_active_clean = self.get_num_active_clean()
-        while not self.is_clean():
-            if timeout is not None:
-                if self.get_is_making_recovery_progress():
-                    self.log("making progress, resetting timeout")
-                    start = time.time()
-                else:
-                    self.log("no progress seen, keeping timeout for now")
-                    assert time.time() - start < timeout, \
-                        'failed to become clean before timeout expired'
-            cur_active_clean = self.get_num_active_clean()
-            if cur_active_clean != num_active_clean:
-                start = time.time()
-                num_active_clean = cur_active_clean
-            time.sleep(3)
-        self.log("clean!")
-
-    def are_all_osds_up(self):
-        """
-        Returns true if all osds are up.
-        """
-        x = self.get_osd_dump()
-        return (len(x) == \
-                    sum([(y['up'] > 0) for y in x]))
-
-    def wait_for_all_up(self, timeout=None):
-        """
-        When this exits, either the timeout has expired, or all
-        osds are up.
-        """
-        self.log("waiting for all up")
-        start = time.time()
-        while not self.are_all_osds_up():
-            if timeout is not None:
-                assert time.time() - start < timeout, \
-                    'timeout expired in wait_for_all_up'
-            time.sleep(3)
-        self.log("all up!")
-
-    def wait_for_recovery(self, timeout=None):
-        """
-        Check peering. When this exits, we have recovered.
-        """
-        self.log("waiting for recovery to complete")
-        start = time.time()
-        num_active_recovered = self.get_num_active_recovered()
-        while not self.is_recovered():
-            if timeout is not None:
-                assert time.time() - start < timeout, \
-                    'failed to recover before timeout expired'
-            cur_active_recovered = self.get_num_active_recovered()
-            if cur_active_recovered != num_active_recovered:
-                start = time.time()
-                num_active_recovered = cur_active_recovered
-            time.sleep(3)
-        self.log("recovered!")
-
-    def wait_for_active(self, timeout=None):
-        """
-        Check peering. When this exits, we are definitely active
-        """
-        self.log("waiting for peering to complete")
-        start = time.time()
-        num_active = self.get_num_active()
-        while not self.is_active():
-            if timeout is not None:
-                assert time.time() - start < timeout, \
-                    'failed to recover before timeout expired'
-            cur_active = self.get_num_active()
-            if cur_active != num_active:
-                start = time.time()
-                num_active = cur_active
-            time.sleep(3)
-        self.log("active!")
-
-    def wait_for_active_or_down(self, timeout=None):
-        """
-        Check peering. When this exits, we are definitely either
-        active or down.
-        """
-        self.log("waiting for peering to complete or become blocked")
-        start = time.time()
-        num_active_down = self.get_num_active_down()
-        while not self.is_active_or_down():
-            if timeout is not None:
-                assert time.time() - start < timeout, \
-                    'failed to recover before timeout expired'
-            cur_active_down = self.get_num_active_down()
-            if cur_active_down != num_active_down:
-                start = time.time()
-                num_active_down = cur_active_down
-            time.sleep(3)
-        self.log("active or down!")
-
-    def osd_is_up(self, osd):
-        """
-        Wrapper for osd check
-        """
-        osds = self.get_osd_dump()
-        return osds[osd]['up'] > 0
-
-    def wait_till_osd_is_up(self, osd, timeout=None):
-        """
-        Loop waiting for osd.
-        """
-        self.log('waiting for osd.%d to be up' % osd)
-        start = time.time()
-        while not self.osd_is_up(osd):
-            if timeout is not None:
-                assert time.time() - start < timeout, \
-                    'osd.%d failed to come up before timeout expired' % osd
-            time.sleep(3)
-        self.log('osd.%d is up' % osd)
-
-    def is_active(self):
-        """
-        Wrapper to check if active
-        """
-        return self.get_num_active() == self.get_num_pgs()
-
-    def wait_till_active(self, timeout=None):
-        """
-        Wait until all pgs are active.
-        """
-        self.log("waiting till active")
-        start = time.time()
-        while not self.is_active():
-            if timeout is not None:
-                assert time.time() - start < timeout, \
-                    'failed to become active before timeout expired'
-            time.sleep(3)
-        self.log("active!")
-
-    def mark_out_osd(self, osd):
-        """
-        Wrapper to mark osd out.
-        """
-        self.raw_cluster_cmd('osd', 'out', str(osd))
-
-    def kill_osd(self, osd):
-        """
-        Kill osds by either power cycling (if indicated by the config)
-        or by stopping.
-        """
-        if self.config.get('powercycle'):
-            (remote,) = self.ctx.cluster.only('osd.{o}'.format(o=osd)).remotes.iterkeys()
-            self.log('kill_osd on osd.{o} doing powercycle of {s}'.format(o=osd, s=remote.name))
-            assert remote.console is not None, "powercycling requested but RemoteConsole is not initialized.  Check ipmi config."
-            remote.console.power_off()
-        else:
-            self.ctx.daemons.get_daemon('osd', osd).stop()
-
-    def blackhole_kill_osd(self, osd):
-        """
-        Stop an osd after injecting filestore-blackhole, so outstanding writes are dropped.
-        """
-        self.raw_cluster_cmd('--', 'tell', 'osd.%d' % osd,
-                             'injectargs', '--filestore-blackhole')
-        time.sleep(2)
-        self.ctx.daemons.get_daemon('osd', osd).stop()
-
-    def revive_osd(self, osd, timeout=150):
-        """
-        Revive osds by either power cycling (if indicated by the config)
-        or by restarting.
-        """
-        if self.config.get('powercycle'):
-            (remote,) = self.ctx.cluster.only('osd.{o}'.format(o=osd)).remotes.iterkeys()
-            self.log('revive_osd on osd.{o} doing powercycle of {s}'.format(o=osd, s=remote.name))
-            assert remote.console is not None, "powercycling requested but RemoteConsole is not initialized.  Check ipmi config."
-            remote.console.power_on()
-            if not remote.console.check_status(300):
-                raise Exception('Failed to revive osd.{o} via ipmi'.format(o=osd))
-            teuthology.reconnect(self.ctx, 60, [remote])
-            ceph_task.mount_osd_data(self.ctx, remote, str(osd))
-            ceph_task.make_admin_daemon_dir(self.ctx, remote)
-            self.ctx.daemons.get_daemon('osd', osd).reset()
-        self.ctx.daemons.get_daemon('osd', osd).restart()
-        # wait for dump_ops_in_flight; this command doesn't appear
-        # until after the signal handler is installed and it is safe
-        # to stop the osd again without making valgrind leak checks
-        # unhappy.  see #5924.
-        self.wait_run_admin_socket(osd,
-                                   args=['dump_ops_in_flight'],
-                                   timeout=timeout)
-
-    def mark_down_osd(self, osd):
-        """
-        Cluster command wrapper
-        """
-        self.raw_cluster_cmd('osd', 'down', str(osd))
-
-    def mark_in_osd(self, osd):
-        """
-        Cluster command wrapper
-        """
-        self.raw_cluster_cmd('osd', 'in', str(osd))
-
-
-    ## monitors
-
-    def signal_mon(self, mon, sig):
-        """
-        Wrapper for the local get_daemon call
-        """
-        self.ctx.daemons.get_daemon('mon', mon).signal(sig)
-
-    def kill_mon(self, mon):
-        """
-        Kill the monitor by either power cycling (if the config says so),
-        or by doing a stop.
-        """
-        if self.config.get('powercycle'):
-            (remote,) = self.ctx.cluster.only('mon.{m}'.format(m=mon)).remotes.iterkeys()
-            self.log('kill_mon on mon.{m} doing powercycle of {s}'.format(m=mon, s=remote.name))
-            assert remote.console is not None, "powercycling requested but RemoteConsole is not initialized.  Check ipmi config."
-            remote.console.power_off()
-        else:
-            self.ctx.daemons.get_daemon('mon', mon).stop()
-
-    def revive_mon(self, mon):
-        """
-        Restart by either power cycling (if the config says so),
-        or by doing a normal restart.
-        """
-        if self.config.get('powercycle'):
-            (remote,) = self.ctx.cluster.only('mon.{m}'.format(m=mon)).remotes.iterkeys()
-            self.log('revive_mon on mon.{m} doing powercycle of {s}'.format(m=mon, s=remote.name))
-            assert remote.console is not None, "powercycling requested but RemoteConsole is not initialized.  Check ipmi config."
-            remote.console.power_on()
-            ceph_task.make_admin_daemon_dir(self.ctx, remote)
-        self.ctx.daemons.get_daemon('mon', mon).restart()
-
-    def get_mon_status(self, mon):
-        """
-        Extract all the monitor status information from the cluster
-        """
-        addr = self.ctx.ceph.conf['mon.%s' % mon]['mon addr']
-        out = self.raw_cluster_cmd('-m', addr, 'mon_status')
-        return json.loads(out)
-
-    def get_mon_quorum(self):
-        """
-        Extract monitor quorum information from the cluster
-        """
-        out = self.raw_cluster_cmd('quorum_status')
-        j = json.loads(out)
-        self.log('quorum_status is %s' % out)
-        return j['quorum']
-
-    def wait_for_mon_quorum_size(self, size, timeout=300):
-        """
-        Loop until quorum size is reached.
-        """
-        self.log('waiting for quorum size %d' % size)
-        start = time.time()
-        while not len(self.get_mon_quorum()) == size:
-            if timeout is not None:
-                assert time.time() - start < timeout, \
-                    'failed to reach quorum size %d before timeout expired' % size
-            time.sleep(3)
-        self.log("quorum is size %d" % size)
-
-    def get_mon_health(self, debug=False):
-        """
-        Extract all the monitor health information.
-        """
-        out = self.raw_cluster_cmd('health', '--format=json')
-        if debug:
-            self.log('health:\n{h}'.format(h=out))
-        return json.loads(out)
-
-    ## metadata servers
-
-    def kill_mds(self, mds):
-        """
-        Powercycle if set in config, otherwise just stop.
-        """
-        if self.config.get('powercycle'):
-            (remote,) = self.ctx.cluster.only('mds.{m}'.format(m=mds)).remotes.iterkeys()
-            self.log('kill_mds on mds.{m} doing powercycle of {s}'.format(m=mds, s=remote.name))
-            assert remote.console is not None, "powercycling requested but RemoteConsole is not initialized.  Check ipmi config."
-            remote.console.power_off()
-        else:
-            self.ctx.daemons.get_daemon('mds', mds).stop()
-
-    def kill_mds_by_rank(self, rank):
-        """
-        kill_mds wrapper to kill based on rank passed.
-        """
-        status = self.get_mds_status_by_rank(rank)
-        self.kill_mds(status['name'])
-
-    def revive_mds(self, mds, standby_for_rank=None):
-        """
-        Revive mds -- do an ipmi powercycle (if indicated by the config)
-        and then restart (using --hot-standby if specified).
-        """
-        if self.config.get('powercycle'):
-            (remote,) = self.ctx.cluster.only('mds.{m}'.format(m=mds)).remotes.iterkeys()
-            self.log('revive_mds on mds.{m} doing powercycle of {s}'.format(m=mds, s=remote.name))
-            assert remote.console is not None, "powercycling requested but RemoteConsole is not initialized.  Check ipmi config."
-            remote.console.power_on()
-            ceph_task.make_admin_daemon_dir(self.ctx, remote)
-        args = []
-        if standby_for_rank:
-            args.extend(['--hot-standby', standby_for_rank])
-        self.ctx.daemons.get_daemon('mds', mds).restart(*args)
-
-    def revive_mds_by_rank(self, rank, standby_for_rank=None):
-        """
-        revive_mds wrapper to revive based on rank passed.
-        """
-        status = self.get_mds_status_by_rank(rank)
-        self.revive_mds(status['name'], standby_for_rank)
-
-    def get_mds_status(self, mds):
-        """
-        Run cluster commands for the mds in order to get mds information
-        """
-        out = self.raw_cluster_cmd('mds', 'dump', '--format=json')
-        j = json.loads(' '.join(out.splitlines()[1:]))
-        # collate; for dup ids, larger gid wins.
-        for info in j['info'].itervalues():
-            if info['name'] == mds:
-                return info
-        return None
-
-    def get_mds_status_by_rank(self, rank):
-        """
-        Run cluster commands for the mds in order to get mds information,
-        returning the entry matching the given rank.
-        """
-        out = self.raw_cluster_cmd('mds', 'dump', '--format=json')
-        j = json.loads(' '.join(out.splitlines()[1:]))
-        # collate; for dup ids, larger gid wins.
-        for info in j['info'].itervalues():
-            if info['rank'] == rank:
-                return info
-        return None
-
-    def get_mds_status_all(self):
-        """
-        Run cluster command to extract all the mds status.
-        """
-        out = self.raw_cluster_cmd('mds', 'dump', '--format=json')
-        j = json.loads(' '.join(out.splitlines()[1:]))
-        return j
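The CephManager helpers above are normally driven from another task rather than used directly. The following is a minimal sketch of that pattern, modeled on die_on_err.py further down in this commit; the task body, pool sizing, and timeout values are assumptions made purely for illustration, not part of the removed code.

    import logging

    import ceph_manager
    from teuthology import misc as teuthology

    log = logging.getLogger(__name__)

    def task(ctx, config):
        # Locate the first monitor remote, as the other tasks in this tree do.
        first_mon = teuthology.get_first_mon(ctx, config)
        (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
        manager = ceph_manager.CephManager(mon, ctx=ctx,
                                           logger=log.getChild('ceph_manager'))

        # Create a throwaway pool, wait for the cluster to settle, scrub pg 0,
        # then clean up.  The timeout here is illustrative, not a canonical value.
        pool = manager.create_pool_with_unique_name(pg_num=16)
        manager.wait_for_clean(timeout=900)
        manager.do_pg_scrub(pool, 0, 'scrub')
        manager.remove_pool(pool)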
diff --git a/teuthology/task/chef.py b/teuthology/task/chef.py
deleted file mode 100644 (file)
index db793c3..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-"""
-Chef-solo task
-"""
-import logging
-
-from ..orchestra import run
-from .. import misc
-
-log = logging.getLogger(__name__)
-
-def task(ctx, config):
-    """
-    Run chef-solo on all nodes.
-    """
-    log.info('Running chef-solo...')
-
-    run.wait(
-        ctx.cluster.run(
-            args=[
-                'wget',
-#                '-q',
-                '-O-',
-#                'https://raw.github.com/ceph/ceph-qa-chef/master/solo/solo-from-scratch',
-                'http://ceph.com/git/?p=ceph-qa-chef.git;a=blob_plain;f=solo/solo-from-scratch;hb=HEAD',
-                run.Raw('|'),
-                'sh',
-                '-x',
-                ],
-            wait=False,
-            )
-        )
-
-    log.info('Reconnecting after ceph-qa-chef run')
-    misc.reconnect(ctx, 10)     #Reconnect for ulimit and other ceph-qa-chef changes
-
diff --git a/teuthology/task/cifs_mount.py b/teuthology/task/cifs_mount.py
deleted file mode 100644 (file)
index ac58f31..0000000
+++ /dev/null
@@ -1,137 +0,0 @@
-"""
-Mount cifs clients.  Unmount when finished.
-"""
-import contextlib
-import logging
-import os
-
-from teuthology import misc as teuthology
-from ..orchestra import run
-
-log = logging.getLogger(__name__)
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Mount/unmount a cifs client.
-
-    The config is optional and defaults to mounting on all clients. If
-    a config is given, it is expected to be a list of clients to do
-    this operation on.
-
-    Example that starts smbd and mounts cifs on all nodes::
-
-        tasks:
-        - ceph:
-        - samba:
-        - cifs-mount:
-        - interactive:
-
-    Example that splits smbd and cifs::
-
-        tasks:
-        - ceph:
-        - samba: [samba.0]
-        - cifs-mount: [client.0]
-        - ceph-fuse: [client.1]
-        - interactive:
-
-    Example that specifies the share name::
-
-        tasks:
-        - ceph:
-        - ceph-fuse:
-        - samba:
-            samba.0:
-                cephfuse: "{testdir}/mnt.0"
-        - cifs-mount:
-            client.0:
-                share: cephfuse
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    log.info('Mounting cifs clients...')
-
-    if config is None:
-        config = dict(('client.{id}'.format(id=id_), None)
-                  for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client'))
-    elif isinstance(config, list):
-        config = dict((name, None) for name in config)
-
-    clients = list(teuthology.get_clients(ctx=ctx, roles=config.keys()))
-
-    from teuthology.task.samba import get_sambas
-    samba_roles = ['samba.{id_}'.format(id_=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'samba')]
-    sambas = list(get_sambas(ctx=ctx, roles=samba_roles))
-    (ip, _) = sambas[0][1].ssh.get_transport().getpeername()
-    log.info('samba ip: {ip}'.format(ip=ip))
-
-    for id_, remote in clients:
-        mnt = os.path.join(teuthology.get_testdir(ctx), 'mnt.{id}'.format(id=id_))
-        log.info('Mounting cifs client.{id} at {remote} {mnt}...'.format(
-                id=id_, remote=remote,mnt=mnt))
-
-        remote.run(
-            args=[
-                'mkdir',
-                '--',
-                mnt,
-                ],
-            )
-
-        rolestr = 'client.{id_}'.format(id_=id_)
-        unc = "ceph"
-        log.info("config: {c}".format(c=config))
-        if config[rolestr] is not None and 'share' in config[rolestr]:
-            unc = config[rolestr]['share']
-
-        remote.run(
-            args=[
-                'sudo',
-                'mount',
-                '-t',
-                'cifs',
-                '//{sambaip}/{unc}'.format(sambaip=ip, unc=unc),
-                '-o',
-                'username=ubuntu,password=ubuntu',
-                mnt,
-                ],
-            )
-
-        remote.run(
-            args=[
-                'sudo',
-                'chown',
-                'ubuntu:ubuntu',
-                '{m}/'.format(m=mnt),
-                ],
-            )
-
-    try:
-        yield
-    finally:
-        log.info('Unmounting cifs clients...')
-        for id_, remote in clients:
-            remote.run(
-                args=[
-                    'sudo',
-                    'umount',
-                    mnt,
-                    ],
-                )
-        for id_, remote in clients:
-            while True:
-                try:
-                    remote.run(
-                        args=[
-                            'rmdir', '--', mnt,
-                            run.Raw('2>&1'),
-                            run.Raw('|'),
-                            'grep', 'Device or resource busy',
-                            ],
-                        )
-                    import time
-                    time.sleep(1)
-                except Exception:
-                    break
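Several tasks in this commit, cifs-mount included, normalize their config the same way: None means "all clients", a list means "these roles with no options", and a dict carries per-role options. A standalone sketch of that normalization follows; the helper name normalize_config and the sample role ids are invented here for illustration only.

    def normalize_config(config, client_ids):
        # None -> every client role; list -> those roles with no options;
        # dict -> passed through unchanged.
        if config is None:
            return dict(('client.{id}'.format(id=id_), None)
                        for id_ in client_ids)
        if isinstance(config, list):
            return dict((name, None) for name in config)
        return config

    print(normalize_config(None, [0, 1]))
    # {'client.0': None, 'client.1': None}  (key order may vary)
    print(normalize_config(['client.0'], [0, 1]))
    # {'client.0': None}
    print(normalize_config({'client.0': {'share': 'cephfuse'}}, [0, 1]))
    # {'client.0': {'share': 'cephfuse'}}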
diff --git a/teuthology/task/clock.py b/teuthology/task/clock.py
deleted file mode 100644 (file)
index d7a26c4..0000000
+++ /dev/null
@@ -1,104 +0,0 @@
-"""
-Clock synchronizer 
-"""
-import logging
-import contextlib
-
-from ..orchestra import run
-
-log = logging.getLogger(__name__)
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Sync or skew clock
-
-    This will initially sync the clocks.  Eventually it should let us also
-    skew by some number of seconds.
-
-    example:
-
-    tasks:
-    - clock:
-    - ceph:
-    - interactive:
-
-    to sync.
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-
-    log.info('Syncing clocks and checking initial clock skew...')
-    for rem in ctx.cluster.remotes.iterkeys():
-        rem.run(
-            args=[
-                'sudo',
-                'service', 'ntp', 'stop',
-                run.Raw(';'),
-                'sudo',
-                'ntpdate',
-#                'clock1.dreamhost.com',
-#                'clock2.dreamhost.com',
-#                'clock3.dreamhost.com',
-#                'time.apple.com',
-                '0.debian.pool.ntp.org',
-                '1.debian.pool.ntp.org',
-                '2.debian.pool.ntp.org',
-                '3.debian.pool.ntp.org',
-                run.Raw(';'),
-                'sudo',
-                'service', 'ntp', 'start',
-                run.Raw(';'),
-                'PATH=/usr/bin:/usr/sbin',
-                'ntpdc', '-p',
-                ],
-            logger=log.getChild(rem.name),
-        )
-
-    try:
-        yield
-
-    finally:
-        log.info('Checking final clock skew...')
-        for rem in ctx.cluster.remotes.iterkeys():
-            rem.run(
-                args=[
-                    'PATH=/usr/bin:/usr/sbin',
-                    'ntpdc', '-p',
-                    ],
-                logger=log.getChild(rem.name),
-                )
-
-
-@contextlib.contextmanager
-def check(ctx, config):
-    """
-    Run ntpdc at the start and the end of the task.
-   
-    :param ctx: Context
-    :param config: Configuration
-    """
-    log.info('Checking initial clock skew...')
-    for rem in ctx.cluster.remotes.iterkeys():
-        rem.run(
-            args=[
-                'PATH=/usr/bin:/usr/sbin',
-                'ntpdc', '-p',
-                ],
-            logger=log.getChild(rem.name),
-            )
-
-    try:
-        yield
-
-    finally:
-        log.info('Checking final clock skew...')
-        for rem in ctx.cluster.remotes.iterkeys():
-            rem.run(
-                args=[
-                    'PATH=/usr/bin:/usr/sbin',
-                    'ntpdc', '-p',
-                    ],
-                logger=log.getChild(rem.name),
-                )
diff --git a/teuthology/task/common_fs_utils.py b/teuthology/task/common_fs_utils.py
deleted file mode 100644 (file)
index b963e98..0000000
+++ /dev/null
@@ -1,132 +0,0 @@
-"""
-Common filesystem related utilities.  Originally this
-code was part of rbd.py.  It was broken out so that it
-could be used by other modules (tgt.py and iscsi.py for instance).
-"""
-import logging
-import contextlib
-from teuthology import misc as teuthology
-
-log = logging.getLogger(__name__)
-
-
-def default_image_name(role):
-    """
-    Image name used by rbd and iscsi
-    """
-    return 'testimage.{role}'.format(role=role)
-
-
-@contextlib.contextmanager
-def generic_mkfs(ctx, config, devname_rtn):
-    """
-    Create a filesystem (either rbd or tgt, depending on devname_rtn)
-
-    Rbd, for example, now makes the following calls:
-        - rbd.create_image: [client.0]
-        - rbd.modprobe: [client.0]
-        - rbd.dev_create: [client.0]
-        - common_fs_utils.generic_mkfs: [client.0]
-        - common_fs_utils.generic_mount:
-            client.0: testimage.client.0
-    """
-    assert isinstance(config, list) or isinstance(config, dict), \
-        "task mkfs must be configured with a list or dictionary"
-    if isinstance(config, dict):
-        images = config.items()
-    else:
-        images = [(role, None) for role in config]
-
-    for role, properties in images:
-        if properties is None:
-            properties = {}
-        (remote,) = ctx.cluster.only(role).remotes.keys()
-        image = properties.get('image_name', default_image_name(role))
-        fs_type = properties.get('fs_type', 'ext3')
-        remote.run(
-            args=[
-                'sudo',
-                'mkfs',
-                '-t', fs_type,
-                devname_rtn(ctx, image),
-                ],
-            )
-    yield
-
-
-@contextlib.contextmanager
-def generic_mount(ctx, config, devname_rtn):
-    """
-    Generically mount an rbd or tgt image.
-
-    Rbd, for example, now makes the following calls:
-        - rbd.create_image: [client.0]
-        - rbd.modprobe: [client.0]
-        - rbd.dev_create: [client.0]
-        - common_fs_utils.generic_mkfs: [client.0]
-        - common_fs_utils.generic_mount:
-            client.0: testimage.client.0
-    """
-    assert isinstance(config, list) or isinstance(config, dict), \
-        "task mount must be configured with a list or dictionary"
-    if isinstance(config, dict):
-        role_images = config.items()
-    else:
-        role_images = [(role, None) for role in config]
-
-    def strip_client_prefix(role):
-        """
-        Extract the number from the name of a client role
-        """
-        prefix = 'client.'
-        assert role.startswith(prefix)
-        id_ = role[len(prefix):]
-        return id_
-
-    testdir = teuthology.get_testdir(ctx)
-
-    mnt_template = '{tdir}/mnt.{id}'
-    mounted = []
-    for role, image in role_images:
-        if image is None:
-            image = default_image_name(role)
-        (remote,) = ctx.cluster.only(role).remotes.keys()
-        id_ = strip_client_prefix(role)
-        mnt = mnt_template.format(tdir=testdir, id=id_)
-        mounted.append((remote, mnt))
-        remote.run(
-            args=[
-                'mkdir',
-                '--',
-                mnt,
-                ]
-            )
-
-        remote.run(
-            args=[
-                'sudo',
-                'mount',
-                devname_rtn(ctx, image),
-                mnt,
-                ],
-            )
-
-    try:
-        yield
-    finally:
-        log.info("Unmounting rbd images... %s", mounted)
-        for remote, mnt in mounted:
-            remote.run(
-                args=[
-                    'sudo',
-                    'umount',
-                    mnt,
-                    ],
-                )
-            remote.run(
-                args=[
-                    'rmdir',
-                    '--',
-                    mnt,
-                    ]
-                )
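The two context managers above are meant to be composed by a caller that supplies a devname_rtn mapping an image name to a device node. A hedged sketch of such a caller follows; the device path format and the import paths are assumptions for illustration, and rbd.py's real helper may compute the device differently.

    import contextlib

    from teuthology.task import common_fs_utils
    from teuthology.contextutil import nested

    def devname_rtn(ctx, image):
        # Assumed mapping from image name to device node; in practice rbd.py
        # derives this from the kernel client's state.
        return '/dev/rbd/rbd/{image}'.format(image=image)

    @contextlib.contextmanager
    def task(ctx, config):
        # mkfs each role's image, then keep it mounted while nested tasks run;
        # both context managers unwind in reverse order on exit.
        with nested(lambda: common_fs_utils.generic_mkfs(ctx, config, devname_rtn),
                    lambda: common_fs_utils.generic_mount(ctx, config, devname_rtn)):
            yield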
diff --git a/teuthology/task/cram.py b/teuthology/task/cram.py
deleted file mode 100644 (file)
index 05824d2..0000000
+++ /dev/null
@@ -1,135 +0,0 @@
-"""
-Cram tests
-"""
-import logging
-import os
-
-from teuthology import misc as teuthology
-from teuthology.parallel import parallel
-from ..orchestra import run
-
-log = logging.getLogger(__name__)
-
-def task(ctx, config):
-    """
-    Run all cram tests from the specified urls on the specified
-    clients. Each client runs tests in parallel.
-
-    Limitations:
-    Tests must have a .t suffix. Tests with duplicate names will
-    overwrite each other, so only the last one will run.
-
-    For example::
-
-        tasks:
-        - ceph:
-        - cram:
-            clients:
-              client.0:
-              - http://ceph.com/qa/test.t
-              - http://ceph.com/qa/test2.t
-              client.1: [http://ceph.com/qa/test.t]
-
-    You can also run a list of cram tests on all clients::
-
-        tasks:
-        - ceph:
-        - cram:
-            clients:
-              all: [http://ceph.com/qa/test.t]
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    assert isinstance(config, dict)
-    assert 'clients' in config and isinstance(config['clients'], dict), \
-           'configuration must contain a dictionary of clients'
-
-    clients = teuthology.replace_all_with_clients(ctx.cluster,
-                                                  config['clients'])
-    testdir = teuthology.get_testdir(ctx)
-
-    try:
-        for client, tests in clients.iteritems():
-            (remote,) = ctx.cluster.only(client).remotes.iterkeys()
-            client_dir = '{tdir}/archive/cram.{role}'.format(tdir=testdir, role=client)
-            remote.run(
-                args=[
-                    'mkdir', '--', client_dir,
-                    run.Raw('&&'),
-                    'virtualenv', '{tdir}/virtualenv'.format(tdir=testdir),
-                    run.Raw('&&'),
-                    '{tdir}/virtualenv/bin/pip'.format(tdir=testdir),
-                    'install', 'cram',
-                    ],
-                )
-            for test in tests:
-                log.info('fetching test %s for %s', test, client)
-                assert test.endswith('.t'), 'tests must end in .t'
-                remote.run(
-                    args=[
-                        'wget', '-nc', '-nv', '-P', client_dir, '--', test,
-                        ],
-                    )
-
-        with parallel() as p:
-            for role in clients.iterkeys():
-                p.spawn(_run_tests, ctx, role)
-    finally:
-        for client, tests in clients.iteritems():
-            (remote,) = ctx.cluster.only(client).remotes.iterkeys()
-            client_dir = '{tdir}/archive/cram.{role}'.format(tdir=testdir, role=client)
-            test_files = set([test.rsplit('/', 1)[1] for test in tests])
-
-            # remove test files unless they failed
-            for test_file in test_files:
-                abs_file = os.path.join(client_dir, test_file)
-                remote.run(
-                    args=[
-                        'test', '-f', abs_file + '.err',
-                        run.Raw('||'),
-                        'rm', '-f', '--', abs_file,
-                        ],
-                    )
-
-            # ignore failure since more than one client may
-            # be run on a host, and the client dir should be
-            # non-empty if the test failed
-            remote.run(
-                args=[
-                    'rm', '-rf', '--',
-                    '{tdir}/virtualenv'.format(tdir=testdir),
-                    run.Raw(';'),
-                    'rmdir', '--ignore-fail-on-non-empty', client_dir,
-                    ],
-                )
-
-def _run_tests(ctx, role):
-    """
-    For each role, check that it is a client, then run the cram tests on that client
-
-    :param ctx: Context
-    :param role: Roles
-    """
-    assert isinstance(role, basestring)
-    PREFIX = 'client.'
-    assert role.startswith(PREFIX)
-    id_ = role[len(PREFIX):]
-    (remote,) = ctx.cluster.only(role).remotes.iterkeys()
-    ceph_ref = ctx.summary.get('ceph-sha1', 'master')
-
-    testdir = teuthology.get_testdir(ctx)
-    log.info('Running tests for %s...', role)
-    remote.run(
-        args=[
-            run.Raw('CEPH_REF={ref}'.format(ref=ceph_ref)),
-            run.Raw('CEPH_ID="{id}"'.format(id=id_)),
-            'adjust-ulimits',
-            'ceph-coverage',
-            '{tdir}/archive/coverage'.format(tdir=testdir),
-            '{tdir}/virtualenv/bin/cram'.format(tdir=testdir),
-            '-v', '--',
-            run.Raw('{tdir}/archive/cram.{role}/*.t'.format(tdir=testdir, role=role)),
-            ],
-        logger=log.getChild(role),
-        )
diff --git a/teuthology/task/daemon-helper b/teuthology/task/daemon-helper
deleted file mode 100755 (executable)
index 6dc92b2..0000000
+++ /dev/null
@@ -1,84 +0,0 @@
-#!/usr/bin/python
-
-"""
-Helper script for running long-living processes.
-
-(The name says daemon, but that is intended to mean "long-living"; we
-assume the child process does not double-fork.)
-
-We start the command passed as arguments, with /dev/null as stdin, and
-then wait for EOF on stdin.
-
-When EOF is seen on stdin, the child process is killed.
-
-When the child process exits, this helper exits too.
-"""
-
-import fcntl
-import os
-import select
-import signal
-import struct
-import subprocess
-import sys
-
-end_signal = signal.SIGKILL
-if sys.argv[1] == "term":
-    end_signal = signal.SIGTERM
-
-cmd_start = 2
-
-nostdin = False
-if sys.argv[cmd_start] == "nostdin":
-    nostdin = True
-    cmd_start += 1
-
-proc = None
-if nostdin:
-    proc = subprocess.Popen(
-        args=sys.argv[cmd_start:],
-        )
-else:
-    with file('/dev/null', 'rb') as devnull:
-        proc = subprocess.Popen(
-            args=sys.argv[cmd_start:],
-            stdin=devnull,
-            )
-
-flags = fcntl.fcntl(0, fcntl.F_GETFL)
-fcntl.fcntl(0, fcntl.F_SETFL, flags | os.O_NDELAY)
-
-saw_eof = False
-while True:
-    r,w,x = select.select([0], [], [0], 0.2)
-    if r:
-        data = os.read(0, 1)
-        if not data:
-            saw_eof = True
-            proc.send_signal(end_signal)
-            break
-        else:
-            sig, = struct.unpack('!b', data)
-            proc.send_signal(sig)
-
-    if proc.poll() is not None:
-        # child exited
-        break
-
-exitstatus = proc.wait()
-if exitstatus > 0:
-    print >>sys.stderr, '{me}: command failed with exit status {exitstatus:d}'.format(
-        me=os.path.basename(sys.argv[0]),
-        exitstatus=exitstatus,
-        )
-    sys.exit(exitstatus)
-elif exitstatus < 0:
-    if saw_eof and exitstatus == -end_signal:
-        # suppress error from the exit we intentionally caused
-        pass
-    else:
-        print >>sys.stderr, '{me}: command crashed with signal {signal:d}'.format(
-            me=os.path.basename(sys.argv[0]),
-            signal=-exitstatus,
-            )
-        sys.exit(1)
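For context, the controlling side of this helper's stdin protocol looks roughly like the sketch below: a signal is forwarded by writing one struct-packed signed byte, and closing stdin asks the helper to terminate the child. The ./daemon-helper path and the sleep command are assumed for illustration.

    import signal
    import struct
    import subprocess

    # "term" selects SIGTERM (instead of SIGKILL) when stdin reaches EOF.
    proc = subprocess.Popen(['./daemon-helper', 'term', 'sleep', '1000'],
                            stdin=subprocess.PIPE)

    # Forward SIGHUP to the child: one signed byte in network byte order,
    # matching the struct.unpack('!b', ...) in the helper above.
    proc.stdin.write(struct.pack('!b', signal.SIGHUP))
    proc.stdin.flush()

    # Closing stdin is the shutdown protocol: the helper sees EOF, signals
    # the child with the chosen end signal, and then exits itself.
    proc.stdin.close()
    proc.wait()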
diff --git a/teuthology/task/devstack.py b/teuthology/task/devstack.py
deleted file mode 100644 (file)
index c676ace..0000000
+++ /dev/null
@@ -1,382 +0,0 @@
-#!/usr/bin/env python
-import contextlib
-import logging
-from cStringIO import StringIO
-import textwrap
-from configparser import ConfigParser
-import time
-
-from ..orchestra import run
-from .. import misc
-from ..contextutil import nested
-
-log = logging.getLogger(__name__)
-
-DEVSTACK_GIT_REPO = 'https://github.com/openstack-dev/devstack.git'
-DS_STABLE_BRANCHES = ("havana", "grizzly")
-
-is_devstack_node = lambda role: role.startswith('devstack')
-is_osd_node = lambda role: role.startswith('osd')
-
-
-@contextlib.contextmanager
-def task(ctx, config):
-    if config is None:
-        config = {}
-    if not isinstance(config, dict):
-        raise TypeError("config must be a dict")
-    with nested(lambda: install(ctx=ctx, config=config),
-                lambda: smoke(ctx=ctx, config=config),
-                ):
-        yield
-
-
-@contextlib.contextmanager
-def install(ctx, config):
-    """
-    Install OpenStack DevStack and configure it to use a Ceph cluster for
-    Glance and Cinder.
-
-    Requires one node with a role 'devstack'
-
-    Since devstack runs rampant on the system it's used on, typically you will
-    want to reprovision that machine after using devstack on it.
-
-    Also, the default 2GB of RAM that is given to vps nodes is insufficient. I
-    recommend 4GB. Downburst can be instructed to give 4GB to a vps node by
-    adding this to the yaml:
-
-    downburst:
-        ram: 4G
-
-    This was created using documentation found here:
-        https://github.com/openstack-dev/devstack/blob/master/README.md
-        http://ceph.com/docs/master/rbd/rbd-openstack/
-    """
-    if config is None:
-        config = {}
-    if not isinstance(config, dict):
-        raise TypeError("config must be a dict")
-
-    devstack_node = ctx.cluster.only(is_devstack_node).remotes.keys()[0]
-    an_osd_node = ctx.cluster.only(is_osd_node).remotes.keys()[0]
-
-    devstack_branch = config.get("branch", "master")
-    install_devstack(devstack_node, devstack_branch)
-    try:
-        configure_devstack_and_ceph(ctx, config, devstack_node, an_osd_node)
-        yield
-    finally:
-        pass
-
-
-def install_devstack(devstack_node, branch="master"):
-    log.info("Cloning DevStack repo...")
-
-    args = ['git', 'clone', DEVSTACK_GIT_REPO]
-    devstack_node.run(args=args)
-
-    if branch != "master":
-        if branch in DS_STABLE_BRANCHES and not branch.startswith("stable"):
-            branch = "stable/" + branch
-        log.info("Checking out {branch} branch...".format(branch=branch))
-        cmd = "cd devstack && git checkout " + branch
-        devstack_node.run(args=cmd)
-
-    log.info("Installing DevStack...")
-    args = ['cd', 'devstack', run.Raw('&&'), './stack.sh']
-    devstack_node.run(args=args)
-
-
-def configure_devstack_and_ceph(ctx, config, devstack_node, ceph_node):
-    pool_size = config.get('pool_size', '128')
-    create_pools(ceph_node, pool_size)
-    distribute_ceph_conf(devstack_node, ceph_node)
-    # This is where we would install python-ceph and ceph-common but it appears
-    # the ceph task does that for us.
-    generate_ceph_keys(ceph_node)
-    distribute_ceph_keys(devstack_node, ceph_node)
-    secret_uuid = set_libvirt_secret(devstack_node, ceph_node)
-    update_devstack_config_files(devstack_node, secret_uuid)
-    set_apache_servername(devstack_node)
-    # Rebooting is the most-often-used method of restarting devstack services
-    misc.reboot(devstack_node)
-    start_devstack(devstack_node)
-    restart_apache(devstack_node)
-
-
-def create_pools(ceph_node, pool_size):
-    log.info("Creating pools on Ceph cluster...")
-
-    for pool_name in ['volumes', 'images', 'backups']:
-        args = ['ceph', 'osd', 'pool', 'create', pool_name, pool_size]
-        ceph_node.run(args=args)
-
-
-def distribute_ceph_conf(devstack_node, ceph_node):
-    log.info("Copying ceph.conf to DevStack node...")
-
-    ceph_conf_path = '/etc/ceph/ceph.conf'
-    ceph_conf = misc.get_file(ceph_node, ceph_conf_path, sudo=True)
-    misc.sudo_write_file(devstack_node, ceph_conf_path, ceph_conf)
-
-
-def generate_ceph_keys(ceph_node):
-    log.info("Generating Ceph keys...")
-
-    ceph_auth_cmds = [
-        ['ceph', 'auth', 'get-or-create', 'client.cinder', 'mon',
-            'allow r', 'osd', 'allow class-read object_prefix rbd_children, allow rwx pool=volumes, allow rx pool=images'],  # noqa
-        ['ceph', 'auth', 'get-or-create', 'client.glance', 'mon',
-            'allow r', 'osd', 'allow class-read object_prefix rbd_children, allow rwx pool=images'],  # noqa
-        ['ceph', 'auth', 'get-or-create', 'client.cinder-backup', 'mon',
-            'allow r', 'osd', 'allow class-read object_prefix rbd_children, allow rwx pool=backups'],  # noqa
-    ]
-    for cmd in ceph_auth_cmds:
-        ceph_node.run(args=cmd)
-
-
-def distribute_ceph_keys(devstack_node, ceph_node):
-    log.info("Copying Ceph keys to DevStack node...")
-
-    def copy_key(from_remote, key_name, to_remote, dest_path, owner):
-        key_stringio = StringIO()
-        from_remote.run(
-            args=['ceph', 'auth', 'get-or-create', key_name],
-            stdout=key_stringio)
-        key_stringio.seek(0)
-        misc.sudo_write_file(to_remote, dest_path,
-                             key_stringio, owner=owner)
-    keys = [
-        dict(name='client.glance',
-             path='/etc/ceph/ceph.client.glance.keyring',
-             # devstack appears to just want root:root
-             #owner='glance:glance',
-             ),
-        dict(name='client.cinder',
-             path='/etc/ceph/ceph.client.cinder.keyring',
-             # devstack appears to just want root:root
-             #owner='cinder:cinder',
-             ),
-        dict(name='client.cinder-backup',
-             path='/etc/ceph/ceph.client.cinder-backup.keyring',
-             # devstack appears to just want root:root
-             #owner='cinder:cinder',
-             ),
-    ]
-    for key_dict in keys:
-        copy_key(ceph_node, key_dict['name'], devstack_node,
-                 key_dict['path'], key_dict.get('owner'))
-
-
-def set_libvirt_secret(devstack_node, ceph_node):
-    log.info("Setting libvirt secret...")
-
-    cinder_key_stringio = StringIO()
-    ceph_node.run(args=['ceph', 'auth', 'get-key', 'client.cinder'],
-                  stdout=cinder_key_stringio)
-    cinder_key = cinder_key_stringio.getvalue().strip()
-
-    uuid_stringio = StringIO()
-    devstack_node.run(args=['uuidgen'], stdout=uuid_stringio)
-    uuid = uuid_stringio.getvalue().strip()
-
-    secret_path = '/tmp/secret.xml'
-    secret_template = textwrap.dedent("""
-    <secret ephemeral='no' private='no'>
-        <uuid>{uuid}</uuid>
-        <usage type='ceph'>
-            <name>client.cinder secret</name>
-        </usage>
-    </secret>""")
-    misc.sudo_write_file(devstack_node, secret_path,
-                         secret_template.format(uuid=uuid))
-    devstack_node.run(args=['sudo', 'virsh', 'secret-define', '--file',
-                            secret_path])
-    devstack_node.run(args=['sudo', 'virsh', 'secret-set-value', '--secret',
-                            uuid, '--base64', cinder_key])
-    return uuid
-
-
-def update_devstack_config_files(devstack_node, secret_uuid):
-    log.info("Updating DevStack config files to use Ceph...")
-
-    def backup_config(node, file_name, backup_ext='.orig.teuth'):
-        node.run(args=['cp', '-f', file_name, file_name + backup_ext])
-
-    def update_config(config_name, config_stream, update_dict,
-                      section='DEFAULT'):
-        parser = ConfigParser()
-        parser.read_file(config_stream)
-        for (key, value) in update_dict.items():
-            parser.set(section, key, value)
-        out_stream = StringIO()
-        parser.write(out_stream)
-        out_stream.seek(0)
-        return out_stream
-
-    updates = [
-        dict(name='/etc/glance/glance-api.conf', options=dict(
-            default_store='rbd',
-            rbd_store_user='glance',
-            rbd_store_pool='images',
-            show_image_direct_url='True',)),
-        dict(name='/etc/cinder/cinder.conf', options=dict(
-            volume_driver='cinder.volume.drivers.rbd.RBDDriver',
-            rbd_pool='volumes',
-            rbd_ceph_conf='/etc/ceph/ceph.conf',
-            rbd_flatten_volume_from_snapshot='false',
-            rbd_max_clone_depth='5',
-            glance_api_version='2',
-            rbd_user='cinder',
-            rbd_secret_uuid=secret_uuid,
-            backup_driver='cinder.backup.drivers.ceph',
-            backup_ceph_conf='/etc/ceph/ceph.conf',
-            backup_ceph_user='cinder-backup',
-            backup_ceph_chunk_size='134217728',
-            backup_ceph_pool='backups',
-            backup_ceph_stripe_unit='0',
-            backup_ceph_stripe_count='0',
-            restore_discard_excess_bytes='true',
-            )),
-        dict(name='/etc/nova/nova.conf', options=dict(
-            libvirt_images_type='rbd',
-            libvirt_images_rbd_pool='volumes',
-            libvirt_images_rbd_ceph_conf='/etc/ceph/ceph.conf',
-            rbd_user='cinder',
-            rbd_secret_uuid=secret_uuid,
-            libvirt_inject_password='false',
-            libvirt_inject_key='false',
-            libvirt_inject_partition='-2',
-            )),
-    ]
-
-    for update in updates:
-        file_name = update['name']
-        options = update['options']
-        config_str = misc.get_file(devstack_node, file_name, sudo=True)
-        config_stream = StringIO(config_str)
-        backup_config(devstack_node, file_name)
-        new_config_stream = update_config(file_name, config_stream, options)
-        misc.sudo_write_file(devstack_node, file_name, new_config_stream)
-
-
-def set_apache_servername(node):
-    # Apache complains: "Could not reliably determine the server's fully
-    # qualified domain name, using 127.0.0.1 for ServerName"
-    # So, let's make sure it knows its name.
-    log.info("Setting Apache ServerName...")
-
-    hostname = node.hostname
-    config_file = '/etc/apache2/conf.d/servername'
-    misc.sudo_write_file(node, config_file,
-                         "ServerName {name}".format(name=hostname))
-
-
-def start_devstack(devstack_node):
-    log.info("Patching devstack start script...")
-    # This causes screen to start headless - otherwise rejoin-stack.sh fails
-    # because there is no terminal attached.
-    cmd = "cd devstack && sed -ie 's/screen -c/screen -dm -c/' rejoin-stack.sh"
-    devstack_node.run(args=cmd)
-
-    log.info("Starting devstack...")
-    cmd = "cd devstack && ./rejoin-stack.sh"
-    devstack_node.run(args=cmd)
-
-    # This was added because I was getting timeouts on Cinder requests - which
-    # were trying to access Keystone on port 5000. A more robust way to handle
-    # this would be to introduce a wait-loop on devstack_node that checks to
-    # see if a service is listening on port 5000.
-    log.info("Waiting 30s for devstack to start...")
-    time.sleep(30)
-
-
-def restart_apache(node):
-    node.run(args=['sudo', '/etc/init.d/apache2', 'restart'], wait=True)
-
-
-@contextlib.contextmanager
-def exercise(ctx, config):
-    log.info("Running devstack exercises...")
-
-    if config is None:
-        config = {}
-    if not isinstance(config, dict):
-        raise TypeError("config must be a dict")
-
-    devstack_node = ctx.cluster.only(is_devstack_node).remotes.keys()[0]
-
-    # TODO: save the log *and* preserve failures
-    #devstack_archive_dir = create_devstack_archive(ctx, devstack_node)
-
-    try:
-        #cmd = "cd devstack && ./exercise.sh 2>&1 | tee {dir}/exercise.log".format(  # noqa
-        #    dir=devstack_archive_dir)
-        cmd = "cd devstack && ./exercise.sh"
-        devstack_node.run(args=cmd, wait=True)
-        yield
-    finally:
-        pass
-
-
-def create_devstack_archive(ctx, devstack_node):
-    test_dir = misc.get_testdir(ctx)
-    devstack_archive_dir = "{test_dir}/archive/devstack".format(
-        test_dir=test_dir)
-    devstack_node.run(args="mkdir -p " + devstack_archive_dir)
-    return devstack_archive_dir
-
-
-@contextlib.contextmanager
-def smoke(ctx, config):
-    log.info("Running a basic smoketest...")
-
-    devstack_node = ctx.cluster.only(is_devstack_node).remotes.keys()[0]
-    an_osd_node = ctx.cluster.only(is_osd_node).remotes.keys()[0]
-
-    try:
-        create_volume(devstack_node, an_osd_node, 'smoke0', 1)
-        yield
-    finally:
-        pass
-
-
-def create_volume(devstack_node, ceph_node, vol_name, size):
-    """
-    :param size: The size of the volume, in GB
-    """
-    size = str(size)
-    log.info("Creating a {size}GB volume named {name}...".format(
-        name=vol_name,
-        size=size))
-    args = ['source', 'devstack/openrc', run.Raw('&&'), 'cinder', 'create',
-            '--display-name', vol_name, size]
-    out_stream = StringIO()
-    devstack_node.run(args=args, stdout=out_stream, wait=True)
-    vol_info = parse_os_table(out_stream.getvalue())
-    log.debug("Volume info: %s", str(vol_info))
-
-    out_stream = StringIO()
-    try:
-        ceph_node.run(args="rbd --id cinder ls -l volumes", stdout=out_stream,
-                      wait=True)
-    except run.CommandFailedError:
-        log.debug("Original rbd call failed; retrying without '--id cinder'")
-        ceph_node.run(args="rbd ls -l volumes", stdout=out_stream,
-                      wait=True)
-
-    assert vol_info['id'] in out_stream.getvalue(), \
-        "Volume not found on Ceph cluster"
-    assert vol_info['size'] == size, \
-        "Volume size on Ceph cluster is different than specified"
-    return vol_info['id']
-
-
-def parse_os_table(table_str):
-    out_dict = dict()
-    for line in table_str.split('\n'):
-        if line.startswith('|'):
-            items = line.split()
-            out_dict[items[1]] = items[3]
-    return out_dict
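
Since parse_os_table() above carries no docstring, here is a small self-contained sketch of what it does with the ASCII table that `cinder create` prints. The table content is invented, and note that only the first whitespace-separated token of each value cell survives the split:

    # Illustrative only: the same parsing loop as parse_os_table(), applied to a
    # made-up cinder table given as a list of lines.
    sample_lines = [
        '+----------+--------------------------------------+',
        '| Property |                Value                 |',
        '+----------+--------------------------------------+',
        '|    id    | 2f8cb3f6-0000-0000-0000-000000000000 |',
        '|   size   |                  1                   |',
        '+----------+--------------------------------------+',
    ]
    out = {}
    for line in sample_lines:
        if line.startswith('|'):
            items = line.split()
            out[items[1]] = items[3]
    # out == {'Property': 'Value',
    #         'id': '2f8cb3f6-0000-0000-0000-000000000000',
    #         'size': '1'}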
diff --git a/teuthology/task/die_on_err.py b/teuthology/task/die_on_err.py
deleted file mode 100644 (file)
index 1dfd370..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-"""
-Raise exceptions on osd coredumps or test err directories
-"""
-import contextlib
-import logging
-import time
-from ..orchestra import run
-
-import ceph_manager
-from teuthology import misc as teuthology
-
-log = logging.getLogger(__name__)
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Die if {testdir}/err exists or if an OSD dumps core
-    """
-    if config is None:
-        config = {}
-
-    first_mon = teuthology.get_first_mon(ctx, config)
-    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-
-    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
-    log.info('num_osds is %s' % num_osds)
-
-    manager = ceph_manager.CephManager(
-        mon,
-        ctx=ctx,
-        logger=log.getChild('ceph_manager'),
-        )
-
-    while len(manager.get_osd_status()['up']) < num_osds:
-        time.sleep(10)
-
-    testdir = teuthology.get_testdir(ctx)
-
-    while True:
-        for i in range(num_osds):
-            (osd_remote,) = ctx.cluster.only('osd.%d' % i).remotes.iterkeys()
-            p = osd_remote.run(
-                args = [ 'test', '-e', '{tdir}/err'.format(tdir=testdir) ],
-                wait=True,
-                check_status=False,
-            )
-            exit_status = p.exitstatus
-
-            if exit_status == 0:
-                log.info("osd %d has an error" % i)
-                raise Exception("osd %d error" % i)
-
-            log_path = '/var/log/ceph/osd.%d.log' % (i)
-
-            p = osd_remote.run(
-                args = [
-                         'tail', '-1', log_path,
-                         run.Raw('|'),
-                         'grep', '-q', 'end dump'
-                       ],
-                wait=True,
-                check_status=False,
-            )
-            exit_status = p.exitstatus
-
-            if exit_status == 0:
-                log.info("osd %d dumped core" % i)
-                raise Exception("osd %d dumped core" % i)
-
-        time.sleep(5)
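
The loop above watches for two signals: an err sentinel under the test directory, and an 'end dump' marker at the tail of an OSD log. As a hedged illustration only (not taken from any other task), this is how other code in the same run could deliberately trip the first check:

    # Hypothetical trigger: create the sentinel file that die_on_err polls for.
    # testdir comes from teuthology.get_testdir(ctx), as in the task above.
    osd_remote.run(args=['touch', '{tdir}/err'.format(tdir=testdir)])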
diff --git a/teuthology/task/divergent_priors.py b/teuthology/task/divergent_priors.py
deleted file mode 100644 (file)
index 432614f..0000000
+++ /dev/null
@@ -1,148 +0,0 @@
-"""
-Special case divergence test
-"""
-import logging
-import time
-
-import ceph_manager
-from teuthology import misc as teuthology
-from teuthology.task_util.rados import rados
-
-
-log = logging.getLogger(__name__)
-
-def task(ctx, config):
-    """
-    Test handling of divergent entries with prior_version
-    prior to log_tail
-
-    config: none
-
-    Requires 3 osds.
-    """
-    if config is None:
-        config = {}
-    assert isinstance(config, dict), \
-        'divergent_priors task only accepts a dict for configuration'
-    first_mon = teuthology.get_first_mon(ctx, config)
-    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-
-    manager = ceph_manager.CephManager(
-        mon,
-        ctx=ctx,
-        logger=log.getChild('ceph_manager'),
-        )
-    ctx.manager = manager
-
-    while len(manager.get_osd_status()['up']) < 3:
-        time.sleep(10)
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-    manager.raw_cluster_cmd('osd', 'set', 'noout')
-    manager.raw_cluster_cmd('osd', 'set', 'noin')
-    manager.raw_cluster_cmd('osd', 'set', 'nodown')
-    manager.wait_for_clean()
-
-    # something that is always there
-    dummyfile = '/etc/fstab'
-    dummyfile2 = '/etc/resolv.conf'
-
-    # create 1 pg pool
-    log.info('creating foo')
-    manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1')
-
-    osds = [0, 1, 2]
-    for i in osds:
-        manager.set_config(i, osd_min_pg_log_entries=1)
-
-    # determine primary
-    divergent = manager.get_pg_primary('foo', 0)
-    log.info("primary and soon to be divergent is %d", divergent)
-    non_divergent = [0,1,2]
-    non_divergent.remove(divergent)
-
-    log.info('writing initial objects')
-    # write 1000 objects
-    for i in range(1000):
-        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile])
-
-    manager.wait_for_clean()
-
-    # blackhole non_divergent
-    log.info("blackholing osds %s", str(non_divergent))
-    for i in non_divergent:
-        manager.set_config(i, filestore_blackhole='')
-
-    # write 1 (divergent) object
-    log.info('writing divergent object existing_0')
-    rados(
-        ctx, mon, ['-p', 'foo', 'put', 'existing_0', dummyfile2],
-        wait=False)
-    time.sleep(10)
-    mon.run(
-        args=['killall', '-9', 'rados'],
-        wait=True,
-        check_status=False)
-
-    # kill all the osds
-    log.info('killing all the osds')
-    for i in osds:
-        manager.kill_osd(i)
-    for i in osds:
-        manager.mark_down_osd(i)
-    for i in osds:
-        manager.mark_out_osd(i)
-
-    # bring up non-divergent
-    log.info("bringing up non_divergent %s", str(non_divergent))
-    for i in non_divergent:
-        manager.revive_osd(i)
-    for i in non_divergent:
-        manager.mark_in_osd(i)
-
-    log.info('making log long to prevent backfill')
-    for i in non_divergent:
-        manager.set_config(i, osd_min_pg_log_entries=100000)
-
-    # write 1 non-divergent object (ensure that old divergent one is divergent)
-    log.info('writing non-divergent object existing_1')
-    rados(ctx, mon, ['-p', 'foo', 'put', 'existing_1', dummyfile2])
-
-    manager.wait_for_recovery()
-
-    # ensure no recovery
-    log.info('delay recovery')
-    for i in non_divergent:
-        manager.set_config(i, osd_recovery_delay_start=100000)
-
-    # bring in our divergent friend
-    log.info("revive divergent %d", divergent)
-    manager.revive_osd(divergent)
-
-    while len(manager.get_osd_status()['up']) < 3:
-        time.sleep(10)
-
-    log.info('delay recovery divergent')
-    manager.set_config(divergent, osd_recovery_delay_start=100000)
-    log.info('mark divergent in')
-    manager.mark_in_osd(divergent)
-
-    log.info('wait for peering')
-    rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile])
-
-    log.info("killing divergent %d", divergent)
-    manager.kill_osd(divergent)
-    log.info("reviving divergent %d", divergent)
-    manager.revive_osd(divergent)
-
-    log.info('allowing recovery')
-    for i in non_divergent:
-        manager.set_config(i, osd_recovery_delay_start=0)
-
-    log.info('reading existing_0')
-    exit_status = rados(ctx, mon,
-                        ['-p', 'foo', 'get', 'existing_0',
-                         '-o', '/tmp/existing'])
-    assert exit_status == 0
-    log.info("success")
diff --git a/teuthology/task/dump_stuck.py b/teuthology/task/dump_stuck.py
deleted file mode 100644 (file)
index 9e1780f..0000000
+++ /dev/null
@@ -1,146 +0,0 @@
-"""
-Dump_stuck command
-"""
-import logging
-import re
-import time
-
-import ceph_manager
-from teuthology import misc as teuthology
-
-
-log = logging.getLogger(__name__)
-
-def check_stuck(manager, num_inactive, num_unclean, num_stale, timeout=10):
-    """
-    Do checks.  Make sure get_stuck_pgs returns the right amount of information, then
-    extract health information from the raw_cluster_cmd and compare the results with
-    the values passed in.  This passes if all asserts pass.
-    :param manager: Ceph manager
-    :param num_inactive: number of inactive PGs that are stuck
-    :param num_unclean: number of unclean PGs that are stuck
-    :param num_stale: number of stale PGs that are stuck
-    :param timeout: timeout value for get_stuck_pgs calls
-    """
-    inactive = manager.get_stuck_pgs('inactive', timeout)
-    assert len(inactive) == num_inactive
-    unclean = manager.get_stuck_pgs('unclean', timeout)
-    assert len(unclean) == num_unclean
-    stale = manager.get_stuck_pgs('stale', timeout)
-    assert len(stale) == num_stale
-
-    # check health output as well
-    health = manager.raw_cluster_cmd('health')
-    log.debug('ceph health is: %s', health)
-    if num_inactive > 0:
-        m = re.search('(\d+) pgs stuck inactive', health)
-        assert int(m.group(1)) == num_inactive
-    if num_unclean > 0:
-        m = re.search('(\d+) pgs stuck unclean', health)
-        assert int(m.group(1)) == num_unclean
-    if num_stale > 0:
-        m = re.search('(\d+) pgs stuck stale', health)
-        assert int(m.group(1)) == num_stale
-
-def task(ctx, config):
-    """
-    Test the dump_stuck command.
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    assert config is None, \
-        'dump_stuck requires no configuration'
-    assert teuthology.num_instances_of_type(ctx.cluster, 'osd') == 2, \
-        'dump_stuck requires exactly 2 osds'
-
-    timeout = 60
-    first_mon = teuthology.get_first_mon(ctx, config)
-    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-
-    manager = ceph_manager.CephManager(
-        mon,
-        ctx=ctx,
-        logger=log.getChild('ceph_manager'),
-        )
-
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.wait_for_clean(timeout)
-
-    manager.raw_cluster_cmd('tell', 'mon.0', 'injectargs', '--',
-#                            '--mon-osd-report-timeout 90',
-                            '--mon-pg-stuck-threshold 10')
-
-    check_stuck(
-        manager,
-        num_inactive=0,
-        num_unclean=0,
-        num_stale=0,
-        )
-    num_pgs = manager.get_num_pgs()
-
-    manager.mark_out_osd(0)
-    time.sleep(timeout)
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.wait_for_recovery(timeout)
-
-    check_stuck(
-        manager,
-        num_inactive=0,
-        num_unclean=num_pgs,
-        num_stale=0,
-        )
-
-    manager.mark_in_osd(0)
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.wait_for_clean(timeout)
-
-    check_stuck(
-        manager,
-        num_inactive=0,
-        num_unclean=0,
-        num_stale=0,
-        )
-
-    for id_ in teuthology.all_roles_of_type(ctx.cluster, 'osd'):
-        manager.kill_osd(id_)
-        manager.mark_down_osd(id_)
-
-    starttime = time.time()
-    done = False
-    while not done:
-        try:
-            check_stuck(
-                manager,
-                num_inactive=0,
-                num_unclean=0,
-                num_stale=num_pgs,
-                )
-            done = True
-        except AssertionError:
-            # wait up to 15 minutes to become stale
-            if time.time() - starttime > 900:
-                raise
-
-    for id_ in teuthology.all_roles_of_type(ctx.cluster, 'osd'):
-        manager.revive_osd(id_)
-        manager.mark_in_osd(id_)
-    while True:
-        try:
-            manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-            manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-            break
-        except Exception:
-            log.exception('osds must not be started yet, waiting...')
-            time.sleep(1)
-    manager.wait_for_clean(timeout)
-
-    check_stuck(
-        manager,
-        num_inactive=0,
-        num_unclean=0,
-        num_stale=0,
-        )
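
check_stuck() above cross-checks get_stuck_pgs() against the plain-text 'ceph health' summary. The snippet below shows those regexes at work on an illustrative health line (the wording of the summary is an example, not captured output):

    import re

    health = 'HEALTH_WARN 24 pgs stuck unclean; 3 pgs stuck stale'  # illustrative
    m = re.search(r'(\d+) pgs stuck unclean', health)
    assert m and int(m.group(1)) == 24
    m = re.search(r'(\d+) pgs stuck stale', health)
    assert m and int(m.group(1)) == 3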
diff --git a/teuthology/task/ec_lost_unfound.py b/teuthology/task/ec_lost_unfound.py
deleted file mode 100644 (file)
index 25bac6c..0000000
+++ /dev/null
@@ -1,134 +0,0 @@
-"""
-Lost_unfound
-"""
-import logging
-import ceph_manager
-from teuthology import misc as teuthology
-from teuthology.task_util.rados import rados
-
-log = logging.getLogger(__name__)
-
-def task(ctx, config):
-    """
-    Test handling of lost objects on an ec pool.
-
-    A pretty rigid cluster is brought up and tested by this task
-    """
-    if config is None:
-        config = {}
-    assert isinstance(config, dict), \
-        'ec_lost_unfound task only accepts a dict for configuration'
-    first_mon = teuthology.get_first_mon(ctx, config)
-    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-
-    manager = ceph_manager.CephManager(
-        mon,
-        ctx=ctx,
-        logger=log.getChild('ceph_manager'),
-        )
-
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
-    manager.wait_for_clean()
-
-
-    pool = manager.create_pool_with_unique_name(
-        ec_pool=True,
-        ec_m=2,
-        ec_k=2)
-
-    # something that is always there
-    dummyfile = '/etc/fstab'
-
-    # kludge to make sure they get a map
-    rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile])
-
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.wait_for_recovery()
-
-    # create old objects
-    for f in range(1, 10):
-        rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile])
-        rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile])
-        rados(ctx, mon, ['-p', pool, 'rm', 'existed_%d' % f])
-
-    # delay recovery, and make the pg log very long (to prevent backfill)
-    manager.raw_cluster_cmd(
-            'tell', 'osd.1',
-            'injectargs',
-            '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000'
-            )
-
-    manager.kill_osd(0)
-    manager.mark_down_osd(0)
-    manager.kill_osd(3)
-    manager.mark_down_osd(3)
-    
-    for f in range(1, 10):
-        rados(ctx, mon, ['-p', pool, 'put', 'new_%d' % f, dummyfile])
-        rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile])
-        rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile])
-
-    # take out osd.1 and a necessary shard of those objects.
-    manager.kill_osd(1)
-    manager.mark_down_osd(1)
-    manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')
-    manager.revive_osd(0)
-    manager.wait_till_osd_is_up(0)
-    manager.revive_osd(3)
-    manager.wait_till_osd_is_up(3)
-
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
-    manager.wait_till_active()
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
-
-    # verify that there are unfound objects
-    unfound = manager.get_num_unfound_objects()
-    log.info("there are %d unfound objects" % unfound)
-    assert unfound
-
-    # mark stuff lost
-    pgs = manager.get_pg_stats()
-    for pg in pgs:
-        if pg['stat_sum']['num_objects_unfound'] > 0:
-            # verify that i can list them direct from the osd
-            log.info('listing missing/lost in %s state %s', pg['pgid'],
-                     pg['state']);
-            m = manager.list_pg_missing(pg['pgid'])
-            log.info('%s' % m)
-            assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']
-
-            log.info("reverting unfound in %s", pg['pgid'])
-            manager.raw_cluster_cmd('pg', pg['pgid'],
-                                    'mark_unfound_lost', 'delete')
-        else:
-            log.info("no unfound in %s", pg['pgid'])
-
-    manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')
-    manager.raw_cluster_cmd('tell', 'osd.3', 'debug', 'kick_recovery_wq', '5')
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
-    manager.wait_for_recovery()
-
-    # verify result
-    for f in range(1, 10):
-        err = rados(ctx, mon, ['-p', pool, 'get', 'new_%d' % f, '-'])
-        assert err
-        err = rados(ctx, mon, ['-p', pool, 'get', 'existed_%d' % f, '-'])
-        assert err
-        err = rados(ctx, mon, ['-p', pool, 'get', 'existing_%d' % f, '-'])
-        assert err
-
-    # see if osd.1 can cope
-    manager.revive_osd(1)
-    manager.wait_till_osd_is_up(1)
-    manager.wait_for_clean()
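
create_pool_with_unique_name(ec_pool=True, ec_m=2, ec_k=2) hides the actual pool creation. Assuming it follows the usual firefly-era CLI sequence (this is a sketch, not the helper's real body; the profile name, pool name and PG counts are illustrative), it is roughly equivalent to:

    # Hedged sketch of creating a k=2, m=2 erasure-coded pool by hand.
    manager.raw_cluster_cmd('osd', 'erasure-code-profile', 'set',
                            'lostfound-profile', 'k=2', 'm=2')
    manager.raw_cluster_cmd('osd', 'pool', 'create', 'unique-pool-1',
                            '12', '12', 'erasure', 'lostfound-profile')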
diff --git a/teuthology/task/edit_sudoers.sh b/teuthology/task/edit_sudoers.sh
deleted file mode 100755 (executable)
index 6ab40a5..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-#! /bin/sh
-
-sudo vi -e /etc/sudoers <<EOF
-g/  requiretty/s// !requiretty/
-g/ !visiblepw/s//  visiblepw/
-w!
-q
-EOF
-exit
-
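
The ex commands above flip 'requiretty' off and 'visiblepw' on in /etc/sudoers. Expressed with teuthology's remote.run() and sed, a hedged equivalent (an untested sketch that mirrors the same two substitutions) would be:

    # Hypothetical sed-based equivalent of edit_sudoers.sh for a single remote.
    remote.run(args=[
        'sudo', 'sed', '-i',
        '-e', 's/  requiretty/ !requiretty/',
        '-e', 's/ !visiblepw/  visiblepw/',
        '/etc/sudoers',
    ])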
diff --git a/teuthology/task/exec.py b/teuthology/task/exec.py
deleted file mode 100644 (file)
index f951f77..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-"""
-Execute custom commands
-"""
-import logging
-
-from teuthology import misc as teuthology
-
-log = logging.getLogger(__name__)
-
-def task(ctx, config):
-    """
-    Execute commands on a given role
-
-        tasks:
-        - ceph:
-        - kclient: [client.a]
-        - exec:
-            client.a:
-              - echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control
-              - echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control
-        - interactive:
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    log.info('Executing custom commands...')
-    assert isinstance(config, dict), "task exec got invalid config"
-
-    testdir = teuthology.get_testdir(ctx)
-
-    if 'all' in config and len(config) == 1:
-        a = config['all']
-        roles = teuthology.all_roles(ctx.cluster)
-        config = dict((id_, a) for id_ in roles)
-
-    for role, ls in config.iteritems():
-        (remote,) = ctx.cluster.only(role).remotes.iterkeys()
-        log.info('Running commands on role %s host %s', role, remote.name)
-        for c in ls:
-            c = c.replace('$TESTDIR', testdir)
-            remote.run(
-                args=[
-                    'sudo',
-                    'TESTDIR={tdir}'.format(tdir=testdir),
-                    'bash',
-                    '-c',
-                    c],
-                )
-
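
The commands are plain shell strings, and the task substitutes $TESTDIR before running them. A minimal sketch of that flow, with a hypothetical role, hypothetical commands, and an assumed testdir value; note that str.replace() returns a new string, so the result must be reassigned:

    config = {
        'client.0': [
            'mkdir -p $TESTDIR/example',
            'echo hello > $TESTDIR/example/out.txt',
        ],
    }
    testdir = '/home/ubuntu/cephtest'  # assumption for what get_testdir() returns
    for role, commands in config.items():
        for c in commands:
            c = c.replace('$TESTDIR', testdir)  # keep the returned string
            print(role, c)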
diff --git a/teuthology/task/filestore_idempotent.py b/teuthology/task/filestore_idempotent.py
deleted file mode 100644 (file)
index d33ad64..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-"""
-Filestore/filejournal handler
-"""
-import logging
-from ..orchestra import run
-import random
-
-from teuthology import misc as teuthology
-
-log = logging.getLogger(__name__)
-
-def task(ctx, config):
-    """
-    Test filestore/filejournal handling of non-idempotent events.
-
-    Currently this is a kludge; we require that the ceph task precedes us just
-    so that we get the tarball installed to run the test binary.
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    assert config is None or isinstance(config, list) \
-        or isinstance(config, dict), \
-        "task only supports a list or dictionary for configuration"
-    all_clients = ['client.{id}'.format(id=id_)
-                   for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
-    if config is None:
-        config = all_clients
-    if isinstance(config, list):
-        config = dict.fromkeys(config)
-    clients = config.keys()
-
-    # just use the first client...
-    client = clients[0];
-    (remote,) = ctx.cluster.only(client).remotes.iterkeys()
-
-    testdir = teuthology.get_testdir(ctx)
-
-    dir = '%s/data/test.%s' % (testdir, client)
-
-    seed = str(int(random.uniform(1,100)))
-
-    try:
-        log.info('creating a working dir')
-        remote.run(args=['mkdir', dir])
-        remote.run(
-            args=[
-                'cd', dir,
-                run.Raw('&&'),
-                'wget','-q', '-Orun_seed_to.sh',
-                'http://ceph.com/git/?p=ceph.git;a=blob_plain;f=src/test/objectstore/run_seed_to.sh;hb=HEAD',
-                run.Raw('&&'),
-                'wget','-q', '-Orun_seed_to_range.sh',
-                'http://ceph.com/git/?p=ceph.git;a=blob_plain;f=src/test/objectstore/run_seed_to_range.sh;hb=HEAD',
-                run.Raw('&&'),
-                'chmod', '+x', 'run_seed_to.sh', 'run_seed_to_range.sh',
-                ]);
-
-        log.info('running a series of tests')
-        proc = remote.run(
-            args=[
-                'cd', dir,
-                run.Raw('&&'),
-                './run_seed_to_range.sh', seed, '50', '300',
-                ],
-            wait=False,
-            check_status=False)
-        result = proc.exitstatus.get();
-    
-        if result != 0:
-            remote.run(
-                args=[
-                    'cp', '-a', dir, '{tdir}/archive/idempotent_failure'.format(tdir=testdir),
-                    ])
-            raise Exception("./run_seed_to_range.sh errored out")
-
-    finally:
-        remote.run(args=[
-                'rm', '-rf', '--', dir
-                ])
-        
diff --git a/teuthology/task/hadoop.py b/teuthology/task/hadoop.py
deleted file mode 100644 (file)
index 30e4c69..0000000
+++ /dev/null
@@ -1,635 +0,0 @@
-"""
-Hadoop task
-
-Install and configure Hadoop -- requires that Ceph is already installed and
-already running.
-"""
-from cStringIO import StringIO
-import contextlib
-import logging
-
-from teuthology import misc as teuthology
-from teuthology import contextutil
-from teuthology.parallel import parallel
-from ..orchestra import run
-
-log = logging.getLogger(__name__)
-
-
-@contextlib.contextmanager
-def validate_cluster(ctx):
-    """
-    Check that there is exactly one master and at least one slave configured
-    """
-    log.info('Validating Hadoop configuration')
-    slaves = ctx.cluster.only(teuthology.is_type('hadoop.slave'))
-
-    if (len(slaves.remotes) < 1):
-        raise Exception("At least one hadoop.slave must be specified")
-    else:
-        log.info(str(len(slaves.remotes)) + " slaves specified")
-
-    masters = ctx.cluster.only(teuthology.is_type('hadoop.master'))
-    if (len(masters.remotes) == 1):
-        pass
-    else:
-        raise Exception(
-           "Exactly one hadoop.master must be specified. Currently there are "
-           + str(len(masters.remotes)))
-
-    try:
-        yield
-
-    finally:
-        pass
-
-
-def write_hadoop_env(ctx):
-    """
-    Add required entries to conf/hadoop-env.sh
-    """
-    hadoop_envfile = "{tdir}/apache_hadoop/conf/hadoop-env.sh".format(
-            tdir=teuthology.get_testdir(ctx))
-
-    hadoop_nodes = ctx.cluster.only(teuthology.is_type('hadoop'))
-    for remote in hadoop_nodes.remotes:
-        teuthology.write_file(remote, hadoop_envfile,
-'''export JAVA_HOME=/usr/lib/jvm/default-java
-export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/usr/share/java/libcephfs.jar:{tdir}/apache_hadoop/build/hadoop-core*.jar:{tdir}/inktank_hadoop/build/hadoop-cephfs.jar
-export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS"
-export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS"
-export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS"
-export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS"
-export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS"
-'''.format(tdir=teuthology.get_testdir(ctx)))
-        log.info("wrote file: " + hadoop_envfile + " to host: " + str(remote))
-
-
-def write_core_site(ctx, config):
-    """
-    Add required entries to conf/core-site.xml
-    """
-    testdir = teuthology.get_testdir(ctx)
-    core_site_file = "{tdir}/apache_hadoop/conf/core-site.xml".format(
-            tdir=testdir)
-
-    hadoop_nodes = ctx.cluster.only(teuthology.is_type('hadoop'))
-    for remote in hadoop_nodes.remotes:
-
-        # check the config to see if we should use hdfs or ceph
-        default_fs_string = ""
-        if config.get('hdfs'):
-            default_fs_string = 'hdfs://{master_ip}:54310'.format(
-                    master_ip=get_hadoop_master_ip(ctx))
-        else:
-            default_fs_string = 'ceph:///'
-
-        teuthology.write_file(remote, core_site_file,
-'''<?xml version="1.0"?>
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-<!-- Put site-specific property overrides in this file.  -->
-<configuration>
-    <property>
-        <name>hadoop.tmp.dir</name>
-        <value>/tmp/hadoop/tmp</value>
-    </property>
-    <property>
-        <name>fs.default.name</name>
-        <value>{default_fs}</value>
-    </property>
-    <property>
-      <name>ceph.conf.file</name>
-      <value>/etc/ceph/ceph.conf</value>
-    </property>
-    <property>
-      <name>fs.ceph.impl</name>
-      <value>org.apache.hadoop.fs.ceph.CephFileSystem</value>
-    </property>
-</configuration>
-'''.format(tdir=teuthology.get_testdir(ctx), default_fs=default_fs_string))
-
-        log.info("wrote file: " + core_site_file + " to host: " + str(remote))
-
-
-def get_hadoop_master_ip(ctx):
-    """
-    finds the hadoop.master in the ctx and then pulls out just the IP address
-    """
-    remote, _ = _get_master(ctx)
-    master_name, master_port = remote.ssh.get_transport().getpeername()
-    log.info('master name: {name} port {port}'.format(name=master_name,
-            port=master_port))
-    return master_name
-
-
-def write_mapred_site(ctx):
-    """
-    Add required entries to conf/mapred-site.xml
-    """
-    mapred_site_file = "{tdir}/apache_hadoop/conf/mapred-site.xml".format(
-            tdir=teuthology.get_testdir(ctx))
-
-    master_ip = get_hadoop_master_ip(ctx)
-    log.info('adding host {remote} as jobtracker'.format(remote=master_ip))
-
-    hadoop_nodes = ctx.cluster.only(teuthology.is_type('hadoop'))
-    for remote in hadoop_nodes.remotes:
-        teuthology.write_file(remote, mapred_site_file,
-'''<?xml version="1.0"?>
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-<!-- Put site-specific property overrides in this file. -->
-<configuration>
-    <property>
-        <name>mapred.job.tracker</name>
-        <value>{remote}:54311</value>
-    </property>
-</configuration>
-'''.format(remote=master_ip))
-
-        log.info("wrote file: " + mapred_site_file + " to host: " + str(remote))
-
-
-def write_hdfs_site(ctx):
-    """
-    Add required entries to conf/hdfs-site.xml
-    """
-    hdfs_site_file = "{tdir}/apache_hadoop/conf/hdfs-site.xml".format(
-            tdir=teuthology.get_testdir(ctx))
-
-    hadoop_nodes = ctx.cluster.only(teuthology.is_type('hadoop'))
-    for remote in hadoop_nodes.remotes:
-        teuthology.write_file(remote, hdfs_site_file,
-'''<?xml version="1.0"?>
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-<!-- Put site-specific property overrides in this file. -->
-<configuration>
-    <property>
-        <name>dfs.replication</name>
-        <value>1</value>
-    </property>
-</configuration>
-''')
-        log.info("wrote file: " + hdfs_site_file + " to host: " + str(remote))
-
-
-def write_slaves(ctx):
-    """
-    Add required entries to conf/slaves
-    These nodes host TaskTrackers and DataNodes
-    """
-    log.info('Setting up slave nodes...')
-
-    slaves_file = "{tdir}/apache_hadoop/conf/slaves".format(
-            tdir=teuthology.get_testdir(ctx))
-    tmp_file = StringIO()
-
-    slaves = ctx.cluster.only(teuthology.is_type('hadoop.slave'))
-    for remote in slaves.remotes:
-        tmp_file.write('{remote}\n'.format(
-                remote=remote.ssh.get_transport().getpeername()[0]))
-
-    tmp_file.seek(0)
-
-    hadoop_nodes = ctx.cluster.only(teuthology.is_type('hadoop'))
-    for remote in hadoop_nodes.remotes:
-        teuthology.write_file(remote=remote, path=slaves_file, data=tmp_file)
-        tmp_file.seek(0)
-        log.info("wrote file: " + slaves_file + " to host: " + str(remote))
-
-
-def write_master(ctx):
-    """
-    Add required entries to conf/masters
-    These nodes host JobTrackers and Namenodes
-    """
-    masters_file = "{tdir}/apache_hadoop/conf/masters".format(
-            tdir=teuthology.get_testdir(ctx))
-    master = _get_master(ctx)
-    master_remote, _ = master
-
-    hadoop_nodes = ctx.cluster.only(teuthology.is_type('hadoop'))
-    for remote in hadoop_nodes.remotes:
-        teuthology.write_file(remote, masters_file, '{master_host}\n'.format(
-                master_host=master_remote.ssh.get_transport().getpeername()[0]))
-        log.info("wrote file: " + masters_file + " to host: " + str(remote))
-
-
-def _configure_hadoop(ctx, config):
-    """
-    Call the various functions that configure Hadoop
-    """
-    log.info('writing out config files')
-
-    write_hadoop_env(ctx)
-    write_core_site(ctx, config)
-    write_mapred_site(ctx)
-    write_hdfs_site(ctx)
-    write_slaves(ctx)
-    write_master(ctx)
-
-
-@contextlib.contextmanager
-def configure_hadoop(ctx, config):
-    """
-    Call the various functions that configure Hadoop, format the namenode if
-    hdfs is in use, and clean up the temporary hdfs directory afterwards.
-    """
-    _configure_hadoop(ctx, config)
-    log.info('config.get(hdfs): {hdfs}'.format(hdfs=config.get('hdfs')))
-
-    if config.get('hdfs'):
-        log.info('hdfs option specified. Setting up hdfs')
-
-        # let's run this from the master
-        master = _get_master(ctx)
-        remote, _ = master
-        remote.run(
-        args=["{tdir}/apache_hadoop/bin/hadoop".format(
-                tdir=teuthology.get_testdir(ctx)),
-              "namenode",
-              "-format"],
-            wait=True,
-        )
-
-    log.info('done setting up hadoop')
-
-    try:
-        yield
-
-    finally:
-        log.info('Removing hdfs directory')
-        run.wait(
-            ctx.cluster.run(
-                args=[
-                    'rm',
-                    '-rf',
-                    '/tmp/hadoop',
-                    ],
-                wait=False,
-                ),
-            )
-
-
-def _start_hadoop(ctx, remote, config):
-    """
-    remotely start hdfs if specified, and start mapred.
-    """
-    testdir = teuthology.get_testdir(ctx)
-    if config.get('hdfs'):
-        remote.run(
-            args=['{tdir}/apache_hadoop/bin/start-dfs.sh'.format(
-                    tdir=testdir), ],
-            wait=True,
-        )
-        log.info('done starting hdfs')
-
-    remote.run(
-        args=['{tdir}/apache_hadoop/bin/start-mapred.sh'.format(
-                tdir=testdir), ],
-        wait=True,
-    )
-    log.info('done starting mapred')
-
-
-def _stop_hadoop(ctx, remote, config):
-    """
-    remotely stop mapred and, if hdfs is specified, stop hdfs too.
-    """
-    testdir = teuthology.get_testdir(ctx)
-    remote.run(
-        args=['{tdir}/apache_hadoop/bin/stop-mapred.sh'.format(tdir=testdir), ],
-        wait=True,
-    )
-
-    if config.get('hdfs'):
-        remote.run(
-            args=['{tdir}/apache_hadoop/bin/stop-dfs.sh'.format(
-                tdir=testdir), ],
-            wait=True,
-        )
-
-    log.info('done stopping hadoop')
-
-
-def _get_master(ctx):
-    """
-    Return the hadoop master.  If more than one is found, fail an assertion
-    """
-    master = ctx.cluster.only(teuthology.is_type('hadoop.master'))
-    assert 1 == len(master.remotes.items()), \
-            'There must be exactly 1 hadoop.master configured'
-
-    return master.remotes.items()[0]
-
-
-@contextlib.contextmanager
-def start_hadoop(ctx, config):
-    """
-    Handle the starting and stopping of hadoop
-    """
-    master = _get_master(ctx)
-    remote, _ = master
-
-    log.info('Starting hadoop on {remote}\n'.format(
-            remote=remote.ssh.get_transport().getpeername()[0]))
-    _start_hadoop(ctx, remote, config)
-
-    try:
-        yield
-
-    finally:
-        log.info('Running stop-mapred.sh on {remote}'.format(
-                remote=remote.ssh.get_transport().getpeername()[0]))
-        _stop_hadoop(ctx, remote, config)
-
-
-def _download_apache_hadoop_bins(ctx, remote, hadoop_url):
-    """
-    download and untar the most recent apache hadoop binaries into
-    {testdir}/apache_hadoop
-    """
-    log.info(
-        '_download_apache_hadoop_bins: path {path} on host {host}'.format(
-        path=hadoop_url, host=str(remote)))
-    file_name = 'apache-hadoop.tgz'
-    testdir = teuthology.get_testdir(ctx)
-    remote.run(
-        args=[
-            'mkdir', '-p', '-m0755',
-            '{tdir}/apache_hadoop'.format(tdir=testdir),
-            run.Raw('&&'),
-            'echo',
-            '{file_name}'.format(file_name=file_name),
-            run.Raw('|'),
-            'wget',
-            '-nv',
-            '-O-',
-            '--base={url}'.format(url=hadoop_url),
-            # need to use --input-file to make wget respect --base
-            '--input-file=-',
-            run.Raw('|'),
-            'tar', '-xzf', '-', '-C',
-            '{tdir}/apache_hadoop'.format(tdir=testdir),
-        ],
-    )
-
-
-def _download_inktank_hadoop_bins(ctx, remote, hadoop_url):
-    """
-    download and untar the most recent Inktank hadoop binaries into
-    {testdir}/hadoop
-    """
-    log.info(
-        '_download_inktank_hadoop_bins: path {path} on host {host}'.format(
-            path=hadoop_url, host=str(remote)))
-    file_name = 'hadoop.tgz'
-    testdir = teuthology.get_testdir(ctx)
-    remote.run(
-        args=[
-            'mkdir', '-p', '-m0755',
-            '{tdir}/inktank_hadoop'.format(tdir=testdir),
-            run.Raw('&&'),
-            'echo',
-            '{file_name}'.format(file_name=file_name),
-            run.Raw('|'),
-            'wget',
-            '-nv',
-            '-O-',
-            '--base={url}'.format(url=hadoop_url),
-            # need to use --input-file to make wget respect --base
-            '--input-file=-',
-            run.Raw('|'),
-            'tar', '-xzf', '-', '-C',
-            '{tdir}/inktank_hadoop'.format(tdir=testdir),
-        ],
-    )
-
-
-def _copy_hadoop_cephfs_jars(ctx, remote, from_dir, to_dir):
-    """
-    copy hadoop-cephfs.jar and hadoop-cephfs-test.jar into apache_hadoop
-    """
-    testdir = teuthology.get_testdir(ctx)
-    log.info('copy jars from {from_dir} to {to_dir} on host {host}'.format(
-            from_dir=from_dir, to_dir=to_dir, host=str(remote)))
-    file_names = ['hadoop-cephfs.jar', 'hadoop-cephfs-test.jar']
-    for file_name in file_names:
-        log.info('Copying file {file_name}'.format(file_name=file_name))
-        remote.run(
-            args=['cp', '{tdir}/{from_dir}/{file_name}'.format(
-                tdir=testdir, from_dir=from_dir, file_name=file_name),
-                '{tdir}/{to_dir}/'.format(tdir=testdir, to_dir=to_dir)
-            ],
-        )
-
-
-def _node_binaries(ctx, remote, inktank_hadoop_bindir_url,
-        apache_hadoop_bindir_url):
-    """
-    Download and copy over the appropriate binaries and jar files.
-    The calls from binaries() end up spawning this function on remote sites.
-    """
-    _download_inktank_hadoop_bins(ctx, remote, inktank_hadoop_bindir_url)
-    _download_apache_hadoop_bins(ctx, remote, apache_hadoop_bindir_url)
-    _copy_hadoop_cephfs_jars(ctx, remote, 'inktank_hadoop/build',
-            'apache_hadoop/build')
-
-
-@contextlib.contextmanager
-def binaries(ctx, config):
-    """
-    Fetch the binaries from the gitbuilder, and spawn the download tasks on
-    the remote machines.
-    """
-    path = config.get('path')
-
-    if path is None:
-        # fetch Apache Hadoop from gitbuilder
-        log.info(
-            'Fetching and unpacking Apache Hadoop binaries from gitbuilder...')
-        apache_sha1, apache_hadoop_bindir_url = teuthology.get_ceph_binary_url(
-            package='apache-hadoop',
-            branch=config.get('apache_branch'),
-            tag=config.get('tag'),
-            sha1=config.get('sha1'),
-            flavor=config.get('flavor'),
-            format=config.get('format'),
-            dist=config.get('dist'),
-            arch=config.get('arch'),
-            )
-        log.info('apache_hadoop_bindir_url %s' % (apache_hadoop_bindir_url))
-        ctx.summary['apache-hadoop-sha1'] = apache_sha1
-
-        # fetch Inktank Hadoop from gitbuilder
-        log.info(
-            'Fetching and unpacking Inktank Hadoop binaries from gitbuilder...')
-        inktank_sha1, inktank_hadoop_bindir_url = \
-            teuthology.get_ceph_binary_url(
-                package='hadoop',
-                branch=config.get('inktank_branch'),
-                tag=config.get('tag'),
-                sha1=config.get('sha1'),
-                flavor=config.get('flavor'),
-                format=config.get('format'),
-                dist=config.get('dist'),
-                arch=config.get('arch'),
-                )
-        log.info('inktank_hadoop_bindir_url %s' % (inktank_hadoop_bindir_url))
-        ctx.summary['inktank-hadoop-sha1'] = inktank_sha1
-
-    else:
-        raise Exception(
-                "The hadoop task does not support the path argument at present")
-
-    with parallel() as parallel_task:
-        hadoop_nodes = ctx.cluster.only(teuthology.is_type('hadoop'))
-        # these can happen independently
-        for remote in hadoop_nodes.remotes.iterkeys():
-            parallel_task.spawn(_node_binaries, ctx, remote,
-                    inktank_hadoop_bindir_url, apache_hadoop_bindir_url)
-
-    try:
-        yield
-    finally:
-        log.info('Removing hadoop binaries...')
-        run.wait(
-            ctx.cluster.run(
-                args=['rm', '-rf', '--', '{tdir}/apache_hadoop'.format(
-                        tdir=teuthology.get_testdir(ctx))],
-                wait=False,
-                ),
-            )
-        run.wait(
-            ctx.cluster.run(
-                args=['rm', '-rf', '--', '{tdir}/inktank_hadoop'.format(
-                        tdir=teuthology.get_testdir(ctx))],
-                wait=False,
-                ),
-            )
-
-
-@contextlib.contextmanager
-def out_of_safemode(ctx, config):
-    """
-    A Hadoop NameNode will stay in safe mode for 30 seconds by default.
-    This method blocks until the NameNode is out of safe mode.
-    """
-    if config.get('hdfs'):
-        log.info('Waiting for the Namenode to exit safe mode...')
-
-        master = _get_master(ctx)
-        remote, _ = master
-        remote.run(
-            args=["{tdir}/apache_hadoop/bin/hadoop".format(
-                  tdir=teuthology.get_testdir(ctx)),
-                  "dfsadmin",
-                  "-safemode",
-                  "wait"],
-            wait=True,
-        )
-    else:
-        pass
-
-    try:
-        yield
-    finally:
-        pass
-
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Set up and tear down a Hadoop cluster.
-
-    This depends on either having ceph installed prior to hadoop, like so:
-
-    roles:
-    - [mon.0, mds.0, osd.0, hadoop.master.0]
-    - [mon.1, osd.1, hadoop.slave.0]
-    - [mon.2, hadoop.slave.1]
-
-    tasks:
-    - ceph:
-    - hadoop:
-
-    Or if you want to use HDFS under Hadoop, this will configure Hadoop
-    for HDFS and start it along with MapReduce. Note that it does not
-    require Ceph be installed.
-
-    roles:
-    - [hadoop.master.0]
-    - [hadoop.slave.0]
-    - [hadoop.slave.1]
-
-    tasks:
-    - hadoop:
-        hdfs: True
-
-    This task requires exactly one hadoop.master be specified
-    and at least one hadoop.slave.
-
-    This task does *not* run any jobs against the Hadoop setup. To run
-    wordcount, you could use pexec like so (after the hadoop task):
-
-    - pexec:
-        hadoop.slave.0:
-          - mkdir -p /tmp/hadoop_input
-          - wget http://ceph.com/qa/hadoop_input_files.tar -O /tmp/hadoop_input/files.tar
-          - cd /tmp/hadoop_input/; tar -xf /tmp/hadoop_input/files.tar
-          - {tdir}/hadoop/bin/hadoop fs -mkdir wordcount_input
-          - {tdir}/hadoop/bin/hadoop fs -put /tmp/hadoop_input/*txt wordcount_input/
-          - {tdir}/hadoop/bin/hadoop jar {tdir}/hadoop/build/hadoop-example*jar wordcount wordcount_input wordcount_output
-          - rm -rf /tmp/hadoop_input
-
-    Note: {tdir} in the above example is the teuthology test directory.
-    """
-
-    if config is None:
-        config = {}
-    assert isinstance(config, dict), \
-        "task hadoop only supports a dictionary for configuration"
-
-    dist = 'precise'
-    format_type = 'jar'
-    arch = 'x86_64'
-    flavor = config.get('flavor', 'basic')
-
-    ctx.summary['flavor'] = flavor
-
-    overrides = ctx.config.get('overrides', {})
-    teuthology.deep_merge(config, overrides.get('hadoop', {}))
-
-    apache_branch = None
-    if config.get('apache_hadoop_branch') is not None:
-        apache_branch = config.get('apache_hadoop_branch')
-    else:
-        apache_branch = 'branch-1.0'  # hadoop branch to acquire
-
-    inktank_branch = None
-    if config.get('inktank_hadoop_branch') is not None:
-        inktank_branch = config.get('inktank_hadoop_branch')
-    else:
-        inktank_branch = 'cephfs/branch-1.0'  # default branch name
-
-    # replace any '/' with a '_' to match the artifact paths
-    inktank_branch = inktank_branch.replace('/', '_')
-    apache_branch = apache_branch.replace('/', '_')
-
-    with contextutil.nested(
-        lambda: validate_cluster(ctx=ctx),
-        lambda: binaries(ctx=ctx, config=dict(
-                tag=config.get('tag'),
-                sha1=config.get('sha1'),
-                path=config.get('path'),
-                flavor=flavor,
-                dist=config.get('dist', dist),
-                format=format_type,
-                arch=arch,
-                apache_branch=apache_branch,
-                inktank_branch=inktank_branch,
-                )),
-        lambda: configure_hadoop(ctx=ctx, config=config),
-        lambda: start_hadoop(ctx=ctx, config=config),
-        lambda: out_of_safemode(ctx=ctx, config=config),
-        ):
-        yield
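
Branch names arrive via the hadoop task config (or overrides) and are normalized before being handed to gitbuilder. A small hedged illustration of that normalization, using example branch values and the defaults mirrored from the task code above:

    config = {'hdfs': True, 'inktank_hadoop_branch': 'cephfs/branch-1.0'}
    inktank_branch = config.get('inktank_hadoop_branch',
                                'cephfs/branch-1.0').replace('/', '_')
    apache_branch = config.get('apache_hadoop_branch',
                               'branch-1.0').replace('/', '_')
    # inktank_branch == 'cephfs_branch-1.0', apache_branch == 'branch-1.0'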
diff --git a/teuthology/task/install.py b/teuthology/task/install.py
deleted file mode 100644 (file)
index 42544f6..0000000
+++ /dev/null
@@ -1,1230 +0,0 @@
-from cStringIO import StringIO
-
-import contextlib
-import copy
-import logging
-import time
-import os
-
-from teuthology import misc as teuthology
-from teuthology import contextutil
-from teuthology.parallel import parallel
-from ..orchestra import run
-from ..orchestra.run import CommandFailedError
-
-log = logging.getLogger(__name__)
-
-# Should the RELEASE value get extracted from somewhere?
-RELEASE = "1-0"
-
-# This is intended to be a complete listing of ceph packages. If we're going
-# to hardcode this stuff, I don't want to do it in more than one place.
-PACKAGES = {}
-PACKAGES['ceph'] = {}
-PACKAGES['ceph']['deb'] = [
-    'ceph',
-    'ceph-dbg',
-    'ceph-mds',
-    'ceph-mds-dbg',
-    'ceph-common',
-    'ceph-common-dbg',
-    'ceph-fuse',
-    'ceph-fuse-dbg',
-    'ceph-test',
-    'ceph-test-dbg',
-    'radosgw',
-    'radosgw-dbg',
-    'python-ceph',
-    'libcephfs1',
-    'libcephfs1-dbg',
-    'libcephfs-java',
-    'librados2',
-    'librados2-dbg',
-    'librbd1',
-    'librbd1-dbg',
-]
-PACKAGES['ceph']['rpm'] = [
-    'ceph-debuginfo',
-    'ceph-radosgw',
-    'ceph-test',
-    'ceph-devel',
-    'ceph',
-    'ceph-fuse',
-    'rest-bench',
-    'libcephfs_jni1',
-    'libcephfs1',
-    'python-ceph',
-]
-
-
-def _run_and_log_error_if_fails(remote, args):
-    """
-    Yet another wrapper around command execution. This one runs a command on
-    the given remote, then, if execution fails, logs the error and re-raises.
-
-    :param remote: the teuthology.orchestra.remote.Remote object
-    :param args: list of arguments comprising the command to be executed
-    :returns: None
-    :raises: CommandFailedError
-    """
-    response = StringIO()
-    try:
-        remote.run(
-            args=args,
-            stdout=response,
-            stderr=response,
-        )
-    except CommandFailedError:
-        log.error(response.getvalue().strip())
-        raise
-
-
-def _get_config_value_for_remote(ctx, remote, config, key):
-    """
-    Look through config, and attempt to determine the "best" value to use for a
-    given key. For example, given:
-
-        config = {
-            'all':
-                {'branch': 'master'},
-            'branch': 'next'
-        }
-        _get_config_value_for_remote(ctx, remote, config, 'branch')
-
-    would return 'master'.
-
-    :param ctx: the argparse.Namespace object
-    :param remote: the teuthology.orchestra.remote.Remote object
-    :param config: the config dict
-    :param key: the name of the value to retrieve
-    """
-    roles = ctx.cluster.remotes[remote]
-    if 'all' in config:
-        return config['all'].get(key)
-    elif roles:
-        for role in roles:
-            if role in config and key in config[role]:
-                return config[role].get(key)
-    return config.get(key)
-
-
-def _get_baseurlinfo_and_dist(ctx, remote, config):
-    """
-    Through various commands executed on the remote, determines the
-    distribution name and version in use, as well as the portion of the repo
-    URI to use to specify which version of the project (normally ceph) to
-    install. Example:
-
-        {'arch': 'x86_64',
-        'dist': 'raring',
-        'dist_release': None,
-        'distro': 'Ubuntu',
-        'distro_release': None,
-        'flavor': 'basic',
-        'relval': '13.04',
-        'uri': 'ref/master'}
-
-    :param ctx: the argparse.Namespace object
-    :param remote: the teuthology.orchestra.remote.Remote object
-    :param config: the config dict
-    :returns: dict -- the information you want.
-    """
-    retval = {}
-    relval = None
-    r = remote.run(
-        args=['arch'],
-        stdout=StringIO(),
-    )
-    retval['arch'] = r.stdout.getvalue().strip()
-    r = remote.run(
-        args=['lsb_release', '-is'],
-        stdout=StringIO(),
-    )
-    retval['distro'] = r.stdout.getvalue().strip()
-    r = remote.run(
-        args=[
-            'lsb_release', '-rs'], stdout=StringIO())
-    retval['relval'] = r.stdout.getvalue().strip()
-    dist_name = None
-    if ((retval['distro'] == 'CentOS') | (retval['distro'] == 'RedHatEnterpriseServer')):
-        relval = retval['relval']
-        relval = relval[0:relval.find('.')]
-        distri = 'centos'
-        retval['distro_release'] = '%s%s' % (distri, relval)
-        retval['dist'] = retval['distro_release']
-        dist_name = 'el'
-        retval['dist_release'] = '%s%s' % (dist_name, relval)
-    elif retval['distro'] == 'Fedora':
-        distri = retval['distro']
-        dist_name = 'fc'
-        retval['distro_release'] = '%s%s' % (dist_name, retval['relval'])
-        retval['dist'] = retval['dist_release'] = retval['distro_release']
-    else:
-        r = remote.run(
-            args=['lsb_release', '-sc'],
-            stdout=StringIO(),
-        )
-        retval['dist'] = r.stdout.getvalue().strip()
-        retval['distro_release'] = None
-        retval['dist_release'] = None
-
-    # branch/tag/sha1 flavor
-    retval['flavor'] = config.get('flavor', 'basic')
-
-    uri = None
-    log.info('config is %s', config)
-    tag = _get_config_value_for_remote(ctx, remote, config, 'tag')
-    branch = _get_config_value_for_remote(ctx, remote, config, 'branch')
-    sha1 = _get_config_value_for_remote(ctx, remote, config, 'sha1')
-    if tag:
-        uri = 'ref/' + tag
-    elif branch:
-        uri = 'ref/' + branch
-    elif sha1:
-        uri = 'sha1/' + sha1
-    else:
-        # FIXME: Should master be the default?
-        log.debug("defaulting to master branch")
-        uri = 'ref/master'
-    retval['uri'] = uri
-
-    return retval
-
-
-def _get_baseurl(ctx, remote, config):
-    """
-    Figures out which package repo base URL to use.
-
-    Example:
-        'http://gitbuilder.ceph.com/ceph-deb-raring-x86_64-basic/ref/master'
-    :param ctx: the argparse.Namespace object
-    :param remote: the teuthology.orchestra.remote.Remote object
-    :param config: the config dict
-    :returns: str -- the URL
-    """
-    # get distro name and arch
-    baseparms = _get_baseurlinfo_and_dist(ctx, remote, config)
-    base_url = 'http://{host}/{proj}-{pkg_type}-{dist}-{arch}-{flavor}/{uri}'.format(
-        host=ctx.teuthology_config.get('gitbuilder_host',
-                                       'gitbuilder.ceph.com'),
-        proj=config.get('project', 'ceph'),
-        pkg_type=remote.system_type,
-        **baseparms
-    )
-    return base_url
-
-
-class VersionNotFoundError(Exception):
-
-    def __init__(self, url):
-        self.url = url
-
-    def __str__(self):
-        return "Failed to fetch package version from %s" % self.url
-
-
-def _block_looking_for_package_version(remote, base_url, wait=False):
-    """
-    Look for, and parse, a file called 'version' in base_url.
-
-    :param remote: the teuthology.orchestra.remote.Remote object
-    :param wait: wait forever for the file to show up. (default False)
-    :returns: str -- the version e.g. '0.67-240-g67a95b9-1raring'
-    :raises: VersionNotFoundError
-    """
-    while True:
-        r = remote.run(
-            args=['wget', '-q', '-O-', base_url + '/version'],
-            stdout=StringIO(),
-            check_status=False,
-        )
-        if r.exitstatus != 0:
-            if wait:
-                log.info('Package not there yet, waiting...')
-                time.sleep(15)
-                continue
-            raise VersionNotFoundError(base_url)
-        break
-    version = r.stdout.getvalue().strip()
-    return version
-
-def _get_local_dir(config, remote):
-    """
-    Extract local directory name from the task lists.
-    Copy files over to the remote site.
-    """
-    ldir = config.get('local', None)
-    if ldir:
-        remote.run(args=['sudo', 'mkdir', '-p', ldir,])
-        for fyle in os.listdir(ldir):
-            fname = "%s/%s" % (ldir, fyle)
-            teuthology.sudo_write_file(remote, fname, open(fname).read(), '644')
-    return ldir
-
-def _update_deb_package_list_and_install(ctx, remote, debs, config):
-    """
-    Runs ``apt-get update`` first, then runs ``apt-get install``, installing
-    the requested packages on the remote system.
-
-    TODO: split this into at least two functions.
-
-    :param ctx: the argparse.Namespace object
-    :param remote: the teuthology.orchestra.remote.Remote object
-    :param debs: list of packages names to install
-    :param config: the config dict
-    """
-
-    # check for ceph release key
-    r = remote.run(
-        args=[
-            'sudo', 'apt-key', 'list', run.Raw('|'), 'grep', 'Ceph',
-        ],
-        stdout=StringIO(),
-        check_status=False,
-    )
-    if r.stdout.getvalue().find('Ceph automated package') == -1:
-        # if it doesn't exist, add it
-        remote.run(
-            args=[
-                'wget', '-q', '-O-',
-                'https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/autobuild.asc',
-                run.Raw('|'),
-                'sudo', 'apt-key', 'add', '-',
-            ],
-            stdout=StringIO(),
-        )
-
-    baseparms = _get_baseurlinfo_and_dist(ctx, remote, config)
-    log.info("Installing packages: {pkglist} on remote deb {arch}".format(
-        pkglist=", ".join(debs), arch=baseparms['arch'])
-    )
-    # get baseurl
-    base_url = _get_baseurl(ctx, remote, config)
-    log.info('Pulling from %s', base_url)
-
-    # get package version string
-    # FIXME this is a terrible hack.
-    while True:
-        r = remote.run(
-            args=[
-                'wget', '-q', '-O-', base_url + '/version',
-            ],
-            stdout=StringIO(),
-            check_status=False,
-        )
-        if r.exitstatus != 0:
-            if config.get('wait_for_package'):
-                log.info('Package not there yet, waiting...')
-                time.sleep(15)
-                continue
-            raise Exception('failed to fetch package version from %s' %
-                            base_url + '/version')
-        version = r.stdout.getvalue().strip()
-        log.info('Package version is %s', version)
-        break
-
-    remote.run(
-        args=[
-            'echo', 'deb', base_url, baseparms['dist'], 'main',
-            run.Raw('|'),
-            'sudo', 'tee', '/etc/apt/sources.list.d/{proj}.list'.format(
-                proj=config.get('project', 'ceph')),
-        ],
-        stdout=StringIO(),
-    )
-    remote.run(
-        args=[
-            'sudo', 'apt-get', 'update', run.Raw('&&'),
-            'sudo', 'DEBIAN_FRONTEND=noninteractive', 'apt-get', '-y', '--force-yes',
-            '-o', run.Raw('Dpkg::Options::="--force-confdef"'), '-o', run.Raw(
-                'Dpkg::Options::="--force-confold"'),
-            'install',
-        ] + ['%s=%s' % (d, version) for d in debs],
-        stdout=StringIO(),
-    )
-    ldir = _get_local_dir(config, remote)
-    if ldir:
-        for fyle in os.listdir(ldir):
-            fname = "%s/%s" % (ldir, fyle)
-            remote.run(args=['sudo', 'dpkg', '-i', fname],)
-
-
-def _yum_fix_repo_priority(remote, project, uri):
-    """
-    On the remote, add 'priority=1' lines to each enabled repo in:
-
-        /etc/yum.repos.d/{project}.repo
-
-    :param remote: the teuthology.orchestra.remote.Remote object
-    :param project: the project whose repos need modification
-    """
-    remote.run(
-        args=[
-            'sudo',
-            'sed',
-            '-i',
-            '-e',
-            run.Raw(
-                '\':a;N;$!ba;s/enabled=1\\ngpg/enabled=1\\npriority=1\\ngpg/g\''),
-            '-e',
-            run.Raw("'s;ref/[a-zA-Z0-9_]*/;{uri}/;g'".format(uri=uri)),
-            '/etc/yum.repos.d/%s.repo' % project,
-        ]
-    )
-
-
-def _update_rpm_package_list_and_install(ctx, remote, rpm, config):
-    """
-    Installs the ceph-release package for the relevant branch, then installs
-    the requested packages on the remote system.
-
-    TODO: split this into at least two functions.
-
-    :param ctx: the argparse.Namespace object
-    :param remote: the teuthology.orchestra.remote.Remote object
-    :param rpm: list of packages names to install
-    :param config: the config dict
-    """
-    baseparms = _get_baseurlinfo_and_dist(ctx, remote, config)
-    log.info("Installing packages: {pkglist} on remote rpm {arch}".format(
-        pkglist=", ".join(rpm), arch=baseparms['arch']))
-    host = ctx.teuthology_config.get('gitbuilder_host',
-                                     'gitbuilder.ceph.com')
-    dist_release = baseparms['dist_release']
-    start_of_url = 'http://{host}/ceph-rpm-{distro_release}-{arch}-{flavor}/{uri}'.format(
-        host=host, **baseparms)
-    ceph_release = 'ceph-release-{release}.{dist_release}.noarch'.format(
-        release=RELEASE, dist_release=dist_release)
-    rpm_name = "{rpm_nm}.rpm".format(rpm_nm=ceph_release)
-    base_url = "{start_of_url}/noarch/{rpm_name}".format(
-        start_of_url=start_of_url, rpm_name=rpm_name)
-    err_mess = StringIO()
-    try:
-        # When this was one command with a pipe, it would sometimes
-        # fail with the message 'rpm: no packages given for install'
-        remote.run(args=['wget', base_url, ],)
-        remote.run(args=['sudo', 'rpm', '-i', rpm_name, ], stderr=err_mess, )
-    except Exception:
-        cmp_msg = 'package {pkg} is already installed'.format(
-            pkg=ceph_release)
-        if cmp_msg != err_mess.getvalue().strip():
-            raise
-
-    remote.run(args=['rm', '-f', rpm_name])
-
-    # Fix Repo Priority
-    uri = baseparms['uri']
-    _yum_fix_repo_priority(remote, config.get('project', 'ceph'), uri)
-
-    remote.run(
-        args=[
-            'sudo', 'yum', 'clean', 'all',
-        ])
-    version_no = StringIO()
-    version_url = "{start_of_url}/version".format(start_of_url=start_of_url)
-    while True:
-        r = remote.run(args=['wget', '-q', '-O-', version_url, ],
-                       stdout=version_no, check_status=False)
-        if r.exitstatus != 0:
-            if config.get('wait_for_package'):
-                log.info('Package not there yet, waiting...')
-                time.sleep(15)
-                continue
-            raise Exception('failed to fetch package version from %s' %
-                            version_url)
-        version = r.stdout.getvalue().strip()
-        log.info('Package version is %s', version)
-        break
-
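-    # Strip the leading 'v' and any '-<suffix>' from the repo's version string
-    # (e.g. 'v0.67-rc3.164.gd5aa3a9' becomes '0.67'; value shown is
-    # illustrative) so it matches what yum expects for <pkg>-<version>.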
-    tmp_vers = version_no.getvalue().strip()[1:]
-    if '-' in tmp_vers:
-        tmp_vers = tmp_vers.split('-')[0]
-    ldir = _get_local_dir(config, remote)
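-    # If a local package directory is configured, prefer reinstalling each
-    # package from there; otherwise (or when the local file is absent) install
-    # the versioned package from the yum repo instead.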
-    for cpack in rpm:
-        pk_err_mess = StringIO()
-        pkg2add = "{cpack}-{version}".format(cpack=cpack, version=tmp_vers)
-        pkg = None
-        if ldir:
-            pkg = "{ldir}/{cpack}-{trailer}".format(ldir=ldir, cpack=cpack, trailer=tmp_vers)
-            remote.run(
-                args = ['if', 'test', '-e',
-                        run.Raw(pkg), run.Raw(';'), 'then',
-                        'sudo', 'yum', 'remove', pkg, '-y', run.Raw(';'),
-                        'sudo', 'yum', 'install', pkg, '-y',
-                        run.Raw(';'), 'fi']
-            )
-        if pkg is None:
-            remote.run(args=['sudo', 'yum', 'install', pkg2add, '-y', ],
-                    stderr=pk_err_mess)
-        else:
-            remote.run(
-                args = ['if', 'test', run.Raw('!'), '-e',
-                        run.Raw(pkg), run.Raw(';'), 'then',
-                        'sudo', 'yum', 'install', pkg2add, '-y',
-                        run.Raw(';'), 'fi'])
-
-
-def purge_data(ctx):
-    """
-    Purge /var/lib/ceph on every remote in ctx.
-
-    :param ctx: the argparse.Namespace object
-    """
-    with parallel() as p:
-        for remote in ctx.cluster.remotes.iterkeys():
-            p.spawn(_purge_data, remote)
-
-
-def _purge_data(remote):
-    """
-    Purge /var/lib/ceph on remote.
-
-    :param remote: the teuthology.orchestra.remote.Remote object
-    """
-    log.info('Purging /var/lib/ceph on %s', remote)
-    remote.run(args=[
-        'sudo',
-        'rm', '-rf', '--one-file-system', '--', '/var/lib/ceph',
-        run.Raw('||'),
-        'true',
-        run.Raw(';'),
-        'test', '-d', '/var/lib/ceph',
-        run.Raw('&&'),
-        'sudo',
-        'find', '/var/lib/ceph',
-        '-mindepth', '1',
-        '-maxdepth', '2',
-        '-type', 'd',
-        '-exec', 'umount', '{}', ';',
-        run.Raw(';'),
-        'sudo',
-        'rm', '-rf', '--one-file-system', '--', '/var/lib/ceph',
-    ])
-
-
-def install_packages(ctx, pkgs, config):
-    """
-    Installs packages on each remote in ctx.
-
-    :param ctx: the argparse.Namespace object
-    :param pkgs: list of package names to install
-    :param config: the config dict
-    """
-    install_pkgs = {
-        "deb": _update_deb_package_list_and_install,
-        "rpm": _update_rpm_package_list_and_install,
-    }
-    with parallel() as p:
-        for remote in ctx.cluster.remotes.iterkeys():
-            system_type = teuthology.get_system_type(remote)
-            p.spawn(
-                install_pkgs[system_type],
-                ctx, remote, pkgs[system_type], config)
-
-
-def _remove_deb(ctx, config, remote, debs):
-    """
-    Removes Debian packages from remote, rudely
-
-    TODO: be less rude (e.g. using --force-yes)
-
-    :param ctx: the argparse.Namespace object
-    :param config: the config dict
-    :param remote: the teuthology.orchestra.remote.Remote object
-    :param debs: list of package names to remove
-    """
-    log.info("Removing packages: {pkglist} on Debian system.".format(
-        pkglist=", ".join(debs)))
-    # first ask nicely
-    remote.run(
-        args=[
-            'for', 'd', 'in',
-        ] + debs + [
-            run.Raw(';'),
-            'do',
-            'sudo', 'DEBIAN_FRONTEND=noninteractive', 'apt-get', '-y', '--force-yes',
-            '-o', run.Raw('Dpkg::Options::="--force-confdef"'), '-o', run.Raw(
-                'Dpkg::Options::="--force-confold"'), 'purge',
-            run.Raw('$d'),
-            run.Raw('||'),
-            'true',
-            run.Raw(';'),
-            'done',
-        ])
-    # mop up anything that is broken
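-    # (the '^.HR' pattern is meant to catch packages dpkg reports as
-    # half-installed with the reinst-required flag; those get force-purged)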
-    remote.run(
-        args=[
-            'dpkg', '-l',
-            run.Raw('|'),
-            'grep', '^.HR',
-            run.Raw('|'),
-            'awk', '{print $2}',
-            run.Raw('|'),
-            'sudo',
-            'xargs', '--no-run-if-empty',
-            'dpkg', '-P', '--force-remove-reinstreq',
-        ])
-    # then let apt clean up
-    remote.run(
-        args=[
-            'sudo', 'DEBIAN_FRONTEND=noninteractive', 'apt-get', '-y', '--force-yes',
-            '-o', run.Raw('Dpkg::Options::="--force-confdef"'), '-o', run.Raw(
-                'Dpkg::Options::="--force-confold"'),
-            'autoremove',
-        ],
-        stdout=StringIO(),
-    )
-
-
-def _remove_rpm(ctx, config, remote, rpm):
-    """
-    Removes RPM packages from remote
-
-    :param ctx: the argparse.Namespace object
-    :param config: the config dict
-    :param remote: the teuthology.orchestra.remote.Remote object
-    :param rpm: list of package names to remove
-    """
-    log.info("Removing packages: {pkglist} on rpm system.".format(
-        pkglist=", ".join(rpm)))
-    baseparms = _get_baseurlinfo_and_dist(ctx, remote, config)
-    dist_release = baseparms['dist_release']
-    remote.run(
-        args=[
-            'for', 'd', 'in',
-        ] + rpm + [
-            run.Raw(';'),
-            'do',
-            'sudo', 'yum', 'remove',
-            run.Raw('$d'),
-            '-y',
-            run.Raw('||'),
-            'true',
-            run.Raw(';'),
-            'done',
-        ])
-    remote.run(
-        args=[
-            'sudo', 'yum', 'clean', 'all',
-        ])
-    projRelease = '%s-release-%s.%s.noarch' % (
-        config.get('project', 'ceph'), RELEASE, dist_release)
-    remote.run(args=['sudo', 'yum', 'erase', projRelease, '-y'])
-    remote.run(
-        args=[
-            'sudo', 'yum', 'clean', 'expire-cache',
-        ])
-
-
-def remove_packages(ctx, config, pkgs):
-    """
-    Removes packages from each remote in ctx.
-
-    :param ctx: the argparse.Namespace object
-    :param config: the config dict
-    :param pkgs: list of package names to remove
-    """
-    remove_pkgs = {
-        "deb": _remove_deb,
-        "rpm": _remove_rpm,
-    }
-    with parallel() as p:
-        for remote in ctx.cluster.remotes.iterkeys():
-            system_type = teuthology.get_system_type(remote)
-            p.spawn(remove_pkgs[
-                    system_type], ctx, config, remote, pkgs[system_type])
-
-
-def _remove_sources_list_deb(remote, proj):
-    """
-    Removes /etc/apt/sources.list.d/{proj}.list and then runs ``apt-get
-    update``.
-
-    :param remote: the teuthology.orchestra.remote.Remote object
-    :param proj: the project whose sources.list needs removing
-    """
-    remote.run(
-        args=[
-            'sudo', 'rm', '-f', '/etc/apt/sources.list.d/{proj}.list'.format(
-                proj=proj),
-            run.Raw('&&'),
-            'sudo', 'apt-get', 'update',
-            # ignore failure
-            run.Raw('||'),
-            'true',
-        ],
-        stdout=StringIO(),
-    )
-
-
-def _remove_sources_list_rpm(remote, proj):
-    """
-    Removes /etc/yum.repos.d/{proj}.repo, /var/lib/{proj}, and /var/log/{proj}.
-
-    :param remote: the teuthology.orchestra.remote.Remote object
-    :param proj: the project whose repo file and data/log directories need removing
-    """
-    remote.run(
-        args=[
-            'sudo', 'rm', '-f', '/etc/yum.repos.d/{proj}.repo'.format(
-                proj=proj),
-            run.Raw('||'),
-            'true',
-        ],
-        stdout=StringIO(),
-    )
-    # FIXME
-    # There probably should be a way of removing these files that is
-    # implemented in the yum/rpm remove procedures for the ceph package.
-    # FIXME but why is this function doing these things?
-    remote.run(
-        args=[
-            'sudo', 'rm', '-fr', '/var/lib/{proj}'.format(proj=proj),
-            run.Raw('||'),
-            'true',
-        ],
-        stdout=StringIO(),
-    )
-    remote.run(
-        args=[
-            'sudo', 'rm', '-fr', '/var/log/{proj}'.format(proj=proj),
-            run.Raw('||'),
-            'true',
-        ],
-        stdout=StringIO(),
-    )
-
-
-def remove_sources(ctx, config):
-    """
-    Removes repo source files from each remote in ctx.
-
-    :param ctx: the argparse.Namespace object
-    :param config: the config dict
-    """
-    remove_sources_pkgs = {
-        'deb': _remove_sources_list_deb,
-        'rpm': _remove_sources_list_rpm,
-    }
-    log.info("Removing {proj} sources lists".format(
-        proj=config.get('project', 'ceph')))
-    with parallel() as p:
-        for remote in ctx.cluster.remotes.iterkeys():
-            system_type = teuthology.get_system_type(remote)
-            p.spawn(remove_sources_pkgs[
-                    system_type], remote, config.get('project', 'ceph'))
-            p.spawn(remove_sources_pkgs[
-                    system_type], remote, 'calamari')
-
-deb_packages = {'ceph': [
-    'ceph',
-    'ceph-dbg',
-    'ceph-mds',
-    'ceph-mds-dbg',
-    'ceph-common',
-    'ceph-common-dbg',
-    'ceph-fuse',
-    'ceph-fuse-dbg',
-    'ceph-test',
-    'ceph-test-dbg',
-    'radosgw',
-    'radosgw-dbg',
-    'python-ceph',
-    'libcephfs1',
-    'libcephfs1-dbg',
-]}
-
-rpm_packages = {'ceph': [
-    'ceph-debuginfo',
-    'ceph-radosgw',
-    'ceph-test',
-    'ceph-devel',
-    'ceph',
-    'ceph-fuse',
-    'rest-bench',
-    'libcephfs_jni1',
-    'libcephfs1',
-    'python-ceph',
-]}
-
-
-@contextlib.contextmanager
-def install(ctx, config):
-    """
-    The install task. Installs packages for a given project on all hosts in
-    ctx. May work for projects besides ceph, but may not. Patches welcomed!
-
-    :param ctx: the argparse.Namespace object
-    :param config: the config dict
-    """
-
-    project = config.get('project', 'ceph')
-
-    global deb_packages
-    global rpm_packages
-    debs = deb_packages.get(project, [])
-    rpm = rpm_packages.get(project, [])
-
-    # pull any additional packages out of config
-    extra_pkgs = config.get('extra_packages', [])
-    log.info('extra packages: {packages}'.format(packages=extra_pkgs))
-    debs += extra_pkgs
-    rpm += extra_pkgs
-
-    # the extras option right now is specific to the 'ceph' project
-    extras = config.get('extras')
-    if extras is not None:
-        debs = ['ceph-test', 'ceph-test-dbg', 'ceph-fuse', 'ceph-fuse-dbg',
-                'librados2', 'librados2-dbg', 'librbd1', 'librbd1-dbg', 'python-ceph']
-        rpm = ['ceph-fuse', 'librbd1', 'librados2', 'ceph-test', 'python-ceph']
-
-    # install lib deps (so we explicitly specify version), but do not
-    # uninstall them, as other packages depend on them (e.g., kvm)
-    proj_install_debs = {'ceph': [
-        'librados2',
-        'librados2-dbg',
-        'librbd1',
-        'librbd1-dbg',
-    ]}
-
-    proj_install_rpm = {'ceph': [
-        'librbd1',
-        'librados2',
-    ]}
-
-    install_debs = proj_install_debs.get(project, [])
-    install_rpm = proj_install_rpm.get(project, [])
-
-    install_info = {
-        "deb": debs + install_debs,
-        "rpm": rpm + install_rpm}
-    remove_info = {
-        "deb": debs,
-        "rpm": rpm}
-    install_packages(ctx, install_info, config)
-    try:
-        yield
-    finally:
-        remove_packages(ctx, config, remove_info)
-        remove_sources(ctx, config)
-        if project == 'ceph':
-            purge_data(ctx)
-
-
-def _upgrade_deb_packages(ctx, config, remote, debs):
-    """
-    Upgrade project's packages on remote Debian host
-    Before doing so, installs the project's GPG key, writes a sources.list
-    file, and runs ``apt-get update``.
-
-    :param ctx: the argparse.Namespace object
-    :param config: the config dict
-    :param remote: the teuthology.orchestra.remote.Remote object
-    :param debs: the Debian packages to be installed
-    :param branch: the branch of the project to be used
-    """
-    # check for ceph release key
-    r = remote.run(
-        args=[
-            'sudo', 'apt-key', 'list', run.Raw('|'), 'grep', 'Ceph',
-        ],
-        stdout=StringIO(),
-        check_status=False,
-    )
-    if r.stdout.getvalue().find('Ceph automated package') == -1:
-        # if it doesn't exist, add it
-        remote.run(
-            args=[
-                'wget', '-q', '-O-',
-                'https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/autobuild.asc',
-                run.Raw('|'),
-                'sudo', 'apt-key', 'add', '-',
-            ],
-            stdout=StringIO(),
-        )
-
-    # get distro name and arch
-    r = remote.run(
-        args=['lsb_release', '-sc'],
-        stdout=StringIO(),
-    )
-    dist = r.stdout.getvalue().strip()
-    r = remote.run(
-        args=['arch'],
-        stdout=StringIO(),
-    )
-    arch = r.stdout.getvalue().strip()
-    log.info("dist %s arch %s", dist, arch)
-
-    # branch/tag/sha1 flavor
-    flavor = 'basic'
-    if 'sha1' in config:
-        uri = 'sha1/' + config.get('sha1')
-    elif 'branch' in config:
-        uri = 'ref/' + config.get('branch')
-    elif 'tag' in config:
-        uri = 'ref/' + config.get('tag')
-    else:
-        raise Exception(
-            'upgrade config must specify one of sha1, branch, or tag')
-    base_url = 'http://{host}/{proj}-deb-{dist}-{arch}-{flavor}/{uri}'.format(
-        host=ctx.teuthology_config.get('gitbuilder_host',
-                                       'gitbuilder.ceph.com'),
-        proj=config.get('project', 'ceph'),
-        dist=dist,
-        arch=arch,
-        flavor=flavor,
-        uri=uri,
-    )
-    log.info('Pulling from %s', base_url)
-
-    # get package version string
-    while True:
-        r = remote.run(
-            args=[
-                'wget', '-q', '-O-', base_url + '/version',
-            ],
-            stdout=StringIO(),
-            check_status=False,
-        )
-        if r.exitstatus != 0:
-            if config.get('wait_for_package'):
-                log.info('Package not there yet, waiting...')
-                time.sleep(15)
-                continue
-            raise Exception('failed to fetch package version from %s' %
-                            base_url + '/version')
-        version = r.stdout.getvalue().strip()
-        log.info('Package version is %s', version)
-        break
-    remote.run(
-        args=[
-            'echo', 'deb', base_url, dist, 'main',
-            run.Raw('|'),
-            'sudo', 'tee', '/etc/apt/sources.list.d/{proj}.list'.format(
-                proj=config.get('project', 'ceph')),
-        ],
-        stdout=StringIO(),
-    )
-    remote.run(
-        args=[
-            'sudo', 'apt-get', 'update', run.Raw('&&'),
-            'sudo', 'DEBIAN_FRONTEND=noninteractive', 'apt-get', '-y', '--force-yes',
-            '-o', run.Raw('Dpkg::Options::="--force-confdef"'), '-o', run.Raw(
-                'Dpkg::Options::="--force-confold"'),
-            'install',
-        ] + ['%s=%s' % (d, version) for d in debs],
-        stdout=StringIO(),
-    )
-
-
-def _upgrade_rpm_packages(ctx, config, remote, pkgs):
-    """
-    Upgrade project's packages on remote RPM-based host
-    Before doing so, it makes sure the project's -release RPM is installed -
-    removing any previous version first.
-
-    :param ctx: the argparse.Namespace object
-    :param config: the config dict
-    :param remote: the teuthology.orchestra.remote.Remote object
-    :param pkgs: the RPM packages to be installed
-    :param branch: the branch of the project to be used
-    """
-    distinfo = _get_baseurlinfo_and_dist(ctx, remote, config)
-    log.info(
-        "Host {host} is: {distro} {ver} {arch}".format(
-            host=remote.shortname,
-            distro=distinfo['distro'],
-            ver=distinfo['relval'],
-            arch=distinfo['arch'],)
-    )
-
-    base_url = _get_baseurl(ctx, remote, config)
-    log.info('Repo base URL: %s', base_url)
-    version = _block_looking_for_package_version(
-        remote,
-        base_url,
-        config.get('wait_for_package', False))
-    # FIXME: 'version' as retrieved from the repo is actually the RPM version
-    # PLUS *part* of the release. Example:
-    # Right now, ceph master is given the following version in the repo file:
-    # v0.67-rc3.164.gd5aa3a9 - whereas in reality the RPM version is 0.61.7
-    # and the release is 37.g1243c97.el6 (for centos6).
-    # Point being, I have to mangle a little here.
-    if version[0] == 'v':
-        version = version[1:]
-    if '-' in version:
-        version = version.split('-')[0]
-    project = config.get('project', 'ceph')
-
-    # Remove the -release package before upgrading it
-    args = ['sudo', 'rpm', '-ev', '%s-release' % project]
-    _run_and_log_error_if_fails(remote, args)
-
-    # Build the new -release package path
-    release_rpm = "{base}/noarch/{proj}-release-{release}.{dist_release}.noarch.rpm".format(
-        base=base_url,
-        proj=project,
-        release=RELEASE,
-        dist_release=distinfo['dist_release'],
-    )
-
-    # Upgrade the -release package
-    args = ['sudo', 'rpm', '-Uv', release_rpm]
-    _run_and_log_error_if_fails(remote, args)
-    uri = _get_baseurlinfo_and_dist(ctx, remote, config)['uri']
-    _yum_fix_repo_priority(remote, project, uri)
-
-    remote.run(
-        args=[
-            'sudo', 'yum', 'clean', 'all',
-        ])
-
-    # Build a space-separated string consisting of $PKG-$VER for yum
-    pkgs_with_vers = ["%s-%s" % (pkg, version) for pkg in pkgs]
-
-    # Actually upgrade the project packages
-    # FIXME: This currently outputs nothing until the command is finished
-    # executing. That sucks; fix it.
-    args = ['sudo', 'yum', '-y', 'install']
-    args += pkgs_with_vers
-    _run_and_log_error_if_fails(remote, args)
-
-
-@contextlib.contextmanager
-def upgrade(ctx, config):
-    """
-    Upgrades packages for a given project.
-
-    For example::
-
-        tasks:
-        - install.upgrade:
-             all:
-                branch: end
-
-    or specify specific roles::
-
-        tasks:
-        - install.upgrade:
-             mon.a:
-                branch: end
-             osd.0:
-                branch: other
-
-    or rely on the overrides for the target version::
-
-        overrides:
-          install:
-            ceph:
-              sha1: ...
-        tasks:
-        - install.upgrade:
-            all:
-
-    (HACK: the overrides will *only* apply the sha1/branch/tag if those
-    keys are not present in the config.)
-
-    :param ctx: the argparse.Namespace object
-    :param config: the config dict
-    """
-    assert config is None or isinstance(config, dict), \
-        "install.upgrade only supports a dictionary for configuration"
-
-    for i in config.keys():
-        assert config.get(i) is None or isinstance(
-            config.get(i), dict), 'each entry must be None or a dictionary'
-
-    project = config.get('project', 'ceph')
-
-    # use 'install' overrides here, in case the upgrade target is left
-    # unspecified/implicit.
-    install_overrides = ctx.config.get(
-        'overrides', {}).get('install', {}).get(project, {})
-    log.info('project %s config %s overrides %s', project, config, install_overrides)
-
-    # FIXME: extra_pkgs is not distro-agnostic
-    extra_pkgs = config.get('extra_packages', [])
-    log.info('extra packages: {packages}'.format(packages=extra_pkgs))
-
-    # build a normalized remote -> config dict
-    remotes = {}
-    if 'all' in config:
-        for remote in ctx.cluster.remotes.iterkeys():
-            remotes[remote] = config.get('all')
-    else:
-        for role in config.keys():
-            (remote,) = ctx.cluster.only(role).remotes.iterkeys()
-            if remote in remotes:
-                log.warn('remote %s came up twice (role %s)', remote, role)
-                continue
-            remotes[remote] = config.get(role)
-
-    for remote, node in remotes.iteritems():
-        if not node:
-            node = {}
-
-        this_overrides = copy.deepcopy(install_overrides)
-        if 'sha1' in node or 'tag' in node or 'branch' in node:
-            log.info('config contains sha1|tag|branch, removing those keys from override')
-            this_overrides.pop('sha1', None)
-            this_overrides.pop('tag', None)
-            this_overrides.pop('branch', None)
-        teuthology.deep_merge(node, this_overrides)
-        log.info('remote %s config %s', remote, node)
-
-        system_type = teuthology.get_system_type(remote)
-        assert system_type in ('deb', 'rpm')
-        pkgs = PACKAGES[project][system_type]
-        log.info("Upgrading {proj} {system_type} packages: {pkgs}".format(
-            proj=project, system_type=system_type, pkgs=', '.join(pkgs)))
-        # FIXME: again, make extra_pkgs distro-agnostic
-        pkgs += extra_pkgs
-        node['project'] = project
-        if system_type == 'deb':
-            _upgrade_deb_packages(ctx, node, remote, pkgs)
-        elif system_type == 'rpm':
-            _upgrade_rpm_packages(ctx, node, remote, pkgs)
-
-    yield
-
-
-@contextlib.contextmanager
-def ship_utilities(ctx, config):
-    """
-    Write a copy of valgrind.supp to each of the remote sites.  Set executables used
-    by Ceph in /usr/local/bin.  When finished (upon exit of the teuthology run), remove
-    these files.
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    assert config is None
-    testdir = teuthology.get_testdir(ctx)
-    filenames = []
-
-    log.info('Shipping valgrind.supp...')
-    with file(os.path.join(os.path.dirname(__file__), 'valgrind.supp'), 'rb') as f:
-        fn = os.path.join(testdir, 'valgrind.supp')
-        filenames.append(fn)
-        for rem in ctx.cluster.remotes.iterkeys():
-            teuthology.sudo_write_file(
-                remote=rem,
-                path=fn,
-                data=f,
-                )
-            f.seek(0)
-
-    FILES = ['daemon-helper', 'adjust-ulimits', 'kcon_most']
-    destdir = '/usr/bin'
-    for filename in FILES:
-        log.info('Shipping %r...', filename)
-        src = os.path.join(os.path.dirname(__file__), filename)
-        dst = os.path.join(destdir, filename)
-        filenames.append(dst)
-        with file(src, 'rb') as f:
-            for rem in ctx.cluster.remotes.iterkeys():
-                teuthology.sudo_write_file(
-                    remote=rem,
-                    path=dst,
-                    data=f,
-                )
-                f.seek(0)
-                rem.run(
-                    args=[
-                        'sudo',
-                        'chmod',
-                        'a=rx',
-                        '--',
-                        dst,
-                    ],
-                )
-
-    try:
-        yield
-    finally:
-        log.info('Removing shipped files: %s...', ' '.join(filenames))
-        run.wait(
-            ctx.cluster.run(
-                args=[
-                    'sudo',
-                    'rm',
-                    '-f',
-                    '--',
-                ] + list(filenames),
-                wait=False,
-            ),
-        )
-
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Install packages for a given project.
-
-    tasks:
-    - install:
-        project: ceph
-        branch: bar
-    - install:
-        project: samba
-        branch: foo
-        extra_packages: ['samba']
-
-    Overrides are project specific:
-
-    overrides:
-      install:
-        ceph:
-          sha1: ...
-
-    :param ctx: the argparse.Namespace object
-    :param config: the config dict
-    """
-    if config is None:
-        config = {}
-    assert isinstance(config, dict), \
-        "task install only supports a dictionary for configuration"
-
-    project, = config.get('project', 'ceph'),
-    log.debug('project %s' % project)
-    overrides = ctx.config.get('overrides')
-    if overrides:
-        install_overrides = overrides.get('install', {})
-        teuthology.deep_merge(config, install_overrides.get(project, {}))
-    log.debug('config %s' % config)
-
-    # Flavor tells us what gitbuilder to fetch the prebuilt software
-    # from. It's a combination of possible keywords, in a specific
-    # order, joined by dashes. It is used as a URL path name. If a
-    # match is not found, the teuthology run fails. This is ugly,
-    # and should be cleaned up at some point.
-
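-    # For instance, a 'notcmalloc' flavor ends up in gitbuilder URLs of the
-    # shape http://gitbuilder.ceph.com/ceph-deb-<dist>-<arch>-notcmalloc/<uri>
-    # (shape taken from _upgrade_deb_packages; placeholders are illustrative).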
-    flavor = config.get('flavor', 'basic')
-
-    if config.get('path'):
-        # local dir precludes any other flavors
-        flavor = 'local'
-    else:
-        if config.get('valgrind'):
-            log.info(
-                'Using notcmalloc flavor and running some daemons under valgrind')
-            flavor = 'notcmalloc'
-        else:
-            if config.get('coverage'):
-                log.info('Recording coverage for this run.')
-                flavor = 'gcov'
-
-    ctx.summary['flavor'] = flavor
-
-    with contextutil.nested(
-        lambda: install(ctx=ctx, config=dict(
-            branch=config.get('branch'),
-            tag=config.get('tag'),
-            sha1=config.get('sha1'),
-            flavor=flavor,
-            extra_packages=config.get('extra_packages', []),
-            extras=config.get('extras', None),
-            wait_for_package=ctx.config.get('wait_for_package', False),
-            project=project,
-        )),
-        lambda: ship_utilities(ctx=ctx, config=None),
-    ):
-        yield
diff --git a/teuthology/task/interactive.py b/teuthology/task/interactive.py
deleted file mode 100644 (file)
index dd1676e..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-"""
-Drop into a python shell
-"""
-import code
-import readline
-import rlcompleter
-rlcompleter.__name__ # silence pyflakes
-import pprint
-
-readline.parse_and_bind('tab: complete')
-
-def task(ctx, config):
-    """
-    Run an interactive Python shell, with the cluster accessible via
-    the ``ctx`` variable.
-
-    Hit ``control-D`` to continue.
-
-    This is also useful to pause the execution of the test between two
-    tasks, either to perform ad hoc operations, or to examine the
-    state of the cluster. You can also use it to easily bring up a
-    Ceph cluster for ad hoc testing.
-
-    For example::
-
-        tasks:
-        - ceph:
-        - interactive:
-    """
-
-    pp = pprint.PrettyPrinter().pprint
-    code.interact(
-        banner='Ceph test interactive mode, use ctx to interact with the cluster, press control-D to exit...',
-        # TODO simplify this
-        local=dict(
-            ctx=ctx,
-            config=config,
-            pp=pp,
-            ),
-        )
diff --git a/teuthology/task/internal.py b/teuthology/task/internal.py
deleted file mode 100644 (file)
index 533d98a..0000000
+++ /dev/null
@@ -1,595 +0,0 @@
-"""
-Internal tasks are tasks that are started from the teuthology infrastructure.
-Note that there is no corresponding task defined for this module.  All of
-the calls are made from other modules, most notably teuthology/run.py
-"""
-from cStringIO import StringIO
-import contextlib
-import gevent
-import logging
-import os
-import time
-import yaml
-import re
-import subprocess
-
-from teuthology import lockstatus
-from teuthology import lock
-from teuthology import misc as teuthology
-from teuthology.parallel import parallel
-from ..orchestra import run
-
-log = logging.getLogger(__name__)
-
-@contextlib.contextmanager
-def base(ctx, config):
-    """
-    Create the test directory that we will be using on the remote system
-    """
-    log.info('Creating test directory...')
-    testdir = teuthology.get_testdir(ctx)
-    run.wait(
-        ctx.cluster.run(
-            args=[
-                'mkdir', '-m0755', '--',
-                testdir,
-                ],
-            wait=False,
-            )
-        )
-    try:
-        yield
-    finally:
-        log.info('Tidying up after the test...')
-        # if this fails, one of the earlier cleanups is flawed; don't
-        # just cram an rm -rf here
-        run.wait(
-            ctx.cluster.run(
-                args=[
-                    'rmdir',
-                    '--',
-                    testdir,
-                    ],
-                wait=False,
-                ),
-            )
-
-
-@contextlib.contextmanager
-def lock_machines(ctx, config):
-    """
-    Lock machines.  Called when the teuthology run finds and locks
-    new machines.  This is not called if one already has teuthology-locked
-    machines and placed those keys in the Targets section of a yaml file.
-    """
-    log.info('Locking machines...')
-    assert isinstance(config[0], int), 'config[0] must be an integer'
-    machine_type = config[1]
-    machine_types = teuthology.get_multi_machine_types(machine_type)
-    how_many = config[0]
-
-    while True:
-        # make sure there are enough machines up
-        machines = lock.list_locks()
-        if machines is None:
-            if ctx.block:
-                log.warn('error listing machines, trying again')
-                time.sleep(20)
-                continue
-            else:
-                assert 0, 'error listing machines'
-
-        is_up = lambda machine: machine['up'] and machine['type'] in machine_types  # noqa
-        num_up = len(filter(is_up, machines))
-        assert num_up >= how_many, 'not enough machines are up'
-
-        # make sure there are machines for non-automated jobs to run
-        is_up_and_free = lambda machine: machine['up'] and machine['locked'] == 0 and machine['type'] in machine_types  # noqa
-        up_and_free = filter(is_up_and_free, machines)
-        num_free = len(up_and_free)
-        if num_free < 6 and ctx.owner.startswith('scheduled'):
-            if ctx.block:
-                log.info(
-                    'waiting for more machines to be free (need %s see %s)...',
-                    how_many,
-                    num_free,
-                )
-                time.sleep(10)
-                continue
-            else:
-                assert 0, 'not enough machines free'
-
-        newly_locked = lock.lock_many(ctx, how_many, machine_type, ctx.owner,
-                                      ctx.archive)
-        if len(newly_locked) == how_many:
-            vmlist = []
-            for lmach in newly_locked:
-                if teuthology.is_vm(lmach):
-                    vmlist.append(lmach)
-            if vmlist:
-                log.info('Waiting for virtual machines to come up')
-                keyscan_out = ''
-                loopcount = 0
-                while len(keyscan_out.splitlines()) != len(vmlist):
-                    loopcount += 1
-                    time.sleep(10)
-                    keyscan_out, current_locks = lock.keyscan_check(ctx,
-                                                                    vmlist)
-                    log.info('virtual machine is still unavailable')
-                    if loopcount == 40:
-                        loopcount = 0
-                        log.info('virtual machine(s) still not up, ' +
-                                 'recreating unresponsive ones.')
-                        for guest in vmlist:
-                            if guest not in keyscan_out:
-                                log.info('recreating: ' + guest)
-                                lock.destroy_if_vm(ctx, 'ubuntu@' + guest)
-                                lock.create_if_vm(ctx, 'ubuntu@' + guest)
-                if lock.update_keys(ctx, keyscan_out, current_locks):
-                    log.info("Error in virtual machine keys")
-                newscandict = {}
-                for dkey in newly_locked.iterkeys():
-                    stats = lockstatus.get_status(ctx, dkey)
-                    newscandict[dkey] = stats['sshpubkey']
-                ctx.config['targets'] = newscandict
-            else:
-                ctx.config['targets'] = newly_locked
-            # FIXME: Ugh.
-            log.info('\n  '.join(['Locked targets:', ] + yaml.safe_dump(ctx.config['targets'], default_flow_style=False).splitlines()))
-            break
-        elif not ctx.block:
-            assert 0, 'not enough machines are available'
-
-        log.warn('Could not lock enough machines, waiting...')
-        time.sleep(10)
-    try:
-        yield
-    finally:
-        if ctx.config.get('unlock_on_failure', False) or \
-           ctx.summary.get('success', False):
-            log.info('Unlocking machines...')
-            for machine in ctx.config['targets'].iterkeys():
-                lock.unlock_one(ctx, machine, ctx.owner)
-
-def save_config(ctx, config):
-    """
-    Store the config in a yaml file
-    """
-    log.info('Saving configuration')
-    if ctx.archive is not None:
-        with file(os.path.join(ctx.archive, 'config.yaml'), 'w') as f:
-            yaml.safe_dump(ctx.config, f, default_flow_style=False)
-
-def check_lock(ctx, config):
-    """
-    Check lock status of remote machines.
-    """
-    if ctx.config.get('check-locks') == False:
-        log.info('Lock checking disabled.')
-        return
-    log.info('Checking locks...')
-    for machine in ctx.config['targets'].iterkeys():
-        status = lockstatus.get_status(ctx, machine)
-        log.debug('machine status is %s', repr(status))
-        assert status is not None, \
-            'could not read lock status for {name}'.format(name=machine)
-        assert status['up'], 'machine {name} is marked down'.format(name=machine)
-        assert status['locked'], \
-            'machine {name} is not locked'.format(name=machine)
-        assert status['locked_by'] == ctx.owner, \
-            'machine {name} is locked by {user}, not {owner}'.format(
-            name=machine,
-            user=status['locked_by'],
-            owner=ctx.owner,
-            )
-
-@contextlib.contextmanager
-def timer(ctx, config):
-    """
-    Start the timer used by teuthology
-    """
-    log.info('Starting timer...')
-    start = time.time()
-    try:
-        yield
-    finally:
-        duration = time.time() - start
-        log.info('Duration was %f seconds', duration)
-        ctx.summary['duration'] = duration
-
-def connect(ctx, config):
-    """
-    Open a connection to a remote host.
-    """
-    log.info('Opening connections...')
-    from ..orchestra import remote
-    from ..orchestra import cluster
-    remotes = []
-    machs = []
-    for name in ctx.config['targets'].iterkeys():
-        machs.append(name)
-    for t, key in ctx.config['targets'].iteritems():
-        log.debug('connecting to %s', t)
-        try:
-            if ctx.config['sshkeys'] == 'ignore':
-                key = None
-        except (AttributeError, KeyError):
-            pass
-        if key.startswith('ssh-rsa ') or key.startswith('ssh-dss '):
-            if teuthology.is_vm(t):
-                key = None
-        remotes.append(
-            remote.Remote(name=t, host_key=key, keep_alive=True, console=None))
-    ctx.cluster = cluster.Cluster()
-    if 'roles' in ctx.config:
-        for rem, roles in zip(remotes, ctx.config['roles']):
-            assert all(isinstance(role, str) for role in roles), \
-                "Roles in config must be strings: %r" % roles
-            ctx.cluster.add(rem, roles)
-            log.info('roles: %s - %s' % (rem, roles))
-    else:
-        for rem in remotes:
-            ctx.cluster.add(rem, rem.name)
-
-
-def serialize_remote_roles(ctx, config):
-    """
-    Provides an explicit mapping of which remotes have been assigned which
-    roles, so that other software can be loosely coupled to teuthology.
-    """
-    if ctx.archive is not None:
-        with file(os.path.join(ctx.archive, 'info.yaml'), 'r+') as info_file:
-            info_yaml = yaml.safe_load(info_file)
-            info_file.seek(0)
-            info_yaml['cluster'] = dict([(remote.name, {'roles': roles}) for remote, roles in ctx.cluster.remotes.iteritems()])
-            yaml.safe_dump(info_yaml, info_file, default_flow_style=False)
-
-
-def check_ceph_data(ctx, config):
-    """
-    Check for old /var/lib/ceph directories and detect staleness.
-    """
-    log.info('Checking for old /var/lib/ceph...')
-    processes = ctx.cluster.run(
-        args=[
-            'test', '!', '-e', '/var/lib/ceph',
-            ],
-        wait=False,
-        )
-    failed = False
-    for proc in processes:
-        assert isinstance(proc.exitstatus, gevent.event.AsyncResult)
-        try:
-            proc.exitstatus.get()
-        except run.CommandFailedError:
-            log.error('Host %s has stale /var/lib/ceph, check lock and nuke/cleanup.', proc.remote.shortname)
-            failed = True
-    if failed:
-        raise RuntimeError('Stale /var/lib/ceph detected, aborting.')
-
-def check_conflict(ctx, config):
-    """
-    Note directory use conflicts and stale directories.
-    """
-    log.info('Checking for old test directory...')
-    testdir = teuthology.get_testdir(ctx)
-    processes = ctx.cluster.run(
-        args=[
-            'test', '!', '-e', testdir,
-            ],
-        wait=False,
-        )
-    failed = False
-    for proc in processes:
-        assert isinstance(proc.exitstatus, gevent.event.AsyncResult)
-        try:
-            proc.exitstatus.get()
-        except run.CommandFailedError:
-            log.error('Host %s has stale test directory %s, check lock and cleanup.', proc.remote.shortname, testdir)
-            failed = True
-    if failed:
-        raise RuntimeError('Stale jobs detected, aborting.')
-
-@contextlib.contextmanager
-def archive(ctx, config):
-    """
-    Handle the creation and deletion of the archive directory.
-    """
-    log.info('Creating archive directory...')
-    archive_dir = teuthology.get_archive_dir(ctx)
-    run.wait(
-        ctx.cluster.run(
-            args=[
-                'install', '-d', '-m0755', '--', archive_dir,
-                ],
-            wait=False,
-            )
-        )
-
-    try:
-        yield
-    except Exception:
-        # we need to know this below
-        ctx.summary['success'] = False
-        raise
-    finally:
-        if ctx.archive is not None and \
-                not (ctx.config.get('archive-on-error') and ctx.summary['success']):
-            log.info('Transferring archived files...')
-            logdir = os.path.join(ctx.archive, 'remote')
-            if (not os.path.exists(logdir)):
-                os.mkdir(logdir)
-            for remote in ctx.cluster.remotes.iterkeys():
-                path = os.path.join(logdir, remote.shortname)
-                teuthology.pull_directory(remote, archive_dir, path)
-
-        log.info('Removing archive directory...')
-        run.wait(
-            ctx.cluster.run(
-                args=[
-                    'rm',
-                    '-rf',
-                    '--',
-                    archive_dir,
-                    ],
-                wait=False,
-                ),
-            )
-
-
-@contextlib.contextmanager
-def sudo(ctx, config):
-    """
-    Enable use of sudo
-    """
-    log.info('Configuring sudo...')
-    sudoers_file = '/etc/sudoers'
-    backup_ext = '.orig.teuthology'
-    tty_expr = r's/^\([^#]*\) \(requiretty\)/\1 !\2/g'
-    pw_expr = r's/^\([^#]*\) !\(visiblepw\)/\1 \2/g'
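-    # The first expression turns 'requiretty' into '!requiretty'; the second
-    # drops the '!' from '!visiblepw', so sudo keeps working over the
-    # non-interactive ssh sessions teuthology uses.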
-
-    run.wait(
-        ctx.cluster.run(
-            args="sudo sed -i{ext} -e '{tty}' -e '{pw}' {path}".format(
-                ext=backup_ext, tty=tty_expr, pw=pw_expr,
-                path=sudoers_file
-            ),
-            wait=False,
-        )
-    )
-    try:
-        yield
-    finally:
-        log.info('Restoring {0}...'.format(sudoers_file))
-        ctx.cluster.run(
-            args="sudo mv -f {path}{ext} {path}".format(
-                path=sudoers_file, ext=backup_ext
-            )
-        )
-
-
-@contextlib.contextmanager
-def coredump(ctx, config):
-    """
-    Stash a coredump of this system if an error occurs.
-    """
-    log.info('Enabling coredump saving...')
-    archive_dir = teuthology.get_archive_dir(ctx)
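-    # core_pattern '%t.%p.core' names each dump by unix timestamp and pid and
-    # places it under <archive_dir>/coredump so it is pulled back with the
-    # rest of the archive.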
-    run.wait(
-        ctx.cluster.run(
-            args=[
-                'install', '-d', '-m0755', '--',
-                '{adir}/coredump'.format(adir=archive_dir),
-                run.Raw('&&'),
-                'sudo', 'sysctl', '-w', 'kernel.core_pattern={adir}/coredump/%t.%p.core'.format(adir=archive_dir),
-                ],
-            wait=False,
-            )
-        )
-
-    try:
-        yield
-    finally:
-        run.wait(
-            ctx.cluster.run(
-                args=[
-                    'sudo', 'sysctl', '-w', 'kernel.core_pattern=core',
-                    run.Raw('&&'),
-                    # don't litter the archive dir if there were no cores dumped
-                    'rmdir',
-                    '--ignore-fail-on-non-empty',
-                    '--',
-                    '{adir}/coredump'.format(adir=archive_dir),
-                    ],
-                wait=False,
-                )
-            )
-
-        # set success=false if the dir is still there = coredumps were
-        # seen
-        for remote in ctx.cluster.remotes.iterkeys():
-            r = remote.run(
-                args=[
-                    'if', 'test', '!', '-e', '{adir}/coredump'.format(adir=archive_dir), run.Raw(';'), 'then',
-                    'echo', 'OK', run.Raw(';'),
-                    'fi',
-                    ],
-                stdout=StringIO(),
-                )
-            if r.stdout.getvalue() != 'OK\n':
-                log.warning('Found coredumps on %s, flagging run as failed', remote)
-                ctx.summary['success'] = False
-                if 'failure_reason' not in ctx.summary:
-                    ctx.summary['failure_reason'] = \
-                        'Found coredumps on {remote}'.format(remote=remote)
-
-@contextlib.contextmanager
-def syslog(ctx, config):
-    """
-    Start syslog monitoring; stop it on exit.
-    """
-    if ctx.archive is None:
-        # disable this whole feature if we're not going to archive the data anyway
-        yield
-        return
-
-    log.info('Starting syslog monitoring...')
-
-    archive_dir = teuthology.get_archive_dir(ctx)
-    run.wait(
-        ctx.cluster.run(
-            args=[
-                'mkdir', '-m0755', '--',
-                '{adir}/syslog'.format(adir=archive_dir),
-                ],
-            wait=False,
-            )
-        )
-
-    CONF = '/etc/rsyslog.d/80-cephtest.conf'
-    conf_fp = StringIO('''
-kern.* -{adir}/syslog/kern.log;RSYSLOG_FileFormat
-*.*;kern.none -{adir}/syslog/misc.log;RSYSLOG_FileFormat
-'''.format(adir=archive_dir))
-    try:
-        for rem in ctx.cluster.remotes.iterkeys():
-            teuthology.sudo_write_file(
-                remote=rem,
-                path=CONF,
-                data=conf_fp,
-                )
-            conf_fp.seek(0)
-        run.wait(
-            ctx.cluster.run(
-                args=[
-                    'sudo',
-                    'service',
-                    # a mere reload (SIGHUP) doesn't seem to make
-                    # rsyslog open the files
-                    'rsyslog',
-                    'restart',
-                    ],
-                wait=False,
-                ),
-            )
-
-        yield
-    finally:
-        log.info('Shutting down syslog monitoring...')
-
-        run.wait(
-            ctx.cluster.run(
-                args=[
-                    'sudo',
-                    'rm',
-                    '-f',
-                    '--',
-                    CONF,
-                    run.Raw('&&'),
-                    'sudo',
-                    'service',
-                    'rsyslog',
-                    'restart',
-                    ],
-                wait=False,
-                ),
-            )
-        # race condition: nothing actually says rsyslog had time to
-        # flush the file fully. oh well.
-
-        log.info('Checking logs for errors...')
-        for remote in ctx.cluster.remotes.iterkeys():
-            log.debug('Checking %s', remote.name)
-            r = remote.run(
-                args=[
-                    'egrep', '--binary-files=text',
-                    '\\bBUG\\b|\\bINFO\\b|\\bDEADLOCK\\b',
-                    run.Raw('{adir}/syslog/*.log'.format(adir=archive_dir)),
-                    run.Raw('|'),
-                    'grep', '-v', 'task .* blocked for more than .* seconds',
-                    run.Raw('|'),
-                    'grep', '-v', 'lockdep is turned off',
-                    run.Raw('|'),
-                    'grep', '-v', 'trying to register non-static key',
-                    run.Raw('|'),
-                    'grep', '-v', 'DEBUG: fsize',  # xfs_fsr
-                    run.Raw('|'),
-                    'grep', '-v', 'CRON',  # ignore cron noise
-                    run.Raw('|'),
-                    'grep', '-v', 'BUG: bad unlock balance detected', # #6097
-                    run.Raw('|'),
-                    'grep', '-v', 'inconsistent lock state', # FIXME see #2523
-                    run.Raw('|'),
-                    'grep', '-v', '*** DEADLOCK ***', # part of lockdep output
-                    run.Raw('|'),
-                    'grep', '-v', 'INFO: possible irq lock inversion dependency detected', # FIXME see #2590 and #147
-                    run.Raw('|'),
-                    'grep', '-v', 'INFO: NMI handler (perf_event_nmi_handler) took too long to run',
-                    run.Raw('|'),
-                    'grep', '-v', 'INFO: recovery required on readonly',
-                    run.Raw('|'),
-                    'head', '-n', '1',
-                    ],
-                stdout=StringIO(),
-                )
-            stdout = r.stdout.getvalue()
-            if stdout != '':
-                log.error('Error in syslog on %s: %s', remote.name, stdout)
-                ctx.summary['success'] = False
-                if 'failure_reason' not in ctx.summary:
-                    ctx.summary['failure_reason'] = \
-                        "'{error}' in syslog".format(error=stdout)
-
-        log.info('Compressing syslogs...')
-        run.wait(
-            ctx.cluster.run(
-                args=[
-                    'find',
-                    '{adir}/syslog'.format(adir=archive_dir),
-                    '-name',
-                    '*.log',
-                    '-print0',
-                    run.Raw('|'),
-                    'sudo',
-                    'xargs',
-                    '-0',
-                    '--no-run-if-empty',
-                    '--',
-                    'gzip',
-                    '--',
-                    ],
-                wait=False,
-                ),
-            )
-
-def vm_setup(ctx, config):
-    """
-    Look for virtual machines and handle their initialization
-    """
-    with parallel() as p:
-        editinfo = os.path.join(os.path.dirname(__file__),'edit_sudoers.sh')
-        for remote in ctx.cluster.remotes.iterkeys():
-            mname = re.match(".*@([^\.]*)\.?.*", str(remote)).group(1)
-            if teuthology.is_vm(mname):
-                r = remote.run(args=['test', '-e', '/ceph-qa-ready',],
-                        stdout=StringIO(),
-                        check_status=False,)
-                if r.exitstatus != 0:
-                    p1 = subprocess.Popen(['cat', editinfo], stdout=subprocess.PIPE)
-                    p2 = subprocess.Popen(['ssh', '-t', '-t', str(remote), 'sudo', 'sh'], stdin=p1.stdout, stdout=subprocess.PIPE)
-                    _, err = p2.communicate()
-                    if err:
-                        log.info("Edit of /etc/sudoers failed: %s", err)
-                    p.spawn(_handle_vm_init, remote)
-
-def _handle_vm_init(remote):
-    """
-    Initialize a remote vm by downloading and running ceph_qa_chef.
-    """
-    log.info('Running ceph_qa_chef on %s', remote)
-    remote.run(args=['wget', '-q', '-O-',
-            'http://ceph.com/git/?p=ceph-qa-chef.git;a=blob_plain;f=solo/solo-from-scratch;hb=HEAD',
-            run.Raw('|'),
-            'sh',
-        ])
-
diff --git a/teuthology/task/iscsi.py b/teuthology/task/iscsi.py
deleted file mode 100644 (file)
index d83fbd2..0000000
+++ /dev/null
@@ -1,219 +0,0 @@
-"""
-Handle iscsi adm commands for tgt connections.
-"""
-import logging
-import contextlib
-import socket
-
-from cStringIO import StringIO
-from teuthology import misc as teuthology
-from teuthology import contextutil
-from teuthology.task.common_fs_utils import generic_mkfs
-from teuthology.task.common_fs_utils import generic_mount
-from ..orchestra import run
-
-log = logging.getLogger(__name__)
-
-
-def _get_remote(remotes, client):
-    """
-    Get remote object that is associated with the client specified.
-    """
-    for rem in remotes:
-        if client in remotes[rem]:
-            return rem
-
-
-def _get_remote_name(remotes, client):
-    """
-    Get remote name that is associated with the client specified.
-    """
-    rem_name = _get_remote(remotes, client).name
-    rem_name = rem_name[rem_name.find('@') + 1:]
-    return rem_name
-
-
-def tgt_devname_get(ctx, test_image):
-    """
-    Get the name of the newly created device by following the by-path
-    link (which is symbolically linked to the appropriate /dev/sd* file).
-    """
-    remotes = ctx.cluster.only(teuthology.is_type('client')).remotes
-    rem_name = _get_remote_name(remotes, test_image)
-    lnkpath = '/dev/disk/by-path/ip-%s:3260-iscsi-rbd-lun-1' % \
-            socket.gethostbyname(rem_name)
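-    # e.g. /dev/disk/by-path/ip-192.0.2.10:3260-iscsi-rbd-lun-1 (address is
-    # illustrative); udev symlinks this back to the /dev/sd* device for LUN 1.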
-    return lnkpath
-
-
-def tgt_devname_rtn(ctx, test_image):
-    """
-    Wrapper passed to common_fs_util functions.
-    """
-    image = test_image[test_image.find('.') + 1:]
-    return tgt_devname_get(ctx, image)
-
-
-def file_io_test(rem, file_from, lnkpath):
-    """
-    dd a file to the iscsi interface, read it back, and compare with the original.
-    """
-    rem.run(
-        args=[
-        'sudo',
-        'dd',
-        'if=%s' % file_from,
-        'of=%s' % lnkpath,
-        'bs=1024',
-        'conv=fsync',
-    ])
-    proc = rem.run(args=['mktemp'], stdout=StringIO(),)
-    tfile2 = proc.stdout.getvalue().strip()
-    rem.run(
-        args=[
-        'sudo',
-        'rbd',
-        'export',
-        'iscsi-image',
-        run.Raw('-'),
-        run.Raw('>'),
-        tfile2,
-    ])
-    proc = rem.run(
-        args=[
-            'ls',
-            '-l',
-            file_from,
-            run.Raw('|'),
-            'awk',
-            '{print $5}', ],
-        stdout=StringIO(),
-        )
-    size = proc.stdout.getvalue().strip()
-    rem.run(
-        args=[
-            'cmp',
-            '-n',
-            size,
-            file_from,
-            tfile2,
-    ])
-    rem.run(args=['rm', tfile2])
-
-
-def general_io_test(ctx, rem, image_name):
-    """
-    Do simple I/O tests to the iscsi interface before putting a
-    filesystem on it.
-    """
-    rem.run(
-        args=[
-            'udevadm',
-            'settle',
-    ])
-    test_phrase = 'The time has come the walrus said to speak of many things.'
-    lnkpath = tgt_devname_get(ctx, image_name)
-    proc = rem.run(args=['mktemp'], stdout=StringIO(),)
-    tfile1 = proc.stdout.getvalue().strip()
-    rem.run(
-        args=[
-            'echo',
-            test_phrase,
-            run.Raw('>'),
-            tfile1,
-        ])
-    file_io_test(rem, tfile1, lnkpath)
-    rem.run(args=['rm', tfile1])
-    file_io_test(rem, '/bin/ls', lnkpath)
-
-
-@contextlib.contextmanager
-def start_iscsi_initiators(ctx, tgt_link):
-    """
-    This is the sub-task that assigns an rbd to an iscsiadm control and
-    performs a login (thereby creating a /dev/sd device).  It performs
-    a logout when finished.
-    """
-    remotes = ctx.cluster.only(teuthology.is_type('client')).remotes
-    tgtd_list = []
-    for role, host in tgt_link:
-        rem = _get_remote(remotes, role)
-        rem_name = _get_remote_name(remotes, host)
-        rem.run(
-            args=[
-                'sudo',
-                'iscsiadm',
-                '-m',
-                'discovery',
-                '-t',
-                'st',
-                '-p',
-                rem_name,
-        ])
-        proc = rem.run(
-            args=[
-                'sudo',
-                'iscsiadm',
-                '-m',
-                'node',
-                '--login',
-        ])
-        if proc.exitstatus == 0:
-            tgtd_list.append((rem, rem_name))
-        general_io_test(ctx, rem, host)
-    try:
-        with contextutil.nested(
-            lambda: generic_mkfs(ctx=ctx, config={host: {'fs_type': 'xfs'}},
-                    devname_rtn=tgt_devname_rtn),
-            lambda: generic_mount(ctx=ctx, config={host: None},
-                    devname_rtn=tgt_devname_rtn),
-            ):
-            yield
-    finally:
-        for rem_info in tgtd_list:
-            rem = rem_info[0]
-            rem_name = rem_info[1]
-            rem.run(
-                args=[
-                    'sudo',
-                    'iscsiadm',
-                    '-m',
-                    'node',
-                    '--logout',
-            ])
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Handle iscsi admin login after a tgt connection has been established.
-
-    Assume a default host client of client.0 and a sending client of
-    client.0 if not specified otherwise.
-
-    Sample tests could be:
-
-    iscsi:
-
-        This sets up a tgt link from client.0 to client.0
-
-    iscsi: [client.1, client.2]
-
-        This sets up a tgt link from client.1 to client.0 and a tgt link
-        from client.2 to client.0
-
-    iscsi:
-        client.0: client.1
-        client.1: client.0
-
-        This sets up a tgt link from client.0 to client.1 and a tgt link
-        from client.1 to client.0
-
-    Note that the iscsi image name is iscsi-image, so this only works
-    for one image being tested at any one time.
-    """
-    try:
-        pairs = config.items()
-    except AttributeError:
-        pairs = [('client.0', 'client.0')]
-    with contextutil.nested(
-            lambda: start_iscsi_initiators(ctx=ctx, tgt_link=pairs),):
-        yield
diff --git a/teuthology/task/kclient.py b/teuthology/task/kclient.py
deleted file mode 100644 (file)
index 34595e8..0000000
+++ /dev/null
@@ -1,112 +0,0 @@
-"""
-Mount/unmount a ``kernel`` client.
-"""
-import contextlib
-import logging
-import os
-
-from teuthology import misc as teuthology
-from teuthology.task_util.kclient import write_secret_file
-
-log = logging.getLogger(__name__)
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Mount/unmount a ``kernel`` client.
-
-    The config is optional and defaults to mounting on all clients. If
-    a config is given, it is expected to be a list of clients to do
-    this operation on. This lets you e.g. set up one client with
-    ``ceph-fuse`` and another with ``kclient``.
-
-    Example that mounts all clients::
-
-        tasks:
-        - ceph:
-        - kclient:
-        - interactive:
-
-    Example that uses both ``kclient`` and ``ceph-fuse``::
-
-        tasks:
-        - ceph:
-        - ceph-fuse: [client.0]
-        - kclient: [client.1]
-        - interactive:
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    log.info('Mounting kernel clients...')
-    assert config is None or isinstance(config, list), \
-        "task kclient got invalid config"
-
-    if config is None:
-        config = ['client.{id}'.format(id=id_)
-                  for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
-    clients = list(teuthology.get_clients(ctx=ctx, roles=config))
-
-    testdir = teuthology.get_testdir(ctx)
-
-    for id_, remote in clients:
-        mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
-        log.info('Mounting kclient client.{id} at {remote} {mnt}...'.format(
-                id=id_, remote=remote, mnt=mnt))
-
-        # figure mon ips
-        remotes_and_roles = ctx.cluster.remotes.items()
-        roles = [roles for (remote_, roles) in remotes_and_roles]
-        ips = [host for (host, port) in (remote_.ssh.get_transport().getpeername() for (remote_, roles) in remotes_and_roles)]
-        mons = teuthology.get_mons(roles, ips).values()
-
-        keyring = '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
-        secret = '{tdir}/data/client.{id}.secret'.format(tdir=testdir, id=id_)
-        write_secret_file(ctx, remote, 'client.{id}'.format(id=id_),
-                                     keyring, secret)
-
-        remote.run(
-            args=[
-                'mkdir',
-                '--',
-                mnt,
-                ],
-            )
-
-        remote.run(
-            args=[
-                'sudo',
-                'adjust-ulimits',
-                'ceph-coverage',
-                '{tdir}/archive/coverage'.format(tdir=testdir),
-                '/sbin/mount.ceph',
-                '{mons}:/'.format(mons=','.join(mons)),
-                mnt,
-                '-v',
-                '-o',
-                'name={id},secretfile={secret}'.format(id=id_,
-                                                       secret=secret),
-                ],
-            )
-
-    try:
-        yield
-    finally:
-        log.info('Unmounting kernel clients...')
-        for id_, remote in clients:
-            log.debug('Unmounting client client.{id}...'.format(id=id_))
-            mnt = os.path.join(testdir,  'mnt.{id}'.format(id=id_))
-            remote.run(
-                args=[
-                    'sudo',
-                    'umount',
-                    mnt,
-                    ],
-                )
-            remote.run(
-                args=[
-                    'rmdir',
-                    '--',
-                    mnt,
-                    ],
-                )
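
The mon-address lookup near the top of the mount loop above is fairly dense; an
editorial sketch of what it computes (hypothetical helper, assuming teuthology's
get_mons() maps mon roles to "ip:port" strings)::

    def mon_addrs(remotes_and_roles, get_mons):
        roles = [r for (_, r) in remotes_and_roles]
        # peer IP of each remote's SSH connection
        ips = [remote.ssh.get_transport().getpeername()[0]
               for (remote, _) in remotes_and_roles]
        return ','.join(get_mons(roles, ips).values())   # joined for /sbin/mount.ceph
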
diff --git a/teuthology/task/kcon_most b/teuthology/task/kcon_most
deleted file mode 100755 (executable)
index cef3d89..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/sh
-
-# Adapted from ceph repo src/script/kcon_most.sh 
-
-p() {
- echo "$*" > /sys/kernel/debug/dynamic_debug/control
-}
-
-usage() {
- echo "usage: $0 [on|off]"
- exit 1
-}
-
-if [ $# != "1" ];
-then
-    usage
-fi
-
-if [ "$1" != "on" -a "$1" != "off" ];
-then
-    usage
-fi
-
-if [ $1 = "on" ];
-then
-       p 'module ceph +p'
-       p 'module libceph +p'
-       p 'module rbd +p'
-       p 'file net/ceph/messenger.c -p'
-       p 'file' `grep -- --- /sys/kernel/debug/dynamic_debug/control | grep ceph | awk '{print $1}' | sed 's/:/ line /'` '+p'
-       p 'file' `grep -- === /sys/kernel/debug/dynamic_debug/control | grep ceph | awk '{print $1}' | sed 's/:/ line /'` '+p'
-else
-       p 'module ceph -p'
-       p 'module libceph -p'
-       p 'module rbd -p'
-       p 'file' `grep -- --- /sys/kernel/debug/dynamic_debug/control | grep ceph | awk '{print $1}' | sed 's/:/ line /'` '-p'
-       p 'file' `grep -- === /sys/kernel/debug/dynamic_debug/control | grep ceph | awk '{print $1}' | sed 's/:/ line /'` '-p'
-fi
-exit 0
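
The script above simply writes match specifications into debugfs. A hedged Python
equivalent of the module-level toggle (ignoring the per-file tweaks), assuming root
privileges and a mounted debugfs::

    def set_ceph_dynamic_debug(enable=True):
        """Enable or disable dynamic debug output for the ceph kernel modules."""
        flag = '+p' if enable else '-p'
        for module in ('ceph', 'libceph', 'rbd'):
            # one command per write, mirroring the one-echo-per-rule shell script
            with open('/sys/kernel/debug/dynamic_debug/control', 'w') as ctl:
                ctl.write('module {0} {1}\n'.format(module, flag))
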
diff --git a/teuthology/task/kcon_most.py b/teuthology/task/kcon_most.py
deleted file mode 100644 (file)
index 819de34..0000000
+++ /dev/null
@@ -1,66 +0,0 @@
-"""
-Most ceph console logging
-"""
-import contextlib
-import logging
-
-from teuthology import misc as teuthology
-
-log = logging.getLogger(__name__)
-
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Enable most ceph console logging
-
-    Example that enables logging on all clients::
-
-        tasks:
-        - ceph:
-        - kclient:
-        - kcon_most:
-        - interactive:
-
-    Example that enables logging only on the client using kclient::
-
-        tasks:
-        - ceph:
-        - kclient: [client.0]
-        - kcon_most: [client.0]
-        - interactive:
-    """
-    log.info('Enable additional kernel logging...')
-    assert config is None or isinstance(config, list), \
-        "task kcon_most got invalid config"
-
-    if config is None:
-        config = ['client.{id}'.format(id=id_)
-                  for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
-    clients = list(teuthology.get_clients(ctx=ctx, roles=config))
-
-    for id_, remote in clients:
-        # TODO: Don't have to run this more than once per node (remote)
-        log.info('Enable logging on client.{id} at {remote} ...'.format(
-                id=id_, remote=remote))
-        remote.run(
-            args=[
-                'sudo',
-                'kcon_most',
-                'on'
-                ],
-            )
-
-    try:
-        yield
-    finally:
-        log.info('Disable extra kernel logging on clients...')
-        for id_, remote in clients:
-            log.debug('Disable extra kernel logging on client.{id}...'.format(id=id_))
-            remote.run(
-                args=[
-                    'sudo',
-                    'kcon_most',
-                    'off'
-                    ],
-                )
diff --git a/teuthology/task/kernel.py b/teuthology/task/kernel.py
deleted file mode 100644 (file)
index 0ac7360..0000000
+++ /dev/null
@@ -1,953 +0,0 @@
-"""
-Kernel installation task
-"""
-from cStringIO import StringIO
-
-import logging
-import re
-import shlex
-import urllib2
-import urlparse
-
-from teuthology import misc as teuthology
-from ..orchestra import run
-from ..config import config as teuth_config
-
-log = logging.getLogger(__name__)
-
-def normalize_config(ctx, config):
-    """
-    Returns a config whose keys are all real roles.
-    Generic roles (client, mon, osd, etc.) are replaced with
-    the actual roles (client.0, client.1, etc.). If the config
-    specifies a different version for a specific role, this is
-    unchanged.
-
-    For example, with 4 OSDs this::
-
-         osd:
-           tag: v3.0
-           kdb: true
-         osd.1:
-           branch: new_btrfs
-           kdb: false
-         osd.3:
-           deb: /path/to/linux-whatever.deb
-
-    is transformed into::
-
-         osd.0:
-           tag: v3.0
-           kdb: true
-         osd.1:
-           branch: new_btrfs
-           kdb: false
-         osd.2:
-           tag: v3.0
-           kdb: true
-         osd.3:
-           deb: /path/to/linux-whatever.deb
-
-    If config is None or just specifies a version to use,
-    it is applied to all nodes.
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    if config is None or \
-            len(filter(lambda x: x in ['tag', 'branch', 'sha1', 'kdb',
-                                       'deb'],
-                       config.keys())) == len(config.keys()):
-        new_config = {}
-        if config is None:
-            config = {'branch': 'master'}
-        for _, roles_for_host in ctx.cluster.remotes.iteritems():
-            new_config[roles_for_host[0]] = config
-        return new_config
-
-    new_config = {}
-    for role, role_config in config.iteritems():
-        if role_config is None:
-            role_config = {'branch': 'master'}
-        if '.' in role:
-            new_config[role] = role_config
-        else:
-            for id_ in teuthology.all_roles_of_type(ctx.cluster, role):
-                name = '{type}.{id}'.format(type=role, id=id_)
-                # specific overrides generic
-                if name not in config:
-                    new_config[name] = role_config
-    return new_config
-
-def _find_arch_and_dist(ctx):
-    """
-    Return the arch and distro value as a tuple.
-
-    Currently this only returns armv7l on the quantal distro or x86_64
-    on the precise distro.
-
-    :param ctx: Context
-    :returns: arch,distro
-    """
-    info = ctx.config.get('machine_type', 'plana')
-    if teuthology.is_arm(info):
-        return ('armv7l', 'quantal')
-    return ('x86_64', 'precise')
-
-def validate_config(ctx, config):
-    """
-    Make sure that all kernels specified for the roles on a single
-    remote refer to the same kernel.
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    for _, roles_for_host in ctx.cluster.remotes.iteritems():
-        kernel = None
-        for role in roles_for_host:
-            role_kernel = config.get(role, kernel)
-            if kernel is None:
-                kernel = role_kernel
-            elif role_kernel is not None:
-                assert kernel == role_kernel, \
-                    "everything on the same host must use the same kernel"
-                if role in config:
-                    del config[role]
-
-def _vsplitter(version):
-    """Kernels from Calxeda are named ...ceph-<sha1>...highbank.
-    Kernels that we generate are named ...-g<sha1>.
-    This routine finds the text in front of the sha1 that is used by
-    need_to_install() to extract information from the kernel name.
-
-    :param version: Name of the kernel
-    """
-    if version.endswith('highbank'):
-        return 'ceph-'
-    return '-g'
-
-def need_to_install(ctx, role, version):
-    """
-    Check to see if we need to install a kernel.  Get the version of the
-    currently running kernel, and compare it against the value passed in.
-
-    :param ctx: Context
-    :param role: Role
-    :param version: value to compare against (used in checking), can be either
-                    a utsrelease string (e.g. '3.13.0-rc3-ceph-00049-ge2817b3')
-                    or a sha1.
-    """
-    ret = True
-    log.info('Checking kernel version of {role}, want {ver}...'.format(
-             role=role, ver=version))
-    uname_fp = StringIO()
-    ctx.cluster.only(role).run(
-        args=[
-            'uname',
-            '-r',
-            ],
-        stdout=uname_fp,
-        )
-    cur_version = uname_fp.getvalue().rstrip('\n')
-    log.debug('current kernel version is {ver}'.format(ver=cur_version))
-
-    if '.' in version:
-        # version is utsrelease, yay
-        if cur_version == version:
-            log.debug('utsrelease strings match, do not need to install')
-            ret = False
-    else:
-        # version is sha1, need to try to extract sha1 from cur_version
-        splt = _vsplitter(cur_version)
-        if splt in cur_version:
-            _, cur_sha1 = cur_version.rsplit(splt, 1)
-            dloc = cur_sha1.find('-')
-            if dloc > 0:
-                cur_sha1 = cur_sha1[0:dloc]
-            log.debug('extracting sha1, {ver} -> {sha1}'.format(
-                      ver=cur_version, sha1=cur_sha1))
-            if version.startswith(cur_sha1):
-                log.debug('extracted sha1 matches, do not need to install')
-                ret = False
-        else:
-            log.debug('failed to parse current kernel version')
-    uname_fp.close()
-    return ret
-
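-# Example added for illustration (not in the original file): extracting the sha1
-# from a running kernel version the way need_to_install() does, roughly:
-#
-#     >>> cur = '3.13.0-rc3-ceph-00049-ge2817b3'
-#     >>> cur.rsplit(_vsplitter(cur), 1)[1].split('-')[0]
-#     'e2817b3'
-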
-def install_firmware(ctx, config):
-    """
-    Go to the github to get the latest firmware.
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    linux_firmware_git_upstream = 'git://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git'
-    uri = teuth_config.linux_firmware_git_url or linux_firmware_git_upstream
-    fw_dir = '/lib/firmware/updates'
-
-    for role in config.iterkeys():
-        if config[role].find('distro') >= 0:
-            log.info('Skipping firmware on distro kernel')
-            return
-        (role_remote,) = ctx.cluster.only(role).remotes.keys()
-        machine_type = teuthology.get_system_type(role_remote)
-        if machine_type == 'rpm':
-            return
-        log.info('Installing linux-firmware on {role}...'.format(role=role))
-        role_remote.run(
-            args=[
-                # kludge around mysterious 0-byte .git/HEAD files
-                'cd', fw_dir,
-                run.Raw('&&'),
-                'test', '-d', '.git',
-                run.Raw('&&'),
-                'test', '!', '-s', '.git/HEAD',
-                run.Raw('&&'),
-                'sudo', 'rm', '-rf', '.git',
-                run.Raw(';'),
-                # init
-                'sudo', 'install', '-d', '-m0755', fw_dir,
-                run.Raw('&&'),
-                'cd', fw_dir,
-                run.Raw('&&'),
-                'sudo', 'git', 'init',
-                ],
-            )
-        role_remote.run(
-            args=[
-                'sudo', 'git', '--git-dir=%s/.git' % fw_dir, 'config',
-                '--get', 'remote.origin.url', run.Raw('>/dev/null'),
-                run.Raw('||'),
-                'sudo', 'git', '--git-dir=%s/.git' % fw_dir,
-                'remote', 'add', 'origin', uri,
-                ],
-            )
-        role_remote.run(
-            args=[
-                'cd', fw_dir,
-                run.Raw('&&'),
-                'sudo', 'git', 'fetch', 'origin',
-                run.Raw('&&'),
-                'sudo', 'git', 'reset', '--hard', 'origin/master'
-                ],
-            )
-
-def download_kernel(ctx, config):
-    """
-    Download a Debian kernel and copy the associated linux image.
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    procs = {}
-    #Don't need to download distro kernels
-    for role, src in config.iteritems():
-        (role_remote,) = ctx.cluster.only(role).remotes.keys()
-        if src.find('distro') >= 0:
-            log.info('Installing newest kernel distro')
-            return
-        machine_type = teuthology.get_system_type(role_remote)
-        if src.find('/') >= 0:
-            # local deb
-            log.info('Copying kernel deb {path} to {role}...'.format(path=src,
-                                                                     role=role))
-            f = open(src, 'r')
-            proc = role_remote.run(
-                args=[
-                    'python', '-c',
-                    'import shutil, sys; shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))',
-                    '/tmp/linux-image.deb',
-                    ],
-                wait=False,
-                stdin=f
-                )
-            procs[role_remote.name] = proc
-        else:
-            log.info('Downloading kernel {sha1} on {role}...'.format(sha1=src,
-                                                                     role=role))
-            if machine_type == 'rpm':
-                dist, ver = teuthology.get_system_type(role_remote, distro=True, version=True)
-                if '.' in ver:
-                    ver = ver.split('.')[0]
-                ldist = '{dist}{ver}'.format(dist=dist, ver=ver)
-                _, rpm_url = teuthology.get_ceph_binary_url(
-                    package='kernel',
-                    sha1=src,
-                    format='rpm',
-                    flavor='basic',
-                    arch='x86_64',
-                    dist=ldist,
-                    )
-
-                kernel_url = urlparse.urljoin(rpm_url, 'kernel.x86_64.rpm')
-                output, err_mess = StringIO(), StringIO()
-                role_remote.run(args=['sudo', 'yum', 'list', 'installed', 'kernel'], stdout=output, stderr=err_mess )
-                # Check if the short (first 7 characters) sha1 is in the installed kernel list as expected
-                if src[0:7] in output.getvalue():
-                    output.close()
-                    err_mess.close()
-                    continue
-                output.close()
-                err_mess.close()
-                proc = role_remote.run(args=['sudo', 'yum', 'install', '-y', kernel_url], wait=False)
-                procs[role_remote.name] = proc
-                continue
-
-            larch, ldist = _find_arch_and_dist(ctx)
-            _, deb_url = teuthology.get_ceph_binary_url(
-                package='kernel',
-                sha1=src,
-                format='deb',
-                flavor='basic',
-                arch=larch,
-                dist=ldist,
-                )
-
-            log.info('fetching kernel from {url}'.format(url=deb_url))
-            proc = role_remote.run(
-                args=[
-                    'sudo', 'rm', '-f', '/tmp/linux-image.deb',
-                    run.Raw('&&'),
-                    'echo',
-                    'linux-image.deb',
-                    run.Raw('|'),
-                    'wget',
-                    '-nv',
-                    '-O',
-                    '/tmp/linux-image.deb',
-                    '--base={url}'.format(url=deb_url),
-                    '--input-file=-',
-                    ],
-                wait=False)
-            procs[role_remote.name] = proc
-
-    for name, proc in procs.iteritems():
-        log.debug('Waiting for download/copy to %s to complete...', name)
-        proc.exitstatus.get()
-
-
-def _no_grub_link(in_file, remote, kernel_ver):
-    """
-    Copy and link kernel related files if grub cannot be used
-    (as is the case in Arm kernels)
-
-    :param in_file: kernel file or image file to be copied.
-    :param remote: remote machine
-    :param kernel_ver: kernel version
-    """
-    boot1 = '/boot/%s' % in_file
-    boot2 = '%s.old' % boot1
-    remote.run(
-        args=[
-            'if', 'test', '-e', boot1, run.Raw(';'), 'then',
-            'sudo', 'mv', boot1, boot2, run.Raw(';'), 'fi',],
-    )
-    remote.run(
-        args=['sudo', 'ln', '-s', '%s-%s' % (in_file, kernel_ver) , boot1, ],
-    )
-
-def install_and_reboot(ctx, config):
-    """
-    Install and reboot the kernel.  This mostly performs remote
-    installation operations.   The code does check for Arm images
-    and skips grub operations if the kernel is Arm.  Otherwise, it
-    extracts kernel titles from submenu entries and makes the appropriate
-    grub calls.   The assumptions here are somewhat simplified in that
-    it expects kernel entries to be present under submenu entries.
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    procs = {}
-    kernel_title = ''
-    for role, src in config.iteritems():
-        (role_remote,) = ctx.cluster.only(role).remotes.keys()
-        if src.find('distro') >= 0:
-            log.info('Installing distro kernel on {role}...'.format(role=role))
-            install_kernel(role_remote)
-            continue
-        log.info('Installing kernel {src} on {role}...'.format(src=src,
-                                                               role=role))
-        system_type = teuthology.get_system_type(role_remote)
-        if system_type == 'rpm':
-            install_kernel(role_remote, src)
-            continue
-        proc = role_remote.run(
-            args=[
-                # install the kernel deb
-                'sudo',
-                'dpkg',
-                '-i',
-                '/tmp/linux-image.deb',
-                ],
-            )
-
-        # collect kernel image name from the .deb
-        cmdout = StringIO()
-        proc = role_remote.run(
-            args=[
-                # extract the actual boot image name from the deb
-                'dpkg-deb',
-                '--fsys-tarfile',
-                '/tmp/linux-image.deb',
-                run.Raw('|'),
-                'tar',
-                '-t',
-                '-v',
-                '-f', '-',
-                '--wildcards',
-                '--',
-                './boot/vmlinuz-*',
-                run.Raw('|'),
-                'sed',
-                r'-e s;.*\./boot/vmlinuz-;;',
-            ],
-            stdout = cmdout,
-            )
-        kernel_title = cmdout.getvalue().rstrip()
-        cmdout.close()
-        log.info('searching for kernel {}'.format(kernel_title))
-
-        if kernel_title.endswith("-highbank"):
-            _no_grub_link('vmlinuz', role_remote, kernel_title)
-            _no_grub_link('initrd.img', role_remote, kernel_title)
-            proc = role_remote.run(
-                args=[
-                    'sudo',
-                    'shutdown',
-                    '-r',
-                    'now',
-                    ],
-                wait=False,
-            )
-            procs[role_remote.name] = proc
-            continue
-
-        # look for menuentry for our kernel, and collect any
-        # submenu entries for their titles.  Assume that if our
-        # kernel entry appears later in the file than a submenu entry,
-        # it's actually nested under that submenu.  If it gets more
-        # complex this will totally break.
-
-        cmdout = StringIO()
-        proc = role_remote.run(
-            args=[
-                'egrep',
-                '(submenu|menuentry.*' + kernel_title + ').*{',
-                '/boot/grub/grub.cfg'
-               ],
-            stdout = cmdout,
-            )
-        submenu_title = ''
-        default_title = ''
-        for l in cmdout.getvalue().split('\n'):
-            fields = shlex.split(l)
-            if len(fields) >= 2:
-                command, title = fields[:2]
-                if command == 'submenu':
-                    submenu_title = title + '>'
-                if command == 'menuentry':
-                    if title.endswith(kernel_title):
-                        default_title = title
-                        break
-        cmdout.close()
-        log.info('submenu_title:{}'.format(submenu_title))
-        log.info('default_title:{}'.format(default_title))
-
-        proc = role_remote.run(
-            args=[
-                # use the title(s) to construct the content of
-                # the grub menu entry, so we can default to it.
-                '/bin/echo',
-                '-e',
-                r'cat <<EOF\nset default="' + submenu_title + \
-                    default_title + r'"\nEOF\n',
-                # make it look like an emacs backup file so
-                # unfortunately timed update-grub runs don't pick it
-                # up yet; use sudo tee so we are able to write to /etc
-                run.Raw('|'),
-                'sudo',
-                'tee',
-                '--',
-                '/etc/grub.d/01_ceph_kernel.tmp~',
-                run.Raw('>/dev/null'),
-                run.Raw('&&'),
-                'sudo',
-                'chmod',
-                'a+x',
-                '--',
-                '/etc/grub.d/01_ceph_kernel.tmp~',
-                run.Raw('&&'),
-                'sudo',
-                'mv',
-                '--',
-                '/etc/grub.d/01_ceph_kernel.tmp~',
-                '/etc/grub.d/01_ceph_kernel',
-                # update grub again so it accepts our default
-                run.Raw('&&'),
-                'sudo',
-                'update-grub',
-                run.Raw('&&'),
-                'rm',
-                '/tmp/linux-image.deb',
-                run.Raw('&&'),
-                'sudo',
-                'shutdown',
-                '-r',
-                'now',
-                ],
-            wait=False,
-            )
-        procs[role_remote.name] = proc
-
-    for name, proc in procs.iteritems():
-        log.debug('Waiting for install on %s to complete...', name)
-        proc.exitstatus.get()
-
-def enable_disable_kdb(ctx, config):
-    """
-    Enable kdb on remote machines in use.  Disable on those that are
-    not in use.
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    for role, enable in config.iteritems():
-        (role_remote,) = ctx.cluster.only(role).remotes.keys()
-        if "mira" in role_remote.name:
-            serialdev = "ttyS2"
-        else:
-            serialdev = "ttyS1"
-        if enable:
-            log.info('Enabling kdb on {role}...'.format(role=role))
-            try:
-                role_remote.run(
-                    args=[
-                        'echo', serialdev,
-                        run.Raw('|'),
-                        'sudo', 'tee', '/sys/module/kgdboc/parameters/kgdboc'
-                        ])
-            except run.CommandFailedError:
-                log.warn('Kernel does not support kdb')
-        else:
-            log.info('Disabling kdb on {role}...'.format(role=role))
-            # Add true pipe so command doesn't fail on kernel without kdb support.
-            try:
-                role_remote.run(
-                    args=[
-                        'echo', '',
-                        run.Raw('|'),
-                        'sudo', 'tee', '/sys/module/kgdboc/parameters/kgdboc',
-                        run.Raw('|'),
-                        'true',
-                        ])
-            except run.CommandFailedError:
-                log.warn('Kernel does not support kdb')
-
-def wait_for_reboot(ctx, need_install, timeout, distro=False):
-    """
-    Loop reconnecting and checking kernel versions until
-    they're all correct or the timeout is exceeded.
-
-    :param ctx: Context
-    :param need_install: list of packages that we need to reinstall.
-    :param timeout: number of second before we timeout.
-    """
-    import time
-    starttime = time.time()
-    while need_install:
-        teuthology.reconnect(ctx, timeout)
-        for client in need_install.keys():
-            if 'distro' in need_install[client]:
-                distro = True
-            log.info('Checking client {client} for new kernel version...'.format(client=client))
-            try:
-                if distro:
-                    assert not need_to_install_distro(ctx, client), \
-                            'failed to install new distro kernel version within timeout'
-
-                else:
-                    assert not need_to_install(ctx, client, need_install[client]), \
-                            'failed to install new kernel version within timeout'
-                del need_install[client]
-            except Exception:
-                log.exception("Saw exception")
-                # ignore connection resets and asserts while time is left
-                if time.time() - starttime > timeout:
-                    raise
-        time.sleep(1)
-
-
-def need_to_install_distro(ctx, role):
-    """
-    Installing kernels on rpm won't set up grub/boot to use them.
-    This installs the newest kernel package, checks its version,
-    compares it against the running one (uname -r), and returns True
-    if newest != current.  A similar check is done for deb.
-    """
-    (role_remote,) = ctx.cluster.only(role).remotes.keys()
-    system_type = teuthology.get_system_type(role_remote)
-    output, err_mess = StringIO(), StringIO()
-    role_remote.run(args=['uname', '-r' ], stdout=output, stderr=err_mess )
-    current = output.getvalue().strip()
-    if system_type == 'rpm':
-        role_remote.run(args=['sudo', 'yum', 'install', '-y', 'kernel'], stdout=output, stderr=err_mess )
-        #reset stringIO output.
-        output, err_mess = StringIO(), StringIO()
-        role_remote.run(args=['rpm', '-q', 'kernel', '--last' ], stdout=output, stderr=err_mess )
-        for kernel in output.getvalue().split():
-            if kernel.startswith('kernel'):
-                if 'ceph' not in kernel:
-                    newest = kernel
-
-    if system_type == 'deb':
-        distribution = teuthology.get_system_type(role_remote, distro=True)
-        newest = get_version_from_pkg(role_remote, distribution)
-
-    output.close()
-    err_mess.close()
-    if current in newest:
-        return False
-    log.info('Not newest distro kernel. Current: {cur} Expected: {new}'.format(cur=current, new=newest))
-    return True
-
-
-def install_kernel(remote, sha1=None):
-    """
-    RPM: Find the newest kernel on the machine and update grub to use that kernel, then reboot.
-    DEB: Find the newest kernel. Parse grub.cfg to figure out the entryname/subentry,
-    then modify 01_ceph_kernel to have the correct entry, run update-grub, and reboot.
-    """
-    if sha1:
-        short = sha1[0:7]
-    else:
-        short = None
-    system_type = teuthology.get_system_type(remote)
-    distribution = ''
-    if system_type == 'rpm':
-        output, err_mess = StringIO(), StringIO()
-        kern_out, kern_err = StringIO(), StringIO()
-        if short:
-            remote.run(args=['rpm', '-q', 'kernel' ], stdout=output, stderr=err_mess )
-            if short in output.getvalue():
-                for kernel in output.getvalue().split('\n'):
-                    if short in kernel:
-                        remote.run(args=['rpm', '-ql', kernel ], stdout=kern_out, stderr=kern_err )
-                        for file in kern_out.getvalue().split('\n'):
-                            if 'vmlinuz' in file:
-                                newest = file.split('/boot/vmlinuz-')[1]
-                                log.info('Kernel Version: {version}'.format(version=newest)) 
-            else:
-                raise Exception('Something went wrong: kernel file was installed but version is missing')
-        else:
-            remote.run(args=['rpm', '-q', 'kernel', '--last' ], stdout=output, stderr=err_mess )
-            newest=output.getvalue().split()[0].split('kernel-')[1]
-            log.info('Distro Kernel Version: {version}'.format(version=newest))
-        update_grub_rpm(remote, newest)
-        remote.run( args=['sudo', 'shutdown', '-r', 'now'], wait=False )
-        output.close()
-        err_mess.close()
-        return
-
-    if system_type == 'deb':
-        distribution = teuthology.get_system_type(remote, distro=True)
-        newversion = get_version_from_pkg(remote, distribution)
-        if 'ubuntu' in distribution:
-            grub2conf = teuthology.get_file(remote, '/boot/grub/grub.cfg', True)
-            submenu = ''
-            menuentry = ''
-            for line in grub2conf.split('\n'):
-                if 'submenu' in line:
-                    submenu = line.split('submenu ')[1]
-                    # Ubuntu likes to be sneaky and change formatting of
-                    # grub.cfg between quotes/doublequotes between versions
-                    if submenu.startswith("'"):
-                        submenu = submenu.split("'")[1]
-                    if submenu.startswith('"'):
-                        submenu = submenu.split('"')[1]
-                if 'menuentry' in line:
-                    if newversion in line and 'recovery' not in line:
-                        menuentry = line.split('\'')[1]
-                        break
-            if submenu:
-                grubvalue = submenu + '>' + menuentry
-            else:
-                grubvalue = menuentry
-            grubfile = 'cat <<EOF\nset default="' + grubvalue + '"\nEOF'
-            teuthology.delete_file(remote, '/etc/grub.d/01_ceph_kernel', sudo=True, force=True)
-            teuthology.sudo_write_file(remote, '/etc/grub.d/01_ceph_kernel', StringIO(grubfile), '755')
-            log.info('Distro Kernel Version: {version}'.format(version=newversion))
-            remote.run(args=['sudo', 'update-grub'])
-            remote.run(args=['sudo', 'shutdown', '-r', 'now'], wait=False )
-            return
-
-        if 'debian' in distribution:
-            grub2_kernel_select_generic(remote, newversion, 'deb')
-            log.info('Distro Kernel Version: {version}'.format(version=newversion))
-            remote.run( args=['sudo', 'shutdown', '-r', 'now'], wait=False )
-            return
-
-def update_grub_rpm(remote, newversion):
-    """
-    Updates grub file to boot new kernel version on both legacy grub/grub2.
-    """
-    grub = 'grub2'
-    # Check if grub2 is installed
-    try:
-        remote.run(args=['sudo', 'rpm', '-qi', 'grub2'])
-    except Exception:
-        grub = 'legacy'
-    log.info('Updating Grub Version: {grub}'.format(grub=grub))
-    if grub == 'legacy':
-        data = ''
-        #Write new legacy grub entry.
-        newgrub = generate_legacy_grub_entry(remote, newversion)
-        for line in newgrub:
-            data += line + '\n'
-        temp_file_path = teuthology.remote_mktemp(remote)
-        teuthology.sudo_write_file(remote, temp_file_path, StringIO(data), '755')
-        teuthology.move_file(remote, temp_file_path, '/boot/grub/grub.conf', True)
-    else:
-        #Update grub menu entry to new version.
-        grub2_kernel_select_generic(remote, newversion, 'rpm')
-
-def grub2_kernel_select_generic(remote, newversion, ostype):
-    """
-    Can be used on DEB and RPM. Sets which entry should be booted via its entry number.
-    """
-    if ostype == 'rpm':
-        grubset = 'grub2-set-default'
-        mkconfig = 'grub2-mkconfig'
-        grubconfig = '/boot/grub2/grub.cfg'
-    if ostype == 'deb':
-        grubset = 'grub-set-default'
-        grubconfig = '/boot/grub/grub.cfg'
-        mkconfig = 'grub-mkconfig'
-    remote.run(args=['sudo', mkconfig, '-o', grubconfig, ])
-    grub2conf = teuthology.get_file(remote, grubconfig, True)
-    entry_num = 0
-    for line in grub2conf.split('\n'):
-        if line.startswith('menuentry'):
-            if newversion in line:
-                break
-            entry_num += 1
-    remote.run(args=['sudo', grubset, str(entry_num), ])
-
-def generate_legacy_grub_entry(remote, newversion):
-    """
-    This will likely need to be used for ceph kernels as well
-    as legacy grub rpm distros don't have an easy way of selecting
-    a kernel just via a command. This generates an entry in legacy
-    grub for a new kernel version using the existing entry as a base.
-    """
-    grubconf = teuthology.get_file(remote, '/boot/grub/grub.conf', True)
-    titleline = ''
-    rootline = ''
-    kernelline = ''
-    initline = ''
-    kernelversion = ''
-    linenum = 0
-    titlelinenum = 0
-
-    #Grab first kernel entry (title/root/kernel/init lines)
-    for line in grubconf.split('\n'):
-        if re.match('^title', line):
-            titleline = line
-            titlelinenum = linenum
-        if re.match('(^\s+)root', line):
-            rootline = line
-        if re.match('(^\s+)kernel', line):
-            kernelline = line
-            for word in line.split(' '):
-                if 'vmlinuz' in word:
-                    kernelversion = word.split('vmlinuz-')[-1]
-        if re.match('(^\s+)initrd', line):
-            initline = line
-        if (kernelline != '') and (initline != ''):
-            break
-        else:
-            linenum += 1
-
-    # insert new entry into the newgrubconf list:
-    linenum = 0
-    newgrubconf = []
-    for line in grubconf.split('\n'):
-        line = line.rstrip('\n')
-        if linenum == titlelinenum:
-            newtitle = re.sub(kernelversion, newversion, titleline)
-            newroot = re.sub(kernelversion, newversion, rootline)
-            newkernel = re.sub(kernelversion, newversion, kernelline)
-            newinit = re.sub(kernelversion, newversion, initline)
-            newgrubconf.append(newtitle)
-            newgrubconf.append(newroot)
-            newgrubconf.append(newkernel)
-            newgrubconf.append(newinit)
-            newgrubconf.append('')
-            newgrubconf.append(line)
-        else:
-            newgrubconf.append(line)
-        linenum += 1
-    return newgrubconf
-
-def get_version_from_pkg(remote, ostype):
-    """
-    Round-about way to get the newest kernel uname -r compliant version string
-    from the virtual package which is the newest kernel for debian/ubuntu.
-    """
-    output, err_mess = StringIO(), StringIO()
-    newest=''
-    # The Depends field of the virtual package has the uname -r string in the dependency's package name. Grab that.
-    if 'debian' in ostype:
-        remote.run(args=['sudo', 'apt-get', '-y', 'install', 'linux-image-amd64' ], stdout=output, stderr=err_mess )
-        remote.run(args=['dpkg', '-s', 'linux-image-amd64' ], stdout=output, stderr=err_mess )
-        for line in output.getvalue().split('\n'):
-            if 'Depends:' in line:
-                newest = line.split('linux-image-')[1]
-                output.close()
-                err_mess.close()
-                return newest
-    # On Ubuntu the newest kernel is a dependency of a dependency.
-    if 'ubuntu' in ostype:
-        try:
-            remote.run(args=['sudo', 'apt-get', '-y', 'install', 'linux-image-current-generic' ], stdout=output, stderr=err_mess )
-            remote.run(args=['dpkg', '-s', 'linux-image-current-generic' ], stdout=output, stderr=err_mess )
-            for line in output.getvalue().split('\n'):
-                if 'Depends:' in line:
-                    depends = line.split('Depends: ')[1]
-            remote.run(args=['dpkg', '-s', depends ], stdout=output, stderr=err_mess )
-        except run.CommandFailedError:
-            # Non precise ubuntu machines (like trusty) don't have
-            # linux-image-current-generic so use linux-image-generic instead.
-            remote.run(args=['sudo', 'apt-get', '-y', 'install', 'linux-image-generic' ], stdout=output, stderr=err_mess )
-            remote.run(args=['dpkg', '-s', 'linux-image-generic' ], stdout=output, stderr=err_mess )
-        for line in output.getvalue().split('\n'):
-            if 'Depends:' in line:
-                newest = line.split('linux-image-')[1]
-                if ',' in newest:
-                    newest = newest.split(',')[0]
-    output.close()
-    err_mess.close()
-    return newest
-
-def task(ctx, config):
-    """
-    Make sure the specified kernel is installed.
-    This can be a branch, tag, or sha1 of ceph-client.git.
-
-    To install the kernel from the master branch on all hosts::
-
-        kernel:
-        tasks:
-        - ceph:
-
-    To wait 5 minutes for hosts to reboot::
-
-        kernel:
-          timeout: 300
-        tasks:
-        - ceph:
-
-    To specify different kernels for each client::
-
-        kernel:
-          client.0:
-            branch: foo
-          client.1:
-            tag: v3.0rc1
-          client.2:
-            sha1: db3540522e955c1ebb391f4f5324dff4f20ecd09
-        tasks:
-        - ceph:
-
-    You can specify a branch, tag, or sha1 for all roles
-    of a certain type (more specific roles override this)::
-
-        kernel:
-          client:
-            tag: v3.0
-          osd:
-            branch: btrfs_fixes
-          client.1:
-            branch: more_specific_branch
-          osd.3:
-            branch: master
-
-    To enable kdb::
-
-        kernel:
-          kdb: true
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    assert config is None or isinstance(config, dict), \
-        "task kernel only supports a dictionary for configuration"
-
-    timeout = 300
-    if config is not None and 'timeout' in config:
-        timeout = config.pop('timeout')
-
-    config = normalize_config(ctx, config)
-    validate_config(ctx, config)
-    log.info('config %s' % config)
-
-    need_install = {}  # sha1 to dl, or path to deb
-    need_version = {}  # utsrelease or sha1
-    kdb = {}
-    for role, role_config in config.iteritems():
-        if role_config.get('deb'):
-            path = role_config.get('deb')
-            match = re.search('\d+-g(\w{7})', path)
-            if match:
-                sha1 = match.group(1)
-                log.info('kernel deb sha1 appears to be %s', sha1)
-                if need_to_install(ctx, role, sha1):
-                    need_install[role] = path
-                    need_version[role] = sha1
-            else:
-                log.info('unable to extract sha1 from deb path, forcing install')
-                assert False
-        elif role_config.get('sha1') == 'distro':
-            if need_to_install_distro(ctx, role):
-                need_install[role] = 'distro'
-                need_version[role] = 'distro'
-        else:
-            larch, ldist = _find_arch_and_dist(ctx)
-            sha1, base_url = teuthology.get_ceph_binary_url(
-                package='kernel',
-                branch=role_config.get('branch'),
-                tag=role_config.get('tag'),
-                sha1=role_config.get('sha1'),
-                flavor='basic',
-                format='deb',
-                dist=ldist,
-                arch=larch,
-                )
-            log.debug('sha1 for {role} is {sha1}'.format(role=role, sha1=sha1))
-            ctx.summary['{role}-kernel-sha1'.format(role=role)] = sha1
-
-            if need_to_install(ctx, role, sha1):
-                version = sha1
-                version_url = urlparse.urljoin(base_url, 'version')
-                try:
-                    version_fp = urllib2.urlopen(version_url)
-                    version = version_fp.read().rstrip('\n')
-                    version_fp.close()
-                except urllib2.HTTPError:
-                    log.debug('failed to get utsrelease string using url {url}'.format(
-                              url=version_url))
-
-                need_install[role] = sha1
-                need_version[role] = version
-
-        # enable or disable kdb if specified, otherwise do not touch
-        if role_config.get('kdb') is not None:
-            kdb[role] = role_config.get('kdb')
-
-    if need_install:
-        install_firmware(ctx, need_install)
-        download_kernel(ctx, need_install)
-        install_and_reboot(ctx, need_install)
-        wait_for_reboot(ctx, need_version, timeout)
-
-    enable_disable_kdb(ctx, kdb)
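
The task() function above boils down to a per-role decision about what to install
and which version string to wait for after the reboot. A hedged editorial sketch of
that decision (hypothetical helper names, not part of the task)::

    import re

    def plan_for_role(role_config, resolve_sha1):
        """Return (what to install, version to wait for) for one role."""
        if role_config.get('deb'):
            path = role_config['deb']
            sha1 = re.search(r'\d+-g(\w{7})', path).group(1)  # sha1 embedded in the deb name
            return path, sha1            # copy the local deb, wait until that sha1 is running
        if role_config.get('sha1') == 'distro':
            return 'distro', 'distro'    # newest packaged kernel for the distro
        # otherwise a gitbuilder kernel: branch/tag/sha1 is resolved to a sha1 first,
        # and the utsrelease string is used instead when it can be fetched
        sha1 = resolve_sha1(role_config)
        return sha1, sha1
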
diff --git a/teuthology/task/knfsd.py b/teuthology/task/knfsd.py
deleted file mode 100644 (file)
index 55bb937..0000000
+++ /dev/null
@@ -1,142 +0,0 @@
-"""
-Export/Unexport a ``nfs server`` client.
-"""
-import contextlib
-import logging
-import os
-
-from teuthology import misc as teuthology
-
-log = logging.getLogger(__name__)
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Export/Unexport a ``nfs server`` client.
-
-    The config is optional and defaults to exporting on all clients. If
-    a config is given, it is expected to be a list or dict of clients to do
-    this operation on. You must have specified ``ceph-fuse`` or
-    ``kclient`` on all clients specified for knfsd.
-
-    Example that exports all clients::
-
-        tasks:
-        - ceph:
-        - kclient:
-        - knfsd:
-        - interactive:
-
-    Example that uses both ``kclient`` and ``ceph-fuse``::
-
-        tasks:
-        - ceph:
-        - ceph-fuse: [client.0]
-        - kclient: [client.1]
-        - knfsd: [client.0, client.1]
-        - interactive:
-
-    Example that specifies export options::
-
-        tasks:
-        - ceph:
-        - kclient: [client.0, client.1]
-        - knfsd:
-            client.0:
-              options: [rw,root_squash]
-            client.1:
-        - interactive:
-
-    Note that when options aren't specified, rw,no_root_squash is the default.
-    When you specify options, the defaults are as specified by exports(5).
-
-    So if empty options are specified, i.e. options: [] these are the defaults:
-        ro,sync,wdelay,hide,nocrossmnt,secure,root_squash,no_all_squash,
-        no_subtree_check,secure_locks,acl,anonuid=65534,anongid=65534
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    log.info('Exporting nfs server...')
-
-    if config is None:
-        config = dict(('client.{id}'.format(id=id_), None)
-                  for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client'))
-    elif isinstance(config, list):
-        config = dict((name, None) for name in config)
-
-    clients = list(teuthology.get_clients(ctx=ctx, roles=config.keys()))
-
-    for id_, remote in clients:
-        mnt = os.path.join(teuthology.get_testdir(ctx), 'mnt.{id}'.format(id=id_))
-        client_config = config.get("client.%s" % id_)
-        if client_config is None:
-            client_config = {}
-        log.debug("Client client.%s config is %s" % (id_, client_config))
-
-        if client_config.get('options') is not None:
-            opts = ','.join(client_config.get('options'))
-        else:
-            opts = 'rw,no_root_squash'
-
-        # Undocumented option to export to any client in case
-        # testing in interactive mode from other unspecified clients.
-        wildcard = False
-        if client_config.get('wildcard') is not None:
-            wildcard = True
-        
-        log.info('Exporting knfsd client.{id} at {remote} *:{mnt} ({opt})...'.format(
-                id=id_, remote=remote, mnt=mnt, opt=opts))
-
-        """
-        Should the user want to run with root_squash enabled, there is no
-        way to write anything to the initial ceph root dir which is set to
-        rwxr-xr-x root root.
-
-        This could possibly break test cases that make assumptions about
-        the initial state of the root dir.
-        """
-        remote.run(
-            args=[
-                'sudo',
-                'chmod',
-                "777",
-                '{MNT}'.format(MNT=mnt),
-                ],
-            )
-        args=[
-            'sudo',
-            "exportfs",
-            '-o',
-            'fsid=123{id},{opt}'.format(id=id_,opt=opts),
-            ]
-
-        if wildcard:
-            args += ['*:{MNT}'.format(MNT=mnt)]
-        else:
-            """
-            DEFAULT
-            Prevent bogus clients from old runs from accessing our
-            export.  Specify all node addresses for this run.
-            """
-            ips = [host for (host, _) in (remote.ssh.get_transport().getpeername() for (remote, roles) in ctx.cluster.remotes.items())]
-            for ip in ips:
-                args += [ '{ip}:{MNT}'.format(ip=ip, MNT=mnt) ]
-
-        log.info('remote run {args}'.format(args=args))
-        remote.run( args=args )
-
-    try:
-        yield
-    finally:
-        log.info('Unexporting nfs server...')
-        for id_, remote in clients:
-            log.debug('Unexporting client client.{id}...'.format(id=id_))
-            mnt = os.path.join(teuthology.get_testdir(ctx), 'mnt.{id}'.format(id=id_))
-            remote.run(
-                args=[
-                    'sudo',
-                    'exportfs',
-                    '-au',
-                    ],
-                )
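
For reference, the export arguments assembled above amount to an exportfs call with
a per-client fsid plus either a wildcard or one entry per node IP. A hedged sketch
of that assembly (hypothetical helper, mirroring the logic above)::

    def export_args(id_, mnt, options=None, node_ips=()):
        opts = ','.join(options) if options else 'rw,no_root_squash'
        args = ['sudo', 'exportfs', '-o', 'fsid=123{0},{1}'.format(id_, opts)]
        if not node_ips:
            return args + ['*:{0}'.format(mnt)]                  # wildcard export
        # restrict the export to this run's nodes rather than the whole world
        return args + ['{0}:{1}'.format(ip, mnt) for ip in node_ips]
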
diff --git a/teuthology/task/localdir.py b/teuthology/task/localdir.py
deleted file mode 100644 (file)
index 8a84514..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-"""
-Localdir
-"""
-import contextlib
-import logging
-import os
-
-from teuthology import misc as teuthology
-
-log = logging.getLogger(__name__)
-
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Create a mount dir 'client' that is just the local disk:
-
-    Example that "mounts" all clients:
-
-        tasks:
-        - localdir:
-        - interactive:
-
-    Example for a specific client:
-
-        tasks:
-        - localdir: [client.2]
-        - interactive:
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    log.info('Creating local mnt dirs...')
-
-    testdir = teuthology.get_testdir(ctx)
-
-    if config is None:
-        config = list('client.{id}'.format(id=id_)
-                      for id_ in teuthology.all_roles_of_type(ctx.cluster,
-                                                              'client'))
-
-    clients = list(teuthology.get_clients(ctx=ctx, roles=config))
-    for id_, remote in clients:
-        mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
-        log.info('Creating dir {remote} {mnt}...'.format(
-                remote=remote, mnt=mnt))
-        remote.run(
-            args=[
-                'mkdir',
-                '--',
-                mnt,
-                ],
-            )
-
-    try:
-        yield
-
-    finally:
-        log.info('Removing local mnt dirs...')
-        for id_, remote in clients:
-            mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
-            remote.run(
-                args=[
-                    'rm',
-                    '-rf',
-                    '--',
-                    mnt,
-                    ],
-                )
diff --git a/teuthology/task/lockfile.py b/teuthology/task/lockfile.py
deleted file mode 100644 (file)
index 10ac1e8..0000000
+++ /dev/null
@@ -1,239 +0,0 @@
-"""
-Locking tests
-"""
-import logging
-import os
-
-from ..orchestra import run
-from teuthology import misc as teuthology
-import time
-import gevent
-
-
-log = logging.getLogger(__name__)
-
-def task(ctx, config):
-    """
-    This task is designed to test locking. It runs an executable
-    for each lock attempt you specify, at 0.1 second intervals (to
-    preserve ordering of the locks).
-    You can also introduce longer intervals by setting an entry
-    as a number of seconds, rather than the lock dictionary.
-    The config is a list of dictionaries. For each entry in the list, you
-    must name the "client" to run on, the "file" to lock, and
-    the "holdtime" to hold the lock.
-    Optional entries are the "offset" and "length" of the lock. You can also specify a
-    "maxwait" timeout period which fails if the executable takes longer
-    to complete, and an "expectfail".
-    An example:
-    tasks:
-    - ceph:
-    - ceph-fuse: [client.0, client.1]
-    - lockfile:
-      [{client: client.0, lockfile: testfile, holdtime: 10},
-      {client: client.1, lockfile: testfile, holdtime: 0, maxwait: 0, expectfail: true},
-      {client: client.1, lockfile: testfile, holdtime: 0, maxwait: 15, expectfail: false},
-      10,
-      {client: client.1, lockfile: testfile, holdtime: 5},
-      {client: client.2, lockfile: testfile, holdtime: 5, maxwait: 1, expectfail: True}]
-
-      
-    In the past this test would have failed; there was a bug where waitlocks weren't
-    cleaned up if the process failed. More involved scenarios are also possible.
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    log.info('Starting lockfile')
-    try:
-        assert isinstance(config, list), \
-            "task lockfile got invalid config"
-       
-        log.info("building executable on each host")
-        buildprocs = list()
-        # build the locker executable on each client
-        clients = list()
-        files = list()
-        for op in config:
-            if not isinstance(op, dict):
-                continue
-            log.info("got an op")
-            log.info("op['client'] = %s", op['client'])
-            clients.append(op['client'])
-            files.append(op['lockfile'])
-            if not "expectfail" in op:
-                op["expectfail"] = False
-            badconfig = False
-            if not "client" in op:
-                badconfig = True
-            if not "lockfile" in op:
-                badconfig = True
-            if not "holdtime" in op:
-                badconfig = True
-            if badconfig:
-                raise KeyError("bad config {op_}".format(op_=op))
-        
-        testdir = teuthology.get_testdir(ctx)
-        clients = set(clients)
-        files = set(files)
-        lock_procs = list()
-        for client in clients:
-            (client_remote,) = ctx.cluster.only(client).remotes.iterkeys()
-            log.info("got a client remote")
-            (_, _, client_id) = client.partition('.')
-            filepath = os.path.join(testdir, 'mnt.{id}'.format(id=client_id), op["lockfile"])
-            
-            proc = client_remote.run(
-                args=[
-                    'mkdir', '-p', '{tdir}/archive/lockfile'.format(tdir=testdir),
-                    run.Raw('&&'),
-                    'mkdir', '-p', '{tdir}/lockfile'.format(tdir=testdir),
-                    run.Raw('&&'),
-                    'wget',
-                    '-nv',
-                    '--no-check-certificate',
-                    'https://raw.github.com/gregsfortytwo/FileLocker/master/sclockandhold.cpp',
-                    '-O', '{tdir}/lockfile/sclockandhold.cpp'.format(tdir=testdir),
-                    run.Raw('&&'),
-                    'g++', '{tdir}/lockfile/sclockandhold.cpp'.format(tdir=testdir),
-                    '-o', '{tdir}/lockfile/sclockandhold'.format(tdir=testdir)
-                    ],
-                logger=log.getChild('lockfile_client.{id}'.format(id=client_id)),
-                wait=False
-                )      
-            log.info('building sclockandhold on client{id}'.format(id=client_id))
-            buildprocs.append(proc)
-            
-        # wait for builds to finish
-        run.wait(buildprocs)
-        log.info('finished building sclockandhold on all clients')
-            
-        # create the files to run these locks on
-        client = clients.pop()
-        clients.add(client)
-        (client_remote,) = ctx.cluster.only(client).remotes.iterkeys()
-        (_, _, client_id) = client.partition('.')
-        file_procs = list()
-        for lockfile in files:
-            filepath = os.path.join(testdir, 'mnt.{id}'.format(id=client_id), lockfile)
-            proc = client_remote.run(
-                args=[
-                    'sudo',
-                    'touch',
-                    filepath,
-                    ],
-                logger=log.getChild('lockfile_createfile'),
-                wait=False
-                )
-            file_procs.append(proc)
-        run.wait(file_procs)
-        file_procs = list()
-        for lockfile in files:
-            filepath = os.path.join(testdir, 'mnt.{id}'.format(id=client_id), lockfile)
-            proc = client_remote.run(
-                args=[
-                    'sudo', 'chown', 'ubuntu.ubuntu', filepath
-                    ],
-                logger=log.getChild('lockfile_createfile'),
-                wait=False
-                )
-            file_procs.append(proc)
-        run.wait(file_procs)
-        log.debug('created files to lock')
-
-        # now actually run the locktests
-        for op in config:
-            if not isinstance(op, dict):
-                assert isinstance(op, int) or isinstance(op, float)
-                log.info("sleeping for {sleep} seconds".format(sleep=op))
-                time.sleep(op)
-                continue
-            greenlet = gevent.spawn(lock_one, op, ctx)
-            lock_procs.append((greenlet, op))
-            time.sleep(0.1) # to provide proper ordering
-        #for op in config
-        
-        for (greenlet, op) in lock_procs:
-            log.debug('checking lock for op {op_}'.format(op_=op))
-            result = greenlet.get()
-            if not result:
-                raise Exception("Got wrong result for op {op_}".format(op_=op))
-        # for (greenlet, op) in lock_procs
-
-    finally:
-        #cleanup!
-        if lock_procs:
-            for (greenlet, op) in lock_procs:
-                log.debug('closing proc for op {op_}'.format(op_=op))
-                greenlet.kill(block=True)
-
-        for client in clients:
-            (client_remote,)  = ctx.cluster.only(client).remotes.iterkeys()
-            (_, _, client_id) = client.partition('.')
-            filepath = os.path.join(testdir, 'mnt.{id}'.format(id=client_id), op["lockfile"])
-            proc = client_remote.run(
-                args=[
-                    'rm', '-rf', '{tdir}/lockfile'.format(tdir=testdir),
-                    run.Raw(';'),
-                    'sudo', 'rm', '-rf', filepath
-                    ],
-                wait=True
-                ) #proc
-    #done!
-# task
-
-def lock_one(op, ctx):
-    """
-    Perform the individual lock
-    """
-    log.debug('spinning up locker with op={op_}'.format(op_=op))
-    timeout = None
-    proc = None
-    result = None
-    (client_remote,)  = ctx.cluster.only(op['client']).remotes.iterkeys()
-    (_, _, client_id) = op['client'].partition('.')
-    testdir = teuthology.get_testdir(ctx)
-    filepath = os.path.join(testdir, 'mnt.{id}'.format(id=client_id), op["lockfile"])
-
-    if "maxwait" in op:
-        timeout = gevent.Timeout(seconds=float(op["maxwait"]))
-        timeout.start()
-    try:
-        proc = client_remote.run(
-            args=[
-                'adjust-ulimits',
-                'ceph-coverage',
-                '{tdir}/archive/coverage'.format(tdir=testdir),
-                'daemon-helper',
-                'kill',
-                '{tdir}/lockfile/sclockandhold'.format(tdir=testdir),
-                filepath,
-                '{holdtime}'.format(holdtime=op["holdtime"]),
-                '{offset}'.format(offset=op.get("offset", '0')),
-                '{length}'.format(length=op.get("length", '1')),
-                ],
-            logger=log.getChild('lockfile_client.{id}'.format(id=client_id)),
-            wait=False,
-            stdin=run.PIPE,
-            check_status=False
-            )
-        result = proc.exitstatus.get()
-    except gevent.Timeout as tout:
-        if tout is not timeout:
-            raise
-        if bool(op["expectfail"]):
-            result = 1
-        if result == 1:
-            if bool(op["expectfail"]):
-                log.info("failed as expected for op {op_}".format(op_=op))
-            else:
-                raise Exception("Unexpectedly failed to lock {op_} within given timeout!".format(op_=op))
-    finally: #clean up proc
-        if timeout is not None:
-            timeout.cancel()
-        if proc is not None:
-            proc.stdin.close()
-
-    ret = (result == 0 and not bool(op["expectfail"])) or (result == 1 and bool(op["expectfail"]))
-
-    return ret  #we made it through
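The pass/fail rule that lock_one applies above is easy to restate on its own. A minimal standalone sketch, with hypothetical names, of the same check:

    def op_succeeded(exitstatus, expectfail):
        # Mirrors lock_one's final check: exit 0 must coincide with expectfail=False,
        # exit 1 with expectfail=True; anything else counts as a failure.
        return (exitstatus == 0 and not expectfail) or (exitstatus == 1 and expectfail)

    assert op_succeeded(0, False)      # lock acquired, as expected
    assert op_succeeded(1, True)       # lock refused, and that was expected
    assert not op_succeeded(1, False)  # unexpected failure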
diff --git a/teuthology/task/locktest.py b/teuthology/task/locktest.py
deleted file mode 100755 (executable)
index 7832369..0000000
+++ /dev/null
@@ -1,134 +0,0 @@
-"""
-locktests
-"""
-import logging
-
-from ..orchestra import run
-from teuthology import misc as teuthology
-
-log = logging.getLogger(__name__)
-
-def task(ctx, config):
-    """
-    Run locktests, from the xfstests suite, on the given
-    clients. Whether the clients are ceph-fuse or kernel does not
-    matter, and the two clients can refer to the same mount.
-
-    The config is a list of two clients to run the locktest on. The
-    first client will be the host.
-
-    For example:
-       tasks:
-       - ceph:
-       - ceph-fuse: [client.0, client.1]
-       - locktest:
-           [client.0, client.1]
-
-    This task does not yield; there would be little point.
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-
-    assert isinstance(config, list)
-    log.info('fetching and building locktests...')
-    (host,) = ctx.cluster.only(config[0]).remotes
-    (client,) = ctx.cluster.only(config[1]).remotes
-    ( _, _, host_id) = config[0].partition('.')
-    ( _, _, client_id) = config[1].partition('.')
-    testdir = teuthology.get_testdir(ctx)
-    hostmnt = '{tdir}/mnt.{id}'.format(tdir=testdir, id=host_id)
-    clientmnt = '{tdir}/mnt.{id}'.format(tdir=testdir, id=client_id)
-
-    try:
-        for client_name in config:
-            log.info('building on {client_}'.format(client_=client_name))
-            ctx.cluster.only(client_name).run(
-                args=[
-                    # explicitly does not support multiple locktest tasks
-                    # in a single run; the result archival would conflict
-                    'mkdir', '{tdir}/archive/locktest'.format(tdir=testdir),
-                    run.Raw('&&'),
-                    'mkdir', '{tdir}/locktest'.format(tdir=testdir),
-                    run.Raw('&&'),
-                    'wget',
-                    '-nv',
-                    'https://raw.github.com/gregsfortytwo/xfstests-ceph/master/src/locktest.c',
-                    '-O', '{tdir}/locktest/locktest.c'.format(tdir=testdir),
-                    run.Raw('&&'),
-                    'g++', '{tdir}/locktest/locktest.c'.format(tdir=testdir),
-                    '-o', '{tdir}/locktest/locktest'.format(tdir=testdir)
-                    ],
-                logger=log.getChild('locktest_client.{id}'.format(id=client_name)),
-                )
-
-        log.info('built locktest on each client')
-        
-        host.run(args=['sudo', 'touch',
-                       '{mnt}/locktestfile'.format(mnt=hostmnt),
-                       run.Raw('&&'),
-                       'sudo', 'chown', 'ubuntu.ubuntu',
-                       '{mnt}/locktestfile'.format(mnt=hostmnt)
-                       ]
-                 )
-
-        log.info('starting on host')
-        hostproc = host.run(
-            args=[
-                '{tdir}/locktest/locktest'.format(tdir=testdir),
-                '-p', '6788',
-                '-d',
-                '{mnt}/locktestfile'.format(mnt=hostmnt),
-                ],
-            wait=False,
-            logger=log.getChild('locktest.host'),
-            )
-        log.info('starting on client')
-        (_,_,hostaddr) = host.name.partition('@')
-        clientproc = client.run(
-            args=[
-                '{tdir}/locktest/locktest'.format(tdir=testdir),
-                '-p', '6788',
-                '-d',
-                '-h', hostaddr,
-                '{mnt}/locktestfile'.format(mnt=clientmnt),
-                ],
-            logger=log.getChild('locktest.client'),
-            wait=False
-            )
-        
-        hostresult = hostproc.exitstatus.get()
-        clientresult = clientproc.exitstatus.get()
-        if (hostresult != 0) or (clientresult != 0):
-            raise Exception("Did not pass locking test!")
-        log.info('finished locktest executable with results {r} and {s}'. \
-                     format(r=hostresult, s=clientresult))
-
-    finally:
-        log.info('cleaning up host dir')
-        host.run(
-            args=[
-                'mkdir', '-p', '{tdir}/locktest'.format(tdir=testdir),
-                run.Raw('&&'),
-                'rm', '-f', '{tdir}/locktest/locktest.c'.format(tdir=testdir),
-                run.Raw('&&'),
-                'rm', '-f', '{tdir}/locktest/locktest'.format(tdir=testdir),
-                run.Raw('&&'),
-                'rmdir', '{tdir}/locktest'.format(tdir=testdir)
-                ],
-            logger=log.getChild('.{id}'.format(id=config[0])),
-            )
-        log.info('cleaning up client dir')
-        client.run(
-            args=[
-                'mkdir', '-p', '{tdir}/locktest'.format(tdir=testdir),
-                run.Raw('&&'),
-                'rm', '-f', '{tdir}/locktest/locktest.c'.format(tdir=testdir),
-                run.Raw('&&'),
-                'rm', '-f', '{tdir}/locktest/locktest'.format(tdir=testdir),
-                run.Raw('&&'),
-                'rmdir', '{tdir}/locktest'.format(tdir=testdir)
-                ],
-            logger=log.getChild('.{id}'.format(\
-                    id=config[1])),
-            )
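Both sides of the locktest above operate on a lockfile under the per-client mount, which is derived from the role name. A small standalone sketch of that derivation, using a hypothetical test directory:

    testdir = '/home/ubuntu/cephtest'   # hypothetical; the real value comes from teuthology.get_testdir(ctx)
    role = 'client.0'
    _, _, client_id = role.partition('.')
    mount = '{tdir}/mnt.{id}'.format(tdir=testdir, id=client_id)
    assert mount == '/home/ubuntu/cephtest/mnt.0'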
diff --git a/teuthology/task/lost_unfound.py b/teuthology/task/lost_unfound.py
deleted file mode 100644 (file)
index 700a300..0000000
+++ /dev/null
@@ -1,153 +0,0 @@
-"""
-Lost_unfound
-"""
-import logging
-import ceph_manager
-from teuthology import misc as teuthology
-from teuthology.task_util.rados import rados
-
-log = logging.getLogger(__name__)
-
-def task(ctx, config):
-    """
-    Test handling of lost objects.
-
-    A pretty rigid cluster is brought up and tested by this task
-    """
-    if config is None:
-        config = {}
-    assert isinstance(config, dict), \
-        'lost_unfound task only accepts a dict for configuration'
-    first_mon = teuthology.get_first_mon(ctx, config)
-    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-
-    manager = ceph_manager.CephManager(
-        mon,
-        ctx=ctx,
-        logger=log.getChild('ceph_manager'),
-        )
-
-    while len(manager.get_osd_status()['up']) < 3:
-        manager.sleep(10)
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-    manager.wait_for_clean()
-
-    # something that is always there
-    dummyfile = '/etc/fstab'
-
-    # take an osd out until the very end
-    manager.kill_osd(2)
-    manager.mark_down_osd(2)
-    manager.mark_out_osd(2)
-
-    # kludge to make sure they get a map
-    rados(ctx, mon, ['-p', 'data', 'put', 'dummy', dummyfile])
-
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.wait_for_recovery()
-
-    # create old objects
-    for f in range(1, 10):
-        rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])
-        rados(ctx, mon, ['-p', 'data', 'put', 'existed_%d' % f, dummyfile])
-        rados(ctx, mon, ['-p', 'data', 'rm', 'existed_%d' % f])
-
-    # delay recovery, and make the pg log very long (to prevent backfill)
-    manager.raw_cluster_cmd(
-            'tell', 'osd.1',
-            'injectargs',
-            '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000'
-            )
-
-    manager.kill_osd(0)
-    manager.mark_down_osd(0)
-    
-    for f in range(1, 10):
-        rados(ctx, mon, ['-p', 'data', 'put', 'new_%d' % f, dummyfile])
-        rados(ctx, mon, ['-p', 'data', 'put', 'existed_%d' % f, dummyfile])
-        rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])
-
-    # bring osd.0 back up, let it peer, but don't replicate the new
-    # objects...
-    log.info('osd.0 command_args is %s' % 'foo')
-    log.info(ctx.daemons.get_daemon('osd', 0).command_args)
-    ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend([
-            '--osd-recovery-delay-start', '1000'
-            ])
-    manager.revive_osd(0)
-    manager.mark_in_osd(0)
-    manager.wait_till_osd_is_up(0)
-
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.wait_till_active()
-
-    # take out osd.1 and the only copy of those objects.
-    manager.kill_osd(1)
-    manager.mark_down_osd(1)
-    manager.mark_out_osd(1)
-    manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')
-
-    # bring up osd.2 so that things would otherwise, in theory, recover fully
-    manager.revive_osd(2)
-    manager.mark_in_osd(2)
-    manager.wait_till_osd_is_up(2)
-
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-    manager.wait_till_active()
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-
-    # verify that there are unfound objects
-    unfound = manager.get_num_unfound_objects()
-    log.info("there are %d unfound objects" % unfound)
-    assert unfound
-
-    # mark stuff lost
-    pgs = manager.get_pg_stats()
-    for pg in pgs:
-        if pg['stat_sum']['num_objects_unfound'] > 0:
-            primary = 'osd.%d' % pg['acting'][0]
-
-            # verify that i can list them direct from the osd
-            log.info('listing missing/lost in %s state %s', pg['pgid'],
-                     pg['state'])
-            m = manager.list_pg_missing(pg['pgid'])
-            #log.info('%s' % m)
-            assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']
-            num_unfound=0
-            for o in m['objects']:
-                if len(o['locations']) == 0:
-                    num_unfound += 1
-            assert m['num_unfound'] == num_unfound
-
-            log.info("reverting unfound in %s on %s", pg['pgid'], primary)
-            manager.raw_cluster_cmd('pg', pg['pgid'],
-                                    'mark_unfound_lost', 'revert')
-        else:
-            log.info("no unfound in %s", pg['pgid'])
-
-    manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-    manager.wait_for_recovery()
-
-    # verify result
-    for f in range(1, 10):
-        err = rados(ctx, mon, ['-p', 'data', 'get', 'new_%d' % f, '-'])
-        assert err
-        err = rados(ctx, mon, ['-p', 'data', 'get', 'existed_%d' % f, '-'])
-        assert err
-        err = rados(ctx, mon, ['-p', 'data', 'get', 'existing_%d' % f, '-'])
-        assert not err
-
-    # see if osd.1 can cope
-    manager.revive_osd(1)
-    manager.mark_in_osd(1)
-    manager.wait_till_osd_is_up(1)
-    manager.wait_for_clean()
diff --git a/teuthology/task/manypools.py b/teuthology/task/manypools.py
deleted file mode 100644 (file)
index 32b9d56..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-"""
-Force pg creation on all osds
-"""
-from teuthology import misc as teuthology
-from ..orchestra import run
-import logging
-
-log = logging.getLogger(__name__)
-
-def task(ctx, config):
-    """
-    Create the specified number of pools and write 16 objects to them (thereby forcing
-    the PG creation on each OSD). This task creates pools from all the clients,
-    in parallel. It is easy to add other daemon types which have the appropriate
-    permissions, but I don't think anything else does.
-    The config is just the number of pools to create. I recommend setting
-    "mon create pg interval" to a very low value in your ceph config to speed
-    this up.
-    
-    You probably want to do this to look at memory consumption, and
-    maybe to test how performance changes with the number of PGs. For example:
-    
-    tasks:
-    - ceph:
-        config:
-          mon:
-            mon create pg interval: 1
-    - manypools: 3000
-    - radosbench:
-        clients: [client.0]
-        time: 360
-    """
-    
-    log.info('creating {n} pools'.format(n=config))
-    
-    poolnum = int(config)
-    creator_remotes = []
-    client_roles = teuthology.all_roles_of_type(ctx.cluster, 'client')
-    log.info('got client_roles={client_roles_}'.format(client_roles_=client_roles))
-    for role in client_roles:
-        log.info('role={role_}'.format(role_=role))
-        (creator_remote, ) = ctx.cluster.only('client.{id}'.format(id=role)).remotes.iterkeys()
-        creator_remotes.append((creator_remote, 'client.{id}'.format(id=role)))
-
-    remaining_pools = poolnum
-    poolprocs=dict()
-    while (remaining_pools > 0):
-        log.info('{n} pools remaining to create'.format(n=remaining_pools))
-        for remote, role_ in creator_remotes:
-            poolnum = remaining_pools
-            remaining_pools -= 1
-            if remaining_pools < 0:
-                continue
-            log.info('creating pool{num} on {role}'.format(num=poolnum, role=role_))
-            proc = remote.run(
-                args=[
-                    'rados',
-                    '--name', role_,
-                    'mkpool', 'pool{num}'.format(num=poolnum), '-1',
-                    run.Raw('&&'),
-                    'rados',
-                    '--name', role_,
-                    '--pool', 'pool{num}'.format(num=poolnum),
-                    'bench', '0', 'write', '-t', '16', '--block-size', '1'
-                    ],
-                wait=False
-            )
-            log.info('waiting for pool and object creates')
-            poolprocs[remote] = proc
-
-        run.wait(poolprocs.itervalues())
-    
-    log.info('created all {n} pools and wrote 16 objects to each'.format(n=poolnum))
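The manypools task above walks the creating clients repeatedly, counting remaining_pools down by one per turn. A simplified, standalone sketch of that distribution, with hypothetical client names:

    def assign_pools(poolnum, clients):
        # Hand out one pool per client per pass until poolnum pools are assigned.
        assignments = []
        remaining = poolnum
        while remaining > 0:
            for client in clients:
                if remaining <= 0:
                    break
                assignments.append((client, 'pool{num}'.format(num=remaining)))
                remaining -= 1
        return assignments

    print(assign_pools(5, ['client.0', 'client.1']))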
diff --git a/teuthology/task/mds_creation_failure.py b/teuthology/task/mds_creation_failure.py
deleted file mode 100644 (file)
index a3d052f..0000000
+++ /dev/null
@@ -1,83 +0,0 @@
-
-import logging
-import contextlib
-import time
-import ceph_manager
-from teuthology import misc
-from teuthology.orchestra.run import CommandFailedError, Raw
-
-log = logging.getLogger(__name__)
-
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Go through filesystem creation with a synthetic failure in an MDS
-    in its 'up:creating' state, to exercise the retry behaviour.
-    """
-    # Grab handles to the teuthology objects of interest
-    mdslist = list(misc.all_roles_of_type(ctx.cluster, 'mds'))
-    if len(mdslist) != 1:
-        # Require exactly one MDS, the code path for creation failure when
-        # a standby is available is different
-        raise RuntimeError("This task requires exactly one MDS")
-
-    mds_id = mdslist[0]
-    (mds_remote,) = ctx.cluster.only('mds.{_id}'.format(_id=mds_id)).remotes.iterkeys()
-    manager = ceph_manager.CephManager(
-        mds_remote, ctx=ctx, logger=log.getChild('ceph_manager'),
-    )
-
-    # Stop the MDS and reset the filesystem so that next start will go into CREATING
-    mds = ctx.daemons.get_daemon('mds', mds_id)
-    mds.stop()
-    data_pool_id = manager.get_pool_num("data")
-    md_pool_id = manager.get_pool_num("metadata")
-    manager.raw_cluster_cmd_result('mds', 'newfs', str(md_pool_id), str(data_pool_id),
-                                   '--yes-i-really-mean-it')
-
-    # Start the MDS with mds_kill_create_at set, it will crash during creation
-    mds.restart_with_args(["--mds_kill_create_at=1"])
-    try:
-        mds.wait_for_exit()
-    except CommandFailedError as e:
-        if e.exitstatus == 1:
-            log.info("MDS creation killed as expected")
-        else:
-            log.error("Unexpected status code %s" % e.exitstatus)
-            raise
-
-    # Since I have intentionally caused a crash, I will clean up the resulting core
-    # file to avoid task.internal.coredump seeing it as a failure.
-    log.info("Removing core file from synthetic MDS failure")
-    mds_remote.run(args=['rm', '-f', Raw("{archive}/coredump/*.core".format(archive=misc.get_archive_dir(ctx)))])
-
-    # It should have left the MDS map state still in CREATING
-    status = manager.get_mds_status(mds_id)
-    assert status['state'] == 'up:creating'
-
-    # Start the MDS again without the kill flag set, it should proceed with creation successfully
-    mds.restart()
-
-    # Wait for state ACTIVE
-    t = 0
-    create_timeout = 120
-    while True:
-        status = manager.get_mds_status(mds_id)
-        if status['state'] == 'up:active':
-            log.info("MDS creation completed successfully")
-            break
-        elif status['state'] == 'up:creating':
-            log.info("MDS still in creating state")
-            if t > create_timeout:
-                log.error("Creating did not complete within %ss" % create_timeout)
-                raise RuntimeError("Creating did not complete within %ss" % create_timeout)
-            t += 1
-            time.sleep(1)
-        else:
-            log.error("Unexpected MDS state: %s" % status['state'])
-            assert(status['state'] in ['up:active', 'up:creating'])
-
-    # The system should be back up in a happy healthy state, go ahead and run any further tasks
-    # inside this context.
-    yield
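The wait loop above is a bounded poll: keep checking the MDS state, tolerate up:creating until a timeout, and fail on anything else. A standalone sketch of the same pattern, where get_state is a stand-in callable:

    import time

    def wait_for_active(get_state, timeout=120, interval=1):
        # Poll get_state() until it returns 'up:active'; raise if the timeout
        # expires while still creating, or if an unexpected state appears.
        waited = 0
        while True:
            state = get_state()
            if state == 'up:active':
                return
            if state != 'up:creating':
                raise RuntimeError('Unexpected MDS state: %s' % state)
            if waited > timeout:
                raise RuntimeError('Creating did not complete within %ss' % timeout)
            waited += interval
            time.sleep(interval)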
diff --git a/teuthology/task/mds_thrash.py b/teuthology/task/mds_thrash.py
deleted file mode 100644 (file)
index c60b741..0000000
+++ /dev/null
@@ -1,352 +0,0 @@
-"""
-Thrash mds by simulating failures
-"""
-import logging
-import contextlib
-import ceph_manager
-import random
-import time
-from gevent.greenlet import Greenlet
-from gevent.event import Event
-from teuthology import misc as teuthology
-
-log = logging.getLogger(__name__)
-
-
-class MDSThrasher(Greenlet):
-    """
-    MDSThrasher::
-
-    The MDSThrasher thrashes MDSs during execution of other tasks (workunits, etc).
-
-    The config is optional.  Many of the config parameters are a maximum value
-    to use when selecting a random value from a range.  To always use the maximum
-    value, set randomize to false.  The config is a dict containing some or all of:
-
-    seed: [no default] seed the random number generator
-
-    randomize: [default: true] enables randomization and use of the max/min values
-
-    max_thrash: [default: 1] the maximum number of MDSs that will be thrashed at
-      any given time.
-
-    max_thrash_delay: [default: 30] maximum number of seconds to delay before
-      thrashing again.
-
-    max_revive_delay: [default: 10] maximum number of seconds to delay before
-      bringing back a thrashed MDS
-
-    thrash_in_replay: [default: 0.0] likelihood that the MDS will be thrashed
-      during replay.  Value should be between 0.0 and 1.0
-
-    max_replay_thrash_delay: [default: 4] maximum number of seconds to delay while in
-      the replay state before thrashing
-
-    thrash_weights: allows specific MDSs to be thrashed more/less frequently.  This option
-      overrides anything specified by max_thrash.  This option is a dict containing
-      mds.x: weight pairs.  For example, [mds.a: 0.7, mds.b: 0.3, mds.c: 0.0].  Each weight
-      is a value from 0.0 to 1.0.  Any MDSs not specified will be automatically
-      given a weight of 0.0.  For a given MDS, by default the trasher delays for up
-      to max_thrash_delay, trashes, waits for the MDS to recover, and iterates.  If a non-zero
-      weight is specified for an MDS, for each iteration the thrasher chooses whether to thrash
-      during that iteration based on a random value [0-1] not exceeding the weight of that MDS.
-
-    Examples::
-
-
-      The following example sets the likelihood that mds.a will be thrashed
-      to 80%, mds.b to 20%, and other MDSs will not be thrashed.  It also sets the
-      likelihood that an MDS will be thrashed in replay to 40%.
-      Thrash weights do not have to sum to 1.
-
-      tasks:
-      - ceph:
-      - mds_thrash:
-          thrash_weights:
-            - mds.a: 0.8
-            - mds.b: 0.2
-          thrash_in_replay: 0.4
-      - ceph-fuse:
-      - workunit:
-          clients:
-            all: [suites/fsx.sh]
-
-      The following example disables randomization, and uses the max delay values:
-
-      tasks:
-      - ceph:
-      - mds_thrash:
-          max_thrash_delay: 10
-          max_revive_delay: 1
-          max_replay_thrash_delay: 4
-
-    """
-
-    def __init__(self, ctx, manager, config, logger, failure_group, weight):
-        super(MDSThrasher, self).__init__()
-
-        self.ctx = ctx
-        self.manager = manager
-        assert self.manager.is_clean()
-
-        self.stopping = Event()
-        self.logger = logger
-        self.config = config
-
-        self.randomize = bool(self.config.get('randomize', True))
-        self.max_thrash_delay = float(self.config.get('max_thrash_delay', 30.0))
-        self.thrash_in_replay = float(self.config.get('thrash_in_replay', 0.0))
-        assert self.thrash_in_replay >= 0.0 and self.thrash_in_replay <= 1.0, 'thrash_in_replay ({v}) must be between [0.0, 1.0]'.format(
-            v=self.thrash_in_replay)
-
-        self.max_replay_thrash_delay = float(self.config.get('max_replay_thrash_delay', 4.0))
-
-        self.max_revive_delay = float(self.config.get('max_revive_delay', 10.0))
-
-        self.failure_group = failure_group
-        self.weight = weight
-
-    def _run(self):
-        try:
-            self.do_thrash()
-        except:
-            # Log exceptions here so we get the full backtrace (it's lost
-            # by the time someone does a .get() on this greenlet)
-            self.logger.exception("Exception in do_thrash:")
-            raise
-
-    def log(self, x):
-        """Write data to logger assigned to this MDThrasher"""
-        self.logger.info(x)
-
-    def stop(self):
-        self.stopping.set()
-
-    def do_thrash(self):
-        """
-        Perform the random thrashing action
-        """
-        self.log('starting mds_do_thrash for failure group: ' + ', '.join(
-            ['mds.{_id}'.format(_id=_f) for _f in self.failure_group]))
-        while not self.stopping.is_set():
-            delay = self.max_thrash_delay
-            if self.randomize:
-                delay = random.randrange(0.0, self.max_thrash_delay)
-
-            if delay > 0.0:
-                self.log('waiting for {delay} secs before thrashing'.format(delay=delay))
-                self.stopping.wait(delay)
-                if self.stopping.is_set():
-                    continue
-
-            skip = random.random()
-            if self.weight < 1.0 and skip > self.weight:
-                self.log('skipping thrash iteration with skip ({skip}) > weight ({weight})'.format(skip=skip,
-                                                                                                   weight=self.weight))
-                continue
-
-            # find the active mds in the failure group
-            statuses = [self.manager.get_mds_status(m) for m in self.failure_group]
-            actives = filter(lambda s: s and s['state'] == 'up:active', statuses)
-            assert len(actives) == 1, 'Can only have one active in a failure group'
-
-            active_mds = actives[0]['name']
-            active_rank = actives[0]['rank']
-
-            self.log('kill mds.{id} (rank={r})'.format(id=active_mds, r=active_rank))
-            self.manager.kill_mds_by_rank(active_rank)
-
-            # wait for mon to report killed mds as crashed
-            last_laggy_since = None
-            itercount = 0
-            while True:
-                failed = self.manager.get_mds_status_all()['failed']
-                status = self.manager.get_mds_status(active_mds)
-                if not status:
-                    break
-                if 'laggy_since' in status:
-                    last_laggy_since = status['laggy_since']
-                    break
-                if any([(f == active_mds) for f in failed]):
-                    break
-                self.log(
-                    'waiting till mds map indicates mds.{_id} is laggy/crashed, in failed state, or mds.{_id} is removed from mdsmap'.format(
-                        _id=active_mds))
-                itercount = itercount + 1
-                if itercount > 10:
-                    self.log('mds map: {status}'.format(status=self.manager.get_mds_status_all()))
-                time.sleep(2)
-            if last_laggy_since:
-                self.log(
-                    'mds.{_id} reported laggy/crashed since: {since}'.format(_id=active_mds, since=last_laggy_since))
-            else:
-                self.log('mds.{_id} down, removed from mdsmap'.format(_id=active_mds, since=last_laggy_since))
-
-            # wait for a standby mds to takeover and become active
-            takeover_mds = None
-            takeover_rank = None
-            itercount = 0
-            while True:
-                statuses = [self.manager.get_mds_status(m) for m in self.failure_group]
-                actives = filter(lambda s: s and s['state'] == 'up:active', statuses)
-                if len(actives) > 0:
-                    assert len(actives) == 1, 'Can only have one active in failure group'
-                    takeover_mds = actives[0]['name']
-                    takeover_rank = actives[0]['rank']
-                    break
-                itercount = itercount + 1
-                if itercount > 10:
-                    self.log('mds map: {status}'.format(status=self.manager.get_mds_status_all()))
-
-            self.log('New active mds is mds.{_id}'.format(_id=takeover_mds))
-
-            # wait for a while before restarting old active to become new
-            # standby
-            delay = self.max_revive_delay
-            if self.randomize:
-                delay = random.randrange(0.0, self.max_revive_delay)
-
-            self.log('waiting for {delay} secs before reviving mds.{id}'.format(
-                delay=delay, id=active_mds))
-            time.sleep(delay)
-
-            self.log('reviving mds.{id}'.format(id=active_mds))
-            self.manager.revive_mds(active_mds, standby_for_rank=takeover_rank)
-
-            status = {}
-            while True:
-                status = self.manager.get_mds_status(active_mds)
-                if status and (status['state'] == 'up:standby' or status['state'] == 'up:standby-replay'):
-                    break
-                self.log(
-                    'waiting till mds map indicates mds.{_id} is in standby or standby-replay'.format(_id=active_mds))
-                time.sleep(2)
-            self.log('mds.{_id} reported in {state} state'.format(_id=active_mds, state=status['state']))
-
-            # don't do replay thrashing right now
-            continue
-            # this might race with replay -> active transition...
-            if status['state'] == 'up:replay' and random.random() < self.thrash_in_replay:
-
-                delay = self.max_replay_thrash_delay
-                if self.randomize:
-                    delay = random.randrange(0.0, self.max_replay_thrash_delay)
-                time.sleep(delay)
-                self.log('kill replaying mds.{id}'.format(id=self.to_kill))
-                self.manager.kill_mds(self.to_kill)
-
-                delay = self.max_revive_delay
-                if self.randomize:
-                    delay = random.randrange(0.0, self.max_revive_delay)
-
-                self.log('waiting for {delay} secs before reviving mds.{id}'.format(
-                    delay=delay, id=self.to_kill))
-                time.sleep(delay)
-
-                self.log('revive mds.{id}'.format(id=self.to_kill))
-                self.manager.revive_mds(self.to_kill)
-
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Stress test the mds by thrashing while another task/workunit
-    is running.
-
-    Please refer to MDSThrasher class for further information on the
-    available options.
-    """
-    if config is None:
-        config = {}
-    assert isinstance(config, dict), \
-        'mds_thrash task only accepts a dict for configuration'
-    mdslist = list(teuthology.all_roles_of_type(ctx.cluster, 'mds'))
-    assert len(mdslist) > 1, \
-        'mds_thrash task requires at least 2 metadata servers'
-
-    # choose random seed
-    seed = None
-    if 'seed' in config:
-        seed = int(config['seed'])
-    else:
-        seed = int(time.time())
-    log.info('mds thrasher using random seed: {seed}'.format(seed=seed))
-    random.seed(seed)
-
-    max_thrashers = config.get('max_thrash', 1)
-    thrashers = {}
-
-    (first,) = ctx.cluster.only('mds.{_id}'.format(_id=mdslist[0])).remotes.iterkeys()
-    manager = ceph_manager.CephManager(
-        first, ctx=ctx, logger=log.getChild('ceph_manager'),
-    )
-
-    # make sure everyone is in active, standby, or standby-replay
-    log.info('Wait for all MDSs to reach steady state...')
-    statuses = None
-    statuses_by_rank = None
-    while True:
-        statuses = {m: manager.get_mds_status(m) for m in mdslist}
-        statuses_by_rank = {}
-        for _, s in statuses.iteritems():
-            if isinstance(s, dict):
-                statuses_by_rank[s['rank']] = s
-
-        ready = filter(lambda (_, s): s is not None and (s['state'] == 'up:active'
-                                                         or s['state'] == 'up:standby'
-                                                         or s['state'] == 'up:standby-replay'),
-                       statuses.items())
-        if len(ready) == len(statuses):
-            break
-        time.sleep(2)
-    log.info('Ready to start thrashing')
-
-    # setup failure groups
-    failure_groups = {}
-    actives = {s['name']: s for (_, s) in statuses.iteritems() if s['state'] == 'up:active'}
-    log.info('Actives is: {d}'.format(d=actives))
-    log.info('Statuses is: {d}'.format(d=statuses_by_rank))
-    for active in actives:
-        for (r, s) in statuses.iteritems():
-            if s['standby_for_name'] == active:
-                if not active in failure_groups:
-                    failure_groups[active] = []
-                log.info('Assigning mds rank {r} to failure group {g}'.format(r=r, g=active))
-                failure_groups[active].append(r)
-
-    manager.wait_for_clean()
-    for (active, standbys) in failure_groups.iteritems():
-        weight = 1.0
-        if 'thrash_weights' in config:
-            weight = float(config['thrash_weights'].get('mds.{_id}'.format(_id=active), '0.0'))
-
-        failure_group = [active]
-        failure_group.extend(standbys)
-
-        thrasher = MDSThrasher(
-            ctx, manager, config,
-            logger=log.getChild('mds_thrasher.failure_group.[{a}, {sbs}]'.format(
-                a=active,
-                sbs=', '.join(standbys)
-            )
-            ),
-            failure_group=failure_group,
-            weight=weight)
-        thrasher.start()
-        thrashers[active] = thrasher
-
-        # if thrash_weights isn't specified and we've reached max_thrash,
-        # we're done
-        if not 'thrash_weights' in config and len(thrashers) == max_thrashers:
-            break
-
-    try:
-        log.debug('Yielding')
-        yield
-    finally:
-        log.info('joining mds_thrashers')
-        for t in thrashers:
-            log.info('join thrasher for failure group [{fg}]'.format(fg=', '.join(failure_group)))
-            thrashers[t].stop()
-            thrashers[t].join()
-        log.info('done joining')
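The per-MDS thrash weights documented above boil down to a single random draw per iteration: thrash only when the draw does not exceed the weight. A minimal standalone sketch, assuming a seeded random.Random as the thrasher uses:

    import random

    def should_thrash(weight, rng):
        # A draw in [0, 1) at or below the weight means this iteration thrashes.
        return rng.random() <= weight

    rng = random.Random(31337)          # hypothetical seed, mirroring the 'seed' option
    print(should_thrash(0.8, rng), should_thrash(0.0, rng))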
diff --git a/teuthology/task/metadata.yaml b/teuthology/task/metadata.yaml
deleted file mode 100644 (file)
index ccdc3b0..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-instance-id: test
-local-hostname: test
diff --git a/teuthology/task/mon_clock_skew_check.py b/teuthology/task/mon_clock_skew_check.py
deleted file mode 100644 (file)
index 891e6ec..0000000
+++ /dev/null
@@ -1,261 +0,0 @@
-"""
-Handle clock skews in monitors.
-"""
-import logging
-import contextlib
-import ceph_manager
-import time
-import gevent
-from StringIO import StringIO
-from teuthology import misc as teuthology
-
-log = logging.getLogger(__name__)
-
-class ClockSkewCheck:
-    """
-    Periodically check if there are any clock skews among the monitors in the
-    quorum. By default, assume no skews are supposed to exist; that can be
-    changed using the 'expect-skew' option. If 'fail-on-skew' is set to false,
-    then we will always succeed and only report skews if any are found.
-
-    This class does not spawn a thread. It assumes that, if that is indeed
-    wanted, it should be done by a third party (for instance, the task using
-    this class). We intend it as such in order to reuse this class if need be.
-
-    This task accepts the following options:
-
-    interval     amount of seconds to wait in-between checks. (default: 30.0)
-    max-skew     maximum skew, in seconds, that is considered tolerable before
-                 issuing a warning. (default: 0.05)
-    expect-skew  'true' or 'false', to indicate whether to expect a skew during
-                 the run or not. If 'true', the test will fail if no skew is
-                 found, and succeed if a skew is indeed found; if 'false', it's
-                 the other way around. (default: false)
-    never-fail   Don't fail the run if a skew is detected and we weren't
-                 expecting it, or if no skew is detected and we were expecting
-                 it. (default: False)
-
-    at-least-once          Runs at least once, even if we are told to stop.
-                           (default: True)
-    at-least-once-timeout  If we were told to stop but we are attempting to
-                           run at least once, timeout after this many seconds.
-                           (default: 600)
-
-    Example:
-        Expect a skew higher than 0.05 seconds, but only report it without
-        failing the teuthology run.
-
-    - mon_clock_skew_check:
-        interval: 30
-        max-skew: 0.05
-        expect-skew: true
-        never-fail: true
-    """
-
-    def __init__(self, ctx, manager, config, logger):
-        self.ctx = ctx
-        self.manager = manager
-
-        self.stopping = False
-        self.logger = logger
-        self.config = config
-
-        if self.config is None:
-            self.config = dict()
-
-        self.check_interval = float(self.config.get('interval', 30.0))
-
-        first_mon = teuthology.get_first_mon(ctx, config)
-        remote = ctx.cluster.only(first_mon).remotes.keys()[0]
-        proc = remote.run(
-            args=[
-                'sudo',
-                'ceph-mon',
-                '-i', first_mon[4:],
-                '--show-config-value', 'mon_clock_drift_allowed'
-                ], stdout=StringIO(), wait=True
-                )
-        self.max_skew = self.config.get('max-skew', float(proc.stdout.getvalue()))
-
-        self.expect_skew = self.config.get('expect-skew', False)
-        self.never_fail = self.config.get('never-fail', False)
-        self.at_least_once = self.config.get('at-least-once', True)
-        self.at_least_once_timeout = self.config.get('at-least-once-timeout', 600.0)
-
-    def info(self, x):
-        """
-        locally define logger for info messages
-        """
-        self.logger.info(x)
-
-    def warn(self, x):
-        """
-        locally define logger for warnings
-        """
-        self.logger.warn(x)
-
-    def debug(self, x):
-        """
-        locally define logger for debug messages
-        """
-        self.logger.debug(x)
-
-    def finish(self):
-        """
-        Break out of the do_check loop.
-        """
-        self.stopping = True
-
-    def sleep_interval(self):
-        """
-        If a sleep interval is set, sleep for that amount of time.
-        """
-        if self.check_interval > 0.0:
-            self.debug('sleeping for {s} seconds'.format(
-                s=self.check_interval))
-            time.sleep(self.check_interval)
-
-    def print_skews(self, skews):
-        """
-        Display skew values.
-        """
-        total = len(skews)
-        if total > 0:
-            self.info('---------- found {n} skews ----------'.format(n=total))
-            for mon_id, values in skews.iteritems():
-                self.info('mon.{id}: {v}'.format(id=mon_id, v=values))
-            self.info('-------------------------------------')
-        else:
-            self.info('---------- no skews were found ----------')
-
-    def do_check(self):
-        """
-        Clock skew checker.  Loops until finish() is called.
-        """
-        self.info('start checking for clock skews')
-        skews = dict()
-        ran_once = False
-        
-        started_on = None
-
-        while not self.stopping or (self.at_least_once and not ran_once):
-
-            if self.at_least_once and not ran_once and self.stopping:
-                if started_on is None:
-                    self.info('kicking-off timeout (if any)')
-                    started_on = time.time()
-                elif self.at_least_once_timeout > 0.0:
-                    assert time.time() - started_on < self.at_least_once_timeout, \
-                        'failed to obtain a timecheck before timeout expired'
-
-            quorum_size = len(teuthology.get_mon_names(self.ctx))
-            self.manager.wait_for_mon_quorum_size(quorum_size)
-
-            health = self.manager.get_mon_health(True)
-            timechecks = health['timechecks']
-
-            clean_check = False
-
-            if timechecks['round_status'] == 'finished':
-                assert (timechecks['round'] % 2) == 0, \
-                    'timecheck marked as finished but round ' \
-                    'disagrees (r {r})'.format(
-                        r=timechecks['round'])
-                clean_check = True
-            else:
-                assert timechecks['round_status'] == 'on-going', \
-                        'timecheck status expected \'on-going\' ' \
-                        'but found \'{s}\' instead'.format(
-                            s=timechecks['round_status'])
-                if 'mons' in timechecks.keys() and len(timechecks['mons']) > 1:
-                    self.info('round still on-going, but there are available reports')
-                else:
-                    self.info('no timechecks available just yet')
-                    self.sleep_interval()
-                    continue
-
-            assert len(timechecks['mons']) > 1, \
-                'there are not enough reported timechecks; ' \
-                'expected > 1 found {n}'.format(n=len(timechecks['mons']))
-
-            for check in timechecks['mons']:
-                mon_skew = float(check['skew'])
-                mon_health = check['health']
-                mon_id = check['name']
-                if abs(mon_skew) > self.max_skew:
-                    assert mon_health == 'HEALTH_WARN', \
-                        'mon.{id} health is \'{health}\' but skew {s} > max {ms}'.format(
-                            id=mon_id,health=mon_health,s=abs(mon_skew),ms=self.max_skew)
-
-                    log_str = 'mon.{id} with skew {s} > max {ms}'.format(
-                        id=mon_id,s=abs(mon_skew),ms=self.max_skew)
-
-                    """ add to skew list """
-                    details = check['details']
-                    skews[mon_id] = {'skew': mon_skew, 'details': details}
-
-                    if self.expect_skew:
-                        self.info('expected skew: {str}'.format(str=log_str))
-                    else:
-                        self.warn('unexpected skew: {str}'.format(str=log_str))
-
-            if clean_check or (self.expect_skew and len(skews) > 0):
-                ran_once = True
-                self.print_skews(skews)
-            self.sleep_interval()
-
-        total = len(skews)
-        self.print_skews(skews)
-
-        error_str = ''
-        found_error = False
-
-        if self.expect_skew:
-            if total == 0:
-                error_str = 'We were expecting a skew, but none was found!'
-                found_error = True
-        else:
-            if total > 0:
-                error_str = 'We were not expecting a skew, but we did find it!'
-                found_error = True
-
-        if found_error:
-            self.info(error_str)
-            if not self.never_fail:
-                assert False, error_str
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Use the ClockSkewCheck class to check for clock skews on the monitors.
-    This task will spawn a thread running ClockSkewCheck's do_check().
-
-    All the configuration will be directly handled by ClockSkewCheck,
-    so please refer to the class documentation for further information.
-    """
-    if config is None:
-        config = {}
-    assert isinstance(config, dict), \
-        'mon_clock_skew_check task only accepts a dict for configuration'
-    log.info('Beginning mon_clock_skew_check...')
-    first_mon = teuthology.get_first_mon(ctx, config)
-    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-    manager = ceph_manager.CephManager(
-        mon,
-        ctx=ctx,
-        logger=log.getChild('ceph_manager'),
-        )
-
-    skew_check = ClockSkewCheck(ctx,
-        manager, config,
-        logger=log.getChild('mon_clock_skew_check'))
-    skew_check_thread = gevent.spawn(skew_check.do_check)
-    try:
-        yield
-    finally:
-        log.info('joining mon_clock_skew_check')
-        skew_check.finish()
-        skew_check_thread.get()
-
-
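The core of the checker above is a simple threshold test per monitor: any absolute skew above max-skew is recorded, and whether that is an error depends on expect-skew. A standalone sketch of the recording step, against a simplified, hypothetical timecheck report:

    def find_skews(timechecks, max_skew):
        # Return {mon name: skew} for every monitor whose |skew| exceeds max_skew.
        skews = {}
        for check in timechecks['mons']:
            skew = float(check['skew'])
            if abs(skew) > max_skew:
                skews[check['name']] = skew
        return skews

    report = {'mons': [{'name': 'a', 'skew': '0.001'},
                       {'name': 'b', 'skew': '-0.2'}]}
    assert find_skews(report, 0.05) == {'b': -0.2}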
diff --git a/teuthology/task/mon_recovery.py b/teuthology/task/mon_recovery.py
deleted file mode 100644 (file)
index bfa2cdf..0000000
+++ /dev/null
@@ -1,80 +0,0 @@
-"""
-Monitor recovery
-"""
-import logging
-import ceph_manager
-from teuthology import misc as teuthology
-
-
-log = logging.getLogger(__name__)
-
-def task(ctx, config):
-    """
-    Test monitor recovery.
-    """
-    if config is None:
-        config = {}
-    assert isinstance(config, dict), \
-        'task only accepts a dict for configuration'
-    first_mon = teuthology.get_first_mon(ctx, config)
-    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-
-    manager = ceph_manager.CephManager(
-        mon,
-        ctx=ctx,
-        logger=log.getChild('ceph_manager'),
-        )
-
-    mons = [f.split('.')[1] for f in teuthology.get_mon_names(ctx)]
-    log.info("mon ids = %s" % mons)
-
-    manager.wait_for_mon_quorum_size(len(mons))
-
-    log.info('verifying all monitors are in the quorum')
-    for m in mons:
-        s = manager.get_mon_status(m)
-        assert s['state'] == 'leader' or s['state'] == 'peon'
-        assert len(s['quorum']) == len(mons)
-
-    log.info('restarting each monitor in turn')
-    for m in mons:
-        # stop a monitor
-        manager.kill_mon(m)
-        manager.wait_for_mon_quorum_size(len(mons) - 1)
-
-        # restart
-        manager.revive_mon(m)
-        manager.wait_for_mon_quorum_size(len(mons))
-
-    # in forward and reverse order,
-    rmons = list(mons)
-    rmons.reverse()
-    for mons in mons, rmons:
-        log.info('stopping all monitors')
-        for m in mons:
-            manager.kill_mon(m)
-
-        log.info('forming a minimal quorum for %s, then adding monitors' % mons)
-        qnum = (len(mons) / 2) + 1
-        num = 0
-        for m in mons:
-            manager.revive_mon(m)
-            num += 1
-            if num >= qnum:
-                manager.wait_for_mon_quorum_size(num)
-
-    # on both leader and non-leader ranks...
-    for rank in [0, 1]:
-        # take one out
-        log.info('removing mon %s' % mons[rank])
-        manager.kill_mon(mons[rank])
-        manager.wait_for_mon_quorum_size(len(mons) - 1)
-
-        log.info('causing some monitor log activity')
-        m = 30
-        for n in range(1, m):
-            manager.raw_cluster_cmd('log', '%d of %d' % (n, m))
-
-        log.info('adding mon %s back in' % mons[rank])
-        manager.revive_mon(mons[rank])
-        manager.wait_for_mon_quorum_size(len(mons))
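The quorum arithmetic in the loop above is just strict majority: with n monitors, quorum forms once floor(n/2) + 1 of them are up. A tiny standalone sketch:

    def minimal_quorum(num_mons):
        # Smallest count of monitors that still constitutes a majority.
        return num_mons // 2 + 1

    assert minimal_quorum(3) == 2
    assert minimal_quorum(5) == 3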
diff --git a/teuthology/task/mon_thrash.py b/teuthology/task/mon_thrash.py
deleted file mode 100644 (file)
index b45aaa9..0000000
+++ /dev/null
@@ -1,343 +0,0 @@
-"""
-Monitor thrash
-"""
-import logging
-import contextlib
-import ceph_manager
-import random
-import time
-import gevent
-import json
-import math
-from teuthology import misc as teuthology
-
-log = logging.getLogger(__name__)
-
-def _get_mons(ctx):
-    """
-    Get monitor names from the context value.
-    """
-    mons = [f[len('mon.'):] for f in teuthology.get_mon_names(ctx)]
-    return mons
-
-class MonitorThrasher:
-    """
-    How it works::
-
-    - pick a monitor
-    - kill it
-    - wait for quorum to be formed
-    - sleep for 'revive_delay' seconds
-    - revive monitor
-    - wait for quorum to be formed
-    - sleep for 'thrash_delay' seconds
-
-    Options::
-
-    seed                Seed to use on the RNG to reproduce a previous
-                        behaviour (default: None; i.e., not set)
-    revive_delay        Number of seconds to wait before reviving
-                        the monitor (default: 10)
-    thrash_delay        Number of seconds to wait in-between
-                        test iterations (default: 0)
-    store_thrash        Thrash monitor store before killing the monitor being thrashed (default: False)
-    store_thrash_probability  Probability of thrashing a monitor's store
-                              (default: 50)
-    thrash_many         Thrash multiple monitors instead of just one. If
-                        'maintain_quorum' is set to False, then we will
-                        thrash up to as many monitors as there are
-                        available. (default: False)
-    maintain_quorum     Always maintain quorum, taking care on how many
-                        monitors we kill during the thrashing. If we
-                        happen to only have one or two monitors configured,
-                        if this option is set to True, then we won't run
-                        this task as we cannot guarantee maintenance of
-                        quorum. Setting it to false however would allow the
-                        task to run with as many as just one single monitor.
-                        (default: True)
-    freeze_mon_probability: how often to freeze the mon instead of killing it,
-                        in % (default: 0)
-    freeze_mon_duration: how many seconds to freeze the mon (default: 15)
-    scrub               Scrub after each iteration (default: True)
-
-    Note: if 'store_thrash' is set to True, then 'maintain_quorum' must also
-          be set to True.
-
-    For example::
-
-    tasks:
-    - ceph:
-    - mon_thrash:
-        revive_delay: 20
-        thrash_delay: 1
-        store_thrash: true
-        store_thrash_probability: 40
-        seed: 31337
-        maintain_quorum: true
-        thrash_many: true
-    - ceph-fuse:
-    - workunit:
-        clients:
-          all:
-            - mon/workloadgen.sh
-    """
-    def __init__(self, ctx, manager, config, logger):
-        self.ctx = ctx
-        self.manager = manager
-        self.manager.wait_for_clean()
-
-        self.stopping = False
-        self.logger = logger
-        self.config = config
-
-        if self.config is None:
-            self.config = dict()
-
-        """ Test reproducibility """
-        self.random_seed = self.config.get('seed', None)
-
-        if self.random_seed is None:
-            self.random_seed = int(time.time())
-
-        self.rng = random.Random()
-        self.rng.seed(int(self.random_seed))
-
-        """ Monitor thrashing """
-        self.revive_delay = float(self.config.get('revive_delay', 10.0))
-        self.thrash_delay = float(self.config.get('thrash_delay', 0.0))
-
-        self.thrash_many = self.config.get('thrash_many', False)
-        self.maintain_quorum = self.config.get('maintain_quorum', True)
-
-        self.scrub = self.config.get('scrub', True)
-
-        self.freeze_mon_probability = float(self.config.get('freeze_mon_probability', 10))
-        self.freeze_mon_duration = float(self.config.get('freeze_mon_duration', 15.0))
-
-        assert self.max_killable() > 0, \
-            'Unable to kill at least one monitor with the current config.'
-
-        """ Store thrashing """
-        self.store_thrash = self.config.get('store_thrash', False)
-        self.store_thrash_probability = int(
-            self.config.get('store_thrash_probability', 50))
-        if self.store_thrash:
-            assert self.store_thrash_probability > 0, \
-                'store_thrash is set, probability must be > 0'
-            assert self.maintain_quorum, \
-                'store_thrash = true must imply maintain_quorum = true'
-
-        self.thread = gevent.spawn(self.do_thrash)
-
-    def log(self, x):
-        """
-        locally log info messages
-        """
-        self.logger.info(x)
-
-    def do_join(self):
-        """
-        Break out of this processes thrashing loop.
-        """
-        self.stopping = True
-        self.thread.get()
-
-    def should_thrash_store(self):
-        """
-        If allowed, indicate that we should thrash a certain percentage of
-        the time as determined by the store_thrash_probability value.
-        """
-        if not self.store_thrash:
-            return False
-        return self.rng.randrange(0, 101) < self.store_thrash_probability
-
-    def thrash_store(self, mon):
-        """
-        Thrash the monitor specified.
-        :param mon: monitor to thrash
-        """
-        addr = self.ctx.ceph.conf['mon.%s' % mon]['mon addr']
-        self.log('thrashing mon.{id}@{addr} store'.format(id=mon, addr=addr))
-        out = self.manager.raw_cluster_cmd('-m', addr, 'sync', 'force')
-        j = json.loads(out)
-        assert j['ret'] == 0, \
-            'error forcing store sync on mon.{id}:\n{ret}'.format(
-                id=mon,ret=out)
-
-    def should_freeze_mon(self):
-        """
-        Indicate that we should freeze a certain percentage of the time
-        as determined by the freeze_mon_probability value.
-        """
-        return self.rng.randrange(0, 101) < self.freeze_mon_probability
-
-    def freeze_mon(self, mon):
-        """
-        Send STOP signal to freeze the monitor.
-        """
-        log.info('Sending STOP to mon %s', mon)
-        self.manager.signal_mon(mon, 19)  # STOP
-
-    def unfreeze_mon(self, mon):
-        """
-        Send CONT signal to unfreeze the monitor.
-        """
-        log.info('Sending CONT to mon %s', mon)
-        self.manager.signal_mon(mon, 18)  # CONT
-
-    def kill_mon(self, mon):
-        """
-        Kill the monitor specified
-        """
-        self.log('killing mon.{id}'.format(id=mon))
-        self.manager.kill_mon(mon)
-
-    def revive_mon(self, mon):
-        """
-        Revive the monitor specified
-        """
-        self.log('reviving mon.{id}'.format(id=mon))
-        self.manager.revive_mon(mon)
-
-    def max_killable(self):
-        """
-        Return the maximum number of monitors we can kill.
-        """
-        m = len(_get_mons(self.ctx))
-        if self.maintain_quorum:
-            return max(math.ceil(m/2.0)-1, 0)
-        else:
-            return m
-
-    def do_thrash(self):
-        """
-        Continuously loop and thrash the monitors.
-        """
-        self.log('start thrashing')
-        self.log('seed: {s}, revive delay: {r}, thrash delay: {t} '\
-                   'thrash many: {tm}, maintain quorum: {mq} '\
-                   'store thrash: {st}, probability: {stp} '\
-                   'freeze mon: prob {fp} duration {fd}'.format(
-                s=self.random_seed,r=self.revive_delay,t=self.thrash_delay,
-                tm=self.thrash_many, mq=self.maintain_quorum,
-                st=self.store_thrash,stp=self.store_thrash_probability,
-                fp=self.freeze_mon_probability,fd=self.freeze_mon_duration,
-                ))
-
-        while not self.stopping:
-            mons = _get_mons(self.ctx)
-            self.manager.wait_for_mon_quorum_size(len(mons))
-            self.log('making sure all monitors are in the quorum')
-            for m in mons:
-                s = self.manager.get_mon_status(m)
-                assert s['state'] == 'leader' or s['state'] == 'peon'
-                assert len(s['quorum']) == len(mons)
-
-            kill_up_to = self.rng.randrange(1, self.max_killable()+1)
-            mons_to_kill = self.rng.sample(mons, kill_up_to)
-            self.log('monitors to thrash: {m}'.format(m=mons_to_kill))
-
-            mons_to_freeze = []
-            for mon in mons:
-                if mon in mons_to_kill:
-                    continue
-                if self.should_freeze_mon():
-                    mons_to_freeze.append(mon)
-            self.log('monitors to freeze: {m}'.format(m=mons_to_freeze))
-
-            for mon in mons_to_kill:
-                self.log('thrashing mon.{m}'.format(m=mon))
-
-                """ we only thrash stores if we are maintaining quorum """
-                if self.should_thrash_store() and self.maintain_quorum:
-                    self.thrash_store(mon)
-
-                self.kill_mon(mon)
-
-            if mons_to_freeze:
-                for mon in mons_to_freeze:
-                    self.freeze_mon(mon)
-                self.log('waiting for {delay} secs to unfreeze mons'.format(
-                    delay=self.freeze_mon_duration))
-                time.sleep(self.freeze_mon_duration)
-                for mon in mons_to_freeze:
-                    self.unfreeze_mon(mon)
-
-            if self.maintain_quorum:
-                self.manager.wait_for_mon_quorum_size(len(mons)-len(mons_to_kill))
-                for m in mons:
-                    if m in mons_to_kill:
-                        continue
-                    s = self.manager.get_mon_status(m)
-                    assert s['state'] == 'leader' or s['state'] == 'peon'
-                    assert len(s['quorum']) == len(mons)-len(mons_to_kill)
-
-            self.log('waiting for {delay} secs before reviving monitors'.format(
-                delay=self.revive_delay))
-            time.sleep(self.revive_delay)
-
-            for mon in mons_to_kill:
-                self.revive_mon(mon)
-            # do more freezes
-            if mons_to_freeze:
-                for mon in mons_to_freeze:
-                    self.freeze_mon(mon)
-                self.log('waiting for {delay} secs to unfreeze mons'.format(
-                    delay=self.freeze_mon_duration))
-                time.sleep(self.freeze_mon_duration)
-                for mon in mons_to_freeze:
-                    self.unfreeze_mon(mon)
-
-            self.manager.wait_for_mon_quorum_size(len(mons))
-            for m in mons:
-                s = self.manager.get_mon_status(m)
-                assert s['state'] == 'leader' or s['state'] == 'peon'
-                assert len(s['quorum']) == len(mons)
-
-            if self.scrub:
-                self.log('triggering scrub')
-                try:
-                    self.manager.raw_cluster_cmd('scrub')
-                except Exception:
-                    log.exception("Saw exception while triggering scrub")
-
-            if self.thrash_delay > 0.0:
-                self.log('waiting for {delay} secs before continuing thrashing'.format(
-                    delay=self.thrash_delay))
-                time.sleep(self.thrash_delay)
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Stress test the monitor by thrashing them while another task/workunit
-    is running.
-
-    Please refer to MonitorThrasher class for further information on the
-    available options.
-    """
-    if config is None:
-        config = {}
-    assert isinstance(config, dict), \
-        'mon_thrash task only accepts a dict for configuration'
-    assert len(_get_mons(ctx)) > 2, \
-        'mon_thrash task requires at least 3 monitors'
-    log.info('Beginning mon_thrash...')
-    first_mon = teuthology.get_first_mon(ctx, config)
-    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-    manager = ceph_manager.CephManager(
-        mon,
-        ctx=ctx,
-        logger=log.getChild('ceph_manager'),
-        )
-    thrash_proc = MonitorThrasher(ctx,
-        manager, config,
-        logger=log.getChild('mon_thrasher'))
-    try:
-        log.debug('Yielding')
-        yield
-    finally:
-        log.info('joining mon_thrasher')
-        thrash_proc.do_join()
-        mons = _get_mons(ctx)
-        manager.wait_for_mon_quorum_size(len(mons))
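The options read by the thrashing loop above (revive_delay, thrash_delay, freeze_mon_duration, maintain_quorum, scrub) come from the task's config dict. A minimal sketch of such a dict, assuming the config keys mirror the attribute names used in the class; consult MonitorThrasher for the authoritative list and defaults:

    # Hypothetical mon_thrash configuration; key names are assumed to mirror
    # the MonitorThrasher attributes used above.
    mon_thrash_config = {
        'revive_delay': 20,          # seconds killed mons stay down
        'thrash_delay': 30,          # seconds between thrashing rounds
        'freeze_mon_duration': 15,   # seconds frozen mons stay frozen
        'maintain_quorum': True,     # never kill enough mons to lose quorum
        'scrub': True,               # issue a 'scrub' after each round
    }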
diff --git a/teuthology/task/mpi.py b/teuthology/task/mpi.py
deleted file mode 100644 (file)
index 6d2381e..0000000
+++ /dev/null
@@ -1,111 +0,0 @@
-"""
-Start mpi processes (and allow commands to be run inside process)
-"""
-import logging
-
-from teuthology import misc as teuthology
-
-log = logging.getLogger(__name__)
-
-def task(ctx, config):
-    """
-    Setup MPI and execute commands
-
-    Example that starts an MPI process on specific clients::
-
-        tasks:
-        - ceph:
-        - ceph-fuse: [client.0, client.1]
-        - ssh_keys:
-        - mpi: 
-            nodes: [client.0, client.1]
-            exec: ior ...
-
-    Example that starts MPI processes on all clients::
-
-        tasks:
-        - ceph:
-        - ceph-fuse:
-        - ssh_keys:
-        - mpi:
-            exec: ior ...
-
-    Example that starts MPI processes on all roles::
-
-        tasks:
-        - ceph:
-        - ssh_keys:
-        - mpi:
-            nodes: all
-            exec: ...
-
-    Example that specifies a working directory for MPI processes::
-
-        tasks:
-        - ceph:
-        - ceph-fuse:
-        - pexec:
-            clients:
-              - ln -s {testdir}/mnt.* {testdir}/gmnt
-        - ssh_keys:
-        - mpi:
-            exec: fsx-mpi
-            workdir: {testdir}/gmnt
-        - pexec:
-            clients:
-              - rm -f {testdir}/gmnt
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    assert isinstance(config, dict), 'task mpi got invalid config'
-    assert 'exec' in config, 'task mpi got invalid config, missing exec'
-
-    testdir = teuthology.get_testdir(ctx)
-
-    mpiexec = config['exec'].replace('$TESTDIR', testdir)
-    hosts = []
-    remotes = []
-    master_remote = None
-    if 'nodes' in config:
-        if isinstance(config['nodes'], basestring) and config['nodes'] == 'all':
-            for role in  teuthology.all_roles(ctx.cluster):
-                (remote,) = ctx.cluster.only(role).remotes.iterkeys()
-                ip,port = remote.ssh.get_transport().getpeername()
-                hosts.append(ip)
-                remotes.append(remote)
-            master_remote = remotes[0]  # 'nodes' is the string 'all' here, so don't index it
-        elif isinstance(config['nodes'], list):
-            for role in config['nodes']:
-                (remote,) = ctx.cluster.only(role).remotes.iterkeys()
-                ip,port = remote.ssh.get_transport().getpeername()
-                hosts.append(ip)
-                remotes.append(remote)
-            (master_remote,) = ctx.cluster.only(config['nodes'][0]).remotes.iterkeys()
-    else:
-        roles = ['client.{id}'.format(id=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
-        (master_remote,) = ctx.cluster.only(roles[0]).remotes.iterkeys()
-        for role in roles:
-            (remote,) = ctx.cluster.only(role).remotes.iterkeys()
-            ip,port = remote.ssh.get_transport().getpeername()
-            hosts.append(ip)
-            remotes.append(remote)
-
-    workdir = []
-    if 'workdir' in config:
-        workdir = ['-wdir', config['workdir'].replace('$TESTDIR', testdir) ]
-
-    log.info('mpi rank 0 is: {name}'.format(name=master_remote.name))
-
-    # write out the mpi hosts file
-    log.info('mpi nodes: [%s]' % (', '.join(hosts)))
-    teuthology.write_file(remote=master_remote,
-                          path='{tdir}/mpi-hosts'.format(tdir=testdir),
-                          data='\n'.join(hosts))
-    log.info('mpiexec on {name}: {cmd}'.format(name=master_remote.name, cmd=mpiexec))
-    args=['mpiexec', '-f', '{tdir}/mpi-hosts'.format(tdir=testdir)]
-    args.extend(workdir)
-    args.extend(mpiexec.split(' '))
-    master_remote.run(args=args, )
-    log.info('mpi task completed')
-    master_remote.run(args=['rm', '{tdir}/mpi-hosts'.format(tdir=testdir)])
diff --git a/teuthology/task/multibench.py b/teuthology/task/multibench.py
deleted file mode 100644 (file)
index bc22b47..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-"""
-Multibench testing
-"""
-import contextlib
-import logging
-import radosbench
-import time
-import copy
-import gevent
-
-log = logging.getLogger(__name__)
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Run multibench
-
-    The config should be as follows:
-
-    multibench:
-        time: <seconds to run total>
-        segments: <number of concurrent benches>
-        radosbench: <config for radosbench>
-
-    example:
-
-    tasks:
-    - ceph:
-    - multibench:
-        clients: [client.0]
-        time: 360
-    - interactive:
-    """
-    log.info('Beginning multibench...')
-    assert isinstance(config, dict), \
-        "please list clients to run on"
-
-    def run_one(num):
-        """Run one bench segment (spawned from gevent)"""
-        start = time.time()
-        benchcontext = copy.copy(config.get('radosbench'))
-        iterations = 0
-        while time.time() - start < int(config.get('time', 600)):
-            log.info("Starting iteration %s of segment %s"%(iterations, num))
-            benchcontext['pool'] = str(num) + "-" + str(iterations)
-            with radosbench.task(ctx, benchcontext):
-                time.sleep(1)
-            iterations += 1
-    log.info("Starting %s threads"%(str(config.get('segments', 3)),))
-    segments = [
-        gevent.spawn(run_one, i) 
-        for i in range(0, int(config.get('segments', 3)))]
-
-    try:
-        yield
-    finally:
-        [i.get() for i in segments]
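The example in the docstring above shows only 'clients' and 'time'; a fuller sketch including the 'segments' and 'radosbench' keys that run_one() actually consumes (values are illustrative, and the radosbench sub-keys are assumed to be whatever the radosbench task accepts):

    # Hypothetical multibench config; 'radosbench' is copied per segment and
    # handed to radosbench.task() with 'pool' overridden on every iteration.
    multibench_config = {
        'time': 360,        # total seconds each segment keeps launching benches
        'segments': 3,      # number of concurrent run_one() greenlets
        'radosbench': {     # sub-config passed through to the radosbench task
            'clients': ['client.0'],
            'time': 60,
        },
    }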
diff --git a/teuthology/task/nfs.py b/teuthology/task/nfs.py
deleted file mode 100644 (file)
index 72a2981..0000000
+++ /dev/null
@@ -1,130 +0,0 @@
-"""
-Nfs client tester
-"""
-import contextlib
-import logging
-import os
-
-from teuthology import misc as teuthology
-
-log = logging.getLogger(__name__)
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Mount nfs clients (requires an nfs server export such as knfsd or ganesha)
-
-    Example that mounts a single nfs client::
-
-        tasks:
-        - ceph:
-        - kclient: [client.0]
-        - knfsd: [client.0]
-        - nfs:
-            client.1:
-                server: client.0
-        - interactive:
-
-    Example that mounts multiple nfs clients with options::
-
-        tasks:
-        - ceph:
-        - kclient: [client.0, client.1]
-        - knfsd: [client.0, client.1]
-        - nfs:
-            client.2:
-                server: client.0
-                options: [rw,hard,intr,nfsvers=3]
-            client.3:
-                server: client.1
-                options: [ro]
-        - workunit:
-            clients:
-                client.2:
-                    - suites/dbench.sh
-                client.3:
-                    - suites/blogbench.sh
-
-    It is not recommended that the nfs client and nfs server reside on the same node; in the example
-    above, client.0-3 should therefore be on 4 distinct nodes.  Only client.2 and client.3 exercise the nfs client tests.
-    """
-    log.info('Mounting nfs clients...')
-    assert isinstance(config, dict)
-
-    clients = list(teuthology.get_clients(ctx=ctx, roles=config.keys()))
-
-    testdir = teuthology.get_testdir(ctx)
-    for id_, remote in clients:
-        mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
-        client_config = config.get("client.%s" % id_)
-        if client_config is None:
-            client_config = {}
-        log.debug("Client client.%s config is %s" % (id_, client_config))
-
-        assert client_config.get('server') is not None
-        server = client_config.get('server');
-
-        svr_id = server[len('client.'):]
-        svr_mnt = os.path.join(testdir, 'mnt.{id}'.format(id=svr_id))
-
-        svr_remote  = None
-        all_config = ['client.{id}'.format(id=tmpid)
-                  for tmpid in teuthology.all_roles_of_type(ctx.cluster, 'client')]
-        all_clients = list(teuthology.get_clients(ctx=ctx, roles=all_config))
-        for tmpid, tmpremote in all_clients:
-            if tmpid == svr_id:
-                svr_remote = tmpremote
-                break
-
-        assert svr_remote is not None 
-        svr_remote = svr_remote.name.split('@', 2)[1]
-
-        if client_config.get('options') is not None:
-            opts = ','.join(client_config.get('options'))
-        else:
-            opts = 'rw'
-
-        log.info('Mounting client.{id} from client.{sid}'.format(id=id_, sid=svr_id))
-        log.debug('mount -o {opts} {remote}:{svr_mnt} {mnt}'.format(
-                remote=svr_remote, svr_mnt=svr_mnt, opts=opts, mnt=mnt))
-
-        remote.run(
-            args=[
-                'mkdir',
-                '--',
-                mnt,
-                ],
-            )
-
-        remote.run(
-            args=[
-                'sudo',
-                "mount",
-                "-o",
-                opts,
-                '{remote}:{mnt}'.format(remote=svr_remote, mnt=svr_mnt),
-                mnt
-                ],
-            )
-
-    try:
-        yield
-    finally:
-        log.info('Unmounting nfs clients...')
-        for id_, remote in clients:
-            log.debug('Unmounting nfs client client.{id}...'.format(id=id_))
-            mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
-            remote.run(
-                args=[
-                    'sudo',
-                    'umount',
-                    mnt,
-                    ],
-                )
-            remote.run(
-                args=[
-                    'rmdir',
-                    '--',
-                    mnt,
-                    ],
-                )
diff --git a/teuthology/task/nop.py b/teuthology/task/nop.py
deleted file mode 100644 (file)
index c7b1814..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-"""
-Null task
-"""
-def task(ctx, config):
-    """
-    This task does nothing.
-
-    For example::
-
-        tasks:
-        - nop:
-    """
-    pass
diff --git a/teuthology/task/object_source_down.py b/teuthology/task/object_source_down.py
deleted file mode 100644 (file)
index 1696c55..0000000
+++ /dev/null
@@ -1,103 +0,0 @@
-"""
-Test Object locations going down
-"""
-import logging
-import ceph_manager
-from teuthology import misc as teuthology
-from teuthology.task_util.rados import rados
-
-log = logging.getLogger(__name__)
-
-def task(ctx, config):
-    """
-    Test handling of object location going down
-    """
-    if config is None:
-        config = {}
-    assert isinstance(config, dict), \
-        'lost_unfound task only accepts a dict for configuration'
-    first_mon = teuthology.get_first_mon(ctx, config)
-    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-
-    manager = ceph_manager.CephManager(
-        mon,
-        ctx=ctx,
-        logger=log.getChild('ceph_manager'),
-        )
-
-    while len(manager.get_osd_status()['up']) < 3:
-        manager.sleep(10)
-    manager.wait_for_clean()
-
-    # something that is always there
-    dummyfile = '/etc/fstab'
-
-    # take 0, 1 out
-    manager.mark_out_osd(0)
-    manager.mark_out_osd(1)
-    manager.wait_for_clean()
-
-    # delay recovery, and make the pg log very long (to prevent backfill)
-    manager.raw_cluster_cmd(
-            'tell', 'osd.0',
-            'injectargs',
-            '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000'
-            )
-    # delay recovery, and make the pg log very long (to prevent backfill)
-    manager.raw_cluster_cmd(
-            'tell', 'osd.1',
-            'injectargs',
-            '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000'
-            )
-    # delay recovery, and make the pg log very long (to prevent backfill)
-    manager.raw_cluster_cmd(
-            'tell', 'osd.2',
-            'injectargs',
-            '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000'
-            )
-    # delay recovery, and make the pg log very long (to prevent backfill)
-    manager.raw_cluster_cmd(
-            'tell', 'osd.3',
-            'injectargs',
-            '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000'
-            )
-
-    # kludge to make sure they get a map
-    rados(ctx, mon, ['-p', 'data', 'put', 'dummy', dummyfile])
-
-    # create old objects
-    for f in range(1, 10):
-        rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])
-
-    manager.mark_out_osd(3)
-    manager.wait_till_active()
-
-    manager.mark_in_osd(0)
-    manager.wait_till_active()
-
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-
-    manager.mark_out_osd(2)
-    manager.wait_till_active()
-
-    # bring up 1
-    manager.mark_in_osd(1)
-    manager.wait_till_active()
-
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    log.info("Getting unfound objects")
-    unfound = manager.get_num_unfound_objects()
-    assert not unfound
-
-    manager.kill_osd(2)
-    manager.mark_down_osd(2)
-    manager.kill_osd(3)
-    manager.mark_down_osd(3)
-
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    log.info("Getting unfound objects")
-    unfound = manager.get_num_unfound_objects()
-    assert unfound
diff --git a/teuthology/task/omapbench.py b/teuthology/task/omapbench.py
deleted file mode 100644 (file)
index 7d25354..0000000
+++ /dev/null
@@ -1,83 +0,0 @@
-"""
-Run omapbench executable within teuthology
-"""
-import contextlib
-import logging
-
-from ..orchestra import run
-from teuthology import misc as teuthology
-
-log = logging.getLogger(__name__)
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Run omapbench
-
-    The config should be as follows::
-
-                 omapbench:
-                     clients: [client list]
-                     threads: <threads at once>
-                     objects: <number of objects to write>
-                     entries: <number of entries per object map>
-                     keysize: <number of characters per object map key>
-                     valsize: <number of characters per object map val>
-                     increment: <interval to show in histogram (in ms)>
-                     omaptype: <how the omaps should be generated>
-
-    example::
-
-                 tasks:
-                 - ceph:
-                 - omapbench:
-                     clients: [client.0]
-                     threads: 30
-                     objects: 1000
-                     entries: 10
-                     keysize: 10
-                     valsize: 100
-                     increment: 100
-                     omaptype: uniform
-                 - interactive:
-    """
-    log.info('Beginning omapbench...')
-    assert isinstance(config, dict), \
-        "please list clients to run on"
-    omapbench = {}
-    testdir = teuthology.get_testdir(ctx)
-    print(str(config.get('increment',-1)))
-    for role in config.get('clients', ['client.0']):
-        assert isinstance(role, basestring)
-        PREFIX = 'client.'
-        assert role.startswith(PREFIX)
-        id_ = role[len(PREFIX):]
-        (remote,) = ctx.cluster.only(role).remotes.iterkeys()
-        proc = remote.run(
-            args=[
-                "/bin/sh", "-c",
-                " ".join(['adjust-ulimits',
-                          'ceph-coverage',
-                          '{tdir}/archive/coverage',
-                          'omapbench',
-                          '--name', role[len(PREFIX):],
-                          '-t', str(config.get('threads', 30)),
-                          '-o', str(config.get('objects', 1000)),
-                          '--entries', str(config.get('entries',10)),
-                          '--keysize', str(config.get('keysize',10)),
-                          '--valsize', str(config.get('valsize',1000)),
-                          '--inc', str(config.get('increment',10)),
-                          '--omaptype', str(config.get('omaptype','uniform'))
-                          ]).format(tdir=testdir),
-                ],
-            logger=log.getChild('omapbench.{id}'.format(id=id_)),
-            stdin=run.PIPE,
-            wait=False
-            )
-        omapbench[id_] = proc
-
-    try:
-        yield
-    finally:
-        log.info('joining omapbench')
-        run.wait(omapbench.itervalues())
diff --git a/teuthology/task/osd_backfill.py b/teuthology/task/osd_backfill.py
deleted file mode 100644 (file)
index d80ea22..0000000
+++ /dev/null
@@ -1,105 +0,0 @@
-"""
-Osd backfill test
-"""
-import logging
-import ceph_manager
-import time
-from teuthology import misc as teuthology
-
-
-log = logging.getLogger(__name__)
-
-
-def rados_start(ctx, remote, cmd):
-    """
-    Run a remote rados command (currently used to only write data)
-    """
-    log.info("rados %s" % ' '.join(cmd))
-    testdir = teuthology.get_testdir(ctx)
-    pre = [
-        'adjust-ulimits',
-        'ceph-coverage',
-        '{tdir}/archive/coverage'.format(tdir=testdir),
-        'rados',
-        ];
-    pre.extend(cmd)
-    proc = remote.run(
-        args=pre,
-        wait=False,
-        )
-    return proc
-
-def task(ctx, config):
-    """
-    Test backfill
-    """
-    if config is None:
-        config = {}
-    assert isinstance(config, dict), \
-        'thrashosds task only accepts a dict for configuration'
-    first_mon = teuthology.get_first_mon(ctx, config)
-    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-    
-    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
-    log.info('num_osds is %s' % num_osds)
-    assert num_osds == 3        
-
-    manager = ceph_manager.CephManager(
-        mon,
-        ctx=ctx,
-        logger=log.getChild('ceph_manager'),
-        )
-
-    while len(manager.get_osd_status()['up']) < 3:
-        manager.sleep(10)
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-    manager.wait_for_clean()
-
-    # write some data
-    p = rados_start(ctx, mon, ['-p', 'rbd', 'bench', '15', 'write', '-b', '4096',
-                          '--no-cleanup'])
-    err = p.exitstatus.get();
-    log.info('err is %d' % err)
-
-    # mark osd.0 out to trigger a rebalance/backfill
-    manager.mark_out_osd(0)
-
-    # also mark it down so it won't be included in pg_temps
-    manager.kill_osd(0)
-    manager.mark_down_osd(0)
-
-    # wait for everything to peer and be happy...
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-    manager.wait_for_recovery()
-
-    # write some new data
-    p = rados_start(ctx, mon, ['-p', 'data', 'bench', '30', 'write', '-b', '4096',
-                          '--no-cleanup'])
-
-    time.sleep(15)
-
-    # blackhole + restart osd.1
-    # this triggers a divergent backfill target
-    manager.blackhole_kill_osd(1)
-    time.sleep(2)
-    manager.revive_osd(1)
-
-    # wait for our writes to complete + succeed
-    err = p.exitstatus.get()
-    log.info('err is %d' % err)
-
-    # cluster must recover
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-    manager.wait_for_recovery()
-
-    # re-add osd.0
-    manager.revive_osd(0)
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-    manager.wait_for_clean()
-
-
diff --git a/teuthology/task/osd_failsafe_enospc.py b/teuthology/task/osd_failsafe_enospc.py
deleted file mode 100644 (file)
index 39b5b5c..0000000
+++ /dev/null
@@ -1,218 +0,0 @@
-"""
-Handle osdfailsafe configuration settings (nearfull ratio and full ratio)
-"""
-from cStringIO import StringIO
-import logging
-import time
-
-import ceph_manager
-from ..orchestra import run
-from teuthology.task_util.rados import rados
-from teuthology import misc as teuthology
-
-log = logging.getLogger(__name__)
-
-def task(ctx, config):
-    """
-    Test handling of osd_failsafe_nearfull_ratio and osd_failsafe_full_ratio
-    configuration settings
-
-    In order for the test to pass, log-whitelist must be used as follows::
-
-        tasks:
-            - chef:
-            - install:
-            - ceph:
-                log-whitelist: ['OSD near full', 'OSD full dropping all updates']
-            - osd_failsafe_enospc:
-
-    """
-    if config is None:
-        config = {}
-    assert isinstance(config, dict), \
-        'osd_failsafe_enospc task only accepts a dict for configuration'
-    first_mon = teuthology.get_first_mon(ctx, config)
-    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-
-    manager = ceph_manager.CephManager(
-        mon,
-        ctx=ctx,
-        logger=log.getChild('ceph_manager'),
-        )
-    ctx.manager = manager
-
-    # Give 2 seconds for injectargs + osd_op_complaint_time (30) + 2 * osd_heartbeat_interval (6) + 6 padding
-    sleep_time = 50
-
-    # something that is always there
-    dummyfile = '/etc/fstab'
-    dummyfile2 = '/etc/resolv.conf'
-
-    # create 1 pg pool with 1 rep which can only be on osd.0
-    osds = manager.get_osd_dump()
-    for osd in osds:
-        if osd['osd'] != 0:
-            manager.mark_out_osd(osd['osd'])
-
-    log.info('creating pool foo')
-    manager.create_pool("foo")
-    manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'size', '1')
-
-    # State NONE -> NEAR
-    log.info('1. Verify warning messages when exceeding nearfull_ratio')
-
-    proc = mon.run(
-             args=[
-                'daemon-helper',
-                'kill',
-                'ceph', '-w'
-             ],
-             stdin=run.PIPE,
-             stdout=StringIO(),
-             wait=False,
-        )
-
-    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .00001')
-
-    time.sleep(sleep_time)
-    proc.stdin.close() # causes daemon-helper to send SIGKILL to ceph -w
-    proc.exitstatus.get()
-
-    lines = proc.stdout.getvalue().split('\n')
-
-    count = len(filter(lambda line: '[WRN] OSD near full' in line, lines))
-    assert count == 2, 'Incorrect number of warning messages expected 2 got %d' % count
-    count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines))
-    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count
-
-    # State NEAR -> FULL
-    log.info('2. Verify error messages when exceeding full_ratio')
-
-    proc = mon.run(
-             args=[
-                'daemon-helper',
-                'kill',
-                'ceph', '-w'
-             ],
-             stdin=run.PIPE,
-             stdout=StringIO(),
-             wait=False,
-        )
-
-    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .00001')
-
-    time.sleep(sleep_time)
-    proc.stdin.close() # causes daemon-helper to send SIGKILL to ceph -w
-    proc.exitstatus.get()
-
-    lines = proc.stdout.getvalue().split('\n')
-
-    count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines))
-    assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count
-
-    log.info('3. Verify write failure when exceeding full_ratio')
-
-    # Write data should fail
-    ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile1', dummyfile])
-    assert ret != 0, 'Expected write failure but it succeeded with exit status 0'
-
-    # Put back default
-    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97')
-    time.sleep(10)
-
-    # State FULL -> NEAR
-    log.info('4. Verify write success when NOT exceeding full_ratio')
-
-    # Write should succeed
-    ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile2', dummyfile2])
-    assert ret == 0, 'Expected write to succeed, but got exit status %d' % ret
-
-    log.info('5. Verify warning messages again when exceeding nearfull_ratio')
-
-    proc = mon.run(
-             args=[
-                'daemon-helper',
-                'kill',
-                'ceph', '-w'
-             ],
-             stdin=run.PIPE,
-             stdout=StringIO(),
-             wait=False,
-        )
-
-    time.sleep(sleep_time)
-    proc.stdin.close() # causes daemon-helper to send SIGKILL to ceph -w
-    proc.exitstatus.get()
-
-    lines = proc.stdout.getvalue().split('\n')
-
-    count = len(filter(lambda line: '[WRN] OSD near full' in line, lines))
-    assert count == 1 or count == 2, 'Incorrect number of warning messages expected 1 or 2 got %d' % count
-    count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines))
-    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count
-
-    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .90')
-    time.sleep(10)
-
-    # State NONE -> FULL
-    log.info('6. Verify error messages again when exceeding full_ratio')
-
-    proc = mon.run(
-             args=[
-                'daemon-helper',
-                'kill',
-                'ceph', '-w'
-             ],
-             stdin=run.PIPE,
-             stdout=StringIO(),
-             wait=False,
-        )
-
-    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .00001')
-
-    time.sleep(sleep_time)
-    proc.stdin.close() # causes daemon-helper to send SIGKILL to ceph -w
-    proc.exitstatus.get()
-
-    lines = proc.stdout.getvalue().split('\n')
-
-    count = len(filter(lambda line: '[WRN] OSD near full' in line, lines))
-    assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count
-    count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines))
-    assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count
-
-    # State FULL -> NONE
-    log.info('7. Verify no messages when settings are back to default')
-
-    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97')
-    time.sleep(10)
-
-    proc = mon.run(
-             args=[
-                'daemon-helper',
-                'kill',
-                'ceph', '-w'
-             ],
-             stdin=run.PIPE,
-             stdout=StringIO(),
-             wait=False,
-        )
-
-    time.sleep(sleep_time)
-    proc.stdin.close() # causes daemon-helper to send SIGKILL to ceph -w
-    proc.exitstatus.get()
-
-    lines = proc.stdout.getvalue().split('\n')
-
-    count = len(filter(lambda line: '[WRN] OSD near full' in line, lines))
-    assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count
-    count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines))
-    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count
-
-    log.info('Test Passed')
-
-    # Bring all OSDs back in
-    manager.remove_pool("foo")
-    for osd in osds:
-        if osd['osd'] != 0:
-            manager.mark_in_osd(osd['osd'])
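The same "watch 'ceph -w' for a while and count matching log lines" block appears five times above. A possible refactor sketch (not part of the original file; it relies on the module's existing imports of StringIO, time and run):

    def watch_cluster_log(mon, duration, patterns):
        """Capture 'ceph -w' on the mon for `duration` seconds and return a
        dict mapping each pattern to the number of log lines containing it."""
        proc = mon.run(
            args=['daemon-helper', 'kill', 'ceph', '-w'],
            stdin=run.PIPE,
            stdout=StringIO(),
            wait=False,
            )
        time.sleep(duration)
        proc.stdin.close()  # causes daemon-helper to send SIGKILL to ceph -w
        proc.exitstatus.get()
        lines = proc.stdout.getvalue().split('\n')
        return dict((p, len([l for l in lines if p in l])) for p in patterns)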
diff --git a/teuthology/task/osd_recovery.py b/teuthology/task/osd_recovery.py
deleted file mode 100644 (file)
index 1ff1733..0000000
+++ /dev/null
@@ -1,206 +0,0 @@
-"""
-osd recovery
-"""
-import logging
-import ceph_manager
-import time
-from teuthology import misc as teuthology
-
-
-log = logging.getLogger(__name__)
-
-
-def rados_start(testdir, remote, cmd):
-    """
-    Run a remote rados command (currently used to only write data)
-    """
-    log.info("rados %s" % ' '.join(cmd))
-    pre = [
-        'adjust-ulimits',
-        'ceph-coverage',
-        '{tdir}/archive/coverage'.format(tdir=testdir),
-        'rados',
-        ];
-    pre.extend(cmd)
-    proc = remote.run(
-        args=pre,
-        wait=False,
-        )
-    return proc
-
-def task(ctx, config):
-    """
-    Test (non-backfill) recovery
-    """
-    if config is None:
-        config = {}
-    assert isinstance(config, dict), \
-        'task only accepts a dict for configuration'
-    testdir = teuthology.get_testdir(ctx)
-    first_mon = teuthology.get_first_mon(ctx, config)
-    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-    
-    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
-    log.info('num_osds is %s' % num_osds)
-    assert num_osds == 3        
-
-    manager = ceph_manager.CephManager(
-        mon,
-        ctx=ctx,
-        logger=log.getChild('ceph_manager'),
-        )
-
-    while len(manager.get_osd_status()['up']) < 3:
-        manager.sleep(10)
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-    manager.wait_for_clean()
-
-    # test some osdmap flags
-    manager.raw_cluster_cmd('osd', 'set', 'noin')
-    manager.raw_cluster_cmd('osd', 'set', 'noout')
-    manager.raw_cluster_cmd('osd', 'set', 'noup')
-    manager.raw_cluster_cmd('osd', 'set', 'nodown')
-    manager.raw_cluster_cmd('osd', 'unset', 'noin')
-    manager.raw_cluster_cmd('osd', 'unset', 'noout')
-    manager.raw_cluster_cmd('osd', 'unset', 'noup')
-    manager.raw_cluster_cmd('osd', 'unset', 'nodown')
-
-    # write some new data
-    p = rados_start(testdir, mon, ['-p', 'rbd', 'bench', '60', 'write', '-b', '4096',
-                          '--no-cleanup'])
-
-    time.sleep(15)
-
-    # trigger a divergent target:
-    #  blackhole + restart osd.1 (shorter log)
-    manager.blackhole_kill_osd(1)
-    #  kill osd.2 (longer log... we'll make it divergent below)
-    manager.kill_osd(2)
-    time.sleep(2)
-    manager.revive_osd(1)
-
-    # wait for our writes to complete + succeed
-    err = p.exitstatus.get()
-    log.info('err is %d' % err)
-
-    # cluster must repeer
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.wait_for_active_or_down()
-
-    # write some more (make sure osd.2 really is divergent)
-    p = rados_start(testdir, mon, ['-p', 'rbd', 'bench', '15', 'write', '-b', '4096'])
-    p.exitstatus.get();
-
-    # revive divergent osd
-    manager.revive_osd(2)
-
-    while len(manager.get_osd_status()['up']) < 3:
-        log.info('waiting a bit...')
-        time.sleep(2)
-    log.info('3 are up!')
-
-    # cluster must recover
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-    manager.wait_for_clean()
-
-
-def test_incomplete_pgs(ctx, config):
-    """
-    Test handling of incomplete pgs.  Requires 4 osds.
-    """
-    testdir = teuthology.get_testdir(ctx)
-    if config is None:
-        config = {}
-    assert isinstance(config, dict), \
-        'task only accepts a dict for configuration'
-    first_mon = teuthology.get_first_mon(ctx, config)
-    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-
-    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
-    log.info('num_osds is %s' % num_osds)
-    assert num_osds == 4
-
-    manager = ceph_manager.CephManager(
-        mon,
-        ctx=ctx,
-        logger=log.getChild('ceph_manager'),
-        )
-
-    while len(manager.get_osd_status()['up']) < 4:
-        time.sleep(10)
-
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
-    manager.wait_for_clean()
-
-    log.info('Testing incomplete pgs...')
-
-    for i in range(4):
-        manager.set_config(
-            i,
-            osd_recovery_delay_start=1000)
-
-    # move data off of osd.0, osd.1
-    manager.raw_cluster_cmd('osd', 'out', '0', '1')
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
-    manager.wait_for_clean()
-
-    # lots of objects in rbd (no pg log, will backfill)
-    p = rados_start(testdir, mon,
-                    ['-p', 'rbd', 'bench', '60', 'write', '-b', '1',
-                     '--no-cleanup'])
-    p.exitstatus.get()
-
-    # few objects in metadata pool (with pg log, normal recovery)
-    for f in range(1, 20):
-        p = rados_start(testdir, mon, ['-p', 'metadata', 'put',
-                              'foo.%d' % f, '/etc/passwd'])
-        p.exitstatus.get()
-
-    # move it back
-    manager.raw_cluster_cmd('osd', 'in', '0', '1')
-    manager.raw_cluster_cmd('osd', 'out', '2', '3')
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
-    manager.wait_for_active()
-
-    assert not manager.is_clean()
-    assert not manager.is_recovered()
-
-    # kill 2 + 3
-    log.info('stopping 2,3')
-    manager.kill_osd(2)
-    manager.kill_osd(3)
-    log.info('...')
-    manager.raw_cluster_cmd('osd', 'down', '2', '3')
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.wait_for_active_or_down()
-
-    assert manager.get_num_down() > 0
-
-    # revive 2 + 3
-    manager.revive_osd(2)
-    manager.revive_osd(3)
-    while len(manager.get_osd_status()['up']) < 4:
-        log.info('waiting a bit...')
-        time.sleep(2)
-    log.info('all are up!')
-
-    for i in range(4):
-        manager.kick_recovery_wq(i)
-
-    # cluster must recover
-    manager.wait_for_clean()
diff --git a/teuthology/task/parallel.py b/teuthology/task/parallel.py
deleted file mode 100644 (file)
index 4cfb678..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-"""
-Task to group parallel running tasks
-"""
-import sys
-import logging
-
-from teuthology import run_tasks
-from teuthology import parallel
-
-log = logging.getLogger(__name__)
-
-
-def task(ctx, config):
-    """
-    Run a group of tasks in parallel.
-
-    example:
-    - parallel:
-       - tasktest:
-       - tasktest:
-
-    You can also reference the job from elsewhere:
-
-    foo:
-      tasktest:
-    tasks:
-    - parallel:
-      - foo
-      - tasktest:
-
-    That is, if the entry is not a dict, we will look it up in the top-level
-    config.
-
-    Sequential and parallel tasks can be nested.
-    """
-
-    log.info('starting parallel...')
-    with parallel.parallel() as p:
-        for entry in config:
-            if not isinstance(entry, dict):
-                entry = ctx.config.get(entry, {})
-            ((taskname, confg),) = entry.iteritems()
-            p.spawn(_run_spawned, ctx, confg, taskname)
-
-def _run_spawned(ctx,config,taskname):
-    """Run one of the tasks (this runs in parallel with others)"""
-    mgr = {}
-    try:
-        log.info('In parallel, running task %s...' % taskname)
-        mgr = run_tasks.run_one_task(taskname, ctx=ctx, config=config)
-        if hasattr(mgr, '__enter__'):
-            mgr.__enter__()
-    finally:
-        exc_info = sys.exc_info()
-        if hasattr(mgr, '__exit__'):
-            mgr.__exit__(*exc_info)
-        del exc_info
diff --git a/teuthology/task/parallel_example.py b/teuthology/task/parallel_example.py
deleted file mode 100644 (file)
index 04babfc..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-"""
-Parallel contextmanager test
-"""
-import contextlib
-import logging
-
-from teuthology import misc as teuthology
-from teuthology import contextutil
-from ..orchestra import run
-
-log = logging.getLogger(__name__)
-
-@contextlib.contextmanager
-def sequential_test(ctx, config):
-    """Example contextmanager that executes a command on remote hosts sequentially."""
-    for role in config:
-        """Create a cluster composed of all hosts with the given role, and run the command on them sequentially."""
-        log.info('Executing command on all hosts sequentially with role "%s"' % role)
-        ctx.cluster.only(role).run(args=['sleep', '5', run.Raw(';'), 'date', run.Raw(';'), 'hostname'])
-    yield
-
-@contextlib.contextmanager
-def parallel_test(ctx, config):
-    """Example contextmanager that executes a command on remote hosts in parallel."""
-    for role in config:
-        """Create a cluster composed of all hosts with the given role, and run the command on them concurrently."""
-        log.info('Executing command on all hosts concurrently with role "%s"' % role)
-        cluster = ctx.cluster.only(role)
-        nodes = {}
-        for remote in cluster.remotes.iterkeys():
-            """Call run for each remote host, but use 'wait=False' to have it return immediately."""
-            proc = remote.run(args=['sleep', '5', run.Raw(';'), 'date', run.Raw(';'), 'hostname'], wait=False,)
-            nodes[remote.name] = proc
-        for name, proc in nodes.iteritems():
-            """Wait for each process to finish before yielding and allowing other contextmanagers to run."""
-            proc.exitstatus.get()
-    yield
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """This is the main body of the task that gets run."""
-
-    """Take car of some yaml parsing here"""
-    if config is not None and not isinstance(config, list) and not isinstance(config, dict):
-        assert(False), "task parallel_example only supports a list or dictionary for configuration"
-    if config is None:
-        config = ['client.{id}'.format(id=id_)
-                  for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] 
-    if isinstance(config, list):
-        config = dict.fromkeys(config)
-    clients = config.keys()
-
-    """Run Multiple contextmanagers sequentially by nesting them."""
-    with contextutil.nested(
-        lambda: parallel_test(ctx=ctx, config=clients),
-        lambda: sequential_test(ctx=ctx, config=clients),
-        ):
-        yield
diff --git a/teuthology/task/peer.py b/teuthology/task/peer.py
deleted file mode 100644 (file)
index 8006c38..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-"""
-Peer test (Single test, not much configurable here)
-"""
-import logging
-import json
-
-import ceph_manager
-from teuthology import misc as teuthology
-from teuthology.task_util.rados import rados
-
-log = logging.getLogger(__name__)
-
-def task(ctx, config):
-    """
-    Test peering.
-    """
-    if config is None:
-        config = {}
-    assert isinstance(config, dict), \
-        'peer task only accepts a dict for configuration'
-    first_mon = teuthology.get_first_mon(ctx, config)
-    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-
-    manager = ceph_manager.CephManager(
-        mon,
-        ctx=ctx,
-        logger=log.getChild('ceph_manager'),
-        )
-
-    while len(manager.get_osd_status()['up']) < 3:
-        manager.sleep(10)
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-    manager.wait_for_clean()
-
-    for i in range(3):
-        manager.set_config(
-            i,
-            osd_recovery_delay_start=120)
-
-    # take one osd down
-    manager.kill_osd(2)
-    manager.mark_down_osd(2)
-
-    # kludge to make sure they get a map
-    rados(ctx, mon, ['-p', 'data', 'get', 'dummy', '-'])
-
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.wait_for_recovery()
-
-    # kill another and revive 2, so that some pgs can't peer.
-    manager.kill_osd(1)
-    manager.mark_down_osd(1)
-    manager.revive_osd(2)
-    manager.wait_till_osd_is_up(2)
-
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-
-    manager.wait_for_active_or_down()
-
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-
-    # look for down pgs
-    num_down_pgs = 0
-    pgs = manager.get_pg_stats()
-    for pg in pgs:
-        out = manager.raw_cluster_cmd('pg', pg['pgid'], 'query')
-       log.debug("out string %s",out)
-        j = json.loads(out)
-        log.info("pg is %s, query json is %s", pg, j)
-
-        if pg['state'].count('down'):
-            num_down_pgs += 1
-            # verify that it is blocked on osd.1
-            rs = j['recovery_state']
-            assert len(rs) > 0
-            assert rs[0]['name'] == 'Started/Primary/Peering/GetInfo'
-            assert rs[1]['name'] == 'Started/Primary/Peering'
-            assert rs[1]['blocked']
-            assert rs[1]['down_osds_we_would_probe'] == [1]
-            assert len(rs[1]['peering_blocked_by']) == 1
-            assert rs[1]['peering_blocked_by'][0]['osd'] == 1
-
-    assert num_down_pgs > 0
-
-    # bring it all back
-    manager.revive_osd(1)
-    manager.wait_till_osd_is_up(1)
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-    manager.wait_for_clean()
diff --git a/teuthology/task/peering_speed_test.py b/teuthology/task/peering_speed_test.py
deleted file mode 100644 (file)
index 6c885f1..0000000
+++ /dev/null
@@ -1,93 +0,0 @@
-"""
-Remotely run peering tests.
-"""
-import logging
-import time
-from teuthology import misc as teuthology
-import ceph_manager
-
-log = logging.getLogger(__name__)
-
-from args import argify
-
-POOLNAME = "POOLNAME"
-ARGS = [
-    ('num_pgs', 'number of pgs to create', 256, int),
-    ('max_time', 'seconds to complete peering', 0, int),
-    ('runs', 'trials to run', 10, int),
-    ('num_objects', 'objects to create', 256 * 1024, int),
-    ('object_size', 'size in bytes for objects', 64, int),
-    ('creation_time_limit', 'time limit for pool population', 60*60, int),
-    ('create_threads', 'concurrent writes for create', 256, int)
-    ]
-
-def setup(ctx, config):
-    """
-    Setup peering test on remotes.
-    """
-    first_mon = teuthology.get_first_mon(ctx, config)
-    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-    ctx.manager = ceph_manager.CephManager(
-        mon,
-        ctx=ctx,
-        logger=log.getChild('ceph_manager'),
-        )
-    ctx.manager.clear_pools()
-    ctx.manager.create_pool(POOLNAME, config.num_pgs)
-    log.info("populating pool")
-    ctx.manager.rados_write_objects(
-        POOLNAME,
-        config.num_objects,
-        config.object_size,
-        config.creation_time_limit,
-        config.create_threads)
-    log.info("done populating pool")
-
-def do_run(ctx, config):
-    """
-    Perform the test.
-    """
-    start = time.time()
-    # mark in osd
-    ctx.manager.mark_in_osd(0)
-    log.info("writing out objects")
-    ctx.manager.rados_write_objects(
-        POOLNAME,
-        config.num_pgs, # write 1 object per pg or so
-        1,
-        config.creation_time_limit,
-        config.num_pgs, # lots of concurrency
-        cleanup = True)
-    peering_end = time.time()
-
-    log.info("peering done, waiting on recovery")
-    ctx.manager.wait_for_clean()
-
-    log.info("recovery done")
-    recovery_end = time.time()
-    if config.max_time:
-        assert(peering_end - start < config.max_time)
-    ctx.manager.mark_out_osd(0)
-    ctx.manager.wait_for_clean()
-    return {
-        'time_to_active': peering_end - start,
-        'time_to_clean': recovery_end - start
-        }
-
-@argify("peering_speed_test", ARGS)
-def task(ctx, config):
-    """
-    Peering speed test
-    """
-    setup(ctx, config)
-    ctx.manager.mark_out_osd(0)
-    ctx.manager.wait_for_clean()
-    ret = []
-    for i in range(config.runs):
-        log.info("Run {i}".format(i = i))
-        ret.append(do_run(ctx, config))
-
-    ctx.manager.mark_in_osd(0)
-    ctx.summary['recovery_times'] = {
-        'runs': ret
-        }
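For reference, the tunables declared in ARGS translate to the following knobs (defaults shown; the @argify decorator is assumed to expose the task config as attribute-style access such as config.num_pgs):

    # Defaults taken from the ARGS table above; values here only sketch what a
    # task config could override.
    peering_speed_defaults = {
        'num_pgs': 256,                  # pgs to create in the test pool
        'max_time': 0,                   # fail if peering takes longer; 0 = no limit
        'runs': 10,                      # number of trials
        'num_objects': 256 * 1024,       # objects written while populating the pool
        'object_size': 64,               # bytes per object
        'creation_time_limit': 60 * 60,  # seconds allowed for pool population
        'create_threads': 256,           # concurrent writes during setup
    }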
diff --git a/teuthology/task/pexec.py b/teuthology/task/pexec.py
deleted file mode 100644 (file)
index 742ac00..0000000
+++ /dev/null
@@ -1,149 +0,0 @@
-"""
-Handle parallel execution on remote hosts
-"""
-import logging
-
-from teuthology import misc as teuthology
-from teuthology.parallel import parallel
-from teuthology.orchestra import run as tor
-
-log = logging.getLogger(__name__)
-
-from gevent import queue as queue
-from gevent import event as event
-
-def _init_barrier(barrier_queue, remote):
-    """current just queues a remote host""" 
-    barrier_queue.put(remote)
-
-def _do_barrier(barrier, barrier_queue, remote):
-    """special case for barrier"""
-    barrier_queue.get()
-    if barrier_queue.empty():
-        barrier.set()
-        barrier.clear()
-    else:
-        barrier.wait()
-
-    barrier_queue.put(remote)
-    if barrier_queue.full():
-        barrier.set()
-        barrier.clear()
-    else:
-        barrier.wait()
-
-def _exec_host(barrier, barrier_queue, remote, sudo, testdir, ls):
-    """Execute command remotely"""
-    log.info('Running commands on host %s', remote.name)
-    args = [
-        'TESTDIR={tdir}'.format(tdir=testdir),
-        'bash',
-        '-s'
-        ]
-    if sudo:
-        args.insert(0, 'sudo')
-    
-    r = remote.run( args=args, stdin=tor.PIPE, wait=False)
-    r.stdin.writelines(['set -e\n'])
-    r.stdin.flush()
-    for l in ls:
-        l = l.replace('$TESTDIR', testdir)
-        if l == "barrier":
-            _do_barrier(barrier, barrier_queue, remote)
-            continue
-
-        r.stdin.writelines([l, '\n'])
-        r.stdin.flush()
-    r.stdin.writelines(['\n'])
-    r.stdin.flush()
-    r.stdin.close()
-    tor.wait([r])
-
-def _generate_remotes(ctx, config):
-    """Return remote roles and the type of role specified in config"""
-    if 'all' in config and len(config) == 1:
-        ls = config['all']
-        for remote in ctx.cluster.remotes.iterkeys():
-            yield (remote, ls)
-    elif 'clients' in config:
-        ls = config['clients']
-        for role in teuthology.all_roles_of_type(ctx.cluster, 'client'):
-            (remote,) = ctx.cluster.only('client.{r}'.format(r=role)).remotes.iterkeys()
-            yield (remote, ls)
-        del config['clients']
-        for role, ls in config.iteritems():
-            (remote,) = ctx.cluster.only(role).remotes.iterkeys()
-            yield (remote, ls)
-    else:
-        for role, ls in config.iteritems():
-            (remote,) = ctx.cluster.only(role).remotes.iterkeys()
-            yield (remote, ls)
-
-def task(ctx, config):
-    """
-    Execute commands on multiple hosts in parallel
-
-        tasks:
-        - ceph:
-        - ceph-fuse: [client.0, client.1]
-        - pexec:
-            client.0:
-              - while true; do echo foo >> bar; done
-            client.1:
-              - sleep 1
-              - tail -f bar
-        - interactive:
-
-    Execute commands on all hosts in the cluster in parallel.  This
-    is useful if there are many hosts and you want to run the same
-    command on all:
-
-        tasks:
-        - pexec:
-            all:
-              - grep FAIL /var/log/ceph/*
-
-    Or if you want to run in parallel on all clients:
-
-        tasks:
-        - pexec:
-            clients:
-              - dd if=/dev/zero of={testdir}/mnt.* count=1024 bs=1024
-
-    You can also ensure that parallel commands are synchronized with the
-    special 'barrier' statement:
-
-    tasks:
-    - pexec:
-        clients:
-          - cd {testdir}/mnt.*
-          - while true; do
-          -   barrier
-          -   dd if=/dev/zero of=./foo count=1024 bs=1024
-          - done
-
-    The above writes to the file foo on all clients over and over, but ensures that
-    all clients perform each write command in sync.  If one client takes longer to
-    write, all the other clients will wait.
-
-    """
-    log.info('Executing custom commands...')
-    assert isinstance(config, dict), "task pexec got invalid config"
-
-    sudo = False
-    if 'sudo' in config:
-        sudo = config['sudo']
-        del config['sudo']
-
-    testdir = teuthology.get_testdir(ctx)
-
-    remotes = list(_generate_remotes(ctx, config))
-    count = len(remotes)
-    barrier_queue = queue.Queue(count)
-    barrier = event.Event()
-
-    for remote in remotes:
-        _init_barrier(barrier_queue, remote[0])
-    with parallel() as p:
-        for remote in remotes:
-            p.spawn(_exec_host, barrier, barrier_queue, remote[0], sudo, testdir, remote[1])
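The docstring examples above do not show the 'sudo' switch that task() strips from the config before dispatching. A minimal sketch of a config exercising it (the same mapping a '- pexec:' YAML entry would provide):

    # Hypothetical pexec config: run the generated shell via sudo on every
    # host in the cluster ('all' is handled by _generate_remotes above).
    pexec_config = {
        'sudo': True,
        'all': [
            'grep FAIL /var/log/ceph/*',
        ],
    }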
diff --git a/teuthology/task/print.py b/teuthology/task/print.py
deleted file mode 100644 (file)
index 198203f..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-"""
-Print task
-"""
-
-import logging
-
-log = logging.getLogger(__name__)
-
-def task(ctx, config):
-    """
-    Print out config argument in teuthology log/output
-    """
-    log.info('{config}'.format(config=config))
diff --git a/teuthology/task/proc_thrasher.py b/teuthology/task/proc_thrasher.py
deleted file mode 100644 (file)
index 1c1100f..0000000
+++ /dev/null
@@ -1,80 +0,0 @@
-"""
-Process thrasher
-"""
-import logging
-import gevent
-import random
-import time
-
-from ..orchestra import run
-
-log = logging.getLogger(__name__)
-
-class ProcThrasher:
-    """ Kills and restarts some number of the specified process on the specified
-        remote
-    """
-    def __init__(self, config, remote, *proc_args, **proc_kwargs):
-        self.proc_kwargs = proc_kwargs
-        self.proc_args = proc_args
-        self.config = config
-        self.greenlet = None
-        self.logger = proc_kwargs.get("logger", log.getChild('proc_thrasher'))
-        self.remote = remote
-
-        # config:
-        self.num_procs = self.config.get("num_procs", 5)
-        self.rest_period = self.config.get("rest_period", 100) # seconds
-        self.run_time = self.config.get("run_time", 1000) # seconds
-
-    def log(self, msg):
-        """
-        Local log wrapper
-        """
-        self.logger.info(msg)
-
-    def start(self):
-        """
-        Start thrasher.  This also makes sure that the greenlet interface
-        is used.
-        """
-        if self.greenlet is not None:
-            return
-        self.greenlet = gevent.Greenlet(self.loop)
-        self.greenlet.start()
-
-    def join(self):
-        """
-        Local join
-        """
-        self.greenlet.join()
-
-    def loop(self):
-        """
-        Thrashing loop -- loops at time intervals.  Inside that loop, the
-        code loops through the individual procs, creating new procs.
-        """
-        time_started = time.time()
-        procs = []
-        self.log("Starting")
-        while time_started + self.run_time > time.time():
-            if len(procs) > 0:
-                self.log("Killing proc")
-                proc = random.choice(procs)
-                procs.remove(proc)
-                proc.stdin.close()
-                self.log("About to wait")
-                run.wait([proc])
-                self.log("Killed proc")
-                
-            while len(procs) < self.num_procs:
-                self.log("Creating proc " + str(len(procs) + 1))
-                self.log("args are " + str(self.proc_args) + " kwargs: " + str(self.proc_kwargs))
-                procs.append(self.remote.run(
-                        *self.proc_args,
-                        ** self.proc_kwargs))
-            self.log("About to sleep")
-            time.sleep(self.rest_period)
-            self.log("Just woke")
-
-        run.wait(procs)
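ProcThrasher is a library class rather than a task (there is no task() entry point here), so callers construct it directly. A hypothetical usage sketch; the command and numbers are illustrative, and stdin=run.PIPE plus wait=False are needed because loop() stops a process by closing its stdin and then waiting on it (relies on the module's `run` import):

    thrasher = ProcThrasher(
        {'num_procs': 3, 'rest_period': 30, 'run_time': 600},
        remote,  # an orchestra remote, e.g. one taken from ctx.cluster
        args=['daemon-helper', 'kill', 'some-long-running-command'],
        stdin=run.PIPE,
        wait=False,
        )
    thrasher.start()
    # ... run other work while the processes are killed and restarted ...
    thrasher.join()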
diff --git a/teuthology/task/qemu.py b/teuthology/task/qemu.py
deleted file mode 100644 (file)
index a05b4db..0000000
+++ /dev/null
@@ -1,327 +0,0 @@
-"""
-Qemu task
-"""
-from cStringIO import StringIO
-
-import contextlib
-import logging
-import os
-
-from teuthology import misc as teuthology
-from teuthology import contextutil
-from teuthology.task import rbd
-from ..orchestra import run
-
-log = logging.getLogger(__name__)
-
-DEFAULT_NUM_RBD = 1
-DEFAULT_IMAGE_URL = 'http://ceph.com/qa/ubuntu-12.04.qcow2'
-DEFAULT_MEM = 4096 # in megabytes
-
-@contextlib.contextmanager
-def create_dirs(ctx, config):
-    """
-    Handle directory creation and cleanup
-    """
-    testdir = teuthology.get_testdir(ctx)
-    for client, client_config in config.iteritems():
-        assert 'test' in client_config, 'You must specify a test to run'
-        (remote,) = ctx.cluster.only(client).remotes.keys()
-        remote.run(
-            args=[
-                'install', '-d', '-m0755', '--',
-                '{tdir}/qemu'.format(tdir=testdir),
-                '{tdir}/archive/qemu'.format(tdir=testdir),
-                ]
-            )
-    try:
-        yield
-    finally:
-        for client, client_config in config.iteritems():
-            assert 'test' in client_config, 'You must specify a test to run'
-            (remote,) = ctx.cluster.only(client).remotes.keys()
-            remote.run(
-                args=[
-                    'rmdir', '{tdir}/qemu'.format(tdir=testdir), run.Raw('||'), 'true',
-                    ]
-                )
-
-@contextlib.contextmanager
-def generate_iso(ctx, config):
-    """Execute system commands to generate iso"""
-    log.info('generating iso...')
-    testdir = teuthology.get_testdir(ctx)
-    for client, client_config in config.iteritems():
-        assert 'test' in client_config, 'You must specify a test to run'
-        src_dir = os.path.dirname(__file__)
-        userdata_path = os.path.join(testdir, 'qemu', 'userdata.' + client)
-        metadata_path = os.path.join(testdir, 'qemu', 'metadata.' + client)
-
-        with file(os.path.join(src_dir, 'userdata_setup.yaml'), 'rb') as f:
-            test_setup = ''.join(f.readlines())
-
-        with file(os.path.join(src_dir, 'userdata_teardown.yaml'), 'rb') as f:
-            test_teardown = ''.join(f.readlines())
-
-        user_data = test_setup
-        if client_config.get('type', 'filesystem') == 'filesystem':
-            for i in xrange(0, client_config.get('num_rbd', DEFAULT_NUM_RBD)):
-                dev_letter = chr(ord('b') + i)
-                user_data += """
-- |
-  #!/bin/bash
-  mkdir /mnt/test_{dev_letter}
-  mkfs -t xfs /dev/vd{dev_letter}
-  mount -t xfs /dev/vd{dev_letter} /mnt/test_{dev_letter}
-""".format(dev_letter=dev_letter)
-
-        # this may change later to pass the directories as args to the
-        # script or something. xfstests needs that.
-        user_data += """
-- |
-  #!/bin/bash
-  test -d /mnt/test_b && cd /mnt/test_b
-  /mnt/cdrom/test.sh > /mnt/log/test.log 2>&1 && touch /mnt/log/success
-""" + test_teardown
-
-        (remote,) = ctx.cluster.only(client).remotes.keys()
-        teuthology.write_file(remote, userdata_path, StringIO(user_data))
-
-        with file(os.path.join(src_dir, 'metadata.yaml'), 'rb') as f:
-            teuthology.write_file(remote, metadata_path, f)
-
-        test_file = '{tdir}/qemu/{client}.test.sh'.format(tdir=testdir, client=client)
-        remote.run(
-            args=[
-                'wget', '-nv', '-O', test_file,
-                client_config['test'],
-                run.Raw('&&'),
-                'chmod', '755', test_file,
-                ],
-            )
-        remote.run(
-            args=[
-                'genisoimage', '-quiet', '-input-charset', 'utf-8',
-                '-volid', 'cidata', '-joliet', '-rock',
-                '-o', '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client),
-                '-graft-points',
-                'user-data={userdata}'.format(userdata=userdata_path),
-                'meta-data={metadata}'.format(metadata=metadata_path),
-                'test.sh={file}'.format(file=test_file),
-                ],
-            )
-    try:
-        yield
-    finally:
-        for client in config.iterkeys():
-            (remote,) = ctx.cluster.only(client).remotes.keys()
-            remote.run(
-                args=[
-                    'rm', '-f',
-                    '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client),
-                    os.path.join(testdir, 'qemu', 'userdata.' + client),
-                    os.path.join(testdir, 'qemu', 'metadata.' + client),
-                    '{tdir}/qemu/{client}.test.sh'.format(tdir=testdir, client=client),
-                    ],
-                )
-
-@contextlib.contextmanager
-def download_image(ctx, config):
-    """Downland base image, remove image file when done"""
-    log.info('downloading base image')
-    testdir = teuthology.get_testdir(ctx)
-    for client, client_config in config.iteritems():
-        (remote,) = ctx.cluster.only(client).remotes.keys()
-        base_file = '{tdir}/qemu/base.{client}.qcow2'.format(tdir=testdir, client=client)
-        remote.run(
-            args=[
-                'wget', '-nv', '-O', base_file, DEFAULT_IMAGE_URL,
-                ]
-            )
-    try:
-        yield
-    finally:
-        log.debug('cleaning up base image files')
-        for client in config.iterkeys():
-            base_file = '{tdir}/qemu/base.{client}.qcow2'.format(
-                tdir=testdir,
-                client=client,
-                )
-            (remote,) = ctx.cluster.only(client).remotes.keys()
-            remote.run(
-                args=[
-                    'rm', '-f', base_file,
-                    ],
-                )
-
-@contextlib.contextmanager
-def run_qemu(ctx, config):
-    """Setup kvm environment and start qemu"""
-    procs = []
-    testdir = teuthology.get_testdir(ctx)
-    for client, client_config in config.iteritems():
-        (remote,) = ctx.cluster.only(client).remotes.keys()
-        log_dir = '{tdir}/archive/qemu/{client}'.format(tdir=testdir, client=client)
-        remote.run(
-            args=[
-                'mkdir', log_dir, run.Raw('&&'),
-                'sudo', 'modprobe', 'kvm',
-                ]
-            )
-
-        base_file = '{tdir}/qemu/base.{client}.qcow2'.format(tdir=testdir, client=client)
-        args=[
-            'adjust-ulimits',
-            'ceph-coverage',
-            '{tdir}/archive/coverage'.format(tdir=testdir),
-            'daemon-helper',
-            'term',
-            'qemu-system-x86_64', '-enable-kvm', '-nographic',
-            '-m', str(client_config.get('memory', DEFAULT_MEM)),
-            # base OS device
-            '-drive',
-            'file={base},format=qcow2,if=virtio'.format(base=base_file),
-            # cd holding metadata for cloud-init
-            '-cdrom', '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client),
-            # virtio 9p fs for logging
-            '-fsdev',
-            'local,id=log,path={log},security_model=none'.format(log=log_dir),
-            '-device',
-            'virtio-9p-pci,fsdev=log,mount_tag=test_log',
-            ]
-
-        cachemode = 'none'
-        ceph_config = ctx.ceph.conf.get('global', {})
-        ceph_config.update(ctx.ceph.conf.get('client', {}))
-        ceph_config.update(ctx.ceph.conf.get(client, {}))
-        if ceph_config.get('rbd cache'):
-            if ceph_config.get('rbd cache max dirty', 1) > 0:
-                cachemode = 'writeback'
-            else:
-                cachemode = 'writethrough'
-
-        for i in xrange(client_config.get('num_rbd', DEFAULT_NUM_RBD)):
-            args.extend([
-                '-drive',
-                'file=rbd:rbd/{img}:id={id},format=raw,if=virtio,cache={cachemode}'.format(
-                    img='{client}.{num}'.format(client=client, num=i),
-                    id=client[len('client.'):],
-                    cachemode=cachemode,
-                    ),
-                ])
-
-        log.info('starting qemu...')
-        procs.append(
-            remote.run(
-                args=args,
-                logger=log.getChild(client),
-                stdin=run.PIPE,
-                wait=False,
-                )
-            )
-
-    try:
-        yield
-    finally:
-        log.info('waiting for qemu tests to finish...')
-        run.wait(procs)
-
-        log.debug('checking that qemu tests succeeded...')
-        for client in config.iterkeys():
-            (remote,) = ctx.cluster.only(client).remotes.keys()
-            remote.run(
-                args=[
-                    'test', '-f',
-                    '{tdir}/archive/qemu/{client}/success'.format(
-                        tdir=testdir,
-                        client=client
-                        ),
-                    ],
-                )
-
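The cache-mode selection in run_qemu above maps the effective rbd cache settings (global, client and per-client ceph.conf sections, merged in that order) onto the QEMU drive cache mode. The same decision, pulled out into a tiny standalone helper purely for illustration:

    def rbd_cache_mode(ceph_config):
        # mirrors run_qemu above: writeback when the rbd cache may hold dirty data,
        # writethrough when 'rbd cache max dirty' is 0, no caching otherwise
        if ceph_config.get('rbd cache'):
            if ceph_config.get('rbd cache max dirty', 1) > 0:
                return 'writeback'
            return 'writethrough'
        return 'none'

    assert rbd_cache_mode({}) == 'none'
    assert rbd_cache_mode({'rbd cache': True}) == 'writeback'
    assert rbd_cache_mode({'rbd cache': True, 'rbd cache max dirty': 0}) == 'writethrough'
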
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Run a test inside of QEMU on top of rbd. Only one test
-    is supported per client.
-
-    For example, you can specify which clients to run on::
-
-        tasks:
-        - ceph:
-        - qemu:
-            client.0:
-              test: http://ceph.com/qa/test.sh
-            client.1:
-              test: http://ceph.com/qa/test2.sh
-
-    Or use the same settings on all clients::
-
-        tasks:
-        - ceph:
-        - qemu:
-            all:
-              test: http://ceph.com/qa/test.sh
-
-    For tests that don't need a filesystem, set type to block::
-
-        tasks:
-        - ceph:
-        - qemu:
-            client.0:
-              test: http://ceph.com/qa/test.sh
-              type: block
-
-    The test should be configured to run on /dev/vdb and later
-    devices.
-
-    If you want to run a test that uses more than one rbd image,
-    specify how many images to use::
-
-        tasks:
-        - ceph:
-        - qemu:
-            client.0:
-              test: http://ceph.com/qa/test.sh
-              type: block
-              num_rbd: 2
-
-    You can set the amount of memory the VM has (default is 1024 MB)::
-
-        tasks:
-        - ceph:
-        - qemu:
-            client.0:
-              test: http://ceph.com/qa/test.sh
-              memory: 512 # megabytes
-    """
-    assert isinstance(config, dict), \
-           "task qemu only supports a dictionary for configuration"
-
-    config = teuthology.replace_all_with_clients(ctx.cluster, config)
-
-    managers = []
-    for client, client_config in config.iteritems():
-        num_rbd = client_config.get('num_rbd', 1)
-        assert num_rbd > 0, 'at least one rbd device must be used'
-        for i in xrange(num_rbd):
-            create_config = {
-                client: {
-                    'image_name':
-                    '{client}.{num}'.format(client=client, num=i),
-                    }
-                }
-            managers.append(
-                lambda create_config=create_config:
-                rbd.create_image(ctx=ctx, config=create_config)
-                )
-
-    managers.extend([
-        lambda: create_dirs(ctx=ctx, config=config),
-        lambda: generate_iso(ctx=ctx, config=config),
-        lambda: download_image(ctx=ctx, config=config),
-        lambda: run_qemu(ctx=ctx, config=config),
-        ])
-
-    with contextutil.nested(*managers):
-        yield
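The task above defers each setup step behind a zero-argument lambda and hands the whole list to contextutil.nested, so rbd image creation, directory setup, ISO generation, image download and the qemu run are entered in order and torn down in reverse even when one of them fails. A minimal sketch of that composition pattern, using contextlib.ExitStack from the standard library as a stand-in for teuthology's contextutil.nested and purely illustrative step names:

    import contextlib

    @contextlib.contextmanager
    def step(name):
        # hypothetical stand-in for create_dirs, generate_iso, download_image, run_qemu
        print('enter %s' % name)
        try:
            yield
        finally:
            print('exit %s' % name)

    @contextlib.contextmanager
    def nested(*factories):
        # enter each factory's context manager in order; ExitStack unwinds them in reverse
        with contextlib.ExitStack() as stack:
            for factory in factories:
                stack.enter_context(factory())
            yield

    managers = [lambda: step('create_dirs'),
                lambda: step('generate_iso'),
                lambda: step('download_image'),
                lambda: step('run_qemu')]

    with nested(*managers):
        print('tests run while every context is active')

Binding create_config as a default argument in the per-image lambdas above is what keeps each closure pointing at its own image name instead of the loop's last value.
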
diff --git a/teuthology/task/rados.py b/teuthology/task/rados.py
deleted file mode 100644 (file)
index 0897726..0000000
+++ /dev/null
@@ -1,170 +0,0 @@
-"""
-Rados model-based integration tests
-"""
-import contextlib
-import logging
-import gevent
-from ceph_manager import CephManager
-from teuthology import misc as teuthology
-
-from ..orchestra import run
-
-log = logging.getLogger(__name__)
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Run RadosModel-based integration tests.
-
-    The config should be as follows::
-
-        rados:
-          clients: [client list]
-          ops: <number of ops>
-          objects: <number of objects to use>
-          max_in_flight: <max number of operations in flight>
-          object_size: <size of objects in bytes>
-          min_stride_size: <minimum write stride size in bytes>
-          max_stride_size: <maximum write stride size in bytes>
-          op_weights: <dictionary mapping operation type to integer weight>
-          runs: <number of times to run> - the pool is remade between runs
-          ec_pool: use an ec pool
-
-    For example::
-
-        tasks:
-        - ceph:
-        - rados:
-            clients: [client.0]
-            ops: 1000
-            max_seconds: 0   # 0 for no limit
-            objects: 25
-            max_in_flight: 16
-            object_size: 4000000
-            min_stride_size: 1024
-            max_stride_size: 4096
-            op_weights:
-              read: 20
-              write: 10
-              delete: 2
-              snap_create: 3
-              rollback: 2
-              snap_remove: 0
-            ec_pool: true
-            runs: 10
-        - interactive:
-
-    Optionally, you can provide the pool name to run against::
-
-        tasks:
-        - ceph:
-        - exec:
-            client.0:
-              - ceph osd pool create foo
-        - rados:
-            clients: [client.0]
-            pools: [foo]
-            ...
-
-    Alternatively, you can provide a pool prefix::
-
-        tasks:
-        - ceph:
-        - exec:
-            client.0:
-              - ceph osd pool create foo.client.0
-        - rados:
-            clients: [client.0]
-            pool_prefix: foo
-            ...
-
-    """
-    log.info('Beginning rados...')
-    assert isinstance(config, dict), \
-        "please list clients to run on"
-
-    object_size = int(config.get('object_size', 4000000))
-    op_weights = config.get('op_weights', {})
-    testdir = teuthology.get_testdir(ctx)
-    args = [
-        'adjust-ulimits',
-        'ceph-coverage',
-        '{tdir}/archive/coverage'.format(tdir=testdir),
-        'ceph_test_rados']
-    if config.get('ec_pool', False):
-        args.extend(['--ec-pool'])
-    args.extend([
-        '--op', 'read', str(op_weights.get('read', 100)),
-        '--op', 'write', str(op_weights.get('write', 100)),
-        '--op', 'delete', str(op_weights.get('delete', 10)),
-        '--max-ops', str(config.get('ops', 10000)),
-        '--objects', str(config.get('objects', 500)),
-        '--max-in-flight', str(config.get('max_in_flight', 16)),
-        '--size', str(object_size),
-        '--min-stride-size', str(config.get('min_stride_size', object_size / 10)),
-        '--max-stride-size', str(config.get('max_stride_size', object_size / 5)),
-        '--max-seconds', str(config.get('max_seconds', 0))
-        ])
-    for field in [
-        'copy_from', 'is_dirty', 'undirty', 'cache_flush',
-        'cache_try_flush', 'cache_evict',
-        'snap_create', 'snap_remove', 'rollback', 'setattr', 'rmattr',
-        'watch', 'append',
-        ]:
-        if field in op_weights:
-            args.extend([
-                    '--op', field, str(op_weights[field]),
-                    ])
-
-    def thread():
-        """Thread spawned by gevent"""
-        if not hasattr(ctx, 'manager'):
-            first_mon = teuthology.get_first_mon(ctx, config)
-            (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-            ctx.manager = CephManager(
-                mon,
-                ctx=ctx,
-                logger=log.getChild('ceph_manager'),
-                )
-
-        clients = ['client.{id}'.format(id=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
-        log.info('clients are %s' % clients)
-        for i in range(int(config.get('runs', '1'))):
-            log.info("starting run %s out of %s", str(i), config.get('runs', '1'))
-            tests = {}
-            existing_pools = config.get('pools', [])
-            created_pools = []
-            for role in config.get('clients', clients):
-                assert isinstance(role, basestring)
-                PREFIX = 'client.'
-                assert role.startswith(PREFIX)
-                id_ = role[len(PREFIX):]
-
-                pool = config.get('pool', None)
-                if not pool:
-                    if existing_pools:
-                        pool = existing_pools.pop()
-                    else:
-                        pool = ctx.manager.create_pool_with_unique_name(ec_pool=config.get('ec_pool', False))
-                        created_pools.append(pool)
-
-                (remote,) = ctx.cluster.only(role).remotes.iterkeys()
-                proc = remote.run(
-                    args=["CEPH_CLIENT_ID={id_}".format(id_=id_)] + args +
-                    ["--pool", pool],
-                    logger=log.getChild("rados.{id}".format(id=id_)),
-                    stdin=run.PIPE,
-                    wait=False
-                    )
-                tests[id_] = proc
-            run.wait(tests.itervalues())
-
-            for pool in created_pools:
-                ctx.manager.remove_pool(pool)
-
-    running = gevent.spawn(thread)
-
-    try:
-        yield
-    finally:
-        log.info('joining rados')
-        running.get()
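For a sense of what the argument construction above produces, here is a rough sketch covering only the basic flags (the real task also prepends the adjust-ulimits and ceph-coverage wrappers and appends any extra op_weights fields):

    def build_rados_args(config):
        # simplified mirror of the argument building in the rados task above
        op_weights = config.get('op_weights', {})
        object_size = int(config.get('object_size', 4000000))
        args = ['ceph_test_rados']
        if config.get('ec_pool', False):
            args.append('--ec-pool')
        args += ['--op', 'read', str(op_weights.get('read', 100)),
                 '--op', 'write', str(op_weights.get('write', 100)),
                 '--op', 'delete', str(op_weights.get('delete', 10)),
                 '--max-ops', str(config.get('ops', 10000)),
                 '--objects', str(config.get('objects', 500)),
                 '--size', str(object_size)]
        return args

    print(' '.join(build_rados_args({'ops': 1000, 'objects': 25,
                                     'op_weights': {'read': 20, 'write': 10}})))
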
diff --git a/teuthology/task/radosbench.py b/teuthology/task/radosbench.py
deleted file mode 100644 (file)
index d2e7571..0000000
+++ /dev/null
@@ -1,83 +0,0 @@
-"""
-Rados benchmarking
-"""
-import contextlib
-import logging
-
-from ..orchestra import run
-from teuthology import misc as teuthology
-
-log = logging.getLogger(__name__)
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Run radosbench
-
-    The config should be as follows::
-
-    radosbench:
-        clients: [client list]
-        time: <seconds to run>
-        pool: <pool to use>
-        unique_pool: use a unique pool, defaults to False
-        ec_pool: create ec pool, defaults to False
-
-    example::
-
-    tasks:
-    - ceph:
-    - radosbench:
-        clients: [client.0]
-        time: 360
-    - interactive:
-    """
-    log.info('Beginning radosbench...')
-    assert isinstance(config, dict), \
-        "please list clients to run on"
-    radosbench = {}
-
-    testdir = teuthology.get_testdir(ctx)
-
-    for role in config.get('clients', ['client.0']):
-        assert isinstance(role, basestring)
-        PREFIX = 'client.'
-        assert role.startswith(PREFIX)
-        id_ = role[len(PREFIX):]
-        (remote,) = ctx.cluster.only(role).remotes.iterkeys()
-
-        pool = 'data'
-        if config.get('pool'):
-            pool = config.get('pool')
-            if pool != 'data':
-                ctx.manager.create_pool(pool, ec_pool=config.get('ec_pool', False))
-        else:
-            pool = ctx.manager.create_pool_with_unique_name(ec_pool=config.get('ec_pool', False))
-
-        proc = remote.run(
-            args=[
-                "/bin/sh", "-c",
-                " ".join(['adjust-ulimits',
-                          'ceph-coverage',
-                          '{tdir}/archive/coverage',
-                          'rados',
-                          '--name', role,
-                          '-p' , pool,
-                          'bench', str(config.get('time', 360)), 'write',
-                          ]).format(tdir=testdir),
-                ],
-            logger=log.getChild('radosbench.{id}'.format(id=id_)),
-            stdin=run.PIPE,
-            wait=False
-            )
-        radosbench[id_] = proc
-
-    try:
-        yield
-    finally:
-        timeout = config.get('time', 360) * 5
-        log.info('joining radosbench (timing out after %ss)', timeout)
-        run.wait(radosbench.itervalues(), timeout=timeout)
-
-        if pool != 'data':
-            ctx.manager.remove_pool(pool)
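With the example configuration above, the command the task assembles for one client comes out roughly as follows; note that str.format is applied to the already-joined string, which is how the {tdir} placeholder inside the list survives the join (the concrete values below are illustrative):

    # illustrative reconstruction of the command radosbench builds for one client
    testdir, role, pool, seconds = '/home/ubuntu/cephtest', 'client.0', 'data', 360
    cmd = ' '.join(['adjust-ulimits', 'ceph-coverage',
                    '{tdir}/archive/coverage', 'rados',
                    '--name', role, '-p', pool,
                    'bench', str(seconds), 'write']).format(tdir=testdir)
    print(cmd)
    # adjust-ulimits ceph-coverage /home/ubuntu/cephtest/archive/coverage rados \
    #   --name client.0 -p data bench 360 write
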
diff --git a/teuthology/task/radosgw_admin.py b/teuthology/task/radosgw_admin.py
deleted file mode 100644 (file)
index 6936b79..0000000
+++ /dev/null
@@ -1,974 +0,0 @@
-"""
-Rgw admin testing against a running instance
-"""
-# The test cases in this file have been annotated for inventory.
-# To extract the inventory (in csv format) use the command:
-#
-#   grep '^ *# TESTCASE' | sed 's/^ *# TESTCASE //'
-#
-
-import copy
-import json
-import logging
-import time
-
-from cStringIO import StringIO
-
-import boto.exception
-import boto.s3.connection
-import boto.s3.acl
-
-import teuthology.task_util.rgw as rgw_utils
-
-from teuthology import misc as teuthology
-from teuthology.task_util.rgw import rgwadmin
-
-log = logging.getLogger(__name__)
-
-
-def successful_ops(out):
-    """Extract total from the first summary entry (presumed to be only one)"""
-    summary = out['summary']
-    if len(summary) == 0:
-        return 0
-    entry = summary[0]
-    return entry['total']['successful_ops']
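A quick illustration of the helper above against an abbreviated, made-up 'usage show' result (real output carries many more fields per entry):

    sample = {'summary': [{'total': {'successful_ops': 3, 'ops': 3}}]}
    assert successful_ops(sample) == 3
    assert successful_ops({'summary': []}) == 0
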
-
-
-def task(ctx, config):
-    """
-    Test radosgw-admin functionality against a running rgw instance.
-    """
-    global log
-    assert config is None or isinstance(config, list) \
-        or isinstance(config, dict), \
-        "task s3tests only supports a list or dictionary for configuration"
-    all_clients = ['client.{id}'.format(id=id_)
-                   for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
-    if config is None:
-        config = all_clients
-    if isinstance(config, list):
-        config = dict.fromkeys(config)
-    clients = config.keys()
-
-    multi_region_run = rgw_utils.multi_region_enabled(ctx)
-
-    client = clients[0]  # default choice, multi-region code may overwrite this
-    if multi_region_run:
-        client = rgw_utils.get_master_client(ctx, clients)
-
-    # once the client is chosen, pull the host name and  assigned port out of
-    # the role_endpoints that were assigned by the rgw task
-    (remote_host, remote_port) = ctx.rgw.role_endpoints[client]
-
-    ##
-    user1='foo'
-    user2='fud'
-    subuser1='foo:foo1'
-    subuser2='foo:foo2'
-    display_name1='Foo'
-    display_name2='Fud'
-    email='foo@foo.com'
-    email2='bar@bar.com'
-    access_key='9te6NH5mcdcq0Tc5i8i1'
-    secret_key='Ny4IOauQoL18Gp2zM7lC1vLmoawgqcYP/YGcWfXu'
-    access_key2='p5YnriCv1nAtykxBrupQ'
-    secret_key2='Q8Tk6Q/27hfbFSYdSkPtUqhqx1GgzvpXa4WARozh'
-    swift_secret1='gpS2G9RREMrnbqlp29PP2D36kgPR1tm72n5fPYfL'
-    swift_secret2='ri2VJQcKSYATOY6uaDUX7pxgkW+W1YmC6OCxPHwy'
-
-    bucket_name='myfoo'
-    bucket_name2='mybar'
-
-    # connect to rgw
-    connection = boto.s3.connection.S3Connection(
-        aws_access_key_id=access_key,
-        aws_secret_access_key=secret_key,
-        is_secure=False,
-        port=remote_port,
-        host=remote_host,
-        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
-        )
-    connection2 = boto.s3.connection.S3Connection(
-        aws_access_key_id=access_key2,
-        aws_secret_access_key=secret_key2,
-        is_secure=False,
-        port=remote_port,
-        host=remote_host,
-        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
-        )
-
-    # legend (test cases can be easily grep-ed out)
-    # TESTCASE 'testname','object','method','operation','assertion'
-    # TESTCASE 'info-nosuch','user','info','non-existent user','fails'
-    (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1])
-    assert err
-
-    # TESTCASE 'create-ok','user','create','w/all valid info','succeeds'
-    (err, out) = rgwadmin(ctx, client, [
-            'user', 'create',
-            '--uid', user1,
-            '--display-name', display_name1,
-            '--email', email,
-            '--access-key', access_key,
-            '--secret', secret_key,
-            '--max-buckets', '4'
-            ],
-            check_status=True)
-
-    # TESTCASE 'duplicate email','user','create','existing user email','fails'
-    (err, out) = rgwadmin(ctx, client, [
-            'user', 'create',
-            '--uid', user2,
-            '--display-name', display_name2,
-            '--email', email,
-            ])
-    assert err
-
-    # TESTCASE 'info-existing','user','info','existing user','returns correct info'
-    (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True)
-    assert out['user_id'] == user1
-    assert out['email'] == email
-    assert out['display_name'] == display_name1
-    assert len(out['keys']) == 1
-    assert out['keys'][0]['access_key'] == access_key
-    assert out['keys'][0]['secret_key'] == secret_key
-    assert not out['suspended']
-
-    # this whole block should only be run if regions have been configured
-    if multi_region_run:
-        rgw_utils.radosgw_agent_sync_all(ctx)
-        # post-sync, validate that user1 exists on the sync destination host
-        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
-            dest_client = c_config['dest']
-            (err, out) = rgwadmin(ctx, dest_client, ['metadata', 'list', 'user'])
-            (err, out) = rgwadmin(ctx, dest_client, ['user', 'info', '--uid', user1], check_status=True)
-            assert out['user_id'] == user1
-            assert out['email'] == email
-            assert out['display_name'] == display_name1
-            assert len(out['keys']) == 1
-            assert out['keys'][0]['access_key'] == access_key
-            assert out['keys'][0]['secret_key'] == secret_key
-            assert not out['suspended']
-
-        # compare the metadata between different regions, make sure it matches
-        log.debug('compare the metadata between different regions, make sure it matches')
-        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
-            source_client = c_config['src']
-            dest_client = c_config['dest']
-            (err1, out1) = rgwadmin(ctx, source_client,
-                ['metadata', 'get', 'user:{uid}'.format(uid=user1)], check_status=True)
-            (err2, out2) = rgwadmin(ctx, dest_client,
-                ['metadata', 'get', 'user:{uid}'.format(uid=user1)], check_status=True)
-            assert out1 == out2
-
-        # suspend a user on the master, then check the status on the destination
-        log.debug('suspend a user on the master, then check the status on the destination')
-        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
-            source_client = c_config['src']
-            dest_client = c_config['dest']
-            (err, out) = rgwadmin(ctx, source_client, ['user', 'suspend', '--uid', user1])
-            rgw_utils.radosgw_agent_sync_all(ctx)
-            (err, out) = rgwadmin(ctx, dest_client, ['user', 'info', '--uid', user1], check_status=True)
-            assert out['suspended']
-
-        # delete a user on the master, then check that it's gone on the destination
-        log.debug('delete a user on the master, then check that it\'s gone on the destination')
-        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
-            source_client = c_config['src']
-            dest_client = c_config['dest']
-            (err, out) = rgwadmin(ctx, source_client, ['user', 'rm', '--uid', user1], check_status=True)
-            rgw_utils.radosgw_agent_sync_all(ctx)
-            (err, out) = rgwadmin(ctx, source_client, ['user', 'info', '--uid', user1])
-            assert out is None
-            (err, out) = rgwadmin(ctx, dest_client, ['user', 'info', '--uid', user1])
-            assert out is None
-
-            # then recreate it so later tests pass
-            (err, out) = rgwadmin(ctx, client, [
-                'user', 'create',
-                '--uid', user1,
-                '--display-name', display_name1,
-                '--email', email,
-                '--access-key', access_key,
-                '--secret', secret_key,
-                '--max-buckets', '4'
-                ],
-                check_status=True)
-
-        # now do the multi-region bucket tests
-        log.debug('now do the multi-region bucket tests')
-
-        # Create a second user for the following tests
-        log.debug('Create a second user for the following tests')
-        (err, out) = rgwadmin(ctx, client, [
-            'user', 'create',
-            '--uid', user2,
-            '--display-name', display_name2,
-            '--email', email2,
-            '--access-key', access_key2,
-            '--secret', secret_key2,
-            '--max-buckets', '4'
-            ],
-            check_status=True)
-        (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user2], check_status=True)
-        assert out is not None
-
-        # create a bucket and do a sync
-        log.debug('create a bucket and do a sync')
-        bucket = connection.create_bucket(bucket_name2)
-        rgw_utils.radosgw_agent_sync_all(ctx)
-
-        # compare the metadata for the bucket between different regions, make sure it matches
-        log.debug('compare the metadata for the bucket between different regions, make sure it matches')
-        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
-            source_client = c_config['src']
-            dest_client = c_config['dest']
-            (err1, out1) = rgwadmin(ctx, source_client,
-                ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)],
-                check_status=True)
-            (err2, out2) = rgwadmin(ctx, dest_client,
-                ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)],
-                check_status=True)
-            assert out1 == out2
-
-            # get the bucket.instance info and compare that
-            src_bucket_id = out1['data']['bucket']['bucket_id']
-            dest_bucket_id = out2['data']['bucket']['bucket_id']
-            (err1, out1) = rgwadmin(ctx, source_client, ['metadata', 'get',
-                'bucket.instance:{bucket_name}:{bucket_instance}'.format(
-                bucket_name=bucket_name2,bucket_instance=src_bucket_id)],
-                check_status=True)
-            (err2, out2) = rgwadmin(ctx, dest_client, ['metadata', 'get',
-                'bucket.instance:{bucket_name}:{bucket_instance}'.format(
-                bucket_name=bucket_name2,bucket_instance=dest_bucket_id)],
-                check_status=True)
-            del out1['data']['bucket_info']['bucket']['pool']
-            del out1['data']['bucket_info']['bucket']['index_pool']
-            del out2['data']['bucket_info']['bucket']['pool']
-            del out2['data']['bucket_info']['bucket']['index_pool']
-            assert out1 == out2
-
-        same_region = 0
-        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
-            source_client = c_config['src']
-            dest_client = c_config['dest']
-
-            source_region = rgw_utils.region_for_client(ctx, source_client)
-            dest_region = rgw_utils.region_for_client(ctx, dest_client)
-
-            # 301 is only returned for requests to something in a different region
-            if source_region == dest_region:
-                log.debug('301 is only returned for requests to something in a different region')
-                same_region += 1
-                continue
-
-            # Attempt to create a new connection with user1 to the destination RGW
-            log.debug('Attempt to create a new connection with user1 to the destination RGW')
-            # and use that to attempt a delete (that should fail)
-            exception_encountered = False
-            try:
-                (dest_remote_host, dest_remote_port) = ctx.rgw.role_endpoints[dest_client]
-                connection_dest = boto.s3.connection.S3Connection(
-                    aws_access_key_id=access_key,
-                    aws_secret_access_key=secret_key,
-                    is_secure=False,
-                    port=dest_remote_port,
-                    host=dest_remote_host,
-                    calling_format=boto.s3.connection.OrdinaryCallingFormat(),
-                    )
-
-                # this should fail
-                connection_dest.delete_bucket(bucket_name2)
-            except boto.exception.S3ResponseError as e:
-                assert e.status == 301
-                exception_encountered = True
-
-            # confirm that the expected exception was seen
-            assert exception_encountered
-
-            # now delete the bucket on the source RGW and do another sync
-            log.debug('now delete the bucket on the source RGW and do another sync')
-            bucket.delete()
-            rgw_utils.radosgw_agent_sync_all(ctx)
-
-        if same_region == len(ctx.radosgw_agent.config):
-            bucket.delete()
-            rgw_utils.radosgw_agent_sync_all(ctx)
-
-        # make sure that the bucket no longer exists in either region
-        log.debug('make sure that the bucket no longer exists in either region')
-        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
-            source_client = c_config['src']
-            dest_client = c_config['dest']
-            (err1, out1) = rgwadmin(ctx, source_client, ['metadata', 'get',
-                'bucket:{bucket_name}'.format(bucket_name=bucket_name2)])
-            (err2, out2) = rgwadmin(ctx, dest_client, ['metadata', 'get',
-                'bucket:{bucket_name}'.format(bucket_name=bucket_name2)])
-            # Both of the previous calls should have errors due to requesting
-            # metadata for non-existent buckets
-            assert err1
-            assert err2
-
-        # create a bucket and then sync it
-        log.debug('create a bucket and then sync it')
-        bucket = connection.create_bucket(bucket_name2)
-        rgw_utils.radosgw_agent_sync_all(ctx)
-
-        # compare the metadata for the bucket between different regions, make sure it matches
-        log.debug('compare the metadata for the bucket between different regions, make sure it matches')
-        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
-            source_client = c_config['src']
-            dest_client = c_config['dest']
-            (err1, out1) = rgwadmin(ctx, source_client,
-                ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)],
-                check_status=True)
-            (err2, out2) = rgwadmin(ctx, dest_client,
-                ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)],
-                check_status=True)
-            assert out1 == out2
-
-        # Now delete the bucket and recreate it with a different user
-        log.debug('Now delete the bucket and recreate it with a different user')
-        # within the same window of time and then sync.
-        bucket.delete()
-        bucket = connection2.create_bucket(bucket_name2)
-        rgw_utils.radosgw_agent_sync_all(ctx)
-
-        # compare the metadata for the bucket between different regions, make sure it matches
-        log.debug('compare the metadata for the bucket between different regions, make sure it matches')
-        # user2 should own the bucket in both regions
-        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
-            source_client = c_config['src']
-            dest_client = c_config['dest']
-            (err1, out1) = rgwadmin(ctx, source_client,
-                ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)],
-                check_status=True)
-            (err2, out2) = rgwadmin(ctx, dest_client,
-                ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)],
-                check_status=True)
-            assert out1 == out2
-            assert out1['data']['owner'] == user2
-            assert out1['data']['owner'] != user1
-
-        # now we're going to use this bucket to test meta-data update propagation
-        log.debug('now we\'re going to use this bucket to test meta-data update propagation')
-        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
-            source_client = c_config['src']
-            dest_client = c_config['dest']
-
-            # get the metadata so we can tweak it
-            log.debug('get the metadata so we can tweak it')
-            (err, orig_data) = rgwadmin(ctx, source_client,
-                ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)],
-                check_status=True)
-
-            # manually edit mtime for this bucket to be 300 seconds in the past
-            log.debug('manually edit mtime for this bucket to be 300 seconds in the past')
-            new_data = copy.deepcopy(orig_data)
-            new_data['mtime'] =  orig_data['mtime'] - 300
-            assert new_data != orig_data
-            (err, out) = rgwadmin(ctx, source_client,
-                ['metadata', 'put', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)],
-                stdin=StringIO(json.dumps(new_data)),
-                check_status=True)
-
-            # get the metadata and make sure that the 'put' worked
-            log.debug('get the metadata and make sure that the \'put\' worked')
-            (err, out) = rgwadmin(ctx, source_client,
-                ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)],
-                check_status=True)
-            assert out == new_data
-
-            # sync to propagate the new metadata
-            log.debug('sync to propagate the new metadata')
-            rgw_utils.radosgw_agent_sync_all(ctx)
-
-            # get the metadata from the dest and compare it to what we just set
-            log.debug('get the metadata from the dest and compare it to what we just set')
-            # and what the source region has.
-            (err1, out1) = rgwadmin(ctx, source_client,
-                ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)],
-                check_status=True)
-            (err2, out2) = rgwadmin(ctx, dest_client,
-                ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)],
-                check_status=True)
-            # yeah for the transitive property
-            assert out1 == out2
-            assert out1 == new_data
-
-        # now we delete the bucket
-        log.debug('now we delete the bucket')
-        bucket.delete()
-
-        log.debug('sync to propagate the deleted bucket')
-        rgw_utils.radosgw_agent_sync_all(ctx)
-
-        # Delete user2 as later tests do not expect it to exist.
-        # Verify that it is gone on both regions
-        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
-            source_client = c_config['src']
-            dest_client = c_config['dest']
-            (err, out) = rgwadmin(ctx, source_client,
-                ['user', 'rm', '--uid', user2], check_status=True)
-            rgw_utils.radosgw_agent_sync_all(ctx)
-            # The two 'user info' calls should fail and not return any data
-            # since we just deleted this user.
-            (err, out) = rgwadmin(ctx, source_client, ['user', 'info', '--uid', user2])
-            assert out is None
-            (err, out) = rgwadmin(ctx, dest_client, ['user', 'info', '--uid', user2])
-            assert out is None
-
-        # Test data sync
-
-        # First create a bucket for data sync test purpose
-        bucket = connection.create_bucket(bucket_name + 'data')
-
-        # Create a tiny file and check if in sync
-        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
-            if c_config.get('metadata-only'):
-                continue
-
-            source_client = c_config['src']
-            dest_client = c_config['dest']
-            k = boto.s3.key.Key(bucket)
-            k.key = 'tiny_file'
-            k.set_contents_from_string("123456789")
-            time.sleep(rgw_utils.radosgw_data_log_window(ctx, source_client))
-            rgw_utils.radosgw_agent_sync_all(ctx, data=True)
-            (dest_host, dest_port) = ctx.rgw.role_endpoints[dest_client]
-            dest_connection = boto.s3.connection.S3Connection(
-                aws_access_key_id=access_key,
-                aws_secret_access_key=secret_key,
-                is_secure=False,
-                port=dest_port,
-                host=dest_host,
-                calling_format=boto.s3.connection.OrdinaryCallingFormat(),
-                )
-            dest_k = dest_connection.get_bucket(bucket_name + 'data').get_key('tiny_file')
-            assert k.get_contents_as_string() == dest_k.get_contents_as_string()
-
-            # check that deleting it removes it from the dest zone
-            k.delete()
-            time.sleep(rgw_utils.radosgw_data_log_window(ctx, source_client))
-            rgw_utils.radosgw_agent_sync_all(ctx, data=True)
-
-            dest_bucket = dest_connection.get_bucket(bucket_name + 'data')
-            dest_k = dest_bucket.get_key('tiny_file')
-            assert dest_k is None, 'object not deleted from destination zone'
-
-        # finally we delete the bucket
-        bucket.delete()
-
-        bucket = connection.create_bucket(bucket_name + 'data2')
-        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
-            if c_config.get('metadata-only'):
-                continue
-
-            source_client = c_config['src']
-            dest_client = c_config['dest']
-            (dest_host, dest_port) = ctx.rgw.role_endpoints[dest_client]
-            dest_connection = boto.s3.connection.S3Connection(
-                aws_access_key_id=access_key,
-                aws_secret_access_key=secret_key,
-                is_secure=False,
-                port=dest_port,
-                host=dest_host,
-                calling_format=boto.s3.connection.OrdinaryCallingFormat(),
-                )
-            for i in range(20):
-                k = boto.s3.key.Key(bucket)
-                k.key = 'tiny_file_' + str(i)
-                k.set_contents_from_string(str(i) * 100)
-
-            time.sleep(rgw_utils.radosgw_data_log_window(ctx, source_client))
-            rgw_utils.radosgw_agent_sync_all(ctx, data=True)
-
-            for i in range(20):
-                dest_k = dest_connection.get_bucket(bucket_name + 'data2').get_key('tiny_file_' + str(i))
-                assert (str(i) * 100) == dest_k.get_contents_as_string()
-                k = boto.s3.key.Key(bucket)
-                k.key = 'tiny_file_' + str(i)
-                k.delete()
-
-            # check that deleting removes the objects from the dest zone
-            time.sleep(rgw_utils.radosgw_data_log_window(ctx, source_client))
-            rgw_utils.radosgw_agent_sync_all(ctx, data=True)
-
-            for i in range(20):
-                dest_bucket = dest_connection.get_bucket(bucket_name + 'data2')
-                dest_k = dest_bucket.get_key('tiny_file_' + str(i))
-                assert dest_k is None, 'object %d not deleted from destination zone' % i
-        bucket.delete()
-
-    # end of 'if multi_region_run:'
-
-    # TESTCASE 'suspend-ok','user','suspend','active user','succeeds'
-    (err, out) = rgwadmin(ctx, client, ['user', 'suspend', '--uid', user1],
-        check_status=True)
-
-    # TESTCASE 'suspend-suspended','user','suspend','suspended user','succeeds w/advisory'
-    (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True)
-    assert out['suspended']
-
-    # TESTCASE 're-enable','user','enable','suspended user','succeeds'
-    (err, out) = rgwadmin(ctx, client, ['user', 'enable', '--uid', user1], check_status=True)
-
-    # TESTCASE 'info-re-enabled','user','info','re-enabled user','no longer suspended'
-    (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True)
-    assert not out['suspended']
-
-    # TESTCASE 'add-keys','key','create','w/valid info','succeeds'
-    (err, out) = rgwadmin(ctx, client, [
-            'key', 'create', '--uid', user1,
-            '--access-key', access_key2, '--secret', secret_key2,
-            ], check_status=True)
-
-    # TESTCASE 'info-new-key','user','info','after key addition','returns all keys'
-    (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1],
-        check_status=True)
-    assert len(out['keys']) == 2
-    assert out['keys'][0]['access_key'] == access_key2 or out['keys'][1]['access_key'] == access_key2
-    assert out['keys'][0]['secret_key'] == secret_key2 or out['keys'][1]['secret_key'] == secret_key2
-
-    # TESTCASE 'rm-key','key','rm','newly added key','succeeds, key is removed'
-    (err, out) = rgwadmin(ctx, client, [
-            'key', 'rm', '--uid', user1,
-            '--access-key', access_key2,
-            ], check_status=True)
-    assert len(out['keys']) == 1
-    assert out['keys'][0]['access_key'] == access_key
-    assert out['keys'][0]['secret_key'] == secret_key
-
-    # TESTCASE 'add-swift-key','key','create','swift key','succeeds'
-    subuser_access = 'full'
-    subuser_perm = 'full-control'
-
-    (err, out) = rgwadmin(ctx, client, [
-            'subuser', 'create', '--subuser', subuser1,
-            '--access', subuser_access
-            ], check_status=True)
-
-    # TESTCASE 'add-swift-key','key','create','swift key','succeeds'
-    (err, out) = rgwadmin(ctx, client, [
-            'subuser', 'modify', '--subuser', subuser1,
-            '--secret', swift_secret1,
-            '--key-type', 'swift',
-            ], check_status=True)
-
-    # TESTCASE 'subuser-perm-mask', 'subuser', 'info', 'test subuser perm mask durability', 'succeeds'
-    (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1])
-
-    assert out['subusers'][0]['permissions'] == subuser_perm
-
-    # TESTCASE 'info-swift-key','user','info','after key addition','returns all keys'
-    (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True)
-    assert len(out['swift_keys']) == 1
-    assert out['swift_keys'][0]['user'] == subuser1
-    assert out['swift_keys'][0]['secret_key'] == swift_secret1
-
-    # TESTCASE 'add-swift-subuser','key','create','swift sub-user key','succeeds'
-    (err, out) = rgwadmin(ctx, client, [
-            'subuser', 'create', '--subuser', subuser2,
-            '--secret', swift_secret2,
-            '--key-type', 'swift',
-            ], check_status=True)
-
-    # TESTCASE 'info-swift-subuser','user','info','after key addition','returns all sub-users/keys'
-    (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True)
-    assert len(out['swift_keys']) == 2
-    assert out['swift_keys'][0]['user'] == subuser2 or out['swift_keys'][1]['user'] == subuser2
-    assert out['swift_keys'][0]['secret_key'] == swift_secret2 or out['swift_keys'][1]['secret_key'] == swift_secret2
-
-    # TESTCASE 'rm-swift-key1','key','rm','subuser','succeeds, one key is removed'
-    (err, out) = rgwadmin(ctx, client, [
-            'key', 'rm', '--subuser', subuser1,
-            '--key-type', 'swift',
-            ], check_status=True)
-    assert len(out['swift_keys']) == 1
-
-    # TESTCASE 'rm-subuser','subuser','rm','subuser','success, subuser is removed'
-    (err, out) = rgwadmin(ctx, client, [
-            'subuser', 'rm', '--subuser', subuser1,
-            ], check_status=True)
-    assert len(out['subusers']) == 1
-
-    # TESTCASE 'rm-subuser-with-keys','subuser','rm','subuser','succeeds, second subuser and key are removed'
-    (err, out) = rgwadmin(ctx, client, [
-            'subuser', 'rm', '--subuser', subuser2,
-            '--key-type', 'swift', '--purge-keys',
-            ], check_status=True)
-    assert len(out['swift_keys']) == 0
-    assert len(out['subusers']) == 0
-
-    # TESTCASE 'bucket-stats','bucket','stats','no session/buckets','succeeds, empty list'
-    (err, out) = rgwadmin(ctx, client, ['bucket', 'stats', '--uid', user1],
-        check_status=True)
-    assert len(out) == 0
-
-    if multi_region_run:
-        rgw_utils.radosgw_agent_sync_all(ctx)
-
-    # TESTCASE 'bucket-stats2','bucket','stats','no buckets','succeeds, empty list'
-    (err, out) = rgwadmin(ctx, client, ['bucket', 'list', '--uid', user1], check_status=True)
-    assert len(out) == 0
-
-    # create a first bucket
-    bucket = connection.create_bucket(bucket_name)
-
-    # TESTCASE 'bucket-list','bucket','list','one bucket','succeeds, expected list'
-    (err, out) = rgwadmin(ctx, client, ['bucket', 'list', '--uid', user1], check_status=True)
-    assert len(out) == 1
-    assert out[0] == bucket_name
-
-    # TESTCASE 'bucket-list-all','bucket','list','all buckets','succeeds, expected list'
-    (err, out) = rgwadmin(ctx, client, ['bucket', 'list'], check_status=True)
-    assert len(out) >= 1
-    assert bucket_name in out
-
-    # TESTCASE 'max-bucket-limit,'bucket','create','4 buckets','5th bucket fails due to max buckets == 4'
-    bucket2 = connection.create_bucket(bucket_name + '2')
-    bucket3 = connection.create_bucket(bucket_name + '3')
-    bucket4 = connection.create_bucket(bucket_name + '4')
-    # the 5th should fail.
-    failed = False
-    try:
-        connection.create_bucket(bucket_name + '5')
-    except Exception:
-        failed = True
-    assert failed
-
-    # delete the buckets
-    bucket2.delete()
-    bucket3.delete()
-    bucket4.delete()
-
-    # TESTCASE 'bucket-stats3','bucket','stats','new empty bucket','succeeds, empty list'
-    (err, out) = rgwadmin(ctx, client, [
-            'bucket', 'stats', '--bucket', bucket_name], check_status=True)
-    assert out['owner'] == user1
-    bucket_id = out['id']
-
-    # TESTCASE 'bucket-stats4','bucket','stats','new empty bucket','succeeds, expected bucket ID'
-    (err, out) = rgwadmin(ctx, client, ['bucket', 'stats', '--uid', user1], check_status=True)
-    assert len(out) == 1
-    assert out[0]['id'] == bucket_id    # does it return the same ID twice in a row?
-
-    # use some space
-    key = boto.s3.key.Key(bucket)
-    key.set_contents_from_string('one')
-
-    # TESTCASE 'bucket-stats5','bucket','stats','after creating key','succeeds, lists one non-empty object'
-    (err, out) = rgwadmin(ctx, client, [
-            'bucket', 'stats', '--bucket', bucket_name], check_status=True)
-    assert out['id'] == bucket_id
-    assert out['usage']['rgw.main']['num_objects'] == 1
-    assert out['usage']['rgw.main']['size_kb'] > 0
-
-    # reclaim it
-    key.delete()
-
-    # TESTCASE 'bucket unlink', 'bucket', 'unlink', 'unlink bucket from user', 'fails', 'access denied error'
-    (err, out) = rgwadmin(ctx, client,
-        ['bucket', 'unlink', '--uid', user1, '--bucket', bucket_name],
-        check_status=True)
-
-    # create a second user to link the bucket to
-    (err, out) = rgwadmin(ctx, client, [
-            'user', 'create',
-            '--uid', user2,
-            '--display-name', display_name2,
-            '--access-key', access_key2,
-            '--secret', secret_key2,
-            '--max-buckets', '1',
-            ],
-            check_status=True)
-
-    # try creating an object with the first user before the bucket is relinked
-    denied = False
-    key = boto.s3.key.Key(bucket)
-
-    try:
-        key.set_contents_from_string('two')
-    except boto.exception.S3ResponseError:
-        denied = True
-
-    assert not denied
-
-    # delete the object
-    key.delete()
-
-    # link the bucket to another user
-    (err, out) = rgwadmin(ctx, client, ['bucket', 'link', '--uid', user2, '--bucket', bucket_name],
-        check_status=True)
-
-    # try to remove user, should fail (has a linked bucket)
-    (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user2])
-    assert err
-
-    # TESTCASE 'bucket unlink', 'bucket', 'unlink', 'unlink bucket from user', 'succeeds, bucket unlinked'
-    (err, out) = rgwadmin(ctx, client, ['bucket', 'unlink', '--uid', user2, '--bucket', bucket_name],
-        check_status=True)
-
-    # relink the bucket to the first user and delete the second user
-    (err, out) = rgwadmin(ctx, client,
-        ['bucket', 'link', '--uid', user1, '--bucket', bucket_name],
-        check_status=True)
-
-    (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user2],
-        check_status=True)
-
-    # TESTCASE 'object-rm', 'object', 'rm', 'remove object', 'succeeds, object is removed'
-
-    # upload an object
-    object_name = 'four'
-    key = boto.s3.key.Key(bucket, object_name)
-    key.set_contents_from_string(object_name)
-
-    # now delete it
-    (err, out) = rgwadmin(ctx, client,
-        ['object', 'rm', '--bucket', bucket_name, '--object', object_name],
-        check_status=True)
-
-    # TESTCASE 'bucket-stats6','bucket','stats','after deleting key','succeeds, lists no objects'
-    (err, out) = rgwadmin(ctx, client, [
-            'bucket', 'stats', '--bucket', bucket_name],
-            check_status=True)
-    assert out['id'] == bucket_id
-    assert out['usage']['rgw.main']['num_objects'] == 0
-
-    # list log objects
-    # TESTCASE 'log-list','log','list','after activity','succeeds, lists at least one object'
-    (err, out) = rgwadmin(ctx, client, ['log', 'list'], check_status=True)
-    assert len(out) > 0
-
-    for obj in out:
-        # TESTCASE 'log-show','log','show','after activity','returns expected info'
-        if obj[:4] == 'meta' or obj[:4] == 'data':
-            continue
-
-        (err, rgwlog) = rgwadmin(ctx, client, ['log', 'show', '--object', obj],
-            check_status=True)
-        assert len(rgwlog) > 0
-
-        # exempt bucket_name2 from checking as it was only used for multi-region tests
-        assert rgwlog['bucket'].find(bucket_name) == 0 or rgwlog['bucket'].find(bucket_name2) == 0
-        assert rgwlog['bucket'] != bucket_name or rgwlog['bucket_id'] == bucket_id
-        assert rgwlog['bucket_owner'] == user1 or rgwlog['bucket'] == bucket_name + '5' or rgwlog['bucket'] == bucket_name2
-        for entry in rgwlog['log_entries']:
-            log.debug('checking log entry: %s', entry)
-            assert entry['bucket'] == rgwlog['bucket']
-            possible_buckets = [bucket_name + '5', bucket_name2]
-            user = entry['user']
-            assert user == user1 or user.endswith('system-user') or \
-                rgwlog['bucket'] in possible_buckets
-
-        # TESTCASE 'log-rm','log','rm','delete log objects','succeeds'
-        (err, out) = rgwadmin(ctx, client, ['log', 'rm', '--object', obj],
-            check_status=True)
-
-    # TODO: show log by bucket+date
-
-    # need to wait for all usage data to get flushed; this normally takes up to 30 seconds, but allow far longer before giving up
-    timestamp = time.time()
-    while time.time() - timestamp <= (20 * 60):      # wait up to 20 minutes
-        (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--categories', 'delete_obj'])  # last operation we did is delete obj, wait for it to flush
-        if successful_ops(out) > 0:
-            break
-        time.sleep(1)
-
-    assert time.time() - timestamp <= (20 * 60)
-
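The flush wait above is the usual poll-until-true pattern; a generic form of it is sketched below, with the commented call assuming the rgwadmin and successful_ops helpers defined earlier in this file:

    import time

    def wait_until(predicate, timeout, interval=1):
        # poll until predicate() is truthy or timeout (in seconds) expires
        deadline = time.time() + timeout
        while time.time() < deadline:
            if predicate():
                return True
            time.sleep(interval)
        return False

    # hypothetical use, mirroring the loop above:
    # assert wait_until(lambda: successful_ops(rgwadmin(ctx, client,
    #     ['usage', 'show', '--categories', 'delete_obj'])[1]) > 0, 20 * 60)
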
-    # TESTCASE 'usage-show' 'usage' 'show' 'all usage' 'succeeds'
-    (err, out) = rgwadmin(ctx, client, ['usage', 'show'], check_status=True)
-    assert len(out['entries']) > 0
-    assert len(out['summary']) > 0
-    user_summary = out['summary'][0]
-    total = user_summary['total']
-    assert total['successful_ops'] > 0
-
-    # TESTCASE 'usage-show2' 'usage' 'show' 'user usage' 'succeeds'
-    (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--uid', user1],
-        check_status=True)
-    assert len(out['entries']) > 0
-    assert len(out['summary']) > 0
-    user_summary = out['summary'][0]
-    for entry in user_summary['categories']:
-        assert entry['successful_ops'] > 0
-    assert user_summary['user'] == user1
-
-    # TESTCASE 'usage-show3' 'usage' 'show' 'user usage categories' 'succeeds'
-    test_categories = ['create_bucket', 'put_obj', 'delete_obj', 'delete_bucket']
-    for cat in test_categories:
-        (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--uid', user1, '--categories', cat],
-            check_status=True)
-        assert len(out['summary']) > 0
-        user_summary = out['summary'][0]
-        assert user_summary['user'] == user1
-        assert len(user_summary['categories']) == 1
-        entry = user_summary['categories'][0]
-        assert entry['category'] == cat
-        assert entry['successful_ops'] > 0
-
-    # TESTCASE 'usage-trim' 'usage' 'trim' 'user usage' 'succeeds, usage removed'
-    (err, out) = rgwadmin(ctx, client, ['usage', 'trim', '--uid', user1],
-        check_status=True)
-    (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--uid', user1],
-        check_status=True)
-    assert len(out['entries']) == 0
-    assert len(out['summary']) == 0
-
-    # TESTCASE 'user-suspend2','user','suspend','existing user','succeeds'
-    (err, out) = rgwadmin(ctx, client, ['user', 'suspend', '--uid', user1],
-        check_status=True)
-
-    # TESTCASE 'user-suspend3','user','suspend','suspended user','cannot write objects'
-    try:
-        key = boto.s3.key.Key(bucket)
-        key.set_contents_from_string('five')
-    except boto.exception.S3ResponseError as e:
-        assert e.status == 403
-
-    # TESTCASE 'user-renable2','user','enable','suspended user','succeeds'
-    (err, out) = rgwadmin(ctx, client, ['user', 'enable', '--uid', user1],
-        check_status=True)
-
-    # TESTCASE 'user-renable3','user','enable','reenabled user','can write objects'
-    key = boto.s3.key.Key(bucket)
-    key.set_contents_from_string('six')
-
-    # TESTCASE 'gc-list', 'gc', 'list', 'get list of objects ready for garbage collection'
-
-    # create an object large enough to be split into multiple parts
-    test_string = 'foo'*10000000
-
-    big_key = boto.s3.key.Key(bucket)
-    big_key.set_contents_from_string(test_string)
-
-    # now delete the head
-    big_key.delete()
-
-    # wait a bit to give the garbage collector time to cycle
-    time.sleep(15)
-
-    (err, out) = rgwadmin(ctx, client, ['gc', 'list'])
-
-    assert len(out) > 0
-
-    # TESTCASE 'gc-process', 'gc', 'process', 'manually collect garbage'
-    (err, out) = rgwadmin(ctx, client, ['gc', 'process'], check_status=True)
-
-    #confirm
-    (err, out) = rgwadmin(ctx, client, ['gc', 'list'])
-
-    assert len(out) == 0
-
-    # TESTCASE 'rm-user-buckets','user','rm','existing user','fails, still has buckets'
-    (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user1])
-    assert err
-
-    # delete should fail because ``key`` still exists
-    try:
-        bucket.delete()
-    except boto.exception.S3ResponseError as e:
-        assert e.status == 409
-
-    key.delete()
-    bucket.delete()
-
-    # TESTCASE 'policy', 'bucket', 'policy', 'get bucket policy', 'returns S3 policy'
-    bucket = connection.create_bucket(bucket_name)
-
-    # create an object
-    key = boto.s3.key.Key(bucket)
-    key.set_contents_from_string('seven')
-
-    # should be private already but guarantee it
-    key.set_acl('private')
-
-    (err, out) = rgwadmin(ctx, client,
-        ['policy', '--bucket', bucket.name, '--object', key.key],
-        check_status=True)
-
-    acl = key.get_xml_acl()
-
-    assert acl == out.strip('\n')
-
-    # add another grantee by making the object public read
-    key.set_acl('public-read')
-
-    (err, out) = rgwadmin(ctx, client,
-        ['policy', '--bucket', bucket.name, '--object', key.key],
-        check_status=True)
-
-    acl = key.get_xml_acl()
-    assert acl == out.strip('\n')
-
-    # TESTCASE 'rm-bucket', 'bucket', 'rm', 'bucket with objects', 'succeeds'
-    bucket = connection.create_bucket(bucket_name)
-    key_name = ['eight', 'nine', 'ten', 'eleven']
-    for i in range(4):
-        key = boto.s3.key.Key(bucket)
-        key.set_contents_from_string(key_name[i])
-
-    (err, out) = rgwadmin(ctx, client,
-        ['bucket', 'rm', '--bucket', bucket_name, '--purge-objects'],
-        check_status=True)
-
-    # TESTCASE 'caps-add', 'caps', 'add', 'add user cap', 'succeeds'
-    caps='user=read'
-    (err, out) = rgwadmin(ctx, client, ['caps', 'add', '--uid', user1, '--caps', caps])
-
-    assert out['caps'][0]['perm'] == 'read'
-
-    # TESTCASE 'caps-rm', 'caps', 'rm', 'remove existing cap from user', 'succeeds'
-    (err, out) = rgwadmin(ctx, client, ['caps', 'rm', '--uid', user1, '--caps', caps])
-
-    assert not out['caps']
-
-    # TESTCASE 'rm-user','user','rm','existing user','fails, still has buckets'
-    bucket = connection.create_bucket(bucket_name)
-    key = boto.s3.key.Key(bucket)
-
-    (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user1])
-    assert err
-
-    # TESTCASE 'rm-user2', 'user', 'rm', 'user with data', 'succeeds'
-    bucket = connection.create_bucket(bucket_name)
-    key = boto.s3.key.Key(bucket)
-    key.set_contents_from_string('twelve')
-
-    (err, out) = rgwadmin(ctx, client,
-        ['user', 'rm', '--uid', user1, '--purge-data' ],
-        check_status=True)
-
-    # TESTCASE 'rm-user3','user','rm','deleted user','fails'
-    (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1])
-    assert err
-
-    # TESTCASE 'zone-info', 'zone', 'get', 'get zone info', 'succeeds, has default placement rule'
-    #
-
-    (err, out) = rgwadmin(ctx, client, ['zone', 'get'])
-    orig_placement_pools = len(out['placement_pools'])
-
-    # removed this test; it is not correct to assume that the zone has default
-    # placement, since that depends on how the zone was set up beforehand
-    #
-    # assert len(out) > 0
-    # assert len(out['placement_pools']) == 1
-
-    # default_rule = out['placement_pools'][0]
-    # assert default_rule['key'] == 'default-placement'
-
-    rule={'key': 'new-placement', 'val': {'data_pool': '.rgw.buckets.2', 'index_pool': '.rgw.buckets.index.2'}}
-
-    out['placement_pools'].append(rule)
-
-    (err, out) = rgwadmin(ctx, client, ['zone', 'set'],
-        stdin=StringIO(json.dumps(out)),
-        check_status=True)
-
-    (err, out) = rgwadmin(ctx, client, ['zone', 'get'])
-    assert len(out) > 0
-    assert len(out['placement_pools']) == orig_placement_pools + 1
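The zone update at the end of this file relies on the radosgw-admin zone get/set round trip: fetch the zone as JSON, append a placement target, and feed the result back on stdin. A minimal standalone sketch of that pattern, outside the teuthology harness and assuming a reachable radosgw-admin binary (the pool names are placeholders):

    import json
    import subprocess

    # Fetch the current zone definition as JSON.
    zone = json.loads(subprocess.check_output(
        ['radosgw-admin', '--format', 'json', 'zone', 'get']))

    # Append a placement target; the pool names are illustrative only.
    zone['placement_pools'].append({
        'key': 'new-placement',
        'val': {'data_pool': '.rgw.buckets.2',
                'index_pool': '.rgw.buckets.index.2'},
    })

    # 'zone set' reads the updated definition from stdin.
    proc = subprocess.Popen(['radosgw-admin', 'zone', 'set'], stdin=subprocess.PIPE)
    proc.communicate(json.dumps(zone).encode())
    assert proc.returncode == 0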
diff --git a/teuthology/task/radosgw_admin_rest.py b/teuthology/task/radosgw_admin_rest.py
deleted file mode 100644 (file)
index 866ff4f..0000000
+++ /dev/null
@@ -1,678 +0,0 @@
-"""
-Run a series of rgw admin commands through the rest interface.
-
-The test cases in this file have been annotated for inventory.
-To extract the inventory (in csv format) use the command:
-
-   grep '^ *# TESTCASE' | sed 's/^ *# TESTCASE //'
-
-"""
-from cStringIO import StringIO
-import logging
-import json
-
-import boto.exception
-import boto.s3.connection
-import boto.s3.acl
-
-import requests
-import time
-
-from boto.connection import AWSAuthConnection
-from teuthology import misc as teuthology
-
-log = logging.getLogger(__name__)
-
-def successful_ops(out):
-    """
-    Extract successful operations
-    :param out: dict parsed from the 'usage show' JSON output
-    """
-    summary = out['summary']
-    if len(summary) == 0:
-        return 0
-    entry = summary[0]
-    return entry['total']['successful_ops']
-
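successful_ops() above assumes the structure that 'usage show' returns in JSON. A trimmed example of that structure, with illustrative numbers, using the helper defined above:

    usage = {
        'entries': [],   # per-user raw entries (elided here)
        'summary': [
            {'user': 'foo',
             'categories': [{'category': 'put_obj', 'successful_ops': 3}],
             'total': {'successful_ops': 3}},
        ],
    }
    assert successful_ops(usage) == 3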
-def rgwadmin(ctx, client, cmd):
-    """
-    Perform rgw admin command
-
-    :param client: client
-    :param cmd: command to execute.
-    :return: command exit status, json result.
-    """
-    log.info('radosgw-admin: %s' % cmd)
-    testdir = teuthology.get_testdir(ctx)
-    pre = [
-        'adjust-ulimits',
-        'ceph-coverage',
-        '{tdir}/archive/coverage'.format(tdir=testdir),
-        'radosgw-admin',
-        '--log-to-stderr',
-        '--format', 'json',
-        ]
-    pre.extend(cmd)
-    (remote,) = ctx.cluster.only(client).remotes.iterkeys()
-    proc = remote.run(
-        args=pre,
-        check_status=False,
-        stdout=StringIO(),
-        stderr=StringIO(),
-        )
-    r = proc.exitstatus
-    out = proc.stdout.getvalue()
-    j = None
-    if not r and out != '':
-        try:
-            j = json.loads(out)
-            log.info(' json result: %s' % j)
-        except ValueError:
-            j = out
-            log.info(' raw result: %s' % j)
-    return (r, j)
-
-
-def rgwadmin_rest(connection, cmd, params=None, headers=None, raw=False):
-    """
-    perform a rest command
-    """
-    log.info('radosgw-admin-rest: %s %s' % (cmd, params))
-    put_cmds = ['create', 'link', 'add']
-    post_cmds = ['unlink', 'modify']
-    delete_cmds = ['trim', 'rm', 'process']
-    get_cmds = ['check', 'info', 'show', 'list']
-
-    bucket_sub_resources = ['object', 'policy', 'index']
-    user_sub_resources = ['subuser', 'key', 'caps']
-    zone_sub_resources = ['pool', 'log', 'garbage']
-
-    def get_cmd_method_and_handler(cmd):
-        """
-        Get the rest command and handler from information in cmd and
-        from the imported requests object.
-        """
-        if cmd[1] in put_cmds:
-            return 'PUT', requests.put
-        elif cmd[1] in delete_cmds:
-            return 'DELETE', requests.delete
-        elif cmd[1] in post_cmds:
-            return 'POST', requests.post
-        elif cmd[1] in get_cmds:
-            return 'GET', requests.get
-
-    def get_resource(cmd):
-        """
-        Get the name of the resource from information in cmd.
-        """
-        if cmd[0] == 'bucket' or cmd[0] in bucket_sub_resources:
-            if cmd[0] == 'bucket':
-                return 'bucket', ''
-            else:
-                return 'bucket', cmd[0]
-        elif cmd[0] == 'user' or cmd[0] in user_sub_resources:
-            if cmd[0] == 'user':
-                return 'user', ''
-            else:
-                return 'user', cmd[0]
-        elif cmd[0] == 'usage':
-            return 'usage', ''
-        elif cmd[0] == 'zone' or cmd[0] in zone_sub_resources:
-            if cmd[0] == 'zone':
-                return 'zone', ''
-            else:
-                return 'zone', cmd[0]
-
-    def build_admin_request(conn, method, resource = '', headers=None, data='',
-            query_args=None, params=None):
-        """
-        Build an administrative request adapted from the build_request()
-        method of boto.connection
-        """
-
-        path = conn.calling_format.build_path_base('admin', resource)
-        auth_path = conn.calling_format.build_auth_path('admin', resource)
-        host = conn.calling_format.build_host(conn.server_name(), 'admin')
-        if query_args:
-            path += '?' + query_args
-            boto.log.debug('path=%s' % path)
-            auth_path += '?' + query_args
-            boto.log.debug('auth_path=%s' % auth_path)
-        return AWSAuthConnection.build_base_http_request(conn, method, path,
-                auth_path, params, headers, data, host)
-
-    method, handler = get_cmd_method_and_handler(cmd)
-    resource, query_args = get_resource(cmd)
-    request = build_admin_request(connection, method, resource,
-            query_args=query_args, headers=headers)
-
-    url = '{protocol}://{host}{path}'.format(protocol=request.protocol,
-            host=request.host, path=request.path)
-
-    request.authorize(connection=connection)
-    result = handler(url, params=params, headers=request.headers)
-
-    if raw:
-        log.info(' text result: %s' % result.text)
-        return result.status_code, result.text
-    else:
-        log.info(' json result: %s' % result.json())
-        return result.status_code, result.json()
-
-
-def task(ctx, config):
-    """
-    Test radosgw-admin functionality through the RESTful interface
-    """
-    assert config is None or isinstance(config, list) \
-        or isinstance(config, dict), \
-        "task s3tests only supports a list or dictionary for configuration"
-    all_clients = ['client.{id}'.format(id=id_)
-                   for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
-    if config is None:
-        config = all_clients
-    if isinstance(config, list):
-        config = dict.fromkeys(config)
-    clients = config.keys()
-
-    # just use the first client...
-    client = clients[0]
-
-    ##
-    admin_user = 'ada'
-    admin_display_name = 'Ms. Admin User'
-    admin_access_key = 'MH1WC2XQ1S8UISFDZC8W'
-    admin_secret_key = 'dQyrTPA0s248YeN5bBv4ukvKU0kh54LWWywkrpoG'
-    admin_caps = 'users=read, write; usage=read, write; buckets=read, write; zone=read, write'
-
-    user1 = 'foo'
-    user2 = 'fud'
-    subuser1 = 'foo:foo1'
-    subuser2 = 'foo:foo2'
-    display_name1 = 'Foo'
-    display_name2 = 'Fud'
-    email = 'foo@foo.com'
-    access_key = '9te6NH5mcdcq0Tc5i8i1'
-    secret_key = 'Ny4IOauQoL18Gp2zM7lC1vLmoawgqcYP/YGcWfXu'
-    access_key2 = 'p5YnriCv1nAtykxBrupQ'
-    secret_key2 = 'Q8Tk6Q/27hfbFSYdSkPtUqhqx1GgzvpXa4WARozh'
-    swift_secret1 = 'gpS2G9RREMrnbqlp29PP2D36kgPR1tm72n5fPYfL'
-    swift_secret2 = 'ri2VJQcKSYATOY6uaDUX7pxgkW+W1YmC6OCxPHwy'
-
-    bucket_name = 'myfoo'
-
-    # legend (test cases can be easily grep-ed out)
-    # TESTCASE 'testname','object','method','operation','assertion'
-    # TESTCASE 'create-admin-user','user','create','administrative user','succeeds'
-    (err, out) = rgwadmin(ctx, client, [
-            'user', 'create',
-            '--uid', admin_user,
-            '--display-name', admin_display_name,
-            '--access-key', admin_access_key,
-            '--secret', admin_secret_key,
-            '--max-buckets', '0',
-            '--caps', admin_caps
-            ])
-    logging.error(out)
-    logging.error(err)
-    assert not err
-
-    (remote,) = ctx.cluster.only(client).remotes.iterkeys()
-    remote_host = remote.name.split('@')[1]
-    admin_conn = boto.s3.connection.S3Connection(
-        aws_access_key_id=admin_access_key,
-        aws_secret_access_key=admin_secret_key,
-        is_secure=False,
-        port=7280,
-        host=remote_host,
-        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
-        )
-
-    # TESTCASE 'info-nosuch','user','info','non-existent user','fails'
-    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {"uid": user1})
-    assert ret == 404
-
-    # TESTCASE 'create-ok','user','create','w/all valid info','succeeds'
-    (ret, out) = rgwadmin_rest(admin_conn,
-            ['user', 'create'],
-            {'uid' : user1,
-             'display-name' :  display_name1,
-             'email' : email,
-             'access-key' : access_key,
-             'secret-key' : secret_key,
-             'max-buckets' : '4'
-            })
-
-    assert ret == 200
-
-    # TESTCASE 'info-existing','user','info','existing user','returns correct info'
-    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1})
-
-    assert out['user_id'] == user1
-    assert out['email'] == email
-    assert out['display_name'] == display_name1
-    assert len(out['keys']) == 1
-    assert out['keys'][0]['access_key'] == access_key
-    assert out['keys'][0]['secret_key'] == secret_key
-    assert not out['suspended']
-
-    # TESTCASE 'suspend-ok','user','suspend','active user','succeeds'
-    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : True})
-    assert ret == 200
-
-    # TESTCASE 'suspend-suspended','user','suspend','suspended user','succeeds w/advisory'
-    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1})
-    assert ret == 200
-    assert out['suspended']
-
-    # TESTCASE 're-enable','user','enable','suspended user','succeeds'
-    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : 'false'})
-    assert ret == 200
-
-    # TESTCASE 'info-re-enabled','user','info','re-enabled user','no longer suspended'
-    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1})
-    assert ret == 200
-    assert not out['suspended']
-
-    # TESTCASE 'add-keys','key','create','w/valid info','succeeds'
-    (ret, out) = rgwadmin_rest(admin_conn,
-            ['key', 'create'],
-            {'uid' : user1,
-             'access-key' : access_key2,
-             'secret-key' : secret_key2
-            })
-
-
-    assert ret == 200
-
-    # TESTCASE 'info-new-key','user','info','after key addition','returns all keys'
-    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1})
-    assert ret == 200
-    assert len(out['keys']) == 2
-    assert out['keys'][0]['access_key'] == access_key2 or out['keys'][1]['access_key'] == access_key2
-    assert out['keys'][0]['secret_key'] == secret_key2 or out['keys'][1]['secret_key'] == secret_key2
-
-    # TESTCASE 'rm-key','key','rm','newly added key','succeeds, key is removed'
-    (ret, out) = rgwadmin_rest(admin_conn,
-            ['key', 'rm'],
-            {'uid' : user1,
-             'access-key' : access_key2
-            })
-
-    assert ret == 200
-
-    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1})
-
-    assert len(out['keys']) == 1
-    assert out['keys'][0]['access_key'] == access_key
-    assert out['keys'][0]['secret_key'] == secret_key
-
-    # TESTCASE 'add-swift-key','key','create','swift key','succeeds'
-    (ret, out) = rgwadmin_rest(admin_conn,
-            ['subuser', 'create'],
-            {'subuser' : subuser1,
-             'secret-key' : swift_secret1,
-             'key-type' : 'swift'
-            })
-
-    assert ret == 200
-
-    # TESTCASE 'info-swift-key','user','info','after key addition','returns all keys'
-    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1})
-    assert ret == 200
-    assert len(out['swift_keys']) == 1
-    assert out['swift_keys'][0]['user'] == subuser1
-    assert out['swift_keys'][0]['secret_key'] == swift_secret1
-
-    # TESTCASE 'add-swift-subuser','key','create','swift sub-user key','succeeds'
-    (ret, out) = rgwadmin_rest(admin_conn,
-            ['subuser', 'create'],
-            {'subuser' : subuser2,
-             'secret-key' : swift_secret2,
-             'key-type' : 'swift'
-            })
-
-    assert ret == 200
-
-    # TESTCASE 'info-swift-subuser','user','info','after key addition','returns all sub-users/keys'
-    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' :  user1})
-    assert ret == 200
-    assert len(out['swift_keys']) == 2
-    assert out['swift_keys'][0]['user'] == subuser2 or out['swift_keys'][1]['user'] == subuser2
-    assert out['swift_keys'][0]['secret_key'] == swift_secret2 or out['swift_keys'][1]['secret_key'] == swift_secret2
-
-    # TESTCASE 'rm-swift-key1','key','rm','subuser','succeeds, one key is removed'
-    (ret, out) = rgwadmin_rest(admin_conn,
-            ['key', 'rm'],
-            {'subuser' : subuser1,
-             'key-type' :'swift'
-            })
-
-    assert ret == 200
-
-    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' :  user1})
-    assert len(out['swift_keys']) == 1
-
-    # TESTCASE 'rm-subuser','subuser','rm','subuser','success, subuser is removed'
-    (ret, out) = rgwadmin_rest(admin_conn,
-            ['subuser', 'rm'],
-            {'subuser' : subuser1
-            })
-
-    assert ret == 200
-
-    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' :  user1})
-    assert len(out['subusers']) == 1
-
-    # TESTCASE 'rm-subuser-with-keys','subuser','rm','subuser','succeeds, second subuser and key are removed'
-    (ret, out) = rgwadmin_rest(admin_conn,
-            ['subuser', 'rm'],
-            {'subuser' : subuser2,
-             'key-type' : 'swift',
-             'purge-keys' : True
-            })
-
-    assert ret == 200
-
-    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' :  user1})
-    assert len(out['swift_keys']) == 0
-    assert len(out['subusers']) == 0
-
-    # TESTCASE 'bucket-stats','bucket','info','no session/buckets','succeeds, empty list'
-    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' :  user1})
-    assert ret == 200
-    assert len(out) == 0
-
-    # connect to rgw
-    connection = boto.s3.connection.S3Connection(
-        aws_access_key_id=access_key,
-        aws_secret_access_key=secret_key,
-        is_secure=False,
-        port=7280,
-        host=remote_host,
-        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
-        )
-
-    # TESTCASE 'bucket-stats2','bucket','stats','no buckets','succeeds, empty list'
-    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1, 'stats' : True})
-    assert ret == 200
-    assert len(out) == 0
-
-    # create a first bucket
-    bucket = connection.create_bucket(bucket_name)
-
-    # TESTCASE 'bucket-list','bucket','list','one bucket','succeeds, expected list'
-    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1})
-    assert ret == 200
-    assert len(out) == 1
-    assert out[0] == bucket_name
-
-    # TESTCASE 'bucket-stats3','bucket','stats','new empty bucket','succeeds, empty list'
-    (ret, out) = rgwadmin_rest(admin_conn,
-            ['bucket', 'info'], {'bucket' : bucket_name, 'stats' : True})
-
-    assert ret == 200
-    assert out['owner'] == user1
-    bucket_id = out['id']
-
-    # TESTCASE 'bucket-stats4','bucket','stats','new empty bucket','succeeds, expected bucket ID'
-    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1, 'stats' : True})
-    assert ret == 200
-    assert len(out) == 1
-    assert out[0]['id'] == bucket_id    # does it return the same ID twice in a row?
-
-    # use some space
-    key = boto.s3.key.Key(bucket)
-    key.set_contents_from_string('one')
-
-    # TESTCASE 'bucket-stats5','bucket','stats','after creating key','succeeds, lists one non-empty object'
-    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'bucket' : bucket_name, 'stats' : True})
-    assert ret == 200
-    assert out['id'] == bucket_id
-    assert out['usage']['rgw.main']['num_objects'] == 1
-    assert out['usage']['rgw.main']['size_kb'] > 0
-
-    # reclaim it
-    key.delete()
-
-    # TESTCASE 'bucket unlink', 'bucket', 'unlink', 'unlink bucket from user', 'succeeds'
-    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'unlink'], {'uid' : user1, 'bucket' : bucket_name})
-
-    assert ret == 200
-
-    # create a second user to link the bucket to
-    (ret, out) = rgwadmin_rest(admin_conn,
-            ['user', 'create'],
-            {'uid' : user2,
-            'display-name' :  display_name2,
-            'access-key' : access_key2,
-            'secret-key' : secret_key2,
-            'max-buckets' : '1',
-            })
-
-    assert ret == 200
-
-    # try creating an object with the first user before the bucket is relinked
-    denied = False
-    key = boto.s3.key.Key(bucket)
-
-    try:
-        key.set_contents_from_string('two')
-    except boto.exception.S3ResponseError:
-        denied = True
-
-    assert not denied
-
-    # delete the object
-    key.delete()
-
-    # link the bucket to another user
-    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'link'], {'uid' : user2, 'bucket' : bucket_name})
-
-    assert ret == 200
-
-    # try creating an object with the first user which should cause an error
-    key = boto.s3.key.Key(bucket)
-
-    try:
-        key.set_contents_from_string('three')
-    except boto.exception.S3ResponseError:
-        denied = True
-
-    assert denied
-
-    # relink the bucket to the first user and delete the second user
-    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'link'], {'uid' : user1, 'bucket' : bucket_name})
-    assert ret == 200
-
-    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user2})
-    assert ret == 200
-
-    # TESTCASE 'object-rm', 'object', 'rm', 'remove object', 'succeeds, object is removed'
-
-    # upload an object
-    object_name = 'four'
-    key = boto.s3.key.Key(bucket, object_name)
-    key.set_contents_from_string(object_name)
-
-    # now delete it
-    (ret, out) = rgwadmin_rest(admin_conn, ['object', 'rm'], {'bucket' : bucket_name, 'object' : object_name})
-    assert ret == 200
-
-    # TESTCASE 'bucket-stats6','bucket','stats','after deleting key','succeeds, lists no objects'
-    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'bucket' : bucket_name, 'stats' : True})
-    assert ret == 200
-    assert out['id'] == bucket_id
-    assert out['usage']['rgw.main']['num_objects'] == 0
-
-    # create a bucket for deletion stats
-    useless_bucket = connection.create_bucket('useless_bucket')
-    useless_key = useless_bucket.new_key('useless_key')
-    useless_key.set_contents_from_string('useless string')
-
-    # delete it
-    useless_key.delete()
-    useless_bucket.delete()
-
-    # wait for the statistics to flush
-    time.sleep(60)
-
-    # need to wait for all usage data to get flushed; usually quick, but poll for up to 20 minutes
-    timestamp = time.time()
-    while time.time() - timestamp <= (20 * 60):      # wait up to 20 minutes
-        (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'categories' : 'delete_obj'})  # last operation we did is delete obj, wait for it to flush
-
-        if successful_ops(out) > 0:
-            break
-        time.sleep(1)
-
-    assert time.time() - timestamp <= (20 * 60)
-
-    # TESTCASE 'usage-show' 'usage' 'show' 'all usage' 'succeeds'
-    (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'])
-    assert ret == 200
-    assert len(out['entries']) > 0
-    assert len(out['summary']) > 0
-    user_summary = out['summary'][0]
-    total = user_summary['total']
-    assert total['successful_ops'] > 0
-
-    # TESTCASE 'usage-show2' 'usage' 'show' 'user usage' 'succeeds'
-    (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid' : user1})
-    assert ret == 200
-    assert len(out['entries']) > 0
-    assert len(out['summary']) > 0
-    user_summary = out['summary'][0]
-    for entry in user_summary['categories']:
-        assert entry['successful_ops'] > 0
-    assert user_summary['user'] == user1
-
-    # TESTCASE 'usage-show3' 'usage' 'show' 'user usage categories' 'succeeds'
-    test_categories = ['create_bucket', 'put_obj', 'delete_obj', 'delete_bucket']
-    for cat in test_categories:
-        (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid' : user1, 'categories' : cat})
-        assert ret == 200
-        assert len(out['summary']) > 0
-        user_summary = out['summary'][0]
-        assert user_summary['user'] == user1
-        assert len(user_summary['categories']) == 1
-        entry = user_summary['categories'][0]
-        assert entry['category'] == cat
-        assert entry['successful_ops'] > 0
-
-    # TESTCASE 'usage-trim' 'usage' 'trim' 'user usage' 'succeeds, usage removed'
-    (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'trim'], {'uid' : user1})
-    assert ret == 200
-    (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid' : user1})
-    assert ret == 200
-    assert len(out['entries']) == 0
-    assert len(out['summary']) == 0
-
-    # TESTCASE 'user-suspend2','user','suspend','existing user','succeeds'
-    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : True})
-    assert ret == 200
-
-    # TESTCASE 'user-suspend3','user','suspend','suspended user','cannot write objects'
-    try:
-        key = boto.s3.key.Key(bucket)
-        key.set_contents_from_string('five')
-    except boto.exception.S3ResponseError as e:
-        assert e.status == 403
-
-    # TESTCASE 'user-renable2','user','enable','suspended user','succeeds'
-    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' :  user1, 'suspended' : 'false'})
-    assert ret == 200
-
-    # TESTCASE 'user-renable3','user','enable','reenabled user','can write objects'
-    key = boto.s3.key.Key(bucket)
-    key.set_contents_from_string('six')
-
-    # TESTCASE 'garbage-list', 'garbage', 'list', 'get list of objects ready for garbage collection'
-
-    # create an object large enough to be split into multiple parts
-    test_string = 'foo'*10000000
-
-    big_key = boto.s3.key.Key(bucket)
-    big_key.set_contents_from_string(test_string)
-
-    # now delete the head
-    big_key.delete()
-
-    # TESTCASE 'rm-user-buckets','user','rm','existing user','fails, still has buckets'
-    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user1})
-    assert ret == 409
-
-    # delete should fail because ``key`` still exists
-    try:
-        bucket.delete()
-    except boto.exception.S3ResponseError as e:
-        assert e.status == 409
-
-    key.delete()
-    bucket.delete()
-
-    # TESTCASE 'policy', 'bucket', 'policy', 'get bucket policy', 'returns S3 policy'
-    bucket = connection.create_bucket(bucket_name)
-
-    # create an object
-    key = boto.s3.key.Key(bucket)
-    key.set_contents_from_string('seven')
-
-    # should be private already but guarantee it
-    key.set_acl('private')
-
-    (ret, out) = rgwadmin_rest(admin_conn, ['policy', 'show'], {'bucket' : bucket.name, 'object' : key.key})
-    assert ret == 200
-
-    acl = key.get_xml_acl()
-    assert acl == out.strip('\n')
-
-    # add another grantee by making the object public read
-    key.set_acl('public-read')
-
-    (ret, out) = rgwadmin_rest(admin_conn, ['policy', 'show'], {'bucket' : bucket.name, 'object' : key.key})
-    assert ret == 200
-
-    acl = key.get_xml_acl()
-    assert acl == out.strip('\n')
-
-    # TESTCASE 'rm-bucket', 'bucket', 'rm', 'bucket with objects', 'succeeds'
-    bucket = connection.create_bucket(bucket_name)
-    key_name = ['eight', 'nine', 'ten', 'eleven']
-    for i in range(4):
-        key = boto.s3.key.Key(bucket)
-        key.set_contents_from_string(key_name[i])
-
-    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'rm'], {'bucket' : bucket_name, 'purge-objects' : True})
-    assert ret == 200
-
-    # TESTCASE 'caps-add', 'caps', 'add', 'add user cap', 'succeeds'
-    caps = 'usage=read'
-    (ret, out) = rgwadmin_rest(admin_conn, ['caps', 'add'], {'uid' :  user1, 'user-caps' : caps})
-    assert ret == 200
-    assert out[0]['perm'] == 'read'
-
-    # TESTCASE 'caps-rm', 'caps', 'rm', 'remove existing cap from user', 'succeeds'
-    (ret, out) = rgwadmin_rest(admin_conn, ['caps', 'rm'], {'uid' :  user1, 'user-caps' : caps})
-    assert ret == 200
-    assert not out
-
-    # TESTCASE 'rm-user','user','rm','existing user','fails, still has buckets'
-    bucket = connection.create_bucket(bucket_name)
-    key = boto.s3.key.Key(bucket)
-
-    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user1})
-    assert ret == 409
-
-    # TESTCASE 'rm-user2', 'user', 'rm', 'user with data', 'succeeds'
-    bucket = connection.create_bucket(bucket_name)
-    key = boto.s3.key.Key(bucket)
-    key.set_contents_from_string('twelve')
-
-    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user1, 'purge-data' : True})
-    assert ret == 200
-
-    # TESTCASE 'rm-user3','user','info','deleted user','fails'
-    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' :  user1})
-    assert ret == 404
-
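For reference, rgwadmin_rest() above turns each two-element command into an HTTP verb plus an /admin/<resource> endpoint and signs it with the admin user's S3 key. Roughly, the user-info probe used throughout this file corresponds to the following (a sketch of the request shape, not an additional test case):

    # ['user', 'info'] with {'uid': user1} becomes, approximately:
    #
    #   GET http://<rgw-host>:7280/admin/user?uid=foo
    #   Authorization: AWS <admin_access_key>:<signature>
    #
    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid': user1})
    assert ret in (200, 404)    # 404 when the uid has not been created yet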
diff --git a/teuthology/task/radosgw_agent.py b/teuthology/task/radosgw_agent.py
deleted file mode 100644 (file)
index e8ffe94..0000000
+++ /dev/null
@@ -1,211 +0,0 @@
-"""
-Run rados gateway agent in test mode
-"""
-import contextlib
-import logging
-import argparse
-
-from ..orchestra import run
-from teuthology import misc as teuthology
-import teuthology.task_util.rgw as rgw_utils
-
-log = logging.getLogger(__name__)
-
-def run_radosgw_agent(ctx, config):
-    """
-    Run a single radosgw-agent. See task() for config format.
-    """
-    return_list = list()
-    for (client, cconf) in config.items():
-        # don't process entries that are not clients
-        if not client.startswith('client.'):
-            log.debug('key {data} does not start with \'client.\', moving on'.format(
-                      data=client))
-            continue
-
-        src_client = cconf['src']
-        dest_client = cconf['dest']
-
-        src_zone = rgw_utils.zone_for_client(ctx, src_client)
-        dest_zone = rgw_utils.zone_for_client(ctx, dest_client)
-
-        log.info("source is %s", src_zone)
-        log.info("dest is %s", dest_zone)
-
-        testdir = teuthology.get_testdir(ctx)
-        (remote,) = ctx.cluster.only(client).remotes.keys()
-        # figure out which branch to pull from
-        branch = cconf.get('force-branch', None)
-        if not branch:
-            branch = cconf.get('branch', 'master')
-        sha1 = cconf.get('sha1')
-        remote.run(
-            args=[
-                'cd', testdir, run.Raw('&&'),
-                'git', 'clone',
-                '-b', branch,
-#                'https://github.com/ceph/radosgw-agent.git',
-                'git://ceph.com/git/radosgw-agent.git',
-                'radosgw-agent.{client}'.format(client=client),
-                ]
-            )
-        if sha1 is not None:
-            remote.run(
-                args=[
-                    'cd', '{tdir}/radosgw-agent.{client}'.format(tdir=testdir,
-                                                                 client=client),
-                    run.Raw('&&'),
-                    'git', 'reset', '--hard', sha1,
-                ]
-            )
-        remote.run(
-            args=[
-                'cd', testdir, run.Raw('&&'),
-                'cd', 'radosgw-agent.{client}'.format(client=client),
-                run.Raw('&&'),
-                './bootstrap',
-                ]
-            )
-
-        src_host, src_port = rgw_utils.get_zone_host_and_port(ctx, src_client,
-                                                              src_zone)
-        dest_host, dest_port = rgw_utils.get_zone_host_and_port(ctx, dest_client,
-                                                                 dest_zone)
-        src_access, src_secret = rgw_utils.get_zone_system_keys(ctx, src_client,
-                                                               src_zone)
-        dest_access, dest_secret = rgw_utils.get_zone_system_keys(ctx, dest_client,
-                                                                 dest_zone)
-        sync_scope = cconf.get('sync-scope', None)
-        port = cconf.get('port', 8000)
-        daemon_name = '{host}.{port}.syncdaemon'.format(host=remote.name, port=port)
-        in_args=[
-            'daemon-helper',
-            'kill',
-            '{tdir}/radosgw-agent.{client}/radosgw-agent'.format(tdir=testdir,
-                                                                 client=client),
-            '-v',
-            '--src-access-key', src_access,
-            '--src-secret-key', src_secret,
-            '--source', "http://{addr}:{port}".format(addr=src_host, port=src_port),
-            '--dest-access-key', dest_access,
-            '--dest-secret-key', dest_secret,
-            '--max-entries', str(cconf.get('max-entries', 1000)),
-            '--log-file', '{tdir}/archive/rgw_sync_agent.{client}.log'.format(
-                tdir=testdir,
-                client=client),
-            '--object-sync-timeout', '30',
-            ]
-
-        if cconf.get('metadata-only', False):
-            in_args.append('--metadata-only')
-
-        # the test server and full/incremental flags are mutually exclusive
-        if sync_scope is None:
-            in_args.append('--test-server-host')
-            in_args.append('0.0.0.0')
-            in_args.append('--test-server-port')
-            in_args.append(str(port))
-            log.debug('Starting a sync test server on {client}'.format(client=client))
-            # Stash the radosgw-agent server / port # for use by subsequent tasks
-            ctx.radosgw_agent.endpoint = (client, str(port))
-        else:
-            in_args.append('--sync-scope')
-            in_args.append(sync_scope)
-            log.debug('Starting a {scope} sync on {client}'.format(scope=sync_scope,client=client))
-
-        # positional arg for destination must come last
-        in_args.append("http://{addr}:{port}".format(addr=dest_host,
-                                                     port=dest_port))
-
-        return_list.append((client, remote.run(
-            args=in_args,
-            wait=False,
-            stdin=run.PIPE,
-            logger=log.getChild(daemon_name),
-            )))
-    return return_list
-
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Run radosgw-agents in test mode.
-
-    Configuration is clients to run the agents on, with settings for
-    source client, destination client, and port to listen on.  Binds
-    to 0.0.0.0. Port defaults to 8000. This must be run on clients
-    that have the correct zone root pools and rgw zone set in
-    ceph.conf, or the task cannot read the region information from the
-    cluster.
-
-    By default, this task will start an HTTP server that will trigger full
-    or incremental syncs based on requests made to it.
-    Alternatively, a single full sync can be triggered by
-    specifying 'sync-scope: full' or a loop of incremental syncs can be triggered
-    by specifying 'sync-scope: incremental' (the loop will sleep
-    '--incremental-sync-delay' seconds between each sync, default is 30 seconds).
-
-    By default, both data and metadata are synced. To only sync
-    metadata, for example because you want to sync between regions,
-    set metadata-only: true.
-
-    An example::
-
-      tasks:
-      - ceph:
-          conf:
-            client.0:
-              rgw zone = foo
-              rgw zone root pool = .root.pool
-            client.1:
-              rgw zone = bar
-              rgw zone root pool = .root.pool2
-      - rgw: # region configuration omitted for brevity
-      - radosgw-agent:
-          client.0:
-            branch: wip-next-feature-branch
-            src: client.0
-            dest: client.1
-            sync-scope: full
-            metadata-only: true
-            # port: 8000 (default)
-          client.1:
-            src: client.1
-            dest: client.0
-            port: 8001
-    """
-    assert isinstance(config, dict), 'rgw_sync_agent requires a dictionary config'
-    log.debug("config is %s", config)
-
-    overrides = ctx.config.get('overrides', {})
-    # merge each client section, but only if it exists in config since there isn't
-    # a sensible default action for this task
-    for client in config.iterkeys():
-        if config[client]:
-            log.debug('config[{client}]: {data}'.format(client=client, data=config[client]))
-            teuthology.deep_merge(config[client], overrides.get('radosgw-agent', {}))
-
-    ctx.radosgw_agent = argparse.Namespace()
-    ctx.radosgw_agent.config = config
-
-    procs = run_radosgw_agent(ctx, config)
-
-    ctx.radosgw_agent.procs = procs
-
-    try:
-        yield
-    finally:
-        testdir = teuthology.get_testdir(ctx)
-        try:
-            for client, proc in procs:
-                log.info("shutting down sync agent on %s", client)
-                proc.stdin.close()
-                proc.exitstatus.get()
-        finally:
-            for client, proc in procs:
-                ctx.cluster.only(client).run(
-                    args=[
-                        'rm', '-rf',
-                        '{tdir}/radosgw-agent.{client}'.format(tdir=testdir,
-                                                               client=client)
-                        ]
-                    )
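When no sync-scope is configured, the agent starts its HTTP test server and run_radosgw_agent() stashes the (client, port) pair on ctx.radosgw_agent.endpoint. A minimal sketch of a hypothetical follow-on task that locates that server (the task name and logging are illustrative, not an existing teuthology task):

    import logging

    log = logging.getLogger(__name__)

    def task(ctx, config):
        """Locate the radosgw-agent test server started by the task above."""
        client, port = ctx.radosgw_agent.endpoint
        (remote,) = ctx.cluster.only(client).remotes.keys()
        host = remote.name.split('@')[-1]     # remote.name is of the form user@host
        log.info('radosgw-agent test server for %s is at %s:%s', client, host, port)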
diff --git a/teuthology/task/rbd.py b/teuthology/task/rbd.py
deleted file mode 100644 (file)
index 7d07a61..0000000
+++ /dev/null
@@ -1,506 +0,0 @@
-"""
-Rbd testing task
-"""
-import contextlib
-import logging
-import os
-
-from cStringIO import StringIO
-from ..orchestra import run
-from teuthology import misc as teuthology
-from teuthology import contextutil
-from teuthology.parallel import parallel
-from teuthology.task.common_fs_utils import generic_mkfs
-from teuthology.task.common_fs_utils import generic_mount
-from teuthology.task.common_fs_utils import default_image_name
-
-log = logging.getLogger(__name__)
-
-@contextlib.contextmanager
-def create_image(ctx, config):
-    """
-    Create an rbd image.
-
-    For example::
-
-        tasks:
-        - ceph:
-        - rbd.create_image:
-            client.0:
-                image_name: testimage
-                image_size: 100
-                image_format: 1
-            client.1:
-
-    Image size is expressed as a number of megabytes; default value
-    is 10240.
-
-    Image format value must be either 1 or 2; default value is 1.
-
-    """
-    assert isinstance(config, dict) or isinstance(config, list), \
-        "task create_image only supports a list or dictionary for configuration"
-
-    if isinstance(config, dict):
-        images = config.items()
-    else:
-        images = [(role, None) for role in config]
-
-    testdir = teuthology.get_testdir(ctx)
-    for role, properties in images:
-        if properties is None:
-            properties = {}
-        name = properties.get('image_name', default_image_name(role))
-        size = properties.get('image_size', 10240)
-        fmt = properties.get('image_format', 1)
-        (remote,) = ctx.cluster.only(role).remotes.keys()
-        log.info('Creating image {name} with size {size}'.format(name=name,
-                                                                 size=size))
-        args = [
-                'adjust-ulimits',
-                'ceph-coverage',
-                '{tdir}/archive/coverage'.format(tdir=testdir),
-                'rbd',
-                '-p', 'rbd',
-                'create',
-                '--size', str(size),
-                name,
-            ]
-        # omit format option if using the default (format 1)
-        # since old versions of rbd don't support it
-        if int(fmt) != 1:
-            args += ['--format', str(fmt)]
-        remote.run(args=args)
-    try:
-        yield
-    finally:
-        log.info('Deleting rbd images...')
-        for role, properties in images:
-            if properties is None:
-                properties = {}
-            name = properties.get('image_name', default_image_name(role))
-            (remote,) = ctx.cluster.only(role).remotes.keys()
-            remote.run(
-                args=[
-                    'adjust-ulimits',
-                    'ceph-coverage',
-                    '{tdir}/archive/coverage'.format(tdir=testdir),
-                    'rbd',
-                    '-p', 'rbd',
-                    'rm',
-                    name,
-                    ],
-                )
-
-@contextlib.contextmanager
-def modprobe(ctx, config):
-    """
-    Load the rbd kernel module.
-
-    For example::
-
-        tasks:
-        - ceph:
-        - rbd.create_image: [client.0]
-        - rbd.modprobe: [client.0]
-    """
-    log.info('Loading rbd kernel module...')
-    for role in config:
-        (remote,) = ctx.cluster.only(role).remotes.keys()
-        remote.run(
-            args=[
-                'sudo',
-                'modprobe',
-                'rbd',
-                ],
-            )
-    try:
-        yield
-    finally:
-        log.info('Unloading rbd kernel module...')
-        for role in config:
-            (remote,) = ctx.cluster.only(role).remotes.keys()
-            remote.run(
-                args=[
-                    'sudo',
-                    'modprobe',
-                    '-r',
-                    'rbd',
-                    # force errors to be ignored; necessary if more
-                    # than one device was created, which may mean
-                    # the module isn't quite ready to go the first
-                    # time through.
-                    run.Raw('||'),
-                    'true',
-                    ],
-                )
-
-@contextlib.contextmanager
-def dev_create(ctx, config):
-    """
-    Map block devices to rbd images.
-
-    For example::
-
-        tasks:
-        - ceph:
-        - rbd.create_image: [client.0]
-        - rbd.modprobe: [client.0]
-        - rbd.dev_create:
-            client.0: testimage.client.0
-    """
-    assert isinstance(config, dict) or isinstance(config, list), \
-        "task dev_create only supports a list or dictionary for configuration"
-
-    if isinstance(config, dict):
-        role_images = config.items()
-    else:
-        role_images = [(role, None) for role in config]
-
-    log.info('Creating rbd block devices...')
-
-    testdir = teuthology.get_testdir(ctx)
-
-    for role, image in role_images:
-        if image is None:
-            image = default_image_name(role)
-        (remote,) = ctx.cluster.only(role).remotes.keys()
-
-        remote.run(
-            args=[
-                'sudo',
-                'adjust-ulimits',
-                'ceph-coverage',
-                '{tdir}/archive/coverage'.format(tdir=testdir),
-                'rbd',
-                '--user', role.rsplit('.')[-1],
-                '-p', 'rbd',
-                'map',
-                image,
-                run.Raw('&&'),
-                # wait for the symlink to be created by udev
-                'while', 'test', '!', '-e', '/dev/rbd/rbd/{image}'.format(image=image), run.Raw(';'), 'do',
-                'sleep', '1', run.Raw(';'),
-                'done',
-                ],
-            )
-    try:
-        yield
-    finally:
-        log.info('Unmapping rbd devices...')
-        for role, image in role_images:
-            if image is None:
-                image = default_image_name(role)
-            (remote,) = ctx.cluster.only(role).remotes.keys()
-            remote.run(
-                args=[
-                    'LD_LIBRARY_PATH={tdir}/binary/usr/local/lib'.format(tdir=testdir),
-                    'sudo',
-                    'adjust-ulimits',
-                    'ceph-coverage',
-                    '{tdir}/archive/coverage'.format(tdir=testdir),
-                    'rbd',
-                    '-p', 'rbd',
-                    'unmap',
-                    '/dev/rbd/rbd/{imgname}'.format(imgname=image),
-                    run.Raw('&&'),
-                    # wait for the symlink to be deleted by udev
-                    'while', 'test', '-e', '/dev/rbd/rbd/{image}'.format(image=image),
-                    run.Raw(';'),
-                    'do',
-                    'sleep', '1', run.Raw(';'),
-                    'done',
-                    ],
-                )
-
-
-def rbd_devname_rtn(ctx, image):
-    """
-    Return the block device path for an rbd image mapped via the kernel client.
-    """
-    return '/dev/rbd/rbd/{image}'.format(image=image)
-
-def canonical_path(ctx, role, path):
-    """
-    Determine the canonical path for a given path on the host
-    representing the given role.  A canonical path contains no
-    . or .. components, and includes no symbolic links.
-    """
-    version_fp = StringIO()
-    ctx.cluster.only(role).run(
-        args=[ 'readlink', '-f', path ],
-        stdout=version_fp,
-        )
-    canonical_path = version_fp.getvalue().rstrip('\n')
-    version_fp.close()
-    return canonical_path
-
-@contextlib.contextmanager
-def run_xfstests(ctx, config):
-    """
-    Run xfstests over specified devices.
-
-    Warning: both the test and scratch devices specified will be
-    overwritten.  Normally xfstests modifies (but does not destroy)
-    the test device, but for now the run script used here re-makes
-    both filesystems.
-
-    Note: Only one instance of xfstests can run on a single host at
-    a time, although this is not enforced.
-
-    This task in its current form needs some improvement.  For
-    example, it assumes all roles provided in the config are
-    clients, and that the config provided is a list of key/value
-    pairs.  For now please use the xfstests() interface, below.
-
-    For example::
-
-        tasks:
-        - ceph:
-        - rbd.run_xfstests:
-            client.0:
-                count: 2
-                test_dev: 'test_dev'
-                scratch_dev: 'scratch_dev'
-                fs_type: 'xfs'
-                tests: '1-9 11-15 17 19-21 26-28 31-34 41 45-48'
-    """
-    with parallel() as p:
-        for role, properties in config.items():
-            p.spawn(run_xfstests_one_client, ctx, role, properties)
-    yield
-
-def run_xfstests_one_client(ctx, role, properties):
-    """
-    Spawned routine to handle xfs tests for a single client
-    """
-    testdir = teuthology.get_testdir(ctx)
-    try:
-        count = properties.get('count')
-        test_dev = properties.get('test_dev')
-        assert test_dev is not None, \
-            "task run_xfstests requires test_dev to be defined"
-        test_dev = canonical_path(ctx, role, test_dev)
-
-        scratch_dev = properties.get('scratch_dev')
-        assert scratch_dev is not None, \
-            "task run_xfstests requires scratch_dev to be defined"
-        scratch_dev = canonical_path(ctx, role, scratch_dev)
-
-        fs_type = properties.get('fs_type')
-        tests = properties.get('tests')
-
-        (remote,) = ctx.cluster.only(role).remotes.keys()
-
-        # Fetch the test script
-        test_root = teuthology.get_testdir(ctx)
-        test_script = 'run_xfstests.sh'
-        test_path = os.path.join(test_root, test_script)
-
-        git_branch = 'master'
-        test_url = 'https://raw.github.com/ceph/ceph/{branch}/qa/{script}'.format(branch=git_branch, script=test_script)
-        # test_url = 'http://ceph.newdream.net/git/?p=ceph.git;a=blob_plain;hb=refs/heads/{branch};f=qa/{script}'.format(branch=git_branch, script=test_script)
-
-        log.info('Fetching {script} for {role} from {url}'.format(script=test_script,
-                                                                role=role,
-                                                                url=test_url))
-        args = [ 'wget', '-O', test_path, '--', test_url ]
-        remote.run(args=args)
-
-        log.info('Running xfstests on {role}:'.format(role=role))
-        log.info('   iteration count: {count}:'.format(count=count))
-        log.info('       test device: {dev}'.format(dev=test_dev))
-        log.info('    scratch device: {dev}'.format(dev=scratch_dev))
-        log.info('     using fs_type: {fs_type}'.format(fs_type=fs_type))
-        log.info('      tests to run: {tests}'.format(tests=tests))
-
-        # Note that the device paths are interpreted using
-        # readlink -f <path> in order to get their canonical
-        # pathname (so it matches what the kernel remembers).
-        args = [
-            '/usr/bin/sudo',
-            'TESTDIR={tdir}'.format(tdir=testdir),
-            'adjust-ulimits',
-            'ceph-coverage',
-            '{tdir}/archive/coverage'.format(tdir=testdir),
-            '/bin/bash',
-            test_path,
-            '-c', str(count),
-            '-f', fs_type,
-            '-t', test_dev,
-            '-s', scratch_dev,
-            ]
-        if tests:
-            args.append(tests)
-        remote.run(args=args, logger=log.getChild(role))
-    finally:
-        log.info('Removing {script} on {role}'.format(script=test_script,
-                                                      role=role))
-        remote.run(args=['rm', '-f', test_path])
-
-@contextlib.contextmanager
-def xfstests(ctx, config):
-    """
-    Run xfstests over rbd devices.  This interface sets up all
-    required configuration automatically if not otherwise specified.
-    Note that only one instance of xfstests can run on a single host
-    at a time.  By default, the set of tests specified is run once.
-    If a (non-zero) count value is supplied, the complete set of
-    tests will be run that number of times.
-
-    For example::
-
-        tasks:
-        - ceph:
-        # Image sizes are in MB
-        - rbd.xfstests:
-            client.0:
-                count: 3
-                test_image: 'test_image'
-                test_size: 250
-                test_format: 2
-                scratch_image: 'scratch_image'
-                scratch_size: 250
-                scratch_format: 1
-                fs_type: 'xfs'
-                tests: '1-9 11-15 17 19-21 26-28 31-34 41 45-48'
-    """
-    if config is None:
-        config = { 'all': None }
-    assert isinstance(config, dict) or isinstance(config, list), \
-        "task xfstests only supports a list or dictionary for configuration"
-    if isinstance(config, dict):
-        config = teuthology.replace_all_with_clients(ctx.cluster, config)
-        runs = config.items()
-    else:
-        runs = [(role, None) for role in config]
-
-    running_xfstests = {}
-    for role, properties in runs:
-        assert role.startswith('client.'), \
-            "task xfstests can only run on client nodes"
-        for host, roles_for_host in ctx.cluster.remotes.items():
-            if role in roles_for_host:
-                assert host not in running_xfstests, \
-                    "task xfstests allows only one instance at a time per host"
-                running_xfstests[host] = True
-
-    images_config = {}
-    scratch_config = {}
-    modprobe_config = {}
-    image_map_config = {}
-    scratch_map_config = {}
-    xfstests_config = {}
-    for role, properties in runs:
-        if properties is None:
-            properties = {}
-
-        test_image = properties.get('test_image', 'test_image.{role}'.format(role=role))
-        test_size = properties.get('test_size', 2000) # 2G
-        test_fmt = properties.get('test_format', 1)
-        scratch_image = properties.get('scratch_image', 'scratch_image.{role}'.format(role=role))
-        scratch_size = properties.get('scratch_size', 10000) # 10G
-        scratch_fmt = properties.get('scratch_format', 1)
-
-        images_config[role] = dict(
-            image_name=test_image,
-            image_size=test_size,
-            image_format=test_fmt,
-            )
-
-        scratch_config[role] = dict(
-            image_name=scratch_image,
-            image_size=scratch_size,
-            image_format=scratch_fmt,
-            )
-
-        xfstests_config[role] = dict(
-            count=properties.get('count', 1),
-            test_dev='/dev/rbd/rbd/{image}'.format(image=test_image),
-            scratch_dev='/dev/rbd/rbd/{image}'.format(image=scratch_image),
-            fs_type=properties.get('fs_type', 'xfs'),
-            tests=properties.get('tests'),
-            )
-
-        log.info('Setting up xfstests using RBD images:')
-        log.info('      test ({size} MB): {image}'.format(size=test_size,
-                                                        image=test_image))
-        log.info('   scratch ({size} MB): {image}'.format(size=scratch_size,
-                                                        image=scratch_image))
-        modprobe_config[role] = None
-        image_map_config[role] = test_image
-        scratch_map_config[role] = scratch_image
-
-    with contextutil.nested(
-        lambda: create_image(ctx=ctx, config=images_config),
-        lambda: create_image(ctx=ctx, config=scratch_config),
-        lambda: modprobe(ctx=ctx, config=modprobe_config),
-        lambda: dev_create(ctx=ctx, config=image_map_config),
-        lambda: dev_create(ctx=ctx, config=scratch_map_config),
-        lambda: run_xfstests(ctx=ctx, config=xfstests_config),
-        ):
-        yield
-
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Create and mount an rbd image.
-
-    For example, you can specify which clients to run on::
-
-        tasks:
-        - ceph:
-        - rbd: [client.0, client.1]
-
-    There are a few image options::
-
-        tasks:
-        - ceph:
-        - rbd:
-            client.0: # uses defaults
-            client.1:
-                image_name: foo
-                image_size: 2048
-                image_format: 2
-                fs_type: xfs
-
-    To use default options on all clients::
-
-        tasks:
-        - ceph:
-        - rbd:
-            all:
-
-    To create 20GiB images and format them with xfs on all clients::
-
-        tasks:
-        - ceph:
-        - rbd:
-            all:
-              image_size: 20480
-              fs_type: xfs
-    """
-    if config is None:
-        config = { 'all': None }
-    norm_config = config
-    if isinstance(config, dict):
-        norm_config = teuthology.replace_all_with_clients(ctx.cluster, config)
-    if isinstance(norm_config, dict):
-        role_images = {}
-        for role, properties in norm_config.iteritems():
-            if properties is None:
-                properties = {}
-            role_images[role] = properties.get('image_name')
-    else:
-        role_images = norm_config
-
-    log.debug('rbd config is: %s', norm_config)
-
-    with contextutil.nested(
-        lambda: create_image(ctx=ctx, config=norm_config),
-        lambda: modprobe(ctx=ctx, config=norm_config),
-        lambda: dev_create(ctx=ctx, config=role_images),
-        lambda: generic_mkfs(ctx=ctx, config=norm_config,
-                devname_rtn=rbd_devname_rtn),
-        lambda: generic_mount(ctx=ctx, config=role_images,
-                devname_rtn=rbd_devname_rtn),
-        ):
-        yield
diff --git a/teuthology/task/rbd_fsx.py b/teuthology/task/rbd_fsx.py
deleted file mode 100644 (file)
index 6d55b5c..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-"""
-Run fsx on an rbd image
-"""
-import contextlib
-import logging
-
-from teuthology.parallel import parallel
-from teuthology import misc as teuthology
-
-log = logging.getLogger(__name__)
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Run fsx on an rbd image.
-
-    Currently this requires running as client.admin
-    to create a pool.
-
-    Specify which clients to run on as a list::
-
-      tasks:
-        ceph:
-        rbd_fsx:
-          clients: [client.0, client.1]
-
-    You can optionally change some properties of fsx:
-
-      tasks:
-        ceph:
-        rbd_fsx:
-          clients: <list of clients>
-          seed: <random seed number, or 0 to use the time>
-          ops: <number of operations to do>
-          size: <maximum image size in bytes>
-    """
-    log.info('starting rbd_fsx...')
-    with parallel() as p:
-        for role in config['clients']:
-            p.spawn(_run_one_client, ctx, config, role)
-    yield
-
-def _run_one_client(ctx, config, role):
-    """Spawned task that runs the client"""
-    testdir = teuthology.get_testdir(ctx)
-    (remote,) = ctx.cluster.only(role).remotes.iterkeys()
-    remote.run(
-        args=[
-            'adjust-ulimits',
-            'ceph-coverage',
-            '{tdir}/archive/coverage'.format(tdir=testdir),
-            'ceph_test_librbd_fsx',
-            '-d',
-            '-W', '-R', # mmap doesn't work with rbd
-            '-p', str(config.get('progress_interval', 100)),  # show progress
-            '-P', '{tdir}/archive'.format(tdir=testdir),
-            '-t', str(config.get('truncbdy',1)),
-            '-l', str(config.get('size', 250000000)),
-            '-S', str(config.get('seed', 0)),
-            '-N', str(config.get('ops', 1000)),
-            'pool_{pool}'.format(pool=role),
-            'image_{image}'.format(image=role),
-            ],
-        )
diff --git a/teuthology/task/recovery_bench.py b/teuthology/task/recovery_bench.py
deleted file mode 100644 (file)
index 1984b97..0000000
+++ /dev/null
@@ -1,208 +0,0 @@
-"""
-Recovery system benchmarking
-"""
-from cStringIO import StringIO
-
-import contextlib
-import gevent
-import json
-import logging
-import random
-import time
-
-import ceph_manager
-from teuthology import misc as teuthology
-
-log = logging.getLogger(__name__)
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Benchmark the recovery system.
-
-    Generates objects with smalliobench, runs it normally to get a
-    baseline performance measurement, then marks an OSD out and reruns
-    to measure performance during recovery.
-
-    The config should be as follows:
-
-    recovery_bench:
-        duration: <seconds for each measurement run>
-        num_objects: <number of objects>
-        io_size: <io size in bytes>
-
-    example:
-
-    tasks:
-    - ceph:
-    - recovery_bench:
-        duration: 60
-        num_objects: 500
-        io_size: 4096
-    """
-    if config is None:
-        config = {}
-    assert isinstance(config, dict), \
-        'recovery_bench task only accepts a dict for configuration'
-
-    log.info('Beginning recovery bench...')
-
-    first_mon = teuthology.get_first_mon(ctx, config)
-    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-
-    manager = ceph_manager.CephManager(
-        mon,
-        ctx=ctx,
-        logger=log.getChild('ceph_manager'),
-        )
-
-    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
-    while len(manager.get_osd_status()['up']) < num_osds:
-        manager.sleep(10)
-
-    bench_proc = RecoveryBencher(
-        manager,
-        config,
-        )
-    try:
-        yield
-    finally:
-        log.info('joining recovery bencher')
-        bench_proc.do_join()
-
-class RecoveryBencher:
-    """
-    RecoveryBencher
-    """
-    def __init__(self, manager, config):
-        self.ceph_manager = manager
-        self.ceph_manager.wait_for_clean()
-
-        osd_status = self.ceph_manager.get_osd_status()
-        self.osds = osd_status['up']
-
-        self.config = config
-        if self.config is None:
-            self.config = dict()
-
-        else:
-            def tmp(x):
-                """
-                Local wrapper to print value.
-                """
-                print x
-            self.log = tmp
-
-        log.info("spawning thread")
-
-        self.thread = gevent.spawn(self.do_bench)
-
-    def do_join(self):
-        """
-        Join the recovery bencher.  This is called after the main
-        task exits.
-        """
-        self.thread.get()
-
-    def do_bench(self):
-        """
-        Do the benchmarking.
-        """
-        duration = self.config.get("duration", 60)
-        num_objects = self.config.get("num_objects", 500)
-        io_size = self.config.get("io_size", 4096)
-
-        osd = str(random.choice(self.osds))
-        (osd_remote,) = self.ceph_manager.ctx.cluster.only('osd.%s' % osd).remotes.iterkeys()
-
-        testdir = teuthology.get_testdir(self.ceph_manager.ctx)
-
-        # create the objects
-        osd_remote.run(
-            args=[
-                'adjust-ulimits',
-                'ceph-coverage',
-                '{tdir}/archive/coverage'.format(tdir=testdir),
-                'smalliobench',
-                '--use-prefix', 'recovery_bench',
-                '--init-only', '1',
-                '--num-objects', str(num_objects),
-                '--io-size', str(io_size),
-                ],
-            wait=True,
-        )
-
-        # baseline bench
-        log.info('non-recovery (baseline)')
-        p = osd_remote.run(
-            args=[
-                'adjust-ulimits',
-                'ceph-coverage',
-                '{tdir}/archive/coverage'.format(tdir=testdir),
-                'smalliobench',
-                '--use-prefix', 'recovery_bench',
-                '--do-not-init', '1',
-                '--duration', str(duration),
-                '--io-size', str(io_size),
-                ],
-            stdout=StringIO(),
-            stderr=StringIO(),
-            wait=True,
-        )
-        self.process_samples(p.stderr.getvalue())
-
-        self.ceph_manager.raw_cluster_cmd('osd', 'out', osd)
-        time.sleep(5)
-
-        # recovery bench
-        log.info('recovery active')
-        p = osd_remote.run(
-            args=[
-                'adjust-ulimits',
-                'ceph-coverage',
-                '{tdir}/archive/coverage'.format(tdir=testdir),
-                'smalliobench',
-                '--use-prefix', 'recovery_bench',
-                '--do-not-init', '1',
-                '--duration', str(duration),
-                '--io-size', str(io_size),
-                ],
-            stdout=StringIO(),
-            stderr=StringIO(),
-            wait=True,
-        )
-        self.process_samples(p.stderr.getvalue())
-
-        self.ceph_manager.raw_cluster_cmd('osd', 'in', osd)
-
-    def process_samples(self, input):
-        """
-        Extract samples from the input and process the results
-
-        :param input: input lines in JSON format
-        """
-        lat = {}
-        for line in input.split('\n'):
-            try:
-                sample = json.loads(line)
-                samples = lat.setdefault(sample['type'], [])
-                samples.append(float(sample['latency']))
-            except Exception:
-                pass
-
-        for type in lat:
-            samples = lat[type]
-            samples.sort()
-
-            num = len(samples)
-
-            # median
-            if num & 1 == 1: # odd number of samples
-                median = samples[num / 2]
-            else:
-                median = (samples[num / 2] + samples[num / 2 - 1]) / 2
-
-            # 99%
-            ninety_nine = samples[int(num * 0.99)]
-
-            log.info("%s: median %f, 99%% %f" % (type, median, ninety_nine))
diff --git a/teuthology/task/rep_lost_unfound_delete.py b/teuthology/task/rep_lost_unfound_delete.py
deleted file mode 100644 (file)
index f75a4d2..0000000
+++ /dev/null
@@ -1,153 +0,0 @@
-"""
-Rep lost unfound delete: test handling of lost/unfound objects
-"""
-import logging
-import ceph_manager
-from teuthology import misc as teuthology
-from teuthology.task_util.rados import rados
-
-log = logging.getLogger(__name__)
-
-def task(ctx, config):
-    """
-    Test handling of lost objects.
-
-    A pretty rigid cluster is brought up and tested by this task.
-    """
-    if config is None:
-        config = {}
-    assert isinstance(config, dict), \
-        'lost_unfound task only accepts a dict for configuration'
-    first_mon = teuthology.get_first_mon(ctx, config)
-    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-
-    manager = ceph_manager.CephManager(
-        mon,
-        ctx=ctx,
-        logger=log.getChild('ceph_manager'),
-        )
-
-    while len(manager.get_osd_status()['up']) < 3:
-        manager.sleep(10)
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-    manager.wait_for_clean()
-
-    # something that is always there
-    dummyfile = '/etc/fstab'
-
-    # take an osd out until the very end
-    manager.kill_osd(2)
-    manager.mark_down_osd(2)
-    manager.mark_out_osd(2)
-
-    # kludge to make sure they get a map
-    rados(ctx, mon, ['-p', 'data', 'put', 'dummy', dummyfile])
-
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.wait_for_recovery()
-
-    # create old objects
-    for f in range(1, 10):
-        rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])
-        rados(ctx, mon, ['-p', 'data', 'put', 'existed_%d' % f, dummyfile])
-        rados(ctx, mon, ['-p', 'data', 'rm', 'existed_%d' % f])
-
-    # delay recovery, and make the pg log very long (to prevent backfill)
-    manager.raw_cluster_cmd(
-            'tell', 'osd.1',
-            'injectargs',
-            '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000'
-            )
-
-    manager.kill_osd(0)
-    manager.mark_down_osd(0)
-    
-    for f in range(1, 10):
-        rados(ctx, mon, ['-p', 'data', 'put', 'new_%d' % f, dummyfile])
-        rados(ctx, mon, ['-p', 'data', 'put', 'existed_%d' % f, dummyfile])
-        rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])
-
-    # bring osd.0 back up, let it peer, but don't replicate the new
-    # objects...
-    log.info('osd.0 command_args is %s' % 'foo')
-    log.info(ctx.daemons.get_daemon('osd', 0).command_args)
-    ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend([
-            '--osd-recovery-delay-start', '1000'
-            ])
-    manager.revive_osd(0)
-    manager.mark_in_osd(0)
-    manager.wait_till_osd_is_up(0)
-
-    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.wait_till_active()
-
-    # take out osd.1 and the only copy of those objects.
-    manager.kill_osd(1)
-    manager.mark_down_osd(1)
-    manager.mark_out_osd(1)
-    manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')
-
-    # bring up osd.2 so that things would otherwise, in theory, recover fully
-    manager.revive_osd(2)
-    manager.mark_in_osd(2)
-    manager.wait_till_osd_is_up(2)
-
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-    manager.wait_till_active()
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-
-    # verify that there are unfound objects
-    unfound = manager.get_num_unfound_objects()
-    log.info("there are %d unfound objects" % unfound)
-    assert unfound
-
-    # mark stuff lost
-    pgs = manager.get_pg_stats()
-    for pg in pgs:
-        if pg['stat_sum']['num_objects_unfound'] > 0:
-            primary = 'osd.%d' % pg['acting'][0]
-
-            # verify that i can list them direct from the osd
-            log.info('listing missing/lost in %s state %s', pg['pgid'],
-                     pg['state'])
-            m = manager.list_pg_missing(pg['pgid'])
-            #log.info('%s' % m)
-            assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']
-            num_unfound = 0
-            for o in m['objects']:
-                if len(o['locations']) == 0:
-                    num_unfound += 1
-            assert m['num_unfound'] == num_unfound
-
-            log.info("reverting unfound in %s on %s", pg['pgid'], primary)
-            manager.raw_cluster_cmd('pg', pg['pgid'],
-                                    'mark_unfound_lost', 'delete')
-        else:
-            log.info("no unfound in %s", pg['pgid'])
-
-    manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')
-    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
-    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
-    manager.wait_for_recovery()
-
-    # verify result
-    for f in range(1, 10):
-        err = rados(ctx, mon, ['-p', 'data', 'get', 'new_%d' % f, '-'])
-        assert err
-        err = rados(ctx, mon, ['-p', 'data', 'get', 'existed_%d' % f, '-'])
-        assert err
-        err = rados(ctx, mon, ['-p', 'data', 'get', 'existing_%d' % f, '-'])
-        assert err
-
-    # see if osd.1 can cope
-    manager.revive_osd(1)
-    manager.mark_in_osd(1)
-    manager.wait_till_osd_is_up(1)
-    manager.wait_for_clean()
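
The task above relies on a simple invariant before reverting anything: for each PG, the number of missing objects with no remaining locations must equal the cluster's num_objects_unfound counter, and only then is 'mark_unfound_lost delete' issued. A small sketch of that per-PG accounting (the dict layout mirrors a 'pg missing' listing, but the values are invented):

    def count_unfound(pg_missing):
        """Count missing objects that have no known locations left."""
        return sum(1 for obj in pg_missing['objects'] if len(obj['locations']) == 0)

    # Invented listing: two missing objects, one with no locations at all.
    listing = {
        'num_unfound': 1,
        'objects': [
            {'oid': 'existing_3', 'locations': []},
            {'oid': 'new_7', 'locations': ['osd.2']},
        ],
    }
    assert count_unfound(listing) == listing['num_unfound']
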
diff --git a/teuthology/task/repair_test.py b/teuthology/task/repair_test.py
deleted file mode 100644 (file)
index 1dd8f2f..0000000
+++ /dev/null
@@ -1,213 +0,0 @@
-import logging
-import time
-
-import ceph_manager
-from teuthology import misc as teuthology
-
-log = logging.getLogger(__name__)
-
-def setup(ctx, config):
-    ctx.manager.wait_for_clean()
-    ctx.manager.create_pool("repair_test_pool", 1)
-    return "repair_test_pool"
-
-def teardown(ctx, config, pool):
-    ctx.manager.remove_pool(pool)
-    ctx.manager.wait_for_clean()
-
-def run_test(ctx, config, test):
-    s = setup(ctx, config)
-    test(ctx, config, s)
-    teardown(ctx, config, s)
-
-def choose_primary(ctx):
-    def ret(pool, num):
-        log.info("Choosing primary")
-        return ctx.manager.get_pg_primary(pool, num)
-    return ret
-
-def choose_replica(ctx):
-    def ret(pool, num):
-        log.info("Choosing replica")
-        return ctx.manager.get_pg_replica(pool, num)
-    return ret
-
-def trunc(ctx):
-    def ret(osd, pool, obj):
-        log.info("truncating object")
-        return ctx.manager.osd_admin_socket(
-            osd,
-            ['truncobj', pool, obj, '1'])
-    return ret
-
-def dataerr(ctx):
-    def ret(osd, pool, obj):
-        log.info("injecting data err on object")
-        return ctx.manager.osd_admin_socket(
-            osd,
-            ['injectdataerr', pool, obj])
-    return ret
-
-def mdataerr(ctx):
-    def ret(osd, pool, obj):
-        log.info("injecting mdata err on object")
-        return ctx.manager.osd_admin_socket(
-            osd,
-            ['injectmdataerr', pool, obj])
-    return ret
-
-def omaperr(ctx):
-    def ret(osd, pool, obj):
-        log.info("injecting omap err on object")
-        return ctx.manager.osd_admin_socket(
-            osd,
-            ['setomapval', pool, obj, 'badkey', 'badval'])
-    return ret
-
-def gen_repair_test_1(corrupter, chooser, scrub_type):
-    def ret(ctx, config, pool):
-        log.info("starting repair test type 1")
-        victim_osd = chooser(pool, 0)
-
-        # create object
-        log.info("doing put")
-        ctx.manager.do_put(pool, 'repair_test_obj', '/etc/hosts')
-
-        # corrupt object
-        log.info("corrupting object")
-        corrupter(victim_osd, pool, 'repair_test_obj')
-
-        # verify inconsistent
-        log.info("scrubbing")
-        ctx.manager.do_pg_scrub(pool, 0, scrub_type)
-
-        assert ctx.manager.pg_inconsistent(pool, 0)
-
-        # repair
-        log.info("repairing")
-        ctx.manager.do_pg_scrub(pool, 0, "repair")
-
-        log.info("re-scrubbing")
-        ctx.manager.do_pg_scrub(pool, 0, scrub_type)
-
-        # verify consistent
-        assert not ctx.manager.pg_inconsistent(pool, 0)
-        log.info("done")
-    return ret
-
-def gen_repair_test_2(chooser):
-    def ret(ctx, config, pool):
-        log.info("starting repair test type 2")
-        victim_osd = chooser(pool, 0)
-        first_mon = teuthology.get_first_mon(ctx, config)
-        (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-
-        # create object
-        log.info("doing put and setomapval")
-        ctx.manager.do_put(pool, 'file1', '/etc/hosts')
-        ctx.manager.do_rados(mon, ['-p', pool, 'setomapval', 'file1', 'key', 'val'])
-        ctx.manager.do_put(pool, 'file2', '/etc/hosts')
-        ctx.manager.do_put(pool, 'file3', '/etc/hosts')
-        ctx.manager.do_put(pool, 'file4', '/etc/hosts')
-        ctx.manager.do_put(pool, 'file5', '/etc/hosts')
-        ctx.manager.do_rados(mon, ['-p', pool, 'setomapval', 'file5', 'key', 'val'])
-        ctx.manager.do_put(pool, 'file6', '/etc/hosts')
-
-        # corrupt object
-        log.info("corrupting object")
-        omaperr(ctx)(victim_osd, pool, 'file1')
-
-        # verify inconsistent
-        log.info("scrubbing")
-        ctx.manager.do_pg_scrub(pool, 0, 'deep-scrub')
-
-        assert ctx.manager.pg_inconsistent(pool, 0)
-
-        # Regression test for bug #4778, should still
-        # be inconsistent after scrub
-        ctx.manager.do_pg_scrub(pool, 0, 'scrub')
-
-        assert ctx.manager.pg_inconsistent(pool, 0)
-
-        # Additional corruptions including 2 types for file1
-        log.info("corrupting more objects")
-        dataerr(ctx)(victim_osd, pool, 'file1')
-        mdataerr(ctx)(victim_osd, pool, 'file2')
-        trunc(ctx)(victim_osd, pool, 'file3')
-        omaperr(ctx)(victim_osd, pool, 'file6')
-
-        # see still inconsistent
-        log.info("scrubbing")
-        ctx.manager.do_pg_scrub(pool, 0, 'deep-scrub')
-
-        assert ctx.manager.pg_inconsistent(pool, 0)
-
-        # repair
-        log.info("repairing")
-        ctx.manager.do_pg_scrub(pool, 0, "repair")
-
-        # Let repair clear inconsistent flag
-        time.sleep(10)
-
-        # verify consistent
-        assert not ctx.manager.pg_inconsistent(pool, 0)
-
-        # In the future repair might determine state of
-        # inconsistency itself, verify with a deep-scrub
-        log.info("scrubbing")
-        ctx.manager.do_pg_scrub(pool, 0, 'deep-scrub')
-
-        # verify consistent
-        assert not ctx.manager.pg_inconsistent(pool, 0)
-
-        log.info("done")
-    return ret
-
-def task(ctx, config):
-    """
-    Test [deep] repair in several situations:
-      Repair [Truncate, Data EIO, MData EIO] on [Primary|Replica]
-
-    The config should be as follows:
-
-      Must include the log-whitelist below
-      Must enable filestore_debug_inject_read_err config
-
-    example:
-
-    tasks:
-    - chef:
-    - install:
-    - ceph:
-        log-whitelist: ['candidate had a read error', 'deep-scrub 0 missing, 1 inconsistent objects', 'deep-scrub 0 missing, 4 inconsistent objects', 'deep-scrub 1 errors', 'deep-scrub 4 errors', '!= known omap_digest', 'repair 0 missing, 1 inconsistent objects', 'repair 0 missing, 4 inconsistent objects', 'repair 1 errors, 1 fixed', 'repair 4 errors, 4 fixed', 'scrub 0 missing, 1 inconsistent', 'scrub 1 errors', 'size 1 != known size']
-        conf:
-          osd:
-            filestore debug inject read err: true
-    - repair_test:
-
-    """
-    if config is None:
-        config = {}
-    assert isinstance(config, dict), \
-        'repair_test task only accepts a dict for config'
-
-    if not hasattr(ctx, 'manager'):
-        first_mon = teuthology.get_first_mon(ctx, config)
-        (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-        ctx.manager = ceph_manager.CephManager(
-            mon,
-            ctx=ctx,
-            logger=log.getChild('ceph_manager')
-            )
-
-    tests = [
-        gen_repair_test_1(mdataerr(ctx), choose_primary(ctx), "scrub"),
-        gen_repair_test_1(mdataerr(ctx), choose_replica(ctx), "scrub"),
-        gen_repair_test_1(dataerr(ctx), choose_primary(ctx), "deep-scrub"),
-        gen_repair_test_1(dataerr(ctx), choose_replica(ctx), "deep-scrub"),
-        gen_repair_test_1(trunc(ctx), choose_primary(ctx), "scrub"),
-        gen_repair_test_1(trunc(ctx), choose_replica(ctx), "scrub"),
-        gen_repair_test_2(choose_primary(ctx)),
-        gen_repair_test_2(choose_replica(ctx))
-        ]
-
-    for test in tests:
-        run_test(ctx, config, test)
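
repair_test builds its case matrix by composing closures: a corrupter (how the object is damaged via the admin socket), a chooser (primary or replica OSD) and a scrub type. A stripped-down sketch of the same composition, with stand-in callables instead of CephManager calls:

    def make_corrupter(kind):
        def corrupt(osd, pool, obj):
            return '%s on %s/%s via osd.%d' % (kind, pool, obj, osd)
        return corrupt

    def choose_primary(pool, num):
        return 0  # stand-in: pretend osd.0 is the primary

    def choose_replica(pool, num):
        return 1  # stand-in: pretend osd.1 holds the replica

    def gen_case(corrupter, chooser, scrub_type):
        def run(pool):
            victim = chooser(pool, 0)
            return (corrupter(victim, pool, 'repair_test_obj'), scrub_type)
        return run

    cases = [
        gen_case(make_corrupter('mdataerr'), choose_primary, 'scrub'),
        gen_case(make_corrupter('dataerr'), choose_replica, 'deep-scrub'),
    ]
    for case in cases:
        print(case('repair_test_pool'))
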
diff --git a/teuthology/task/rest_api.py b/teuthology/task/rest_api.py
deleted file mode 100644 (file)
index d34d31a..0000000
+++ /dev/null
@@ -1,183 +0,0 @@
-"""
-Rest Api
-"""
-import logging
-import contextlib
-import time
-
-from teuthology import misc as teuthology
-from teuthology import contextutil
-from ..orchestra import run
-from teuthology.task.ceph import CephState
-
-log = logging.getLogger(__name__)
-
-
-@contextlib.contextmanager
-def run_rest_api_daemon(ctx, api_clients):
-    """
-    Wrapper starts the rest api daemons
-    """
-    if not hasattr(ctx, 'daemons'):
-        ctx.daemons = CephState()
-    remotes = ctx.cluster.only(teuthology.is_type('client')).remotes
-    for rems, roles in remotes.iteritems():
-        for whole_id_ in roles:
-            if whole_id_ in api_clients:
-                id_ = whole_id_[len('client.'):]
-                run_cmd = [
-                    'sudo',
-                    'daemon-helper',
-                    'kill',
-                    'ceph-rest-api',
-                    '-n',
-                    'client.rest{id}'.format(id=id_), ]
-                cl_rest_id = 'client.rest{id}'.format(id=id_)
-                ctx.daemons.add_daemon(rems, 'restapi',
-                    cl_rest_id,
-                    args=run_cmd,
-                    logger=log.getChild(cl_rest_id),
-                    stdin=run.PIPE,
-                    wait=False,
-                    )
-                for i in range(1, 12):
-                    log.info('testing for ceph-rest-api try {0}'.format(i))
-                    run_cmd = [
-                        'wget',
-                        '-O',
-                        '/dev/null',
-                        '-q',
-                        'http://localhost:5000/api/v0.1/status'
-                    ]
-                    proc = rems.run(
-                        args=run_cmd,
-                        check_status=False
-                    )
-                    if proc.exitstatus == 0:
-                        break
-                    time.sleep(5)
-                if proc.exitstatus != 0:
-                    raise RuntimeError('Cannot contact ceph-rest-api')
-    try:
-        yield
-
-    finally:
-        """
-        TO DO: destroy daemons started -- modify iter_daemons_of_role
-        """
-        teuthology.stop_daemons_of_type(ctx, 'restapi')
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Start up rest-api.
-
-    To start on all clients::
-
-        tasks:
-        - ceph:
-        - rest-api:
-
-    To only run on certain clients::
-
-        tasks:
-        - ceph:
-        - rest-api: [client.0, client.3]
-
-    or
-
-        tasks:
-        - ceph:
-        - rest-api:
-            client.0:
-            client.3:
-
-    The general flow of things here is:
-        1. Find clients on which rest-api is supposed to run (api_clients)
-        2. Generate keyring values
-        3. Start up ceph-rest-api daemons
-    On cleanup:
-        4. Stop the daemons
-        5. Delete keyring value files.
-    """
-    api_clients = []
-    remotes = ctx.cluster.only(teuthology.is_type('client')).remotes
-    log.info(remotes)
-    if config is None:
-        api_clients = ['client.{id}'.format(id=id_)
-            for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
-    else:
-        api_clients = config
-    log.info(api_clients)
-    testdir = teuthology.get_testdir(ctx)
-    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
-    for rems, roles in remotes.iteritems():
-        for whole_id_ in roles:
-            if whole_id_ in api_clients:
-                id_ = whole_id_[len('client.'):]
-                keyring = '/etc/ceph/ceph.client.rest{id}.keyring'.format(
-                        id=id_)
-                rems.run(
-                    args=[
-                        'sudo',
-                        'adjust-ulimits',
-                        'ceph-coverage',
-                        coverage_dir,
-                        'ceph-authtool',
-                        '--create-keyring',
-                        '--gen-key',
-                        '--name=client.rest{id}'.format(id=id_),
-                        '--set-uid=0',
-                        '--cap', 'mon', 'allow *',
-                        '--cap', 'osd', 'allow *',
-                        '--cap', 'mds', 'allow',
-                        keyring,
-                        run.Raw('&&'),
-                        'sudo',
-                        'chmod',
-                        '0644',
-                        keyring,
-                        ],
-                    )
-                rems.run(
-                    args=[
-                        'sudo',
-                        'sh',
-                        '-c',
-                        run.Raw("'"),
-                        "echo",
-                        '[client.rest{id}]'.format(id=id_),
-                        run.Raw('>>'),
-                        "/etc/ceph/ceph.conf",
-                        run.Raw("'")
-                        ]
-                    )
-                rems.run(
-                    args=[
-                        'sudo',
-                        'sh',
-                        '-c',
-                        run.Raw("'"),
-                        'echo',
-                        'restapi',
-                        'keyring',
-                        '=',
-                        '/etc/ceph/ceph.client.rest{id}.keyring'.format(id=id_),
-                        run.Raw('>>'),
-                        '/etc/ceph/ceph.conf',
-                        run.Raw("'"),
-                        ]
-                    )
-                rems.run(
-                    args=[
-                        'ceph',
-                        'auth',
-                        'import',
-                        '-i',
-                        '/etc/ceph/ceph.client.rest{id}.keyring'.format(id=id_),
-                    ]
-                )
-    with contextutil.nested(
-            lambda: run_rest_api_daemon(ctx=ctx, api_clients=api_clients),):
-        yield
-
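
run_rest_api_daemon() above starts ceph-rest-api and then probes http://localhost:5000/api/v0.1/status with wget up to 11 times, sleeping 5 seconds between attempts, before giving up. The same wait-until-responding pattern in plain Python (the callable is a local stand-in for the remote wget invocation):

    import time

    def wait_for_http_ok(check, attempts=11, delay=5):
        """Retry a boolean health check until it passes or attempts run out."""
        for _ in range(attempts):
            if check():
                return True
            time.sleep(delay)
        raise RuntimeError('Cannot contact ceph-rest-api')

    # In the task the check shells out to wget on the remote host; any callable
    # that returns True once the endpoint answers will do, for example:
    #   check = lambda: subprocess.call(['wget', '-q', '-O', '/dev/null',
    #       'http://localhost:5000/api/v0.1/status']) == 0
    wait_for_http_ok(lambda: True)  # trivially succeeds; real use plugs in the check
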
diff --git a/teuthology/task/restart.py b/teuthology/task/restart.py
deleted file mode 100644 (file)
index 87ca2b0..0000000
+++ /dev/null
@@ -1,163 +0,0 @@
-"""
-Daemon restart
-"""
-import logging
-import pipes
-
-from teuthology import misc as teuthology
-from teuthology.orchestra import run as tor
-
-from ..orchestra import run
-log = logging.getLogger(__name__)
-
-def restart_daemon(ctx, config, role, id_, *args):
-    """
-    Handle restart (including the execution of the command parameters passed)
-    """
-    log.info('Restarting {r}.{i} daemon...'.format(r=role, i=id_))
-    daemon = ctx.daemons.get_daemon(role, id_)
-    log.debug('Waiting for exit of {r}.{i} daemon...'.format(r=role, i=id_))
-    try:
-        daemon.wait_for_exit()
-    except tor.CommandFailedError as e:
-        log.debug('Command Failed: {e}'.format(e=e))
-    if len(args) > 0:
-        confargs = ['--{k}={v}'.format(k=k, v=v) for k,v in zip(args[0::2], args[1::2])]
-        log.debug('Doing restart of {r}.{i} daemon with args: {a}...'.format(r=role, i=id_, a=confargs))
-        daemon.restart_with_args(confargs)
-    else:
-        log.debug('Doing restart of {r}.{i} daemon...'.format(r=role, i=id_))
-        daemon.restart()
-
-def get_tests(ctx, config, role, remote, testdir):
-    """Download restart tests"""
-    srcdir = '{tdir}/restart.{role}'.format(tdir=testdir, role=role)
-
-    refspec = config.get('branch')
-    if refspec is None:
-        refspec = config.get('sha1')
-    if refspec is None:
-        refspec = config.get('tag')
-    if refspec is None:
-        refspec = 'HEAD'
-    log.info('Pulling restart qa/workunits from ref %s', refspec)
-
-    remote.run(
-        logger=log.getChild(role),
-        args=[
-            'mkdir', '--', srcdir,
-            run.Raw('&&'),
-            'git',
-            'archive',
-            '--remote=git://ceph.newdream.net/git/ceph.git',
-            '%s:qa/workunits' % refspec,
-            run.Raw('|'),
-            'tar',
-            '-C', srcdir,
-            '-x',
-            '-f-',
-            run.Raw('&&'),
-            'cd', '--', srcdir,
-            run.Raw('&&'),
-            'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi',
-            run.Raw('&&'),
-            'find', '-executable', '-type', 'f', '-printf', r'%P\0',
-            run.Raw('>{tdir}/restarts.list'.format(tdir=testdir)),
-            ],
-        )
-    restarts = sorted(teuthology.get_file(
-                        remote,
-                        '{tdir}/restarts.list'.format(tdir=testdir)).split('\0'))
-    return (srcdir, restarts)
-
-def task(ctx, config):
-    """
-    Execute commands and allow daemon restart with config options.
-    Each process executed can output to stdout restart commands of the form:
-        restart <role> <id> <conf_key1> <conf_value1> <conf_key2> <conf_value2>
-    This will restart the daemon <role>.<id> with the specified config values once
-    by modifying the conf file with those values, and then replacing the old conf file
-    once the daemon is restarted.
-    This task does not kill a running daemon, it assumes the daemon will abort on an
-    assert specified in the config.
-
-        tasks:
-        - install:
-        - ceph:
-        - restart:
-            exec:
-              client.0:
-                - test_backtraces.py
-
-    """
-    assert isinstance(config, dict), "task kill got invalid config"
-
-    testdir = teuthology.get_testdir(ctx)
-
-    try:
-        assert 'exec' in config, "config requires exec key with <role>: <command> entries"
-        for role, task in config['exec'].iteritems():
-            log.info('restart for role {r}'.format(r=role))
-            (remote,) = ctx.cluster.only(role).remotes.iterkeys()
-            srcdir, restarts = get_tests(ctx, config, role, remote, testdir)
-            log.info('Running command on role %s host %s', role, remote.name)
-            spec = '{spec}'.format(spec=task[0])
-            log.info('Restarts list: %s', restarts)
-            log.info('Spec is %s', spec)
-            to_run = [w for w in restarts if w == task or w.find(spec) != -1]
-            log.info('To run: %s', to_run)
-            for c in to_run:
-                log.info('Running restart script %s...', c)
-                args = [
-                    run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)),
-                    ]
-                env = config.get('env')
-                if env is not None:
-                    for var, val in env.iteritems():
-                        quoted_val = pipes.quote(val)
-                        env_arg = '{var}={val}'.format(var=var, val=quoted_val)
-                        args.append(run.Raw(env_arg))
-                args.extend([
-                            'adjust-ulimits',
-                            'ceph-coverage',
-                            '{tdir}/archive/coverage'.format(tdir=testdir),
-                            '{srcdir}/{c}'.format(
-                                srcdir=srcdir,
-                                c=c,
-                                ),
-                            ])
-                proc = remote.run(
-                    args=args,
-                    stdout=tor.PIPE,
-                    stdin=tor.PIPE,
-                    stderr=log,
-                    wait=False,
-                    )
-                log.info('waiting for a command from script')
-                while True:
-                    l = proc.stdout.readline()
-                    if not l or l == '':
-                        break
-                    log.debug('script command: {c}'.format(c=l))
-                    ll = l.strip()
-                    cmd = ll.split(' ')
-                    if cmd[0] == "done":
-                        break
-                    assert cmd[0] == 'restart', "script sent an invalid command request to the restart task"
-                    # cmd should be: restart <role> <id> <conf_key1> <conf_value1> <conf_key2> <conf_value2>
-                    # or to clear, just: restart <role> <id>
-                    restart_daemon(ctx, config, cmd[1], cmd[2], *cmd[3:])
-                    proc.stdin.writelines(['restarted\n'])
-                    proc.stdin.flush()
-                try:
-                    proc.exitstatus.get()
-                except tor.CommandFailedError:
-                    raise Exception('restart task got non-zero exit status from script: {s}'.format(s=c))
-    finally:
-        log.info('Finishing %s on %s...', task, role)
-        remote.run(
-            logger=log.getChild(role),
-            args=[
-                'rm', '-rf', '--', '{tdir}/restarts.list'.format(tdir=testdir), srcdir,
-                ],
-            )
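
The restart task above drives each workunit over a simple line protocol: the script prints 'restart <role> <id> [<conf_key> <conf_value> ...]' (or 'done') on stdout, the task restarts that daemon with the given config overrides, and replies with a single 'restarted' line on the script's stdin. A sketch of the script side of that handshake (the config key in the comment is only an illustration):

    import sys

    def request_restart(role, id_, **conf):
        """Ask the harness to restart role.id_ with config overrides, then wait."""
        parts = ['restart', role, str(id_)]
        for key, value in conf.items():
            parts.extend([key, str(value)])
        sys.stdout.write(' '.join(parts) + '\n')
        sys.stdout.flush()
        reply = sys.stdin.readline().strip()
        assert reply == 'restarted', 'unexpected reply: %r' % reply

    # A test script would typically do something like this before printing 'done':
    #   request_restart('osd', 0, osd_recovery_delay_start=1000)
    #   sys.stdout.write('done\n')
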
diff --git a/teuthology/task/rgw.py b/teuthology/task/rgw.py
deleted file mode 100644 (file)
index afa464d..0000000
+++ /dev/null
@@ -1,808 +0,0 @@
-"""
-rgw routines
-"""
-import argparse
-import contextlib
-import json
-import logging
-import os
-
-from cStringIO import StringIO
-
-from ..orchestra import run
-from teuthology import misc as teuthology
-from teuthology import contextutil
-from teuthology.task_util.rgw import rgwadmin
-from teuthology.task_util.rados import (rados, create_ec_pool,
-                                        create_replicated_pool,
-                                        create_cache_pool)
-
-log = logging.getLogger(__name__)
-
-
-@contextlib.contextmanager
-def create_apache_dirs(ctx, config):
-    """
-    Remotely create apache directories.  Delete when finished.
-    """
-    log.info('Creating apache directories...')
-    testdir = teuthology.get_testdir(ctx)
-    for client in config.iterkeys():
-        ctx.cluster.only(client).run(
-            args=[
-                'mkdir',
-                '-p',
-                '{tdir}/apache/htdocs.{client}'.format(tdir=testdir,
-                                                       client=client),
-                '{tdir}/apache/tmp.{client}/fastcgi_sock'.format(
-                    tdir=testdir,
-                    client=client),
-                run.Raw('&&'),
-                'mkdir',
-                '{tdir}/archive/apache.{client}'.format(tdir=testdir,
-                                                        client=client),
-                ],
-            )
-    try:
-        yield
-    finally:
-        log.info('Cleaning up apache directories...')
-        for client in config.iterkeys():
-            ctx.cluster.only(client).run(
-                args=[
-                    'rm',
-                    '-rf',
-                    '{tdir}/apache/tmp.{client}'.format(tdir=testdir,
-                                                        client=client),
-                    run.Raw('&&'),
-                    'rmdir',
-                    '{tdir}/apache/htdocs.{client}'.format(tdir=testdir,
-                                                           client=client),
-                    ],
-                )
-
-        for client in config.iterkeys():
-            ctx.cluster.only(client).run(
-                args=[
-                    'rmdir',
-                    '{tdir}/apache'.format(tdir=testdir),
-                    ],
-                check_status=False,  # only need to remove once per host
-                )
-
-
-@contextlib.contextmanager
-def ship_apache_configs(ctx, config, role_endpoints):
-    """
-    Ship apache config and rgw.fcgi to all clients.  Clean up on termination.
-    """
-    assert isinstance(config, dict)
-    assert isinstance(role_endpoints, dict)
-    testdir = teuthology.get_testdir(ctx)
-    log.info('Shipping apache config and rgw.fcgi...')
-    src = os.path.join(os.path.dirname(__file__), 'apache.conf.template')
-    for client, conf in config.iteritems():
-        (remote,) = ctx.cluster.only(client).remotes.keys()
-        system_type = teuthology.get_system_type(remote)
-        if not conf:
-            conf = {}
-        idle_timeout = conf.get('idle_timeout', 30)
-        if system_type == 'deb':
-            mod_path = '/usr/lib/apache2/modules'
-            print_continue = 'on'
-        else:
-            mod_path = '/usr/lib64/httpd/modules'
-            print_continue = 'off'
-        host, port = role_endpoints[client]
-        with file(src, 'rb') as f:
-            conf = f.read().format(
-                testdir=testdir,
-                mod_path=mod_path,
-                print_continue=print_continue,
-                host=host,
-                port=port,
-                client=client,
-                idle_timeout=idle_timeout,
-                )
-            teuthology.write_file(
-                remote=remote,
-                path='{tdir}/apache/apache.{client}.conf'.format(
-                    tdir=testdir,
-                    client=client),
-                data=conf,
-                )
-        teuthology.write_file(
-            remote=remote,
-            path='{tdir}/apache/htdocs.{client}/rgw.fcgi'.format(
-                tdir=testdir,
-                client=client),
-            data="""#!/bin/sh
-ulimit -c unlimited
-exec radosgw -f -n {client} -k /etc/ceph/ceph.{client}.keyring --rgw-socket-path {tdir}/apache/tmp.{client}/fastcgi_sock/rgw_sock
-
-""".format(tdir=testdir, client=client)
-            )
-        remote.run(
-            args=[
-                'chmod',
-                'a=rx',
-                '{tdir}/apache/htdocs.{client}/rgw.fcgi'.format(tdir=testdir,
-                                                                client=client),
-                ],
-            )
-    try:
-        yield
-    finally:
-        log.info('Removing apache config...')
-        for client in config.iterkeys():
-            ctx.cluster.only(client).run(
-                args=[
-                    'rm',
-                    '-f',
-                    '{tdir}/apache/apache.{client}.conf'.format(tdir=testdir,
-                                                                client=client),
-                    run.Raw('&&'),
-                    'rm',
-                    '-f',
-                    '{tdir}/apache/htdocs.{client}/rgw.fcgi'.format(
-                        tdir=testdir,
-                        client=client),
-                    ],
-                )
-
-
-@contextlib.contextmanager
-def start_rgw(ctx, config):
-    """
-    Start rgw on remote sites.
-    """
-    log.info('Starting rgw...')
-    testdir = teuthology.get_testdir(ctx)
-    for client in config.iterkeys():
-        (remote,) = ctx.cluster.only(client).remotes.iterkeys()
-
-        client_config = config.get(client)
-        if client_config is None:
-            client_config = {}
-        log.info("rgw %s config is %s", client, client_config)
-        id_ = client.split('.', 1)[1]
-        log.info('client {client} is id {id}'.format(client=client, id=id_))
-        cmd_prefix = [
-            'sudo',
-            'adjust-ulimits',
-            'ceph-coverage',
-            '{tdir}/archive/coverage'.format(tdir=testdir),
-            'daemon-helper',
-            'term',
-            ]
-
-        rgw_cmd = ['radosgw']
-
-        if ctx.rgw.frontend == 'apache':
-            rgw_cmd.extend([
-                '--rgw-socket-path',
-                '{tdir}/apache/tmp.{client}/fastcgi_sock/rgw_sock'.format(
-                    tdir=testdir,
-                    client=client,
-                    ),
-            ])
-        elif ctx.rgw.frontend == 'civetweb':
-            host, port = ctx.rgw.role_endpoints[client]
-            rgw_cmd.extend([
-                '--rgw-frontends',
-                'civetweb port={port}'.format(port=port),
-            ])
-
-        rgw_cmd.extend([
-            '-n', client,
-            '-k', '/etc/ceph/ceph.{client}.keyring'.format(client=client),
-            '--log-file',
-            '/var/log/ceph/rgw.{client}.log'.format(client=client),
-            '--rgw_ops_log_socket_path',
-            '{tdir}/rgw.opslog.{client}.sock'.format(tdir=testdir,
-                                                     client=client),
-            '--foreground',
-            run.Raw('|'),
-            'sudo',
-            'tee',
-            '/var/log/ceph/rgw.{client}.stdout'.format(client=client),
-            run.Raw('2>&1'),
-            ])
-
-        if client_config.get('valgrind'):
-            cmd_prefix = teuthology.get_valgrind_args(
-                testdir,
-                client,
-                cmd_prefix,
-                client_config.get('valgrind')
-                )
-
-        run_cmd = list(cmd_prefix)
-        run_cmd.extend(rgw_cmd)
-
-        ctx.daemons.add_daemon(
-            remote, 'rgw', client,
-            args=run_cmd,
-            logger=log.getChild(client),
-            stdin=run.PIPE,
-            wait=False,
-            )
-
-    try:
-        yield
-    finally:
-        teuthology.stop_daemons_of_type(ctx, 'rgw')
-        for client in config.iterkeys():
-            ctx.cluster.only(client).run(
-                args=[
-                    'rm',
-                    '-f',
-                    '{tdir}/rgw.opslog.{client}.sock'.format(tdir=testdir,
-                                                             client=client),
-                    ],
-                )
-
-
-@contextlib.contextmanager
-def start_apache(ctx, config):
-    """
-    Start apache on remote sites.
-    """
-    log.info('Starting apache...')
-    testdir = teuthology.get_testdir(ctx)
-    apaches = {}
-    for client in config.iterkeys():
-        (remote,) = ctx.cluster.only(client).remotes.keys()
-        system_type = teuthology.get_system_type(remote)
-        if system_type == 'deb':
-            apache_name = 'apache2'
-        else:
-            apache_name = '/usr/sbin/httpd.worker'
-        proc = remote.run(
-            args=[
-                'adjust-ulimits',
-                'daemon-helper',
-                'kill',
-                apache_name,
-                '-X',
-                '-f',
-                '{tdir}/apache/apache.{client}.conf'.format(tdir=testdir,
-                                                            client=client),
-                ],
-            logger=log.getChild(client),
-            stdin=run.PIPE,
-            wait=False,
-            )
-        apaches[client] = proc
-
-    try:
-        yield
-    finally:
-        log.info('Stopping apache...')
-        for client, proc in apaches.iteritems():
-            proc.stdin.close()
-
-        run.wait(apaches.itervalues())
-
-
-def extract_user_info(client_config):
-    """
-    Extract user info from the client config specified.  Returns a dict
-    that includes system key information.
-    """
-    # if there is no system user, or no name for that user, return None
-    if ('system user' not in client_config or
-            'name' not in client_config['system user']):
-        return None
-
-    user_info = dict()
-    user_info['system_key'] = dict(
-        user=client_config['system user']['name'],
-        access_key=client_config['system user']['access key'],
-        secret_key=client_config['system user']['secret key'],
-        )
-    return user_info
-
-
-def extract_zone_info(ctx, client, client_config):
-    """
-    Get zone information.
-    :param client: dictionary of client information
-    :param client_config: dictionary of client configuration information
-    :returns: zone extracted from client and client_config information
-    """
-    ceph_config = ctx.ceph.conf.get('global', {})
-    ceph_config.update(ctx.ceph.conf.get('client', {}))
-    ceph_config.update(ctx.ceph.conf.get(client, {}))
-    for key in ['rgw zone', 'rgw region', 'rgw zone root pool']:
-        assert key in ceph_config, \
-            'ceph conf must contain {key} for {client}'.format(key=key,
-                                                               client=client)
-    region = ceph_config['rgw region']
-    zone = ceph_config['rgw zone']
-    zone_info = dict()
-    for key in ['rgw control pool', 'rgw gc pool', 'rgw log pool',
-                'rgw intent log pool', 'rgw usage log pool',
-                'rgw user keys pool', 'rgw user email pool',
-                'rgw user swift pool', 'rgw user uid pool',
-                'rgw domain root']:
-        new_key = key.split(' ', 1)[1]
-        new_key = new_key.replace(' ', '_')
-
-        if key in ceph_config:
-            value = ceph_config[key]
-            log.debug('{key} specified in ceph_config ({val})'.format(
-                key=key, val=value))
-            zone_info[new_key] = value
-        else:
-            zone_info[new_key] = '.' + region + '.' + zone + '.' + new_key
-
-    index_pool = '.' + region + '.' + zone + '.' + 'index_pool'
-    data_pool = '.' + region + '.' + zone + '.' + 'data_pool'
-    data_extra_pool = '.' + region + '.' + zone + '.' + 'data_extra_pool'
-
-    zone_info['placement_pools'] = [{'key': 'default_placement',
-                                     'val': {'index_pool': index_pool,
-                                             'data_pool': data_pool,
-                                             'data_extra_pool': data_extra_pool}
-                                     }]
-
-    # these keys are meant for the zones argument in the region info.  We
-    # insert them into zone_info with a different format and then remove them
-    # in the fill_in_endpoints() method
-    for key in ['rgw log meta', 'rgw log data']:
-        if key in ceph_config:
-            zone_info[key] = ceph_config[key]
-
-    return region, zone, zone_info
-
-
-def extract_region_info(region, region_info):
-    """
-    Extract region information from the region_info parameter, using get
-    to set default values.
-
-    :param region: name of the region
-    :param region_info: region information (in dictionary form).
-    :returns: dictionary of region information set from region_info, using
-            default values for missing fields.
-    """
-    assert isinstance(region_info['zones'], list) and region_info['zones'], \
-        'zones must be a non-empty list'
-    return dict(
-        name=region,
-        api_name=region_info.get('api name', region),
-        is_master=region_info.get('is master', False),
-        log_meta=region_info.get('log meta', False),
-        log_data=region_info.get('log data', False),
-        master_zone=region_info.get('master zone', region_info['zones'][0]),
-        placement_targets=region_info.get('placement targets',
-                                          [{'name': 'default_placement',
-                                            'tags': []}]),
-        default_placement=region_info.get('default placement',
-                                          'default_placement'),
-        )
-
-
-def assign_ports(ctx, config):
-    """
-    Assign port numbers starting with port 7280.
-    """
-    port = 7280
-    role_endpoints = {}
-    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
-        for role in roles_for_host:
-            if role in config:
-                role_endpoints[role] = (remote.name.split('@')[1], port)
-                port += 1
-
-    return role_endpoints
-
-
-def fill_in_endpoints(region_info, role_zones, role_endpoints):
-    """
-    Iterate through the list of role_endpoints, filling in zone information
-
-    :param region_info: region data
-    :param role_zones: region and zone information.
-    :param role_endpoints: endpoints being used
-    """
-    for role, (host, port) in role_endpoints.iteritems():
-        region, zone, zone_info, _ = role_zones[role]
-        host, port = role_endpoints[role]
-        endpoint = 'http://{host}:{port}/'.format(host=host, port=port)
-        # check if the region specified under client actually exists
-        # in region_info (it should, if properly configured).
-        # If not, throw a reasonable error
-        if region not in region_info:
-            raise Exception(
-                'Region: {region} was specified but no corresponding'
-                ' entry was found under \'regions\''.format(region=region))
-
-        region_conf = region_info[region]
-        region_conf.setdefault('endpoints', [])
-        region_conf['endpoints'].append(endpoint)
-
-        # this is the payload for the 'zones' field in the region field
-        zone_payload = dict()
-        zone_payload['endpoints'] = [endpoint]
-        zone_payload['name'] = zone
-
-        # Pull the log meta and log data settings out of zone_info, if they
-        # exist, then pop them as they don't actually belong in the zone info
-        for key in ['rgw log meta', 'rgw log data']:
-            new_key = key.split(' ', 1)[1]
-            new_key = new_key.replace(' ', '_')
-
-            if key in zone_info:
-                value = zone_info.pop(key)
-            else:
-                value = 'false'
-
-            zone_payload[new_key] = value
-
-        region_conf.setdefault('zones', [])
-        region_conf['zones'].append(zone_payload)
-
-
-@contextlib.contextmanager
-def configure_users(ctx, config, everywhere=False):
-    """
-    Create users by remotely running rgwadmin commands using extracted
-    user information.
-    """
-    log.info('Configuring users...')
-
-    # extract the user info and append it to the payload tuple for the given
-    # client
-    for client, c_config in config.iteritems():
-        if not c_config:
-            continue
-        user_info = extract_user_info(c_config)
-        if not user_info:
-            continue
-
-        # For data sync the master zones and regions must have the
-        # system users of the secondary zones. To keep this simple,
-        # just create the system users on every client if regions are
-        # configured.
-        clients_to_create_as = [client]
-        if everywhere:
-            clients_to_create_as = config.keys()
-        for client_name in clients_to_create_as:
-            log.debug('Creating user {user} on {client}'.format(
-                      user=user_info['system_key']['user'], client=client_name))
-            rgwadmin(ctx, client_name,
-                     cmd=[
-                         'user', 'create',
-                         '--uid', user_info['system_key']['user'],
-                         '--access-key', user_info['system_key']['access_key'],
-                         '--secret', user_info['system_key']['secret_key'],
-                         '--display-name', user_info['system_key']['user'],
-                         '--system',
-                     ],
-                     check_status=True,
-                     )
-
-    yield
-
-
-@contextlib.contextmanager
-def create_nonregion_pools(ctx, config, regions):
-    """Create replicated or erasure coded data pools for rgw."""
-    if regions:
-        yield
-        return
-
-    log.info('creating data pools')
-    for client in config.keys():
-        (remote,) = ctx.cluster.only(client).remotes.iterkeys()
-        data_pool = '.rgw.buckets'
-        if ctx.rgw.ec_data_pool:
-            create_ec_pool(remote, data_pool, client, 64)
-        else:
-            create_replicated_pool(remote, data_pool, 64)
-        if ctx.rgw.cache_pools:
-            create_cache_pool(remote, data_pool, data_pool + '.cache', 64,
-                              64*1024*1024)
-    yield
-
-
-@contextlib.contextmanager
-def configure_regions_and_zones(ctx, config, regions, role_endpoints):
-    """
-    Configure regions and zones from rados and rgw.
-    """
-    if not regions:
-        log.debug(
-            'In rgw.configure_regions_and_zones() and regions is None. '
-            'Bailing')
-        yield
-        return
-
-    log.info('Configuring regions and zones...')
-
-    log.debug('config is %r', config)
-    log.debug('regions are %r', regions)
-    log.debug('role_endpoints = %r', role_endpoints)
-    # extract the zone info
-    role_zones = dict([(client, extract_zone_info(ctx, client, c_config))
-                       for client, c_config in config.iteritems()])
-    log.debug('roles_zones = %r', role_zones)
-
-    # extract the user info and append it to the payload tuple for the given
-    # client
-    for client, c_config in config.iteritems():
-        if not c_config:
-            user_info = None
-        else:
-            user_info = extract_user_info(c_config)
-
-        (region, zone, zone_info) = role_zones[client]
-        role_zones[client] = (region, zone, zone_info, user_info)
-
-    region_info = dict([
-        (region_name, extract_region_info(region_name, r_config))
-        for region_name, r_config in regions.iteritems()])
-
-    fill_in_endpoints(region_info, role_zones, role_endpoints)
-
-    # clear out the old defaults
-    first_mon = teuthology.get_first_mon(ctx, config)
-    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-    # removing these objects from .rgw.root and the per-zone root pools
-    # may or may not matter
-    rados(ctx, mon,
-          cmd=['-p', '.rgw.root', 'rm', 'region_info.default'])
-    rados(ctx, mon,
-          cmd=['-p', '.rgw.root', 'rm', 'zone_info.default'])
-
-    for client in config.iterkeys():
-        for role, (_, zone, zone_info, user_info) in role_zones.iteritems():
-            rados(ctx, mon,
-                  cmd=['-p', zone_info['domain_root'],
-                       'rm', 'region_info.default'])
-            rados(ctx, mon,
-                  cmd=['-p', zone_info['domain_root'],
-                       'rm', 'zone_info.default'])
-
-            (remote,) = ctx.cluster.only(role).remotes.keys()
-            for pool_info in zone_info['placement_pools']:
-                remote.run(args=['ceph', 'osd', 'pool', 'create',
-                                 pool_info['val']['index_pool'], '64', '64'])
-                if ctx.rgw.ec_data_pool:
-                    create_ec_pool(remote, pool_info['val']['data_pool'],
-                                   zone, 64)
-                else:
-                    create_replicated_pool(
-                        remote, pool_info['val']['data_pool'],
-                        64)
-
-            rgwadmin(ctx, client,
-                     cmd=['-n', client, 'zone', 'set', '--rgw-zone', zone],
-                     stdin=StringIO(json.dumps(dict(
-                         zone_info.items() + user_info.items()))),
-                     check_status=True)
-
-        for region, info in region_info.iteritems():
-            region_json = json.dumps(info)
-            log.debug('region info is: %s', region_json)
-            rgwadmin(ctx, client,
-                     cmd=['-n', client, 'region', 'set'],
-                     stdin=StringIO(region_json),
-                     check_status=True)
-            if info['is_master']:
-                rgwadmin(ctx, client,
-                         cmd=['-n', client,
-                              'region', 'default',
-                              '--rgw-region', region],
-                         check_status=True)
-
-        rgwadmin(ctx, client, cmd=['-n', client, 'regionmap', 'update'])
-    yield
-
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Either configure apache to run a rados gateway, or use the built-in
-    civetweb server.
-    Only one radosgw should run per machine, since it uses a hard-coded
-    port for now.
-
-    For example, to run rgw on all clients::
-
-        tasks:
-        - ceph:
-        - rgw:
-
-    To only run on certain clients::
-
-        tasks:
-        - ceph:
-        - rgw: [client.0, client.3]
-
-    or
-
-        tasks:
-        - ceph:
-        - rgw:
-            client.0:
-            client.3:
-
-    You can adjust the idle timeout for fastcgi (default is 30 seconds):
-
-        tasks:
-        - ceph:
-        - rgw:
-            client.0:
-              idle_timeout: 90
-
-    To run radosgw through valgrind:
-
-        tasks:
-        - ceph:
-        - rgw:
-            client.0:
-              valgrind: [--tool=memcheck]
-            client.3:
-              valgrind: [--tool=memcheck]
-
-    To use civetweb instead of apache:
-
-        tasks:
-        - ceph:
-        - rgw:
-          - client.0
-        overrides:
-          rgw:
-            frontend: civetweb
-
-    Note that without a modified fastcgi module (e.g. with the default
-    one on CentOS), you must have 'rgw print continue = false' in ceph.conf::
-
-        tasks:
-        - ceph:
-            conf:
-              global:
-                rgw print continue: false
-        - rgw: [client.0]
-
-    To run rgws for multiple regions or zones, describe the regions
-    and their zones in a regions section. The endpoints will be
-    generated by this task. Each client must have a region, zone,
-    and pools assigned in ceph.conf::
-
-        tasks:
-        - install:
-        - ceph:
-            conf:
-              client.0:
-                rgw region: foo
-                rgw zone: foo-1
-                rgw region root pool: .rgw.rroot.foo
-                rgw zone root pool: .rgw.zroot.foo
-                rgw log meta: true
-                rgw log data: true
-              client.1:
-                rgw region: bar
-                rgw zone: bar-master
-                rgw region root pool: .rgw.rroot.bar
-                rgw zone root pool: .rgw.zroot.bar
-                rgw log meta: true
-                rgw log data: true
-              client.2:
-                rgw region: bar
-                rgw zone: bar-secondary
-                rgw region root pool: .rgw.rroot.bar
-                rgw zone root pool: .rgw.zroot.bar-secondary
-        - rgw:
-            ec-data-pool: true
-            regions:
-              foo:
-                api name: api_name # default: region name
-                is master: true    # default: false
-                master zone: foo-1 # default: first zone
-                zones: [foo-1]
-                log meta: true
-                log data: true
-                placement targets: [target1, target2] # default: []
-                default placement: target2            # default: ''
-              bar:
-                api name: bar-api
-                zones: [bar-master, bar-secondary]
-            client.0:
-              system user:
-                name: foo-system
-                access key: X2IYPSTY1072DDY1SJMC
-                secret key: YIMHICpPvT+MhLTbSsiBJ1jQF15IFvJA8tgwJEcm
-            client.1:
-              system user:
-                name: bar1
-                access key: Y2IYPSTY1072DDY1SJMC
-                secret key: XIMHICpPvT+MhLTbSsiBJ1jQF15IFvJA8tgwJEcm
-            client.2:
-              system user:
-                name: bar2
-                access key: Z2IYPSTY1072DDY1SJMC
-                secret key: ZIMHICpPvT+MhLTbSsiBJ1jQF15IFvJA8tgwJEcm
-    """
-    if config is None:
-        config = dict(('client.{id}'.format(id=id_), None)
-                      for id_ in teuthology.all_roles_of_type(
-                          ctx.cluster, 'client'))
-    elif isinstance(config, list):
-        config = dict((name, None) for name in config)
-
-    overrides = ctx.config.get('overrides', {})
-    teuthology.deep_merge(config, overrides.get('rgw', {}))
-
-    regions = {}
-    if 'regions' in config:
-        # separate region info so only clients are keys in config
-        regions = config['regions']
-        del config['regions']
-
-    role_endpoints = assign_ports(ctx, config)
-    ctx.rgw = argparse.Namespace()
-    ctx.rgw.role_endpoints = role_endpoints
-    # stash the region info for later, since it was deleted from the config
-    # structure
-    ctx.rgw.regions = regions
-
-    ctx.rgw.ec_data_pool = False
-    if 'ec-data-pool' in config:
-        ctx.rgw.ec_data_pool = bool(config['ec-data-pool'])
-        del config['ec-data-pool']
-    ctx.rgw.cache_pools = False
-    if 'cache-pools' in config:
-        ctx.rgw.cache_pools = bool(config['cache-pools'])
-        del config['cache-pools']
-
-    ctx.rgw.frontend = 'apache'
-    if 'frontend' in config:
-        ctx.rgw.frontend = config['frontend']
-        del config['frontend']
-
-    subtasks = [
-        lambda: configure_regions_and_zones(
-            ctx=ctx,
-            config=config,
-            regions=regions,
-            role_endpoints=role_endpoints,
-            ),
-        lambda: configure_users(
-            ctx=ctx,
-            config=config,
-            everywhere=bool(regions),
-            ),
-        lambda: create_nonregion_pools(
-            ctx=ctx, config=config, regions=regions),
-    ]
-    if ctx.rgw.frontend == 'apache':
-        subtasks.insert(0, lambda: create_apache_dirs(ctx=ctx, config=config))
-        subtasks.extend([
-            lambda: ship_apache_configs(ctx=ctx, config=config,
-                                        role_endpoints=role_endpoints),
-            lambda: start_rgw(ctx=ctx, config=config),
-            lambda: start_apache(ctx=ctx, config=config),
-        ])
-    elif ctx.rgw.frontend == 'civetweb':
-        subtasks.extend([
-            lambda: start_rgw(ctx=ctx, config=config),
-        ])
-    else:
-        raise ValueError("frontend must be 'apache' or 'civetweb'")
-
-    log.info("Using %s as radosgw frontend", ctx.rgw.frontend)
-    with contextutil.nested(*subtasks):
-        yield
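
The rgw task above builds its setup steps as a list of zero-argument lambdas and enters them with contextutil.nested, so cleanup runs in reverse order when the task exits. A minimal sketch of that composition pattern, using the standard library's ExitStack (Python 3) as a stand-in for teuthology's contextutil.nested; the step names are illustrative, not the real helpers::

    import contextlib

    @contextlib.contextmanager
    def setup_step(name):
        # setup phase: runs when the step is entered
        print('starting {0}'.format(name))
        try:
            yield
        finally:
            # teardown: runs in reverse order as the stack unwinds
            print('cleaning up {0}'.format(name))

    subtasks = [
        lambda: setup_step('configure_regions_and_zones'),
        lambda: setup_step('configure_users'),
        lambda: setup_step('start_rgw'),
    ]

    with contextlib.ExitStack() as stack:
        for factory in subtasks:
            stack.enter_context(factory())
        # the body of the task (the yield above) would run here
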
diff --git a/teuthology/task/rgw_logsocket.py b/teuthology/task/rgw_logsocket.py
deleted file mode 100644 (file)
index 6f49b00..0000000
+++ /dev/null
@@ -1,161 +0,0 @@
-"""
-rgw s3tests logging wrappers
-"""
-from cStringIO import StringIO
-from configobj import ConfigObj
-import contextlib
-import logging
-import s3tests
-
-from teuthology import misc as teuthology
-from teuthology import contextutil
-
-log = logging.getLogger(__name__)
-
-
-@contextlib.contextmanager
-def download(ctx, config):
-    """
-    Run s3tests download function
-    """
-    return s3tests.download(ctx, config)
-
-def _config_user(s3tests_conf, section, user):
-    """
-    Run s3tests user config function
-    """
-    return s3tests._config_user(s3tests_conf, section, user)
-
-@contextlib.contextmanager
-def create_users(ctx, config):
-    """
-    Run s3tests user create function
-    """
-    return s3tests.create_users(ctx, config)
-
-@contextlib.contextmanager
-def configure(ctx, config):
-    """
-    Run s3tests user configure function
-    """
-    return s3tests.configure(ctx, config)
-
-@contextlib.contextmanager
-def run_tests(ctx, config):
-    """
-    Run the s3tests suite, then read the rgw opslog socket with netcat
-    and verify that it returned data
-    """
-    assert isinstance(config, dict)
-    testdir = teuthology.get_testdir(ctx)
-    for client, client_config in config.iteritems():
-        client_config['extra_args'] = [
-            's3tests.functional.test_s3:test_bucket_list_return_data',
-        ]
-#        args = [
-#                'S3TEST_CONF={tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client),
-#                '{tdir}/s3-tests/virtualenv/bin/nosetests'.format(tdir=testdir),
-#                '-w',
-#                '{tdir}/s3-tests'.format(tdir=testdir),
-#                '-v',
-#              's3tests.functional.test_s3:test_bucket_list_return_data',
-#                ]
-#        if client_config is not None and 'extra_args' in client_config:
-#            args.extend(client_config['extra_args'])
-#
-#        ctx.cluster.only(client).run(
-#            args=args,
-#            )
-
-    s3tests.run_tests(ctx, config)
-
-    netcat_out = StringIO()
-
-    for client, client_config in config.iteritems():
-        ctx.cluster.only(client).run(
-            args = [
-                'netcat',
-                '-w', '5',
-                '-U', '{tdir}/rgw.opslog.sock'.format(tdir=testdir),
-                ],
-             stdout = netcat_out,
-        )
-
-        out = netcat_out.getvalue()
-
-        assert len(out) > 100
-
-        log.info('Received %s', out)
-
-    yield
-
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Run the s3-tests suite against rgw and verify that the opslog socket returns data
-
-    Must restrict testing to a particular client::
-
-        tasks:
-        - ceph:
-        - rgw: [client.0]
-        - rgw-logsocket: [client.0]
-
-    To pass extra arguments to nose (e.g. to run a certain test)::
-
-        tasks:
-        - ceph:
-        - rgw: [client.0]
-        - rgw-logsocket:
-            client.0:
-              extra_args: ['test_s3:test_object_acl_grand_public_read']
-            client.1:
-              extra_args: ['--exclude', 'test_100_continue']
-    """
-    assert config is None or isinstance(config, list) \
-        or isinstance(config, dict), \
-        "task rgw-logsocket only supports a list or dictionary for configuration"
-    all_clients = ['client.{id}'.format(id=id_)
-                   for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
-    if config is None:
-        config = all_clients
-    if isinstance(config, list):
-        config = dict.fromkeys(config)
-    clients = config.keys()
-
-    overrides = ctx.config.get('overrides', {})
-    # merge each client section, not the top level.
-    for (client, cconf) in config.iteritems():
-        teuthology.deep_merge(cconf, overrides.get('rgw-logsocket', {}))
-
-    log.debug('config is %s', config)
-
-    s3tests_conf = {}
-    for client in clients:
-        s3tests_conf[client] = ConfigObj(
-            indent_type='',
-            infile={
-                'DEFAULT':
-                    {
-                    'port'      : 7280,
-                    'is_secure' : 'no',
-                    },
-                'fixtures' : {},
-                's3 main'  : {},
-                's3 alt'   : {},
-                }
-            )
-
-    with contextutil.nested(
-        lambda: download(ctx=ctx, config=config),
-        lambda: create_users(ctx=ctx, config=dict(
-                clients=clients,
-                s3tests_conf=s3tests_conf,
-                )),
-        lambda: configure(ctx=ctx, config=dict(
-                clients=config,
-                s3tests_conf=s3tests_conf,
-                )),
-        lambda: run_tests(ctx=ctx, config=config),
-        ):
-        yield
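
run_tests() above drains the rgw opslog UNIX socket with netcat and asserts that more than 100 bytes came back. A rough equivalent using Python's socket module; the socket path argument is whatever '{tdir}/rgw.opslog.sock' expands to on the test node::

    import socket

    def read_opslog(sock_path, timeout=5.0):
        """Read whatever opslog entries are queued on the UNIX socket."""
        s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        s.settimeout(timeout)
        s.connect(sock_path)
        chunks = []
        try:
            while True:
                data = s.recv(4096)
                if not data:
                    break
                chunks.append(data)
        except socket.timeout:
            # mirrors netcat's -w 5: stop reading after the idle timeout
            pass
        finally:
            s.close()
        out = b''.join(chunks)
        assert len(out) > 100, 'expected opslog entries after running s3tests'
        return out
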
diff --git a/teuthology/task/s3readwrite.py b/teuthology/task/s3readwrite.py
deleted file mode 100644 (file)
index 476015d..0000000
+++ /dev/null
@@ -1,346 +0,0 @@
-"""
-Run rgw s3 readwrite tests
-"""
-from cStringIO import StringIO
-import base64
-import contextlib
-import logging
-import os
-import random
-import string
-import yaml
-
-from teuthology import misc as teuthology
-from teuthology import contextutil
-from ..config import config as teuth_config
-from ..orchestra import run
-from ..orchestra.connection import split_user
-
-log = logging.getLogger(__name__)
-
-
-@contextlib.contextmanager
-def download(ctx, config):
-    """
-    Download the s3 tests from the git builder.
-    Remove the downloaded s3-tests repository upon exit.
-    
-    The context passed in should be identical to the context
-    passed in to the main task.
-    """
-    assert isinstance(config, dict)
-    log.info('Downloading s3-tests...')
-    testdir = teuthology.get_testdir(ctx)
-    for (client, cconf) in config.items():
-        branch = cconf.get('force-branch', None)
-        if not branch:
-            branch = cconf.get('branch', 'master')
-        sha1 = cconf.get('sha1')
-        ctx.cluster.only(client).run(
-            args=[
-                'git', 'clone',
-                '-b', branch,
-                teuth_config.ceph_git_base_url + 's3-tests.git',
-                '{tdir}/s3-tests'.format(tdir=testdir),
-                ],
-            )
-        if sha1 is not None:
-            ctx.cluster.only(client).run(
-                args=[
-                    'cd', '{tdir}/s3-tests'.format(tdir=testdir),
-                    run.Raw('&&'),
-                    'git', 'reset', '--hard', sha1,
-                    ],
-                )
-    try:
-        yield
-    finally:
-        log.info('Removing s3-tests...')
-        testdir = teuthology.get_testdir(ctx)
-        for client in config:
-            ctx.cluster.only(client).run(
-                args=[
-                    'rm',
-                    '-rf',
-                    '{tdir}/s3-tests'.format(tdir=testdir),
-                    ],
-                )
-
-
-def _config_user(s3tests_conf, section, user):
-    """
-    Configure users for this section by stashing away keys, ids, and
-    email addresses.
-    """
-    s3tests_conf[section].setdefault('user_id', user)
-    s3tests_conf[section].setdefault('email', '{user}+test@test.test'.format(user=user))
-    s3tests_conf[section].setdefault('display_name', 'Mr. {user}'.format(user=user))
-    s3tests_conf[section].setdefault('access_key', ''.join(random.choice(string.uppercase) for i in xrange(20)))
-    s3tests_conf[section].setdefault('secret_key', base64.b64encode(os.urandom(40)))
-
-@contextlib.contextmanager
-def create_users(ctx, config):
-    """
-    Create a default s3 user.
-    """
-    assert isinstance(config, dict)
-    log.info('Creating rgw users...')
-    testdir = teuthology.get_testdir(ctx)
-    users = {'s3': 'foo'}
-    cached_client_user_names = dict()
-    for client in config['clients']:
-        cached_client_user_names[client] = dict()
-        s3tests_conf = config['s3tests_conf'][client]
-        s3tests_conf.setdefault('readwrite', {})
-        s3tests_conf['readwrite'].setdefault('bucket', 'rwtest-' + client + '-{random}-')
-        s3tests_conf['readwrite'].setdefault('readers', 10)
-        s3tests_conf['readwrite'].setdefault('writers', 3)
-        s3tests_conf['readwrite'].setdefault('duration', 300)
-        s3tests_conf['readwrite'].setdefault('files', {})
-        rwconf = s3tests_conf['readwrite']
-        rwconf['files'].setdefault('num', 10)
-        rwconf['files'].setdefault('size', 2000)
-        rwconf['files'].setdefault('stddev', 500)
-        for section, user in users.iteritems():
-            _config_user(s3tests_conf, section, '{user}.{client}'.format(user=user, client=client))
-            log.debug('creating user {user} on {client}'.format(user=s3tests_conf[section]['user_id'],
-                                                                client=client))
-
-            # stash the 'delete_user' flag along with user name for easier cleanup
-            delete_this_user = True
-            if 'delete_user' in s3tests_conf['s3']:
-                delete_this_user = s3tests_conf['s3']['delete_user']
-                log.debug('delete_user set to {flag} for {client}'.format(flag=delete_this_user, client=client))
-            cached_client_user_names[client][section+user] = (s3tests_conf[section]['user_id'], delete_this_user)
-
-            # skip actual user creation if the create_user flag is set to false for this client
-            if 'create_user' in s3tests_conf['s3'] and s3tests_conf['s3']['create_user'] == False:
-                log.debug('create_user set to False, skipping user creation for {client}'.format(client=client))
-                continue
-            else:
-                ctx.cluster.only(client).run(
-                    args=[
-                        'adjust-ulimits',
-                        'ceph-coverage',
-                        '{tdir}/archive/coverage'.format(tdir=testdir),
-                        'radosgw-admin',
-                        '-n', client,
-                        'user', 'create',
-                        '--uid', s3tests_conf[section]['user_id'],
-                        '--display-name', s3tests_conf[section]['display_name'],
-                        '--access-key', s3tests_conf[section]['access_key'],
-                        '--secret', s3tests_conf[section]['secret_key'],
-                        '--email', s3tests_conf[section]['email'],
-                    ],
-                )
-    try:
-        yield
-    finally:
-        for client in config['clients']:
-            for section, user in users.iteritems():
-                #uid = '{user}.{client}'.format(user=user, client=client)
-                real_uid, delete_this_user  = cached_client_user_names[client][section+user]
-                if delete_this_user:
-                    ctx.cluster.only(client).run(
-                        args=[
-                            'adjust-ulimits',
-                            'ceph-coverage',
-                            '{tdir}/archive/coverage'.format(tdir=testdir),
-                            'radosgw-admin',
-                            '-n', client,
-                            'user', 'rm',
-                            '--uid', real_uid,
-                            '--purge-data',
-                            ],
-                        )
-                else:
-                    log.debug('skipping delete for user {uid} on {client}'.format(uid=real_uid, client=client))
-
-@contextlib.contextmanager
-def configure(ctx, config):
-    """
-    Configure the s3-tests.  This includes the running of the
-    bootstrap code and the updating of local conf files.
-    """
-    assert isinstance(config, dict)
-    log.info('Configuring s3-readwrite-tests...')
-    for client, properties in config['clients'].iteritems():
-        s3tests_conf = config['s3tests_conf'][client]
-        if properties is not None and 'rgw_server' in properties:
-            host = None
-            for target, roles in zip(ctx.config['targets'].iterkeys(), ctx.config['roles']):
-                log.info('roles: ' + str(roles))
-                log.info('target: ' + str(target))
-                if properties['rgw_server'] in roles:
-                    _, host = split_user(target)
-            assert host is not None, "Invalid client specified as the rgw_server"
-            s3tests_conf['s3']['host'] = host
-        else:
-            s3tests_conf['s3']['host'] = 'localhost'
-
-        def_conf = s3tests_conf['DEFAULT']
-        s3tests_conf['s3'].setdefault('port', def_conf['port'])
-        s3tests_conf['s3'].setdefault('is_secure', def_conf['is_secure'])
-
-        (remote,) = ctx.cluster.only(client).remotes.keys()
-        remote.run(
-            args=[
-                'cd',
-                '{tdir}/s3-tests'.format(tdir=teuthology.get_testdir(ctx)),
-                run.Raw('&&'),
-                './bootstrap',
-                ],
-            )
-        conf_fp = StringIO()
-        conf = dict(
-                        s3=s3tests_conf['s3'],
-                        readwrite=s3tests_conf['readwrite'],
-                    )
-        yaml.safe_dump(conf, conf_fp, default_flow_style=False)
-        teuthology.write_file(
-            remote=remote,
-            path='{tdir}/archive/s3readwrite.{client}.config.yaml'.format(tdir=teuthology.get_testdir(ctx), client=client),
-            data=conf_fp.getvalue(),
-            )
-    yield
-
-
-@contextlib.contextmanager
-def run_tests(ctx, config):
-    """
-    Run the s3readwrite tests after everything is set up.
-
-    :param ctx: Context passed to task
-    :param config: specific configuration information
-    """
-    assert isinstance(config, dict)
-    testdir = teuthology.get_testdir(ctx)
-    for client, client_config in config.iteritems():
-        (remote,) = ctx.cluster.only(client).remotes.keys()
-        conf = teuthology.get_file(remote, '{tdir}/archive/s3readwrite.{client}.config.yaml'.format(tdir=testdir, client=client))
-        args = [
-                '{tdir}/s3-tests/virtualenv/bin/s3tests-test-readwrite'.format(tdir=testdir),
-                ]
-        if client_config is not None and 'extra_args' in client_config:
-            args.extend(client_config['extra_args'])
-
-        ctx.cluster.only(client).run(
-            args=args,
-            stdin=conf,
-            )
-    yield
-
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Run the s3tests-test-readwrite suite against rgw.
-
-    To run all tests on all clients::
-
-        tasks:
-        - ceph:
-        - rgw:
-        - s3readwrite:
-
-    To restrict testing to particular clients::
-
-        tasks:
-        - ceph:
-        - rgw: [client.0]
-        - s3readwrite: [client.0]
-
-    To run against a server on client.1::
-
-        tasks:
-        - ceph:
-        - rgw: [client.1]
-        - s3readwrite:
-            client.0:
-              rgw_server: client.1
-
-    To pass extra test arguments::
-
-        tasks:
-        - ceph:
-        - rgw: [client.0]
-        - s3readwrite:
-            client.0:
-              readwrite:
-                bucket: mybucket
-                readers: 10
-                writers: 3
-                duration: 600
-                files:
-                  num: 10
-                  size: 2000
-                  stddev: 500
-            client.1:
-              ...
-
-    To override s3 configuration::
-
-        tasks:
-        - ceph:
-        - rgw: [client.0]
-        - s3readwrite:
-            client.0:
-              s3:
-                user_id: myuserid
-                display_name: myname
-                email: my@email
-                access_key: myaccesskey
-                secret_key: mysecretkey
-
-    """
-    assert config is None or isinstance(config, list) \
-        or isinstance(config, dict), \
-        "task s3readwrite only supports a list or dictionary for configuration"
-    all_clients = ['client.{id}'.format(id=id_)
-                   for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
-    if config is None:
-        config = all_clients
-    if isinstance(config, list):
-        config = dict.fromkeys(config)
-    clients = config.keys()
-
-    overrides = ctx.config.get('overrides', {})
-    # merge each client section, not the top level.
-    for client in config.iterkeys():
-        if not config[client]:
-            config[client] = {}
-        teuthology.deep_merge(config[client], overrides.get('s3readwrite', {}))
-
-    log.debug('in s3readwrite, config is %s', config)
-
-    s3tests_conf = {}
-    for client in clients:
-        if config[client] is None:
-            config[client] = {}
-        config[client].setdefault('s3', {})
-        config[client].setdefault('readwrite', {})
-
-        s3tests_conf[client] = ({
-                'DEFAULT':
-                    {
-                    'port'      : 7280,
-                    'is_secure' : False,
-                    },
-                'readwrite' : config[client]['readwrite'],
-                's3'  : config[client]['s3'],
-                })
-
-    with contextutil.nested(
-        lambda: download(ctx=ctx, config=config),
-        lambda: create_users(ctx=ctx, config=dict(
-                clients=clients,
-                s3tests_conf=s3tests_conf,
-                )),
-        lambda: configure(ctx=ctx, config=dict(
-                clients=config,
-                s3tests_conf=s3tests_conf,
-                )),
-        lambda: run_tests(ctx=ctx, config=config),
-        ):
-        pass
-    yield
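
configure() above serializes each client's section into a YAML document that run_tests() later pipes to s3tests-test-readwrite on stdin. A hedged sketch of that document built from the defaults create_users() fills in; the host and credentials below are placeholders, not values from a real run::

    import yaml

    conf = {
        's3': {
            'host': 'localhost',
            'port': 7280,
            'is_secure': False,
            'user_id': 'foo.client.0',
            'access_key': 'EXAMPLEACCESSKEY0000',
            'secret_key': 'examplesecretkeyexamplesecretkey',
        },
        'readwrite': {
            'bucket': 'rwtest-client.0-{random}-',
            'readers': 10,
            'writers': 3,
            'duration': 300,
            'files': {'num': 10, 'size': 2000, 'stddev': 500},
        },
    }

    print(yaml.safe_dump(conf, default_flow_style=False))
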
diff --git a/teuthology/task/s3roundtrip.py b/teuthology/task/s3roundtrip.py
deleted file mode 100644 (file)
index 5a7093d..0000000
+++ /dev/null
@@ -1,302 +0,0 @@
-"""
-Run rgw roundtrip message tests
-"""
-from cStringIO import StringIO
-import base64
-import contextlib
-import logging
-import os
-import random
-import string
-import yaml
-
-from teuthology import misc as teuthology
-from teuthology import contextutil
-from ..config import config as teuth_config
-from ..orchestra import run
-from ..orchestra.connection import split_user
-
-log = logging.getLogger(__name__)
-
-
-@contextlib.contextmanager
-def download(ctx, config):
-    """
-    Download the s3 tests from the git builder.
-    Remove the downloaded s3-tests repository upon exit.
-    
-    The context passed in should be identical to the context
-    passed in to the main task.
-    """
-    assert isinstance(config, list)
-    log.info('Downloading s3-tests...')
-    testdir = teuthology.get_testdir(ctx)
-    for client in config:
-        ctx.cluster.only(client).run(
-            args=[
-                'git', 'clone',
-                teuth_config.ceph_git_base_url + 's3-tests.git',
-                '{tdir}/s3-tests'.format(tdir=testdir),
-                ],
-            )
-    try:
-        yield
-    finally:
-        log.info('Removing s3-tests...')
-        for client in config:
-            ctx.cluster.only(client).run(
-                args=[
-                    'rm',
-                    '-rf',
-                    '{tdir}/s3-tests'.format(tdir=testdir),
-                    ],
-                )
-
-def _config_user(s3tests_conf, section, user):
-    """
-    Configure users for this section by stashing away keys, ids, and
-    email addresses.
-    """
-    s3tests_conf[section].setdefault('user_id', user)
-    s3tests_conf[section].setdefault('email', '{user}+test@test.test'.format(user=user))
-    s3tests_conf[section].setdefault('display_name', 'Mr. {user}'.format(user=user))
-    s3tests_conf[section].setdefault('access_key', ''.join(random.choice(string.uppercase) for i in xrange(20)))
-    s3tests_conf[section].setdefault('secret_key', base64.b64encode(os.urandom(40)))
-
-@contextlib.contextmanager
-def create_users(ctx, config):
-    """
-    Create a default s3 user.
-    """
-    assert isinstance(config, dict)
-    log.info('Creating rgw users...')
-    testdir = teuthology.get_testdir(ctx)
-    users = {'s3': 'foo'}
-    for client in config['clients']:
-        s3tests_conf = config['s3tests_conf'][client]
-        s3tests_conf.setdefault('roundtrip', {})
-        s3tests_conf['roundtrip'].setdefault('bucket', 'rttest-' + client + '-{random}-')
-        s3tests_conf['roundtrip'].setdefault('readers', 10)
-        s3tests_conf['roundtrip'].setdefault('writers', 3)
-        s3tests_conf['roundtrip'].setdefault('duration', 300)
-        s3tests_conf['roundtrip'].setdefault('files', {})
-        rtconf = s3tests_conf['roundtrip']
-        rtconf['files'].setdefault('num', 10)
-        rtconf['files'].setdefault('size', 2000)
-        rtconf['files'].setdefault('stddev', 500)
-        for section, user in [('s3', 'foo')]:
-            _config_user(s3tests_conf, section, '{user}.{client}'.format(user=user, client=client))
-            ctx.cluster.only(client).run(
-                args=[
-                    'adjust-ulimits',
-                    'ceph-coverage',
-                    '{tdir}/archive/coverage'.format(tdir=testdir),
-                    'radosgw-admin',
-                    '-n', client,
-                    'user', 'create',
-                    '--uid', s3tests_conf[section]['user_id'],
-                    '--display-name', s3tests_conf[section]['display_name'],
-                    '--access-key', s3tests_conf[section]['access_key'],
-                    '--secret', s3tests_conf[section]['secret_key'],
-                    '--email', s3tests_conf[section]['email'],
-                ],
-            )
-    try:
-        yield
-    finally:
-        for client in config['clients']:
-            for user in users.itervalues():
-                uid = '{user}.{client}'.format(user=user, client=client)
-                ctx.cluster.only(client).run(
-                    args=[
-                        'adjust-ulimits',
-                        'ceph-coverage',
-                        '{tdir}/archive/coverage'.format(tdir=testdir),
-                        'radosgw-admin',
-                        '-n', client,
-                        'user', 'rm',
-                        '--uid', uid,
-                        '--purge-data',
-                        ],
-                    )
-
-@contextlib.contextmanager
-def configure(ctx, config):
-    """
-    Configure the s3-tests.  This includes the running of the
-    bootstrap code and the updating of local conf files.
-    """
-    assert isinstance(config, dict)
-    log.info('Configuring s3-roundtrip-tests...')
-    testdir = teuthology.get_testdir(ctx)
-    for client, properties in config['clients'].iteritems():
-        s3tests_conf = config['s3tests_conf'][client]
-        if properties is not None and 'rgw_server' in properties:
-            host = None
-            for target, roles in zip(ctx.config['targets'].iterkeys(), ctx.config['roles']):
-                log.info('roles: ' + str(roles))
-                log.info('target: ' + str(target))
-                if properties['rgw_server'] in roles:
-                    _, host = split_user(target)
-            assert host is not None, "Invalid client specified as the rgw_server"
-            s3tests_conf['s3']['host'] = host
-        else:
-            s3tests_conf['s3']['host'] = 'localhost'
-
-        def_conf = s3tests_conf['DEFAULT']
-        s3tests_conf['s3'].setdefault('port', def_conf['port'])
-        s3tests_conf['s3'].setdefault('is_secure', def_conf['is_secure'])
-
-        (remote,) = ctx.cluster.only(client).remotes.keys()
-        remote.run(
-            args=[
-                'cd',
-                '{tdir}/s3-tests'.format(tdir=testdir),
-                run.Raw('&&'),
-                './bootstrap',
-                ],
-            )
-        conf_fp = StringIO()
-        conf = dict(
-                        s3=s3tests_conf['s3'],
-                        roundtrip=s3tests_conf['roundtrip'],
-                    )
-        yaml.safe_dump(conf, conf_fp, default_flow_style=False)
-        teuthology.write_file(
-            remote=remote,
-            path='{tdir}/archive/s3roundtrip.{client}.config.yaml'.format(tdir=testdir, client=client),
-            data=conf_fp.getvalue(),
-            )
-    yield
-
-
-@contextlib.contextmanager
-def run_tests(ctx, config):
-    """
-    Run the s3 roundtrip after everything is set up.
-
-    :param ctx: Context passed to task
-    :param config: specific configuration information
-    """
-    assert isinstance(config, dict)
-    testdir = teuthology.get_testdir(ctx)
-    for client, client_config in config.iteritems():
-        (remote,) = ctx.cluster.only(client).remotes.keys()
-        conf = teuthology.get_file(remote, '{tdir}/archive/s3roundtrip.{client}.config.yaml'.format(tdir=testdir, client=client))
-        args = [
-                '{tdir}/s3-tests/virtualenv/bin/s3tests-test-roundtrip'.format(tdir=testdir),
-                ]
-        if client_config is not None and 'extra_args' in client_config:
-            args.extend(client_config['extra_args'])
-
-        ctx.cluster.only(client).run(
-            args=args,
-            stdin=conf,
-            )
-    yield
-
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Run the s3tests-test-roundtrip suite against rgw.
-
-    To run all tests on all clients::
-
-        tasks:
-        - ceph:
-        - rgw:
-        - s3roundtrip:
-
-    To restrict testing to particular clients::
-
-        tasks:
-        - ceph:
-        - rgw: [client.0]
-        - s3roundtrip: [client.0]
-
-    To run against a server on client.1::
-
-        tasks:
-        - ceph:
-        - rgw: [client.1]
-        - s3roundtrip:
-            client.0:
-              rgw_server: client.1
-
-    To pass extra test arguments::
-
-        tasks:
-        - ceph:
-        - rgw: [client.0]
-        - s3roundtrip:
-            client.0:
-              roundtrip:
-                bucket: mybucket
-                readers: 10
-                writers: 3
-                duration: 600
-                files:
-                  num: 10
-                  size: 2000
-                  stddev: 500
-            client.1:
-              ...
-
-    To override s3 configuration::
-
-        tasks:
-        - ceph:
-        - rgw: [client.0]
-        - s3roundtrip:
-            client.0:
-              s3:
-                user_id: myuserid
-                display_name: myname
-                email: my@email
-                access_key: myaccesskey
-                secret_key: mysecretkey
-
-    """
-    assert config is None or isinstance(config, list) \
-        or isinstance(config, dict), \
-        "task s3roundtrip only supports a list or dictionary for configuration"
-    all_clients = ['client.{id}'.format(id=id_)
-                   for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
-    if config is None:
-        config = all_clients
-    if isinstance(config, list):
-        config = dict.fromkeys(config)
-    clients = config.keys()
-
-    s3tests_conf = {}
-    for client in clients:
-        if config[client] is None:
-            config[client] = {}
-        config[client].setdefault('s3', {})
-        config[client].setdefault('roundtrip', {})
-
-        s3tests_conf[client] = ({
-                'DEFAULT':
-                    {
-                    'port'      : 7280,
-                    'is_secure' : False,
-                    },
-                'roundtrip' : config[client]['roundtrip'],
-                's3'  : config[client]['s3'],
-                })
-
-    with contextutil.nested(
-        lambda: download(ctx=ctx, config=clients),
-        lambda: create_users(ctx=ctx, config=dict(
-                clients=clients,
-                s3tests_conf=s3tests_conf,
-                )),
-        lambda: configure(ctx=ctx, config=dict(
-                clients=config,
-                s3tests_conf=s3tests_conf,
-                )),
-        lambda: run_tests(ctx=ctx, config=config),
-        ):
-        pass
-    yield
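
The _config_user() helpers repeated in these tasks generate throwaway credentials before radosgw-admin creates each test user: a 20-character uppercase access key and a base64-encoded 40-byte secret. An equivalent sketch that sidesteps the Python 2-only string.uppercase and xrange names::

    import base64
    import os
    import random
    import string

    def make_credentials():
        # 20 random uppercase letters for the access key
        access_key = ''.join(random.choice(string.ascii_uppercase) for _ in range(20))
        # 40 random bytes, base64-encoded, for the secret key
        secret_key = base64.b64encode(os.urandom(40))
        return access_key, secret_key
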
diff --git a/teuthology/task/s3tests.py b/teuthology/task/s3tests.py
deleted file mode 100644 (file)
index abbacb9..0000000
+++ /dev/null
@@ -1,402 +0,0 @@
-"""
-Run a set of s3 tests on rgw.
-"""
-from cStringIO import StringIO
-from configobj import ConfigObj
-import base64
-import contextlib
-import logging
-import os
-import random
-import string
-
-import teuthology.task_util.rgw as rgw_utils
-
-from teuthology import misc as teuthology
-from teuthology import contextutil
-from ..config import config as teuth_config
-from ..orchestra import run
-from ..orchestra.connection import split_user
-
-log = logging.getLogger(__name__)
-
-def extract_sync_client_data(ctx, client_name):
-    """
-    Extract synchronized client rgw zone and rgw region information.
-    
-    :param ctx: Context passed to the s3tests task
-    :param client_name: Name of the client that we are syncing with
-    """
-    return_region_name = None
-    return_dict = None
-    client = ctx.ceph.conf.get(client_name, None)
-    if client:
-        current_client_zone = client.get('rgw zone', None)
-        if current_client_zone:
-            (endpoint_host, endpoint_port) = ctx.rgw.role_endpoints.get(client_name, (None, None))
-            # pull out the radosgw_agent stuff
-            regions = ctx.rgw.regions
-            for region in regions:
-                log.debug('region is {region}'.format(region=region))
-                region_data = ctx.rgw.regions[region]
-                log.debug('region data is {region}'.format(region=region_data))
-                zones = region_data['zones']
-                for zone in zones:
-                    if current_client_zone in zone:
-                        return_region_name = region
-                        return_dict = dict()
-                        return_dict['api_name'] = region_data['api name']
-                        return_dict['is_master'] = region_data['is master']
-                        return_dict['port'] = endpoint_port
-                        return_dict['host'] = endpoint_host
-
-                        # The s3tests expect the sync_agent_[addr|port] to be
-                        # set on the non-master node for some reason
-                        if not region_data['is master']:
-                            (rgwagent_host, rgwagent_port) = ctx.radosgw_agent.endpoint
-                            (return_dict['sync_agent_addr'], _) = ctx.rgw.role_endpoints[rgwagent_host]
-                            return_dict['sync_agent_port'] = rgwagent_port
-
-        else: #if client_zone:
-            log.debug('No zone info for {host}'.format(host=client_name))
-    else: # if client
-        log.debug('No ceph conf for {host}'.format(host=client_name))
-
-    return return_region_name, return_dict
-
-def update_conf_with_region_info(ctx, config, s3tests_conf):
-    """
-    Scan for a client (a key in s3tests_conf) that takes part in a
-    radosgw-agent sync relationship.  Update the local conf with the
-    relevant region information if such a client is found.
-    """
-    for key in s3tests_conf.keys():
-        # we'll assume that there's only one sync relationship (source / destination) with client.X
-        # as the key for now
-
-        # Iterate through all of the radosgw_agent (rgwa) configs and see if a
-        # given client is involved in a relationship.
-        # If a given client isn't, skip it
-        this_client_in_rgwa_config = False
-        for rgwa in ctx.radosgw_agent.config.keys():
-            rgwa_data = ctx.radosgw_agent.config[rgwa]
-
-            if key in rgwa_data['src'] or key in rgwa_data['dest']:
-                this_client_in_rgwa_config = True
-                log.debug('{client} is in an radosgw-agent sync relationship'.format(client=key))
-                radosgw_sync_data = ctx.radosgw_agent.config[key]
-                break
-        if not this_client_in_rgwa_config:
-            log.debug('{client} is NOT in an radosgw-agent sync relationship'.format(client=key))
-            continue
-
-        source_client = radosgw_sync_data['src']
-        dest_client = radosgw_sync_data['dest']
-
-        # Extract the pertinent info for the source side
-        source_region_name, source_region_dict = extract_sync_client_data(ctx, source_client)
-        log.debug('\t{key} source_region {source_region} source_dict {source_dict}'.format
-            (key=key,source_region=source_region_name,source_dict=source_region_dict))
-
-        # The source *should* be the master region, but test anyway and then set it as the default region
-        if source_region_dict['is_master']:
-            log.debug('Setting {region} as default_region'.format(region=source_region_name))
-            s3tests_conf[key]['fixtures'].setdefault('default_region', source_region_name)
-
-        # Extract the pertinent info for the destination side
-        dest_region_name, dest_region_dict = extract_sync_client_data(ctx, dest_client)
-        log.debug('\t{key} dest_region {dest_region} dest_dict {dest_dict}'.format
-            (key=key,dest_region=dest_region_name,dest_dict=dest_region_dict))
-
-        # now add these regions to the s3tests_conf object
-        s3tests_conf[key]['region {region_name}'.format(region_name=source_region_name)] = source_region_dict
-        s3tests_conf[key]['region {region_name}'.format(region_name=dest_region_name)] = dest_region_dict
-
-@contextlib.contextmanager
-def download(ctx, config):
-    """
-    Download the s3 tests from the git builder.
-    Remove the downloaded s3-tests repository upon exit.
-    
-    The context passed in should be identical to the context
-    passed in to the main task.
-    """
-    assert isinstance(config, dict)
-    log.info('Downloading s3-tests...')
-    testdir = teuthology.get_testdir(ctx)
-    for (client, cconf) in config.items():
-        branch = cconf.get('force-branch', None)
-        if not branch:
-            branch = cconf.get('branch', 'master')
-        sha1 = cconf.get('sha1')
-        ctx.cluster.only(client).run(
-            args=[
-                'git', 'clone',
-                '-b', branch,
-                teuth_config.ceph_git_base_url + 's3-tests.git',
-                '{tdir}/s3-tests'.format(tdir=testdir),
-                ],
-            )
-        if sha1 is not None:
-            ctx.cluster.only(client).run(
-                args=[
-                    'cd', '{tdir}/s3-tests'.format(tdir=testdir),
-                    run.Raw('&&'),
-                    'git', 'reset', '--hard', sha1,
-                    ],
-                )
-    try:
-        yield
-    finally:
-        log.info('Removing s3-tests...')
-        testdir = teuthology.get_testdir(ctx)
-        for client in config:
-            ctx.cluster.only(client).run(
-                args=[
-                    'rm',
-                    '-rf',
-                    '{tdir}/s3-tests'.format(tdir=testdir),
-                    ],
-                )
-
-
-def _config_user(s3tests_conf, section, user):
-    """
-    Configure users for this section by stashing away keys, ids, and
-    email addresses.
-    """
-    s3tests_conf[section].setdefault('user_id', user)
-    s3tests_conf[section].setdefault('email', '{user}+test@test.test'.format(user=user))
-    s3tests_conf[section].setdefault('display_name', 'Mr. {user}'.format(user=user))
-    s3tests_conf[section].setdefault('access_key', ''.join(random.choice(string.uppercase) for i in xrange(20)))
-    s3tests_conf[section].setdefault('secret_key', base64.b64encode(os.urandom(40)))
-
-
-@contextlib.contextmanager
-def create_users(ctx, config):
-    """
-    Create a main and an alternate s3 user.
-    """
-    assert isinstance(config, dict)
-    log.info('Creating rgw users...')
-    testdir = teuthology.get_testdir(ctx)
-    users = {'s3 main': 'foo', 's3 alt': 'bar'}
-    for client in config['clients']:
-        s3tests_conf = config['s3tests_conf'][client]
-        s3tests_conf.setdefault('fixtures', {})
-        s3tests_conf['fixtures'].setdefault('bucket prefix', 'test-' + client + '-{random}-')
-        for section, user in users.iteritems():
-            _config_user(s3tests_conf, section, '{user}.{client}'.format(user=user, client=client))
-            log.debug('Creating user {user} on {host}'.format(user=s3tests_conf[section]['user_id'], host=client))
-            ctx.cluster.only(client).run(
-                args=[
-                    'adjust-ulimits',
-                    'ceph-coverage',
-                    '{tdir}/archive/coverage'.format(tdir=testdir),
-                    'radosgw-admin',
-                    '-n', client,
-                    'user', 'create',
-                    '--uid', s3tests_conf[section]['user_id'],
-                    '--display-name', s3tests_conf[section]['display_name'],
-                    '--access-key', s3tests_conf[section]['access_key'],
-                    '--secret', s3tests_conf[section]['secret_key'],
-                    '--email', s3tests_conf[section]['email'],
-                ],
-            )
-    try:
-        yield
-    finally:
-        for client in config['clients']:
-            for user in users.itervalues():
-                uid = '{user}.{client}'.format(user=user, client=client)
-                ctx.cluster.only(client).run(
-                    args=[
-                        'adjust-ulimits',
-                        'ceph-coverage',
-                        '{tdir}/archive/coverage'.format(tdir=testdir),
-                        'radosgw-admin',
-                        '-n', client,
-                        'user', 'rm',
-                        '--uid', uid,
-                        '--purge-data',
-                        ],
-                    )
-
-
-@contextlib.contextmanager
-def configure(ctx, config):
-    """
-    Configure the s3-tests.  This includes the running of the
-    bootstrap code and the updating of local conf files.
-    """
-    assert isinstance(config, dict)
-    log.info('Configuring s3-tests...')
-    testdir = teuthology.get_testdir(ctx)
-    for client, properties in config['clients'].iteritems():
-        s3tests_conf = config['s3tests_conf'][client]
-        if properties is not None and 'rgw_server' in properties:
-            host = None
-            for target, roles in zip(ctx.config['targets'].iterkeys(), ctx.config['roles']):
-                log.info('roles: ' + str(roles))
-                log.info('target: ' + str(target))
-                if properties['rgw_server'] in roles:
-                    _, host = split_user(target)
-            assert host is not None, "Invalid client specified as the rgw_server"
-            s3tests_conf['DEFAULT']['host'] = host
-        else:
-            s3tests_conf['DEFAULT']['host'] = 'localhost'
-
-        (remote,) = ctx.cluster.only(client).remotes.keys()
-        remote.run(
-            args=[
-                'cd',
-                '{tdir}/s3-tests'.format(tdir=testdir),
-                run.Raw('&&'),
-                './bootstrap',
-                ],
-            )
-        conf_fp = StringIO()
-        s3tests_conf.write(conf_fp)
-        teuthology.write_file(
-            remote=remote,
-            path='{tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client),
-            data=conf_fp.getvalue(),
-            )
-    yield
-
-@contextlib.contextmanager
-def sync_users(ctx, config):
-    """
-    Sync this user.
-    """
-    assert isinstance(config, dict)
-    # do a full sync if this is a multi-region test
-    if rgw_utils.multi_region_enabled(ctx):
-        log.debug('Doing a full sync')
-        rgw_utils.radosgw_agent_sync_all(ctx)
-    else:
-        log.debug('Not a multi-region config; skipping the metadata sync')
-
-    yield
-
-@contextlib.contextmanager
-def run_tests(ctx, config):
-    """
-    Run the s3tests after everything is set up.
-
-    :param ctx: Context passed to task
-    :param config: specific configuration information
-    """
-    assert isinstance(config, dict)
-    testdir = teuthology.get_testdir(ctx)
-    for client, client_config in config.iteritems():
-        args = [
-                'S3TEST_CONF={tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client),
-                '{tdir}/s3-tests/virtualenv/bin/nosetests'.format(tdir=testdir),
-                '-w',
-                '{tdir}/s3-tests'.format(tdir=testdir),
-                '-v',
-                '-a', '!fails_on_rgw',
-                ]
-        if client_config is not None and 'extra_args' in client_config:
-            args.extend(client_config['extra_args'])
-
-        ctx.cluster.only(client).run(
-            args=args,
-            )
-    yield
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Run the s3-tests suite against rgw.
-
-    To run all tests on all clients::
-
-        tasks:
-        - ceph:
-        - rgw:
-        - s3tests:
-
-    To restrict testing to particular clients::
-
-        tasks:
-        - ceph:
-        - rgw: [client.0]
-        - s3tests: [client.0]
-
-    To run against a server on client.1::
-
-        tasks:
-        - ceph:
-        - rgw: [client.1]
-        - s3tests:
-            client.0:
-              rgw_server: client.1
-
-    To pass extra arguments to nose (e.g. to run a certain test)::
-
-        tasks:
-        - ceph:
-        - rgw: [client.0]
-        - s3tests:
-            client.0:
-              extra_args: ['test_s3:test_object_acl_grand_public_read']
-            client.1:
-              extra_args: ['--exclude', 'test_100_continue']
-    """
-    assert config is None or isinstance(config, list) \
-        or isinstance(config, dict), \
-        "task s3tests only supports a list or dictionary for configuration"
-    all_clients = ['client.{id}'.format(id=id_)
-                   for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
-    if config is None:
-        config = all_clients
-    if isinstance(config, list):
-        config = dict.fromkeys(config)
-    clients = config.keys()
-
-    overrides = ctx.config.get('overrides', {})
-    # merge each client section, not the top level.
-    for client in config.iterkeys():
-        if not config[client]:
-            config[client] = {}
-        teuthology.deep_merge(config[client], overrides.get('s3tests', {}))
-
-    log.debug('s3tests config is %s', config)
-
-    s3tests_conf = {}
-    for client in clients:
-        s3tests_conf[client] = ConfigObj(
-            indent_type='',
-            infile={
-                'DEFAULT':
-                    {
-                    'port'      : 7280,
-                    'is_secure' : 'no',
-                    },
-                'fixtures' : {},
-                's3 main'  : {},
-                's3 alt'   : {},
-                }
-            )
-
-    # Only attempt to add in the region info if there's a radosgw_agent configured
-    if hasattr(ctx, 'radosgw_agent'):
-        update_conf_with_region_info(ctx, config, s3tests_conf)
-
-    with contextutil.nested(
-        lambda: download(ctx=ctx, config=config),
-        lambda: create_users(ctx=ctx, config=dict(
-                clients=clients,
-                s3tests_conf=s3tests_conf,
-                )),
-        lambda: sync_users(ctx=ctx, config=config),
-        lambda: configure(ctx=ctx, config=dict(
-                clients=config,
-                s3tests_conf=s3tests_conf,
-                )),
-        lambda: run_tests(ctx=ctx, config=config),
-        ):
-        pass
-    yield
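
The s3tests task above builds one ConfigObj per client, and in multi-region runs update_conf_with_region_info() adds a 'region <name>' section per sync endpoint before configure() writes the file to the archive. A sketch of the resulting structure with illustrative values only::

    from configobj import ConfigObj

    s3tests_conf = ConfigObj(
        indent_type='',
        infile={
            'DEFAULT': {'host': 'localhost', 'port': 7280, 'is_secure': 'no'},
            'fixtures': {'bucket prefix': 'test-client.0-{random}-'},
            's3 main': {'user_id': 'foo.client.0'},
            's3 alt': {'user_id': 'bar.client.0'},
            # only present when a radosgw-agent sync relationship exists:
            'region foo': {
                'api_name': 'api_name',
                'is_master': True,
                'host': 'localhost',
                'port': 7280,
            },
        },
    )
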
diff --git a/teuthology/task/samba.py b/teuthology/task/samba.py
deleted file mode 100644 (file)
index a0375c5..0000000
+++ /dev/null
@@ -1,241 +0,0 @@
-"""
-Samba
-"""
-import contextlib
-import logging
-import sys
-
-from teuthology import misc as teuthology
-from ..orchestra import run
-
-log = logging.getLogger(__name__)
-
-def get_sambas(ctx, roles):
-    """
-    Scan for roles that are samba.  Yield the id of the samba role
-    (samba.0, samba.1...) and the associated remote.
-    
-    :param ctx: Context
-    :param roles: roles for this test (extracted from yaml files)
-    """
-    for role in roles:
-        assert isinstance(role, basestring)
-        PREFIX = 'samba.'
-        assert role.startswith(PREFIX)
-        id_ = role[len(PREFIX):]
-        (remote,) = ctx.cluster.only(role).remotes.iterkeys()
-        yield (id_, remote)
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Set up samba smbd with the ceph vfs module.  This task assumes the samba
-    package has already been installed via the install task.
-
-    The config is optional and defaults to starting samba on all nodes.
-    If a config is given, it is expected to be a list of
-    samba nodes to start smbd servers on.
-
-    Example that starts smbd on all samba nodes::
-
-        tasks:
-        - install:
-        - install:
-            project: samba
-            extra_packages: ['samba']
-        - ceph:
-        - samba:
-        - interactive:
-
-    Example that starts smbd on just one of the samba nodes and cifs on the other::
-
-        tasks:
-        - samba: [samba.0]
-        - cifs: [samba.1]
-
-    An optional backend can be specified, and requires a path which smbd will
-    use as the backend storage location::
-
-        roles:
-            - [osd.0, osd.1, osd.2, mon.0, mon.1, mon.2, mds.a]
-            - [client.0, samba.0]
-
-        tasks:
-        - ceph:
-        - ceph-fuse: [client.0]
-        - samba:
-            samba.0:
-              cephfuse: "{testdir}/mnt.0"
-
-    This mounts ceph to {testdir}/mnt.0 using fuse, and starts smbd with
-    a UNC of //localhost/cephfuse.  Access through that UNC will be on
-    the ceph fuse mount point.
-
-    If no arguments are specified in the samba
-    role, the default behavior is to enable the ceph UNC //localhost/ceph
-    and use the ceph vfs module as the smbd backend.
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    log.info("Setting up smbd with ceph vfs...")
-    assert config is None or isinstance(config, list) or isinstance(config, dict), \
-        "task samba got invalid config"
-
-    if config is None:
-        config = dict(('samba.{id}'.format(id=id_), None)
-                  for id_ in teuthology.all_roles_of_type(ctx.cluster, 'samba'))
-    elif isinstance(config, list):
-        config = dict((name, None) for name in config)
-
-    samba_servers = list(get_sambas(ctx=ctx, roles=config.keys()))
-
-    testdir = teuthology.get_testdir(ctx)
-
-    from teuthology.task.ceph import CephState
-    if not hasattr(ctx, 'daemons'):
-        ctx.daemons = CephState()
-
-    for id_, remote in samba_servers:
-
-        rolestr = "samba.{id_}".format(id_=id_)
-
-        confextras = """vfs objects = ceph
-  ceph:config_file = /etc/ceph/ceph.conf"""
-
-        unc = "ceph"
-        backend = "/"
-
-        if config[rolestr] is not None:
-            # verify that there's just one parameter in role
-            if len(config[rolestr]) != 1:
-                log.error("samba config for role samba.{id_} must have only one parameter".format(id_=id_))
-                raise Exception('invalid config')
-            confextras = ""
-            (unc, backendstr) = config[rolestr].items()[0]
-            backend = backendstr.format(testdir=testdir)
-
-        # on first samba role, set ownership and permissions of ceph root
-        # so that samba tests succeed
-        if config[rolestr] is None and id_ == samba_servers[0][0]:
-            remote.run(
-                    args=[
-                        'mkdir', '-p', '/tmp/cmnt', run.Raw('&&'),
-                        'sudo', 'ceph-fuse', '/tmp/cmnt', run.Raw('&&'),
-                        'sudo', 'chown', 'ubuntu:ubuntu', '/tmp/cmnt/', run.Raw('&&'),
-                        'sudo', 'chmod', '1777', '/tmp/cmnt/', run.Raw('&&'),
-                        'sudo', 'umount', '/tmp/cmnt/', run.Raw('&&'),
-                        'rm', '-rf', '/tmp/cmnt',
-                        ],
-                    )
-        else:
-            remote.run(
-                    args=[
-                        'sudo', 'chown', 'ubuntu:ubuntu', backend, run.Raw('&&'),
-                        'sudo', 'chmod', '1777', backend,
-                        ],
-                    )
-
-        teuthology.sudo_write_file(remote, "/usr/local/samba/etc/smb.conf", """
-[global]
-  workgroup = WORKGROUP
-  netbios name = DOMAIN
-
-[{unc}]
-  path = {backend}
-  {extras}
-  writeable = yes
-  valid users = ubuntu
-""".format(extras=confextras, unc=unc, backend=backend))
-
-        # create ubuntu user
-        remote.run(
-            args=[
-                'sudo', '/usr/local/samba/bin/smbpasswd', '-e', 'ubuntu',
-                run.Raw('||'),
-                'printf', run.Raw('"ubuntu\nubuntu\n"'),
-                run.Raw('|'),
-                'sudo', '/usr/local/samba/bin/smbpasswd', '-s', '-a', 'ubuntu'
-            ])
-
-        smbd_cmd = [
-                'sudo',
-                'daemon-helper',
-                'kill',
-                'nostdin',
-                '/usr/local/samba/sbin/smbd',
-                '-F',
-                ]
-        ctx.daemons.add_daemon(remote, 'smbd', id_,
-                               args=smbd_cmd,
-                               logger=log.getChild("smbd.{id_}".format(id_=id_)),
-                               stdin=run.PIPE,
-                               wait=False,
-                               )
-
-        # let smbd initialize, probably a better way...
-        import time
-        seconds_to_sleep = 100        
-        log.info('Sleeping for %s  seconds...' % seconds_to_sleep)
-        time.sleep(seconds_to_sleep)
-        log.info('Sleeping stopped...')
-
-    try:
-        yield
-    finally:
-        log.info('Stopping smbd processes...')
-        exc_info = (None, None, None)
-        for d in ctx.daemons.iter_daemons_of_role('smbd'):
-            try:
-                d.stop()
-            except (run.CommandFailedError,
-                    run.CommandCrashedError,
-                    run.ConnectionLostError):
-                exc_info = sys.exc_info()
-                log.exception('Saw exception from %s.%s', d.role, d.id_)
-        if exc_info != (None, None, None):
-            raise exc_info[0], exc_info[1], exc_info[2]
-
-        for id_, remote in samba_servers:
-            remote.run(
-                args=[
-                    'sudo',
-                    'rm', '-rf',
-                    '/usr/local/samba/etc/smb.conf',
-                    '/usr/local/samba/private/*',
-                    '/usr/local/samba/var/run/',
-                    '/usr/local/samba/var/locks',
-                    '/usr/local/samba/var/lock',
-                    ],
-                )
-            # make sure daemons are gone
-            try:
-                remote.run(
-                    args=[
-                        'while',
-                        'sudo', 'killall', '-9', 'smbd',
-                        run.Raw(';'),
-                        'do', 'sleep', '1',
-                        run.Raw(';'),
-                        'done',
-                        ],
-                    )
-
-                remote.run(
-                    args=[
-                        'sudo',
-                        'lsof',
-                        backend,
-                        ],
-                    )
-                remote.run(
-                    args=[
-                        'sudo',
-                        'fuser',
-                        '-M',
-                        backend,
-                        ],
-                    )
-            except Exception:
-                log.exception("Saw exception")
-                pass
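
The samba task above writes a small smb.conf whose single share either exposes the ceph vfs module or a backend path taken from the role config (for example the ceph-fuse mount shown in the docstring). A sketch of that rendering; the default backend path below is hypothetical::

    def render_smb_conf(unc='cephfuse', backend='/home/ubuntu/cephtest/mnt.0', extras=''):
        """Build the share definition the task writes to /usr/local/samba/etc/smb.conf."""
        lines = [
            '[global]',
            '  workgroup = WORKGROUP',
            '  netbios name = DOMAIN',
            '',
            '[{unc}]'.format(unc=unc),
            '  path = {backend}'.format(backend=backend),
            '  writeable = yes',
            '  valid users = ubuntu',
        ]
        if extras:
            # e.g. the ceph vfs settings used when no backend path is given
            lines.insert(6, '  ' + extras)
        return '\n'.join(lines) + '\n'
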
diff --git a/teuthology/task/scrub.py b/teuthology/task/scrub.py
deleted file mode 100644 (file)
index 7a25300..0000000
+++ /dev/null
@@ -1,117 +0,0 @@
-"""
-Scrub osds
-"""
-import contextlib
-import gevent
-import logging
-import random
-import time
-
-import ceph_manager
-from teuthology import misc as teuthology
-
-log = logging.getLogger(__name__)
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Run scrub periodically. Randomly chooses an OSD to scrub.
-
-    The config should be as follows:
-
-    scrub:
-        frequency: <seconds between scrubs>
-        deep: <bool for deepness>
-
-    example:
-
-    tasks:
-    - ceph:
-    - scrub:
-        frequency: 30
-        deep: 0
-    """
-    if config is None:
-        config = {}
-    assert isinstance(config, dict), \
-        'scrub task only accepts a dict for configuration'
-
-    log.info('Beginning scrub...')
-
-    first_mon = teuthology.get_first_mon(ctx, config)
-    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-
-    manager = ceph_manager.CephManager(
-        mon,
-        ctx=ctx,
-        logger=log.getChild('ceph_manager'),
-        )
-
-    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
-    while len(manager.get_osd_status()['up']) < num_osds:
-        manager.sleep(10)
-
-    scrub_proc = Scrubber(
-        manager,
-        config,
-        )
-    try:
-        yield
-    finally:
-        log.info('joining scrub')
-        scrub_proc.do_join()
-
-class Scrubber:
-    """
-    Scrubbing is actually performed during initialization
-    """
-    def __init__(self, manager, config):
-        """
-        Wait for the cluster to become clean, then spawn the scrubbing thread.
-        """
-        self.ceph_manager = manager
-        self.ceph_manager.wait_for_clean()
-
-        osd_status = self.ceph_manager.get_osd_status()
-        self.osds = osd_status['up']
-
-        self.config = config
-        if self.config is None:
-            self.config = dict()
-
-        else:
-            def tmp(x):
-                """Local display"""
-                print x
-            self.log = tmp
-
-        self.stopping = False
-
-        log.info("spawning thread")
-
-        self.thread = gevent.spawn(self.do_scrub)
-
-    def do_join(self):
-        """Signal the scrubbing thread to stop and wait for it to finish."""
-        self.stopping = True
-        self.thread.get()
-
-    def do_scrub(self):
-        """Periodically scrub a random up OSD until asked to stop."""
-        frequency = self.config.get("frequency", 30)
-        deep = self.config.get("deep", 0)
-
-        log.info("stopping %s" % self.stopping)
-
-        while not self.stopping:
-            osd = str(random.choice(self.osds))
-
-            if deep:
-                cmd = 'deep-scrub'
-            else:
-                cmd = 'scrub'
-
-            log.info('%sbing %s' % (cmd, osd))
-            self.ceph_manager.raw_cluster_cmd('osd', cmd, osd)
-
-            time.sleep(frequency)
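
Scrubber.do_scrub() above is the whole of the background work: at the configured frequency it asks the cluster to scrub, or deep-scrub, one randomly chosen up OSD. A stand-alone sketch of the same loop driven by a plain thread instead of gevent; run_cluster_cmd is a hypothetical stand-in for CephManager.raw_cluster_cmd::

    import random
    import threading
    import time

    def scrub_loop(osds, run_cluster_cmd, stop_event, frequency=30, deep=False):
        cmd = 'deep-scrub' if deep else 'scrub'
        while not stop_event.is_set():
            osd = str(random.choice(osds))
            run_cluster_cmd('osd', cmd, osd)
            # sleep, but wake early if asked to stop
            stop_event.wait(frequency)

    stop = threading.Event()
    worker = threading.Thread(target=scrub_loop,
                              args=([0, 1, 2], lambda *a: None, stop),
                              kwargs={'frequency': 5})
    worker.start()
    time.sleep(12)   # let a couple of scrub requests go out
    stop.set()
    worker.join()
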
diff --git a/teuthology/task/scrub_test.py b/teuthology/task/scrub_test.py
deleted file mode 100644 (file)
index 3443ae9..0000000
+++ /dev/null
@@ -1,199 +0,0 @@
-"""Scrub testing"""
-from cStringIO import StringIO
-
-import logging
-import os
-import time
-
-import ceph_manager
-from teuthology import misc as teuthology
-
-log = logging.getLogger(__name__)
-
-def task(ctx, config):
-    """
-    Test [deep] scrub
-
-    tasks:
-    - chef:
-    - install:
-    - ceph:
-        log-whitelist:
-        - '!= known digest'
-        - '!= known omap_digest'
-        - deep-scrub 0 missing, 1 inconsistent objects
-        - deep-scrub 1 errors
-        - repair 0 missing, 1 inconsistent objects
-        - repair 1 errors, 1 fixed
-    - scrub_test: 
-    
-    """
-    if config is None:
-        config = {}
-    assert isinstance(config, dict), \
-        'scrub_test task only accepts a dict for configuration'
-    first_mon = teuthology.get_first_mon(ctx, config)
-    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-    
-    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
-    log.info('num_osds is %s' % num_osds)
-
-    manager = ceph_manager.CephManager(
-        mon,
-        ctx=ctx,
-        logger=log.getChild('ceph_manager'),
-        )
-
-    while len(manager.get_osd_status()['up']) < num_osds:
-        time.sleep(10)
-
-    for i in range(num_osds):
-        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'flush_pg_stats')
-    manager.wait_for_clean()
-
-    # write some data
-    p = manager.do_rados(mon, ['-p', 'rbd', 'bench', '--no-cleanup', '1', 'write', '-b', '4096'])
-    err = p.exitstatus
-    log.info('err is %d' % err)
-
-    # wait for some PG to have data that we can mess with
-    victim = None
-    osd = None
-    while victim is None:
-        stats = manager.get_pg_stats()
-        for pg in stats:
-            size = pg['stat_sum']['num_bytes']
-            if size > 0:
-                victim = pg['pgid']
-                osd = pg['acting'][0]
-                break
-
-        if victim is None:
-            time.sleep(3)
-
-    log.info('messing with PG %s on osd %d' % (victim, osd))
-
-    (osd_remote,) = ctx.cluster.only('osd.%d' % osd).remotes.iterkeys()
-    data_path = os.path.join(
-        '/var/lib/ceph/osd',
-        'ceph-{id}'.format(id=osd),
-        'current',
-        '{pg}_head'.format(pg=victim)
-        )
-
-    # fuzz time
-    ls_fp = StringIO()
-    osd_remote.run(
-        args=[ 'ls', data_path ],
-        stdout=ls_fp,
-    )
-    ls_out = ls_fp.getvalue()
-    ls_fp.close()
-
-    # find an object file we can mess with
-    osdfilename = None
-    for line in ls_out.split('\n'):
-        if 'object' in line:
-            osdfilename = line
-            break
-    assert osdfilename is not None
-
-    # Get actual object name from osd stored filename
-    # Derive the object name from the OSD's on-disk filename
-    # (the OSD escapes '_' as '\u' in stored names).
-    tmp = osdfilename.split('__')
-    objname = tmp[0]
-    objname = objname.replace('\u', '_')
-    log.info('fuzzing %s' % objname)
-
-    # put a single \0 at the beginning of the file
-    osd_remote.run(
-        args=[ 'sudo', 'dd',
-               'if=/dev/zero',
-               'of=%s' % os.path.join(data_path, osdfilename),
-               'bs=1', 'count=1', 'conv=notrunc'
-             ]
-    )
-
-    # scrub, verify inconsistent
-    manager.raw_cluster_cmd('pg', 'deep-scrub', victim)
-    # Give deep-scrub a chance to start
-    time.sleep(60)
-
-    while True:
-        stats = manager.get_single_pg_stats(victim)
-        state = stats['state']
-
-        # wait for the scrub to finish
-        if 'scrubbing' in state:
-            time.sleep(3)
-            continue
-
-        inconsistent = stats['state'].find('+inconsistent') != -1
-        assert inconsistent
-        break
-
-
-    # repair, verify no longer inconsistent
-    manager.raw_cluster_cmd('pg', 'repair', victim)
-    # Give repair a chance to start
-    time.sleep(60)
-
-    while True:
-        stats = manager.get_single_pg_stats(victim)
-        state = stats['state']
-
-        # wait for the scrub to finish
-        if 'scrubbing' in state:
-            time.sleep(3)
-            continue
-
-        inconsistent = stats['state'].find('+inconsistent') != -1
-        assert not inconsistent
-        break
-
-    # Test deep-scrub with various omap modifications
-    manager.do_rados(mon, ['-p', 'rbd', 'setomapval', objname, 'key', 'val'])
-    manager.do_rados(mon, ['-p', 'rbd', 'setomapheader', objname, 'hdr'])
-
-    # Modify omap on specific osd
-    log.info('fuzzing omap of %s' % objname)
-    manager.osd_admin_socket(osd, ['rmomapkey', 'rbd', objname, 'key'])
-    manager.osd_admin_socket(osd, ['setomapval', 'rbd', objname, 'badkey', 'badval'])
-    manager.osd_admin_socket(osd, ['setomapheader', 'rbd', objname, 'badhdr'])
-
-    # scrub, verify inconsistent
-    manager.raw_cluster_cmd('pg', 'deep-scrub', victim)
-    # Give deep-scrub a chance to start
-    time.sleep(60)
-
-    while True:
-        stats = manager.get_single_pg_stats(victim)
-        state = stats['state']
-
-        # wait for the scrub to finish
-        if 'scrubbing' in state:
-            time.sleep(3)
-            continue
-
-        inconsistent = stats['state'].find('+inconsistent') != -1
-        assert inconsistent
-        break
-
-    # repair, verify no longer inconsistent
-    manager.raw_cluster_cmd('pg', 'repair', victim)
-    # Give repair a chance to start
-    time.sleep(60)
-
-    while True:
-        stats = manager.get_single_pg_stats(victim)
-        state = stats['state']
-
-        # wait for the scrub to finish
-        if 'scrubbing' in state:
-            time.sleep(3)
-            continue
-
-        inconsistent = stats['state'].find('+inconsistent') != -1
-        assert not inconsistent
-        break
-
-    log.info('test successful!')
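
The four polling loops in this file repeat one pattern: wait for '(deep-)scrubbing' to clear from the PG state, then assert whether '+inconsistent' is present. A hypothetical helper (not part of the original task) capturing that pattern:

    import time

    def wait_for_pg_state(get_state, expect_inconsistent, interval=3):
        """Poll a PG state string until scrubbing ends, then check consistency.

        get_state is a callable returning the current state string, e.g.
        lambda: manager.get_single_pg_stats(victim)['state'].
        """
        while True:
            state = get_state()
            if 'scrubbing' in state:
                time.sleep(interval)
                continue
            assert ('+inconsistent' in state) == expect_inconsistent
            return state
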
diff --git a/teuthology/task/sequential.py b/teuthology/task/sequential.py
deleted file mode 100644 (file)
index 690d60f..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-"""
-Task sequencer
-"""
-import sys
-import logging
-
-from teuthology import run_tasks
-
-log = logging.getLogger(__name__)
-
-
-def task(ctx, config):
-    """
-    Sequentialize a group of tasks into one executable block
-
-    example:
-    - sequential:
-       - tasktest:
-       - tasktest:
-
-    You can also reference a task defined elsewhere in the top-level config:
-
-    foo:
-      tasktest:
-    tasks:
-    - sequential:
-      - tasktest:
-      - foo
-      - tasktest:
-
-    That is, if the entry is not a dict, we will look it up in the top-level
-    config.
-
-    Sequential tasks and Parallel tasks can be nested.
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    stack = []
-    try:
-        for entry in config:
-            if not isinstance(entry, dict):
-                entry = ctx.config.get(entry, {})
-            ((taskname, confg),) = entry.iteritems()
-            log.info('In sequential, running task %s...' % taskname)
-            mgr = run_tasks.run_one_task(taskname, ctx=ctx, config=confg)
-            if hasattr(mgr, '__enter__'):
-                mgr.__enter__()
-                stack.append(mgr)
-    finally:
-        try:
-            exc_info = sys.exc_info()
-            while stack:
-                mgr = stack.pop()
-                mgr.__exit__(*exc_info)
-        finally:
-            del exc_info
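
For comparison only: on Python 3.3+ the enter-in-order, unwind-in-reverse behaviour that this task builds by hand with a list and sys.exc_info() can be written with contextlib.ExitStack. A self-contained sketch:

    from contextlib import ExitStack, contextmanager

    @contextmanager
    def step(name):
        print('entering %s' % name)
        try:
            yield
        finally:
            print('leaving %s' % name)

    with ExitStack() as stack:
        for name in ['tasktest-a', 'tasktest-b']:
            stack.enter_context(step(name))
        print('body runs after both steps have been entered')
    # on exit: leaves tasktest-b first, then tasktest-a
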
diff --git a/teuthology/task/sleep.py b/teuthology/task/sleep.py
deleted file mode 100644 (file)
index 4e36d59..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-"""
-Sleep task
-"""
-import logging
-import time
-
-log = logging.getLogger(__name__)
-
-
-def task(ctx, config):
-    """
-    Sleep for some number of seconds.
-
-    Example::
-
-
-       tasks:
-       - install:
-       - ceph:
-       - sleep:
-           duration: 10
-       - interactive:
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    if not config:
-        config = {}
-    assert isinstance(config, dict)
-    duration = int(config.get('duration', 5))
-    log.info('Sleeping for %d', duration)
-    time.sleep(duration)
diff --git a/teuthology/task/ssh_keys.py b/teuthology/task/ssh_keys.py
deleted file mode 100644 (file)
index 6f1dbe1..0000000
+++ /dev/null
@@ -1,180 +0,0 @@
-#!/usr/bin/python
-"""
-SSH key handlers and associated routines
-"""
-import contextlib
-import logging
-import paramiko
-import re
-
-from cStringIO import StringIO
-from teuthology import contextutil
-import teuthology.misc as misc
-from ..orchestra import run
-
-log = logging.getLogger(__name__)
-ssh_keys_user = 'ssh-keys-user'
-
-def generate_keys():
-    """
-    Generates a public/private RSA key pair
-    """
-    key = paramiko.RSAKey.generate(2048)
-    privateString = StringIO()
-    key.write_private_key(privateString)
-    return key.get_base64(), privateString.getvalue()
-
-def particular_ssh_key_test(line_to_test, ssh_key):
-    """
-    Return False if the line contains the given public key, True otherwise
-    """
-    match = re.match('[\w-]+ {key} \S+@\S+'.format(key=re.escape(ssh_key)), line_to_test)
-
-    if match:
-        return False
-    else:
-        return True
-
-def ssh_keys_user_line_test(line_to_test, username ):
-    """
-    Return False if the line's user@host comment matches the given username, True otherwise
-    """
-    match = re.match('[\w-]+ \S+ {username}@\S+'.format(username=username), line_to_test)
-
-    if match:
-        return False
-    else:
-        return True
-
-def cleanup_added_key(ctx):
-    """
-    Delete the keys and remove the ~/.ssh/authorized_keys2 entries we added
-    """
-    log.info('cleaning up keys added for testing')
-
-    for remote in ctx.cluster.remotes:
-        username, hostname = str(remote).split('@')
-        if "" == username or "" == hostname:
-            continue
-        else:
-            log.info('  cleaning up keys for user {user} on {host}'.format(host=hostname, user=username))
-            misc.delete_file(remote, '/home/{user}/.ssh/id_rsa'.format(user=username))
-            misc.delete_file(remote, '/home/{user}/.ssh/id_rsa.pub'.format(user=username))
-            misc.delete_file(remote, '/home/{user}/.ssh/authorized_keys2'.format(user=username))
-
-@contextlib.contextmanager
-def tweak_ssh_config(ctx, config):   
-    """ 
-    Turn off StrictHostKeyChecking
-    """
-    run.wait(
-        ctx.cluster.run(
-            args=[
-                'echo',
-                'StrictHostKeyChecking no\n',
-                run.Raw('>'),
-                run.Raw('/home/ubuntu/.ssh/config'),
-                run.Raw('&&'),
-                'echo',
-                'UserKnownHostsFile ',
-                run.Raw('/dev/null'),
-                run.Raw('>>'),
-                run.Raw('/home/ubuntu/.ssh/config'),
-                run.Raw('&&'),
-                run.Raw('chmod 600 /home/ubuntu/.ssh/config'),
-            ],
-            wait=False,
-        )
-    )
-
-    try: 
-        yield
-
-    finally:
-        run.wait(
-            ctx.cluster.run(
-                args=['rm',run.Raw('/home/ubuntu/.ssh/config')],
-            wait=False
-            ),
-        )
-
-@contextlib.contextmanager
-def push_keys_to_host(ctx, config, public_key, private_key):   
-    """
-    Push keys to all hosts
-    """
-    log.info('generated public key {pub_key}'.format(pub_key=public_key))
-
-    # add an entry for all hosts in ctx to auth_keys_data
-    auth_keys_data = ''
-
-    for inner_host in ctx.cluster.remotes.iterkeys():
-        inner_username, inner_hostname = str(inner_host).split('@')
-        # create a 'user@hostname' string using the shared fake username
-        fake_hostname = '{user}@{host}'.format(user=ssh_keys_user, host=str(inner_hostname))
-        auth_keys_data += '\nssh-rsa {pub_key} {user_host}\n'.format(pub_key=public_key, user_host=fake_hostname)
-
-    # for each host in ctx, add keys for all other hosts
-    for remote in ctx.cluster.remotes:
-        username, hostname = str(remote).split('@')
-        if "" == username or "" == hostname:
-            continue
-        else:
-            log.info('pushing keys to {host} for {user}'.format(host=hostname, user=username))
-
-            # adding a private key
-            priv_key_file = '/home/{user}/.ssh/id_rsa'.format(user=username)
-            priv_key_data = '{priv_key}'.format(priv_key=private_key)
-            misc.delete_file(remote, priv_key_file, force=True)
-            # Hadoop requires that .ssh/id_rsa have permissions of '500'
-            misc.create_file(remote, priv_key_file, priv_key_data, str(500))
-
-            # then the matching public key
-            pub_key_file = '/home/{user}/.ssh/id_rsa.pub'.format(user=username)
-            pub_key_data = 'ssh-rsa {pub_key} {user_host}'.format(pub_key=public_key, user_host=str(remote))
-            misc.delete_file(remote, pub_key_file, force=True)
-            misc.create_file(remote, pub_key_file, pub_key_data)
-
-            # adding appropriate entries to the authorized_keys2 file for this host
-            auth_keys_file = '/home/{user}/.ssh/authorized_keys2'.format(user=username)
-
-            # now add the list of keys for hosts in ctx to ~/.ssh/authorized_keys2
-            misc.create_file(remote, auth_keys_file, auth_keys_data, str(600))
-
-    try: 
-        yield
-
-    finally:
-        # cleanup the keys
-        log.info("Cleaning up SSH keys")
-        cleanup_added_key(ctx)
-
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Creates an RSA key pair, distributes the same pair to every host
-    listed in ctx.cluster, and adds every host to every other host's
-    authorized_keys2 list.
-
-    During cleanup it deletes .ssh/id_rsa, .ssh/id_rsa.pub and the
-    .ssh/authorized_keys2 file it created, leaving the pre-existing
-    .ssh/authorized_keys file untouched.
-    """
-
-    if config is None:
-        config = {}
-    assert isinstance(config, dict), \
-        "task hadoop only supports a dictionary for configuration"
-
-    # this does not need to do cleanup and does not depend on 
-    # ctx, so I'm keeping it outside of the nested calls
-    public_key_string, private_key_string = generate_keys()
-
-    with contextutil.nested(
-        lambda: tweak_ssh_config(ctx, config),
-        lambda: push_keys_to_host(ctx, config, public_key_string, private_key_string),
-        #lambda: tweak_ssh_config(ctx, config),
-        ):
-        yield
-
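
A short illustration of the key material generate_keys() produces and of the authorized_keys2 line format that push_keys_to_host() writes and the *_line_test helpers match. It assumes paramiko is installed; the user and host names are made up:

    import io
    import paramiko

    key = paramiko.RSAKey.generate(2048)
    private_buf = io.StringIO()
    key.write_private_key(private_buf)      # PEM-encoded private key text
    public_b64 = key.get_base64()           # base64 body of the public key

    # Shape of the line appended to ~/.ssh/authorized_keys2:
    line = 'ssh-rsa {pub} {user}@{host}'.format(
        pub=public_b64, user='ssh-keys-user', host='example-host')
    print(line[:60] + '...')
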
diff --git a/teuthology/task/swift.py b/teuthology/task/swift.py
deleted file mode 100644 (file)
index 6285eb6..0000000
+++ /dev/null
@@ -1,258 +0,0 @@
-"""
-Test the Swift API.
-"""
-from cStringIO import StringIO
-from configobj import ConfigObj
-import base64
-import contextlib
-import logging
-import os
-
-from teuthology import misc as teuthology
-from teuthology import contextutil
-from ..config import config as teuth_config
-from ..orchestra import run
-from ..orchestra.connection import split_user
-
-log = logging.getLogger(__name__)
-
-
-@contextlib.contextmanager
-def download(ctx, config):
-    """
-    Download the Swift test suite source.
-    """
-    testdir = teuthology.get_testdir(ctx)
-    assert isinstance(config, list)
-    log.info('Downloading swift...')
-    for client in config:
-        ctx.cluster.only(client).run(
-            args=[
-                'git', 'clone',
-                teuth_config.ceph_git_base_url + 'swift.git',
-                '{tdir}/swift'.format(tdir=testdir),
-                ],
-            )
-    try:
-        yield
-    finally:
-        log.info('Removing swift...')
-        testdir = teuthology.get_testdir(ctx)
-        for client in config:
-            ctx.cluster.only(client).run(
-                args=[
-                    'rm',
-                    '-rf',
-                    '{tdir}/swift'.format(tdir=testdir),
-                    ],
-                )
-
-def _config_user(testswift_conf, account, user, suffix):
-    """
-    Configure a swift user
-
-    :param account: Swift account
-    :param user: User name
-    :param suffix: suffix appended to the config key names (e.g. '' or '2').
-    """
-    testswift_conf['func_test'].setdefault('account{s}'.format(s=suffix), account)
-    testswift_conf['func_test'].setdefault('username{s}'.format(s=suffix), user)
-    testswift_conf['func_test'].setdefault('email{s}'.format(s=suffix), '{account}+test@test.test'.format(account=account))
-    testswift_conf['func_test'].setdefault('display_name{s}'.format(s=suffix), 'Mr. {account} {user}'.format(account=account, user=user))
-    testswift_conf['func_test'].setdefault('password{s}'.format(s=suffix), base64.b64encode(os.urandom(40)))
-
-@contextlib.contextmanager
-def create_users(ctx, config):
-    """
-    Create rgw users to interact with the swift interface.
-    """
-    assert isinstance(config, dict)
-    log.info('Creating rgw users...')
-    testdir = teuthology.get_testdir(ctx)
-    users = {'': 'foo', '2': 'bar'}
-    for client in config['clients']:
-        testswift_conf = config['testswift_conf'][client]
-        for suffix, user in users.iteritems():
-            _config_user(testswift_conf, '{user}.{client}'.format(user=user, client=client), user, suffix)
-            ctx.cluster.only(client).run(
-                args=[
-                    'adjust-ulimits',
-                    'ceph-coverage',
-                    '{tdir}/archive/coverage'.format(tdir=testdir),
-                    'radosgw-admin',
-                    '-n', client,
-                    'user', 'create',
-                    '--subuser', '{account}:{user}'.format(account=testswift_conf['func_test']['account{s}'.format(s=suffix)],user=user),
-                    '--display-name', testswift_conf['func_test']['display_name{s}'.format(s=suffix)],
-                    '--secret', testswift_conf['func_test']['password{s}'.format(s=suffix)],
-                    '--email', testswift_conf['func_test']['email{s}'.format(s=suffix)],
-                    '--key-type', 'swift',
-                ],
-            )
-    try:
-        yield
-    finally:
-        for client in config['clients']:
-            for user in users.itervalues():
-                uid = '{user}.{client}'.format(user=user, client=client)
-                ctx.cluster.only(client).run(
-                    args=[
-                        'adjust-ulimits',
-                        'ceph-coverage',
-                        '{tdir}/archive/coverage'.format(tdir=testdir),
-                        'radosgw-admin',
-                        '-n', client,
-                        'user', 'rm',
-                        '--uid', uid,
-                        '--purge-data',
-                        ],
-                    )
-
-@contextlib.contextmanager
-def configure(ctx, config):
-    """
-    Configure rgw and Swift
-    """
-    assert isinstance(config, dict)
-    log.info('Configuring testswift...')
-    testdir = teuthology.get_testdir(ctx)
-    for client, properties in config['clients'].iteritems():
-        log.info('client={c}'.format(c=client))
-        log.info('config={c}'.format(c=config))
-        testswift_conf = config['testswift_conf'][client]
-        if properties is not None and 'rgw_server' in properties:
-            host = None
-            for target, roles in zip(ctx.config['targets'].iterkeys(), ctx.config['roles']):
-                log.info('roles: ' + str(roles))
-                log.info('target: ' + str(target))
-                if properties['rgw_server'] in roles:
-                    _, host = split_user(target)
-            assert host is not None, "Invalid client specified as the rgw_server"
-            testswift_conf['func_test']['auth_host'] = host
-        else:
-            testswift_conf['func_test']['auth_host'] = 'localhost'
-
-        log.info(client)
-        (remote,) = ctx.cluster.only(client).remotes.keys()
-        remote.run(
-            args=[
-                'cd',
-                '{tdir}/swift'.format(tdir=testdir),
-                run.Raw('&&'),
-                './bootstrap',
-                ],
-            )
-        conf_fp = StringIO()
-        testswift_conf.write(conf_fp)
-        teuthology.write_file(
-            remote=remote,
-            path='{tdir}/archive/testswift.{client}.conf'.format(tdir=testdir, client=client),
-            data=conf_fp.getvalue(),
-            )
-    yield
-
-
-@contextlib.contextmanager
-def run_tests(ctx, config):
-    """
-    Run the Swift functional tests (via nosetests) for each configured client.
-    """
-    assert isinstance(config, dict)
-    testdir = teuthology.get_testdir(ctx)
-    for client, client_config in config.iteritems():
-        args = [
-                'SWIFT_TEST_CONFIG_FILE={tdir}/archive/testswift.{client}.conf'.format(tdir=testdir, client=client),
-                '{tdir}/swift/virtualenv/bin/nosetests'.format(tdir=testdir),
-                '-w',
-                '{tdir}/swift/test/functional'.format(tdir=testdir),
-                '-v',
-                '-a', '!fails_on_rgw',
-                ]
-        if client_config is not None and 'extra_args' in client_config:
-            args.extend(client_config['extra_args'])
-
-        ctx.cluster.only(client).run(
-            args=args,
-            )
-    yield
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Run the testswift suite against rgw.
-
-    To run all tests on all clients::
-
-        tasks:
-        - ceph:
-        - rgw:
-        - testswift:
-
-    To restrict testing to particular clients::
-
-        tasks:
-        - ceph:
-        - rgw: [client.0]
-        - testswift: [client.0]
-
-    To run against a server on client.1::
-
-        tasks:
-        - ceph:
-        - rgw: [client.1]
-        - testswift:
-            client.0:
-              rgw_server: client.1
-
-    To pass extra arguments to nose (e.g. to run a certain test)::
-
-        tasks:
-        - ceph:
-        - rgw: [client.0]
-        - testswift:
-            client.0:
-              extra_args: ['test.functional.tests:TestFileUTF8', '-m', 'testCopy']
-            client.1:
-              extra_args: ['--exclude', 'TestFile']
-    """
-    assert config is None or isinstance(config, list) \
-        or isinstance(config, dict), \
-        "task testswift only supports a list or dictionary for configuration"
-    all_clients = ['client.{id}'.format(id=id_)
-                   for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
-    if config is None:
-        config = all_clients
-    if isinstance(config, list):
-        config = dict.fromkeys(config)
-    clients = config.keys()
-
-    log.info('clients={c}'.format(c=clients))
-
-    testswift_conf = {}
-    for client in clients:
-        testswift_conf[client] = ConfigObj(
-                indent_type='',
-                infile={
-                    'func_test':
-                        {
-                        'auth_port'      : 7280,
-                        'auth_ssl' : 'no',
-                        'auth_prefix' : '/auth/',
-                        },
-                    }
-                )
-
-    with contextutil.nested(
-        lambda: download(ctx=ctx, config=clients),
-        lambda: create_users(ctx=ctx, config=dict(
-                clients=clients,
-                testswift_conf=testswift_conf,
-                )),
-        lambda: configure(ctx=ctx, config=dict(
-                clients=config,
-                testswift_conf=testswift_conf,
-                )),
-        lambda: run_tests(ctx=ctx, config=config),
-        ):
-        pass
-    yield
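
A rough illustration, not taken from the original code, of the [func_test] values that _config_user() and configure() end up writing to testswift.client.0.conf for the default users {'': 'foo', '2': 'bar'}; the '2'-suffixed keys for the second user follow the same pattern:

    import base64
    import os

    func_test = {
        'auth_port': 7280,
        'auth_ssl': 'no',
        'auth_prefix': '/auth/',
        'auth_host': 'localhost',   # or the rgw_server host, per configure()
        'account': 'foo.client.0',
        'username': 'foo',
        'email': 'foo.client.0+test@test.test',
        'display_name': 'Mr. foo.client.0 foo',
        'password': base64.b64encode(os.urandom(40)).decode('ascii'),
    }
    for key, value in sorted(func_test.items()):
        print('%s = %s' % (key, value))
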
diff --git a/teuthology/task/tasktest.py b/teuthology/task/tasktest.py
deleted file mode 100644 (file)
index 74a12c2..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-"""
-Parallel and sequential task tester.  Not used by any ceph tests, but used to
-unit test the parallel and sequential tasks
-"""
-import logging
-import contextlib
-import time
-
-log = logging.getLogger(__name__)
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Task that just displays information when it is created and when it is
-    destroyed/cleaned up.  This task was used to test parallel and
-    sequential task options.
-
-    example:
-
-    tasks:
-    - sequential:
-        - tasktest:
-            - id: 'foo'
-        - tasktest:
-            - id: 'bar'
-            - delay: 5
-    - tasktest:
-
-    The above yaml will sequentially start a test task named foo and a test
-    task named bar.  Bar will take 5 seconds to complete.  After foo and bar
-    have finished, an unidentified tasktest task will run.
-    """
-    try:
-        delay = config.get('delay', 0)
-        id = config.get('id', 'UNKNOWN')
-    except AttributeError:
-        delay = 0
-        id = 'UNKNOWN'
-    try:
-        log.info('**************************************************')
-        log.info('Started task test -- %s' % id)
-        log.info('**************************************************')
-        time.sleep(delay)
-        yield
-
-    finally:
-        log.info('**************************************************')
-        log.info('Task test is being cleaned up -- %s' % id)
-        log.info('**************************************************')
-
diff --git a/teuthology/task/test/__init__.py b/teuthology/task/test/__init__.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/teuthology/task/test/test_devstack.py b/teuthology/task/test/test_devstack.py
deleted file mode 100644 (file)
index 117b307..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-from textwrap import dedent
-
-from .. import devstack
-
-
-class TestDevstack(object):
-    def test_parse_os_table(self):
-        table_str = dedent("""
-            +---------------------+--------------------------------------+
-            |       Property      |                Value                 |
-            +---------------------+--------------------------------------+
-            |     attachments     |                  []                  |
-            |  availability_zone  |                 nova                 |
-            |       bootable      |                false                 |
-            |      created_at     |      2014-02-21T17:14:47.548361      |
-            | display_description |                 None                 |
-            |     display_name    |                 NAME                 |
-            |          id         | ffdbd1bb-60dc-4d95-acfe-88774c09ad3e |
-            |       metadata      |                  {}                  |
-            |         size        |                  1                   |
-            |     snapshot_id     |                 None                 |
-            |     source_volid    |                 None                 |
-            |        status       |               creating               |
-            |     volume_type     |                 None                 |
-            +---------------------+--------------------------------------+
-            """).strip()
-        expected = {
-            'Property': 'Value',
-            'attachments': '[]',
-            'availability_zone': 'nova',
-            'bootable': 'false',
-            'created_at': '2014-02-21T17:14:47.548361',
-            'display_description': 'None',
-            'display_name': 'NAME',
-            'id': 'ffdbd1bb-60dc-4d95-acfe-88774c09ad3e',
-            'metadata': '{}',
-            'size': '1',
-            'snapshot_id': 'None',
-            'source_volid': 'None',
-            'status': 'creating',
-            'volume_type': 'None'}
-
-        vol_info = devstack.parse_os_table(table_str)
-        assert vol_info == expected
-
-
-
-
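
The devstack.parse_os_table under test is defined in tasks/devstack.py elsewhere in this commit; a minimal implementation compatible with the expectation above could look like this sketch (illustrative only, not the actual code):

    def parse_os_table(table_str):
        out = {}
        for line in table_str.splitlines():
            line = line.strip()
            if not line.startswith('|'):
                continue  # skip the +----+ border rows
            cells = [cell.strip() for cell in line.strip('|').split('|')]
            if len(cells) == 2:
                out[cells[0]] = cells[1]
        return out
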
diff --git a/teuthology/task/tgt.py b/teuthology/task/tgt.py
deleted file mode 100644 (file)
index c2b322e..0000000
+++ /dev/null
@@ -1,177 +0,0 @@
-"""
-Task to handle tgt
-
-Assumptions made:
-    The ceph-extras tgt package may need to get installed.
-    The open-iscsi package needs to get installed.
-"""
-import logging
-import contextlib
-
-from teuthology import misc as teuthology
-from teuthology import contextutil
-
-log = logging.getLogger(__name__)
-
-
-@contextlib.contextmanager
-def start_tgt_remotes(ctx, start_tgtd):
-    """
-    This subtask starts up a tgtd on the clients specified
-    """
-    remotes = ctx.cluster.only(teuthology.is_type('client')).remotes
-    tgtd_list = []
-    for rem, roles in remotes.iteritems():
-        for _id in roles:
-            if _id in start_tgtd:
-                if not rem in tgtd_list:
-                    tgtd_list.append(rem)
-                    size = ctx.config.get('image_size', 10240)
-                    rem.run(
-                        args=[
-                            'rbd',
-                            'create',
-                            'iscsi-image',
-                            '--size',
-                            str(size),
-                    ])
-                    rem.run(
-                        args=[
-                            'sudo',
-                            'tgtadm',
-                            '--lld',
-                            'iscsi',
-                            '--mode',
-                            'target',
-                            '--op',
-                            'new',
-                            '--tid',
-                            '1',
-                            '--targetname',
-                            'rbd',
-                        ])
-                    rem.run(
-                        args=[
-                            'sudo',
-                            'tgtadm',
-                            '--lld',
-                            'iscsi',
-                            '--mode',
-                            'logicalunit',
-                            '--op',
-                            'new',
-                            '--tid',
-                            '1',
-                            '--lun',
-                            '1',
-                            '--backing-store',
-                            'iscsi-image',
-                            '--bstype',
-                            'rbd',
-                        ])
-                    rem.run(
-                        args=[
-                            'sudo',
-                            'tgtadm',
-                            '--lld',
-                            'iscsi',
-                            '--op',
-                            'bind',
-                            '--mode',
-                            'target',
-                            '--tid',
-                            '1',
-                            '-I',
-                            'ALL',
-                        ])
-    try:
-        yield
-
-    finally:
-        for rem in tgtd_list:
-            rem.run(
-                args=[
-                    'sudo',
-                    'tgtadm',
-                    '--lld',
-                    'iscsi',
-                    '--mode',
-                    'target',
-                    '--op',
-                    'delete',
-                    '--force',
-                    '--tid',
-                    '1',
-                ])
-            rem.run(
-                args=[
-                    'rbd',
-                    'snap',
-                    'purge',
-                    'iscsi-image',
-                ])
-            rem.run(
-                args=[
-                    'sudo',
-                    'rbd',
-                    'rm',
-                    'iscsi-image',
-                ])
-
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Start up tgt.
-
-    To start on all clients::
-
-        tasks:
-        - ceph:
-        - tgt:
-
-    To start on certain clients::
-
-        tasks:
-        - ceph:
-        - tgt: [client.0, client.3]
-
-    or
-
-        tasks:
-        - ceph:
-        - tgt:
-            client.0:
-            client.3:
-
-    An image size can also be specified::
-
-        tasks:
-        - ceph:
-        - tgt:
-            image_size: 20480
-
-    The general flow of things here is:
-        1. Find clients on which tgt is supposed to run (start_tgtd)
-        2. Remotely start up tgt daemon
-    On cleanup:
-        3. Stop tgt daemon
-
-    The iscsi administration is handled by the iscsi task.
-    """
-    if config:
-        config = {key : val for key, val in config.items()
-                if key.startswith('client')}
-    # config at this point should only contain keys starting with 'client'
-    start_tgtd = []
-    remotes = ctx.cluster.only(teuthology.is_type('client')).remotes
-    log.info(remotes)
-    if not config:
-        start_tgtd = ['client.{id}'.format(id=id_)
-            for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
-    else:
-        start_tgtd = config
-    log.info(start_tgtd)
-    with contextutil.nested(
-            lambda: start_tgt_remotes(ctx=ctx, start_tgtd=start_tgtd),):
-        yield
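
A condensed, read-only summary of the per-remote setup sequence that start_tgt_remotes() issues, mirroring the args lists above:

    image_size = 10240  # default from ctx.config.get('image_size', 10240)
    setup_cmds = [
        ['rbd', 'create', 'iscsi-image', '--size', str(image_size)],
        ['sudo', 'tgtadm', '--lld', 'iscsi', '--mode', 'target',
         '--op', 'new', '--tid', '1', '--targetname', 'rbd'],
        ['sudo', 'tgtadm', '--lld', 'iscsi', '--mode', 'logicalunit',
         '--op', 'new', '--tid', '1', '--lun', '1',
         '--backing-store', 'iscsi-image', '--bstype', 'rbd'],
        ['sudo', 'tgtadm', '--lld', 'iscsi', '--op', 'bind',
         '--mode', 'target', '--tid', '1', '-I', 'ALL'],
    ]
    for cmd in setup_cmds:
        print(' '.join(cmd))
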
diff --git a/teuthology/task/thrashosds.py b/teuthology/task/thrashosds.py
deleted file mode 100644 (file)
index ba166ed..0000000
+++ /dev/null
@@ -1,179 +0,0 @@
-"""
-Thrash -- Simulate random osd failures.
-"""
-import contextlib
-import logging
-import ceph_manager
-from teuthology import misc as teuthology
-
-
-log = logging.getLogger(__name__)
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    "Thrash" the OSDs by randomly marking them out/down (and then back
-    in) until the task is ended. This loops, and every op_delay
-    seconds it randomly chooses to mark an OSD out or back in (even odds),
-    while always keeping at least min_in OSDs in the cluster and at least
-    min_out OSDs out of it.
-
-    All commands are run on mon0 and it stops when __exit__ is called.
-
-    The config is optional, and is a dict containing some or all of:
-
-    min_in: (default 3) the minimum number of OSDs to keep in the
-       cluster
-
-    min_out: (default 0) the minimum number of OSDs to keep out of the
-       cluster
-
-    op_delay: (5) the length of time to sleep between changing an
-       OSD's status
-
-    min_dead: (0) minimum number of osds to leave down/dead.
-
-    max_dead: (0) maximum number of osds to leave down/dead before waiting
-       for clean.  This should probably be num_replicas - 1.
-
-    clean_interval: (60) the approximate length of time to loop before
-       waiting until the cluster goes clean. (In reality this is used
-       to probabilistically choose when to wait, and the method used
-       makes it closer to -- but not identical to -- the half-life.)
-
-    scrub_interval: (-1) the approximate length of time to loop before
-       waiting until a scrub is performed while cleaning. (In reality
-       this is used to probabilistically choose when to wait, and it
-       only applies to the cases where cleaning is being performed). 
-       -1 is used to indicate that no scrubbing will be done.
-  
-    chance_down: (0.4) the probability that the thrasher will mark an
-       OSD down rather than marking it out. (The thrasher will not
-       consider that OSD out of the cluster, since presently an OSD
-       wrongly marked down will mark itself back up again.) This value
-       can be either an integer (eg, 75) or a float probability (eg
-       0.75).
-
-    chance_test_min_size: (0) chance to run test_pool_min_size,
-       which:
-       - kills all but one osd
-       - waits
-       - kills that osd
-       - revives all other osds
-       - verifies that the osds fully recover
-
-    timeout: (360) the number of seconds to wait for the cluster
-       to become clean after each cluster change. If this doesn't
-       happen within the timeout, an exception will be raised.
-
-    revive_timeout: (75) number of seconds to wait for an osd asok to
-       appear after attempting to revive the osd
-
-    thrash_primary_affinity: (true) randomly adjust primary-affinity
-
-    chance_pgnum_grow: (0) chance to increase a pool's size
-    chance_pgpnum_fix: (0) chance to adjust pgpnum to pg for a pool
-    pool_grow_by: (10) amount to increase pgnum by
-    max_pgs_per_pool_osd: (1200) don't expand pools past this size per osd
-
-    pause_short: (3) duration of short pause
-    pause_long: (80) duration of long pause
-    pause_check_after: (50) assert osd down after this long
-    chance_inject_pause_short: (1) chance of injecting short stall
-    chance_inject_pause_long: (0) chance of injecting long stall
-
-    clean_wait: (0) duration to wait before resuming thrashing once clean
-
-    powercycle: (false) whether to power cycle the node instead
-        of just the osd process. Note that this assumes that a single
-        osd is the only important process on the node.
-
-    chance_test_backfill_full: (0) chance to simulate full disks stopping
-        backfill
-
-    chance_test_map_discontinuity: (0) chance to test map discontinuity
-    map_discontinuity_sleep_time: (40) time to wait for map trims
-
-    example:
-
-    tasks:
-    - ceph:
-    - thrashosds:
-        chance_down: 10
-        op_delay: 3
-        min_in: 1
-        timeout: 600
-    - interactive:
-    """
-    if config is None:
-        config = {}
-    assert isinstance(config, dict), \
-        'thrashosds task only accepts a dict for configuration'
-
-    if 'powercycle' in config:
-
-        # sync everyone first to avoid collateral damage to / etc.
-        log.info('Doing preliminary sync to avoid collateral damage...')
-        ctx.cluster.run(args=['sync'])
-
-        if 'ipmi_user' in ctx.teuthology_config:
-            for t, key in ctx.config['targets'].iteritems():
-                host = t.split('@')[-1]
-                shortname = host.split('.')[0]
-                from ..orchestra import remote as oremote
-                console = oremote.getRemoteConsole(
-                    name=host,
-                    ipmiuser=ctx.teuthology_config['ipmi_user'],
-                    ipmipass=ctx.teuthology_config['ipmi_password'],
-                    ipmidomain=ctx.teuthology_config['ipmi_domain'])
-                cname = '{host}.{domain}'.format(
-                    host=shortname,
-                    domain=ctx.teuthology_config['ipmi_domain'])
-                log.debug('checking console status of %s' % cname)
-                if not console.check_status():
-                    log.info(
-                        'Failed to get console status for '
-                        '%s, disabling console...'
-                        % cname)
-                    console=None
-                else:
-                    # find the remote for this console and add it
-                    remotes = [
-                        r for r in ctx.cluster.remotes.keys() if r.name == t]
-                    if len(remotes) != 1:
-                        raise Exception(
-                            'Too many (or too few) remotes '
-                            'found for target {t}'.format(t=t))
-                    remotes[0].console = console
-                    log.debug('console ready on %s' % cname)
-
-            # check that all osd remotes have a valid console
-            osds = ctx.cluster.only(teuthology.is_type('osd'))
-            for remote, _ in osds.remotes.iteritems():
-                if not remote.console:
-                    raise Exception(
-                        'IPMI console required for powercycling, '
-                        'but not available on osd role: {r}'.format(
-                            r=remote.name))
-
-    log.info('Beginning thrashosds...')
-    first_mon = teuthology.get_first_mon(ctx, config)
-    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-    manager = ceph_manager.CephManager(
-        mon,
-        ctx=ctx,
-        config=config,
-        logger=log.getChild('ceph_manager'),
-        )
-    ctx.manager = manager
-    thrash_proc = ceph_manager.Thrasher(
-        manager,
-        config,
-        logger=log.getChild('thrasher')
-        )
-    try:
-        yield
-    finally:
-        log.info('joining thrashosds')
-        thrash_proc.do_join()
-        manager.wait_for_recovery(config.get('timeout', 360))
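
As the docstring notes, the chance_* options accept either an integer percentage (75) or a float probability (0.75). A hypothetical normaliser, not present in the original code, showing the intended interpretation:

    def normalize_chance(value):
        """Return a probability in [0.0, 1.0] from e.g. 0.4, 40 or '40'."""
        value = float(value)
        return value / 100.0 if value > 1.0 else value

    assert normalize_chance(0.4) == 0.4
    assert normalize_chance(75) == 0.75
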
diff --git a/teuthology/task/timer.py b/teuthology/task/timer.py
deleted file mode 100644 (file)
index d47830f..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-"""
-Timer task
-"""
-import logging
-import contextlib
-import datetime
-
-log = logging.getLogger(__name__)
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Timer
-
-    Measure the time that this set of tasks takes and save that value in the summary file.
-    Config is a description of what we are timing.
-
-    example:
-
-    tasks:
-    - ceph:
-    - foo:
-    - timer: "fsx run"
-    - fsx:
-
-    """
-    start = datetime.datetime.now()
-    log.debug("got here in timer")
-    try:
-        yield
-    finally:
-        nowinfo = datetime.datetime.now()
-        elapsed = nowinfo - start
-        datesaved = nowinfo.isoformat(' ')
-        hourz, remainder = divmod(elapsed.seconds, 3600)
-        minutez, secondz = divmod(remainder, 60)
-        elapsedtime = "%02d:%02d:%02d.%06d" % (hourz,minutez,secondz, elapsed.microseconds)
-        dateinfo = (datesaved, elapsedtime)
-        if 'timer' not in ctx.summary:
-            ctx.summary['timer'] = {config : [dateinfo]}
-        else:
-            if config in ctx.summary['timer']:
-                ctx.summary['timer'][config].append(dateinfo)
-            else:
-                ctx.summary['timer'][config] = [dateinfo]
-        log.info('Elapsed time for %s -- %s' % (config,elapsedtime))
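
The structure this task leaves in the run summary, shown with made-up values: each config label maps to a list of (ISO timestamp, HH:MM:SS.microseconds) tuples.

    ctx_summary = {
        'timer': {
            'fsx run': [
                ('2014-08-07 14:24:52.000000', '00:12:34.567890'),
            ],
        },
    }
    datesaved, elapsedtime = ctx_summary['timer']['fsx run'][0]
    print('Elapsed time for %s -- %s' % ('fsx run', elapsedtime))
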
diff --git a/teuthology/task/userdata_setup.yaml b/teuthology/task/userdata_setup.yaml
deleted file mode 100644 (file)
index eaa5f73..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-#cloud-config-archive
-
-- type: text/cloud-config
-  content: |
-    output:
-      all: '| tee -a /var/log/cloud-init-output.log'
-
-# allow passwordless access for debugging
-- |
-  #!/bin/bash
-  exec passwd -d ubuntu
-
-- |
-  #!/bin/bash
-
-  # mount a 9p fs for storing logs
-  mkdir /mnt/log
-  mount -t 9p -o trans=virtio test_log /mnt/log
-
-  # mount the iso image that has the test script
-  mkdir /mnt/cdrom
-  mount -t auto /dev/cdrom /mnt/cdrom
diff --git a/teuthology/task/userdata_teardown.yaml b/teuthology/task/userdata_teardown.yaml
deleted file mode 100644 (file)
index 7f3d64f..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-- |
-  #!/bin/bash
-  cp /var/log/cloud-init-output.log /mnt/log
-
-- |
-  #!/bin/bash
-  umount /mnt/log
-
-- |
-  #!/bin/bash
-  shutdown -h -P now
diff --git a/teuthology/task/valgrind.supp b/teuthology/task/valgrind.supp
deleted file mode 100644 (file)
index 4dfe04a..0000000
+++ /dev/null
@@ -1,182 +0,0 @@
-{
-   osd: ignore ec plugin loading (FIXME SOMEDAY)
-   Memcheck:Leak
-   ...
-   fun:*ErasureCodePluginRegistry*load*
-   ...
-}
-{
-   tcmalloc: msync heap allocation points to uninit bytes (precise)
-   Memcheck:Param
-   msync(start)
-   obj:/lib/x86_64-linux-gnu/libpthread-2.15.so
-   obj:/usr/lib/libunwind.so.7.0.0
-   fun:_ULx86_64_step
-   fun:_Z13GetStackTracePPvii
-   fun:_ZN8tcmalloc8PageHeap8GrowHeapEm
-   fun:_ZN8tcmalloc8PageHeap3NewEm
-   obj:/usr/lib/libtcmalloc.so.0.1.0
-}
-{
-   tcmalloc: msync heap allocation points to uninit bytes (trusty)
-   Memcheck:Param
-   msync(start)
-   obj:/lib/x86_64-linux-gnu/libpthread-2.19.so
-   obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1
-   obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1
-   obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1
-   obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1
-   fun:_ULx86_64_step
-   fun:_Z13GetStackTracePPvii
-   fun:_ZN8tcmalloc8PageHeap8GrowHeapEm
-   fun:_ZN8tcmalloc8PageHeap3NewEm
-   fun:_ZN8tcmalloc15CentralFreeList8PopulateEv
-   fun:_ZN8tcmalloc15CentralFreeList18FetchFromSpansSafeEv
-   fun:_ZN8tcmalloc15CentralFreeList11RemoveRangeEPPvS2_i
-}
-{
-   tcmalloc: msync heap allocation points to uninit bytes 2 (trusty)
-   Memcheck:Param
-   msync(start)
-   fun:__msync_nocancel
-   obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1
-   obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1
-   obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1
-   obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1
-   fun:_ULx86_64_step
-   fun:_Z13GetStackTracePPvii
-   fun:_ZN8tcmalloc8PageHeap8GrowHeapEm
-   fun:_ZN8tcmalloc8PageHeap3NewEm
-   fun:_ZN8tcmalloc15CentralFreeList8PopulateEv
-   fun:_ZN8tcmalloc15CentralFreeList18FetchFromSpansSafeEv
-   fun:_ZN8tcmalloc15CentralFreeList11RemoveRangeEPPvS2_i
-}
-{
-       tcmalloc: string
-       Memcheck:Leak
-       ...
-       obj:*tcmalloc*
-       fun:call_init.part.0
-       ...
-}
-{
-       ceph global: deliberate onexit leak
-       Memcheck:Leak
-       ...
-       fun:*set_flush_on_exit*
-       ...
-}
-{
-       libleveldb: ignore all static leveldb leaks
-       Memcheck:Leak
-       ...
-       fun:*leveldb*
-       ...
-}
-{
-       libleveldb: ignore all dynamic libleveldb leaks
-       Memcheck:Leak
-       ...
-       obj:*libleveldb.so*
-       ...
-}
-{
-       libcurl: ignore libcurl leaks
-       Memcheck:Leak
-       ...
-       fun:*curl_global_init
-}
-{
-       ignore gnutls leaks
-       Memcheck:Leak
-       ...
-       fun:gnutls_global_init
-}
-{
-       ignore libfcgi leak; OS_LibShutdown has no callers!
-       Memcheck:Leak
-       ...
-       fun:OS_LibInit
-       fun:FCGX_Init
-}
-{
-       ignore libnss3 leaks
-       Memcheck:Leak
-       ...
-       obj:*libnss3*
-       ...
-}
-{
-        strptime suckage
-        Memcheck:Cond
-        fun:__GI___strncasecmp_l
-        fun:__strptime_internal
-        ...
-}
-{
-        strptime suckage 2
-        Memcheck:Value8
-        fun:__GI___strncasecmp_l
-        fun:__strptime_internal
-        ...
-}
-{
-        strptime suckage 3
-        Memcheck:Addr8
-        fun:__GI___strncasecmp_l
-        fun:__strptime_internal
-        ...
-}
-{
-       inet_ntop does something lame on local stack
-       Memcheck:Value8
-       ...
-       fun:inet_ntop
-       ...
-}
-{
-       inet_ntop does something lame on local stack
-       Memcheck:Addr8
-       ...
-       fun:inet_ntop
-       ...
-}
-{
-       dl-lookup.c thing .. Invalid write of size 8
-       Memcheck:Value8
-       fun:do_lookup_x
-       ...
-       fun:_dl_lookup_symbol_x
-       ...
-}
-{
-       dl-lookup.c thing .. Invalid write of size 8
-       Memcheck:Addr8
-       fun:do_lookup_x
-       ...
-       fun:_dl_lookup_symbol_x
-       ...
-}
-{
-       weird thing from libc
-       Memcheck:Leak
-       ...
-       fun:*sub_I_comparator*
-       fun:__libc_csu_init
-       ...
-}
-{
-       libfuse leak
-       Memcheck:Leak
-       ...
-       fun:fuse_parse_cmdline
-       ...
-}
-{
-       boost thread leaks on exit
-       Memcheck:Leak
-       ...
-       fun:*boost*detail*
-       ...
-       fun:exit
-}
diff --git a/teuthology/task/watch_notify_stress.py b/teuthology/task/watch_notify_stress.py
deleted file mode 100644 (file)
index ab611c3..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-"""
-test_stress_watch task
-"""
-import contextlib
-import logging
-import proc_thrasher
-
-from ..orchestra import run
-
-log = logging.getLogger(__name__)
-
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    Run test_stress_watch
-
-    The config should be as follows:
-
-    test_stress_watch:
-        clients: [client list]
-
-    example:
-
-    tasks:
-    - ceph:
-    - test_stress_watch:
-        clients: [client.0]
-    - interactive:
-    """
-    log.info('Beginning test_stress_watch...')
-    assert isinstance(config, dict), \
-        "please list clients to run on"
-    testwatch = {}
-
-    remotes = []
-
-    for role in config.get('clients', ['client.0']):
-        assert isinstance(role, basestring)
-        PREFIX = 'client.'
-        assert role.startswith(PREFIX)
-        id_ = role[len(PREFIX):]
-        (remote,) = ctx.cluster.only(role).remotes.iterkeys()
-        remotes.append(remote)
-
-        args =['CEPH_CLIENT_ID={id_}'.format(id_=id_),
-               'CEPH_ARGS="{flags}"'.format(flags=config.get('flags', '')),
-               'daemon-helper',
-               'kill',
-               'multi_stress_watch foo foo'
-               ]
-
-        log.info("args are %s" % (args,))
-
-        proc = proc_thrasher.ProcThrasher({}, remote,
-            args=[run.Raw(i) for i in args],
-            logger=log.getChild('testwatch.{id}'.format(id=id_)),
-            stdin=run.PIPE,
-            wait=False
-            )
-        proc.start()
-        testwatch[id_] = proc
-
-    try:
-        yield
-    finally:
-        log.info('joining watch_notify_stress')
-        for i in testwatch.itervalues():
-            i.join()
diff --git a/teuthology/task/workunit.py b/teuthology/task/workunit.py
deleted file mode 100644 (file)
index b504eeb..0000000
+++ /dev/null
@@ -1,372 +0,0 @@
-"""
-Workunit task -- Run ceph on sets of specific clients
-"""
-import logging
-import pipes
-import os
-
-from teuthology import misc as teuthology
-from teuthology.parallel import parallel
-from ..orchestra import run
-
-log = logging.getLogger(__name__)
-
-
-def task(ctx, config):
-    """
-    Run ceph on all workunits found under the specified path.
-
-    For example::
-
-        tasks:
-        - ceph:
-        - ceph-fuse: [client.0]
-        - workunit:
-            clients:
-              client.0: [direct_io, xattrs.sh]
-              client.1: [snaps]
-            branch: foo
-
-    You can also run a list of workunits on all clients::
-
-        tasks:
-        - ceph:
-        - ceph-fuse:
-        - workunit:
-            tag: v0.47
-            clients:
-              all: [direct_io, xattrs.sh, snaps]
-
-    If you have an "all" section it will run all the workunits
-    on each client simultaneously, AFTER running any workunits specified
-    for individual clients. (This prevents unintended simultaneous runs.)
-
-    To customize tests, you can specify environment variables as a dict. You
-    can also specify a time limit for each work unit (defaults to 3h)::
-
-        tasks:
-        - ceph:
-        - ceph-fuse:
-        - workunit:
-            sha1: 9b28948635b17165d17c1cf83d4a870bd138ddf6
-            clients:
-              all: [snaps]
-            env:
-              FOO: bar
-              BAZ: quux
-            timeout: 3h
-
-    :param ctx: Context
-    :param config: Configuration
-    """
-    assert isinstance(config, dict)
-    assert isinstance(config.get('clients'), dict), \
-        'configuration must contain a dictionary of clients'
-
-    overrides = ctx.config.get('overrides', {})
-    teuthology.deep_merge(config, overrides.get('workunit', {}))
-
-    refspec = config.get('branch')
-    if refspec is None:
-        refspec = config.get('sha1')
-    if refspec is None:
-        refspec = config.get('tag')
-    if refspec is None:
-        refspec = 'HEAD'
-
-    timeout = config.get('timeout', '3h')
-
-    log.info('Pulling workunits from ref %s', refspec)
-
-    created_dir_dict = {}
-
-    if config.get('env') is not None:
-        assert isinstance(config['env'], dict), 'env must be a dictionary'
-    clients = config['clients']
-    log.info('Making a separate scratch dir for every client...')
-    for role in clients.iterkeys():
-        assert isinstance(role, basestring)
-        if role == "all":
-            continue
-        PREFIX = 'client.'
-        assert role.startswith(PREFIX)
-        created_mnt_dir = _make_scratch_dir(ctx, role, config.get('subdir'))
-        created_dir_dict[role] = created_mnt_dir
-
-    all_spec = False  # is there an "all" grouping?
-    with parallel() as p:
-        for role, tests in clients.iteritems():
-            if role != "all":
-                p.spawn(_run_tests, ctx, refspec, role, tests,
-                        config.get('env'), timeout=timeout)
-            else:
-                all_spec = True
-
-    if all_spec:
-        all_tasks = clients["all"]
-        _spawn_on_all_clients(ctx, refspec, all_tasks, config.get('env'),
-                              config.get('subdir'), timeout=timeout)
-
-    for role in clients.iterkeys():
-        assert isinstance(role, basestring)
-        if role == "all":
-            continue
-        PREFIX = 'client.'
-        assert role.startswith(PREFIX)
-        if created_dir_dict[role]:
-            _delete_dir(ctx, role)
-
-
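
The branch/sha1/tag fallback at the top of task() reduces to a simple precedence rule; a hypothetical helper, not in the original file, making it explicit:

    def resolve_refspec(config):
        """branch takes precedence, then sha1, then tag, then HEAD."""
        for key in ('branch', 'sha1', 'tag'):
            if config.get(key) is not None:
                return config[key]
        return 'HEAD'

    assert resolve_refspec({}) == 'HEAD'
    assert resolve_refspec({'sha1': 'abc123', 'tag': 'v0.47'}) == 'abc123'
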
-def _delete_dir(ctx, role):
-    """
-    Delete file used by this role, and delete the directory that this
-    role appeared in.
-
-    :param ctx: Context
-    :param role: "role.#" where # is used for the role id.
-    """
-    PREFIX = 'client.'
-    testdir = teuthology.get_testdir(ctx)
-    id_ = role[len(PREFIX):]
-    (remote,) = ctx.cluster.only(role).remotes.iterkeys()
-    mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
-    # Is there any reason why this is not: join(mnt, role) ?
-    client = os.path.join(mnt, 'client.{id}'.format(id=id_))
-    try:
-        remote.run(
-            args=[
-                'rm',
-                '-rf',
-                '--',
-                client,
-                ],
-            )
-        log.info("Deleted dir {dir}".format(dir=client))
-    except Exception:
-        log.exception("Caught an exception deleting dir {dir}".format(dir=client))
-
-    try:
-        remote.run(
-            args=[
-                'rmdir',
-                '--',
-                mnt,
-                ],
-            )
-        log.info("Deleted dir {dir}".format(dir=mnt))
-    except Exception:
-        log.exception("Caught an exception deleting dir {dir}".format(dir=mnt))
-
-def _make_scratch_dir(ctx, role, subdir):
-    """
-    Make scratch directories for this role.  This also makes the mount
-    point if that directory does not exist.
-
-    :param ctx: Context
-    :param role: "role.#" where # is used for the role id.
-    :param subdir: use this subdir (False if not used)
-    """
-    retVal = False
-    PREFIX = 'client.'
-    id_ = role[len(PREFIX):]
-    log.debug("getting remote for {id} role {role_}".format(id=id_, role_=role))
-    (remote,) = ctx.cluster.only(role).remotes.iterkeys()
-    dir_owner = remote.shortname.split('@', 1)[0]
-    mnt = os.path.join(teuthology.get_testdir(ctx), 'mnt.{id}'.format(id=id_))
-    # if neither kclient nor ceph-fuse are required for a workunit,
-    # mnt may not exist. Stat and create the directory if it doesn't.
-    try:
-        remote.run(
-            args=[
-                'stat',
-                '--',
-                mnt,
-                ],
-            )
-        log.info('Did not need to create dir {dir}'.format(dir=mnt))
-    except Exception:
-        remote.run(
-            args=[
-                'mkdir',
-                '--',
-                mnt,
-                ],
-            )
-        log.info('Created dir {dir}'.format(dir=mnt))
-        retVal = True
-
-    if not subdir:
-        subdir = 'client.{id}'.format(id=id_)
-    if retVal:
-        remote.run(
-            args=[
-                'cd',
-                '--',
-                mnt,
-                run.Raw('&&'),
-                'mkdir',
-                '--',
-                subdir,
-                ],
-            )
-    else:
-        remote.run(
-            args=[
-                # cd first so this will fail if the mount point does
-                # not exist; pure install -d will silently do the
-                # wrong thing
-                'cd',
-                '--',
-                mnt,
-                run.Raw('&&'),
-                'sudo',
-                'install',
-                '-d',
-                '-m', '0755',
-                '--owner={user}'.format(user=dir_owner),
-                '--',
-                subdir,
-                ],
-            )
-
-    return retVal
-
-
-def _spawn_on_all_clients(ctx, refspec, tests, env, subdir, timeout=None):
-    """
-    Make a scratch directory for each client in the cluster, and then for each
-    test spawn _run_tests() for each role.
-
-    See run_tests() for parameter documentation.
-    """
-    client_generator = teuthology.all_roles_of_type(ctx.cluster, 'client')
-    client_remotes = list()
-    for client in client_generator:
-        (client_remote,) = ctx.cluster.only('client.{id}'.format(id=client)).remotes.iterkeys()
-        client_remotes.append((client_remote, 'client.{id}'.format(id=client)))
-        _make_scratch_dir(ctx, "client.{id}".format(id=client), subdir)
-
-    for unit in tests:
-        with parallel() as p:
-            for remote, role in client_remotes:
-                p.spawn(_run_tests, ctx, refspec, role, [unit], env, subdir,
-                        timeout=timeout)
-
-    # cleanup the generated client directories
-    client_generator = teuthology.all_roles_of_type(ctx.cluster, 'client')
-    for client in client_generator:
-        _delete_dir(ctx, 'client.{id}'.format(id=client))
-
-
-def _run_tests(ctx, refspec, role, tests, env, subdir=None, timeout=None):
-    """
-    Run the individual test. Create a scratch directory and then extract the
-    workunits from git. Make the executables, and then run the tests.
-    Clean up (remove files created) after the tests are finished.
-
-    :param ctx:     Context
-    :param refspec: branch, sha1, or version tag used to identify this
-                    build
-    :param tests:   specific tests specified.
-    :param env:     environment set in yaml file.  Could be None.
-    :param subdir:  subdirectory set in yaml file.  Could be None
-    :param timeout: If present, use the 'timeout' command on the remote host
-                    to limit execution time. Must be specified by a number
-                    followed by 's' for seconds, 'm' for minutes, 'h' for
-                    hours, or 'd' for days. If '0' or anything that evaluates
-                    to False is passed, the 'timeout' command is not used.
-    """
-    testdir = teuthology.get_testdir(ctx)
-    assert isinstance(role, basestring)
-    PREFIX = 'client.'
-    assert role.startswith(PREFIX)
-    id_ = role[len(PREFIX):]
-    (remote,) = ctx.cluster.only(role).remotes.iterkeys()
-    mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
-    # subdir so we can remove and recreate this a lot without sudo
-    if subdir is None:
-        scratch_tmp = os.path.join(mnt, 'client.{id}'.format(id=id_), 'tmp')
-    else:
-        scratch_tmp = os.path.join(mnt, subdir)
-    srcdir = '{tdir}/workunit.{role}'.format(tdir=testdir, role=role)
-
-    remote.run(
-        logger=log.getChild(role),
-        args=[
-            'mkdir', '--', srcdir,
-            run.Raw('&&'),
-            'git',
-            'archive',
-            '--remote=git://ceph.newdream.net/git/ceph.git',
-            '%s:qa/workunits' % refspec,
-            run.Raw('|'),
-            'tar',
-            '-C', srcdir,
-            '-x',
-            '-f-',
-            run.Raw('&&'),
-            'cd', '--', srcdir,
-            run.Raw('&&'),
-            'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi',
-            run.Raw('&&'),
-            'find', '-executable', '-type', 'f', '-printf', r'%P\0',
-            run.Raw('>{tdir}/workunits.list'.format(tdir=testdir)),
-            ],
-        )
-
-    workunits = sorted(teuthology.get_file(
-                            remote,
-                            '{tdir}/workunits.list'.format(tdir=testdir)).split('\0'))
-    assert workunits
-
-    try:
-        assert isinstance(tests, list)
-        for spec in tests:
-            log.info('Running workunits matching %s on %s...', spec, role)
-            prefix = '{spec}/'.format(spec=spec)
-            to_run = [w for w in workunits if w == spec or w.startswith(prefix)]
-            if not to_run:
-                raise RuntimeError('Spec did not match any workunits: {spec!r}'.format(spec=spec))
-            for workunit in to_run:
-                log.info('Running workunit %s...', workunit)
-                args = [
-                    'mkdir', '-p', '--', scratch_tmp,
-                    run.Raw('&&'),
-                    'cd', '--', scratch_tmp,
-                    run.Raw('&&'),
-                    run.Raw('CEPH_CLI_TEST_DUP_COMMAND=1'),
-                    run.Raw('CEPH_REF={ref}'.format(ref=refspec)),
-                    run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)),
-                    run.Raw('CEPH_ID="{id}"'.format(id=id_)),
-                    ]
-                if env is not None:
-                    for var, val in env.iteritems():
-                        quoted_val = pipes.quote(val)
-                        env_arg = '{var}={val}'.format(var=var, val=quoted_val)
-                        args.append(run.Raw(env_arg))
-                args.extend([
-                    'adjust-ulimits',
-                    'ceph-coverage',
-                    '{tdir}/archive/coverage'.format(tdir=testdir)])
-                if timeout and timeout != '0':
-                    args.extend(['timeout', timeout])
-                args.extend([
-                    '{srcdir}/{workunit}'.format(
-                        srcdir=srcdir,
-                        workunit=workunit,
-                        ),
-                    ])
-                remote.run(
-                    logger=log.getChild(role),
-                    args=args,
-                    )
-                remote.run(
-                    logger=log.getChild(role),
-                    args=['sudo', 'rm', '-rf', '--', scratch_tmp],
-                    )
-    finally:
-        log.info('Stopping %s on %s...', spec, role)
-        remote.run(
-            logger=log.getChild(role),
-            args=[
-                'rm', '-rf', '--', '{tdir}/workunits.list'.format(tdir=testdir), srcdir,
-                ],
-            )
diff --git a/teuthology/task_util/__init__.py b/teuthology/task_util/__init__.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/teuthology/task_util/kclient.py b/teuthology/task_util/kclient.py
deleted file mode 100644 (file)
index c6a259f..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-from teuthology.misc import get_testdir
-from teuthology.orchestra import run
-
-
-def write_secret_file(ctx, remote, role, keyring, filename):
-    """
-    Stash the keyring in the specified filename.
-    """
-    testdir = get_testdir(ctx)
-    remote.run(
-        args=[
-            'adjust-ulimits',
-            'ceph-coverage',
-            '{tdir}/archive/coverage'.format(tdir=testdir),
-            'ceph-authtool',
-            '--name={role}'.format(role=role),
-            '--print-key',
-            keyring,
-            run.Raw('>'),
-            filename,
-            ],
-        )
diff --git a/teuthology/task_util/rados.py b/teuthology/task_util/rados.py
deleted file mode 100644 (file)
index f6a806c..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-import logging
-
-from teuthology import misc as teuthology
-
-log = logging.getLogger(__name__)
-
-def rados(ctx, remote, cmd, wait=True, check_status=False):
-    testdir = teuthology.get_testdir(ctx)
-    log.info("rados %s" % ' '.join(cmd))
-    pre = [
-        'adjust-ulimits',
-        'ceph-coverage',
-        '{tdir}/archive/coverage'.format(tdir=testdir),
-        'rados',
-        ]
-    pre.extend(cmd)
-    proc = remote.run(
-        args=pre,
-        check_status=check_status,
-        wait=wait,
-        )
-    if wait:
-        return proc.exitstatus
-    else:
-        return proc
-
-def create_ec_pool(remote, name, profile_name, pgnum, m=1, k=2):
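-    # Inferred semantics: with the defaults k=2 data chunks and m=1 coding
-    # chunk per object, the pool tolerates the loss of a single OSD; the
-    # failure domain is 'osd' per the profile set below.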
-    remote.run(args=[
-        'ceph', 'osd', 'erasure-code-profile', 'set',
-        profile_name, 'm=' + str(m), 'k=' + str(k),
-        'ruleset-failure-domain=osd',
-        ])
-    remote.run(args=[
-        'ceph', 'osd', 'pool', 'create', name,
-        str(pgnum), str(pgnum), 'erasure', profile_name,
-        ])
-
-def create_replicated_pool(remote, name, pgnum):
-    remote.run(args=[
-        'ceph', 'osd', 'pool', 'create', name, str(pgnum), str(pgnum),
-        ])
-
-def create_cache_pool(remote, base_name, cache_name, pgnum, size):
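-    # Creates the cache pool, then attaches it as a cache tier of base_name;
-    # the size argument is handed straight to 'ceph osd tier add-cache'
-    # (presumably the cache's target size in bytes).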
-    remote.run(args=[
-            'ceph', 'osd', 'pool', 'create', cache_name, str(pgnum)
-            ])
-    remote.run(args=[
-            'ceph', 'osd', 'tier', 'add-cache', base_name, cache_name,
-            str(size),
-            ])
diff --git a/teuthology/task_util/rgw.py b/teuthology/task_util/rgw.py
deleted file mode 100644 (file)
index cbe3071..0000000
+++ /dev/null
@@ -1,153 +0,0 @@
-from cStringIO import StringIO
-import logging
-import json
-import requests
-from urlparse import urlparse
-
-from ..orchestra.connection import split_user
-from teuthology import misc as teuthology
-
-log = logging.getLogger(__name__)
-
-# simple test to indicate if multi-region testing should occur
-def multi_region_enabled(ctx):
-    # this is populated by the radosgw-agent task, seems reasonable to
-    # use that as an indicator that we're testing multi-region sync
-    return 'radosgw_agent' in ctx
-
-def rgwadmin(ctx, client, cmd, stdin=StringIO(), check_status=False):
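-    # Runs 'radosgw-admin' as the given client and returns (exitstatus, result),
-    # where result is the parsed JSON output when available, else the raw
-    # output. Illustrative call (uid assumed):
-    #   rgwadmin(ctx, 'client.0', cmd=['user', 'info', '--uid', 'foo'],
-    #            check_status=True)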
-    log.info('rgwadmin: {client} : {cmd}'.format(client=client,cmd=cmd))
-    testdir = teuthology.get_testdir(ctx)
-    pre = [
-        'adjust-ulimits',
-        'ceph-coverage',
-        '{tdir}/archive/coverage'.format(tdir=testdir),
-        'radosgw-admin',
-        '--log-to-stderr',
-        '--format', 'json',
-        '-n',  client,
-        ]
-    pre.extend(cmd)
-    log.info('rgwadmin: cmd=%s' % pre)
-    (remote,) = ctx.cluster.only(client).remotes.iterkeys()
-    proc = remote.run(
-        args=pre,
-        check_status=check_status,
-        stdout=StringIO(),
-        stderr=StringIO(),
-        stdin=stdin,
-        )
-    r = proc.exitstatus
-    out = proc.stdout.getvalue()
-    j = None
-    if not r and out != '':
-        try:
-            j = json.loads(out)
-            log.info(' json result: %s' % j)
-        except ValueError:
-            j = out
-            log.info(' raw result: %s' % j)
-    return (r, j)
-
-def get_zone_host_and_port(ctx, client, zone):
-    _, region_map = rgwadmin(ctx, client, check_status=True,
-                             cmd=['-n', client, 'region-map', 'get'])
-    regions = region_map['regions']
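-    # Shape of region_map, inferred from the accesses below (illustrative):
-    #   {'regions': [{'val': {'is_master': 'true', 'master_zone': 'z1',
-    #                         'zones': [{'name': 'z1',
-    #                                    'endpoints': ['http://host:80/']}]}}]}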
-    for region in regions:
-        for zone_info in region['val']['zones']:
-            if zone_info['name'] == zone:
-                endpoint = urlparse(zone_info['endpoints'][0])
-                host, port = endpoint.hostname, endpoint.port
-                if port is None:
-                    port = 80
-                return host, port
-    assert False, 'no endpoint for zone {zone} found'.format(zone=zone)
-
-def get_master_zone(ctx, client):
-    _, region_map = rgwadmin(ctx, client, check_status=True,
-                             cmd=['-n', client, 'region-map', 'get'])
-    regions = region_map['regions']
-    for region in regions:
-        is_master = (region['val']['is_master'] == "true")
-        log.info('region={r} is_master={ism}'.format(r=region, ism=is_master))
-        if not is_master:
-            continue
-        master_zone = region['val']['master_zone']
-        log.info('master_zone=%s' % master_zone)
-        for zone_info in region['val']['zones']:
-            if zone_info['name'] == master_zone:
-                return master_zone
-    log.info('couldn\'t find master zone')
-    return None
-
-def get_master_client(ctx, clients):
-    master_zone = get_master_zone(ctx, clients[0]) # can use any client for this as long as system configured correctly
-    if not master_zone:
-        return None
-
-    for client in clients:
-        zone = zone_for_client(ctx, client)
-        if zone == master_zone:
-            return client
-
-    return None
-
-def get_zone_system_keys(ctx, client, zone):
-    _, zone_info = rgwadmin(ctx, client, check_status=True,
-                            cmd=['-n', client,
-                                 'zone', 'get', '--rgw-zone', zone])
-    system_key = zone_info['system_key']
-    return system_key['access_key'], system_key['secret_key']
-
-def zone_for_client(ctx, client):
-    ceph_config = ctx.ceph.conf.get('global', {})
-    ceph_config.update(ctx.ceph.conf.get('client', {}))
-    ceph_config.update(ctx.ceph.conf.get(client, {}))
-    return ceph_config.get('rgw zone')
-
-def region_for_client(ctx, client):
-    ceph_config = ctx.ceph.conf.get('global', {})
-    ceph_config.update(ctx.ceph.conf.get('client', {}))
-    ceph_config.update(ctx.ceph.conf.get(client, {}))
-    return ceph_config.get('rgw region')
-
-def radosgw_data_log_window(ctx, client):
-    ceph_config = ctx.ceph.conf.get('global', {})
-    ceph_config.update(ctx.ceph.conf.get('client', {}))
-    ceph_config.update(ctx.ceph.conf.get(client, {}))
-    return ceph_config.get('rgw data log window', 30)
-
-def radosgw_agent_sync_data(ctx, agent_host, agent_port, full=False):
-    log.info('sync agent {h}:{p}'.format(h=agent_host, p=agent_port))
-    method = "full" if full else "incremental"
-    return requests.post('http://{addr}:{port}/data/{method}'.format(addr = agent_host, port = agent_port, method = method))
-
-def radosgw_agent_sync_metadata(ctx, agent_host, agent_port, full=False):
-    log.info('sync agent {h}:{p}'.format(h=agent_host, p=agent_port))
-    method = "full" if full else "incremental"
-    return requests.post('http://{addr}:{port}/metadata/{method}'.format(addr = agent_host, port = agent_port, method = method))
-
-def radosgw_agent_sync_all(ctx, full=False, data=False):
-    if ctx.radosgw_agent.procs:
-        for agent_client, c_config in ctx.radosgw_agent.config.iteritems():
-            zone_for_client(ctx, agent_client)
-            sync_host, sync_port = get_sync_agent(ctx, agent_client)
-            log.debug('doing a sync via {host1}'.format(host1=sync_host))
-            radosgw_agent_sync_metadata(ctx, sync_host, sync_port, full)
-            if (data):
-                radosgw_agent_sync_data(ctx, sync_host, sync_port, full)
-
-def host_for_role(ctx, role):
-    for target, roles in zip(ctx.config['targets'].iterkeys(), ctx.config['roles']):
-        if role in roles:
-            _, host = split_user(target)
-            return host
-
-def get_sync_agent(ctx, source):
-    for task in ctx.config['tasks']:
-        if 'radosgw-agent' not in task:
-            continue
-        for client, conf in task['radosgw-agent'].iteritems():
-            if conf['src'] == source:
-                return host_for_role(ctx, source), conf.get('port', 8000)
-    return None, None
diff --git a/teuthology/test/__init__.py b/teuthology/test/__init__.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/teuthology/test/fake_archive.py b/teuthology/test/fake_archive.py
deleted file mode 100644 (file)
index 38d5864..0000000
+++ /dev/null
@@ -1,107 +0,0 @@
-import os
-import shutil
-import yaml
-import random
-
-
-class FakeArchive(object):
-    def __init__(self, archive_base="./test_archive"):
-        self.archive_base = archive_base
-
-    def get_random_metadata(self, run_name, job_id=None, hung=False):
-        """
-        Generate a random info dict for a fake job. If 'hung' is not True, also
-        generate a summary dict.
-
-        :param run_name:   Run name e.g. 'test_foo'
-        :param job_id: Job ID e.g. '12345'
-        :param hung:   Simulate a hung job e.g. don't return a summary.yaml
-        :return:       A dict with keys 'job_id', 'info' and possibly
-                       'summary', with corresponding values
-        """
-        rand = random.Random()
-
-        description = 'description for job with id %s' % job_id
-        owner = 'job@owner'
-        duration = rand.randint(1, 36000)
-        pid = rand.randint(1000, 99999)
-        if job_id is None:
-            job_id = rand.randint(1, 99999)
-
-        info = {
-            'description': description,
-            'job_id': job_id,
-            'run_name': run_name,
-            'owner': owner,
-            'pid': pid,
-        }
-
-        metadata = {
-            'info': info,
-            'job_id': job_id,
-        }
-
-        if not hung:
-            success = rand.randint(0, 1) != 1
-
-            summary = {
-                'description': description,
-                'duration': duration,
-                'owner': owner,
-                'success': success,
-            }
-
-            if not success:
-                summary['failure_reason'] = 'Failure reason!'
-            metadata['summary'] = summary
-
-        return metadata
-
-    def setup(self):
-        if os.path.exists(self.archive_base):
-            shutil.rmtree(self.archive_base)
-        os.mkdir(self.archive_base)
-
-    def teardown(self):
-        shutil.rmtree(self.archive_base)
-
-    def populate_archive(self, run_name, jobs):
-        run_archive_dir = os.path.join(self.archive_base, run_name)
-        os.mkdir(run_archive_dir)
-        for job in jobs:
-            archive_dir = os.path.join(run_archive_dir, str(job['job_id']))
-            os.mkdir(archive_dir)
-
-            with file(os.path.join(archive_dir, 'info.yaml'), 'w') as yfile:
-                yaml.safe_dump(job['info'], yfile)
-
-            if 'summary' in job:
-                summary_path = os.path.join(archive_dir, 'summary.yaml')
-                with file(summary_path, 'w') as yfile:
-                    yaml.safe_dump(job['summary'], yfile)
-
-    def create_fake_run(self, run_name, job_count, yaml_path, num_hung=0):
-        """
-        Create a fake run named run_name. The YAML at yaml_path is copied in
-        as each job's config.yaml.
-
-        Returns the list of job metadata dicts.
-        """
-        assert os.path.exists(yaml_path)
-        assert job_count > 0
-        jobs = []
-        made_hung = 0
-        for i in range(job_count):
-            if made_hung < num_hung:
-                jobs.append(self.get_random_metadata(run_name, hung=True))
-                made_hung += 1
-            else:
-                jobs.append(self.get_random_metadata(run_name, hung=False))
-            #job_config = yaml.safe_load(yaml_path)
-        self.populate_archive(run_name, jobs)
-        for job in jobs:
-            job_id = job['job_id']
-            job_yaml_path = os.path.join(self.archive_base, run_name,
-                                         str(job_id), 'config.yaml')
-            shutil.copyfile(yaml_path, job_yaml_path)
-        return jobs
-
diff --git a/teuthology/test/test_config.py b/teuthology/test/test_config.py
deleted file mode 100644 (file)
index f94ff35..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-from .. import config
-
-
-class TestConfig(object):
-    def test_get_ceph_git_base_default(self):
-        conf_obj = config.Config()
-        conf_obj.teuthology_yaml = ''
-        conf_obj.load_files()
-        assert conf_obj.ceph_git_base_url == "https://github.com/ceph/"
-
-    def test_set_ceph_git_base_via_private(self):
-        conf_obj = config.Config()
-        conf_obj._Config__conf['ceph_git_base_url'] = "git://ceph.com/"
-        assert conf_obj.ceph_git_base_url == "git://ceph.com/"
-
-    def test_set_nonstandard(self):
-        conf_obj = config.Config()
-        conf_obj.something = 'something else'
-        assert conf_obj.something == 'something else'
diff --git a/teuthology/test/test_contextutil.py b/teuthology/test/test_contextutil.py
deleted file mode 100644 (file)
index 2465459..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-from pytest import raises
-from teuthology import contextutil
-from logging import ERROR
-
-
-class TestSafeWhile(object):
-
-    def setup(self):
-        contextutil.log.setLevel(ERROR)
-        self.fake_sleep = lambda s: True
-        self.s_while = contextutil.safe_while
-
-    def test_6_5_10_deal(self):
-        with raises(contextutil.MaxWhileTries):
-            with self.s_while(_sleeper=self.fake_sleep) as proceed:
-                while proceed():
-                    pass
-
-    def test_6_0_1_deal(self):
-        with raises(contextutil.MaxWhileTries) as error:
-            with self.s_while(
-                tries=1,
-                _sleeper=self.fake_sleep
-            ) as proceed:
-                while proceed():
-                    pass
-
-        msg = error.value[0]
-        assert 'waiting for 6 seconds' in msg
-
-    def test_1_0_10_deal(self):
-        with raises(contextutil.MaxWhileTries) as error:
-            with self.s_while(
-                sleep=1,
-                _sleeper=self.fake_sleep
-            ) as proceed:
-                while proceed():
-                    pass
-
-        msg = error.value[0]
-        assert 'waiting for 10 seconds' in msg
-
-    def test_6_1_10_deal(self):
-        with raises(contextutil.MaxWhileTries) as error:
-            with self.s_while(
-                increment=1,
-                _sleeper=self.fake_sleep
-            ) as proceed:
-                while proceed():
-                    pass
-
-        msg = error.value[0]
-        assert 'waiting for 105 seconds' in msg
-
-    def test_action(self):
-        with raises(contextutil.MaxWhileTries) as error:
-            with self.s_while(
-                action='doing the thing',
-                _sleeper=self.fake_sleep
-            ) as proceed:
-                while proceed():
-                    pass
-
-        msg = error.value[0]
-        assert "'doing the thing'" in msg
-
-    def test_no_raise(self):
-        with self.s_while(_raise=False, _sleeper=self.fake_sleep) as proceed:
-            while proceed():
-                pass
-
-        assert True
diff --git a/teuthology/test/test_get_distro.py b/teuthology/test/test_get_distro.py
deleted file mode 100644 (file)
index 3ade547..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-from .. import misc as teuthology
-
-class Mock: pass
-
-class TestGetDistro(object):
-
-    def setup(self):
-        self.fake_ctx = Mock()
-        self.fake_ctx.config = {}
-        self.fake_ctx.os_type = 'ubuntu'
-
-    def test_default_distro(self):
-        distro = teuthology.get_distro(self.fake_ctx)
-        assert distro == 'ubuntu'
-
-    def test_argument(self):
-        self.fake_ctx.os_type = 'centos'
-        distro = teuthology.get_distro(self.fake_ctx)
-        assert distro == 'centos'
-
-    def test_teuth_config(self):
-        self.fake_ctx.config = {'os_type': 'fedora'}
-        distro = teuthology.get_distro(self.fake_ctx)
-        assert distro == 'fedora'
-
-    def test_teuth_config_downburst(self):
-        self.fake_ctx.config = {'downburst' : {'distro': 'sles'}}
-        distro = teuthology.get_distro(self.fake_ctx)
-        assert distro == 'sles'
diff --git a/teuthology/test/test_get_distro_version.py b/teuthology/test/test_get_distro_version.py
deleted file mode 100644 (file)
index e93b9b6..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-from .. import misc as teuthology
-
-class Mock: pass
-
-class TestGetDistroVersion(object):
-
-    def setup(self):
-        self.fake_ctx = Mock()
-        self.fake_ctx.config = {}
-        self.fake_ctx_noarg = Mock()
-        self.fake_ctx_noarg.config = {}
-        self.fake_ctx_noarg.os_version = None
-
-    def test_default_distro_version(self):
-        #Default distro is ubuntu, default version of ubuntu is 12.04
-        self.fake_ctx.os_version = None
-        distroversion = teuthology.get_distro_version(self.fake_ctx)
-        assert distroversion == '12.04'
-
-    def test_argument_version(self):
-        self.fake_ctx.os_version = '13.04'
-        distroversion = teuthology.get_distro_version(self.fake_ctx)
-        assert distroversion == '13.04'
-
-    def test_teuth_config_version(self):
-        # The os_version argument takes precedence.
-        self.fake_ctx.os_version = '13.04'
-        self.fake_ctx.config = {'os_version': '13.10'}
-        distroversion = teuthology.get_distro_version(self.fake_ctx)
-        assert distroversion == '13.04'
-
-    def test_teuth_config_downburst_version(self):
-        # The os_version argument takes precedence.
-        self.fake_ctx.os_version = '13.10'
-        self.fake_ctx.config = {'downburst' : {'distroversion': '13.04'}}
-        distroversion = teuthology.get_distro_version(self.fake_ctx)
-        assert distroversion == '13.10'
-
-    def test_teuth_config_noarg_version(self):
-        self.fake_ctx_noarg.config = {'os_version': '13.04'}
-        distroversion = teuthology.get_distro_version(self.fake_ctx_noarg)
-        assert distroversion == '13.04'
-
-    def test_teuth_config_downburst_noarg_version(self):
-        self.fake_ctx_noarg.config = {'downburst' : {'distroversion': '13.04'}}
-        distroversion = teuthology.get_distro_version(self.fake_ctx_noarg)
-        assert distroversion == '13.04'
diff --git a/teuthology/test/test_get_multi_machine_types.py b/teuthology/test/test_get_multi_machine_types.py
deleted file mode 100644 (file)
index c6013f4..0000000
+++ /dev/null
@@ -1,27 +0,0 @@
-from .. import misc as teuthology
-
-class Mock: pass
-
-class TestGetMultiMachineTypes(object):
-
-    def test_space(self):
-        give = 'burnupi plana vps'
-        expect = ['burnupi','plana','vps']
-        assert teuthology.get_multi_machine_types(give) == expect
-
-    def test_tab(self):
-        give = 'burnupi        plana   vps'
-        expect = ['burnupi','plana','vps']
-        assert teuthology.get_multi_machine_types(give) == expect
-
-    def test_comma(self):
-        give = 'burnupi,plana,vps'
-        expect = ['burnupi','plana','vps']
-        assert teuthology.get_multi_machine_types(give) == expect
-
-    def test_single(self):
-        give = 'burnupi'
-        expect = ['burnupi']
-        assert teuthology.get_multi_machine_types(give) == expect
-
-
diff --git a/teuthology/test/test_misc.py b/teuthology/test/test_misc.py
deleted file mode 100644 (file)
index b2919e6..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-import argparse
-from ..orchestra import cluster
-from .. import misc
-from ..config import config
-
-import pytest
-
-
-class FakeRemote(object):
-    pass
-
-
-def test_get_clients_simple():
-    ctx = argparse.Namespace()
-    remote = FakeRemote()
-    ctx.cluster = cluster.Cluster(
-        remotes=[
-            (remote, ['client.0', 'client.1'])
-            ],
-        )
-    g = misc.get_clients(ctx=ctx, roles=['client.1'])
-    got = next(g)
-    assert len(got) == 2
-    assert got[0] == ('1')
-    assert got[1] is remote
-    with pytest.raises(StopIteration):
-        next(g)
-
-
-def test_get_http_log_path():
-    # Fake configuration
-    archive_server = "http://example.com/server_root"
-    config.archive_server = archive_server
-    archive_dir = "/var/www/archives"
-
-    path = misc.get_http_log_path(archive_dir)
-    assert path == "http://example.com/server_root/archives/"
-
-    job_id = '12345'
-    path = misc.get_http_log_path(archive_dir, job_id)
-    assert path == "http://example.com/server_root/archives/12345/"
-
-    # Inktank configuration
-    archive_server = "http://qa-proxy.ceph.com/teuthology/"
-    config.archive_server = archive_server
-    archive_dir = "/var/lib/teuthworker/archive/teuthology-2013-09-12_11:49:50-ceph-deploy-master-testing-basic-vps"
-    job_id = 31087
-    path = misc.get_http_log_path(archive_dir, job_id)
-    assert path == "http://qa-proxy.ceph.com/teuthology/teuthology-2013-09-12_11:49:50-ceph-deploy-master-testing-basic-vps/31087/"
-
-    path = misc.get_http_log_path(archive_dir)
-    assert path == "http://qa-proxy.ceph.com/teuthology/teuthology-2013-09-12_11:49:50-ceph-deploy-master-testing-basic-vps/"
diff --git a/teuthology/test/test_report.py b/teuthology/test/test_report.py
deleted file mode 100644 (file)
index 851caeb..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-import yaml
-import json
-import fake_archive
-from .. import report
-
-
-class TestSerializer(object):
-    def setup(self):
-        self.archive = fake_archive.FakeArchive()
-        self.archive.setup()
-        self.archive_base = self.archive.archive_base
-        self.reporter = report.ResultsReporter(archive_base=self.archive_base)
-
-    def teardown(self):
-        self.archive.teardown()
-
-    def test_all_runs_one_run(self):
-        run_name = "test_all_runs"
-        yaml_path = "examples/3node_ceph.yaml"
-        job_count = 3
-        self.archive.create_fake_run(run_name, job_count, yaml_path)
-        assert [run_name] == self.reporter.serializer.all_runs
-
-    def test_all_runs_three_runs(self):
-        run_count = 3
-        runs = {}
-        for i in range(run_count):
-            run_name = "run #%s" % i
-            yaml_path = "examples/3node_ceph.yaml"
-            job_count = 3
-            job_ids = self.archive.create_fake_run(
-                run_name,
-                job_count,
-                yaml_path)
-            runs[run_name] = job_ids
-        assert sorted(runs.keys()) == sorted(self.reporter.serializer.all_runs)
-
-    def test_jobs_for_run(self):
-        run_name = "test_jobs_for_run"
-        yaml_path = "examples/3node_ceph.yaml"
-        job_count = 3
-        jobs = self.archive.create_fake_run(run_name, job_count, yaml_path)
-        job_ids = [str(job['job_id']) for job in jobs]
-
-        got_jobs = self.reporter.serializer.jobs_for_run(run_name)
-        assert sorted(job_ids) == sorted(got_jobs.keys())
-
-    def test_running_jobs_for_run(self):
-        run_name = "test_jobs_for_run"
-        yaml_path = "examples/3node_ceph.yaml"
-        job_count = 10
-        num_hung = 3
-        self.archive.create_fake_run(run_name, job_count, yaml_path,
-                                     num_hung=num_hung)
-
-        got_jobs = self.reporter.serializer.running_jobs_for_run(run_name)
-        assert len(got_jobs) == num_hung
-
-    def test_json_for_job(self):
-        run_name = "test_json_for_job"
-        yaml_path = "examples/3node_ceph.yaml"
-        job_count = 1
-        jobs = self.archive.create_fake_run(run_name, job_count, yaml_path)
-        job = jobs[0]
-
-        with file(yaml_path) as yaml_file:
-            obj_from_yaml = yaml.safe_load(yaml_file)
-        full_obj = obj_from_yaml.copy()
-        full_obj.update(job['info'])
-        full_obj.update(job['summary'])
-
-        out_json = self.reporter.serializer.json_for_job(
-            run_name, str(job['job_id']))
-        out_obj = json.loads(out_json)
-        assert full_obj == out_obj
-
-
diff --git a/teuthology/test/test_results.py b/teuthology/test/test_results.py
deleted file mode 100644 (file)
index 5318b7d..0000000
+++ /dev/null
@@ -1,86 +0,0 @@
-import os
-import textwrap
-from .. import results
-from .fake_archive import FakeArchive
-
-
-class TestResultsEmail(object):
-    reference = {
-        'run_name': 'test_name',
-        'jobs': [
-            {'info': {'description': 'description for job with name test_name',
-                      'job_id': 30481,
-                      'name': 'test_name',
-                      'owner': 'job@owner',
-                      'pid': 80399},
-             'job_id': 30481},
-            {'info': {'description': 'description for job with name test_name',
-                      'job_id': 88979,
-                      'name': 'test_name',
-                      'owner': 'job@owner',
-                      'pid': 3903},
-                'job_id': 88979,
-                'summary': {
-                    'description': 'description for job with name test_name',
-                    'duration': 35190, 'failure_reason': 'Failure reason!',
-                    'owner': 'job@owner',
-                    'success': False}},
-            {'info': {'description': 'description for job with name test_name',
-                      'job_id': 68369,
-                      'name': 'test_name',
-                      'owner': 'job@owner',
-                      'pid': 38524},
-             'job_id': 68369,
-             'summary': {
-                 'description': 'description for job with name test_name',
-                 'duration': 33771, 'owner': 'job@owner', 'success':
-                 True}},
-        ],
-        'subject': '1 failed, 1 hung, 1 passed in test_name',
-        'body': textwrap.dedent("""
-    Test Run: test_name
-    =================================================================
-    logs:   http://qa-proxy.ceph.com/teuthology/test_name/
-    failed: 1
-    hung:   1
-    passed: 1
-
-    Failed
-    =================================================================
-    [88979]  description for job with name test_name
-    -----------------------------------------------------------------
-    time:   35190s
-    log:    http://qa-proxy.ceph.com/teuthology/test_name/88979/
-
-        Failure reason!
-
-
-    Hung
-    =================================================================
-    [30481] description for job with name test_name
-
-    Passed
-    =================================================================
-    [68369] description for job with name test_name
-    time:    33771s
-    """).strip(),
-    }
-
-    def setup(self):
-        self.archive = FakeArchive()
-        self.archive.setup()
-        self.archive_base = self.archive.archive_base
-
-    def teardown(self):
-        self.archive.teardown()
-
-    def test_build_email_body(self):
-        run_name = self.reference['run_name']
-        run_dir = os.path.join(self.archive_base, run_name)
-        self.archive.populate_archive(run_name, self.reference['jobs'])
-        (subject, body) = results.build_email_body(
-            run_name,
-            run_dir,
-            36000)
-        assert subject == self.reference['subject']
-        assert body == self.reference['body']
diff --git a/teuthology/test/test_safepath.py b/teuthology/test/test_safepath.py
deleted file mode 100644 (file)
index e842e5a..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-from .. import safepath
-
-class TestSafepath(object):
-    def test_simple(self):
-        got = safepath.munge('foo')
-        assert got == 'foo'
-
-    def test_empty(self):
-        # really odd corner case
-        got = safepath.munge('')
-        assert got == '_'
-
-    def test_slash(self):
-        got = safepath.munge('/')
-        assert got == '_'
-
-    def test_slashslash(self):
-        got = safepath.munge('//')
-        assert got == '_'
-
-    def test_absolute(self):
-        got = safepath.munge('/evil')
-        assert got == 'evil'
-
-    def test_absolute_subdir(self):
-        got = safepath.munge('/evil/here')
-        assert got == 'evil/here'
-
-    def test_dot_leading(self):
-        got = safepath.munge('./foo')
-        assert got == 'foo'
-
-    def test_dot_middle(self):
-        got = safepath.munge('evil/./foo')
-        assert got == 'evil/foo'
-
-    def test_dot_trailing(self):
-        got = safepath.munge('evil/foo/.')
-        assert got == 'evil/foo'
-
-    def test_dotdot(self):
-        got = safepath.munge('../evil/foo')
-        assert got == '_./evil/foo'
-
-    def test_dotdot_subdir(self):
-        got = safepath.munge('evil/../foo')
-        assert got == 'evil/_./foo'
-
-    def test_hidden(self):
-        got = safepath.munge('.evil')
-        assert got == '_evil'
-
-    def test_hidden_subdir(self):
-        got = safepath.munge('foo/.evil')
-        assert got == 'foo/_evil'
diff --git a/teuthology/worker.py b/teuthology/worker.py
deleted file mode 100644 (file)
index 91d6434..0000000
+++ /dev/null
@@ -1,330 +0,0 @@
-import fcntl
-import logging
-import os
-import subprocess
-import shutil
-import sys
-import tempfile
-import time
-import yaml
-
-from datetime import datetime
-
-from . import beanstalk
-from . import report
-from . import safepath
-from .config import config as teuth_config
-from .kill import kill_job
-from .misc import read_config
-
-log = logging.getLogger(__name__)
-start_time = datetime.utcnow()
-restart_file_path = '/tmp/teuthology-restart-workers'
-
-
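-# A worker re-execs itself between jobs (see restart() below) whenever the
-# restart file has been touched after the worker started; since main() checks
-# this before reserving a job, a running job is never interrupted.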
-def need_restart():
-    if not os.path.exists(restart_file_path):
-        return False
-    file_mtime = datetime.utcfromtimestamp(os.path.getmtime(restart_file_path))
-    if file_mtime > start_time:
-        return True
-    else:
-        return False
-
-
-def restart():
-    log.info('Restarting...')
-    args = sys.argv[:]
-    args.insert(0, sys.executable)
-    os.execv(sys.executable, args)
-
-
-class filelock(object):
-    # simple flock class
-    def __init__(self, fn):
-        self.fn = fn
-        self.fd = None
-
-    def acquire(self):
-        assert not self.fd
-        self.fd = file(self.fn, 'w')
-        fcntl.lockf(self.fd, fcntl.LOCK_EX)
-
-    def release(self):
-        assert self.fd
-        fcntl.lockf(self.fd, fcntl.LOCK_UN)
-        self.fd = None
-
-
-def fetch_teuthology_branch(path, branch='master'):
-    """
-    Make sure we have the correct teuthology branch checked out and up-to-date
-    """
-    # only let one worker create/update the checkout at a time
-    lock = filelock('%s.lock' % path)
-    lock.acquire()
-    try:
-        if not os.path.isdir(path):
-            log.info("Cloning %s from upstream", branch)
-            teuthology_git_upstream = teuth_config.ceph_git_base_url + \
-                'teuthology.git'
-            log.info(
-                subprocess.check_output(('git', 'clone', '--branch', branch,
-                                         teuthology_git_upstream, path),
-                                        cwd=os.path.dirname(path))
-            )
-        elif time.time() - os.stat(path).st_mtime > 60:
-            # only do this at most once per minute
-            log.info("Fetching %s from upstream", branch)
-            log.info(
-                subprocess.check_output(('git', 'fetch', '-p', 'origin'),
-                                        cwd=path)
-            )
-            log.info(
-                subprocess.check_output(('touch', path))
-            )
-        else:
-            log.info("%s was just updated; assuming it is current", branch)
-
-        # This try/except block will notice if the requested branch doesn't
-        # exist, whether it was cloned or fetched.
-        try:
-            subprocess.check_output(
-                ('git', 'reset', '--hard', 'origin/%s' % branch),
-                cwd=path,
-            )
-        except subprocess.CalledProcessError:
-            log.exception("teuthology branch not found: %s", branch)
-            shutil.rmtree(path)
-            raise
-
-        log.debug("Bootstrapping %s", path)
-        # This magic makes the bootstrap script not attempt to clobber an
-        # existing virtualenv. But the branch's bootstrap needs to actually
-        # check for the NO_CLOBBER variable.
-        env = os.environ.copy()
-        env['NO_CLOBBER'] = '1'
-        cmd = './bootstrap'
-        boot_proc = subprocess.Popen(cmd, shell=True, cwd=path, env=env,
-                                     stdout=subprocess.PIPE,
-                                     stderr=subprocess.STDOUT)
-        returncode = boot_proc.wait()
-        if returncode != 0:
-            for line in boot_proc.stdout.readlines():
-                log.warn(line.strip())
-        log.info("Bootstrap exited with status %s", returncode)
-
-    finally:
-        lock.release()
-
-
-def main(ctx):
-    loglevel = logging.INFO
-    if ctx.verbose:
-        loglevel = logging.DEBUG
-    log.setLevel(loglevel)
-
-    log_file_path = os.path.join(ctx.log_dir, 'worker.{tube}.{pid}'.format(
-        pid=os.getpid(), tube=ctx.tube,))
-    log_handler = logging.FileHandler(filename=log_file_path)
-    log_formatter = logging.Formatter(
-        fmt='%(asctime)s.%(msecs)03d %(levelname)s:%(name)s:%(message)s',
-        datefmt='%Y-%m-%dT%H:%M:%S')
-    log_handler.setFormatter(log_formatter)
-    log.addHandler(log_handler)
-
-    if not os.path.isdir(ctx.archive_dir):
-        sys.exit("{prog}: archive directory must exist: {path}".format(
-            prog=os.path.basename(sys.argv[0]),
-            path=ctx.archive_dir,
-        ))
-    else:
-        teuth_config.archive_base = ctx.archive_dir
-
-    read_config(ctx)
-
-    connection = beanstalk.connect()
-    beanstalk.watch_tube(connection, ctx.tube)
-
-    while True:
-        if need_restart():
-            restart()
-
-        job = connection.reserve(timeout=60)
-        if job is None:
-            continue
-
-        # bury the job so it won't be re-run if it fails
-        job.bury()
-        log.info('Reserved job %d', job.jid)
-        log.info('Config is: %s', job.body)
-        job_config = yaml.safe_load(job.body)
-
-        job_config['job_id'] = str(job.jid)
-        safe_archive = safepath.munge(job_config['name'])
-        job_config['worker_log'] = log_file_path
-        archive_path_full = os.path.join(
-            ctx.archive_dir, safe_archive, str(job.jid))
-        job_config['archive_path'] = archive_path_full
-
-        # If the teuthology branch was not specified, default to master and
-        # store that value.
-        teuthology_branch = job_config.get('teuthology_branch', 'master')
-        job_config['teuthology_branch'] = teuthology_branch
-
-        teuth_path = os.path.join(os.getenv("HOME"),
-                                  'teuthology-' + teuthology_branch)
-
-        fetch_teuthology_branch(path=teuth_path, branch=teuthology_branch)
-
-        teuth_bin_path = os.path.join(teuth_path, 'virtualenv', 'bin')
-        if not os.path.isdir(teuth_bin_path):
-            raise RuntimeError("teuthology branch %s at %s not bootstrapped!" %
-                               (teuthology_branch, teuth_bin_path))
-
-        if job_config.get('last_in_suite'):
-            log.info('Generating coverage for %s', job_config['name'])
-            if teuth_config.results_server:
-                report.try_delete_jobs(job_config['name'],
-                                       job_config['job_id'])
-            args = [
-                os.path.join(teuth_bin_path, 'teuthology-results'),
-                '--timeout',
-                str(job_config.get('results_timeout', 21600)),
-                '--email',
-                job_config['email'],
-                '--archive-dir',
-                os.path.join(ctx.archive_dir, safe_archive),
-                '--name',
-                job_config['name'],
-            ]
-            subprocess.Popen(args=args).wait()
-        else:
-            log.info('Creating archive dir %s', archive_path_full)
-            safepath.makedirs(ctx.archive_dir, safe_archive)
-            log.info('Running job %d', job.jid)
-            run_job(job_config, teuth_bin_path)
-        job.delete()
-
-
-def run_with_watchdog(process, job_config):
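-    # Sketch of the loop below: every teuth_config.watchdog_interval seconds,
-    # push a 'running' status to paddles and kill the job once it has exceeded
-    # teuth_config.max_job_time.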
-    job_start_time = datetime.utcnow()
-
-    # Only push the information that's relevant to the watchdog, to save db
-    # load
-    job_info = dict(
-        name=job_config['name'],
-        job_id=job_config['job_id'],
-    )
-
-    # Sleep once outside of the loop to avoid double-posting jobs
-    time.sleep(teuth_config.watchdog_interval)
-    symlink_worker_log(job_config['worker_log'], job_config['archive_path'])
-    while process.poll() is None:
-        # Kill jobs that have been running longer than the global max
-        run_time = datetime.utcnow() - job_start_time
-        total_seconds = run_time.days * 60 * 60 * 24 + run_time.seconds
-        if total_seconds > teuth_config.max_job_time:
-            log.warning("Job ran longer than {max}s. Killing...".format(
-                max=teuth_config.max_job_time))
-            kill_job(job_info['name'], job_info['job_id'],
-                     teuth_config.archive_base)
-
-        report.try_push_job_info(job_info, dict(status='running'))
-        time.sleep(teuth_config.watchdog_interval)
-
-    # The job finished. Let's make sure paddles knows.
-    branches_sans_reporting = ('argonaut', 'bobtail', 'cuttlefish', 'dumpling')
-    if job_config.get('teuthology_branch') in branches_sans_reporting:
-        # The job ran with a teuthology branch that may not have the reporting
-        # feature. Let's call teuthology-report (which will be from the master
-        # branch) to report the job manually.
-        cmd = "teuthology-report -v -D -r {run_name} -j {job_id}".format(
-            run_name=job_info['name'],
-            job_id=job_info['job_id'])
-        try:
-            log.info("Executing %s" % cmd)
-            report_proc = subprocess.Popen(cmd, shell=True,
-                                           stdout=subprocess.PIPE,
-                                           stderr=subprocess.STDOUT)
-            while report_proc.poll() is None:
-                for line in report_proc.stdout.readlines():
-                    log.info(line.strip())
-                time.sleep(1)
-            log.info("Reported results via the teuthology-report command")
-        except Exception:
-            log.exception("teuthology-report failed")
-    else:
-        # Let's make sure that paddles knows the job is finished. We don't know
-        # the status, but if it was a pass or fail it will have already been
-        # reported to paddles. In that case paddles ignores the 'dead' status.
-        # If the job was killed, paddles will use the 'dead' status.
-        report.try_push_job_info(job_info, dict(status='dead'))
-
-
-def run_job(job_config, teuth_bin_path):
-    arg = [
-        os.path.join(teuth_bin_path, 'teuthology'),
-    ]
-    # The following is for compatibility with older schedulers, from before we
-    # started merging the contents of job_config['config'] into job_config
-    # itself.
-    if 'config' in job_config:
-        inner_config = job_config.pop('config')
-        if not isinstance(inner_config, dict):
-            log.warn("run_job: job_config['config'] isn't a dict, it's a %s",
-                     str(type(inner_config)))
-        else:
-            job_config.update(inner_config)
-
-    if job_config['verbose']:
-        arg.append('-v')
-
-    arg.extend([
-        '--lock',
-        '--block',
-        '--owner', job_config['owner'],
-        '--archive', job_config['archive_path'],
-        '--name', job_config['name'],
-    ])
-    if job_config['description'] is not None:
-        arg.extend(['--description', job_config['description']])
-    arg.append('--')
-
-    with tempfile.NamedTemporaryFile(prefix='teuthology-worker.',
-                                     suffix='.tmp',) as tmp:
-        yaml.safe_dump(data=job_config, stream=tmp)
-        tmp.flush()
-        arg.append(tmp.name)
-        p = subprocess.Popen(args=arg)
-        log.info("Job archive: %s", job_config['archive_path'])
-        log.info("Job PID: %s", str(p.pid))
-
-        if teuth_config.results_server:
-            log.info("Running with watchdog")
-            try:
-                run_with_watchdog(p, job_config)
-            except Exception:
-                log.exception("run_with_watchdog had an unhandled exception")
-                raise
-        else:
-            log.info("Running without watchdog")
-            # This sleep() is to give the child time to start up and create the
-            # archive dir.
-            time.sleep(5)
-            symlink_worker_log(job_config['worker_log'],
-                               job_config['archive_path'])
-            p.wait()
-
-        if p.returncode != 0:
-            log.error('Child exited with code %d', p.returncode)
-        else:
-            log.info('Success!')
-
-
-def symlink_worker_log(worker_log_path, archive_dir):
-    try:
-        log.debug("Worker log: %s", worker_log_path)
-        os.symlink(worker_log_path, os.path.join(archive_dir, 'worker.log'))
-    except Exception:
-        log.exception("Failed to symlink worker log")
diff --git a/tox.ini b/tox.ini
deleted file mode 100644 (file)
index 4c97fcb..0000000
--- a/tox.ini
+++ /dev/null
@@ -1,18 +0,0 @@
-[tox]
-envlist = py27, flake8
-
-[testenv:py27]
-sitepackages=True
-deps=
-  -r{toxinidir}/requirements.txt
-  pytest
-  mock
-  fudge
-  nose
-
-commands=py.test -v {posargs:teuthology scripts}
-
-[testenv:flake8]
-deps=
-  flake8
-commands=flake8 --select=F {posargs:teuthology scripts}
diff --git a/watch-suite.sh b/watch-suite.sh
deleted file mode 100755 (executable)
index 03d73e3..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-
-watch "pwd ; echo \`teuthology-ls --archive-dir . | grep -c pass\` passes ; teuthology-ls --archive-dir . | grep -v pass"
-