From: Sage Weil Date: Wed, 14 Dec 2016 17:29:55 +0000 (-0600) Subject: move ceph-qa-suite dirs into qa/ X-Git-Tag: v0.94.10~27^2^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=7ed0dce3cf7ded278e59a9b36d3fd713e3c42b8d;p=ceph.git move ceph-qa-suite dirs into qa/ --- diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 5a337a00754..00000000000 --- a/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -*~ -.*.sw[nmop] -__pycache__ -*.pyc -.tox diff --git a/README b/README deleted file mode 100644 index 0e32ce9f638..00000000000 --- a/README +++ /dev/null @@ -1,52 +0,0 @@ -ceph-qa-suite -------------- - -clusters/ - some predefined cluster layouts -suites/ - set suite - -The suites directory has a hierarchical collection of tests. This can be -freeform, but generally follows the convention of - - suites///... - -A test is described by a yaml fragment. - -A test can exist as a single .yaml file in the directory tree. For example: - - suites/foo/one.yaml - suites/foo/two.yaml - -is a simple group of two tests. - -A directory with a magic '+' file represents a test that combines all -other items in the directory into a single yaml fragment. For example: - - suites/foo/bar/+ - suites/foo/bar/a.yaml - suites/foo/bar/b.yaml - suites/foo/bar/c.yaml - -is a single test consisting of a + b + c. - -A directory with a magic '%' file represents a test matrix formed from -all other items in the directory. For example, - - suites/baz/% - suites/baz/a.yaml - suites/baz/b/b1.yaml - suites/baz/b/b2.yaml - suites/baz/c.yaml - suites/baz/d/d1.yaml - suites/baz/d/d2.yaml - -is a 4-dimensional test matrix. Two dimensions (a, c) are trivial (1 -item), so this is really 2x2 = 4 tests, which are - - a + b1 + c + d1 - a + b1 + c + d2 - a + b2 + c + d1 - a + b2 + c + d2 - -Symlinks are okay. 
- -The teuthology code can be found in https://github.com/ceph/teuthology.git diff --git a/archs/aarch64.yaml b/archs/aarch64.yaml deleted file mode 100644 index 6399b9959b5..00000000000 --- a/archs/aarch64.yaml +++ /dev/null @@ -1 +0,0 @@ -arch: aarch64 diff --git a/archs/armv7.yaml b/archs/armv7.yaml deleted file mode 100644 index c261ebd52a9..00000000000 --- a/archs/armv7.yaml +++ /dev/null @@ -1 +0,0 @@ -arch: armv7l diff --git a/archs/i686.yaml b/archs/i686.yaml deleted file mode 100644 index a920e5a9ed4..00000000000 --- a/archs/i686.yaml +++ /dev/null @@ -1 +0,0 @@ -arch: i686 diff --git a/archs/x86_64.yaml b/archs/x86_64.yaml deleted file mode 100644 index c2409f5d0dc..00000000000 --- a/archs/x86_64.yaml +++ /dev/null @@ -1 +0,0 @@ -arch: x86_64 diff --git a/ceph-deploy-overrides/ceph_deploy_dmcrypt.yaml b/ceph-deploy-overrides/ceph_deploy_dmcrypt.yaml deleted file mode 100644 index 859a37faa88..00000000000 --- a/ceph-deploy-overrides/ceph_deploy_dmcrypt.yaml +++ /dev/null @@ -1,3 +0,0 @@ -overrides: - ceph-deploy: - dmcrypt: yes diff --git a/ceph-deploy-overrides/disable_diff_journal_disk.yaml b/ceph-deploy-overrides/disable_diff_journal_disk.yaml deleted file mode 100644 index 5c998c512d3..00000000000 --- a/ceph-deploy-overrides/disable_diff_journal_disk.yaml +++ /dev/null @@ -1,3 +0,0 @@ -overrides: - ceph-deploy: - separate_journal_disk: diff --git a/ceph-deploy-overrides/enable_diff_journal_disk.yaml b/ceph-deploy-overrides/enable_diff_journal_disk.yaml deleted file mode 100644 index ea3f63415df..00000000000 --- a/ceph-deploy-overrides/enable_diff_journal_disk.yaml +++ /dev/null @@ -1,3 +0,0 @@ -overrides: - ceph-deploy: - separate_journal_disk: yes diff --git a/ceph-deploy-overrides/enable_dmcrypt_diff_journal_disk.yaml b/ceph-deploy-overrides/enable_dmcrypt_diff_journal_disk.yaml deleted file mode 100644 index 59cb799726e..00000000000 --- a/ceph-deploy-overrides/enable_dmcrypt_diff_journal_disk.yaml +++ /dev/null @@ -1,4 +0,0 @@ -overrides: - 
ceph-deploy: - dmcrypt: yes - separate_journal_disk: yes diff --git a/clusters/extra-client.yaml b/clusters/extra-client.yaml deleted file mode 100644 index 70ccbd028b2..00000000000 --- a/clusters/extra-client.yaml +++ /dev/null @@ -1,5 +0,0 @@ -roles: -- [mon.a, mon.c, osd.0, osd.1, osd.2] -- [mon.b, mds.a, osd.3, osd.4, osd.5] -- [client.0] -- [client.1] diff --git a/clusters/fixed-1.yaml b/clusters/fixed-1.yaml deleted file mode 100644 index 5f3a8f99233..00000000000 --- a/clusters/fixed-1.yaml +++ /dev/null @@ -1,2 +0,0 @@ -roles: -- [mon.a, mon.b, mon.c, osd.0, osd.1, osd.2, client.0] diff --git a/clusters/fixed-2.yaml b/clusters/fixed-2.yaml deleted file mode 100644 index 9a93c9cbd09..00000000000 --- a/clusters/fixed-2.yaml +++ /dev/null @@ -1,3 +0,0 @@ -roles: -- [mon.a, mon.c, osd.0, osd.1, osd.2, client.0] -- [mon.b, osd.3, osd.4, osd.5, client.1] diff --git a/clusters/fixed-3-cephfs.yaml b/clusters/fixed-3-cephfs.yaml deleted file mode 100644 index 74a24a3e9d6..00000000000 --- a/clusters/fixed-3-cephfs.yaml +++ /dev/null @@ -1,4 +0,0 @@ -roles: -- [mon.a, mds.a, osd.0, osd.1] -- [mon.b, mds.a-s, mon.c, osd.2, osd.3] -- [client.0] diff --git a/clusters/fixed-3.yaml b/clusters/fixed-3.yaml deleted file mode 100644 index 455de4f9831..00000000000 --- a/clusters/fixed-3.yaml +++ /dev/null @@ -1,4 +0,0 @@ -roles: -- [mon.a, mon.c, osd.0, osd.1, osd.2] -- [mon.b, osd.3, osd.4, osd.5] -- [client.0] diff --git a/config_options/cephdeploy_conf.yaml b/config_options/cephdeploy_conf.yaml deleted file mode 100644 index 7f9f0b797d3..00000000000 --- a/config_options/cephdeploy_conf.yaml +++ /dev/null @@ -1,6 +0,0 @@ -overrides: - ceph-deploy: - conf: - global: - mon pg warn min per osd: 2 - osd pool default size: 2 diff --git a/debug/buildpackages.yaml b/debug/buildpackages.yaml deleted file mode 100644 index 527ed66275a..00000000000 --- a/debug/buildpackages.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: - - buildpackages: - machine: - disk: 40 # GB - ram: 15000 # MB - 
cpus: 16 diff --git a/debug/mds_client.yaml b/debug/mds_client.yaml deleted file mode 100644 index c6fec3fc6f9..00000000000 --- a/debug/mds_client.yaml +++ /dev/null @@ -1,9 +0,0 @@ -overrides: - ceph: - conf: - mds: - debug ms: 1 - debug mds: 20 - client: - debug ms: 1 - debug client: 20 \ No newline at end of file diff --git a/distros/all/centos_6.3.yaml b/distros/all/centos_6.3.yaml deleted file mode 100644 index 32187d6daf0..00000000000 --- a/distros/all/centos_6.3.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: centos -os_version: "6.3" diff --git a/distros/all/centos_6.4.yaml b/distros/all/centos_6.4.yaml deleted file mode 100644 index 02383cd5f8c..00000000000 --- a/distros/all/centos_6.4.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: centos -os_version: "6.4" diff --git a/distros/all/centos_6.5.yaml b/distros/all/centos_6.5.yaml deleted file mode 100644 index 77c9e41f73e..00000000000 --- a/distros/all/centos_6.5.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: centos -os_version: "6.5" diff --git a/distros/all/centos_7.0.yaml b/distros/all/centos_7.0.yaml deleted file mode 100644 index bccb286013e..00000000000 --- a/distros/all/centos_7.0.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: centos -os_version: "7.0" diff --git a/distros/all/centos_7.1.yaml b/distros/all/centos_7.1.yaml deleted file mode 100644 index 74c68f96b5b..00000000000 --- a/distros/all/centos_7.1.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: centos -os_version: "7.1" diff --git a/distros/all/centos_7.2.yaml b/distros/all/centos_7.2.yaml deleted file mode 100644 index 44d2f0ec81f..00000000000 --- a/distros/all/centos_7.2.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: centos -os_version: "7.2" diff --git a/distros/all/debian_6.0.yaml b/distros/all/debian_6.0.yaml deleted file mode 100644 index 6820fa3c702..00000000000 --- a/distros/all/debian_6.0.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: debian -os_version: "6.0" diff --git a/distros/all/debian_7.0.yaml b/distros/all/debian_7.0.yaml deleted file mode 
100644 index 8100dc41e3d..00000000000 --- a/distros/all/debian_7.0.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: debian -os_version: "7.0" diff --git a/distros/all/fedora_17.yaml b/distros/all/fedora_17.yaml deleted file mode 100644 index 801053af0ae..00000000000 --- a/distros/all/fedora_17.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: fedora -os_version: "17" diff --git a/distros/all/fedora_18.yaml b/distros/all/fedora_18.yaml deleted file mode 100644 index 07872aa7edf..00000000000 --- a/distros/all/fedora_18.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: fedora -os_version: "18" diff --git a/distros/all/fedora_19.yaml b/distros/all/fedora_19.yaml deleted file mode 100644 index 5bac8aceea2..00000000000 --- a/distros/all/fedora_19.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: fedora -os_version: "19" diff --git a/distros/all/opensuse_12.2.yaml b/distros/all/opensuse_12.2.yaml deleted file mode 100644 index ee9f877a26b..00000000000 --- a/distros/all/opensuse_12.2.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: opensuse -os_version: "12.2" diff --git a/distros/all/opensuse_13.2.yaml b/distros/all/opensuse_13.2.yaml deleted file mode 100644 index 7551e81f3d4..00000000000 --- a/distros/all/opensuse_13.2.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: opensuse -os_version: "13.2" diff --git a/distros/all/opensuse_42.1.yaml b/distros/all/opensuse_42.1.yaml deleted file mode 100644 index 48c789dbda4..00000000000 --- a/distros/all/opensuse_42.1.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: opensuse -os_version: "42.1" diff --git a/distros/all/rhel_6.3.yaml b/distros/all/rhel_6.3.yaml deleted file mode 100644 index 6a8edcd5626..00000000000 --- a/distros/all/rhel_6.3.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: rhel -os_version: "6.3" diff --git a/distros/all/rhel_6.4.yaml b/distros/all/rhel_6.4.yaml deleted file mode 100644 index 5225495834a..00000000000 --- a/distros/all/rhel_6.4.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: rhel -os_version: "6.4" diff --git 
a/distros/all/rhel_6.5.yaml b/distros/all/rhel_6.5.yaml deleted file mode 100644 index 7db54bea1bd..00000000000 --- a/distros/all/rhel_6.5.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: rhel -os_version: "6.5" diff --git a/distros/all/rhel_7.0.yaml b/distros/all/rhel_7.0.yaml deleted file mode 100644 index c87c0bc135b..00000000000 --- a/distros/all/rhel_7.0.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: rhel -os_version: "7.0" diff --git a/distros/all/sles_11-sp2.yaml b/distros/all/sles_11-sp2.yaml deleted file mode 100644 index df9c3ca01c8..00000000000 --- a/distros/all/sles_11-sp2.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: sles -os_version: "11-sp2" diff --git a/distros/all/ubuntu_12.04.yaml b/distros/all/ubuntu_12.04.yaml deleted file mode 100644 index dbc3a8d9c58..00000000000 --- a/distros/all/ubuntu_12.04.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: ubuntu -os_version: "12.04" diff --git a/distros/all/ubuntu_12.10.yaml b/distros/all/ubuntu_12.10.yaml deleted file mode 100644 index ab655676e4c..00000000000 --- a/distros/all/ubuntu_12.10.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: ubuntu -os_version: "12.10" diff --git a/distros/all/ubuntu_14.04.yaml b/distros/all/ubuntu_14.04.yaml deleted file mode 100644 index 309e989feeb..00000000000 --- a/distros/all/ubuntu_14.04.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: ubuntu -os_version: "14.04" diff --git a/distros/all/ubuntu_14.04_aarch64.yaml b/distros/all/ubuntu_14.04_aarch64.yaml deleted file mode 100644 index 9dfbcb5102b..00000000000 --- a/distros/all/ubuntu_14.04_aarch64.yaml +++ /dev/null @@ -1,3 +0,0 @@ -os_type: ubuntu -os_version: "14.04" -arch: aarch64 diff --git a/distros/all/ubuntu_14.04_i686.yaml b/distros/all/ubuntu_14.04_i686.yaml deleted file mode 100644 index 4a0652e73b5..00000000000 --- a/distros/all/ubuntu_14.04_i686.yaml +++ /dev/null @@ -1,3 +0,0 @@ -os_type: ubuntu -os_version: "14.04" -arch: i686 diff --git a/distros/supported/centos_7.2.yaml b/distros/supported/centos_7.2.yaml deleted file mode 
120000 index 05e1679b997..00000000000 --- a/distros/supported/centos_7.2.yaml +++ /dev/null @@ -1 +0,0 @@ -../all/centos_7.2.yaml \ No newline at end of file diff --git a/distros/supported/ubuntu_14.04.yaml b/distros/supported/ubuntu_14.04.yaml deleted file mode 120000 index cf7fff7a866..00000000000 --- a/distros/supported/ubuntu_14.04.yaml +++ /dev/null @@ -1 +0,0 @@ -../all/ubuntu_14.04.yaml \ No newline at end of file diff --git a/erasure-code/ec-feature-plugins-v2.yaml b/erasure-code/ec-feature-plugins-v2.yaml deleted file mode 100644 index 102a4528924..00000000000 --- a/erasure-code/ec-feature-plugins-v2.yaml +++ /dev/null @@ -1,97 +0,0 @@ -# -# Test the expected behavior of the -# -# CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 -# -# feature. -# -roles: -- - mon.a - - mon.b - - osd.0 - - osd.1 -- - osd.2 - - mon.c -tasks: -# -# Install firefly -# -- install: - branch: firefly -- ceph: - fs: xfs -# -# We don't need mon.c for now: it will be used later to make sure an old -# mon cannot join the quorum once the feature has been activated -# -- ceph.stop: - daemons: [mon.c] -- exec: - mon.a: - - |- - ceph osd erasure-code-profile set WRONG plugin=WRONG - ceph osd pool create poolWRONG 12 12 erasure WRONG 2>&1 | grep "failed to load plugin using profile WRONG" -# -# Partial upgrade, osd.2 is not upgraded -# -- install.upgrade: - osd.0: -# -# a is the leader -# -- ceph.restart: - daemons: [mon.a] - wait-for-healthy: false -- exec: - mon.a: - - |- - ceph osd erasure-code-profile set profile-lrc plugin=lrc 2>&1 | grep "unsupported by: the monitor cluster" -- ceph.restart: - daemons: [mon.b, osd.1, osd.0] - wait-for-healthy: false - wait-for-osds-up: true -# -# The lrc plugin cannot be used because osd.2 is not upgraded yet -# and would crash. 
-# -- exec: - mon.a: - - |- - ceph osd erasure-code-profile set profile-lrc plugin=lrc 2>&1 | grep "unsupported by: osd.2" -# -# Taking osd.2 out, the rest of the cluster is upgraded -# -- ceph.stop: - daemons: [osd.2] -- sleep: - duration: 60 -# -# Creating an erasure code profile using the lrc plugin now works -# -- exec: - mon.a: - - "ceph osd erasure-code-profile set profile-lrc plugin=lrc" -# -# osd.2 won't be able to join the because is does not support the feature -# -- ceph.restart: - daemons: [osd.2] - wait-for-healthy: false -- sleep: - duration: 60 -- exec: - osd.2: - - |- - grep "protocol feature.*missing 100000000000" /var/log/ceph/ceph-osd.2.log -# -# mon.c won't be able to join the because it does not support the feature -# -- ceph.restart: - daemons: [mon.c] - wait-for-healthy: false -- sleep: - duration: 60 -- exec: - mon.c: - - |- - grep "missing.*feature" /var/log/ceph/ceph-mon.c.log diff --git a/erasure-code/ec-rados-default.yaml b/erasure-code/ec-rados-default.yaml deleted file mode 100644 index f2f0452257e..00000000000 --- a/erasure-code/ec-rados-default.yaml +++ /dev/null @@ -1,19 +0,0 @@ -workload: - sequential: - - rados: - clients: [client.0] - ops: 4000 - objects: 50 - ec_pool: true - op_weights: - read: 100 - write: 0 - append: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 - copy_from: 50 - setattr: 25 - rmattr: 25 - - print: "**** done rados ec sequential" diff --git a/erasure-code/ec-rados-parallel.yaml b/erasure-code/ec-rados-parallel.yaml deleted file mode 100644 index 798e7f713bb..00000000000 --- a/erasure-code/ec-rados-parallel.yaml +++ /dev/null @@ -1,19 +0,0 @@ -workload: - parallel: - - rados: - clients: [client.0] - ops: 4000 - objects: 50 - ec_pool: true - op_weights: - read: 100 - write: 0 - append: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 - copy_from: 50 - setattr: 25 - rmattr: 25 - - print: "**** done rados ec parallel" diff --git 
a/erasure-code/ec-rados-plugin=isa-k=2-m=1.yaml b/erasure-code/ec-rados-plugin=isa-k=2-m=1.yaml deleted file mode 100644 index fa8f49035f3..00000000000 --- a/erasure-code/ec-rados-plugin=isa-k=2-m=1.yaml +++ /dev/null @@ -1,24 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 50 - ec_pool: true - erasure_code_profile: - name: isaprofile - plugin: isa - k: 2 - m: 1 - technique: reed_sol_van - ruleset-failure-domain: osd - op_weights: - read: 100 - write: 0 - append: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 - copy_from: 50 - setattr: 25 - rmattr: 25 diff --git a/erasure-code/ec-rados-plugin=jerasure-k=2-m=1.yaml b/erasure-code/ec-rados-plugin=jerasure-k=2-m=1.yaml deleted file mode 100644 index d8fcffc495f..00000000000 --- a/erasure-code/ec-rados-plugin=jerasure-k=2-m=1.yaml +++ /dev/null @@ -1,24 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 50 - ec_pool: true - erasure_code_profile: - name: jerasure21profile - plugin: jerasure - k: 2 - m: 1 - technique: reed_sol_van - ruleset-failure-domain: osd - op_weights: - read: 100 - write: 0 - append: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 - copy_from: 50 - setattr: 25 - rmattr: 25 diff --git a/erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml b/erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml deleted file mode 100644 index 32d96f589db..00000000000 --- a/erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml +++ /dev/null @@ -1,30 +0,0 @@ -# -# k=3 implies a stripe_width of 1376*3 = 4128 which is different from -# the default value of 4096 It is also not a multiple of 1024*1024 and -# creates situations where rounding rules during recovery becomes -# necessary. 
-# -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 50 - ec_pool: true - erasure_code_profile: - name: jerasure31profile - plugin: jerasure - k: 3 - m: 1 - technique: reed_sol_van - ruleset-failure-domain: osd - op_weights: - read: 100 - write: 0 - append: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 - copy_from: 50 - setattr: 25 - rmattr: 25 diff --git a/erasure-code/ec-rados-plugin=lrc-k=4-m=2-l=3.yaml b/erasure-code/ec-rados-plugin=lrc-k=4-m=2-l=3.yaml deleted file mode 100644 index 6dfe210e81e..00000000000 --- a/erasure-code/ec-rados-plugin=lrc-k=4-m=2-l=3.yaml +++ /dev/null @@ -1,24 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 50 - ec_pool: true - erasure_code_profile: - name: lrcprofile - plugin: lrc - k: 4 - m: 2 - l: 3 - ruleset-failure-domain: osd - op_weights: - read: 100 - write: 0 - append: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 - copy_from: 50 - setattr: 25 - rmattr: 25 diff --git a/fs/btrfs.yaml b/fs/btrfs.yaml deleted file mode 100644 index 0b3f6fac7a5..00000000000 --- a/fs/btrfs.yaml +++ /dev/null @@ -1,7 +0,0 @@ -overrides: - ceph: - fs: btrfs - conf: - osd: - osd sloppy crc: true - osd op thread timeout: 60 diff --git a/fs/ext4.yaml b/fs/ext4.yaml deleted file mode 100644 index fde6751751d..00000000000 --- a/fs/ext4.yaml +++ /dev/null @@ -1,3 +0,0 @@ -overrides: - ceph: - fs: ext4 diff --git a/fs/xfs.yaml b/fs/xfs.yaml deleted file mode 100644 index 0d88e107df4..00000000000 --- a/fs/xfs.yaml +++ /dev/null @@ -1,6 +0,0 @@ -overrides: - ceph: - fs: xfs - conf: - osd: - osd sloppy crc: true \ No newline at end of file diff --git a/machine_types/schedule_rados.sh b/machine_types/schedule_rados.sh deleted file mode 100755 index befb7acd6f7..00000000000 --- a/machine_types/schedule_rados.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -# $1 - part -# $2 - branch name -# $3 - machine name - -teuthology-suite -v -c $2 -m $3 -k distro -s rados --subset $(echo 
"(($(date +%U) % 2) * 7) + $1" | bc)/14 diff --git a/machine_types/vps.yaml b/machine_types/vps.yaml deleted file mode 100644 index bffa0985da8..00000000000 --- a/machine_types/vps.yaml +++ /dev/null @@ -1,16 +0,0 @@ -overrides: - ceph: - conf: - global: - osd heartbeat grace: 100 - # this line to address issue #1017 - mon lease: 15 - mon lease ack timeout: 25 - rgw: - default_idle_timeout: 1200 - s3tests: - idle_timeout: 1200 - ceph-fuse: - client.0: - mount_wait: 60 - mount_timeout: 120 diff --git a/overrides/2-size-1-min-size.yaml b/overrides/2-size-1-min-size.yaml deleted file mode 100644 index d710aee22cf..00000000000 --- a/overrides/2-size-1-min-size.yaml +++ /dev/null @@ -1,6 +0,0 @@ -overrides: - ceph: - conf: - global: - osd_pool_default_size: 2 - osd_pool_default_min_size: 1 diff --git a/overrides/2-size-2-min-size.yaml b/overrides/2-size-2-min-size.yaml deleted file mode 100644 index 42b854eb435..00000000000 --- a/overrides/2-size-2-min-size.yaml +++ /dev/null @@ -1,6 +0,0 @@ -overrides: - ceph: - conf: - global: - osd_pool_default_size: 2 - osd_pool_default_min_size: 2 diff --git a/overrides/3-size-2-min-size.yaml b/overrides/3-size-2-min-size.yaml deleted file mode 100644 index 3f2387e6060..00000000000 --- a/overrides/3-size-2-min-size.yaml +++ /dev/null @@ -1,6 +0,0 @@ -overrides: - ceph: - conf: - global: - osd_pool_default_size: 3 - osd_pool_default_min_size: 2 diff --git a/overrides/short_pg_log.yaml b/overrides/short_pg_log.yaml deleted file mode 100644 index 0feecd2da6c..00000000000 --- a/overrides/short_pg_log.yaml +++ /dev/null @@ -1,6 +0,0 @@ -overrides: - ceph: - conf: - global: - osd_min_pg_log_entries: 100 - osd_max_pg_log_entries: 200 diff --git a/overrides/whitelist_wrongly_marked_down.yaml b/overrides/whitelist_wrongly_marked_down.yaml deleted file mode 100644 index 5cf329fa0e1..00000000000 --- a/overrides/whitelist_wrongly_marked_down.yaml +++ /dev/null @@ -1,10 +0,0 @@ -overrides: - ceph: - log-whitelist: - - wrongly marked me down - 
conf: - mds: - debug mds: 20 - debug ms: 1 - client: - debug client: 10 \ No newline at end of file diff --git a/qa/.gitignore b/qa/.gitignore new file mode 100644 index 00000000000..5a337a00754 --- /dev/null +++ b/qa/.gitignore @@ -0,0 +1,5 @@ +*~ +.*.sw[nmop] +__pycache__ +*.pyc +.tox diff --git a/qa/README b/qa/README new file mode 100644 index 00000000000..0e32ce9f638 --- /dev/null +++ b/qa/README @@ -0,0 +1,52 @@ +ceph-qa-suite +------------- + +clusters/ - some predefined cluster layouts +suites/ - set suite + +The suites directory has a hierarchical collection of tests. This can be +freeform, but generally follows the convention of + + suites///... + +A test is described by a yaml fragment. + +A test can exist as a single .yaml file in the directory tree. For example: + + suites/foo/one.yaml + suites/foo/two.yaml + +is a simple group of two tests. + +A directory with a magic '+' file represents a test that combines all +other items in the directory into a single yaml fragment. For example: + + suites/foo/bar/+ + suites/foo/bar/a.yaml + suites/foo/bar/b.yaml + suites/foo/bar/c.yaml + +is a single test consisting of a + b + c. + +A directory with a magic '%' file represents a test matrix formed from +all other items in the directory. For example, + + suites/baz/% + suites/baz/a.yaml + suites/baz/b/b1.yaml + suites/baz/b/b2.yaml + suites/baz/c.yaml + suites/baz/d/d1.yaml + suites/baz/d/d2.yaml + +is a 4-dimensional test matrix. Two dimensions (a, c) are trivial (1 +item), so this is really 2x2 = 4 tests, which are + + a + b1 + c + d1 + a + b1 + c + d2 + a + b2 + c + d1 + a + b2 + c + d2 + +Symlinks are okay. 
+ +The teuthology code can be found in https://github.com/ceph/teuthology.git diff --git a/qa/archs/aarch64.yaml b/qa/archs/aarch64.yaml new file mode 100644 index 00000000000..6399b9959b5 --- /dev/null +++ b/qa/archs/aarch64.yaml @@ -0,0 +1 @@ +arch: aarch64 diff --git a/qa/archs/armv7.yaml b/qa/archs/armv7.yaml new file mode 100644 index 00000000000..c261ebd52a9 --- /dev/null +++ b/qa/archs/armv7.yaml @@ -0,0 +1 @@ +arch: armv7l diff --git a/qa/archs/i686.yaml b/qa/archs/i686.yaml new file mode 100644 index 00000000000..a920e5a9ed4 --- /dev/null +++ b/qa/archs/i686.yaml @@ -0,0 +1 @@ +arch: i686 diff --git a/qa/archs/x86_64.yaml b/qa/archs/x86_64.yaml new file mode 100644 index 00000000000..c2409f5d0dc --- /dev/null +++ b/qa/archs/x86_64.yaml @@ -0,0 +1 @@ +arch: x86_64 diff --git a/qa/ceph-deploy-overrides/ceph_deploy_dmcrypt.yaml b/qa/ceph-deploy-overrides/ceph_deploy_dmcrypt.yaml new file mode 100644 index 00000000000..859a37faa88 --- /dev/null +++ b/qa/ceph-deploy-overrides/ceph_deploy_dmcrypt.yaml @@ -0,0 +1,3 @@ +overrides: + ceph-deploy: + dmcrypt: yes diff --git a/qa/ceph-deploy-overrides/disable_diff_journal_disk.yaml b/qa/ceph-deploy-overrides/disable_diff_journal_disk.yaml new file mode 100644 index 00000000000..5c998c512d3 --- /dev/null +++ b/qa/ceph-deploy-overrides/disable_diff_journal_disk.yaml @@ -0,0 +1,3 @@ +overrides: + ceph-deploy: + separate_journal_disk: diff --git a/qa/ceph-deploy-overrides/enable_diff_journal_disk.yaml b/qa/ceph-deploy-overrides/enable_diff_journal_disk.yaml new file mode 100644 index 00000000000..ea3f63415df --- /dev/null +++ b/qa/ceph-deploy-overrides/enable_diff_journal_disk.yaml @@ -0,0 +1,3 @@ +overrides: + ceph-deploy: + separate_journal_disk: yes diff --git a/qa/ceph-deploy-overrides/enable_dmcrypt_diff_journal_disk.yaml b/qa/ceph-deploy-overrides/enable_dmcrypt_diff_journal_disk.yaml new file mode 100644 index 00000000000..59cb799726e --- /dev/null +++ 
b/qa/ceph-deploy-overrides/enable_dmcrypt_diff_journal_disk.yaml @@ -0,0 +1,4 @@ +overrides: + ceph-deploy: + dmcrypt: yes + separate_journal_disk: yes diff --git a/qa/clusters/extra-client.yaml b/qa/clusters/extra-client.yaml new file mode 100644 index 00000000000..70ccbd028b2 --- /dev/null +++ b/qa/clusters/extra-client.yaml @@ -0,0 +1,5 @@ +roles: +- [mon.a, mon.c, osd.0, osd.1, osd.2] +- [mon.b, mds.a, osd.3, osd.4, osd.5] +- [client.0] +- [client.1] diff --git a/qa/clusters/fixed-1.yaml b/qa/clusters/fixed-1.yaml new file mode 100644 index 00000000000..5f3a8f99233 --- /dev/null +++ b/qa/clusters/fixed-1.yaml @@ -0,0 +1,2 @@ +roles: +- [mon.a, mon.b, mon.c, osd.0, osd.1, osd.2, client.0] diff --git a/qa/clusters/fixed-2.yaml b/qa/clusters/fixed-2.yaml new file mode 100644 index 00000000000..9a93c9cbd09 --- /dev/null +++ b/qa/clusters/fixed-2.yaml @@ -0,0 +1,3 @@ +roles: +- [mon.a, mon.c, osd.0, osd.1, osd.2, client.0] +- [mon.b, osd.3, osd.4, osd.5, client.1] diff --git a/qa/clusters/fixed-3-cephfs.yaml b/qa/clusters/fixed-3-cephfs.yaml new file mode 100644 index 00000000000..74a24a3e9d6 --- /dev/null +++ b/qa/clusters/fixed-3-cephfs.yaml @@ -0,0 +1,4 @@ +roles: +- [mon.a, mds.a, osd.0, osd.1] +- [mon.b, mds.a-s, mon.c, osd.2, osd.3] +- [client.0] diff --git a/qa/clusters/fixed-3.yaml b/qa/clusters/fixed-3.yaml new file mode 100644 index 00000000000..455de4f9831 --- /dev/null +++ b/qa/clusters/fixed-3.yaml @@ -0,0 +1,4 @@ +roles: +- [mon.a, mon.c, osd.0, osd.1, osd.2] +- [mon.b, osd.3, osd.4, osd.5] +- [client.0] diff --git a/qa/config_options/cephdeploy_conf.yaml b/qa/config_options/cephdeploy_conf.yaml new file mode 100644 index 00000000000..7f9f0b797d3 --- /dev/null +++ b/qa/config_options/cephdeploy_conf.yaml @@ -0,0 +1,6 @@ +overrides: + ceph-deploy: + conf: + global: + mon pg warn min per osd: 2 + osd pool default size: 2 diff --git a/qa/debug/buildpackages.yaml b/qa/debug/buildpackages.yaml new file mode 100644 index 00000000000..527ed66275a --- 
/dev/null +++ b/qa/debug/buildpackages.yaml @@ -0,0 +1,6 @@ +tasks: + - buildpackages: + machine: + disk: 40 # GB + ram: 15000 # MB + cpus: 16 diff --git a/qa/debug/mds_client.yaml b/qa/debug/mds_client.yaml new file mode 100644 index 00000000000..c6fec3fc6f9 --- /dev/null +++ b/qa/debug/mds_client.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + mds: + debug ms: 1 + debug mds: 20 + client: + debug ms: 1 + debug client: 20 \ No newline at end of file diff --git a/qa/distros/all/centos_6.3.yaml b/qa/distros/all/centos_6.3.yaml new file mode 100644 index 00000000000..32187d6daf0 --- /dev/null +++ b/qa/distros/all/centos_6.3.yaml @@ -0,0 +1,2 @@ +os_type: centos +os_version: "6.3" diff --git a/qa/distros/all/centos_6.4.yaml b/qa/distros/all/centos_6.4.yaml new file mode 100644 index 00000000000..02383cd5f8c --- /dev/null +++ b/qa/distros/all/centos_6.4.yaml @@ -0,0 +1,2 @@ +os_type: centos +os_version: "6.4" diff --git a/qa/distros/all/centos_6.5.yaml b/qa/distros/all/centos_6.5.yaml new file mode 100644 index 00000000000..77c9e41f73e --- /dev/null +++ b/qa/distros/all/centos_6.5.yaml @@ -0,0 +1,2 @@ +os_type: centos +os_version: "6.5" diff --git a/qa/distros/all/centos_7.0.yaml b/qa/distros/all/centos_7.0.yaml new file mode 100644 index 00000000000..bccb286013e --- /dev/null +++ b/qa/distros/all/centos_7.0.yaml @@ -0,0 +1,2 @@ +os_type: centos +os_version: "7.0" diff --git a/qa/distros/all/centos_7.1.yaml b/qa/distros/all/centos_7.1.yaml new file mode 100644 index 00000000000..74c68f96b5b --- /dev/null +++ b/qa/distros/all/centos_7.1.yaml @@ -0,0 +1,2 @@ +os_type: centos +os_version: "7.1" diff --git a/qa/distros/all/centos_7.2.yaml b/qa/distros/all/centos_7.2.yaml new file mode 100644 index 00000000000..44d2f0ec81f --- /dev/null +++ b/qa/distros/all/centos_7.2.yaml @@ -0,0 +1,2 @@ +os_type: centos +os_version: "7.2" diff --git a/qa/distros/all/debian_6.0.yaml b/qa/distros/all/debian_6.0.yaml new file mode 100644 index 00000000000..6820fa3c702 --- /dev/null +++ 
b/qa/distros/all/debian_6.0.yaml @@ -0,0 +1,2 @@ +os_type: debian +os_version: "6.0" diff --git a/qa/distros/all/debian_7.0.yaml b/qa/distros/all/debian_7.0.yaml new file mode 100644 index 00000000000..8100dc41e3d --- /dev/null +++ b/qa/distros/all/debian_7.0.yaml @@ -0,0 +1,2 @@ +os_type: debian +os_version: "7.0" diff --git a/qa/distros/all/fedora_17.yaml b/qa/distros/all/fedora_17.yaml new file mode 100644 index 00000000000..801053af0ae --- /dev/null +++ b/qa/distros/all/fedora_17.yaml @@ -0,0 +1,2 @@ +os_type: fedora +os_version: "17" diff --git a/qa/distros/all/fedora_18.yaml b/qa/distros/all/fedora_18.yaml new file mode 100644 index 00000000000..07872aa7edf --- /dev/null +++ b/qa/distros/all/fedora_18.yaml @@ -0,0 +1,2 @@ +os_type: fedora +os_version: "18" diff --git a/qa/distros/all/fedora_19.yaml b/qa/distros/all/fedora_19.yaml new file mode 100644 index 00000000000..5bac8aceea2 --- /dev/null +++ b/qa/distros/all/fedora_19.yaml @@ -0,0 +1,2 @@ +os_type: fedora +os_version: "19" diff --git a/qa/distros/all/opensuse_12.2.yaml b/qa/distros/all/opensuse_12.2.yaml new file mode 100644 index 00000000000..ee9f877a26b --- /dev/null +++ b/qa/distros/all/opensuse_12.2.yaml @@ -0,0 +1,2 @@ +os_type: opensuse +os_version: "12.2" diff --git a/qa/distros/all/opensuse_13.2.yaml b/qa/distros/all/opensuse_13.2.yaml new file mode 100644 index 00000000000..7551e81f3d4 --- /dev/null +++ b/qa/distros/all/opensuse_13.2.yaml @@ -0,0 +1,2 @@ +os_type: opensuse +os_version: "13.2" diff --git a/qa/distros/all/opensuse_42.1.yaml b/qa/distros/all/opensuse_42.1.yaml new file mode 100644 index 00000000000..48c789dbda4 --- /dev/null +++ b/qa/distros/all/opensuse_42.1.yaml @@ -0,0 +1,2 @@ +os_type: opensuse +os_version: "42.1" diff --git a/qa/distros/all/rhel_6.3.yaml b/qa/distros/all/rhel_6.3.yaml new file mode 100644 index 00000000000..6a8edcd5626 --- /dev/null +++ b/qa/distros/all/rhel_6.3.yaml @@ -0,0 +1,2 @@ +os_type: rhel +os_version: "6.3" diff --git a/qa/distros/all/rhel_6.4.yaml 
b/qa/distros/all/rhel_6.4.yaml new file mode 100644 index 00000000000..5225495834a --- /dev/null +++ b/qa/distros/all/rhel_6.4.yaml @@ -0,0 +1,2 @@ +os_type: rhel +os_version: "6.4" diff --git a/qa/distros/all/rhel_6.5.yaml b/qa/distros/all/rhel_6.5.yaml new file mode 100644 index 00000000000..7db54bea1bd --- /dev/null +++ b/qa/distros/all/rhel_6.5.yaml @@ -0,0 +1,2 @@ +os_type: rhel +os_version: "6.5" diff --git a/qa/distros/all/rhel_7.0.yaml b/qa/distros/all/rhel_7.0.yaml new file mode 100644 index 00000000000..c87c0bc135b --- /dev/null +++ b/qa/distros/all/rhel_7.0.yaml @@ -0,0 +1,2 @@ +os_type: rhel +os_version: "7.0" diff --git a/qa/distros/all/sles_11-sp2.yaml b/qa/distros/all/sles_11-sp2.yaml new file mode 100644 index 00000000000..df9c3ca01c8 --- /dev/null +++ b/qa/distros/all/sles_11-sp2.yaml @@ -0,0 +1,2 @@ +os_type: sles +os_version: "11-sp2" diff --git a/qa/distros/all/ubuntu_12.04.yaml b/qa/distros/all/ubuntu_12.04.yaml new file mode 100644 index 00000000000..dbc3a8d9c58 --- /dev/null +++ b/qa/distros/all/ubuntu_12.04.yaml @@ -0,0 +1,2 @@ +os_type: ubuntu +os_version: "12.04" diff --git a/qa/distros/all/ubuntu_12.10.yaml b/qa/distros/all/ubuntu_12.10.yaml new file mode 100644 index 00000000000..ab655676e4c --- /dev/null +++ b/qa/distros/all/ubuntu_12.10.yaml @@ -0,0 +1,2 @@ +os_type: ubuntu +os_version: "12.10" diff --git a/qa/distros/all/ubuntu_14.04.yaml b/qa/distros/all/ubuntu_14.04.yaml new file mode 100644 index 00000000000..309e989feeb --- /dev/null +++ b/qa/distros/all/ubuntu_14.04.yaml @@ -0,0 +1,2 @@ +os_type: ubuntu +os_version: "14.04" diff --git a/qa/distros/all/ubuntu_14.04_aarch64.yaml b/qa/distros/all/ubuntu_14.04_aarch64.yaml new file mode 100644 index 00000000000..9dfbcb5102b --- /dev/null +++ b/qa/distros/all/ubuntu_14.04_aarch64.yaml @@ -0,0 +1,3 @@ +os_type: ubuntu +os_version: "14.04" +arch: aarch64 diff --git a/qa/distros/all/ubuntu_14.04_i686.yaml b/qa/distros/all/ubuntu_14.04_i686.yaml new file mode 100644 index 
00000000000..4a0652e73b5 --- /dev/null +++ b/qa/distros/all/ubuntu_14.04_i686.yaml @@ -0,0 +1,3 @@ +os_type: ubuntu +os_version: "14.04" +arch: i686 diff --git a/qa/distros/supported/centos_7.2.yaml b/qa/distros/supported/centos_7.2.yaml new file mode 120000 index 00000000000..05e1679b997 --- /dev/null +++ b/qa/distros/supported/centos_7.2.yaml @@ -0,0 +1 @@ +../all/centos_7.2.yaml \ No newline at end of file diff --git a/qa/distros/supported/ubuntu_14.04.yaml b/qa/distros/supported/ubuntu_14.04.yaml new file mode 120000 index 00000000000..cf7fff7a866 --- /dev/null +++ b/qa/distros/supported/ubuntu_14.04.yaml @@ -0,0 +1 @@ +../all/ubuntu_14.04.yaml \ No newline at end of file diff --git a/qa/erasure-code/ec-feature-plugins-v2.yaml b/qa/erasure-code/ec-feature-plugins-v2.yaml new file mode 100644 index 00000000000..102a4528924 --- /dev/null +++ b/qa/erasure-code/ec-feature-plugins-v2.yaml @@ -0,0 +1,97 @@ +# +# Test the expected behavior of the +# +# CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 +# +# feature. 
+# +roles: +- - mon.a + - mon.b + - osd.0 + - osd.1 +- - osd.2 + - mon.c +tasks: +# +# Install firefly +# +- install: + branch: firefly +- ceph: + fs: xfs +# +# We don't need mon.c for now: it will be used later to make sure an old +# mon cannot join the quorum once the feature has been activated +# +- ceph.stop: + daemons: [mon.c] +- exec: + mon.a: + - |- + ceph osd erasure-code-profile set WRONG plugin=WRONG + ceph osd pool create poolWRONG 12 12 erasure WRONG 2>&1 | grep "failed to load plugin using profile WRONG" +# +# Partial upgrade, osd.2 is not upgraded +# +- install.upgrade: + osd.0: +# +# a is the leader +# +- ceph.restart: + daemons: [mon.a] + wait-for-healthy: false +- exec: + mon.a: + - |- + ceph osd erasure-code-profile set profile-lrc plugin=lrc 2>&1 | grep "unsupported by: the monitor cluster" +- ceph.restart: + daemons: [mon.b, osd.1, osd.0] + wait-for-healthy: false + wait-for-osds-up: true +# +# The lrc plugin cannot be used because osd.2 is not upgraded yet +# and would crash. 
+# +- exec: + mon.a: + - |- + ceph osd erasure-code-profile set profile-lrc plugin=lrc 2>&1 | grep "unsupported by: osd.2" +# +# Taking osd.2 out, the rest of the cluster is upgraded +# +- ceph.stop: + daemons: [osd.2] +- sleep: + duration: 60 +# +# Creating an erasure code profile using the lrc plugin now works +# +- exec: + mon.a: + - "ceph osd erasure-code-profile set profile-lrc plugin=lrc" +# +# osd.2 won't be able to join the cluster because it does not support the feature +# +- ceph.restart: + daemons: [osd.2] + wait-for-healthy: false +- sleep: + duration: 60 +- exec: + osd.2: + - |- + grep "protocol feature.*missing 100000000000" /var/log/ceph/ceph-osd.2.log +# +# mon.c won't be able to join the quorum because it does not support the feature +# +- ceph.restart: + daemons: [mon.c] + wait-for-healthy: false +- sleep: + duration: 60 +- exec: + mon.c: + - |- + grep "missing.*feature" /var/log/ceph/ceph-mon.c.log diff --git a/qa/erasure-code/ec-rados-default.yaml b/qa/erasure-code/ec-rados-default.yaml new file mode 100644 index 00000000000..f2f0452257e --- /dev/null +++ b/qa/erasure-code/ec-rados-default.yaml @@ -0,0 +1,19 @@ +workload: + sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 + - print: "**** done rados ec sequential" diff --git a/qa/erasure-code/ec-rados-parallel.yaml b/qa/erasure-code/ec-rados-parallel.yaml new file mode 100644 index 00000000000..798e7f713bb --- /dev/null +++ b/qa/erasure-code/ec-rados-parallel.yaml @@ -0,0 +1,19 @@ +workload: + parallel: + - rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 + - print: "**** done rados ec parallel" diff --git 
a/qa/erasure-code/ec-rados-plugin=isa-k=2-m=1.yaml b/qa/erasure-code/ec-rados-plugin=isa-k=2-m=1.yaml new file mode 100644 index 00000000000..fa8f49035f3 --- /dev/null +++ b/qa/erasure-code/ec-rados-plugin=isa-k=2-m=1.yaml @@ -0,0 +1,24 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + erasure_code_profile: + name: isaprofile + plugin: isa + k: 2 + m: 1 + technique: reed_sol_van + ruleset-failure-domain: osd + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/erasure-code/ec-rados-plugin=jerasure-k=2-m=1.yaml b/qa/erasure-code/ec-rados-plugin=jerasure-k=2-m=1.yaml new file mode 100644 index 00000000000..d8fcffc495f --- /dev/null +++ b/qa/erasure-code/ec-rados-plugin=jerasure-k=2-m=1.yaml @@ -0,0 +1,24 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + erasure_code_profile: + name: jerasure21profile + plugin: jerasure + k: 2 + m: 1 + technique: reed_sol_van + ruleset-failure-domain: osd + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml b/qa/erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml new file mode 100644 index 00000000000..32d96f589db --- /dev/null +++ b/qa/erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml @@ -0,0 +1,30 @@ +# +# k=3 implies a stripe_width of 1376*3 = 4128 which is different from +# the default value of 4096. It is also not a multiple of 1024*1024 and +# creates situations where rounding rules during recovery become +# necessary. 
+# +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + erasure_code_profile: + name: jerasure31profile + plugin: jerasure + k: 3 + m: 1 + technique: reed_sol_van + ruleset-failure-domain: osd + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/erasure-code/ec-rados-plugin=lrc-k=4-m=2-l=3.yaml b/qa/erasure-code/ec-rados-plugin=lrc-k=4-m=2-l=3.yaml new file mode 100644 index 00000000000..6dfe210e81e --- /dev/null +++ b/qa/erasure-code/ec-rados-plugin=lrc-k=4-m=2-l=3.yaml @@ -0,0 +1,24 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + erasure_code_profile: + name: lrcprofile + plugin: lrc + k: 4 + m: 2 + l: 3 + ruleset-failure-domain: osd + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/fs/btrfs.yaml b/qa/fs/btrfs.yaml new file mode 100644 index 00000000000..0b3f6fac7a5 --- /dev/null +++ b/qa/fs/btrfs.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + fs: btrfs + conf: + osd: + osd sloppy crc: true + osd op thread timeout: 60 diff --git a/qa/fs/ext4.yaml b/qa/fs/ext4.yaml new file mode 100644 index 00000000000..fde6751751d --- /dev/null +++ b/qa/fs/ext4.yaml @@ -0,0 +1,3 @@ +overrides: + ceph: + fs: ext4 diff --git a/qa/fs/xfs.yaml b/qa/fs/xfs.yaml new file mode 100644 index 00000000000..0d88e107df4 --- /dev/null +++ b/qa/fs/xfs.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + fs: xfs + conf: + osd: + osd sloppy crc: true \ No newline at end of file diff --git a/qa/machine_types/schedule_rados.sh b/qa/machine_types/schedule_rados.sh new file mode 100755 index 00000000000..befb7acd6f7 --- /dev/null +++ b/qa/machine_types/schedule_rados.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +# $1 - part +# $2 - branch name +# $3 - machine name + +teuthology-suite -v -c $2 -m $3 -k distro -s 
rados --subset $(echo "(($(date +%U) % 2) * 7) + $1" | bc)/14 diff --git a/qa/machine_types/vps.yaml b/qa/machine_types/vps.yaml new file mode 100644 index 00000000000..bffa0985da8 --- /dev/null +++ b/qa/machine_types/vps.yaml @@ -0,0 +1,16 @@ +overrides: + ceph: + conf: + global: + osd heartbeat grace: 100 + # this line to address issue #1017 + mon lease: 15 + mon lease ack timeout: 25 + rgw: + default_idle_timeout: 1200 + s3tests: + idle_timeout: 1200 + ceph-fuse: + client.0: + mount_wait: 60 + mount_timeout: 120 diff --git a/qa/overrides/2-size-1-min-size.yaml b/qa/overrides/2-size-1-min-size.yaml new file mode 100644 index 00000000000..d710aee22cf --- /dev/null +++ b/qa/overrides/2-size-1-min-size.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + osd_pool_default_size: 2 + osd_pool_default_min_size: 1 diff --git a/qa/overrides/2-size-2-min-size.yaml b/qa/overrides/2-size-2-min-size.yaml new file mode 100644 index 00000000000..42b854eb435 --- /dev/null +++ b/qa/overrides/2-size-2-min-size.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + osd_pool_default_size: 2 + osd_pool_default_min_size: 2 diff --git a/qa/overrides/3-size-2-min-size.yaml b/qa/overrides/3-size-2-min-size.yaml new file mode 100644 index 00000000000..3f2387e6060 --- /dev/null +++ b/qa/overrides/3-size-2-min-size.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + osd_pool_default_size: 3 + osd_pool_default_min_size: 2 diff --git a/qa/overrides/short_pg_log.yaml b/qa/overrides/short_pg_log.yaml new file mode 100644 index 00000000000..0feecd2da6c --- /dev/null +++ b/qa/overrides/short_pg_log.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + osd_min_pg_log_entries: 100 + osd_max_pg_log_entries: 200 diff --git a/qa/overrides/whitelist_wrongly_marked_down.yaml b/qa/overrides/whitelist_wrongly_marked_down.yaml new file mode 100644 index 00000000000..5cf329fa0e1 --- /dev/null +++ b/qa/overrides/whitelist_wrongly_marked_down.yaml @@ -0,0 +1,10 @@ +overrides: + 
ceph: + log-whitelist: + - wrongly marked me down + conf: + mds: + debug mds: 20 + debug ms: 1 + client: + debug client: 10 \ No newline at end of file diff --git a/qa/rgw_pool_type/ec-cache.yaml b/qa/rgw_pool_type/ec-cache.yaml new file mode 100644 index 00000000000..6462fbe8862 --- /dev/null +++ b/qa/rgw_pool_type/ec-cache.yaml @@ -0,0 +1,6 @@ +overrides: + rgw: + ec-data-pool: true + cache-pools: true + s3tests: + slow_backend: true diff --git a/qa/rgw_pool_type/ec-profile.yaml b/qa/rgw_pool_type/ec-profile.yaml new file mode 100644 index 00000000000..52798f85e72 --- /dev/null +++ b/qa/rgw_pool_type/ec-profile.yaml @@ -0,0 +1,10 @@ +overrides: + rgw: + ec-data-pool: true + erasure_code_profile: + name: testprofile + k: 3 + m: 1 + ruleset-failure-domain: osd + s3tests: + slow_backend: true diff --git a/qa/rgw_pool_type/ec.yaml b/qa/rgw_pool_type/ec.yaml new file mode 100644 index 00000000000..7c99b7f85c8 --- /dev/null +++ b/qa/rgw_pool_type/ec.yaml @@ -0,0 +1,5 @@ +overrides: + rgw: + ec-data-pool: true + s3tests: + slow_backend: true diff --git a/qa/rgw_pool_type/replicated.yaml b/qa/rgw_pool_type/replicated.yaml new file mode 100644 index 00000000000..c91709eaae7 --- /dev/null +++ b/qa/rgw_pool_type/replicated.yaml @@ -0,0 +1,3 @@ +overrides: + rgw: + ec-data-pool: false diff --git a/qa/suites/big/rados-thrash/% b/qa/suites/big/rados-thrash/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/big/rados-thrash/ceph/ceph.yaml b/qa/suites/big/rados-thrash/ceph/ceph.yaml new file mode 100644 index 00000000000..2030acb9083 --- /dev/null +++ b/qa/suites/big/rados-thrash/ceph/ceph.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/big/rados-thrash/clusters/big.yaml b/qa/suites/big/rados-thrash/clusters/big.yaml new file mode 100644 index 00000000000..18197ad8571 --- /dev/null +++ b/qa/suites/big/rados-thrash/clusters/big.yaml @@ -0,0 +1,68 @@ +roles: +- [osd.0, osd.1, osd.2, client.0, mon.a] +- [osd.3, osd.4, osd.5, 
client.1, mon.b] +- [osd.6, osd.7, osd.8, client.2, mon.c] +- [osd.9, osd.10, osd.11, client.3, mon.d] +- [osd.12, osd.13, osd.14, client.4, mon.e] +- [osd.15, osd.16, osd.17, client.5] +- [osd.18, osd.19, osd.20, client.6] +- [osd.21, osd.22, osd.23, client.7] +- [osd.24, osd.25, osd.26, client.8] +- [osd.27, osd.28, osd.29, client.9] +- [osd.30, osd.31, osd.32, client.10] +- [osd.33, osd.34, osd.35, client.11] +- [osd.36, osd.37, osd.38, client.12] +- [osd.39, osd.40, osd.41, client.13] +- [osd.42, osd.43, osd.44, client.14] +- [osd.45, osd.46, osd.47, client.15] +- [osd.48, osd.49, osd.50, client.16] +- [osd.51, osd.52, osd.53, client.17] +- [osd.54, osd.55, osd.56, client.18] +- [osd.57, osd.58, osd.59, client.19] +- [osd.60, osd.61, osd.62, client.20] +- [osd.63, osd.64, osd.65, client.21] +- [osd.66, osd.67, osd.68, client.22] +- [osd.69, osd.70, osd.71, client.23] +- [osd.72, osd.73, osd.74, client.24] +- [osd.75, osd.76, osd.77, client.25] +- [osd.78, osd.79, osd.80, client.26] +- [osd.81, osd.82, osd.83, client.27] +- [osd.84, osd.85, osd.86, client.28] +- [osd.87, osd.88, osd.89, client.29] +- [osd.90, osd.91, osd.92, client.30] +- [osd.93, osd.94, osd.95, client.31] +- [osd.96, osd.97, osd.98, client.32] +- [osd.99, osd.100, osd.101, client.33] +- [osd.102, osd.103, osd.104, client.34] +- [osd.105, osd.106, osd.107, client.35] +- [osd.108, osd.109, osd.110, client.36] +- [osd.111, osd.112, osd.113, client.37] +- [osd.114, osd.115, osd.116, client.38] +- [osd.117, osd.118, osd.119, client.39] +- [osd.120, osd.121, osd.122, client.40] +- [osd.123, osd.124, osd.125, client.41] +- [osd.126, osd.127, osd.128, client.42] +- [osd.129, osd.130, osd.131, client.43] +- [osd.132, osd.133, osd.134, client.44] +- [osd.135, osd.136, osd.137, client.45] +- [osd.138, osd.139, osd.140, client.46] +- [osd.141, osd.142, osd.143, client.47] +- [osd.144, osd.145, osd.146, client.48] +- [osd.147, osd.148, osd.149, client.49] +- [osd.150, osd.151, osd.152, client.50] +#- 
[osd.153, osd.154, osd.155, client.51] +#- [osd.156, osd.157, osd.158, client.52] +#- [osd.159, osd.160, osd.161, client.53] +#- [osd.162, osd.163, osd.164, client.54] +#- [osd.165, osd.166, osd.167, client.55] +#- [osd.168, osd.169, osd.170, client.56] +#- [osd.171, osd.172, osd.173, client.57] +#- [osd.174, osd.175, osd.176, client.58] +#- [osd.177, osd.178, osd.179, client.59] +#- [osd.180, osd.181, osd.182, client.60] +#- [osd.183, osd.184, osd.185, client.61] +#- [osd.186, osd.187, osd.188, client.62] +#- [osd.189, osd.190, osd.191, client.63] +#- [osd.192, osd.193, osd.194, client.64] +#- [osd.195, osd.196, osd.197, client.65] +#- [osd.198, osd.199, osd.200, client.66] diff --git a/qa/suites/big/rados-thrash/clusters/medium.yaml b/qa/suites/big/rados-thrash/clusters/medium.yaml new file mode 100644 index 00000000000..48b66dd5ca3 --- /dev/null +++ b/qa/suites/big/rados-thrash/clusters/medium.yaml @@ -0,0 +1,22 @@ +roles: +- [osd.0, osd.1, osd.2, client.0, mon.a] +- [osd.3, osd.4, osd.5, client.1, mon.b] +- [osd.6, osd.7, osd.8, client.2, mon.c] +- [osd.9, osd.10, osd.11, client.3, mon.d] +- [osd.12, osd.13, osd.14, client.4, mon.e] +- [osd.15, osd.16, osd.17, client.5] +- [osd.18, osd.19, osd.20, client.6] +- [osd.21, osd.22, osd.23, client.7] +- [osd.24, osd.25, osd.26, client.8] +- [osd.27, osd.28, osd.29, client.9] +- [osd.30, osd.31, osd.32, client.10] +- [osd.33, osd.34, osd.35, client.11] +- [osd.36, osd.37, osd.38, client.12] +- [osd.39, osd.40, osd.41, client.13] +- [osd.42, osd.43, osd.44, client.14] +- [osd.45, osd.46, osd.47, client.15] +- [osd.48, osd.49, osd.50, client.16] +- [osd.51, osd.52, osd.53, client.17] +- [osd.54, osd.55, osd.56, client.18] +- [osd.57, osd.58, osd.59, client.19] +- [osd.60, osd.61, osd.62, client.20] diff --git a/qa/suites/big/rados-thrash/clusters/small.yaml b/qa/suites/big/rados-thrash/clusters/small.yaml new file mode 100644 index 00000000000..b5a79906c69 --- /dev/null +++ 
b/qa/suites/big/rados-thrash/clusters/small.yaml @@ -0,0 +1,6 @@ +roles: +- [osd.0, osd.1, osd.2, client.0, mon.a] +- [osd.3, osd.4, osd.5, client.1, mon.b] +- [osd.6, osd.7, osd.8, client.2, mon.c] +- [osd.9, osd.10, osd.11, client.3, mon.d] +- [osd.12, osd.13, osd.14, client.4, mon.e] diff --git a/qa/suites/big/rados-thrash/fs/btrfs.yaml b/qa/suites/big/rados-thrash/fs/btrfs.yaml new file mode 100644 index 00000000000..0b3f6fac7a5 --- /dev/null +++ b/qa/suites/big/rados-thrash/fs/btrfs.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + fs: btrfs + conf: + osd: + osd sloppy crc: true + osd op thread timeout: 60 diff --git a/qa/suites/big/rados-thrash/fs/xfs.yaml b/qa/suites/big/rados-thrash/fs/xfs.yaml new file mode 100644 index 00000000000..b4a82911a2f --- /dev/null +++ b/qa/suites/big/rados-thrash/fs/xfs.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + fs: xfs + conf: + osd: + osd sloppy crc: true diff --git a/qa/suites/big/rados-thrash/thrashers/default.yaml b/qa/suites/big/rados-thrash/thrashers/default.yaml new file mode 100644 index 00000000000..d67ff20a693 --- /dev/null +++ b/qa/suites/big/rados-thrash/thrashers/default.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 diff --git a/qa/suites/big/rados-thrash/workloads/snaps-few-objects.yaml b/qa/suites/big/rados-thrash/workloads/snaps-few-objects.yaml new file mode 100644 index 00000000000..b73bb6781dc --- /dev/null +++ b/qa/suites/big/rados-thrash/workloads/snaps-few-objects.yaml @@ -0,0 +1,13 @@ +tasks: +- rados: + ops: 4000 + max_seconds: 3600 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/buildpackages/any/% b/qa/suites/buildpackages/any/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/buildpackages/any/distros 
b/qa/suites/buildpackages/any/distros new file mode 120000 index 00000000000..1ce8f29bf14 --- /dev/null +++ b/qa/suites/buildpackages/any/distros @@ -0,0 +1 @@ +../../../distros/all \ No newline at end of file diff --git a/qa/suites/buildpackages/any/tasks/release.yaml b/qa/suites/buildpackages/any/tasks/release.yaml new file mode 100644 index 00000000000..d7a3b62c8cd --- /dev/null +++ b/qa/suites/buildpackages/any/tasks/release.yaml @@ -0,0 +1,8 @@ +# --suite buildpackages/any --ceph v10.0.1 --filter centos_7,ubuntu_14.04 +roles: + - [client.0] +tasks: + - install: + - exec: + client.0: + - ceph --version | grep 'version ' diff --git a/qa/suites/buildpackages/tests/% b/qa/suites/buildpackages/tests/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/buildpackages/tests/distros b/qa/suites/buildpackages/tests/distros new file mode 120000 index 00000000000..1ce8f29bf14 --- /dev/null +++ b/qa/suites/buildpackages/tests/distros @@ -0,0 +1 @@ +../../../distros/all \ No newline at end of file diff --git a/qa/suites/buildpackages/tests/tasks/release.yaml b/qa/suites/buildpackages/tests/tasks/release.yaml new file mode 100644 index 00000000000..05e87789d71 --- /dev/null +++ b/qa/suites/buildpackages/tests/tasks/release.yaml @@ -0,0 +1,20 @@ +# --suite buildpackages/tests --ceph v10.0.1 --filter centos_7.2,ubuntu_14.04 +overrides: + ansible.cephlab: + playbook: users.yml + buildpackages: + good_machine: + disk: 20 # GB + ram: 2000 # MB + cpus: 2 + min_machine: + disk: 10 # GB + ram: 1000 # MB + cpus: 1 +roles: + - [client.0] +tasks: + - install: + - exec: + client.0: + - ceph --version | grep 'version ' diff --git a/qa/suites/calamari/% b/qa/suites/calamari/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/calamari/clusters/osd-3.yaml b/qa/suites/calamari/clusters/osd-3.yaml new file mode 100644 index 00000000000..66f4fe59850 --- /dev/null +++ b/qa/suites/calamari/clusters/osd-3.yaml @@ -0,0 +1,5 @@ +roles: +- [client.0] 
+- [mon.0, osd.0] +- [osd.1] +- [osd.2] diff --git a/qa/suites/calamari/distros/centos6.4.yaml b/qa/suites/calamari/distros/centos6.4.yaml new file mode 100644 index 00000000000..2240054be17 --- /dev/null +++ b/qa/suites/calamari/distros/centos6.4.yaml @@ -0,0 +1,2 @@ +os_type: centos +os_version: '6.4' diff --git a/qa/suites/calamari/distros/centos6.5.yaml b/qa/suites/calamari/distros/centos6.5.yaml new file mode 100644 index 00000000000..e2ee6b36f2e --- /dev/null +++ b/qa/suites/calamari/distros/centos6.5.yaml @@ -0,0 +1,2 @@ +os_type: centos +os_version: '6.5' diff --git a/qa/suites/calamari/distros/precise.yaml b/qa/suites/calamari/distros/precise.yaml new file mode 100644 index 00000000000..7aaa31b660c --- /dev/null +++ b/qa/suites/calamari/distros/precise.yaml @@ -0,0 +1,2 @@ +os_type: ubuntu +os_version: precise diff --git a/qa/suites/calamari/distros/rhel6.4.yaml b/qa/suites/calamari/distros/rhel6.4.yaml new file mode 100644 index 00000000000..72dd4d13109 --- /dev/null +++ b/qa/suites/calamari/distros/rhel6.4.yaml @@ -0,0 +1,2 @@ +os_type: rhel +os_version: '6.4' diff --git a/qa/suites/calamari/distros/rhel6.5.yaml b/qa/suites/calamari/distros/rhel6.5.yaml new file mode 100644 index 00000000000..4294d98d6f3 --- /dev/null +++ b/qa/suites/calamari/distros/rhel6.5.yaml @@ -0,0 +1,2 @@ +os_type: rhel +os_version: '6.5' diff --git a/qa/suites/calamari/distros/rhel7.0.yaml b/qa/suites/calamari/distros/rhel7.0.yaml new file mode 100644 index 00000000000..1571f9477ad --- /dev/null +++ b/qa/suites/calamari/distros/rhel7.0.yaml @@ -0,0 +1,2 @@ +os_type: rhel +os_version: '7.0' diff --git a/qa/suites/calamari/distros/trusty.yaml b/qa/suites/calamari/distros/trusty.yaml new file mode 100644 index 00000000000..cef9fd0037e --- /dev/null +++ b/qa/suites/calamari/distros/trusty.yaml @@ -0,0 +1,2 @@ +os_type: ubuntu +os_version: trusty diff --git a/qa/suites/calamari/distros/wheezy.yaml.disabled b/qa/suites/calamari/distros/wheezy.yaml.disabled new file mode 100644 index 
00000000000..47c54de4649 --- /dev/null +++ b/qa/suites/calamari/distros/wheezy.yaml.disabled @@ -0,0 +1,2 @@ +os_type: debian +os_version: '7.0' diff --git a/qa/suites/calamari/tasks/calamari.yaml b/qa/suites/calamari/tasks/calamari.yaml new file mode 100644 index 00000000000..70e1129a032 --- /dev/null +++ b/qa/suites/calamari/tasks/calamari.yaml @@ -0,0 +1,10 @@ +machine_type: vps + +tasks: +- ssh_keys: +- calamari_setup: + iceball_location: http://download.inktank.com/enterprise-testing + ice_version: 1.2.2 + email: calamari@inktank.com +- calamari_nosetests: + calamari_branch: wip-testing-1.2.2 diff --git a/qa/suites/ceph-deploy/basic/% b/qa/suites/ceph-deploy/basic/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/ceph-deploy/basic/ceph-deploy-overrides b/qa/suites/ceph-deploy/basic/ceph-deploy-overrides new file mode 120000 index 00000000000..3954c75a789 --- /dev/null +++ b/qa/suites/ceph-deploy/basic/ceph-deploy-overrides @@ -0,0 +1 @@ +../../../ceph-deploy-overrides \ No newline at end of file diff --git a/qa/suites/ceph-deploy/basic/config_options b/qa/suites/ceph-deploy/basic/config_options new file mode 120000 index 00000000000..50bcdbe600e --- /dev/null +++ b/qa/suites/ceph-deploy/basic/config_options @@ -0,0 +1 @@ +../../../config_options \ No newline at end of file diff --git a/qa/suites/ceph-deploy/basic/distros b/qa/suites/ceph-deploy/basic/distros new file mode 120000 index 00000000000..c5d59352cb5 --- /dev/null +++ b/qa/suites/ceph-deploy/basic/distros @@ -0,0 +1 @@ +../../../distros/supported \ No newline at end of file diff --git a/qa/suites/ceph-deploy/basic/tasks/ceph-deploy_hello_world.yaml b/qa/suites/ceph-deploy/basic/tasks/ceph-deploy_hello_world.yaml new file mode 100644 index 00000000000..1e090fad379 --- /dev/null +++ b/qa/suites/ceph-deploy/basic/tasks/ceph-deploy_hello_world.yaml @@ -0,0 +1,37 @@ +overrides: + ceph-deploy: + conf: + global: + debug ms: 1 + osd: + debug osd: 10 + mon: + debug mon: 10 +roles: +- 
- mon.a + - mds.0 + - osd.0 +- - osd.1 + - mon.b + - client.0 +openstack: + - machine: + disk: 10 # GB + ram: 2000 # MB + cpus: 1 + volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: + extras: yes +- print: "**** done install extras" +- ssh_keys: +- print: "**** done ssh_keys" +- ceph-deploy: +- print: "**** done ceph-deploy" +- workunit: + clients: + client.0: + - ceph-deploy/ceph-deploy_hello_world.sh +- print: "**** done ceph-deploy/ceph-deploy_hello_world.sh" diff --git a/qa/suites/dummy/% b/qa/suites/dummy/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/dummy/all/nop.yaml b/qa/suites/dummy/all/nop.yaml new file mode 100644 index 00000000000..cef190df58e --- /dev/null +++ b/qa/suites/dummy/all/nop.yaml @@ -0,0 +1,9 @@ +overrides: + ansible.cephlab: + playbook: users.yml +roles: + - [mon.a, mds.a, osd.0, osd.1, client.0] + +tasks: + - nop: + diff --git a/qa/suites/experimental/multimds/% b/qa/suites/experimental/multimds/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/experimental/multimds/clusters/7-multimds.yaml b/qa/suites/experimental/multimds/clusters/7-multimds.yaml new file mode 100644 index 00000000000..17cfd7b3d79 --- /dev/null +++ b/qa/suites/experimental/multimds/clusters/7-multimds.yaml @@ -0,0 +1,8 @@ +roles: +- [mon.a, mds.a, mds.a-s] +- [mon.b, mds.b, mds.b-s] +- [mon.c, mds.c, mds.c-s] +- [osd.0] +- [osd.1] +- [osd.2] +- [client.0] diff --git a/qa/suites/experimental/multimds/tasks/fsstress_thrash_subtrees.yaml b/qa/suites/experimental/multimds/tasks/fsstress_thrash_subtrees.yaml new file mode 100644 index 00000000000..bee01a83586 --- /dev/null +++ b/qa/suites/experimental/multimds/tasks/fsstress_thrash_subtrees.yaml @@ -0,0 +1,15 @@ +tasks: +- install: +- ceph: + conf: + mds: + mds thrash exports: 1 + mds debug subtrees: 1 + mds debug scatterstat: 1 + mds verify scatter: 1 +- ceph-fuse: +- workunit: + clients: + client.0: + - suites/fsstress.sh + diff 
--git a/qa/suites/fs/basic/% b/qa/suites/fs/basic/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/fs/basic/clusters/fixed-3-cephfs.yaml b/qa/suites/fs/basic/clusters/fixed-3-cephfs.yaml new file mode 120000 index 00000000000..a482e650421 --- /dev/null +++ b/qa/suites/fs/basic/clusters/fixed-3-cephfs.yaml @@ -0,0 +1 @@ +../../../../clusters/fixed-3-cephfs.yaml \ No newline at end of file diff --git a/qa/suites/fs/basic/debug/mds_client.yaml b/qa/suites/fs/basic/debug/mds_client.yaml new file mode 120000 index 00000000000..335c1cafed7 --- /dev/null +++ b/qa/suites/fs/basic/debug/mds_client.yaml @@ -0,0 +1 @@ +../../../../debug/mds_client.yaml \ No newline at end of file diff --git a/qa/suites/fs/basic/fs/btrfs.yaml b/qa/suites/fs/basic/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/fs/basic/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/fs/basic/inline/no.yaml b/qa/suites/fs/basic/inline/no.yaml new file mode 100644 index 00000000000..2030acb9083 --- /dev/null +++ b/qa/suites/fs/basic/inline/no.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/fs/basic/inline/yes.yaml b/qa/suites/fs/basic/inline/yes.yaml new file mode 100644 index 00000000000..72a285c590f --- /dev/null +++ b/qa/suites/fs/basic/inline/yes.yaml @@ -0,0 +1,6 @@ +tasks: +- install: +- ceph: +- exec: + client.0: + - ceph mds set inline_data true --yes-i-really-mean-it diff --git a/qa/suites/fs/basic/overrides/whitelist_wrongly_marked_down.yaml b/qa/suites/fs/basic/overrides/whitelist_wrongly_marked_down.yaml new file mode 120000 index 00000000000..08f746bf894 --- /dev/null +++ b/qa/suites/fs/basic/overrides/whitelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +../../../../overrides/whitelist_wrongly_marked_down.yaml \ No newline at end of file diff --git a/qa/suites/fs/basic/tasks/cephfs_journal_tool.yaml 
b/qa/suites/fs/basic/tasks/cephfs_journal_tool.yaml new file mode 100644 index 00000000000..f24890857a8 --- /dev/null +++ b/qa/suites/fs/basic/tasks/cephfs_journal_tool.yaml @@ -0,0 +1,20 @@ + +tasks: +- ceph-fuse: +- workunit: + clients: + all: [fs/misc/trivial_sync.sh] +- ceph-fuse: + client.0: + mounted: false +- ceph.stop: [mds.*] +- workunit: + clients: + client.0: [suites/cephfs_journal_tool_smoke.sh] +- ceph.restart: [mds.*] +- ceph-fuse: + client.0: + mounted: true +- workunit: + clients: + all: [fs/misc/trivial_sync.sh] diff --git a/qa/suites/fs/basic/tasks/cephfs_scrub_tests.yaml b/qa/suites/fs/basic/tasks/cephfs_scrub_tests.yaml new file mode 100644 index 00000000000..b08dcdd0c0f --- /dev/null +++ b/qa/suites/fs/basic/tasks/cephfs_scrub_tests.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + conf: + mds: + mds log max segments: 1 + mds cache max size: 1000 +tasks: +- ceph-fuse: +- mds_scrub_checks: + mds_rank: 0 + path: /scrub/test/path + client: 0 + run_seq: 0 +- workunit: + clients: + client.0: [suites/pjd.sh] +- mds_scrub_checks: + mds_rank: 0 + path: /scrub/test/path + client: 0 + run_seq: 1 diff --git a/qa/suites/fs/basic/tasks/cfuse_workunit_kernel_untar_build.yaml b/qa/suites/fs/basic/tasks/cfuse_workunit_kernel_untar_build.yaml new file mode 100644 index 00000000000..0531b3a2578 --- /dev/null +++ b/qa/suites/fs/basic/tasks/cfuse_workunit_kernel_untar_build.yaml @@ -0,0 +1,14 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - kernel_untar_build.sh +openstack: + - machine: + disk: 100 # GB + ram: 8000 # MB + cpus: 1 + volumes: # attached to each instance + count: 3 + size: 30 # GB diff --git a/qa/suites/fs/basic/tasks/cfuse_workunit_misc.yaml b/qa/suites/fs/basic/tasks/cfuse_workunit_misc.yaml new file mode 100644 index 00000000000..6dfec976eec --- /dev/null +++ b/qa/suites/fs/basic/tasks/cfuse_workunit_misc.yaml @@ -0,0 +1,8 @@ +tasks: +- ceph-fuse: +- workunit: + timeout: 6h + clients: + all: + - fs/misc + diff --git 
a/qa/suites/fs/basic/tasks/cfuse_workunit_misc_test_o_trunc.yaml b/qa/suites/fs/basic/tasks/cfuse_workunit_misc_test_o_trunc.yaml new file mode 100644 index 00000000000..c9720a2fd48 --- /dev/null +++ b/qa/suites/fs/basic/tasks/cfuse_workunit_misc_test_o_trunc.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - fs/test_o_trunc.sh diff --git a/qa/suites/fs/basic/tasks/cfuse_workunit_quota.yaml b/qa/suites/fs/basic/tasks/cfuse_workunit_quota.yaml new file mode 100644 index 00000000000..a6d35ab8ece --- /dev/null +++ b/qa/suites/fs/basic/tasks/cfuse_workunit_quota.yaml @@ -0,0 +1,13 @@ +tasks: +- ceph-fuse: +- workunit: + timeout: 6h + clients: + all: + - fs/quota + +overrides: + ceph: + conf: + client: + client quota: true diff --git a/qa/suites/fs/basic/tasks/cfuse_workunit_suites_blogbench.yaml b/qa/suites/fs/basic/tasks/cfuse_workunit_suites_blogbench.yaml new file mode 100644 index 00000000000..09898e16bda --- /dev/null +++ b/qa/suites/fs/basic/tasks/cfuse_workunit_suites_blogbench.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/blogbench.sh diff --git a/qa/suites/fs/basic/tasks/cfuse_workunit_suites_dbench.yaml b/qa/suites/fs/basic/tasks/cfuse_workunit_suites_dbench.yaml new file mode 100644 index 00000000000..ad96b4c5e7f --- /dev/null +++ b/qa/suites/fs/basic/tasks/cfuse_workunit_suites_dbench.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/dbench.sh diff --git a/qa/suites/fs/basic/tasks/cfuse_workunit_suites_ffsb.yaml b/qa/suites/fs/basic/tasks/cfuse_workunit_suites_ffsb.yaml new file mode 100644 index 00000000000..86008160034 --- /dev/null +++ b/qa/suites/fs/basic/tasks/cfuse_workunit_suites_ffsb.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + conf: + osd: + filestore flush min: 0 +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/suites/fs/basic/tasks/cfuse_workunit_suites_fsstress.yaml 
b/qa/suites/fs/basic/tasks/cfuse_workunit_suites_fsstress.yaml new file mode 100644 index 00000000000..5908d951b2d --- /dev/null +++ b/qa/suites/fs/basic/tasks/cfuse_workunit_suites_fsstress.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/fs/basic/tasks/cfuse_workunit_suites_fsx.yaml b/qa/suites/fs/basic/tasks/cfuse_workunit_suites_fsx.yaml new file mode 100644 index 00000000000..3c11ed74fc7 --- /dev/null +++ b/qa/suites/fs/basic/tasks/cfuse_workunit_suites_fsx.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/fsx.sh diff --git a/qa/suites/fs/basic/tasks/cfuse_workunit_suites_fsync.yaml b/qa/suites/fs/basic/tasks/cfuse_workunit_suites_fsync.yaml new file mode 100644 index 00000000000..c6043e209bd --- /dev/null +++ b/qa/suites/fs/basic/tasks/cfuse_workunit_suites_fsync.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/fsync-tester.sh diff --git a/qa/suites/fs/basic/tasks/cfuse_workunit_suites_iogen.yaml b/qa/suites/fs/basic/tasks/cfuse_workunit_suites_iogen.yaml new file mode 100644 index 00000000000..6989990e22a --- /dev/null +++ b/qa/suites/fs/basic/tasks/cfuse_workunit_suites_iogen.yaml @@ -0,0 +1,7 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/iogen.sh + diff --git a/qa/suites/fs/basic/tasks/cfuse_workunit_suites_iozone.yaml b/qa/suites/fs/basic/tasks/cfuse_workunit_suites_iozone.yaml new file mode 100644 index 00000000000..1e23f670e28 --- /dev/null +++ b/qa/suites/fs/basic/tasks/cfuse_workunit_suites_iozone.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: [client.0] +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/fs/basic/tasks/cfuse_workunit_suites_pjd.yaml b/qa/suites/fs/basic/tasks/cfuse_workunit_suites_pjd.yaml new file mode 100644 index 00000000000..65bcd0d0333 --- /dev/null +++ b/qa/suites/fs/basic/tasks/cfuse_workunit_suites_pjd.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + 
conf: + client: + debug ms: 1 + debug client: 20 + mds: + debug ms: 1 + debug mds: 20 +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/fs/basic/tasks/cfuse_workunit_suites_truncate_delay.yaml b/qa/suites/fs/basic/tasks/cfuse_workunit_suites_truncate_delay.yaml new file mode 100644 index 00000000000..911026e13bb --- /dev/null +++ b/qa/suites/fs/basic/tasks/cfuse_workunit_suites_truncate_delay.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + conf: + client: + ms_inject_delay_probability: 1 + ms_inject_delay_type: osd + ms_inject_delay_max: 5 + client_oc_max_dirty_age: 1 +tasks: +- ceph-fuse: +- exec: + client.0: + - cd $TESTDIR/mnt.* && dd if=/dev/zero of=./foo count=100 + - sleep 2 + - cd $TESTDIR/mnt.* && truncate --size 0 ./foo diff --git a/qa/suites/fs/basic/tasks/cfuse_workunit_trivial_sync.yaml b/qa/suites/fs/basic/tasks/cfuse_workunit_trivial_sync.yaml new file mode 100644 index 00000000000..9509650c76c --- /dev/null +++ b/qa/suites/fs/basic/tasks/cfuse_workunit_trivial_sync.yaml @@ -0,0 +1,5 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: [fs/misc/trivial_sync.sh] diff --git a/qa/suites/fs/basic/tasks/libcephfs_interface_tests.yaml b/qa/suites/fs/basic/tasks/libcephfs_interface_tests.yaml new file mode 100644 index 00000000000..0b1d41fea5c --- /dev/null +++ b/qa/suites/fs/basic/tasks/libcephfs_interface_tests.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + client.0: + - libcephfs/test.sh diff --git a/qa/suites/fs/basic/tasks/libcephfs_java.yaml b/qa/suites/fs/basic/tasks/libcephfs_java.yaml new file mode 100644 index 00000000000..4330d50965e --- /dev/null +++ b/qa/suites/fs/basic/tasks/libcephfs_java.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + client.0: + - libcephfs-java/test.sh diff --git a/qa/suites/fs/basic/tasks/mds_creation_retry.yaml b/qa/suites/fs/basic/tasks/mds_creation_retry.yaml new file mode 100644 index 00000000000..76ceeafa8e7 --- /dev/null +++ 
b/qa/suites/fs/basic/tasks/mds_creation_retry.yaml @@ -0,0 +1,7 @@ +tasks: +-mds_creation_failure: +-ceph-fuse: +- workunit: + clients: + all: [fs/misc/trivial_sync.sh] + diff --git a/qa/suites/fs/multiclient/% b/qa/suites/fs/multiclient/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/fs/multiclient/clusters/three_clients.yaml b/qa/suites/fs/multiclient/clusters/three_clients.yaml new file mode 100644 index 00000000000..fd2535fd4a0 --- /dev/null +++ b/qa/suites/fs/multiclient/clusters/three_clients.yaml @@ -0,0 +1,5 @@ +roles: +- [mon.a, mon.b, mon.c, mds.a, osd.0, osd.1, osd.2] +- [client.2] +- [client.1] +- [client.0] diff --git a/qa/suites/fs/multiclient/clusters/two_clients.yaml b/qa/suites/fs/multiclient/clusters/two_clients.yaml new file mode 100644 index 00000000000..2258befd8bf --- /dev/null +++ b/qa/suites/fs/multiclient/clusters/two_clients.yaml @@ -0,0 +1,4 @@ +roles: +- [mon.a, mon.b, mon.c, mds.a, osd.0, osd.1, osd.2] +- [client.1] +- [client.0] diff --git a/qa/suites/fs/multiclient/debug/mds_client.yaml b/qa/suites/fs/multiclient/debug/mds_client.yaml new file mode 120000 index 00000000000..335c1cafed7 --- /dev/null +++ b/qa/suites/fs/multiclient/debug/mds_client.yaml @@ -0,0 +1 @@ +../../../../debug/mds_client.yaml \ No newline at end of file diff --git a/qa/suites/fs/multiclient/fs/btrfs.yaml b/qa/suites/fs/multiclient/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/fs/multiclient/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/fs/multiclient/mount/ceph-fuse.yaml b/qa/suites/fs/multiclient/mount/ceph-fuse.yaml new file mode 100644 index 00000000000..37ac5b69e61 --- /dev/null +++ b/qa/suites/fs/multiclient/mount/ceph-fuse.yaml @@ -0,0 +1,4 @@ +tasks: +- install: +- ceph: +- ceph-fuse: diff --git a/qa/suites/fs/multiclient/mount/kclient.yaml.disabled b/qa/suites/fs/multiclient/mount/kclient.yaml.disabled new file mode 
100644 index 00000000000..04adb48b63f --- /dev/null +++ b/qa/suites/fs/multiclient/mount/kclient.yaml.disabled @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +tasks: +- install: +- ceph: +- kclient: diff --git a/qa/suites/fs/multiclient/tasks/fsx-mpi.yaml.disabled b/qa/suites/fs/multiclient/tasks/fsx-mpi.yaml.disabled new file mode 100644 index 00000000000..e486c44c51e --- /dev/null +++ b/qa/suites/fs/multiclient/tasks/fsx-mpi.yaml.disabled @@ -0,0 +1,20 @@ +# make sure we get the same MPI version on all hosts +os_type: ubuntu +os_version: "14.04" + +tasks: +- pexec: + clients: + - cd $TESTDIR + - wget http://ceph.com/qa/fsx-mpi.c + - mpicc fsx-mpi.c -o fsx-mpi + - rm fsx-mpi.c + - ln -s $TESTDIR/mnt.* $TESTDIR/gmnt +- ssh_keys: +- mpi: + exec: $TESTDIR/fsx-mpi 1MB -N 50000 -p 10000 -l 1048576 + workdir: $TESTDIR/gmnt +- pexec: + all: + - rm $TESTDIR/gmnt + - rm $TESTDIR/fsx-mpi diff --git a/qa/suites/fs/multiclient/tasks/ior-shared-file.yaml b/qa/suites/fs/multiclient/tasks/ior-shared-file.yaml new file mode 100644 index 00000000000..dcf24247a92 --- /dev/null +++ b/qa/suites/fs/multiclient/tasks/ior-shared-file.yaml @@ -0,0 +1,26 @@ +# make sure we get the same MPI version on all hosts +os_type: ubuntu +os_version: "14.04" + +tasks: +- pexec: + clients: + - cd $TESTDIR + - wget http://ceph.com/qa/ior.tbz2 + - tar xvfj ior.tbz2 + - cd ior + - ./configure + - make + - make install DESTDIR=$TESTDIR/binary/ + - cd $TESTDIR/ + - rm ior.tbz2 + - rm -r ior + - ln -s $TESTDIR/mnt.* $TESTDIR/gmnt +- ssh_keys: +- mpi: + exec: $TESTDIR/binary/usr/local/bin/ior -e -w -r -W -b 10m -a POSIX -o $TESTDIR/gmnt/ior.testfile +- pexec: + all: + - rm -f $TESTDIR/gmnt/ior.testfile + - rm -f $TESTDIR/gmnt + - rm -rf $TESTDIR/binary diff --git a/qa/suites/fs/multiclient/tasks/mdtest.yaml b/qa/suites/fs/multiclient/tasks/mdtest.yaml new file mode 100644 index 00000000000..1dd95d954fb --- /dev/null +++ b/qa/suites/fs/multiclient/tasks/mdtest.yaml 
@@ -0,0 +1,23 @@ +# make sure we get the same MPI version on all hosts +os_type: ubuntu +os_version: "14.04" + +tasks: +- pexec: + clients: + - cd $TESTDIR + - wget http://ceph.com/qa/mdtest-1.9.3.tgz + - mkdir mdtest-1.9.3 + - cd mdtest-1.9.3 + - tar xvfz $TESTDIR/mdtest-1.9.3.tgz + - rm $TESTDIR/mdtest-1.9.3.tgz + - MPI_CC=mpicc make + - ln -s $TESTDIR/mnt.* $TESTDIR/gmnt +- ssh_keys: +- mpi: + exec: $TESTDIR/mdtest-1.9.3/mdtest -d $TESTDIR/gmnt -I 20 -z 5 -b 2 -R +- pexec: + all: + - rm -f $TESTDIR/gmnt + - rm -rf $TESTDIR/mdtest-1.9.3 + - rm -rf $TESTDIR/._mdtest-1.9.3 \ No newline at end of file diff --git a/qa/suites/fs/recovery/% b/qa/suites/fs/recovery/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/fs/recovery/clusters/2-remote-clients.yaml b/qa/suites/fs/recovery/clusters/2-remote-clients.yaml new file mode 100644 index 00000000000..d8af6b6ae12 --- /dev/null +++ b/qa/suites/fs/recovery/clusters/2-remote-clients.yaml @@ -0,0 +1,3 @@ +roles: +- [mon.a, osd.0, mds.a] +- [client.0, client.1, osd.1, osd.2] diff --git a/qa/suites/fs/recovery/debug/mds_client.yaml b/qa/suites/fs/recovery/debug/mds_client.yaml new file mode 100644 index 00000000000..cf5995fdda4 --- /dev/null +++ b/qa/suites/fs/recovery/debug/mds_client.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + mds: + debug ms: 1 + debug mds: 20 + client: + debug ms: 1 + debug client: 20 diff --git a/qa/suites/fs/recovery/mounts/ceph-fuse.yaml b/qa/suites/fs/recovery/mounts/ceph-fuse.yaml new file mode 100644 index 00000000000..8092598f404 --- /dev/null +++ b/qa/suites/fs/recovery/mounts/ceph-fuse.yaml @@ -0,0 +1,8 @@ +tasks: +- install: +- ceph: +- ceph-fuse: + client.0: + mounted: false + client.1: + mounted: false diff --git a/qa/suites/fs/recovery/tasks/client-limits.yaml b/qa/suites/fs/recovery/tasks/client-limits.yaml new file mode 100644 index 00000000000..4cf874d5df8 --- /dev/null +++ b/qa/suites/fs/recovery/tasks/client-limits.yaml @@ -0,0 +1,8 @@ + +overrides: + 
ceph: + log-whitelist: + - responding to mclientcaps\(revoke\) + +tasks: +- mds_client_limits: diff --git a/qa/suites/fs/recovery/tasks/client-recovery.yaml b/qa/suites/fs/recovery/tasks/client-recovery.yaml new file mode 100644 index 00000000000..2cd39510f35 --- /dev/null +++ b/qa/suites/fs/recovery/tasks/client-recovery.yaml @@ -0,0 +1,11 @@ + +# The task interferes with the network, so we need +# to permit OSDs to complain about that. +overrides: + ceph: + log-whitelist: + - wrongly marked me down + - slow request + +tasks: +- mds_client_recovery: diff --git a/qa/suites/fs/recovery/tasks/mds-flush.yaml b/qa/suites/fs/recovery/tasks/mds-flush.yaml new file mode 100644 index 00000000000..8e3021e0060 --- /dev/null +++ b/qa/suites/fs/recovery/tasks/mds-flush.yaml @@ -0,0 +1,3 @@ + +tasks: +- mds_flush: diff --git a/qa/suites/fs/recovery/tasks/mds-full.yaml b/qa/suites/fs/recovery/tasks/mds-full.yaml new file mode 100644 index 00000000000..fe52ae32043 --- /dev/null +++ b/qa/suites/fs/recovery/tasks/mds-full.yaml @@ -0,0 +1,13 @@ + +overrides: + ceph: + log-whitelist: + - OSD full dropping all updates + - OSD near full + conf: + osd: + osd objectstore: memstore + memstore device bytes: 100000000 + +tasks: + - mds_full: diff --git a/qa/suites/fs/snaps/% b/qa/suites/fs/snaps/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/fs/snaps/clusters/fixed-3-cephfs.yaml b/qa/suites/fs/snaps/clusters/fixed-3-cephfs.yaml new file mode 120000 index 00000000000..a482e650421 --- /dev/null +++ b/qa/suites/fs/snaps/clusters/fixed-3-cephfs.yaml @@ -0,0 +1 @@ +../../../../clusters/fixed-3-cephfs.yaml \ No newline at end of file diff --git a/qa/suites/fs/snaps/fs/btrfs.yaml b/qa/suites/fs/snaps/fs/btrfs.yaml new file mode 100644 index 00000000000..4c7af311538 --- /dev/null +++ b/qa/suites/fs/snaps/fs/btrfs.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + fs: btrfs + conf: + osd: + osd op thread timeout: 60 diff --git a/qa/suites/fs/snaps/mount/ceph-fuse.yaml 
b/qa/suites/fs/snaps/mount/ceph-fuse.yaml new file mode 100644 index 00000000000..37ac5b69e61 --- /dev/null +++ b/qa/suites/fs/snaps/mount/ceph-fuse.yaml @@ -0,0 +1,4 @@ +tasks: +- install: +- ceph: +- ceph-fuse: diff --git a/qa/suites/fs/snaps/tasks/snaptests.yaml b/qa/suites/fs/snaps/tasks/snaptests.yaml new file mode 100644 index 00000000000..7f7b0f21569 --- /dev/null +++ b/qa/suites/fs/snaps/tasks/snaptests.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + clients: + all: + - snaps/snaptest-0.sh + - snaps/snaptest-1.sh + - snaps/snaptest-2.sh diff --git a/qa/suites/fs/standbyreplay/% b/qa/suites/fs/standbyreplay/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/fs/standbyreplay/clusters/standby-replay.yaml b/qa/suites/fs/standbyreplay/clusters/standby-replay.yaml new file mode 100644 index 00000000000..5d21765ca18 --- /dev/null +++ b/qa/suites/fs/standbyreplay/clusters/standby-replay.yaml @@ -0,0 +1,11 @@ + +overrides: + ceph: + conf: + mds: + mds standby replay: true + +roles: +- [mon.a, mds.a, mds.b-s-0, osd.0, osd.1] +- [mon.b, mds.c-s-0, mds.d-s-0, mon.c, osd.2, osd.3] +- [client.0] diff --git a/qa/suites/fs/standbyreplay/mount/fuse.yaml b/qa/suites/fs/standbyreplay/mount/fuse.yaml new file mode 100644 index 00000000000..5769caaaaac --- /dev/null +++ b/qa/suites/fs/standbyreplay/mount/fuse.yaml @@ -0,0 +1,5 @@ + +tasks: + - install: + - ceph: + - ceph_fuse: diff --git a/qa/suites/fs/standbyreplay/tasks/migration.yaml b/qa/suites/fs/standbyreplay/tasks/migration.yaml new file mode 100644 index 00000000000..09181ac1150 --- /dev/null +++ b/qa/suites/fs/standbyreplay/tasks/migration.yaml @@ -0,0 +1,7 @@ + +tasks: + - mds_journal_migration: + - workunit: + clients: + all: [fs/misc/trivial_sync.sh] + diff --git a/qa/suites/fs/thrash/% b/qa/suites/fs/thrash/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/fs/thrash/ceph-thrash/default.yaml b/qa/suites/fs/thrash/ceph-thrash/default.yaml new file mode 100644 index 
00000000000..aefdf826ce7 --- /dev/null +++ b/qa/suites/fs/thrash/ceph-thrash/default.yaml @@ -0,0 +1,2 @@ +tasks: +- mds_thrash: diff --git a/qa/suites/fs/thrash/ceph/base.yaml b/qa/suites/fs/thrash/ceph/base.yaml new file mode 100644 index 00000000000..2030acb9083 --- /dev/null +++ b/qa/suites/fs/thrash/ceph/base.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/fs/thrash/clusters/mds-1active-1standby.yaml b/qa/suites/fs/thrash/clusters/mds-1active-1standby.yaml new file mode 100644 index 00000000000..7e951b95889 --- /dev/null +++ b/qa/suites/fs/thrash/clusters/mds-1active-1standby.yaml @@ -0,0 +1,4 @@ +roles: +- [mon.a, mon.c, osd.0, osd.1, osd.2] +- [mon.b, mds.a, osd.3, osd.4, osd.5] +- [client.0, mds.b-s-a] diff --git a/qa/suites/fs/thrash/debug/mds_client.yaml b/qa/suites/fs/thrash/debug/mds_client.yaml new file mode 120000 index 00000000000..335c1cafed7 --- /dev/null +++ b/qa/suites/fs/thrash/debug/mds_client.yaml @@ -0,0 +1 @@ +../../../../debug/mds_client.yaml \ No newline at end of file diff --git a/qa/suites/fs/thrash/fs/btrfs.yaml b/qa/suites/fs/thrash/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/fs/thrash/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/fs/thrash/msgr-failures/none.yaml b/qa/suites/fs/thrash/msgr-failures/none.yaml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/fs/thrash/msgr-failures/osd-mds-delay.yaml b/qa/suites/fs/thrash/msgr-failures/osd-mds-delay.yaml new file mode 100644 index 00000000000..adcebc0baac --- /dev/null +++ b/qa/suites/fs/thrash/msgr-failures/osd-mds-delay.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 2500 + mds inject delay type: osd mds + ms inject delay probability: .005 + ms inject delay max: 1 diff --git a/qa/suites/fs/thrash/overrides/whitelist_wrongly_marked_down.yaml 
b/qa/suites/fs/thrash/overrides/whitelist_wrongly_marked_down.yaml new file mode 120000 index 00000000000..08f746bf894 --- /dev/null +++ b/qa/suites/fs/thrash/overrides/whitelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +../../../../overrides/whitelist_wrongly_marked_down.yaml \ No newline at end of file diff --git a/qa/suites/fs/thrash/tasks/cfuse_workunit_suites_fsstress.yaml b/qa/suites/fs/thrash/tasks/cfuse_workunit_suites_fsstress.yaml new file mode 100644 index 00000000000..5908d951b2d --- /dev/null +++ b/qa/suites/fs/thrash/tasks/cfuse_workunit_suites_fsstress.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/fs/thrash/tasks/cfuse_workunit_suites_pjd.yaml b/qa/suites/fs/thrash/tasks/cfuse_workunit_suites_pjd.yaml new file mode 100644 index 00000000000..930bf4a671d --- /dev/null +++ b/qa/suites/fs/thrash/tasks/cfuse_workunit_suites_pjd.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/fs/thrash/tasks/cfuse_workunit_trivial_sync.yaml b/qa/suites/fs/thrash/tasks/cfuse_workunit_trivial_sync.yaml new file mode 100644 index 00000000000..9509650c76c --- /dev/null +++ b/qa/suites/fs/thrash/tasks/cfuse_workunit_trivial_sync.yaml @@ -0,0 +1,5 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: [fs/misc/trivial_sync.sh] diff --git a/qa/suites/fs/traceless/% b/qa/suites/fs/traceless/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/fs/traceless/clusters/fixed-3-cephfs.yaml b/qa/suites/fs/traceless/clusters/fixed-3-cephfs.yaml new file mode 120000 index 00000000000..a482e650421 --- /dev/null +++ b/qa/suites/fs/traceless/clusters/fixed-3-cephfs.yaml @@ -0,0 +1 @@ +../../../../clusters/fixed-3-cephfs.yaml \ No newline at end of file diff --git a/qa/suites/fs/traceless/debug/mds_client.yaml b/qa/suites/fs/traceless/debug/mds_client.yaml new file mode 120000 index 00000000000..335c1cafed7 --- /dev/null +++ 
b/qa/suites/fs/traceless/debug/mds_client.yaml @@ -0,0 +1 @@ +../../../../debug/mds_client.yaml \ No newline at end of file diff --git a/qa/suites/fs/traceless/fs/btrfs.yaml b/qa/suites/fs/traceless/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/fs/traceless/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/fs/traceless/overrides/whitelist_wrongly_marked_down.yaml b/qa/suites/fs/traceless/overrides/whitelist_wrongly_marked_down.yaml new file mode 120000 index 00000000000..08f746bf894 --- /dev/null +++ b/qa/suites/fs/traceless/overrides/whitelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +../../../../overrides/whitelist_wrongly_marked_down.yaml \ No newline at end of file diff --git a/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_blogbench.yaml b/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_blogbench.yaml new file mode 100644 index 00000000000..ed9d92d5bda --- /dev/null +++ b/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_blogbench.yaml @@ -0,0 +1,8 @@ +tasks: +- install: +- ceph: +- ceph-fuse: +- workunit: + clients: + all: + - suites/blogbench.sh diff --git a/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_dbench.yaml b/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_dbench.yaml new file mode 100644 index 00000000000..e678ed47cc6 --- /dev/null +++ b/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_dbench.yaml @@ -0,0 +1,8 @@ +tasks: +- install: +- ceph: +- ceph-fuse: +- workunit: + clients: + all: + - suites/dbench.sh diff --git a/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_ffsb.yaml b/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_ffsb.yaml new file mode 100644 index 00000000000..652a3a62f59 --- /dev/null +++ b/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_ffsb.yaml @@ -0,0 +1,11 @@ +tasks: +- install: +- ceph: + conf: + osd: + filestore flush min: 0 +- ceph-fuse: +- workunit: + clients: + all: + - suites/ffsb.sh diff --git 
a/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_fsstress.yaml b/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_fsstress.yaml new file mode 100644 index 00000000000..b58487c0785 --- /dev/null +++ b/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_fsstress.yaml @@ -0,0 +1,8 @@ +tasks: +- install: +- ceph: +- ceph-fuse: +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/fs/traceless/traceless/50pc.yaml b/qa/suites/fs/traceless/traceless/50pc.yaml new file mode 100644 index 00000000000..e0418bcb2be --- /dev/null +++ b/qa/suites/fs/traceless/traceless/50pc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mds: + mds inject traceless reply probability: .5 diff --git a/qa/suites/fs/verify/% b/qa/suites/fs/verify/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/fs/verify/clusters/fixed-3-cephfs.yaml b/qa/suites/fs/verify/clusters/fixed-3-cephfs.yaml new file mode 120000 index 00000000000..a482e650421 --- /dev/null +++ b/qa/suites/fs/verify/clusters/fixed-3-cephfs.yaml @@ -0,0 +1 @@ +../../../../clusters/fixed-3-cephfs.yaml \ No newline at end of file diff --git a/qa/suites/fs/verify/debug/mds_client.yaml b/qa/suites/fs/verify/debug/mds_client.yaml new file mode 120000 index 00000000000..335c1cafed7 --- /dev/null +++ b/qa/suites/fs/verify/debug/mds_client.yaml @@ -0,0 +1 @@ +../../../../debug/mds_client.yaml \ No newline at end of file diff --git a/qa/suites/fs/verify/fs/btrfs.yaml b/qa/suites/fs/verify/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/fs/verify/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/fs/verify/overrides/whitelist_wrongly_marked_down.yaml b/qa/suites/fs/verify/overrides/whitelist_wrongly_marked_down.yaml new file mode 120000 index 00000000000..08f746bf894 --- /dev/null +++ b/qa/suites/fs/verify/overrides/whitelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ 
+../../../../overrides/whitelist_wrongly_marked_down.yaml \ No newline at end of file diff --git a/qa/suites/fs/verify/tasks/cfuse_workunit_suites_dbench.yaml b/qa/suites/fs/verify/tasks/cfuse_workunit_suites_dbench.yaml new file mode 100644 index 00000000000..73319776f03 --- /dev/null +++ b/qa/suites/fs/verify/tasks/cfuse_workunit_suites_dbench.yaml @@ -0,0 +1,12 @@ +tasks: +- install: +- ceph: + conf: + client: + debug client: 1/20 + debug ms: 0/10 +- ceph-fuse: +- workunit: + clients: + all: + - suites/dbench.sh diff --git a/qa/suites/fs/verify/tasks/cfuse_workunit_suites_fsstress.yaml b/qa/suites/fs/verify/tasks/cfuse_workunit_suites_fsstress.yaml new file mode 100644 index 00000000000..b58487c0785 --- /dev/null +++ b/qa/suites/fs/verify/tasks/cfuse_workunit_suites_fsstress.yaml @@ -0,0 +1,8 @@ +tasks: +- install: +- ceph: +- ceph-fuse: +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/fs/verify/tasks/libcephfs_interface_tests.yaml b/qa/suites/fs/verify/tasks/libcephfs_interface_tests.yaml new file mode 100644 index 00000000000..22d1f142161 --- /dev/null +++ b/qa/suites/fs/verify/tasks/libcephfs_interface_tests.yaml @@ -0,0 +1,8 @@ +tasks: +- install: +- ceph: +- ceph-fuse: +- workunit: + clients: + client.0: + - libcephfs/test.sh diff --git a/qa/suites/fs/verify/validater/lockdep.yaml b/qa/suites/fs/verify/validater/lockdep.yaml new file mode 100644 index 00000000000..25f84355c0b --- /dev/null +++ b/qa/suites/fs/verify/validater/lockdep.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + lockdep: true diff --git a/qa/suites/fs/verify/validater/valgrind.yaml b/qa/suites/fs/verify/validater/valgrind.yaml new file mode 100644 index 00000000000..973f460ad47 --- /dev/null +++ b/qa/suites/fs/verify/validater/valgrind.yaml @@ -0,0 +1,15 @@ +overrides: + install: + ceph: + flavor: notcmalloc + ceph: + conf: + global: + osd heartbeat grace: 40 + valgrind: + mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes] + osd: 
[--tool=memcheck] + mds: [--tool=memcheck] + ceph-fuse: + client.0: + valgrind: [--tool=memcheck, --leak-check=full, --show-reachable=yes] diff --git a/qa/suites/hadoop/basic/% b/qa/suites/hadoop/basic/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/hadoop/basic/clusters/fixed-3.yaml b/qa/suites/hadoop/basic/clusters/fixed-3.yaml new file mode 100644 index 00000000000..708d751178c --- /dev/null +++ b/qa/suites/hadoop/basic/clusters/fixed-3.yaml @@ -0,0 +1,5 @@ +roles: +- [mon.0, mds.0, osd.0, hadoop.master.0] +- [mon.1, osd.1, hadoop.slave.0] +- [mon.2, hadoop.slave.1, client.0] + diff --git a/qa/suites/hadoop/basic/tasks/repl.yaml b/qa/suites/hadoop/basic/tasks/repl.yaml new file mode 100644 index 00000000000..60cdcca327e --- /dev/null +++ b/qa/suites/hadoop/basic/tasks/repl.yaml @@ -0,0 +1,8 @@ +tasks: +- ssh_keys: +- install: +- ceph: +- hadoop: +- workunit: + clients: + client.0: [hadoop/repl.sh] diff --git a/qa/suites/hadoop/basic/tasks/wordcount.yaml b/qa/suites/hadoop/basic/tasks/wordcount.yaml new file mode 100644 index 00000000000..b84941b81ed --- /dev/null +++ b/qa/suites/hadoop/basic/tasks/wordcount.yaml @@ -0,0 +1,8 @@ +tasks: +- ssh_keys: +- install: +- ceph: +- hadoop: +- workunit: + clients: + client.0: [hadoop/wordcount.sh] diff --git a/qa/suites/kcephfs/cephfs/% b/qa/suites/kcephfs/cephfs/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/kcephfs/cephfs/clusters/fixed-3-cephfs.yaml b/qa/suites/kcephfs/cephfs/clusters/fixed-3-cephfs.yaml new file mode 120000 index 00000000000..a482e650421 --- /dev/null +++ b/qa/suites/kcephfs/cephfs/clusters/fixed-3-cephfs.yaml @@ -0,0 +1 @@ +../../../../clusters/fixed-3-cephfs.yaml \ No newline at end of file diff --git a/qa/suites/kcephfs/cephfs/conf.yaml b/qa/suites/kcephfs/cephfs/conf.yaml new file mode 100644 index 00000000000..30da870b25d --- /dev/null +++ b/qa/suites/kcephfs/cephfs/conf.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms die 
on skipped message: false diff --git a/qa/suites/kcephfs/cephfs/fs/btrfs.yaml b/qa/suites/kcephfs/cephfs/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/kcephfs/cephfs/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/kcephfs/cephfs/inline/no.yaml b/qa/suites/kcephfs/cephfs/inline/no.yaml new file mode 100644 index 00000000000..2030acb9083 --- /dev/null +++ b/qa/suites/kcephfs/cephfs/inline/no.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/kcephfs/cephfs/inline/yes.yaml b/qa/suites/kcephfs/cephfs/inline/yes.yaml new file mode 100644 index 00000000000..72a285c590f --- /dev/null +++ b/qa/suites/kcephfs/cephfs/inline/yes.yaml @@ -0,0 +1,6 @@ +tasks: +- install: +- ceph: +- exec: + client.0: + - ceph mds set inline_data true --yes-i-really-mean-it diff --git a/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_direct_io.yaml b/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_direct_io.yaml new file mode 100644 index 00000000000..cc4b32a441a --- /dev/null +++ b/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_direct_io.yaml @@ -0,0 +1,7 @@ +tasks: +- kclient: +- workunit: + clients: + all: + - direct_io + diff --git a/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_kernel_untar_build.yaml b/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_kernel_untar_build.yaml new file mode 100644 index 00000000000..84d15f66d6c --- /dev/null +++ b/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_kernel_untar_build.yaml @@ -0,0 +1,6 @@ +tasks: +- kclient: +- workunit: + clients: + all: + - kernel_untar_build.sh diff --git a/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_misc.yaml b/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_misc.yaml new file mode 100644 index 00000000000..e3f4fb17672 --- /dev/null +++ b/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_misc.yaml @@ -0,0 +1,6 @@ +tasks: +- kclient: +- workunit: + clients: + all: + - fs/misc diff --git 
a/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_o_trunc.yaml b/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_o_trunc.yaml new file mode 100644 index 00000000000..5219fc929ef --- /dev/null +++ b/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_o_trunc.yaml @@ -0,0 +1,7 @@ +tasks: +- kclient: +- workunit: + clients: + all: + - fs/test_o_trunc.sh + diff --git a/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_dbench.yaml b/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_dbench.yaml new file mode 100644 index 00000000000..8dd810a3765 --- /dev/null +++ b/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_dbench.yaml @@ -0,0 +1,6 @@ +tasks: +- kclient: +- workunit: + clients: + all: + - suites/dbench.sh diff --git a/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_ffsb.yaml b/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_ffsb.yaml new file mode 100644 index 00000000000..059ffe1ea4a --- /dev/null +++ b/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_ffsb.yaml @@ -0,0 +1,6 @@ +tasks: +- kclient: +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_fsstress.yaml b/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_fsstress.yaml new file mode 100644 index 00000000000..bc49fc9086c --- /dev/null +++ b/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_fsstress.yaml @@ -0,0 +1,6 @@ +tasks: +- kclient: +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_fsx.yaml b/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_fsx.yaml new file mode 100644 index 00000000000..38d9604fcac --- /dev/null +++ b/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_fsx.yaml @@ -0,0 +1,6 @@ +tasks: +- kclient: +- workunit: + clients: + all: + - suites/fsx.sh diff --git a/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_fsync.yaml b/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_fsync.yaml new file 
mode 100644 index 00000000000..452641cfc9e --- /dev/null +++ b/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_fsync.yaml @@ -0,0 +1,6 @@ +tasks: +- kclient: +- workunit: + clients: + all: + - suites/fsync-tester.sh diff --git a/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_iozone.yaml b/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_iozone.yaml new file mode 100644 index 00000000000..832e0241b27 --- /dev/null +++ b/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_iozone.yaml @@ -0,0 +1,6 @@ +tasks: +- kclient: +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_pjd.yaml b/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_pjd.yaml new file mode 100644 index 00000000000..09abaeb6eec --- /dev/null +++ b/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_pjd.yaml @@ -0,0 +1,6 @@ +tasks: +- kclient: +- workunit: + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_trivial_sync.yaml b/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_trivial_sync.yaml new file mode 100644 index 00000000000..d317a392983 --- /dev/null +++ b/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_trivial_sync.yaml @@ -0,0 +1,5 @@ +tasks: +- kclient: +- workunit: + clients: + all: [fs/misc/trivial_sync.sh] diff --git a/qa/suites/kcephfs/mixed-clients/% b/qa/suites/kcephfs/mixed-clients/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/kcephfs/mixed-clients/clusters/2-clients.yaml b/qa/suites/kcephfs/mixed-clients/clusters/2-clients.yaml new file mode 100644 index 00000000000..067ce1a8278 --- /dev/null +++ b/qa/suites/kcephfs/mixed-clients/clusters/2-clients.yaml @@ -0,0 +1,5 @@ +roles: +- [mon.a, mds.a, osd.0, osd.1] +- [mon.b, mon.c, osd.2, osd.3] +- [client.0] +- [client.1] diff --git a/qa/suites/kcephfs/mixed-clients/conf.yaml b/qa/suites/kcephfs/mixed-clients/conf.yaml new file mode 100644 index 00000000000..30da870b25d 
--- /dev/null +++ b/qa/suites/kcephfs/mixed-clients/conf.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false diff --git a/qa/suites/kcephfs/mixed-clients/fs/btrfs.yaml b/qa/suites/kcephfs/mixed-clients/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/kcephfs/mixed-clients/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/kcephfs/mixed-clients/tasks/kernel_cfuse_workunits_dbench_iozone.yaml b/qa/suites/kcephfs/mixed-clients/tasks/kernel_cfuse_workunits_dbench_iozone.yaml new file mode 100644 index 00000000000..0121a01c538 --- /dev/null +++ b/qa/suites/kcephfs/mixed-clients/tasks/kernel_cfuse_workunits_dbench_iozone.yaml @@ -0,0 +1,20 @@ +tasks: +- install: +- ceph: +- parallel: + - user-workload + - kclient-workload +user-workload: + sequential: + - ceph-fuse: [client.0] + - workunit: + clients: + client.0: + - suites/iozone.sh +kclient-workload: + sequential: + - kclient: [client.1] + - workunit: + clients: + client.1: + - suites/dbench.sh diff --git a/qa/suites/kcephfs/mixed-clients/tasks/kernel_cfuse_workunits_untarbuild_blogbench.yaml b/qa/suites/kcephfs/mixed-clients/tasks/kernel_cfuse_workunits_untarbuild_blogbench.yaml new file mode 100644 index 00000000000..7b0ce5b5d58 --- /dev/null +++ b/qa/suites/kcephfs/mixed-clients/tasks/kernel_cfuse_workunits_untarbuild_blogbench.yaml @@ -0,0 +1,20 @@ +tasks: +- install: +- ceph: +- parallel: + - user-workload + - kclient-workload +user-workload: + sequential: + - ceph-fuse: [client.0] + - workunit: + clients: + client.0: + - suites/blogbench.sh +kclient-workload: + sequential: + - kclient: [client.1] + - workunit: + clients: + client.1: + - kernel_untar_build.sh diff --git a/qa/suites/kcephfs/thrash/% b/qa/suites/kcephfs/thrash/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/kcephfs/thrash/clusters/fixed-3-cephfs.yaml 
b/qa/suites/kcephfs/thrash/clusters/fixed-3-cephfs.yaml new file mode 120000 index 00000000000..a482e650421 --- /dev/null +++ b/qa/suites/kcephfs/thrash/clusters/fixed-3-cephfs.yaml @@ -0,0 +1 @@ +../../../../clusters/fixed-3-cephfs.yaml \ No newline at end of file diff --git a/qa/suites/kcephfs/thrash/conf.yaml b/qa/suites/kcephfs/thrash/conf.yaml new file mode 100644 index 00000000000..30da870b25d --- /dev/null +++ b/qa/suites/kcephfs/thrash/conf.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false diff --git a/qa/suites/kcephfs/thrash/fs/btrfs.yaml b/qa/suites/kcephfs/thrash/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/kcephfs/thrash/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/kcephfs/thrash/thrashers/default.yaml b/qa/suites/kcephfs/thrash/thrashers/default.yaml new file mode 100644 index 00000000000..14d772583cf --- /dev/null +++ b/qa/suites/kcephfs/thrash/thrashers/default.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost +- thrashosds: diff --git a/qa/suites/kcephfs/thrash/thrashers/mds.yaml b/qa/suites/kcephfs/thrash/thrashers/mds.yaml new file mode 100644 index 00000000000..cab4a01a5fd --- /dev/null +++ b/qa/suites/kcephfs/thrash/thrashers/mds.yaml @@ -0,0 +1,4 @@ +tasks: +- install: +- ceph: +- mds_thrash: diff --git a/qa/suites/kcephfs/thrash/thrashers/mon.yaml b/qa/suites/kcephfs/thrash/thrashers/mon.yaml new file mode 100644 index 00000000000..90612f21865 --- /dev/null +++ b/qa/suites/kcephfs/thrash/thrashers/mon.yaml @@ -0,0 +1,6 @@ +tasks: +- install: +- ceph: +- mon_thrash: + revive_delay: 20 + thrash_delay: 1 diff --git a/qa/suites/kcephfs/thrash/workloads/kclient_workunit_suites_ffsb.yaml b/qa/suites/kcephfs/thrash/workloads/kclient_workunit_suites_ffsb.yaml new file mode 100644 index 00000000000..0c4a1528d08 --- 
/dev/null +++ b/qa/suites/kcephfs/thrash/workloads/kclient_workunit_suites_ffsb.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + conf: + osd: + filestore flush min: 0 +tasks: +- kclient: +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/suites/kcephfs/thrash/workloads/kclient_workunit_suites_iozone.yaml b/qa/suites/kcephfs/thrash/workloads/kclient_workunit_suites_iozone.yaml new file mode 100644 index 00000000000..832e0241b27 --- /dev/null +++ b/qa/suites/kcephfs/thrash/workloads/kclient_workunit_suites_iozone.yaml @@ -0,0 +1,6 @@ +tasks: +- kclient: +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/knfs/basic/% b/qa/suites/knfs/basic/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/knfs/basic/ceph/base.yaml b/qa/suites/knfs/basic/ceph/base.yaml new file mode 100644 index 00000000000..7e80c462c37 --- /dev/null +++ b/qa/suites/knfs/basic/ceph/base.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false + +tasks: +- install: +- ceph: +- kclient: [client.0] +- knfsd: + client.0: + options: [rw,no_root_squash,async] diff --git a/qa/suites/knfs/basic/clusters/extra-client.yaml b/qa/suites/knfs/basic/clusters/extra-client.yaml new file mode 120000 index 00000000000..1582e308945 --- /dev/null +++ b/qa/suites/knfs/basic/clusters/extra-client.yaml @@ -0,0 +1 @@ +../../../../clusters/extra-client.yaml \ No newline at end of file diff --git a/qa/suites/knfs/basic/fs/btrfs.yaml b/qa/suites/knfs/basic/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/knfs/basic/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/knfs/basic/mount/v3.yaml b/qa/suites/knfs/basic/mount/v3.yaml new file mode 100644 index 00000000000..1b61119242b --- /dev/null +++ b/qa/suites/knfs/basic/mount/v3.yaml @@ -0,0 +1,5 @@ +tasks: +- nfs: + client.1: + server: client.0 + options: [rw,hard,intr,nfsvers=3] 
diff --git a/qa/suites/knfs/basic/mount/v4.yaml b/qa/suites/knfs/basic/mount/v4.yaml new file mode 100644 index 00000000000..88405666bfb --- /dev/null +++ b/qa/suites/knfs/basic/mount/v4.yaml @@ -0,0 +1,5 @@ +tasks: +- nfs: + client.1: + server: client.0 + options: [rw,hard,intr,nfsvers=4] diff --git a/qa/suites/knfs/basic/tasks/nfs-workunit-kernel-untar-build.yaml b/qa/suites/knfs/basic/tasks/nfs-workunit-kernel-untar-build.yaml new file mode 100644 index 00000000000..b9c0a5e05a3 --- /dev/null +++ b/qa/suites/knfs/basic/tasks/nfs-workunit-kernel-untar-build.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + timeout: 6h + clients: + client.1: + - kernel_untar_build.sh diff --git a/qa/suites/knfs/basic/tasks/nfs_workunit_misc.yaml b/qa/suites/knfs/basic/tasks/nfs_workunit_misc.yaml new file mode 100644 index 00000000000..135c4a74009 --- /dev/null +++ b/qa/suites/knfs/basic/tasks/nfs_workunit_misc.yaml @@ -0,0 +1,11 @@ +tasks: +- workunit: + clients: + client.1: + - fs/misc/chmod.sh + - fs/misc/i_complete_vs_rename.sh + - fs/misc/trivial_sync.sh + #- fs/misc/multiple_rsync.sh + #- fs/misc/xattrs.sh +# Once we can run multiple_rsync.sh and xattrs.sh we can change to this +# - misc diff --git a/qa/suites/knfs/basic/tasks/nfs_workunit_suites_blogbench.yaml b/qa/suites/knfs/basic/tasks/nfs_workunit_suites_blogbench.yaml new file mode 100644 index 00000000000..e554a3d9a06 --- /dev/null +++ b/qa/suites/knfs/basic/tasks/nfs_workunit_suites_blogbench.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.1: + - suites/blogbench.sh diff --git a/qa/suites/knfs/basic/tasks/nfs_workunit_suites_dbench.yaml b/qa/suites/knfs/basic/tasks/nfs_workunit_suites_dbench.yaml new file mode 100644 index 00000000000..1da1b768d02 --- /dev/null +++ b/qa/suites/knfs/basic/tasks/nfs_workunit_suites_dbench.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.1: + - suites/dbench-short.sh diff --git a/qa/suites/knfs/basic/tasks/nfs_workunit_suites_ffsb.yaml 
b/qa/suites/knfs/basic/tasks/nfs_workunit_suites_ffsb.yaml new file mode 100644 index 00000000000..3090f91ea43 --- /dev/null +++ b/qa/suites/knfs/basic/tasks/nfs_workunit_suites_ffsb.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + conf: + osd: + filestore flush min: 0 +tasks: +- workunit: + clients: + client.1: + - suites/ffsb.sh diff --git a/qa/suites/knfs/basic/tasks/nfs_workunit_suites_fsstress.yaml b/qa/suites/knfs/basic/tasks/nfs_workunit_suites_fsstress.yaml new file mode 100644 index 00000000000..bbe7b7a4045 --- /dev/null +++ b/qa/suites/knfs/basic/tasks/nfs_workunit_suites_fsstress.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.1: + - suites/fsstress.sh diff --git a/qa/suites/knfs/basic/tasks/nfs_workunit_suites_iozone.yaml b/qa/suites/knfs/basic/tasks/nfs_workunit_suites_iozone.yaml new file mode 100644 index 00000000000..7c3eec2ff3e --- /dev/null +++ b/qa/suites/knfs/basic/tasks/nfs_workunit_suites_iozone.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.1: + - suites/iozone.sh diff --git a/qa/suites/krbd/rbd-nomount/% b/qa/suites/krbd/rbd-nomount/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/krbd/rbd-nomount/clusters/fixed-3.yaml b/qa/suites/krbd/rbd-nomount/clusters/fixed-3.yaml new file mode 120000 index 00000000000..a3ac9fc4dec --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/clusters/fixed-3.yaml @@ -0,0 +1 @@ +../../../../clusters/fixed-3.yaml \ No newline at end of file diff --git a/qa/suites/krbd/rbd-nomount/conf.yaml b/qa/suites/krbd/rbd-nomount/conf.yaml new file mode 100644 index 00000000000..30da870b25d --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/conf.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false diff --git a/qa/suites/krbd/rbd-nomount/fs/btrfs.yaml b/qa/suites/krbd/rbd-nomount/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml 
\ No newline at end of file diff --git a/qa/suites/krbd/rbd-nomount/install/ceph.yaml b/qa/suites/krbd/rbd-nomount/install/ceph.yaml new file mode 100644 index 00000000000..2030acb9083 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/install/ceph.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/krbd/rbd-nomount/msgr-failures/few.yaml b/qa/suites/krbd/rbd-nomount/msgr-failures/few.yaml new file mode 100644 index 00000000000..0de320d46b8 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/msgr-failures/few.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 diff --git a/qa/suites/krbd/rbd-nomount/msgr-failures/many.yaml b/qa/suites/krbd/rbd-nomount/msgr-failures/many.yaml new file mode 100644 index 00000000000..86f8dde8a0e --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/msgr-failures/many.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 500 diff --git a/qa/suites/krbd/rbd-nomount/tasks/rbd_concurrent.yaml b/qa/suites/krbd/rbd-nomount/tasks/rbd_concurrent.yaml new file mode 100644 index 00000000000..675b98e73a5 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/rbd_concurrent.yaml @@ -0,0 +1,10 @@ +tasks: +- workunit: + clients: + all: + - rbd/concurrent.sh +# Options for rbd/concurrent.sh (default values shown) +# env: +# RBD_CONCURRENT_ITER: 100 +# RBD_CONCURRENT_COUNT: 5 +# RBD_CONCURRENT_DELAY: 5 diff --git a/qa/suites/krbd/rbd-nomount/tasks/rbd_huge_tickets.yaml b/qa/suites/krbd/rbd-nomount/tasks/rbd_huge_tickets.yaml new file mode 100644 index 00000000000..ea421eec16e --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/rbd_huge_tickets.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/huge-tickets.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/rbd_image_read.yaml b/qa/suites/krbd/rbd-nomount/tasks/rbd_image_read.yaml new file mode 100644 index 00000000000..e5017e118d1 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/rbd_image_read.yaml @@ 
-0,0 +1,15 @@ +tasks: +- workunit: + clients: + all: + - rbd/image_read.sh +# Options for rbd/image_read.sh (default values shown) +# env: +# IMAGE_READ_LOCAL_FILES: 'false' +# IMAGE_READ_FORMAT: '2' +# IMAGE_READ_VERBOSE: 'true' +# IMAGE_READ_PAGE_SIZE: '4096' +# IMAGE_READ_OBJECT_ORDER: '22' +# IMAGE_READ_TEST_CLONES: 'true' +# IMAGE_READ_DOUBLE_ORDER: 'true' +# IMAGE_READ_HALF_ORDER: 'false' diff --git a/qa/suites/krbd/rbd-nomount/tasks/rbd_kernel.yaml b/qa/suites/krbd/rbd-nomount/tasks/rbd_kernel.yaml new file mode 100644 index 00000000000..aa155827c69 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/rbd_kernel.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/kernel.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/rbd_kfsx.yaml b/qa/suites/krbd/rbd-nomount/tasks/rbd_kfsx.yaml new file mode 100644 index 00000000000..0f4b24aa64a --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/rbd_kfsx.yaml @@ -0,0 +1,11 @@ +tasks: +- rbd_fsx: + clients: [client.0] + ops: 10000 + krbd: true + readbdy: 512 + writebdy: 512 + truncbdy: 512 + holebdy: 512 + punch_holes: true + randomized_striping: false diff --git a/qa/suites/krbd/rbd-nomount/tasks/rbd_map_snapshot_io.yaml b/qa/suites/krbd/rbd-nomount/tasks/rbd_map_snapshot_io.yaml new file mode 100644 index 00000000000..c1529398b9e --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/rbd_map_snapshot_io.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/map-snapshot-io.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/rbd_map_unmap.yaml b/qa/suites/krbd/rbd-nomount/tasks/rbd_map_unmap.yaml new file mode 100644 index 00000000000..c2160997c81 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/rbd_map_unmap.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/map-unmap.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/rbd_simple_big.yaml b/qa/suites/krbd/rbd-nomount/tasks/rbd_simple_big.yaml new file mode 100644 index 00000000000..c493cfaf420 --- /dev/null +++ 
b/qa/suites/krbd/rbd-nomount/tasks/rbd_simple_big.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + clients: + all: + - rbd/simple_big.sh + diff --git a/qa/suites/krbd/rbd/% b/qa/suites/krbd/rbd/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/krbd/rbd/clusters/fixed-3.yaml b/qa/suites/krbd/rbd/clusters/fixed-3.yaml new file mode 120000 index 00000000000..a3ac9fc4dec --- /dev/null +++ b/qa/suites/krbd/rbd/clusters/fixed-3.yaml @@ -0,0 +1 @@ +../../../../clusters/fixed-3.yaml \ No newline at end of file diff --git a/qa/suites/krbd/rbd/conf.yaml b/qa/suites/krbd/rbd/conf.yaml new file mode 100644 index 00000000000..30da870b25d --- /dev/null +++ b/qa/suites/krbd/rbd/conf.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false diff --git a/qa/suites/krbd/rbd/fs/btrfs.yaml b/qa/suites/krbd/rbd/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/krbd/rbd/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/krbd/rbd/msgr-failures/few.yaml b/qa/suites/krbd/rbd/msgr-failures/few.yaml new file mode 100644 index 00000000000..0de320d46b8 --- /dev/null +++ b/qa/suites/krbd/rbd/msgr-failures/few.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 diff --git a/qa/suites/krbd/rbd/msgr-failures/many.yaml b/qa/suites/krbd/rbd/msgr-failures/many.yaml new file mode 100644 index 00000000000..86f8dde8a0e --- /dev/null +++ b/qa/suites/krbd/rbd/msgr-failures/many.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 500 diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_kernel_untar_build.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_kernel_untar_build.yaml new file mode 100644 index 00000000000..ef2a35dcc1d --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_kernel_untar_build.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- rbd: + all: +- workunit: + 
clients: + all: + - kernel_untar_build.sh diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_dbench.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_dbench.yaml new file mode 100644 index 00000000000..d779eea23ca --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_dbench.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- rbd: + all: +- workunit: + clients: + all: + - suites/dbench.sh diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_ffsb.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_ffsb.yaml new file mode 100644 index 00000000000..5204bb87ffe --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_ffsb.yaml @@ -0,0 +1,10 @@ +tasks: +- install: +- ceph: +- rbd: + all: + image_size: 20480 +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress.yaml new file mode 100644 index 00000000000..f9d62fefcac --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- rbd: + all: +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress_btrfs.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress_btrfs.yaml new file mode 100644 index 00000000000..f3930a8986a --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress_btrfs.yaml @@ -0,0 +1,10 @@ +tasks: +- install: +- ceph: +- rbd: + all: + fs_type: btrfs +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress_ext4.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress_ext4.yaml new file mode 100644 index 00000000000..f765b74a6c7 --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress_ext4.yaml @@ -0,0 +1,10 @@ +tasks: +- install: +- ceph: +- rbd: + all: + fs_type: ext4 +- workunit: + clients: + all: + - suites/fsstress.sh 
diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsx.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsx.yaml new file mode 100644 index 00000000000..98c0849c57e --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsx.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- rbd: + all: +- workunit: + clients: + all: + - suites/fsx.sh diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_iozone.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_iozone.yaml new file mode 100644 index 00000000000..eb8f18d60de --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_iozone.yaml @@ -0,0 +1,10 @@ +tasks: +- install: +- ceph: +- rbd: + all: + image_size: 20480 +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_trivial_sync.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_trivial_sync.yaml new file mode 100644 index 00000000000..7c2796b2a88 --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_trivial_sync.yaml @@ -0,0 +1,8 @@ +tasks: +- install: +- ceph: +- rbd: + all: +- workunit: + clients: + all: [fs/misc/trivial_sync.sh] diff --git a/qa/suites/krbd/singleton/% b/qa/suites/krbd/singleton/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/krbd/singleton/conf.yaml b/qa/suites/krbd/singleton/conf.yaml new file mode 100644 index 00000000000..30da870b25d --- /dev/null +++ b/qa/suites/krbd/singleton/conf.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false diff --git a/qa/suites/krbd/singleton/fs/btrfs.yaml b/qa/suites/krbd/singleton/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/krbd/singleton/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/krbd/singleton/msgr-failures/few.yaml b/qa/suites/krbd/singleton/msgr-failures/few.yaml new file mode 100644 index 00000000000..0de320d46b8 --- /dev/null +++ 
b/qa/suites/krbd/singleton/msgr-failures/few.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 diff --git a/qa/suites/krbd/singleton/msgr-failures/many.yaml b/qa/suites/krbd/singleton/msgr-failures/many.yaml new file mode 100644 index 00000000000..86f8dde8a0e --- /dev/null +++ b/qa/suites/krbd/singleton/msgr-failures/many.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 500 diff --git a/qa/suites/krbd/singleton/tasks/rbd_xfstests.yaml b/qa/suites/krbd/singleton/tasks/rbd_xfstests.yaml new file mode 100644 index 00000000000..47a8199c190 --- /dev/null +++ b/qa/suites/krbd/singleton/tasks/rbd_xfstests.yaml @@ -0,0 +1,19 @@ +roles: +- [mon.a, mon.c, osd.0, osd.1, osd.2] +- [mon.b, mds.a, osd.3, osd.4, osd.5] +- [client.0] +- [client.1] +tasks: +- install: +- ceph: +- rbd.xfstests: + client.0: + test_image: 'test_image-0' + scratch_image: 'scratch_image-0' + tests: '-g auto' + randomize: true + client.1: + test_image: 'test_image-1' + scratch_image: 'scratch_image-1' + tests: '-g auto' + randomize: true diff --git a/qa/suites/krbd/thrash/% b/qa/suites/krbd/thrash/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/krbd/thrash/clusters/fixed-3.yaml b/qa/suites/krbd/thrash/clusters/fixed-3.yaml new file mode 120000 index 00000000000..a3ac9fc4dec --- /dev/null +++ b/qa/suites/krbd/thrash/clusters/fixed-3.yaml @@ -0,0 +1 @@ +../../../../clusters/fixed-3.yaml \ No newline at end of file diff --git a/qa/suites/krbd/thrash/conf.yaml b/qa/suites/krbd/thrash/conf.yaml new file mode 100644 index 00000000000..30da870b25d --- /dev/null +++ b/qa/suites/krbd/thrash/conf.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false diff --git a/qa/suites/krbd/thrash/fs/btrfs.yaml b/qa/suites/krbd/thrash/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/krbd/thrash/fs/btrfs.yaml @@ -0,0 +1 @@ 
+../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/krbd/thrash/thrashers/default.yaml b/qa/suites/krbd/thrash/thrashers/default.yaml new file mode 100644 index 00000000000..14d772583cf --- /dev/null +++ b/qa/suites/krbd/thrash/thrashers/default.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost +- thrashosds: diff --git a/qa/suites/krbd/thrash/thrashers/mon-thrasher.yaml b/qa/suites/krbd/thrash/thrashers/mon-thrasher.yaml new file mode 100644 index 00000000000..90612f21865 --- /dev/null +++ b/qa/suites/krbd/thrash/thrashers/mon-thrasher.yaml @@ -0,0 +1,6 @@ +tasks: +- install: +- ceph: +- mon_thrash: + revive_delay: 20 + thrash_delay: 1 diff --git a/qa/suites/krbd/thrash/workloads/rbd_workunit_suites_ffsb.yaml b/qa/suites/krbd/thrash/workloads/rbd_workunit_suites_ffsb.yaml new file mode 100644 index 00000000000..4ae7d690905 --- /dev/null +++ b/qa/suites/krbd/thrash/workloads/rbd_workunit_suites_ffsb.yaml @@ -0,0 +1,8 @@ +tasks: +- rbd: + all: + image_size: 20480 +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/suites/krbd/thrash/workloads/rbd_workunit_suites_iozone.yaml.disabled b/qa/suites/krbd/thrash/workloads/rbd_workunit_suites_iozone.yaml.disabled new file mode 100644 index 00000000000..d61ede1bd66 --- /dev/null +++ b/qa/suites/krbd/thrash/workloads/rbd_workunit_suites_iozone.yaml.disabled @@ -0,0 +1,8 @@ +tasks: +- rbd: + all: + image_size: 20480 +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/marginal/basic/% b/qa/suites/marginal/basic/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/marginal/basic/clusters/fixed-3.yaml b/qa/suites/marginal/basic/clusters/fixed-3.yaml new file mode 100644 index 00000000000..0038432afa7 --- /dev/null +++ b/qa/suites/marginal/basic/clusters/fixed-3.yaml @@ -0,0 +1,4 @@ +roles: +- [mon.a, mon.c, osd.0, osd.1, osd.2] +- [mon.b, mds.a, osd.3, osd.4, 
osd.5] +- [client.0] diff --git a/qa/suites/marginal/basic/fs/btrfs.yaml b/qa/suites/marginal/basic/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/marginal/basic/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/marginal/basic/tasks/kclient_workunit_suites_blogbench.yaml b/qa/suites/marginal/basic/tasks/kclient_workunit_suites_blogbench.yaml new file mode 100644 index 00000000000..4f25d806313 --- /dev/null +++ b/qa/suites/marginal/basic/tasks/kclient_workunit_suites_blogbench.yaml @@ -0,0 +1,8 @@ +tasks: +- install: +- ceph: +- kclient: +- workunit: + clients: + all: + - suites/blogbench.sh diff --git a/qa/suites/marginal/basic/tasks/kclient_workunit_suites_fsx.yaml b/qa/suites/marginal/basic/tasks/kclient_workunit_suites_fsx.yaml new file mode 100644 index 00000000000..a0d2e765bdb --- /dev/null +++ b/qa/suites/marginal/basic/tasks/kclient_workunit_suites_fsx.yaml @@ -0,0 +1,8 @@ +tasks: +- install: +- ceph: +- kclient: +- workunit: + clients: + all: + - suites/fsx.sh diff --git a/qa/suites/marginal/fs-misc/% b/qa/suites/marginal/fs-misc/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/marginal/fs-misc/clusters/two_clients.yaml b/qa/suites/marginal/fs-misc/clusters/two_clients.yaml new file mode 100644 index 00000000000..2258befd8bf --- /dev/null +++ b/qa/suites/marginal/fs-misc/clusters/two_clients.yaml @@ -0,0 +1,4 @@ +roles: +- [mon.a, mon.b, mon.c, mds.a, osd.0, osd.1, osd.2] +- [client.1] +- [client.0] diff --git a/qa/suites/marginal/fs-misc/fs/btrfs.yaml b/qa/suites/marginal/fs-misc/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/marginal/fs-misc/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/marginal/fs-misc/tasks/locktest.yaml b/qa/suites/marginal/fs-misc/tasks/locktest.yaml new file mode 100644 index 
00000000000..444bb1f19b3 --- /dev/null +++ b/qa/suites/marginal/fs-misc/tasks/locktest.yaml @@ -0,0 +1,5 @@ +tasks: +- install: +- ceph: +- kclient: +- locktest: [client.0, client.1] diff --git a/qa/suites/marginal/mds_restart/% b/qa/suites/marginal/mds_restart/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/marginal/mds_restart/clusters/one_mds.yaml b/qa/suites/marginal/mds_restart/clusters/one_mds.yaml new file mode 100644 index 00000000000..9e11c02a36c --- /dev/null +++ b/qa/suites/marginal/mds_restart/clusters/one_mds.yaml @@ -0,0 +1,4 @@ +roles: +- [mon.a, mon.b, mon.c, osd.0, osd.1, osd.2] +- [mds.a] +- [client.0] diff --git a/qa/suites/marginal/mds_restart/tasks/restart-workunit-backtraces.yaml b/qa/suites/marginal/mds_restart/tasks/restart-workunit-backtraces.yaml new file mode 100644 index 00000000000..d086d4cf8d3 --- /dev/null +++ b/qa/suites/marginal/mds_restart/tasks/restart-workunit-backtraces.yaml @@ -0,0 +1,11 @@ +tasks: +- install: +- ceph: + conf: + mds: + mds log segment size: 16384 + mds log max segments: 1 +- restart: + exec: + client.0: + - test-backtraces.py diff --git a/qa/suites/marginal/multimds/% b/qa/suites/marginal/multimds/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/marginal/multimds/clusters/3-node-3-mds.yaml b/qa/suites/marginal/multimds/clusters/3-node-3-mds.yaml new file mode 100644 index 00000000000..088d9f0d31d --- /dev/null +++ b/qa/suites/marginal/multimds/clusters/3-node-3-mds.yaml @@ -0,0 +1,5 @@ +roles: +- [mon.a, mon.c, mds.a, osd.0, osd.1, osd.2] +- [mon.b, mds.b, mds.c, osd.3, osd.4, osd.5] +- [client.0] +- [client.1] diff --git a/qa/suites/marginal/multimds/clusters/3-node-9-mds.yaml b/qa/suites/marginal/multimds/clusters/3-node-9-mds.yaml new file mode 100644 index 00000000000..be824f0f554 --- /dev/null +++ b/qa/suites/marginal/multimds/clusters/3-node-9-mds.yaml @@ -0,0 +1,5 @@ +roles: +- [mon.a, mon.c, mds.a, mds.b, mds.c, mds.d, osd.0, osd.1, osd.2] +- 
[mon.b, mds.e, mds.f, mds.g, mds.h, mds.i, osd.3, osd.4, osd.5] +- [client.0] +- [client.1] diff --git a/qa/suites/marginal/multimds/fs/btrfs.yaml b/qa/suites/marginal/multimds/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/marginal/multimds/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/marginal/multimds/mounts/ceph-fuse.yaml b/qa/suites/marginal/multimds/mounts/ceph-fuse.yaml new file mode 100644 index 00000000000..55d8beb00e9 --- /dev/null +++ b/qa/suites/marginal/multimds/mounts/ceph-fuse.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + conf: + client: + fuse_default_permissions: 0 +- ceph-fuse: diff --git a/qa/suites/marginal/multimds/mounts/kclient.yaml b/qa/suites/marginal/multimds/mounts/kclient.yaml new file mode 100644 index 00000000000..c18db8f5ea6 --- /dev/null +++ b/qa/suites/marginal/multimds/mounts/kclient.yaml @@ -0,0 +1,4 @@ +tasks: +- install: +- ceph: +- kclient: diff --git a/qa/suites/marginal/multimds/tasks/workunit_misc.yaml b/qa/suites/marginal/multimds/tasks/workunit_misc.yaml new file mode 100644 index 00000000000..aa62b9e8c3a --- /dev/null +++ b/qa/suites/marginal/multimds/tasks/workunit_misc.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - fs/misc diff --git a/qa/suites/marginal/multimds/tasks/workunit_suites_blogbench.yaml b/qa/suites/marginal/multimds/tasks/workunit_suites_blogbench.yaml new file mode 100644 index 00000000000..4c1fcc11ed9 --- /dev/null +++ b/qa/suites/marginal/multimds/tasks/workunit_suites_blogbench.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - suites/blogbench.sh diff --git a/qa/suites/marginal/multimds/tasks/workunit_suites_dbench.yaml b/qa/suites/marginal/multimds/tasks/workunit_suites_dbench.yaml new file mode 100644 index 00000000000..41b2bc8edaa --- /dev/null +++ b/qa/suites/marginal/multimds/tasks/workunit_suites_dbench.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + 
clients: + all: + - suites/dbench.sh diff --git a/qa/suites/marginal/multimds/tasks/workunit_suites_fsstress.yaml b/qa/suites/marginal/multimds/tasks/workunit_suites_fsstress.yaml new file mode 100644 index 00000000000..ddb18fb791a --- /dev/null +++ b/qa/suites/marginal/multimds/tasks/workunit_suites_fsstress.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/marginal/multimds/tasks/workunit_suites_fsync.yaml b/qa/suites/marginal/multimds/tasks/workunit_suites_fsync.yaml new file mode 100644 index 00000000000..7efa1adb82d --- /dev/null +++ b/qa/suites/marginal/multimds/tasks/workunit_suites_fsync.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - suites/fsync-tester.sh diff --git a/qa/suites/marginal/multimds/tasks/workunit_suites_pjd.yaml b/qa/suites/marginal/multimds/tasks/workunit_suites_pjd.yaml new file mode 100644 index 00000000000..dfb3abe23a7 --- /dev/null +++ b/qa/suites/marginal/multimds/tasks/workunit_suites_pjd.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + conf: + client: + fuse_default_permissions: 1 +tasks: +- workunit: + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/marginal/multimds/tasks/workunit_suites_truncate_delay.yaml b/qa/suites/marginal/multimds/tasks/workunit_suites_truncate_delay.yaml new file mode 100644 index 00000000000..3aa5f8825ac --- /dev/null +++ b/qa/suites/marginal/multimds/tasks/workunit_suites_truncate_delay.yaml @@ -0,0 +1,15 @@ +tasks: +- install: +- ceph: + conf: + client: + ms_inject_delay_probability: 1 + ms_inject_delay_type: osd + ms_inject_delay_max: 5 + client_oc_max_dirty_age: 1 +- ceph-fuse: +- exec: + client.0: + - dd if=/dev/zero of=./foo count=100 + - sleep 2 + - truncate --size 0 ./foo diff --git a/qa/suites/marginal/multimds/thrash/exports.yaml b/qa/suites/marginal/multimds/thrash/exports.yaml new file mode 100644 index 00000000000..240b46dfd8a --- /dev/null +++ b/qa/suites/marginal/multimds/thrash/exports.yaml @@ -0,0 +1,5 @@ 
+overrides: + ceph: + conf: + mds: + mds thrash exports: 1 diff --git a/qa/suites/marginal/multimds/thrash/normal.yaml b/qa/suites/marginal/multimds/thrash/normal.yaml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/mixed-clients/basic/clusters/fixed-3.yaml b/qa/suites/mixed-clients/basic/clusters/fixed-3.yaml new file mode 100644 index 00000000000..e1d3c7b7932 --- /dev/null +++ b/qa/suites/mixed-clients/basic/clusters/fixed-3.yaml @@ -0,0 +1,4 @@ +roles: +- [mon.a, mds.a, osd.0, osd.1] +- [mon.b, mon.c, osd.2, osd.3, client.0] +- [client.1] diff --git a/qa/suites/mixed-clients/basic/fs/btrfs.yaml b/qa/suites/mixed-clients/basic/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/mixed-clients/basic/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/mixed-clients/basic/tasks/kernel_cfuse_workunits_dbench_iozone.yaml b/qa/suites/mixed-clients/basic/tasks/kernel_cfuse_workunits_dbench_iozone.yaml new file mode 100644 index 00000000000..bb347be7fd7 --- /dev/null +++ b/qa/suites/mixed-clients/basic/tasks/kernel_cfuse_workunits_dbench_iozone.yaml @@ -0,0 +1,26 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +tasks: +- install: + branch: dumpling +- ceph: +- parallel: + - user-workload + - kclient-workload +user-workload: + sequential: + - ceph-fuse: [client.0] + - workunit: + clients: + client.0: + - suites/iozone.sh +kclient-workload: + sequential: + - kclient: [client.1] + - workunit: + clients: + client.1: + - suites/dbench.sh diff --git a/qa/suites/mixed-clients/basic/tasks/kernel_cfuse_workunits_untarbuild_blogbench.yaml b/qa/suites/mixed-clients/basic/tasks/kernel_cfuse_workunits_untarbuild_blogbench.yaml new file mode 100644 index 00000000000..2c32a61e864 --- /dev/null +++ b/qa/suites/mixed-clients/basic/tasks/kernel_cfuse_workunits_untarbuild_blogbench.yaml @@ -0,0 +1,26 @@ +overrides: + ceph: + conf: + global: 
+ ms die on skipped message: false +tasks: +- install: + branch: dumpling +- ceph: +- parallel: + - user-workload + - kclient-workload +user-workload: + sequential: + - ceph-fuse: [client.0] + - workunit: + clients: + client.0: + - suites/blogbench.sh +kclient-workload: + sequential: + - kclient: [client.1] + - workunit: + clients: + client.1: + - kernel_untar_build.sh diff --git a/qa/suites/multimds/basic/% b/qa/suites/multimds/basic/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/multimds/basic/ceph/base.yaml b/qa/suites/multimds/basic/ceph/base.yaml new file mode 100644 index 00000000000..50b60b5152b --- /dev/null +++ b/qa/suites/multimds/basic/ceph/base.yaml @@ -0,0 +1,6 @@ +tasks: +- install: +- ceph: + conf: + client: + fuse_default_permissions: 0 diff --git a/qa/suites/multimds/basic/clusters/3-mds.yaml b/qa/suites/multimds/basic/clusters/3-mds.yaml new file mode 100644 index 00000000000..c655b90c81c --- /dev/null +++ b/qa/suites/multimds/basic/clusters/3-mds.yaml @@ -0,0 +1,4 @@ +roles: +- [mon.a, mon.c, mds.a, osd.0, osd.1, osd.2] +- [mon.b, mds.b, mds.c, osd.3, osd.4, osd.5] +- [client.0] diff --git a/qa/suites/multimds/basic/clusters/9-mds.yaml b/qa/suites/multimds/basic/clusters/9-mds.yaml new file mode 100644 index 00000000000..ed554c9fe3c --- /dev/null +++ b/qa/suites/multimds/basic/clusters/9-mds.yaml @@ -0,0 +1,4 @@ +roles: +- [mon.a, mon.c, mds.a, mds.b, mds.c, mds.d, osd.0, osd.1, osd.2] +- [mon.b, mds.e, mds.f, mds.g, mds.h, mds.i, osd.3, osd.4, osd.5] +- [client.0] diff --git a/qa/suites/multimds/basic/debug/mds_client.yaml b/qa/suites/multimds/basic/debug/mds_client.yaml new file mode 120000 index 00000000000..335c1cafed7 --- /dev/null +++ b/qa/suites/multimds/basic/debug/mds_client.yaml @@ -0,0 +1 @@ +../../../../debug/mds_client.yaml \ No newline at end of file diff --git a/qa/suites/multimds/basic/fs/btrfs.yaml b/qa/suites/multimds/basic/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- 
/dev/null +++ b/qa/suites/multimds/basic/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/multimds/basic/inline/no.yaml b/qa/suites/multimds/basic/inline/no.yaml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/multimds/basic/inline/yes.yaml b/qa/suites/multimds/basic/inline/yes.yaml new file mode 100644 index 00000000000..4b2c1d9cf49 --- /dev/null +++ b/qa/suites/multimds/basic/inline/yes.yaml @@ -0,0 +1,4 @@ +tasks: +- exec: + client.0: + - ceph mds set inline_data true --yes-i-really-mean-it diff --git a/qa/suites/multimds/basic/mount/cfuse.yaml b/qa/suites/multimds/basic/mount/cfuse.yaml new file mode 100644 index 00000000000..e3c34a1f604 --- /dev/null +++ b/qa/suites/multimds/basic/mount/cfuse.yaml @@ -0,0 +1,2 @@ +tasks: +- ceph-fuse: diff --git a/qa/suites/multimds/basic/mount/kclient.yaml b/qa/suites/multimds/basic/mount/kclient.yaml new file mode 100644 index 00000000000..f00f16aea22 --- /dev/null +++ b/qa/suites/multimds/basic/mount/kclient.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +tasks: +- kclient: diff --git a/qa/suites/multimds/basic/overrides/whitelist_wrongly_marked_down.yaml b/qa/suites/multimds/basic/overrides/whitelist_wrongly_marked_down.yaml new file mode 120000 index 00000000000..08f746bf894 --- /dev/null +++ b/qa/suites/multimds/basic/overrides/whitelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +../../../../overrides/whitelist_wrongly_marked_down.yaml \ No newline at end of file diff --git a/qa/suites/multimds/basic/tasks/kernel_untar_build.yaml b/qa/suites/multimds/basic/tasks/kernel_untar_build.yaml new file mode 100644 index 00000000000..8dbc24a9feb --- /dev/null +++ b/qa/suites/multimds/basic/tasks/kernel_untar_build.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + conf: + client: + fuse_default_permissions: 0 +tasks: +- workunit: + clients: + all: + - kernel_untar_build.sh diff --git 
a/qa/suites/multimds/basic/tasks/misc.yaml b/qa/suites/multimds/basic/tasks/misc.yaml new file mode 100644 index 00000000000..6c8327bb0d7 --- /dev/null +++ b/qa/suites/multimds/basic/tasks/misc.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + timeout: 5h + clients: + all: + - fs/misc diff --git a/qa/suites/multimds/basic/tasks/misc_test_o_trunc.yaml b/qa/suites/multimds/basic/tasks/misc_test_o_trunc.yaml new file mode 100644 index 00000000000..c9de5c38637 --- /dev/null +++ b/qa/suites/multimds/basic/tasks/misc_test_o_trunc.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - fs/test_o_trunc.sh diff --git a/qa/suites/multimds/basic/tasks/suites_blogbench.yaml b/qa/suites/multimds/basic/tasks/suites_blogbench.yaml new file mode 100644 index 00000000000..4c1fcc11ed9 --- /dev/null +++ b/qa/suites/multimds/basic/tasks/suites_blogbench.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - suites/blogbench.sh diff --git a/qa/suites/multimds/basic/tasks/suites_dbench.yaml b/qa/suites/multimds/basic/tasks/suites_dbench.yaml new file mode 100644 index 00000000000..41b2bc8edaa --- /dev/null +++ b/qa/suites/multimds/basic/tasks/suites_dbench.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - suites/dbench.sh diff --git a/qa/suites/multimds/basic/tasks/suites_ffsb.yaml b/qa/suites/multimds/basic/tasks/suites_ffsb.yaml new file mode 100644 index 00000000000..4a2a627fe5d --- /dev/null +++ b/qa/suites/multimds/basic/tasks/suites_ffsb.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + conf: + osd: + filestore flush min: 0 +tasks: +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/suites/multimds/basic/tasks/suites_fsstress.yaml b/qa/suites/multimds/basic/tasks/suites_fsstress.yaml new file mode 100644 index 00000000000..ddb18fb791a --- /dev/null +++ b/qa/suites/multimds/basic/tasks/suites_fsstress.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/multimds/basic/tasks/suites_fsx.yaml 
b/qa/suites/multimds/basic/tasks/suites_fsx.yaml new file mode 100644 index 00000000000..8b2b1ab5c14 --- /dev/null +++ b/qa/suites/multimds/basic/tasks/suites_fsx.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - suites/fsx.sh diff --git a/qa/suites/multimds/basic/tasks/suites_fsync.yaml b/qa/suites/multimds/basic/tasks/suites_fsync.yaml new file mode 100644 index 00000000000..7efa1adb82d --- /dev/null +++ b/qa/suites/multimds/basic/tasks/suites_fsync.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - suites/fsync-tester.sh diff --git a/qa/suites/multimds/basic/tasks/suites_iogen.yaml b/qa/suites/multimds/basic/tasks/suites_iogen.yaml new file mode 100644 index 00000000000..d45d4ea3c3f --- /dev/null +++ b/qa/suites/multimds/basic/tasks/suites_iogen.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - suites/iogen.sh diff --git a/qa/suites/multimds/basic/tasks/suites_iozone.yaml b/qa/suites/multimds/basic/tasks/suites_iozone.yaml new file mode 100644 index 00000000000..9270f3c51e2 --- /dev/null +++ b/qa/suites/multimds/basic/tasks/suites_iozone.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/multimds/basic/tasks/suites_pjd.yaml b/qa/suites/multimds/basic/tasks/suites_pjd.yaml new file mode 100644 index 00000000000..de21f7c3464 --- /dev/null +++ b/qa/suites/multimds/basic/tasks/suites_pjd.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + client: + debug ms: 1 + debug client: 20 + mds: + debug ms: 1 + debug mds: 20 +tasks: +- workunit: + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/multimds/basic/tasks/suites_truncate_delay.yaml b/qa/suites/multimds/basic/tasks/suites_truncate_delay.yaml new file mode 100644 index 00000000000..ac5c9b13901 --- /dev/null +++ b/qa/suites/multimds/basic/tasks/suites_truncate_delay.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + client: + ms_inject_delay_probability: 1 + ms_inject_delay_type: osd + ms_inject_delay_max: 5 + 
client_oc_max_dirty_age: 1 +tasks: +- exec: + client.0: + - dd if=/dev/zero of=./foo count=100 + - sleep 2 + - truncate --size 0 ./foo diff --git a/qa/suites/multimds/basic/tasks/trivial_sync.yaml b/qa/suites/multimds/basic/tasks/trivial_sync.yaml new file mode 100644 index 00000000000..36e7411b638 --- /dev/null +++ b/qa/suites/multimds/basic/tasks/trivial_sync.yaml @@ -0,0 +1,4 @@ +tasks: +- workunit: + clients: + all: [fs/misc/trivial_sync.sh] diff --git a/qa/suites/multimds/libcephfs/% b/qa/suites/multimds/libcephfs/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/multimds/libcephfs/ceph/base.yaml b/qa/suites/multimds/libcephfs/ceph/base.yaml new file mode 100644 index 00000000000..50b60b5152b --- /dev/null +++ b/qa/suites/multimds/libcephfs/ceph/base.yaml @@ -0,0 +1,6 @@ +tasks: +- install: +- ceph: + conf: + client: + fuse_default_permissions: 0 diff --git a/qa/suites/multimds/libcephfs/clusters/3-mds.yaml b/qa/suites/multimds/libcephfs/clusters/3-mds.yaml new file mode 100644 index 00000000000..c655b90c81c --- /dev/null +++ b/qa/suites/multimds/libcephfs/clusters/3-mds.yaml @@ -0,0 +1,4 @@ +roles: +- [mon.a, mon.c, mds.a, osd.0, osd.1, osd.2] +- [mon.b, mds.b, mds.c, osd.3, osd.4, osd.5] +- [client.0] diff --git a/qa/suites/multimds/libcephfs/clusters/9-mds.yaml b/qa/suites/multimds/libcephfs/clusters/9-mds.yaml new file mode 100644 index 00000000000..ed554c9fe3c --- /dev/null +++ b/qa/suites/multimds/libcephfs/clusters/9-mds.yaml @@ -0,0 +1,4 @@ +roles: +- [mon.a, mon.c, mds.a, mds.b, mds.c, mds.d, osd.0, osd.1, osd.2] +- [mon.b, mds.e, mds.f, mds.g, mds.h, mds.i, osd.3, osd.4, osd.5] +- [client.0] diff --git a/qa/suites/multimds/libcephfs/debug/mds_client.yaml b/qa/suites/multimds/libcephfs/debug/mds_client.yaml new file mode 120000 index 00000000000..335c1cafed7 --- /dev/null +++ b/qa/suites/multimds/libcephfs/debug/mds_client.yaml @@ -0,0 +1 @@ +../../../../debug/mds_client.yaml \ No newline at end of file diff --git 
a/qa/suites/multimds/libcephfs/fs/btrfs.yaml b/qa/suites/multimds/libcephfs/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/multimds/libcephfs/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/multimds/libcephfs/inline/no.yaml b/qa/suites/multimds/libcephfs/inline/no.yaml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/multimds/libcephfs/inline/yes.yaml b/qa/suites/multimds/libcephfs/inline/yes.yaml new file mode 100644 index 00000000000..4b2c1d9cf49 --- /dev/null +++ b/qa/suites/multimds/libcephfs/inline/yes.yaml @@ -0,0 +1,4 @@ +tasks: +- exec: + client.0: + - ceph mds set inline_data true --yes-i-really-mean-it diff --git a/qa/suites/multimds/libcephfs/overrides/whitelist_wrongly_marked_down.yaml b/qa/suites/multimds/libcephfs/overrides/whitelist_wrongly_marked_down.yaml new file mode 120000 index 00000000000..08f746bf894 --- /dev/null +++ b/qa/suites/multimds/libcephfs/overrides/whitelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +../../../../overrides/whitelist_wrongly_marked_down.yaml \ No newline at end of file diff --git a/qa/suites/multimds/libcephfs/tasks/libcephfs_interface_tests.yaml b/qa/suites/multimds/libcephfs/tasks/libcephfs_interface_tests.yaml new file mode 100644 index 00000000000..0b1d41fea5c --- /dev/null +++ b/qa/suites/multimds/libcephfs/tasks/libcephfs_interface_tests.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + client.0: + - libcephfs/test.sh diff --git a/qa/suites/multimds/libcephfs/tasks/libcephfs_java.yaml b/qa/suites/multimds/libcephfs/tasks/libcephfs_java.yaml new file mode 100644 index 00000000000..4330d50965e --- /dev/null +++ b/qa/suites/multimds/libcephfs/tasks/libcephfs_java.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + client.0: + - libcephfs-java/test.sh diff --git a/qa/suites/multimds/libcephfs/tasks/mds_creation_retry.yaml 
b/qa/suites/multimds/libcephfs/tasks/mds_creation_retry.yaml new file mode 100644 index 00000000000..cd87f28ad08 --- /dev/null +++ b/qa/suites/multimds/libcephfs/tasks/mds_creation_retry.yaml @@ -0,0 +1,6 @@ +tasks: +-mds_creation_failure: +-ceph-fuse: +- workunit: + clients: + all: [fs/misc/trivial_sync.sh] diff --git a/qa/suites/multimds/verify/% b/qa/suites/multimds/verify/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/multimds/verify/ceph/base.yaml b/qa/suites/multimds/verify/ceph/base.yaml new file mode 100644 index 00000000000..50b60b5152b --- /dev/null +++ b/qa/suites/multimds/verify/ceph/base.yaml @@ -0,0 +1,6 @@ +tasks: +- install: +- ceph: + conf: + client: + fuse_default_permissions: 0 diff --git a/qa/suites/multimds/verify/clusters/3-mds.yaml b/qa/suites/multimds/verify/clusters/3-mds.yaml new file mode 100644 index 00000000000..c655b90c81c --- /dev/null +++ b/qa/suites/multimds/verify/clusters/3-mds.yaml @@ -0,0 +1,4 @@ +roles: +- [mon.a, mon.c, mds.a, osd.0, osd.1, osd.2] +- [mon.b, mds.b, mds.c, osd.3, osd.4, osd.5] +- [client.0] diff --git a/qa/suites/multimds/verify/clusters/9-mds.yaml b/qa/suites/multimds/verify/clusters/9-mds.yaml new file mode 100644 index 00000000000..ed554c9fe3c --- /dev/null +++ b/qa/suites/multimds/verify/clusters/9-mds.yaml @@ -0,0 +1,4 @@ +roles: +- [mon.a, mon.c, mds.a, mds.b, mds.c, mds.d, osd.0, osd.1, osd.2] +- [mon.b, mds.e, mds.f, mds.g, mds.h, mds.i, osd.3, osd.4, osd.5] +- [client.0] diff --git a/qa/suites/multimds/verify/debug/mds_client.yaml b/qa/suites/multimds/verify/debug/mds_client.yaml new file mode 120000 index 00000000000..335c1cafed7 --- /dev/null +++ b/qa/suites/multimds/verify/debug/mds_client.yaml @@ -0,0 +1 @@ +../../../../debug/mds_client.yaml \ No newline at end of file diff --git a/qa/suites/multimds/verify/fs/btrfs.yaml b/qa/suites/multimds/verify/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ 
b/qa/suites/multimds/verify/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/multimds/verify/overrides/whitelist_wrongly_marked_down.yaml b/qa/suites/multimds/verify/overrides/whitelist_wrongly_marked_down.yaml new file mode 120000 index 00000000000..08f746bf894 --- /dev/null +++ b/qa/suites/multimds/verify/overrides/whitelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +../../../../overrides/whitelist_wrongly_marked_down.yaml \ No newline at end of file diff --git a/qa/suites/multimds/verify/tasks/cfuse_workunit_suites_dbench.yaml b/qa/suites/multimds/verify/tasks/cfuse_workunit_suites_dbench.yaml new file mode 100644 index 00000000000..ad96b4c5e7f --- /dev/null +++ b/qa/suites/multimds/verify/tasks/cfuse_workunit_suites_dbench.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/dbench.sh diff --git a/qa/suites/multimds/verify/tasks/cfuse_workunit_suites_fsstress.yaml b/qa/suites/multimds/verify/tasks/cfuse_workunit_suites_fsstress.yaml new file mode 100644 index 00000000000..5908d951b2d --- /dev/null +++ b/qa/suites/multimds/verify/tasks/cfuse_workunit_suites_fsstress.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/multimds/verify/tasks/libcephfs_interface_tests.yaml b/qa/suites/multimds/verify/tasks/libcephfs_interface_tests.yaml new file mode 100644 index 00000000000..0b1d41fea5c --- /dev/null +++ b/qa/suites/multimds/verify/tasks/libcephfs_interface_tests.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + client.0: + - libcephfs/test.sh diff --git a/qa/suites/multimds/verify/validater/lockdep.yaml b/qa/suites/multimds/verify/validater/lockdep.yaml new file mode 100644 index 00000000000..25f84355c0b --- /dev/null +++ b/qa/suites/multimds/verify/validater/lockdep.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + lockdep: true diff --git 
a/qa/suites/multimds/verify/validater/valgrind.yaml b/qa/suites/multimds/verify/validater/valgrind.yaml new file mode 100644 index 00000000000..973f460ad47 --- /dev/null +++ b/qa/suites/multimds/verify/validater/valgrind.yaml @@ -0,0 +1,15 @@ +overrides: + install: + ceph: + flavor: notcmalloc + ceph: + conf: + global: + osd heartbeat grace: 40 + valgrind: + mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes] + osd: [--tool=memcheck] + mds: [--tool=memcheck] + ceph-fuse: + client.0: + valgrind: [--tool=memcheck, --leak-check=full, --show-reachable=yes] diff --git a/qa/suites/powercycle/osd/% b/qa/suites/powercycle/osd/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/powercycle/osd/clusters/3osd-1per-target.yaml b/qa/suites/powercycle/osd/clusters/3osd-1per-target.yaml new file mode 100644 index 00000000000..d5503a40c86 --- /dev/null +++ b/qa/suites/powercycle/osd/clusters/3osd-1per-target.yaml @@ -0,0 +1,5 @@ +roles: +- [mon.0, mon.1, mon.2, mds.0, client.0] +- [osd.0] +- [osd.1] +- [osd.2] diff --git a/qa/suites/powercycle/osd/fs/btrfs.yaml b/qa/suites/powercycle/osd/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/powercycle/osd/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/powercycle/osd/fs/ext4.yaml b/qa/suites/powercycle/osd/fs/ext4.yaml new file mode 120000 index 00000000000..65d71886933 --- /dev/null +++ b/qa/suites/powercycle/osd/fs/ext4.yaml @@ -0,0 +1 @@ +../../../../fs/ext4.yaml \ No newline at end of file diff --git a/qa/suites/powercycle/osd/fs/xfs.yaml b/qa/suites/powercycle/osd/fs/xfs.yaml new file mode 120000 index 00000000000..4c28d731f6b --- /dev/null +++ b/qa/suites/powercycle/osd/fs/xfs.yaml @@ -0,0 +1 @@ +../../../../fs/xfs.yaml \ No newline at end of file diff --git a/qa/suites/powercycle/osd/powercycle/default.yaml b/qa/suites/powercycle/osd/powercycle/default.yaml new file mode 100644 index 
00000000000..b632e83e621 --- /dev/null +++ b/qa/suites/powercycle/osd/powercycle/default.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: +- thrashosds: + chance_down: 1.0 + powercycle: true + timeout: 600 diff --git a/qa/suites/powercycle/osd/tasks/admin_socket_objecter_requests.yaml b/qa/suites/powercycle/osd/tasks/admin_socket_objecter_requests.yaml new file mode 100644 index 00000000000..b1ddad8d3b0 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/admin_socket_objecter_requests.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + client.0: + admin socket: /var/run/ceph/ceph-$name.asok +tasks: +- radosbench: + clients: [client.0] + time: 60 +- admin_socket: + client.0: + objecter_requests: + test: "http://git.ceph.com/?p=ceph.git;a=blob_plain;f=src/test/admin_socket/objecter_requests;hb={branch}" diff --git a/qa/suites/powercycle/osd/tasks/cfuse_workunit_kernel_untar_build.yaml b/qa/suites/powercycle/osd/tasks/cfuse_workunit_kernel_untar_build.yaml new file mode 100644 index 00000000000..87f8f57cc7b --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/cfuse_workunit_kernel_untar_build.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + conf: + client: + fuse_default_permissions: 0 +tasks: +- ceph-fuse: +- workunit: + timeout: 6h + clients: + all: + - kernel_untar_build.sh diff --git a/qa/suites/powercycle/osd/tasks/cfuse_workunit_misc.yaml b/qa/suites/powercycle/osd/tasks/cfuse_workunit_misc.yaml new file mode 100644 index 00000000000..683d3f592c2 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/cfuse_workunit_misc.yaml @@ -0,0 +1,7 @@ +tasks: +- ceph-fuse: +- workunit: + timeout: 6h + clients: + all: + - fs/misc diff --git a/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_ffsb.yaml b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_ffsb.yaml new file mode 100644 index 00000000000..9f3fa7b1887 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_ffsb.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + osd: + filestore flush min: 0 + mds: + 
debug ms: 1 + debug mds: 20 +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsstress.yaml b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsstress.yaml new file mode 100644 index 00000000000..5908d951b2d --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsstress.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsx.yaml b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsx.yaml new file mode 100644 index 00000000000..94031518ea5 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsx.yaml @@ -0,0 +1,7 @@ +tasks: +- ceph-fuse: +- workunit: + timeout: 6h + clients: + all: + - suites/fsx.sh diff --git a/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsync.yaml b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsync.yaml new file mode 100644 index 00000000000..c6043e209bd --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsync.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/fsync-tester.sh diff --git a/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_pjd.yaml b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_pjd.yaml new file mode 100644 index 00000000000..930bf4a671d --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_pjd.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_truncate_delay.yaml b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_truncate_delay.yaml new file mode 100644 index 00000000000..f3efafa2e9d --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_truncate_delay.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + conf: + client: + ms_inject_delay_probability: 1 + 
ms_inject_delay_type: osd + ms_inject_delay_max: 5 + client_oc_max_dirty_age: 1 +tasks: +- ceph-fuse: +- exec: + client.0: + - dd if=/dev/zero of=./foo count=100 + - sleep 2 + - truncate --size 0 ./foo diff --git a/qa/suites/powercycle/osd/tasks/rados_api_tests.yaml b/qa/suites/powercycle/osd/tasks/rados_api_tests.yaml new file mode 100644 index 00000000000..b4708ebd7c0 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/rados_api_tests.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + client.0: + - rados/test.sh diff --git a/qa/suites/powercycle/osd/tasks/radosbench.yaml b/qa/suites/powercycle/osd/tasks/radosbench.yaml new file mode 100644 index 00000000000..68e933028a2 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/radosbench.yaml @@ -0,0 +1,4 @@ +tasks: +- radosbench: + clients: [client.0] + time: 600 diff --git a/qa/suites/powercycle/osd/tasks/readwrite.yaml b/qa/suites/powercycle/osd/tasks/readwrite.yaml new file mode 100644 index 00000000000..c53e52b0872 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/readwrite.yaml @@ -0,0 +1,9 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + op_weights: + read: 45 + write: 45 + delete: 10 diff --git a/qa/suites/powercycle/osd/tasks/snaps-few-objects.yaml b/qa/suites/powercycle/osd/tasks/snaps-few-objects.yaml new file mode 100644 index 00000000000..aa82d973ae1 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/snaps-few-objects.yaml @@ -0,0 +1,13 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/powercycle/osd/tasks/snaps-many-objects.yaml b/qa/suites/powercycle/osd/tasks/snaps-many-objects.yaml new file mode 100644 index 00000000000..1ffe4e14888 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/snaps-many-objects.yaml @@ -0,0 +1,13 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + op_weights: + 
read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/rados/basic/% b/qa/suites/rados/basic/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rados/basic/clusters/fixed-2.yaml b/qa/suites/rados/basic/clusters/fixed-2.yaml new file mode 120000 index 00000000000..cd0791a1486 --- /dev/null +++ b/qa/suites/rados/basic/clusters/fixed-2.yaml @@ -0,0 +1 @@ +../../../../clusters/fixed-2.yaml \ No newline at end of file diff --git a/qa/suites/rados/basic/fs/xfs.yaml b/qa/suites/rados/basic/fs/xfs.yaml new file mode 120000 index 00000000000..4c28d731f6b --- /dev/null +++ b/qa/suites/rados/basic/fs/xfs.yaml @@ -0,0 +1 @@ +../../../../fs/xfs.yaml \ No newline at end of file diff --git a/qa/suites/rados/basic/msgr-failures/few.yaml b/qa/suites/rados/basic/msgr-failures/few.yaml new file mode 100644 index 00000000000..0de320d46b8 --- /dev/null +++ b/qa/suites/rados/basic/msgr-failures/few.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 diff --git a/qa/suites/rados/basic/msgr-failures/many.yaml b/qa/suites/rados/basic/msgr-failures/many.yaml new file mode 100644 index 00000000000..038c3a79908 --- /dev/null +++ b/qa/suites/rados/basic/msgr-failures/many.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 1500 diff --git a/qa/suites/rados/basic/tasks/rados_api_tests.yaml b/qa/suites/rados/basic/tasks/rados_api_tests.yaml new file mode 100644 index 00000000000..acfc597dec3 --- /dev/null +++ b/qa/suites/rados/basic/tasks/rados_api_tests.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-whitelist: + - reached quota + - wrongly marked me down +tasks: +- install: +- ceph: +- workunit: + clients: + client.0: + - rados/test.sh + - rados/test_pool_quota.sh + diff --git a/qa/suites/rados/basic/tasks/rados_cls_all.yaml b/qa/suites/rados/basic/tasks/rados_cls_all.yaml new file mode 100644 index 
00000000000..34f7cbbb4a0 --- /dev/null +++ b/qa/suites/rados/basic/tasks/rados_cls_all.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: +- workunit: + clients: + client.0: + - cls diff --git a/qa/suites/rados/basic/tasks/rados_python.yaml b/qa/suites/rados/basic/tasks/rados_python.yaml new file mode 100644 index 00000000000..00320538ff7 --- /dev/null +++ b/qa/suites/rados/basic/tasks/rados_python.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: + log-whitelist: + - wrongly marked me down +- workunit: + clients: + client.0: + - rados/test_python.sh diff --git a/qa/suites/rados/basic/tasks/rados_stress_watch.yaml b/qa/suites/rados/basic/tasks/rados_stress_watch.yaml new file mode 100644 index 00000000000..ae2e5fd0083 --- /dev/null +++ b/qa/suites/rados/basic/tasks/rados_stress_watch.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: +- workunit: + clients: + client.0: + - rados/stress_watch.sh diff --git a/qa/suites/rados/basic/tasks/rados_workunit_loadgen_big.yaml b/qa/suites/rados/basic/tasks/rados_workunit_loadgen_big.yaml new file mode 100644 index 00000000000..9432367e356 --- /dev/null +++ b/qa/suites/rados/basic/tasks/rados_workunit_loadgen_big.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + log-whitelist: + - wrongly marked me down +tasks: +- install: +- ceph: +- workunit: + clients: + all: + - rados/load-gen-big.sh diff --git a/qa/suites/rados/basic/tasks/rados_workunit_loadgen_mix.yaml b/qa/suites/rados/basic/tasks/rados_workunit_loadgen_mix.yaml new file mode 100644 index 00000000000..7d882cac9c9 --- /dev/null +++ b/qa/suites/rados/basic/tasks/rados_workunit_loadgen_mix.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + log-whitelist: + - wrongly marked me down +tasks: +- install: +- ceph: +- workunit: + clients: + all: + - rados/load-gen-mix.sh diff --git a/qa/suites/rados/basic/tasks/rados_workunit_loadgen_mostlyread.yaml b/qa/suites/rados/basic/tasks/rados_workunit_loadgen_mostlyread.yaml new file mode 100644 index 00000000000..69c06b7b049 --- /dev/null +++ 
b/qa/suites/rados/basic/tasks/rados_workunit_loadgen_mostlyread.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + log-whitelist: + - wrongly marked me down +tasks: +- install: +- ceph: +- workunit: + clients: + all: + - rados/load-gen-mostlyread.sh diff --git a/qa/suites/rados/basic/tasks/repair_test.yaml b/qa/suites/rados/basic/tasks/repair_test.yaml new file mode 100644 index 00000000000..1fd037bed21 --- /dev/null +++ b/qa/suites/rados/basic/tasks/repair_test.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + log-whitelist: ['candidate had a read error', 'deep-scrub 0 missing, 1 inconsistent objects', 'deep-scrub 0 missing, 4 inconsistent objects', 'deep-scrub 1 errors', 'deep-scrub 4 errors', '!= known omap_digest', '!= known data_digest', 'repair 0 missing, 1 inconsistent objects', 'repair 0 missing, 4 inconsistent objects', 'repair 1 errors, 1 fixed', 'repair 4 errors, 4 fixed', 'scrub 0 missing, 1 inconsistent', 'scrub 1 errors', 'size 1 != known size', '!= best guess'] + conf: + osd: + filestore debug inject read err : true +tasks: +- install: +- ceph: +- repair_test: + diff --git a/qa/suites/rados/basic/tasks/scrub_test.yaml b/qa/suites/rados/basic/tasks/scrub_test.yaml new file mode 100644 index 00000000000..2b87c3f0dff --- /dev/null +++ b/qa/suites/rados/basic/tasks/scrub_test.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + log-whitelist: + - '!= best guess digest' + - '!= best guess data_digest' + - '!= best guess omap_digest' + - '!= known digest' + - '!= known data_digest' + - '!= known omap_digest' + - deep-scrub 0 missing, 1 inconsistent objects + - deep-scrub 1 errors + - repair 0 missing, 1 inconsistent objects + - repair 1 errors, 1 fixed +tasks: +- install: +- ceph: +- scrub_test: diff --git a/qa/suites/rados/monthrash/% b/qa/suites/rados/monthrash/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rados/monthrash/ceph/ceph.yaml b/qa/suites/rados/monthrash/ceph/ceph.yaml new file mode 100644 index 00000000000..a2c0efc7779 --- /dev/null 
+++ b/qa/suites/rados/monthrash/ceph/ceph.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + mon: + mon min osdmap epochs: 25 + paxos service trim min: 5 +tasks: +- install: +- ceph: diff --git a/qa/suites/rados/monthrash/clusters/3-mons.yaml b/qa/suites/rados/monthrash/clusters/3-mons.yaml new file mode 100644 index 00000000000..b36db6592bd --- /dev/null +++ b/qa/suites/rados/monthrash/clusters/3-mons.yaml @@ -0,0 +1,3 @@ +roles: +- [mon.a, mon.c, osd.0, osd.1, osd.2] +- [mon.b, osd.3, osd.4, osd.5, client.0] diff --git a/qa/suites/rados/monthrash/clusters/9-mons.yaml b/qa/suites/rados/monthrash/clusters/9-mons.yaml new file mode 100644 index 00000000000..fdb87c6b324 --- /dev/null +++ b/qa/suites/rados/monthrash/clusters/9-mons.yaml @@ -0,0 +1,3 @@ +roles: +- [mon.a, mon.b, mon.c, mon.d, mon.e, osd.0, osd.1, osd.2] +- [mon.f, mon.g, mon.h, mon.i, osd.3, osd.4, osd.5, client.0] diff --git a/qa/suites/rados/monthrash/fs/xfs.yaml b/qa/suites/rados/monthrash/fs/xfs.yaml new file mode 120000 index 00000000000..4c28d731f6b --- /dev/null +++ b/qa/suites/rados/monthrash/fs/xfs.yaml @@ -0,0 +1 @@ +../../../../fs/xfs.yaml \ No newline at end of file diff --git a/qa/suites/rados/monthrash/msgr-failures/few.yaml b/qa/suites/rados/monthrash/msgr-failures/few.yaml new file mode 100644 index 00000000000..0de320d46b8 --- /dev/null +++ b/qa/suites/rados/monthrash/msgr-failures/few.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 diff --git a/qa/suites/rados/monthrash/msgr-failures/mon-delay.yaml b/qa/suites/rados/monthrash/msgr-failures/mon-delay.yaml new file mode 100644 index 00000000000..03b7e37f842 --- /dev/null +++ b/qa/suites/rados/monthrash/msgr-failures/mon-delay.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 2500 + ms inject delay type: mon + ms inject delay probability: .005 + ms inject delay max: 1 + ms inject internal delays: .002 diff --git 
a/qa/suites/rados/monthrash/thrashers/force-sync-many.yaml b/qa/suites/rados/monthrash/thrashers/force-sync-many.yaml new file mode 100644 index 00000000000..2867f2db5ec --- /dev/null +++ b/qa/suites/rados/monthrash/thrashers/force-sync-many.yaml @@ -0,0 +1,6 @@ +tasks: +- mon_thrash: + revive_delay: 90 + thrash_delay: 1 + thrash_store: true + thrash_many: true diff --git a/qa/suites/rados/monthrash/thrashers/many.yaml b/qa/suites/rados/monthrash/thrashers/many.yaml new file mode 100644 index 00000000000..fe52bb2bbeb --- /dev/null +++ b/qa/suites/rados/monthrash/thrashers/many.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + osd: + mon client ping interval: 4 + mon client ping timeout: 12 +tasks: +- mon_thrash: + revive_delay: 20 + thrash_delay: 1 + thrash_many: true + freeze_mon_duration: 20 + freeze_mon_probability: 10 diff --git a/qa/suites/rados/monthrash/thrashers/one.yaml b/qa/suites/rados/monthrash/thrashers/one.yaml new file mode 100644 index 00000000000..2ce44c8601f --- /dev/null +++ b/qa/suites/rados/monthrash/thrashers/one.yaml @@ -0,0 +1,4 @@ +tasks: +- mon_thrash: + revive_delay: 20 + thrash_delay: 1 diff --git a/qa/suites/rados/monthrash/thrashers/sync-many.yaml b/qa/suites/rados/monthrash/thrashers/sync-many.yaml new file mode 100644 index 00000000000..9868f18159f --- /dev/null +++ b/qa/suites/rados/monthrash/thrashers/sync-many.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + conf: + mon: + paxos min: 10 + paxos trim min: 10 +tasks: +- mon_thrash: + revive_delay: 90 + thrash_delay: 1 + thrash_many: true diff --git a/qa/suites/rados/monthrash/thrashers/sync.yaml b/qa/suites/rados/monthrash/thrashers/sync.yaml new file mode 100644 index 00000000000..1e7054c271d --- /dev/null +++ b/qa/suites/rados/monthrash/thrashers/sync.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + conf: + mon: + paxos min: 10 + paxos trim min: 10 +tasks: +- mon_thrash: + revive_delay: 90 + thrash_delay: 1 diff --git a/qa/suites/rados/monthrash/workloads/pool-create-delete.yaml 
b/qa/suites/rados/monthrash/workloads/pool-create-delete.yaml new file mode 100644 index 00000000000..c0f0f2e35b4 --- /dev/null +++ b/qa/suites/rados/monthrash/workloads/pool-create-delete.yaml @@ -0,0 +1,56 @@ +overrides: + ceph: + log-whitelist: + - slow request +tasks: +- exec: + client.0: + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - 
ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel diff --git a/qa/suites/rados/monthrash/workloads/rados_5925.yaml b/qa/suites/rados/monthrash/workloads/rados_5925.yaml new file mode 100644 index 00000000000..b49937f76df --- /dev/null +++ b/qa/suites/rados/monthrash/workloads/rados_5925.yaml @@ -0,0 +1,4 @@ +tasks: +- exec: + client.0: + - ceph_test_rados_delete_pools_parallel --debug_objecter 20 --debug_ms 1 --debug_rados 20 --debug_monc 20 diff --git a/qa/suites/rados/monthrash/workloads/rados_api_tests.yaml b/qa/suites/rados/monthrash/workloads/rados_api_tests.yaml new file mode 100644 index 00000000000..cd11ae6ca0c --- /dev/null +++ b/qa/suites/rados/monthrash/workloads/rados_api_tests.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rados/test.sh diff --git a/qa/suites/rados/monthrash/workloads/rados_mon_workunits.yaml b/qa/suites/rados/monthrash/workloads/rados_mon_workunits.yaml new file mode 100644 index 00000000000..31465cffe71 --- /dev/null +++ b/qa/suites/rados/monthrash/workloads/rados_mon_workunits.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + log-whitelist: + - wrongly marked me down +tasks: +- workunit: + clients: + client.0: + - mon/pool_ops.sh + - mon/crush_ops.sh + - mon/osd.sh + - mon/caps.sh + diff --git a/qa/suites/rados/monthrash/workloads/snaps-few-objects.yaml b/qa/suites/rados/monthrash/workloads/snaps-few-objects.yaml new file mode 100644 index 00000000000..aa82d973ae1 --- /dev/null +++ b/qa/suites/rados/monthrash/workloads/snaps-few-objects.yaml @@ -0,0 +1,13 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + 
snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/rados/multimon/% b/qa/suites/rados/multimon/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rados/multimon/clusters/21.yaml b/qa/suites/rados/multimon/clusters/21.yaml new file mode 100644 index 00000000000..646ff15d45e --- /dev/null +++ b/qa/suites/rados/multimon/clusters/21.yaml @@ -0,0 +1,8 @@ +roles: +- [mon.a, mon.d, mon.g, mon.j, mon.m, mon.p, mon.s, osd.0] +- [mon.b, mon.e, mon.h, mon.k, mon.n, mon.q, mon.t] +- [mon.c, mon.f, mon.i, mon.l, mon.o, mon.r, mon.u, osd.1] +openstack: +- volumes: # attached to each instance + count: 1 + size: 10 # GB diff --git a/qa/suites/rados/multimon/clusters/3.yaml b/qa/suites/rados/multimon/clusters/3.yaml new file mode 100644 index 00000000000..e30dc76f381 --- /dev/null +++ b/qa/suites/rados/multimon/clusters/3.yaml @@ -0,0 +1,6 @@ +roles: +- [mon.a, mon.b, mon.c, osd.0, osd.1] +openstack: +- volumes: # attached to each instance + count: 2 + size: 10 # GB diff --git a/qa/suites/rados/multimon/clusters/6.yaml b/qa/suites/rados/multimon/clusters/6.yaml new file mode 100644 index 00000000000..b16e3267c06 --- /dev/null +++ b/qa/suites/rados/multimon/clusters/6.yaml @@ -0,0 +1,7 @@ +roles: +- [mon.a, mon.c, mon.e, osd.0] +- [mon.b, mon.d, mon.f, osd.1] +openstack: +- volumes: # attached to each instance + count: 1 + size: 10 # GB diff --git a/qa/suites/rados/multimon/clusters/9.yaml b/qa/suites/rados/multimon/clusters/9.yaml new file mode 100644 index 00000000000..c2c7b494ed8 --- /dev/null +++ b/qa/suites/rados/multimon/clusters/9.yaml @@ -0,0 +1,8 @@ +roles: +- [mon.a, mon.d, mon.g, osd.0] +- [mon.b, mon.e, mon.h] +- [mon.c, mon.f, mon.i, osd.1] +openstack: +- volumes: # attached to each instance + count: 1 + size: 10 # GB diff --git a/qa/suites/rados/multimon/msgr-failures/few.yaml b/qa/suites/rados/multimon/msgr-failures/few.yaml new file mode 100644 index 00000000000..0de320d46b8 --- /dev/null +++ 
b/qa/suites/rados/multimon/msgr-failures/few.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 diff --git a/qa/suites/rados/multimon/msgr-failures/many.yaml b/qa/suites/rados/multimon/msgr-failures/many.yaml new file mode 100644 index 00000000000..86f8dde8a0e --- /dev/null +++ b/qa/suites/rados/multimon/msgr-failures/many.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 500 diff --git a/qa/suites/rados/multimon/tasks/mon_clock_no_skews.yaml b/qa/suites/rados/multimon/tasks/mon_clock_no_skews.yaml new file mode 100644 index 00000000000..e86bdde1d7d --- /dev/null +++ b/qa/suites/rados/multimon/tasks/mon_clock_no_skews.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: + log-whitelist: + - slow request + - .*clock.*skew.* + - clocks not synchronized +- mon_clock_skew_check: + expect-skew: false diff --git a/qa/suites/rados/multimon/tasks/mon_clock_with_skews.yaml b/qa/suites/rados/multimon/tasks/mon_clock_with_skews.yaml new file mode 100644 index 00000000000..2953e0d6dc2 --- /dev/null +++ b/qa/suites/rados/multimon/tasks/mon_clock_with_skews.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + conf: + mon.b: + clock offset: 10 +tasks: +- install: +- ceph: + wait-for-healthy: false + log-whitelist: + - slow request + - .*clock.*skew.* + - clocks not synchronized +- mon_clock_skew_check: + expect-skew: true diff --git a/qa/suites/rados/multimon/tasks/mon_recovery.yaml b/qa/suites/rados/multimon/tasks/mon_recovery.yaml new file mode 100644 index 00000000000..94721ea53a4 --- /dev/null +++ b/qa/suites/rados/multimon/tasks/mon_recovery.yaml @@ -0,0 +1,4 @@ +tasks: +- install: +- ceph: +- mon_recovery: diff --git a/qa/suites/rados/objectstore/alloc-hint.yaml b/qa/suites/rados/objectstore/alloc-hint.yaml new file mode 100644 index 00000000000..8f8d4841eb0 --- /dev/null +++ b/qa/suites/rados/objectstore/alloc-hint.yaml @@ -0,0 +1,25 @@ +roles: +- [mon.a, osd.0, osd.1, osd.2, client.0] + +overrides: + 
ceph: + fs: xfs + conf: + osd: + filestore xfs extsize: true + +tasks: +- install: +- ceph: +- workunit: + clients: + all: + - rados/test_alloc_hint.sh +openstack: + - machine: + disk: 40 # GB + ram: 8000 # MB + cpus: 1 + volumes: # attached to each instance + count: 3 + size: 10 # GB diff --git a/qa/suites/rados/objectstore/ceph_objectstore_tool.yaml b/qa/suites/rados/objectstore/ceph_objectstore_tool.yaml new file mode 100644 index 00000000000..881d908f06b --- /dev/null +++ b/qa/suites/rados/objectstore/ceph_objectstore_tool.yaml @@ -0,0 +1,11 @@ +roles: +- [mon.0, osd.0, osd.1, osd.2, osd.3, osd.4, osd.5, client.0] +openstack: +- volumes: # attached to each instance + count: 6 + size: 10 # GB +tasks: +- install: +- ceph: +- ceph_objectstore_tool: + objects: 20 diff --git a/qa/suites/rados/objectstore/filejournal.yaml b/qa/suites/rados/objectstore/filejournal.yaml new file mode 100644 index 00000000000..69ffab2092c --- /dev/null +++ b/qa/suites/rados/objectstore/filejournal.yaml @@ -0,0 +1,12 @@ +roles: +- [mon.0, osd.0, osd.1, client.0] +openstack: +- volumes: # attached to each instance + count: 2 + size: 10 # GB +tasks: +- install: +- ceph: +- exec: + client.0: + - ceph_test_filejournal diff --git a/qa/suites/rados/objectstore/filestore-idempotent-aio-journal.yaml b/qa/suites/rados/objectstore/filestore-idempotent-aio-journal.yaml new file mode 100644 index 00000000000..c97d7cd8cb7 --- /dev/null +++ b/qa/suites/rados/objectstore/filestore-idempotent-aio-journal.yaml @@ -0,0 +1,13 @@ +roles: +- [mon.0, osd.0, osd.1, client.0] +openstack: +- volumes: # attached to each instance + count: 2 + size: 10 # GB +tasks: +- install: +- ceph: + conf: + global: + journal aio: true +- filestore_idempotent: diff --git a/qa/suites/rados/objectstore/filestore-idempotent.yaml b/qa/suites/rados/objectstore/filestore-idempotent.yaml new file mode 100644 index 00000000000..39b2f0fb006 --- /dev/null +++ b/qa/suites/rados/objectstore/filestore-idempotent.yaml @@ -0,0 +1,10 @@ 
+roles: +- [mon.0, osd.0, osd.1, client.0] +openstack: +- volumes: # attached to each instance + count: 2 + size: 10 # GB +tasks: +- install: +- ceph: +- filestore_idempotent: diff --git a/qa/suites/rados/objectstore/objectcacher-stress.yaml b/qa/suites/rados/objectstore/objectcacher-stress.yaml new file mode 100644 index 00000000000..7cb78a76e7e --- /dev/null +++ b/qa/suites/rados/objectstore/objectcacher-stress.yaml @@ -0,0 +1,13 @@ +roles: +- [mon.0, osd.0, osd.1, client.0] +openstack: +- volumes: # attached to each instance + count: 2 + size: 10 # GB +tasks: +- install: +- ceph: +- workunit: + clients: + all: + - osdc/stress_objectcacher.sh diff --git a/qa/suites/rados/objectstore/objectstore.yaml b/qa/suites/rados/objectstore/objectstore.yaml new file mode 100644 index 00000000000..23d650b9d2e --- /dev/null +++ b/qa/suites/rados/objectstore/objectstore.yaml @@ -0,0 +1,12 @@ +roles: +- [mon.0, osd.0, osd.1, client.0] +openstack: +- volumes: # attached to each instance + count: 2 + size: 10 # GB +tasks: +- install: +- exec: + client.0: + - mkdir $TESTDIR/ostest && cd $TESTDIR/ostest && ceph_test_objectstore + - rm -rf $TESTDIR/ostest diff --git a/qa/suites/rados/singleton-nomsgr/% b/qa/suites/rados/singleton-nomsgr/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rados/singleton-nomsgr/all/11429.yaml b/qa/suites/rados/singleton-nomsgr/all/11429.yaml new file mode 100644 index 00000000000..7076e048dc2 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/11429.yaml @@ -0,0 +1,106 @@ +overrides: + ceph: + conf: + mon: + debug mon: 20 + debug ms: 1 + debug paxos: 20 + mon warn on legacy crush tunables: false + mon min osdmap epochs: 3 + osd: + osd map cache size: 2 + osd map max advance: 1 + debug filestore: 20 + debug journal: 20 + debug ms: 1 + debug osd: 20 + log-whitelist: + - osd_map_cache_size + - slow request + - scrub mismatch + - ScrubResult + - failed to encode +roles: +- - mon.a + - mds.a + - osd.0 + - osd.1 + - mon.b + - 
mon.c + - osd.2 + - client.0 +tasks: +- install: + branch: v0.80.8 +- print: '**** done installing firefly' +- ceph: + fs: xfs +- print: '**** done ceph' +- full_sequential: + - ceph_manager.create_pool: + args: ['toremove'] + kwargs: + pg_num: 4096 + - sleep: + duration: 30 + - ceph_manager.wait_for_clean: null + - radosbench: + clients: [client.0] + time: 120 + size: 1 + pool: toremove + create_pool: false + - ceph_manager.remove_pool: + args: ['toremove'] + - sleep: + duration: 10 + - ceph.restart: + daemons: + - osd.0 + - osd.1 + - osd.2 + - sleep: + duration: 30 + - ceph_manager.wait_for_clean: null + - radosbench: + clients: [client.0] + time: 60 + size: 1 + - ceph_manager.create_pool: + args: ['newpool'] + - loop: + count: 100 + body: + - ceph_manager.set_pool_property: + args: ['newpool', 'min_size', 2] + - ceph_manager.set_pool_property: + args: ['newpool', 'min_size', 1] + - sleep: + duration: 30 + - ceph_manager.wait_for_clean: null + - loop: + count: 100 + body: + - ceph_manager.set_pool_property: + args: ['newpool', 'min_size', 2] + - ceph_manager.set_pool_property: + args: ['newpool', 'min_size', 1] + - sleep: + duration: 30 + - ceph_manager.wait_for_clean: null + - sleep: + duration: 30 + - install.upgrade: + mon.a: null + - ceph.restart: + daemons: + - osd.0 + - osd.1 + - osd.2 + - sleep: + duration: 30 + - radosbench: + clients: [client.0] + time: 30 + size: 1 + - ceph_manager.wait_for_clean: null diff --git a/qa/suites/rados/singleton-nomsgr/all/13234.yaml b/qa/suites/rados/singleton-nomsgr/all/13234.yaml new file mode 100644 index 00000000000..3b3602fb5ec --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/13234.yaml @@ -0,0 +1,130 @@ +overrides: + ceph: + conf: + mon: + debug mon: 20 + debug ms: 1 + debug paxos: 20 + mon warn on legacy crush tunables: false + mon min osdmap epochs: 3 + osd: + osd map cache size: 2 + osd map max advance: 1 + debug filestore: 20 + debug journal: 20 + debug ms: 1 + debug osd: 20 + log-whitelist: + - 
osd_map_cache_size + - slow request + - scrub mismatch + - ScrubResult + - failed to encode +roles: +- - mon.a + - mds.a + - osd.0 + - osd.1 + - mon.b + - mon.c + - osd.2 + - client.0 +tasks: +- install: + tag: v0.67.10 +- print: '**** done installing dumpling' +- ceph: + fs: xfs +- print: '**** done ceph' +- full_sequential: + - ceph_manager.create_pool: + args: + - newpool + kwargs: + pg_num: 32 + - sleep: + duration: 30 + - ceph_manager.wait_for_clean: null + - ceph_manager.kill_osd: + kwargs: + osd: 0 + - ceph_manager.kill_osd: + kwargs: + osd: 1 + - ceph_manager.kill_osd: + kwargs: + osd: 2 + - print: '**** done killing osds' + - loop: + body: + - ceph_manager.set_pool_property: + args: + - newpool + - min_size + - 2 + - ceph_manager.set_pool_property: + args: + - newpool + - min_size + - 1 + count: 10 + - install.upgrade: + mon.a: + branch: firefly + - print: '**** done upgrading to firefly' + - ceph.restart: + - mon.a + - mon.b + - mon.c + - print: '**** done upgrading restarting mons' + - loop: + body: + - ceph_manager.set_pool_property: + args: + - newpool + - min_size + - 2 + - ceph_manager.set_pool_property: + args: + - newpool + - min_size + - 1 + count: 10 + - sleep: + duration: 10 + - install.upgrade: + mon.a: null + - print: '**** done upgrading to branch' + - ceph.restart: + - mon.a + - mon.b + - mon.c + - loop: + body: + - ceph_manager.set_pool_property: + args: + - newpool + - min_size + - 2 + - ceph_manager.set_pool_property: + args: + - newpool + - min_size + - 1 + count: 10 + - sleep: + duration: 10 + - print: '**** about to start osds' + - ceph_manager.revive_osd: + kwargs: + osd: 0 + - ceph_manager.revive_osd: + kwargs: + osd: 1 + - ceph_manager.revive_osd: + kwargs: + osd: 2 + - sleep: + duration: 30 + - ceph_manager.wait_for_clean: null + - print: '**** done!' 
diff --git a/qa/suites/rados/singleton-nomsgr/all/ceph-post-file.yaml b/qa/suites/rados/singleton-nomsgr/all/ceph-post-file.yaml new file mode 100644 index 00000000000..d0a4db067cf --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/ceph-post-file.yaml @@ -0,0 +1,8 @@ +roles: +- [mon.a, osd.0, osd.1, osd.2, client.0] +tasks: +- install: +- workunit: + clients: + all: + - post-file.sh diff --git a/qa/suites/rados/singleton-nomsgr/all/export-after-evict.yaml b/qa/suites/rados/singleton-nomsgr/all/export-after-evict.yaml new file mode 100644 index 00000000000..f8e4b405f19 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/export-after-evict.yaml @@ -0,0 +1,25 @@ +roles: +- - mon.a + - osd.0 + - osd.1 + - osd.2 + - client.0 +tasks: +- install: +- ceph: +- exec: + client.0: + - ceph osd pool create base-pool 4 + - ceph osd pool create cache-pool 4 + - ceph osd tier add base-pool cache-pool + - ceph osd tier cache-mode cache-pool writeback + - ceph osd tier set-overlay base-pool cache-pool + - dd if=/dev/urandom of=$TESTDIR/foo bs=1M count=1 + - rbd import --image-format 2 $TESTDIR/foo base-pool/bar + - rbd snap create base-pool/bar@snap + - rados -p base-pool cache-flush-evict-all + - rbd export base-pool/bar $TESTDIR/bar + - rbd export base-pool/bar@snap $TESTDIR/snap + - cmp $TESTDIR/foo $TESTDIR/bar + - cmp $TESTDIR/foo $TESTDIR/snap + - rm $TESTDIR/foo $TESTDIR/bar $TESTDIR/snap diff --git a/qa/suites/rados/singleton-nomsgr/all/msgr.yaml b/qa/suites/rados/singleton-nomsgr/all/msgr.yaml new file mode 100644 index 00000000000..86c717f6686 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/msgr.yaml @@ -0,0 +1,16 @@ +roles: +- [mon.0, osd.0, osd.1, client.0] +tasks: +- install: +- exec: + client.0: + - ceph_test_async_driver + - ceph_test_msgr +openstack: + - machine: + disk: 40 # GB + ram: 15000 # MB + cpus: 1 + volumes: # attached to each instance + count: 0 + size: 1 # GB diff --git a/qa/suites/rados/singleton-nomsgr/all/multi-backfill-reject.yaml 
b/qa/suites/rados/singleton-nomsgr/all/multi-backfill-reject.yaml new file mode 100644 index 00000000000..a9ec78a763a --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/multi-backfill-reject.yaml @@ -0,0 +1,30 @@ +roles: +- - mon.a + - osd.0 + - osd.1 + - osd.2 + - client.0 +- - osd.3 + - osd.4 + - osd.5 +tasks: +- install: +- ceph: + conf: + osd: + osd debug reject backfill probability: .3 + osd min pg log entries: 25 + osd max pg log entries: 100 +- exec: + client.0: + - ceph osd pool create foo 64 + - rados -p foo bench 60 write -b 1024 --no-cleanup + - ceph osd pool set foo size 3 + - ceph osd out 0 1 +- sleep: + duration: 60 +- exec: + client.0: + - ceph osd in 0 1 +- sleep: + duration: 60 diff --git a/qa/suites/rados/singleton/% b/qa/suites/rados/singleton/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rados/singleton/all/admin-socket.yaml b/qa/suites/rados/singleton/all/admin-socket.yaml new file mode 100644 index 00000000000..0dbf9b219b2 --- /dev/null +++ b/qa/suites/rados/singleton/all/admin-socket.yaml @@ -0,0 +1,17 @@ +roles: +- - mon.a + - osd.0 + - osd.1 + - client.a +tasks: +- install: +- ceph: +- admin_socket: + osd.0: + version: + git_version: + help: + config show: + config set filestore_dump_file /tmp/foo: + perf dump: + perf schema: diff --git a/qa/suites/rados/singleton/all/cephtool.yaml b/qa/suites/rados/singleton/all/cephtool.yaml new file mode 100644 index 00000000000..114073baa4e --- /dev/null +++ b/qa/suites/rados/singleton/all/cephtool.yaml @@ -0,0 +1,21 @@ +roles: +- - mon.a + - mon.b + - mon.c + - osd.0 + - osd.1 + - osd.2 + - client.0 +tasks: +- install: +- ceph: + log-whitelist: + - wrongly marked me down + - had wrong client addr + - had wrong cluster addr + - must scrub before tier agent can activate +- workunit: + clients: + all: + - cephtool + - mon/pool_ops.sh diff --git a/qa/suites/rados/singleton/all/divergent_priors.yaml b/qa/suites/rados/singleton/all/divergent_priors.yaml new file mode 
100644 index 00000000000..a01dd122a19 --- /dev/null +++ b/qa/suites/rados/singleton/all/divergent_priors.yaml @@ -0,0 +1,17 @@ +roles: +- - mon.0 + - osd.0 + - osd.1 + - osd.2 + - client.0 + +overrides: + ceph: + conf: + osd: + debug osd: 5 + +tasks: +- install: +- ceph: +- divergent_priors: diff --git a/qa/suites/rados/singleton/all/divergent_priors2.yaml b/qa/suites/rados/singleton/all/divergent_priors2.yaml new file mode 100644 index 00000000000..aecbc0a4011 --- /dev/null +++ b/qa/suites/rados/singleton/all/divergent_priors2.yaml @@ -0,0 +1,17 @@ +roles: +- - mon.0 + - osd.0 + - osd.1 + - osd.2 + - client.0 + +overrides: + ceph: + conf: + osd: + debug osd: 5 + +tasks: +- install: +- ceph: +- divergent_priors2: diff --git a/qa/suites/rados/singleton/all/dump-stuck.yaml b/qa/suites/rados/singleton/all/dump-stuck.yaml new file mode 100644 index 00000000000..2752a38fd45 --- /dev/null +++ b/qa/suites/rados/singleton/all/dump-stuck.yaml @@ -0,0 +1,10 @@ +roles: +- - mon.a + - osd.0 + - osd.1 +tasks: +- install: +- ceph: + log-whitelist: + - wrongly marked me down +- dump_stuck: diff --git a/qa/suites/rados/singleton/all/ec-lost-unfound.yaml b/qa/suites/rados/singleton/all/ec-lost-unfound.yaml new file mode 100644 index 00000000000..7006d7f3783 --- /dev/null +++ b/qa/suites/rados/singleton/all/ec-lost-unfound.yaml @@ -0,0 +1,14 @@ +roles: +- - mon.a + - mon.b + - mon.c + - osd.0 + - osd.1 + - osd.2 + - osd.3 +tasks: +- install: +- ceph: + log-whitelist: + - objects unfound and apparently lost +- ec_lost_unfound: diff --git a/qa/suites/rados/singleton/all/lost-unfound-delete.yaml b/qa/suites/rados/singleton/all/lost-unfound-delete.yaml new file mode 100644 index 00000000000..e6e09d320d3 --- /dev/null +++ b/qa/suites/rados/singleton/all/lost-unfound-delete.yaml @@ -0,0 +1,13 @@ +roles: +- - mon.a + - mon.b + - mon.c + - osd.0 + - osd.1 + - osd.2 +tasks: +- install: +- ceph: + log-whitelist: + - objects unfound and apparently lost +- rep_lost_unfound_delete: diff --git 
a/qa/suites/rados/singleton/all/lost-unfound.yaml b/qa/suites/rados/singleton/all/lost-unfound.yaml new file mode 100644 index 00000000000..0597e43713c --- /dev/null +++ b/qa/suites/rados/singleton/all/lost-unfound.yaml @@ -0,0 +1,13 @@ +roles: +- - mon.a + - mon.b + - mon.c + - osd.0 + - osd.1 + - osd.2 +tasks: +- install: +- ceph: + log-whitelist: + - objects unfound and apparently lost +- lost_unfound: diff --git a/qa/suites/rados/singleton/all/mon-config-keys.yaml b/qa/suites/rados/singleton/all/mon-config-keys.yaml new file mode 100644 index 00000000000..f81070fc770 --- /dev/null +++ b/qa/suites/rados/singleton/all/mon-config-keys.yaml @@ -0,0 +1,15 @@ +roles: +- - mon.0 + - mon.1 + - mon.2 + - osd.0 + - osd.1 + - osd.2 + - client.0 +tasks: +- install: +- ceph: +- workunit: + clients: + all: + - mon/test_mon_config_key.py diff --git a/qa/suites/rados/singleton/all/mon-thrasher.yaml b/qa/suites/rados/singleton/all/mon-thrasher.yaml new file mode 100644 index 00000000000..e69198c85e5 --- /dev/null +++ b/qa/suites/rados/singleton/all/mon-thrasher.yaml @@ -0,0 +1,21 @@ +roles: +- - mon.a + - mon.b + - mon.c + - osd.0 + - osd.1 + - client.0 +tasks: +- install: +- ceph: +- mon_thrash: + revive_delay: 20 + thrash_delay: 1 +- workunit: + clients: + all: + - mon/workloadgen.sh + env: + LOADGEN_NUM_OSDS: "5" + VERBOSE: "1" + DURATION: "600" diff --git a/qa/suites/rados/singleton/all/osd-backfill.yaml b/qa/suites/rados/singleton/all/osd-backfill.yaml new file mode 100644 index 00000000000..781a63eaf62 --- /dev/null +++ b/qa/suites/rados/singleton/all/osd-backfill.yaml @@ -0,0 +1,16 @@ +roles: +- - mon.a + - mon.b + - mon.c + - osd.0 + - osd.1 + - osd.2 +tasks: +- install: +- ceph: + log-whitelist: + - wrongly marked me down + conf: + osd: + osd min pg log entries: 5 +- osd_backfill: diff --git a/qa/suites/rados/singleton/all/osd-recovery-incomplete.yaml b/qa/suites/rados/singleton/all/osd-recovery-incomplete.yaml new file mode 100644 index 00000000000..123f4d44fbe --- 
/dev/null +++ b/qa/suites/rados/singleton/all/osd-recovery-incomplete.yaml @@ -0,0 +1,17 @@ +roles: +- - mon.a + - mon.b + - mon.c + - osd.0 + - osd.1 + - osd.2 + - osd.3 +tasks: +- install: +- ceph: + log-whitelist: + - wrongly marked me down + conf: + osd: + osd min pg log entries: 5 +- osd_recovery.test_incomplete_pgs: diff --git a/qa/suites/rados/singleton/all/osd-recovery.yaml b/qa/suites/rados/singleton/all/osd-recovery.yaml new file mode 100644 index 00000000000..a6e1d99f6fe --- /dev/null +++ b/qa/suites/rados/singleton/all/osd-recovery.yaml @@ -0,0 +1,16 @@ +roles: +- - mon.a + - mon.b + - mon.c + - osd.0 + - osd.1 + - osd.2 +tasks: +- install: +- ceph: + log-whitelist: + - wrongly marked me down + conf: + osd: + osd min pg log entries: 5 +- osd_recovery: diff --git a/qa/suites/rados/singleton/all/peer.yaml b/qa/suites/rados/singleton/all/peer.yaml new file mode 100644 index 00000000000..655ea685584 --- /dev/null +++ b/qa/suites/rados/singleton/all/peer.yaml @@ -0,0 +1,16 @@ +roles: +- - mon.0 + - mon.1 + - mon.2 + - osd.0 + - osd.1 + - osd.2 +tasks: +- install: +- ceph: + config: + global: + osd pool default min size : 1 + log-whitelist: + - objects unfound and apparently lost +- peer: diff --git a/qa/suites/rados/singleton/all/pg-removal-interruption.yaml b/qa/suites/rados/singleton/all/pg-removal-interruption.yaml new file mode 100644 index 00000000000..95cfef76681 --- /dev/null +++ b/qa/suites/rados/singleton/all/pg-removal-interruption.yaml @@ -0,0 +1,27 @@ +roles: +- - mon.a + - osd.0 + - osd.1 + - osd.2 + - client.0 +tasks: +- install: +- ceph: + log-whitelist: + - wrongly marked me down + - slow request +- exec: + client.0: + - ceph osd pool create foo 128 128 + - sleep 5 + - ceph tell osd.0 injectargs -- --osd-inject-failure-on-pg-removal + - ceph osd pool delete foo foo --yes-i-really-really-mean-it +- ceph.wait_for_failure: [osd.0] +- exec: + client.0: + - sudo ceph osd down 0 +- ceph.restart: [osd.0] +- exec: + client.0: + - ceph tell osd.0 
flush_pg_stats +- ceph.healthy: diff --git a/qa/suites/rados/singleton/all/radostool.yaml b/qa/suites/rados/singleton/all/radostool.yaml new file mode 100644 index 00000000000..05ab4a3f7c2 --- /dev/null +++ b/qa/suites/rados/singleton/all/radostool.yaml @@ -0,0 +1,16 @@ +roles: +- - mon.a + - osd.0 + - osd.1 + - client.0 +tasks: +- install: +- ceph: + log-whitelist: + - wrongly marked me down + - had wrong client addr + - had wrong cluster addr +- workunit: + clients: + all: + - rados/test_rados_tool.sh diff --git a/qa/suites/rados/singleton/all/reg11184.yaml b/qa/suites/rados/singleton/all/reg11184.yaml new file mode 100644 index 00000000000..54361a4e3d5 --- /dev/null +++ b/qa/suites/rados/singleton/all/reg11184.yaml @@ -0,0 +1,17 @@ +roles: +- - mon.0 + - osd.0 + - osd.1 + - osd.2 + - client.0 + +overrides: + ceph: + conf: + osd: + debug osd: 5 + +tasks: +- install: +- ceph: +- reg11184: diff --git a/qa/suites/rados/singleton/all/rest-api.yaml b/qa/suites/rados/singleton/all/rest-api.yaml new file mode 100644 index 00000000000..133840a5dc1 --- /dev/null +++ b/qa/suites/rados/singleton/all/rest-api.yaml @@ -0,0 +1,25 @@ +roles: +- - mon.0 + - mon.1 + - mon.2 + - osd.0 + - osd.1 + - osd.2 + - mds.a + - client.0 +tasks: +- install: +- ceph: + log-whitelist: + - wrongly marked me down + - had wrong client addr + conf: + client.rest0: + debug ms: 1 + debug objecter: 20 + debug rados: 20 +- rest-api: [client.0] +- workunit: + clients: + all: + - rest/test.py diff --git a/qa/suites/rados/singleton/all/thrash-rados.yaml b/qa/suites/rados/singleton/all/thrash-rados.yaml new file mode 100644 index 00000000000..82c47bf3fe7 --- /dev/null +++ b/qa/suites/rados/singleton/all/thrash-rados.yaml @@ -0,0 +1,22 @@ +roles: +- - mon.a + - osd.0 + - osd.1 + - osd.2 +- - osd.3 + - osd.4 + - osd.5 + - client.0 +tasks: +- install: +- ceph: + log-whitelist: + - wrongly marked me down +- thrashosds: + op_delay: 30 + clean_interval: 120 + chance_down: .5 +- workunit: + clients: + all: + - 
rados/load-gen-mix-small.sh diff --git a/qa/suites/rados/singleton/all/watch-notify-same-primary.yaml b/qa/suites/rados/singleton/all/watch-notify-same-primary.yaml new file mode 100644 index 00000000000..3d2d683f63c --- /dev/null +++ b/qa/suites/rados/singleton/all/watch-notify-same-primary.yaml @@ -0,0 +1,22 @@ +roles: +- - mon.0 + - mon.1 + - mon.2 + - osd.0 + - osd.1 + - osd.2 + - client.0 +tasks: +- install: +- ceph: + config: + global: + osd pool default min size : 1 + client: + debug ms: 1 + debug objecter: 20 + debug rados: 20 + log-whitelist: + - objects unfound and apparently lost +- watch_notify_same_primary: + clients: [client.0] diff --git a/qa/suites/rados/singleton/fs/xfs.yaml b/qa/suites/rados/singleton/fs/xfs.yaml new file mode 120000 index 00000000000..4c28d731f6b --- /dev/null +++ b/qa/suites/rados/singleton/fs/xfs.yaml @@ -0,0 +1 @@ +../../../../fs/xfs.yaml \ No newline at end of file diff --git a/qa/suites/rados/singleton/msgr-failures/few.yaml b/qa/suites/rados/singleton/msgr-failures/few.yaml new file mode 100644 index 00000000000..0de320d46b8 --- /dev/null +++ b/qa/suites/rados/singleton/msgr-failures/few.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 diff --git a/qa/suites/rados/singleton/msgr-failures/many.yaml b/qa/suites/rados/singleton/msgr-failures/many.yaml new file mode 100644 index 00000000000..86f8dde8a0e --- /dev/null +++ b/qa/suites/rados/singleton/msgr-failures/many.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 500 diff --git a/qa/suites/rados/thrash-erasure-code-isa/% b/qa/suites/rados/thrash-erasure-code-isa/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rados/thrash-erasure-code-isa/arch/x86_64.yaml b/qa/suites/rados/thrash-erasure-code-isa/arch/x86_64.yaml new file mode 100644 index 00000000000..c2409f5d0dc --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/arch/x86_64.yaml @@ -0,0 +1 @@ +arch: 
x86_64 diff --git a/qa/suites/rados/thrash-erasure-code-isa/clusters b/qa/suites/rados/thrash-erasure-code-isa/clusters new file mode 120000 index 00000000000..7aac47be3e6 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/clusters @@ -0,0 +1 @@ +../thrash/clusters \ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/fs b/qa/suites/rados/thrash-erasure-code-isa/fs new file mode 120000 index 00000000000..c11782e0462 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/fs @@ -0,0 +1 @@ +../thrash/fs \ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/msgr-failures b/qa/suites/rados/thrash-erasure-code-isa/msgr-failures new file mode 120000 index 00000000000..03689aa44a3 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/msgr-failures @@ -0,0 +1 @@ +../thrash/msgr-failures \ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/supported b/qa/suites/rados/thrash-erasure-code-isa/supported new file mode 120000 index 00000000000..c5d59352cb5 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/supported @@ -0,0 +1 @@ +../../../distros/supported \ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/thrashers b/qa/suites/rados/thrash-erasure-code-isa/thrashers new file mode 120000 index 00000000000..f461dadc3f2 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/thrashers @@ -0,0 +1 @@ +../thrash/thrashers \ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/workloads/ec-rados-plugin=isa-k=2-m=1.yaml b/qa/suites/rados/thrash-erasure-code-isa/workloads/ec-rados-plugin=isa-k=2-m=1.yaml new file mode 120000 index 00000000000..9d32cd811c1 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/workloads/ec-rados-plugin=isa-k=2-m=1.yaml @@ -0,0 +1 @@ +../../../../erasure-code/ec-rados-plugin=isa-k=2-m=1.yaml \ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/% 
b/qa/suites/rados/thrash-erasure-code/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rados/thrash-erasure-code/clusters b/qa/suites/rados/thrash-erasure-code/clusters new file mode 120000 index 00000000000..7aac47be3e6 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/clusters @@ -0,0 +1 @@ +../thrash/clusters \ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/fs b/qa/suites/rados/thrash-erasure-code/fs new file mode 120000 index 00000000000..c11782e0462 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/fs @@ -0,0 +1 @@ +../thrash/fs \ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/msgr-failures b/qa/suites/rados/thrash-erasure-code/msgr-failures new file mode 120000 index 00000000000..03689aa44a3 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/msgr-failures @@ -0,0 +1 @@ +../thrash/msgr-failures \ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/thrashers/default.yaml b/qa/suites/rados/thrash-erasure-code/thrashers/default.yaml new file mode 100644 index 00000000000..fade054b1b7 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/thrashers/default.yaml @@ -0,0 +1,17 @@ +tasks: +- install: +- ceph: + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost + conf: + osd: + osd debug reject backfill probability: .3 + osd max backfills: 1 + osd scrub min interval: 60 + osd scrub max interval: 120 +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 + min_in: 4 diff --git a/qa/suites/rados/thrash-erasure-code/thrashers/mapgap.yaml b/qa/suites/rados/thrash-erasure-code/thrashers/mapgap.yaml new file mode 100644 index 00000000000..c37147fda22 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/thrashers/mapgap.yaml @@ -0,0 +1,22 @@ +overrides: + ceph: + conf: + mon: + mon min osdmap epochs: 2 + osd: + osd map cache size: 1 + osd scrub min interval: 60 + osd scrub max interval: 120 
+tasks: +- install: +- ceph: + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost + - osd_map_cache_size +- thrashosds: + timeout: 1800 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 + chance_test_map_discontinuity: 0.5 + min_in: 4 diff --git a/qa/suites/rados/thrash-erasure-code/thrashers/morepggrow.yaml b/qa/suites/rados/thrash-erasure-code/thrashers/morepggrow.yaml new file mode 100644 index 00000000000..9ba1b9e5867 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/thrashers/morepggrow.yaml @@ -0,0 +1,16 @@ +tasks: +- install: +- ceph: + conf: + osd: + osd max backfills: 1 + osd scrub min interval: 60 + osd scrub max interval: 120 + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 3 + chance_pgpnum_fix: 1 + min_in: 4 diff --git a/qa/suites/rados/thrash-erasure-code/thrashers/pggrow.yaml b/qa/suites/rados/thrash-erasure-code/thrashers/pggrow.yaml new file mode 100644 index 00000000000..744761d8cce --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/thrashers/pggrow.yaml @@ -0,0 +1,15 @@ +tasks: +- install: +- ceph: + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost + conf: + osd: + osd scrub min interval: 60 + osd scrub max interval: 120 +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 2 + chance_pgpnum_fix: 1 + min_in: 4 diff --git a/qa/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=jerasure-k=2-m=1.yaml b/qa/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=jerasure-k=2-m=1.yaml new file mode 120000 index 00000000000..f11eddb7f56 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=jerasure-k=2-m=1.yaml @@ -0,0 +1 @@ +../../../../erasure-code/ec-rados-plugin=jerasure-k=2-m=1.yaml \ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=jerasure-k=3-m=1.yaml 
b/qa/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=jerasure-k=3-m=1.yaml new file mode 120000 index 00000000000..b1407aef7e1 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=jerasure-k=3-m=1.yaml @@ -0,0 +1 @@ +../../../../erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml \ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/workloads/ec-radosbench.yaml b/qa/suites/rados/thrash-erasure-code/workloads/ec-radosbench.yaml new file mode 100644 index 00000000000..7aaf0e1c30b --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/workloads/ec-radosbench.yaml @@ -0,0 +1,6 @@ +tasks: +- radosbench: + clients: [client.0] + time: 300 + unique_pool: true + ec_pool: true diff --git a/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects.yaml b/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects.yaml new file mode 100644 index 00000000000..a8ac39716e5 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects.yaml @@ -0,0 +1,20 @@ +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + ec_pool: true + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/rados/thrash/% b/qa/suites/rados/thrash/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rados/thrash/0-size-min-size-overrides/2-size-1-min-size.yaml b/qa/suites/rados/thrash/0-size-min-size-overrides/2-size-1-min-size.yaml new file mode 120000 index 00000000000..4c817a6fecf --- /dev/null +++ b/qa/suites/rados/thrash/0-size-min-size-overrides/2-size-1-min-size.yaml @@ -0,0 +1 @@ +../../../../overrides/2-size-1-min-size.yaml \ No newline at end of file diff --git a/qa/suites/rados/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml 
b/qa/suites/rados/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml new file mode 120000 index 00000000000..c429b07b999 --- /dev/null +++ b/qa/suites/rados/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml @@ -0,0 +1 @@ +../../../../overrides/2-size-2-min-size.yaml \ No newline at end of file diff --git a/qa/suites/rados/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml b/qa/suites/rados/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml new file mode 120000 index 00000000000..8d529f0a9ad --- /dev/null +++ b/qa/suites/rados/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml @@ -0,0 +1 @@ +../../../../overrides/3-size-2-min-size.yaml \ No newline at end of file diff --git a/qa/suites/rados/thrash/1-pg-log-overrides/normal_pg_log.yaml b/qa/suites/rados/thrash/1-pg-log-overrides/normal_pg_log.yaml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rados/thrash/1-pg-log-overrides/short_pg_log.yaml b/qa/suites/rados/thrash/1-pg-log-overrides/short_pg_log.yaml new file mode 120000 index 00000000000..62010f4f7fb --- /dev/null +++ b/qa/suites/rados/thrash/1-pg-log-overrides/short_pg_log.yaml @@ -0,0 +1 @@ +../../../../overrides/short_pg_log.yaml \ No newline at end of file diff --git a/qa/suites/rados/thrash/clusters/+ b/qa/suites/rados/thrash/clusters/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rados/thrash/clusters/fixed-2.yaml b/qa/suites/rados/thrash/clusters/fixed-2.yaml new file mode 120000 index 00000000000..cd0791a1486 --- /dev/null +++ b/qa/suites/rados/thrash/clusters/fixed-2.yaml @@ -0,0 +1 @@ +../../../../clusters/fixed-2.yaml \ No newline at end of file diff --git a/qa/suites/rados/thrash/clusters/openstack.yaml b/qa/suites/rados/thrash/clusters/openstack.yaml new file mode 100644 index 00000000000..39e43d021ac --- /dev/null +++ b/qa/suites/rados/thrash/clusters/openstack.yaml @@ -0,0 +1,8 @@ +openstack: + - machine: + disk: 40 # GB + ram: 8000 # MB + cpus: 1 + volumes: # 
attached to each instance + count: 3 + size: 30 # GB diff --git a/qa/suites/rados/thrash/fs/ext4.yaml b/qa/suites/rados/thrash/fs/ext4.yaml new file mode 120000 index 00000000000..65d71886933 --- /dev/null +++ b/qa/suites/rados/thrash/fs/ext4.yaml @@ -0,0 +1 @@ +../../../../fs/ext4.yaml \ No newline at end of file diff --git a/qa/suites/rados/thrash/fs/xfs.yaml b/qa/suites/rados/thrash/fs/xfs.yaml new file mode 120000 index 00000000000..4c28d731f6b --- /dev/null +++ b/qa/suites/rados/thrash/fs/xfs.yaml @@ -0,0 +1 @@ +../../../../fs/xfs.yaml \ No newline at end of file diff --git a/qa/suites/rados/thrash/msgr-failures/fastclose.yaml b/qa/suites/rados/thrash/msgr-failures/fastclose.yaml new file mode 100644 index 00000000000..77fd730aff7 --- /dev/null +++ b/qa/suites/rados/thrash/msgr-failures/fastclose.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 2500 + ms tcp read timeout: 5 diff --git a/qa/suites/rados/thrash/msgr-failures/few.yaml b/qa/suites/rados/thrash/msgr-failures/few.yaml new file mode 100644 index 00000000000..477bffe619b --- /dev/null +++ b/qa/suites/rados/thrash/msgr-failures/few.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + osd: + osd heartbeat use min delay socket: true diff --git a/qa/suites/rados/thrash/msgr-failures/osd-delay.yaml b/qa/suites/rados/thrash/msgr-failures/osd-delay.yaml new file mode 100644 index 00000000000..a33ba89e14f --- /dev/null +++ b/qa/suites/rados/thrash/msgr-failures/osd-delay.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 2500 + ms inject delay type: osd + ms inject delay probability: .005 + ms inject delay max: 1 + ms inject internal delays: .002 diff --git a/qa/suites/rados/thrash/thrashers/default.yaml b/qa/suites/rados/thrash/thrashers/default.yaml new file mode 100644 index 00000000000..fabfc4f8c40 --- /dev/null +++ b/qa/suites/rados/thrash/thrashers/default.yaml @@ -0,0 +1,16 @@ +tasks: 
+- install: +- ceph: + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost + conf: + osd: + osd debug reject backfill probability: .3 + osd max backfills: 1 + osd scrub min interval: 60 + osd scrub max interval: 120 +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 diff --git a/qa/suites/rados/thrash/thrashers/mapgap.yaml b/qa/suites/rados/thrash/thrashers/mapgap.yaml new file mode 100644 index 00000000000..016563bd8ea --- /dev/null +++ b/qa/suites/rados/thrash/thrashers/mapgap.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + conf: + mon: + mon min osdmap epochs: 2 + osd: + osd map cache size: 1 + osd scrub min interval: 60 + osd scrub max interval: 120 +tasks: +- install: +- ceph: + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost + - osd_map_cache_size +- thrashosds: + timeout: 1800 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 + chance_test_map_discontinuity: 0.5 diff --git a/qa/suites/rados/thrash/thrashers/morepggrow.yaml b/qa/suites/rados/thrash/thrashers/morepggrow.yaml new file mode 100644 index 00000000000..0bb136ddfea --- /dev/null +++ b/qa/suites/rados/thrash/thrashers/morepggrow.yaml @@ -0,0 +1,15 @@ +tasks: +- install: +- ceph: + conf: + osd: + osd max backfills: 1 + osd scrub min interval: 60 + osd scrub max interval: 120 + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 3 + chance_pgpnum_fix: 1 diff --git a/qa/suites/rados/thrash/thrashers/pggrow.yaml b/qa/suites/rados/thrash/thrashers/pggrow.yaml new file mode 100644 index 00000000000..4a94f50d829 --- /dev/null +++ b/qa/suites/rados/thrash/thrashers/pggrow.yaml @@ -0,0 +1,14 @@ +tasks: +- install: +- ceph: + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost + conf: + osd: + osd scrub min interval: 60 + osd scrub max interval: 120 +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 2 + 
chance_pgpnum_fix: 1 diff --git a/qa/suites/rados/thrash/workloads/admin_socket_objecter_requests.yaml b/qa/suites/rados/thrash/workloads/admin_socket_objecter_requests.yaml new file mode 100644 index 00000000000..b1ddad8d3b0 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/admin_socket_objecter_requests.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + client.0: + admin socket: /var/run/ceph/ceph-$name.asok +tasks: +- radosbench: + clients: [client.0] + time: 60 +- admin_socket: + client.0: + objecter_requests: + test: "http://git.ceph.com/?p=ceph.git;a=blob_plain;f=src/test/admin_socket/objecter_requests;hb={branch}" diff --git a/qa/suites/rados/thrash/workloads/cache-agent-big.yaml b/qa/suites/rados/thrash/workloads/cache-agent-big.yaml new file mode 100644 index 00000000000..bafc08df69f --- /dev/null +++ b/qa/suites/rados/thrash/workloads/cache-agent-big.yaml @@ -0,0 +1,31 @@ +overrides: + ceph: + log-whitelist: + - must scrub before tier agent can activate +tasks: +- exec: + client.0: + - ceph osd erasure-code-profile set teuthologyprofile ruleset-failure-domain=osd + m=1 k=2 + - ceph osd pool create base 4 4 erasure teuthologyprofile + - ceph osd pool create cache 4 + - ceph osd tier add base cache + - ceph osd tier cache-mode cache writeback + - ceph osd tier set-overlay base cache + - ceph osd pool set cache hit_set_type bloom + - ceph osd pool set cache hit_set_count 8 + - ceph osd pool set cache hit_set_period 60 + - ceph osd pool set cache target_max_objects 5000 + - ceph osd pool set cache min_read_recency_for_promote 2 +- rados: + clients: [client.0] + pools: [base] + ops: 10000 + objects: 6600 + max_seconds: 1200 + size: 1024 + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 diff --git a/qa/suites/rados/thrash/workloads/cache-agent-small.yaml b/qa/suites/rados/thrash/workloads/cache-agent-small.yaml new file mode 100644 index 00000000000..9cbc435815e --- /dev/null +++ b/qa/suites/rados/thrash/workloads/cache-agent-small.yaml @@ 
-0,0 +1,28 @@ +overrides: + ceph: + crush_tunables: firefly + log-whitelist: + - must scrub before tier agent can activate +tasks: +- exec: + client.0: + - ceph osd pool create base 4 + - ceph osd pool create cache 4 + - ceph osd tier add base cache + - ceph osd tier cache-mode cache writeback + - ceph osd tier set-overlay base cache + - ceph osd pool set cache hit_set_type bloom + - ceph osd pool set cache hit_set_count 8 + - ceph osd pool set cache hit_set_period 60 + - ceph osd pool set cache target_max_objects 250 + - ceph osd pool set cache min_read_recency_for_promote 0 +- rados: + clients: [client.0] + pools: [base] + ops: 4000 + objects: 500 + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 diff --git a/qa/suites/rados/thrash/workloads/cache-pool-snaps.yaml b/qa/suites/rados/thrash/workloads/cache-pool-snaps.yaml new file mode 100644 index 00000000000..17dfe33dd57 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/cache-pool-snaps.yaml @@ -0,0 +1,34 @@ +overrides: + ceph: + log-whitelist: + - must scrub before tier agent can activate +tasks: +- exec: + client.0: + - ceph osd pool create base 4 + - ceph osd pool create cache 4 + - ceph osd tier add base cache + - ceph osd tier cache-mode cache writeback + - ceph osd tier set-overlay base cache + - ceph osd pool set cache hit_set_type bloom + - ceph osd pool set cache hit_set_count 8 + - ceph osd pool set cache hit_set_period 3600 + - ceph osd pool set cache target_max_objects 250 + - ceph osd pool set cache min_read_recency_for_promote 2 +- rados: + clients: [client.0] + pools: [base] + ops: 4000 + objects: 500 + pool_snaps: true + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 + flush: 50 + try_flush: 50 + evict: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/rados/thrash/workloads/cache-snaps.yaml b/qa/suites/rados/thrash/workloads/cache-snaps.yaml new file mode 100644 index 00000000000..21d963d7f60 --- /dev/null +++ 
b/qa/suites/rados/thrash/workloads/cache-snaps.yaml @@ -0,0 +1,33 @@ +overrides: + ceph: + log-whitelist: + - must scrub before tier agent can activate +tasks: +- exec: + client.0: + - ceph osd pool create base 4 + - ceph osd pool create cache 4 + - ceph osd tier add base cache + - ceph osd tier cache-mode cache writeback + - ceph osd tier set-overlay base cache + - ceph osd pool set cache hit_set_type bloom + - ceph osd pool set cache hit_set_count 8 + - ceph osd pool set cache hit_set_period 3600 + - ceph osd pool set cache target_max_objects 250 + - ceph osd pool set cache min_read_recency_for_promote 0 +- rados: + clients: [client.0] + pools: [base] + ops: 4000 + objects: 500 + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 + flush: 50 + try_flush: 50 + evict: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/rados/thrash/workloads/cache.yaml b/qa/suites/rados/thrash/workloads/cache.yaml new file mode 100644 index 00000000000..0f15cad6763 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/cache.yaml @@ -0,0 +1,28 @@ +overrides: + ceph: + log-whitelist: + - must scrub before tier agent can activate +tasks: +- exec: + client.0: + - ceph osd pool create base 4 + - ceph osd pool create cache 4 + - ceph osd tier add base cache + - ceph osd tier cache-mode cache writeback + - ceph osd tier set-overlay base cache + - ceph osd pool set cache hit_set_type bloom + - ceph osd pool set cache hit_set_count 8 + - ceph osd pool set cache hit_set_period 3600 +- rados: + clients: [client.0] + pools: [base] + ops: 4000 + objects: 500 + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 + flush: 50 + try_flush: 50 + evict: 50 diff --git a/qa/suites/rados/thrash/workloads/pool-snaps-few-objects.yaml b/qa/suites/rados/thrash/workloads/pool-snaps-few-objects.yaml new file mode 100644 index 00000000000..b5f6dca6c50 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/pool-snaps-few-objects.yaml @@ -0,0 +1,14 @@ +tasks: +- 
rados: + clients: [client.0] + ops: 4000 + objects: 50 + pool_snaps: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/rados/thrash/workloads/rados_api_tests.yaml b/qa/suites/rados/thrash/workloads/rados_api_tests.yaml new file mode 100644 index 00000000000..265649e2b0c --- /dev/null +++ b/qa/suites/rados/thrash/workloads/rados_api_tests.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + crush_tunables: hammer + conf: + client: + debug ms: 1 + debug objecter: 20 + debug rados: 20 +tasks: +- workunit: + clients: + client.0: + - rados/test.sh diff --git a/qa/suites/rados/thrash/workloads/radosbench.yaml b/qa/suites/rados/thrash/workloads/radosbench.yaml new file mode 100644 index 00000000000..03a8ecbdb5b --- /dev/null +++ b/qa/suites/rados/thrash/workloads/radosbench.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + conf: + client.0: + debug ms: 1 + debug objecter: 20 + debug rados: 20 +tasks: +- radosbench: + clients: [client.0] + time: 300 diff --git a/qa/suites/rados/thrash/workloads/readwrite.yaml b/qa/suites/rados/thrash/workloads/readwrite.yaml new file mode 100644 index 00000000000..84290905d14 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/readwrite.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + crush_tunables: optimal +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + op_weights: + read: 45 + write: 45 + delete: 10 diff --git a/qa/suites/rados/thrash/workloads/small-objects.yaml b/qa/suites/rados/thrash/workloads/small-objects.yaml new file mode 100644 index 00000000000..d8545b9bece --- /dev/null +++ b/qa/suites/rados/thrash/workloads/small-objects.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + crush_tunables: legacy +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 +
setattr: 25 + rmattr: 25 diff --git a/qa/suites/rados/thrash/workloads/snaps-few-objects.yaml b/qa/suites/rados/thrash/workloads/snaps-few-objects.yaml new file mode 100644 index 00000000000..aa82d973ae1 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/snaps-few-objects.yaml @@ -0,0 +1,13 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/rados/thrash/workloads/write_fadvise_dontneed.yaml b/qa/suites/rados/thrash/workloads/write_fadvise_dontneed.yaml new file mode 100644 index 00000000000..606dcae6922 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/write_fadvise_dontneed.yaml @@ -0,0 +1,8 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + write_fadvise_dontneed: true + op_weights: + write: 100 diff --git a/qa/suites/rados/verify/% b/qa/suites/rados/verify/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rados/verify/1thrash/default.yaml b/qa/suites/rados/verify/1thrash/default.yaml new file mode 100644 index 00000000000..9435b146af6 --- /dev/null +++ b/qa/suites/rados/verify/1thrash/default.yaml @@ -0,0 +1,10 @@ +tasks: +- install: +- ceph: + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 diff --git a/qa/suites/rados/verify/1thrash/none.yaml b/qa/suites/rados/verify/1thrash/none.yaml new file mode 100644 index 00000000000..2030acb9083 --- /dev/null +++ b/qa/suites/rados/verify/1thrash/none.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/rados/verify/clusters/fixed-2.yaml b/qa/suites/rados/verify/clusters/fixed-2.yaml new file mode 120000 index 00000000000..cd0791a1486 --- /dev/null +++ b/qa/suites/rados/verify/clusters/fixed-2.yaml @@ -0,0 +1 @@ +../../../../clusters/fixed-2.yaml \ No newline at end of file diff 
--git a/qa/suites/rados/verify/fs/xfs.yaml b/qa/suites/rados/verify/fs/xfs.yaml new file mode 120000 index 00000000000..4c28d731f6b --- /dev/null +++ b/qa/suites/rados/verify/fs/xfs.yaml @@ -0,0 +1 @@ +../../../../fs/xfs.yaml \ No newline at end of file diff --git a/qa/suites/rados/verify/msgr-failures/few.yaml b/qa/suites/rados/verify/msgr-failures/few.yaml new file mode 100644 index 00000000000..0de320d46b8 --- /dev/null +++ b/qa/suites/rados/verify/msgr-failures/few.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 diff --git a/qa/suites/rados/verify/tasks/mon_recovery.yaml b/qa/suites/rados/verify/tasks/mon_recovery.yaml new file mode 100644 index 00000000000..6986303409e --- /dev/null +++ b/qa/suites/rados/verify/tasks/mon_recovery.yaml @@ -0,0 +1,2 @@ +tasks: +- mon_recovery: diff --git a/qa/suites/rados/verify/tasks/rados_api_tests.yaml b/qa/suites/rados/verify/tasks/rados_api_tests.yaml new file mode 100644 index 00000000000..0031704784e --- /dev/null +++ b/qa/suites/rados/verify/tasks/rados_api_tests.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + client: + debug ms: 1 + debug objecter: 20 + debug rados: 20 + debug monc: 20 +tasks: +- workunit: + timeout: 6h + clients: + client.0: + - rados/test.sh diff --git a/qa/suites/rados/verify/tasks/rados_cls_all.yaml b/qa/suites/rados/verify/tasks/rados_cls_all.yaml new file mode 100644 index 00000000000..853da39ad99 --- /dev/null +++ b/qa/suites/rados/verify/tasks/rados_cls_all.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - cls diff --git a/qa/suites/rados/verify/validater/lockdep.yaml b/qa/suites/rados/verify/validater/lockdep.yaml new file mode 100644 index 00000000000..25f84355c0b --- /dev/null +++ b/qa/suites/rados/verify/validater/lockdep.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + lockdep: true diff --git a/qa/suites/rados/verify/validater/valgrind.yaml b/qa/suites/rados/verify/validater/valgrind.yaml new file mode 100644 
index 00000000000..0b28c183065 --- /dev/null +++ b/qa/suites/rados/verify/validater/valgrind.yaml @@ -0,0 +1,12 @@ +overrides: + install: + ceph: + flavor: notcmalloc + ceph: + conf: + global: + osd heartbeat grace: 40 + valgrind: + mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes] + osd: [--tool=memcheck] + mds: [--tool=memcheck] diff --git a/qa/suites/rbd/basic/% b/qa/suites/rbd/basic/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rbd/basic/base/install.yaml b/qa/suites/rbd/basic/base/install.yaml new file mode 100644 index 00000000000..2030acb9083 --- /dev/null +++ b/qa/suites/rbd/basic/base/install.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/rbd/basic/cachepool/none.yaml b/qa/suites/rbd/basic/cachepool/none.yaml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rbd/basic/cachepool/small.yaml b/qa/suites/rbd/basic/cachepool/small.yaml new file mode 100644 index 00000000000..f8ed11040fa --- /dev/null +++ b/qa/suites/rbd/basic/cachepool/small.yaml @@ -0,0 +1,11 @@ +tasks: +- exec: + client.0: + - ceph osd pool create cache 4 + - ceph osd tier add rbd cache + - ceph osd tier cache-mode cache writeback + - ceph osd tier set-overlay rbd cache + - ceph osd pool set cache hit_set_type bloom + - ceph osd pool set cache hit_set_count 8 + - ceph osd pool set cache hit_set_period 60 + - ceph osd pool set cache target_max_objects 250 diff --git a/qa/suites/rbd/basic/clusters/fixed-1.yaml b/qa/suites/rbd/basic/clusters/fixed-1.yaml new file mode 120000 index 00000000000..435ea3c7546 --- /dev/null +++ b/qa/suites/rbd/basic/clusters/fixed-1.yaml @@ -0,0 +1 @@ +../../../../clusters/fixed-1.yaml \ No newline at end of file diff --git a/qa/suites/rbd/basic/fs/btrfs.yaml b/qa/suites/rbd/basic/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/rbd/basic/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff 
--git a/qa/suites/rbd/basic/msgr-failures/few.yaml b/qa/suites/rbd/basic/msgr-failures/few.yaml new file mode 100644 index 00000000000..0de320d46b8 --- /dev/null +++ b/qa/suites/rbd/basic/msgr-failures/few.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 diff --git a/qa/suites/rbd/basic/msgr-failures/many.yaml b/qa/suites/rbd/basic/msgr-failures/many.yaml new file mode 100644 index 00000000000..86f8dde8a0e --- /dev/null +++ b/qa/suites/rbd/basic/msgr-failures/many.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 500 diff --git a/qa/suites/rbd/basic/tasks/rbd_api_tests_old_format.yaml b/qa/suites/rbd/basic/tasks/rbd_api_tests_old_format.yaml new file mode 100644 index 00000000000..a98768540ba --- /dev/null +++ b/qa/suites/rbd/basic/tasks/rbd_api_tests_old_format.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh diff --git a/qa/suites/rbd/basic/tasks/rbd_cli_tests.yaml b/qa/suites/rbd/basic/tasks/rbd_cli_tests.yaml new file mode 100644 index 00000000000..a37db057b5d --- /dev/null +++ b/qa/suites/rbd/basic/tasks/rbd_cli_tests.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/run_cli_tests.sh + diff --git a/qa/suites/rbd/basic/tasks/rbd_cls_tests.yaml b/qa/suites/rbd/basic/tasks/rbd_cls_tests.yaml new file mode 100644 index 00000000000..9ccd57c4a82 --- /dev/null +++ b/qa/suites/rbd/basic/tasks/rbd_cls_tests.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - cls/test_cls_rbd.sh diff --git a/qa/suites/rbd/basic/tasks/rbd_lock_and_fence.yaml b/qa/suites/rbd/basic/tasks/rbd_lock_and_fence.yaml new file mode 100644 index 00000000000..d2c80ad6585 --- /dev/null +++ b/qa/suites/rbd/basic/tasks/rbd_lock_and_fence.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/test_lock_fence.sh diff --git a/qa/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml 
b/qa/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml new file mode 100644 index 00000000000..263b784e27d --- /dev/null +++ b/qa/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh diff --git a/qa/suites/rbd/cli/% b/qa/suites/rbd/cli/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rbd/cli/base/install.yaml b/qa/suites/rbd/cli/base/install.yaml new file mode 100644 index 00000000000..2030acb9083 --- /dev/null +++ b/qa/suites/rbd/cli/base/install.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/rbd/cli/cachepool/none.yaml b/qa/suites/rbd/cli/cachepool/none.yaml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rbd/cli/cachepool/small.yaml b/qa/suites/rbd/cli/cachepool/small.yaml new file mode 100644 index 00000000000..f8ed11040fa --- /dev/null +++ b/qa/suites/rbd/cli/cachepool/small.yaml @@ -0,0 +1,11 @@ +tasks: +- exec: + client.0: + - ceph osd pool create cache 4 + - ceph osd tier add rbd cache + - ceph osd tier cache-mode cache writeback + - ceph osd tier set-overlay rbd cache + - ceph osd pool set cache hit_set_type bloom + - ceph osd pool set cache hit_set_count 8 + - ceph osd pool set cache hit_set_period 60 + - ceph osd pool set cache target_max_objects 250 diff --git a/qa/suites/rbd/cli/clusters/fixed-1.yaml b/qa/suites/rbd/cli/clusters/fixed-1.yaml new file mode 120000 index 00000000000..435ea3c7546 --- /dev/null +++ b/qa/suites/rbd/cli/clusters/fixed-1.yaml @@ -0,0 +1 @@ +../../../../clusters/fixed-1.yaml \ No newline at end of file diff --git a/qa/suites/rbd/cli/features/layering.yaml b/qa/suites/rbd/cli/features/layering.yaml new file mode 100644 index 00000000000..233dd53667c --- /dev/null +++ b/qa/suites/rbd/cli/features/layering.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default format: 2 diff --git a/qa/suites/rbd/cli/features/none.yaml 
b/qa/suites/rbd/cli/features/none.yaml new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/qa/suites/rbd/cli/features/none.yaml @@ -0,0 +1 @@ + diff --git a/qa/suites/rbd/cli/features/object_map.yaml b/qa/suites/rbd/cli/features/object_map.yaml new file mode 100644 index 00000000000..4e31f21cc25 --- /dev/null +++ b/qa/suites/rbd/cli/features/object_map.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + client: + rbd default format: 2 + rbd default features: 13 diff --git a/qa/suites/rbd/cli/fs b/qa/suites/rbd/cli/fs new file mode 120000 index 00000000000..3658920363d --- /dev/null +++ b/qa/suites/rbd/cli/fs @@ -0,0 +1 @@ +../basic/fs \ No newline at end of file diff --git a/qa/suites/rbd/cli/msgr-failures/few.yaml b/qa/suites/rbd/cli/msgr-failures/few.yaml new file mode 100644 index 00000000000..0de320d46b8 --- /dev/null +++ b/qa/suites/rbd/cli/msgr-failures/few.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 diff --git a/qa/suites/rbd/cli/msgr-failures/many.yaml b/qa/suites/rbd/cli/msgr-failures/many.yaml new file mode 100644 index 00000000000..86f8dde8a0e --- /dev/null +++ b/qa/suites/rbd/cli/msgr-failures/many.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 500 diff --git a/qa/suites/rbd/cli/workloads/rbd_cli_copy.yaml b/qa/suites/rbd/cli/workloads/rbd_cli_copy.yaml new file mode 100644 index 00000000000..2f99f8990de --- /dev/null +++ b/qa/suites/rbd/cli/workloads/rbd_cli_copy.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/copy.sh diff --git a/qa/suites/rbd/cli/workloads/rbd_cli_import_export.yaml b/qa/suites/rbd/cli/workloads/rbd_cli_import_export.yaml new file mode 100644 index 00000000000..b08f2612f7a --- /dev/null +++ b/qa/suites/rbd/cli/workloads/rbd_cli_import_export.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/import_export.sh diff --git a/qa/suites/rbd/librbd/% b/qa/suites/rbd/librbd/% new file 
mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rbd/librbd/cache/none.yaml b/qa/suites/rbd/librbd/cache/none.yaml new file mode 100644 index 00000000000..42fd9c95562 --- /dev/null +++ b/qa/suites/rbd/librbd/cache/none.yaml @@ -0,0 +1,6 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: false diff --git a/qa/suites/rbd/librbd/cache/writeback.yaml b/qa/suites/rbd/librbd/cache/writeback.yaml new file mode 100644 index 00000000000..86fe06afa05 --- /dev/null +++ b/qa/suites/rbd/librbd/cache/writeback.yaml @@ -0,0 +1,6 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true diff --git a/qa/suites/rbd/librbd/cache/writethrough.yaml b/qa/suites/rbd/librbd/cache/writethrough.yaml new file mode 100644 index 00000000000..6dc29e16c02 --- /dev/null +++ b/qa/suites/rbd/librbd/cache/writethrough.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true + rbd cache max dirty: 0 diff --git a/qa/suites/rbd/librbd/cachepool/none.yaml b/qa/suites/rbd/librbd/cachepool/none.yaml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rbd/librbd/cachepool/small.yaml b/qa/suites/rbd/librbd/cachepool/small.yaml new file mode 100644 index 00000000000..f8ed11040fa --- /dev/null +++ b/qa/suites/rbd/librbd/cachepool/small.yaml @@ -0,0 +1,11 @@ +tasks: +- exec: + client.0: + - ceph osd pool create cache 4 + - ceph osd tier add rbd cache + - ceph osd tier cache-mode cache writeback + - ceph osd tier set-overlay rbd cache + - ceph osd pool set cache hit_set_type bloom + - ceph osd pool set cache hit_set_count 8 + - ceph osd pool set cache hit_set_period 60 + - ceph osd pool set cache target_max_objects 250 diff --git a/qa/suites/rbd/librbd/clusters/fixed-3.yaml b/qa/suites/rbd/librbd/clusters/fixed-3.yaml new file mode 120000 index 00000000000..a3ac9fc4dec --- /dev/null +++ b/qa/suites/rbd/librbd/clusters/fixed-3.yaml @@ -0,0 +1 @@ +../../../../clusters/fixed-3.yaml \ No newline at end of file diff --git 
a/qa/suites/rbd/librbd/copy-on-read/off.yaml b/qa/suites/rbd/librbd/copy-on-read/off.yaml new file mode 100644 index 00000000000..638d14aa12a --- /dev/null +++ b/qa/suites/rbd/librbd/copy-on-read/off.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd clone copy on read: false diff --git a/qa/suites/rbd/librbd/copy-on-read/on.yaml b/qa/suites/rbd/librbd/copy-on-read/on.yaml new file mode 100644 index 00000000000..ce99e7ec0a9 --- /dev/null +++ b/qa/suites/rbd/librbd/copy-on-read/on.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd clone copy on read: true diff --git a/qa/suites/rbd/librbd/fs b/qa/suites/rbd/librbd/fs new file mode 120000 index 00000000000..3658920363d --- /dev/null +++ b/qa/suites/rbd/librbd/fs @@ -0,0 +1 @@ +../basic/fs \ No newline at end of file diff --git a/qa/suites/rbd/librbd/msgr-failures/few.yaml b/qa/suites/rbd/librbd/msgr-failures/few.yaml new file mode 100644 index 00000000000..a8bc68355ea --- /dev/null +++ b/qa/suites/rbd/librbd/msgr-failures/few.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + log-whitelist: + - wrongly marked me down diff --git a/qa/suites/rbd/librbd/workloads/c_api_tests.yaml b/qa/suites/rbd/librbd/workloads/c_api_tests.yaml new file mode 100644 index 00000000000..188ddc56c60 --- /dev/null +++ b/qa/suites/rbd/librbd/workloads/c_api_tests.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "1" diff --git a/qa/suites/rbd/librbd/workloads/c_api_tests_with_object_map.yaml b/qa/suites/rbd/librbd/workloads/c_api_tests_with_object_map.yaml new file mode 100644 index 00000000000..fef368b9bc4 --- /dev/null +++ b/qa/suites/rbd/librbd/workloads/c_api_tests_with_object_map.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "13" diff --git a/qa/suites/rbd/librbd/workloads/fsx.yaml b/qa/suites/rbd/librbd/workloads/fsx.yaml new file mode 
100644 index 00000000000..ef512d8a9b4 --- /dev/null +++ b/qa/suites/rbd/librbd/workloads/fsx.yaml @@ -0,0 +1,4 @@ +tasks: +- rbd_fsx: + clients: [client.0] + ops: 5000 diff --git a/qa/suites/rbd/librbd/workloads/python_api_tests.yaml b/qa/suites/rbd/librbd/workloads/python_api_tests.yaml new file mode 100644 index 00000000000..a7b3ce7d3e6 --- /dev/null +++ b/qa/suites/rbd/librbd/workloads/python_api_tests.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh + env: + RBD_FEATURES: "1" diff --git a/qa/suites/rbd/librbd/workloads/python_api_tests_with_object_map.yaml b/qa/suites/rbd/librbd/workloads/python_api_tests_with_object_map.yaml new file mode 100644 index 00000000000..ede74cd8d59 --- /dev/null +++ b/qa/suites/rbd/librbd/workloads/python_api_tests_with_object_map.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh + env: + RBD_FEATURES: "13" diff --git a/qa/suites/rbd/qemu/% b/qa/suites/rbd/qemu/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rbd/qemu/cache/none.yaml b/qa/suites/rbd/qemu/cache/none.yaml new file mode 100644 index 00000000000..42fd9c95562 --- /dev/null +++ b/qa/suites/rbd/qemu/cache/none.yaml @@ -0,0 +1,6 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: false diff --git a/qa/suites/rbd/qemu/cache/writeback.yaml b/qa/suites/rbd/qemu/cache/writeback.yaml new file mode 100644 index 00000000000..86fe06afa05 --- /dev/null +++ b/qa/suites/rbd/qemu/cache/writeback.yaml @@ -0,0 +1,6 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true diff --git a/qa/suites/rbd/qemu/cache/writethrough.yaml b/qa/suites/rbd/qemu/cache/writethrough.yaml new file mode 100644 index 00000000000..6dc29e16c02 --- /dev/null +++ b/qa/suites/rbd/qemu/cache/writethrough.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true + rbd cache max dirty: 0 diff --git a/qa/suites/rbd/qemu/cachepool/none.yaml 
b/qa/suites/rbd/qemu/cachepool/none.yaml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rbd/qemu/cachepool/small.yaml b/qa/suites/rbd/qemu/cachepool/small.yaml new file mode 100644 index 00000000000..f8ed11040fa --- /dev/null +++ b/qa/suites/rbd/qemu/cachepool/small.yaml @@ -0,0 +1,11 @@ +tasks: +- exec: + client.0: + - ceph osd pool create cache 4 + - ceph osd tier add rbd cache + - ceph osd tier cache-mode cache writeback + - ceph osd tier set-overlay rbd cache + - ceph osd pool set cache hit_set_type bloom + - ceph osd pool set cache hit_set_count 8 + - ceph osd pool set cache hit_set_period 60 + - ceph osd pool set cache target_max_objects 250 diff --git a/qa/suites/rbd/qemu/clusters/+ b/qa/suites/rbd/qemu/clusters/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rbd/qemu/clusters/fixed-3.yaml b/qa/suites/rbd/qemu/clusters/fixed-3.yaml new file mode 120000 index 00000000000..a3ac9fc4dec --- /dev/null +++ b/qa/suites/rbd/qemu/clusters/fixed-3.yaml @@ -0,0 +1 @@ +../../../../clusters/fixed-3.yaml \ No newline at end of file diff --git a/qa/suites/rbd/qemu/clusters/openstack.yaml b/qa/suites/rbd/qemu/clusters/openstack.yaml new file mode 100644 index 00000000000..f87995808a1 --- /dev/null +++ b/qa/suites/rbd/qemu/clusters/openstack.yaml @@ -0,0 +1,8 @@ +openstack: + - machine: + disk: 40 # GB + ram: 30000 # MB + cpus: 1 + volumes: # attached to each instance + count: 3 + size: 30 # GB diff --git a/qa/suites/rbd/qemu/fs b/qa/suites/rbd/qemu/fs new file mode 120000 index 00000000000..3658920363d --- /dev/null +++ b/qa/suites/rbd/qemu/fs @@ -0,0 +1 @@ +../basic/fs \ No newline at end of file diff --git a/qa/suites/rbd/qemu/msgr-failures/few.yaml b/qa/suites/rbd/qemu/msgr-failures/few.yaml new file mode 100644 index 00000000000..a8bc68355ea --- /dev/null +++ b/qa/suites/rbd/qemu/msgr-failures/few.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + log-whitelist: + - 
wrongly marked me down diff --git a/qa/suites/rbd/qemu/workloads/qemu_bonnie.yaml b/qa/suites/rbd/qemu/workloads/qemu_bonnie.yaml new file mode 100644 index 00000000000..3e523d61b77 --- /dev/null +++ b/qa/suites/rbd/qemu/workloads/qemu_bonnie.yaml @@ -0,0 +1,6 @@ +tasks: +- qemu: + all: + clone: true + test: http://git.ceph.com/?p=ceph.git;a=blob_plain;f=qa/workunits/suites/bonnie.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/qemu/workloads/qemu_fsstress.yaml b/qa/suites/rbd/qemu/workloads/qemu_fsstress.yaml new file mode 100644 index 00000000000..040dc5567b7 --- /dev/null +++ b/qa/suites/rbd/qemu/workloads/qemu_fsstress.yaml @@ -0,0 +1,6 @@ +tasks: +- qemu: + all: + clone: true + test: http://git.ceph.com/?p=ceph.git;a=blob_plain;f=qa/workunits/suites/fsstress.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/qemu/workloads/qemu_iozone.yaml.disabled b/qa/suites/rbd/qemu/workloads/qemu_iozone.yaml.disabled new file mode 100644 index 00000000000..3dae6e78a2f --- /dev/null +++ b/qa/suites/rbd/qemu/workloads/qemu_iozone.yaml.disabled @@ -0,0 +1,6 @@ +tasks: +- qemu: + all: + test: http://git.ceph.com/?p=ceph.git;a=blob_plain;f=qa/workunits/suites/iozone.sh + image_size: 20480 +exclude_arch: armv7l diff --git a/qa/suites/rbd/qemu/workloads/qemu_xfstests.yaml b/qa/suites/rbd/qemu/workloads/qemu_xfstests.yaml new file mode 100644 index 00000000000..b6112d1bb27 --- /dev/null +++ b/qa/suites/rbd/qemu/workloads/qemu_xfstests.yaml @@ -0,0 +1,8 @@ +tasks: +- qemu: + all: + clone: true + type: block + num_rbd: 2 + test: http://git.ceph.com/?p=ceph.git;a=blob_plain;f=qa/run_xfstests_qemu.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/singleton/% b/qa/suites/rbd/singleton/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rbd/singleton/all/formatted-output.yaml b/qa/suites/rbd/singleton/all/formatted-output.yaml new file mode 100644 index 00000000000..8b118b18928 --- /dev/null +++ b/qa/suites/rbd/singleton/all/formatted-output.yaml @@ -0,0 
+1,9 @@ +roles: +- [mon.a, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: +- cram: + clients: + client.0: + - http://git.ceph.com/?p=ceph.git;a=blob_plain;hb=hammer;f=src/test/cli-integration/rbd/formatted-output.t diff --git a/qa/suites/rbd/singleton/all/merge_diff.yaml b/qa/suites/rbd/singleton/all/merge_diff.yaml new file mode 100644 index 00000000000..fc58d827b0b --- /dev/null +++ b/qa/suites/rbd/singleton/all/merge_diff.yaml @@ -0,0 +1,8 @@ +roles: +- [mon.a, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: +- workunit: + clients: + all: [rbd/merge_diff.sh] diff --git a/qa/suites/rbd/singleton/all/qemu-iotests-no-cache.yaml b/qa/suites/rbd/singleton/all/qemu-iotests-no-cache.yaml new file mode 100644 index 00000000000..2771d4e8db1 --- /dev/null +++ b/qa/suites/rbd/singleton/all/qemu-iotests-no-cache.yaml @@ -0,0 +1,12 @@ +exclude_arch: armv7l +roles: +- [mon.a, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + conf: + client: + rbd cache: false +- workunit: + clients: + all: [rbd/qemu-iotests.sh] diff --git a/qa/suites/rbd/singleton/all/qemu-iotests-writeback.yaml b/qa/suites/rbd/singleton/all/qemu-iotests-writeback.yaml new file mode 100644 index 00000000000..f6768df5a22 --- /dev/null +++ b/qa/suites/rbd/singleton/all/qemu-iotests-writeback.yaml @@ -0,0 +1,12 @@ +exclude_arch: armv7l +roles: +- [mon.a, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true +- workunit: + clients: + all: [rbd/qemu-iotests.sh] diff --git a/qa/suites/rbd/singleton/all/qemu-iotests-writethrough.yaml b/qa/suites/rbd/singleton/all/qemu-iotests-writethrough.yaml new file mode 100644 index 00000000000..287509e4953 --- /dev/null +++ b/qa/suites/rbd/singleton/all/qemu-iotests-writethrough.yaml @@ -0,0 +1,13 @@ +exclude_arch: armv7l +roles: +- [mon.a, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true + rbd cache max dirty: 0 +- workunit: + clients: + all: [rbd/qemu-iotests.sh] diff --git 
a/qa/suites/rbd/singleton/all/rbd-vs-unmanaged-snaps.yaml b/qa/suites/rbd/singleton/all/rbd-vs-unmanaged-snaps.yaml new file mode 100644 index 00000000000..f00153f8464 --- /dev/null +++ b/qa/suites/rbd/singleton/all/rbd-vs-unmanaged-snaps.yaml @@ -0,0 +1,13 @@ +roles: +- [mon.a, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + conf: + client: + rbd validate pool: false +- workunit: + clients: + all: + - mon/rbd_snaps_ops.sh + diff --git a/qa/suites/rbd/singleton/all/read-flags-no-cache.yaml b/qa/suites/rbd/singleton/all/read-flags-no-cache.yaml new file mode 100644 index 00000000000..f7d44456d3b --- /dev/null +++ b/qa/suites/rbd/singleton/all/read-flags-no-cache.yaml @@ -0,0 +1,11 @@ +roles: +- [mon.a, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + conf: + client: + rbd cache: false +- workunit: + clients: + all: [rbd/read-flags.sh] diff --git a/qa/suites/rbd/singleton/all/read-flags-writeback.yaml b/qa/suites/rbd/singleton/all/read-flags-writeback.yaml new file mode 100644 index 00000000000..f25be79e0b6 --- /dev/null +++ b/qa/suites/rbd/singleton/all/read-flags-writeback.yaml @@ -0,0 +1,11 @@ +roles: +- [mon.a, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true +- workunit: + clients: + all: [rbd/read-flags.sh] diff --git a/qa/suites/rbd/singleton/all/read-flags-writethrough.yaml b/qa/suites/rbd/singleton/all/read-flags-writethrough.yaml new file mode 100644 index 00000000000..80d7b4254b6 --- /dev/null +++ b/qa/suites/rbd/singleton/all/read-flags-writethrough.yaml @@ -0,0 +1,12 @@ +roles: +- [mon.a, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true + rbd cache max dirty: 0 +- workunit: + clients: + all: [rbd/read-flags.sh] diff --git a/qa/suites/rbd/thrash/% b/qa/suites/rbd/thrash/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rbd/thrash/base/install.yaml b/qa/suites/rbd/thrash/base/install.yaml new file mode 100644 index 
00000000000..2030acb9083 --- /dev/null +++ b/qa/suites/rbd/thrash/base/install.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/rbd/thrash/clusters/+ b/qa/suites/rbd/thrash/clusters/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rbd/thrash/clusters/fixed-2.yaml b/qa/suites/rbd/thrash/clusters/fixed-2.yaml new file mode 120000 index 00000000000..cd0791a1486 --- /dev/null +++ b/qa/suites/rbd/thrash/clusters/fixed-2.yaml @@ -0,0 +1 @@ +../../../../clusters/fixed-2.yaml \ No newline at end of file diff --git a/qa/suites/rbd/thrash/clusters/openstack.yaml b/qa/suites/rbd/thrash/clusters/openstack.yaml new file mode 100644 index 00000000000..39e43d021ac --- /dev/null +++ b/qa/suites/rbd/thrash/clusters/openstack.yaml @@ -0,0 +1,8 @@ +openstack: + - machine: + disk: 40 # GB + ram: 8000 # MB + cpus: 1 + volumes: # attached to each instance + count: 3 + size: 30 # GB diff --git a/qa/suites/rbd/thrash/fs/btrfs.yaml b/qa/suites/rbd/thrash/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/rbd/thrash/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/rbd/thrash/fs/xfs.yaml b/qa/suites/rbd/thrash/fs/xfs.yaml new file mode 120000 index 00000000000..4c28d731f6b --- /dev/null +++ b/qa/suites/rbd/thrash/fs/xfs.yaml @@ -0,0 +1 @@ +../../../../fs/xfs.yaml \ No newline at end of file diff --git a/qa/suites/rbd/thrash/msgr-failures/few.yaml b/qa/suites/rbd/thrash/msgr-failures/few.yaml new file mode 100644 index 00000000000..0de320d46b8 --- /dev/null +++ b/qa/suites/rbd/thrash/msgr-failures/few.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 diff --git a/qa/suites/rbd/thrash/thrashers/cache.yaml b/qa/suites/rbd/thrash/thrashers/cache.yaml new file mode 100644 index 00000000000..5bab78ee840 --- /dev/null +++ b/qa/suites/rbd/thrash/thrashers/cache.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + 
log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost +tasks: +- exec: + client.0: + - ceph osd pool create cache 4 + - ceph osd tier add rbd cache + - ceph osd tier cache-mode cache writeback + - ceph osd tier set-overlay rbd cache + - ceph osd pool set cache hit_set_type bloom + - ceph osd pool set cache hit_set_count 8 + - ceph osd pool set cache hit_set_period 60 + - ceph osd pool set cache target_max_objects 250 +- thrashosds: + timeout: 1200 diff --git a/qa/suites/rbd/thrash/thrashers/default.yaml b/qa/suites/rbd/thrash/thrashers/default.yaml new file mode 100644 index 00000000000..89c9bdfb0e5 --- /dev/null +++ b/qa/suites/rbd/thrash/thrashers/default.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost +tasks: +- thrashosds: + timeout: 1200 diff --git a/qa/suites/rbd/thrash/workloads/rbd_api_tests.yaml b/qa/suites/rbd/thrash/workloads/rbd_api_tests.yaml new file mode 100644 index 00000000000..fef368b9bc4 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_api_tests.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "13" diff --git a/qa/suites/rbd/thrash/workloads/rbd_api_tests_copy_on_read.yaml b/qa/suites/rbd/thrash/workloads/rbd_api_tests_copy_on_read.yaml new file mode 100644 index 00000000000..c013ac5ca13 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_api_tests_copy_on_read.yaml @@ -0,0 +1,12 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "13" +overrides: + ceph: + conf: + client: + rbd clone copy on read: true diff --git a/qa/suites/rbd/thrash/workloads/rbd_api_tests_no_locking.yaml b/qa/suites/rbd/thrash/workloads/rbd_api_tests_no_locking.yaml new file mode 100644 index 00000000000..188ddc56c60 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_api_tests_no_locking.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + clients: + client.0: + - 
rbd/test_librbd.sh + env: + RBD_FEATURES: "1" diff --git a/qa/suites/rbd/thrash/workloads/rbd_fsx_cache_writeback.yaml b/qa/suites/rbd/thrash/workloads/rbd_fsx_cache_writeback.yaml new file mode 100644 index 00000000000..bd812695c83 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_fsx_cache_writeback.yaml @@ -0,0 +1,9 @@ +tasks: +- rbd_fsx: + clients: [client.0] + ops: 2000 +overrides: + ceph: + conf: + client: + rbd cache: true diff --git a/qa/suites/rbd/thrash/workloads/rbd_fsx_cache_writethrough.yaml b/qa/suites/rbd/thrash/workloads/rbd_fsx_cache_writethrough.yaml new file mode 100644 index 00000000000..56895298025 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_fsx_cache_writethrough.yaml @@ -0,0 +1,10 @@ +tasks: +- rbd_fsx: + clients: [client.0] + ops: 2000 +overrides: + ceph: + conf: + client: + rbd cache: true + rbd cache max dirty: 0 diff --git a/qa/suites/rbd/thrash/workloads/rbd_fsx_copy_on_read.yaml b/qa/suites/rbd/thrash/workloads/rbd_fsx_copy_on_read.yaml new file mode 100644 index 00000000000..a4860728141 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_fsx_copy_on_read.yaml @@ -0,0 +1,10 @@ +tasks: +- rbd_fsx: + clients: [client.0] + ops: 2000 +overrides: + ceph: + conf: + client: + rbd cache: true + rbd clone copy on read: true diff --git a/qa/suites/rbd/thrash/workloads/rbd_fsx_nocache.yaml b/qa/suites/rbd/thrash/workloads/rbd_fsx_nocache.yaml new file mode 100644 index 00000000000..6c5e0e45707 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_fsx_nocache.yaml @@ -0,0 +1,9 @@ +tasks: +- rbd_fsx: + clients: [client.0] + ops: 2000 +overrides: + ceph: + conf: + client: + rbd cache: false diff --git a/qa/suites/rest/basic/tasks/rest_test.yaml b/qa/suites/rest/basic/tasks/rest_test.yaml new file mode 100644 index 00000000000..a5ef6a6e597 --- /dev/null +++ b/qa/suites/rest/basic/tasks/rest_test.yaml @@ -0,0 +1,26 @@ +roles: +- - mon.a + - mds.a + - osd.0 + - osd.1 +- - mon.b + - mon.c + - osd.2 + - osd.3 + - client.0 + +tasks: +- 
install: +- ceph: + log-whitelist: + - wrongly marked me down + conf: + client.rest0: + debug ms: 1 + debug objecter: 20 + debug rados: 20 +- rest-api: [client.0] +- workunit: + clients: + client.0: + - rest/test.py diff --git a/qa/suites/rgw/multifs/% b/qa/suites/rgw/multifs/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rgw/multifs/clusters/fixed-2.yaml b/qa/suites/rgw/multifs/clusters/fixed-2.yaml new file mode 120000 index 00000000000..cd0791a1486 --- /dev/null +++ b/qa/suites/rgw/multifs/clusters/fixed-2.yaml @@ -0,0 +1 @@ +../../../../clusters/fixed-2.yaml \ No newline at end of file diff --git a/qa/suites/rgw/multifs/frontend/apache.yaml b/qa/suites/rgw/multifs/frontend/apache.yaml new file mode 100644 index 00000000000..53ebf758ed3 --- /dev/null +++ b/qa/suites/rgw/multifs/frontend/apache.yaml @@ -0,0 +1,3 @@ +overrides: + rgw: + frontend: apache diff --git a/qa/suites/rgw/multifs/frontend/civetweb.yaml b/qa/suites/rgw/multifs/frontend/civetweb.yaml new file mode 100644 index 00000000000..5845a0e6c15 --- /dev/null +++ b/qa/suites/rgw/multifs/frontend/civetweb.yaml @@ -0,0 +1,3 @@ +overrides: + rgw: + frontend: civetweb diff --git a/qa/suites/rgw/multifs/fs/btrfs.yaml b/qa/suites/rgw/multifs/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/rgw/multifs/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/rgw/multifs/fs/ext4.yaml b/qa/suites/rgw/multifs/fs/ext4.yaml new file mode 120000 index 00000000000..65d71886933 --- /dev/null +++ b/qa/suites/rgw/multifs/fs/ext4.yaml @@ -0,0 +1 @@ +../../../../fs/ext4.yaml \ No newline at end of file diff --git a/qa/suites/rgw/multifs/fs/xfs.yaml b/qa/suites/rgw/multifs/fs/xfs.yaml new file mode 120000 index 00000000000..4c28d731f6b --- /dev/null +++ b/qa/suites/rgw/multifs/fs/xfs.yaml @@ -0,0 +1 @@ +../../../../fs/xfs.yaml \ No newline at end of file diff --git 
a/qa/suites/rgw/multifs/overrides.yaml b/qa/suites/rgw/multifs/overrides.yaml new file mode 100644 index 00000000000..9b2063f0699 --- /dev/null +++ b/qa/suites/rgw/multifs/overrides.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + debug rgw: 20 diff --git a/qa/suites/rgw/multifs/rgw_pool_type b/qa/suites/rgw/multifs/rgw_pool_type new file mode 120000 index 00000000000..0506f616ce2 --- /dev/null +++ b/qa/suites/rgw/multifs/rgw_pool_type @@ -0,0 +1 @@ +../../../rgw_pool_type \ No newline at end of file diff --git a/qa/suites/rgw/multifs/tasks/rgw_bucket_quota.yaml b/qa/suites/rgw/multifs/tasks/rgw_bucket_quota.yaml new file mode 100644 index 00000000000..767debdf3c8 --- /dev/null +++ b/qa/suites/rgw/multifs/tasks/rgw_bucket_quota.yaml @@ -0,0 +1,8 @@ +tasks: +- install: +- ceph: +- rgw: [client.0] +- workunit: + clients: + client.0: + - rgw/s3_bucket_quota.pl diff --git a/qa/suites/rgw/multifs/tasks/rgw_multipart_upload.yaml b/qa/suites/rgw/multifs/tasks/rgw_multipart_upload.yaml new file mode 100644 index 00000000000..1781dee096b --- /dev/null +++ b/qa/suites/rgw/multifs/tasks/rgw_multipart_upload.yaml @@ -0,0 +1,8 @@ +tasks: +- install: +- ceph: +- rgw: [client.0] +- workunit: + clients: + client.0: + - rgw/s3_multipart_upload.pl diff --git a/qa/suites/rgw/multifs/tasks/rgw_readwrite.yaml b/qa/suites/rgw/multifs/tasks/rgw_readwrite.yaml new file mode 100644 index 00000000000..c7efaa1c757 --- /dev/null +++ b/qa/suites/rgw/multifs/tasks/rgw_readwrite.yaml @@ -0,0 +1,16 @@ +tasks: +- install: +- ceph: +- rgw: [client.0] +- s3readwrite: + client.0: + rgw_server: client.0 + readwrite: + bucket: rwtest + readers: 10 + writers: 3 + duration: 300 + files: + num: 10 + size: 2000 + stddev: 500 diff --git a/qa/suites/rgw/multifs/tasks/rgw_roundtrip.yaml b/qa/suites/rgw/multifs/tasks/rgw_roundtrip.yaml new file mode 100644 index 00000000000..47b3c1894a2 --- /dev/null +++ b/qa/suites/rgw/multifs/tasks/rgw_roundtrip.yaml @@ -0,0 +1,16 @@ +tasks: +- install: +- ceph: 
+- rgw: [client.0] +- s3roundtrip: + client.0: + rgw_server: client.0 + roundtrip: + bucket: rttest + readers: 10 + writers: 3 + duration: 300 + files: + num: 10 + size: 2000 + stddev: 500 diff --git a/qa/suites/rgw/multifs/tasks/rgw_s3tests.yaml b/qa/suites/rgw/multifs/tasks/rgw_s3tests.yaml new file mode 100644 index 00000000000..62608773a2a --- /dev/null +++ b/qa/suites/rgw/multifs/tasks/rgw_s3tests.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: +- rgw: [client.0] +- s3tests: + client.0: + rgw_server: client.0 diff --git a/qa/suites/rgw/multifs/tasks/rgw_swift.yaml b/qa/suites/rgw/multifs/tasks/rgw_swift.yaml new file mode 100644 index 00000000000..569741b0e15 --- /dev/null +++ b/qa/suites/rgw/multifs/tasks/rgw_swift.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: +- rgw: [client.0] +- swift: + client.0: + rgw_server: client.0 diff --git a/qa/suites/rgw/multifs/tasks/rgw_user_quota.yaml b/qa/suites/rgw/multifs/tasks/rgw_user_quota.yaml new file mode 100644 index 00000000000..c2c38a816cc --- /dev/null +++ b/qa/suites/rgw/multifs/tasks/rgw_user_quota.yaml @@ -0,0 +1,8 @@ +tasks: +- install: +- ceph: +- rgw: [client.0] +- workunit: + clients: + client.0: + - rgw/s3_user_quota.pl diff --git a/qa/suites/rgw/singleton/% b/qa/suites/rgw/singleton/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rgw/singleton/all/radosgw-admin-data-sync.yaml b/qa/suites/rgw/singleton/all/radosgw-admin-data-sync.yaml new file mode 100644 index 00000000000..a619f225d9c --- /dev/null +++ b/qa/suites/rgw/singleton/all/radosgw-admin-data-sync.yaml @@ -0,0 +1,56 @@ +roles: +- [mon.a, osd.0, osd.1, osd.2, osd.3, client.0, client.1] +tasks: +- install: +- ceph: + conf: + client: + debug ms: 1 + rgw gc obj min wait: 15 + rgw data log window: 30 + osd: + debug ms: 1 + debug objclass : 20 + client.0: + rgw region: region0 + rgw zone: r0z0 + rgw region root pool: .rgw.region.0 + rgw zone root pool: .rgw.zone.0 + rgw gc pool: .rgw.gc.0 + rgw user uid pool: 
.users.uid.0 + rgw user keys pool: .users.0 + rgw log data: True + rgw log meta: True + client.1: + rgw region: region0 + rgw zone: r0z1 + rgw region root pool: .rgw.region.0 + rgw zone root pool: .rgw.zone.1 + rgw gc pool: .rgw.gc.1 + rgw user uid pool: .users.uid.1 + rgw user keys pool: .users.1 + rgw log data: False + rgw log meta: False +- rgw: + regions: + region0: + api name: api1 + is master: True + master zone: r0z0 + zones: [r0z0, r0z1] + client.0: + system user: + name: client0-system-user + access key: 0te6NH5mcdcq0Tc5i8i2 + secret key: Oy4IOauQoL18Gp2zM7lC1vLmoawgqcYPbYGcWfXv + client.1: + system user: + name: client1-system-user + access key: 1te6NH5mcdcq0Tc5i8i3 + secret key: Py4IOauQoL18Gp2zM7lC1vLmoawgqcYPbYGcWfXw +- radosgw-agent: + client.0: + max-entries: 10 + src: client.0 + dest: client.1 +- radosgw-admin: diff --git a/qa/suites/rgw/singleton/all/radosgw-admin-multi-region.yaml b/qa/suites/rgw/singleton/all/radosgw-admin-multi-region.yaml new file mode 100644 index 00000000000..5ac26c288b3 --- /dev/null +++ b/qa/suites/rgw/singleton/all/radosgw-admin-multi-region.yaml @@ -0,0 +1,61 @@ +roles: +- [mon.a, osd.0, osd.1, osd.2, client.0] +- [mon.b, mon.c, osd.3, osd.4, osd.5, client.1] +tasks: +- install: +- ceph: + conf: + client: + debug ms: 1 + rgw gc obj min wait: 15 + osd: + debug ms: 1 + debug objclass : 20 + client.0: + rgw region: region0 + rgw zone: r0z1 + rgw region root pool: .rgw.region.0 + rgw zone root pool: .rgw.zone.0 + rgw gc pool: .rgw.gc.0 + rgw user uid pool: .users.uid.0 + rgw user keys pool: .users.0 + rgw log data: True + rgw log meta: True + client.1: + rgw region: region1 + rgw zone: r1z1 + rgw region root pool: .rgw.region.1 + rgw zone root pool: .rgw.zone.1 + rgw gc pool: .rgw.gc.1 + rgw user uid pool: .users.uid.1 + rgw user keys pool: .users.1 + rgw log data: False + rgw log meta: False +- rgw: + regions: + region0: + api name: api1 + is master: True + master zone: r0z1 + zones: [r0z1] + region1: + api name: api1 + is 
master: False + master zone: r1z1 + zones: [r1z1] + client.0: + system user: + name: client0-system-user + access key: 0te6NH5mcdcq0Tc5i8i2 + secret key: Oy4IOauQoL18Gp2zM7lC1vLmoawgqcYPbYGcWfXv + client.1: + system user: + name: client1-system-user + access key: 1te6NH5mcdcq0Tc5i8i3 + secret key: Py4IOauQoL18Gp2zM7lC1vLmoawgqcYPbYGcWfXw +- radosgw-agent: + client.0: + src: client.0 + dest: client.1 + metadata-only: true +- radosgw-admin: diff --git a/qa/suites/rgw/singleton/all/radosgw-admin.yaml b/qa/suites/rgw/singleton/all/radosgw-admin.yaml new file mode 100644 index 00000000000..fd2a131fdbb --- /dev/null +++ b/qa/suites/rgw/singleton/all/radosgw-admin.yaml @@ -0,0 +1,15 @@ +roles: +- [mon.a, osd.0, client.0, osd.1, osd.2, osd.3] +tasks: +- install: +- ceph: + conf: + client: + debug ms: 1 + rgw gc obj min wait: 15 + osd: + debug ms: 1 + debug objclass : 20 +- rgw: + client.0: +- radosgw-admin: diff --git a/qa/suites/rgw/singleton/all/radosgw-convert-to-region.yaml b/qa/suites/rgw/singleton/all/radosgw-convert-to-region.yaml new file mode 100644 index 00000000000..292a1d2ae01 --- /dev/null +++ b/qa/suites/rgw/singleton/all/radosgw-convert-to-region.yaml @@ -0,0 +1,73 @@ +overrides: + s3readwrite: + s3: + user_id: s3readwrite-test-user + display_name: test user for the s3readwrite tests + email: tester@inktank + access_key: 2te6NH5mcdcq0Tc5i8i4 + secret_key: Qy4IOauQoL18Gp2zM7lC1vLmoawgqcYPbYGcWfXx + readwrite: + deterministic_file_names: True + duration: 30 + bucket: testbucket + files: + num: 10 + size: 2000 + stddev: 500 +roles: +- [mon.a, osd.0, osd.1, osd.2, client.0] +- [mon.b, mon.c, osd.3, osd.4, osd.5, client.1] + +tasks: +- install: +- ceph: + conf: + client: + rgw region: default + rgw zone: r1z1 + rgw region root pool: .rgw + rgw zone root pool: .rgw + rgw domain root: .rgw + rgw gc pool: .rgw.gc + rgw user uid pool: .users.uid + rgw user keys pool: .users +- rgw: + regions: + default: + api name: api1 + is master: true + master zone: r1z1 + zones: 
[r1z1] + client.0: + system user: + name: nr-system + access key: 0te6NH5mcdcq0Tc5i8i2 + secret key: Oy4IOauQoL18Gp2zM7lC1vLmoawgqcYPbYGcWfXv +- s3readwrite: + client.0: + extra_args: ['--no-cleanup'] + s3: + delete_user: False + readwrite: + writers: 1 + readers: 0 +- rgw: + regions: + default: + api name: api1 + is master: true + master zone: r1z1 + zones: [r1z1] + client.1: + system user: + name: r2-system + access key: 1te6NH5mcdcq0Tc5i8i3 + secret key: Py4IOauQoL18Gp2zM7lC1vLmoawgqcYPbYGcWfXw +- s3readwrite: + client.1: + s3: + create_user: False + readwrite: + writers: 0 + readers: 2 + diff --git a/qa/suites/rgw/singleton/frontend/apache.yaml b/qa/suites/rgw/singleton/frontend/apache.yaml new file mode 100644 index 00000000000..53ebf758ed3 --- /dev/null +++ b/qa/suites/rgw/singleton/frontend/apache.yaml @@ -0,0 +1,3 @@ +overrides: + rgw: + frontend: apache diff --git a/qa/suites/rgw/singleton/frontend/civetweb.yaml b/qa/suites/rgw/singleton/frontend/civetweb.yaml new file mode 100644 index 00000000000..5845a0e6c15 --- /dev/null +++ b/qa/suites/rgw/singleton/frontend/civetweb.yaml @@ -0,0 +1,3 @@ +overrides: + rgw: + frontend: civetweb diff --git a/qa/suites/rgw/singleton/overrides.yaml b/qa/suites/rgw/singleton/overrides.yaml new file mode 100644 index 00000000000..9b2063f0699 --- /dev/null +++ b/qa/suites/rgw/singleton/overrides.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + debug rgw: 20 diff --git a/qa/suites/rgw/singleton/rgw_pool_type b/qa/suites/rgw/singleton/rgw_pool_type new file mode 120000 index 00000000000..77fa7e71b78 --- /dev/null +++ b/qa/suites/rgw/singleton/rgw_pool_type @@ -0,0 +1 @@ +../../../rgw_pool_type/ \ No newline at end of file diff --git a/qa/suites/rgw/verify/% b/qa/suites/rgw/verify/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rgw/verify/clusters/fixed-2.yaml b/qa/suites/rgw/verify/clusters/fixed-2.yaml new file mode 120000 index 00000000000..cd0791a1486 --- /dev/null +++ 
b/qa/suites/rgw/verify/clusters/fixed-2.yaml @@ -0,0 +1 @@ +../../../../clusters/fixed-2.yaml \ No newline at end of file diff --git a/qa/suites/rgw/verify/frontend/apache.yaml b/qa/suites/rgw/verify/frontend/apache.yaml new file mode 100644 index 00000000000..53ebf758ed3 --- /dev/null +++ b/qa/suites/rgw/verify/frontend/apache.yaml @@ -0,0 +1,3 @@ +overrides: + rgw: + frontend: apache diff --git a/qa/suites/rgw/verify/frontend/civetweb.yaml b/qa/suites/rgw/verify/frontend/civetweb.yaml new file mode 100644 index 00000000000..5845a0e6c15 --- /dev/null +++ b/qa/suites/rgw/verify/frontend/civetweb.yaml @@ -0,0 +1,3 @@ +overrides: + rgw: + frontend: civetweb diff --git a/qa/suites/rgw/verify/fs/btrfs.yaml b/qa/suites/rgw/verify/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/rgw/verify/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/rgw/verify/msgr-failures/few.yaml b/qa/suites/rgw/verify/msgr-failures/few.yaml new file mode 100644 index 00000000000..0de320d46b8 --- /dev/null +++ b/qa/suites/rgw/verify/msgr-failures/few.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 diff --git a/qa/suites/rgw/verify/overrides.yaml b/qa/suites/rgw/verify/overrides.yaml new file mode 100644 index 00000000000..9b2063f0699 --- /dev/null +++ b/qa/suites/rgw/verify/overrides.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + debug rgw: 20 diff --git a/qa/suites/rgw/verify/rgw_pool_type b/qa/suites/rgw/verify/rgw_pool_type new file mode 120000 index 00000000000..77fa7e71b78 --- /dev/null +++ b/qa/suites/rgw/verify/rgw_pool_type @@ -0,0 +1 @@ +../../../rgw_pool_type/ \ No newline at end of file diff --git a/qa/suites/rgw/verify/tasks/rgw_s3tests.yaml b/qa/suites/rgw/verify/tasks/rgw_s3tests.yaml new file mode 100644 index 00000000000..c23a2cbf4ec --- /dev/null +++ b/qa/suites/rgw/verify/tasks/rgw_s3tests.yaml @@ -0,0 +1,10 @@ +tasks: 
+- install: + flavor: notcmalloc +- ceph: +- rgw: + client.0: + valgrind: [--tool=memcheck] +- s3tests: + client.0: + rgw_server: client.0 diff --git a/qa/suites/rgw/verify/tasks/rgw_s3tests_multiregion.yaml b/qa/suites/rgw/verify/tasks/rgw_s3tests_multiregion.yaml new file mode 100644 index 00000000000..399f4aac2f6 --- /dev/null +++ b/qa/suites/rgw/verify/tasks/rgw_s3tests_multiregion.yaml @@ -0,0 +1,59 @@ +tasks: +- install: + flavor: notcmalloc +- ceph: + conf: + client.0: + rgw region: zero + rgw zone: r0z1 + rgw region root pool: .rgw.region.0 + rgw zone root pool: .rgw.zone.0 + rgw gc pool: .rgw.gc.0 + rgw user uid pool: .users.uid.0 + rgw user keys pool: .users.0 + rgw log data: True + rgw log meta: True + client.1: + rgw region: one + rgw zone: r1z1 + rgw region root pool: .rgw.region.1 + rgw zone root pool: .rgw.zone.1 + rgw gc pool: .rgw.gc.1 + rgw user uid pool: .users.uid.1 + rgw user keys pool: .users.1 + rgw log data: False + rgw log meta: False +- rgw: + default_idle_timeout: 300 + regions: + zero: + api name: api1 + is master: True + master zone: r0z1 + zones: [r0z1] + one: + api name: api1 + is master: False + master zone: r1z1 + zones: [r1z1] + client.0: + valgrind: [--tool=memcheck] + system user: + name: client0-system-user + access key: 1te6NH5mcdcq0Tc5i8i2 + secret key: 1y4IOauQoL18Gp2zM7lC1vLmoawgqcYPbYGcWfXv + client.1: + valgrind: [--tool=memcheck] + system user: + name: client1-system-user + access key: 0te6NH5mcdcq0Tc5i8i2 + secret key: Oy4IOauQoL18Gp2zM7lC1vLmoawgqcYPbYGcWfXv +- radosgw-agent: + client.0: + src: client.0 + dest: client.1 + metadata-only: true +- s3tests: + client.0: + idle_timeout: 300 + rgw_server: client.0 diff --git a/qa/suites/rgw/verify/tasks/rgw_swift.yaml b/qa/suites/rgw/verify/tasks/rgw_swift.yaml new file mode 100644 index 00000000000..792fb848a9e --- /dev/null +++ b/qa/suites/rgw/verify/tasks/rgw_swift.yaml @@ -0,0 +1,10 @@ +tasks: +- install: + flavor: notcmalloc +- ceph: +- rgw: + client.0: + valgrind: 
[--tool=memcheck] +- swift: + client.0: + rgw_server: client.0 diff --git a/qa/suites/rgw/verify/validater/lockdep.yaml b/qa/suites/rgw/verify/validater/lockdep.yaml new file mode 100644 index 00000000000..941fe12b1e4 --- /dev/null +++ b/qa/suites/rgw/verify/validater/lockdep.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + osd: + lockdep: true + mon: + lockdep: true diff --git a/qa/suites/rgw/verify/validater/valgrind.yaml b/qa/suites/rgw/verify/validater/valgrind.yaml new file mode 100644 index 00000000000..0b28c183065 --- /dev/null +++ b/qa/suites/rgw/verify/validater/valgrind.yaml @@ -0,0 +1,12 @@ +overrides: + install: + ceph: + flavor: notcmalloc + ceph: + conf: + global: + osd heartbeat grace: 40 + valgrind: + mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes] + osd: [--tool=memcheck] + mds: [--tool=memcheck] diff --git a/qa/suites/samba/% b/qa/suites/samba/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/samba/clusters/samba-basic.yaml b/qa/suites/samba/clusters/samba-basic.yaml new file mode 100644 index 00000000000..caced4a26d1 --- /dev/null +++ b/qa/suites/samba/clusters/samba-basic.yaml @@ -0,0 +1,3 @@ +roles: +- [mon.a, mon.b, mon.c, mds.a, osd.0, osd.1] +- [samba.0, client.0, client.1] diff --git a/qa/suites/samba/debug/mds_client.yaml b/qa/suites/samba/debug/mds_client.yaml new file mode 120000 index 00000000000..2550b024ded --- /dev/null +++ b/qa/suites/samba/debug/mds_client.yaml @@ -0,0 +1 @@ +../../../debug/mds_client.yaml \ No newline at end of file diff --git a/qa/suites/samba/fs/btrfs.yaml b/qa/suites/samba/fs/btrfs.yaml new file mode 120000 index 00000000000..ea693ab0b42 --- /dev/null +++ b/qa/suites/samba/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/samba/install/install.yaml b/qa/suites/samba/install/install.yaml new file mode 100644 index 00000000000..c53f9c55b17 --- /dev/null +++ b/qa/suites/samba/install/install.yaml @@ -0,0 +1,9 @@ +# we 
currently can't install Samba on RHEL; need a gitbuilder and code updates +os_type: ubuntu + +tasks: +- install: +- install: + project: samba + extra_packages: ['samba'] +- ceph: diff --git a/qa/suites/samba/mount/fuse.yaml b/qa/suites/samba/mount/fuse.yaml new file mode 100644 index 00000000000..d00ffdb4804 --- /dev/null +++ b/qa/suites/samba/mount/fuse.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: [client.0] +- samba: + samba.0: + ceph: "{testdir}/mnt.0" + diff --git a/qa/suites/samba/mount/kclient.yaml b/qa/suites/samba/mount/kclient.yaml new file mode 100644 index 00000000000..14fee85d266 --- /dev/null +++ b/qa/suites/samba/mount/kclient.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +tasks: +- kclient: [client.0] +- samba: + samba.0: + ceph: "{testdir}/mnt.0" + diff --git a/qa/suites/samba/mount/native.yaml b/qa/suites/samba/mount/native.yaml new file mode 100644 index 00000000000..09b8c1c4e3d --- /dev/null +++ b/qa/suites/samba/mount/native.yaml @@ -0,0 +1,2 @@ +tasks: +- samba: diff --git a/qa/suites/samba/mount/noceph.yaml b/qa/suites/samba/mount/noceph.yaml new file mode 100644 index 00000000000..3cad4740d8b --- /dev/null +++ b/qa/suites/samba/mount/noceph.yaml @@ -0,0 +1,5 @@ +tasks: +- localdir: [client.0] +- samba: + samba.0: + ceph: "{testdir}/mnt.0" diff --git a/qa/suites/samba/workload/cifs-dbench.yaml b/qa/suites/samba/workload/cifs-dbench.yaml new file mode 100644 index 00000000000..c13c1c099e5 --- /dev/null +++ b/qa/suites/samba/workload/cifs-dbench.yaml @@ -0,0 +1,8 @@ +tasks: +- cifs-mount: + client.1: + share: ceph +- workunit: + clients: + client.1: + - suites/dbench.sh diff --git a/qa/suites/samba/workload/cifs-fsstress.yaml b/qa/suites/samba/workload/cifs-fsstress.yaml new file mode 100644 index 00000000000..ff003af3433 --- /dev/null +++ b/qa/suites/samba/workload/cifs-fsstress.yaml @@ -0,0 +1,8 @@ +tasks: +- cifs-mount: + client.1: + share: ceph +- workunit: + clients: + client.1: + - 
suites/fsstress.sh diff --git a/qa/suites/samba/workload/cifs-kernel-build.yaml.disabled b/qa/suites/samba/workload/cifs-kernel-build.yaml.disabled new file mode 100644 index 00000000000..ab9ff8ac731 --- /dev/null +++ b/qa/suites/samba/workload/cifs-kernel-build.yaml.disabled @@ -0,0 +1,9 @@ +tasks: +- cifs-mount: + client.1: + share: ceph +- workunit: + clients: + client.1: + - kernel_untar_build.sh + diff --git a/qa/suites/samba/workload/smbtorture.yaml b/qa/suites/samba/workload/smbtorture.yaml new file mode 100644 index 00000000000..823489a2082 --- /dev/null +++ b/qa/suites/samba/workload/smbtorture.yaml @@ -0,0 +1,39 @@ +tasks: +- pexec: + client.1: + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.lock + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.fdpass + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.unlink + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.attr + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.trans2 + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.negnowait + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.dir1 + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.deny1 + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.deny2 + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.deny3 + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.denydos + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.ntdeny1 + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.ntdeny2 + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.tcon + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.tcondev + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.vuid + - 
/usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.rw1 + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.open + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.defer_open + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.xcopy + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.rename + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.properties + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.mangle + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.openattr + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.chkpath + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.secleak + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.disconnect + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.samba3error + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.smb +# - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.bench-holdcon +# - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.bench-holdopen + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.bench-readwrite + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.bench-torture + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.scan-pipe_number + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.scan-ioctl +# - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.scan-maxfid diff --git a/qa/suites/smoke/basic/% b/qa/suites/smoke/basic/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/smoke/basic/clusters/fixed-3-cephfs.yaml b/qa/suites/smoke/basic/clusters/fixed-3-cephfs.yaml new file mode 120000 index 
00000000000..a482e650421 --- /dev/null +++ b/qa/suites/smoke/basic/clusters/fixed-3-cephfs.yaml @@ -0,0 +1 @@ +../../../../clusters/fixed-3-cephfs.yaml \ No newline at end of file diff --git a/qa/suites/smoke/basic/fs/btrfs.yaml b/qa/suites/smoke/basic/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/smoke/basic/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/smoke/basic/tasks/cfuse_workunit_suites_blogbench.yaml b/qa/suites/smoke/basic/tasks/cfuse_workunit_suites_blogbench.yaml new file mode 100644 index 00000000000..2ee417723b0 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/cfuse_workunit_suites_blogbench.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: + fs: xfs +- ceph-fuse: +- workunit: + clients: + all: + - suites/blogbench.sh diff --git a/qa/suites/smoke/basic/tasks/cfuse_workunit_suites_fsstress.yaml b/qa/suites/smoke/basic/tasks/cfuse_workunit_suites_fsstress.yaml new file mode 100644 index 00000000000..cd12eaef570 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/cfuse_workunit_suites_fsstress.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: + fs: btrfs +- ceph-fuse: +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/smoke/basic/tasks/cfuse_workunit_suites_iozone.yaml b/qa/suites/smoke/basic/tasks/cfuse_workunit_suites_iozone.yaml new file mode 100644 index 00000000000..c4be4cd1de9 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/cfuse_workunit_suites_iozone.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: + fs: btrfs +- ceph-fuse: [client.0] +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/smoke/basic/tasks/cfuse_workunit_suites_pjd.yaml b/qa/suites/smoke/basic/tasks/cfuse_workunit_suites_pjd.yaml new file mode 100644 index 00000000000..d042daa716d --- /dev/null +++ b/qa/suites/smoke/basic/tasks/cfuse_workunit_suites_pjd.yaml @@ -0,0 +1,16 @@ +tasks: +- install: +- ceph: + fs: xfs + conf: + mds: + 
debug mds: 20 + debug ms: 1 + client: + debug client: 20 + debug ms: 1 +- ceph-fuse: +- workunit: + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/smoke/basic/tasks/kclient_workunit_direct_io.yaml b/qa/suites/smoke/basic/tasks/kclient_workunit_direct_io.yaml new file mode 100644 index 00000000000..29ccf463051 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/kclient_workunit_direct_io.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +tasks: +- install: +- ceph: + fs: btrfs +- kclient: +- workunit: + clients: + all: + - direct_io diff --git a/qa/suites/smoke/basic/tasks/kclient_workunit_suites_dbench.yaml b/qa/suites/smoke/basic/tasks/kclient_workunit_suites_dbench.yaml new file mode 100644 index 00000000000..01d7470a50e --- /dev/null +++ b/qa/suites/smoke/basic/tasks/kclient_workunit_suites_dbench.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +tasks: +- install: +- ceph: + fs: xfs +- kclient: +- workunit: + clients: + all: + - suites/dbench.sh diff --git a/qa/suites/smoke/basic/tasks/kclient_workunit_suites_fsstress.yaml b/qa/suites/smoke/basic/tasks/kclient_workunit_suites_fsstress.yaml new file mode 100644 index 00000000000..42d6b97c5b5 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/kclient_workunit_suites_fsstress.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +tasks: +- install: +- ceph: + fs: xfs +- kclient: +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/smoke/basic/tasks/kclient_workunit_suites_pjd.yaml b/qa/suites/smoke/basic/tasks/kclient_workunit_suites_pjd.yaml new file mode 100644 index 00000000000..6818a2a6833 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/kclient_workunit_suites_pjd.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +tasks: +- install: +- ceph: + fs: xfs +- kclient: +- workunit: + clients: + all: + - 
suites/pjd.sh diff --git a/qa/suites/smoke/basic/tasks/libcephfs_interface_tests.yaml b/qa/suites/smoke/basic/tasks/libcephfs_interface_tests.yaml new file mode 100644 index 00000000000..5273c931ec7 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/libcephfs_interface_tests.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + client: + debug ms: 1 + debug client: 20 +tasks: +- install: +- ceph: + fs: btrfs +- ceph-fuse: +- workunit: + clients: + client.0: + - libcephfs/test.sh diff --git a/qa/suites/smoke/basic/tasks/mon_thrash.yaml b/qa/suites/smoke/basic/tasks/mon_thrash.yaml new file mode 100644 index 00000000000..0ce6d61a6ff --- /dev/null +++ b/qa/suites/smoke/basic/tasks/mon_thrash.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + conf: + global: + ms inject delay max: 1 + ms inject delay probability: 0.005 + ms inject delay type: mon + ms inject internal delays: 0.002 + ms inject socket failures: 2500 +tasks: +- install: null +- ceph: + fs: xfs +- mon_thrash: + revive_delay: 90 + thrash_delay: 1 + thrash_many: true +- workunit: + clients: + client.0: + - rados/test.sh diff --git a/qa/suites/smoke/basic/tasks/rados_api_tests.yaml b/qa/suites/smoke/basic/tasks/rados_api_tests.yaml new file mode 100644 index 00000000000..7049319ea8b --- /dev/null +++ b/qa/suites/smoke/basic/tasks/rados_api_tests.yaml @@ -0,0 +1,15 @@ +tasks: +- install: null +- ceph: + fs: ext4 + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost +- thrashosds: + chance_pgnum_grow: 2 + chance_pgpnum_fix: 1 + timeout: 1200 +- workunit: + clients: + client.0: + - rados/test.sh diff --git a/qa/suites/smoke/basic/tasks/rados_bench.yaml b/qa/suites/smoke/basic/tasks/rados_bench.yaml new file mode 100644 index 00000000000..f93b4a610ce --- /dev/null +++ b/qa/suites/smoke/basic/tasks/rados_bench.yaml @@ -0,0 +1,24 @@ +overrides: + ceph: + conf: + global: + ms inject delay max: 1 + ms inject delay probability: 0.005 + ms inject delay type: osd + ms inject internal delays: 0.002 + ms inject 
socket failures: 2500 +tasks: +- install: null +- ceph: + fs: xfs + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost +- thrashosds: + chance_pgnum_grow: 2 + chance_pgpnum_fix: 1 + timeout: 1200 +- radosbench: + clients: + - client.0 + time: 1800 diff --git a/qa/suites/smoke/basic/tasks/rados_cache_snaps.yaml b/qa/suites/smoke/basic/tasks/rados_cache_snaps.yaml new file mode 100644 index 00000000000..37ef5092f02 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/rados_cache_snaps.yaml @@ -0,0 +1,41 @@ +tasks: +- install: null +- ceph: + fs: btrfs + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost +- thrashosds: + chance_pgnum_grow: 2 + chance_pgpnum_fix: 1 + timeout: 1200 +- exec: + client.0: + - ceph osd pool create base 4 + - ceph osd pool create cache 4 + - ceph osd tier add base cache + - ceph osd tier cache-mode cache writeback + - ceph osd tier set-overlay base cache + - ceph osd pool set cache hit_set_type bloom + - ceph osd pool set cache hit_set_count 8 + - ceph osd pool set cache hit_set_period 3600 + - ceph osd pool set cache target_max_objects 250 +- rados: + clients: + - client.0 + objects: 500 + op_weights: + copy_from: 50 + delete: 50 + evict: 50 + flush: 50 + read: 100 + rollback: 50 + snap_create: 50 + snap_remove: 50 + try_flush: 50 + write: 100 + ops: 4000 + pool_snaps: true + pools: + - base diff --git a/qa/suites/smoke/basic/tasks/rados_cls_all.yaml b/qa/suites/smoke/basic/tasks/rados_cls_all.yaml new file mode 100644 index 00000000000..7f18a7e95bd --- /dev/null +++ b/qa/suites/smoke/basic/tasks/rados_cls_all.yaml @@ -0,0 +1,8 @@ +tasks: +- install: +- ceph: + fs: xfs +- workunit: + clients: + client.0: + - cls diff --git a/qa/suites/smoke/basic/tasks/rados_ec_snaps.yaml b/qa/suites/smoke/basic/tasks/rados_ec_snaps.yaml new file mode 100644 index 00000000000..d9282bfa41d --- /dev/null +++ b/qa/suites/smoke/basic/tasks/rados_ec_snaps.yaml @@ -0,0 +1,31 @@ +tasks: +- install: null +- 
ceph: + fs: xfs + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost +- thrashosds: + chance_pgnum_grow: 3 + chance_pgpnum_fix: 1 + timeout: 1200 +- rados: + clients: + - client.0 + ec_pool: true + max_in_flight: 64 + max_seconds: 600 + objects: 1024 + op_weights: + append: 100 + copy_from: 50 + delete: 50 + read: 100 + rmattr: 25 + rollback: 50 + setattr: 25 + snap_create: 50 + snap_remove: 50 + write: 0 + ops: 400000 + size: 16384 diff --git a/qa/suites/smoke/basic/tasks/rados_python.yaml b/qa/suites/smoke/basic/tasks/rados_python.yaml new file mode 100644 index 00000000000..399967cc1c9 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/rados_python.yaml @@ -0,0 +1,11 @@ +tasks: +- install: +- ceph: + fs: btrfs + log-whitelist: + - wrongly marked me down +- ceph-fuse: +- workunit: + clients: + client.0: + - rados/test_python.sh diff --git a/qa/suites/smoke/basic/tasks/rados_workunit_loadgen_mix.yaml b/qa/suites/smoke/basic/tasks/rados_workunit_loadgen_mix.yaml new file mode 100644 index 00000000000..0d472a33b79 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/rados_workunit_loadgen_mix.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: + fs: ext4 +- ceph-fuse: +- workunit: + clients: + all: + - rados/load-gen-mix.sh diff --git a/qa/suites/smoke/basic/tasks/rbd_api_tests.yaml b/qa/suites/smoke/basic/tasks/rbd_api_tests.yaml new file mode 100644 index 00000000000..a0dda21a51f --- /dev/null +++ b/qa/suites/smoke/basic/tasks/rbd_api_tests.yaml @@ -0,0 +1,11 @@ +tasks: +- install: +- ceph: + fs: xfs +- ceph-fuse: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "1" diff --git a/qa/suites/smoke/basic/tasks/rbd_cli_import_export.yaml b/qa/suites/smoke/basic/tasks/rbd_cli_import_export.yaml new file mode 100644 index 00000000000..e9f38d3a9ff --- /dev/null +++ b/qa/suites/smoke/basic/tasks/rbd_cli_import_export.yaml @@ -0,0 +1,11 @@ +tasks: +- install: +- ceph: + fs: xfs +- ceph-fuse: +- workunit: + clients: + 
client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format diff --git a/qa/suites/smoke/basic/tasks/rbd_fsx.yaml b/qa/suites/smoke/basic/tasks/rbd_fsx.yaml new file mode 100644 index 00000000000..ed737a3333e --- /dev/null +++ b/qa/suites/smoke/basic/tasks/rbd_fsx.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + conf: + client: + rbd cache: true + global: + ms inject socket failures: 5000 +tasks: +- install: null +- ceph: + fs: xfs +- thrashosds: + timeout: 1200 +- rbd_fsx: + clients: + - client.0 + ops: 2000 diff --git a/qa/suites/smoke/basic/tasks/rbd_python_api_tests.yaml b/qa/suites/smoke/basic/tasks/rbd_python_api_tests.yaml new file mode 100644 index 00000000000..7ed61d0a339 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/rbd_python_api_tests.yaml @@ -0,0 +1,11 @@ +tasks: +- install: +- ceph: + fs: btrfs +- ceph-fuse: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh + env: + RBD_FEATURES: "1" diff --git a/qa/suites/smoke/basic/tasks/rbd_workunit_suites_iozone.yaml b/qa/suites/smoke/basic/tasks/rbd_workunit_suites_iozone.yaml new file mode 100644 index 00000000000..d3591686c03 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/rbd_workunit_suites_iozone.yaml @@ -0,0 +1,16 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +tasks: +- install: +- ceph: + fs: btrfs +- rbd: + all: + image_size: 20480 +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/smoke/basic/tasks/rgw_s3tests.yaml b/qa/suites/smoke/basic/tasks/rgw_s3tests.yaml new file mode 100644 index 00000000000..73218919e03 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/rgw_s3tests.yaml @@ -0,0 +1,8 @@ +tasks: +- install: +- ceph: + fs: xfs +- rgw: [client.0] +- s3tests: + client.0: + rgw_server: client.0 diff --git a/qa/suites/smoke/basic/tasks/rgw_swift.yaml b/qa/suites/smoke/basic/tasks/rgw_swift.yaml new file mode 100644 index 00000000000..57c7226e341 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/rgw_swift.yaml @@ -0,0 +1,8 @@ 
+tasks: +- install: +- ceph: + fs: ext4 +- rgw: [client.0] +- swift: + client.0: + rgw_server: client.0 diff --git a/qa/suites/stress/bench/% b/qa/suites/stress/bench/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/stress/bench/clusters/fixed-3-cephfs.yaml b/qa/suites/stress/bench/clusters/fixed-3-cephfs.yaml new file mode 120000 index 00000000000..a482e650421 --- /dev/null +++ b/qa/suites/stress/bench/clusters/fixed-3-cephfs.yaml @@ -0,0 +1 @@ +../../../../clusters/fixed-3-cephfs.yaml \ No newline at end of file diff --git a/qa/suites/stress/bench/tasks/cfuse_workunit_snaps.yaml b/qa/suites/stress/bench/tasks/cfuse_workunit_snaps.yaml new file mode 100644 index 00000000000..eafec39e3d0 --- /dev/null +++ b/qa/suites/stress/bench/tasks/cfuse_workunit_snaps.yaml @@ -0,0 +1,8 @@ +tasks: +- install: +- ceph: +- ceph-fuse: +- workunit: + clients: + all: + - snaps diff --git a/qa/suites/stress/bench/tasks/kclient_workunit_suites_fsx.yaml b/qa/suites/stress/bench/tasks/kclient_workunit_suites_fsx.yaml new file mode 100644 index 00000000000..a0d2e765bdb --- /dev/null +++ b/qa/suites/stress/bench/tasks/kclient_workunit_suites_fsx.yaml @@ -0,0 +1,8 @@ +tasks: +- install: +- ceph: +- kclient: +- workunit: + clients: + all: + - suites/fsx.sh diff --git a/qa/suites/stress/thrash/% b/qa/suites/stress/thrash/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/stress/thrash/clusters/16-osd.yaml b/qa/suites/stress/thrash/clusters/16-osd.yaml new file mode 100644 index 00000000000..373dd4052c3 --- /dev/null +++ b/qa/suites/stress/thrash/clusters/16-osd.yaml @@ -0,0 +1,18 @@ +roles: +- [mon.0, mds.a, osd.0] +- [mon.1, osd.1] +- [mon.2, osd.2] +- [osd.3] +- [osd.4] +- [osd.5] +- [osd.6] +- [osd.7] +- [osd.8] +- [osd.9] +- [osd.10] +- [osd.11] +- [osd.12] +- [osd.13] +- [osd.14] +- [osd.15] +- [client.0] diff --git a/qa/suites/stress/thrash/clusters/3-osd-1-machine.yaml b/qa/suites/stress/thrash/clusters/3-osd-1-machine.yaml new 
file mode 100644 index 00000000000..d8ff594b95d --- /dev/null +++ b/qa/suites/stress/thrash/clusters/3-osd-1-machine.yaml @@ -0,0 +1,3 @@ +roles: +- [mon.0, mds.a, osd.0, osd.1, osd.2] +- [mon.1, mon.2, client.0] diff --git a/qa/suites/stress/thrash/clusters/8-osd.yaml b/qa/suites/stress/thrash/clusters/8-osd.yaml new file mode 100644 index 00000000000..3b131054e95 --- /dev/null +++ b/qa/suites/stress/thrash/clusters/8-osd.yaml @@ -0,0 +1,10 @@ +roles: +- [mon.0, mds.a, osd.0] +- [mon.1, osd.1] +- [mon.2, osd.2] +- [osd.3] +- [osd.4] +- [osd.5] +- [osd.6] +- [osd.7] +- [client.0] diff --git a/qa/suites/stress/thrash/fs/btrfs.yaml b/qa/suites/stress/thrash/fs/btrfs.yaml new file mode 120000 index 00000000000..10d0c3f1266 --- /dev/null +++ b/qa/suites/stress/thrash/fs/btrfs.yaml @@ -0,0 +1 @@ +../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/qa/suites/stress/thrash/fs/none.yaml b/qa/suites/stress/thrash/fs/none.yaml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/stress/thrash/fs/xfs.yaml b/qa/suites/stress/thrash/fs/xfs.yaml new file mode 120000 index 00000000000..4c28d731f6b --- /dev/null +++ b/qa/suites/stress/thrash/fs/xfs.yaml @@ -0,0 +1 @@ +../../../../fs/xfs.yaml \ No newline at end of file diff --git a/qa/suites/stress/thrash/thrashers/default.yaml b/qa/suites/stress/thrash/thrashers/default.yaml new file mode 100644 index 00000000000..14d772583cf --- /dev/null +++ b/qa/suites/stress/thrash/thrashers/default.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost +- thrashosds: diff --git a/qa/suites/stress/thrash/thrashers/fast.yaml b/qa/suites/stress/thrash/thrashers/fast.yaml new file mode 100644 index 00000000000..eea9c06cd90 --- /dev/null +++ b/qa/suites/stress/thrash/thrashers/fast.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost +- thrashosds: + 
op_delay: 1 + chance_down: 10 diff --git a/qa/suites/stress/thrash/thrashers/more-down.yaml b/qa/suites/stress/thrash/thrashers/more-down.yaml new file mode 100644 index 00000000000..e39098b1cb6 --- /dev/null +++ b/qa/suites/stress/thrash/thrashers/more-down.yaml @@ -0,0 +1,8 @@ +tasks: +- install: +- ceph: + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost +- thrashosds: + chance_down: 50 diff --git a/qa/suites/stress/thrash/workloads/bonnie_cfuse.yaml b/qa/suites/stress/thrash/workloads/bonnie_cfuse.yaml new file mode 100644 index 00000000000..912f12d6ce7 --- /dev/null +++ b/qa/suites/stress/thrash/workloads/bonnie_cfuse.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/bonnie.sh diff --git a/qa/suites/stress/thrash/workloads/iozone_cfuse.yaml b/qa/suites/stress/thrash/workloads/iozone_cfuse.yaml new file mode 100644 index 00000000000..18a6051be39 --- /dev/null +++ b/qa/suites/stress/thrash/workloads/iozone_cfuse.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/stress/thrash/workloads/radosbench.yaml b/qa/suites/stress/thrash/workloads/radosbench.yaml new file mode 100644 index 00000000000..3940870fce0 --- /dev/null +++ b/qa/suites/stress/thrash/workloads/radosbench.yaml @@ -0,0 +1,4 @@ +tasks: +- radosbench: + clients: [client.0] + time: 1800 diff --git a/qa/suites/stress/thrash/workloads/readwrite.yaml b/qa/suites/stress/thrash/workloads/readwrite.yaml new file mode 100644 index 00000000000..c53e52b0872 --- /dev/null +++ b/qa/suites/stress/thrash/workloads/readwrite.yaml @@ -0,0 +1,9 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + op_weights: + read: 45 + write: 45 + delete: 10 diff --git a/qa/suites/teuthology/buildpackages/% b/qa/suites/teuthology/buildpackages/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/teuthology/buildpackages/distros 
b/qa/suites/teuthology/buildpackages/distros new file mode 120000 index 00000000000..c5d59352cb5 --- /dev/null +++ b/qa/suites/teuthology/buildpackages/distros @@ -0,0 +1 @@ +../../../distros/supported \ No newline at end of file diff --git a/qa/suites/teuthology/buildpackages/tasks/branch.yaml b/qa/suites/teuthology/buildpackages/tasks/branch.yaml new file mode 100644 index 00000000000..b93c5d07c4a --- /dev/null +++ b/qa/suites/teuthology/buildpackages/tasks/branch.yaml @@ -0,0 +1,10 @@ +roles: + - [mon.0, client.0] +tasks: + - install: + # branch has precedence over sha1 + branch: hammer + sha1: e5b6eea91cc37434f78a987d2dd1d3edd4a23f3f # dumpling + - exec: + client.0: + - ceph --version | grep 'version 0.94' diff --git a/qa/suites/teuthology/buildpackages/tasks/default.yaml b/qa/suites/teuthology/buildpackages/tasks/default.yaml new file mode 100644 index 00000000000..cb583c7634a --- /dev/null +++ b/qa/suites/teuthology/buildpackages/tasks/default.yaml @@ -0,0 +1,14 @@ +roles: + - [client.0] +tasks: + - install: + tag: v0.94.1 + - exec: + client.0: + - ceph --version | grep 'version 0.94.1' + - install.upgrade: + client.0: + tag: v0.94.3 + - exec: + client.0: + - ceph --version | grep 'version 0.94.3' diff --git a/qa/suites/teuthology/buildpackages/tasks/tag.yaml b/qa/suites/teuthology/buildpackages/tasks/tag.yaml new file mode 100644 index 00000000000..126749c9bb0 --- /dev/null +++ b/qa/suites/teuthology/buildpackages/tasks/tag.yaml @@ -0,0 +1,11 @@ +roles: + - [mon.0, client.0] +tasks: + - install: + # tag has precedence over branch and sha1 + tag: v0.94.1 + branch: firefly + sha1: e5b6eea91cc37434f78a987d2dd1d3edd4a23f3f # dumpling + - exec: + client.0: + - ceph --version | grep 'version 0.94.1' diff --git a/qa/suites/teuthology/ceph/% b/qa/suites/teuthology/ceph/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/teuthology/ceph/clusters/single.yaml b/qa/suites/teuthology/ceph/clusters/single.yaml new file mode 100644 index 
00000000000..beba3972ce9 --- /dev/null +++ b/qa/suites/teuthology/ceph/clusters/single.yaml @@ -0,0 +1,2 @@ +roles: + - [mon.0, client.0] diff --git a/qa/suites/teuthology/ceph/distros/rhel.yaml b/qa/suites/teuthology/ceph/distros/rhel.yaml new file mode 100644 index 00000000000..7f9778d770c --- /dev/null +++ b/qa/suites/teuthology/ceph/distros/rhel.yaml @@ -0,0 +1 @@ +os_type: rhel diff --git a/qa/suites/teuthology/ceph/distros/ubuntu.yaml b/qa/suites/teuthology/ceph/distros/ubuntu.yaml new file mode 100644 index 00000000000..150451be028 --- /dev/null +++ b/qa/suites/teuthology/ceph/distros/ubuntu.yaml @@ -0,0 +1 @@ +os_type: ubuntu diff --git a/qa/suites/teuthology/ceph/tasks/teuthology.yaml b/qa/suites/teuthology/ceph/tasks/teuthology.yaml new file mode 100644 index 00000000000..d3d7ddd8728 --- /dev/null +++ b/qa/suites/teuthology/ceph/tasks/teuthology.yaml @@ -0,0 +1,2 @@ +tasks: + - install: diff --git a/qa/suites/teuthology/no-ceph/% b/qa/suites/teuthology/no-ceph/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/teuthology/no-ceph/clusters/single.yaml b/qa/suites/teuthology/no-ceph/clusters/single.yaml new file mode 100644 index 00000000000..beba3972ce9 --- /dev/null +++ b/qa/suites/teuthology/no-ceph/clusters/single.yaml @@ -0,0 +1,2 @@ +roles: + - [mon.0, client.0] diff --git a/qa/suites/teuthology/no-ceph/distros/baremetal.yaml b/qa/suites/teuthology/no-ceph/distros/baremetal.yaml new file mode 100644 index 00000000000..59b9779b905 --- /dev/null +++ b/qa/suites/teuthology/no-ceph/distros/baremetal.yaml @@ -0,0 +1 @@ +# left blank so we'll take the default baremetal machine_type and os_type / os_version diff --git a/qa/suites/teuthology/no-ceph/distros/rhel7.0.yaml b/qa/suites/teuthology/no-ceph/distros/rhel7.0.yaml new file mode 100644 index 00000000000..c87c0bc135b --- /dev/null +++ b/qa/suites/teuthology/no-ceph/distros/rhel7.0.yaml @@ -0,0 +1,2 @@ +os_type: rhel +os_version: "7.0" diff --git 
a/qa/suites/teuthology/no-ceph/distros/ubuntu14.04.yaml b/qa/suites/teuthology/no-ceph/distros/ubuntu14.04.yaml new file mode 100644 index 00000000000..309e989feeb --- /dev/null +++ b/qa/suites/teuthology/no-ceph/distros/ubuntu14.04.yaml @@ -0,0 +1,2 @@ +os_type: ubuntu +os_version: "14.04" diff --git a/qa/suites/teuthology/no-ceph/distros/vps.yaml b/qa/suites/teuthology/no-ceph/distros/vps.yaml new file mode 100644 index 00000000000..5d7ba673c94 --- /dev/null +++ b/qa/suites/teuthology/no-ceph/distros/vps.yaml @@ -0,0 +1 @@ +machine_type: vps diff --git a/qa/suites/teuthology/no-ceph/distros/vps_rhel7.0.yaml b/qa/suites/teuthology/no-ceph/distros/vps_rhel7.0.yaml new file mode 100644 index 00000000000..d19bff22d0b --- /dev/null +++ b/qa/suites/teuthology/no-ceph/distros/vps_rhel7.0.yaml @@ -0,0 +1,3 @@ +machine_type: vps +os_type: rhel +os_version: "7.0" diff --git a/qa/suites/teuthology/no-ceph/distros/vps_ubuntu14.04.yaml b/qa/suites/teuthology/no-ceph/distros/vps_ubuntu14.04.yaml new file mode 100644 index 00000000000..3c3b2500197 --- /dev/null +++ b/qa/suites/teuthology/no-ceph/distros/vps_ubuntu14.04.yaml @@ -0,0 +1,3 @@ +machine_type: vps +os_type: ubuntu +os_version: "14.04" diff --git a/qa/suites/teuthology/no-ceph/tasks/teuthology.yaml b/qa/suites/teuthology/no-ceph/tasks/teuthology.yaml new file mode 100644 index 00000000000..1391458b5e7 --- /dev/null +++ b/qa/suites/teuthology/no-ceph/tasks/teuthology.yaml @@ -0,0 +1,2 @@ +tasks: + - tests: diff --git a/qa/suites/teuthology/workunits/yes.yaml b/qa/suites/teuthology/workunits/yes.yaml new file mode 100644 index 00000000000..45098dbb811 --- /dev/null +++ b/qa/suites/teuthology/workunits/yes.yaml @@ -0,0 +1,8 @@ +roles: + - [client.0] +tasks: +- install: +- workunit: + clients: + all: + - true.sh diff --git a/qa/suites/tgt/basic/% b/qa/suites/tgt/basic/% new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/qa/suites/tgt/basic/% @@ -0,0 +1 @@ + diff --git 
a/qa/suites/tgt/basic/clusters/fixed-3.yaml b/qa/suites/tgt/basic/clusters/fixed-3.yaml new file mode 100644 index 00000000000..0038432afa7 --- /dev/null +++ b/qa/suites/tgt/basic/clusters/fixed-3.yaml @@ -0,0 +1,4 @@ +roles: +- [mon.a, mon.c, osd.0, osd.1, osd.2] +- [mon.b, mds.a, osd.3, osd.4, osd.5] +- [client.0] diff --git a/qa/suites/tgt/basic/fs/btrfs.yaml b/qa/suites/tgt/basic/fs/btrfs.yaml new file mode 100644 index 00000000000..4c7af311538 --- /dev/null +++ b/qa/suites/tgt/basic/fs/btrfs.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + fs: btrfs + conf: + osd: + osd op thread timeout: 60 diff --git a/qa/suites/tgt/basic/msgr-failures/few.yaml b/qa/suites/tgt/basic/msgr-failures/few.yaml new file mode 100644 index 00000000000..0de320d46b8 --- /dev/null +++ b/qa/suites/tgt/basic/msgr-failures/few.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 diff --git a/qa/suites/tgt/basic/msgr-failures/many.yaml b/qa/suites/tgt/basic/msgr-failures/many.yaml new file mode 100644 index 00000000000..86f8dde8a0e --- /dev/null +++ b/qa/suites/tgt/basic/msgr-failures/many.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 500 diff --git a/qa/suites/tgt/basic/tasks/blogbench.yaml b/qa/suites/tgt/basic/tasks/blogbench.yaml new file mode 100644 index 00000000000..f77a78b6bc0 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/blogbench.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/blogbench.sh diff --git a/qa/suites/tgt/basic/tasks/bonnie.yaml b/qa/suites/tgt/basic/tasks/bonnie.yaml new file mode 100644 index 00000000000..2cbfcf8872e --- /dev/null +++ b/qa/suites/tgt/basic/tasks/bonnie.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/bonnie.sh diff --git a/qa/suites/tgt/basic/tasks/dbench-short.yaml b/qa/suites/tgt/basic/tasks/dbench-short.yaml new file mode 100644 index 
00000000000..fcb721a4d14 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/dbench-short.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/dbench-short.sh diff --git a/qa/suites/tgt/basic/tasks/dbench.yaml b/qa/suites/tgt/basic/tasks/dbench.yaml new file mode 100644 index 00000000000..7f732175faa --- /dev/null +++ b/qa/suites/tgt/basic/tasks/dbench.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/dbench.sh diff --git a/qa/suites/tgt/basic/tasks/ffsb.yaml b/qa/suites/tgt/basic/tasks/ffsb.yaml new file mode 100644 index 00000000000..f50a3a19647 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/ffsb.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/suites/tgt/basic/tasks/fio.yaml b/qa/suites/tgt/basic/tasks/fio.yaml new file mode 100644 index 00000000000..e7346ce528e --- /dev/null +++ b/qa/suites/tgt/basic/tasks/fio.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/fio.sh diff --git a/qa/suites/tgt/basic/tasks/fsstress.yaml b/qa/suites/tgt/basic/tasks/fsstress.yaml new file mode 100644 index 00000000000..c77f511c0f6 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/fsstress.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/tgt/basic/tasks/fsx.yaml b/qa/suites/tgt/basic/tasks/fsx.yaml new file mode 100644 index 00000000000..04732c84009 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/fsx.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/fsx.sh diff --git a/qa/suites/tgt/basic/tasks/fsync-tester.yaml b/qa/suites/tgt/basic/tasks/fsync-tester.yaml new file mode 100644 index 00000000000..ea627b7d184 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/fsync-tester.yaml @@ -0,0 +1,9 
@@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/fsync-tester.sh diff --git a/qa/suites/tgt/basic/tasks/iogen.yaml b/qa/suites/tgt/basic/tasks/iogen.yaml new file mode 100644 index 00000000000..1065c74daba --- /dev/null +++ b/qa/suites/tgt/basic/tasks/iogen.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/iogen.sh diff --git a/qa/suites/tgt/basic/tasks/iozone-sync.yaml b/qa/suites/tgt/basic/tasks/iozone-sync.yaml new file mode 100644 index 00000000000..ac241a417e8 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/iozone-sync.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/iozone-sync.sh diff --git a/qa/suites/tgt/basic/tasks/iozone.yaml b/qa/suites/tgt/basic/tasks/iozone.yaml new file mode 100644 index 00000000000..cf5604c21a7 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/iozone.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/tgt/basic/tasks/pjd.yaml b/qa/suites/tgt/basic/tasks/pjd.yaml new file mode 100644 index 00000000000..ba5c631f157 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/pjd.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/upgrade/client-upgrade/dumpling-client-x/basic/% b/qa/suites/upgrade/client-upgrade/dumpling-client-x/basic/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/client-upgrade/dumpling-client-x/basic/0-cluster/start.yaml b/qa/suites/upgrade/client-upgrade/dumpling-client-x/basic/0-cluster/start.yaml new file mode 100644 index 00000000000..1f53351b201 --- /dev/null +++ b/qa/suites/upgrade/client-upgrade/dumpling-client-x/basic/0-cluster/start.yaml @@ -0,0 +1,15 @@ +roles: +- - mon.a + - mds.a + - osd.0 + - osd.1 +- - mon.b + - mon.c + - osd.2 + - osd.3 +- - 
client.0 +overrides: + ceph: + log-whitelist: + - failed to encode map + diff --git a/qa/suites/upgrade/client-upgrade/dumpling-client-x/basic/1-install/dumpling-client-x.yaml b/qa/suites/upgrade/client-upgrade/dumpling-client-x/basic/1-install/dumpling-client-x.yaml new file mode 100644 index 00000000000..2335a5f8298 --- /dev/null +++ b/qa/suites/upgrade/client-upgrade/dumpling-client-x/basic/1-install/dumpling-client-x.yaml @@ -0,0 +1,9 @@ +tasks: +- install: + branch: dumpling +- print: "**** done install dumpling" +- install.upgrade: + client.0: +- print: "**** done install.upgrade client.0" +- ceph: +- print: "**** done ceph" diff --git a/qa/suites/upgrade/client-upgrade/dumpling-client-x/basic/2-workload/rados_loadgen_big.yaml b/qa/suites/upgrade/client-upgrade/dumpling-client-x/basic/2-workload/rados_loadgen_big.yaml new file mode 100644 index 00000000000..98df38c7191 --- /dev/null +++ b/qa/suites/upgrade/client-upgrade/dumpling-client-x/basic/2-workload/rados_loadgen_big.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + clients: + client.0: + - rados/load-gen-big.sh +- print: "**** done rados/load-gen-big.sh" diff --git a/qa/suites/upgrade/client-upgrade/dumpling-client-x/basic/2-workload/rbd_import_export.yaml b/qa/suites/upgrade/client-upgrade/dumpling-client-x/basic/2-workload/rbd_import_export.yaml new file mode 100644 index 00000000000..57589e90cfb --- /dev/null +++ b/qa/suites/upgrade/client-upgrade/dumpling-client-x/basic/2-workload/rbd_import_export.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + branch: dumpling + clients: + client.0: + - rbd/import_export.sh +- print: "**** done rbd/import_export.sh" diff --git a/qa/suites/upgrade/client-upgrade/firefly-client-x/basic/% b/qa/suites/upgrade/client-upgrade/firefly-client-x/basic/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/client-upgrade/firefly-client-x/basic/0-cluster/start.yaml b/qa/suites/upgrade/client-upgrade/firefly-client-x/basic/0-cluster/start.yaml new file 
mode 100644 index 00000000000..db6f5e2fe99 --- /dev/null +++ b/qa/suites/upgrade/client-upgrade/firefly-client-x/basic/0-cluster/start.yaml @@ -0,0 +1,14 @@ +roles: +- - mon.a + - mds.a + - osd.0 + - osd.1 +- - mon.b + - mon.c + - osd.2 + - osd.3 +- - client.0 +overrides: + ceph: + log-whitelist: + - failed to encode map diff --git a/qa/suites/upgrade/client-upgrade/firefly-client-x/basic/1-install/firefly-client-x.yaml b/qa/suites/upgrade/client-upgrade/firefly-client-x/basic/1-install/firefly-client-x.yaml new file mode 100644 index 00000000000..39430daa088 --- /dev/null +++ b/qa/suites/upgrade/client-upgrade/firefly-client-x/basic/1-install/firefly-client-x.yaml @@ -0,0 +1,10 @@ +tasks: +- install: + branch: firefly +- print: "**** done install firefly" +- install.upgrade: + exclude_packages: ['ceph-test', 'ceph-test-dbg'] + client.0: +- print: "**** done install.upgrade client.0" +- ceph: +- print: "**** done ceph" diff --git a/qa/suites/upgrade/client-upgrade/firefly-client-x/basic/2-workload/rbd_cli_import_export.yaml b/qa/suites/upgrade/client-upgrade/firefly-client-x/basic/2-workload/rbd_cli_import_export.yaml new file mode 100644 index 00000000000..6d4fd41aff3 --- /dev/null +++ b/qa/suites/upgrade/client-upgrade/firefly-client-x/basic/2-workload/rbd_cli_import_export.yaml @@ -0,0 +1,9 @@ +tasks: +- workunit: + branch: firefly + clients: + client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format +- print: "**** done rbd/import_export.sh" diff --git a/qa/suites/upgrade/client-upgrade/hammer-client-x/basic/% b/qa/suites/upgrade/client-upgrade/hammer-client-x/basic/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/client-upgrade/hammer-client-x/basic/0-cluster/start.yaml b/qa/suites/upgrade/client-upgrade/hammer-client-x/basic/0-cluster/start.yaml new file mode 100644 index 00000000000..db6f5e2fe99 --- /dev/null +++ b/qa/suites/upgrade/client-upgrade/hammer-client-x/basic/0-cluster/start.yaml @@ -0,0 +1,14 @@ 
+roles: +- - mon.a + - mds.a + - osd.0 + - osd.1 +- - mon.b + - mon.c + - osd.2 + - osd.3 +- - client.0 +overrides: + ceph: + log-whitelist: + - failed to encode map diff --git a/qa/suites/upgrade/client-upgrade/hammer-client-x/basic/1-install/hammer-client-x.yaml b/qa/suites/upgrade/client-upgrade/hammer-client-x/basic/1-install/hammer-client-x.yaml new file mode 100644 index 00000000000..c6dd4ed9391 --- /dev/null +++ b/qa/suites/upgrade/client-upgrade/hammer-client-x/basic/1-install/hammer-client-x.yaml @@ -0,0 +1,10 @@ +tasks: +- install: + branch: hammer +- print: "**** done install hammer" +- install.upgrade: + exclude_packages: ['ceph-test', 'ceph-test-dbg'] + client.0: +- print: "**** done install.upgrade client.0" +- ceph: +- print: "**** done ceph" diff --git a/qa/suites/upgrade/client-upgrade/hammer-client-x/basic/2-workload/rbd_cli_import_export.yaml b/qa/suites/upgrade/client-upgrade/hammer-client-x/basic/2-workload/rbd_cli_import_export.yaml new file mode 100644 index 00000000000..9bb4f94f73f --- /dev/null +++ b/qa/suites/upgrade/client-upgrade/hammer-client-x/basic/2-workload/rbd_cli_import_export.yaml @@ -0,0 +1,9 @@ +tasks: +- workunit: + branch: hammer + clients: + client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format +- print: "**** done rbd/import_export.sh" diff --git a/qa/suites/upgrade/dumpling-emperor-x/parallel/% b/qa/suites/upgrade/dumpling-emperor-x/parallel/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/dumpling-emperor-x/parallel/0-cluster/start.yaml b/qa/suites/upgrade/dumpling-emperor-x/parallel/0-cluster/start.yaml new file mode 100644 index 00000000000..e3d7f85f9ff --- /dev/null +++ b/qa/suites/upgrade/dumpling-emperor-x/parallel/0-cluster/start.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + conf: + mon: + mon warn on legacy crush tunables: false + log-whitelist: + - scrub mismatch + - ScrubResult +roles: +- - mon.a + - mds.a + - osd.0 + - osd.1 +- - mon.b + - mon.c + - osd.2 + - 
osd.3 +- - client.0 + - client.1 diff --git a/qa/suites/upgrade/dumpling-emperor-x/parallel/1-dumpling-install/dumpling.yaml b/qa/suites/upgrade/dumpling-emperor-x/parallel/1-dumpling-install/dumpling.yaml new file mode 100644 index 00000000000..92df8cebc5f --- /dev/null +++ b/qa/suites/upgrade/dumpling-emperor-x/parallel/1-dumpling-install/dumpling.yaml @@ -0,0 +1,8 @@ +tasks: +- install: + branch: dumpling +- ceph: + fs: xfs +- parallel: + - workload + - upgrade-sequence diff --git a/qa/suites/upgrade/dumpling-emperor-x/parallel/2-workload/+ b/qa/suites/upgrade/dumpling-emperor-x/parallel/2-workload/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/dumpling-emperor-x/parallel/2-workload/rados_api.yaml b/qa/suites/upgrade/dumpling-emperor-x/parallel/2-workload/rados_api.yaml new file mode 100644 index 00000000000..96d656e4932 --- /dev/null +++ b/qa/suites/upgrade/dumpling-emperor-x/parallel/2-workload/rados_api.yaml @@ -0,0 +1,8 @@ +workload: + sequential: + - workunit: + branch: dumpling + clients: + client.0: + - rados/test.sh + - cls diff --git a/qa/suites/upgrade/dumpling-emperor-x/parallel/2-workload/rados_loadgenbig.yaml b/qa/suites/upgrade/dumpling-emperor-x/parallel/2-workload/rados_loadgenbig.yaml new file mode 100644 index 00000000000..16241b3bed6 --- /dev/null +++ b/qa/suites/upgrade/dumpling-emperor-x/parallel/2-workload/rados_loadgenbig.yaml @@ -0,0 +1,7 @@ +workload: + sequential: + - workunit: + branch: dumpling + clients: + client.0: + - rados/load-gen-big.sh diff --git a/qa/suites/upgrade/dumpling-emperor-x/parallel/2-workload/test_rbd_api.yaml b/qa/suites/upgrade/dumpling-emperor-x/parallel/2-workload/test_rbd_api.yaml new file mode 100644 index 00000000000..7584f0e1ff0 --- /dev/null +++ b/qa/suites/upgrade/dumpling-emperor-x/parallel/2-workload/test_rbd_api.yaml @@ -0,0 +1,7 @@ +workload: + sequential: + - workunit: + branch: dumpling + clients: + client.0: + - rbd/test_librbd.sh diff --git 
a/qa/suites/upgrade/dumpling-emperor-x/parallel/2-workload/test_rbd_python.yaml b/qa/suites/upgrade/dumpling-emperor-x/parallel/2-workload/test_rbd_python.yaml new file mode 100644 index 00000000000..09c5326592b --- /dev/null +++ b/qa/suites/upgrade/dumpling-emperor-x/parallel/2-workload/test_rbd_python.yaml @@ -0,0 +1,7 @@ +workload: + sequential: + - workunit: + branch: dumpling + clients: + client.0: + - rbd/test_librbd_python.sh diff --git a/qa/suites/upgrade/dumpling-emperor-x/parallel/3-emperor-upgrade/emperor.yaml b/qa/suites/upgrade/dumpling-emperor-x/parallel/3-emperor-upgrade/emperor.yaml new file mode 100644 index 00000000000..626bc161cbd --- /dev/null +++ b/qa/suites/upgrade/dumpling-emperor-x/parallel/3-emperor-upgrade/emperor.yaml @@ -0,0 +1,10 @@ +tasks: + - install.upgrade: + mon.a: + branch: emperor + mon.b: + branch: emperor + - ceph.restart: + - parallel: + - workload2 + - upgrade-sequence diff --git a/qa/suites/upgrade/dumpling-emperor-x/parallel/4-workload/+ b/qa/suites/upgrade/dumpling-emperor-x/parallel/4-workload/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/dumpling-emperor-x/parallel/4-workload/rados_api.yaml b/qa/suites/upgrade/dumpling-emperor-x/parallel/4-workload/rados_api.yaml new file mode 100644 index 00000000000..b6bb42048a4 --- /dev/null +++ b/qa/suites/upgrade/dumpling-emperor-x/parallel/4-workload/rados_api.yaml @@ -0,0 +1,8 @@ +workload2: + sequential: + - workunit: + branch: dumpling + clients: + client.0: + - rados/test.sh + - cls diff --git a/qa/suites/upgrade/dumpling-emperor-x/parallel/4-workload/rados_loadgenbig.yaml b/qa/suites/upgrade/dumpling-emperor-x/parallel/4-workload/rados_loadgenbig.yaml new file mode 100644 index 00000000000..fd5c31dc477 --- /dev/null +++ b/qa/suites/upgrade/dumpling-emperor-x/parallel/4-workload/rados_loadgenbig.yaml @@ -0,0 +1,7 @@ +workload2: + sequential: + - workunit: + branch: dumpling + clients: + client.0: + - rados/load-gen-big.sh diff --git 
a/qa/suites/upgrade/dumpling-emperor-x/parallel/4-workload/test_rbd_api.yaml b/qa/suites/upgrade/dumpling-emperor-x/parallel/4-workload/test_rbd_api.yaml new file mode 100644 index 00000000000..8c8c97a4bf3 --- /dev/null +++ b/qa/suites/upgrade/dumpling-emperor-x/parallel/4-workload/test_rbd_api.yaml @@ -0,0 +1,7 @@ +workload2: + sequential: + - workunit: + branch: dumpling + clients: + client.0: + - rbd/test_librbd.sh diff --git a/qa/suites/upgrade/dumpling-emperor-x/parallel/4-workload/test_rbd_python.yaml b/qa/suites/upgrade/dumpling-emperor-x/parallel/4-workload/test_rbd_python.yaml new file mode 100644 index 00000000000..1edb13cf907 --- /dev/null +++ b/qa/suites/upgrade/dumpling-emperor-x/parallel/4-workload/test_rbd_python.yaml @@ -0,0 +1,7 @@ +workload2: + sequential: + - workunit: + branch: dumpling + clients: + client.0: + - rbd/test_librbd_python.sh diff --git a/qa/suites/upgrade/dumpling-emperor-x/parallel/5-upgrade-sequence/upgrade-all.yaml b/qa/suites/upgrade/dumpling-emperor-x/parallel/5-upgrade-sequence/upgrade-all.yaml new file mode 100644 index 00000000000..f5d10cdfcab --- /dev/null +++ b/qa/suites/upgrade/dumpling-emperor-x/parallel/5-upgrade-sequence/upgrade-all.yaml @@ -0,0 +1,6 @@ +upgrade-sequence: + sequential: + - install.upgrade: + mon.a: + mon.b: + - ceph.restart: [mon.a, mon.b, mon.c, mds.a, osd.0, osd.1, osd.2, osd.3] diff --git a/qa/suites/upgrade/dumpling-emperor-x/parallel/5-upgrade-sequence/upgrade-mon-osd-mds.yaml b/qa/suites/upgrade/dumpling-emperor-x/parallel/5-upgrade-sequence/upgrade-mon-osd-mds.yaml new file mode 100644 index 00000000000..fcb61b1cef2 --- /dev/null +++ b/qa/suites/upgrade/dumpling-emperor-x/parallel/5-upgrade-sequence/upgrade-mon-osd-mds.yaml @@ -0,0 +1,33 @@ +upgrade-sequence: + sequential: + - install.upgrade: + mon.a: + mon.b: + - ceph.restart: + daemons: [mon.a] + wait-for-healthy: false + wait-for-osds-up: true + - sleep: + duration: 60 + - ceph.restart: + daemons: [mon.b] + wait-for-healthy: false + 
wait-for-osds-up: true + - sleep: + duration: 60 + - ceph.restart: [mon.c] + - sleep: + duration: 60 + - ceph.restart: [osd.0] + - sleep: + duration: 60 + - ceph.restart: [osd.1] + - sleep: + duration: 60 + - ceph.restart: [osd.2] + - sleep: + duration: 60 + - ceph.restart: [osd.3] + - sleep: + duration: 60 + - ceph.restart: [mds.a] diff --git a/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/+ b/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/ec-rados-plugin=jerasure-k=2-m=1.yaml b/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/ec-rados-plugin=jerasure-k=2-m=1.yaml new file mode 120000 index 00000000000..4baff9bdd2f --- /dev/null +++ b/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/ec-rados-plugin=jerasure-k=2-m=1.yaml @@ -0,0 +1 @@ +../../../../../erasure-code/ec-rados-plugin=jerasure-k=2-m=1.yaml \ No newline at end of file diff --git a/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml b/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml new file mode 120000 index 00000000000..4b9d9a44a24 --- /dev/null +++ b/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml @@ -0,0 +1 @@ +../../../../../erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml \ No newline at end of file diff --git a/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rados-snaps-few-objects.yaml b/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rados-snaps-few-objects.yaml new file mode 100644 index 00000000000..bf85020d8d9 --- /dev/null +++ b/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rados-snaps-few-objects.yaml @@ -0,0 +1,12 @@ +tasks: +- rados: + clients: [client.1] + ops: 4000 + objects: 50 + op_weights: + read: 
100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rados_loadgenmix.yaml b/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rados_loadgenmix.yaml new file mode 100644 index 00000000000..0bddda0ab84 --- /dev/null +++ b/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rados_loadgenmix.yaml @@ -0,0 +1,6 @@ +tasks: + - workunit: + branch: dumpling + clients: + client.1: + - rados/load-gen-mix.sh diff --git a/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rados_mon_thrash.yaml b/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rados_mon_thrash.yaml new file mode 100644 index 00000000000..1a932e059f0 --- /dev/null +++ b/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rados_mon_thrash.yaml @@ -0,0 +1,9 @@ +tasks: +- mon_thrash: + revive_delay: 20 + thrash_delay: 1 +- workunit: + branch: dumpling + clients: + client.1: + - rados/test.sh diff --git a/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rbd_cls.yaml b/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rbd_cls.yaml new file mode 100644 index 00000000000..9407ab48916 --- /dev/null +++ b/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rbd_cls.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + branch: dumpling + clients: + client.1: + - cls/test_cls_rbd.sh + diff --git a/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rbd_import_export.yaml b/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rbd_import_export.yaml new file mode 100644 index 00000000000..185cd1ab32a --- /dev/null +++ b/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rbd_import_export.yaml @@ -0,0 +1,8 @@ +tasks: +- workunit: + branch: dumpling + clients: + client.1: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format diff --git 
a/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rgw_s3tests.yaml b/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rgw_s3tests.yaml new file mode 100644 index 00000000000..22c3a3f821a --- /dev/null +++ b/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rgw_s3tests.yaml @@ -0,0 +1,5 @@ +tasks: +- rgw: [client.1] +- s3tests: + client.1: + rgw_server: client.1 diff --git a/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rgw_swift.yaml b/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rgw_swift.yaml new file mode 100644 index 00000000000..0ab9febd2fc --- /dev/null +++ b/qa/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rgw_swift.yaml @@ -0,0 +1,6 @@ +tasks: +# Uncomment the next line if you have not already included rgw_s3tests.yaml in your test. +# - rgw: [client.1] +- swift: + client.1: + rgw_server: client.1 diff --git a/qa/suites/upgrade/dumpling-emperor-x/parallel/distros b/qa/suites/upgrade/dumpling-emperor-x/parallel/distros new file mode 120000 index 00000000000..79010c36a59 --- /dev/null +++ b/qa/suites/upgrade/dumpling-emperor-x/parallel/distros @@ -0,0 +1 @@ +../../../../distros/supported \ No newline at end of file diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/% b/qa/suites/upgrade/dumpling-firefly-x/parallel/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/0-cluster/start.yaml b/qa/suites/upgrade/dumpling-firefly-x/parallel/0-cluster/start.yaml new file mode 100644 index 00000000000..3eb17b675f3 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/parallel/0-cluster/start.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + conf: + mon: + mon warn on legacy crush tunables: false + mon debug unsafe allow tier with nonempty snaps: true + log-whitelist: + - scrub mismatch + - ScrubResult + - failed to encode map +roles: +- - mon.a + - mds.a + - osd.0 + - osd.1 +- - mon.b + - mon.c + - osd.2 + - osd.3 
+- - client.0 + - client.1 diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/1-dumpling-install/dumpling.yaml b/qa/suites/upgrade/dumpling-firefly-x/parallel/1-dumpling-install/dumpling.yaml new file mode 100644 index 00000000000..60b2f13c985 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/parallel/1-dumpling-install/dumpling.yaml @@ -0,0 +1,14 @@ +tasks: +- install: + branch: dumpling +- print: "**** done dumpling install" +- ceph: + fs: xfs +- parallel: + - workload + - upgrade-sequence +- print: "**** done parallel" +- install.upgrade: + client.0: + branch: firefly +- print: "*** client.0 upgraded to firefly" diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/2-workload/+ b/qa/suites/upgrade/dumpling-firefly-x/parallel/2-workload/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/2-workload/rados_api.yaml b/qa/suites/upgrade/dumpling-firefly-x/parallel/2-workload/rados_api.yaml new file mode 100644 index 00000000000..3c7c90ef81f --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/parallel/2-workload/rados_api.yaml @@ -0,0 +1,10 @@ +workload: + sequential: + - workunit: + branch: dumpling + clients: + client.0: + # make sure to run test-upgrade-firefly.sh when running the cluster is mixed mode between firefly and dumpling + - rados/test-upgrade-firefly.sh + - cls + - print: "**** done rados/test.sh & cls" diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/2-workload/rados_loadgenbig.yaml b/qa/suites/upgrade/dumpling-firefly-x/parallel/2-workload/rados_loadgenbig.yaml new file mode 100644 index 00000000000..3bcf62b0cb7 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/parallel/2-workload/rados_loadgenbig.yaml @@ -0,0 +1,8 @@ +workload: + sequential: + - workunit: + branch: dumpling + clients: + client.0: + - rados/load-gen-big.sh + - print: "**** done rados/load-gen-big.sh" diff --git 
a/qa/suites/upgrade/dumpling-firefly-x/parallel/2-workload/test_rbd_api.yaml b/qa/suites/upgrade/dumpling-firefly-x/parallel/2-workload/test_rbd_api.yaml new file mode 100644 index 00000000000..d5b07c15de8 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/parallel/2-workload/test_rbd_api.yaml @@ -0,0 +1,8 @@ +workload: + sequential: + - workunit: + branch: dumpling + clients: + client.0: + - rbd/test_librbd.sh + - print: "**** done rbd/test_librbd.sh" diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/2-workload/test_rbd_python.yaml b/qa/suites/upgrade/dumpling-firefly-x/parallel/2-workload/test_rbd_python.yaml new file mode 100644 index 00000000000..4063ad7f915 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/parallel/2-workload/test_rbd_python.yaml @@ -0,0 +1,8 @@ +workload: + sequential: + - workunit: + branch: dumpling + clients: + client.0: + - rbd/test_librbd_python.sh + - print: "**** done rbd/test_librbd_python.sh" diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/3-upgrade-sequence/upgrade-all.yaml b/qa/suites/upgrade/dumpling-firefly-x/parallel/3-upgrade-sequence/upgrade-all.yaml new file mode 100644 index 00000000000..7c057dbca99 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/parallel/3-upgrade-sequence/upgrade-all.yaml @@ -0,0 +1,10 @@ +upgrade-sequence: + sequential: + - install.upgrade: + mon.a: + branch: firefly + mon.b: + branch: firefly + - print: "**** done install.upgrade firefly for mon.a and mon.b" + - ceph.restart: [mon.a, mon.b, mon.c, mds.a, osd.0, osd.1, osd.2, osd.3] + - print: "**** done ceph.restart the cluster" diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/4-firefly-upgrade/firefly.yaml b/qa/suites/upgrade/dumpling-firefly-x/parallel/4-firefly-upgrade/firefly.yaml new file mode 100644 index 00000000000..bb530565107 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/parallel/4-firefly-upgrade/firefly.yaml @@ -0,0 +1,8 @@ +tasks: + - parallel: + - workload2 + - upgrade-sequence2 
+ - print: "**** done parallel" + - install.upgrade: + client.0: + - print: "**** done install.upgrade client.0 to the version from teuthology-suite arg" diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/5-workload/+ b/qa/suites/upgrade/dumpling-firefly-x/parallel/5-workload/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/5-workload/rados_api.yaml b/qa/suites/upgrade/dumpling-firefly-x/parallel/5-workload/rados_api.yaml new file mode 100644 index 00000000000..47573726b0f --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/parallel/5-workload/rados_api.yaml @@ -0,0 +1,9 @@ +workload2: + sequential: + - workunit: + branch: firefly + clients: + client.0: + - rados/test.sh + - cls + - print: "**** done #rados/test.sh and cls 2" diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/5-workload/rados_loadgenbig.yaml b/qa/suites/upgrade/dumpling-firefly-x/parallel/5-workload/rados_loadgenbig.yaml new file mode 100644 index 00000000000..451130e2e38 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/parallel/5-workload/rados_loadgenbig.yaml @@ -0,0 +1,8 @@ +workload2: + sequential: + - workunit: + branch: firefly + clients: + client.0: + - rados/load-gen-big.sh + - print: "**** done rados/load-gen-big.sh 2" diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/5-workload/test_rbd_api.yaml b/qa/suites/upgrade/dumpling-firefly-x/parallel/5-workload/test_rbd_api.yaml new file mode 100644 index 00000000000..1cf824d99bf --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/parallel/5-workload/test_rbd_api.yaml @@ -0,0 +1,8 @@ +workload2: + sequential: + - workunit: + branch: firefly + clients: + client.0: + - rbd/test_librbd.sh + - print: "**** done rbd/test_librbd.sh 2" diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/5-workload/test_rbd_python.yaml b/qa/suites/upgrade/dumpling-firefly-x/parallel/5-workload/test_rbd_python.yaml new file mode 100644 index 
00000000000..9409329b0a4 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/parallel/5-workload/test_rbd_python.yaml @@ -0,0 +1,8 @@ +workload2: + sequential: + - workunit: + branch: firefly + clients: + client.0: + - rbd/test_librbd_python.sh + - print: "**** done rbd/test_librbd_python.sh 2" diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/6-upgrade-sequence/upgrade-by-daemon.yaml b/qa/suites/upgrade/dumpling-firefly-x/parallel/6-upgrade-sequence/upgrade-by-daemon.yaml new file mode 100644 index 00000000000..082a63c36fd --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/parallel/6-upgrade-sequence/upgrade-by-daemon.yaml @@ -0,0 +1,39 @@ +upgrade-sequence2: + sequential: + - install.upgrade: + mon.a: + - print: "**** done install.upgrade mon.a to the version from teuthology-suite arg" + - ceph.restart: + daemons: [mon.a] + wait-for-healthy: true + - sleep: + duration: 60 + - ceph.restart: + daemons: [osd.0, osd.1] + wait-for-healthy: true + - sleep: + duration: 60 + - ceph.restart: [mds.a] + - sleep: + duration: 60 + - print: "**** running mixed versions of osds and mons" + - exec: + mon.b: + - ceph osd crush tunables firefly + - install.upgrade: + mon.b: + - print: "**** done install.upgrade mon.b to the version from teuthology-suite arg" + - ceph.restart: + daemons: [mon.b, mon.c] + wait-for-healthy: true + - sleep: + duration: 60 + - ceph.restart: + daemons: [osd.2, osd.3] + wait-for-healthy: true + - sleep: + duration: 60 +### removed to fix #9642 +# - install.upgrade: +# client.0: +# - print: "*** client.0 upgraded" diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/+ b/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/ec-rados-plugin=jerasure-k=2-m=1.yaml b/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/ec-rados-plugin=jerasure-k=2-m=1.yaml new file mode 
120000 index 00000000000..4baff9bdd2f --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/ec-rados-plugin=jerasure-k=2-m=1.yaml @@ -0,0 +1 @@ +../../../../../erasure-code/ec-rados-plugin=jerasure-k=2-m=1.yaml \ No newline at end of file diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml b/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml new file mode 120000 index 00000000000..4b9d9a44a24 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml @@ -0,0 +1 @@ +../../../../../erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml \ No newline at end of file diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rados-snaps-few-objects.yaml b/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rados-snaps-few-objects.yaml new file mode 100644 index 00000000000..bf85020d8d9 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rados-snaps-few-objects.yaml @@ -0,0 +1,12 @@ +tasks: +- rados: + clients: [client.1] + ops: 4000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rados_loadgenmix.yaml b/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rados_loadgenmix.yaml new file mode 100644 index 00000000000..879b7b6d189 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rados_loadgenmix.yaml @@ -0,0 +1,5 @@ +tasks: + - workunit: + clients: + client.1: + - rados/load-gen-mix.sh diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rados_mon_thrash.yaml b/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rados_mon_thrash.yaml new file mode 100644 index 00000000000..5c4c6297cfe --- /dev/null +++ 
b/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rados_mon_thrash.yaml @@ -0,0 +1,10 @@ +tasks: + - sequential: + - mon_thrash: + revive_delay: 20 + thrash_delay: 1 + - workunit: + clients: + client.1: + - rados/test.sh + - print: "**** done rados/test.sh - 6-final-workload" diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rbd_cls.yaml b/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rbd_cls.yaml new file mode 100644 index 00000000000..908b79e869a --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rbd_cls.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + clients: + client.1: + - cls/test_cls_rbd.sh + diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rbd_import_export.yaml b/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rbd_import_export.yaml new file mode 100644 index 00000000000..a7ce2141ee5 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rbd_import_export.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + clients: + client.1: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rgw_s3tests.yaml b/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rgw_s3tests.yaml new file mode 100644 index 00000000000..22c3a3f821a --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rgw_s3tests.yaml @@ -0,0 +1,5 @@ +tasks: +- rgw: [client.1] +- s3tests: + client.1: + rgw_server: client.1 diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rgw_swift.yaml b/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rgw_swift.yaml new file mode 100644 index 00000000000..0ab9febd2fc --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rgw_swift.yaml @@ -0,0 +1,6 @@ +tasks: +# Uncomment the next line if you have not already included 
rgw_s3tests.yaml in your test. +# - rgw: [client.1] +- swift: + client.1: + rgw_server: client.1 diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/README b/qa/suites/upgrade/dumpling-firefly-x/parallel/README new file mode 100644 index 00000000000..8eff0ed5c94 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/parallel/README @@ -0,0 +1,72 @@ +Purpose +======= + +This suite tests upgrades of a ceph cluster from dumpling (current +branch) to firefly (current branch), and then to a later specified +version (version x, e.g. for example ‘next’ or ‘master’). It runs the +last upgrade (steps in 3-firefly-upgrade) in parallel with firefly +client tests. + + +Structure +========= + +Generally the flow is: +- install dumpling +- test it +- upgrade cluster and clients to firefly +- test firefly while upgrading cluster to version x +- upgrade clients +- test with version x clients + +0-cluster +--------- + +Defines the cluster layout - two nodes run ceph daemons, and a third +acts as a client. (This is under 'roles:' section in the final yaml) + +1-dumpling-install +------------------ + +Installs dumpling and runs correctness tests from the 'workload' section + +2-workload +---------- + +Defines the 'workload' section - correctness tests to run on dumpling +“+” is used to construct a single yaml load from all tests in this directory + +3-firefly-upgrade +----------------- + +First upgrades everything to firefly. Then upgrades the cluster to +version x while running correctness tests (from the 'workload2' +section) on firefly clients in parallel. This upgrade is done by the +'upgrade-sequence' section, defined later. Once the cluster is +upgraded and these tests complete, upgrades the clients to version x +as well. + +Clients are upgraded last to avoid running newer tests that don't work +against firefly, and to verify that firefly clients can continue +working with a newer cluster. 
+ +4-workload +---------- + +Defines the 'workload2' section - correctness tests to run during the +upgrade from firefly to version x. +“+” is used to construct a single yaml load from all tests in this directory + +5-upgrade-sequence +------------------ + +Defines the 'upgrade-sequence' section - the order in which the +upgrade from firefly to version x is done. Note that leaving the +version unspecified here is what makes it upgrade to version x, +which is set as an override when this suite is scheduled. + +6-final-workload +---------------- + +Runs some final correctness tests of version x clients. +“+” is used to construct a single yaml load from all tests in this directory diff --git a/qa/suites/upgrade/dumpling-firefly-x/parallel/distros b/qa/suites/upgrade/dumpling-firefly-x/parallel/distros new file mode 120000 index 00000000000..79010c36a59 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/parallel/distros @@ -0,0 +1 @@ +../../../../distros/supported \ No newline at end of file diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/% b/qa/suites/upgrade/dumpling-firefly-x/stress-split/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/00-cluster/start.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/00-cluster/start.yaml new file mode 100644 index 00000000000..129635fa52a --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/00-cluster/start.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + log-whitelist: + - failed to encode map + conf: + mon: + mon warn on legacy crush tunables: false +roles: +- - mon.a + - mon.b + - mds.a + - osd.0 + - osd.1 + - osd.2 + - mon.c +- - osd.3 + - osd.4 + - osd.5 +- - client.0 diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/01-dumpling-install/dumpling.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/01-dumpling-install/dumpling.yaml new file mode 100644 index 00000000000..c98631e2bbd --- /dev/null +++ 
b/qa/suites/upgrade/dumpling-firefly-x/stress-split/01-dumpling-install/dumpling.yaml @@ -0,0 +1,5 @@ +tasks: +- install: + branch: dumpling +- ceph: + fs: xfs diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/02-partial-upgrade-firefly/firsthalf.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/02-partial-upgrade-firefly/firsthalf.yaml new file mode 100644 index 00000000000..1098b089d88 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/02-partial-upgrade-firefly/firsthalf.yaml @@ -0,0 +1,6 @@ +tasks: +- install.upgrade: + osd.0: + branch: firefly +- ceph.restart: + daemons: [osd.0, osd.1, osd.2] diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/03-workload/rbd.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/03-workload/rbd.yaml new file mode 100644 index 00000000000..9ccd57c4a82 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/03-workload/rbd.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - cls/test_cls_rbd.sh diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/04-mona-upgrade-firefly/mona.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/04-mona-upgrade-firefly/mona.yaml new file mode 100644 index 00000000000..b6ffb3323d1 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/04-mona-upgrade-firefly/mona.yaml @@ -0,0 +1,5 @@ +tasks: +- ceph.restart: + daemons: [mon.a] + wait-for-healthy: false + wait-for-osds-up: true diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/05-workload/+ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/05-workload/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/05-workload/rbd-cls.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/05-workload/rbd-cls.yaml new file mode 100644 index 00000000000..9ccd57c4a82 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/05-workload/rbd-cls.yaml @@ -0,0 +1,5 
@@ +tasks: +- workunit: + clients: + client.0: + - cls/test_cls_rbd.sh diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/05-workload/readwrite.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/05-workload/readwrite.yaml new file mode 100644 index 00000000000..c53e52b0872 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/05-workload/readwrite.yaml @@ -0,0 +1,9 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + op_weights: + read: 45 + write: 45 + delete: 10 diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/06-monb-upgrade-firefly/monb.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/06-monb-upgrade-firefly/monb.yaml new file mode 100644 index 00000000000..513890c41c0 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/06-monb-upgrade-firefly/monb.yaml @@ -0,0 +1,5 @@ +tasks: +- ceph.restart: + daemons: [mon.b] + wait-for-healthy: false + wait-for-osds-up: true diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/07-workload/+ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/07-workload/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/07-workload/radosbench.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/07-workload/radosbench.yaml new file mode 100644 index 00000000000..3940870fce0 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/07-workload/radosbench.yaml @@ -0,0 +1,4 @@ +tasks: +- radosbench: + clients: [client.0] + time: 1800 diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/07-workload/rbd_api.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/07-workload/rbd_api.yaml new file mode 100644 index 00000000000..1ecaee4bf78 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/07-workload/rbd_api.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh diff --git 
a/qa/suites/upgrade/dumpling-firefly-x/stress-split/08-monc-upgrade-firefly/monc.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/08-monc-upgrade-firefly/monc.yaml new file mode 100644 index 00000000000..e9273236ba3 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/08-monc-upgrade-firefly/monc.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph.restart: + daemons: [mon.c] + wait-for-healthy: false + wait-for-osds-up: true +- ceph.wait_for_mon_quorum: [a, b, c] diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/09-workload/rbd-python.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/09-workload/rbd-python.yaml new file mode 100644 index 00000000000..8273c40457a --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/09-workload/rbd-python.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + #fixes #10577 + branch: dumpling + clients: + client.0: + - rbd/test_librbd_python.sh diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/10-osds-upgrade-firefly/secondhalf.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/10-osds-upgrade-firefly/secondhalf.yaml new file mode 100644 index 00000000000..917894b2fe4 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/10-osds-upgrade-firefly/secondhalf.yaml @@ -0,0 +1,6 @@ +tasks: +- install.upgrade: + osd.3: + branch: firefly +- ceph.restart: + daemons: [osd.3, osd.4, osd.5] diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/11-workload/snaps-few-objects.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/11-workload/snaps-few-objects.yaml new file mode 100644 index 00000000000..c54039766c0 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/11-workload/snaps-few-objects.yaml @@ -0,0 +1,12 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git 
a/qa/suites/upgrade/dumpling-firefly-x/stress-split/12-partial-upgrade-x/first.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/12-partial-upgrade-x/first.yaml new file mode 100644 index 00000000000..68c9d44b7c3 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/12-partial-upgrade-x/first.yaml @@ -0,0 +1,5 @@ +tasks: +- install.upgrade: + osd.0: +- ceph.restart: + daemons: [osd.0, osd.1, osd.2] diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/13-workload/rados_loadgen_big.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/13-workload/rados_loadgen_big.yaml new file mode 100644 index 00000000000..b0030093e72 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/13-workload/rados_loadgen_big.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rados/load-gen-big.sh diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/14-mona-upgrade-x/mona.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/14-mona-upgrade-x/mona.yaml new file mode 100644 index 00000000000..b6ffb3323d1 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/14-mona-upgrade-x/mona.yaml @@ -0,0 +1,5 @@ +tasks: +- ceph.restart: + daemons: [mon.a] + wait-for-healthy: false + wait-for-osds-up: true diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/15-workload/rbd-import-export.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/15-workload/rbd-import-export.yaml new file mode 100644 index 00000000000..49070827be0 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/15-workload/rbd-import-export.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/16-monb-upgrade-x/monb.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/16-monb-upgrade-x/monb.yaml new file mode 100644 index 00000000000..513890c41c0 --- /dev/null +++ 
b/qa/suites/upgrade/dumpling-firefly-x/stress-split/16-monb-upgrade-x/monb.yaml @@ -0,0 +1,5 @@ +tasks: +- ceph.restart: + daemons: [mon.b] + wait-for-healthy: false + wait-for-osds-up: true diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/17-workload/readwrite.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/17-workload/readwrite.yaml new file mode 100644 index 00000000000..c53e52b0872 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/17-workload/readwrite.yaml @@ -0,0 +1,9 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + op_weights: + read: 45 + write: 45 + delete: 10 diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/18-monc-upgrade-x/monc.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/18-monc-upgrade-x/monc.yaml new file mode 100644 index 00000000000..e9273236ba3 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/18-monc-upgrade-x/monc.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph.restart: + daemons: [mon.c] + wait-for-healthy: false + wait-for-osds-up: true +- ceph.wait_for_mon_quorum: [a, b, c] diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/19-workload/radosbench.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/19-workload/radosbench.yaml new file mode 100644 index 00000000000..3940870fce0 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/19-workload/radosbench.yaml @@ -0,0 +1,4 @@ +tasks: +- radosbench: + clients: [client.0] + time: 1800 diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/20-osds-upgrade-x/osds_secondhalf.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/20-osds-upgrade-x/osds_secondhalf.yaml new file mode 100644 index 00000000000..88d4bb5ec98 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/20-osds-upgrade-x/osds_secondhalf.yaml @@ -0,0 +1,5 @@ +tasks: +- install.upgrade: + osd.3: +- ceph.restart: + daemons: [osd.3, osd.4, osd.5] diff --git 
a/qa/suites/upgrade/dumpling-firefly-x/stress-split/21-final-workload/+ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/21-final-workload/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/21-final-workload/rados_stress_watch.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/21-final-workload/rados_stress_watch.yaml new file mode 100644 index 00000000000..0e1ba010c5b --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/21-final-workload/rados_stress_watch.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rados/stress_watch.sh diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/21-final-workload/rbd_cls_tests.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/21-final-workload/rbd_cls_tests.yaml new file mode 100644 index 00000000000..9ccd57c4a82 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/21-final-workload/rbd_cls_tests.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - cls/test_cls_rbd.sh diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/21-final-workload/rgw-swift.yaml b/qa/suites/upgrade/dumpling-firefly-x/stress-split/21-final-workload/rgw-swift.yaml new file mode 100644 index 00000000000..0d79fb621ea --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/21-final-workload/rgw-swift.yaml @@ -0,0 +1,8 @@ +tasks: +- rgw: + client.0: + default_idle_timeout: 300 +- swift: + client.0: + rgw_server: client.0 + diff --git a/qa/suites/upgrade/dumpling-firefly-x/stress-split/distros b/qa/suites/upgrade/dumpling-firefly-x/stress-split/distros new file mode 120000 index 00000000000..79010c36a59 --- /dev/null +++ b/qa/suites/upgrade/dumpling-firefly-x/stress-split/distros @@ -0,0 +1 @@ +../../../../distros/supported \ No newline at end of file diff --git a/qa/suites/upgrade/dumpling-giant-x/parallel/6-final-workload/ec-rados-plugin=jerasure-k=2-m=1.yaml 
b/qa/suites/upgrade/dumpling-giant-x/parallel/6-final-workload/ec-rados-plugin=jerasure-k=2-m=1.yaml new file mode 120000 index 00000000000..4baff9bdd2f --- /dev/null +++ b/qa/suites/upgrade/dumpling-giant-x/parallel/6-final-workload/ec-rados-plugin=jerasure-k=2-m=1.yaml @@ -0,0 +1 @@ +../../../../../erasure-code/ec-rados-plugin=jerasure-k=2-m=1.yaml \ No newline at end of file diff --git a/qa/suites/upgrade/dumpling-x/parallel/% b/qa/suites/upgrade/dumpling-x/parallel/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/dumpling-x/parallel/0-cluster/start.yaml b/qa/suites/upgrade/dumpling-x/parallel/0-cluster/start.yaml new file mode 100644 index 00000000000..c39d9dfeca9 --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/parallel/0-cluster/start.yaml @@ -0,0 +1,20 @@ +roles: +- - mon.a + - mds.a + - osd.0 + - osd.1 +- - mon.b + - mon.c + - osd.2 + - osd.3 +- - client.0 + - client.1 +overrides: + ceph: + conf: + mon: + mon warn on legacy crush tunables: false + log-whitelist: + - scrub mismatch + - ScrubResult + - failed to encode map diff --git a/qa/suites/upgrade/dumpling-x/parallel/1-dumpling-install/dumpling.yaml b/qa/suites/upgrade/dumpling-x/parallel/1-dumpling-install/dumpling.yaml new file mode 100644 index 00000000000..adbdedee518 --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/parallel/1-dumpling-install/dumpling.yaml @@ -0,0 +1,11 @@ +tasks: +- install: + branch: dumpling +- print: "**** done install" +- ceph: + fs: xfs +- print: "**** done ceph" +- parallel: + - workload + - upgrade-sequence +- print: "**** done parallel" diff --git a/qa/suites/upgrade/dumpling-x/parallel/2-workload/rados_api.yaml b/qa/suites/upgrade/dumpling-x/parallel/2-workload/rados_api.yaml new file mode 100644 index 00000000000..cd820a8a711 --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/parallel/2-workload/rados_api.yaml @@ -0,0 +1,9 @@ +workload: + sequential: + - workunit: + branch: dumpling + clients: + client.0: + - 
rados/test-upgrade-firefly.sh + - cls + diff --git a/qa/suites/upgrade/dumpling-x/parallel/2-workload/rados_loadgenbig.yaml b/qa/suites/upgrade/dumpling-x/parallel/2-workload/rados_loadgenbig.yaml new file mode 100644 index 00000000000..cc1ef874cb0 --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/parallel/2-workload/rados_loadgenbig.yaml @@ -0,0 +1,7 @@ +workload: + sequential: + - workunit: + branch: dumpling + clients: + client.0: + - rados/load-gen-big.sh diff --git a/qa/suites/upgrade/dumpling-x/parallel/2-workload/test_rbd_api.yaml b/qa/suites/upgrade/dumpling-x/parallel/2-workload/test_rbd_api.yaml new file mode 100644 index 00000000000..36ffa27ec3f --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/parallel/2-workload/test_rbd_api.yaml @@ -0,0 +1,7 @@ +workload: + sequential: + - workunit: + branch: dumpling + clients: + client.0: + - rbd/test_librbd.sh diff --git a/qa/suites/upgrade/dumpling-x/parallel/2-workload/test_rbd_python.yaml b/qa/suites/upgrade/dumpling-x/parallel/2-workload/test_rbd_python.yaml new file mode 100644 index 00000000000..e704a9794b9 --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/parallel/2-workload/test_rbd_python.yaml @@ -0,0 +1,7 @@ +workload: + sequential: + - workunit: + branch: dumpling + clients: + client.0: + - rbd/test_librbd_python.sh diff --git a/qa/suites/upgrade/dumpling-x/parallel/3-upgrade-sequence/upgrade-all.yaml b/qa/suites/upgrade/dumpling-x/parallel/3-upgrade-sequence/upgrade-all.yaml new file mode 100644 index 00000000000..f5d10cdfcab --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/parallel/3-upgrade-sequence/upgrade-all.yaml @@ -0,0 +1,6 @@ +upgrade-sequence: + sequential: + - install.upgrade: + mon.a: + mon.b: + - ceph.restart: [mon.a, mon.b, mon.c, mds.a, osd.0, osd.1, osd.2, osd.3] diff --git a/qa/suites/upgrade/dumpling-x/parallel/3-upgrade-sequence/upgrade-mon-osd-mds.yaml b/qa/suites/upgrade/dumpling-x/parallel/3-upgrade-sequence/upgrade-mon-osd-mds.yaml new file mode 100644 index 00000000000..fcb61b1cef2 
--- /dev/null +++ b/qa/suites/upgrade/dumpling-x/parallel/3-upgrade-sequence/upgrade-mon-osd-mds.yaml @@ -0,0 +1,33 @@ +upgrade-sequence: + sequential: + - install.upgrade: + mon.a: + mon.b: + - ceph.restart: + daemons: [mon.a] + wait-for-healthy: false + wait-for-osds-up: true + - sleep: + duration: 60 + - ceph.restart: + daemons: [mon.b] + wait-for-healthy: false + wait-for-osds-up: true + - sleep: + duration: 60 + - ceph.restart: [mon.c] + - sleep: + duration: 60 + - ceph.restart: [osd.0] + - sleep: + duration: 60 + - ceph.restart: [osd.1] + - sleep: + duration: 60 + - ceph.restart: [osd.2] + - sleep: + duration: 60 + - ceph.restart: [osd.3] + - sleep: + duration: 60 + - ceph.restart: [mds.a] diff --git a/qa/suites/upgrade/dumpling-x/parallel/4-final-upgrade/client.yaml b/qa/suites/upgrade/dumpling-x/parallel/4-final-upgrade/client.yaml new file mode 100644 index 00000000000..cf35d41e6c5 --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/parallel/4-final-upgrade/client.yaml @@ -0,0 +1,4 @@ +tasks: + - install.upgrade: + client.0: + - print: "**** done install.upgrade" diff --git a/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/ec-rados-plugin=jerasure-k=2-m=1.yaml b/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/ec-rados-plugin=jerasure-k=2-m=1.yaml new file mode 120000 index 00000000000..4baff9bdd2f --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/ec-rados-plugin=jerasure-k=2-m=1.yaml @@ -0,0 +1 @@ +../../../../../erasure-code/ec-rados-plugin=jerasure-k=2-m=1.yaml \ No newline at end of file diff --git a/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml b/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml new file mode 120000 index 00000000000..4b9d9a44a24 --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml @@ -0,0 +1 @@ 
+../../../../../erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml \ No newline at end of file diff --git a/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/rados-snaps-few-objects.yaml b/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/rados-snaps-few-objects.yaml new file mode 100644 index 00000000000..40f66da37f2 --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/rados-snaps-few-objects.yaml @@ -0,0 +1,12 @@ +tasks: + - rados: + clients: [client.1] + ops: 4000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/rados_loadgenmix.yaml b/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/rados_loadgenmix.yaml new file mode 100644 index 00000000000..faa96ed24d5 --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/rados_loadgenmix.yaml @@ -0,0 +1,5 @@ +tasks: + - workunit: + clients: + client.1: + - rados/load-gen-mix.sh diff --git a/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/rados_mon_thrash.yaml b/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/rados_mon_thrash.yaml new file mode 100644 index 00000000000..88019bef17a --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/rados_mon_thrash.yaml @@ -0,0 +1,8 @@ +tasks: + - mon_thrash: + revive_delay: 20 + thrash_delay: 1 + - workunit: + clients: + client.1: + - rados/test.sh diff --git a/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/rbd_cls.yaml b/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/rbd_cls.yaml new file mode 100644 index 00000000000..4ef47768237 --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/rbd_cls.yaml @@ -0,0 +1,6 @@ +tasks: + - workunit: + clients: + client.1: + - cls/test_cls_rbd.sh + diff --git a/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/rbd_import_export.yaml 
b/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/rbd_import_export.yaml new file mode 100644 index 00000000000..6c40377324d --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/rbd_import_export.yaml @@ -0,0 +1,7 @@ +tasks: + - workunit: + clients: + client.1: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format diff --git a/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/rgw_s3tests.yaml b/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/rgw_s3tests.yaml new file mode 100644 index 00000000000..53ceb786ba0 --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/rgw_s3tests.yaml @@ -0,0 +1,6 @@ +tasks: + - rgw: [client.1] + - s3tests: + client.1: + rgw_server: client.1 + branch: dumpling diff --git a/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/rgw_swift.yaml b/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/rgw_swift.yaml new file mode 100644 index 00000000000..445224cef2d --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/parallel/5-final-workload/rgw_swift.yaml @@ -0,0 +1,5 @@ +tasks: + - rgw: [client.1] + - swift: + client.1: + rgw_server: client.1 diff --git a/qa/suites/upgrade/dumpling-x/parallel/distros b/qa/suites/upgrade/dumpling-x/parallel/distros new file mode 120000 index 00000000000..79010c36a59 --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/parallel/distros @@ -0,0 +1 @@ +../../../../distros/supported \ No newline at end of file diff --git a/qa/suites/upgrade/dumpling-x/stress-split/% b/qa/suites/upgrade/dumpling-x/stress-split/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/dumpling-x/stress-split/0-cluster/start.yaml b/qa/suites/upgrade/dumpling-x/stress-split/0-cluster/start.yaml new file mode 100644 index 00000000000..a1b6b303f33 --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/stress-split/0-cluster/start.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + conf: + mon: + mon warn on legacy crush tunables: false + log-whitelist: + 
- failed to encode map +roles: +- - mon.a + - mon.b + - mds.a + - osd.0 + - osd.1 + - osd.2 + - mon.c +- - osd.3 + - osd.4 + - osd.5 +- - client.0 diff --git a/qa/suites/upgrade/dumpling-x/stress-split/1-dumpling-install/dumpling.yaml b/qa/suites/upgrade/dumpling-x/stress-split/1-dumpling-install/dumpling.yaml new file mode 100644 index 00000000000..c98631e2bbd --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/stress-split/1-dumpling-install/dumpling.yaml @@ -0,0 +1,5 @@ +tasks: +- install: + branch: dumpling +- ceph: + fs: xfs diff --git a/qa/suites/upgrade/dumpling-x/stress-split/2-partial-upgrade/firsthalf.yaml b/qa/suites/upgrade/dumpling-x/stress-split/2-partial-upgrade/firsthalf.yaml new file mode 100644 index 00000000000..312df6e21c6 --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/stress-split/2-partial-upgrade/firsthalf.yaml @@ -0,0 +1,5 @@ +tasks: +- install.upgrade: + osd.0: +- ceph.restart: + daemons: [osd.0, osd.1, osd.2] diff --git a/qa/suites/upgrade/dumpling-x/stress-split/3-thrash/default.yaml b/qa/suites/upgrade/dumpling-x/stress-split/3-thrash/default.yaml new file mode 100644 index 00000000000..a85510eb6fa --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/stress-split/3-thrash/default.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost + - log bound mismatch +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 + thrash_primary_affinity: false diff --git a/qa/suites/upgrade/dumpling-x/stress-split/4-mon/mona.yaml b/qa/suites/upgrade/dumpling-x/stress-split/4-mon/mona.yaml new file mode 100644 index 00000000000..b6ffb3323d1 --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/stress-split/4-mon/mona.yaml @@ -0,0 +1,5 @@ +tasks: +- ceph.restart: + daemons: [mon.a] + wait-for-healthy: false + wait-for-osds-up: true diff --git a/qa/suites/upgrade/dumpling-x/stress-split/5-workload/rados_api_tests.yaml 
b/qa/suites/upgrade/dumpling-x/stress-split/5-workload/rados_api_tests.yaml new file mode 100644 index 00000000000..7b2c72cbb2e --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/stress-split/5-workload/rados_api_tests.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + branch: dumpling + clients: + client.0: + - rados/test-upgrade-firefly.sh diff --git a/qa/suites/upgrade/dumpling-x/stress-split/5-workload/rbd-cls.yaml b/qa/suites/upgrade/dumpling-x/stress-split/5-workload/rbd-cls.yaml new file mode 100644 index 00000000000..db3dff7fc5c --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/stress-split/5-workload/rbd-cls.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + branch: dumpling + clients: + client.0: + - cls/test_cls_rbd.sh diff --git a/qa/suites/upgrade/dumpling-x/stress-split/5-workload/rbd-import-export.yaml b/qa/suites/upgrade/dumpling-x/stress-split/5-workload/rbd-import-export.yaml new file mode 100644 index 00000000000..a5a964ce13b --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/stress-split/5-workload/rbd-import-export.yaml @@ -0,0 +1,8 @@ +tasks: +- workunit: + branch: dumpling + clients: + client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format diff --git a/qa/suites/upgrade/dumpling-x/stress-split/5-workload/readwrite.yaml b/qa/suites/upgrade/dumpling-x/stress-split/5-workload/readwrite.yaml new file mode 100644 index 00000000000..c53e52b0872 --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/stress-split/5-workload/readwrite.yaml @@ -0,0 +1,9 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + op_weights: + read: 45 + write: 45 + delete: 10 diff --git a/qa/suites/upgrade/dumpling-x/stress-split/5-workload/snaps-few-objects.yaml b/qa/suites/upgrade/dumpling-x/stress-split/5-workload/snaps-few-objects.yaml new file mode 100644 index 00000000000..c54039766c0 --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/stress-split/5-workload/snaps-few-objects.yaml @@ -0,0 +1,12 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 
50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/upgrade/dumpling-x/stress-split/6-next-mon/monb.yaml b/qa/suites/upgrade/dumpling-x/stress-split/6-next-mon/monb.yaml new file mode 100644 index 00000000000..513890c41c0 --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/stress-split/6-next-mon/monb.yaml @@ -0,0 +1,5 @@ +tasks: +- ceph.restart: + daemons: [mon.b] + wait-for-healthy: false + wait-for-osds-up: true diff --git a/qa/suites/upgrade/dumpling-x/stress-split/7-workload/rados_api_tests.yaml b/qa/suites/upgrade/dumpling-x/stress-split/7-workload/rados_api_tests.yaml new file mode 100644 index 00000000000..7b2c72cbb2e --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/stress-split/7-workload/rados_api_tests.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + branch: dumpling + clients: + client.0: + - rados/test-upgrade-firefly.sh diff --git a/qa/suites/upgrade/dumpling-x/stress-split/7-workload/radosbench.yaml b/qa/suites/upgrade/dumpling-x/stress-split/7-workload/radosbench.yaml new file mode 100644 index 00000000000..3940870fce0 --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/stress-split/7-workload/radosbench.yaml @@ -0,0 +1,4 @@ +tasks: +- radosbench: + clients: [client.0] + time: 1800 diff --git a/qa/suites/upgrade/dumpling-x/stress-split/7-workload/rbd_api.yaml b/qa/suites/upgrade/dumpling-x/stress-split/7-workload/rbd_api.yaml new file mode 100644 index 00000000000..bbcde3e1559 --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/stress-split/7-workload/rbd_api.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + branch: dumpling + clients: + client.0: + - rbd/test_librbd.sh diff --git a/qa/suites/upgrade/dumpling-x/stress-split/8-next-mon/monc.yaml b/qa/suites/upgrade/dumpling-x/stress-split/8-next-mon/monc.yaml new file mode 100644 index 00000000000..73f22bd5f7c --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/stress-split/8-next-mon/monc.yaml @@ -0,0 +1,8 @@ +tasks: +- install.upgrade: + mon.c: 
+- ceph.restart: + daemons: [mon.c] + wait-for-healthy: false + wait-for-osds-up: true +- ceph.wait_for_mon_quorum: [a, b, c] diff --git a/qa/suites/upgrade/dumpling-x/stress-split/9-workload/+ b/qa/suites/upgrade/dumpling-x/stress-split/9-workload/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/dumpling-x/stress-split/9-workload/rados_api_tests.yaml b/qa/suites/upgrade/dumpling-x/stress-split/9-workload/rados_api_tests.yaml new file mode 100644 index 00000000000..7b2c72cbb2e --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/stress-split/9-workload/rados_api_tests.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + branch: dumpling + clients: + client.0: + - rados/test-upgrade-firefly.sh diff --git a/qa/suites/upgrade/dumpling-x/stress-split/9-workload/rbd-python.yaml b/qa/suites/upgrade/dumpling-x/stress-split/9-workload/rbd-python.yaml new file mode 100644 index 00000000000..1c5e53906f8 --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/stress-split/9-workload/rbd-python.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + branch: dumpling + clients: + client.0: + - rbd/test_librbd_python.sh diff --git a/qa/suites/upgrade/dumpling-x/stress-split/9-workload/rgw-s3tests.yaml b/qa/suites/upgrade/dumpling-x/stress-split/9-workload/rgw-s3tests.yaml new file mode 100644 index 00000000000..e44546dbcaa --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/stress-split/9-workload/rgw-s3tests.yaml @@ -0,0 +1,8 @@ +tasks: +- rgw: + default_idle_timeout: 300 + client.0: +- swift: + client.0: + rgw_server: client.0 + diff --git a/qa/suites/upgrade/dumpling-x/stress-split/9-workload/snaps-many-objects.yaml b/qa/suites/upgrade/dumpling-x/stress-split/9-workload/snaps-many-objects.yaml new file mode 100644 index 00000000000..9e311c946e1 --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/stress-split/9-workload/snaps-many-objects.yaml @@ -0,0 +1,12 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + op_weights: + read: 100 + write: 100 + delete: 50 + 
snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/upgrade/dumpling-x/stress-split/distros b/qa/suites/upgrade/dumpling-x/stress-split/distros new file mode 120000 index 00000000000..79010c36a59 --- /dev/null +++ b/qa/suites/upgrade/dumpling-x/stress-split/distros @@ -0,0 +1 @@ +../../../../distros/supported \ No newline at end of file diff --git a/qa/suites/upgrade/firefly-x/parallel/% b/qa/suites/upgrade/firefly-x/parallel/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/firefly-x/parallel/0-cluster/start.yaml b/qa/suites/upgrade/firefly-x/parallel/0-cluster/start.yaml new file mode 100644 index 00000000000..27e08f394e8 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/parallel/0-cluster/start.yaml @@ -0,0 +1,24 @@ +roles: +- - mon.a + - mds.a + - osd.0 + - osd.1 +- - mon.b + - mon.c + - osd.2 + - osd.3 +- - client.0 + - client.1 + - client.2 + - client.3 + - client.4 +overrides: + ceph: + log-whitelist: + - scrub mismatch + - ScrubResult + - failed to encode map + conf: + mon: + mon warn on legacy crush tunables: false + mon debug unsafe allow tier with nonempty snaps: true diff --git a/qa/suites/upgrade/firefly-x/parallel/1-firefly-install/firefly.yaml b/qa/suites/upgrade/firefly-x/parallel/1-firefly-install/firefly.yaml new file mode 100644 index 00000000000..9f281319604 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/parallel/1-firefly-install/firefly.yaml @@ -0,0 +1,11 @@ +tasks: +- install: + branch: firefly +- print: "**** done installing firefly" +- ceph: + fs: xfs +- print: "**** done ceph" +- parallel: + - workload + - upgrade-sequence +- print: "**** done parallel" diff --git a/qa/suites/upgrade/firefly-x/parallel/2-workload/+ b/qa/suites/upgrade/firefly-x/parallel/2-workload/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/firefly-x/parallel/2-workload/ec-rados-parallel.yaml b/qa/suites/upgrade/firefly-x/parallel/2-workload/ec-rados-parallel.yaml new file mode 
120000 index 00000000000..c4de249ed96 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/parallel/2-workload/ec-rados-parallel.yaml @@ -0,0 +1 @@ +../../../../../erasure-code/ec-rados-parallel.yaml \ No newline at end of file diff --git a/qa/suites/upgrade/firefly-x/parallel/2-workload/rados_api.yaml b/qa/suites/upgrade/firefly-x/parallel/2-workload/rados_api.yaml new file mode 100644 index 00000000000..f8c18a3cb7e --- /dev/null +++ b/qa/suites/upgrade/firefly-x/parallel/2-workload/rados_api.yaml @@ -0,0 +1,8 @@ +workload: + parallel: + - workunit: + branch: firefly + clients: + client.1: + - cls + - print: "**** done cls 2-workload" diff --git a/qa/suites/upgrade/firefly-x/parallel/2-workload/rados_loadgenbig.yaml b/qa/suites/upgrade/firefly-x/parallel/2-workload/rados_loadgenbig.yaml new file mode 100644 index 00000000000..6f1429acb77 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/parallel/2-workload/rados_loadgenbig.yaml @@ -0,0 +1,8 @@ +workload: + parallel: + - workunit: + branch: firefly + clients: + client.2: + - rados/load-gen-big.sh + - print: "**** done rados/load-gen-big.sh 2-workload" diff --git a/qa/suites/upgrade/firefly-x/parallel/2-workload/test_rbd_api.yaml b/qa/suites/upgrade/firefly-x/parallel/2-workload/test_rbd_api.yaml new file mode 100644 index 00000000000..0339c575096 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/parallel/2-workload/test_rbd_api.yaml @@ -0,0 +1,8 @@ +workload: + parallel: + - workunit: + branch: firefly + clients: + client.3: + - rbd/test_librbd.sh + - print: "**** done rbd/test_librbd.sh 2-workload" diff --git a/qa/suites/upgrade/firefly-x/parallel/2-workload/test_rbd_python.yaml b/qa/suites/upgrade/firefly-x/parallel/2-workload/test_rbd_python.yaml new file mode 100644 index 00000000000..fce9039ed41 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/parallel/2-workload/test_rbd_python.yaml @@ -0,0 +1,8 @@ +workload: + parallel: + - workunit: + branch: firefly + clients: + client.4: + - rbd/test_librbd_python.sh + - print: 
"**** done rbd/test_librbd_python.sh 2-workload" diff --git a/qa/suites/upgrade/firefly-x/parallel/3-upgrade-sequence/upgrade-all.yaml b/qa/suites/upgrade/firefly-x/parallel/3-upgrade-sequence/upgrade-all.yaml new file mode 100644 index 00000000000..f5d10cdfcab --- /dev/null +++ b/qa/suites/upgrade/firefly-x/parallel/3-upgrade-sequence/upgrade-all.yaml @@ -0,0 +1,6 @@ +upgrade-sequence: + sequential: + - install.upgrade: + mon.a: + mon.b: + - ceph.restart: [mon.a, mon.b, mon.c, mds.a, osd.0, osd.1, osd.2, osd.3] diff --git a/qa/suites/upgrade/firefly-x/parallel/3-upgrade-sequence/upgrade-mon-osd-mds.yaml b/qa/suites/upgrade/firefly-x/parallel/3-upgrade-sequence/upgrade-mon-osd-mds.yaml new file mode 100644 index 00000000000..0a87823a8d0 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/parallel/3-upgrade-sequence/upgrade-mon-osd-mds.yaml @@ -0,0 +1,35 @@ +upgrade-sequence: + sequential: + - install.upgrade: + mon.a: + - print: "**** done install.upgrade mon.a to the version from teuthology-suite arg" + - ceph.restart: + daemons: [mon.a] + wait-for-healthy: true + - sleep: + duration: 60 + - ceph.restart: + daemons: [osd.0, osd.1] + wait-for-healthy: true + - sleep: + duration: 60 + - ceph.restart: [mds.a] + - sleep: + duration: 60 + - print: "**** running mixed versions of osds and mons" + - exec: + mon.b: + - ceph osd crush tunables firefly + - install.upgrade: + mon.b: + - print: "**** done install.upgrade mon.b to the version from teuthology-suite arg" + - ceph.restart: + daemons: [mon.b, mon.c] + wait-for-healthy: true + - sleep: + duration: 60 + - ceph.restart: + daemons: [osd.2, osd.3] + wait-for-healthy: true + - sleep: + duration: 60 diff --git a/qa/suites/upgrade/firefly-x/parallel/4-final-workload/+ b/qa/suites/upgrade/firefly-x/parallel/4-final-workload/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/firefly-x/parallel/4-final-workload/rados-snaps-few-objects.yaml 
b/qa/suites/upgrade/firefly-x/parallel/4-final-workload/rados-snaps-few-objects.yaml new file mode 100644 index 00000000000..112260734b0 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/parallel/4-final-workload/rados-snaps-few-objects.yaml @@ -0,0 +1,13 @@ +tasks: + - rados: + clients: [client.1] + ops: 4000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + - print: "**** done rados 4-final-workload" diff --git a/qa/suites/upgrade/firefly-x/parallel/4-final-workload/rados_loadgenmix.yaml b/qa/suites/upgrade/firefly-x/parallel/4-final-workload/rados_loadgenmix.yaml new file mode 100644 index 00000000000..d4a8006e906 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/parallel/4-final-workload/rados_loadgenmix.yaml @@ -0,0 +1,6 @@ +tasks: + - workunit: + clients: + client.1: + - rados/load-gen-mix.sh + - print: "**** done rados/load-gen-mix.sh 4-final-workload" diff --git a/qa/suites/upgrade/firefly-x/parallel/4-final-workload/rados_mon_thrash.yaml b/qa/suites/upgrade/firefly-x/parallel/4-final-workload/rados_mon_thrash.yaml new file mode 100644 index 00000000000..f1e30f2419d --- /dev/null +++ b/qa/suites/upgrade/firefly-x/parallel/4-final-workload/rados_mon_thrash.yaml @@ -0,0 +1,10 @@ +tasks: + - mon_thrash: + revive_delay: 20 + thrash_delay: 1 + - print: "**** done mon_thrash 4-final-workload" + - workunit: + clients: + client.1: + - rados/test.sh + - print: "**** done rados/test.sh 4-final-workload" diff --git a/qa/suites/upgrade/firefly-x/parallel/4-final-workload/rbd_cls.yaml b/qa/suites/upgrade/firefly-x/parallel/4-final-workload/rbd_cls.yaml new file mode 100644 index 00000000000..ed75230497d --- /dev/null +++ b/qa/suites/upgrade/firefly-x/parallel/4-final-workload/rbd_cls.yaml @@ -0,0 +1,6 @@ +tasks: + - workunit: + clients: + client.1: + - cls/test_cls_rbd.sh + - print: "**** done cls/test_cls_rbd.sh 4-final-workload" diff --git 
a/qa/suites/upgrade/firefly-x/parallel/4-final-workload/rbd_import_export.yaml b/qa/suites/upgrade/firefly-x/parallel/4-final-workload/rbd_import_export.yaml new file mode 100644 index 00000000000..2c66c28a276 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/parallel/4-final-workload/rbd_import_export.yaml @@ -0,0 +1,8 @@ +tasks: + - workunit: + clients: + client.1: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format + - print: "**** done rbd/import_export.sh 4-final-workload" diff --git a/qa/suites/upgrade/firefly-x/parallel/4-final-workload/rgw_swift.yaml b/qa/suites/upgrade/firefly-x/parallel/4-final-workload/rgw_swift.yaml new file mode 100644 index 00000000000..18089bec6dd --- /dev/null +++ b/qa/suites/upgrade/firefly-x/parallel/4-final-workload/rgw_swift.yaml @@ -0,0 +1,7 @@ +tasks: + - rgw: [client.1] + - print: "**** done rgw 4-final-workload" + - swift: + client.1: + rgw_server: client.1 + - print: "**** done swift 4-final-workload" diff --git a/qa/suites/upgrade/firefly-x/parallel/distros b/qa/suites/upgrade/firefly-x/parallel/distros new file mode 120000 index 00000000000..79010c36a59 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/parallel/distros @@ -0,0 +1 @@ +../../../../distros/supported \ No newline at end of file diff --git a/qa/suites/upgrade/firefly-x/stress-split-erasure-code/% b/qa/suites/upgrade/firefly-x/stress-split-erasure-code/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/firefly-x/stress-split-erasure-code/0-cluster b/qa/suites/upgrade/firefly-x/stress-split-erasure-code/0-cluster new file mode 120000 index 00000000000..6dceffa2f5b --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split-erasure-code/0-cluster @@ -0,0 +1 @@ +../stress-split/0-cluster \ No newline at end of file diff --git a/qa/suites/upgrade/firefly-x/stress-split-erasure-code/1-firefly-install b/qa/suites/upgrade/firefly-x/stress-split-erasure-code/1-firefly-install new file mode 120000 index 00000000000..fad6450f52d 
--- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split-erasure-code/1-firefly-install @@ -0,0 +1 @@ +../stress-split/1-firefly-install \ No newline at end of file diff --git a/qa/suites/upgrade/firefly-x/stress-split-erasure-code/2-partial-upgrade b/qa/suites/upgrade/firefly-x/stress-split-erasure-code/2-partial-upgrade new file mode 120000 index 00000000000..c3a4e2c28e5 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split-erasure-code/2-partial-upgrade @@ -0,0 +1 @@ +../stress-split/2-partial-upgrade \ No newline at end of file diff --git a/qa/suites/upgrade/firefly-x/stress-split-erasure-code/3-thrash/default.yaml b/qa/suites/upgrade/firefly-x/stress-split-erasure-code/3-thrash/default.yaml new file mode 100644 index 00000000000..f41b9be4b02 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split-erasure-code/3-thrash/default.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost + - log bound mismatch +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 + min_in: 4 diff --git a/qa/suites/upgrade/firefly-x/stress-split-erasure-code/4-mon b/qa/suites/upgrade/firefly-x/stress-split-erasure-code/4-mon new file mode 120000 index 00000000000..0ea7dd5aa8d --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split-erasure-code/4-mon @@ -0,0 +1 @@ +../stress-split/4-mon \ No newline at end of file diff --git a/qa/suites/upgrade/firefly-x/stress-split-erasure-code/5-workload/ec-rados-default.yaml b/qa/suites/upgrade/firefly-x/stress-split-erasure-code/5-workload/ec-rados-default.yaml new file mode 120000 index 00000000000..a8a0ae69405 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split-erasure-code/5-workload/ec-rados-default.yaml @@ -0,0 +1 @@ +../../../../../erasure-code/ec-rados-default.yaml \ No newline at end of file diff --git a/qa/suites/upgrade/firefly-x/stress-split-erasure-code/6-next-mon 
b/qa/suites/upgrade/firefly-x/stress-split-erasure-code/6-next-mon new file mode 120000 index 00000000000..0f00e43e5fd --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split-erasure-code/6-next-mon @@ -0,0 +1 @@ +../stress-split/6-next-mon \ No newline at end of file diff --git a/qa/suites/upgrade/firefly-x/stress-split-erasure-code/8-next-mon b/qa/suites/upgrade/firefly-x/stress-split-erasure-code/8-next-mon new file mode 120000 index 00000000000..726c6a47a2e --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split-erasure-code/8-next-mon @@ -0,0 +1 @@ +../stress-split/8-next-mon \ No newline at end of file diff --git a/qa/suites/upgrade/firefly-x/stress-split-erasure-code/9-workload/ec-no-lrc.yaml b/qa/suites/upgrade/firefly-x/stress-split-erasure-code/9-workload/ec-no-lrc.yaml new file mode 100644 index 00000000000..9814a3fa18d --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split-erasure-code/9-workload/ec-no-lrc.yaml @@ -0,0 +1,9 @@ +# +# The lrc plugin cannot be used because some OSD are not upgraded +# yet and would crash. 
+# +tasks: +- exec: + mon.a: + - |- + ceph osd erasure-code-profile set profile-lrc plugin=lrc 2>&1 | grep "unsupported by:" diff --git a/qa/suites/upgrade/firefly-x/stress-split-erasure-code/9-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml b/qa/suites/upgrade/firefly-x/stress-split-erasure-code/9-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml new file mode 120000 index 00000000000..4b9d9a44a24 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split-erasure-code/9-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml @@ -0,0 +1 @@ +../../../../../erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml \ No newline at end of file diff --git a/qa/suites/upgrade/firefly-x/stress-split-erasure-code/distros b/qa/suites/upgrade/firefly-x/stress-split-erasure-code/distros new file mode 120000 index 00000000000..8d4309788e0 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split-erasure-code/distros @@ -0,0 +1 @@ +../stress-split/distros \ No newline at end of file diff --git a/qa/suites/upgrade/firefly-x/stress-split/% b/qa/suites/upgrade/firefly-x/stress-split/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/firefly-x/stress-split/0-cluster/start.yaml b/qa/suites/upgrade/firefly-x/stress-split/0-cluster/start.yaml new file mode 100644 index 00000000000..b106b212e6c --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split/0-cluster/start.yaml @@ -0,0 +1,30 @@ +overrides: + ceph: + log-whitelist: + - Missing health data for MDS + - failed to encode map + - soft lockup + - detected stalls on CPUs + conf: + mon: + mon warn on legacy crush tunables: false +roles: +- - mon.a + - mon.b + - mon.c + - mds.a + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - osd.4 + - osd.5 + - osd.6 +- - osd.7 + - osd.8 + - osd.9 + - osd.10 + - osd.11 + - osd.12 + - osd.13 +- - client.0 diff --git a/qa/suites/upgrade/firefly-x/stress-split/1-firefly-install/firefly.yaml b/qa/suites/upgrade/firefly-x/stress-split/1-firefly-install/firefly.yaml new file mode 100644 
index 00000000000..a3573817aa0 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split/1-firefly-install/firefly.yaml @@ -0,0 +1,5 @@ +tasks: +- install: + branch: firefly +- ceph: + fs: xfs diff --git a/qa/suites/upgrade/firefly-x/stress-split/2-partial-upgrade/firsthalf.yaml b/qa/suites/upgrade/firefly-x/stress-split/2-partial-upgrade/firsthalf.yaml new file mode 100644 index 00000000000..52ab10fe780 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split/2-partial-upgrade/firsthalf.yaml @@ -0,0 +1,5 @@ +tasks: +- install.upgrade: + osd.0: +- ceph.restart: + daemons: [osd.0, osd.1, osd.2, osd.3, osd.4, osd.5, osd.6] diff --git a/qa/suites/upgrade/firefly-x/stress-split/3-thrash/default.yaml b/qa/suites/upgrade/firefly-x/stress-split/3-thrash/default.yaml new file mode 100644 index 00000000000..21d4c752075 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split/3-thrash/default.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost + - log bound mismatch +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 diff --git a/qa/suites/upgrade/firefly-x/stress-split/4-mon/mona.yaml b/qa/suites/upgrade/firefly-x/stress-split/4-mon/mona.yaml new file mode 100644 index 00000000000..b6ffb3323d1 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split/4-mon/mona.yaml @@ -0,0 +1,5 @@ +tasks: +- ceph.restart: + daemons: [mon.a] + wait-for-healthy: false + wait-for-osds-up: true diff --git a/qa/suites/upgrade/firefly-x/stress-split/5-workload/+ b/qa/suites/upgrade/firefly-x/stress-split/5-workload/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/firefly-x/stress-split/5-workload/rbd-cls.yaml b/qa/suites/upgrade/firefly-x/stress-split/5-workload/rbd-cls.yaml new file mode 100644 index 00000000000..46c61b49a76 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split/5-workload/rbd-cls.yaml @@ -0,0 +1,6 @@ +tasks: +- 
workunit: + branch: firefly + clients: + client.0: + - cls/test_cls_rbd.sh diff --git a/qa/suites/upgrade/firefly-x/stress-split/5-workload/rbd-import-export.yaml b/qa/suites/upgrade/firefly-x/stress-split/5-workload/rbd-import-export.yaml new file mode 100644 index 00000000000..7fddb418a56 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split/5-workload/rbd-import-export.yaml @@ -0,0 +1,8 @@ +tasks: +- workunit: + branch: firefly + clients: + client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format diff --git a/qa/suites/upgrade/firefly-x/stress-split/5-workload/readwrite.yaml b/qa/suites/upgrade/firefly-x/stress-split/5-workload/readwrite.yaml new file mode 100644 index 00000000000..c53e52b0872 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split/5-workload/readwrite.yaml @@ -0,0 +1,9 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + op_weights: + read: 45 + write: 45 + delete: 10 diff --git a/qa/suites/upgrade/firefly-x/stress-split/5-workload/snaps-few-objects.yaml b/qa/suites/upgrade/firefly-x/stress-split/5-workload/snaps-few-objects.yaml new file mode 100644 index 00000000000..c54039766c0 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split/5-workload/snaps-few-objects.yaml @@ -0,0 +1,12 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/upgrade/firefly-x/stress-split/6-next-mon/monb.yaml b/qa/suites/upgrade/firefly-x/stress-split/6-next-mon/monb.yaml new file mode 100644 index 00000000000..513890c41c0 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split/6-next-mon/monb.yaml @@ -0,0 +1,5 @@ +tasks: +- ceph.restart: + daemons: [mon.b] + wait-for-healthy: false + wait-for-osds-up: true diff --git a/qa/suites/upgrade/firefly-x/stress-split/7-workload/+ b/qa/suites/upgrade/firefly-x/stress-split/7-workload/+ new file mode 100644 index 
00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/firefly-x/stress-split/7-workload/radosbench.yaml b/qa/suites/upgrade/firefly-x/stress-split/7-workload/radosbench.yaml new file mode 100644 index 00000000000..3940870fce0 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split/7-workload/radosbench.yaml @@ -0,0 +1,4 @@ +tasks: +- radosbench: + clients: [client.0] + time: 1800 diff --git a/qa/suites/upgrade/firefly-x/stress-split/7-workload/rbd_api.yaml b/qa/suites/upgrade/firefly-x/stress-split/7-workload/rbd_api.yaml new file mode 100644 index 00000000000..be46ba0a25c --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split/7-workload/rbd_api.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + branch: firefly + clients: + client.0: + - rbd/test_librbd.sh diff --git a/qa/suites/upgrade/firefly-x/stress-split/8-next-mon/monc.yaml b/qa/suites/upgrade/firefly-x/stress-split/8-next-mon/monc.yaml new file mode 100644 index 00000000000..e9273236ba3 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split/8-next-mon/monc.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph.restart: + daemons: [mon.c] + wait-for-healthy: false + wait-for-osds-up: true +- ceph.wait_for_mon_quorum: [a, b, c] diff --git a/qa/suites/upgrade/firefly-x/stress-split/9-workload/+ b/qa/suites/upgrade/firefly-x/stress-split/9-workload/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/firefly-x/stress-split/9-workload/rbd-python.yaml b/qa/suites/upgrade/firefly-x/stress-split/9-workload/rbd-python.yaml new file mode 100644 index 00000000000..5b5412661ee --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split/9-workload/rbd-python.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + branch: firefly + clients: + client.0: + - rbd/test_librbd_python.sh diff --git a/qa/suites/upgrade/firefly-x/stress-split/9-workload/rgw-swift.yaml b/qa/suites/upgrade/firefly-x/stress-split/9-workload/rgw-swift.yaml new file mode 100644 index 00000000000..bfaae1a943f --- /dev/null +++ 
b/qa/suites/upgrade/firefly-x/stress-split/9-workload/rgw-swift.yaml @@ -0,0 +1,7 @@ +tasks: +- rgw: + client.0: + default_idle_timeout: 300 +- swift: + client.0: + rgw_server: client.0 diff --git a/qa/suites/upgrade/firefly-x/stress-split/9-workload/snaps-many-objects.yaml b/qa/suites/upgrade/firefly-x/stress-split/9-workload/snaps-many-objects.yaml new file mode 100644 index 00000000000..9e311c946e1 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split/9-workload/snaps-many-objects.yaml @@ -0,0 +1,12 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/upgrade/firefly-x/stress-split/distros b/qa/suites/upgrade/firefly-x/stress-split/distros new file mode 120000 index 00000000000..79010c36a59 --- /dev/null +++ b/qa/suites/upgrade/firefly-x/stress-split/distros @@ -0,0 +1 @@ +../../../../distros/supported \ No newline at end of file diff --git a/qa/suites/upgrade/giant-x/parallel/% b/qa/suites/upgrade/giant-x/parallel/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/giant-x/parallel/0-cluster/start.yaml b/qa/suites/upgrade/giant-x/parallel/0-cluster/start.yaml new file mode 100644 index 00000000000..27e08f394e8 --- /dev/null +++ b/qa/suites/upgrade/giant-x/parallel/0-cluster/start.yaml @@ -0,0 +1,24 @@ +roles: +- - mon.a + - mds.a + - osd.0 + - osd.1 +- - mon.b + - mon.c + - osd.2 + - osd.3 +- - client.0 + - client.1 + - client.2 + - client.3 + - client.4 +overrides: + ceph: + log-whitelist: + - scrub mismatch + - ScrubResult + - failed to encode map + conf: + mon: + mon warn on legacy crush tunables: false + mon debug unsafe allow tier with nonempty snaps: true diff --git a/qa/suites/upgrade/giant-x/parallel/1-giant-install/giant.yaml b/qa/suites/upgrade/giant-x/parallel/1-giant-install/giant.yaml new file mode 100644 index 00000000000..b09a6b74b3d --- /dev/null +++ 
b/qa/suites/upgrade/giant-x/parallel/1-giant-install/giant.yaml @@ -0,0 +1,11 @@ +tasks: +- install: + branch: giant +- print: "**** done installing giant" +- ceph: + fs: xfs +- print: "**** done ceph" +- parallel: + - workload + - upgrade-sequence +- print: "**** done parallel" diff --git a/qa/suites/upgrade/giant-x/parallel/2-workload/parallel_run/+ b/qa/suites/upgrade/giant-x/parallel/2-workload/parallel_run/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/giant-x/parallel/2-workload/parallel_run/ec-rados-parallel.yaml b/qa/suites/upgrade/giant-x/parallel/2-workload/parallel_run/ec-rados-parallel.yaml new file mode 120000 index 00000000000..64b3cabfb0c --- /dev/null +++ b/qa/suites/upgrade/giant-x/parallel/2-workload/parallel_run/ec-rados-parallel.yaml @@ -0,0 +1 @@ +../../../../../../erasure-code/ec-rados-parallel.yaml \ No newline at end of file diff --git a/qa/suites/upgrade/giant-x/parallel/2-workload/parallel_run/rados_api.yaml b/qa/suites/upgrade/giant-x/parallel/2-workload/parallel_run/rados_api.yaml new file mode 100644 index 00000000000..3fa120f11f9 --- /dev/null +++ b/qa/suites/upgrade/giant-x/parallel/2-workload/parallel_run/rados_api.yaml @@ -0,0 +1,8 @@ +workload: + parallel: + - workunit: + branch: giant + clients: + client.1: + - cls + - print: "**** done cls 2-workload parallel" diff --git a/qa/suites/upgrade/giant-x/parallel/2-workload/parallel_run/rados_loadgenbig.yaml b/qa/suites/upgrade/giant-x/parallel/2-workload/parallel_run/rados_loadgenbig.yaml new file mode 100644 index 00000000000..976ef726bf7 --- /dev/null +++ b/qa/suites/upgrade/giant-x/parallel/2-workload/parallel_run/rados_loadgenbig.yaml @@ -0,0 +1,8 @@ +workload: + parallel: + - workunit: + branch: giant + clients: + client.2: + - rados/load-gen-big.sh + - print: "**** done rados/load-gen-big.sh 2-workload parallel" diff --git a/qa/suites/upgrade/giant-x/parallel/2-workload/parallel_run/test_cache-pool-snaps.yaml 
b/qa/suites/upgrade/giant-x/parallel/2-workload/parallel_run/test_cache-pool-snaps.yaml new file mode 100644 index 00000000000..676a9e8e89c --- /dev/null +++ b/qa/suites/upgrade/giant-x/parallel/2-workload/parallel_run/test_cache-pool-snaps.yaml @@ -0,0 +1,36 @@ +overrides: + ceph: + log-whitelist: + - must scrub before tier agent can activate +workload: + parallel: + - sequential: + - exec: + client.0: + - ceph osd pool create base 4 + - ceph osd pool create cache 4 + - ceph osd tier add base cache + - ceph osd tier cache-mode cache writeback + - ceph osd tier set-overlay base cache + - ceph osd pool set cache hit_set_type bloom + - ceph osd pool set cache hit_set_count 8 + - ceph osd pool set cache hit_set_period 3600 + - ceph osd pool set cache target_max_objects 250 + - rados: + clients: [client.0] + pools: [base] + ops: 4000 + objects: 500 + pool_snaps: true + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 + flush: 50 + try_flush: 50 + evict: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + - print: "**** done test_cache-pool-snaps 2-workload parallel_run" diff --git a/qa/suites/upgrade/giant-x/parallel/2-workload/parallel_run/test_rbd_api.yaml b/qa/suites/upgrade/giant-x/parallel/2-workload/parallel_run/test_rbd_api.yaml new file mode 100644 index 00000000000..8db5561196c --- /dev/null +++ b/qa/suites/upgrade/giant-x/parallel/2-workload/parallel_run/test_rbd_api.yaml @@ -0,0 +1,8 @@ +workload: + parallel: + - workunit: + branch: giant + clients: + client.3: + - rbd/test_librbd.sh + - print: "**** done rbd/test_librbd.sh 2-workload parallel" diff --git a/qa/suites/upgrade/giant-x/parallel/2-workload/parallel_run/test_rbd_python.yaml b/qa/suites/upgrade/giant-x/parallel/2-workload/parallel_run/test_rbd_python.yaml new file mode 100644 index 00000000000..0e9b05974f9 --- /dev/null +++ b/qa/suites/upgrade/giant-x/parallel/2-workload/parallel_run/test_rbd_python.yaml @@ -0,0 +1,8 @@ +workload: + parallel: + - workunit: + branch: giant + 
clients: + client.4: + - rbd/test_librbd_python.sh + - print: "**** done rbd/test_librbd_python.sh 2-workload parallel" diff --git a/qa/suites/upgrade/giant-x/parallel/2-workload/sequential_run/ec-rados-default.yaml b/qa/suites/upgrade/giant-x/parallel/2-workload/sequential_run/ec-rados-default.yaml new file mode 120000 index 00000000000..fc05f580a28 --- /dev/null +++ b/qa/suites/upgrade/giant-x/parallel/2-workload/sequential_run/ec-rados-default.yaml @@ -0,0 +1 @@ +../../../../../../erasure-code/ec-rados-default.yaml \ No newline at end of file diff --git a/qa/suites/upgrade/giant-x/parallel/2-workload/sequential_run/rados_api.yaml b/qa/suites/upgrade/giant-x/parallel/2-workload/sequential_run/rados_api.yaml new file mode 100644 index 00000000000..f9330607158 --- /dev/null +++ b/qa/suites/upgrade/giant-x/parallel/2-workload/sequential_run/rados_api.yaml @@ -0,0 +1,8 @@ +workload: + sequential: + - workunit: + branch: giant + clients: + client.1: + - cls + - print: "**** done cls 2-workload sequential" diff --git a/qa/suites/upgrade/giant-x/parallel/2-workload/sequential_run/rados_loadgenbig.yaml b/qa/suites/upgrade/giant-x/parallel/2-workload/sequential_run/rados_loadgenbig.yaml new file mode 100644 index 00000000000..7330f84fb27 --- /dev/null +++ b/qa/suites/upgrade/giant-x/parallel/2-workload/sequential_run/rados_loadgenbig.yaml @@ -0,0 +1,8 @@ +workload: + sequential: + - workunit: + branch: giant + clients: + client.2: + - rados/load-gen-big.sh + - print: "**** done rados/load-gen-big.sh 2-workload sequential" diff --git a/qa/suites/upgrade/giant-x/parallel/2-workload/sequential_run/test_cache-pool-snaps.yaml b/qa/suites/upgrade/giant-x/parallel/2-workload/sequential_run/test_cache-pool-snaps.yaml new file mode 100644 index 00000000000..d3b83fa7d95 --- /dev/null +++ b/qa/suites/upgrade/giant-x/parallel/2-workload/sequential_run/test_cache-pool-snaps.yaml @@ -0,0 +1,35 @@ +overrides: + ceph: + log-whitelist: + - must scrub before tier agent can activate 
+workload: + sequential: + - exec: + client.0: + - ceph osd pool create base 4 + - ceph osd pool create cache 4 + - ceph osd tier add base cache + - ceph osd tier cache-mode cache writeback + - ceph osd tier set-overlay base cache + - ceph osd pool set cache hit_set_type bloom + - ceph osd pool set cache hit_set_count 8 + - ceph osd pool set cache hit_set_period 3600 + - ceph osd pool set cache target_max_objects 250 + - rados: + clients: [client.0] + pools: [base] + ops: 4000 + objects: 500 + pool_snaps: true + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 + flush: 50 + try_flush: 50 + evict: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + - print: "**** done test_cache-pool-snaps 2-workload sequential_run" diff --git a/qa/suites/upgrade/giant-x/parallel/2-workload/sequential_run/test_rbd_api.yaml b/qa/suites/upgrade/giant-x/parallel/2-workload/sequential_run/test_rbd_api.yaml new file mode 100644 index 00000000000..d8c1539e8b4 --- /dev/null +++ b/qa/suites/upgrade/giant-x/parallel/2-workload/sequential_run/test_rbd_api.yaml @@ -0,0 +1,8 @@ +workload: + sequential: + - workunit: + branch: giant + clients: + client.3: + - rbd/test_librbd.sh + - print: "**** done rbd/test_librbd.sh 2-workload sequential" diff --git a/qa/suites/upgrade/giant-x/parallel/2-workload/sequential_run/test_rbd_python.yaml b/qa/suites/upgrade/giant-x/parallel/2-workload/sequential_run/test_rbd_python.yaml new file mode 100644 index 00000000000..e62bcb5515e --- /dev/null +++ b/qa/suites/upgrade/giant-x/parallel/2-workload/sequential_run/test_rbd_python.yaml @@ -0,0 +1,8 @@ +workload: + sequential: + - workunit: + branch: giant + clients: + client.4: + - rbd/test_librbd_python.sh + - print: "**** done rbd/test_librbd_python.sh 2-workload sequential" diff --git a/qa/suites/upgrade/giant-x/parallel/3-upgrade-sequence/upgrade-all.yaml b/qa/suites/upgrade/giant-x/parallel/3-upgrade-sequence/upgrade-all.yaml new file mode 100644 index 00000000000..3f74d7d14a3 --- 
/dev/null +++ b/qa/suites/upgrade/giant-x/parallel/3-upgrade-sequence/upgrade-all.yaml @@ -0,0 +1,8 @@ +upgrade-sequence: + sequential: + - install.upgrade: + mon.a: + mon.b: + - print: "**** done install.upgrade mon.a and mon.b" + - ceph.restart: [mon.a, mon.b, mon.c, mds.a, osd.0, osd.1, osd.2, osd.3] + - print: "**** done ceph.restart all" diff --git a/qa/suites/upgrade/giant-x/parallel/3-upgrade-sequence/upgrade-mon-osd-mds.yaml b/qa/suites/upgrade/giant-x/parallel/3-upgrade-sequence/upgrade-mon-osd-mds.yaml new file mode 100644 index 00000000000..c39a5687e26 --- /dev/null +++ b/qa/suites/upgrade/giant-x/parallel/3-upgrade-sequence/upgrade-mon-osd-mds.yaml @@ -0,0 +1,37 @@ +upgrade-sequence: + sequential: + - install.upgrade: + mon.a: + - print: "**** done install.upgrade mon.a to the version from teuthology-suite arg" + - ceph.restart: + daemons: [mon.a] + wait-for-healthy: true + - sleep: + duration: 60 + - ceph.restart: + daemons: [osd.0, osd.1] + wait-for-healthy: true + - sleep: + duration: 60 + - ceph.restart: [mds.a] + - sleep: + duration: 60 + - print: "**** running mixed versions of osds and mons" +#do we need to use "ceph osd crush tunables giant" ? 
+ - exec: + mon.b: + - ceph osd crush tunables firefly + - print: "**** done ceph osd crush tunables firefly" + - install.upgrade: + mon.b: + - print: "**** done install.upgrade mon.b to the version from teuthology-suite arg" + - ceph.restart: + daemons: [mon.b, mon.c] + wait-for-healthy: true + - sleep: + duration: 60 + - ceph.restart: + daemons: [osd.2, osd.3] + wait-for-healthy: true + - sleep: + duration: 60 diff --git a/qa/suites/upgrade/giant-x/parallel/4-final-workload/+ b/qa/suites/upgrade/giant-x/parallel/4-final-workload/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/giant-x/parallel/4-final-workload/rados-snaps-few-objects.yaml b/qa/suites/upgrade/giant-x/parallel/4-final-workload/rados-snaps-few-objects.yaml new file mode 100644 index 00000000000..112260734b0 --- /dev/null +++ b/qa/suites/upgrade/giant-x/parallel/4-final-workload/rados-snaps-few-objects.yaml @@ -0,0 +1,13 @@ +tasks: + - rados: + clients: [client.1] + ops: 4000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + - print: "**** done rados 4-final-workload" diff --git a/qa/suites/upgrade/giant-x/parallel/4-final-workload/rados_loadgenmix.yaml b/qa/suites/upgrade/giant-x/parallel/4-final-workload/rados_loadgenmix.yaml new file mode 100644 index 00000000000..d4a8006e906 --- /dev/null +++ b/qa/suites/upgrade/giant-x/parallel/4-final-workload/rados_loadgenmix.yaml @@ -0,0 +1,6 @@ +tasks: + - workunit: + clients: + client.1: + - rados/load-gen-mix.sh + - print: "**** done rados/load-gen-mix.sh 4-final-workload" diff --git a/qa/suites/upgrade/giant-x/parallel/4-final-workload/rados_mon_thrash.yaml b/qa/suites/upgrade/giant-x/parallel/4-final-workload/rados_mon_thrash.yaml new file mode 100644 index 00000000000..f1e30f2419d --- /dev/null +++ b/qa/suites/upgrade/giant-x/parallel/4-final-workload/rados_mon_thrash.yaml @@ -0,0 +1,10 @@ +tasks: + - mon_thrash: + revive_delay: 20 + thrash_delay: 1 
+ - print: "**** done mon_thrash 4-final-workload" + - workunit: + clients: + client.1: + - rados/test.sh + - print: "**** done rados/test.sh 4-final-workload" diff --git a/qa/suites/upgrade/giant-x/parallel/4-final-workload/rbd_cls.yaml b/qa/suites/upgrade/giant-x/parallel/4-final-workload/rbd_cls.yaml new file mode 100644 index 00000000000..ed75230497d --- /dev/null +++ b/qa/suites/upgrade/giant-x/parallel/4-final-workload/rbd_cls.yaml @@ -0,0 +1,6 @@ +tasks: + - workunit: + clients: + client.1: + - cls/test_cls_rbd.sh + - print: "**** done cls/test_cls_rbd.sh 4-final-workload" diff --git a/qa/suites/upgrade/giant-x/parallel/4-final-workload/rbd_import_export.yaml b/qa/suites/upgrade/giant-x/parallel/4-final-workload/rbd_import_export.yaml new file mode 100644 index 00000000000..2c66c28a276 --- /dev/null +++ b/qa/suites/upgrade/giant-x/parallel/4-final-workload/rbd_import_export.yaml @@ -0,0 +1,8 @@ +tasks: + - workunit: + clients: + client.1: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format + - print: "**** done rbd/import_export.sh 4-final-workload" diff --git a/qa/suites/upgrade/giant-x/parallel/4-final-workload/rgw_swift.yaml b/qa/suites/upgrade/giant-x/parallel/4-final-workload/rgw_swift.yaml new file mode 100644 index 00000000000..18089bec6dd --- /dev/null +++ b/qa/suites/upgrade/giant-x/parallel/4-final-workload/rgw_swift.yaml @@ -0,0 +1,7 @@ +tasks: + - rgw: [client.1] + - print: "**** done rgw 4-final-workload" + - swift: + client.1: + rgw_server: client.1 + - print: "**** done swift 4-final-workload" diff --git a/qa/suites/upgrade/giant-x/parallel/distros b/qa/suites/upgrade/giant-x/parallel/distros new file mode 120000 index 00000000000..79010c36a59 --- /dev/null +++ b/qa/suites/upgrade/giant-x/parallel/distros @@ -0,0 +1 @@ +../../../../distros/supported \ No newline at end of file diff --git a/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/% b/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/% new file mode 100644 
index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/0-cluster b/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/0-cluster new file mode 120000 index 00000000000..6dceffa2f5b --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/0-cluster @@ -0,0 +1 @@ +../stress-split/0-cluster \ No newline at end of file diff --git a/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/1-giant-install b/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/1-giant-install new file mode 120000 index 00000000000..2e0b946d2d2 --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/1-giant-install @@ -0,0 +1 @@ +../stress-split/1-giant-install/ \ No newline at end of file diff --git a/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/2-partial-upgrade b/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/2-partial-upgrade new file mode 120000 index 00000000000..c3a4e2c28e5 --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/2-partial-upgrade @@ -0,0 +1 @@ +../stress-split/2-partial-upgrade \ No newline at end of file diff --git a/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/3-thrash/default.yaml b/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/3-thrash/default.yaml new file mode 100644 index 00000000000..a33d4e3f4e6 --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/3-thrash/default.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost + - log bound mismatch +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 + min_in: 4 +- print: "**** done thrashosds 3-thrash" diff --git a/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/4-mon b/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/4-mon new file mode 120000 index 00000000000..0ea7dd5aa8d --- 
/dev/null +++ b/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/4-mon @@ -0,0 +1 @@ +../stress-split/4-mon \ No newline at end of file diff --git a/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/5-workload/ec-rados-default.yaml b/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/5-workload/ec-rados-default.yaml new file mode 120000 index 00000000000..a8a0ae69405 --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/5-workload/ec-rados-default.yaml @@ -0,0 +1 @@ +../../../../../erasure-code/ec-rados-default.yaml \ No newline at end of file diff --git a/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/6-next-mon b/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/6-next-mon new file mode 120000 index 00000000000..0f00e43e5fd --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/6-next-mon @@ -0,0 +1 @@ +../stress-split/6-next-mon \ No newline at end of file diff --git a/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/8-next-mon b/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/8-next-mon new file mode 120000 index 00000000000..726c6a47a2e --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/8-next-mon @@ -0,0 +1 @@ +../stress-split/8-next-mon \ No newline at end of file diff --git a/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/9-workload/ec-rados-plugin=isa-k=2-m=1.yaml b/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/9-workload/ec-rados-plugin=isa-k=2-m=1.yaml new file mode 100644 index 00000000000..75c6275fdf7 --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/9-workload/ec-rados-plugin=isa-k=2-m=1.yaml @@ -0,0 +1,25 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + erasure_code_profile: + name: isaprofile + plugin: isa + k: 2 + m: 1 + technique: reed_sol_van + ruleset-failure-domain: osd + op_weights: + read: 100 + write: 0 + 
append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 +- print: "**** done ec-rados-plugin=isa-k=2-m=1 9-workload" diff --git a/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/distros/ubuntu_14.04.yaml b/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/distros/ubuntu_14.04.yaml new file mode 100644 index 00000000000..12d479fc32c --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/distros/ubuntu_14.04.yaml @@ -0,0 +1,3 @@ +os_type: ubuntu +os_version: "14.04" +arch: x86_64 diff --git a/qa/suites/upgrade/giant-x/stress-split-erasure-code/% b/qa/suites/upgrade/giant-x/stress-split-erasure-code/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/giant-x/stress-split-erasure-code/0-cluster b/qa/suites/upgrade/giant-x/stress-split-erasure-code/0-cluster new file mode 120000 index 00000000000..6dceffa2f5b --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split-erasure-code/0-cluster @@ -0,0 +1 @@ +../stress-split/0-cluster \ No newline at end of file diff --git a/qa/suites/upgrade/giant-x/stress-split-erasure-code/1-giant-install b/qa/suites/upgrade/giant-x/stress-split-erasure-code/1-giant-install new file mode 120000 index 00000000000..2e0b946d2d2 --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split-erasure-code/1-giant-install @@ -0,0 +1 @@ +../stress-split/1-giant-install/ \ No newline at end of file diff --git a/qa/suites/upgrade/giant-x/stress-split-erasure-code/2-partial-upgrade b/qa/suites/upgrade/giant-x/stress-split-erasure-code/2-partial-upgrade new file mode 120000 index 00000000000..c3a4e2c28e5 --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split-erasure-code/2-partial-upgrade @@ -0,0 +1 @@ +../stress-split/2-partial-upgrade \ No newline at end of file diff --git a/qa/suites/upgrade/giant-x/stress-split-erasure-code/3-thrash/default.yaml 
b/qa/suites/upgrade/giant-x/stress-split-erasure-code/3-thrash/default.yaml new file mode 100644 index 00000000000..a33d4e3f4e6 --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split-erasure-code/3-thrash/default.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost + - log bound mismatch +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 + min_in: 4 +- print: "**** done thrashosds 3-thrash" diff --git a/qa/suites/upgrade/giant-x/stress-split-erasure-code/4-mon b/qa/suites/upgrade/giant-x/stress-split-erasure-code/4-mon new file mode 120000 index 00000000000..0ea7dd5aa8d --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split-erasure-code/4-mon @@ -0,0 +1 @@ +../stress-split/4-mon \ No newline at end of file diff --git a/qa/suites/upgrade/giant-x/stress-split-erasure-code/5-workload/ec-rados-default.yaml b/qa/suites/upgrade/giant-x/stress-split-erasure-code/5-workload/ec-rados-default.yaml new file mode 120000 index 00000000000..a8a0ae69405 --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split-erasure-code/5-workload/ec-rados-default.yaml @@ -0,0 +1 @@ +../../../../../erasure-code/ec-rados-default.yaml \ No newline at end of file diff --git a/qa/suites/upgrade/giant-x/stress-split-erasure-code/6-next-mon b/qa/suites/upgrade/giant-x/stress-split-erasure-code/6-next-mon new file mode 120000 index 00000000000..0f00e43e5fd --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split-erasure-code/6-next-mon @@ -0,0 +1 @@ +../stress-split/6-next-mon \ No newline at end of file diff --git a/qa/suites/upgrade/giant-x/stress-split-erasure-code/8-next-mon b/qa/suites/upgrade/giant-x/stress-split-erasure-code/8-next-mon new file mode 120000 index 00000000000..726c6a47a2e --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split-erasure-code/8-next-mon @@ -0,0 +1 @@ +../stress-split/8-next-mon \ No newline at end of file diff --git 
a/qa/suites/upgrade/giant-x/stress-split-erasure-code/9-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml b/qa/suites/upgrade/giant-x/stress-split-erasure-code/9-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml new file mode 120000 index 00000000000..4b9d9a44a24 --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split-erasure-code/9-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml @@ -0,0 +1 @@ +../../../../../erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml \ No newline at end of file diff --git a/qa/suites/upgrade/giant-x/stress-split-erasure-code/distros b/qa/suites/upgrade/giant-x/stress-split-erasure-code/distros new file mode 120000 index 00000000000..8d4309788e0 --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split-erasure-code/distros @@ -0,0 +1 @@ +../stress-split/distros \ No newline at end of file diff --git a/qa/suites/upgrade/giant-x/stress-split/% b/qa/suites/upgrade/giant-x/stress-split/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/giant-x/stress-split/0-cluster/start.yaml b/qa/suites/upgrade/giant-x/stress-split/0-cluster/start.yaml new file mode 100644 index 00000000000..c27322a7e94 --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split/0-cluster/start.yaml @@ -0,0 +1,27 @@ +overrides: + ceph: + log-whitelist: + - failed to encode map + conf: + mon: + mon warn on legacy crush tunables: false +roles: +- - mon.a + - mon.b + - mon.c + - mds.a + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - osd.4 + - osd.5 + - osd.6 +- - osd.7 + - osd.8 + - osd.9 + - osd.10 + - osd.11 + - osd.12 + - osd.13 +- - client.0 diff --git a/qa/suites/upgrade/giant-x/stress-split/1-giant-install/giant.yaml b/qa/suites/upgrade/giant-x/stress-split/1-giant-install/giant.yaml new file mode 100644 index 00000000000..3ce313299f7 --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split/1-giant-install/giant.yaml @@ -0,0 +1,7 @@ +tasks: +- install: + branch: giant +- print: "**** done install giant" +- ceph: + fs: xfs +- print: "**** done 
ceph" diff --git a/qa/suites/upgrade/giant-x/stress-split/2-partial-upgrade/firsthalf.yaml b/qa/suites/upgrade/giant-x/stress-split/2-partial-upgrade/firsthalf.yaml new file mode 100644 index 00000000000..d42633e6dcb --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split/2-partial-upgrade/firsthalf.yaml @@ -0,0 +1,7 @@ +tasks: +- install.upgrade: + osd.0: +- print: "**** done install.upgrade osd.0" +- ceph.restart: + daemons: [osd.0, osd.1, osd.2, osd.3, osd.4, osd.5, osd.6] +- print: "**** done ceph.restart 1st half" diff --git a/qa/suites/upgrade/giant-x/stress-split/3-thrash/default.yaml b/qa/suites/upgrade/giant-x/stress-split/3-thrash/default.yaml new file mode 100644 index 00000000000..d99d0c87420 --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split/3-thrash/default.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost + - log bound mismatch +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 +- print: "**** done thrashosds 3-thrash" diff --git a/qa/suites/upgrade/giant-x/stress-split/4-mon/mona.yaml b/qa/suites/upgrade/giant-x/stress-split/4-mon/mona.yaml new file mode 100644 index 00000000000..7c75c102fc4 --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split/4-mon/mona.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph.restart: + daemons: [mon.a] + wait-for-healthy: false + wait-for-osds-up: true +- print: "**** done ceph.restart mon.a" diff --git a/qa/suites/upgrade/giant-x/stress-split/5-workload/+ b/qa/suites/upgrade/giant-x/stress-split/5-workload/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/giant-x/stress-split/5-workload/rbd-cls.yaml b/qa/suites/upgrade/giant-x/stress-split/5-workload/rbd-cls.yaml new file mode 100644 index 00000000000..9122be01659 --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split/5-workload/rbd-cls.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + branch: giant + clients: + client.0: + - 
cls/test_cls_rbd.sh +- print: "**** done cls/test_cls_rbd.sh 5-workload" diff --git a/qa/suites/upgrade/giant-x/stress-split/5-workload/rbd-import-export.yaml b/qa/suites/upgrade/giant-x/stress-split/5-workload/rbd-import-export.yaml new file mode 100644 index 00000000000..3c39990dea6 --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split/5-workload/rbd-import-export.yaml @@ -0,0 +1,9 @@ +tasks: +- workunit: + branch: giant + clients: + client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format +- print: "**** done rbd/import_export.sh 5-workload" diff --git a/qa/suites/upgrade/giant-x/stress-split/5-workload/readwrite.yaml b/qa/suites/upgrade/giant-x/stress-split/5-workload/readwrite.yaml new file mode 100644 index 00000000000..37c21483288 --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split/5-workload/readwrite.yaml @@ -0,0 +1,10 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + op_weights: + read: 45 + write: 45 + delete: 10 +- print: "**** done rados/readwrite 5-workload" diff --git a/qa/suites/upgrade/giant-x/stress-split/5-workload/snaps-few-objects.yaml b/qa/suites/upgrade/giant-x/stress-split/5-workload/snaps-few-objects.yaml new file mode 100644 index 00000000000..f01232a3cbd --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split/5-workload/snaps-few-objects.yaml @@ -0,0 +1,13 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 +- print: "**** done rados/snaps-few-objects 5-workload" diff --git a/qa/suites/upgrade/giant-x/stress-split/6-next-mon/monb.yaml b/qa/suites/upgrade/giant-x/stress-split/6-next-mon/monb.yaml new file mode 100644 index 00000000000..22e87c7ad21 --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split/6-next-mon/monb.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph.restart: + daemons: [mon.b] + wait-for-healthy: false + wait-for-osds-up: true +- print: "**** done ceph.restart 
mon.b 6-next-mon" diff --git a/qa/suites/upgrade/giant-x/stress-split/7-workload/+ b/qa/suites/upgrade/giant-x/stress-split/7-workload/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/giant-x/stress-split/7-workload/radosbench.yaml b/qa/suites/upgrade/giant-x/stress-split/7-workload/radosbench.yaml new file mode 100644 index 00000000000..3d87bb1c2c5 --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split/7-workload/radosbench.yaml @@ -0,0 +1,5 @@ +tasks: +- radosbench: + clients: [client.0] + time: 1800 +- print: "**** done radosbench 7-workload" diff --git a/qa/suites/upgrade/giant-x/stress-split/7-workload/rbd_api.yaml b/qa/suites/upgrade/giant-x/stress-split/7-workload/rbd_api.yaml new file mode 100644 index 00000000000..85536da0c1b --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split/7-workload/rbd_api.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + branch: giant + clients: + client.0: + - rbd/test_librbd.sh +- print: "**** done rbd/test_librbd.sh 7-workload" diff --git a/qa/suites/upgrade/giant-x/stress-split/8-next-mon/monc.yaml b/qa/suites/upgrade/giant-x/stress-split/8-next-mon/monc.yaml new file mode 100644 index 00000000000..61253685890 --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split/8-next-mon/monc.yaml @@ -0,0 +1,8 @@ +tasks: +- ceph.restart: + daemons: [mon.c] + wait-for-healthy: false + wait-for-osds-up: true +- print: "**** done ceph.restart mon.c 8-next-mon" +- ceph.wait_for_mon_quorum: [a, b, c] +- print: "**** done wait_for_mon_quorum 8-next-mon" diff --git a/qa/suites/upgrade/giant-x/stress-split/9-workload/+ b/qa/suites/upgrade/giant-x/stress-split/9-workload/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/giant-x/stress-split/9-workload/rbd-python.yaml b/qa/suites/upgrade/giant-x/stress-split/9-workload/rbd-python.yaml new file mode 100644 index 00000000000..34ece2940c7 --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split/9-workload/rbd-python.yaml @@ 
-0,0 +1,7 @@ +tasks: +- workunit: + branch: giant + clients: + client.0: + - rbd/test_librbd_python.sh +- print: "**** done rbd/test_librbd_python.sh 9-workload" diff --git a/qa/suites/upgrade/giant-x/stress-split/9-workload/rgw-swift.yaml b/qa/suites/upgrade/giant-x/stress-split/9-workload/rgw-swift.yaml new file mode 100644 index 00000000000..8f1416082bb --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split/9-workload/rgw-swift.yaml @@ -0,0 +1,9 @@ +tasks: +- rgw: + client.0: + default_idle_timeout: 300 +- print: "**** done rgw 9-workload" +- swift: + client.0: + rgw_server: client.0 +- print: "**** done swift 9-workload" diff --git a/qa/suites/upgrade/giant-x/stress-split/9-workload/snaps-many-objects.yaml b/qa/suites/upgrade/giant-x/stress-split/9-workload/snaps-many-objects.yaml new file mode 100644 index 00000000000..9e311c946e1 --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split/9-workload/snaps-many-objects.yaml @@ -0,0 +1,12 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/upgrade/giant-x/stress-split/distros b/qa/suites/upgrade/giant-x/stress-split/distros new file mode 120000 index 00000000000..79010c36a59 --- /dev/null +++ b/qa/suites/upgrade/giant-x/stress-split/distros @@ -0,0 +1 @@ +../../../../distros/supported \ No newline at end of file diff --git a/qa/suites/upgrade/giant/% b/qa/suites/upgrade/giant/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/giant/0-cluster/start.yaml b/qa/suites/upgrade/giant/0-cluster/start.yaml new file mode 100644 index 00000000000..5f2f9715929 --- /dev/null +++ b/qa/suites/upgrade/giant/0-cluster/start.yaml @@ -0,0 +1,20 @@ +overrides: + ceph: + log-whitelist: + - scrub + - scrub mismatch + - ScrubResult + fs: xfs +roles: +- - mon.a + - mds.a + - osd.0 + - osd.1 + - osd.2 +- - mon.b + - mon.c + - osd.3 + - osd.4 + - osd.5 + - 
client.0 + - client.1 diff --git a/qa/suites/upgrade/giant/1-install/latest_firefly_release.yaml b/qa/suites/upgrade/giant/1-install/latest_firefly_release.yaml new file mode 100644 index 00000000000..9ff4a900bf4 --- /dev/null +++ b/qa/suites/upgrade/giant/1-install/latest_firefly_release.yaml @@ -0,0 +1,11 @@ +tasks: +# change tag to the latest firefly released version +- install: + tag: v0.80.6 +- print: "**** done latest firefly install" +- ceph: + fs: xfs +- parallel: + - workload + - upgrade-sequence +- print: "**** done parallel" diff --git a/qa/suites/upgrade/giant/2-workload/blogbench.yaml b/qa/suites/upgrade/giant/2-workload/blogbench.yaml new file mode 100644 index 00000000000..93efdf1430e --- /dev/null +++ b/qa/suites/upgrade/giant/2-workload/blogbench.yaml @@ -0,0 +1,7 @@ +workload: + sequential: + - workunit: + clients: + client.0: + - suites/blogbench.sh + - print: "**** done suites/blogbench.sh" diff --git a/qa/suites/upgrade/giant/2-workload/rbd.yaml b/qa/suites/upgrade/giant/2-workload/rbd.yaml new file mode 100644 index 00000000000..8ffb0ea5269 --- /dev/null +++ b/qa/suites/upgrade/giant/2-workload/rbd.yaml @@ -0,0 +1,14 @@ +workload: + sequential: + - workunit: + clients: + client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format + - print: "**** done rbd/import_export.sh" + - workunit: + clients: + client.0: + - cls/test_cls_rbd.sh + - print: "**** done cls/test_cls_rbd.sh" diff --git a/qa/suites/upgrade/giant/2-workload/s3tests.yaml b/qa/suites/upgrade/giant/2-workload/s3tests.yaml new file mode 100644 index 00000000000..a5882acba43 --- /dev/null +++ b/qa/suites/upgrade/giant/2-workload/s3tests.yaml @@ -0,0 +1,9 @@ +workload: + sequential: + - rgw: [client.0] + - print: "**** done rgw: [client.0]" + - s3tests: + client.0: + force-branch: firefly-original + rgw_server: client.0 + - print: "**** done s3tests" diff --git a/qa/suites/upgrade/giant/2-workload/testrados.yaml b/qa/suites/upgrade/giant/2-workload/testrados.yaml new 
file mode 100644 index 00000000000..49339ecd044 --- /dev/null +++ b/qa/suites/upgrade/giant/2-workload/testrados.yaml @@ -0,0 +1,12 @@ +workload: + rados: + clients: [client.0] + ops: 2000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/upgrade/giant/3-upgrade-sequence/upgrade-mon-osd-mds.yaml b/qa/suites/upgrade/giant/3-upgrade-sequence/upgrade-mon-osd-mds.yaml new file mode 100644 index 00000000000..e87a8d1c37e --- /dev/null +++ b/qa/suites/upgrade/giant/3-upgrade-sequence/upgrade-mon-osd-mds.yaml @@ -0,0 +1,37 @@ +upgrade-sequence: + sequential: + - install.upgrade: + all: + branch: giant + - print: "**** done install.upgrade giant" + - ceph.restart: [mon.a] + - sleep: + duration: 60 + - ceph.restart: [mon.b] + - sleep: + duration: 60 + - ceph.restart: [mon.c] + - sleep: + duration: 60 + - ceph.restart: [mds.a] + - sleep: + duration: 60 + - ceph.restart: [osd.0] + - sleep: + duration: 30 + - ceph.restart: [osd.1] + - sleep: + duration: 30 + - ceph.restart: [osd.2] + - sleep: + duration: 30 + - ceph.restart: [osd.3] + - sleep: + duration: 30 + - ceph.restart: [osd.4] + - sleep: + duration: 30 + - ceph.restart: [osd.5] + - sleep: + duration: 30 + - print: "**** done ceph.restart all" diff --git a/qa/suites/upgrade/giant/3-upgrade-sequence/upgrade-osd-mon-mds.yaml b/qa/suites/upgrade/giant/3-upgrade-sequence/upgrade-osd-mon-mds.yaml new file mode 100644 index 00000000000..adcf1b6e83a --- /dev/null +++ b/qa/suites/upgrade/giant/3-upgrade-sequence/upgrade-osd-mon-mds.yaml @@ -0,0 +1,37 @@ +upgrade-sequence: + sequential: + - install.upgrade: + all: + branch: giant + - print: "**** done install.upgrade giant" + - ceph.restart: [osd.0] + - sleep: + duration: 30 + - ceph.restart: [osd.1] + - sleep: + duration: 30 + - ceph.restart: [osd.2] + - sleep: + duration: 30 + - ceph.restart: [osd.3] + - sleep: + duration: 30 + - ceph.restart: [osd.4] + - sleep: + duration: 30 + - 
ceph.restart: [osd.5] + - sleep: + duration: 60 + - ceph.restart: [mon.a] + - sleep: + duration: 60 + - ceph.restart: [mon.b] + - sleep: + duration: 60 + - ceph.restart: [mon.c] + - sleep: + duration: 60 + - ceph.restart: [mds.a] + - sleep: + duration: 60 + - print: "**** done ceph.restart all" diff --git a/qa/suites/upgrade/giant/4-final/+ b/qa/suites/upgrade/giant/4-final/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/giant/4-final/monthrash.yaml b/qa/suites/upgrade/giant/4-final/monthrash.yaml new file mode 100644 index 00000000000..7833c1c7734 --- /dev/null +++ b/qa/suites/upgrade/giant/4-final/monthrash.yaml @@ -0,0 +1,13 @@ +tasks: +- mon_thrash: + revive_delay: 20 + thrash_delay: 1 +- print: "**** done mon_thrash" +- ceph-fuse: +- print: "**** done ceph-fuse" +- workunit: + clients: + client.0: + - suites/dbench.sh +- print: "**** done suites/dbench.sh" + diff --git a/qa/suites/upgrade/giant/4-final/osdthrash.yaml b/qa/suites/upgrade/giant/4-final/osdthrash.yaml new file mode 100644 index 00000000000..44b5bcedbb6 --- /dev/null +++ b/qa/suites/upgrade/giant/4-final/osdthrash.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost + - log bound mismatch +tasks: +- sequential: + - thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 + - print: "**** done thrashosds" + - workunit: + clients: + client.0: + - suites/iogen.sh + - print: "**** done suites/iogen.sh" + diff --git a/qa/suites/upgrade/giant/4-final/testrgw.yaml b/qa/suites/upgrade/giant/4-final/testrgw.yaml new file mode 100644 index 00000000000..054b0ff2612 --- /dev/null +++ b/qa/suites/upgrade/giant/4-final/testrgw.yaml @@ -0,0 +1,7 @@ +tasks: +- sequential: + - rgw: [client.1] + - s3tests: + client.1: + rgw_server: client.1 + - print: "**** done s3tests" diff --git a/qa/suites/upgrade/giant/distros b/qa/suites/upgrade/giant/distros new file mode 120000 index 
00000000000..dd0d7f1d5bd --- /dev/null +++ b/qa/suites/upgrade/giant/distros @@ -0,0 +1 @@ +../../../distros/supported/ \ No newline at end of file diff --git a/qa/suites/upgrade/hammer/newer/% b/qa/suites/upgrade/hammer/newer/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/hammer/newer/0-cluster/start.yaml b/qa/suites/upgrade/hammer/newer/0-cluster/start.yaml new file mode 100644 index 00000000000..2f00028f52e --- /dev/null +++ b/qa/suites/upgrade/hammer/newer/0-cluster/start.yaml @@ -0,0 +1,24 @@ +overrides: + ceph: + log-whitelist: + - scrub + - scrub mismatch + - ScrubResult + - failed to encode + - soft lockup + - detected stalls on CPUs + fs: xfs +roles: +- - mon.a + - mds.a + - osd.0 + - osd.1 + - osd.2 +- - mon.b + - mon.c + - osd.3 + - osd.4 + - osd.5 + - client.0 +- - client.1 + - client.2 diff --git a/qa/suites/upgrade/hammer/newer/1-install/v0.94.2.yaml b/qa/suites/upgrade/hammer/newer/1-install/v0.94.2.yaml new file mode 100644 index 00000000000..c380658c3a0 --- /dev/null +++ b/qa/suites/upgrade/hammer/newer/1-install/v0.94.2.yaml @@ -0,0 +1,10 @@ +tasks: +- install: + tag: v0.94.2 +- print: "**** done v0.94.2 install" +- ceph: + fs: xfs +- parallel: + - workload + - upgrade-sequence +- print: "**** done parallel v0.94.2" diff --git a/qa/suites/upgrade/hammer/newer/1-install/v0.94.3.yaml b/qa/suites/upgrade/hammer/newer/1-install/v0.94.3.yaml new file mode 100644 index 00000000000..9d3fdc0fecd --- /dev/null +++ b/qa/suites/upgrade/hammer/newer/1-install/v0.94.3.yaml @@ -0,0 +1,10 @@ +tasks: +- install: + tag: v0.94.3 +- print: "**** done v0.94.3 install" +- ceph: + fs: xfs +- parallel: + - workload + - upgrade-sequence +- print: "**** done parallel v0.94.3" diff --git a/qa/suites/upgrade/hammer/newer/1-install/v0.94.4.yaml b/qa/suites/upgrade/hammer/newer/1-install/v0.94.4.yaml new file mode 100644 index 00000000000..9bea9f6bf6c --- /dev/null +++ b/qa/suites/upgrade/hammer/newer/1-install/v0.94.4.yaml @@ -0,0 +1,10 
@@ +tasks: +- install: + tag: v0.94.4 +- print: "**** done v0.94.4 install" +- ceph: + fs: xfs +- parallel: + - workload + - upgrade-sequence +- print: "**** done parallel v0.94.4" diff --git a/qa/suites/upgrade/hammer/newer/1-install/v0.94.6.yaml b/qa/suites/upgrade/hammer/newer/1-install/v0.94.6.yaml new file mode 100644 index 00000000000..419bdadacb5 --- /dev/null +++ b/qa/suites/upgrade/hammer/newer/1-install/v0.94.6.yaml @@ -0,0 +1,10 @@ +tasks: +- install: + tag: v0.94.6 +- print: "**** done v0.94.6 install" +- ceph: + fs: xfs +- parallel: + - workload + - upgrade-sequence +- print: "**** done parallel v0.94.6" diff --git a/qa/suites/upgrade/hammer/newer/2-workload/blogbench.yaml b/qa/suites/upgrade/hammer/newer/2-workload/blogbench.yaml new file mode 100644 index 00000000000..e3d652eafd5 --- /dev/null +++ b/qa/suites/upgrade/hammer/newer/2-workload/blogbench.yaml @@ -0,0 +1,7 @@ +workload: + sequential: + - workunit: + clients: + client.0: + - suites/blogbench.sh + - print: "**** done suites/blogbench.sh 2-workload" diff --git a/qa/suites/upgrade/hammer/newer/2-workload/rbd.yaml b/qa/suites/upgrade/hammer/newer/2-workload/rbd.yaml new file mode 100644 index 00000000000..d37b294e15d --- /dev/null +++ b/qa/suites/upgrade/hammer/newer/2-workload/rbd.yaml @@ -0,0 +1,9 @@ +workload: + sequential: + - workunit: + clients: + client.1: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format + - print: "**** done rbd/import_export.sh 2-workload" diff --git a/qa/suites/upgrade/hammer/newer/2-workload/s3tests.yaml b/qa/suites/upgrade/hammer/newer/2-workload/s3tests.yaml new file mode 100644 index 00000000000..086346a9ace --- /dev/null +++ b/qa/suites/upgrade/hammer/newer/2-workload/s3tests.yaml @@ -0,0 +1,9 @@ +workload: + sequential: + - rgw: [client.1] + - print: "**** done rgw: [client.1] 2-workload" + - s3tests: + client.1: + force-branch: hammer + rgw_server: client.1 + - print: "**** done s3tests 2-workload off hammer branch" diff --git 
a/qa/suites/upgrade/hammer/newer/2-workload/testrados.yaml b/qa/suites/upgrade/hammer/newer/2-workload/testrados.yaml new file mode 100644 index 00000000000..49339ecd044 --- /dev/null +++ b/qa/suites/upgrade/hammer/newer/2-workload/testrados.yaml @@ -0,0 +1,12 @@ +workload: + rados: + clients: [client.0] + ops: 2000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/upgrade/hammer/newer/3-upgrade-sequence/upgrade-mon-osd-mds.yaml b/qa/suites/upgrade/hammer/newer/3-upgrade-sequence/upgrade-mon-osd-mds.yaml new file mode 100644 index 00000000000..f0c62fda007 --- /dev/null +++ b/qa/suites/upgrade/hammer/newer/3-upgrade-sequence/upgrade-mon-osd-mds.yaml @@ -0,0 +1,37 @@ +upgrade-sequence: + sequential: + - install.upgrade: + mon.a: + mon.b: + - print: "**** done install.upgrade hammer" + - ceph.restart: [mon.a] + - sleep: + duration: 60 + - ceph.restart: [mon.b] + - sleep: + duration: 60 + - ceph.restart: [mon.c] + - sleep: + duration: 60 + - ceph.restart: [mds.a] + - sleep: + duration: 60 + - ceph.restart: [osd.0] + - sleep: + duration: 30 + - ceph.restart: [osd.1] + - sleep: + duration: 30 + - ceph.restart: [osd.2] + - sleep: + duration: 30 + - ceph.restart: [osd.3] + - sleep: + duration: 30 + - ceph.restart: [osd.4] + - sleep: + duration: 30 + - ceph.restart: [osd.5] + - sleep: + duration: 30 + - print: "**** done ceph.restart all" diff --git a/qa/suites/upgrade/hammer/newer/3-upgrade-sequence/upgrade-osd-mon-mds.yaml b/qa/suites/upgrade/hammer/newer/3-upgrade-sequence/upgrade-osd-mon-mds.yaml new file mode 100644 index 00000000000..e4df6c8a111 --- /dev/null +++ b/qa/suites/upgrade/hammer/newer/3-upgrade-sequence/upgrade-osd-mon-mds.yaml @@ -0,0 +1,37 @@ +upgrade-sequence: + sequential: + - install.upgrade: + mon.a: + mon.b: + - print: "**** done install.upgrade hammer" + - ceph.restart: [osd.0] + - sleep: + duration: 30 + - ceph.restart: [osd.1] + - sleep: + duration: 30 + - 
ceph.restart: [osd.2] + - sleep: + duration: 30 + - ceph.restart: [osd.3] + - sleep: + duration: 30 + - ceph.restart: [osd.4] + - sleep: + duration: 30 + - ceph.restart: [osd.5] + - sleep: + duration: 60 + - ceph.restart: [mon.a] + - sleep: + duration: 60 + - ceph.restart: [mon.b] + - sleep: + duration: 60 + - ceph.restart: [mon.c] + - sleep: + duration: 60 + - ceph.restart: [mds.a] + - sleep: + duration: 60 + - print: "**** done ceph.restart all" diff --git a/qa/suites/upgrade/hammer/newer/4-final/+ b/qa/suites/upgrade/hammer/newer/4-final/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/hammer/newer/4-final/monthrash.yaml b/qa/suites/upgrade/hammer/newer/4-final/monthrash.yaml new file mode 100644 index 00000000000..3774702f08a --- /dev/null +++ b/qa/suites/upgrade/hammer/newer/4-final/monthrash.yaml @@ -0,0 +1,13 @@ +tasks: +- mon_thrash: + revive_delay: 20 + thrash_delay: 1 +- print: "**** done mon_thrash 4-workload" +- ceph-fuse: +- print: "**** done ceph-fuse 4-workload" +- workunit: + clients: + client.0: + - suites/dbench.sh +- print: "**** done suites/dbench.sh 4-workload" + diff --git a/qa/suites/upgrade/hammer/newer/4-final/osdthrash.yaml b/qa/suites/upgrade/hammer/newer/4-final/osdthrash.yaml new file mode 100644 index 00000000000..b4740ada16c --- /dev/null +++ b/qa/suites/upgrade/hammer/newer/4-final/osdthrash.yaml @@ -0,0 +1,20 @@ +overrides: + ceph: + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost + - log bound mismatch + - failed to encode +tasks: +- sequential: + - thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 + - print: "**** done thrashosds 4-workload" + - workunit: + clients: + client.0: + - suites/iogen.sh + - print: "**** done suites/iogen.sh 4-workload" + diff --git a/qa/suites/upgrade/hammer/newer/4-final/testrgw.yaml b/qa/suites/upgrade/hammer/newer/4-final/testrgw.yaml new file mode 100644 index 00000000000..85689decb1c --- /dev/null +++ 
b/qa/suites/upgrade/hammer/newer/4-final/testrgw.yaml @@ -0,0 +1,9 @@ +tasks: +- sequential: + - rgw: [client.2] + - print: "**** done rgw: [client.2] 4-workload" + - s3tests: + client.2: + force-branch: hammer + rgw_server: client.2 + - print: "**** done s3tests 4-workload" diff --git a/qa/suites/upgrade/hammer/newer/distros b/qa/suites/upgrade/hammer/newer/distros new file mode 120000 index 00000000000..ca99fee94fa --- /dev/null +++ b/qa/suites/upgrade/hammer/newer/distros @@ -0,0 +1 @@ +../../../../distros/supported/ \ No newline at end of file diff --git a/qa/suites/upgrade/hammer/older/% b/qa/suites/upgrade/hammer/older/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/hammer/older/0-cluster/start.yaml b/qa/suites/upgrade/hammer/older/0-cluster/start.yaml new file mode 100644 index 00000000000..c1e3ea21410 --- /dev/null +++ b/qa/suites/upgrade/hammer/older/0-cluster/start.yaml @@ -0,0 +1,23 @@ +overrides: + ceph: + log-whitelist: + - scrub + - scrub mismatch + - ScrubResult + - failed to encode + - soft lockup + - detected stalls on CPUs + fs: xfs +roles: +- - mon.a + - mds.a + - osd.0 + - osd.1 + - osd.2 +- - mon.b + - mon.c + - osd.3 + - osd.4 + - osd.5 + - client.0 +- - client.1 diff --git a/qa/suites/upgrade/hammer/older/1-install/v0.94.1.yaml b/qa/suites/upgrade/hammer/older/1-install/v0.94.1.yaml new file mode 100644 index 00000000000..f2578e33dc6 --- /dev/null +++ b/qa/suites/upgrade/hammer/older/1-install/v0.94.1.yaml @@ -0,0 +1,10 @@ +tasks: +- install: + tag: v0.94.1 +- print: "**** done v0.94.1 install" +- ceph: + fs: xfs +- parallel: + - workload + - upgrade-sequence +- print: "**** done parallel v0.94.1" diff --git a/qa/suites/upgrade/hammer/older/1-install/v0.94.yaml b/qa/suites/upgrade/hammer/older/1-install/v0.94.yaml new file mode 100644 index 00000000000..7cab6c37f19 --- /dev/null +++ b/qa/suites/upgrade/hammer/older/1-install/v0.94.yaml @@ -0,0 +1,10 @@ +tasks: +- install: + tag: v0.94 +- print: "**** done 
v0.94 install" +- ceph: + fs: xfs +- parallel: + - workload + - upgrade-sequence +- print: "**** done parallel v0.94" diff --git a/qa/suites/upgrade/hammer/older/2-workload/blogbench.yaml b/qa/suites/upgrade/hammer/older/2-workload/blogbench.yaml new file mode 100644 index 00000000000..df5c9a7d7df --- /dev/null +++ b/qa/suites/upgrade/hammer/older/2-workload/blogbench.yaml @@ -0,0 +1,9 @@ +workload: + sequential: + - ceph-fuse: + - print: "**** done ceph-fuse 2-workload" + - workunit: + clients: + client.0: + - suites/blogbench.sh + - print: "**** done suites/blogbench.sh 2-workload" diff --git a/qa/suites/upgrade/hammer/older/2-workload/rbd.yaml b/qa/suites/upgrade/hammer/older/2-workload/rbd.yaml new file mode 100644 index 00000000000..d37b294e15d --- /dev/null +++ b/qa/suites/upgrade/hammer/older/2-workload/rbd.yaml @@ -0,0 +1,9 @@ +workload: + sequential: + - workunit: + clients: + client.1: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format + - print: "**** done rbd/import_export.sh 2-workload" diff --git a/qa/suites/upgrade/hammer/older/2-workload/testrados.yaml b/qa/suites/upgrade/hammer/older/2-workload/testrados.yaml new file mode 100644 index 00000000000..49339ecd044 --- /dev/null +++ b/qa/suites/upgrade/hammer/older/2-workload/testrados.yaml @@ -0,0 +1,12 @@ +workload: + rados: + clients: [client.0] + ops: 2000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/upgrade/hammer/older/3-upgrade-sequence/upgrade-mon-osd-mds.yaml b/qa/suites/upgrade/hammer/older/3-upgrade-sequence/upgrade-mon-osd-mds.yaml new file mode 100644 index 00000000000..f0c62fda007 --- /dev/null +++ b/qa/suites/upgrade/hammer/older/3-upgrade-sequence/upgrade-mon-osd-mds.yaml @@ -0,0 +1,37 @@ +upgrade-sequence: + sequential: + - install.upgrade: + mon.a: + mon.b: + - print: "**** done install.upgrade hammer" + - ceph.restart: [mon.a] + - sleep: + duration: 60 + - ceph.restart: [mon.b] 
+ - sleep: + duration: 60 + - ceph.restart: [mon.c] + - sleep: + duration: 60 + - ceph.restart: [mds.a] + - sleep: + duration: 60 + - ceph.restart: [osd.0] + - sleep: + duration: 30 + - ceph.restart: [osd.1] + - sleep: + duration: 30 + - ceph.restart: [osd.2] + - sleep: + duration: 30 + - ceph.restart: [osd.3] + - sleep: + duration: 30 + - ceph.restart: [osd.4] + - sleep: + duration: 30 + - ceph.restart: [osd.5] + - sleep: + duration: 30 + - print: "**** done ceph.restart all" diff --git a/qa/suites/upgrade/hammer/older/3-upgrade-sequence/upgrade-osd-mon-mds.yaml b/qa/suites/upgrade/hammer/older/3-upgrade-sequence/upgrade-osd-mon-mds.yaml new file mode 100644 index 00000000000..e4df6c8a111 --- /dev/null +++ b/qa/suites/upgrade/hammer/older/3-upgrade-sequence/upgrade-osd-mon-mds.yaml @@ -0,0 +1,37 @@ +upgrade-sequence: + sequential: + - install.upgrade: + mon.a: + mon.b: + - print: "**** done install.upgrade hammer" + - ceph.restart: [osd.0] + - sleep: + duration: 30 + - ceph.restart: [osd.1] + - sleep: + duration: 30 + - ceph.restart: [osd.2] + - sleep: + duration: 30 + - ceph.restart: [osd.3] + - sleep: + duration: 30 + - ceph.restart: [osd.4] + - sleep: + duration: 30 + - ceph.restart: [osd.5] + - sleep: + duration: 60 + - ceph.restart: [mon.a] + - sleep: + duration: 60 + - ceph.restart: [mon.b] + - sleep: + duration: 60 + - ceph.restart: [mon.c] + - sleep: + duration: 60 + - ceph.restart: [mds.a] + - sleep: + duration: 60 + - print: "**** done ceph.restart all" diff --git a/qa/suites/upgrade/hammer/older/4-final/+ b/qa/suites/upgrade/hammer/older/4-final/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/upgrade/hammer/older/4-final/monthrash.yaml b/qa/suites/upgrade/hammer/older/4-final/monthrash.yaml new file mode 100644 index 00000000000..3774702f08a --- /dev/null +++ b/qa/suites/upgrade/hammer/older/4-final/monthrash.yaml @@ -0,0 +1,13 @@ +tasks: +- mon_thrash: + revive_delay: 20 + thrash_delay: 1 +- print: "**** done mon_thrash 
4-workload" +- ceph-fuse: +- print: "**** done ceph-fuse 4-workload" +- workunit: + clients: + client.0: + - suites/dbench.sh +- print: "**** done suites/dbench.sh 4-workload" + diff --git a/qa/suites/upgrade/hammer/older/4-final/osdthrash.yaml b/qa/suites/upgrade/hammer/older/4-final/osdthrash.yaml new file mode 100644 index 00000000000..b4740ada16c --- /dev/null +++ b/qa/suites/upgrade/hammer/older/4-final/osdthrash.yaml @@ -0,0 +1,20 @@ +overrides: + ceph: + log-whitelist: + - wrongly marked me down + - objects unfound and apparently lost + - log bound mismatch + - failed to encode +tasks: +- sequential: + - thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 + - print: "**** done thrashosds 4-workload" + - workunit: + clients: + client.0: + - suites/iogen.sh + - print: "**** done suites/iogen.sh 4-workload" + diff --git a/qa/suites/upgrade/hammer/older/4-final/testrados.yaml b/qa/suites/upgrade/hammer/older/4-final/testrados.yaml new file mode 100644 index 00000000000..71865207eb0 --- /dev/null +++ b/qa/suites/upgrade/hammer/older/4-final/testrados.yaml @@ -0,0 +1,13 @@ +tasks: +- sequential: + - rados: + clients: [client.0] + ops: 2000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/upgrade/hammer/older/distros b/qa/suites/upgrade/hammer/older/distros new file mode 120000 index 00000000000..ca99fee94fa --- /dev/null +++ b/qa/suites/upgrade/hammer/older/distros @@ -0,0 +1 @@ +../../../../distros/supported/ \ No newline at end of file diff --git a/qa/tasks/__init__.py b/qa/tasks/__init__.py new file mode 100644 index 00000000000..9a7949a001e --- /dev/null +++ b/qa/tasks/__init__.py @@ -0,0 +1,6 @@ +import logging + +# Inherit teuthology's log level +teuthology_log = logging.getLogger('teuthology') +log = logging.getLogger(__name__) +log.setLevel(teuthology_log.level) diff --git a/qa/tasks/admin_socket.py b/qa/tasks/admin_socket.py new file mode 
100644 index 00000000000..44235385a8a --- /dev/null +++ b/qa/tasks/admin_socket.py @@ -0,0 +1,192 @@ +""" +Admin Socket task -- used in rados, powercycle, and smoke testing +""" +from cStringIO import StringIO + +import json +import logging +import os +import time + +from teuthology.orchestra import run +from teuthology import misc as teuthology +from teuthology.parallel import parallel + +log = logging.getLogger(__name__) + + +def task(ctx, config): + """ + Run an admin socket command, make sure the output is json, and run + a test program on it. The test program should read json from + stdin. This task succeeds if the test program exits with status 0. + + To run the same test on all clients:: + + tasks: + - ceph: + - rados: + - admin_socket: + all: + dump_requests: + test: http://example.com/script + + To restrict it to certain clients:: + + tasks: + - ceph: + - rados: [client.1] + - admin_socket: + client.1: + dump_requests: + test: http://example.com/script + + If an admin socket command has arguments, they can be specified as + a list:: + + tasks: + - ceph: + - rados: [client.0] + - admin_socket: + client.0: + dump_requests: + test: http://example.com/script + help: + test: http://example.com/test_help_version + args: [version] + + Note that there must be a ceph client with an admin socket running + before this task is run. The tests are parallelized at the client + level. Tests for a single client are run serially. + + :param ctx: Context + :param config: Configuration + """ + assert isinstance(config, dict), \ + 'admin_socket task requires a dict for configuration' + teuthology.replace_all_with_clients(ctx.cluster, config) + + with parallel() as ptask: + for client, tests in config.iteritems(): + ptask.spawn(_run_tests, ctx, client, tests) + + +def _socket_command(ctx, remote, socket_path, command, args): + """ + Run an admin socket command and return the result as a string. 
+ + :param ctx: Context + :param remote: Remote site + :param socket_path: path to socket + :param command: command to be run remotely + :param args: command arguments + + :returns: output of command in json format + """ + json_fp = StringIO() + testdir = teuthology.get_testdir(ctx) + max_tries = 120 + while True: + proc = remote.run( + args=[ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'ceph', + '--admin-daemon', socket_path, + ] + command.split(' ') + args, + stdout=json_fp, + check_status=False, + ) + if proc.exitstatus == 0: + break + assert max_tries > 0 + max_tries -= 1 + log.info('ceph cli returned an error, command not registered yet?') + log.info('sleeping and retrying ...') + time.sleep(1) + out = json_fp.getvalue() + json_fp.close() + log.debug('admin socket command %s returned %s', command, out) + return json.loads(out) + +def _run_tests(ctx, client, tests): + """ + Create a temp directory and wait for a client socket to be created. + For each test, copy the executable locally and run the test. + Remove temp directory when finished. 
+ + :param ctx: Context + :param client: client machine to run the test + :param tests: list of tests to run + """ + testdir = teuthology.get_testdir(ctx) + log.debug('Running admin socket tests on %s', client) + (remote,) = ctx.cluster.only(client).remotes.iterkeys() + socket_path = '/var/run/ceph/ceph-{name}.asok'.format(name=client) + overrides = ctx.config.get('overrides', {}).get('admin_socket', {}) + + try: + tmp_dir = os.path.join( + testdir, + 'admin_socket_{client}'.format(client=client), + ) + remote.run( + args=[ + 'mkdir', + '--', + tmp_dir, + run.Raw('&&'), + # wait for client process to create the socket + 'while', 'test', '!', '-e', socket_path, run.Raw(';'), + 'do', 'sleep', '1', run.Raw(';'), 'done', + ], + ) + + for command, config in tests.iteritems(): + if config is None: + config = {} + teuthology.deep_merge(config, overrides) + log.debug('Testing %s with config %s', command, str(config)) + + test_path = None + if 'test' in config: + url = config['test'].format( + branch=config.get('branch', 'master') + ) + test_path = os.path.join(tmp_dir, command) + remote.run( + args=[ + 'wget', + '-q', + '-O', + test_path, + '--', + url, + run.Raw('&&'), + 'chmod', + 'u=rx', + '--', + test_path, + ], + ) + + args = config.get('args', []) + assert isinstance(args, list), \ + 'admin socket command args must be a list' + sock_out = _socket_command(ctx, remote, socket_path, command, args) + if test_path is not None: + remote.run( + args=[ + test_path, + ], + stdin=json.dumps(sock_out), + ) + + finally: + remote.run( + args=[ + 'rm', '-rf', '--', tmp_dir, + ], + ) diff --git a/qa/tasks/apache.conf.template b/qa/tasks/apache.conf.template new file mode 100644 index 00000000000..87426f67ca5 --- /dev/null +++ b/qa/tasks/apache.conf.template @@ -0,0 +1,60 @@ + + LoadModule version_module {mod_path}/mod_version.so + + + LoadModule env_module {mod_path}/mod_env.so + + + LoadModule rewrite_module {mod_path}/mod_rewrite.so + + + LoadModule fastcgi_module 
{mod_path}/mod_fastcgi.so + + + LoadModule log_config_module {mod_path}/mod_log_config.so + + +Listen {port} +ServerName {host} + += 2.4> + + LoadModule unixd_module {mod_path}/mod_unixd.so + + + LoadModule authz_core_module {mod_path}/mod_authz_core.so + + + LoadModule mpm_worker_module {mod_path}/mod_mpm_worker.so + + User {user} + Group {group} + + +ServerRoot {testdir}/apache +ErrorLog {testdir}/archive/apache.{client}/error.log +LogFormat "%h l %u %t \"%r\" %>s %b \"{{Referer}}i\" \"%{{User-agent}}i\"" combined +CustomLog {testdir}/archive/apache.{client}/access.log combined +PidFile {testdir}/apache/tmp.{client}/apache.pid +DocumentRoot {testdir}/apache/htdocs.{client} +FastCgiIPCDir {testdir}/apache/tmp.{client}/fastcgi_sock +FastCgiExternalServer {testdir}/apache/htdocs.{client}/rgw.fcgi -socket rgw_sock -idle-timeout {idle_timeout} +RewriteEngine On + +RewriteRule ^/([a-zA-Z0-9-_.]*)([/]?.*) /rgw.fcgi?page=$1¶ms=$2&%{{QUERY_STRING}} [E=HTTP_AUTHORIZATION:%{{HTTP:Authorization}},L] + +# Set fastcgi environment variables. +# Note that this is separate from Unix environment variables! +SetEnv RGW_LOG_LEVEL 20 +SetEnv RGW_SHOULD_LOG yes +SetEnv RGW_PRINT_CONTINUE {print_continue} + + + Options +ExecCGI + AllowOverride All + SetHandler fastcgi-script + + +AllowEncodedSlashes On +ServerSignature Off +MaxRequestsPerChild 0 diff --git a/qa/tasks/autotest.py b/qa/tasks/autotest.py new file mode 100644 index 00000000000..efa972123d2 --- /dev/null +++ b/qa/tasks/autotest.py @@ -0,0 +1,166 @@ +""" +Run an autotest test on the ceph cluster. +""" +import json +import logging +import os + +from teuthology import misc as teuthology +from teuthology.parallel import parallel +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Run an autotest test on the ceph cluster. + + Only autotest client tests are supported. + + The config is a mapping from role name to list of tests to run on + that client. 
+ + For example:: + + tasks: + - ceph: + - ceph-fuse: [client.0, client.1] + - autotest: + client.0: [dbench] + client.1: [bonnie] + + You can also specify a list of tests to run on all clients:: + + tasks: + - ceph: + - ceph-fuse: + - autotest: + all: [dbench] + """ + assert isinstance(config, dict) + config = teuthology.replace_all_with_clients(ctx.cluster, config) + log.info('Setting up autotest...') + testdir = teuthology.get_testdir(ctx) + with parallel() as p: + for role in config.iterkeys(): + (remote,) = ctx.cluster.only(role).remotes.keys() + p.spawn(_download, testdir, remote) + + log.info('Making a separate scratch dir for every client...') + for role in config.iterkeys(): + assert isinstance(role, basestring) + PREFIX = 'client.' + assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + (remote,) = ctx.cluster.only(role).remotes.iterkeys() + mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_)) + scratch = os.path.join(mnt, 'client.{id}'.format(id=id_)) + remote.run( + args=[ + 'sudo', + 'install', + '-d', + '-m', '0755', + '--owner={user}'.format(user='ubuntu'), #TODO + '--', + scratch, + ], + ) + + with parallel() as p: + for role, tests in config.iteritems(): + (remote,) = ctx.cluster.only(role).remotes.keys() + p.spawn(_run_tests, testdir, remote, role, tests) + +def _download(testdir, remote): + """ + Download. Does not explicitly support muliple tasks in a single run. 
+ """ + remote.run( + args=[ + # explicitly does not support multiple autotest tasks + # in a single run; the result archival would conflict + 'mkdir', '{tdir}/archive/autotest'.format(tdir=testdir), + run.Raw('&&'), + 'mkdir', '{tdir}/autotest'.format(tdir=testdir), + run.Raw('&&'), + 'wget', + '-nv', + '--no-check-certificate', + 'https://github.com/ceph/autotest/tarball/ceph', + '-O-', + run.Raw('|'), + 'tar', + '-C', '{tdir}/autotest'.format(tdir=testdir), + '-x', + '-z', + '-f-', + '--strip-components=1', + ], + ) + +def _run_tests(testdir, remote, role, tests): + """ + Spawned to run test on remote site + """ + assert isinstance(role, basestring) + PREFIX = 'client.' + assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_)) + scratch = os.path.join(mnt, 'client.{id}'.format(id=id_)) + + assert isinstance(tests, list) + for idx, testname in enumerate(tests): + log.info('Running autotest client test #%d: %s...', idx, testname) + + tag = 'client.{id}.num{idx}.{testname}'.format( + idx=idx, + testname=testname, + id=id_, + ) + control = '{tdir}/control.{tag}'.format(tdir=testdir, tag=tag) + teuthology.write_file( + remote=remote, + path=control, + data='import json; data=json.loads({data!r}); job.run_test(**data)'.format( + data=json.dumps(dict( + url=testname, + dir=scratch, + # TODO perhaps tag + # results will be in {testdir}/autotest/client/results/dbench + # or {testdir}/autotest/client/results/dbench.{tag} + )), + ), + ) + remote.run( + args=[ + '{tdir}/autotest/client/bin/autotest'.format(tdir=testdir), + '--verbose', + '--harness=simple', + '--tag={tag}'.format(tag=tag), + control, + run.Raw('3>&1'), + ], + ) + + remote.run( + args=[ + 'rm', '-rf', '--', control, + ], + ) + + remote.run( + args=[ + 'mv', + '--', + '{tdir}/autotest/client/results/{tag}'.format(tdir=testdir, tag=tag), + '{tdir}/archive/autotest/{tag}'.format(tdir=testdir, tag=tag), + ], + ) + + remote.run( + args=[ + 'rm', '-rf', 
'--', '{tdir}/autotest'.format(tdir=testdir), + ], + ) diff --git a/qa/tasks/blktrace.py b/qa/tasks/blktrace.py new file mode 100644 index 00000000000..401f9e39f64 --- /dev/null +++ b/qa/tasks/blktrace.py @@ -0,0 +1,93 @@ +""" +Run blktrace program through teuthology +""" +import contextlib +import logging + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run + +log = logging.getLogger(__name__) +blktrace = '/usr/sbin/blktrace' +daemon_signal = 'term' + +@contextlib.contextmanager +def setup(ctx, config): + """ + Setup all the remotes + """ + osds = ctx.cluster.only(teuthology.is_type('osd')) + log_dir = '{tdir}/archive/performance/blktrace'.format(tdir=teuthology.get_testdir(ctx)) + + for remote, roles_for_host in osds.remotes.iteritems(): + log.info('Creating %s on %s' % (log_dir, remote.name)) + remote.run( + args=['mkdir', '-p', '-m0755', '--', log_dir], + wait=False, + ) + yield + +@contextlib.contextmanager +def execute(ctx, config): + """ + Run the blktrace program on remote machines. 
+ """ + procs = [] + testdir = teuthology.get_testdir(ctx) + log_dir = '{tdir}/archive/performance/blktrace'.format(tdir=testdir) + + osds = ctx.cluster.only(teuthology.is_type('osd')) + for remote, roles_for_host in osds.remotes.iteritems(): + roles_to_devs = ctx.disk_config.remote_to_roles_to_dev[remote] + for id_ in teuthology.roles_of_type(roles_for_host, 'osd'): + if roles_to_devs.get(id_): + dev = roles_to_devs[id_] + log.info("running blktrace on %s: %s" % (remote.name, dev)) + + proc = remote.run( + args=[ + 'cd', + log_dir, + run.Raw(';'), + 'daemon-helper', + daemon_signal, + 'sudo', + blktrace, + '-o', + dev.rsplit("/", 1)[1], + '-d', + dev, + ], + wait=False, + stdin=run.PIPE, + ) + procs.append(proc) + try: + yield + finally: + osds = ctx.cluster.only(teuthology.is_type('osd')) + log.info('stopping blktrace processs') + for proc in procs: + proc.stdin.close() + +@contextlib.contextmanager +def task(ctx, config): + """ + Usage: + blktrace: + + Runs blktrace on all clients. + """ + if config is None: + config = dict(('client.{id}'.format(id=id_), None) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')) + elif isinstance(config, list): + config = dict.fromkeys(config) + + with contextutil.nested( + lambda: setup(ctx=ctx, config=config), + lambda: execute(ctx=ctx, config=config), + ): + yield + diff --git a/qa/tasks/boto.cfg.template b/qa/tasks/boto.cfg.template new file mode 100644 index 00000000000..cdfe8873b42 --- /dev/null +++ b/qa/tasks/boto.cfg.template @@ -0,0 +1,2 @@ +[Boto] +http_socket_timeout = {idle_timeout} diff --git a/qa/tasks/buildpackages.py b/qa/tasks/buildpackages.py new file mode 100644 index 00000000000..b7d6cee4f01 --- /dev/null +++ b/qa/tasks/buildpackages.py @@ -0,0 +1,223 @@ +""" +Build ceph packages + +Unit tests: + +py.test -v -s tests/test_buildpackages.py + +Integration tests: + +teuthology-openstack --verbose --key-name myself --key-filename ~/Downloads/myself --ceph infernalis --suite teuthology/buildpackages + 
+""" +import copy +import logging +import os +import types +from teuthology import packaging +from teuthology import misc +from teuthology.config import config as teuth_config +from teuthology.openstack import OpenStack + +log = logging.getLogger(__name__) + +class LocalGitbuilderProject(packaging.GitbuilderProject): + + def __init__(self): + pass + + +def get_pkg_type(os_type): + if os_type in ('centos', 'fedora', 'opensuse', 'rhel', 'sles'): + return 'rpm' + else: + return 'deb' + +def apply_overrides(ctx, config): + if config is None: + config = {} + else: + config = copy.deepcopy(config) + + assert isinstance(config, dict), \ + "task install only supports a dictionary for configuration" + + project, = config.get('project', 'ceph'), + log.debug('project %s' % project) + overrides = ctx.config.get('overrides') + if overrides: + install_overrides = overrides.get('install', {}) + misc.deep_merge(config, install_overrides.get(project, {})) + return config + +def get_config_install(ctx, config): + config = apply_overrides(ctx, config) + log.debug('install config %s' % config) + return [(config.get('flavor', 'basic'), + config.get('tag', ''), + config.get('branch', ''), + config.get('sha1'))] + +def get_config_install_upgrade(ctx, config): + log.debug('install.upgrade config before override %s' % config) + configs = [] + for (role, role_config) in config.iteritems(): + if role_config is None: + role_config = {} + o = apply_overrides(ctx, role_config) + + log.debug('install.upgrade config ' + str(role_config) + + ' and with overrides ' + str(o)) + # for install.upgrade overrides are actually defaults + configs.append((o.get('flavor', 'basic'), + role_config.get('tag', o.get('tag', '')), + role_config.get('branch', o.get('branch', '')), + role_config.get('sha1', o.get('sha1')))) + return configs + +GET_CONFIG_FUNCTIONS = { + 'install': get_config_install, + 'install.upgrade': get_config_install_upgrade, +} + +def lookup_configs(ctx, node): + configs = [] + if type(node) 
is types.ListType: + for leaf in node: + configs.extend(lookup_configs(ctx, leaf)) + elif type(node) is types.DictType: + for (key, value) in node.iteritems(): + if key in ('install', 'install.upgrade'): + configs.extend(GET_CONFIG_FUNCTIONS[key](ctx, value)) + elif key in ('overrides',): + pass + else: + configs.extend(lookup_configs(ctx, value)) + return configs + +def get_sha1(ref): + url = teuth_config.get_ceph_git_url() + ls_remote = misc.sh("git ls-remote " + url + " " + ref) + return ls_remote.split()[0] + +def task(ctx, config): + """ + Build Ceph packages. This task will automagically be run + before the task that need to install packages (this is taken + care of by the internal teuthology task). + + The config should be as follows: + + buildpackages: + good_machine: + disk: 40 # GB + ram: 48000 # MB + cpus: 16 + min_machine: + disk: 40 # GB + ram: 8000 # MB + cpus: 1 + + example: + + tasks: + - buildpackages: + good_machine: + disk: 40 # GB + ram: 15000 # MB + cpus: 16 + min_machine: + disk: 40 # GB + ram: 8000 # MB + cpus: 1 + - install: + + When a buildpackages task is already included, the values it contains can be + overriden with: + + overrides: + buildpackages: + good_machine: + disk: 20 # GB + ram: 2000 # MB + cpus: 2 + min_machine: + disk: 10 # GB + ram: 1000 # MB + cpus: 1 + + """ + log.info('Beginning buildpackages...') + if config is None: + config = {} + assert isinstance(config, dict), \ + 'task only accepts a dict for config not ' + str(config) + overrides = ctx.config.get('overrides', {}) + misc.deep_merge(config, overrides.get('buildpackages', {})) + d = os.path.join(os.path.dirname(__file__), 'buildpackages') + os_type = misc.get_distro(ctx) + os_version = misc.get_distro_version(ctx) + arch = ctx.config.get('arch', OpenStack().get_default_arch()) + dist = LocalGitbuilderProject()._get_distro(distro=os_type, + version=os_version) + pkg_type = get_pkg_type(os_type) + misc.sh( + "flock --close /tmp/buildpackages " + + "make -C " + d + " " + 
os.environ['HOME'] + "/.ssh_agent") + for (flavor, tag, branch, sha1) in lookup_configs(ctx, ctx.config): + if tag: + sha1 = get_sha1(tag) + elif branch: + sha1 = get_sha1(branch) + log.info("building flavor = " + flavor + "," + + " tag = " + tag + "," + + " branch = " + branch + "," + + " sha1 = " + sha1) + target = ('ceph-' + + pkg_type + '-' + + dist + '-' + + arch + '-' + + flavor + '-' + + sha1) + openstack = OpenStack() + openstack.set_provider() + if openstack.provider == 'ovh': + select = '^(vps|hg)-.*ssd' + else: + select = '' + network = openstack.net() + if network != "": + network = " OPENSTACK_NETWORK='" + network + "' " + openstack.image(os_type, os_version, arch) # create if it does not exist + build_flavor = openstack.flavor_range( + config['min_machine'], config['good_machine'], arch, select) + default_arch = openstack.get_default_arch() + http_flavor = openstack.flavor({ + 'disk': 30, # GB + 'ram': 1024, # MB + 'cpus': 1, + }, default_arch, select) + lock = "/tmp/buildpackages-" + sha1 + "-" + os_type + "-" + os_version + cmd = (". 
" + os.environ['HOME'] + "/.ssh_agent ; " + + " flock --close " + lock + + " make -C " + d + + network + + " CEPH_GIT_URL=" + teuth_config.get_ceph_git_url() + + " CEPH_PKG_TYPE=" + pkg_type + + " CEPH_OS_TYPE=" + os_type + + " CEPH_OS_VERSION=" + os_version + + " CEPH_DIST=" + dist + + " CEPH_ARCH=" + arch + + " CEPH_SHA1=" + sha1 + + " CEPH_TAG=" + tag + + " CEPH_BRANCH=" + branch + + " CEPH_FLAVOR=" + flavor + + " BUILD_FLAVOR=" + build_flavor + + " HTTP_FLAVOR=" + http_flavor + + " HTTP_ARCH=" + default_arch + + " " + target + + " ") + log.info("buildpackages: " + cmd) + misc.sh(cmd) + teuth_config.gitbuilder_host = openstack.get_ip('packages-repository', '') + log.info('Finished buildpackages') diff --git a/qa/tasks/buildpackages/Makefile b/qa/tasks/buildpackages/Makefile new file mode 100644 index 00000000000..de20fbb7551 --- /dev/null +++ b/qa/tasks/buildpackages/Makefile @@ -0,0 +1,81 @@ +SHELL=/bin/bash +D=/tmp/stampsdir +VPATH=${D} +TIMEOUT_SERVER_CREATE = 30m +TIMEOUT_BUILD = 220m # 20 minutes short of 4 hours +PKG_REPO=packages-repository +PKG_REPO_OS_TYPE=ubuntu +PKG_REPO_OS_VERSION=14.04 +PKG_REPO_USER_DATA=${PKG_REPO_OS_TYPE}-${PKG_REPO_OS_VERSION}-user-data.txt + +# We want to extract the first listed IPv4 address! 
+# Openstack will provide the addresses field in this format: +# "net1-name=ip(, ip)+(; net2-name=ip(, ip)+)+" +# Each IP may be v4 or v6 (including shortened forms and IPv4-mapped-IPv6 forms) +# 1.2.3.4 +# 2001:db8:6050:ed4d:f816:3eff:fe48:3b36 +# 2001:db8::fe48:3b36 +# 2001:db8::1.2.3.4 +# Example long-form input: +# private-network=10.10.10.69, 2001:db8:6050:ed4d:f816:3eff:fed1:d9f8;net-name2=2001:db8::fe48:3b36, 2001:db8::1.2.3.4, 1.2.3.4; +# TODO: allow selection of the network instead of taking the first network +# TODO: Support IPv6 in future +define get_ip +$$(openstack server show -f value -c addresses $(1) |perl -pe 's/^[^=]+=([^;]+).*/\1/g; s/[ ,]/\n/g; ' |grep -v -e ':' -e '^$$' |head -n1) +endef + +MY_IP=$(shell hostname -I | cut -f1 -d' ') + +${HOME}/.ssh_agent: + ssh-agent -s > ${HOME}/.ssh_agent + source ${HOME}/.ssh_agent ; ssh-add ; ssh-add -l + grep -q ssh_agent ~/.bashrc_teuthology || echo 'source ${HOME}/.ssh_agent' >> ~/.bashrc_teuthology + +flock-${PKG_REPO}: + timeout $(TIMEOUT_SERVER_CREATE) openstack server create --image 'teuthology-ubuntu-14.04-${HTTP_ARCH}' ${OPENSTACK_NETWORK} --flavor ${HTTP_FLAVOR} --key-name teuthology --security-group teuthology --property ownedby=${MY_IP} --user-data ${PKG_REPO_USER_DATA} --wait ${PKG_REPO} + sleep 30 + set -ex ; \ + ip=$(call get_ip,${PKG_REPO}) ; \ + for delay in 1 2 4 8 8 8 8 8 8 8 8 8 16 16 16 16 16 32 32 32 64 128 256 512 ; do if ssh -o 'ConnectTimeout=3' $$ip bash -c '"grep -q READYTORUN /var/log/cloud-init*.log"' ; then break ; else sleep $$delay ; fi ; done ; \ + ssh $$ip sudo apt-get update ; \ + ssh $$ip sudo apt-get install -y nginx rsync && \ + ssh $$ip sudo chown -R ubuntu /usr/share/nginx/html && \ + ssh $$ip sudo rm /usr/share/nginx/html/\* && \ + ssh $$ip sudo perl -pi -e '"s|location / {|location / { autoindex on;|"' /etc/nginx/sites-available/default && \ + ssh $$ip sudo /etc/init.d/nginx restart && \ + perl -pi -e "s/^gitbuilder_host:.*/gitbuilder_host: $$ip/" ~/.teuthology.yaml 
+ touch ${D}/$@ + +${PKG_REPO}: + mkdir -p ${D} + flock --close ${D}/flock-$@.lock ${MAKE} flock-$@ + touch ${D}/$@ + +# Just because 'server create' return success does not mean it actually succeeded! +# Check the server status before we proceed. +# If it's a weird status, bail out and let the delete fire +# eg: ERROR status can happen if there is no VM host without enough capacity for the request. +ceph-${CEPH_PKG_TYPE}-${CEPH_DIST}-${CEPH_ARCH}-${CEPH_FLAVOR}-${CEPH_SHA1}: ${PKG_REPO} + timeout $(TIMEOUT_SERVER_CREATE) openstack server create --image 'teuthology-${CEPH_OS_TYPE}-${CEPH_OS_VERSION}-${CEPH_ARCH}' ${OPENSTACK_NETWORK} --flavor ${BUILD_FLAVOR} --key-name teuthology --security-group teuthology --property ownedby=${MY_IP} --user-data ${CEPH_OS_TYPE}-${CEPH_OS_VERSION}-user-data.txt --wait $@ + set -ex ; \ + trap "openstack server delete --wait $@" EXIT ; \ + for delay in 30 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 ; do \ + status=$$(openstack server show -c status -f value $@) ; \ + case $$status in \ + ACTIVE) break ;; \ + NOSTATE|*BUILD|*BOOT|*RESIZE) sleep $$delay ;; \ + *) exit 1 ;; \ + esac ; \ + done ; \ + ip=$(call get_ip,$@) ; \ + test -n "$$ip" || exit ; \ + for delay in 1 2 4 8 8 8 8 8 8 8 8 8 16 16 16 16 16 32 32 32 64 128 256 512 ; do if ssh -o 'ConnectTimeout=3' $$ip bash -c '"grep -q READYTORUN /var/log/cloud-init*.log"' ; then break ; else sleep $$delay ; fi ; done ; \ + scp make-${CEPH_PKG_TYPE}.sh common.sh ubuntu@$$ip: ; \ + packages_repository=$(call get_ip,${> /etc/ssh/sshd_config + - ( echo 'Defaults !requiretty' ; echo 'Defaults visiblepw' ) | tee /etc/sudoers.d/cephlab_sudo +preserve_hostname: true +system_info: + default_user: + name: ubuntu +packages: + - dracut-modules-growroot +runcmd: + - mkinitrd --force /boot/initramfs-2.6.32-573.3.1.el6.x86_64.img 2.6.32-573.3.1.el6.x86_64 + - reboot +final_message: "READYTORUN" diff --git a/qa/tasks/buildpackages/centos-7.0-user-data.txt 
b/qa/tasks/buildpackages/centos-7.0-user-data.txt new file mode 120000 index 00000000000..2eb0e3c88dd --- /dev/null +++ b/qa/tasks/buildpackages/centos-7.0-user-data.txt @@ -0,0 +1 @@ +user-data.txt \ No newline at end of file diff --git a/qa/tasks/buildpackages/centos-7.1-user-data.txt b/qa/tasks/buildpackages/centos-7.1-user-data.txt new file mode 120000 index 00000000000..2eb0e3c88dd --- /dev/null +++ b/qa/tasks/buildpackages/centos-7.1-user-data.txt @@ -0,0 +1 @@ +user-data.txt \ No newline at end of file diff --git a/qa/tasks/buildpackages/centos-7.2-user-data.txt b/qa/tasks/buildpackages/centos-7.2-user-data.txt new file mode 120000 index 00000000000..2eb0e3c88dd --- /dev/null +++ b/qa/tasks/buildpackages/centos-7.2-user-data.txt @@ -0,0 +1 @@ +user-data.txt \ No newline at end of file diff --git a/qa/tasks/buildpackages/common.sh b/qa/tasks/buildpackages/common.sh new file mode 100644 index 00000000000..eb9bc646678 --- /dev/null +++ b/qa/tasks/buildpackages/common.sh @@ -0,0 +1,163 @@ +#!/bin/bash +# +# Copyright (C) 2015 Red Hat +# +# Author: Loic Dachary +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# +function install_deps() { + git archive --remote=git://git.ceph.com/ceph.git master install-deps.sh | tar -xvf - + # + # drop the following hack when trusty is not supported anymore + # there is no other way as long as we maintain a debian directory that tries + # to be the same for all distributions + # + if grep --quiet 14.04 /etc/issue 2>/dev/null && sudo apt-get install --force-yes -qq -y dpkg-dev && test "$(dpkg-architecture -qDEB_BUILD_GNU_CPU 2>/dev/null)" = aarch64 ; then + sed -i -e '/libgoogle-perftools-dev/d' debian/control + fi + bash -x install-deps.sh +} + +function git_submodules() { + # see http://tracker.ceph.com/issues/13426 + perl -pi -e 's|git://ceph.com/git/ceph-object-corpus.git|https://github.com/ceph/ceph-object-corpus.git|' .gitmodules + local force=$(if git submodule usage 2>&1 | grep --quiet 'update.*--force'; then echo --force ; fi) + git submodule sync || return 1 + git submodule update $force --init --recursive || return 1 +} + +function get_ceph() { + local git_ceph_url=$1 + local sha1=$2 + + test -d ceph || git clone ${git_ceph_url} ceph + cd ceph + if test -d src ; then # so we don't try to fetch when using a fixture + git fetch --tags http://github.com/ceph/ceph + fi + git fetch --tags ${git_ceph_url} + git checkout ${sha1} +} + +function init_ceph() { + local git_ceph_url=$1 + local sha1=$2 + get_ceph $git_ceph_url $sha1 || return 1 + git_submodules || return 1 + install_deps || return 1 +} + +function flavor2configure() { + local flavor=$1 + + eval $(dpkg-architecture) + + if test $flavor = notcmalloc || test "$DEB_HOST_GNU_CPU" = aarch64 ; then + echo --without-tcmalloc --without-cryptopp + fi +} + +# +# for a given $sha1 in the $ceph_dir repository, lookup all references +# from the remote origin and tags matching the sha1. Add a symbolic +# link in $ref_dir to the $sha1 for each reference found. If the +# reference is a tag, also add a symbolic link to the commit to which +# the tag points, if it is an annotated tag. 
+# +function link_same() { + local ref_dir=$1 + local ceph_dir=$2 + local sha1=$3 + + mkdir -p $ref_dir + ( + cd ${ceph_dir} + git for-each-ref refs/tags/** refs/remotes/origin/** | grep $sha1 | \ + while read sha1 type ref ; do + if test $type = 'tag' ; then + commit_sha1=$(git rev-parse $ref^{commit}) + if test $commit_sha1 != $sha1 ; then + echo ../sha1/$sha1 ../sha1/$commit_sha1 + fi + fi + echo ../sha1/$sha1 $(basename $ref) + done + ) | while read from to ; do + ( cd $ref_dir ; ln -sf $from $to ) + done +} + +function test_link_same() { + local d=/tmp/link_same$$ + mkdir -p $d/primary + cd $d/primary + git init + touch a ; git add a ; git commit -m 'm' a + git tag tag1 + tag1=$(git rev-parse HEAD) + git branch branch1 + touch b ; git add b ; git commit -m 'm' b + git tag --annotate -m 'a' tag2 + tag2=$(git rev-parse tag2) + sha1_tag2=$(git rev-parse tag2^{commit}) + git branch branch2 + touch c ; git add c ; git commit -m 'm' c + git branch branch3 + sha1_branch3=$(git rev-parse branch3) + + git clone $d/primary $d/secondary + cd $d/secondary + mkdir $d/ref $d/sha1 + + touch $d/sha1/$sha1_branch3 + link_same $d/ref $d/secondary $sha1_branch3 + test $(readlink --canonicalize $d/ref/branch3) = $d/sha1/$sha1_branch3 || return 1 + test $(readlink --canonicalize $d/ref/master) = $d/sha1/$sha1_branch3 || return 1 + + touch $d/sha1/$tag2 + link_same $d/ref $d/secondary $tag2 + test $(readlink --canonicalize $d/ref/tag2) = $d/sha1/$tag2 || return 1 + test $(readlink --canonicalize $d/sha1/$sha1_tag2) = $d/sha1/$tag2 || return 1 + + touch $d/sha1/$tag1 + link_same $d/ref $d/secondary $tag1 + test $(readlink --canonicalize $d/ref/tag1) = $d/sha1/$tag1 || return 1 + test $(readlink --canonicalize $d/ref/branch1) = $d/sha1/$tag1 || return 1 + + rm -fr $d +} + +function maybe_parallel() { + local nproc=$1 + local vers=$2 + + if echo $vers | grep --quiet '0\.67' ; then + return + fi + + if test $nproc -gt 1 ; then + echo -j${nproc} + fi +} + +function test_maybe_parallel() 
{ + test "$(maybe_parallel 1 0.72)" = "" || return 1 + test "$(maybe_parallel 8 0.67)" = "" || return 1 + test "$(maybe_parallel 8 0.72)" = "-j8" || return 1 +} + +if test "$1" = "TEST" ; then + shopt -s -o xtrace + PS4='${BASH_SOURCE[0]}:$LINENO: ${FUNCNAME[0]}: ' + test_link_same + test_maybe_parallel +fi diff --git a/qa/tasks/buildpackages/debian-8.0-user-data.txt b/qa/tasks/buildpackages/debian-8.0-user-data.txt new file mode 100644 index 00000000000..13aba98763a --- /dev/null +++ b/qa/tasks/buildpackages/debian-8.0-user-data.txt @@ -0,0 +1,12 @@ +#cloud-config +bootcmd: + - echo 'APT::Get::AllowUnauthenticated "true";' | tee /etc/apt/apt.conf.d/99disablesigs + - echo nameserver 8.8.8.8 | tee -a /etc/resolv.conf # last resort, in case the DHCP server does not provide a resolver +manage_etc_hosts: true +preserve_hostname: true +system_info: + default_user: + name: ubuntu +runcmd: + - echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers +final_message: "READYTORUN" diff --git a/qa/tasks/buildpackages/make-deb.sh b/qa/tasks/buildpackages/make-deb.sh new file mode 100755 index 00000000000..db9df06ffb5 --- /dev/null +++ b/qa/tasks/buildpackages/make-deb.sh @@ -0,0 +1,150 @@ +#!/bin/bash +# +# Copyright (C) 2015 Red Hat +# +# Author: Loic Dachary +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# + +# +# Create and upload a deb repository with the same naming conventions +# as https://github.com/ceph/autobuild-ceph/blob/master/build-ceph-deb.sh +# +set -xe + +base=/tmp/release +gitbuilder_host=$1 +codename=$2 +git_ceph_url=$3 +sha1=$4 +flavor=$5 +arch=$6 + +sudo apt-get update +sudo apt-get install -y git + +source $(dirname $0)/common.sh + +init_ceph $git_ceph_url $sha1 + +#codename=$(lsb_release -sc) +releasedir=$base/$(lsb_release -si)/WORKDIR +# +# git describe provides a version that is +# a) human readable +# b) is unique for each commit +# c) compares higher than any previous commit +# d) contains the short hash of the commit +# +vers=$(git describe --match "v*" | sed s/^v//) +# +# always set the debian version to 1 which is ok because the debian +# directory is included in the sources and the upstream version will +# change each time it is modified. +# +dvers="$vers-1" +: ${NPROC:=$(nproc)} +ceph_dir=$(pwd) + +function build_package() { + + rm -fr $releasedir + mkdir -p $releasedir + # + # remove all files not under git so they are not + # included in the distribution. + # + git clean -qdxff + # + # creating the distribution tarbal requires some configure + # options (otherwise parts of the source tree will be left out). + # + ./autogen.sh + # Building with LTTNG on Ubuntu Precise is not possible. 
+ # It fails the LTTNG-is-sane check (it misses headers) + # And the Debian rules files leave it out anyway + case $codename in + precise) lttng_opt="--without-lttng" ;; + *) lttng_opt="--with-lttng" ;; + esac + ./configure $(flavor2configure $flavor) \ + --with-rocksdb --with-ocf \ + --with-nss --with-debug --enable-cephfs-java \ + $lttng_opt --with-babeltrace + # + # use distdir= to set the name of the top level directory of the + # tarbal to match the desired version + # + make distdir=ceph-$vers dist + # + # rename the tarbal to match debian conventions and extract it + # + mv ceph-$vers.tar.gz $releasedir/ceph_$vers.orig.tar.gz + tar -C $releasedir -zxf $releasedir/ceph_$vers.orig.tar.gz + # + # copy the debian directory over + # + cp -a debian $releasedir/ceph-$vers/debian + cd $releasedir + # + # uncomment to remove -dbg packages + # because they are large and take time to build + # + #perl -ni -e 'print if(!(/^Package: .*-dbg$/../^$/))' ceph-$vers/debian/control + #perl -pi -e 's/--dbg-package.*//' ceph-$vers/debian/rules + # + # update the changelog to match the desired version + # + cd ceph-$vers + local chvers=$(head -1 debian/changelog | perl -ne 's/.*\(//; s/\).*//; print') + if [ "$chvers" != "$dvers" ]; then + DEBEMAIL="contact@ceph.com" dch -D $codename --force-distribution -b -v "$dvers" "new version" + fi + # + # create the packages (with ccache) + # + export CEPH_EXTRA_CONFIGURE_ARGS=$(flavor2configure $flavor) + j=$(maybe_parallel $NPROC $vers) + PATH=/usr/lib/ccache:$PATH dpkg-buildpackage $j -uc -us -sa +} + +function build_repo() { + local gitbuilder_host=$1 + + sudo apt-get install -y reprepro + cd ${releasedir}/.. 
+ # + # Create a repository in a directory with a name structured + # as + # + base=ceph-deb-$codename-$arch-$flavor + sha1_dir=$codename/$base/sha1/$sha1 + mkdir -p $sha1_dir/conf + cat > $sha1_dir/conf/distributions < $sha1_dir/version + echo $sha1 > $sha1_dir/sha1 + link_same $codename/$base/ref $ceph_dir $sha1 + if test "$gitbuilder_host" ; then + cd $codename + sudo apt-get install -y rsync + RSYNC_RSH='ssh -o StrictHostKeyChecking=false' rsync -av $base/ $gitbuilder_host:/usr/share/nginx/html/$base/ + fi +} + +build_package +build_repo $gitbuilder_host diff --git a/qa/tasks/buildpackages/make-rpm.sh b/qa/tasks/buildpackages/make-rpm.sh new file mode 100755 index 00000000000..f44efc43207 --- /dev/null +++ b/qa/tasks/buildpackages/make-rpm.sh @@ -0,0 +1,264 @@ +#!/bin/bash +# +# Copyright (C) 2015 Red Hat +# +# Author: Loic Dachary +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# + +# +# Create and upload a RPM repository with the same naming conventions +# as https://github.com/ceph/autobuild-ceph/blob/master/build-ceph-rpm.sh +# + +set -xe + +base=/tmp/release +gitbuilder_host=$1 +codename=$2 +git_ceph_url=$3 +sha1=$4 +flavor=$5 +arch=$6 + +suse=false +[[ $codename =~ suse ]] && suse=true + +if [ "$suse" = true ] ; then + sudo zypper -n install git +else + sudo yum install -y git +fi + +source $(dirname $0)/common.sh + +init_ceph $git_ceph_url $sha1 + +#id=$(lsb_release -s -i | tr A-Z a-z) +#major=$(lsb_release -s -r | sed -s "s;\..*;;g") +#codename="${id}${major}" +releasedir=$base/$(lsb_release -si | tr ' ' '_')/WORKDIR +# +# git describe provides a version that is +# a) human readable +# b) is unique for each commit +# c) compares higher than any previous commit +# d) contains the short hash of the commit +# +vers=$(git describe --match "v*" | sed s/^v//) +ceph_dir=$(pwd) + +# +# Create a repository in a directory with a name structured +# as +# +base=ceph-rpm-$codename-$arch-$flavor + +function setup_rpmmacros() { + if ! grep -q find_debuginfo_dwz_opts $HOME/.rpmmacros ; then + echo '%_find_debuginfo_dwz_opts %{nil}' >> $HOME/.rpmmacros + fi + if lsb_release -d -s | grep CentOS | grep -q 'release 7' ; then + if ! grep -q '%dist .el7' $HOME/.rpmmacros ; then + echo '%dist .el7' >> $HOME/.rpmmacros + fi + fi +} + +function build_package() { + rm -fr $releasedir + mkdir -p $releasedir + # + # remove all files not under git so they are not + # included in the distribution. + # + git clean -qdxff + # + # creating the distribution tarbal requires some configure + # options (otherwise parts of the source tree will be left out). 
+ # + if [ "$suse" = true ] ; then + sudo zypper -n install bzip2 + else + sudo yum install -y bzip2 + fi + ./autogen.sh + ./configure $(flavor2configure $flavor) --with-debug --with-radosgw --with-fuse --with-libatomic-ops --with-gtk2 --with-nss + # + # use distdir= to set the name of the top level directory of the + # tarbal to match the desired version + # + make dist-bzip2 + # Set up build area + setup_rpmmacros + if [ "$suse" = true ] ; then + sudo zypper -n install rpm-build + else + sudo yum install -y rpm-build + fi + local buildarea=$releasedir + mkdir -p ${buildarea}/SOURCES + mkdir -p ${buildarea}/SRPMS + mkdir -p ${buildarea}/SPECS + cp ceph.spec ${buildarea}/SPECS + mkdir -p ${buildarea}/RPMS + mkdir -p ${buildarea}/BUILD + cp -a ceph-*.tar.bz2 ${buildarea}/SOURCES/. + cp -a rpm/*.patch ${buildarea}/SOURCES || true + ( + cd ${buildarea}/SPECS + ccache=$(echo /usr/lib*/ccache) + # Build RPMs + buildarea=`readlink -fn ${releasedir}` ### rpm wants absolute path + PATH=$ccache:$PATH rpmbuild -ba --define "_unpackaged_files_terminate_build 0" --define "_topdir ${buildarea}" ceph.spec + ) +} + +function build_rpm_release() { + local buildarea=$1 + local sha1=$2 + local gitbuilder_host=$3 + local base=$4 + + cat < ${buildarea}/SPECS/ceph-release.spec +Name: ceph-release +Version: 1 +Release: 0%{?dist} +Summary: Ceph repository configuration +Group: System Environment/Base +License: GPLv2 +URL: http://gitbuilder.ceph.com/$dist +Source0: ceph.repo +#Source0: RPM-GPG-KEY-CEPH +#Source1: ceph.repo +BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) +BuildArch: noarch + +%description +This package contains the Ceph repository GPG key as well as configuration +for yum and up2date. + +%prep + +%setup -q -c -T +install -pm 644 %{SOURCE0} . +#install -pm 644 %{SOURCE1} . 
+ +%build + +%install +rm -rf %{buildroot} +#install -Dpm 644 %{SOURCE0} \ +# %{buildroot}/%{_sysconfdir}/pki/rpm-gpg/RPM-GPG-KEY-CEPH +%if 0%{defined suse_version} +install -dm 755 %{buildroot}/%{_sysconfdir}/zypp +install -dm 755 %{buildroot}/%{_sysconfdir}/zypp/repos.d +install -pm 644 %{SOURCE0} \ + %{buildroot}/%{_sysconfdir}/zypp/repos.d +%else +install -dm 755 %{buildroot}/%{_sysconfdir}/yum.repos.d +install -pm 644 %{SOURCE0} \ + %{buildroot}/%{_sysconfdir}/yum.repos.d +%endif + +%clean +#rm -rf %{buildroot} + +%post + +%postun + +%files +%defattr(-,root,root,-) +#%doc GPL +%if 0%{defined suse_version} +/etc/zypp/repos.d/* +%else +/etc/yum.repos.d/* +%endif +#/etc/pki/rpm-gpg/* + +%changelog +* Tue Mar 10 2013 Gary Lowell - 1-0 +- Handle both yum and zypper +- Use URL to ceph git repo for key +- remove config attribute from repo file +* Tue Aug 27 2012 Gary Lowell - 1-0 +- Initial Package +EOF + + cat < $buildarea/SOURCES/ceph.repo +[Ceph] +name=Ceph packages for \$basearch +baseurl=http://${gitbuilder_host}/${base}/sha1/${sha1}/\$basearch +enabled=1 +gpgcheck=0 +type=rpm-md + +[Ceph-noarch] +name=Ceph noarch packages +baseurl=http://${gitbuilder_host}/${base}/sha1/${sha1}/noarch +enabled=1 +gpgcheck=0 +type=rpm-md + +[ceph-source] +name=Ceph source packages +baseurl=http://${gitbuilder_host}/${base}/sha1/${sha1}/SRPMS +enabled=1 +gpgcheck=0 +type=rpm-md +EOF + + rpmbuild -bb --define "_topdir ${buildarea}" ${buildarea}/SPECS/ceph-release.spec +} + +function build_rpm_repo() { + local buildarea=$1 + local gitbuilder_host=$2 + local base=$3 + + if [ "$suse" = true ] ; then + sudo zypper -n install createrepo + else + sudo yum install -y createrepo + fi + + for dir in ${buildarea}/SRPMS ${buildarea}/RPMS/* + do + createrepo ${dir} + done + + local sha1_dir=${buildarea}/../$codename/$base/sha1/$sha1 + mkdir -p $sha1_dir + echo $vers > $sha1_dir/version + echo $sha1 > $sha1_dir/sha1 + echo ceph > $sha1_dir/name + + for dir in ${buildarea}/SRPMS 
${buildarea}/RPMS/* + do + cp -fla ${dir} $sha1_dir + done + + link_same ${buildarea}/../$codename/$base/ref $ceph_dir $sha1 + if test "$gitbuilder_host" ; then + ( + cd ${buildarea}/../$codename + RSYNC_RSH='ssh -o StrictHostKeyChecking=false' rsync -av $base/ ubuntu@$gitbuilder_host:/usr/share/nginx/html/$base/ + ) + fi +} + +setup_rpmmacros +build_package +build_rpm_release $releasedir $sha1 $gitbuilder_host $base +build_rpm_repo $releasedir $gitbuilder_host $base diff --git a/qa/tasks/buildpackages/opensuse-42.1-user-data.txt b/qa/tasks/buildpackages/opensuse-42.1-user-data.txt new file mode 100644 index 00000000000..190cac2b1eb --- /dev/null +++ b/qa/tasks/buildpackages/opensuse-42.1-user-data.txt @@ -0,0 +1,13 @@ +#cloud-config +bootcmd: + - echo nameserver 8.8.8.8 | tee -a /etc/resolv.conf # last resort, in case the DHCP server does not provide a resolver +manage_etc_hosts: true +preserve_hostname: true +users: + - name: ubuntu + gecos: User + sudo: ["ALL=(ALL) NOPASSWD:ALL"] + groups: users +runcmd: + - ( MYHOME=/home/ubuntu ; mkdir $MYHOME/.ssh ; chmod 700 $MYHOME/.ssh ; cp /root/.ssh/authorized_keys $MYHOME/.ssh ; chown -R ubuntu.users $MYHOME/.ssh ) +final_message: "READYTORUN" diff --git a/qa/tasks/buildpackages/ubuntu-12.04-user-data.txt b/qa/tasks/buildpackages/ubuntu-12.04-user-data.txt new file mode 120000 index 00000000000..2eb0e3c88dd --- /dev/null +++ b/qa/tasks/buildpackages/ubuntu-12.04-user-data.txt @@ -0,0 +1 @@ +user-data.txt \ No newline at end of file diff --git a/qa/tasks/buildpackages/ubuntu-14.04-user-data.txt b/qa/tasks/buildpackages/ubuntu-14.04-user-data.txt new file mode 120000 index 00000000000..2eb0e3c88dd --- /dev/null +++ b/qa/tasks/buildpackages/ubuntu-14.04-user-data.txt @@ -0,0 +1 @@ +user-data.txt \ No newline at end of file diff --git a/qa/tasks/buildpackages/ubuntu-16.04-user-data.txt b/qa/tasks/buildpackages/ubuntu-16.04-user-data.txt new file mode 120000 index 00000000000..2eb0e3c88dd --- /dev/null +++ 
b/qa/tasks/buildpackages/ubuntu-16.04-user-data.txt @@ -0,0 +1 @@ +user-data.txt \ No newline at end of file diff --git a/qa/tasks/buildpackages/user-data.txt b/qa/tasks/buildpackages/user-data.txt new file mode 100644 index 00000000000..d5016929dac --- /dev/null +++ b/qa/tasks/buildpackages/user-data.txt @@ -0,0 +1,10 @@ +#cloud-config +bootcmd: + - echo 'APT::Get::AllowUnauthenticated "true";' | tee /etc/apt/apt.conf.d/99disablesigs + - echo nameserver 8.8.8.8 | tee -a /etc/resolv.conf # last resort, in case the DHCP server does not provide a resolver +manage_etc_hosts: true +preserve_hostname: true +system_info: + default_user: + name: ubuntu +final_message: "READYTORUN" diff --git a/qa/tasks/calamari_nosetests.py b/qa/tasks/calamari_nosetests.py new file mode 100644 index 00000000000..5c5b15dbecb --- /dev/null +++ b/qa/tasks/calamari_nosetests.py @@ -0,0 +1,281 @@ +import contextlib +import logging +import os +import textwrap +import yaml + +from teuthology import contextutil +from teuthology import misc +from teuthology import packaging +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +# extra stuff we need to do our job here +EXTRA_PKGS = [ + 'git', +] + +# stuff that would be in a devmode install, but should be +# installed in the system for running nosetests against +# a production install. 
+EXTRA_NOSETEST_PKGS = [ + 'python-psutil', + 'python-mock', +] + + +def find_client0(cluster): + ''' Find remote that has client.0 role, or None ''' + for rem, roles in cluster.remotes.iteritems(): + if 'client.0' in roles: + return rem + return None + + +def pip(remote, package, venv=None, uninstall=False, force=False): + ''' {un}install a package with pip, possibly in a virtualenv ''' + if venv: + pip = os.path.join(venv, 'bin', 'pip') + args = ['sudo', pip] + else: + args = ['sudo', 'pip'] + + if uninstall: + args.extend(['uninstall', '-y']) + else: + args.append('install') + if force: + args.append('-I') + + args.append(package) + remote.run(args=args) + + +@contextlib.contextmanager +def install_epel(remote): + ''' install a disabled-by-default epel repo config file ''' + remove = False + try: + if remote.os.package_type == 'deb': + yield + else: + remove = True + distromajor = remote.os.version.split('.')[0] + + repofiledata = textwrap.dedent(''' + [epel] + name=epel{version} + metalink=http://mirrors.fedoraproject.org/metalink?repo=epel-{version}&arch=$basearch + enabled=0 + gpgcheck=0 + ''').format(version=distromajor) + + misc.create_file(remote, '/etc/yum.repos.d/epel.repo', + data=repofiledata, sudo=True) + remote.run(args='sudo yum clean all') + yield + + finally: + if remove: + misc.delete_file(remote, '/etc/yum.repos.d/epel.repo', sudo=True) + + +def enable_epel(remote, enable=True): + ''' enable/disable the epel repo ''' + args = 'sudo sed -i'.split() + if enable: + args.extend(['s/enabled=0/enabled=1/']) + else: + args.extend(['s/enabled=1/enabled=0/']) + args.extend(['/etc/yum.repos.d/epel.repo']) + + remote.run(args=args) + remote.run(args='sudo yum clean all') + + +@contextlib.contextmanager +def install_extra_pkgs(client): + ''' Install EXTRA_PKGS ''' + try: + for pkg in EXTRA_PKGS: + packaging.install_package(pkg, client) + yield + + finally: + for pkg in EXTRA_PKGS: + packaging.remove_package(pkg, client) + + +@contextlib.contextmanager +def 
clone_calamari(config, client): + ''' clone calamari source into current directory on remote ''' + branch = config.get('calamari_branch', 'master') + url = config.get('calamari_giturl', 'git://github.com/ceph/calamari') + try: + cmd = 'git clone -b {branch} {giturl}'.format( + branch=branch, giturl=url + ) + client.run(args=cmd) + yield + finally: + # sudo python setup.py develop may have left some root files around + client.run(args='sudo rm -rf calamari') + + +@contextlib.contextmanager +def write_info_yaml(cluster, client): + ''' write info.yaml to client for nosetests ''' + try: + info = { + 'cluster': { + rem.name: {'roles': roles} + for rem, roles in cluster.remotes.iteritems() + } + } + misc.create_file(client, 'calamari/info.yaml', + data=yaml.safe_dump(info, default_flow_style=False)) + yield + finally: + misc.delete_file(client, 'calamari/info.yaml') + + +@contextlib.contextmanager +def write_test_conf(client): + ''' write calamari/tests/test.conf to client for nosetests ''' + try: + testconf = textwrap.dedent(''' + [testing] + + calamari_control = external + ceph_control = external + bootstrap = False + api_username = admin + api_password = admin + embedded_timeout_factor = 1 + external_timeout_factor = 3 + external_cluster_path = info.yaml + ''') + misc.create_file(client, 'calamari/tests/test.conf', data=testconf) + yield + + finally: + misc.delete_file(client, 'calamari/tests/test.conf') + + +@contextlib.contextmanager +def prepare_nosetest_env(client): + try: + # extra dependencies that would be in the devmode venv + if client.os.package_type == 'rpm': + enable_epel(client, enable=True) + for package in EXTRA_NOSETEST_PKGS: + packaging.install_package(package, client) + if client.os.package_type == 'rpm': + enable_epel(client, enable=False) + + # install nose itself into the calamari venv, force it in case it's + # already installed in the system, so we can invoke it by path without + # fear that it's not present + pip(client, 'nose', 
venv='/opt/calamari/venv', force=True) + + # install a later version of requests into the venv as well + # (for precise) + pip(client, 'requests', venv='/opt/calamari/venv', force=True) + + # link (setup.py develop) calamari/rest-api into the production venv + # because production does not include calamari_rest.management, needed + # for test_rest_api.py's ApiIntrospection + args = 'cd calamari/rest-api'.split() + [run.Raw(';')] + \ + 'sudo /opt/calamari/venv/bin/python setup.py develop'.split() + client.run(args=args) + + # because, at least in Python 2.6/Centos, site.py uses + # 'os.path.exists()' to process .pth file entries, and exists() uses + # access(2) to check for existence, all the paths leading up to + # $HOME/calamari/rest-api need to be searchable by all users of + # the package, which will include the WSGI/Django app, running + # as the Apache user. So make them all world-read-and-execute. + args = 'sudo chmod a+x'.split() + \ + ['.', './calamari', './calamari/rest-api'] + client.run(args=args) + + # make one dummy request just to get the WSGI app to do + # all its log creation here, before the chmod below (I'm + # looking at you, graphite -- /var/log/calamari/info.log and + # /var/log/calamari/exception.log) + client.run(args='wget -q -O /dev/null http://localhost') + + # /var/log/calamari/* is root-or-apache write-only + client.run(args='sudo chmod a+w /var/log/calamari/*') + + yield + + finally: + args = 'cd calamari/rest-api'.split() + [run.Raw(';')] + \ + 'sudo /opt/calamari/venv/bin/python setup.py develop -u'.split() + client.run(args=args) + for pkg in ('nose', 'requests'): + pip(client, pkg, venv='/opt/calamari/venv', uninstall=True) + for package in EXTRA_NOSETEST_PKGS: + packaging.remove_package(package, client) + + +@contextlib.contextmanager +def run_nosetests(client): + ''' Actually run the tests ''' + args = [ + 'cd', + 'calamari', + run.Raw(';'), + 'CALAMARI_CONFIG=/etc/calamari/calamari.conf', + '/opt/calamari/venv/bin/nosetests', + 
'-v', + 'tests/', + ] + client.run(args=args) + yield + + +@contextlib.contextmanager +def task(ctx, config): + """ + Run Calamari tests against an instance set up by 'calamari_server'. + + -- clone the Calamari source into $HOME (see options) + -- write calamari/info.yaml describing the cluster + -- write calamari/tests/test.conf containing + 'external' for calamari_control and ceph_control + 'bootstrap = False' to disable test bootstrapping (installing minions) + no api_url necessary (inferred from client.0) + 'external_cluster_path = info.yaml' + -- modify the production Calamari install to allow test runs: + install nose in the venv + install EXTRA_NOSETEST_PKGS + link in, with setup.py develop, calamari_rest (for ApiIntrospection) + -- set CALAMARI_CONFIG to point to /etc/calamari/calamari.conf + -- nosetests -v tests/ + + Options are: + calamari_giturl: url from which to git clone calamari + (default: git://github.com/ceph/calamari) + calamari_branch: git branch of calamari to check out + (default: master) + + Note: the tests must find a clean cluster, so don't forget to + set the crush default type appropriately, or install min_size OSD hosts + """ + client0 = find_client0(ctx.cluster) + if client0 is None: + raise RuntimeError("must have client.0 role") + + with contextutil.nested( + lambda: install_epel(client0), + lambda: install_extra_pkgs(client0), + lambda: clone_calamari(config, client0), + lambda: write_info_yaml(ctx.cluster, client0), + lambda: write_test_conf(client0), + lambda: prepare_nosetest_env(client0), + lambda: run_nosetests(client0), + ): + yield diff --git a/qa/tasks/calamari_setup.py b/qa/tasks/calamari_setup.py new file mode 100644 index 00000000000..09d3d40894e --- /dev/null +++ b/qa/tasks/calamari_setup.py @@ -0,0 +1,393 @@ +""" +Calamari setup task +""" +import contextlib +import logging +import os +import requests +import shutil +import subprocess +import webbrowser + +from cStringIO import StringIO +from teuthology.orchestra import 
run +from teuthology import contextutil +from teuthology import misc + +log = logging.getLogger(__name__) + +ICE_VERSION_DEFAULT = '1.2.2' + + +@contextlib.contextmanager +def task(ctx, config): + """ + Do the setup of a calamari server. + + - calamari_setup: + version: 'v80.1' + ice_tool_dir: + iceball_location: + + Options are: + + version -- ceph version we are testing against (defaults to 80.1) + ice_tool_dir -- optional local directory where ice-tool exists or will + be loaded (defaults to src in home directory) + ice_version -- version of ICE we're testing (with default) + iceball_location -- Can be an HTTP URL, in which case fetch from this + location, using 'ice_version' and distro information + to select the right tarball. Can also be a local + path. If local path is '.', and iceball is + not already present, then we try to build + an iceball using the ice_tool_dir commands. + ice_git_location -- location of ice tool on git + start_browser -- If True, start a browser. To be used by runs that will + bring up a browser quickly for human use. Set to False + for overnight suites that are testing for problems in + the installation itself (defaults to False). + email -- email address for the user (defaults to x@y.com) + no_epel -- indicates if we should remove epel files prior to yum + installations. Defaults to True. 
+ calamari_user -- user name to log into gui (defaults to admin) + calamari_password -- calamari user password (defaults to admin) + """ + cal_svr = None + start_browser = config.get('start_browser', False) + no_epel = config.get('no_epel', True) + for remote_, roles in ctx.cluster.remotes.items(): + if 'client.0' in roles: + cal_svr = remote_ + break + if not cal_svr: + raise RuntimeError('client.0 not found in roles') + with contextutil.nested( + lambda: adjust_yum_repos(ctx, cal_svr, no_epel), + lambda: calamari_install(config, cal_svr), + lambda: ceph_install(ctx, cal_svr), + lambda: calamari_connect(ctx, cal_svr), + lambda: browser(start_browser, cal_svr.hostname), + ): + yield + + +@contextlib.contextmanager +def adjust_yum_repos(ctx, cal_svr, no_epel): + """ + For each remote machine, fix the repos if yum is used. + """ + ice_distro = str(cal_svr.os) + if ice_distro.startswith('rhel') or ice_distro.startswith('centos'): + if no_epel: + for remote in ctx.cluster.remotes: + fix_yum_repos(remote, ice_distro) + try: + yield + finally: + if ice_distro.startswith('rhel') or ice_distro.startswith('centos'): + if no_epel: + for remote in ctx.cluster.remotes: + restore_yum_repos(remote) + + +def restore_yum_repos(remote): + """ + Copy the old saved repo back in. 
+ """ + if remote.run(args=['sudo', 'rm', '-rf', '/etc/yum.repos.d']).exitstatus: + return False + if remote.run(args=['sudo', 'mv', '/etc/yum.repos.d.old', + '/etc/yum.repos.d']).exitstatus: + return False + + +def fix_yum_repos(remote, distro): + """ + For yum calamari installations, the repos.d directory should only + contain a repo file named rhel.repo + """ + if distro.startswith('centos'): + cmds = [ + 'sudo mkdir /etc/yum.repos.d.old'.split(), + ['sudo', 'cp', run.Raw('/etc/yum.repos.d/*'), + '/etc/yum.repos.d.old'], + ['sudo', 'rm', run.Raw('/etc/yum.repos.d/epel*')], + ] + for cmd in cmds: + if remote.run(args=cmd).exitstatus: + return False + else: + cmds = [ + 'sudo mv /etc/yum.repos.d /etc/yum.repos.d.old'.split(), + 'sudo mkdir /etc/yum.repos.d'.split(), + ] + for cmd in cmds: + if remote.run(args=cmd).exitstatus: + return False + + # map "distroversion" from Remote.os to a tuple of + # (repo title, repo name descriptor, apt-mirror repo path chunk) + yum_repo_params = { + 'rhel 6.4': ('rhel6-server', 'RHEL', 'rhel6repo-server'), + 'rhel 6.5': ('rhel6-server', 'RHEL', 'rhel6repo-server'), + 'rhel 7.0': ('rhel7-server', 'RHEL', 'rhel7repo/server'), + } + repotitle, reponame, path = yum_repo_params[distro] + repopath = '/etc/yum.repos.d/%s.repo' % repotitle + # TO DO: Make this data configurable too + repo_contents = '\n'.join( + ('[%s]' % repotitle, + 'name=%s $releasever - $basearch' % reponame, + 'baseurl=http://apt-mirror.front.sepia.ceph.com/' + path, + 'gpgcheck=0', + 'enabled=1') + ) + misc.sudo_write_file(remote, repopath, repo_contents) + cmds = [ + 'sudo yum clean all'.split(), + 'sudo yum makecache'.split(), + ] + for cmd in cmds: + if remote.run(args=cmd).exitstatus: + return False + return True + + +def get_iceball_with_http(urlbase, ice_version, ice_distro, destdir): + ''' + Copy iceball with http to destdir + ''' + url = '/'.join(( + urlbase, + '{ver}/ICE-{ver}-{distro}.tar.gz'.format( + ver=ice_version, distro=ice_distro + ) + )) + # 
stream=True means we don't download until copyfileobj below, + # and don't need a temp file + r = requests.get(url, stream=True) + filename = url.split('/')[-1] + with open(filename, 'w') as f: + shutil.copyfileobj(r.raw, f) + log.info('saved %s as %s' % (url, filename)) + + +@contextlib.contextmanager +def calamari_install(config, cal_svr): + """ + Install calamari + + The steps here are: + -- Get the iceball, building it if necessary. + -- Copy the iceball to the calamari server, and untarring it. + -- Running ice-setup.py on the calamari server. + -- Running calamari-ctl initialize. + """ + ice_distro = str(cal_svr.os) + ice_distro = ice_distro.replace(" ", "") + client_id = str(cal_svr) + at_loc = client_id.find('@') + if at_loc > 0: + client_id = client_id[at_loc + 1:] + convert = {'ubuntu12.04': 'precise', 'ubuntu14.04': 'trusty', + 'rhel7.0': 'rhel7', 'debian7': 'wheezy'} + version = config.get('version', 'v0.80.1') + email = config.get('email', 'x@x.com') + ice_tool_dir = config.get('ice_tool_dir', '%s%s%s' % + (os.environ['HOME'], os.sep, 'src')) + calamari_user = config.get('calamari_user', 'admin') + calamari_password = config.get('calamari_passwd', 'admin') + git_icetool_loc = config.get('ice_git_location', + 'git@github.com:inktankstorage') + if ice_distro in convert: + ice_distro = convert[ice_distro] + log.info('calamari server on %s' % ice_distro) + iceball_loc = config.get('iceball_location', '.') + ice_version = config.get('ice_version', ICE_VERSION_DEFAULT) + delete_iceball = False + if iceball_loc.startswith('http'): + get_iceball_with_http(iceball_loc, ice_version, ice_distro, '/tmp') + iceball_loc = '/tmp' + delete_iceball = True + elif iceball_loc == '.': + ice_tool_loc = os.path.join(ice_tool_dir, 'ice-tools') + if not os.path.isdir(ice_tool_loc): + try: + subprocess.check_call(['git', 'clone', + git_icetool_loc + os.sep + + 'ice-tools.git', + ice_tool_loc]) + except subprocess.CalledProcessError: + raise RuntimeError('git clone of ice-tools 
failed') + exec_ice = os.path.join(ice_tool_loc, + 'teuth-virtenv/bin/make_iceball') + try: + subprocess.check_call('virtualenv teuth-virtenv'.split(), + cwd=ice_tool_loc) + subprocess.check_call( + 'teuth-virtenv/bin/python setup.py develop'.split(), + cwd=ice_tool_loc + ) + subprocess.check_call( + 'teuth-virtenv/bin/pip install -r requirements.txt'.split(), + cwd=ice_tool_loc + ) + subprocess.check_call([exec_ice, '-I', ice_version, + '-b', version, '-o', ice_distro]) + delete_iceball = True + except subprocess.CalledProcessError: + raise RuntimeError('%s failed for %s distro' % + (exec_ice, ice_distro)) + subprocess.check_call('rm -rf teuth-virtenv'.split(), + cwd=ice_tool_loc) + + gz_file = 'ICE-{0}-{1}.tar.gz'.format(ice_version, ice_distro) + lgz_file = os.path.join(iceball_loc, gz_file) + cal_svr.put_file(lgz_file, os.path.join('/tmp/', gz_file)) + ret = cal_svr.run(args=['gunzip', run.Raw('<'), "/tmp/%s" % gz_file, + run.Raw('|'), 'tar', 'xvf', run.Raw('-')]) + if ret.exitstatus: + raise RuntimeError('remote tar failed') + icesetdata = 'yes\n\n%s\nhttp\n' % client_id + ice_in = StringIO(icesetdata) + ice_setup_io = StringIO() + ret = cal_svr.run(args=['sudo', 'python', 'ice_setup.py'], stdin=ice_in, + stdout=ice_setup_io) + log.debug(ice_setup_io.getvalue()) + # Run Calamari-ceph connect. + if ret.exitstatus: + raise RuntimeError('ice_setup.py failed') + icesetdata = '%s\n%s\n%s\n%s\n' % (calamari_user, email, calamari_password, + calamari_password) + ice_in = StringIO(icesetdata) + ret = cal_svr.run(args=['sudo', 'calamari-ctl', 'initialize'], + stdin=ice_in, stdout=ice_setup_io) + log.debug(ice_setup_io.getvalue()) + if ret.exitstatus: + raise RuntimeError('calamari-ctl initialize failed') + try: + yield + finally: + log.info('Cleaning up after Calamari installation') + if delete_iceball: + os.unlink(gz_file) + + +@contextlib.contextmanager +def ceph_install(ctx, cal_svr): + """ + Install ceph if ceph was not previously installed by teuthology. 
def deploy_ceph(ctx, cal_svr):
    """
    Perform the ceph-deploy actions needed to bring up a Ceph cluster.  This
    test is needed to check the ceph-deploy that comes with the calamari
    package.

    :param ctx: Context; ``ctx.cluster.remotes`` maps each remote to its
                list of ``<type>.<id>`` roles.
    :param cal_svr: Remote on which the ceph-deploy commands are run.
    :returns: 0 if every ceph-deploy command exited with status 0, otherwise
              the first non-zero exit status seen (truthy means failure).
    """
    osd_to_name = {}
    all_machines = set()
    all_mons = set()
    for remote in ctx.cluster.remotes:
        all_machines.add(remote.shortname)
        roles = ctx.cluster.remotes[remote]
        for role in roles:
            daemon_type, number = role.split('.')
            if daemon_type == 'osd':
                osd_to_name[number] = remote.shortname
            if daemon_type == 'mon':
                all_mons.add(remote.shortname)
    # Sorted so the generated command lines are reproducible between runs.
    first_cmds = [['new'] + sorted(all_mons),
                  ['install'] + sorted(all_machines),
                  ['mon', 'create-initial']]
    # Accumulate failures: the previous "ret = True; ret &= status" dropped
    # back to 0 as soon as any command succeeded, hiding earlier failures.
    ret = 0
    for entry in first_cmds:
        arg_list = ['ceph-deploy'] + entry
        log.info('Running: %s' % ' '.join(arg_list))
        status = cal_svr.run(args=arg_list).exitstatus
        if status and not ret:
            ret = status
    # Index 1 is the first OSD on a machine (vdd), index 2 the second (vdc),
    # and so on; index 0 is a placeholder that is never used.
    disk_labels = '_dcba'
    # NEEDS WORK assumes disks start with vd (need to check this somewhere)
    for cmd_pts in [['disk', 'zap'], ['osd', 'prepare'], ['osd', 'activate']]:
        mach_osd_cnt = {}
        for osdn in osd_to_name:
            osd_mac = osd_to_name[osdn]
            mach_osd_cnt[osd_mac] = mach_osd_cnt.get(osd_mac, 0) + 1
            arg_list = ['ceph-deploy']
            arg_list.extend(cmd_pts)
            disk_id = '%s:vd%s' % (osd_to_name[osdn],
                                   disk_labels[mach_osd_cnt[osd_mac]])
            if 'activate' in cmd_pts:
                # activate operates on the first partition of the disk
                disk_id += '1'
            arg_list.append(disk_id)
            log.info('Running: %s' % ' '.join(arg_list))
            status = cal_svr.run(args=arg_list).exitstatus
            if status and not ret:
                ret = status
    return ret
@contextlib.contextmanager
def calamari_connect(ctx, cal_svr):
    """
    Run ``ceph-deploy calamari connect`` for the ceph nodes.

    Every remote whose role list does not contain ``client.0`` is connected.

    :param ctx: Context
    :param cal_svr: Remote on which ceph-deploy is run.
    :raises RuntimeError: if the connect command exits non-zero.
    """
    remotes = ctx.cluster.remotes
    targets = [node.shortname for node in remotes
               if 'client.0' not in remotes[node]]
    outcome = cal_svr.run(
        args=['ceph-deploy', 'calamari', 'connect'] + targets)
    if outcome.exitstatus:
        raise RuntimeError('calamari connect failed')
    try:
        yield
    finally:
        log.info('Calamari test terminating')
# Default capabilities per role type, as ordered (subsystem, capability)
# pairs so the generated argument order is deterministic.
_DEFAULT_CAPS = {
    'osd': (
        ('mon', 'allow *'),
        ('osd', 'allow *'),
    ),
    'mds': (
        ('mon', 'allow *'),
        ('osd', 'allow *'),
        ('mds', 'allow'),
    ),
    'client': (
        ('mon', 'allow rw'),
        ('osd', 'allow rwx'),
        ('mds', 'allow'),
    ),
}


def generate_caps(type_):
    """
    Yield the ``--cap <subsystem> <capability>`` arguments for a role type.

    The result is a flat stream of strings (three per capability) that can
    be appended directly to a ceph-authtool command line.  Valid types are
    osd, mds and client.

    :param type_: Role type whose default capabilities are wanted.
    :raises KeyError: if ``type_`` has no default capabilities; raised when
                      the generator is first iterated.
    """
    if type_ not in _DEFAULT_CAPS:
        raise KeyError(
            'no default capabilities for type %r (expected one of: %s)'
            % (type_, ', '.join(sorted(_DEFAULT_CAPS))))
    for subsystem, capability in _DEFAULT_CAPS[type_]:
        yield '--cap'
        yield subsystem
        yield capability
def assign_devs(roles, devs):
    """
    Pair each role with the device at the same position.

    Pairing stops at the end of the shorter list, so surplus roles or
    surplus devices are left out of the result.

    :param roles: List of roles
    :param devs: Corresponding list of devices.
    :returns: Dictionary of devs indexed by roles.
    """
    mapping = {}
    for role, dev in zip(roles, devs):
        mapping[role] = dev
    return mapping
@contextlib.contextmanager
def crush_setup(ctx, config):
    """
    Set the crush tunables profile on the first monitor.

    The profile comes from ``config['crush_tunables']`` and falls back to
    'default'.  Nothing is undone on exit.

    :param ctx: Context
    :param config: Configuration
    """
    mon_role = teuthology.get_first_mon(ctx, config)
    (mon_remote,) = ctx.cluster.only(mon_role).remotes.iterkeys()

    tunables = config.get('crush_tunables', 'default')
    log.info('Setting crush tunables to %s', tunables)
    command = ['sudo', 'ceph', 'osd', 'crush', 'tunables', tunables]
    mon_remote.run(args=command)
    yield
@contextlib.contextmanager
def cluster(ctx, config):
    """
    Handle the creation and removal of a ceph cluster.

    On startup:
        Create directories needed for the cluster.
        Create remote journals for all osds.
        Create and set keyring.
        Copy the monmap to the test systems.
        Setup mon nodes.
        Setup mds nodes.
        Mkfs osd nodes.
        Add keyring information to monmaps
        Mkfs mon nodes.

    On exit:
        If errors occurred, extract a failure message and store in ctx.summary.
        Unmount all test files and temporary journaling files.
        Save the monitor information and archive all ceph logs.
        Cleanup the keyring setup, and remove all monitor map and data files left over.

    If ``ctx.config['use_existing_cluster']`` is True, nothing is created or
    cleaned up.

    :param ctx: Context
    :param config: Configuration
    """
    if ctx.config.get('use_existing_cluster', False) is True:
        log.info("'use_existing_cluster' is true; skipping cluster creation")
        yield
        # A context manager generator must yield exactly once; without this
        # return the function would go on to create a cluster and yield again.
        return

    testdir = teuthology.get_testdir(ctx)
    log.info('Creating ceph cluster...')
    run.wait(
        ctx.cluster.run(
            args=[
                'install', '-d', '-m0755', '--',
                '{tdir}/data'.format(tdir=testdir),
            ],
            wait=False,
        )
    )

    run.wait(
        ctx.cluster.run(
            args=[
                'sudo',
                'install', '-d', '-m0777', '--', '/var/run/ceph',
            ],
            wait=False,
        )
    )

    devs_to_clean = {}
    remote_to_roles_to_devs = {}
    remote_to_roles_to_journals = {}
    osds = ctx.cluster.only(teuthology.is_type('osd'))
    for remote, roles_for_host in osds.remotes.iteritems():
        devs = teuthology.get_scratch_devices(remote)
        roles_to_devs = {}
        roles_to_journals = {}
        if config.get('fs'):
            log.info('fs option selected, checking for scratch devs')
            log.info('found devs: %s' % (str(devs),))
            devs_id_map = teuthology.get_wwn_id_map(remote, devs)
            iddevs = devs_id_map.values()
            roles_to_devs = assign_devs(
                teuthology.roles_of_type(roles_for_host, 'osd'), iddevs
            )
            if len(roles_to_devs) < len(iddevs):
                # keep the devices not used for data; they may serve as
                # block journals below
                iddevs = iddevs[len(roles_to_devs):]
            devs_to_clean[remote] = []

        if config.get('block_journal'):
            # NOTE(review): iddevs is only bound when 'fs' is also set --
            # confirm block_journal is never used without fs.
            log.info('block journal enabled')
            roles_to_journals = assign_devs(
                teuthology.roles_of_type(roles_for_host, 'osd'), iddevs
            )
            log.info('journal map: %s', roles_to_journals)

        if config.get('tmpfs_journal'):
            log.info('tmpfs journal enabled')
            roles_to_journals = {}
            remote.run(args=['sudo', 'mount', '-t', 'tmpfs', 'tmpfs', '/mnt'])
            for osd in teuthology.roles_of_type(roles_for_host, 'osd'):
                tmpfs = '/mnt/osd.%s' % osd
                roles_to_journals[osd] = tmpfs
                remote.run(args=['truncate', '-s', '1500M', tmpfs])
            log.info('journal map: %s', roles_to_journals)

        log.info('dev map: %s' % (str(roles_to_devs),))
        remote_to_roles_to_devs[remote] = roles_to_devs
        remote_to_roles_to_journals[remote] = roles_to_journals

    log.info('Generating config...')
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [role_list for (remote, role_list) in remotes_and_roles]
    ips = [host for (host, port) in
           (remote.ssh.get_transport().getpeername()
            for (remote, role_list) in remotes_and_roles)]
    conf = teuthology.skeleton_config(ctx, roles=roles, ips=ips)
    for remote, roles_to_journals in remote_to_roles_to_journals.iteritems():
        for role, journal in roles_to_journals.iteritems():
            key = "osd." + str(role)
            if key not in conf:
                conf[key] = {}
            conf[key]['osd journal'] = journal
    # Layer the user-supplied conf sections on top of the skeleton.
    for section, keys in config['conf'].iteritems():
        for key, value in keys.iteritems():
            log.info("[%s] %s = %s" % (section, key, value))
            if section not in conf:
                conf[section] = {}
            conf[section][key] = value

    if config.get('tmpfs_journal'):
        conf['journal dio'] = False

    ctx.ceph = argparse.Namespace()
    ctx.ceph.conf = conf

    keyring_path = config.get('keyring_path', '/etc/ceph/ceph.keyring')

    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)

    firstmon = teuthology.get_first_mon(ctx, config)

    log.info('Setting up %s...' % firstmon)
    ctx.cluster.only(firstmon).run(
        args=[
            'sudo',
            'adjust-ulimits',
            'ceph-coverage',
            coverage_dir,
            'ceph-authtool',
            '--create-keyring',
            keyring_path,
        ],
    )
    ctx.cluster.only(firstmon).run(
        args=[
            'sudo',
            'adjust-ulimits',
            'ceph-coverage',
            coverage_dir,
            'ceph-authtool',
            '--gen-key',
            '--name=mon.',
            keyring_path,
        ],
    )
    ctx.cluster.only(firstmon).run(
        args=[
            'sudo',
            'chmod',
            '0644',
            keyring_path,
        ],
    )
    (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
    fsid = teuthology.create_simple_monmap(
        ctx,
        remote=mon0_remote,
        conf=conf,
    )
    if 'global' not in conf:
        conf['global'] = {}
    conf['global']['fsid'] = fsid

    log.info('Writing ceph.conf for FSID %s...' % fsid)
    conf_path = config.get('conf_path', DEFAULT_CONF_PATH)
    write_conf(ctx, conf_path)

    log.info('Creating admin key on %s...' % firstmon)
    ctx.cluster.only(firstmon).run(
        args=[
            'sudo',
            'adjust-ulimits',
            'ceph-coverage',
            coverage_dir,
            'ceph-authtool',
            '--gen-key',
            '--name=client.admin',
            '--set-uid=0',
            '--cap', 'mon', 'allow *',
            '--cap', 'osd', 'allow *',
            '--cap', 'mds', 'allow *',
            keyring_path,
        ],
    )

    log.info('Copying monmap to all nodes...')
    keyring = teuthology.get_file(
        remote=mon0_remote,
        path=keyring_path,
    )
    monmap = teuthology.get_file(
        remote=mon0_remote,
        path='{tdir}/monmap'.format(tdir=testdir),
    )

    for rem in ctx.cluster.remotes.iterkeys():
        # copy mon key and initial monmap
        log.info('Sending monmap to node {remote}'.format(remote=rem))
        teuthology.sudo_write_file(
            remote=rem,
            path=keyring_path,
            data=keyring,
            perms='0644'
        )
        teuthology.write_file(
            remote=rem,
            path='{tdir}/monmap'.format(tdir=testdir),
            data=monmap,
        )

    log.info('Setting up mon nodes...')
    mons = ctx.cluster.only(teuthology.is_type('mon'))
    run.wait(
        mons.run(
            args=[
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'osdmaptool',
                '-c', conf_path,
                '--clobber',
                '--createsimple', '{num:d}'.format(
                    num=teuthology.num_instances_of_type(ctx.cluster, 'osd'),
                ),
                '{tdir}/osdmap'.format(tdir=testdir),
                '--pg_bits', '2',
                '--pgp_bits', '4',
            ],
            wait=False,
        ),
    )

    log.info('Setting up mds nodes...')
    mdss = ctx.cluster.only(teuthology.is_type('mds'))
    for remote, roles_for_host in mdss.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, 'mds'):
            remote.run(
                args=[
                    'sudo',
                    'mkdir',
                    '-p',
                    '/var/lib/ceph/mds/ceph-{id}'.format(id=id_),
                    run.Raw('&&'),
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-authtool',
                    '--create-keyring',
                    '--gen-key',
                    '--name=mds.{id}'.format(id=id_),
                    '/var/lib/ceph/mds/ceph-{id}/keyring'.format(id=id_),
                ],
            )

    cclient.create_keyring(ctx)
    log.info('Running mkfs on osd nodes...')

    ctx.disk_config = argparse.Namespace()
    ctx.disk_config.remote_to_roles_to_dev = remote_to_roles_to_devs
    ctx.disk_config.remote_to_roles_to_journals = remote_to_roles_to_journals
    ctx.disk_config.remote_to_roles_to_dev_mount_options = {}
    ctx.disk_config.remote_to_roles_to_dev_fstype = {}

    log.info("ctx.disk_config.remote_to_roles_to_dev: {r}".format(r=str(ctx.disk_config.remote_to_roles_to_dev)))
    for remote, roles_for_host in osds.remotes.iteritems():
        roles_to_devs = remote_to_roles_to_devs[remote]
        roles_to_journals = remote_to_roles_to_journals[remote]

        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
            remote.run(
                args=[
                    'sudo',
                    'mkdir',
                    '-p',
                    '/var/lib/ceph/osd/ceph-{id}'.format(id=id_),
                ])
            log.info(str(roles_to_journals))
            log.info(id_)
            if roles_to_devs.get(id_):
                dev = roles_to_devs[id_]
                # Data directory of this OSD; the scratch device is mounted
                # here.  (Previously referenced below without being defined.)
                mnt_point = os.path.join('/var/lib/ceph/osd',
                                         'ceph-{id}'.format(id=id_))
                fs = config.get('fs')
                package = None
                mkfs_options = config.get('mkfs_options')
                mount_options = config.get('mount_options')
                if fs == 'btrfs':
                    # package = 'btrfs-tools'
                    if mount_options is None:
                        mount_options = ['noatime', 'user_subvol_rm_allowed']
                    if mkfs_options is None:
                        mkfs_options = ['-m', 'single',
                                        '-l', '32768',
                                        '-n', '32768']
                if fs == 'xfs':
                    # package = 'xfsprogs'
                    if mount_options is None:
                        mount_options = ['noatime']
                    if mkfs_options is None:
                        mkfs_options = ['-f', '-i', 'size=2048']
                if fs == 'ext4' or fs == 'ext3':
                    if mount_options is None:
                        mount_options = ['noatime', 'user_xattr']

                if mount_options is None:
                    mount_options = []
                if mkfs_options is None:
                    mkfs_options = []
                mkfs = ['mkfs.%s' % fs] + mkfs_options
                log.info('%s on %s on %s' % (mkfs, dev, remote))
                if package is not None:
                    remote.run(
                        args=[
                            'sudo',
                            'apt-get', 'install', '-y', package
                        ],
                        stdout=StringIO(),
                    )

                try:
                    remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev])
                except run.CommandFailedError:
                    # Newer btrfs-tools doesn't prompt for overwrite, use -f.
                    # Check the mkfs options (not the mount options) and
                    # build a new list so the caller's config is not mutated.
                    if '-f' not in mkfs_options:
                        mkfs_options = mkfs_options + ['-f']
                        mkfs = ['mkfs.%s' % fs] + mkfs_options
                        log.info('%s on %s on %s' % (mkfs, dev, remote))
                    remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev])

                log.info('mount %s on %s -o %s' % (dev, remote,
                                                   ','.join(mount_options)))
                remote.run(
                    args=[
                        'sudo',
                        'mount',
                        '-t', fs,
                        '-o', ','.join(mount_options),
                        dev,
                        mnt_point,
                    ]
                )
                # Best effort: restorecon may be missing on some hosts.
                remote.run(
                    args=[
                        'sudo', '/sbin/restorecon', mnt_point,
                    ],
                    check_status=False,
                )
                if remote not in ctx.disk_config.remote_to_roles_to_dev_mount_options:
                    ctx.disk_config.remote_to_roles_to_dev_mount_options[remote] = {}
                ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][id_] = mount_options
                if remote not in ctx.disk_config.remote_to_roles_to_dev_fstype:
                    ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {}
                ctx.disk_config.remote_to_roles_to_dev_fstype[remote][id_] = fs
                devs_to_clean[remote].append(mnt_point)

        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
            remote.run(
                args=[
                    'sudo',
                    'MALLOC_CHECK_=3',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-osd',
                    '--mkfs',
                    '--mkkey',
                    '-i', id_,
                    '--monmap', '{tdir}/monmap'.format(tdir=testdir),
                ],
            )

    log.info('Reading keys from all nodes...')
    keys_fp = StringIO()
    keys = []
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for type_ in ['mds', 'osd']:
            for id_ in teuthology.roles_of_type(roles_for_host, type_):
                data = teuthology.get_file(
                    remote=remote,
                    path='/var/lib/ceph/{type}/ceph-{id}/keyring'.format(
                        type=type_,
                        id=id_,
                    ),
                    sudo=True,
                )
                keys.append((type_, id_, data))
                keys_fp.write(data)
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for type_ in ['client']:
            for id_ in teuthology.roles_of_type(roles_for_host, type_):
                data = teuthology.get_file(
                    remote=remote,
                    path='/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                )
                keys.append((type_, id_, data))
                keys_fp.write(data)

    log.info('Adding keys to all mons...')
    writes = mons.run(
        args=[
            'sudo', 'tee', '-a',
            keyring_path,
        ],
        stdin=run.PIPE,
        wait=False,
        stdout=StringIO(),
    )
    keys_fp.seek(0)
    teuthology.feed_many_stdins_and_close(keys_fp, writes)
    run.wait(writes)
    for type_, id_, data in keys:
        run.wait(
            mons.run(
                args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-authtool',
                    keyring_path,
                    '--name={type}.{id}'.format(
                        type=type_,
                        id=id_,
                    ),
                ] + list(generate_caps(type_)),
                wait=False,
            ),
        )

    log.info('Running mkfs on mon nodes...')
    for remote, roles_for_host in mons.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, 'mon'):
            remote.run(
                args=[
                    'sudo',
                    'mkdir',
                    '-p',
                    '/var/lib/ceph/mon/ceph-{id}'.format(id=id_),
                ],
            )
            remote.run(
                args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-mon',
                    '--mkfs',
                    '-i', id_,
                    '--monmap={tdir}/monmap'.format(tdir=testdir),
                    '--osdmap={tdir}/osdmap'.format(tdir=testdir),
                    '--keyring={kpath}'.format(kpath=keyring_path),
                ],
            )

    run.wait(
        mons.run(
            args=[
                'rm',
                '--',
                '{tdir}/monmap'.format(tdir=testdir),
                '{tdir}/osdmap'.format(tdir=testdir),
            ],
            wait=False,
        ),
    )

    try:
        yield
    except Exception:
        # we need to know this below
        ctx.summary['success'] = False
        raise
    finally:
        (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()

        log.info('Checking cluster log for badness...')

        def first_in_ceph_log(pattern, excludes):
            """
            Find the first occurrence of the pattern specified in the Ceph log,
            Returns None if none found.

            :param pattern: Pattern scanned for.
            :param excludes: Patterns to ignore.
            :return: First line of text (or None if not found)
            """
            args = [
                'sudo',
                'egrep', pattern,
                '/var/log/ceph/ceph.log',
            ]
            for exclude in excludes:
                args.extend([run.Raw('|'), 'egrep', '-v', exclude])
            args.extend([
                run.Raw('|'), 'head', '-n', '1',
            ])
            r = mon0_remote.run(
                stdout=StringIO(),
                args=args,
            )
            stdout = r.stdout.getvalue()
            if stdout != '':
                return stdout
            return None

        if first_in_ceph_log(r'\[ERR\]|\[WRN\]|\[SEC\]',
                             config['log_whitelist']) is not None:
            log.warning('Found errors (ERR|WRN|SEC) in cluster log')
            ctx.summary['success'] = False
            # use the most severe problem as the failure reason
            if 'failure_reason' not in ctx.summary:
                for pattern in [r'\[SEC\]', r'\[ERR\]', r'\[WRN\]']:
                    match = first_in_ceph_log(pattern, config['log_whitelist'])
                    if match is not None:
                        ctx.summary['failure_reason'] = \
                            '"{match}" in cluster log'.format(
                                match=match.rstrip('\n'),
                            )
                        break

        for remote, dirs in devs_to_clean.iteritems():
            for dir_ in dirs:
                log.info('Unmounting %s on %s' % (dir_, remote))
                try:
                    remote.run(
                        args=[
                            'sync',
                            run.Raw('&&'),
                            'sudo',
                            'umount',
                            '-f',
                            dir_
                        ]
                    )
                except Exception as e:
                    # Dump open files and the process tree to help find out
                    # what is keeping the mount busy, then re-raise.
                    remote.run(args=[
                        'sudo',
                        run.Raw('PATH=/usr/sbin:$PATH'),
                        'lsof',
                        run.Raw(';'),
                        'ps', 'auxf',
                    ])
                    raise e

        if config.get('tmpfs_journal'):
            log.info('tmpfs journal enabled - unmounting tmpfs at /mnt')
            for remote, roles_for_host in osds.remotes.iteritems():
                remote.run(
                    args=['sudo', 'umount', '-f', '/mnt'],
                    check_status=False,
                )

        if ctx.archive is not None and \
                not (ctx.config.get('archive-on-error') and ctx.summary['success']):

            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

        log.info('Cleaning ceph cluster...')
        run.wait(
            ctx.cluster.run(
                args=[
                    'sudo',
                    'rm',
                    '-rf',
                    '--',
                    conf_path,
                    keyring_path,
                    '{tdir}/data'.format(tdir=testdir),
                    '{tdir}/monmap'.format(tdir=testdir),
                ],
                wait=False,
            ),
        )
@contextlib.contextmanager
def run_daemon(ctx, config, type_):
    """
    Run daemons for a role type.  Handle the startup and termination of a daemon.
    On startup -- set coverages, cpu_profile, valgrind values for all remotes.
    On cleanup -- Stop all existing daemons of this type.

    :param ctx: Context
    :param config: Configuration
    :param type_: Role type
    """
    log.info('Starting %s daemons...' % type_)
    testdir = teuthology.get_testdir(ctx)
    daemons = ctx.cluster.only(teuthology.is_type(type_))

    # check whether any daemons of this type are configured
    if daemons is None:
        # A context manager generator must still yield once; returning
        # before the yield makes the ``with`` statement fail with
        # "generator didn't yield".  Nothing was started, so there is
        # nothing to stop afterwards.
        yield
        return
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)

    # Coverage and valgrind runs are sent 'term' instead of 'kill'.
    daemon_signal = 'kill'
    if config.get('coverage') or config.get('valgrind') is not None:
        daemon_signal = 'term'

    for remote, roles_for_host in daemons.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, type_):
            name = '%s.%s' % (type_, id_)

            run_cmd = [
                'sudo',
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'daemon-helper',
                daemon_signal,
            ]
            run_cmd_tail = [
                'ceph-%s' % (type_),
                '-f',
                '-i', id_]

            if type_ in config.get('cpu_profile', []):
                profile_path = '/var/log/ceph/profiling-logger/%s.%s.prof' % (type_, id_)
                run_cmd.extend(['env', 'CPUPROFILE=%s' % profile_path])

            if config.get('valgrind') is not None:
                valgrind_args = None
                # A per-daemon entry (e.g. osd.1) overrides a per-type one.
                if type_ in config['valgrind']:
                    valgrind_args = config['valgrind'][type_]
                if name in config['valgrind']:
                    valgrind_args = config['valgrind'][name]
                run_cmd = teuthology.get_valgrind_args(testdir, name,
                                                       run_cmd,
                                                       valgrind_args)

            run_cmd.extend(run_cmd_tail)

            ctx.daemons.add_daemon(remote, type_, id_,
                                   args=run_cmd,
                                   logger=log.getChild(name),
                                   stdin=run.PIPE,
                                   wait=False,
                                   )

    try:
        yield
    finally:
        teuthology.stop_daemons_of_type(ctx, type_)
@contextlib.contextmanager
def wait_for_failure(ctx, config):
    """
    Wait for a failure of a ceph daemon

    For example::
      tasks:
      - ceph.wait_for_failure: [mds.*]

      tasks:
      - ceph.wait_for_failure: [osd.0, osd.2]

      tasks:
      - ceph.wait_for_failure:
          daemons: [osd.0, osd.2]

    :param ctx: Context
    :param config: Either a list of daemon roles, or a dictionary with a
                   'daemons' list; None is treated as an empty dictionary.
    :raises RuntimeError: if a daemon's wait() returns without raising,
                          i.e. the daemon did not fail.
    """
    if config is None:
        config = {}
    elif isinstance(config, list):
        config = {'daemons': config}

    daemons = ctx.daemons.resolve_role_list(config.get('daemons', None), CEPH_ROLE_TYPES)
    for i in daemons:
        type_ = i.split('.', 1)[0]
        id_ = i.split('.', 1)[1]
        try:
            ctx.daemons.get_daemon(type_, id_).wait()
        except Exception:
            # Only ordinary errors count as the expected failure; a bare
            # except would also swallow KeyboardInterrupt and SystemExit.
            log.info('Saw expected daemon failure. Continuing.')
        else:
            raise RuntimeError('daemon %s did not fail' % i)

    yield
You can ignore log + entries by giving a list of egrep compatible regexes, i.e.: + + tasks: + - ceph: + log-whitelist: ['foo.*bar', 'bad message'] + + :param ctx: Context + :param config: Configuration + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + "task ceph only supports a dictionary for configuration" + + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('ceph', {})) + + ctx.daemons = DaemonGroup() + + testdir = teuthology.get_testdir(ctx) + if config.get('coverage'): + coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir) + log.info('Creating coverage directory...') + run.wait( + ctx.cluster.run( + args=[ + 'install', '-d', '-m0755', '--', + coverage_dir, + ], + wait=False, + ) + ) + + with contextutil.nested( + lambda: ceph_log(ctx=ctx, config=None), + lambda: valgrind_post(ctx=ctx, config=config), + lambda: cluster(ctx=ctx, config=dict( + conf=config.get('conf', {}), + fs=config.get('fs', None), + mkfs_options=config.get('mkfs_options', None), + mount_options=config.get('mount_options', None), + block_journal=config.get('block_journal', None), + tmpfs_journal=config.get('tmpfs_journal', None), + log_whitelist=config.get('log-whitelist', []), + cpu_profile=set(config.get('cpu_profile', [])), + )), + lambda: run_daemon(ctx=ctx, config=config, type_='mon'), + lambda: crush_setup(ctx=ctx, config=config), + lambda: run_daemon(ctx=ctx, config=config, type_='osd'), + lambda: cephfs_setup(ctx=ctx, config=config), + lambda: run_daemon(ctx=ctx, config=config, type_='mds'), + ): + try: + if config.get('wait-for-healthy', True): + healthy(ctx=ctx, config=None) + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + ctx.manager = CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + yield + finally: + if config.get('wait-for-scrub', True): + osd_scrub_pgs(ctx, config) diff --git a/qa/tasks/ceph_client.py 
def create_keyring(ctx):
    """
    Generate a keyring for every client role on the host that carries it.

    For each ``client`` role one remote command is issued that creates
    ``/etc/ceph/ceph.client.<id>.keyring`` with ceph-authtool (run under
    ceph-coverage) and then chmods the file to 0644.

    :param ctx: Context
    """
    log.info('Setting up client nodes...')
    client_cluster = ctx.cluster.only(teuthology.is_type('client'))
    coverage_dir = '{tdir}/archive/coverage'.format(
        tdir=teuthology.get_testdir(ctx))

    for remote, roles in client_cluster.remotes.iteritems():
        for client_id in teuthology.roles_of_type(roles, 'client'):
            keyring = '/etc/ceph/ceph.client.{id}.keyring'.format(
                id=client_id)
            generate = [
                'sudo',
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'ceph-authtool',
                '--create-keyring',
                '--gen-key',
                # TODO this --name= is not really obeyed, all unknown
                # "types" are munged to "client"
                '--name=client.{id}'.format(id=client_id),
                keyring,
            ]
            make_readable = ['sudo', 'chmod', '0644', keyring]
            # Both steps go out as a single shell command joined by '&&'.
            remote.run(args=generate + [run.Raw('&&')] + make_readable)
def is_healthy(ctx, config):
    """
    Wait until a Ceph cluster is healthy.

    Runs ``sudo ceph health`` on the first monitor's remote every 10
    seconds until the first word of the output is ``HEALTH_OK``.

    :param ctx: Context
    :param config: Configuration, passed to teuthology.get_first_mon
    :raises RuntimeError: if HEALTH_OK is not seen within 90 polls
    """
    testdir = teuthology.get_testdir(ctx)
    ceph_admin = teuthology.get_first_mon(ctx, config)
    (remote,) = ctx.cluster.only(ceph_admin).remotes.keys()
    max_tries = 90  # 90 tries * 10 secs --> 15 minutes
    health_log = log.getChild('health')

    # Counting with range() performs exactly max_tries polls; the previous
    # "tries >= max_tries" test gave up one poll early.
    for _ in range(max_tries):
        r = remote.run(
            args=[
                'cd',
                '{tdir}'.format(tdir=testdir),
                run.Raw('&&'),
                'sudo', 'ceph',
                'health',
            ],
            stdout=StringIO(),
            logger=health_log,
        )
        out = r.stdout.getvalue()
        log.info('Ceph health: %s', out.rstrip('\n'))
        # Empty or whitespace-only output splits to [], so guard before
        # indexing instead of dying with IndexError; just poll again.
        words = out.split(None, 1)
        if words and words[0] == 'HEALTH_OK':
            return
        time.sleep(10)

    msg = "ceph health was unable to get 'HEALTH_OK' after waiting 15 minutes"
    raise RuntimeError(msg)
def get_all_nodes(ctx, config):
    """
    Return a string of node names separated by blanks

    Each key of ``ctx.config['targets']`` is reduced to a short host name:
    everything up to the last '@' is dropped, then everything from the
    first '.' onwards.

    :param ctx: Context; only ``ctx.config['targets']`` is read
    :param config: Configuration (unused)
    :returns: the short names joined by single spaces, '' when there are
              no targets
    """
    # Only the target names matter; the mapped values are never used.
    short_names = [
        target.split('@')[-1].split('.')[0]
        for target in ctx.config['targets']
    ]
    return " ".join(short_names)
= execute_ceph_deploy(install_nodes2) + if estatus_install != 0: + raise RuntimeError("ceph-deploy: Failed to install ceph-test") + + if mon_nodes is None: + raise RuntimeError("no monitor nodes in the config file") + + estatus_new = execute_ceph_deploy(new_mon) + if estatus_new != 0: + raise RuntimeError("ceph-deploy: new command failed") + + log.info('adding config inputs...') + testdir = teuthology.get_testdir(ctx) + conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir) + + if config.get('conf') is not None: + confp = config.get('conf') + for section, keys in confp.iteritems(): + lines = '[{section}]\n'.format(section=section) + teuthology.append_lines_to_file(ceph_admin, conf_path, lines, + sudo=True) + for key, value in keys.iteritems(): + log.info("[%s] %s = %s" % (section, key, value)) + lines = '{key} = {value}\n'.format(key=key, value=value) + teuthology.append_lines_to_file(ceph_admin, conf_path, lines, + sudo=True) + + mon_create_nodes = './ceph-deploy mon create-initial' + # If the following fails, it is OK, it might just be that the monitors + # are taking way more than a minute/monitor to form quorum, so lets + # try the next block which will wait up to 15 minutes to gatherkeys. 
+ execute_ceph_deploy(mon_create_nodes) + + estatus_gather = execute_ceph_deploy(gather_keys) + max_gather_tries = 90 + gather_tries = 0 + while (estatus_gather != 0): + gather_tries += 1 + if gather_tries >= max_gather_tries: + msg = 'ceph-deploy was not able to gatherkeys after 15 minutes' + raise RuntimeError(msg) + estatus_gather = execute_ceph_deploy(gather_keys) + time.sleep(10) + + if mds_nodes: + estatus_mds = execute_ceph_deploy(deploy_mds) + if estatus_mds != 0: + raise RuntimeError("ceph-deploy: Failed to deploy mds") + + if config.get('test_mon_destroy') is not None: + for d in range(1, len(mon_node)): + mon_destroy_nodes = './ceph-deploy mon destroy'+" "+mon_node[d] + estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes) + if estatus_mon_d != 0: + raise RuntimeError("ceph-deploy: Failed to delete monitor") + + node_dev_list = get_dev_for_osd(ctx, config) + for d in node_dev_list: + node = d[0] + for disk in d[1:]: + zap = './ceph-deploy disk zap ' + node + ':' + disk + estatus = execute_ceph_deploy(zap) + if estatus != 0: + raise RuntimeError("ceph-deploy: Failed to zap osds") + osd_create_cmd = './ceph-deploy osd create ' + if config.get('dmcrypt') is not None: + osd_create_cmd += '--dmcrypt ' + osd_create_cmd += ":".join(d) + estatus_osd = execute_ceph_deploy(osd_create_cmd) + if estatus_osd == 0: + log.info('successfully created osd') + no_of_osds += 1 + else: + raise RuntimeError("ceph-deploy: Failed to create osds") + + if config.get('wait-for-healthy', True) and no_of_osds >= 2: + is_healthy(ctx=ctx, config=None) + + log.info('Setting up client nodes...') + conf_path = '/etc/ceph/ceph.conf' + admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring' + first_mon = teuthology.get_first_mon(ctx, config) + (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys() + conf_data = teuthology.get_file( + remote=mon0_remote, + path=conf_path, + sudo=True, + ) + admin_keyring = teuthology.get_file( + remote=mon0_remote, + path=admin_keyring_path, + 
sudo=True, + ) + + clients = ctx.cluster.only(teuthology.is_type('client')) + for remot, roles_for_host in clients.remotes.iteritems(): + for id_ in teuthology.roles_of_type(roles_for_host, 'client'): + client_keyring = \ + '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_) + mon0_remote.run( + args=[ + 'cd', + '{tdir}'.format(tdir=testdir), + run.Raw('&&'), + 'sudo', 'bash', '-c', + run.Raw('"'), 'ceph', + 'auth', + 'get-or-create', + 'client.{id}'.format(id=id_), + 'mds', 'allow', + 'mon', 'allow *', + 'osd', 'allow *', + run.Raw('>'), + client_keyring, + run.Raw('"'), + ], + ) + key_data = teuthology.get_file( + remote=mon0_remote, + path=client_keyring, + sudo=True, + ) + teuthology.sudo_write_file( + remote=remot, + path=client_keyring, + data=key_data, + perms='0644' + ) + teuthology.sudo_write_file( + remote=remot, + path=admin_keyring_path, + data=admin_keyring, + perms='0644' + ) + teuthology.sudo_write_file( + remote=remot, + path=conf_path, + data=conf_data, + perms='0644' + ) + + log.info('Configuring CephFS...') + ceph_fs = Filesystem(ctx, admin_remote=clients.remotes.keys()[0]) + if not ceph_fs.legacy_configured(): + ceph_fs.create() + else: + raise RuntimeError( + "The cluster is NOT operational due to insufficient OSDs") + yield + + except Exception: + log.info("Error encountered, logging exception before tearing down ceph-deploy") + log.info(traceback.format_exc()) + raise + finally: + log.info('Stopping ceph...') + ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'), + 'sudo', 'service', 'ceph', 'stop' ]) + + # Are you really not running anymore? + # try first with the init tooling + # ignoring the status so this becomes informational only + ctx.cluster.run(args=['sudo', 'status', 'ceph-all', run.Raw('||'), + 'sudo', 'service', 'ceph', 'status'], + check_status=False) + + # and now just check for the processes themselves, as if upstart/sysvinit + # is lying to us. 
@contextlib.contextmanager
def task(ctx, config):
    """
    Set up and tear down a Ceph cluster.

    For example::

        tasks:
        - install:
             extras: yes
        - ssh_keys:
        - ceph-deploy:
             branch:
                stable: bobtail
             mon_initial_members: 1

        tasks:
        - install:
             extras: yes
        - ssh_keys:
        - ceph-deploy:
             branch:
                dev: master
             conf:
                mon:
                   debug mon = 20

        tasks:
        - install:
             extras: yes
        - ssh_keys:
        - ceph-deploy:
             branch:
                testing:
             dmcrypt: yes
             separate_journal_disk: yes

    Any ``ceph-deploy`` section under the job's ``overrides`` is merged
    into the task configuration before it is used.

    :param ctx: Context
    :param config: Configuration dictionary, or None for an empty one
    """
    if config is None:
        config = {}

    # Validate the type before touching the config so that a bad value is
    # reported by this message rather than by whatever the merge does.
    assert isinstance(config, dict), \
        "task ceph-deploy only supports a dictionary for configuration"

    # Merge the overrides exactly once.  The merge used to be applied
    # twice; NOTE(review): if deep_merge concatenates lists, the second
    # pass would duplicate list entries - confirm against teuthology.
    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))

    if config.get('branch') is not None:
        assert isinstance(config['branch'], dict), 'branch must be a dictionary'

    log.info('task ceph-deploy with config ' + str(config))

    # Only these keys are handed on to build_ceph_cluster.
    cluster_config = dict(
        conf=config.get('conf', {}),
        branch=config.get('branch', {}),
        dmcrypt=config.get('dmcrypt', None),
        separate_journal_disk=config.get('separate_journal_disk', None),
        mon_initial_members=config.get('mon_initial_members', None),
        test_mon_destroy=config.get('test_mon_destroy', None),
    )

    with contextutil.nested(
        lambda: install_fn.ship_utilities(ctx=ctx, config=None),
        lambda: download_ceph_deploy(ctx=ctx, config=config),
        lambda: build_ceph_cluster(ctx=ctx, config=cluster_config),
    ):
        yield
any global overrides. + + :param ctx: Context instance + :param config: configuration for this task + :return: dict of client name to config or to None + """ + if config is None: + config = dict(('client.{id}'.format(id=id_), None) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')) + elif isinstance(config, list): + config = dict((name, None) for name in config) + + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('ceph-fuse', {})) + + return config + + +@contextlib.contextmanager +def task(ctx, config): + """ + Mount/unmount a ``ceph-fuse`` client. + + The config is optional and defaults to mounting on all clients. If + a config is given, it is expected to be a list of clients to do + this operation on. This lets you e.g. set up one client with + ``ceph-fuse`` and another with ``kclient``. + + Example that mounts all clients:: + + tasks: + - ceph: + - ceph-fuse: + - interactive: + + Example that uses both ``kclient` and ``ceph-fuse``:: + + tasks: + - ceph: + - ceph-fuse: [client.0] + - kclient: [client.1] + - interactive: + + Example that enables valgrind: + + tasks: + - ceph: + - ceph-fuse: + client.0: + valgrind: [--tool=memcheck, --leak-check=full, --show-reachable=yes] + - interactive: + + Example that stops an already-mounted client: + + :: + + tasks: + - ceph: + - ceph-fuse: [client.0] + - ... do something that requires the FS mounted ... + - ceph-fuse: + client.0: + mounted: false + - ... do something that requires the FS unmounted ... 
+ + Example that adds more generous wait time for mount (for virtual machines): + + tasks: + - ceph: + - ceph-fuse: + client.0: + mount_wait: 60 # default is 0, do not wait before checking /sys/ + mount_timeout: 120 # default is 30, give up if /sys/ is not populated + - interactive: + + :param ctx: Context + :param config: Configuration + """ + log.info('Mounting ceph-fuse clients...') + + testdir = teuthology.get_testdir(ctx) + config = get_client_configs(ctx, config) + + # List clients we will configure mounts for, default is all clients + clients = list(teuthology.get_clients(ctx=ctx, roles=config.keys())) + + all_mounts = getattr(ctx, 'mounts', {}) + mounted_by_me = {} + + # Construct any new FuseMount instances + for id_, remote in clients: + client_config = config.get("client.%s" % id_) + if client_config is None: + client_config = {} + + if id_ not in all_mounts: + fuse_mount = FuseMount(client_config, testdir, id_, remote) + all_mounts[id_] = fuse_mount + else: + # Catch bad configs where someone has e.g. 
def write_conf(ctx, conf_path=DEFAULT_CONF_PATH):
    """
    Write the configuration held in ``ctx.ceph.conf`` to the cluster.

    The configuration is serialized once into a buffer.  One command is
    then started through ``ctx.cluster.run`` that creates /etc/ceph
    (mode 0755), copies its stdin into ``conf_path`` and sets that file
    to mode 0644; the buffer is fed to the stdin of every started
    process, and the call returns after all of them have finished.

    :param ctx: Context
    :param conf_path: destination path of the configuration file
    """
    buf = StringIO()
    ctx.ceph.conf.write(buf)
    buf.seek(0)

    copy_stdin = ('import shutil, sys; '
                  'shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))')
    steps = [
        ['sudo', 'mkdir', '-p', '/etc/ceph'],
        ['sudo', 'chmod', '0755', '/etc/ceph'],
        ['sudo', 'python', '-c', copy_stdin, conf_path],
        ['sudo', 'chmod', '0644', conf_path],
    ]
    # Chain the steps with '&&' so a failing step stops the rest.
    command = []
    for step in steps:
        if command:
            command.append(run.Raw('&&'))
        command.extend(step)

    procs = ctx.cluster.run(args=command, stdin=run.PIPE, wait=False)
    teuthology.feed_many_stdins_and_close(buf, procs)
    run.wait(procs)
def cmd_exists_on_osds(self, cmd):
    """
    Check whether a command is available on every OSD node.

    Runs ``type <cmd>`` on each distinct remote that carries an osd role
    and stops at the first remote where it exits non-zero.

    :param cmd: name of the command to look up
    :returns: True if the lookup exits with status 0 on every remote
              (also when there are no such remotes), False otherwise
    """
    osd_cluster = self.ceph_manager.ctx.cluster.only(
        teuthology.is_type('osd'))
    for remote in set(osd_cluster.remotes.keys()):
        # Output is captured and discarded; only the exit status matters.
        proc = remote.run(args=['type', cmd], wait=True,
                          check_status=False, stdout=StringIO(),
                          stderr=StringIO())
        if proc.exitstatus != 0:
            return False
    return True
killed. + :mark_down: Mark down if true. + :mark_out: Mark out if true. + """ + if osd is None: + osd = random.choice(self.live_osds) + self.log("Killing osd %s, live_osds are %s" % (str(osd), + str(self.live_osds))) + self.live_osds.remove(osd) + self.dead_osds.append(osd) + self.ceph_manager.kill_osd(osd) + if mark_down: + self.ceph_manager.mark_down_osd(osd) + if mark_out and osd in self.in_osds: + self.out_osd(osd) + if self.ceph_objectstore_tool: + self.log("Testing ceph-objectstore-tool on down osd") + (remote,) = self.ceph_manager.ctx.\ + cluster.only('osd.{o}'.format(o=osd)).remotes.iterkeys() + FSPATH = self.ceph_manager.get_filepath() + JPATH = os.path.join(FSPATH, "journal") + exp_osd = imp_osd = osd + exp_remote = imp_remote = remote + # If an older osd is available we'll move a pg from there + if (len(self.dead_osds) > 1 and + random.random() < self.chance_move_pg): + exp_osd = random.choice(self.dead_osds[:-1]) + (exp_remote,) = self.ceph_manager.ctx.\ + cluster.only('osd.{o}'.format(o=exp_osd)).\ + remotes.iterkeys() + if ('keyvaluestore_backend' in + self.ceph_manager.ctx.ceph.conf['osd']): + prefix = ("sudo adjust-ulimits ceph-objectstore-tool " + "--data-path {fpath} --journal-path {jpath} " + "--type keyvaluestore " + "--log-file=" + "/var/log/ceph/objectstore_tool.\\$pid.log ". + format(fpath=FSPATH, jpath=JPATH)) + else: + prefix = ("sudo adjust-ulimits ceph-objectstore-tool " + "--data-path {fpath} --journal-path {jpath} " + "--log-file=" + "/var/log/ceph/objectstore_tool.\\$pid.log ". + format(fpath=FSPATH, jpath=JPATH)) + cmd = (prefix + "--op list-pgs").format(id=exp_osd) + proc = exp_remote.run(args=cmd, wait=True, + check_status=False, stdout=StringIO()) + if proc.exitstatus: + raise Exception("ceph-objectstore-tool: " + "exp list-pgs failure with status {ret}". 
+ format(ret=proc.exitstatus)) + pgs = proc.stdout.getvalue().split('\n')[:-1] + if len(pgs) == 0: + self.log("No PGs found for osd.{osd}".format(osd=exp_osd)) + return + pg = random.choice(pgs) + exp_path = teuthology.get_testdir(self.ceph_manager.ctx) + exp_path = os.path.join(exp_path, "data") + exp_path = os.path.join(exp_path, + "exp.{pg}.{id}".format(pg=pg, id=exp_osd)) + # export + cmd = prefix + "--op export --pgid {pg} --file {file}" + cmd = cmd.format(id=exp_osd, pg=pg, file=exp_path) + proc = exp_remote.run(args=cmd) + if proc.exitstatus: + raise Exception("ceph-objectstore-tool: " + "export failure with status {ret}". + format(ret=proc.exitstatus)) + # remove + cmd = prefix + "--op remove --pgid {pg}" + cmd = cmd.format(id=exp_osd, pg=pg) + proc = exp_remote.run(args=cmd) + if proc.exitstatus: + raise Exception("ceph-objectstore-tool: " + "remove failure with status {ret}". + format(ret=proc.exitstatus)) + # If there are at least 2 dead osds we might move the pg + if exp_osd != imp_osd: + # If pg isn't already on this osd, then we will move it there + cmd = (prefix + "--op list-pgs").format(id=imp_osd) + proc = imp_remote.run(args=cmd, wait=True, + check_status=False, stdout=StringIO()) + if proc.exitstatus: + raise Exception("ceph-objectstore-tool: " + "imp list-pgs failure with status {ret}". + format(ret=proc.exitstatus)) + pgs = proc.stdout.getvalue().split('\n')[:-1] + if pg not in pgs: + self.log("Moving pg {pg} from osd.{fosd} to osd.{tosd}". + format(pg=pg, fosd=exp_osd, tosd=imp_osd)) + if imp_remote != exp_remote: + # Copy export file to the other machine + self.log("Transfer export file from {srem} to {trem}". 
+ format(srem=exp_remote, trem=imp_remote)) + tmpexport = Remote.get_file(exp_remote, exp_path) + Remote.put_file(imp_remote, tmpexport, exp_path) + os.remove(tmpexport) + else: + # Can't move the pg after all + imp_osd = exp_osd + imp_remote = exp_remote + # import + cmd = (prefix + "--op import --file {file}") + cmd = cmd.format(id=imp_osd, file=exp_path) + proc = imp_remote.run(args=cmd, wait=True, check_status=False) + if proc.exitstatus == 10: + self.log("Pool went away before processing an import" + "...ignored") + elif proc.exitstatus == 11: + self.log("Attempt to import an incompatible export" + "...ignored") + elif proc.exitstatus: + raise Exception("ceph-objectstore-tool: " + "import failure with status {ret}". + format(ret=proc.exitstatus)) + cmd = "rm -f {file}".format(file=exp_path) + exp_remote.run(args=cmd) + if imp_remote != exp_remote: + imp_remote.run(args=cmd) + + # apply low split settings to each pool + for pool in self.ceph_manager.list_pools(): + no_sudo_prefix = prefix[5:] + cmd = ("CEPH_ARGS='--filestore-merge-threshold 1 " + "--filestore-split-multiple 1' sudo -E " + + no_sudo_prefix + "--op apply-layout-settings --pool " + pool).format(id=osd) + proc = remote.run(args=cmd, wait=True, check_status=False, stderr=StringIO()) + output = proc.stderr.getvalue() + if 'Couldn\'t find pool' in output: + continue + if proc.exitstatus: + raise Exception("ceph-objectstore-tool apply-layout-settings" + " failed with {status}".format(status=proc.exitstatus)) + + def rm_past_intervals(self, osd=None): + """ + :param osd: Osd to find pg to remove past intervals + """ + if self.test_rm_past_intervals: + if osd is None: + osd = random.choice(self.dead_osds) + self.log("Use ceph_objectstore_tool to remove past intervals") + (remote,) = self.ceph_manager.ctx.\ + cluster.only('osd.{o}'.format(o=osd)).remotes.iterkeys() + FSPATH = self.ceph_manager.get_filepath() + JPATH = os.path.join(FSPATH, "journal") + if ('keyvaluestore_backend' in + 
self.ceph_manager.ctx.ceph.conf['osd']): + prefix = ("sudo adjust-ulimits ceph-objectstore-tool " + "--data-path {fpath} --journal-path {jpath} " + "--type keyvaluestore " + "--log-file=" + "/var/log/ceph/objectstore_tool.\\$pid.log ". + format(fpath=FSPATH, jpath=JPATH)) + else: + prefix = ("sudo adjust-ulimits ceph-objectstore-tool " + "--data-path {fpath} --journal-path {jpath} " + "--log-file=" + "/var/log/ceph/objectstore_tool.\\$pid.log ". + format(fpath=FSPATH, jpath=JPATH)) + cmd = (prefix + "--op list-pgs").format(id=osd) + proc = remote.run(args=cmd, wait=True, + check_status=False, stdout=StringIO()) + if proc.exitstatus: + raise Exception("ceph_objectstore_tool: " + "exp list-pgs failure with status {ret}". + format(ret=proc.exitstatus)) + pgs = proc.stdout.getvalue().split('\n')[:-1] + if len(pgs) == 0: + self.log("No PGs found for osd.{osd}".format(osd=osd)) + return + pg = random.choice(pgs) + cmd = (prefix + "--op rm-past-intervals --pgid {pg}").\ + format(id=osd, pg=pg) + proc = remote.run(args=cmd) + if proc.exitstatus: + raise Exception("ceph_objectstore_tool: " + "rm-past-intervals failure with status {ret}". + format(ret=proc.exitstatus)) + + def blackhole_kill_osd(self, osd=None): + """ + If all else fails, kill the osd. + :param osd: Osd to be killed. + """ + if osd is None: + osd = random.choice(self.live_osds) + self.log("Blackholing and then killing osd %s, live_osds are %s" % + (str(osd), str(self.live_osds))) + self.live_osds.remove(osd) + self.dead_osds.append(osd) + self.ceph_manager.blackhole_kill_osd(osd) + + def revive_osd(self, osd=None): + """ + Revive the osd. + :param osd: Osd to be revived. + """ + if osd is None: + osd = random.choice(self.dead_osds) + self.log("Reviving osd %s" % (str(osd),)) + self.live_osds.append(osd) + self.dead_osds.remove(osd) + self.ceph_manager.revive_osd(osd, self.revive_timeout) + + def out_osd(self, osd=None): + """ + Mark the osd out + :param osd: Osd to be marked. 
+ """ + if osd is None: + osd = random.choice(self.in_osds) + self.log("Removing osd %s, in_osds are: %s" % + (str(osd), str(self.in_osds))) + self.ceph_manager.mark_out_osd(osd) + self.in_osds.remove(osd) + self.out_osds.append(osd) + + def in_osd(self, osd=None): + """ + Mark the osd out + :param osd: Osd to be marked. + """ + if osd is None: + osd = random.choice(self.out_osds) + if osd in self.dead_osds: + return self.revive_osd(osd) + self.log("Adding osd %s" % (str(osd),)) + self.out_osds.remove(osd) + self.in_osds.append(osd) + self.ceph_manager.mark_in_osd(osd) + self.log("Added osd %s" % (str(osd),)) + + def reweight_osd(self, osd=None): + """ + Reweight an osd that is in + :param osd: Osd to be marked. + """ + if osd is None: + osd = random.choice(self.in_osds) + val = random.uniform(.1, 1.0) + self.log("Reweighting osd %s to %s" % (str(osd), str(val))) + self.ceph_manager.raw_cluster_cmd('osd', 'reweight', + str(osd), str(val)) + + def primary_affinity(self, osd=None): + if osd is None: + osd = random.choice(self.in_osds) + if random.random() >= .5: + pa = random.random() + elif random.random() >= .5: + pa = 1 + else: + pa = 0 + self.log('Setting osd %s primary_affinity to %f' % (str(osd), pa)) + self.ceph_manager.raw_cluster_cmd('osd', 'primary-affinity', + str(osd), str(pa)) + + def all_up(self): + """ + Make sure all osds are up and not out. 
+ """ + while len(self.dead_osds) > 0: + self.log("reviving osd") + self.revive_osd() + while len(self.out_osds) > 0: + self.log("inning osd") + self.in_osd() + + def do_join(self): + """ + Break out of this Ceph loop + """ + self.stopping = True + self.thread.get() + if self.dump_ops_enable == "true": + self.log("joining the do_dump_ops greenlet") + self.dump_ops_thread.join() + + def grow_pool(self): + """ + Increase the size of the pool + """ + pool = self.ceph_manager.get_pool() + self.log("Growing pool %s" % (pool,)) + self.ceph_manager.expand_pool(pool, + self.config.get('pool_grow_by', 10), + self.max_pgs) + + def fix_pgp_num(self): + """ + Fix number of pgs in pool. + """ + pool = self.ceph_manager.get_pool() + self.log("fixing pg num pool %s" % (pool,)) + self.ceph_manager.set_pool_pgpnum(pool) + + def test_pool_min_size(self): + """ + Kill and revive all osds except one. + """ + self.log("test_pool_min_size") + self.all_up() + self.ceph_manager.wait_for_recovery( + timeout=self.config.get('timeout') + ) + the_one = random.choice(self.in_osds) + self.log("Killing everyone but %s", the_one) + to_kill = filter(lambda x: x != the_one, self.in_osds) + [self.kill_osd(i) for i in to_kill] + [self.out_osd(i) for i in to_kill] + time.sleep(self.config.get("test_pool_min_size_time", 10)) + self.log("Killing %s" % (the_one,)) + self.kill_osd(the_one) + self.out_osd(the_one) + self.log("Reviving everyone but %s" % (the_one,)) + [self.revive_osd(i) for i in to_kill] + [self.in_osd(i) for i in to_kill] + self.log("Revived everyone but %s" % (the_one,)) + self.log("Waiting for clean") + self.ceph_manager.wait_for_recovery( + timeout=self.config.get('timeout') + ) + + def inject_pause(self, conf_key, duration, check_after, should_be_down): + """ + Pause injection testing. Check for osd being down when finished. 
+ """ + the_one = random.choice(self.live_osds) + self.log("inject_pause on {osd}".format(osd=the_one)) + self.log( + "Testing {key} pause injection for duration {duration}".format( + key=conf_key, + duration=duration + )) + self.log( + "Checking after {after}, should_be_down={shouldbedown}".format( + after=check_after, + shouldbedown=should_be_down + )) + self.ceph_manager.set_config(the_one, **{conf_key: duration}) + if not should_be_down: + return + time.sleep(check_after) + status = self.ceph_manager.get_osd_status() + assert the_one in status['down'] + time.sleep(duration - check_after + 20) + status = self.ceph_manager.get_osd_status() + assert not the_one in status['down'] + + def test_backfill_full(self): + """ + Test backfills stopping when the replica fills up. + + First, use osd_backfill_full_ratio to simulate a now full + osd by setting it to 0 on all of the OSDs. + + Second, on a random subset, set + osd_debug_skip_full_check_in_backfill_reservation to force + the more complicated check in do_scan to be exercised. + + Then, verify that all backfills stop. 
+ """ + self.log("injecting osd_backfill_full_ratio = 0") + for i in self.live_osds: + self.ceph_manager.set_config( + i, + osd_debug_skip_full_check_in_backfill_reservation= + random.choice(['false', 'true']), + osd_backfill_full_ratio=0) + for i in range(30): + status = self.ceph_manager.compile_pg_status() + if 'backfill' not in status.keys(): + break + self.log( + "waiting for {still_going} backfills".format( + still_going=status.get('backfill'))) + time.sleep(1) + assert('backfill' not in self.ceph_manager.compile_pg_status().keys()) + for i in self.live_osds: + self.ceph_manager.set_config( + i, + osd_debug_skip_full_check_in_backfill_reservation='false', + osd_backfill_full_ratio=0.85) + + def test_map_discontinuity(self): + """ + 1) Allows the osds to recover + 2) kills an osd + 3) allows the remaining osds to recover + 4) waits for some time + 5) revives the osd + This sequence should cause the revived osd to have to handle + a map gap since the mons would have trimmed + """ + while len(self.in_osds) < (self.minin + 1): + self.in_osd() + self.log("Waiting for recovery") + self.ceph_manager.wait_for_all_up( + timeout=self.config.get('timeout') + ) + # now we wait 20s for the pg status to change, if it takes longer, + # the test *should* fail! + time.sleep(20) + self.ceph_manager.wait_for_clean( + timeout=self.config.get('timeout') + ) + + # now we wait 20s for the backfill replicas to hear about the clean + time.sleep(20) + self.log("Recovered, killing an osd") + self.kill_osd(mark_down=True, mark_out=True) + self.log("Waiting for clean again") + self.ceph_manager.wait_for_clean( + timeout=self.config.get('timeout') + ) + self.log("Waiting for trim") + time.sleep(int(self.config.get("map_discontinuity_sleep_time", 40))) + self.revive_osd() + + def choose_action(self): + """ + Random action selector. 
+ """ + chance_down = self.config.get('chance_down', 0.4) + chance_test_min_size = self.config.get('chance_test_min_size', 0) + chance_test_backfill_full = \ + self.config.get('chance_test_backfill_full', 0) + if isinstance(chance_down, int): + chance_down = float(chance_down) / 100 + minin = self.minin + minout = self.config.get("min_out", 0) + minlive = self.config.get("min_live", 2) + mindead = self.config.get("min_dead", 0) + + self.log('choose_action: min_in %d min_out ' + '%d min_live %d min_dead %d' % + (minin, minout, minlive, mindead)) + actions = [] + if len(self.in_osds) > minin: + actions.append((self.out_osd, 1.0,)) + if len(self.live_osds) > minlive and chance_down > 0: + actions.append((self.kill_osd, chance_down,)) + if len(self.dead_osds) > 1: + actions.append((self.rm_past_intervals, 1.0,)) + if len(self.out_osds) > minout: + actions.append((self.in_osd, 1.7,)) + if len(self.dead_osds) > mindead: + actions.append((self.revive_osd, 1.0,)) + if self.config.get('thrash_primary_affinity', True): + actions.append((self.primary_affinity, 1.0,)) + actions.append((self.reweight_osd, + self.config.get('reweight_osd', .5),)) + actions.append((self.grow_pool, + self.config.get('chance_pgnum_grow', 0),)) + actions.append((self.fix_pgp_num, + self.config.get('chance_pgpnum_fix', 0),)) + actions.append((self.test_pool_min_size, + chance_test_min_size,)) + actions.append((self.test_backfill_full, + chance_test_backfill_full,)) + for key in ['heartbeat_inject_failure', 'filestore_inject_stall']: + for scenario in [ + (lambda: + self.inject_pause(key, + self.config.get('pause_short', 3), + 0, + False), + self.config.get('chance_inject_pause_short', 1),), + (lambda: + self.inject_pause(key, + self.config.get('pause_long', 80), + self.config.get('pause_check_after', 70), + True), + self.config.get('chance_inject_pause_long', 0),)]: + actions.append(scenario) + + total = sum([y for (x, y) in actions]) + val = random.uniform(0, total) + for (action, prob) in actions: 
+ if val < prob: + return action + val -= prob + return None + + def log_exc(func): + @wraps(func) + def wrapper(self): + try: + return func(self) + except: + self.log(traceback.format_exc()) + raise + return wrapper + + @log_exc + def do_dump_ops(self): + """ + Loops and does op dumps on all osds + """ + self.log("starting do_dump_ops") + while not self.stopping: + for osd in self.live_osds: + # Ignore errors because live_osds is in flux + self.ceph_manager.osd_admin_socket(osd, command=['dump_ops_in_flight'], + check_status=False, timeout=30) + self.ceph_manager.osd_admin_socket(osd, command=['dump_blocked_ops'], + check_status=False, timeout=30) + self.ceph_manager.osd_admin_socket(osd, command=['dump_historic_ops'], + check_status=False, timeout=30) + gevent.sleep(0) + + @log_exc + def do_thrash(self): + """ + Loop to select random actions to thrash ceph manager with. + """ + cleanint = self.config.get("clean_interval", 60) + scrubint = self.config.get("scrub_interval", -1) + maxdead = self.config.get("max_dead", 0) + delay = self.config.get("op_delay", 5) + self.log("starting do_thrash") + while not self.stopping: + to_log = [str(x) for x in ["in_osds: ", self.in_osds, + "out_osds: ", self.out_osds, + "dead_osds: ", self.dead_osds, + "live_osds: ", self.live_osds]] + self.log(" ".join(to_log)) + if random.uniform(0, 1) < (float(delay) / cleanint): + while len(self.dead_osds) > maxdead: + self.revive_osd() + for osd in self.in_osds: + self.ceph_manager.raw_cluster_cmd('osd', 'reweight', + str(osd), str(1)) + if random.uniform(0, 1) < float( + self.config.get('chance_test_map_discontinuity', 0)): + self.test_map_discontinuity() + else: + self.ceph_manager.wait_for_recovery( + timeout=self.config.get('timeout') + ) + time.sleep(self.clean_wait) + if scrubint > 0: + if random.uniform(0, 1) < (float(delay) / scrubint): + self.log('Scrubbing while thrashing being performed') + Scrubber(self.ceph_manager, self.config) + self.choose_action()() + time.sleep(delay) + 
self.all_up() + + +class ObjectStoreTool: + + def __init__(self, manager, pool, **kwargs): + self.manager = manager + self.pool = pool + self.osd = kwargs.get('osd', None) + self.object_name = kwargs.get('object_name', None) + if self.osd and self.pool and self.object_name: + if self.osd == "primary": + self.osd = self.manager.get_object_primary(self.pool, + self.object_name) + assert self.osd + if self.object_name: + self.pgid = self.manager.get_object_pg_with_shard(self.pool, + self.object_name, + self.osd) + self.remote = self.manager.ctx.\ + cluster.only('osd.{o}'.format(o=self.osd)).remotes.keys()[0] + path = self.manager.get_filepath().format(id=self.osd) + self.paths = ("--data-path {path} --journal-path {path}/journal". + format(path=path)) + + def build_cmd(self, options, args, stdin): + lines = [] + if self.object_name: + lines.append("object=$(sudo adjust-ulimits ceph-objectstore-tool " + "{paths} --pgid {pgid} --op list |" + "grep '\"oid\":\"{name}\"')". + format(paths=self.paths, + pgid=self.pgid, + name=self.object_name)) + args = '"$object" ' + args + options += " --pgid {pgid}".format(pgid=self.pgid) + cmd = ("sudo adjust-ulimits ceph-objectstore-tool {paths} {options} {args}". + format(paths=self.paths, + args=args, + options=options)) + if stdin: + cmd = ("echo {payload} | base64 --decode | {cmd}". 
+ format(payload=base64.encode(stdin), + cmd=cmd)) + lines.append(cmd) + return "\n".join(lines) + + def run(self, options, args, stdin=None): + self.manager.kill_osd(self.osd) + cmd = self.build_cmd(options, args, stdin) + self.manager.log(cmd) + try: + proc = self.remote.run(args=['bash', '-e', '-x', '-c', cmd], + check_status=False, + stdout=StringIO(), + stderr=StringIO()) + proc.wait() + if proc.exitstatus != 0: + self.manager.log("failed with " + str(proc.exitstatus)) + error = proc.stdout.getvalue() + " " + proc.stderr.getvalue() + raise Exception(error) + finally: + self.manager.revive_osd(self.osd) + + +class CephManager: + """ + Ceph manager object. + Contains several local functions that form a bulk of this module. + """ + + REPLICATED_POOL = 1 + ERASURE_CODED_POOL = 3 + + def __init__(self, controller, ctx=None, config=None, logger=None): + self.lock = threading.RLock() + self.ctx = ctx + self.config = config + self.controller = controller + self.next_pool_id = 0 + if (logger): + self.log = lambda x: logger.info(x) + else: + def tmp(x): + """ + implement log behavior. + """ + print x + self.log = tmp + if self.config is None: + self.config = dict() + pools = self.list_pools() + self.pools = {} + for pool in pools: + self.pools[pool] = self.get_pool_property(pool, 'pg_num') + + def raw_cluster_cmd(self, *args): + """ + Start ceph on a raw cluster. Return count + """ + testdir = teuthology.get_testdir(self.ctx) + ceph_args = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'ceph', + ] + ceph_args.extend(args) + proc = self.controller.run( + args=ceph_args, + stdout=StringIO(), + ) + return proc.stdout.getvalue() + + def raw_cluster_cmd_result(self, *args): + """ + Start ceph on a cluster. Return success or failure information. 
+ """ + testdir = teuthology.get_testdir(self.ctx) + ceph_args = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'ceph', + ] + ceph_args.extend(args) + proc = self.controller.run( + args=ceph_args, + check_status=False, + ) + return proc.exitstatus + + def do_rados(self, remote, cmd): + """ + Execute a remote rados command. + """ + testdir = teuthology.get_testdir(self.ctx) + pre = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rados', + ] + pre.extend(cmd) + proc = remote.run( + args=pre, + wait=True, + ) + return proc + + def rados_write_objects(self, pool, num_objects, size, + timelimit, threads, cleanup=False): + """ + Write rados objects + Threads not used yet. + """ + args = [ + '-p', pool, + '--num-objects', num_objects, + '-b', size, + 'bench', timelimit, + 'write' + ] + if not cleanup: + args.append('--no-cleanup') + return self.do_rados(self.controller, map(str, args)) + + def do_put(self, pool, obj, fname): + """ + Implement rados put operation + """ + return self.do_rados( + self.controller, + [ + '-p', + pool, + 'put', + obj, + fname + ] + ) + + def do_get(self, pool, obj, fname='/dev/null'): + """ + Implement rados get operation + """ + return self.do_rados( + self.controller, + [ + '-p', + pool, + 'stat', + obj, + fname + ] + ) + + def osd_admin_socket(self, osd_id, command, check_status=True, timeout=0): + return self.admin_socket('osd', osd_id, command, check_status, timeout) + + def find_remote(self, service_type, service_id): + """ + Get the Remote for the host where a particular service runs. + + :param service_type: 'mds', 'osd', 'client' + :param service_id: The second part of a role, e.g. 
'0' for + the role 'client.0' + :return: a Remote instance for the host where the + requested role is placed + """ + for _remote, roles_for_host in self.ctx.cluster.remotes.iteritems(): + for id_ in teuthology.roles_of_type(roles_for_host, service_type): + if id_ == str(service_id): + return _remote + + raise KeyError("Service {0}.{1} not found".format(service_type, + service_id)) + + def admin_socket(self, service_type, service_id, + command, check_status=True, timeout=0): + """ + Remotely start up ceph specifying the admin socket + :param command: a list of words to use as the command + to the admin socket + """ + testdir = teuthology.get_testdir(self.ctx) + remote = self.find_remote(service_type, service_id) + args = [ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'timeout', + str(timeout), + 'ceph', + '--admin-daemon', + '/var/run/ceph/ceph-{type}.{id}.asok'.format( + type=service_type, + id=service_id), + ] + args.extend(command) + return remote.run( + args=args, + stdout=StringIO(), + wait=True, + check_status=check_status + ) + + def objectstore_tool(self, pool, options, args, **kwargs): + return ObjectStoreTool(self, pool, **kwargs).run(options, args) + + def get_pgid(self, pool, pgnum): + """ + :param pool: pool name + :param pgnum: pg number + :returns: a string representing this pg. + """ + poolnum = self.get_pool_num(pool) + pg_str = "{poolnum}.{pgnum}".format( + poolnum=poolnum, + pgnum=pgnum) + return pg_str + + def get_pg_replica(self, pool, pgnum): + """ + get replica for pool, pgnum (e.g. (data, 0)->0 + """ + output = self.raw_cluster_cmd("pg", "dump", '--format=json') + j = json.loads('\n'.join(output.split('\n')[1:])) + pg_str = self.get_pgid(pool, pgnum) + for pg in j['pg_stats']: + if pg['pgid'] == pg_str: + return int(pg['acting'][-1]) + assert False + + def get_pg_primary(self, pool, pgnum): + """ + get primary for pool, pgnum (e.g. 
(data, 0)->0 + """ + output = self.raw_cluster_cmd("pg", "dump", '--format=json') + j = json.loads('\n'.join(output.split('\n')[1:])) + pg_str = self.get_pgid(pool, pgnum) + for pg in j['pg_stats']: + if pg['pgid'] == pg_str: + return int(pg['acting'][0]) + assert False + + def get_pool_num(self, pool): + """ + get number for pool (e.g., data -> 2) + """ + return int(self.get_pool_dump(pool)['pool']) + + def list_pools(self): + """ + list all pool names + """ + osd_dump = self.get_osd_dump_json() + self.log(osd_dump['pools']) + return [str(i['pool_name']) for i in osd_dump['pools']] + + def clear_pools(self): + """ + remove all pools + """ + [self.remove_pool(i) for i in self.list_pools()] + + def kick_recovery_wq(self, osdnum): + """ + Run kick_recovery_wq on cluster. + """ + return self.raw_cluster_cmd( + 'tell', "osd.%d" % (int(osdnum),), + 'debug', + 'kick_recovery_wq', + '0') + + def wait_run_admin_socket(self, service_type, + service_id, args=['version'], timeout=75): + """ + If osd_admin_socket call suceeds, return. Otherwise wait + five seconds and try again. + """ + tries = 0 + while True: + proc = self.admin_socket(service_type, service_id, + args, check_status=False) + if proc.exitstatus is 0: + break + else: + tries += 1 + if (tries * 5) > timeout: + raise Exception('timed out waiting for admin_socket ' + 'to appear after {type}.{id} restart'. + format(type=service_type, + id=service_id)) + self.log("waiting on admin_socket for {type}-{id}, " + "{command}".format(type=service_type, + id=service_id, + command=args)) + time.sleep(5) + + def get_pool_dump(self, pool): + """ + get the osd dump part of a pool + """ + osd_dump = self.get_osd_dump_json() + for i in osd_dump['pools']: + if i['pool_name'] == pool: + return i + assert False + + def set_config(self, osdnum, **argdict): + """ + :param osdnum: osd number + :param argdict: dictionary containing values to set. 
+ """ + for k, v in argdict.iteritems(): + self.wait_run_admin_socket( + 'osd', osdnum, + ['config', 'set', str(k), str(v)]) + + def raw_cluster_status(self): + """ + Get status from cluster + """ + status = self.raw_cluster_cmd('status', '--format=json-pretty') + return json.loads(status) + + def raw_osd_status(self): + """ + Get osd status from cluster + """ + return self.raw_cluster_cmd('osd', 'dump') + + def get_osd_status(self): + """ + Get osd statuses sorted by states that the osds are in. + """ + osd_lines = filter( + lambda x: x.startswith('osd.') and (("up" in x) or ("down" in x)), + self.raw_osd_status().split('\n')) + self.log(osd_lines) + in_osds = [int(i[4:].split()[0]) + for i in filter(lambda x: " in " in x, osd_lines)] + out_osds = [int(i[4:].split()[0]) + for i in filter(lambda x: " out " in x, osd_lines)] + up_osds = [int(i[4:].split()[0]) + for i in filter(lambda x: " up " in x, osd_lines)] + down_osds = [int(i[4:].split()[0]) + for i in filter(lambda x: " down " in x, osd_lines)] + dead_osds = [int(x.id_) + for x in filter(lambda x: + not x.running(), + self.ctx.daemons. + iter_daemons_of_role('osd'))] + live_osds = [int(x.id_) for x in + filter(lambda x: + x.running(), + self.ctx.daemons.iter_daemons_of_role('osd'))] + return {'in': in_osds, 'out': out_osds, 'up': up_osds, + 'down': down_osds, 'dead': dead_osds, 'live': live_osds, + 'raw': osd_lines} + + def get_num_pgs(self): + """ + Check cluster status for the number of pgs + """ + status = self.raw_cluster_status() + self.log(status) + return status['pgmap']['num_pgs'] + + def create_erasure_code_profile(self, profile_name, profile): + """ + Create an erasure code profile name that can be used as a parameter + when creating an erasure coded pool. 
+ """ + with self.lock: + args = cmd_erasure_code_profile(profile_name, profile) + self.raw_cluster_cmd(*args) + + def create_pool_with_unique_name(self, pg_num=16, + erasure_code_profile_name=None): + """ + Create a pool named unique_pool_X where X is unique. + """ + name = "" + with self.lock: + name = "unique_pool_%s" % (str(self.next_pool_id),) + self.next_pool_id += 1 + self.create_pool( + name, + pg_num, + erasure_code_profile_name=erasure_code_profile_name) + return name + + @contextlib.contextmanager + def pool(self, pool_name, pg_num=16, erasure_code_profile_name=None): + self.create_pool(pool_name, pg_num, erasure_code_profile_name) + yield + self.remove_pool(pool_name) + + def create_pool(self, pool_name, pg_num=16, + erasure_code_profile_name=None): + """ + Create a pool named from the pool_name parameter. + :param pool_name: name of the pool being created. + :param pg_num: initial number of pgs. + :param erasure_code_profile_name: if set and !None create an + erasure coded pool using the profile + """ + with self.lock: + assert isinstance(pool_name, str) + assert isinstance(pg_num, int) + assert pool_name not in self.pools + self.log("creating pool_name %s" % (pool_name,)) + if erasure_code_profile_name: + self.raw_cluster_cmd('osd', 'pool', 'create', + pool_name, str(pg_num), str(pg_num), + 'erasure', erasure_code_profile_name) + else: + self.raw_cluster_cmd('osd', 'pool', 'create', + pool_name, str(pg_num)) + self.pools[pool_name] = pg_num + + def remove_pool(self, pool_name): + """ + Remove the indicated pool + :param pool_name: Pool to be removed + """ + with self.lock: + assert isinstance(pool_name, str) + assert pool_name in self.pools + self.log("removing pool_name %s" % (pool_name,)) + del self.pools[pool_name] + self.do_rados(self.controller, + ['rmpool', pool_name, pool_name, + "--yes-i-really-really-mean-it"]) + + def get_pool(self): + """ + Pick a random pool + """ + with self.lock: + return random.choice(self.pools.keys()) + + def 
get_pool_pg_num(self, pool_name): + """ + Return the number of pgs in the pool specified. + """ + with self.lock: + assert isinstance(pool_name, str) + if pool_name in self.pools: + return self.pools[pool_name] + return 0 + + def get_pool_property(self, pool_name, prop): + """ + :param pool_name: pool + :param prop: property to be checked. + :returns: property as an int value. + """ + with self.lock: + assert isinstance(pool_name, str) + assert isinstance(prop, str) + output = self.raw_cluster_cmd( + 'osd', + 'pool', + 'get', + pool_name, + prop) + return int(output.split()[1]) + + def set_pool_property(self, pool_name, prop, val): + """ + :param pool_name: pool + :param prop: property to be set. + :param val: value to set. + + This routine retries if set operation fails. + """ + with self.lock: + assert isinstance(pool_name, str) + assert isinstance(prop, str) + assert isinstance(val, int) + tries = 0 + while True: + r = self.raw_cluster_cmd_result( + 'osd', + 'pool', + 'set', + pool_name, + prop, + str(val)) + if r != 11: # EAGAIN + break + tries += 1 + if tries > 50: + raise Exception('timed out getting EAGAIN ' + 'when setting pool property %s %s = %s' % + (pool_name, prop, val)) + self.log('got EAGAIN setting pool property, ' + 'waiting a few seconds...') + time.sleep(2) + + def expand_pool(self, pool_name, by, max_pgs): + """ + Increase the number of pgs in a pool + """ + with self.lock: + assert isinstance(pool_name, str) + assert isinstance(by, int) + assert pool_name in self.pools + if self.get_num_creating() > 0: + return + if (self.pools[pool_name] + by) > max_pgs: + return + self.log("increase pool size by %d" % (by,)) + new_pg_num = self.pools[pool_name] + by + self.set_pool_property(pool_name, "pg_num", new_pg_num) + self.pools[pool_name] = new_pg_num + + def set_pool_pgpnum(self, pool_name): + """ + Set pgpnum property of pool_name pool. 
+ """ + with self.lock: + assert isinstance(pool_name, str) + assert pool_name in self.pools + if self.get_num_creating() > 0: + return + self.set_pool_property(pool_name, 'pgp_num', self.pools[pool_name]) + + def list_pg_missing(self, pgid): + """ + return list of missing pgs with the id specified + """ + r = None + offset = {} + while True: + out = self.raw_cluster_cmd('--', 'pg', pgid, 'list_missing', + json.dumps(offset)) + j = json.loads(out) + if r is None: + r = j + else: + r['objects'].extend(j['objects']) + if not 'more' in j: + break + if j['more'] == 0: + break + offset = j['objects'][-1]['oid'] + if 'more' in r: + del r['more'] + return r + + def get_pg_stats(self): + """ + Dump the cluster and get pg stats + """ + out = self.raw_cluster_cmd('pg', 'dump', '--format=json') + j = json.loads('\n'.join(out.split('\n')[1:])) + return j['pg_stats'] + + def compile_pg_status(self): + """ + Return a histogram of pg state values + """ + ret = {} + j = self.get_pg_stats() + for pg in j: + for status in pg['state'].split('+'): + if status not in ret: + ret[status] = 0 + ret[status] += 1 + return ret + + def pg_scrubbing(self, pool, pgnum): + """ + pg scrubbing wrapper + """ + pgstr = self.get_pgid(pool, pgnum) + stats = self.get_single_pg_stats(pgstr) + return 'scrub' in stats['state'] + + def pg_repairing(self, pool, pgnum): + """ + pg repairing wrapper + """ + pgstr = self.get_pgid(pool, pgnum) + stats = self.get_single_pg_stats(pgstr) + return 'repair' in stats['state'] + + def pg_inconsistent(self, pool, pgnum): + """ + pg inconsistent wrapper + """ + pgstr = self.get_pgid(pool, pgnum) + stats = self.get_single_pg_stats(pgstr) + return 'inconsistent' in stats['state'] + + def get_last_scrub_stamp(self, pool, pgnum): + """ + Get the timestamp of the last scrub. 
+ """ + stats = self.get_single_pg_stats(self.get_pgid(pool, pgnum)) + return stats["last_scrub_stamp"] + + def do_pg_scrub(self, pool, pgnum, stype): + """ + Scrub pg and wait for scrubbing to finish + """ + init = self.get_last_scrub_stamp(pool, pgnum) + while init == self.get_last_scrub_stamp(pool, pgnum): + self.log("waiting for scrub type %s" % (stype,)) + self.raw_cluster_cmd('pg', stype, self.get_pgid(pool, pgnum)) + time.sleep(10) + + def get_single_pg_stats(self, pgid): + """ + Return pg for the pgid specified. + """ + all_stats = self.get_pg_stats() + + for pg in all_stats: + if pg['pgid'] == pgid: + return pg + + return None + + def get_object_pg_with_shard(self, pool, name, osdid): + """ + """ + pool_dump = self.get_pool_dump(pool) + object_map = self.get_object_map(pool, name) + if pool_dump["type"] == CephManager.ERASURE_CODED_POOL: + shard = object_map['acting'].index(osdid) + return "{pgid}s{shard}".format(pgid=object_map['pgid'], + shard=shard) + else: + return object_map['pgid'] + + def get_object_primary(self, pool, name): + """ + """ + object_map = self.get_object_map(pool, name) + return object_map['acting_primary'] + + def get_object_map(self, pool, name): + """ + osd map --format=json converted to a python object + :returns: the python object + """ + out = self.raw_cluster_cmd('--format=json', 'osd', 'map', pool, name) + return json.loads('\n'.join(out.split('\n')[1:])) + + def get_osd_dump_json(self): + """ + osd dump --format=json converted to a python object + :returns: the python object + """ + out = self.raw_cluster_cmd('osd', 'dump', '--format=json') + return json.loads('\n'.join(out.split('\n')[1:])) + + def get_osd_dump(self): + """ + Dump osds + :returns: all osds + """ + return self.get_osd_dump_json()['osds'] + + def get_stuck_pgs(self, type_, threshold): + """ + :returns: stuck pg information from the cluster + """ + out = self.raw_cluster_cmd('pg', 'dump_stuck', type_, str(threshold), + '--format=json') + return json.loads(out) + 
+ def get_num_unfound_objects(self): + """ + Check cluster status to get the number of unfound objects + """ + status = self.raw_cluster_status() + self.log(status) + return status['pgmap'].get('unfound_objects', 0) + + def get_num_creating(self): + """ + Find the number of pgs in creating mode. + """ + pgs = self.get_pg_stats() + num = 0 + for pg in pgs: + if 'creating' in pg['state']: + num += 1 + return num + + def get_num_active_clean(self): + """ + Find the number of active and clean pgs. + """ + pgs = self.get_pg_stats() + num = 0 + for pg in pgs: + if (pg['state'].count('active') and + pg['state'].count('clean') and + not pg['state'].count('stale')): + num += 1 + return num + + def get_num_active_recovered(self): + """ + Find the number of active and recovered pgs. + """ + pgs = self.get_pg_stats() + num = 0 + for pg in pgs: + if (pg['state'].count('active') and + not pg['state'].count('recover') and + not pg['state'].count('backfill') and + not pg['state'].count('stale')): + num += 1 + return num + + def get_is_making_recovery_progress(self): + """ + Return whether there is recovery progress discernable in the + raw cluster status + """ + status = self.raw_cluster_status() + kps = status['pgmap'].get('recovering_keys_per_sec', 0) + bps = status['pgmap'].get('recovering_bytes_per_sec', 0) + ops = status['pgmap'].get('recovering_objects_per_sec', 0) + return kps > 0 or bps > 0 or ops > 0 + + def get_num_active(self): + """ + Find the number of active pgs. + """ + pgs = self.get_pg_stats() + num = 0 + for pg in pgs: + if pg['state'].count('active') and not pg['state'].count('stale'): + num += 1 + return num + + def get_num_down(self): + """ + Find the number of pgs that are down. 
+ """ + pgs = self.get_pg_stats() + num = 0 + for pg in pgs: + if ((pg['state'].count('down') and not + pg['state'].count('stale')) or + (pg['state'].count('incomplete') and not + pg['state'].count('stale'))): + num += 1 + return num + + def get_num_active_down(self): + """ + Find the number of pgs that are either active or down. + """ + pgs = self.get_pg_stats() + num = 0 + for pg in pgs: + if ((pg['state'].count('active') and not + pg['state'].count('stale')) or + (pg['state'].count('down') and not + pg['state'].count('stale')) or + (pg['state'].count('incomplete') and not + pg['state'].count('stale'))): + num += 1 + return num + + def is_clean(self): + """ + True if all pgs are clean + """ + return self.get_num_active_clean() == self.get_num_pgs() + + def is_recovered(self): + """ + True if all pgs have recovered + """ + return self.get_num_active_recovered() == self.get_num_pgs() + + def is_active_or_down(self): + """ + True if all pgs are active or down + """ + return self.get_num_active_down() == self.get_num_pgs() + + def wait_for_clean(self, timeout=None): + """ + Returns true when all pgs are clean. + """ + self.log("waiting for clean") + start = time.time() + num_active_clean = self.get_num_active_clean() + while not self.is_clean(): + if timeout is not None: + if self.get_is_making_recovery_progress(): + self.log("making progress, resetting timeout") + start = time.time() + else: + self.log("no progress seen, keeping timeout for now") + if time.time() - start >= timeout: + self.log('dumping pgs') + out = self.raw_cluster_cmd('pg', 'dump') + self.log(out) + assert time.time() - start < timeout, \ + 'failed to become clean before timeout expired' + cur_active_clean = self.get_num_active_clean() + if cur_active_clean != num_active_clean: + start = time.time() + num_active_clean = cur_active_clean + time.sleep(3) + self.log("clean!") + + def are_all_osds_up(self): + """ + Returns true if all osds are up. 
+ """ + x = self.get_osd_dump() + return (len(x) == sum([(y['up'] > 0) for y in x])) + + def wait_for_all_up(self, timeout=None): + """ + When this exits, either the timeout has expired, or all + osds are up. + """ + self.log("waiting for all up") + start = time.time() + while not self.are_all_osds_up(): + if timeout is not None: + assert time.time() - start < timeout, \ + 'timeout expired in wait_for_all_up' + time.sleep(3) + self.log("all up!") + + def wait_for_recovery(self, timeout=None): + """ + Check peering. When this exists, we have recovered. + """ + self.log("waiting for recovery to complete") + start = time.time() + num_active_recovered = self.get_num_active_recovered() + while not self.is_recovered(): + now = time.time() + if timeout is not None: + if self.get_is_making_recovery_progress(): + self.log("making progress, resetting timeout") + start = time.time() + else: + self.log("no progress seen, keeping timeout for now") + if now - start >= timeout: + self.log('dumping pgs') + out = self.raw_cluster_cmd('pg', 'dump') + self.log(out) + assert now - start < timeout, \ + 'failed to recover before timeout expired' + cur_active_recovered = self.get_num_active_recovered() + if cur_active_recovered != num_active_recovered: + start = time.time() + num_active_recovered = cur_active_recovered + time.sleep(3) + self.log("recovered!") + + def wait_for_active(self, timeout=None): + """ + Check peering. 
When this exists, we are definitely active + """ + self.log("waiting for peering to complete") + start = time.time() + num_active = self.get_num_active() + while not self.is_active(): + if timeout is not None: + if time.time() - start >= timeout: + self.log('dumping pgs') + out = self.raw_cluster_cmd('pg', 'dump') + self.log(out) + assert time.time() - start < timeout, \ + 'failed to recover before timeout expired' + cur_active = self.get_num_active() + if cur_active != num_active: + start = time.time() + num_active = cur_active + time.sleep(3) + self.log("active!") + + def wait_for_active_or_down(self, timeout=None): + """ + Check peering. When this exists, we are definitely either + active or down + """ + self.log("waiting for peering to complete or become blocked") + start = time.time() + num_active_down = self.get_num_active_down() + while not self.is_active_or_down(): + if timeout is not None: + if time.time() - start >= timeout: + self.log('dumping pgs') + out = self.raw_cluster_cmd('pg', 'dump') + self.log(out) + assert time.time() - start < timeout, \ + 'failed to recover before timeout expired' + cur_active_down = self.get_num_active_down() + if cur_active_down != num_active_down: + start = time.time() + num_active_down = cur_active_down + time.sleep(3) + self.log("active or down!") + + def osd_is_up(self, osd): + """ + Wrapper for osd check + """ + osds = self.get_osd_dump() + return osds[osd]['up'] > 0 + + def wait_till_osd_is_up(self, osd, timeout=None): + """ + Loop waiting for osd. 
+ """ + self.log('waiting for osd.%d to be up' % osd) + start = time.time() + while not self.osd_is_up(osd): + if timeout is not None: + assert time.time() - start < timeout, \ + 'osd.%d failed to come up before timeout expired' % osd + time.sleep(3) + self.log('osd.%d is up' % osd) + + def is_active(self): + """ + Wrapper to check if all pgs are active + """ + return self.get_num_active() == self.get_num_pgs() + + def wait_till_active(self, timeout=None): + """ + Wait until all pgs are active. + """ + self.log("waiting till active") + start = time.time() + while not self.is_active(): + if timeout is not None: + if time.time() - start >= timeout: + self.log('dumping pgs') + out = self.raw_cluster_cmd('pg', 'dump') + self.log(out) + assert time.time() - start < timeout, \ + 'failed to become active before timeout expired' + time.sleep(3) + self.log("active!") + + def mark_out_osd(self, osd): + """ + Wrapper to mark osd out. + """ + self.raw_cluster_cmd('osd', 'out', str(osd)) + + def kill_osd(self, osd): + """ + Kill osds by either power cycling (if indicated by the config) + or by stopping. + """ + if self.config.get('powercycle'): + (remote,) = (self.ctx.cluster.only('osd.{o}'.format(o=osd)). + remotes.iterkeys()) + self.log('kill_osd on osd.{o} ' + 'doing powercycle of {s}'.format(o=osd, s=remote.name)) + assert remote.console is not None, ("powercycling requested " + "but RemoteConsole is not " + "initialized. " + "Check ipmi config.") + remote.console.power_off() + else: + self.ctx.daemons.get_daemon('osd', osd).stop() + + def blackhole_kill_osd(self, osd): + """ + Stop osd if nothing else works. + """ + self.raw_cluster_cmd('--', 'tell', 'osd.%d' % osd, + 'injectargs', '--filestore-blackhole') + time.sleep(2) + self.ctx.daemons.get_daemon('osd', osd).stop() + + def revive_osd(self, osd, timeout=150): + """ + Revive osds by either power cycling (if indicated by the config) + or by restarting. 
+ """ + if self.config.get('powercycle'): + (remote,) = (self.ctx.cluster.only('osd.{o}'.format(o=osd)). + remotes.iterkeys()) + self.log('kill_osd on osd.{o} doing powercycle of {s}'. + format(o=osd, s=remote.name)) + assert remote.console is not None, ("powercycling requested " + "but RemoteConsole is not " + "initialized. " + "Check ipmi config.") + remote.console.power_on() + if not remote.console.check_status(300): + raise Exception('Failed to revive osd.{o} via ipmi'. + format(o=osd)) + teuthology.reconnect(self.ctx, 60, [remote]) + mount_osd_data(self.ctx, remote, str(osd)) + make_admin_daemon_dir(self.ctx, remote) + self.ctx.daemons.get_daemon('osd', osd).reset() + self.ctx.daemons.get_daemon('osd', osd).restart() + # wait for dump_ops_in_flight; this command doesn't appear + # until after the signal handler is installed and it is safe + # to stop the osd again without making valgrind leak checks + # unhappy. see #5924. + self.wait_run_admin_socket('osd', osd, + args=['dump_ops_in_flight'], + timeout=timeout) + + def mark_down_osd(self, osd): + """ + Cluster command wrapper + """ + self.raw_cluster_cmd('osd', 'down', str(osd)) + + def mark_in_osd(self, osd): + """ + Cluster command wrapper + """ + self.raw_cluster_cmd('osd', 'in', str(osd)) + + ## monitors + def signal_mon(self, mon, sig): + """ + Wrapper to local get_deamon call + """ + self.ctx.daemons.get_daemon('mon', mon).signal(sig) + + def kill_mon(self, mon): + """ + Kill the monitor by either power cycling (if the config says so), + or by doing a stop. + """ + if self.config.get('powercycle'): + (remote,) = (self.ctx.cluster.only('mon.{m}'.format(m=mon)). + remotes.iterkeys()) + self.log('kill_mon on mon.{m} doing powercycle of {s}'. + format(m=mon, s=remote.name)) + assert remote.console is not None, ("powercycling requested " + "but RemoteConsole is not " + "initialized. 
" + "Check ipmi config.") + + remote.console.power_off() + else: + self.ctx.daemons.get_daemon('mon', mon).stop() + + def revive_mon(self, mon): + """ + Restart by either power cycling (if the config says so), + or by doing a normal restart. + """ + if self.config.get('powercycle'): + (remote,) = (self.ctx.cluster.only('mon.{m}'.format(m=mon)). + remotes.iterkeys()) + self.log('revive_mon on mon.{m} doing powercycle of {s}'. + format(m=mon, s=remote.name)) + assert remote.console is not None, ("powercycling requested " + "but RemoteConsole is not " + "initialized. " + "Check ipmi config.") + + remote.console.power_on() + make_admin_daemon_dir(self.ctx, remote) + self.ctx.daemons.get_daemon('mon', mon).restart() + + def get_mon_status(self, mon): + """ + Extract all the monitor status information from the cluster + """ + addr = self.ctx.ceph.conf['mon.%s' % mon]['mon addr'] + out = self.raw_cluster_cmd('-m', addr, 'mon_status') + return json.loads(out) + + def get_mon_quorum(self): + """ + Extract monitor quorum information from the cluster + """ + out = self.raw_cluster_cmd('quorum_status') + j = json.loads(out) + self.log('quorum_status is %s' % out) + return j['quorum'] + + def wait_for_mon_quorum_size(self, size, timeout=300): + """ + Loop until quorum size is reached. + """ + self.log('waiting for quorum size %d' % size) + start = time.time() + while not len(self.get_mon_quorum()) == size: + if timeout is not None: + assert time.time() - start < timeout, \ + ('failed to reach quorum size %d ' + 'before timeout expired' % size) + time.sleep(3) + self.log("quorum is size %d" % size) + + def get_mon_health(self, debug=False): + """ + Extract all the monitor health information. + """ + out = self.raw_cluster_cmd('health', '--format=json') + if debug: + self.log('health:\n{h}'.format(h=out)) + return json.loads(out) + + ## metadata servers + + def kill_mds(self, mds): + """ + Powercyle if set in config, otherwise just stop. 
+ """ + if self.config.get('powercycle'): + (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)). + remotes.iterkeys()) + self.log('kill_mds on mds.{m} doing powercycle of {s}'. + format(m=mds, s=remote.name)) + assert remote.console is not None, ("powercycling requested " + "but RemoteConsole is not " + "initialized. " + "Check ipmi config.") + remote.console.power_off() + else: + self.ctx.daemons.get_daemon('mds', mds).stop() + + def kill_mds_by_rank(self, rank): + """ + kill_mds wrapper to kill based on rank passed. + """ + status = self.get_mds_status_by_rank(rank) + self.kill_mds(status['name']) + + def revive_mds(self, mds, standby_for_rank=None): + """ + Revive mds -- do an ipmpi powercycle (if indicated by the config) + and then restart (using --hot-standby if specified. + """ + if self.config.get('powercycle'): + (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)). + remotes.iterkeys()) + self.log('revive_mds on mds.{m} doing powercycle of {s}'. + format(m=mds, s=remote.name)) + assert remote.console is not None, ("powercycling requested " + "but RemoteConsole is not " + "initialized. " + "Check ipmi config.") + remote.console.power_on() + make_admin_daemon_dir(self.ctx, remote) + args = [] + if standby_for_rank: + args.extend(['--hot-standby', standby_for_rank]) + self.ctx.daemons.get_daemon('mds', mds).restart(*args) + + def revive_mds_by_rank(self, rank, standby_for_rank=None): + """ + revive_mds wrapper to revive based on rank passed. + """ + status = self.get_mds_status_by_rank(rank) + self.revive_mds(status['name'], standby_for_rank) + + def get_mds_status(self, mds): + """ + Run cluster commands for the mds in order to get mds information + """ + out = self.raw_cluster_cmd('mds', 'dump', '--format=json') + j = json.loads(' '.join(out.splitlines()[1:])) + # collate; for dup ids, larger gid wins. 
+ for info in j['info'].itervalues(): + if info['name'] == mds: + return info + return None + + def get_mds_status_by_rank(self, rank): + """ + Run cluster commands for the mds in order to get mds information + check rank. + """ + out = self.raw_cluster_cmd('mds', 'dump', '--format=json') + j = json.loads(' '.join(out.splitlines()[1:])) + # collate; for dup ids, larger gid wins. + for info in j['info'].itervalues(): + if info['rank'] == rank: + return info + return None + + def get_mds_status_all(self): + """ + Run cluster command to extract all the mds status. + """ + out = self.raw_cluster_cmd('mds', 'dump', '--format=json') + j = json.loads(' '.join(out.splitlines()[1:])) + return j + + def get_filepath(self): + """ + Return path to osd data with {id} needing to be replaced + """ + return "/var/lib/ceph/osd/ceph-{id}" + +def utility_task(name): + """ + Generate ceph_manager subtask corresponding to ceph_manager + method name + """ + def task(ctx, config): + if config is None: + config = {} + args = config.get('args', []) + kwargs = config.get('kwargs', {}) + fn = getattr(ctx.manager, name) + fn(*args, **kwargs) + return task + +revive_osd = utility_task("revive_osd") +kill_osd = utility_task("kill_osd") +create_pool = utility_task("create_pool") +remove_pool = utility_task("remove_pool") +wait_for_clean = utility_task("wait_for_clean") +set_pool_property = utility_task("set_pool_property") diff --git a/qa/tasks/ceph_objectstore_tool.py b/qa/tasks/ceph_objectstore_tool.py new file mode 100644 index 00000000000..3b899de33b8 --- /dev/null +++ b/qa/tasks/ceph_objectstore_tool.py @@ -0,0 +1,679 @@ +""" +ceph_objectstore_tool - Simple test of ceph-objectstore-tool utility +""" +from cStringIO import StringIO +import contextlib +import logging +import ceph_manager +from teuthology import misc as teuthology +import time +import os +import string +from teuthology.orchestra import run +import sys +import tempfile +import json +from util.rados import (rados, 
create_replicated_pool, create_ec_pool) +# from util.rados import (rados, create_ec_pool, +# create_replicated_pool, +# create_cache_pool) + +log = logging.getLogger(__name__) + +# Should get cluster name "ceph" from somewhere +# and normal path from osd_data and osd_journal in conf +FSPATH = "/var/lib/ceph/osd/ceph-{id}" +JPATH = "/var/lib/ceph/osd/ceph-{id}/journal" + + +def cod_setup_local_data(log, ctx, NUM_OBJECTS, DATADIR, + BASE_NAME, DATALINECOUNT): + objects = range(1, NUM_OBJECTS + 1) + for i in objects: + NAME = BASE_NAME + "{num}".format(num=i) + LOCALNAME = os.path.join(DATADIR, NAME) + + dataline = range(DATALINECOUNT) + fd = open(LOCALNAME, "w") + data = "This is the data for " + NAME + "\n" + for _ in dataline: + fd.write(data) + fd.close() + + +def cod_setup_remote_data(log, ctx, remote, NUM_OBJECTS, DATADIR, + BASE_NAME, DATALINECOUNT): + + objects = range(1, NUM_OBJECTS + 1) + for i in objects: + NAME = BASE_NAME + "{num}".format(num=i) + DDNAME = os.path.join(DATADIR, NAME) + + remote.run(args=['rm', '-f', DDNAME]) + + dataline = range(DATALINECOUNT) + data = "This is the data for " + NAME + "\n" + DATA = "" + for _ in dataline: + DATA += data + teuthology.write_file(remote, DDNAME, DATA) + + +def cod_setup(log, ctx, remote, NUM_OBJECTS, DATADIR, + BASE_NAME, DATALINECOUNT, POOL, db, ec): + ERRORS = 0 + log.info("Creating {objs} objects in pool".format(objs=NUM_OBJECTS)) + + objects = range(1, NUM_OBJECTS + 1) + for i in objects: + NAME = BASE_NAME + "{num}".format(num=i) + DDNAME = os.path.join(DATADIR, NAME) + + proc = rados(ctx, remote, ['-p', POOL, 'put', NAME, DDNAME], + wait=False) + # proc = remote.run(args=['rados', '-p', POOL, 'put', NAME, DDNAME]) + ret = proc.wait() + if ret != 0: + log.critical("Rados put failed with status {ret}". 
+ format(ret=proc.exitstatus)) + sys.exit(1) + + db[NAME] = {} + + keys = range(i) + db[NAME]["xattr"] = {} + for k in keys: + if k == 0: + continue + mykey = "key{i}-{k}".format(i=i, k=k) + myval = "val{i}-{k}".format(i=i, k=k) + proc = remote.run(args=['rados', '-p', POOL, 'setxattr', + NAME, mykey, myval]) + ret = proc.wait() + if ret != 0: + log.error("setxattr failed with {ret}".format(ret=ret)) + ERRORS += 1 + db[NAME]["xattr"][mykey] = myval + + # Erasure coded pools don't support omap + if ec: + continue + + # Create omap header in all objects but REPobject1 + if i != 1: + myhdr = "hdr{i}".format(i=i) + proc = remote.run(args=['rados', '-p', POOL, 'setomapheader', + NAME, myhdr]) + ret = proc.wait() + if ret != 0: + log.critical("setomapheader failed with {ret}".format(ret=ret)) + ERRORS += 1 + db[NAME]["omapheader"] = myhdr + + db[NAME]["omap"] = {} + for k in keys: + if k == 0: + continue + mykey = "okey{i}-{k}".format(i=i, k=k) + myval = "oval{i}-{k}".format(i=i, k=k) + proc = remote.run(args=['rados', '-p', POOL, 'setomapval', + NAME, mykey, myval]) + ret = proc.wait() + if ret != 0: + log.critical("setomapval failed with {ret}".format(ret=ret)) + db[NAME]["omap"][mykey] = myval + + return ERRORS + + +def get_lines(filename): + tmpfd = open(filename, "r") + line = True + lines = [] + while line: + line = tmpfd.readline().rstrip('\n') + if line: + lines += [line] + tmpfd.close() + os.unlink(filename) + return lines + + +@contextlib.contextmanager +def task(ctx, config): + """ + Run ceph_objectstore_tool test + + The config should be as follows:: + + ceph_objectstore_tool: + objects: 20 # + pgnum: 12 + """ + + if config is None: + config = {} + assert isinstance(config, dict), \ + 'ceph_objectstore_tool task only accepts a dict for configuration' + + log.info('Beginning ceph_objectstore_tool...') + + log.debug(config) + log.debug(ctx) + clients = ctx.cluster.only(teuthology.is_type('client')) + assert len(clients.remotes) > 0, 'Must specify at least 1 
client' + (cli_remote, _) = clients.remotes.popitem() + log.debug(cli_remote) + + # clients = dict(teuthology.get_clients(ctx=ctx, roles=config.keys())) + # client = clients.popitem() + # log.info(client) + osds = ctx.cluster.only(teuthology.is_type('osd')) + log.info("OSDS") + log.info(osds) + log.info(osds.remotes) + + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + config=config, + logger=log.getChild('ceph_manager'), + ) + ctx.manager = manager + + while (len(manager.get_osd_status()['up']) != + len(manager.get_osd_status()['raw'])): + time.sleep(10) + while (len(manager.get_osd_status()['in']) != + len(manager.get_osd_status()['up'])): + time.sleep(10) + manager.raw_cluster_cmd('osd', 'set', 'noout') + manager.raw_cluster_cmd('osd', 'set', 'nodown') + + PGNUM = config.get('pgnum', 12) + log.info("pgnum: {num}".format(num=PGNUM)) + + ERRORS = 0 + + REP_POOL = "rep_pool" + REP_NAME = "REPobject" + create_replicated_pool(cli_remote, REP_POOL, PGNUM) + ERRORS += test_objectstore(ctx, config, cli_remote, REP_POOL, REP_NAME) + + EC_POOL = "ec_pool" + EC_NAME = "ECobject" + create_ec_pool(cli_remote, EC_POOL, 'default', PGNUM) + ERRORS += test_objectstore(ctx, config, cli_remote, + EC_POOL, EC_NAME, ec=True) + + if ERRORS == 0: + log.info("TEST PASSED") + else: + log.error("TEST FAILED WITH {errcount} ERRORS".format(errcount=ERRORS)) + + assert ERRORS == 0 + + try: + yield + finally: + log.info('Ending ceph_objectstore_tool') + + +def test_objectstore(ctx, config, cli_remote, REP_POOL, REP_NAME, ec=False): + manager = ctx.manager + + osds = ctx.cluster.only(teuthology.is_type('osd')) + + TEUTHDIR = teuthology.get_testdir(ctx) + DATADIR = os.path.join(TEUTHDIR, "data") + DATALINECOUNT = 10000 + ERRORS = 0 + NUM_OBJECTS = config.get('objects', 10) + log.info("objects: {num}".format(num=NUM_OBJECTS)) + + pool_dump = manager.get_pool_dump(REP_POOL) + 
REPID = pool_dump['pool'] + + log.debug("repid={num}".format(num=REPID)) + + db = {} + + LOCALDIR = tempfile.mkdtemp("cod") + + cod_setup_local_data(log, ctx, NUM_OBJECTS, LOCALDIR, + REP_NAME, DATALINECOUNT) + allremote = [] + allremote.append(cli_remote) + allremote += osds.remotes.keys() + allremote = list(set(allremote)) + for remote in allremote: + cod_setup_remote_data(log, ctx, remote, NUM_OBJECTS, DATADIR, + REP_NAME, DATALINECOUNT) + + ERRORS += cod_setup(log, ctx, cli_remote, NUM_OBJECTS, DATADIR, + REP_NAME, DATALINECOUNT, REP_POOL, db, ec) + + pgs = {} + for stats in manager.get_pg_stats(): + if stats["pgid"].find(str(REPID) + ".") != 0: + continue + if pool_dump["type"] == ceph_manager.CephManager.REPLICATED_POOL: + for osd in stats["acting"]: + pgs.setdefault(osd, []).append(stats["pgid"]) + elif pool_dump["type"] == ceph_manager.CephManager.ERASURE_CODED_POOL: + shard = 0 + for osd in stats["acting"]: + pgs.setdefault(osd, []).append("{pgid}s{shard}". + format(pgid=stats["pgid"], + shard=shard)) + shard += 1 + else: + raise Exception("{pool} has an unexpected type {type}". + format(pool=REP_POOL, type=pool_dump["type"])) + + log.info(pgs) + log.info(db) + + for osd in manager.get_osd_status()['up']: + manager.kill_osd(osd) + time.sleep(5) + + pgswithobjects = set() + objsinpg = {} + + # Test --op list and generate json for all objects + log.info("Test --op list by generating json for all objects") + prefix = ("sudo ceph-objectstore-tool " + "--data-path {fpath} " + "--journal-path {jpath} ").format(fpath=FSPATH, jpath=JPATH) + for remote in osds.remotes.iterkeys(): + log.debug(remote) + log.debug(osds.remotes[remote]) + for role in osds.remotes[remote]: + if string.find(role, "osd.") != 0: + continue + osdid = int(role.split('.')[1]) + log.info("process osd.{id} on {remote}". 
+ format(id=osdid, remote=remote)) + cmd = (prefix + "--op list").format(id=osdid) + proc = remote.run(args=cmd.split(), check_status=False, + stdout=StringIO()) + if proc.exitstatus != 0: + log.error("Bad exit status {ret} from --op list request". + format(ret=proc.exitstatus)) + ERRORS += 1 + else: + for pgline in proc.stdout.getvalue().splitlines(): + if not pgline: + continue + (pg, obj) = json.loads(pgline) + name = obj['oid'] + if name in db: + pgswithobjects.add(pg) + objsinpg.setdefault(pg, []).append(name) + db[name].setdefault("pg2json", + {})[pg] = json.dumps(obj) + + log.info(db) + log.info(pgswithobjects) + log.info(objsinpg) + + if pool_dump["type"] == ceph_manager.CephManager.REPLICATED_POOL: + # Test get-bytes + log.info("Test get-bytes and set-bytes") + for basename in db.keys(): + file = os.path.join(DATADIR, basename) + GETNAME = os.path.join(DATADIR, "get") + SETNAME = os.path.join(DATADIR, "set") + + for remote in osds.remotes.iterkeys(): + for role in osds.remotes[remote]: + if string.find(role, "osd.") != 0: + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg, JSON in db[basename]["pg2json"].iteritems(): + if pg in pgs[osdid]: + cmd = ((prefix + "--pgid {pg}"). + format(id=osdid, pg=pg).split()) + cmd.append(run.Raw("'{json}'".format(json=JSON))) + cmd += ("get-bytes {fname}". + format(fname=GETNAME).split()) + proc = remote.run(args=cmd, check_status=False) + if proc.exitstatus != 0: + remote.run(args="rm -f {getfile}". + format(getfile=GETNAME).split()) + log.error("Bad exit status {ret}". + format(ret=proc.exitstatus)) + ERRORS += 1 + continue + cmd = ("diff -q {file} {getfile}". + format(file=file, getfile=GETNAME)) + proc = remote.run(args=cmd.split()) + if proc.exitstatus != 0: + log.error("Data from get-bytes differ") + # log.debug("Got:") + # cat_file(logging.DEBUG, GETNAME) + # log.debug("Expected:") + # cat_file(logging.DEBUG, file) + ERRORS += 1 + remote.run(args="rm -f {getfile}". 
+ format(getfile=GETNAME).split()) + + data = ("put-bytes going into {file}\n". + format(file=file)) + teuthology.write_file(remote, SETNAME, data) + cmd = ((prefix + "--pgid {pg}"). + format(id=osdid, pg=pg).split()) + cmd.append(run.Raw("'{json}'".format(json=JSON))) + cmd += ("set-bytes {fname}". + format(fname=SETNAME).split()) + proc = remote.run(args=cmd, check_status=False) + proc.wait() + if proc.exitstatus != 0: + log.info("set-bytes failed for object {obj} " + "in pg {pg} osd.{id} ret={ret}". + format(obj=basename, pg=pg, + id=osdid, ret=proc.exitstatus)) + ERRORS += 1 + + cmd = ((prefix + "--pgid {pg}"). + format(id=osdid, pg=pg).split()) + cmd.append(run.Raw("'{json}'".format(json=JSON))) + cmd += "get-bytes -".split() + proc = remote.run(args=cmd, check_status=False, + stdout=StringIO()) + proc.wait() + if proc.exitstatus != 0: + log.error("get-bytes after " + "set-bytes ret={ret}". + format(ret=proc.exitstatus)) + ERRORS += 1 + else: + if data != proc.stdout.getvalue(): + log.error("Data inconsistent after " + "set-bytes, got:") + log.error(proc.stdout.getvalue()) + ERRORS += 1 + + cmd = ((prefix + "--pgid {pg}"). + format(id=osdid, pg=pg).split()) + cmd.append(run.Raw("'{json}'".format(json=JSON))) + cmd += ("set-bytes {fname}". + format(fname=file).split()) + proc = remote.run(args=cmd, check_status=False) + proc.wait() + if proc.exitstatus != 0: + log.info("set-bytes failed for object {obj} " + "in pg {pg} osd.{id} ret={ret}". 
+ format(obj=basename, pg=pg, + id=osdid, ret=proc.exitstatus)) + ERRORS += 1 + + log.info("Test list-attrs get-attr") + for basename in db.keys(): + file = os.path.join(DATADIR, basename) + GETNAME = os.path.join(DATADIR, "get") + SETNAME = os.path.join(DATADIR, "set") + + for remote in osds.remotes.iterkeys(): + for role in osds.remotes[remote]: + if string.find(role, "osd.") != 0: + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg, JSON in db[basename]["pg2json"].iteritems(): + if pg in pgs[osdid]: + cmd = ((prefix + "--pgid {pg}"). + format(id=osdid, pg=pg).split()) + cmd.append(run.Raw("'{json}'".format(json=JSON))) + cmd += ["list-attrs"] + proc = remote.run(args=cmd, check_status=False, + stdout=StringIO(), stderr=StringIO()) + proc.wait() + if proc.exitstatus != 0: + log.error("Bad exit status {ret}". + format(ret=proc.exitstatus)) + ERRORS += 1 + continue + keys = proc.stdout.getvalue().split() + values = dict(db[basename]["xattr"]) + + for key in keys: + if (key == "_" or + key == "snapset" or + key == "hinfo_key"): + continue + key = key.strip("_") + if key not in values: + log.error("The key {key} should be present". + format(key=key)) + ERRORS += 1 + continue + exp = values.pop(key) + cmd = ((prefix + "--pgid {pg}"). + format(id=osdid, pg=pg).split()) + cmd.append(run.Raw("'{json}'".format(json=JSON))) + cmd += ("get-attr {key}". + format(key="_" + key).split()) + proc = remote.run(args=cmd, check_status=False, + stdout=StringIO()) + proc.wait() + if proc.exitstatus != 0: + log.error("get-attr failed with {ret}". + format(ret=proc.exitstatus)) + ERRORS += 1 + continue + val = proc.stdout.getvalue() + if exp != val: + log.error("For key {key} got value {got} " + "instead of {expected}". 
+ format(key=key, got=val, + expected=exp)) + ERRORS += 1 + if "hinfo_key" in keys: + cmd_prefix = prefix.format(id=osdid) + cmd = """ + expected=$({prefix} --pgid {pg} '{json}' get-attr {key} | base64) + echo placeholder | {prefix} --pgid {pg} '{json}' set-attr {key} - + test $({prefix} --pgid {pg} '{json}' get-attr {key}) = placeholder + echo $expected | base64 --decode | \ + {prefix} --pgid {pg} '{json}' set-attr {key} - + test $({prefix} --pgid {pg} '{json}' get-attr {key} | base64) = $expected + """.format(prefix=cmd_prefix, pg=pg, json=JSON, + key="hinfo_key") + log.debug(cmd) + proc = remote.run(args=['bash', '-e', '-x', + '-c', cmd], + check_status=False, + stdout=StringIO(), + stderr=StringIO()) + proc.wait() + if proc.exitstatus != 0: + log.error("failed with " + + str(proc.exitstatus)) + log.error(proc.stdout.getvalue() + " " + + proc.stderr.getvalue()) + ERRORS += 1 + + if len(values) != 0: + log.error("Not all keys found, remaining keys:") + log.error(values) + + log.info("Test pg info") + for remote in osds.remotes.iterkeys(): + for role in osds.remotes[remote]: + if string.find(role, "osd.") != 0: + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg in pgs[osdid]: + cmd = ((prefix + "--op info --pgid {pg}"). + format(id=osdid, pg=pg).split()) + proc = remote.run(args=cmd, check_status=False, + stdout=StringIO()) + proc.wait() + if proc.exitstatus != 0: + log.error("Failure of --op info command with {ret}". + format(proc.exitstatus)) + ERRORS += 1 + continue + info = proc.stdout.getvalue() + if not str(pg) in info: + log.error("Bad data from info: {info}".format(info=info)) + ERRORS += 1 + + log.info("Test pg logging") + for remote in osds.remotes.iterkeys(): + for role in osds.remotes[remote]: + if string.find(role, "osd.") != 0: + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg in pgs[osdid]: + cmd = ((prefix + "--op log --pgid {pg}"). 
+ format(id=osdid, pg=pg).split()) + proc = remote.run(args=cmd, check_status=False, + stdout=StringIO()) + proc.wait() + if proc.exitstatus != 0: + log.error("Getting log failed for pg {pg} " + "from osd.{id} with {ret}". + format(pg=pg, id=osdid, ret=proc.exitstatus)) + ERRORS += 1 + continue + HASOBJ = pg in pgswithobjects + MODOBJ = "modify" in proc.stdout.getvalue() + if HASOBJ != MODOBJ: + log.error("Bad log for pg {pg} from osd.{id}". + format(pg=pg, id=osdid)) + MSG = (HASOBJ and [""] or ["NOT "])[0] + log.error("Log should {msg}have a modify entry". + format(msg=MSG)) + ERRORS += 1 + + log.info("Test pg export") + EXP_ERRORS = 0 + for remote in osds.remotes.iterkeys(): + for role in osds.remotes[remote]: + if string.find(role, "osd.") != 0: + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg in pgs[osdid]: + fpath = os.path.join(DATADIR, "osd{id}.{pg}". + format(id=osdid, pg=pg)) + + cmd = ((prefix + "--op export --pgid {pg} --file {file}"). + format(id=osdid, pg=pg, file=fpath)) + proc = remote.run(args=cmd, check_status=False, + stdout=StringIO()) + proc.wait() + if proc.exitstatus != 0: + log.error("Exporting failed for pg {pg} " + "on osd.{id} with {ret}". + format(pg=pg, id=osdid, ret=proc.exitstatus)) + EXP_ERRORS += 1 + + ERRORS += EXP_ERRORS + + log.info("Test pg removal") + RM_ERRORS = 0 + for remote in osds.remotes.iterkeys(): + for role in osds.remotes[remote]: + if string.find(role, "osd.") != 0: + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg in pgs[osdid]: + cmd = ((prefix + "--op remove --pgid {pg}"). + format(pg=pg, id=osdid)) + proc = remote.run(args=cmd, check_status=False, + stdout=StringIO()) + proc.wait() + if proc.exitstatus != 0: + log.error("Removing failed for pg {pg} " + "on osd.{id} with {ret}". 
+ format(pg=pg, id=osdid, ret=proc.exitstatus)) + RM_ERRORS += 1 + + ERRORS += RM_ERRORS + + IMP_ERRORS = 0 + if EXP_ERRORS == 0 and RM_ERRORS == 0: + log.info("Test pg import") + + for remote in osds.remotes.iterkeys(): + for role in osds.remotes[remote]: + if string.find(role, "osd.") != 0: + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg in pgs[osdid]: + fpath = os.path.join(DATADIR, "osd{id}.{pg}". + format(id=osdid, pg=pg)) + + cmd = ((prefix + "--op import --file {file}"). + format(id=osdid, file=fpath)) + proc = remote.run(args=cmd, check_status=False, + stdout=StringIO()) + proc.wait() + if proc.exitstatus != 0: + log.error("Import failed from {file} with {ret}". + format(file=fpath, ret=proc.exitstatus)) + IMP_ERRORS += 1 + else: + log.warning("SKIPPING IMPORT TESTS DUE TO PREVIOUS FAILURES") + + ERRORS += IMP_ERRORS + + if EXP_ERRORS == 0 and RM_ERRORS == 0 and IMP_ERRORS == 0: + log.info("Restarting OSDs....") + # They are still look to be up because of setting nodown + for osd in manager.get_osd_status()['up']: + manager.revive_osd(osd) + # Wait for health? + time.sleep(5) + # Let scrub after test runs verify consistency of all copies + log.info("Verify replicated import data") + objects = range(1, NUM_OBJECTS + 1) + for i in objects: + NAME = REP_NAME + "{num}".format(num=i) + TESTNAME = os.path.join(DATADIR, "gettest") + REFNAME = os.path.join(DATADIR, NAME) + + proc = rados(ctx, cli_remote, + ['-p', REP_POOL, 'get', NAME, TESTNAME], wait=False) + + ret = proc.wait() + if ret != 0: + log.error("After import, rados get failed with {ret}". 
log = logging.getLogger(__name__)


class CephFSTestCase(unittest.TestCase):
    """
    Test case for Ceph FS, requires caller to populate Filesystem and Mounts,
    into the fs, mount_a, mount_b class attributes (setting mount_b is optional)

    Handles resetting the cluster under test between tests.
    """
    # Environment references
    mount_a = None
    mount_b = None
    fs = None

    def setUp(self):
        """
        Reset the cluster: unmount clients, destroy and recreate the
        filesystem, restart the MDS daemons and remount the clients.
        """
        self.fs.clear_firewall()

        # Unmount in order to start each test on a fresh mount, such
        # that test_barrier can have a firm expectation of what OSD
        # epoch the clients start with.
        if self.mount_a.is_mounted():
            self.mount_a.umount_wait()

        if self.mount_b:
            if self.mount_b.is_mounted():
                self.mount_b.umount_wait()

        # To avoid any issues with e.g. unlink bugs, we destroy and recreate
        # the filesystem rather than just doing a rm -rf of files
        self.fs.mds_stop()
        self.fs.mds_fail()
        self.fs.delete()
        self.fs.create()

        # In case the previous filesystem had filled up the RADOS cluster, wait for that
        # flag to pass.
        osd_mon_report_interval_max = int(self.fs.get_config("osd_mon_report_interval_max", service_type='osd'))
        self.wait_until_true(lambda: not self.fs.is_full(),
                             timeout=osd_mon_report_interval_max * 5)

        self.fs.mds_restart()
        self.fs.wait_for_daemons()
        if not self.mount_a.is_mounted():
            self.mount_a.mount()
            self.mount_a.wait_until_mounted()

        if self.mount_b:
            if not self.mount_b.is_mounted():
                self.mount_b.mount()
                self.mount_b.wait_until_mounted()

        # (subsys, key) pairs changed through set_conf(); undone in tearDown()
        self.configs_set = set()

    def tearDown(self):
        """
        Tear down the mounts and revert every setting made via set_conf().
        """
        self.fs.clear_firewall()
        self.mount_a.teardown()
        if self.mount_b:
            self.mount_b.teardown()

        for subsys, key in self.configs_set:
            self.fs.clear_ceph_conf(subsys, key)

    def set_conf(self, subsys, key, value):
        """
        Set a ceph.conf value and remember it so tearDown() can clear it.
        """
        self.configs_set.add((subsys, key))
        self.fs.set_ceph_conf(subsys, key, value)

    def assert_session_count(self, expected, ls_data=None):
        """
        Assert that the MDS reports exactly `expected` sessions.

        :param ls_data: optional pre-fetched 'session ls' output
        """
        if ls_data is None:
            ls_data = self.fs.mds_asok(['session', 'ls'])

        self.assertEqual(expected, len(ls_data), "Expected {0} sessions, found {1}".format(
            expected, len(ls_data)
        ))

    def assert_session_state(self, client_id, expected_state):
        """
        Assert the state of the session with the given id; a missing
        session is treated as having state None.
        """
        self.assertEqual(
            self._session_by_id(
                self.fs.mds_asok(['session', 'ls'])).get(client_id, {'state': None})['state'],
            expected_state)

    def get_session_data(self, client_id):
        """
        Return the 'session ls' entry for the given client id.

        :raises KeyError: if the MDS has no session with that id
        """
        # The original passed client_id where a session list was expected,
        # which could only raise TypeError; delegate to get_session().
        return self.get_session(client_id)

    def _session_list(self):
        """
        Return the 'session ls' entries that are neither stale nor closed.
        """
        ls_data = self.fs.mds_asok(['session', 'ls'])
        ls_data = [s for s in ls_data if s['state'] not in ['stale', 'closed']]
        return ls_data

    def get_session(self, client_id, session_ls=None):
        """
        Return the session entry with the given id.

        :param session_ls: optional pre-fetched 'session ls' output
        :raises KeyError: if no session has that id
        """
        if session_ls is None:
            session_ls = self.fs.mds_asok(['session', 'ls'])

        return self._session_by_id(session_ls)[client_id]

    def _session_by_id(self, session_ls):
        """
        Index a list of session dicts by their 'id' field.
        """
        return dict([(s['id'], s) for s in session_ls])

    def wait_until_equal(self, get_fn, expect_val, timeout, reject_fn=None):
        """
        Poll get_fn() every 5 seconds until it returns expect_val.

        :param reject_fn: optional predicate; a value it accepts aborts the wait
        :raises RuntimeError: on a rejected value, or once `timeout` seconds
                              have elapsed without a match
        """
        period = 5
        elapsed = 0
        while True:
            val = get_fn()
            if val == expect_val:
                # Logged here: the loop only exits via return/raise, so a
                # statement after it would never run.
                log.debug("wait_until_equal: success")
                return
            elif reject_fn and reject_fn(val):
                raise RuntimeError("wait_until_equal: forbidden value {0} seen".format(val))
            else:
                if elapsed >= timeout:
                    raise RuntimeError("Timed out after {0} seconds waiting for {1} (currently {2})".format(
                        elapsed, expect_val, val
                    ))
                else:
                    log.debug("wait_until_equal: {0} != {1}, waiting...".format(val, expect_val))
                time.sleep(period)
                elapsed += period

    def wait_until_true(self, condition, timeout):
        """
        Poll condition() every 5 seconds until it returns a true value.

        :raises RuntimeError: once `timeout` seconds have elapsed
        """
        period = 5
        elapsed = 0
        while True:
            if condition():
                log.debug("wait_until_true: success")
                return
            else:
                if elapsed >= timeout:
                    raise RuntimeError("Timed out after {0} seconds".format(elapsed))
                else:
                    log.debug("wait_until_true: waiting...")
                time.sleep(period)
                elapsed += period


class LogStream(object):
    """
    File-like object that forwards complete lines to the module logger.
    """
    def __init__(self):
        self.buffer = ""

    def write(self, data):
        """
        Buffer data; emit every completed line via log.info and keep the
        trailing partial line for the next call.
        """
        self.buffer += data
        if "\n" in self.buffer:
            lines = self.buffer.split("\n")
            for line in lines[:-1]:
                log.info(line)
            self.buffer = lines[-1]

    def flush(self):
        pass


class InteractiveFailureResult(unittest.TextTestResult):
    """
    Specialization that implements interactive-on-error style
    behavior.
    """
    ctx = None

    def _go_interactive(self, message, test, err):
        """
        Log the traceback and drop into teuthology's interactive task.
        """
        # NOTE(review): the base-class handler is not called, so the problem
        # is not recorded in self.failures / self.errors. This matches the
        # original behaviour; confirm it is intentional.
        log.error(self._exc_info_to_string(err, test))
        log.error(message.format(self.getDescription(test)))
        interactive.task(ctx=self.ctx, config=None)

    def addFailure(self, test, err):
        self._go_interactive("Failure in test '{0}', going interactive", test, err)

    def addError(self, test, err):
        self._go_interactive("Error in test '{0}', going interactive", test, err)


def run_tests(ctx, config, test_klass, params):
    """
    Run the tests of test_klass, routing unittest's output to the log.

    :param ctx: teuthology context; reads ctx.config and ctx.mounts
    :param config: task config; an optional 'test_name' selects one test
    :param test_klass: unittest.TestCase subclass to run
    :param params: attributes to set on test_klass before running
    :raises RuntimeError: if any test errored or failed
    """
    for k, v in params.items():
        setattr(test_klass, k, v)

    # Execute test suite
    # ==================
    if config and 'test_name' in config:
        # Test names like TestCase.this_test
        suite = unittest.TestLoader().loadTestsFromName(
            "{0}.{1}".format(test_klass.__module__, config['test_name']))
    else:
        suite = unittest.TestLoader().loadTestsFromTestCase(test_klass)

    if ctx.config.get("interactive-on-error", False):
        InteractiveFailureResult.ctx = ctx
        result_class = InteractiveFailureResult
    else:
        result_class = unittest.TextTestResult

    # Unmount all clients not involved
    for mount in ctx.mounts.values():
        if mount is not params.get('mount_a') and mount is not params.get('mount_b'):
            if mount.is_mounted():
                log.info("Unmounting unneeded client {0}".format(mount.client_id))
                mount.umount_wait()

    # Execute!
    result = unittest.TextTestRunner(
        stream=LogStream(),
        resultclass=result_class,
        verbosity=2,
        failfast=True).run(suite)

    if not result.wasSuccessful():
        result.printErrors()  # duplicate output at end for convenience

        bad_tests = []
        for test, error in result.errors:
            bad_tests.append(str(test))
        for test, failure in result.failures:
            bad_tests.append(str(test))

        raise RuntimeError("Test failure: {0}".format(", ".join(bad_tests)))
class Filesystem(object):
    """
    This object is for driving a CephFS filesystem.

    Limitations:
     * Assume a single filesystem+cluster
     * Assume a single MDS
    """
    def __init__(self, ctx, admin_remote=None):
        """
        :param ctx: teuthology context describing the cluster
        :param admin_remote: remote to run admin commands on; defaults to
                             the remote hosting the first mon
        :raises RuntimeError: if the cluster has no MDS role
        """
        self._ctx = ctx

        self.mds_ids = list(misc.all_roles_of_type(ctx.cluster, 'mds'))
        if len(self.mds_ids) == 0:
            raise RuntimeError("This task requires at least one MDS")

        first_mon = misc.get_first_mon(ctx, None)
        if admin_remote is None:
            (self.admin_remote,) = ctx.cluster.only(first_mon).remotes.iterkeys()
        else:
            self.admin_remote = admin_remote
        self.mon_manager = ceph_manager.CephManager(self.admin_remote, ctx=ctx, logger=log.getChild('ceph_manager'))
        if hasattr(self._ctx, "daemons"):
            # Presence of 'daemons' attribute implies ceph task rather than ceph_deploy task
            self.mds_daemons = dict([(mds_id, self._ctx.daemons.get_daemon('mds', mds_id)) for mds_id in self.mds_ids])

        client_list = list(misc.all_roles_of_type(self._ctx.cluster, 'client'))
        self.client_id = client_list[0]
        self.client_remote = list(misc.get_clients(ctx=ctx, roles=["client.{0}".format(self.client_id)]))[0][1]

    def create(self):
        """
        Create the 'metadata' and 'data' pools and the 'default' filesystem.
        """
        pg_warn_min_per_osd = int(self.get_config('mon_pg_warn_min_per_osd'))
        osd_count = len(list(misc.all_roles_of_type(self._ctx.cluster, 'osd')))
        pgs_per_fs_pool = pg_warn_min_per_osd * osd_count

        self.admin_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'create', 'metadata', str(pgs_per_fs_pool)])
        self.admin_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'create', 'data', str(pgs_per_fs_pool)])
        self.admin_remote.run(args=['sudo', 'ceph', 'fs', 'new', 'default', 'metadata', 'data'])

    def delete(self):
        """
        Remove the 'default' filesystem and delete both of its pools.
        """
        self.admin_remote.run(args=['sudo', 'ceph', 'fs', 'rm', 'default', '--yes-i-really-mean-it'])
        self.admin_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'delete',
                                    'metadata', 'metadata', '--yes-i-really-really-mean-it'])
        self.admin_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'delete',
                                    'data', 'data', '--yes-i-really-really-mean-it'])

    def legacy_configured(self):
        """
        Check if a legacy (i.e. pre "fs new") filesystem configuration is present.  If this is
        the case, the caller should avoid using Filesystem.create
        """
        try:
            proc = self.admin_remote.run(args=['sudo', 'ceph', '--format=json-pretty', 'osd', 'lspools'],
                                         stdout=StringIO())
            pools = json.loads(proc.stdout.getvalue())
            metadata_pool_exists = 'metadata' in [p['poolname'] for p in pools]
        except CommandFailedError as e:
            # For use in upgrade tests, Ceph cuttlefish and earlier don't support
            # structured output (--format) from the CLI.
            if e.exitstatus == 22:
                metadata_pool_exists = True
            else:
                raise

        return metadata_pool_exists

    def _df(self):
        """Return the decoded output of 'ceph df'."""
        return json.loads(self.mon_manager.raw_cluster_cmd("df", "--format=json-pretty"))

    def _fs_ls(self):
        """Return the single entry of 'ceph fs ls'."""
        fs_list = json.loads(self.mon_manager.raw_cluster_cmd("fs", "ls", "--format=json-pretty"))
        assert len(fs_list) == 1  # we don't handle multiple filesystems yet
        return fs_list[0]

    def get_data_pool_name(self):
        """
        Return the name of the data pool if there is only one, else raise exception -- call
        this in tests where there will only be one data pool.

        :raises RuntimeError: if there is not exactly one data pool
        """
        names = self.get_data_pool_names()
        if len(names) > 1:
            raise RuntimeError("Multiple data pools found")
        if not names:
            # Previously an opaque IndexError from names[0]
            raise RuntimeError("No data pools found")
        return names[0]

    def get_data_pool_names(self):
        return self._fs_ls()['data_pools']

    def get_metadata_pool_name(self):
        return self._fs_ls()['metadata_pool']

    def get_pool_df(self, pool_name):
        """
        Return a dict like:
        {u'bytes_used': 0, u'max_avail': 83848701, u'objects': 0, u'kb_used': 0}

        :raises RuntimeError: if 'ceph df' lists no pool with that name
        """
        for pool_df in self._df()['pools']:
            if pool_df['name'] == pool_name:
                return pool_df['stats']

        raise RuntimeError("Pool name '{0}' not found".format(pool_name))

    def get_usage(self):
        """Return 'total_used_bytes' from the cluster-wide df stats."""
        return self._df()['stats']['total_used_bytes']

    def get_mds_hostnames(self):
        """Return the distinct hostnames of the remotes running MDS daemons."""
        result = set()
        for mds_id in self.mds_ids:
            mds_remote = self.mon_manager.find_remote('mds', mds_id)
            result.add(mds_remote.hostname)

        return list(result)

    def get_config(self, key, service_type=None):
        """
        Get config from mon by default, or a specific service if caller asks for it
        """
        if service_type is None:
            service_type = 'mon'

        # Any daemon of the type will do: ask the one with the lowest id
        service_id = sorted(misc.all_roles_of_type(self._ctx.cluster, service_type))[0]
        return self.json_asok(['config', 'get', key], service_type, service_id)[key]

    def set_ceph_conf(self, subsys, key, value):
        """Set a value in the in-memory ceph.conf and write the config out."""
        if subsys not in self._ctx.ceph.conf:
            self._ctx.ceph.conf[subsys] = {}
        self._ctx.ceph.conf[subsys][key] = value
        write_conf(self._ctx)  # XXX because we don't have the ceph task's config object, if they
                               # used a different config path this won't work.

    def clear_ceph_conf(self, subsys, key):
        """Remove a value set by set_ceph_conf and write the config out."""
        del self._ctx.ceph.conf[subsys][key]
        write_conf(self._ctx)

    def are_daemons_healthy(self):
        """
        Return true if all daemons are in one of active, standby, standby-replay
        :return:
        """
        status = self.mon_manager.get_mds_status_all()
        for mds_id, mds_status in status['info'].items():
            if mds_status['state'] not in ["up:active", "up:standby", "up:standby-replay"]:
                log.warning("Unhealthy mds state {0}:{1}".format(mds_id, mds_status['state']))
                return False

        return True

    def get_active_names(self):
        """
        Return MDS daemon names of those daemons holding ranks
        in state up:active

        :return: list of strings like ['a', 'b'], sorted by rank
        """
        status = self.mon_manager.get_mds_status_all()
        # key= replaces the positional cmp function, which Python 3 rejects
        by_rank = sorted(status['info'].values(), key=lambda s: s['rank'])
        return [s['name'] for s in by_rank if s['state'] == 'up:active']

    def wait_for_daemons(self, timeout=None):
        """
        Wait until all daemons are healthy

        :param timeout: seconds to wait, default DAEMON_WAIT_TIMEOUT
        :raises RuntimeError: if the daemons are still unhealthy at timeout
        """

        if timeout is None:
            timeout = DAEMON_WAIT_TIMEOUT

        elapsed = 0
        while True:
            if self.are_daemons_healthy():
                return
            else:
                time.sleep(1)
                elapsed += 1

            if elapsed > timeout:
                raise RuntimeError("Timed out waiting for MDS daemons to become healthy")

    def get_lone_mds_id(self):
        """
        Return the only MDS id.

        :raises ValueError: if the cluster does not have exactly one MDS
        """
        if len(self.mds_ids) != 1:
            raise ValueError("Explicit MDS argument required when multiple MDSs in use")
        else:
            return self.mds_ids[0]

    def _one_or_all(self, mds_id, cb):
        """
        Call a callback for a single named MDS, or for all

        :param mds_id: MDS daemon name, or None
        :param cb: Callback taking single argument of MDS daemon name
        """
        if mds_id is None:
            with parallel() as p:
                for mds_id in self.mds_ids:
                    p.spawn(cb, mds_id)
        else:
            cb(mds_id)

    def mds_stop(self, mds_id=None):
        """
        Stop the MDS daemon process(es).  If it held a rank, that rank
        will eventually go laggy.
        """
        self._one_or_all(mds_id, lambda id_: self.mds_daemons[id_].stop())

    def mds_fail(self, mds_id=None):
        """
        Inform MDSMonitor of the death of the daemon process(es).  If it held
        a rank, that rank will be relinquished.
        """
        self._one_or_all(mds_id, lambda id_: self.mon_manager.raw_cluster_cmd("mds", "fail", id_))

    def mds_restart(self, mds_id=None):
        """Restart the named MDS daemon, or all of them."""
        self._one_or_all(mds_id, lambda id_: self.mds_daemons[id_].restart())

    def mds_fail_restart(self, mds_id=None):
        """
        Variation on restart that includes marking MDSs as failed, so that doing this
        operation followed by waiting for healthy daemon states guarantees that they
        have gone down and come up, rather than potentially seeing the healthy states
        that existed before the restart.
        """
        def _fail_restart(id_):
            self.mds_daemons[id_].stop()
            self.mon_manager.raw_cluster_cmd("mds", "fail", id_)
            self.mds_daemons[id_].restart()

        self._one_or_all(mds_id, _fail_restart)

    def reset(self):
        """
        Replace the 'default' filesystem with a new one on the same pools.
        All MDS daemons must already be stopped.
        """
        log.info("Creating new filesystem")

        self.mon_manager.raw_cluster_cmd_result('mds', 'set', "max_mds", "0")
        for mds_id in self.mds_ids:
            assert not self._ctx.daemons.get_daemon('mds', mds_id).running()
            self.mon_manager.raw_cluster_cmd_result('mds', 'fail', mds_id)
        self.mon_manager.raw_cluster_cmd_result('fs', 'rm', "default", "--yes-i-really-mean-it")
        self.mon_manager.raw_cluster_cmd_result('fs', 'new', "default", "metadata", "data")

    def get_metadata_object(self, object_type, object_id):
        """
        Retrieve an object from the metadata pool, pass it through
        ceph-dencoder to dump it to JSON, and return the decoded object.
        """
        temp_bin_path = '/tmp/out.bin'

        # FIXME get the metadata pool name from mdsmap instead of hardcoding
        self.client_remote.run(args=[
            'sudo', 'rados', '-p', 'metadata', 'get', object_id, temp_bin_path
        ])

        stdout = StringIO()
        self.client_remote.run(args=[
            'sudo', 'ceph-dencoder', 'type', object_type, 'import', temp_bin_path, 'decode', 'dump_json'
        ], stdout=stdout)
        dump_json = stdout.getvalue().strip()
        try:
            dump = json.loads(dump_json)
        except (TypeError, ValueError):
            log.error("Failed to decode JSON: '{0}'".format(dump_json))
            raise

        return dump

    def get_journal_version(self):
        """
        Read the JournalPointer and Journal::Header objects to learn the version of
        encoding in use.
        """
        journal_pointer_object = '400.00000000'
        journal_pointer_dump = self.get_metadata_object("JournalPointer", journal_pointer_object)
        journal_ino = journal_pointer_dump['journal_pointer']['front']

        journal_header_object = "{0:x}.00000000".format(journal_ino)
        journal_header_dump = self.get_metadata_object('Journaler::Header', journal_header_object)

        version = journal_header_dump['journal_header']['stream_format']
        log.info("Read journal version {0}".format(version))

        return version

    def json_asok(self, command, service_type, service_id):
        """
        Run an admin socket command and return its decoded JSON output,
        or None if the command printed nothing.
        """
        proc = self.mon_manager.admin_socket(service_type, service_id, command)
        response_data = proc.stdout.getvalue()
        log.info("_json_asok output: {0}".format(response_data))
        if response_data.strip():
            return json.loads(response_data)
        else:
            return None

    def mds_asok(self, command, mds_id=None):
        """
        Run an admin socket command on an MDS (the lone MDS by default).
        """
        if mds_id is None:
            mds_id = self.get_lone_mds_id()

        return self.json_asok(command, 'mds', mds_id)

    def get_mds_map(self):
        """
        Return the MDS map, as a JSON-esque dict from 'mds dump'
        """
        return json.loads(self.mon_manager.raw_cluster_cmd('mds', 'dump', '--format=json-pretty'))

    def get_mds_addr(self, mds_id):
        """
        Return the instance addr as a string, like "10.214.133.138:6807\/10825"

        :raises RuntimeError: if the MDS map has no daemon with that name
        """
        mds_map = self.get_mds_map()
        # 'info' is keyed by gid string; match on the daemon name instead
        for mds_info in mds_map['info'].values():
            if mds_info['name'] == mds_id:
                return mds_info['addr']

        log.warning(json.dumps(mds_map, indent=2))  # dump map for debugging
        raise RuntimeError("MDS id '{0}' not found in MDS map".format(mds_id))

    def set_clients_block(self, blocked, mds_id=None):
        """
        Block (using iptables) client communications to this MDS.  Be careful: if
        other services are running on this MDS, or other MDSs try to talk to this
        MDS, their communications may also be blocked as collateral damage.

        :param blocked: True to add the REJECT rules, False to delete them
        :param mds_id: Optional ID of MDS to block, default to all
        :return:
        """
        da_flag = "-A" if blocked else "-D"

        def set_block(_mds_id):
            remote = self.mon_manager.find_remote('mds', _mds_id)

            addr = self.get_mds_addr(_mds_id)
            ip_str, port_str, inst_str = re.match("(.+):(.+)/(.+)", addr).groups()

            remote.run(
                args=["sudo", "iptables", da_flag, "OUTPUT", "-p", "tcp", "--sport", port_str, "-j", "REJECT", "-m",
                      "comment", "--comment", "teuthology"])
            remote.run(
                args=["sudo", "iptables", da_flag, "INPUT", "-p", "tcp", "--dport", port_str, "-j", "REJECT", "-m",
                      "comment", "--comment", "teuthology"])

        self._one_or_all(mds_id, set_block)

    def clear_firewall(self):
        clear_firewall(self._ctx)

    def is_full(self):
        """Return True if 'full' appears in the flags of 'osd dump'."""
        flags = json.loads(self.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['flags']
        return 'full' in flags

    def wait_for_state(self, goal_state, reject=None, timeout=None, mds_id=None):
        """
        Block until the MDS reaches a particular state, or a failure condition
        is met.

        :param goal_state: Return once the MDS is in this state
        :param reject: Fail if the MDS enters this state before the goal state
        :param timeout: Fail if this many seconds pass before reaching goal
        :return: number of seconds waited, rounded down to integer
        """

        if mds_id is None:
            mds_id = self.get_lone_mds_id()

        elapsed = 0
        while True:
            # mds_info is None if no daemon currently claims this rank
            mds_info = self.mon_manager.get_mds_status(mds_id)
            current_state = mds_info['state'] if mds_info else None

            if current_state == goal_state:
                log.info("reached state '{0}' in {1}s".format(current_state, elapsed))
                return elapsed
            elif reject is not None and current_state == reject:
                raise RuntimeError("MDS in reject state {0}".format(current_state))
            elif timeout is not None and elapsed > timeout:
                raise RuntimeError(
                    "Reached timeout after {0} seconds waiting for state {1}, while in state {2}".format(
                        elapsed, goal_state, current_state
                    ))
            else:
                time.sleep(1)
                elapsed += 1

    def read_backtrace(self, ino_no):
        """
        Read the backtrace from the data pool, return a dict in the format
        given by inode_backtrace_t::dump, which is something like:

        ::

            rados -p cephfs_data getxattr 10000000002.00000000 parent > out.bin
            ceph-dencoder type inode_backtrace_t import out.bin decode dump_json

            { "ino": 1099511627778,
              "ancestors": [
                    { "dirino": 1,
                      "dname": "blah",
                      "version": 11}],
              "pool": 1,
              "old_pools": []}

        :raises ObjectNotFound: if the xattr cannot be read
        """
        mds_id = self.mds_ids[0]
        remote = self.mds_daemons[mds_id].remote

        obj_name = "{0:x}.00000000".format(ino_no)

        temp_file = "/tmp/{0}_{1}".format(obj_name, datetime.datetime.now().isoformat())

        args = [
            "rados", "-p", self.get_data_pool_name(), "getxattr", obj_name, "parent",
            run.Raw(">"), temp_file
        ]
        try:
            try:
                remote.run(
                    args=args,
                    stdout=StringIO())
            except CommandFailedError as e:
                log.error(str(e))
                raise ObjectNotFound(obj_name)

            p = remote.run(
                args=["ceph-dencoder", "type", "inode_backtrace_t", "import", temp_file, "decode", "dump_json"],
                stdout=StringIO()
            )
        finally:
            # The shell redirect creates the file even when rados fails;
            # remove it so repeated calls do not litter /tmp on the MDS host.
            remote.run(args=["rm", "-f", temp_file], check_status=False)

        return json.loads(p.stdout.getvalue().strip())

    def rados(self, args, pool=None):
        """
        Call into the `rados` CLI from an MDS

        :param args: list of arguments following 'rados -p <pool>'
        :param pool: pool name, default the metadata pool
        :return: stripped stdout of the command
        """

        if pool is None:
            pool = self.get_metadata_pool_name()

        # Doesn't matter which MDS we use to run rados commands, they all
        # have access to the pools
        mds_id = self.mds_ids[0]
        remote = self.mds_daemons[mds_id].remote

        # NB we could alternatively use librados pybindings for this, but it's a one-liner
        # using the `rados` CLI
        args = ["rados", "-p", pool] + args
        p = remote.run(
            args=args,
            stdout=StringIO())
        return p.stdout.getvalue().strip()

    def list_dirfrag(self, dir_ino):
        """
        Read the named object and return the list of omap keys

        :return a list of 0 or more strings
        :raises ObjectNotFound: if the rados command fails
        """

        dirfrag_obj_name = "{0:x}.00000000".format(dir_ino)

        try:
            key_list_str = self.rados(["listomapkeys", dirfrag_obj_name])
        except CommandFailedError as e:
            log.error(str(e))
            raise ObjectNotFound(dirfrag_obj_name)

        return key_list_str.split("\n") if key_list_str else []

    def erase_metadata_objects(self, prefix):
        """
        For all objects in the metadata pool matching the prefix,
        erase them.

        This O(N) with the number of objects in the pool, so only suitable
        for use on toy test filesystems.
        """
        all_objects = self.rados(["ls"]).split("\n")
        matching_objects = [o for o in all_objects if o.startswith(prefix)]
        for o in matching_objects:
            self.rados(["rm", o])

    def erase_mds_objects(self, rank):
        """
        Erase all the per-MDS objects for a particular rank.  This includes
        inotable, sessiontable, journal
        """

        def obj_prefix(multiplier):
            """
            MDS object naming conventions like rank 1's
            journal is at 201.***
            """
            return "%x." % (multiplier * 0x100 + rank)

        # MDS_INO_LOG_OFFSET
        self.erase_metadata_objects(obj_prefix(2))
        # MDS_INO_LOG_BACKUP_OFFSET
        self.erase_metadata_objects(obj_prefix(3))
        # MDS_INO_LOG_POINTER_OFFSET
        self.erase_metadata_objects(obj_prefix(4))
        # MDSTables & SessionMap
        self.erase_metadata_objects("mds{rank:d}_".format(rank=rank))

    def _run_tool(self, tool, args, rank=None, quiet=False):
        """
        Run a cephfs-*-tool binary on the first MDS host and return its
        stripped stdout.

        :param rank: if given, passed to the tool as --rank
        :param quiet: lower the mds debug level from 4 to 1
        """
        mds_id = self.mds_ids[0]
        remote = self.mds_daemons[mds_id].remote

        # Tests frequently have [client] configuration that jacks up
        # the objecter log level (unlikely to be interesting here)
        # and does not set the mds log level (very interesting here)
        if quiet:
            base_args = [tool, '--debug-mds=1', '--debug-objecter=1']
        else:
            base_args = [tool, '--debug-mds=4', '--debug-objecter=1']

        if rank is not None:
            base_args.extend(["--rank", "%d" % rank])

        t1 = datetime.datetime.now()
        r = remote.run(
            args=base_args + args,
            stdout=StringIO()).stdout.getvalue().strip()
        duration = datetime.datetime.now() - t1
        log.info("Ran {0} in time {1}, result:\n{2}".format(
            base_args + args, duration, r
        ))
        return r

    def journal_tool(self, args, rank=None, quiet=False):
        """
        Invoke cephfs-journal-tool with the passed arguments, and return its stdout
        """
        return self._run_tool("cephfs-journal-tool", args, rank, quiet)

    def table_tool(self, args, quiet=False):
        """
        Invoke cephfs-table-tool with the passed arguments, and return its stdout
        """
        return self._run_tool("cephfs-table-tool", args, None, quiet)
class FuseMount(CephFSMount):
    """
    A CephFS client mounted through a ceph-fuse daemon on a remote host.
    """
    def __init__(self, client_config, test_dir, client_id, client_remote):
        super(FuseMount, self).__init__(test_dir, client_id, client_remote)

        self.client_config = client_config if client_config else {}
        # Handle on the running ceph-fuse process; None until mount() runs
        self.fuse_daemon = None
        # Number of our entry under /sys/fs/fuse/connections, once mounted
        self._fuse_conn = None

    def mount(self):
        """
        Start ceph-fuse in the foreground on the remote and wait until a new
        entry shows up in /sys/fs/fuse/connections.

        :raises RuntimeError: if no new connection, or more than one,
                              appears within the configured 'mount_timeout'
        """
        log.info("Client client.%s config is %s" % (self.client_id, self.client_config))

        daemon_signal = 'kill'
        if self.client_config.get('coverage') or self.client_config.get('valgrind') is not None:
            daemon_signal = 'term'

        mnt = os.path.join(self.test_dir, 'mnt.{id}'.format(id=self.client_id))
        log.info('Mounting ceph-fuse client.{id} at {remote} {mnt}...'.format(
            id=self.client_id, remote=self.client_remote, mnt=mnt))

        self.client_remote.run(
            args=[
                'mkdir',
                '--',
                mnt,
            ],
        )

        run_cmd = [
            'sudo',
            'adjust-ulimits',
            'ceph-coverage',
            '{tdir}/archive/coverage'.format(tdir=self.test_dir),
            'daemon-helper',
            daemon_signal,
        ]
        run_cmd_tail = [
            'ceph-fuse',
            '-f',
            '--name', 'client.{id}'.format(id=self.client_id),
            # TODO ceph-fuse doesn't understand dash dash '--',
            mnt,
        ]

        if self.client_config.get('valgrind') is not None:
            run_cmd = misc.get_valgrind_args(
                self.test_dir,
                'client.{id}'.format(id=self.client_id),
                run_cmd,
                self.client_config.get('valgrind'),
            )

        run_cmd.extend(run_cmd_tail)

        def list_connections():
            # Mount fusectl first (failure is ignored) so that the
            # connections directory can be listed.
            self.client_remote.run(
                args=["sudo", "mount", "-t", "fusectl", "/sys/fs/fuse/connections", "/sys/fs/fuse/connections"],
                check_status=False
            )
            p = self.client_remote.run(
                args=["ls", "/sys/fs/fuse/connections"],
                stdout=StringIO(),
                check_status=False
            )
            if p.exitstatus != 0:
                return []

            ls_str = p.stdout.getvalue().strip()
            if ls_str:
                return [int(n) for n in ls_str.split("\n")]
            else:
                return []

        # Before starting ceph-fuse process, note the contents of
        # /sys/fs/fuse/connections
        pre_mount_conns = list_connections()
        log.info("Pre-mount connections: {0}".format(pre_mount_conns))

        proc = self.client_remote.run(
            args=run_cmd,
            logger=log.getChild('ceph-fuse.{id}'.format(id=self.client_id)),
            stdin=run.PIPE,
            wait=False,
        )
        self.fuse_daemon = proc

        # Wait for the connection reference to appear in /sys
        mount_wait = self.client_config.get('mount_wait', 0)
        if mount_wait > 0:
            log.info("Fuse mount waits {0} seconds before checking /sys/".format(mount_wait))
            time.sleep(mount_wait)
        timeout = int(self.client_config.get('mount_timeout', 30))
        waited = 0
        while list_connections() == pre_mount_conns:
            time.sleep(1)
            waited += 1
            if waited > timeout:
                raise RuntimeError("Fuse mount failed to populate /sys/ after {0} seconds".format(
                    waited
                ))

        post_mount_conns = list_connections()
        log.info("Post-mount connections: {0}".format(post_mount_conns))

        # Record our fuse connection number so that we can use it when
        # forcing an unmount
        new_conns = list(set(post_mount_conns) - set(pre_mount_conns))
        if len(new_conns) == 0:
            raise RuntimeError("New fuse connection directory not found ({0})".format(new_conns))
        elif len(new_conns) > 1:
            raise RuntimeError("Unexpectedly numerous fuse connections {0}".format(new_conns))
        else:
            self._fuse_conn = new_conns[0]

    def is_mounted(self):
        """
        Return True if `stat` reports filesystem type 'fuseblk' at the
        mountpoint; False if it reports another type or the stat fails.
        """
        try:
            proc = self.client_remote.run(
                args=[
                    'stat',
                    '--file-system',
                    '--printf=%T\n',
                    '--',
                    self.mountpoint,
                ],
                stdout=StringIO(),
            )
        except CommandFailedError:
            # This happens if the mount directory doesn't exist
            log.info('mount point does not exist: %s', self.mountpoint)
            return False

        fstype = proc.stdout.getvalue().rstrip('\n')
        if fstype == 'fuseblk':
            log.info('ceph-fuse is mounted on %s', self.mountpoint)
            return True
        else:
            log.debug('ceph-fuse not mounted, got fs type {fstype!r}'.format(
                fstype=fstype))
            return False

    def wait_until_mounted(self):
        """
        Check to make sure that fuse is mounted on mountpoint.  If not,
        sleep for 5 seconds and check again.
        """

        while not self.is_mounted():
            # Even if it's not mounted, it should at least
            # be running: catch simple failures where it has terminated.
            assert not self.fuse_daemon.poll()

            time.sleep(5)

        # Now that we're mounted, set permissions so that the rest of the test will have
        # unrestricted access to the filesystem mount.
        self.client_remote.run(
            args=['sudo', 'chmod', '1777', self.mountpoint])

    def _mountpoint_exists(self):
        return self.client_remote.run(args=["ls", "-d", self.mountpoint], check_status=False).exitstatus == 0

    def umount(self):
        """
        Unmount with `fusermount -u`; if that fails, abort the fuse
        connection and fall back to a lazy, forced `umount`.
        """
        try:
            log.info('Running fusermount -u on {name}...'.format(name=self.client_remote.name))
            self.client_remote.run(
                args=[
                    'sudo',
                    'fusermount',
                    '-u',
                    self.mountpoint,
                ],
            )
        except CommandFailedError:
            log.info('Failed to unmount ceph-fuse on {name}, aborting...'.format(name=self.client_remote.name))

            # abort the fuse mount, killing all hung processes
            # (explicit None test: a connection numbered 0 is still valid)
            if self._fuse_conn is not None:
                self.run_python(dedent("""
                import os
                path = "/sys/fs/fuse/connections/{0}/abort"
                if os.path.exists(path):
                    open(path, "w").write("1")
                """).format(self._fuse_conn))
                self._fuse_conn = None

            stderr = StringIO()
            try:
                # make sure its unmounted
                self.client_remote.run(
                    args=[
                        'sudo',
                        'umount',
                        '-l',
                        '-f',
                        self.mountpoint,
                    ],
                    stderr=stderr
                )
            except CommandFailedError:
                if "not found" in stderr.getvalue():
                    # Missing mount point, so we are unmounted already, yay.
                    pass
                else:
                    raise

        assert not self.is_mounted()
        self._fuse_conn = None

    def umount_wait(self, force=False):
        """
        :param force: Complete cleanly even if the MDS is offline
        """
        if force and self.fuse_daemon is not None:
            # When we expect to be forcing, kill the ceph-fuse process directly.
            # This should avoid hitting the more aggressive fallback killing
            # in umount() which can affect other mounts too.
            # (Skipped when mount() never ran and there is no daemon.)
            self.fuse_daemon.stdin.close()

            # However, we will still hit the aggressive wait if there is an ongoing
            # mount -o remount (especially if the remount is stuck because MDSs
            # are unavailable)

        self.umount()

        try:
            if self.fuse_daemon:
                self.fuse_daemon.wait()
        except CommandFailedError:
            pass

        self.cleanup()

    def cleanup(self):
        """
        Remove the mount point.

        Prerequisite: the client is not mounted.
        """
        stderr = StringIO()
        try:
            self.client_remote.run(
                args=[
                    'rmdir',
                    '--',
                    self.mountpoint,
                ],
                stderr=stderr
            )
        except CommandFailedError:
            if "No such file or directory" in stderr.getvalue():
                pass
            else:
                raise

    def kill(self):
        """
        Terminate the client without removing the mount point.
        """
        self.fuse_daemon.stdin.close()
        try:
            self.fuse_daemon.wait()
        except CommandFailedError:
            pass

    def kill_cleanup(self):
        """
        Follow up ``kill`` to get to a clean unmounted state.
        """
        self.umount()
        self.cleanup()

    def teardown(self):
        """
        Whatever the state of the mount, get it gone.
        """
        super(FuseMount, self).teardown()

        self.umount()

        # fuse_daemon is still None if mount() was never called
        if self.fuse_daemon is not None and not self.fuse_daemon.finished:
            self.fuse_daemon.stdin.close()
            try:
                self.fuse_daemon.wait()
            except CommandFailedError:
                pass

        # Indiscriminate, unlike the touchier cleanup()
        self.client_remote.run(
            args=[
                'rm',
                '-rf',
                self.mountpoint,
            ],
        )

    def _admin_socket(self, args):
        """
        Run an admin socket command against this client and return the
        decoded JSON output.

        :param args: list of arguments following 'ceph --admin-daemon <path>'
        """
        # Raw string: the regex backslashes must reach the remote
        # interpreter untouched (same text as before, minus the invalid
        # escape sequences in this file).
        pyscript = r"""
import glob
import re
import os
import subprocess

def find_socket(client_name):
        files = glob.glob("/var/run/ceph/ceph-{{client_name}}.*.asok".format(client_name=client_name))
        for f in files:
                pid = re.match(".*\.(\d+)\.asok$", f).group(1)
                if os.path.exists("/proc/{{0}}".format(pid)):
                        return f
        raise RuntimeError("Client socket {{0}} not found".format(client_name))

print find_socket("{client_name}")
""".format(client_name="client.{0}".format(self.client_id))

        # Find the admin socket
        p = self.client_remote.run(args=[
            'python', '-c', pyscript
        ], stdout=StringIO())
        asok_path = p.stdout.getvalue().strip()
        log.info("Found client admin socket at {0}".format(asok_path))

        # Query client ID from admin socket
        p = self.client_remote.run(
            args=['sudo', 'ceph', '--admin-daemon', asok_path] + args,
            stdout=StringIO())
        return json.loads(p.stdout.getvalue())

    def get_global_id(self):
        """
        Look up the CephFS client ID for this mount
        """

        return self._admin_socket(['mds_sessions'])['id']

    def get_osd_epoch(self):
        """
        Return 2-tuple of osd_epoch, osd_epoch_barrier
        """
        status = self._admin_socket(['status'])
        return status['osd_epoch'], status['osd_epoch_barrier']
teuthology.orchestra import remote as orchestra_remote +from teuthology.orchestra import run +from .mount import CephFSMount + +log = logging.getLogger(__name__) + + +class KernelMount(CephFSMount): + def __init__(self, mons, test_dir, client_id, client_remote, + ipmi_user, ipmi_password, ipmi_domain): + super(KernelMount, self).__init__(test_dir, client_id, client_remote) + self.mons = mons + + self.mounted = False + self.ipmi_user = ipmi_user + self.ipmi_password = ipmi_password + self.ipmi_domain = ipmi_domain + + def write_secret_file(self, remote, role, keyring, filename): + """ + Stash the keyring in the filename specified. + """ + remote.run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=self.test_dir), + 'ceph-authtool', + '--name={role}'.format(role=role), + '--print-key', + keyring, + run.Raw('>'), + filename, + ], + ) + + def mount(self): + log.info('Mounting kclient client.{id} at {remote} {mnt}...'.format( + id=self.client_id, remote=self.client_remote, mnt=self.mountpoint)) + + keyring = '/etc/ceph/ceph.client.{id}.keyring'.format(id=self.client_id) + secret = '{tdir}/data/client.{id}.secret'.format(tdir=self.test_dir, id=self.client_id) + self.write_secret_file(self.client_remote, 'client.{id}'.format(id=self.client_id), + keyring, secret) + + self.client_remote.run( + args=[ + 'mkdir', + '--', + self.mountpoint, + ], + ) + + self.client_remote.run( + args=[ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=self.test_dir), + '/sbin/mount.ceph', + '{mons}:/'.format(mons=','.join(self.mons)), + self.mountpoint, + '-v', + '-o', + 'name={id},secretfile={secret}'.format(id=self.client_id, + secret=secret), + ], + ) + + self.client_remote.run( + args=['sudo', 'chmod', '1777', self.mountpoint]) + + self.mounted = True + + def umount(self): + log.debug('Unmounting client client.{id}...'.format(id=self.client_id)) + self.client_remote.run( + args=[ + 'sudo', + 'umount', + 
self.mountpoint, + ], + ) + self.client_remote.run( + args=[ + 'rmdir', + '--', + self.mountpoint, + ], + ) + self.mounted = False + + def cleanup(self): + pass + + def umount_wait(self, force=False): + """ + Unlike the fuse client, the kernel client's umount is immediate + """ + try: + self.umount() + except CommandFailedError: + if not force: + raise + + self.kill() + self.kill_cleanup() + + self.mounted = False + + def is_mounted(self): + return self.mounted + + def wait_until_mounted(self): + """ + Unlike the fuse client, the kernel client is up and running as soon + as the initial mount() function returns. + """ + assert self.mounted + + def teardown(self): + super(KernelMount, self).teardown() + if self.mounted: + self.umount() + + def kill(self): + """ + The Ceph kernel client doesn't have a mechanism to kill itself (doing + that in side the kernel would be weird anyway), so we reboot the whole node + to get the same effect. + + We use IPMI to reboot, because we don't want the client to send any + releases of capabilities. 
+ """ + + con = orchestra_remote.getRemoteConsole(self.client_remote.hostname, + self.ipmi_user, + self.ipmi_password, + self.ipmi_domain) + con.power_off() + + self.mounted = False + + def kill_cleanup(self): + assert not self.mounted + + con = orchestra_remote.getRemoteConsole(self.client_remote.hostname, + self.ipmi_user, + self.ipmi_password, + self.ipmi_domain) + con.power_on() + + # Wait for node to come back up after reboot + misc.reconnect(None, 300, [self.client_remote]) + + # Remove mount directory + self.client_remote.run( + args=[ + 'rmdir', + '--', + self.mountpoint, + ], + ) + + def _find_debug_dir(self): + """ + Find the debugfs folder for this mount + """ + pyscript = dedent(""" + import glob + import os + import json + + def get_id_to_dir(): + result = {} + for dir in glob.glob("/sys/kernel/debug/ceph/*"): + mds_sessions_lines = open(os.path.join(dir, "mds_sessions")).readlines() + client_id = mds_sessions_lines[1].split()[1].strip('"') + + result[client_id] = dir + return result + + print json.dumps(get_id_to_dir()) + """) + + p = self.client_remote.run(args=[ + 'sudo', 'python', '-c', pyscript + ], stdout=StringIO()) + client_id_to_dir = json.loads(p.stdout.getvalue()) + + try: + return client_id_to_dir[self.client_id] + except KeyError: + log.error("Client id '{0}' debug dir not found (clients seen were: {1})".format( + self.client_id, ",".join(client_id_to_dir.keys()) + )) + raise + + def _read_debug_file(self, filename): + debug_dir = self._find_debug_dir() + + pyscript = dedent(""" + import os + + print open(os.path.join("{debug_dir}", "{filename}")).read() + """).format(debug_dir=debug_dir, filename=filename) + + p = self.client_remote.run(args=[ + 'sudo', 'python', '-c', pyscript + ], stdout=StringIO()) + return p.stdout.getvalue() + + def get_global_id(self): + """ + Look up the CephFS client ID for this mount, using debugfs. 
+ """ + + assert self.mounted + + mds_sessions = self._read_debug_file("mds_sessions") + lines = mds_sessions.split("\n") + return int(lines[0].split()[1]) + + def get_osd_epoch(self): + """ + Return 2-tuple of osd_epoch, osd_epoch_barrier + """ + osd_map = self._read_debug_file("osdmap") + lines = osd_map.split("\n") + epoch = int(lines[0].split()[1]) + + mds_sessions = self._read_debug_file("mds_sessions") + lines = mds_sessions.split("\n") + epoch_barrier = int(lines[2].split()[1].strip('"')) + + return epoch, epoch_barrier \ No newline at end of file diff --git a/qa/tasks/cephfs/mount.py b/qa/tasks/cephfs/mount.py new file mode 100644 index 00000000000..1413b818f3b --- /dev/null +++ b/qa/tasks/cephfs/mount.py @@ -0,0 +1,367 @@ +from contextlib import contextmanager +import logging +import datetime +import time +from textwrap import dedent +import os +from StringIO import StringIO +from teuthology.orchestra import run +from teuthology.orchestra.run import CommandFailedError, ConnectionLostError + +log = logging.getLogger(__name__) + + +class CephFSMount(object): + def __init__(self, test_dir, client_id, client_remote): + """ + :param test_dir: Global teuthology test dir + :param client_id: Client ID, the 'foo' in client.foo + :param client_remote: Remote instance for the host where client will run + """ + + self.test_dir = test_dir + self.client_id = client_id + self.client_remote = client_remote + + self.mountpoint = os.path.join(self.test_dir, 'mnt.{id}'.format(id=self.client_id)) + self.test_files = ['a', 'b', 'c'] + + self.background_procs = [] + + def is_mounted(self): + raise NotImplementedError() + + def mount(self): + raise NotImplementedError() + + def umount(self): + raise NotImplementedError() + + def umount_wait(self, force=False): + raise NotImplementedError() + + def kill_cleanup(self): + raise NotImplementedError() + + def kill(self): + raise NotImplementedError() + + def cleanup(self): + raise NotImplementedError() + + def 
wait_until_mounted(self): + raise NotImplementedError() + + @contextmanager + def mounted(self): + """ + A context manager, from an initially unmounted state, to mount + this, yield, and then unmount and clean up. + """ + self.mount() + self.wait_until_mounted() + try: + yield + finally: + self.umount_wait() + + def create_files(self): + assert(self.is_mounted()) + + for suffix in self.test_files: + log.info("Creating file {0}".format(suffix)) + self.client_remote.run(args=[ + 'sudo', 'touch', os.path.join(self.mountpoint, suffix) + ]) + + def check_files(self): + assert(self.is_mounted()) + + for suffix in self.test_files: + log.info("Checking file {0}".format(suffix)) + r = self.client_remote.run(args=[ + 'sudo', 'ls', os.path.join(self.mountpoint, suffix) + ], check_status=False) + if r.exitstatus != 0: + raise RuntimeError("Expected file {0} not found".format(suffix)) + + def create_destroy(self): + assert(self.is_mounted()) + + filename = "{0} {1}".format(datetime.datetime.now(), self.client_id) + log.debug("Creating test file {0}".format(filename)) + self.client_remote.run(args=[ + 'sudo', 'touch', os.path.join(self.mountpoint, filename) + ]) + log.debug("Deleting test file {0}".format(filename)) + self.client_remote.run(args=[ + 'sudo', 'rm', '-f', os.path.join(self.mountpoint, filename) + ]) + + def _run_python(self, pyscript): + return self.client_remote.run(args=[ + 'sudo', 'adjust-ulimits', 'daemon-helper', 'kill', 'python', '-c', pyscript + ], wait=False, stdin=run.PIPE, stdout=StringIO()) + + def run_python(self, pyscript): + p = self._run_python(pyscript) + p.wait() + + def run_shell(self, args, wait=True): + args = ["cd", self.mountpoint, run.Raw('&&')] + args + return self.client_remote.run(args=args, stdout=StringIO(), wait=wait) + + def open_no_data(self, basename): + """ + A pure metadata operation + """ + assert(self.is_mounted()) + + path = os.path.join(self.mountpoint, basename) + + p = self._run_python(dedent( + """ + f = open("{path}", 'w') 
+ """.format(path=path) + )) + p.wait() + + def open_background(self, basename="background_file"): + """ + Open a file for writing, then block such that the client + will hold a capability + """ + assert(self.is_mounted()) + + path = os.path.join(self.mountpoint, basename) + + pyscript = dedent(""" + import time + + f = open("{path}", 'w') + f.write('content') + f.flush() + f.write('content2') + while True: + time.sleep(1) + """).format(path=path) + + rproc = self._run_python(pyscript) + self.background_procs.append(rproc) + return rproc + + def wait_for_visible(self, basename="background_file", timeout=30): + i = 0 + while i < timeout: + r = self.client_remote.run(args=[ + 'sudo', 'ls', os.path.join(self.mountpoint, basename) + ], check_status=False) + if r.exitstatus == 0: + log.debug("File {0} became visible from {1} after {2}s".format( + basename, self.client_id, i)) + return + else: + time.sleep(1) + i += 1 + + raise RuntimeError("Timed out after {0}s waiting for {1} to become visible from {2}".format( + i, basename, self.client_id)) + + def lock_background(self, basename="background_file"): + """ + Open and lock a files for writing, hold the lock in a background process + """ + assert(self.is_mounted()) + + path = os.path.join(self.mountpoint, basename) + + pyscript = dedent(""" + import time + import fcntl + import struct + + f1 = open("{path}-1", 'w') + fcntl.flock(f1, fcntl.LOCK_EX | fcntl.LOCK_NB) + + f2 = open("{path}-2", 'w') + lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0) + fcntl.fcntl(f2, fcntl.F_SETLK, lockdata) + while True: + time.sleep(1) + """).format(path=path) + + log.info("lock file {0}".format(basename)) + rproc = self._run_python(pyscript) + self.background_procs.append(rproc) + return rproc + + def check_filelock(self, basename="background_file"): + assert(self.is_mounted()) + + path = os.path.join(self.mountpoint, basename) + + pyscript = dedent(""" + import fcntl + import errno + import struct + + f1 = open("{path}-1", 
'r') + try: + fcntl.flock(f1, fcntl.LOCK_EX | fcntl.LOCK_NB) + except IOError, e: + if e.errno == errno.EAGAIN: + pass + else: + raise RuntimeError("flock on file {path}-1 not found") + + f2 = open("{path}-2", 'r') + try: + lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0) + fcntl.fcntl(f2, fcntl.F_SETLK, lockdata) + except IOError, e: + if e.errno == errno.EAGAIN: + pass + else: + raise RuntimeError("posix lock on file {path}-2 not found") + """).format(path=path) + + log.info("check lock on file {0}".format(basename)) + self.client_remote.run(args=[ + 'sudo', 'python', '-c', pyscript + ]) + + def write_background(self, basename="background_file", loop=False): + """ + Open a file for writing, complete as soon as you can + :param basename: + :return: + """ + assert(self.is_mounted()) + + path = os.path.join(self.mountpoint, basename) + + pyscript = dedent(""" + import os + import time + + fd = os.open("{path}", os.O_RDWR | os.O_CREAT, 0644) + try: + while True: + os.write(fd, 'content') + time.sleep(1) + if not {loop}: + break + except IOError, e: + pass + os.close(fd) + """).format(path=path, loop=str(loop)) + + rproc = self._run_python(pyscript) + self.background_procs.append(rproc) + return rproc + + def write_n_mb(self, filename, n_mb, seek=0): + """ + Write the requested number of megabytes to a file + """ + assert(self.is_mounted()) + + self.run_shell(["dd", "if=/dev/urandom", "of={0}".format(filename), + "bs=1M", + "count={0}".format(n_mb), + "seek={0}".format(seek) + ]) + + def open_n_background(self, fs_path, count): + """ + Open N files for writing, hold them open in a background process + + :param fs_path: Path relative to CephFS root, e.g. 
"foo/bar" + :return: a RemoteProcess + """ + assert(self.is_mounted()) + + abs_path = os.path.join(self.mountpoint, fs_path) + + pyscript = dedent(""" + import sys + import time + import os + + n = {count} + abs_path = "{abs_path}" + + if not os.path.exists(os.path.dirname(abs_path)): + os.makedirs(os.path.dirname(abs_path)) + + handles = [] + for i in range(0, n): + fname = "{{0}}_{{1}}".format(abs_path, i) + handles.append(open(fname, 'w')) + + while True: + time.sleep(1) + """).format(abs_path=abs_path, count=count) + + rproc = self._run_python(pyscript) + self.background_procs.append(rproc) + return rproc + + def teardown(self): + for p in self.background_procs: + log.info("Terminating background process") + if p.stdin: + p.stdin.close() + try: + p.wait() + except (CommandFailedError, ConnectionLostError): + pass + + def spam_dir_background(self, path): + """ + Create directory `path` and do lots of metadata operations + in it until further notice. + """ + assert(self.is_mounted()) + abs_path = os.path.join(self.mountpoint, path) + + pyscript = dedent(""" + import sys + import time + import os + + abs_path = "{abs_path}" + + if not os.path.exists(abs_path): + os.makedirs(abs_path) + + n = 0 + while True: + file_path = os.path.join(abs_path, "tmp%d" % n) + f = open(file_path, 'w') + f.close() + n = n + 1 + """).format(abs_path=abs_path) + + rproc = self._run_python(pyscript) + self.background_procs.append(rproc) + return rproc + + def get_global_id(self): + raise NotImplementedError() + + def get_osd_epoch(self): + raise NotImplementedError() + + def path_to_ino(self, fs_path): + abs_path = os.path.join(self.mountpoint, fs_path) + + pyscript = dedent(""" + import os + import stat + + print os.stat("{path}").st_ino + """).format(path=abs_path) + proc = self._run_python(pyscript) + proc.wait() + return int(proc.stdout.getvalue().strip()) diff --git a/qa/tasks/chef.py b/qa/tasks/chef.py new file mode 100644 index 00000000000..9a9f1bc2c82 --- /dev/null +++ 
b/qa/tasks/chef.py @@ -0,0 +1,35 @@ +""" +Chef-solo task +""" +import logging + +from teuthology.orchestra import run +from teuthology import misc + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Run chef-solo on all nodes. + """ + log.info('Running chef-solo...') + + run.wait( + ctx.cluster.run( + args=[ + 'wget', +# '-q', + '-O-', +# 'https://raw.github.com/ceph/ceph-qa-chef/master/solo/solo-from-scratch', + 'http://git.ceph.com/?p=ceph-qa-chef.git;a=blob_plain;f=solo/solo-from-scratch;hb=HEAD', + run.Raw('|'), + 'sh', + '-x', + ], + wait=False, + ) + ) + + log.info('Reconnecting after ceph-qa-chef run') + misc.reconnect(ctx, 10) #Reconnect for ulimit and other ceph-qa-chef changes + diff --git a/qa/tasks/cifs_mount.py b/qa/tasks/cifs_mount.py new file mode 100644 index 00000000000..b282b0b7dfb --- /dev/null +++ b/qa/tasks/cifs_mount.py @@ -0,0 +1,137 @@ +""" +Mount cifs clients. Unmount when finished. +""" +import contextlib +import logging +import os + +from teuthology import misc as teuthology +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Mount/unmount a cifs client. + + The config is optional and defaults to mounting on all clients. If + a config is given, it is expected to be a list of clients to do + this operation on. 
+ + Example that starts smbd and mounts cifs on all nodes:: + + tasks: + - ceph: + - samba: + - cifs-mount: + - interactive: + + Example that splits smbd and cifs: + + tasks: + - ceph: + - samba: [samba.0] + - cifs-mount: [client.0] + - ceph-fuse: [client.1] + - interactive: + + Example that specifies the share name: + + tasks: + - ceph: + - ceph-fuse: + - samba: + samba.0: + cephfuse: "{testdir}/mnt.0" + - cifs-mount: + client.0: + share: cephfuse + + :param ctx: Context + :param config: Configuration + """ + log.info('Mounting cifs clients...') + + if config is None: + config = dict(('client.{id}'.format(id=id_), None) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')) + elif isinstance(config, list): + config = dict((name, None) for name in config) + + clients = list(teuthology.get_clients(ctx=ctx, roles=config.keys())) + + from .samba import get_sambas + samba_roles = ['samba.{id_}'.format(id_=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'samba')] + sambas = list(get_sambas(ctx=ctx, roles=samba_roles)) + (ip, _) = sambas[0][1].ssh.get_transport().getpeername() + log.info('samba ip: {ip}'.format(ip=ip)) + + for id_, remote in clients: + mnt = os.path.join(teuthology.get_testdir(ctx), 'mnt.{id}'.format(id=id_)) + log.info('Mounting cifs client.{id} at {remote} {mnt}...'.format( + id=id_, remote=remote,mnt=mnt)) + + remote.run( + args=[ + 'mkdir', + '--', + mnt, + ], + ) + + rolestr = 'client.{id_}'.format(id_=id_) + unc = "ceph" + log.info("config: {c}".format(c=config)) + if config[rolestr] is not None and 'share' in config[rolestr]: + unc = config[rolestr]['share'] + + remote.run( + args=[ + 'sudo', + 'mount', + '-t', + 'cifs', + '//{sambaip}/{unc}'.format(sambaip=ip, unc=unc), + '-o', + 'username=ubuntu,password=ubuntu', + mnt, + ], + ) + + remote.run( + args=[ + 'sudo', + 'chown', + 'ubuntu:ubuntu', + '{m}/'.format(m=mnt), + ], + ) + + try: + yield + finally: + log.info('Unmounting cifs clients...') + for id_, remote in clients: + 
remote.run( + args=[ + 'sudo', + 'umount', + mnt, + ], + ) + for id_, remote in clients: + while True: + try: + remote.run( + args=[ + 'rmdir', '--', mnt, + run.Raw('2>&1'), + run.Raw('|'), + 'grep', 'Device or resource busy', + ], + ) + import time + time.sleep(1) + except Exception: + break diff --git a/qa/tasks/cram.py b/qa/tasks/cram.py new file mode 100644 index 00000000000..05138af4de0 --- /dev/null +++ b/qa/tasks/cram.py @@ -0,0 +1,135 @@ +""" +Cram tests +""" +import logging +import os + +from teuthology import misc as teuthology +from teuthology.parallel import parallel +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Run all cram tests from the specified urls on the specified + clients. Each client runs tests in parallel. + + Limitations: + Tests must have a .t suffix. Tests with duplicate names will + overwrite each other, so only the last one will run. + + For example:: + + tasks: + - ceph: + - cram: + clients: + client.0: + - http://ceph.com/qa/test.t + - http://ceph.com/qa/test2.t] + client.1: [http://ceph.com/qa/test.t] + + You can also run a list of cram tests on all clients:: + + tasks: + - ceph: + - cram: + clients: + all: [http://ceph.com/qa/test.t] + + :param ctx: Context + :param config: Configuration + """ + assert isinstance(config, dict) + assert 'clients' in config and isinstance(config['clients'], dict), \ + 'configuration must contain a dictionary of clients' + + clients = teuthology.replace_all_with_clients(ctx.cluster, + config['clients']) + testdir = teuthology.get_testdir(ctx) + + try: + for client, tests in clients.iteritems(): + (remote,) = ctx.cluster.only(client).remotes.iterkeys() + client_dir = '{tdir}/archive/cram.{role}'.format(tdir=testdir, role=client) + remote.run( + args=[ + 'mkdir', '--', client_dir, + run.Raw('&&'), + 'virtualenv', '{tdir}/virtualenv'.format(tdir=testdir), + run.Raw('&&'), + '{tdir}/virtualenv/bin/pip'.format(tdir=testdir), + 'install', 
'cram==0.6', + ], + ) + for test in tests: + log.info('fetching test %s for %s', test, client) + assert test.endswith('.t'), 'tests must end in .t' + remote.run( + args=[ + 'wget', '-nc', '-nv', '-P', client_dir, '--', test, + ], + ) + + with parallel() as p: + for role in clients.iterkeys(): + p.spawn(_run_tests, ctx, role) + finally: + for client, tests in clients.iteritems(): + (remote,) = ctx.cluster.only(client).remotes.iterkeys() + client_dir = '{tdir}/archive/cram.{role}'.format(tdir=testdir, role=client) + test_files = set([test.rsplit('/', 1)[1] for test in tests]) + + # remove test files unless they failed + for test_file in test_files: + abs_file = os.path.join(client_dir, test_file) + remote.run( + args=[ + 'test', '-f', abs_file + '.err', + run.Raw('||'), + 'rm', '-f', '--', abs_file, + ], + ) + + # ignore failure since more than one client may + # be run on a host, and the client dir should be + # non-empty if the test failed + remote.run( + args=[ + 'rm', '-rf', '--', + '{tdir}/virtualenv'.format(tdir=testdir), + run.Raw(';'), + 'rmdir', '--ignore-fail-on-non-empty', client_dir, + ], + ) + +def _run_tests(ctx, role): + """ + For each role, check to make sure it's a client, then run the cram on that client + + :param ctx: Context + :param role: Roles + """ + assert isinstance(role, basestring) + PREFIX = 'client.' 
+ assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + (remote,) = ctx.cluster.only(role).remotes.iterkeys() + ceph_ref = ctx.summary.get('ceph-sha1', 'master') + + testdir = teuthology.get_testdir(ctx) + log.info('Running tests for %s...', role) + remote.run( + args=[ + run.Raw('CEPH_REF={ref}'.format(ref=ceph_ref)), + run.Raw('CEPH_ID="{id}"'.format(id=id_)), + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + '{tdir}/virtualenv/bin/cram'.format(tdir=testdir), + '-v', '--', + run.Raw('{tdir}/archive/cram.{role}/*.t'.format(tdir=testdir, role=role)), + ], + logger=log.getChild(role), + ) diff --git a/qa/tasks/devstack.py b/qa/tasks/devstack.py new file mode 100644 index 00000000000..c5cd41b06bd --- /dev/null +++ b/qa/tasks/devstack.py @@ -0,0 +1,382 @@ +#!/usr/bin/env python +import contextlib +import logging +from cStringIO import StringIO +import textwrap +from configparser import ConfigParser +import time + +from teuthology.orchestra import run +from teuthology import misc +from teuthology.contextutil import nested + +log = logging.getLogger(__name__) + +DEVSTACK_GIT_REPO = 'https://github.com/openstack-dev/devstack.git' +DS_STABLE_BRANCHES = ("havana", "grizzly") + +is_devstack_node = lambda role: role.startswith('devstack') +is_osd_node = lambda role: role.startswith('osd') + + +@contextlib.contextmanager +def task(ctx, config): + if config is None: + config = {} + if not isinstance(config, dict): + raise TypeError("config must be a dict") + with nested(lambda: install(ctx=ctx, config=config), + lambda: smoke(ctx=ctx, config=config), + ): + yield + + +@contextlib.contextmanager +def install(ctx, config): + """ + Install OpenStack DevStack and configure it to use a Ceph cluster for + Glance and Cinder. + + Requires one node with a role 'devstack' + + Since devstack runs rampant on the system it's used on, typically you will + want to reprovision that machine after using devstack on it. 
+ + Also, the default 2GB of RAM that is given to vps nodes is insufficient. I + recommend 4GB. Downburst can be instructed to give 4GB to a vps node by + adding this to the yaml: + + downburst: + ram: 4G + + This was created using documentation found here: + https://github.com/openstack-dev/devstack/blob/master/README.md + http://ceph.com/docs/master/rbd/rbd-openstack/ + """ + if config is None: + config = {} + if not isinstance(config, dict): + raise TypeError("config must be a dict") + + devstack_node = ctx.cluster.only(is_devstack_node).remotes.keys()[0] + an_osd_node = ctx.cluster.only(is_osd_node).remotes.keys()[0] + + devstack_branch = config.get("branch", "master") + install_devstack(devstack_node, devstack_branch) + try: + configure_devstack_and_ceph(ctx, config, devstack_node, an_osd_node) + yield + finally: + pass + + +def install_devstack(devstack_node, branch="master"): + log.info("Cloning DevStack repo...") + + args = ['git', 'clone', DEVSTACK_GIT_REPO] + devstack_node.run(args=args) + + if branch != "master": + if branch in DS_STABLE_BRANCHES and not branch.startswith("stable"): + branch = "stable/" + branch + log.info("Checking out {branch} branch...".format(branch=branch)) + cmd = "cd devstack && git checkout " + branch + devstack_node.run(args=cmd) + + log.info("Installing DevStack...") + args = ['cd', 'devstack', run.Raw('&&'), './stack.sh'] + devstack_node.run(args=args) + + +def configure_devstack_and_ceph(ctx, config, devstack_node, ceph_node): + pool_size = config.get('pool_size', '128') + create_pools(ceph_node, pool_size) + distribute_ceph_conf(devstack_node, ceph_node) + # This is where we would install python-ceph and ceph-common but it appears + # the ceph task does that for us. 
+ generate_ceph_keys(ceph_node) + distribute_ceph_keys(devstack_node, ceph_node) + secret_uuid = set_libvirt_secret(devstack_node, ceph_node) + update_devstack_config_files(devstack_node, secret_uuid) + set_apache_servername(devstack_node) + # Rebooting is the most-often-used method of restarting devstack services + misc.reboot(devstack_node) + start_devstack(devstack_node) + restart_apache(devstack_node) + + +def create_pools(ceph_node, pool_size): + log.info("Creating pools on Ceph cluster...") + + for pool_name in ['volumes', 'images', 'backups']: + args = ['ceph', 'osd', 'pool', 'create', pool_name, pool_size] + ceph_node.run(args=args) + + +def distribute_ceph_conf(devstack_node, ceph_node): + log.info("Copying ceph.conf to DevStack node...") + + ceph_conf_path = '/etc/ceph/ceph.conf' + ceph_conf = misc.get_file(ceph_node, ceph_conf_path, sudo=True) + misc.sudo_write_file(devstack_node, ceph_conf_path, ceph_conf) + + +def generate_ceph_keys(ceph_node): + log.info("Generating Ceph keys...") + + ceph_auth_cmds = [ + ['ceph', 'auth', 'get-or-create', 'client.cinder', 'mon', + 'allow r', 'osd', 'allow class-read object_prefix rbd_children, allow rwx pool=volumes, allow rx pool=images'], # noqa + ['ceph', 'auth', 'get-or-create', 'client.glance', 'mon', + 'allow r', 'osd', 'allow class-read object_prefix rbd_children, allow rwx pool=images'], # noqa + ['ceph', 'auth', 'get-or-create', 'client.cinder-backup', 'mon', + 'allow r', 'osd', 'allow class-read object_prefix rbd_children, allow rwx pool=backups'], # noqa + ] + for cmd in ceph_auth_cmds: + ceph_node.run(args=cmd) + + +def distribute_ceph_keys(devstack_node, ceph_node): + log.info("Copying Ceph keys to DevStack node...") + + def copy_key(from_remote, key_name, to_remote, dest_path, owner): + key_stringio = StringIO() + from_remote.run( + args=['ceph', 'auth', 'get-or-create', key_name], + stdout=key_stringio) + key_stringio.seek(0) + misc.sudo_write_file(to_remote, dest_path, + key_stringio, owner=owner) + 
keys = [ + dict(name='client.glance', + path='/etc/ceph/ceph.client.glance.keyring', + # devstack appears to just want root:root + #owner='glance:glance', + ), + dict(name='client.cinder', + path='/etc/ceph/ceph.client.cinder.keyring', + # devstack appears to just want root:root + #owner='cinder:cinder', + ), + dict(name='client.cinder-backup', + path='/etc/ceph/ceph.client.cinder-backup.keyring', + # devstack appears to just want root:root + #owner='cinder:cinder', + ), + ] + for key_dict in keys: + copy_key(ceph_node, key_dict['name'], devstack_node, + key_dict['path'], key_dict.get('owner')) + + +def set_libvirt_secret(devstack_node, ceph_node): + log.info("Setting libvirt secret...") + + cinder_key_stringio = StringIO() + ceph_node.run(args=['ceph', 'auth', 'get-key', 'client.cinder'], + stdout=cinder_key_stringio) + cinder_key = cinder_key_stringio.getvalue().strip() + + uuid_stringio = StringIO() + devstack_node.run(args=['uuidgen'], stdout=uuid_stringio) + uuid = uuid_stringio.getvalue().strip() + + secret_path = '/tmp/secret.xml' + secret_template = textwrap.dedent(""" + + {uuid} + + client.cinder secret + + """) + misc.sudo_write_file(devstack_node, secret_path, + secret_template.format(uuid=uuid)) + devstack_node.run(args=['sudo', 'virsh', 'secret-define', '--file', + secret_path]) + devstack_node.run(args=['sudo', 'virsh', 'secret-set-value', '--secret', + uuid, '--base64', cinder_key]) + return uuid + + +def update_devstack_config_files(devstack_node, secret_uuid): + log.info("Updating DevStack config files to use Ceph...") + + def backup_config(node, file_name, backup_ext='.orig.teuth'): + node.run(args=['cp', '-f', file_name, file_name + backup_ext]) + + def update_config(config_name, config_stream, update_dict, + section='DEFAULT'): + parser = ConfigParser() + parser.read_file(config_stream) + for (key, value) in update_dict.items(): + parser.set(section, key, value) + out_stream = StringIO() + parser.write(out_stream) + out_stream.seek(0) + return 
out_stream + + updates = [ + dict(name='/etc/glance/glance-api.conf', options=dict( + default_store='rbd', + rbd_store_user='glance', + rbd_store_pool='images', + show_image_direct_url='True',)), + dict(name='/etc/cinder/cinder.conf', options=dict( + volume_driver='cinder.volume.drivers.rbd.RBDDriver', + rbd_pool='volumes', + rbd_ceph_conf='/etc/ceph/ceph.conf', + rbd_flatten_volume_from_snapshot='false', + rbd_max_clone_depth='5', + glance_api_version='2', + rbd_user='cinder', + rbd_secret_uuid=secret_uuid, + backup_driver='cinder.backup.drivers.ceph', + backup_ceph_conf='/etc/ceph/ceph.conf', + backup_ceph_user='cinder-backup', + backup_ceph_chunk_size='134217728', + backup_ceph_pool='backups', + backup_ceph_stripe_unit='0', + backup_ceph_stripe_count='0', + restore_discard_excess_bytes='true', + )), + dict(name='/etc/nova/nova.conf', options=dict( + libvirt_images_type='rbd', + libvirt_images_rbd_pool='volumes', + libvirt_images_rbd_ceph_conf='/etc/ceph/ceph.conf', + rbd_user='cinder', + rbd_secret_uuid=secret_uuid, + libvirt_inject_password='false', + libvirt_inject_key='false', + libvirt_inject_partition='-2', + )), + ] + + for update in updates: + file_name = update['name'] + options = update['options'] + config_str = misc.get_file(devstack_node, file_name, sudo=True) + config_stream = StringIO(config_str) + backup_config(devstack_node, file_name) + new_config_stream = update_config(file_name, config_stream, options) + misc.sudo_write_file(devstack_node, file_name, new_config_stream) + + +def set_apache_servername(node): + # Apache complains: "Could not reliably determine the server's fully + # qualified domain name, using 127.0.0.1 for ServerName" + # So, let's make sure it knows its name. 
+ log.info("Setting Apache ServerName...") + + hostname = node.hostname + config_file = '/etc/apache2/conf.d/servername' + misc.sudo_write_file(node, config_file, + "ServerName {name}".format(name=hostname)) + + +def start_devstack(devstack_node): + log.info("Patching devstack start script...") + # This causes screen to start headless - otherwise rejoin-stack.sh fails + # because there is no terminal attached. + cmd = "cd devstack && sed -ie 's/screen -c/screen -dm -c/' rejoin-stack.sh" + devstack_node.run(args=cmd) + + log.info("Starting devstack...") + cmd = "cd devstack && ./rejoin-stack.sh" + devstack_node.run(args=cmd) + + # This was added because I was getting timeouts on Cinder requests - which + # were trying to access Keystone on port 5000. A more robust way to handle + # this would be to introduce a wait-loop on devstack_node that checks to + # see if a service is listening on port 5000. + log.info("Waiting 30s for devstack to start...") + time.sleep(30) + + +def restart_apache(node): + node.run(args=['sudo', '/etc/init.d/apache2', 'restart'], wait=True) + + +@contextlib.contextmanager +def exercise(ctx, config): + log.info("Running devstack exercises...") + + if config is None: + config = {} + if not isinstance(config, dict): + raise TypeError("config must be a dict") + + devstack_node = ctx.cluster.only(is_devstack_node).remotes.keys()[0] + + # TODO: save the log *and* preserve failures + #devstack_archive_dir = create_devstack_archive(ctx, devstack_node) + + try: + #cmd = "cd devstack && ./exercise.sh 2>&1 | tee {dir}/exercise.log".format( # noqa + # dir=devstack_archive_dir) + cmd = "cd devstack && ./exercise.sh" + devstack_node.run(args=cmd, wait=True) + yield + finally: + pass + + +def create_devstack_archive(ctx, devstack_node): + test_dir = misc.get_testdir(ctx) + devstack_archive_dir = "{test_dir}/archive/devstack".format( + test_dir=test_dir) + devstack_node.run(args="mkdir -p " + devstack_archive_dir) + return devstack_archive_dir + + 
+@contextlib.contextmanager +def smoke(ctx, config): + log.info("Running a basic smoketest...") + + devstack_node = ctx.cluster.only(is_devstack_node).remotes.keys()[0] + an_osd_node = ctx.cluster.only(is_osd_node).remotes.keys()[0] + + try: + create_volume(devstack_node, an_osd_node, 'smoke0', 1) + yield + finally: + pass + + +def create_volume(devstack_node, ceph_node, vol_name, size): + """ + :param size: The size of the volume, in GB + """ + size = str(size) + log.info("Creating a {size}GB volume named {name}...".format( + name=vol_name, + size=size)) + args = ['source', 'devstack/openrc', run.Raw('&&'), 'cinder', 'create', + '--display-name', vol_name, size] + out_stream = StringIO() + devstack_node.run(args=args, stdout=out_stream, wait=True) + vol_info = parse_os_table(out_stream.getvalue()) + log.debug("Volume info: %s", str(vol_info)) + + out_stream = StringIO() + try: + ceph_node.run(args="rbd --id cinder ls -l volumes", stdout=out_stream, + wait=True) + except run.CommandFailedError: + log.debug("Original rbd call failed; retrying without '--id cinder'") + ceph_node.run(args="rbd ls -l volumes", stdout=out_stream, + wait=True) + + assert vol_info['id'] in out_stream.getvalue(), \ + "Volume not found on Ceph cluster" + assert vol_info['size'] == size, \ + "Volume size on Ceph cluster is different than specified" + return vol_info['id'] + + +def parse_os_table(table_str): + out_dict = dict() + for line in table_str.split('\n'): + if line.startswith('|'): + items = line.split() + out_dict[items[1]] = items[3] + return out_dict diff --git a/qa/tasks/die_on_err.py b/qa/tasks/die_on_err.py new file mode 100644 index 00000000000..bf422ae547d --- /dev/null +++ b/qa/tasks/die_on_err.py @@ -0,0 +1,70 @@ +""" +Raise exceptions on osd coredumps or test err directories +""" +import contextlib +import logging +import time +from teuthology.orchestra import run + +import ceph_manager +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + 
+@contextlib.contextmanager +def task(ctx, config): + """ + Die if {testdir}/err exists or if an OSD dumps core + """ + if config is None: + config = {} + + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + + num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') + log.info('num_osds is %s' % num_osds) + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < num_osds: + time.sleep(10) + + testdir = teuthology.get_testdir(ctx) + + while True: + for i in range(num_osds): + (osd_remote,) = ctx.cluster.only('osd.%d' % i).remotes.iterkeys() + p = osd_remote.run( + args = [ 'test', '-e', '{tdir}/err'.format(tdir=testdir) ], + wait=True, + check_status=False, + ) + exit_status = p.exitstatus + + if exit_status == 0: + log.info("osd %d has an error" % i) + raise Exception("osd %d error" % i) + + log_path = '/var/log/ceph/osd.%d.log' % (i) + + p = osd_remote.run( + args = [ + 'tail', '-1', log_path, + run.Raw('|'), + 'grep', '-q', 'end dump' + ], + wait=True, + check_status=False, + ) + exit_status = p.exitstatus + + if exit_status == 0: + log.info("osd %d dumped core" % i) + raise Exception("osd %d dumped core" % i) + + time.sleep(5) diff --git a/qa/tasks/divergent_priors.py b/qa/tasks/divergent_priors.py new file mode 100644 index 00000000000..d81ea472734 --- /dev/null +++ b/qa/tasks/divergent_priors.py @@ -0,0 +1,169 @@ +""" +Special case divergence test +""" +import logging +import time + +from teuthology import misc as teuthology +from util.rados import rados + + +log = logging.getLogger(__name__) + + +def task(ctx, config): + """ + Test handling of divergent entries with prior_version + prior to log_tail + + overrides: + ceph: + conf: + osd: + debug osd: 5 + + Requires 3 osds on a single test node. 
+ """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'divergent_priors task only accepts a dict for configuration' + + while len(ctx.manager.get_osd_status()['up']) < 3: + time.sleep(10) + ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + ctx.manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + ctx.manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + ctx.manager.raw_cluster_cmd('osd', 'set', 'noout') + ctx.manager.raw_cluster_cmd('osd', 'set', 'noin') + ctx.manager.raw_cluster_cmd('osd', 'set', 'nodown') + ctx.manager.wait_for_clean() + + # something that is always there + dummyfile = '/etc/fstab' + dummyfile2 = '/etc/resolv.conf' + + # create 1 pg pool + log.info('creating foo') + ctx.manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1') + + osds = [0, 1, 2] + for i in osds: + ctx.manager.set_config(i, osd_min_pg_log_entries=10) + ctx.manager.set_config(i, osd_max_pg_log_entries=10) + ctx.manager.set_config(i, osd_pg_log_trim_min=5) + + # determine primary + divergent = ctx.manager.get_pg_primary('foo', 0) + log.info("primary and soon to be divergent is %d", divergent) + non_divergent = list(osds) + non_divergent.remove(divergent) + + log.info('writing initial objects') + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + # write 100 objects + for i in range(100): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile]) + + ctx.manager.wait_for_clean() + + # blackhole non_divergent + log.info("blackholing osds %s", str(non_divergent)) + for i in non_divergent: + ctx.manager.set_config(i, filestore_blackhole=1) + + DIVERGENT_WRITE = 5 + DIVERGENT_REMOVE = 5 + # Write some soon to be divergent + log.info('writing divergent objects') + for i in range(DIVERGENT_WRITE): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, + dummyfile2], wait=False) + # Remove some soon to be divergent + log.info('remove divergent objects') + for 
i in range(DIVERGENT_REMOVE): + rados(ctx, mon, ['-p', 'foo', 'rm', + 'existing_%d' % (i + DIVERGENT_WRITE)], wait=False) + time.sleep(10) + mon.run( + args=['killall', '-9', 'rados'], + wait=True, + check_status=False) + + # kill all the osds but leave divergent in + log.info('killing all the osds') + for i in osds: + ctx.manager.kill_osd(i) + for i in osds: + ctx.manager.mark_down_osd(i) + for i in non_divergent: + ctx.manager.mark_out_osd(i) + + # bring up non-divergent + log.info("bringing up non_divergent %s", str(non_divergent)) + for i in non_divergent: + ctx.manager.revive_osd(i) + for i in non_divergent: + ctx.manager.mark_in_osd(i) + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE) + log.info('writing non-divergent object ' + objname) + rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2]) + + ctx.manager.wait_for_recovery() + + # ensure no recovery of up osds first + log.info('delay recovery') + for i in non_divergent: + ctx.manager.wait_run_admin_socket( + 'osd', i, ['set_recovery_delay', '100000']) + + # bring in our divergent friend + log.info("revive divergent %d", divergent) + ctx.manager.raw_cluster_cmd('osd', 'set', 'noup') + ctx.manager.revive_osd(divergent) + + log.info('delay recovery divergent') + ctx.manager.wait_run_admin_socket( + 'osd', divergent, ['set_recovery_delay', '100000']) + + ctx.manager.raw_cluster_cmd('osd', 'unset', 'noup') + while len(ctx.manager.get_osd_status()['up']) < 3: + time.sleep(10) + + log.info('wait for peering') + rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) + + # At this point the divergent_priors should have been detected + + log.info("killing divergent %d", divergent) + ctx.manager.kill_osd(divergent) + log.info("reviving divergent %d", divergent) + ctx.manager.revive_osd(divergent) + + time.sleep(20) + + log.info('allowing recovery') + # Set osd_recovery_delay_start back to 0 and kick the queue + for i 
in osds: + ctx.manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug', + 'kick_recovery_wq', ' 0') + + log.info('reading divergent objects') + for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE): + exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i, + '/tmp/existing']) + assert exit_status is 0 + + (remote,) = ctx.\ + cluster.only('osd.{o}'.format(o=divergent)).remotes.iterkeys() + msg = "dirty_divergent_priors: true, divergent_priors: %d" \ + % (DIVERGENT_WRITE + DIVERGENT_REMOVE) + cmd = 'grep "{msg}" /var/log/ceph/ceph-osd.{osd}.log'\ + .format(msg=msg, osd=divergent) + proc = remote.run(args=cmd, wait=True, check_status=False) + assert proc.exitstatus == 0 + + log.info("success") diff --git a/qa/tasks/divergent_priors2.py b/qa/tasks/divergent_priors2.py new file mode 100644 index 00000000000..78d6043a536 --- /dev/null +++ b/qa/tasks/divergent_priors2.py @@ -0,0 +1,205 @@ +""" +Special case divergence test with ceph-objectstore-tool export/remove/import +""" +import logging +import time +from cStringIO import StringIO + +from teuthology import misc as teuthology +from util.rados import rados +import os + + +log = logging.getLogger(__name__) + + +def task(ctx, config): + """ + Test handling of divergent entries with prior_version + prior to log_tail and a ceph-objectstore-tool export/import + + overrides: + ceph: + conf: + osd: + debug osd: 5 + + Requires 3 osds on a single test node. 
+ """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'divergent_priors task only accepts a dict for configuration' + + while len(ctx.manager.get_osd_status()['up']) < 3: + time.sleep(10) + ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + ctx.manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + ctx.manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + ctx.manager.raw_cluster_cmd('osd', 'set', 'noout') + ctx.manager.raw_cluster_cmd('osd', 'set', 'noin') + ctx.manager.raw_cluster_cmd('osd', 'set', 'nodown') + ctx.manager.wait_for_clean() + + # something that is always there + dummyfile = '/etc/fstab' + dummyfile2 = '/etc/resolv.conf' + testdir = teuthology.get_testdir(ctx) + + # create 1 pg pool + log.info('creating foo') + ctx.manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1') + + osds = [0, 1, 2] + for i in osds: + ctx.manager.set_config(i, osd_min_pg_log_entries=10) + ctx.manager.set_config(i, osd_max_pg_log_entries=10) + ctx.manager.set_config(i, osd_pg_log_trim_min=5) + + # determine primary + divergent = ctx.manager.get_pg_primary('foo', 0) + log.info("primary and soon to be divergent is %d", divergent) + non_divergent = list(osds) + non_divergent.remove(divergent) + + log.info('writing initial objects') + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + # write 100 objects + for i in range(100): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile]) + + ctx.manager.wait_for_clean() + + # blackhole non_divergent + log.info("blackholing osds %s", str(non_divergent)) + for i in non_divergent: + ctx.manager.set_config(i, filestore_blackhole=1) + + DIVERGENT_WRITE = 5 + DIVERGENT_REMOVE = 5 + # Write some soon to be divergent + log.info('writing divergent objects') + for i in range(DIVERGENT_WRITE): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, + dummyfile2], wait=False) + # Remove some soon to be divergent + 
log.info('remove divergent objects') + for i in range(DIVERGENT_REMOVE): + rados(ctx, mon, ['-p', 'foo', 'rm', + 'existing_%d' % (i + DIVERGENT_WRITE)], wait=False) + time.sleep(10) + mon.run( + args=['killall', '-9', 'rados'], + wait=True, + check_status=False) + + # kill all the osds but leave divergent in + log.info('killing all the osds') + for i in osds: + ctx.manager.kill_osd(i) + for i in osds: + ctx.manager.mark_down_osd(i) + for i in non_divergent: + ctx.manager.mark_out_osd(i) + + # bring up non-divergent + log.info("bringing up non_divergent %s", str(non_divergent)) + for i in non_divergent: + ctx.manager.revive_osd(i) + for i in non_divergent: + ctx.manager.mark_in_osd(i) + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE) + log.info('writing non-divergent object ' + objname) + rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2]) + + ctx.manager.wait_for_recovery() + + # ensure no recovery of up osds first + log.info('delay recovery') + for i in non_divergent: + ctx.manager.wait_run_admin_socket( + 'osd', i, ['set_recovery_delay', '100000']) + + # bring in our divergent friend + log.info("revive divergent %d", divergent) + ctx.manager.raw_cluster_cmd('osd', 'set', 'noup') + ctx.manager.revive_osd(divergent) + + log.info('delay recovery divergent') + ctx.manager.wait_run_admin_socket( + 'osd', divergent, ['set_recovery_delay', '100000']) + + ctx.manager.raw_cluster_cmd('osd', 'unset', 'noup') + while len(ctx.manager.get_osd_status()['up']) < 3: + time.sleep(10) + + log.info('wait for peering') + rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) + + # At this point the divergent_priors should have been detected + + log.info("killing divergent %d", divergent) + ctx.manager.kill_osd(divergent) + + # Export a pg + (exp_remote,) = ctx.\ + cluster.only('osd.{o}'.format(o=divergent)).remotes.iterkeys() + FSPATH = ctx.manager.get_filepath() + JPATH = 
os.path.join(FSPATH, "journal") + prefix = ("sudo adjust-ulimits ceph-objectstore-tool " + "--data-path {fpath} --journal-path {jpath} " + "--log-file=" + "/var/log/ceph/objectstore_tool.$$.log ". + format(fpath=FSPATH, jpath=JPATH)) + pid = os.getpid() + expfile = os.path.join(testdir, "exp.{pid}.out".format(pid=pid)) + cmd = ((prefix + "--op export --pgid 1.0 --file {file}"). + format(id=divergent, file=expfile)) + proc = exp_remote.run(args=cmd, wait=True, + check_status=False, stdout=StringIO()) + assert proc.exitstatus == 0 + + cmd = ((prefix + "--op remove --pgid 1.0"). + format(id=divergent, file=expfile)) + proc = exp_remote.run(args=cmd, wait=True, + check_status=False, stdout=StringIO()) + assert proc.exitstatus == 0 + + cmd = ((prefix + "--op import --file {file}"). + format(id=divergent, file=expfile)) + proc = exp_remote.run(args=cmd, wait=True, + check_status=False, stdout=StringIO()) + assert proc.exitstatus == 0 + + log.info("reviving divergent %d", divergent) + ctx.manager.revive_osd(divergent) + ctx.manager.wait_run_admin_socket('osd', divergent, ['dump_ops_in_flight']) + time.sleep(20); + + log.info('allowing recovery') + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in osds: + ctx.manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug', + 'kick_recovery_wq', ' 0') + + log.info('reading divergent objects') + for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE): + exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i, + '/tmp/existing']) + assert exit_status is 0 + + (remote,) = ctx.\ + cluster.only('osd.{o}'.format(o=divergent)).remotes.iterkeys() + msg = "dirty_divergent_priors: true, divergent_priors: %d" \ + % (DIVERGENT_WRITE + DIVERGENT_REMOVE) + cmd = 'grep "{msg}" /var/log/ceph/ceph-osd.{osd}.log'\ + .format(msg=msg, osd=divergent) + proc = remote.run(args=cmd, wait=True, check_status=False) + assert proc.exitstatus == 0 + + cmd = 'rm {file}'.format(file=expfile) + remote.run(args=cmd, wait=True) + 
log.info("success") diff --git a/qa/tasks/dump_stuck.py b/qa/tasks/dump_stuck.py new file mode 100644 index 00000000000..9e1780f0156 --- /dev/null +++ b/qa/tasks/dump_stuck.py @@ -0,0 +1,146 @@ +""" +Dump_stuck command +""" +import logging +import re +import time + +import ceph_manager +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + +def check_stuck(manager, num_inactive, num_unclean, num_stale, timeout=10): + """ + Do checks. Make sure get_stuck_pgs return the right amout of information, then + extract health information from the raw_cluster_cmd and compare the results with + values passed in. This passes if all asserts pass. + + :param num_manager: Ceph manager + :param num_inactive: number of inaactive pages that are stuck + :param num_unclean: number of unclean pages that are stuck + :paran num_stale: number of stale pages that are stuck + :param timeout: timeout value for get_stuck_pgs calls + """ + inactive = manager.get_stuck_pgs('inactive', timeout) + assert len(inactive) == num_inactive + unclean = manager.get_stuck_pgs('unclean', timeout) + assert len(unclean) == num_unclean + stale = manager.get_stuck_pgs('stale', timeout) + assert len(stale) == num_stale + + # check health output as well + health = manager.raw_cluster_cmd('health') + log.debug('ceph health is: %s', health) + if num_inactive > 0: + m = re.search('(\d+) pgs stuck inactive', health) + assert int(m.group(1)) == num_inactive + if num_unclean > 0: + m = re.search('(\d+) pgs stuck unclean', health) + assert int(m.group(1)) == num_unclean + if num_stale > 0: + m = re.search('(\d+) pgs stuck stale', health) + assert int(m.group(1)) == num_stale + +def task(ctx, config): + """ + Test the dump_stuck command. 
+ + :param ctx: Context + :param config: Configuration + """ + assert config is None, \ + 'dump_stuck requires no configuration' + assert teuthology.num_instances_of_type(ctx.cluster, 'osd') == 2, \ + 'dump_stuck requires exactly 2 osds' + + timeout = 60 + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.wait_for_clean(timeout) + + manager.raw_cluster_cmd('tell', 'mon.0', 'injectargs', '--', +# '--mon-osd-report-timeout 90', + '--mon-pg-stuck-threshold 10') + + check_stuck( + manager, + num_inactive=0, + num_unclean=0, + num_stale=0, + ) + num_pgs = manager.get_num_pgs() + + manager.mark_out_osd(0) + time.sleep(timeout) + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.wait_for_recovery(timeout) + + check_stuck( + manager, + num_inactive=0, + num_unclean=num_pgs, + num_stale=0, + ) + + manager.mark_in_osd(0) + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.wait_for_clean(timeout) + + check_stuck( + manager, + num_inactive=0, + num_unclean=0, + num_stale=0, + ) + + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'osd'): + manager.kill_osd(id_) + manager.mark_down_osd(id_) + + starttime = time.time() + done = False + while not done: + try: + check_stuck( + manager, + num_inactive=0, + num_unclean=0, + num_stale=num_pgs, + ) + done = True + except AssertionError: + # wait up to 15 minutes to become stale + if time.time() - starttime > 900: + raise + + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'osd'): + manager.revive_osd(id_) + manager.mark_in_osd(id_) + while True: + try: + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + 
manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + break + except Exception: + log.exception('osds must not be started yet, waiting...') + time.sleep(1) + manager.wait_for_clean(timeout) + + check_stuck( + manager, + num_inactive=0, + num_unclean=0, + num_stale=0, + ) diff --git a/qa/tasks/ec_lost_unfound.py b/qa/tasks/ec_lost_unfound.py new file mode 100644 index 00000000000..5a9678d8c27 --- /dev/null +++ b/qa/tasks/ec_lost_unfound.py @@ -0,0 +1,137 @@ +""" +Lost_unfound +""" +import logging +import ceph_manager +from teuthology import misc as teuthology +from util.rados import rados + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test handling of lost objects on an ec pool. + + A pretty rigid cluster is brought up andtested by this task + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'lost_unfound task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats') + manager.wait_for_clean() + + profile = config.get('erasure_code_profile', { + 'k': '2', + 'm': '2', + 'ruleset-failure-domain': 'osd' + }) + profile_name = profile.get('name', 'lost_unfound') + manager.create_erasure_code_profile(profile_name, profile) + pool = manager.create_pool_with_unique_name(erasure_code_profile_name=profile_name) + + # something that is always there, readable and never empty + dummyfile = '/etc/group' + + # kludge to make sure they get a map + rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile]) + + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + 
manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.wait_for_recovery() + + # create old objects + for f in range(1, 10): + rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', pool, 'rm', 'existed_%d' % f]) + + # delay recovery, and make the pg log very long (to prevent backfill) + manager.raw_cluster_cmd( + 'tell', 'osd.1', + 'injectargs', + '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000' + ) + + manager.kill_osd(0) + manager.mark_down_osd(0) + manager.kill_osd(3) + manager.mark_down_osd(3) + + for f in range(1, 10): + rados(ctx, mon, ['-p', pool, 'put', 'new_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile]) + + # take out osd.1 and a necessary shard of those objects. + manager.kill_osd(1) + manager.mark_down_osd(1) + manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it') + manager.revive_osd(0) + manager.wait_till_osd_is_up(0) + manager.revive_osd(3) + manager.wait_till_osd_is_up(3) + + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats') + manager.wait_till_active() + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats') + + # verify that there are unfound objects + unfound = manager.get_num_unfound_objects() + log.info("there are %d unfound objects" % unfound) + assert unfound + + # mark stuff lost + pgs = manager.get_pg_stats() + for pg in pgs: + if pg['stat_sum']['num_objects_unfound'] > 0: + # verify that i can list them direct from the osd + log.info('listing missing/lost in %s state %s', pg['pgid'], + pg['state']); + m = 
manager.list_pg_missing(pg['pgid']) + log.info('%s' % m) + assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] + + log.info("reverting unfound in %s", pg['pgid']) + manager.raw_cluster_cmd('pg', pg['pgid'], + 'mark_unfound_lost', 'delete') + else: + log.info("no unfound in %s", pg['pgid']) + + manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5') + manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5') + manager.raw_cluster_cmd('tell', 'osd.3', 'debug', 'kick_recovery_wq', '5') + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats') + manager.wait_for_recovery() + + # verify result + for f in range(1, 10): + err = rados(ctx, mon, ['-p', pool, 'get', 'new_%d' % f, '-']) + assert err + err = rados(ctx, mon, ['-p', pool, 'get', 'existed_%d' % f, '-']) + assert err + err = rados(ctx, mon, ['-p', pool, 'get', 'existing_%d' % f, '-']) + assert err + + # see if osd.1 can cope + manager.revive_osd(1) + manager.wait_till_osd_is_up(1) + manager.wait_for_clean() diff --git a/qa/tasks/filestore_idempotent.py b/qa/tasks/filestore_idempotent.py new file mode 100644 index 00000000000..da3995eafcb --- /dev/null +++ b/qa/tasks/filestore_idempotent.py @@ -0,0 +1,81 @@ +""" +Filestore/filejournal handler +""" +import logging +from teuthology.orchestra import run +import random + +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test filestore/filejournal handling of non-idempotent events. + + Currently this is a kludge; we require the ceph task preceeds us just + so that we get the tarball installed to run the test binary. 
+ + :param ctx: Context + :param config: Configuration + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + clients = config.keys() + + # just use the first client... + client = clients[0]; + (remote,) = ctx.cluster.only(client).remotes.iterkeys() + + testdir = teuthology.get_testdir(ctx) + + dir = '%s/data/test.%s' % (testdir, client) + + seed = str(int(random.uniform(1,100))) + + try: + log.info('creating a working dir') + remote.run(args=['mkdir', dir]) + remote.run( + args=[ + 'cd', dir, + run.Raw('&&'), + 'wget','-q', '-Orun_seed_to.sh', + 'http://git.ceph.com/?p=ceph.git;a=blob_plain;f=src/test/objectstore/run_seed_to.sh;hb=HEAD', + run.Raw('&&'), + 'wget','-q', '-Orun_seed_to_range.sh', + 'http://git.ceph.com/?p=ceph.git;a=blob_plain;f=src/test/objectstore/run_seed_to_range.sh;hb=HEAD', + run.Raw('&&'), + 'chmod', '+x', 'run_seed_to.sh', 'run_seed_to_range.sh', + ]); + + log.info('running a series of tests') + proc = remote.run( + args=[ + 'cd', dir, + run.Raw('&&'), + './run_seed_to_range.sh', seed, '50', '300', + ], + wait=False, + check_status=False) + result = proc.wait() + + if result != 0: + remote.run( + args=[ + 'cp', '-a', dir, '{tdir}/archive/idempotent_failure'.format(tdir=testdir), + ]) + raise Exception("./run_seed_to_range.sh errored out") + + finally: + remote.run(args=[ + 'rm', '-rf', '--', dir + ]) + diff --git a/qa/tasks/kclient.py b/qa/tasks/kclient.py new file mode 100644 index 00000000000..ca1fb3ba716 --- /dev/null +++ b/qa/tasks/kclient.py @@ -0,0 +1,107 @@ +""" +Mount/unmount a ``kernel`` client. 
+""" +import contextlib +import logging + +from teuthology.misc import deep_merge +from teuthology import misc +from cephfs.kernel_mount import KernelMount + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Mount/unmount a ``kernel`` client. + + The config is optional and defaults to mounting on all clients. If + a config is given, it is expected to be a list of clients to do + this operation on. This lets you e.g. set up one client with + ``ceph-fuse`` and another with ``kclient``. + + Example that mounts all clients:: + + tasks: + - ceph: + - kclient: + - interactive: + + Example that uses both ``kclient` and ``ceph-fuse``:: + + tasks: + - ceph: + - ceph-fuse: [client.0] + - kclient: [client.1] + - interactive: + + + Pass a dictionary instead of lists to specify per-client config: + + tasks: + -kclient: + client.0: + debug: true + + :param ctx: Context + :param config: Configuration + """ + log.info('Mounting kernel clients...') + assert config is None or isinstance(config, list) or isinstance(config, dict), \ + "task kclient got invalid config" + + if config is None: + config = ['client.{id}'.format(id=id_) + for id_ in misc.all_roles_of_type(ctx.cluster, 'client')] + + if isinstance(config, list): + client_roles = config + config = dict([r, dict()] for r in client_roles) + elif isinstance(config, dict): + client_roles = config.keys() + else: + raise ValueError("Invalid config object: {0} ({1})".format(config, config.__class__)) + + # config has been converted to a dict by this point + overrides = ctx.config.get('overrides', {}) + deep_merge(config, overrides.get('kclient', {})) + + clients = list(misc.get_clients(ctx=ctx, roles=client_roles)) + + test_dir = misc.get_testdir(ctx) + + # Assemble mon addresses + remotes_and_roles = ctx.cluster.remotes.items() + roles = [roles for (remote_, roles) in remotes_and_roles] + ips = [remote_.ssh.get_transport().getpeername()[0] + for (remote_, _) in remotes_and_roles] + 
mons = misc.get_mons(roles, ips).values() + + mounts = {} + for id_, remote in clients: + kernel_mount = KernelMount( + mons, + test_dir, + id_, + remote, + ctx.teuthology_config.get('ipmi_user', None), + ctx.teuthology_config.get('ipmi_password', None), + ctx.teuthology_config.get('ipmi_domain', None) + ) + + mounts[id_] = kernel_mount + + client_config = config["client.{0}".format(id_)] + if client_config.get('debug', False): + remote.run(args=["sudo", "bash", "-c", "echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control"]) + remote.run(args=["sudo", "bash", "-c", "echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control"]) + + kernel_mount.mount() + + ctx.mounts = mounts + try: + yield mounts + finally: + log.info('Unmounting kernel clients...') + for mount in mounts.values(): + mount.umount() diff --git a/qa/tasks/locktest.py b/qa/tasks/locktest.py new file mode 100755 index 00000000000..9de5ba40c5b --- /dev/null +++ b/qa/tasks/locktest.py @@ -0,0 +1,134 @@ +""" +locktests +""" +import logging + +from teuthology.orchestra import run +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Run locktests, from the xfstests suite, on the given + clients. Whether the clients are ceph-fuse or kernel does not + matter, and the two clients can refer to the same mount. + + The config is a list of two clients to run the locktest on. The + first client will be the host. + + For example: + tasks: + - ceph: + - ceph-fuse: [client.0, client.1] + - locktest: + [client.0, client.1] + + This task does not yield; there would be little point. 
+ + :param ctx: Context + :param config: Configuration + """ + + assert isinstance(config, list) + log.info('fetching and building locktests...') + (host,) = ctx.cluster.only(config[0]).remotes + (client,) = ctx.cluster.only(config[1]).remotes + ( _, _, host_id) = config[0].partition('.') + ( _, _, client_id) = config[1].partition('.') + testdir = teuthology.get_testdir(ctx) + hostmnt = '{tdir}/mnt.{id}'.format(tdir=testdir, id=host_id) + clientmnt = '{tdir}/mnt.{id}'.format(tdir=testdir, id=client_id) + + try: + for client_name in config: + log.info('building on {client_}'.format(client_=client_name)) + ctx.cluster.only(client_name).run( + args=[ + # explicitly does not support multiple autotest tasks + # in a single run; the result archival would conflict + 'mkdir', '{tdir}/archive/locktest'.format(tdir=testdir), + run.Raw('&&'), + 'mkdir', '{tdir}/locktest'.format(tdir=testdir), + run.Raw('&&'), + 'wget', + '-nv', + 'https://raw.github.com/gregsfortytwo/xfstests-ceph/master/src/locktest.c', + '-O', '{tdir}/locktest/locktest.c'.format(tdir=testdir), + run.Raw('&&'), + 'g++', '{tdir}/locktest/locktest.c'.format(tdir=testdir), + '-o', '{tdir}/locktest/locktest'.format(tdir=testdir) + ], + logger=log.getChild('locktest_client.{id}'.format(id=client_name)), + ) + + log.info('built locktest on each client') + + host.run(args=['sudo', 'touch', + '{mnt}/locktestfile'.format(mnt=hostmnt), + run.Raw('&&'), + 'sudo', 'chown', 'ubuntu.ubuntu', + '{mnt}/locktestfile'.format(mnt=hostmnt) + ] + ) + + log.info('starting on host') + hostproc = host.run( + args=[ + '{tdir}/locktest/locktest'.format(tdir=testdir), + '-p', '6788', + '-d', + '{mnt}/locktestfile'.format(mnt=hostmnt), + ], + wait=False, + logger=log.getChild('locktest.host'), + ) + log.info('starting on client') + (_,_,hostaddr) = host.name.partition('@') + clientproc = client.run( + args=[ + '{tdir}/locktest/locktest'.format(tdir=testdir), + '-p', '6788', + '-d', + '-h', hostaddr, + 
'{mnt}/locktestfile'.format(mnt=clientmnt), + ], + logger=log.getChild('locktest.client'), + wait=False + ) + + hostresult = hostproc.wait() + clientresult = clientproc.wait() + if (hostresult != 0) or (clientresult != 0): + raise Exception("Did not pass locking test!") + log.info('finished locktest executable with results {r} and {s}'. \ + format(r=hostresult, s=clientresult)) + + finally: + log.info('cleaning up host dir') + host.run( + args=[ + 'mkdir', '-p', '{tdir}/locktest'.format(tdir=testdir), + run.Raw('&&'), + 'rm', '-f', '{tdir}/locktest/locktest.c'.format(tdir=testdir), + run.Raw('&&'), + 'rm', '-f', '{tdir}/locktest/locktest'.format(tdir=testdir), + run.Raw('&&'), + 'rmdir', '{tdir}/locktest' + ], + logger=log.getChild('.{id}'.format(id=config[0])), + ) + log.info('cleaning up client dir') + client.run( + args=[ + 'mkdir', '-p', '{tdir}/locktest'.format(tdir=testdir), + run.Raw('&&'), + 'rm', '-f', '{tdir}/locktest/locktest.c'.format(tdir=testdir), + run.Raw('&&'), + 'rm', '-f', '{tdir}/locktest/locktest'.format(tdir=testdir), + run.Raw('&&'), + 'rmdir', '{tdir}/locktest'.format(tdir=testdir) + ], + logger=log.getChild('.{id}'.format(\ + id=config[1])), + ) diff --git a/qa/tasks/lost_unfound.py b/qa/tasks/lost_unfound.py new file mode 100644 index 00000000000..af1df4de0cb --- /dev/null +++ b/qa/tasks/lost_unfound.py @@ -0,0 +1,157 @@ +""" +Lost_unfound +""" +import logging +import time +import ceph_manager +from teuthology import misc as teuthology +from util.rados import rados + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test handling of lost objects. 
+ + A pretty rigid cluseter is brought up andtested by this task + """ + POOL = 'unfound_pool' + if config is None: + config = {} + assert isinstance(config, dict), \ + 'lost_unfound task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + manager.wait_for_clean() + + manager.create_pool(POOL) + + # something that is always there + dummyfile = '/etc/fstab' + + # take an osd out until the very end + manager.kill_osd(2) + manager.mark_down_osd(2) + manager.mark_out_osd(2) + + # kludge to make sure they get a map + rados(ctx, mon, ['-p', POOL, 'put', 'dummy', dummyfile]) + + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.wait_for_recovery() + + # create old objects + for f in range(1, 10): + rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'rm', 'existed_%d' % f]) + + # delay recovery, and make the pg log very long (to prevent backfill) + manager.raw_cluster_cmd( + 'tell', 'osd.1', + 'injectargs', + '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000' + ) + + manager.kill_osd(0) + manager.mark_down_osd(0) + + for f in range(1, 10): + rados(ctx, mon, ['-p', POOL, 'put', 'new_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) + + # bring osd.0 back up, let it peer, but don't replicate the new + # objects... 
+ log.info('osd.0 command_args is %s' % 'foo') + log.info(ctx.daemons.get_daemon('osd', 0).command_args) + ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend([ + '--osd-recovery-delay-start', '1000' + ]) + manager.revive_osd(0) + manager.mark_in_osd(0) + manager.wait_till_osd_is_up(0) + + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.wait_till_active() + + # take out osd.1 and the only copy of those objects. + manager.kill_osd(1) + manager.mark_down_osd(1) + manager.mark_out_osd(1) + manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it') + + # bring up osd.2 so that things would otherwise, in theory, recovery fully + manager.revive_osd(2) + manager.mark_in_osd(2) + manager.wait_till_osd_is_up(2) + + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + manager.wait_till_active() + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + + # verify that there are unfound objects + unfound = manager.get_num_unfound_objects() + log.info("there are %d unfound objects" % unfound) + assert unfound + + # mark stuff lost + pgs = manager.get_pg_stats() + for pg in pgs: + if pg['stat_sum']['num_objects_unfound'] > 0: + primary = 'osd.%d' % pg['acting'][0] + + # verify that i can list them direct from the osd + log.info('listing missing/lost in %s state %s', pg['pgid'], + pg['state']); + m = manager.list_pg_missing(pg['pgid']) + #log.info('%s' % m) + assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] + num_unfound=0 + for o in m['objects']: + if len(o['locations']) == 0: + num_unfound += 1 + assert m['num_unfound'] == num_unfound + + log.info("reverting unfound in %s on %s", pg['pgid'], primary) + manager.raw_cluster_cmd('pg', pg['pgid'], + 'mark_unfound_lost', 'revert') + else: + log.info("no unfound in %s", pg['pgid']) + 
+ manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5') + manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5') + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + manager.wait_for_recovery() + + # verify result + for f in range(1, 10): + err = rados(ctx, mon, ['-p', POOL, 'get', 'new_%d' % f, '-']) + assert err + err = rados(ctx, mon, ['-p', POOL, 'get', 'existed_%d' % f, '-']) + assert err + err = rados(ctx, mon, ['-p', POOL, 'get', 'existing_%d' % f, '-']) + assert not err + + # see if osd.1 can cope + manager.revive_osd(1) + manager.mark_in_osd(1) + manager.wait_till_osd_is_up(1) + manager.wait_for_clean() diff --git a/qa/tasks/manypools.py b/qa/tasks/manypools.py new file mode 100644 index 00000000000..1ddcba5c8a9 --- /dev/null +++ b/qa/tasks/manypools.py @@ -0,0 +1,73 @@ +""" +Force pg creation on all osds +""" +from teuthology import misc as teuthology +from teuthology.orchestra import run +import logging + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Create the specified number of pools and write 16 objects to them (thereby forcing + the PG creation on each OSD). This task creates pools from all the clients, + in parallel. It is easy to add other daemon types which have the appropriate + permissions, but I don't think anything else does. + The config is just the number of pools to create. I recommend setting + "mon create pg interval" to a very low value in your ceph config to speed + this up. + + You probably want to do this to look at memory consumption, and + maybe to test how performance changes with the number of PGs. 
For example: + + tasks: + - ceph: + config: + mon: + mon create pg interval: 1 + - manypools: 3000 + - radosbench: + clients: [client.0] + time: 360 + """ + + log.info('creating {n} pools'.format(n=config)) + + poolnum = int(config) + creator_remotes = [] + client_roles = teuthology.all_roles_of_type(ctx.cluster, 'client') + log.info('got client_roles={client_roles_}'.format(client_roles_=client_roles)) + for role in client_roles: + log.info('role={role_}'.format(role_=role)) + (creator_remote, ) = ctx.cluster.only('client.{id}'.format(id=role)).remotes.iterkeys() + creator_remotes.append((creator_remote, 'client.{id}'.format(id=role))) + + remaining_pools = poolnum + poolprocs=dict() + while (remaining_pools > 0): + log.info('{n} pools remaining to create'.format(n=remaining_pools)) + for remote, role_ in creator_remotes: + poolnum = remaining_pools + remaining_pools -= 1 + if remaining_pools < 0: + continue + log.info('creating pool{num} on {role}'.format(num=poolnum, role=role_)) + proc = remote.run( + args=[ + 'rados', + '--name', role_, + 'mkpool', 'pool{num}'.format(num=poolnum), '-1', + run.Raw('&&'), + 'rados', + '--name', role_, + '--pool', 'pool{num}'.format(num=poolnum), + 'bench', '0', 'write', '-t', '16', '--block-size', '1' + ], + wait = False + ) + log.info('waiting for pool and object creates') + poolprocs[remote] = proc + + run.wait(poolprocs.itervalues()) + + log.info('created all {n} pools and wrote 16 objects to each'.format(n=poolnum)) diff --git a/qa/tasks/mds_auto_repair.py b/qa/tasks/mds_auto_repair.py new file mode 100644 index 00000000000..e7580613b2a --- /dev/null +++ b/qa/tasks/mds_auto_repair.py @@ -0,0 +1,122 @@ + +""" +Exercise the MDS's auto repair functions +""" + +import contextlib +import logging +import time + +from teuthology.orchestra.run import CommandFailedError + +from tasks.cephfs.filesystem import Filesystem +from tasks.cephfs.cephfs_test_case import CephFSTestCase, run_tests + + +log = logging.getLogger(__name__) + + +# 
Arbitrary timeouts for operations involving restarting +# an MDS or waiting for it to come up +MDS_RESTART_GRACE = 60 + + +class TestMDSAutoRepair(CephFSTestCase): + def test_backtrace_repair(self): + """ + MDS should verify/fix backtrace on fetch dirfrag + """ + + # trim log segment as fast as possible + self.set_conf('mds', 'mds cache size', 100) + self.set_conf('mds', 'mds verify backtrace', 1) + self.fs.mds_restart() + self.fs.wait_for_daemons() + + create_script = "mkdir {0}; for i in `seq 0 500`; do touch {0}/file$i; done" + # create main test directory + self.mount_a.run_shell(["sudo", "bash", "-c", create_script.format("testdir1")]) + + # create more files in another directory. make sure MDS trim dentries in testdir1 + self.mount_a.run_shell(["sudo", "bash", "-c", create_script.format("testdir2")]) + + # flush journal entries to dirfrag objects + self.fs.mds_asok(['flush', 'journal']) + + # drop inodes caps + self.mount_a.umount_wait() + self.mount_a.mount() + self.mount_a.wait_until_mounted() + + # wait MDS to trim dentries in testdir1. 60 seconds should be long enough. 
+ time.sleep(60) + + # remove testdir1's backtrace + proc = self.mount_a.run_shell(["sudo", "ls", "-id", "testdir1"]) + self.assertEqual(proc.exitstatus, 0) + objname = "{:x}.00000000".format(long(proc.stdout.getvalue().split()[0])) + proc = self.mount_a.run_shell(["sudo", "rados", "-p", "metadata", "rmxattr", objname, "parent"]) + self.assertEqual(proc.exitstatus, 0) + + # readdir (fetch dirfrag) should fix testdir1's backtrace + self.mount_a.run_shell(["sudo", "ls", "testdir1"]) + + # add more entries to journal + self.mount_a.run_shell(["sudo", "rm", "-rf", " testdir2"]) + + # flush journal entries to dirfrag objects + self.fs.mds_asok(['flush', 'journal']) + + # check if backtrace exists + proc = self.mount_a.run_shell(["sudo", "rados", "-p", "metadata", "getxattr", objname, "parent"]) + self.assertEqual(proc.exitstatus, 0) + + def test_mds_readonly(self): + """ + test if MDS behave correct when it's readonly + """ + # operation should successd when MDS is not readonly + self.mount_a.run_shell(["sudo", "touch", "test_file1"]) + writer = self.mount_a.write_background(loop=True) + + time.sleep(10) + self.assertFalse(writer.finished) + + # force MDS to read-only mode + self.fs.mds_asok(['force_readonly']) + time.sleep(10) + + # touching test file should fail + try: + self.mount_a.run_shell(["sudo", "touch", "test_file1"]) + except CommandFailedError: + pass + else: + self.assertTrue(False) + + # background writer also should fail + self.assertTrue(writer.finished) + + # restart mds to make it writable + self.fs.mds_restart() + self.fs.wait_for_daemons() + + +@contextlib.contextmanager +def task(ctx, config): + fs = Filesystem(ctx) + mount_a = ctx.mounts.values()[0] + + # Stash references on ctx so that we can easily debug in interactive mode + # ======================================================================= + ctx.filesystem = fs + ctx.mount_a = mount_a + + run_tests(ctx, config, TestMDSAutoRepair, { + 'fs': fs, + 'mount_a': mount_a, + }) + + # Continue to 
any downstream tasks + # ================================ + yield diff --git a/qa/tasks/mds_client_limits.py b/qa/tasks/mds_client_limits.py new file mode 100644 index 00000000000..ae722886753 --- /dev/null +++ b/qa/tasks/mds_client_limits.py @@ -0,0 +1,183 @@ + +""" +Exercise the MDS's behaviour when clients and the MDCache reach or +exceed the limits of how many caps/inodes they should hold. +""" + +import contextlib +import logging +from unittest import SkipTest +from teuthology.orchestra.run import CommandFailedError + +from tasks.cephfs.filesystem import Filesystem +from tasks.cephfs.fuse_mount import FuseMount +from tasks.cephfs.cephfs_test_case import CephFSTestCase, run_tests + + +log = logging.getLogger(__name__) + + +# Arbitrary timeouts for operations involving restarting +# an MDS or waiting for it to come up +MDS_RESTART_GRACE = 60 + +# Hardcoded values from Server::recall_client_state +CAP_RECALL_RATIO = 0.8 +CAP_RECALL_MIN = 100 + + +class TestClientLimits(CephFSTestCase): + # Environment references + mds_session_timeout = None + mds_reconnect_timeout = None + ms_max_backoff = None + + def wait_for_health(self, pattern, timeout): + """ + Wait until 'ceph health' contains a single message matching the pattern + """ + def seen_health_warning(): + health = self.fs.mon_manager.get_mon_health() + summary_strings = [s['summary'] for s in health['summary']] + if len(summary_strings) == 0: + log.debug("Not expected number of summary strings ({0})".format(summary_strings)) + return False + elif len(summary_strings) == 1 and pattern in summary_strings[0]: + return True + else: + raise RuntimeError("Unexpected health messages: {0}".format(summary_strings)) + + self.wait_until_true(seen_health_warning, timeout) + + def _test_client_pin(self, use_subdir): + """ + When a client pins an inode in its cache, for example because the file is held open, + it should reject requests from the MDS to trim these caps. 
The MDS should complain + to the user that it is unable to enforce its cache size limits because of this + objectionable client. + + :param use_subdir: whether to put test files in a subdir or use root + """ + + cache_size = 200 + open_files = 250 + + self.set_conf('mds', 'mds cache size', cache_size) + self.fs.mds_restart() + self.fs.wait_for_daemons() + + mount_a_client_id = self.mount_a.get_global_id() + path = "subdir/mount_a" if use_subdir else "mount_a" + open_proc = self.mount_a.open_n_background(path, open_files) + + # Client should now hold: + # `open_files` caps for the open files + # 1 cap for root + # 1 cap for subdir + self.wait_until_equal(lambda: self.get_session(mount_a_client_id)['num_caps'], + open_files + (2 if use_subdir else 1), + timeout=600, + reject_fn=lambda x: x > open_files + 2) + + # MDS should not be happy about that, as the client is failing to comply + # with the SESSION_RECALL messages it is being sent + mds_recall_state_timeout = int(self.fs.get_config("mds_recall_state_timeout")) + self.wait_for_health("failing to respond to cache pressure", mds_recall_state_timeout + 10) + + # When the client closes the files, it should retain only as many caps as allowed + # under the SESSION_RECALL policy + log.info("Terminating process holding files open") + open_proc.stdin.close() + try: + open_proc.wait() + except CommandFailedError: + # We killed it, so it raises an error + pass + + # The remaining caps should comply with the numbers sent from MDS in SESSION_RECALL message, + # which depend on the cache size and overall ratio + self.wait_until_equal( + lambda: self.get_session(mount_a_client_id)['num_caps'], + int(cache_size * 0.8), + timeout=600, + reject_fn=lambda x: x < int(cache_size*.8)) + + def test_client_pin_root(self): + self._test_client_pin(False) + + def test_client_pin(self): + self._test_client_pin(True) + + def test_client_release_bug(self): + """ + When a client has a bug (which we will simulate) preventing it from releasing 
caps, + the MDS should notice that releases are not being sent promptly, and generate a health + metric to that effect. + """ + + # The debug hook to inject the failure only exists in the fuse client + if not isinstance(self.mount_a, FuseMount): + raise SkipTest("Require FUSE client to inject client release failure") + + self.set_conf('client.{0}'.format(self.mount_a.client_id), 'client inject release failure', 'true') + self.mount_a.teardown() + self.mount_a.mount() + self.mount_a.wait_until_mounted() + mount_a_client_id = self.mount_a.get_global_id() + + # Client A creates a file. He will hold the write caps on the file, and later (simulated bug) fail + # to comply with the MDSs request to release that cap + self.mount_a.run_shell(["touch", "file1"]) + + # Client B tries to stat the file that client A created + rproc = self.mount_b.write_background("file1") + + # After mds_revoke_cap_timeout, we should see a health warning (extra lag from + # MDS beacon period) + mds_revoke_cap_timeout = int(self.fs.get_config("mds_revoke_cap_timeout")) + self.wait_for_health("failing to respond to capability release", mds_revoke_cap_timeout + 10) + + # Client B should still be stuck + self.assertFalse(rproc.finished) + + # Kill client A + self.mount_a.kill() + self.mount_a.kill_cleanup() + + # Client B should complete + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + rproc.wait() + + +@contextlib.contextmanager +def task(ctx, config): + fs = Filesystem(ctx) + + # Pick out the clients we will use from the configuration + # ======================================================= + if len(ctx.mounts) < 2: + raise RuntimeError("Need at least two clients") + mount_a = ctx.mounts.values()[0] + mount_b = ctx.mounts.values()[1] + + if not isinstance(mount_a, FuseMount) or not isinstance(mount_b, FuseMount): + # kclient kill() power cycles nodes, so requires clients to each be on + # their own node + if mount_a.client_remote.hostname == mount_b.client_remote.hostname: 
+ raise RuntimeError("kclient clients must be on separate nodes") + + # Stash references on ctx so that we can easily debug in interactive mode + # ======================================================================= + ctx.filesystem = fs + ctx.mount_a = mount_a + ctx.mount_b = mount_b + + run_tests(ctx, config, TestClientLimits, { + 'fs': fs, + 'mount_a': mount_a, + 'mount_b': mount_b + }) + + # Continue to any downstream tasks + # ================================ + yield diff --git a/qa/tasks/mds_client_recovery.py b/qa/tasks/mds_client_recovery.py new file mode 100644 index 00000000000..7dd7402e8e8 --- /dev/null +++ b/qa/tasks/mds_client_recovery.py @@ -0,0 +1,429 @@ + +""" +Teuthology task for exercising CephFS client recovery +""" + +import contextlib +import logging +import time +import unittest + +from teuthology.orchestra.run import CommandFailedError, ConnectionLostError +from teuthology.task import interactive + +from tasks.cephfs.cephfs_test_case import CephFSTestCase, run_tests +from tasks.cephfs.filesystem import Filesystem +from tasks.cephfs.fuse_mount import FuseMount + + +log = logging.getLogger(__name__) + + +# Arbitrary timeouts for operations involving restarting +# an MDS or waiting for it to come up +MDS_RESTART_GRACE = 60 + + +class TestClientRecovery(CephFSTestCase): + # Environment references + mds_session_timeout = None + mds_reconnect_timeout = None + ms_max_backoff = None + + def test_basic(self): + # Check that two clients come up healthy and see each others' files + # ===================================================== + self.mount_a.create_files() + self.mount_a.check_files() + self.mount_a.umount_wait() + + self.mount_b.check_files() + + self.mount_a.mount() + self.mount_a.wait_until_mounted() + + # Check that the admin socket interface is correctly reporting + # two sessions + # ===================================================== + ls_data = self._session_list() + self.assert_session_count(2, ls_data) + + self.assertSetEqual( 
+ set([l['id'] for l in ls_data]), + {self.mount_a.get_global_id(), self.mount_b.get_global_id()} + ) + + def test_restart(self): + # Check that after an MDS restart both clients reconnect and continue + # to handle I/O + # ===================================================== + self.fs.mds_stop() + self.fs.mds_fail() + self.fs.mds_restart() + self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) + + self.mount_a.create_destroy() + self.mount_b.create_destroy() + + def _session_num_caps(self, client_id): + ls_data = self.fs.mds_asok(['session', 'ls']) + return int(self._session_by_id(ls_data).get(client_id, {'num_caps': None})['num_caps']) + + def test_reconnect_timeout(self): + # Reconnect timeout + # ================= + # Check that if I stop an MDS and a client goes away, the MDS waits + # for the reconnect period + self.fs.mds_stop() + self.fs.mds_fail() + + mount_a_client_id = self.mount_a.get_global_id() + self.mount_a.umount_wait(force=True) + + self.fs.mds_restart() + + self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE) + + ls_data = self._session_list() + self.assert_session_count(2, ls_data) + + # The session for the dead client should have the 'reconnect' flag set + self.assertTrue(self.get_session(mount_a_client_id)['reconnecting']) + + # Wait for the reconnect state to clear, this should take the + # reconnect timeout period. + in_reconnect_for = self.fs.wait_for_state('up:active', timeout=self.mds_reconnect_timeout * 2) + # Check that the period we waited to enter active is within a factor + # of two of the reconnect timeout. 
+ self.assertGreater(in_reconnect_for, self.mds_reconnect_timeout / 2, + "Should have been in reconnect phase for {0} but only took {1}".format( + self.mds_reconnect_timeout, in_reconnect_for + )) + + self.assert_session_count(1) + + # Check that the client that timed out during reconnect can + # mount again and do I/O + self.mount_a.mount() + self.mount_a.wait_until_mounted() + self.mount_a.create_destroy() + + self.assert_session_count(2) + + def test_reconnect_eviction(self): + # Eviction during reconnect + # ========================= + self.fs.mds_stop() + self.fs.mds_fail() + + mount_a_client_id = self.mount_a.get_global_id() + self.mount_a.umount_wait(force=True) + + self.fs.mds_restart() + + # Enter reconnect phase + self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE) + self.assert_session_count(2) + + # Evict the stuck client + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + self.assert_session_count(1) + + # Observe that we proceed to active phase without waiting full reconnect timeout + evict_til_active = self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) + # Once we evict the troublemaker, the reconnect phase should complete + # in well under the reconnect timeout. 
+ self.assertLess(evict_til_active, self.mds_reconnect_timeout * 0.5, + "reconnect did not complete soon enough after eviction, took {0}".format( + evict_til_active + )) + + # Bring the client back + self.mount_a.mount() + self.mount_a.wait_until_mounted() + self.mount_a.create_destroy() + + def test_stale_caps(self): + # Capability release from stale session + # ===================================== + cap_holder = self.mount_a.open_background() + + # Wait for the file to be visible from another client, indicating + # that mount_a has completed its network ops + self.mount_b.wait_for_visible() + + # Simulate client death + self.mount_a.kill() + + try: + # Now, after mds_session_timeout seconds, the waiter should + # complete their operation when the MDS marks the holder's + # session stale. + cap_waiter = self.mount_b.write_background() + a = time.time() + cap_waiter.wait() + b = time.time() + + # Should have succeeded + self.assertEqual(cap_waiter.exitstatus, 0) + + cap_waited = b - a + log.info("cap_waiter waited {0}s".format(cap_waited)) + self.assertTrue(self.mds_session_timeout / 2.0 <= cap_waited <= self.mds_session_timeout * 2.0, + "Capability handover took {0}, expected approx {1}".format( + cap_waited, self.mds_session_timeout + )) + + cap_holder.stdin.close() + try: + cap_holder.wait() + except (CommandFailedError, ConnectionLostError): + # We killed it (and possibly its node), so it raises an error + pass + finally: + # teardown() doesn't quite handle this case cleanly, so help it out + self.mount_a.kill_cleanup() + + self.mount_a.mount() + self.mount_a.wait_until_mounted() + + def test_evicted_caps(self): + # Eviction while holding a capability + # =================================== + + # Take out a write capability on a file on client A, + # and then immediately kill it. 
+ cap_holder = self.mount_a.open_background() + mount_a_client_id = self.mount_a.get_global_id() + + # Wait for the file to be visible from another client, indicating + # that mount_a has completed its network ops + self.mount_b.wait_for_visible() + + # Simulate client death + self.mount_a.kill() + + try: + # The waiter should get stuck waiting for the capability + # held on the MDS by the now-dead client A + cap_waiter = self.mount_b.write_background() + time.sleep(5) + self.assertFalse(cap_waiter.finished) + + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + # Now, because I evicted the old holder of the capability, it should + # immediately get handed over to the waiter + a = time.time() + cap_waiter.wait() + b = time.time() + cap_waited = b - a + log.info("cap_waiter waited {0}s".format(cap_waited)) + # This is the check that it happened 'now' rather than waiting + # for the session timeout + self.assertLess(cap_waited, self.mds_session_timeout / 2.0, + "Capability handover took {0}, expected less than {1}".format( + cap_waited, self.mds_session_timeout / 2.0 + )) + + cap_holder.stdin.close() + try: + cap_holder.wait() + except (CommandFailedError, ConnectionLostError): + # We killed it (and possibly its node), so it raises an error + pass + finally: + self.mount_a.kill_cleanup() + + self.mount_a.mount() + self.mount_a.wait_until_mounted() + + def test_trim_caps(self): + # Trim capability when reconnecting MDS + # =================================== + + count = 500 + # Create lots of files + for i in range(count): + self.mount_a.run_shell(["sudo", "touch", "f{0}".format(i)]) + + # Populate mount_b's cache + self.mount_b.run_shell(["sudo", "ls"]) + + client_id = self.mount_b.get_global_id() + num_caps = self._session_num_caps(client_id) + self.assertGreaterEqual(num_caps, count) + + # Restart MDS. 
client should trim its cache when reconnecting to the MDS + self.fs.mds_stop() + self.fs.mds_fail() + self.fs.mds_restart() + self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) + + num_caps = self._session_num_caps(client_id) + self.assertLess(num_caps, count, + "should have less than {0} capabilities, have {1}".format( + count, num_caps + )) + + def test_network_death(self): + """ + Simulate software freeze or temporary network failure. + + Check that the client blocks I/O during failure, and completes + I/O after failure. + """ + + # We only need one client + self.mount_b.umount_wait() + + # Initially our one client session should be visible + client_id = self.mount_a.get_global_id() + ls_data = self._session_list() + self.assert_session_count(1, ls_data) + self.assertEqual(ls_data[0]['id'], client_id) + self.assert_session_state(client_id, "open") + + # ...and capable of doing I/O without blocking + self.mount_a.create_files() + + # ...but if we turn off the network + self.fs.set_clients_block(True) + + # ...and try and start an I/O + write_blocked = self.mount_a.write_background() + + # ...then it should block + self.assertFalse(write_blocked.finished) + self.assert_session_state(client_id, "open") + time.sleep(self.mds_session_timeout * 1.5) # Long enough for MDS to consider session stale + self.assertFalse(write_blocked.finished) + self.assert_session_state(client_id, "stale") + + # ...until we re-enable I/O + self.fs.set_clients_block(False) + + # ...when it should complete promptly + a = time.time() + write_blocked.wait() + b = time.time() + recovery_time = b - a + log.info("recovery time: {0}".format(recovery_time)) + self.assertLess(recovery_time, self.ms_max_backoff * 2) + self.assert_session_state(client_id, "open") + + def test_filelock(self): + """ + Check that file lock doesn't get lost after an MDS restart + """ + lock_holder = self.mount_a.lock_background() + + self.mount_b.wait_for_visible("background_file-2") + 
self.mount_b.check_filelock() + + self.fs.mds_stop() + self.fs.mds_fail() + self.fs.mds_restart() + self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) + + self.mount_b.check_filelock() + + # Tear down the background process + lock_holder.stdin.close() + try: + lock_holder.wait() + except (CommandFailedError, ConnectionLostError): + # We killed it, so it raises an error + pass + + +class LogStream(object): + def __init__(self): + self.buffer = "" + + def write(self, data): + self.buffer += data + if "\n" in self.buffer: + lines = self.buffer.split("\n") + for line in lines[:-1]: + log.info(line) + self.buffer = lines[-1] + + def flush(self): + pass + + +class InteractiveFailureResult(unittest.TextTestResult): + """ + Specialization that implements interactive-on-error style + behavior. + """ + ctx = None + + def addFailure(self, test, err): + log.error(self._exc_info_to_string(err, test)) + log.error("Failure in test '{0}', going interactive".format( + self.getDescription(test) + )) + interactive.task(ctx=self.ctx, config=None) + + def addError(self, test, err): + log.error(self._exc_info_to_string(err, test)) + log.error("Error in test '{0}', going interactive".format( + self.getDescription(test) + )) + interactive.task(ctx=self.ctx, config=None) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Execute CephFS client recovery test suite. 
+ + Requires: + - An outer ceph_fuse task with at least two clients + - That the clients are on a separate host to the MDS + """ + fs = Filesystem(ctx) + + # Pick out the clients we will use from the configuration + # ======================================================= + if len(ctx.mounts) < 2: + raise RuntimeError("Need at least two clients") + mount_a = ctx.mounts.values()[0] + mount_b = ctx.mounts.values()[1] + + if not isinstance(mount_a, FuseMount) or not isinstance(mount_b, FuseMount): + # kclient kill() power cycles nodes, so requires clients to each be on + # their own node + if mount_a.client_remote.hostname == mount_b.client_remote.hostname: + raise RuntimeError("kclient clients must be on separate nodes") + + # Check we have at least one remote client for use with network-dependent tests + # ============================================================================= + if mount_a.client_remote.hostname in fs.get_mds_hostnames(): + raise RuntimeError("Require first client to on separate server from MDSs") + + # Stash references on ctx so that we can easily debug in interactive mode + # ======================================================================= + ctx.filesystem = fs + ctx.mount_a = mount_a + ctx.mount_b = mount_b + + run_tests(ctx, config, TestClientRecovery, { + "mds_reconnect_timeout": int(fs.mds_asok( + ['config', 'get', 'mds_reconnect_timeout'] + )['mds_reconnect_timeout']), + "mds_session_timeout": int(fs.mds_asok( + ['config', 'get', 'mds_session_timeout'] + )['mds_session_timeout']), + "ms_max_backoff": int(fs.mds_asok( + ['config', 'get', 'ms_max_backoff'] + )['ms_max_backoff']), + "fs": fs, + "mount_a": mount_a, + "mount_b": mount_b + }) + + # Continue to any downstream tasks + # ================================ + yield diff --git a/qa/tasks/mds_creation_failure.py b/qa/tasks/mds_creation_failure.py new file mode 100644 index 00000000000..d1de1569442 --- /dev/null +++ b/qa/tasks/mds_creation_failure.py @@ -0,0 +1,85 @@ + +import 
logging +import contextlib +import time +import ceph_manager +from teuthology import misc +from teuthology.orchestra.run import CommandFailedError, Raw + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Go through filesystem creation with a synthetic failure in an MDS + in its 'up:creating' state, to exercise the retry behaviour. + """ + # Grab handles to the teuthology objects of interest + mdslist = list(misc.all_roles_of_type(ctx.cluster, 'mds')) + if len(mdslist) != 1: + # Require exactly one MDS, the code path for creation failure when + # a standby is available is different + raise RuntimeError("This task requires exactly one MDS") + + mds_id = mdslist[0] + (mds_remote,) = ctx.cluster.only('mds.{_id}'.format(_id=mds_id)).remotes.iterkeys() + manager = ceph_manager.CephManager( + mds_remote, ctx=ctx, logger=log.getChild('ceph_manager'), + ) + + # Stop MDS + manager.raw_cluster_cmd('mds', 'set', "max_mds", "0") + mds = ctx.daemons.get_daemon('mds', mds_id) + mds.stop() + manager.raw_cluster_cmd('mds', 'fail', mds_id) + + # Reset the filesystem so that next start will go into CREATING + manager.raw_cluster_cmd('fs', 'rm', "default", "--yes-i-really-mean-it") + manager.raw_cluster_cmd('fs', 'new', "default", "metadata", "data") + + # Start the MDS with mds_kill_create_at set, it will crash during creation + mds.restart_with_args(["--mds_kill_create_at=1"]) + try: + mds.wait_for_exit() + except CommandFailedError as e: + if e.exitstatus == 1: + log.info("MDS creation killed as expected") + else: + log.error("Unexpected status code %s" % e.exitstatus) + raise + + # Since I have intentionally caused a crash, I will clean up the resulting core + # file to avoid task.internal.coredump seeing it as a failure. 
+ log.info("Removing core file from synthetic MDS failure") + mds_remote.run(args=['rm', '-f', Raw("{archive}/coredump/*.core".format(archive=misc.get_archive_dir(ctx)))]) + + # It should have left the MDS map state still in CREATING + status = manager.get_mds_status(mds_id) + assert status['state'] == 'up:creating' + + # Start the MDS again without the kill flag set, it should proceed with creation successfully + mds.restart() + + # Wait for state ACTIVE + t = 0 + create_timeout = 120 + while True: + status = manager.get_mds_status(mds_id) + if status['state'] == 'up:active': + log.info("MDS creation completed successfully") + break + elif status['state'] == 'up:creating': + log.info("MDS still in creating state") + if t > create_timeout: + log.error("Creating did not complete within %ss" % create_timeout) + raise RuntimeError("Creating did not complete within %ss" % create_timeout) + t += 1 + time.sleep(1) + else: + log.error("Unexpected MDS state: %s" % status['state']) + assert(status['state'] in ['up:active', 'up:creating']) + + # The system should be back up in a happy healthy state, go ahead and run any further tasks + # inside this context. 
+ yield diff --git a/qa/tasks/mds_flush.py b/qa/tasks/mds_flush.py new file mode 100644 index 00000000000..458de83aefa --- /dev/null +++ b/qa/tasks/mds_flush.py @@ -0,0 +1,136 @@ +import contextlib +from textwrap import dedent +from tasks.cephfs.cephfs_test_case import run_tests, CephFSTestCase +from tasks.cephfs.filesystem import Filesystem, ObjectNotFound, ROOT_INO + + +class TestFlush(CephFSTestCase): + def test_flush(self): + self.mount_a.run_shell(["mkdir", "mydir"]) + self.mount_a.run_shell(["touch", "mydir/alpha"]) + dir_ino = self.mount_a.path_to_ino("mydir") + file_ino = self.mount_a.path_to_ino("mydir/alpha") + + # Unmount the client so that it isn't still holding caps + self.mount_a.umount_wait() + + # Before flush, the dirfrag object does not exist + with self.assertRaises(ObjectNotFound): + self.fs.list_dirfrag(dir_ino) + + # Before flush, the file's backtrace has not been written + with self.assertRaises(ObjectNotFound): + self.fs.read_backtrace(file_ino) + + # Before flush, there are no dentries in the root + self.assertEqual(self.fs.list_dirfrag(ROOT_INO), []) + + # Execute flush + flush_data = self.fs.mds_asok(["flush", "journal"]) + self.assertEqual(flush_data['return_code'], 0) + + # After flush, the dirfrag object has been created + dir_list = self.fs.list_dirfrag(dir_ino) + self.assertEqual(dir_list, ["alpha_head"]) + + # And the 'mydir' dentry is in the root + self.assertEqual(self.fs.list_dirfrag(ROOT_INO), ['mydir_head']) + + # ...and the data object has its backtrace + backtrace = self.fs.read_backtrace(file_ino) + self.assertEqual(['alpha', 'mydir'], [a['dname'] for a in backtrace['ancestors']]) + self.assertEqual([dir_ino, 1], [a['dirino'] for a in backtrace['ancestors']]) + self.assertEqual(file_ino, backtrace['ino']) + + # ...and the journal is truncated to just a single subtreemap from the + # newly created segment + summary_output = self.fs.journal_tool(["event", "get", "summary"]) + try: + self.assertEqual(summary_output, + dedent( + 
""" + Events by type: + SUBTREEMAP: 1 + Errors: 0 + """ + ).strip()) + except AssertionError: + # In some states, flushing the journal will leave you + # an extra event from locks a client held. This is + # correct behaviour: the MDS is flushing the journal, + # it's just that new events are getting added too. + # In this case, we should nevertheless see a fully + # empty journal after a second flush. + self.assertEqual(summary_output, + dedent( + """ + Events by type: + SUBTREEMAP: 1 + UPDATE: 1 + Errors: 0 + """ + ).strip()) + flush_data = self.fs.mds_asok(["flush", "journal"]) + self.assertEqual(flush_data['return_code'], 0) + self.assertEqual(self.fs.journal_tool(["event", "get", "summary"]), + dedent( + """ + Events by type: + SUBTREEMAP: 1 + Errors: 0 + """ + ).strip()) + + # Now for deletion! + self.mount_a.mount() + self.mount_a.wait_until_mounted() + self.mount_a.run_shell(["rm", "-rf", "mydir"]) + + # We will count the RADOS deletions and MDS file purges, to verify that + # the expected behaviour is happening as a result of the purge + initial_dels = self.fs.mds_asok(['perf', 'dump'])['objecter']['osdop_delete'] + initial_purges = self.fs.mds_asok(['perf', 'dump'])['mds_cache']['strays_purged'] + + flush_data = self.fs.mds_asok(["flush", "journal"]) + self.assertEqual(flush_data['return_code'], 0) + + # We expect to see a single file purge + self.wait_until_true( + lambda: self.fs.mds_asok(['perf', 'dump'])['mds_cache']['strays_purged'] - initial_purges >= 1, + 60) + + # We expect two deletions, one of the dirfrag and one of the backtrace + self.wait_until_true( + lambda: self.fs.mds_asok(['perf', 'dump'])['objecter']['osdop_delete'] - initial_dels >= 2, + 60) # timeout is fairly long to allow for tick+rados latencies + + with self.assertRaises(ObjectNotFound): + self.fs.list_dirfrag(dir_ino) + with self.assertRaises(ObjectNotFound): + self.fs.read_backtrace(file_ino) + self.assertEqual(self.fs.list_dirfrag(ROOT_INO), []) + + +@contextlib.contextmanager 
+def task(ctx, config): + fs = Filesystem(ctx) + + # Pick out the clients we will use from the configuration + # ======================================================= + if len(ctx.mounts) < 1: + raise RuntimeError("Need at least one client") + mount = ctx.mounts.values()[0] + + # Stash references on ctx so that we can easily debug in interactive mode + # ======================================================================= + ctx.filesystem = fs + ctx.mount = mount + + run_tests(ctx, config, TestFlush, { + 'fs': fs, + 'mount_a': mount, + }) + + # Continue to any downstream tasks + # ================================ + yield diff --git a/qa/tasks/mds_full.py b/qa/tasks/mds_full.py new file mode 100644 index 00000000000..c984d1a9bf7 --- /dev/null +++ b/qa/tasks/mds_full.py @@ -0,0 +1,363 @@ + +""" +Exercise the MDS and Client behaviour when the cluster fills up. +""" + +import contextlib +import json +import logging +import os +from textwrap import dedent +import time +from teuthology.orchestra.run import CommandFailedError + +from tasks.cephfs.filesystem import Filesystem +from tasks.cephfs.cephfs_test_case import CephFSTestCase, run_tests + + +log = logging.getLogger(__name__) + + +class TestClusterFull(CephFSTestCase): + # Persist-between-tests constants + pool_capacity = None + + def setUp(self): + super(TestClusterFull, self).setUp() + + if self.pool_capacity is None: + # This is a hack to overcome weird fluctuations in the reported + # `max_avail` attribute of pools that sometimes occurs in between + # tests (reason as yet unclear, but this dodges the issue) + TestClusterFull.pool_capacity = self.fs.get_pool_df(self._data_pool_name())['max_avail'] + + def test_barrier(self): + """ + That when an OSD epoch barrier is set on an MDS, subsequently + issued capabilities cause clients to update their OSD map to that + epoch. 
+ """ + + # Check the initial barrier epoch on the MDS: this should be + # set to the latest map at MDS startup + initial_osd_epoch = json.loads( + self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json").strip() + )['epoch'] + self.assertGreaterEqual(self.fs.mds_asok(["status"])['osdmap_epoch_barrier'], initial_osd_epoch) + + # Sync up clients with initial MDS OSD map barrier + self.mount_a.open_no_data("foo") + self.mount_b.open_no_data("bar") + + # Grab mount_a's initial OSD epoch: later we will check that + # it hasn't advanced beyond this point. + mount_a_initial_epoch = self.mount_a.get_osd_epoch()[0] + + # Freshly mounted at start of test, should be up to date with OSD map + self.assertGreaterEqual(mount_a_initial_epoch, initial_osd_epoch) + self.assertGreaterEqual(self.mount_b.get_osd_epoch()[0], initial_osd_epoch) + + # Set and unset a flag to cause OSD epoch to increment + self.fs.mon_manager.raw_cluster_cmd("osd", "set", "pause") + self.fs.mon_manager.raw_cluster_cmd("osd", "unset", "pause") + + out = self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json").strip() + new_epoch = json.loads(out)['epoch'] + self.assertNotEqual(initial_osd_epoch, new_epoch) + + # Do a metadata operation on client A, witness that it ends up with + # the old OSD map from startup time (nothing has prompted it + # to update its map) + self.mount_a.open_no_data("alpha") + + # Sleep long enough that if the OSD map was propagating it would + # have done so (this is arbitrary because we are 'waiting' for something + # to *not* happen). 
+ time.sleep(30) + + mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch() + self.assertEqual(mount_a_epoch, mount_a_initial_epoch) + + # Set a barrier on the MDS + self.fs.mds_asok(["osdmap", "barrier", new_epoch.__str__()]) + + # Do an operation on client B, witness that it ends up with + # the latest OSD map from the barrier + self.mount_b.run_shell(["touch", "bravo"]) + self.mount_b.open_no_data("bravo") + + # Some time passes here because the metadata part of the operation + # completes immediately, while the resulting OSD map update happens + # asynchronously (it's an Objecter::_maybe_request_map) as a result + # of seeing the new epoch barrier. + self.wait_until_equal( + lambda: self.mount_b.get_osd_epoch(), + (new_epoch, new_epoch), + 30, + lambda x: x[0] > new_epoch or x[1] > new_epoch) + + # ...and none of this should have affected the oblivious mount a, + # because it wasn't doing any data or metadata IO + mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch() + self.assertEqual(mount_a_epoch, mount_a_initial_epoch) + + def _data_pool_name(self): + data_pool_names = self.fs.get_data_pool_names() + if len(data_pool_names) > 1: + raise RuntimeError("This test can't handle multiple data pools") + else: + return data_pool_names[0] + + def _test_full(self, easy_case): + """ + - That a client trying to write data to a file is prevented + from doing so with an -EFULL result + - That they are also prevented from creating new files by the MDS. + - That they may delete another file to get the system healthy again + + :param easy_case: if true, delete a successfully written file to + free up space. else, delete the file that experienced + the failed write. 
+ """ + + osd_mon_report_interval_max = int(self.fs.get_config("osd_mon_report_interval_max", service_type='osd')) + mon_osd_full_ratio = float(self.fs.get_config("mon_osd_full_ratio")) + + pool_capacity = self.pool_capacity + fill_mb = int(1.05 * mon_osd_full_ratio * (pool_capacity / (1024.0 * 1024.0))) + 2 + + log.info("Writing {0}MB should fill this cluster".format(fill_mb)) + + # Fill up the cluster. This dd may or may not fail, as it depends on + # how soon the cluster recognises its own fullness + self.mount_a.write_n_mb("large_file_a", fill_mb / 2) + try: + self.mount_a.write_n_mb("large_file_b", fill_mb / 2) + except CommandFailedError: + log.info("Writing file B failed (full status happened already)") + assert self.fs.is_full() + else: + log.info("Writing file B succeeded (full status will happen soon)") + self.wait_until_true(lambda: self.fs.is_full(), + timeout=osd_mon_report_interval_max * 5) + + # Attempting to write more data should give me ENOSPC + with self.assertRaises(CommandFailedError) as ar: + self.mount_a.write_n_mb("large_file_b", 50, seek=fill_mb / 2) + self.assertEqual(ar.exception.exitstatus, 1) # dd returns 1 on "No space" + + # Wait for the MDS to see the latest OSD map so that it will reliably + # be applying the policy of rejecting non-deletion metadata operations + # while in the full state. + osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch'] + self.wait_until_true( + lambda: self.fs.mds_asok(['status'])['osdmap_epoch'] >= osd_epoch, + timeout=10) + + with self.assertRaises(CommandFailedError): + self.mount_a.write_n_mb("small_file_1", 0) + + # Clear out some space + if easy_case: + self.mount_a.run_shell(['rm', '-f', 'large_file_a']) + self.mount_a.run_shell(['rm', '-f', 'large_file_b']) + else: + # In the hard case it is the file that filled the system. 
+ # Before the new #7317 (ENOSPC, epoch barrier) changes, this + # would fail because the last objects written would be + # stuck in the client cache as objecter operations. + self.mount_a.run_shell(['rm', '-f', 'large_file_b']) + self.mount_a.run_shell(['rm', '-f', 'large_file_a']) + + # Here we are waiting for two things to happen: + # * The MDS to purge the stray folder and execute object deletions + # * The OSDs to inform the mon that they are no longer full + self.wait_until_true(lambda: not self.fs.is_full(), + timeout=osd_mon_report_interval_max * 5) + + # Wait for the MDS to see the latest OSD map so that it will reliably + # be applying the free space policy + osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch'] + self.wait_until_true( + lambda: self.fs.mds_asok(['status'])['osdmap_epoch'] >= osd_epoch, + timeout=10) + + # Now I should be able to write again + self.mount_a.write_n_mb("large_file", 50, seek=0) + + # Ensure that the MDS keeps its OSD epoch barrier across a restart + + def test_full_different_file(self): + self._test_full(True) + + def test_full_same_file(self): + self._test_full(False) + + def _remote_write_test(self, template): + """ + Run some remote python in a way that's useful for + testing free space behaviour (see test_* methods using this) + """ + file_path = os.path.join(self.mount_a.mountpoint, "full_test_file") + + # Enough to trip the full flag + osd_mon_report_interval_max = int(self.fs.get_config("osd_mon_report_interval_max", service_type='osd')) + mon_osd_full_ratio = float(self.fs.get_config("mon_osd_full_ratio")) + pool_capacity = self.pool_capacity + + # Sufficient data to cause RADOS cluster to go 'full' + fill_mb = int(1.05 * mon_osd_full_ratio * (pool_capacity / (1024.0 * 1024.0))) + log.info("pool capacity {0}, {1}MB should be enough to fill it".format(pool_capacity, fill_mb)) + + # Long enough for RADOS cluster to notice it is full and set flag on mons + full_wait 
= osd_mon_report_interval_max * 1.5 + + # Configs for this test should bring this setting down in order to + # run reasonably quickly + if osd_mon_report_interval_max > 10: + log.warn("This test may run rather slowly unless you decrease " + "osd_mon_report_interval_max (5 is a good setting)!") + + self.mount_a.run_python(template.format( + fill_mb=fill_mb, + file_path=file_path, + full_wait=full_wait + )) + + def test_full_fclose(self): + # A remote script which opens a file handle, fills up the filesystem, and then + # checks that ENOSPC errors on buffered writes appear correctly as errors in close() + remote_script = dedent(""" + import time + import datetime + import subprocess + import os + + # Write some buffered data through before going full, all should be well + bytes = 0 + f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT) + bytes += os.write(f, 'a' * 4096) + os.fsync(f) + + # Okay, now we're going to fill up the filesystem, and then keep + # writing until we see an error from fsync. 
As long as we're doing + # buffered IO, the error should always only appear from fsync and not + # from write + full = False + + for n in range(0, {fill_mb}): + bytes += os.write(f, 'x' * 1024 * 1024) + + # OK, now we should sneak in under the full condition + # due to the time it takes the OSDs to report to the + # mons, and get a successful fsync on our full-making data + os.fsync(f) + + # Now wait for the full flag to get set so that our + # next flush IO will fail + time.sleep(30) + + # A buffered IO, should succeed + os.write(f, 'x' * 4096) + + # Wait long enough for a background flush that should fail + time.sleep(30) + + # ...and check that the failed background flush is reflected in fclose + try: + os.close(f) + except OSError: + print "close() returned an error as expected" + else: + raise RuntimeError("close() failed to raise error") + + os.unlink("{file_path}") + """) + self._remote_write_test(remote_script) + + def test_full_fsync(self): + """ + That when the full flag is encountered during asynchronous + flushes, such that an fwrite() succeeds but an fsync/fclose() + should return the ENOSPC error. + """ + + # A remote script which opens a file handle, fills up the filesystem, and then + # checks that ENOSPC errors on buffered writes appear correctly as errors in fsync + remote_script = dedent(""" + import time + import datetime + import subprocess + import os + + # Write some buffered data through before going full, all should be well + bytes = 0 + f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT) + bytes += os.write(f, 'a' * 4096) + os.fsync(f) + + # Okay, now we're going to fill up the filesystem, and then keep + # writing until we see an error from fsync. 
As long as we're doing + # buffered IO, the error should always only appear from fsync and not + # from write + full = False + + for n in range(0, {fill_mb} + 1): + bytes += os.write(f, 'x' * 1024 * 1024) + try: + os.fsync(f) + except OSError as e: + print "Reached fullness after %.2f MB" % (bytes / (1024.0 * 1024.0)) + full = True + break + else: + print "Not full yet after %.2f MB" % (bytes / (1024.0 * 1024.0)) + + if n > {fill_mb} * 0.8: + # Be cautious in the last region where we expect to hit + # the full condition, so that we don't overshoot too dramatically + time.sleep({full_wait}) + + if not full: + raise RuntimeError("Failed to reach fullness after writing %d bytes" % bytes) + + # The error sticks to the inode until we dispose of it + try: + os.close(f) + except OSError: + print "Saw error from close() as expected" + else: + raise RuntimeError("Did not see expected error from close()") + + os.unlink("{file_path}") + """) + + self._remote_write_test(remote_script) + + +@contextlib.contextmanager +def task(ctx, config): + fs = Filesystem(ctx) + + # Pick out the clients we will use from the configuration + # ======================================================= + if len(ctx.mounts) < 2: + raise RuntimeError("Need at least two clients") + mount_a = ctx.mounts.values()[0] + mount_b = ctx.mounts.values()[1] + + # Stash references on ctx so that we can easily debug in interactive mode + # ======================================================================= + ctx.filesystem = fs + ctx.mount_a = mount_a + ctx.mount_b = mount_b + + run_tests(ctx, config, TestClusterFull, { + 'fs': fs, + 'mount_a': mount_a, + 'mount_b': mount_b + }) + + # Continue to any downstream tasks + # ================================ + yield diff --git a/qa/tasks/mds_journal_migration.py b/qa/tasks/mds_journal_migration.py new file mode 100644 index 00000000000..992186e67c5 --- /dev/null +++ b/qa/tasks/mds_journal_migration.py @@ -0,0 +1,132 @@ +from StringIO import StringIO +import 
contextlib +import logging +from teuthology import misc + +from tasks.workunit import task as workunit +from cephfs.filesystem import Filesystem + +log = logging.getLogger(__name__) + + +JOURNAL_FORMAT_LEGACY = 0 +JOURNAL_FORMAT_RESILIENT = 1 + + +@contextlib.contextmanager +def task(ctx, config): + """ + Given a Ceph cluster has already been set up, exercise the migration + of the CephFS journal from an older format to the latest format. On + successful completion the filesystem will be running with a journal + in the new format. + + Optionally specify which client to use like this: + + - mds-journal_migration: + client: client.0 + + """ + if not hasattr(ctx, 'ceph'): + raise RuntimeError("This task must be nested in 'ceph' task") + + if not hasattr(ctx, 'mounts'): + raise RuntimeError("This task must be nested inside 'kclient' or 'ceph_fuse' task") + + # Determine which client we will use + if config and 'client' in config: + # Use client specified in config + client_role = config['client'] + client_list = list(misc.get_clients(ctx, [client_role])) + try: + client_id = client_list[0][0] + except IndexError: + raise RuntimeError("Client role '{0}' not found".format(client_role)) + else: + # Pick one arbitrary client to use + client_list = list(misc.all_roles_of_type(ctx.cluster, 'client')) + try: + client_id = client_list[0] + except IndexError: + raise RuntimeError("This task requires at least one client") + + fs = Filesystem(ctx) + ctx.fs = fs + old_journal_version = JOURNAL_FORMAT_LEGACY + new_journal_version = JOURNAL_FORMAT_RESILIENT + + fs.set_ceph_conf('mds', 'mds journal format', old_journal_version) + + # Create a filesystem using the older journal format. + for mount in ctx.mounts.values(): + mount.umount_wait() + fs.mds_stop() + fs.reset() + fs.mds_restart() + + # Do some client work so that the log is populated with something. 
+ mount = ctx.mounts[client_id] + with mount.mounted(): + mount.create_files() + mount.check_files() # sanity, this should always pass + + # Run a more substantial workunit so that the length of the log to be + # converted is going to span at least a few segments + workunit(ctx, { + 'clients': { + "client.{0}".format(client_id): ["suites/fsstress.sh"], + }, + "timeout": "3h" + }) + + # Modify the ceph.conf to ask the MDS to use the new journal format. + fs.set_ceph_conf('mds', 'mds journal format', new_journal_version) + + # Restart the MDS. + fs.mds_fail_restart() + fs.wait_for_daemons() + + # This ensures that all daemons come up into a valid state + fs.wait_for_daemons() + + # Check that files created in the initial client workload are still visible + # in a client mount. + with mount.mounted(): + mount.check_files() + + # Verify that the journal really has been rewritten. + journal_version = fs.get_journal_version() + if journal_version != new_journal_version: + raise RuntimeError("Journal was not upgraded, version should be {0} but is {1}".format( + new_journal_version, journal_version + )) + + # Verify that cephfs-journal-tool can now read the rewritten journal + proc = mount.client_remote.run( + args=["cephfs-journal-tool", "journal", "inspect"], + stdout=StringIO()) + if not proc.stdout.getvalue().strip().endswith(": OK"): + raise RuntimeError("Unexpected journal-tool result: '{0}'".format( + proc.stdout.getvalue() + )) + + mount.client_remote.run( + args=["sudo", "cephfs-journal-tool", "event", "get", "json", "--path", "/tmp/journal.json"]) + proc = mount.client_remote.run( + args=[ + "python", + "-c", + "import json; print len(json.load(open('/tmp/journal.json')))" + ], + stdout=StringIO()) + event_count = int(proc.stdout.getvalue().strip()) + if event_count < 1000: + # Approximate value of "lots", expected from having run fsstress + raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count)) + + # Leave all MDSs and clients running for 
any child tasks + for mount in ctx.mounts.values(): + mount.mount() + mount.wait_until_mounted() + + yield diff --git a/qa/tasks/mds_journal_repair.py b/qa/tasks/mds_journal_repair.py new file mode 100644 index 00000000000..8195c314ca4 --- /dev/null +++ b/qa/tasks/mds_journal_repair.py @@ -0,0 +1,362 @@ + +""" +Test our tools for recovering the content of damaged journals +""" + +import contextlib +import json +import logging +from textwrap import dedent +import time +from teuthology.orchestra.run import CommandFailedError +from tasks.cephfs.filesystem import Filesystem, ObjectNotFound, ROOT_INO +from tasks.cephfs.cephfs_test_case import CephFSTestCase, run_tests + + +log = logging.getLogger(__name__) + + +class TestJournalRepair(CephFSTestCase): + def test_inject_to_empty(self): + """ + That when some dentries are in the journal but nothing is in + the backing store, we correctly populate the backing store + from the journalled dentries. + """ + + # Inject metadata operations + self.mount_a.run_shell(["touch", "rootfile"]) + self.mount_a.run_shell(["mkdir", "subdir"]) + self.mount_a.run_shell(["touch", "subdir/subdirfile"]) + # There are several different paths for handling hardlinks, depending + # on whether an existing dentry (being overwritten) is also a hardlink + self.mount_a.run_shell(["mkdir", "linkdir"]) + + # Test inode -> remote transition for a dentry + self.mount_a.run_shell(["touch", "linkdir/link0"]) + self.mount_a.run_shell(["rm", "-f", "linkdir/link0"]) + self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link0"]) + + # Test nothing -> remote transition + self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link1"]) + + # Test remote -> inode transition + self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link2"]) + self.mount_a.run_shell(["rm", "-f", "linkdir/link2"]) + self.mount_a.run_shell(["touch", "linkdir/link2"]) + + # Test remote -> diff remote transition + self.mount_a.run_shell(["ln", "subdir/subdirfile", 
"linkdir/link3"]) + self.mount_a.run_shell(["rm", "-f", "linkdir/link3"]) + self.mount_a.run_shell(["ln", "rootfile", "linkdir/link3"]) + + # Before we unmount, make a note of the inode numbers, later we will + # check that they match what we recover from the journal + rootfile_ino = self.mount_a.path_to_ino("rootfile") + subdir_ino = self.mount_a.path_to_ino("subdir") + linkdir_ino = self.mount_a.path_to_ino("linkdir") + subdirfile_ino = self.mount_a.path_to_ino("subdir/subdirfile") + + self.mount_a.umount_wait() + + # Stop the MDS + self.fs.mds_stop() + self.fs.mds_fail() + + # Now, the journal should contain the operations, but the backing + # store shouldn't + with self.assertRaises(ObjectNotFound): + self.fs.list_dirfrag(subdir_ino) + self.assertEqual(self.fs.list_dirfrag(ROOT_INO), []) + + # Execute the dentry recovery, this should populate the backing store + self.fs.journal_tool(['event', 'recover_dentries', 'list']) + + # Dentries in ROOT_INO are present + self.assertEqual(sorted(self.fs.list_dirfrag(ROOT_INO)), sorted(['rootfile_head', 'subdir_head', 'linkdir_head'])) + self.assertEqual(self.fs.list_dirfrag(subdir_ino), ['subdirfile_head']) + self.assertEqual(sorted(self.fs.list_dirfrag(linkdir_ino)), + sorted(['link0_head', 'link1_head', 'link2_head', 'link3_head'])) + + # Now check the MDS can read what we wrote: truncate the journal + # and start the mds. + self.fs.journal_tool(['journal', 'reset']) + self.fs.mds_restart() + self.fs.wait_for_daemons() + + # List files + self.mount_a.mount() + self.mount_a.wait_until_mounted() + + # First ls -R to populate MDCache, such that hardlinks will + # resolve properly (recover_dentries does not create backtraces, + # so ordinarily hardlinks to inodes that happen not to have backtraces + # will be invisible in readdir). 
+ # FIXME: hook in forward scrub here to regenerate backtraces + proc = self.mount_a.run_shell(['ls', '-R']) + + proc = self.mount_a.run_shell(['ls', '-R']) + self.assertEqual(proc.stdout.getvalue().strip(), + dedent(""" + .: + linkdir + rootfile + subdir + + ./linkdir: + link0 + link1 + link2 + link3 + + ./subdir: + subdirfile + """).strip()) + + # Check the correct inos were preserved by path + self.assertEqual(rootfile_ino, self.mount_a.path_to_ino("rootfile")) + self.assertEqual(subdir_ino, self.mount_a.path_to_ino("subdir")) + self.assertEqual(subdirfile_ino, self.mount_a.path_to_ino("subdir/subdirfile")) + + # Check that the hard link handling came out correctly + self.assertEqual(self.mount_a.path_to_ino("linkdir/link0"), subdirfile_ino) + self.assertEqual(self.mount_a.path_to_ino("linkdir/link1"), subdirfile_ino) + self.assertNotEqual(self.mount_a.path_to_ino("linkdir/link2"), subdirfile_ino) + self.assertEqual(self.mount_a.path_to_ino("linkdir/link3"), rootfile_ino) + + # Create a new file, ensure it is not issued the same ino as one of the + # recovered ones + self.mount_a.run_shell(["touch", "afterwards"]) + new_ino = self.mount_a.path_to_ino("afterwards") + self.assertNotIn(new_ino, [rootfile_ino, subdir_ino, subdirfile_ino]) + + def test_reset(self): + """ + That after forcibly modifying the backing store, we can get back into + a good state by resetting the MDSMap. + + The scenario is that we have two active MDSs, and we lose the journals. Once + we have completely lost confidence in the integrity of the metadata, we want to + return the system to a single-MDS state to go into a scrub to recover what we + can. 
+ """ + + # Set max_mds to 2 + self.fs.mon_manager.raw_cluster_cmd_result('mds', 'set', "max_mds", "2") + + # See that we have two active MDSs + self.wait_until_equal(lambda: len(self.fs.get_active_names()), 2, 30, + reject_fn=lambda v: v > 2 or v < 1) + active_mds_names = self.fs.get_active_names() + + # Do a bunch of I/O such that at least some will hit the second MDS: create + # lots of directories so that the balancer should find it easy to make a decision + # to allocate some of them to the second mds. + spammers = [] + for n in range(0, 16): + dir_name = "spam_{0}".format(n) + spammers.append(self.mount_a.spam_dir_background(dir_name)) + + def subtrees_assigned(): + got_subtrees = self.fs.mds_asok(["get", "subtrees"], mds_id=active_mds_names[0]) + rank_1_count = len([s for s in got_subtrees if s['auth_first'] == 1]) + + # Greater than 1, because there is typically 1 for ~mds1, and once it + # has been assigned something in addition to that it means it has been + # assigned a "real" subtree. + return rank_1_count > 1 + + # We are waiting for the MDS to respond to hot directories, which + # is not guaranteed to happen at a particular time, so a lengthy timeout here. + self.wait_until_true(subtrees_assigned, 600) + + # Flush the journals so that we have some backing store data + # belonging to one MDS, and some to the other MDS. 
+ for mds_name in active_mds_names: + self.fs.mds_asok(["flush", "journal"], mds_name) + + # Stop (hard) the second MDS daemon + self.fs.mds_stop(active_mds_names[1]) + + # Wipe out the tables for MDS rank 1 so that it is broken and can't start + # (this is the simulated failure that we will demonstrate that the disaster + # recovery tools can get us back from) + self.fs.erase_metadata_objects(prefix="mds1_") + + # Try to access files from the client + blocked_ls = self.mount_a.run_shell(["ls", "-R"], wait=False) + + # Check that this "ls -R" blocked rather than completing: indicates + # it got stuck trying to access subtrees which were on the now-dead MDS. + log.info("Sleeping to check ls is blocked...") + time.sleep(60) + self.assertFalse(blocked_ls.finished) + + # This mount is now useless because it will depend on MDS rank 1, and MDS rank 1 + # is not coming back. Kill it. + log.info("Killing mount, it's blocked on the MDS we killed") + self.mount_a.kill() + self.mount_a.kill_cleanup() + try: + # Now that the mount is dead, the ls -R should error out. 
+ blocked_ls.wait() + except CommandFailedError: + pass + + log.info("Terminating spammer processes...") + for spammer_proc in spammers: + spammer_proc.stdin.close() + try: + spammer_proc.wait() + except CommandFailedError: + pass + + # See that the second MDS will crash when it starts and tries to + # acquire rank 1 + self.fs.mds_restart(active_mds_names[1]) + crasher = self.fs.mds_daemons[active_mds_names[1]].proc + + try: + crasher.wait() + except CommandFailedError as e: + log.info("MDS '{0}' crashed with status {1} as expected".format(active_mds_names[1], e.exitstatus)) + self.fs.mds_daemons[active_mds_names[1]].proc = None + else: + raise RuntimeError("MDS daemon '{0}' did not crash as expected".format(active_mds_names[1])) + + # Now it's crashed, let the MDSMonitor know that it's not coming back + self.fs.mds_fail(active_mds_names[1]) + + # Now give up and go through a disaster recovery procedure + self.fs.mds_stop(active_mds_names[0]) + self.fs.mds_fail(active_mds_names[0]) + # Invoke recover_dentries quietly, because otherwise log spews millions of lines + self.fs.journal_tool(["event", "recover_dentries", "summary"], rank=0, quiet=True) + self.fs.journal_tool(["event", "recover_dentries", "summary"], rank=1, quiet=True) + self.fs.table_tool(["0", "reset", "session"]) + self.fs.journal_tool(["journal", "reset"], rank=0) + self.fs.erase_mds_objects(1) + self.fs.admin_remote.run(args=['sudo', 'ceph', 'fs', 'reset', 'default', '--yes-i-really-mean-it']) + + # Bring an MDS back online, mount a client, and see that we can walk the full + # filesystem tree again + self.fs.mds_restart(active_mds_names[0]) + self.wait_until_equal(lambda: self.fs.get_active_names(), [active_mds_names[0]], 30, + reject_fn=lambda v: len(v) > 1) + self.mount_a.mount() + self.mount_a.run_shell(["ls", "-R"], wait=True) + + def test_table_tool(self): + active_mdss = self.fs.get_active_names() + self.assertEqual(len(active_mdss), 1) + mds_name = active_mdss[0] + + 
self.mount_a.run_shell(["touch", "foo"]) + self.fs.mds_asok(["flush", "journal"], mds_name) + + log.info(self.fs.table_tool(["all", "show", "inode"])) + log.info(self.fs.table_tool(["all", "show", "snap"])) + log.info(self.fs.table_tool(["all", "show", "session"])) + + # Inode table should always be the same because initial state + # and choice of inode are deterministic. + # Should see one inode consumed + self.assertEqual( + json.loads(self.fs.table_tool(["all", "show", "inode"])), + {"0": { + "data": { + "version": 2, + "inotable": { + "projected_free": [ + {"start": 1099511628777, + "len": 1099511626775}], + "free": [ + {"start": 1099511628777, + "len": 1099511626775}]}}, + "result": 0}} + + ) + + # Should see one session + session_data = json.loads(self.fs.table_tool( + ["all", "show", "session"])) + self.assertEqual(len(session_data["0"]["data"]["Sessions"]), 1) + self.assertEqual(session_data["0"]["result"], 0) + + # Should see no snaps + self.assertEqual( + json.loads(self.fs.table_tool(["all", "show", "snap"])), + {"version": 0, + "snapserver": {"last_snap": 1, + "pending_noop": [], + "snaps": [], + "need_to_purge": {}, + "pending_create": [], + "pending_destroy": []}, + "result": 0} + ) + + # Reset everything + for table in ["session", "inode", "snap"]: + self.fs.table_tool(["all", "reset", table]) + + log.info(self.fs.table_tool(["all", "show", "inode"])) + log.info(self.fs.table_tool(["all", "show", "snap"])) + log.info(self.fs.table_tool(["all", "show", "session"])) + + # Should see 0 sessions + session_data = json.loads(self.fs.table_tool( + ["all", "show", "session"])) + self.assertEqual(len(session_data["0"]["data"]["Sessions"]), 0) + self.assertEqual(session_data["0"]["result"], 0) + + # Should see entire inode range now marked free + self.assertEqual( + json.loads(self.fs.table_tool(["all", "show", "inode"])), + {"0": {"data": {"version": 1, + "inotable": {"projected_free": [ + {"start": 1099511627776, + "len": 1099511627776}], + "free": [ + 
@contextlib.contextmanager
def task(ctx, config):
    """
    Run the TestJournalRepair test case against the running filesystem.

    :param ctx: teuthology run context; must carry at least one entry in
                ``ctx.mounts``.
    :param config: task configuration, passed through to ``run_tests``.
    :raises RuntimeError: if no client mount is available.
    """
    fs = Filesystem(ctx)

    # Pick out the clients we will use from the configuration
    # =======================================================
    if len(ctx.mounts) < 1:
        raise RuntimeError("Need at least one clients")
    # list() so this keeps working where values() returns a view rather
    # than an indexable list.
    mount_a = list(ctx.mounts.values())[0]

    # Stash references on ctx so that we can easily debug in interactive mode
    # =======================================================================
    ctx.filesystem = fs
    ctx.mount_a = mount_a

    run_tests(ctx, config, TestJournalRepair, {
        'fs': fs,
        'mount_a': mount_a
    })

    # Continue to any downstream tasks
    # ================================
    yield
+ + Usage: + mds_scrub_checks: + mds_rank: 0 + path: path/to/test/dir + client: 0 + run_seq: [0-9]+ + + Increment the run_seq on subsequent invocations within a single test run; + it uses that value to generate unique folder and file names. + """ + + mds_rank = config.get("mds_rank") + test_path = config.get("path") + run_seq = config.get("run_seq") + client_id = config.get("client") + + if mds_rank is None or test_path is None or run_seq is None: + raise ValueError("Must specify each of mds_rank, test_path, run_seq," + "client_id in config!") + + teuthdir = teuthology.get_testdir(ctx) + client_path = "{teuthdir}/mnt.{id_}/{test_path}".\ + format(teuthdir=teuthdir, + id_=client_id, + test_path=test_path) + + log.info("Cloning repo into place (if not present)") + repo_path = clone_repo(ctx, client_id, client_path) + + log.info("Initiating mds_scrub_checks on mds.{id_}, " + "test_path {path}, run_seq {seq}".format( + id_=mds_rank, path=test_path, seq=run_seq)) + + def json_validator(json, rc, element, expected_value): + if (rc != 0): + return False, "asok command returned error {rc}".format(rc=str(rc)) + element_value = json.get(element) + if element_value != expected_value: + return False, "unexpectedly got {jv} instead of {ev}!".format( + jv=element_value, ev=expected_value) + return True, "Succeeded" + + success_validator = lambda j, r: json_validator(j, r, "return_code", 0) + + nep = "{test_path}/i/dont/exist".format(test_path=test_path) + command = "flush_path {nep}".format(nep=nep) + asok_command(ctx, mds_rank, command, + lambda j, r: json_validator(j, r, "return_code", -2), + filesystem) + + command = "scrub_path {nep}".format(nep=nep) + asok_command(ctx, mds_rank, command, + lambda j, r: json_validator(j, r, "return_code", -2), + filesystem) + + test_repo_path = "{test_path}/ceph-qa-suite".format(test_path=test_path) + dirpath = "{repo_path}/suites".format(repo_path=test_repo_path) + + if (run_seq == 0): + log.info("First run: flushing 
{dirpath}".format(dirpath=dirpath)) + command = "flush_path {dirpath}".format(dirpath=dirpath) + asok_command(ctx, mds_rank, command, success_validator, filesystem) + command = "scrub_path {dirpath}".format(dirpath=dirpath) + asok_command(ctx, mds_rank, command, success_validator, filesystem) + + filepath = "{repo_path}/suites/fs/verify/validater/valgrind.yaml".format( + repo_path=test_repo_path) + if (run_seq == 0): + log.info("First run: flushing {filepath}".format(filepath=filepath)) + command = "flush_path {filepath}".format(filepath=filepath) + asok_command(ctx, mds_rank, command, success_validator, filesystem) + command = "scrub_path {filepath}".format(filepath=filepath) + asok_command(ctx, mds_rank, command, success_validator, filesystem) + + filepath = "{repo_path}/suites/fs/basic/clusters/fixed-3-cephfs.yaml".\ + format(repo_path=test_repo_path) + command = "scrub_path {filepath}".format(filepath=filepath) + asok_command(ctx, mds_rank, command, + lambda j, r: json_validator(j, r, "performed_validation", + False), + filesystem) + + if (run_seq == 0): + log.info("First run: flushing base dir /") + command = "flush_path /" + asok_command(ctx, mds_rank, command, success_validator, filesystem) + command = "scrub_path /" + asok_command(ctx, mds_rank, command, success_validator, filesystem) + + client = ctx.manager.find_remote("client", client_id) + new_dir = "{repo_path}/new_dir_{i}".format(repo_path=repo_path, i=run_seq) + test_new_dir = "{repo_path}/new_dir_{i}".format(repo_path=test_repo_path, + i=run_seq) + client.run(args=[ + "mkdir", new_dir]) + command = "flush_path {dir}".format(dir=test_new_dir) + asok_command(ctx, mds_rank, command, success_validator, filesystem) + + new_file = "{repo_path}/new_file_{i}".format(repo_path=repo_path, + i=run_seq) + test_new_file = "{repo_path}/new_file_{i}".format(repo_path=test_repo_path, + i=run_seq) + client.run(args=[ + "echo", "hello", run.Raw('>'), new_file]) + command = "flush_path 
class AsokCommandFailedError(Exception):
    """
    Exception thrown when we get an unexpected response
    on an admin socket command

    Attributes:
        command:   the admin socket command string that was issued
        rc:        exit status of the admin socket invocation
        json:      decoded JSON output of the command (None if there was none)
        errstring: explanation produced by the validator that rejected it
    """
    def __init__(self, command, rc, json, errstring):
        super(AsokCommandFailedError, self).__init__(
            command, rc, json, errstring)
        self.command = command
        self.rc = rc
        self.json = json
        self.errstring = errstring

    def __str__(self):
        # Both literals must be joined *before* .format() is applied;
        # previously only the first literal was returned, unformatted.
        template = ("Admin socket: {command} failed with rc={rc},"
                    "json output={json}, because '{es}'")
        return template.format(
            command=self.command, rc=self.rc,
            json=self.json, es=self.errstring)
class MDSThrasher(Greenlet):
    """
    MDSThrasher::

    The MDSThrasher thrashes MDSs during execution of other tasks (workunits, etc).

    The config is optional.  Many of the config parameters are a maximum value
    to use when selecting a random value from a range.  To always use the maximum
    value, set randomize to false.  The config is a dict containing some or all of:

    seed: [no default] seed the random number generator

    randomize: [default: true] enables randomization and use the max/min values

    max_thrash: [default: 1] the maximum number of MDSs that will be thrashed at
      any given time.

    max_thrash_delay: [default: 30] maximum number of seconds to delay before
      thrashing again.  The older key 'thrash_delay' is still honoured when
      'max_thrash_delay' is absent.

    max_revive_delay: [default: 10] maximum number of seconds to delay before
      bringing back a thrashed MDS

    thrash_in_replay: [default: 0.0] likelihood that the MDS will be thrashed
      during replay.  Value should be between 0.0 and 1.0.  (Validated and
      stored, but replay thrashing itself is currently disabled.)

    max_replay_thrash_delay: [default: 4] maximum number of seconds to delay while in
      the replay state before thrashing.  (Stored only; see thrash_in_replay.)

    thrash_weights: allows specific MDSs to be thrashed more/less frequently.  This option
      overrides anything specified by max_thrash.  This option is a dict containing
      mds.x: weight pairs.  For example, {mds.a: 0.7, mds.b: 0.3, mds.c: 0.0}.  Each weight
      is a value from 0.0 to 1.0.  Any MDSs not specified will be automatically
      given a weight of 0.0.  For a given MDS, by default the thrasher delays for up
      to max_thrash_delay, thrashes, waits for the MDS to recover, and iterates.  If a
      weight below 1.0 is specified for an MDS, for each iteration the thrasher chooses
      whether to thrash during that iteration based on a random value [0-1] not
      exceeding the weight of that MDS.

    Examples::


      The following example sets the likelihood that mds.a will be thrashed
      to 80%, mds.b to 20%, and other MDSs will not be thrashed.  It also sets the
      likelihood that an MDS will be thrashed in replay to 40%.
      Thrash weights do not have to sum to 1.

      tasks:
      - ceph:
      - mds_thrash:
          thrash_weights:
            mds.a: 0.8
            mds.b: 0.2
          thrash_in_replay: 0.4
      - ceph-fuse:
      - workunit:
          clients:
            all: [suites/fsx.sh]

      The following example disables randomization, and uses the max delay values:

      tasks:
      - ceph:
      - mds_thrash:
          randomize: false
          max_thrash_delay: 10
          max_revive_delay: 1
          max_replay_thrash_delay: 4

    """

    def __init__(self, ctx, manager, config, logger, failure_group, weight):
        super(MDSThrasher, self).__init__()

        self.ctx = ctx
        self.manager = manager
        assert self.manager.is_clean()

        self.stopping = Event()
        self.logger = logger
        self.config = config

        self.randomize = bool(self.config.get('randomize', True))
        # 'max_thrash_delay' is the documented key; 'thrash_delay' is what this
        # class historically read, so keep accepting it as a fallback.
        self.max_thrash_delay = float(self.config.get(
            'max_thrash_delay', self.config.get('thrash_delay', 30.0)))
        self.thrash_in_replay = float(self.config.get('thrash_in_replay', False))
        assert self.thrash_in_replay >= 0.0 and self.thrash_in_replay <= 1.0, \
            'thrash_in_replay ({v}) must be between [0.0, 1.0]'.format(
                v=self.thrash_in_replay)

        self.max_replay_thrash_delay = float(self.config.get('max_replay_thrash_delay', 4.0))

        self.max_revive_delay = float(self.config.get('max_revive_delay', 10.0))

        self.failure_group = failure_group
        self.weight = weight

    def _run(self):
        try:
            self.do_thrash()
        except Exception:
            # Log exceptions here so we get the full backtrace (it's lost
            # by the time someone does a .get() on this greenlet)
            self.logger.exception("Exception in do_thrash:")
            raise

    def log(self, x):
        """Write data to logger assigned to this MDThrasher"""
        self.logger.info(x)

    def stop(self):
        """Ask do_thrash() to exit at the start of its next iteration."""
        self.stopping.set()

    def _pick_delay(self, max_delay):
        """
        Return max_delay, or a uniformly distributed value in [0, max_delay]
        when randomization is enabled.
        """
        if self.randomize:
            # randrange() is integer-only; uniform() is the float equivalent.
            return random.uniform(0.0, max_delay)
        return max_delay

    def _get_actives(self):
        """Return the status dicts of failure-group members in up:active."""
        statuses = [self.manager.get_mds_status(m) for m in self.failure_group]
        return [s for s in statuses if s and s['state'] == 'up:active']

    def _wait_for_failure(self, active_mds):
        """Block until the mon reports active_mds as laggy, failed or gone."""
        last_laggy_since = None
        itercount = 0
        while True:
            failed = self.manager.get_mds_status_all()['failed']
            status = self.manager.get_mds_status(active_mds)
            if not status:
                break
            if 'laggy_since' in status:
                last_laggy_since = status['laggy_since']
                break
            if any([(f == active_mds) for f in failed]):
                break
            self.log(
                'waiting till mds map indicates mds.{_id} is laggy/crashed, in failed state, or mds.{_id} is removed from mdsmap'.format(
                    _id=active_mds))
            itercount = itercount + 1
            if itercount > 10:
                self.log('mds map: {status}'.format(status=self.manager.get_mds_status_all()))
            time.sleep(2)
        if last_laggy_since:
            self.log(
                'mds.{_id} reported laggy/crashed since: {since}'.format(_id=active_mds, since=last_laggy_since))
        else:
            self.log('mds.{_id} down, removed from mdsmap'.format(_id=active_mds, since=last_laggy_since))

    def _wait_for_takeover(self):
        """
        Block until some member of the failure group is up:active and
        return its (name, rank).
        """
        itercount = 0
        while True:
            actives = self._get_actives()
            if len(actives) > 0:
                assert len(actives) == 1, 'Can only have one active in failure group'
                return actives[0]['name'], actives[0]['rank']
            itercount = itercount + 1
            if itercount > 10:
                self.log('mds map: {status}'.format(status=self.manager.get_mds_status_all()))
            # Poll at the same cadence as the other wait loops instead of
            # spinning on the monitor.
            time.sleep(2)

    def _wait_for_standby(self, mds_id):
        """Block until mds_id is reported as standby or standby-replay."""
        while True:
            status = self.manager.get_mds_status(mds_id)
            if status and (status['state'] == 'up:standby' or status['state'] == 'up:standby-replay'):
                break
            self.log(
                'waiting till mds map indicates mds.{_id} is in standby or standby-replay'.format(_id=mds_id))
            time.sleep(2)
        self.log('mds.{_id} reported in {state} state'.format(_id=mds_id, state=status['state']))

    def do_thrash(self):
        """
        Perform the random thrashing action

        Each iteration: wait, optionally skip according to the weight, kill
        the active MDS of the failure group, wait for a standby to take
        over, then revive the killed daemon and wait for it to come back as
        a standby.  Loops until stop() is called.
        """
        self.log('starting mds_do_thrash for failure group: ' + ', '.join(
            ['mds.{_id}'.format(_id=_f) for _f in self.failure_group]))
        while not self.stopping.is_set():
            delay = self._pick_delay(self.max_thrash_delay)

            if delay > 0.0:
                self.log('waiting for {delay} secs before thrashing'.format(delay=delay))
                self.stopping.wait(delay)
                if self.stopping.is_set():
                    continue

            # random() yields a float in [0.0, 1.0), so the weight is
            # actually honoured (randrange(0.0, 1.0) could only return 0).
            skip = random.random()
            if self.weight < 1.0 and skip > self.weight:
                self.log('skipping thrash iteration with skip ({skip}) > weight ({weight})'.format(skip=skip,
                                                                                                   weight=self.weight))
                continue

            # find the active mds in the failure group
            actives = self._get_actives()
            assert len(actives) == 1, 'Can only have one active in a failure group'

            active_mds = actives[0]['name']
            active_rank = actives[0]['rank']

            self.log('kill mds.{id} (rank={r})'.format(id=active_mds, r=active_rank))
            self.manager.kill_mds_by_rank(active_rank)

            # wait for mon to report killed mds as crashed
            self._wait_for_failure(active_mds)

            # wait for a standby mds to takeover and become active
            takeover_mds, takeover_rank = self._wait_for_takeover()
            self.log('New active mds is mds.{_id}'.format(_id=takeover_mds))

            # wait for a while before restarting old active to become new
            # standby
            delay = self._pick_delay(self.max_revive_delay)

            self.log('waiting for {delay} secs before reviving mds.{id}'.format(
                delay=delay, id=active_mds))
            time.sleep(delay)

            self.log('reviving mds.{id}'.format(id=active_mds))
            self.manager.revive_mds(active_mds, standby_for_rank=takeover_rank)

            self._wait_for_standby(active_mds)

            # NOTE: replay thrashing is intentionally not performed right
            # now (it might race with the replay -> active transition), so
            # thrash_in_replay and max_replay_thrash_delay are unused here.
statuses_by_rank[s['rank']] = s + + ready = filter(lambda (_, s): s is not None and (s['state'] == 'up:active' + or s['state'] == 'up:standby' + or s['state'] == 'up:standby-replay'), + statuses.items()) + if len(ready) == len(statuses): + break + time.sleep(2) + log.info('Ready to start thrashing') + + # setup failure groups + failure_groups = {} + actives = {s['name']: s for (_, s) in statuses.iteritems() if s['state'] == 'up:active'} + log.info('Actives is: {d}'.format(d=actives)) + log.info('Statuses is: {d}'.format(d=statuses_by_rank)) + for active in actives: + for (r, s) in statuses.iteritems(): + if s['standby_for_name'] == active: + if not active in failure_groups: + failure_groups[active] = [] + log.info('Assigning mds rank {r} to failure group {g}'.format(r=r, g=active)) + failure_groups[active].append(r) + + manager.wait_for_clean() + for (active, standbys) in failure_groups.iteritems(): + weight = 1.0 + if 'thrash_weights' in config: + weight = int(config['thrash_weights'].get('mds.{_id}'.format(_id=active), '0.0')) + + failure_group = [active] + failure_group.extend(standbys) + + thrasher = MDSThrasher( + ctx, manager, config, + logger=log.getChild('mds_thrasher.failure_group.[{a}, {sbs}]'.format( + a=active, + sbs=', '.join(standbys) + ) + ), + failure_group=failure_group, + weight=weight) + thrasher.start() + thrashers[active] = thrasher + + # if thrash_weights isn't specified and we've reached max_thrash, + # we're done + if not 'thrash_weights' in config and len(thrashers) == max_thrashers: + break + + try: + log.debug('Yielding') + yield + finally: + log.info('joining mds_thrashers') + for t in thrashers: + log.info('join thrasher for failure group [{fg}]'.format(fg=', '.join(failure_group))) + thrashers[t].stop() + thrashers[t].join() + log.info('done joining') diff --git a/qa/tasks/metadata.yaml b/qa/tasks/metadata.yaml new file mode 100644 index 00000000000..ccdc3b077cb --- /dev/null +++ b/qa/tasks/metadata.yaml @@ -0,0 +1,2 @@ +instance-id: 
class ClockSkewCheck:
    """
    Periodically check if there are any clock skews among the monitors in the
    quorum. By default, assume no skews are supposed to exist; that can be
    changed using the 'expect-skew' option. If 'never-fail' is set to true,
    then we will always succeed and only report skews if any are found.

    This class does not spawn a thread. It assumes that, if that is indeed
    wanted, it should be done by a third party (for instance, the task using
    this class). We intend it as such in order to reuse this class if need be.

    This task accepts the following options:

    interval     amount of seconds to wait in-between checks. (default: 30.0)
    max-skew     maximum skew, in seconds, that is considered tolerable before
                 issuing a warning. (default: the first monitor's
                 'mon_clock_drift_allowed' config value)
    expect-skew  'true' or 'false', to indicate whether to expect a skew during
                 the run or not. If 'true', the test will fail if no skew is
                 found, and succeed if a skew is indeed found; if 'false', it's
                 the other way around. (default: false)
    never-fail   Don't fail the run if a skew is detected and we weren't
                 expecting it, or if no skew is detected and we were expecting
                 it. (default: False)

    at-least-once          Runs at least once, even if we are told to stop.
                           (default: True)
    at-least-once-timeout  If we were told to stop but we are attempting to
                           run at least once, timeout after this many seconds.
                           (default: 600)

    Example:
        Expect a skew higher than 0.05 seconds, but only report it without
        failing the teuthology run.

    - mon_clock_skew_check:
        interval: 30
        max-skew: 0.05
        expect-skew: true
        never-fail: true
    """

    def __init__(self, ctx, manager, config, logger):
        self.ctx = ctx
        self.manager = manager

        self.stopping = False
        self.logger = logger
        self.config = config

        if self.config is None:
            self.config = dict()

        self.check_interval = float(self.config.get('interval', 30.0))

        configured_skew = self.config.get('max-skew')
        if configured_skew is not None:
            self.max_skew = float(configured_skew)
        else:
            # Only ask the monitor for its allowed drift when the task
            # configuration does not override it.
            first_mon = teuthology.get_first_mon(ctx, config)
            remote = list(ctx.cluster.only(first_mon).remotes.keys())[0]
            proc = remote.run(
                args=[
                    'sudo',
                    'ceph-mon',
                    '-i', first_mon[4:],  # strip the leading 'mon.'
                    '--show-config-value', 'mon_clock_drift_allowed'
                ], stdout=StringIO(), wait=True
            )
            self.max_skew = float(proc.stdout.getvalue())

        self.expect_skew = self.config.get('expect-skew', False)
        self.never_fail = self.config.get('never-fail', False)
        self.at_least_once = self.config.get('at-least-once', True)
        self.at_least_once_timeout = self.config.get('at-least-once-timeout', 600.0)

    def info(self, x):
        """
        locally define logger for info messages
        """
        self.logger.info(x)

    def warn(self, x):
        """
        locally define logger for warnings
        """
        # Logger.warn is a deprecated alias of Logger.warning
        self.logger.warning(x)

    def debug(self, x):
        """
        locally define logger for debug messages
        """
        self.logger.debug(x)

    def finish(self):
        """
        Break out of the do_check loop.
        """
        self.stopping = True

    def sleep_interval(self):
        """
        If a sleep interval is set, sleep for that amount of time.
        """
        if self.check_interval > 0.0:
            self.debug('sleeping for {s} seconds'.format(
                s=self.check_interval))
            time.sleep(self.check_interval)

    def print_skews(self, skews):
        """
        Display skew values.

        :param skews: dict mapping monitor id to the recorded skew details
        """
        total = len(skews)
        if total > 0:
            self.info('---------- found {n} skews ----------'.format(n=total))
            for mon_id, values in skews.items():
                self.info('mon.{id}: {v}'.format(id=mon_id, v=values))
            self.info('-------------------------------------')
        else:
            self.info('---------- no skews were found ----------')

    def do_check(self):
        """
        Clock skew checker.  Loops until finish() is called.

        When 'at-least-once' is set the loop keeps going after finish()
        until one usable timecheck round has been seen, bounded by
        'at-least-once-timeout'.  After the loop, raises AssertionError if
        the presence/absence of skews contradicts 'expect-skew', unless
        'never-fail' is set.
        """
        self.info('start checking for clock skews')
        skews = dict()
        ran_once = False

        started_on = None

        while not self.stopping or (self.at_least_once and not ran_once):

            if self.at_least_once and not ran_once and self.stopping:
                if started_on is None:
                    self.info('kicking-off timeout (if any)')
                    started_on = time.time()
                elif self.at_least_once_timeout > 0.0:
                    assert time.time() - started_on < self.at_least_once_timeout, \
                        'failed to obtain a timecheck before timeout expired'

            quorum_size = len(teuthology.get_mon_names(self.ctx))
            self.manager.wait_for_mon_quorum_size(quorum_size)

            health = self.manager.get_mon_health(True)
            timechecks = health['timechecks']

            clean_check = False

            if timechecks['round_status'] == 'finished':
                assert (timechecks['round'] % 2) == 0, \
                    'timecheck marked as finished but round ' \
                    'disagrees (r {r})'.format(
                        r=timechecks['round'])
                clean_check = True
            else:
                assert timechecks['round_status'] == 'on-going', \
                    'timecheck status expected \'on-going\' ' \
                    'but found \'{s}\' instead'.format(
                        s=timechecks['round_status'])
                if 'mons' in timechecks and len(timechecks['mons']) > 1:
                    self.info('round still on-going, but there are available reports')
                else:
                    self.info('no timechecks available just yet')
                    self.sleep_interval()
                    continue

            assert len(timechecks['mons']) > 1, \
                'there are not enough reported timechecks; ' \
                'expected > 1 found {n}'.format(n=len(timechecks['mons']))

            for check in timechecks['mons']:
                mon_skew = float(check['skew'])
                mon_health = check['health']
                mon_id = check['name']
                if abs(mon_skew) > self.max_skew:
                    assert mon_health == 'HEALTH_WARN', \
                        'mon.{id} health is \'{health}\' but skew {s} > max {ms}'.format(
                            id=mon_id, health=mon_health, s=abs(mon_skew), ms=self.max_skew)

                    log_str = 'mon.{id} with skew {s} > max {ms}'.format(
                        id=mon_id, s=abs(mon_skew), ms=self.max_skew)

                    # add to skew list
                    details = check['details']
                    skews[mon_id] = {'skew': mon_skew, 'details': details}

                    if self.expect_skew:
                        self.info('expected skew: {str}'.format(str=log_str))
                    else:
                        self.warn('unexpected skew: {str}'.format(str=log_str))

            if clean_check or (self.expect_skew and len(skews) > 0):
                ran_once = True
                self.print_skews(skews)
            self.sleep_interval()

        total = len(skews)
        self.print_skews(skews)

        error_str = ''
        found_error = False

        if self.expect_skew:
            if total == 0:
                error_str = 'We were expecting a skew, but none was found!'
                found_error = True
        else:
            if total > 0:
                error_str = 'We were not expecting a skew, but we did find it!'
                found_error = True

        if found_error:
            self.info(error_str)
            if not self.never_fail:
                # raise explicitly: a bare 'assert False' vanishes under -O
                raise AssertionError(error_str)
+ """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'mon_clock_skew_check task only accepts a dict for configuration' + log.info('Beginning mon_clock_skew_check...') + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + skew_check = ClockSkewCheck(ctx, + manager, config, + logger=log.getChild('mon_clock_skew_check')) + skew_check_thread = gevent.spawn(skew_check.do_check) + try: + yield + finally: + log.info('joining mon_clock_skew_check') + skew_check.finish() + skew_check_thread.get() + + diff --git a/qa/tasks/mon_recovery.py b/qa/tasks/mon_recovery.py new file mode 100644 index 00000000000..bfa2cdf78f1 --- /dev/null +++ b/qa/tasks/mon_recovery.py @@ -0,0 +1,80 @@ +""" +Monitor recovery +""" +import logging +import ceph_manager +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test monitor recovery. 
+ """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + mons = [f.split('.')[1] for f in teuthology.get_mon_names(ctx)] + log.info("mon ids = %s" % mons) + + manager.wait_for_mon_quorum_size(len(mons)) + + log.info('verifying all monitors are in the quorum') + for m in mons: + s = manager.get_mon_status(m) + assert s['state'] == 'leader' or s['state'] == 'peon' + assert len(s['quorum']) == len(mons) + + log.info('restarting each monitor in turn') + for m in mons: + # stop a monitor + manager.kill_mon(m) + manager.wait_for_mon_quorum_size(len(mons) - 1) + + # restart + manager.revive_mon(m) + manager.wait_for_mon_quorum_size(len(mons)) + + # in forward and reverse order, + rmons = mons + rmons.reverse() + for mons in mons, rmons: + log.info('stopping all monitors') + for m in mons: + manager.kill_mon(m) + + log.info('forming a minimal quorum for %s, then adding monitors' % mons) + qnum = (len(mons) / 2) + 1 + num = 0 + for m in mons: + manager.revive_mon(m) + num += 1 + if num >= qnum: + manager.wait_for_mon_quorum_size(num) + + # on both leader and non-leader ranks... 
+ for rank in [0, 1]: + # take one out + log.info('removing mon %s' % mons[rank]) + manager.kill_mon(mons[rank]) + manager.wait_for_mon_quorum_size(len(mons) - 1) + + log.info('causing some monitor log activity') + m = 30 + for n in range(1, m): + manager.raw_cluster_cmd('log', '%d of %d' % (n, m)) + + log.info('adding mon %s back in' % mons[rank]) + manager.revive_mon(mons[rank]) + manager.wait_for_mon_quorum_size(len(mons)) diff --git a/qa/tasks/mon_thrash.py b/qa/tasks/mon_thrash.py new file mode 100644 index 00000000000..b45aaa99978 --- /dev/null +++ b/qa/tasks/mon_thrash.py @@ -0,0 +1,343 @@ +""" +Monitor thrash +""" +import logging +import contextlib +import ceph_manager +import random +import time +import gevent +import json +import math +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def _get_mons(ctx): + """ + Get monitor names from the context value. + """ + mons = [f[len('mon.'):] for f in teuthology.get_mon_names(ctx)] + return mons + +class MonitorThrasher: + """ + How it works:: + + - pick a monitor + - kill it + - wait for quorum to be formed + - sleep for 'revive_delay' seconds + - revive monitor + - wait for quorum to be formed + - sleep for 'thrash_delay' seconds + + Options:: + + seed Seed to use on the RNG to reproduce a previous + behaviour (default: None; i.e., not set) + revive_delay Number of seconds to wait before reviving + the monitor (default: 10) + thrash_delay Number of seconds to wait in-between + test iterations (default: 0) + thrash_store Thrash monitor store before killing the monitor being thrashed (default: False) + thrash_store_probability Probability of thrashing a monitor's store + (default: 50) + thrash_many Thrash multiple monitors instead of just one. If + 'maintain-quorum' is set to False, then we will + thrash up to as many monitors as there are + available. (default: False) + maintain_quorum Always maintain quorum, taking care on how many + monitors we kill during the thrashing. 
If we + happen to only have one or two monitors configured, + if this option is set to True, then we won't run + this task as we cannot guarantee maintenance of + quorum. Setting it to false however would allow the + task to run with as many as just one single monitor. + (default: True) + freeze_mon_probability: how often to freeze the mon instead of killing it, + in % (default: 0) + freeze_mon_duration: how many seconds to freeze the mon (default: 15) + scrub Scrub after each iteration (default: True) + + Note: if 'store-thrash' is set to True, then 'maintain-quorum' must also + be set to True. + + For example:: + + tasks: + - ceph: + - mon_thrash: + revive_delay: 20 + thrash_delay: 1 + thrash_store: true + thrash_store_probability: 40 + seed: 31337 + maintain_quorum: true + thrash_many: true + - ceph-fuse: + - workunit: + clients: + all: + - mon/workloadgen.sh + """ + def __init__(self, ctx, manager, config, logger): + self.ctx = ctx + self.manager = manager + self.manager.wait_for_clean() + + self.stopping = False + self.logger = logger + self.config = config + + if self.config is None: + self.config = dict() + + """ Test reproducibility """ + self.random_seed = self.config.get('seed', None) + + if self.random_seed is None: + self.random_seed = int(time.time()) + + self.rng = random.Random() + self.rng.seed(int(self.random_seed)) + + """ Monitor thrashing """ + self.revive_delay = float(self.config.get('revive_delay', 10.0)) + self.thrash_delay = float(self.config.get('thrash_delay', 0.0)) + + self.thrash_many = self.config.get('thrash_many', False) + self.maintain_quorum = self.config.get('maintain_quorum', True) + + self.scrub = self.config.get('scrub', True) + + self.freeze_mon_probability = float(self.config.get('freeze_mon_probability', 10)) + self.freeze_mon_duration = float(self.config.get('freeze_mon_duration', 15.0)) + + assert self.max_killable() > 0, \ + 'Unable to kill at least one monitor with the current config.' 
+ + """ Store thrashing """ + self.store_thrash = self.config.get('store_thrash', False) + self.store_thrash_probability = int( + self.config.get('store_thrash_probability', 50)) + if self.store_thrash: + assert self.store_thrash_probability > 0, \ + 'store_thrash is set, probability must be > 0' + assert self.maintain_quorum, \ + 'store_thrash = true must imply maintain_quorum = true' + + self.thread = gevent.spawn(self.do_thrash) + + def log(self, x): + """ + locally log info messages + """ + self.logger.info(x) + + def do_join(self): + """ + Break out of this processes thrashing loop. + """ + self.stopping = True + self.thread.get() + + def should_thrash_store(self): + """ + If allowed, indicate that we should thrash a certain percentage of + the time as determined by the store_thrash_probability value. + """ + if not self.store_thrash: + return False + return self.rng.randrange(0, 101) < self.store_thrash_probability + + def thrash_store(self, mon): + """ + Thrash the monitor specified. + :param mon: monitor to thrash + """ + addr = self.ctx.ceph.conf['mon.%s' % mon]['mon addr'] + self.log('thrashing mon.{id}@{addr} store'.format(id=mon, addr=addr)) + out = self.manager.raw_cluster_cmd('-m', addr, 'sync', 'force') + j = json.loads(out) + assert j['ret'] == 0, \ + 'error forcing store sync on mon.{id}:\n{ret}'.format( + id=mon,ret=out) + + def should_freeze_mon(self): + """ + Indicate that we should freeze a certain percentago of the time + as determined by the freeze_mon_probability value. + """ + return self.rng.randrange(0, 101) < self.freeze_mon_probability + + def freeze_mon(self, mon): + """ + Send STOP signal to freeze the monitor. + """ + log.info('Sending STOP to mon %s', mon) + self.manager.signal_mon(mon, 19) # STOP + + def unfreeze_mon(self, mon): + """ + Send CONT signal to unfreeze the monitor. 
+ """ + log.info('Sending CONT to mon %s', mon) + self.manager.signal_mon(mon, 18) # CONT + + def kill_mon(self, mon): + """ + Kill the monitor specified + """ + self.log('killing mon.{id}'.format(id=mon)) + self.manager.kill_mon(mon) + + def revive_mon(self, mon): + """ + Revive the monitor specified + """ + self.log('killing mon.{id}'.format(id=mon)) + self.log('reviving mon.{id}'.format(id=mon)) + self.manager.revive_mon(mon) + + def max_killable(self): + """ + Return the maximum number of monitors we can kill. + """ + m = len(_get_mons(self.ctx)) + if self.maintain_quorum: + return max(math.ceil(m/2.0)-1, 0) + else: + return m + + def do_thrash(self): + """ + Cotinuously loop and thrash the monitors. + """ + self.log('start thrashing') + self.log('seed: {s}, revive delay: {r}, thrash delay: {t} '\ + 'thrash many: {tm}, maintain quorum: {mq} '\ + 'store thrash: {st}, probability: {stp} '\ + 'freeze mon: prob {fp} duration {fd}'.format( + s=self.random_seed,r=self.revive_delay,t=self.thrash_delay, + tm=self.thrash_many, mq=self.maintain_quorum, + st=self.store_thrash,stp=self.store_thrash_probability, + fp=self.freeze_mon_probability,fd=self.freeze_mon_duration, + )) + + while not self.stopping: + mons = _get_mons(self.ctx) + self.manager.wait_for_mon_quorum_size(len(mons)) + self.log('making sure all monitors are in the quorum') + for m in mons: + s = self.manager.get_mon_status(m) + assert s['state'] == 'leader' or s['state'] == 'peon' + assert len(s['quorum']) == len(mons) + + kill_up_to = self.rng.randrange(1, self.max_killable()+1) + mons_to_kill = self.rng.sample(mons, kill_up_to) + self.log('monitors to thrash: {m}'.format(m=mons_to_kill)) + + mons_to_freeze = [] + for mon in mons: + if mon in mons_to_kill: + continue + if self.should_freeze_mon(): + mons_to_freeze.append(mon) + self.log('monitors to freeze: {m}'.format(m=mons_to_freeze)) + + for mon in mons_to_kill: + self.log('thrashing mon.{m}'.format(m=mon)) + + """ we only thrash stores if we are 
maintaining quorum """ + if self.should_thrash_store() and self.maintain_quorum: + self.thrash_store(mon) + + self.kill_mon(mon) + + if mons_to_freeze: + for mon in mons_to_freeze: + self.freeze_mon(mon) + self.log('waiting for {delay} secs to unfreeze mons'.format( + delay=self.freeze_mon_duration)) + time.sleep(self.freeze_mon_duration) + for mon in mons_to_freeze: + self.unfreeze_mon(mon) + + if self.maintain_quorum: + self.manager.wait_for_mon_quorum_size(len(mons)-len(mons_to_kill)) + for m in mons: + if m in mons_to_kill: + continue + s = self.manager.get_mon_status(m) + assert s['state'] == 'leader' or s['state'] == 'peon' + assert len(s['quorum']) == len(mons)-len(mons_to_kill) + + self.log('waiting for {delay} secs before reviving monitors'.format( + delay=self.revive_delay)) + time.sleep(self.revive_delay) + + for mon in mons_to_kill: + self.revive_mon(mon) + # do more freezes + if mons_to_freeze: + for mon in mons_to_freeze: + self.freeze_mon(mon) + self.log('waiting for {delay} secs to unfreeze mons'.format( + delay=self.freeze_mon_duration)) + time.sleep(self.freeze_mon_duration) + for mon in mons_to_freeze: + self.unfreeze_mon(mon) + + self.manager.wait_for_mon_quorum_size(len(mons)) + for m in mons: + s = self.manager.get_mon_status(m) + assert s['state'] == 'leader' or s['state'] == 'peon' + assert len(s['quorum']) == len(mons) + + if self.scrub: + self.log('triggering scrub') + try: + self.manager.raw_cluster_cmd('scrub') + except Exception: + log.exception("Saw exception while triggering scrub") + + if self.thrash_delay > 0.0: + self.log('waiting for {delay} secs before continuing thrashing'.format( + delay=self.thrash_delay)) + time.sleep(self.thrash_delay) + +@contextlib.contextmanager +def task(ctx, config): + """ + Stress test the monitor by thrashing them while another task/workunit + is running. + + Please refer to MonitorThrasher class for further information on the + available options. 
+ """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'mon_thrash task only accepts a dict for configuration' + assert len(_get_mons(ctx)) > 2, \ + 'mon_thrash task requires at least 3 monitors' + log.info('Beginning mon_thrash...') + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + thrash_proc = MonitorThrasher(ctx, + manager, config, + logger=log.getChild('mon_thrasher')) + try: + log.debug('Yielding') + yield + finally: + log.info('joining mon_thrasher') + thrash_proc.do_join() + mons = _get_mons(ctx) + manager.wait_for_mon_quorum_size(len(mons)) diff --git a/qa/tasks/multibench.py b/qa/tasks/multibench.py new file mode 100644 index 00000000000..13b5ffe2cf8 --- /dev/null +++ b/qa/tasks/multibench.py @@ -0,0 +1,57 @@ +""" +Multibench testing +""" +import contextlib +import logging +import radosbench +import time +import copy +import gevent + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Run multibench + + The config should be as follows: + + multibench: + time: + segments: + radosbench: + + example: + + tasks: + - ceph: + - multibench: + clients: [client.0] + time: 360 + - interactive: + """ + log.info('Beginning multibench...') + assert isinstance(config, dict), \ + "please list clients to run on" + + def run_one(num): + """Run test spawn from gevent""" + start = time.time() + benchcontext = copy.copy(config.get('radosbench')) + iterations = 0 + while time.time() - start < int(config.get('time', 600)): + log.info("Starting iteration %s of segment %s"%(iterations, num)) + benchcontext['pool'] = str(num) + "-" + str(iterations) + with radosbench.task(ctx, benchcontext): + time.sleep() + iterations += 1 + log.info("Starting %s threads"%(str(config.get('segments', 3)),)) + segments = [ + gevent.spawn(run_one, i) + for i in range(0, 
int(config.get('segments', 3)))] + + try: + yield + finally: + [i.get() for i in segments] diff --git a/qa/tasks/object_source_down.py b/qa/tasks/object_source_down.py new file mode 100644 index 00000000000..17b94490668 --- /dev/null +++ b/qa/tasks/object_source_down.py @@ -0,0 +1,103 @@ +""" +Test Object locations going down +""" +import logging +import ceph_manager +from teuthology import misc as teuthology +from util.rados import rados + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test handling of object location going down + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'lost_unfound task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 3: + manager.sleep(10) + manager.wait_for_clean() + + # something that is always there + dummyfile = '/etc/fstab' + + # take 0, 1 out + manager.mark_out_osd(0) + manager.mark_out_osd(1) + manager.wait_for_clean() + + # delay recovery, and make the pg log very long (to prevent backfill) + manager.raw_cluster_cmd( + 'tell', 'osd.0', + 'injectargs', + '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000' + ) + # delay recovery, and make the pg log very long (to prevent backfill) + manager.raw_cluster_cmd( + 'tell', 'osd.1', + 'injectargs', + '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000' + ) + # delay recovery, and make the pg log very long (to prevent backfill) + manager.raw_cluster_cmd( + 'tell', 'osd.2', + 'injectargs', + '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000' + ) + # delay recovery, and make the pg log very long (to prevent backfill) + manager.raw_cluster_cmd( + 'tell', 'osd.3', + 'injectargs', + '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 
100000000' + ) + + # kludge to make sure they get a map + rados(ctx, mon, ['-p', 'data', 'put', 'dummy', dummyfile]) + + # create old objects + for f in range(1, 10): + rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile]) + + manager.mark_out_osd(3) + manager.wait_till_active() + + manager.mark_in_osd(0) + manager.wait_till_active() + + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + + manager.mark_out_osd(2) + manager.wait_till_active() + + # bring up 1 + manager.mark_in_osd(1) + manager.wait_till_active() + + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + log.info("Getting unfound objects") + unfound = manager.get_num_unfound_objects() + assert not unfound + + manager.kill_osd(2) + manager.mark_down_osd(2) + manager.kill_osd(3) + manager.mark_down_osd(3) + + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + log.info("Getting unfound objects") + unfound = manager.get_num_unfound_objects() + assert unfound diff --git a/qa/tasks/omapbench.py b/qa/tasks/omapbench.py new file mode 100644 index 00000000000..e026c74dbc0 --- /dev/null +++ b/qa/tasks/omapbench.py @@ -0,0 +1,83 @@ +""" +Run omapbench executable within teuthology +""" +import contextlib +import logging + +from teuthology.orchestra import run +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Run omapbench + + The config should be as follows:: + + omapbench: + clients: [client list] + threads: + objects: + entries: + keysize: + valsize: + increment: + omaptype: + + example:: + + tasks: + - ceph: + - omapbench: + clients: [client.0] + threads: 30 + objects: 1000 + entries: 10 + keysize: 10 + valsize: 100 + increment: 100 + omaptype: uniform + - interactive: + """ + log.info('Beginning 
omapbench...') + assert isinstance(config, dict), \ + "please list clients to run on" + omapbench = {} + testdir = teuthology.get_testdir(ctx) + print(str(config.get('increment',-1))) + for role in config.get('clients', ['client.0']): + assert isinstance(role, basestring) + PREFIX = 'client.' + assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + (remote,) = ctx.cluster.only(role).remotes.iterkeys() + proc = remote.run( + args=[ + "/bin/sh", "-c", + " ".join(['adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage', + 'omapbench', + '--name', role[len(PREFIX):], + '-t', str(config.get('threads', 30)), + '-o', str(config.get('objects', 1000)), + '--entries', str(config.get('entries',10)), + '--keysize', str(config.get('keysize',10)), + '--valsize', str(config.get('valsize',1000)), + '--inc', str(config.get('increment',10)), + '--omaptype', str(config.get('omaptype','uniform')) + ]).format(tdir=testdir), + ], + logger=log.getChild('omapbench.{id}'.format(id=id_)), + stdin=run.PIPE, + wait=False + ) + omapbench[id_] = proc + + try: + yield + finally: + log.info('joining omapbench') + run.wait(omapbench.itervalues()) diff --git a/qa/tasks/osd_backfill.py b/qa/tasks/osd_backfill.py new file mode 100644 index 00000000000..f3b59e398cb --- /dev/null +++ b/qa/tasks/osd_backfill.py @@ -0,0 +1,105 @@ +""" +Osd backfill test +""" +import logging +import ceph_manager +import time +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + + +def rados_start(ctx, remote, cmd): + """ + Run a remote rados command (currently used to only write data) + """ + log.info("rados %s" % ' '.join(cmd)) + testdir = teuthology.get_testdir(ctx) + pre = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rados', + ]; + pre.extend(cmd) + proc = remote.run( + args=pre, + wait=False, + ) + return proc + +def task(ctx, config): + """ + Test backfill + """ + if config is None: + config = {} + assert isinstance(config, 
dict), \ + 'thrashosds task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + + num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') + log.info('num_osds is %s' % num_osds) + assert num_osds == 3 + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 3: + manager.sleep(10) + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + manager.wait_for_clean() + + # write some data + p = rados_start(ctx, mon, ['-p', 'rbd', 'bench', '15', 'write', '-b', '4096', + '--no-cleanup']) + err = p.wait() + log.info('err is %d' % err) + + # mark osd.0 out to trigger a rebalance/backfill + manager.mark_out_osd(0) + + # also mark it down to it won't be included in pg_temps + manager.kill_osd(0) + manager.mark_down_osd(0) + + # wait for everything to peer and be happy... 
+ manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + manager.wait_for_recovery() + + # write some new data + p = rados_start(ctx, mon, ['-p', 'rbd', 'bench', '30', 'write', '-b', '4096', + '--no-cleanup']) + + time.sleep(15) + + # blackhole + restart osd.1 + # this triggers a divergent backfill target + manager.blackhole_kill_osd(1) + time.sleep(2) + manager.revive_osd(1) + + # wait for our writes to complete + succeed + err = p.wait() + log.info('err is %d' % err) + + # cluster must recover + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + manager.wait_for_recovery() + + # re-add osd.0 + manager.revive_osd(0) + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + manager.wait_for_clean() + + diff --git a/qa/tasks/osd_failsafe_enospc.py b/qa/tasks/osd_failsafe_enospc.py new file mode 100644 index 00000000000..2af94cd58e4 --- /dev/null +++ b/qa/tasks/osd_failsafe_enospc.py @@ -0,0 +1,211 @@ +""" +Handle osdfailsafe configuration settings (nearfull ratio and full ratio) +""" +from cStringIO import StringIO +import logging +import time + +from teuthology.orchestra import run +from util.rados import rados +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test handling of osd_failsafe_nearfull_ratio and osd_failsafe_full_ratio + configuration settings + + In order for test to pass must use log-whitelist as follows + + tasks: + - chef: + - install: + - ceph: + log-whitelist: ['OSD near full', 'OSD full dropping all updates'] + - osd_failsafe_enospc: + + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'osd_failsafe_enospc task only accepts a dict for configuration' + + # Give 2 seconds for injectargs + osd_op_complaint_time (30) + 2 * osd_heartbeat_interval (6) + 6 
padding + sleep_time = 50 + + # something that is always there + dummyfile = '/etc/fstab' + dummyfile2 = '/etc/resolv.conf' + + # create 1 pg pool with 1 rep which can only be on osd.0 + osds = ctx.manager.get_osd_dump() + for osd in osds: + if osd['osd'] != 0: + ctx.manager.mark_out_osd(osd['osd']) + + log.info('creating pool foo') + ctx.manager.create_pool("foo") + ctx.manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'size', '1') + + # State NONE -> NEAR + log.info('1. Verify warning messages when exceeding nearfull_ratio') + + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + + proc = mon.run( + args=[ + 'daemon-helper', + 'kill', + 'ceph', '-w' + ], + stdin=run.PIPE, + stdout=StringIO(), + wait=False, + ) + + ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .00001') + + time.sleep(sleep_time) + proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w + proc.wait() + + lines = proc.stdout.getvalue().split('\n') + + count = len(filter(lambda line: '[WRN] OSD near full' in line, lines)) + assert count == 2, 'Incorrect number of warning messages expected 2 got %d' % count + count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) + assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count + + # State NEAR -> FULL + log.info('2. 
Verify error messages when exceeding full_ratio') + + proc = mon.run( + args=[ + 'daemon-helper', + 'kill', + 'ceph', '-w' + ], + stdin=run.PIPE, + stdout=StringIO(), + wait=False, + ) + + ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .00001') + + time.sleep(sleep_time) + proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w + proc.wait() + + lines = proc.stdout.getvalue().split('\n') + + count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) + assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count + + log.info('3. Verify write failure when exceeding full_ratio') + + # Write data should fail + ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile1', dummyfile]) + assert ret != 0, 'Expected write failure but it succeeded with exit status 0' + + # Put back default + ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97') + time.sleep(10) + + # State FULL -> NEAR + log.info('4. Verify write success when NOT exceeding full_ratio') + + # Write should succeed + ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile2', dummyfile2]) + assert ret == 0, 'Expected write to succeed, but got exit status %d' % ret + + log.info('5. 
Verify warning messages again when exceeding nearfull_ratio') + + proc = mon.run( + args=[ + 'daemon-helper', + 'kill', + 'ceph', '-w' + ], + stdin=run.PIPE, + stdout=StringIO(), + wait=False, + ) + + time.sleep(sleep_time) + proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w + proc.wait() + + lines = proc.stdout.getvalue().split('\n') + + count = len(filter(lambda line: '[WRN] OSD near full' in line, lines)) + assert count == 1 or count == 2, 'Incorrect number of warning messages expected 1 or 2 got %d' % count + count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) + assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count + + ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .90') + time.sleep(10) + + # State NONE -> FULL + log.info('6. Verify error messages again when exceeding full_ratio') + + proc = mon.run( + args=[ + 'daemon-helper', + 'kill', + 'ceph', '-w' + ], + stdin=run.PIPE, + stdout=StringIO(), + wait=False, + ) + + ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .00001') + + time.sleep(sleep_time) + proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w + proc.wait() + + lines = proc.stdout.getvalue().split('\n') + + count = len(filter(lambda line: '[WRN] OSD near full' in line, lines)) + assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count + count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) + assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count + + # State FULL -> NONE + log.info('7. 
Verify no messages settings back to default') + + ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97') + time.sleep(10) + + proc = mon.run( + args=[ + 'daemon-helper', + 'kill', + 'ceph', '-w' + ], + stdin=run.PIPE, + stdout=StringIO(), + wait=False, + ) + + time.sleep(sleep_time) + proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w + proc.wait() + + lines = proc.stdout.getvalue().split('\n') + + count = len(filter(lambda line: '[WRN] OSD near full' in line, lines)) + assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count + count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) + assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count + + log.info('Test Passed') + + # Bring all OSDs back in + ctx.manager.remove_pool("foo") + for osd in osds: + if osd['osd'] != 0: + ctx.manager.mark_in_osd(osd['osd']) diff --git a/qa/tasks/osd_recovery.py b/qa/tasks/osd_recovery.py new file mode 100644 index 00000000000..450384aa2fe --- /dev/null +++ b/qa/tasks/osd_recovery.py @@ -0,0 +1,208 @@ +""" +osd recovery +""" +import logging +import ceph_manager +import time +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + + +def rados_start(testdir, remote, cmd): + """ + Run a remote rados command (currently used to only write data) + """ + log.info("rados %s" % ' '.join(cmd)) + pre = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rados', + ]; + pre.extend(cmd) + proc = remote.run( + args=pre, + wait=False, + ) + return proc + +def task(ctx, config): + """ + Test (non-backfill) recovery + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'task only accepts a dict for configuration' + testdir = teuthology.get_testdir(ctx) + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + + num_osds = 
teuthology.num_instances_of_type(ctx.cluster, 'osd') + log.info('num_osds is %s' % num_osds) + assert num_osds == 3 + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 3: + manager.sleep(10) + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + manager.wait_for_clean() + + # test some osdmap flags + manager.raw_cluster_cmd('osd', 'set', 'noin') + manager.raw_cluster_cmd('osd', 'set', 'noout') + manager.raw_cluster_cmd('osd', 'set', 'noup') + manager.raw_cluster_cmd('osd', 'set', 'nodown') + manager.raw_cluster_cmd('osd', 'unset', 'noin') + manager.raw_cluster_cmd('osd', 'unset', 'noout') + manager.raw_cluster_cmd('osd', 'unset', 'noup') + manager.raw_cluster_cmd('osd', 'unset', 'nodown') + + # write some new data + p = rados_start(testdir, mon, ['-p', 'rbd', 'bench', '60', 'write', '-b', '4096', + '--no-cleanup']) + + time.sleep(15) + + # trigger a divergent target: + # blackhole + restart osd.1 (shorter log) + manager.blackhole_kill_osd(1) + # kill osd.2 (longer log... 
we'll make it divergent below) + manager.kill_osd(2) + time.sleep(2) + manager.revive_osd(1) + + # wait for our writes to complete + succeed + err = p.wait() + log.info('err is %d' % err) + + # cluster must repeer + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.wait_for_active_or_down() + + # write some more (make sure osd.2 really is divergent) + p = rados_start(testdir, mon, ['-p', 'rbd', 'bench', '15', 'write', '-b', '4096']) + p.wait() + + # revive divergent osd + manager.revive_osd(2) + + while len(manager.get_osd_status()['up']) < 3: + log.info('waiting a bit...') + time.sleep(2) + log.info('3 are up!') + + # cluster must recover + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + manager.wait_for_clean() + + +def test_incomplete_pgs(ctx, config): + """ + Test handling of incomplete pgs. Requires 4 osds. 
+ """ + testdir = teuthology.get_testdir(ctx) + if config is None: + config = {} + assert isinstance(config, dict), \ + 'task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + + num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') + log.info('num_osds is %s' % num_osds) + assert num_osds == 4 + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 4: + time.sleep(10) + + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats') + manager.wait_for_clean() + + log.info('Testing incomplete pgs...') + + for i in range(4): + manager.set_config( + i, + osd_recovery_delay_start=1000) + + # move data off of osd.0, osd.1 + manager.raw_cluster_cmd('osd', 'out', '0', '1') + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats') + manager.wait_for_clean() + + # lots of objects in rbd (no pg log, will backfill) + p = rados_start(testdir, mon, + ['-p', 'rbd', 'bench', '60', 'write', '-b', '1', + '--no-cleanup']) + p.wait() + + # few objects in rbd pool (with pg log, normal recovery) + for f in range(1, 20): + p = rados_start(testdir, mon, ['-p', 'rbd', 'put', + 'foo.%d' % f, '/etc/passwd']) + p.wait() + + # move it back + manager.raw_cluster_cmd('osd', 'in', '0', '1') + manager.raw_cluster_cmd('osd', 'out', '2', '3') + time.sleep(10) + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') 
+ manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats') + time.sleep(10) + manager.wait_for_active() + + assert not manager.is_clean() + assert not manager.is_recovered() + + # kill 2 + 3 + log.info('stopping 2,3') + manager.kill_osd(2) + manager.kill_osd(3) + log.info('...') + manager.raw_cluster_cmd('osd', 'down', '2', '3') + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.wait_for_active_or_down() + + assert manager.get_num_down() > 0 + + # revive 2 + 3 + manager.revive_osd(2) + manager.revive_osd(3) + while len(manager.get_osd_status()['up']) < 4: + log.info('waiting a bit...') + time.sleep(2) + log.info('all are up!') + + for i in range(4): + manager.kick_recovery_wq(i) + + # cluster must recover + manager.wait_for_clean() diff --git a/qa/tasks/peer.py b/qa/tasks/peer.py new file mode 100644 index 00000000000..f1789cf12d6 --- /dev/null +++ b/qa/tasks/peer.py @@ -0,0 +1,96 @@ +""" +Peer test (Single test, not much configurable here) +""" +import logging +import json + +import ceph_manager +from teuthology import misc as teuthology +from util.rados import rados + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test peering. 
+ """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'peer task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 3: + manager.sleep(10) + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + manager.wait_for_clean() + + for i in range(3): + manager.set_config( + i, + osd_recovery_delay_start=120) + + # take on osd down + manager.kill_osd(2) + manager.mark_down_osd(2) + + # kludge to make sure they get a map + rados(ctx, mon, ['-p', 'data', 'get', 'dummy', '-']) + + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.wait_for_recovery() + + # kill another and revive 2, so that some pgs can't peer. 
+ manager.kill_osd(1) + manager.mark_down_osd(1) + manager.revive_osd(2) + manager.wait_till_osd_is_up(2) + + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + + manager.wait_for_active_or_down() + + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + + # look for down pgs + num_down_pgs = 0 + pgs = manager.get_pg_stats() + for pg in pgs: + out = manager.raw_cluster_cmd('pg', pg['pgid'], 'query') + log.debug("out string %s",out) + j = json.loads(out) + log.info("pg is %s, query json is %s", pg, j) + + if pg['state'].count('down'): + num_down_pgs += 1 + # verify that it is blocked on osd.1 + rs = j['recovery_state'] + assert len(rs) > 0 + assert rs[0]['name'] == 'Started/Primary/Peering/GetInfo' + assert rs[1]['name'] == 'Started/Primary/Peering' + assert rs[1]['blocked'] + assert rs[1]['down_osds_we_would_probe'] == [1] + assert len(rs[1]['peering_blocked_by']) == 1 + assert rs[1]['peering_blocked_by'][0]['osd'] == 1 + + assert num_down_pgs > 0 + + # bring it all back + manager.revive_osd(1) + manager.wait_till_osd_is_up(1) + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + manager.wait_for_clean() diff --git a/qa/tasks/peering_speed_test.py b/qa/tasks/peering_speed_test.py new file mode 100644 index 00000000000..602a7da3066 --- /dev/null +++ b/qa/tasks/peering_speed_test.py @@ -0,0 +1,84 @@ +""" +Remotely run peering tests. 
+""" +import logging +import time + +log = logging.getLogger(__name__) + +from args import argify + +POOLNAME = "POOLNAME" +ARGS = [ + ('num_pgs', 'number of pgs to create', 256, int), + ('max_time', 'seconds to complete peering', 0, int), + ('runs', 'trials to run', 10, int), + ('num_objects', 'objects to create', 256 * 1024, int), + ('object_size', 'size in bytes for objects', 64, int), + ('creation_time_limit', 'time limit for pool population', 60*60, int), + ('create_threads', 'concurrent writes for create', 256, int) + ] + +def setup(ctx, config): + """ + Setup peering test on remotes. + """ + ctx.manager.clear_pools() + ctx.manager.create_pool(POOLNAME, config.num_pgs) + log.info("populating pool") + ctx.manager.rados_write_objects( + POOLNAME, + config.num_objects, + config.object_size, + config.creation_time_limit, + config.create_threads) + log.info("done populating pool") + +def do_run(ctx, config): + """ + Perform the test. + """ + start = time.time() + # mark in osd + ctx.manager.mark_in_osd(0) + log.info("writing out objects") + ctx.manager.rados_write_objects( + POOLNAME, + config.num_pgs, # write 1 object per pg or so + 1, + config.creation_time_limit, + config.num_pgs, # lots of concurrency + cleanup = True) + peering_end = time.time() + + log.info("peering done, waiting on recovery") + ctx.manager.wait_for_clean() + + log.info("recovery done") + recovery_end = time.time() + if config.max_time: + assert(peering_end - start < config.max_time) + ctx.manager.mark_out_osd(0) + ctx.manager.wait_for_clean() + return { + 'time_to_active': peering_end - start, + 'time_to_clean': recovery_end - start + } + +@argify("peering_speed_test", ARGS) +def task(ctx, config): + """ + Peering speed test + """ + setup(ctx, config) + ctx.manager.mark_out_osd(0) + ctx.manager.wait_for_clean() + ret = [] + for i in range(config.runs): + log.info("Run {i}".format(i = i)) + ret.append(do_run(ctx, config)) + + ctx.manager.mark_in_osd(0) + ctx.summary['recovery_times'] = { + 
'runs': ret + } diff --git a/qa/tasks/populate_rbd_pool.py b/qa/tasks/populate_rbd_pool.py new file mode 100644 index 00000000000..059a33fc112 --- /dev/null +++ b/qa/tasks/populate_rbd_pool.py @@ -0,0 +1,82 @@ +""" +Populate rbd pools +""" +import contextlib +import logging + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Populate pools with prefix with + rbd images at snaps + + The config could be as follows:: + + populate_rbd_pool: + client: + pool_prefix: foo + num_pools: 5 + num_images: 10 + num_snaps: 3 + image_size: 10737418240 + """ + if config is None: + config = {} + client = config.get("client", "client.0") + pool_prefix = config.get("pool_prefix", "foo") + num_pools = config.get("num_pools", 2) + num_images = config.get("num_images", 20) + num_snaps = config.get("num_snaps", 4) + image_size = config.get("image_size", 100) + write_size = config.get("write_size", 1024*1024) + write_threads = config.get("write_threads", 10) + write_total_per_snap = config.get("write_total_per_snap", 1024*1024*30) + + (remote,) = ctx.cluster.only(client).remotes.iterkeys() + + for poolid in range(num_pools): + poolname = "%s-%s" % (pool_prefix, str(poolid)) + log.info("Creating pool %s" % (poolname,)) + ctx.manager.create_pool(poolname) + for imageid in range(num_images): + imagename = "rbd-%s" % (str(imageid),) + log.info("Creating imagename %s" % (imagename,)) + remote.run( + args = [ + "rbd", + "create", + imagename, + "--image-format", "1", + "--size", str(image_size), + "--pool", str(poolname)]) + def bench_run(): + remote.run( + args = [ + "rbd", + "bench-write", + imagename, + "--pool", poolname, + "--io-size", str(write_size), + "--io-threads", str(write_threads), + "--io-total", str(write_total_per_snap), + "--io-pattern", "rand"]) + log.info("imagename %s first bench" % (imagename,)) + bench_run() + for snapid in range(num_snaps): + snapname = "snap-%s" % (str(snapid),) + log.info("imagename %s creating snap %s" % 
(imagename, snapname)) + remote.run( + args = [ + "rbd", "snap", "create", + "--pool", poolname, + "--snap", snapname, + imagename + ]) + bench_run() + + try: + yield + finally: + log.info('done') diff --git a/qa/tasks/qemu.py b/qa/tasks/qemu.py new file mode 100644 index 00000000000..44591a12003 --- /dev/null +++ b/qa/tasks/qemu.py @@ -0,0 +1,449 @@ +""" +Qemu task +""" +from cStringIO import StringIO + +import contextlib +import logging +import os + +from teuthology import misc as teuthology +from teuthology import contextutil +from tasks import rbd +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +DEFAULT_NUM_RBD = 1 +DEFAULT_IMAGE_URL = 'http://ceph.com/qa/ubuntu-12.04.qcow2' +DEFAULT_MEM = 4096 # in megabytes + +def create_images(ctx, config, managers): + for client, client_config in config.iteritems(): + num_rbd = client_config.get('num_rbd', 1) + clone = client_config.get('clone', False) + assert num_rbd > 0, 'at least one rbd device must be used' + for i in xrange(num_rbd): + create_config = { + client: { + 'image_name': '{client}.{num}'.format(client=client, num=i), + 'image_format': 2 if clone else 1, + } + } + managers.append( + lambda create_config=create_config: + rbd.create_image(ctx=ctx, config=create_config) + ) + +def create_clones(ctx, config, managers): + for client, client_config in config.iteritems(): + num_rbd = client_config.get('num_rbd', 1) + clone = client_config.get('clone', False) + if clone: + for i in xrange(num_rbd): + create_config = { + client: { + 'image_name': + '{client}.{num}-clone'.format(client=client, num=i), + 'parent_name': + '{client}.{num}'.format(client=client, num=i), + } + } + managers.append( + lambda create_config=create_config: + rbd.clone_image(ctx=ctx, config=create_config) + ) + +@contextlib.contextmanager +def create_dirs(ctx, config): + """ + Handle directory creation and cleanup + """ + testdir = teuthology.get_testdir(ctx) + for client, client_config in config.iteritems(): + assert 
'test' in client_config, 'You must specify a test to run' + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=[ + 'install', '-d', '-m0755', '--', + '{tdir}/qemu'.format(tdir=testdir), + '{tdir}/archive/qemu'.format(tdir=testdir), + ] + ) + try: + yield + finally: + for client, client_config in config.iteritems(): + assert 'test' in client_config, 'You must specify a test to run' + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=[ + 'rmdir', '{tdir}/qemu'.format(tdir=testdir), run.Raw('||'), 'true', + ] + ) + +@contextlib.contextmanager +def generate_iso(ctx, config): + """Execute system commands to generate iso""" + log.info('generating iso...') + testdir = teuthology.get_testdir(ctx) + for client, client_config in config.iteritems(): + assert 'test' in client_config, 'You must specify a test to run' + (remote,) = ctx.cluster.only(client).remotes.keys() + src_dir = os.path.dirname(__file__) + userdata_path = os.path.join(testdir, 'qemu', 'userdata.' + client) + metadata_path = os.path.join(testdir, 'qemu', 'metadata.' + client) + + with file(os.path.join(src_dir, 'userdata_setup.yaml'), 'rb') as f: + test_setup = ''.join(f.readlines()) + # configuring the commands to setup the nfs mount + mnt_dir = "/export/{client}".format(client=client) + test_setup = test_setup.format( + mnt_dir=mnt_dir + ) + + with file(os.path.join(src_dir, 'userdata_teardown.yaml'), 'rb') as f: + test_teardown = ''.join(f.readlines()) + + user_data = test_setup + if client_config.get('type', 'filesystem') == 'filesystem': + for i in xrange(0, client_config.get('num_rbd', DEFAULT_NUM_RBD)): + dev_letter = chr(ord('b') + i) + user_data += """ +- | + #!/bin/bash + mkdir /mnt/test_{dev_letter} + mkfs -t xfs /dev/vd{dev_letter} + mount -t xfs /dev/vd{dev_letter} /mnt/test_{dev_letter} +""".format(dev_letter=dev_letter) + + # this may change later to pass the directories as args to the + # script or something. xfstests needs that. 
+ user_data += """ +- | + #!/bin/bash + test -d /mnt/test_b && cd /mnt/test_b + /mnt/cdrom/test.sh > /mnt/log/test.log 2>&1 && touch /mnt/log/success +""" + test_teardown + + teuthology.write_file(remote, userdata_path, StringIO(user_data)) + + with file(os.path.join(src_dir, 'metadata.yaml'), 'rb') as f: + teuthology.write_file(remote, metadata_path, f) + + test_file = '{tdir}/qemu/{client}.test.sh'.format(tdir=testdir, client=client) + remote.run( + args=[ + 'wget', '-nv', '-O', test_file, + client_config['test'], + run.Raw('&&'), + 'chmod', '755', test_file, + ], + ) + remote.run( + args=[ + 'genisoimage', '-quiet', '-input-charset', 'utf-8', + '-volid', 'cidata', '-joliet', '-rock', + '-o', '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client), + '-graft-points', + 'user-data={userdata}'.format(userdata=userdata_path), + 'meta-data={metadata}'.format(metadata=metadata_path), + 'test.sh={file}'.format(file=test_file), + ], + ) + try: + yield + finally: + for client in config.iterkeys(): + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=[ + 'rm', '-f', + '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client), + os.path.join(testdir, 'qemu', 'userdata.' + client), + os.path.join(testdir, 'qemu', 'metadata.' 
+ client), + '{tdir}/qemu/{client}.test.sh'.format(tdir=testdir, client=client), + ], + ) + +@contextlib.contextmanager +def download_image(ctx, config): + """Download base image, remove image file when done""" + log.info('downloading base image') + testdir = teuthology.get_testdir(ctx) + for client, client_config in config.iteritems(): + (remote,) = ctx.cluster.only(client).remotes.keys() + base_file = '{tdir}/qemu/base.{client}.qcow2'.format(tdir=testdir, client=client) + remote.run( + args=[ + 'wget', '-nv', '-O', base_file, DEFAULT_IMAGE_URL, + ] + ) + try: + yield + finally: + log.debug('cleaning up base image files') + for client in config.iterkeys(): + base_file = '{tdir}/qemu/base.{client}.qcow2'.format( + tdir=testdir, + client=client, + ) + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=[ + 'rm', '-f', base_file, + ], + ) + + +def _setup_nfs_mount(remote, client, mount_dir): + """ + Sets up an nfs mount on the remote that the guest can use to + store logs. This nfs mount is also used to touch a file + at the end of the test to indicate if the test was successful + or not. 
+ """ + export_dir = "/export/{client}".format(client=client) + log.info("Creating the nfs export directory...") + remote.run(args=[ + 'sudo', 'mkdir', '-p', export_dir, + ]) + log.info("Mounting the test directory...") + remote.run(args=[ + 'sudo', 'mount', '--bind', mount_dir, export_dir, + ]) + log.info("Adding mount to /etc/exports...") + export = "{dir} *(rw,no_root_squash,no_subtree_check,insecure)".format( + dir=export_dir + ) + remote.run(args=[ + 'echo', export, run.Raw("|"), + 'sudo', 'tee', '-a', "/etc/exports", + ]) + log.info("Restarting NFS...") + if remote.os.package_type == "deb": + remote.run(args=['sudo', 'service', 'nfs-kernel-server', 'restart']) + else: + remote.run(args=['sudo', 'systemctl', 'restart', 'nfs']) + + +def _teardown_nfs_mount(remote, client): + """ + Tears down the nfs mount on the remote used for logging and reporting the + status of the tests being ran in the guest. + """ + log.info("Tearing down the nfs mount for {remote}".format(remote=remote)) + export_dir = "/export/{client}".format(client=client) + log.info("Stopping NFS...") + if remote.os.package_type == "deb": + remote.run(args=[ + 'sudo', 'service', 'nfs-kernel-server', 'stop' + ]) + else: + remote.run(args=[ + 'sudo', 'systemctl', 'stop', 'nfs' + ]) + log.info("Unmounting exported directory...") + remote.run(args=[ + 'sudo', 'umount', export_dir + ]) + log.info("Deleting exported directory...") + remote.run(args=[ + 'sudo', 'rm', '-r', '/export' + ]) + log.info("Deleting export from /etc/exports...") + remote.run(args=[ + 'sudo', 'sed', '-i', '$ d', '/etc/exports' + ]) + log.info("Starting NFS...") + if remote.os.package_type == "deb": + remote.run(args=[ + 'sudo', 'service', 'nfs-kernel-server', 'start' + ]) + else: + remote.run(args=[ + 'sudo', 'systemctl', 'start', 'nfs' + ]) + + +@contextlib.contextmanager +def run_qemu(ctx, config): + """Setup kvm environment and start qemu""" + procs = [] + testdir = teuthology.get_testdir(ctx) + for client, client_config in 
config.iteritems(): + (remote,) = ctx.cluster.only(client).remotes.keys() + log_dir = '{tdir}/archive/qemu/{client}'.format(tdir=testdir, client=client) + remote.run( + args=[ + 'mkdir', log_dir, run.Raw('&&'), + 'sudo', 'modprobe', 'kvm', + ] + ) + + # make an nfs mount to use for logging and to + # allow to test to tell teuthology the tests outcome + _setup_nfs_mount(remote, client, log_dir) + + base_file = '{tdir}/qemu/base.{client}.qcow2'.format( + tdir=testdir, + client=client + ) + qemu_cmd = 'qemu-system-x86_64' + if remote.os.package_type == "rpm": + qemu_cmd = "/usr/libexec/qemu-kvm" + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'daemon-helper', + 'term', + qemu_cmd, '-enable-kvm', '-nographic', + '-m', str(client_config.get('memory', DEFAULT_MEM)), + # base OS device + '-drive', + 'file={base},format=qcow2,if=virtio'.format(base=base_file), + # cd holding metadata for cloud-init + '-cdrom', '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client), + ] + + cachemode = 'none' + ceph_config = ctx.ceph.conf.get('global', {}) + ceph_config.update(ctx.ceph.conf.get('client', {})) + ceph_config.update(ctx.ceph.conf.get(client, {})) + if ceph_config.get('rbd cache'): + if ceph_config.get('rbd cache max dirty', 1) > 0: + cachemode = 'writeback' + else: + cachemode = 'writethrough' + + clone = client_config.get('clone', False) + for i in xrange(client_config.get('num_rbd', DEFAULT_NUM_RBD)): + suffix = '-clone' if clone else '' + args.extend([ + '-drive', + 'file=rbd:rbd/{img}:id={id},format=raw,if=virtio,cache={cachemode}'.format( + img='{client}.{num}{suffix}'.format(client=client, num=i, + suffix=suffix), + id=client[len('client.'):], + cachemode=cachemode, + ), + ]) + + log.info('starting qemu...') + procs.append( + remote.run( + args=args, + logger=log.getChild(client), + stdin=run.PIPE, + wait=False, + ) + ) + + try: + yield + finally: + log.info('waiting for qemu tests to finish...') + 
run.wait(procs) + + log.debug('checking that qemu tests succeeded...') + for client in config.iterkeys(): + (remote,) = ctx.cluster.only(client).remotes.keys() + # teardown nfs mount + _teardown_nfs_mount(remote, client) + # check for test status + remote.run( + args=[ + 'test', '-f', + '{tdir}/archive/qemu/{client}/success'.format( + tdir=testdir, + client=client + ), + ], + ) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Run a test inside of QEMU on top of rbd. Only one test + is supported per client. + + For example, you can specify which clients to run on:: + + tasks: + - ceph: + - qemu: + client.0: + test: http://ceph.com/qa/test.sh + client.1: + test: http://ceph.com/qa/test2.sh + + Or use the same settings on all clients: + + tasks: + - ceph: + - qemu: + all: + test: http://ceph.com/qa/test.sh + + For tests that don't need a filesystem, set type to block:: + + tasks: + - ceph: + - qemu: + client.0: + test: http://ceph.com/qa/test.sh + type: block + + The test should be configured to run on /dev/vdb and later + devices. 
+ + If you want to run a test that uses more than one rbd image, + specify how many images to use:: + + tasks: + - ceph: + - qemu: + client.0: + test: http://ceph.com/qa/test.sh + type: block + num_rbd: 2 + + You can set the amount of memory the VM has (default is 4096 MB):: + + tasks: + - ceph: + - qemu: + client.0: + test: http://ceph.com/qa/test.sh + memory: 512 # megabytes + + If you want to run a test against a cloned rbd image, set clone to true:: + + tasks: + - ceph: + - qemu: + client.0: + test: http://ceph.com/qa/test.sh + clone: true + """ + assert isinstance(config, dict), \ + "task qemu only supports a dictionary for configuration" + + config = teuthology.replace_all_with_clients(ctx.cluster, config) + + managers = [] + create_images(ctx=ctx, config=config, managers=managers) + managers.extend([ + lambda: create_dirs(ctx=ctx, config=config), + lambda: generate_iso(ctx=ctx, config=config), + lambda: download_image(ctx=ctx, config=config), + ]) + create_clones(ctx=ctx, config=config, managers=managers) + managers.append( + lambda: run_qemu(ctx=ctx, config=config), + ) + + with contextutil.nested(*managers): + yield diff --git a/qa/tasks/rados.py b/qa/tasks/rados.py new file mode 100644 index 00000000000..720da159630 --- /dev/null +++ b/qa/tasks/rados.py @@ -0,0 +1,196 @@ +""" +Rados model-based integration tests +""" +import contextlib +import logging +import gevent +from teuthology import misc as teuthology + +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Run RadosModel-based integration tests. 
+ + The config should be as follows:: + + rados: + clients: [client list] + ops: + objects: + max_in_flight: + object_size: + min_stride_size: + max_stride_size: + op_weights: + runs: - the pool is remade between runs + ec_pool: use an ec pool + erasure_code_profile: profile to use with the erasure coded pool + pool_snaps: use pool snapshots instead of selfmanaged snapshots + write_fadvise_dontneed: write behavior like with LIBRADOS_OP_FLAG_FADVISE_DONTNEED. + This means the data will not be accessed in the near future, + so the osd backend does not need to keep it in cache. + + For example:: + + tasks: + - ceph: + - rados: + clients: [client.0] + ops: 1000 + max_seconds: 0 # 0 for no limit + objects: 25 + max_in_flight: 16 + object_size: 4000000 + min_stride_size: 1024 + max_stride_size: 4096 + op_weights: + read: 20 + write: 10 + delete: 2 + snap_create: 3 + rollback: 2 + snap_remove: 0 + ec_pool: create an ec pool, defaults to False + erasure_code_profile: + name: teuthologyprofile + k: 2 + m: 1 + ruleset-failure-domain: osd + pool_snaps: true + write_fadvise_dontneed: true + runs: 10 + - interactive: + + Optionally, you can provide the pool name to run against: + + tasks: + - ceph: + - exec: + client.0: + - ceph osd pool create foo + - rados: + clients: [client.0] + pools: [foo] + ... + + Alternatively, you can provide a pool prefix: + + tasks: + - ceph: + - exec: + client.0: + - ceph osd pool create foo.client.0 + - rados: + clients: [client.0] + pool_prefix: foo + ... 
+ + """ + log.info('Beginning rados...') + assert isinstance(config, dict), \ + "please list clients to run on" + + object_size = int(config.get('object_size', 4000000)) + op_weights = config.get('op_weights', {}) + testdir = teuthology.get_testdir(ctx) + args = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'ceph_test_rados'] + if config.get('ec_pool', False): + args.extend(['--ec-pool']) + if config.get('write_fadvise_dontneed', False): + args.extend(['--write-fadvise-dontneed']) + if config.get('pool_snaps', False): + args.extend(['--pool-snaps']) + args.extend([ + '--op', 'read', str(op_weights.get('read', 100)), + '--op', 'write', str(op_weights.get('write', 100)), + '--op', 'delete', str(op_weights.get('delete', 10)), + '--max-ops', str(config.get('ops', 10000)), + '--objects', str(config.get('objects', 500)), + '--max-in-flight', str(config.get('max_in_flight', 16)), + '--size', str(object_size), + '--min-stride-size', str(config.get('min_stride_size', object_size / 10)), + '--max-stride-size', str(config.get('max_stride_size', object_size / 5)), + '--max-seconds', str(config.get('max_seconds', 0)) + ]) + # Parallel of the op_types in test/osd/TestRados.cc + for field in [ + # read handled above + # write handled above + # delete handled above + "snap_create", + "snap_remove", + "rollback", + "setattr", + "rmattr", + "watch", + "copy_from", + "hit_set_list", + "is_dirty", + "undirty", + "cache_flush", + "cache_try_flush", + "cache_evict", + "append", + ]: + if field in op_weights: + args.extend([ + '--op', field, str(op_weights[field]), + ]) + + def thread(): + """Thread spawned by gevent""" + clients = ['client.{id}'.format(id=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + log.info('clients are %s' % clients) + if config.get('ec_pool', False): + profile = config.get('erasure_code_profile', {}) + profile_name = profile.get('name', 'teuthologyprofile') + 
ctx.manager.create_erasure_code_profile(profile_name, profile) + else: + profile_name = None + for i in range(int(config.get('runs', '1'))): + log.info("starting run %s out of %s", str(i), config.get('runs', '1')) + tests = {} + existing_pools = config.get('pools', []) + created_pools = [] + for role in config.get('clients', clients): + assert isinstance(role, basestring) + PREFIX = 'client.' + assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + + pool = config.get('pool', None) + if not pool and existing_pools: + pool = existing_pools.pop() + else: + pool = ctx.manager.create_pool_with_unique_name(erasure_code_profile_name=profile_name) + created_pools.append(pool) + + (remote,) = ctx.cluster.only(role).remotes.iterkeys() + proc = remote.run( + args=["CEPH_CLIENT_ID={id_}".format(id_=id_)] + args + + ["--pool", pool], + logger=log.getChild("rados.{id}".format(id=id_)), + stdin=run.PIPE, + wait=False + ) + tests[id_] = proc + run.wait(tests.itervalues()) + + for pool in created_pools: + ctx.manager.remove_pool(pool) + + running = gevent.spawn(thread) + + try: + yield + finally: + log.info('joining rados') + running.get() diff --git a/qa/tasks/radosbench.py b/qa/tasks/radosbench.py new file mode 100644 index 00000000000..73c54372857 --- /dev/null +++ b/qa/tasks/radosbench.py @@ -0,0 +1,100 @@ +""" +Rados benchmarking +""" +import contextlib +import logging + +from teuthology.orchestra import run +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Run radosbench + + The config should be as follows: + + radosbench: + clients: [client list] + time: + pool: + size: write size to use + unique_pool: use a unique pool, defaults to False + ec_pool: create an ec pool, defaults to False + create_pool: create pool, defaults to False + erasure_code_profile: + name: teuthologyprofile + k: 2 + m: 1 + ruleset-failure-domain: osd + + example: + + tasks: + - ceph: + - radosbench: + 
clients: [client.0] + time: 360 + - interactive: + """ + log.info('Beginning radosbench...') + assert isinstance(config, dict), \ + "please list clients to run on" + radosbench = {} + + testdir = teuthology.get_testdir(ctx) + + for role in config.get('clients', ['client.0']): + assert isinstance(role, basestring) + PREFIX = 'client.' + assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + (remote,) = ctx.cluster.only(role).remotes.iterkeys() + + if config.get('ec_pool', False): + profile = config.get('erasure_code_profile', {}) + profile_name = profile.get('name', 'teuthologyprofile') + ctx.manager.create_erasure_code_profile(profile_name, profile) + else: + profile_name = None + + pool = 'data' + if config.get('create_pool', True): + if config.get('pool'): + pool = config.get('pool') + if pool != 'data': + ctx.manager.create_pool(pool, erasure_code_profile_name=profile_name) + else: + pool = ctx.manager.create_pool_with_unique_name(erasure_code_profile_name=profile_name) + + proc = remote.run( + args=[ + "/bin/sh", "-c", + " ".join(['adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage', + 'rados', + '--no-log-to-stderr', + '--name', role, + '-b', str(config.get('size', 4<<20)), + '-p' , pool, + 'bench', str(config.get('time', 360)), 'write', + ]).format(tdir=testdir), + ], + logger=log.getChild('radosbench.{id}'.format(id=id_)), + stdin=run.PIPE, + wait=False + ) + radosbench[id_] = proc + + try: + yield + finally: + timeout = config.get('time', 360) * 5 + 180 + log.info('joining radosbench (timing out after %ss)', timeout) + run.wait(radosbench.itervalues(), timeout=timeout) + + if pool is not 'data': + ctx.manager.remove_pool(pool) diff --git a/qa/tasks/radosgw_admin.py b/qa/tasks/radosgw_admin.py new file mode 100644 index 00000000000..b6baa4cd4e5 --- /dev/null +++ b/qa/tasks/radosgw_admin.py @@ -0,0 +1,1018 @@ +""" +Rgw admin testing against a running instance +""" +# The test cases in this file have been annotated for inventory. 
+# To extract the inventory (in csv format) use the command: +# +# grep '^ *# TESTCASE' | sed 's/^ *# TESTCASE //' +# + +import copy +import json +import logging +import time + +from cStringIO import StringIO + +import boto.exception +import boto.s3.connection +import boto.s3.acl + +import httplib2 + +import util.rgw as rgw_utils + +from teuthology import misc as teuthology +from util.rgw import rgwadmin, get_user_summary, get_user_successful_ops + +log = logging.getLogger(__name__) + +def create_presigned_url(conn, method, bucket_name, key_name, expiration): + return conn.generate_url(expires_in=expiration, + method=method, + bucket=bucket_name, + key=key_name, + query_auth=True, + ) + +def send_raw_http_request(conn, method, bucket_name, key_name, follow_redirects = False): + url = create_presigned_url(conn, method, bucket_name, key_name, 3600) + print url + h = httplib2.Http() + h.follow_redirects = follow_redirects + return h.request(url, method) + + +def get_acl(key): + """ + Helper function to get the xml acl from a key, ensuring that the xml + version tag is removed from the acl response + """ + raw_acl = key.get_xml_acl() + + def remove_version(string): + return string.split( + '' + )[-1] + + def remove_newlines(string): + return string.strip('\n') + + return remove_version( + remove_newlines(raw_acl) + ) + + +def task(ctx, config): + """ + Test radosgw-admin functionality against a running rgw instance. 
+ """ + global log + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task s3tests only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + clients = config.keys() + + multi_region_run = rgw_utils.multi_region_enabled(ctx) + + client = clients[0]; # default choice, multi-region code may overwrite this + if multi_region_run: + client = rgw_utils.get_master_client(ctx, clients) + + # once the client is chosen, pull the host name and assigned port out of + # the role_endpoints that were assigned by the rgw task + (remote_host, remote_port) = ctx.rgw.role_endpoints[client] + + ## + user1='foo' + user2='fud' + subuser1='foo:foo1' + subuser2='foo:foo2' + display_name1='Foo' + display_name2='Fud' + email='foo@foo.com' + email2='bar@bar.com' + access_key='9te6NH5mcdcq0Tc5i8i1' + secret_key='Ny4IOauQoL18Gp2zM7lC1vLmoawgqcYP/YGcWfXu' + access_key2='p5YnriCv1nAtykxBrupQ' + secret_key2='Q8Tk6Q/27hfbFSYdSkPtUqhqx1GgzvpXa4WARozh' + swift_secret1='gpS2G9RREMrnbqlp29PP2D36kgPR1tm72n5fPYfL' + swift_secret2='ri2VJQcKSYATOY6uaDUX7pxgkW+W1YmC6OCxPHwy' + + bucket_name='myfoo' + bucket_name2='mybar' + + # connect to rgw + connection = boto.s3.connection.S3Connection( + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + is_secure=False, + port=remote_port, + host=remote_host, + calling_format=boto.s3.connection.OrdinaryCallingFormat(), + ) + connection2 = boto.s3.connection.S3Connection( + aws_access_key_id=access_key2, + aws_secret_access_key=secret_key2, + is_secure=False, + port=remote_port, + host=remote_host, + calling_format=boto.s3.connection.OrdinaryCallingFormat(), + ) + + # legend (test cases can be easily grep-ed out) + # TESTCASE 'testname','object','method','operation','assertion' + # TESTCASE 
'info-nosuch','user','info','non-existent user','fails' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1]) + assert err + + # TESTCASE 'create-ok','user','create','w/all valid info','succeeds' + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--uid', user1, + '--display-name', display_name1, + '--email', email, + '--access-key', access_key, + '--secret', secret_key, + '--max-buckets', '4' + ], + check_status=True) + + # TESTCASE 'duplicate email','user','create','existing user email','fails' + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--uid', user2, + '--display-name', display_name2, + '--email', email, + ]) + assert err + + # TESTCASE 'info-existing','user','info','existing user','returns correct info' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) + assert out['user_id'] == user1 + assert out['email'] == email + assert out['display_name'] == display_name1 + assert len(out['keys']) == 1 + assert out['keys'][0]['access_key'] == access_key + assert out['keys'][0]['secret_key'] == secret_key + assert not out['suspended'] + + # this whole block should only be run if regions have been configured + if multi_region_run: + rgw_utils.radosgw_agent_sync_all(ctx) + # post-sync, validate that user1 exists on the sync destination host + for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): + dest_client = c_config['dest'] + (err, out) = rgwadmin(ctx, dest_client, ['metadata', 'list', 'user']) + (err, out) = rgwadmin(ctx, dest_client, ['user', 'info', '--uid', user1], check_status=True) + assert out['user_id'] == user1 + assert out['email'] == email + assert out['display_name'] == display_name1 + assert len(out['keys']) == 1 + assert out['keys'][0]['access_key'] == access_key + assert out['keys'][0]['secret_key'] == secret_key + assert not out['suspended'] + + # compare the metadata between different regions, make sure it matches + log.debug('compare the metadata between 
different regions, make sure it matches') + for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): + source_client = c_config['src'] + dest_client = c_config['dest'] + (err1, out1) = rgwadmin(ctx, source_client, + ['metadata', 'get', 'user:{uid}'.format(uid=user1)], check_status=True) + (err2, out2) = rgwadmin(ctx, dest_client, + ['metadata', 'get', 'user:{uid}'.format(uid=user1)], check_status=True) + assert out1 == out2 + + # suspend a user on the master, then check the status on the destination + log.debug('suspend a user on the master, then check the status on the destination') + for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): + source_client = c_config['src'] + dest_client = c_config['dest'] + (err, out) = rgwadmin(ctx, source_client, ['user', 'suspend', '--uid', user1]) + rgw_utils.radosgw_agent_sync_all(ctx) + (err, out) = rgwadmin(ctx, dest_client, ['user', 'info', '--uid', user1], check_status=True) + assert out['suspended'] + + # delete a user on the master, then check that it's gone on the destination + log.debug('delete a user on the master, then check that it\'s gone on the destination') + for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): + source_client = c_config['src'] + dest_client = c_config['dest'] + (err, out) = rgwadmin(ctx, source_client, ['user', 'rm', '--uid', user1], check_status=True) + rgw_utils.radosgw_agent_sync_all(ctx) + (err, out) = rgwadmin(ctx, source_client, ['user', 'info', '--uid', user1]) + assert out is None + (err, out) = rgwadmin(ctx, dest_client, ['user', 'info', '--uid', user1]) + assert out is None + + # then recreate it so later tests pass + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--uid', user1, + '--display-name', display_name1, + '--email', email, + '--access-key', access_key, + '--secret', secret_key, + '--max-buckets', '4' + ], + check_status=True) + + # now do the multi-region bucket tests + log.debug('now do the multi-region bucket tests') + + # 
Create a second user for the following tests + log.debug('Create a second user for the following tests') + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--uid', user2, + '--display-name', display_name2, + '--email', email2, + '--access-key', access_key2, + '--secret', secret_key2, + '--max-buckets', '4' + ], + check_status=True) + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user2], check_status=True) + assert out is not None + + # create a bucket and do a sync + log.debug('create a bucket and do a sync') + bucket = connection.create_bucket(bucket_name2) + rgw_utils.radosgw_agent_sync_all(ctx) + + # compare the metadata for the bucket between different regions, make sure it matches + log.debug('compare the metadata for the bucket between different regions, make sure it matches') + for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): + source_client = c_config['src'] + dest_client = c_config['dest'] + (err1, out1) = rgwadmin(ctx, source_client, + ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)], + check_status=True) + (err2, out2) = rgwadmin(ctx, dest_client, + ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)], + check_status=True) + assert out1 == out2 + + # get the bucket.instance info and compare that + src_bucket_id = out1['data']['bucket']['bucket_id'] + dest_bucket_id = out2['data']['bucket']['bucket_id'] + (err1, out1) = rgwadmin(ctx, source_client, ['metadata', 'get', + 'bucket.instance:{bucket_name}:{bucket_instance}'.format( + bucket_name=bucket_name2,bucket_instance=src_bucket_id)], + check_status=True) + (err2, out2) = rgwadmin(ctx, dest_client, ['metadata', 'get', + 'bucket.instance:{bucket_name}:{bucket_instance}'.format( + bucket_name=bucket_name2,bucket_instance=dest_bucket_id)], + check_status=True) + del out1['data']['bucket_info']['bucket']['pool'] + del out1['data']['bucket_info']['bucket']['index_pool'] + del 
out2['data']['bucket_info']['bucket']['pool'] + del out2['data']['bucket_info']['bucket']['index_pool'] + assert out1 == out2 + + same_region = 0 + for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): + source_client = c_config['src'] + dest_client = c_config['dest'] + + source_region = rgw_utils.region_for_client(ctx, source_client) + dest_region = rgw_utils.region_for_client(ctx, dest_client) + + # 301 is only returned for requests to something in a different region + if source_region == dest_region: + log.debug('301 is only returned for requests to something in a different region') + same_region += 1 + continue + + # Attempt to create a new connection with user1 to the destination RGW + log.debug('Attempt to create a new connection with user1 to the destination RGW') + # and use that to attempt a delete (that should fail) + + (dest_remote_host, dest_remote_port) = ctx.rgw.role_endpoints[dest_client] + connection_dest = boto.s3.connection.S3Connection( + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + is_secure=False, + port=dest_remote_port, + host=dest_remote_host, + calling_format=boto.s3.connection.OrdinaryCallingFormat(), + ) + + # this should fail + r, content = send_raw_http_request(connection_dest, 'DELETE', bucket_name2, '', follow_redirects = False) + assert r.status == 301 + + # now delete the bucket on the source RGW and do another sync + log.debug('now delete the bucket on the source RGW and do another sync') + bucket.delete() + rgw_utils.radosgw_agent_sync_all(ctx) + + if same_region == len(ctx.radosgw_agent.config): + bucket.delete() + rgw_utils.radosgw_agent_sync_all(ctx) + + # make sure that the bucket no longer exists in either region + log.debug('make sure that the bucket no longer exists in either region') + for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): + source_client = c_config['src'] + dest_client = c_config['dest'] + (err1, out1) = rgwadmin(ctx, source_client, ['metadata', 'get', + 
'bucket:{bucket_name}'.format(bucket_name=bucket_name2)]) + (err2, out2) = rgwadmin(ctx, dest_client, ['metadata', 'get', + 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)]) + # Both of the previous calls should have errors due to requesting + # metadata for non-existent buckets + assert err1 + assert err2 + + # create a bucket and then sync it + log.debug('create a bucket and then sync it') + bucket = connection.create_bucket(bucket_name2) + rgw_utils.radosgw_agent_sync_all(ctx) + + # compare the metadata for the bucket between different regions, make sure it matches + log.debug('compare the metadata for the bucket between different regions, make sure it matches') + for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): + source_client = c_config['src'] + dest_client = c_config['dest'] + (err1, out1) = rgwadmin(ctx, source_client, + ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)], + check_status=True) + (err2, out2) = rgwadmin(ctx, dest_client, + ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)], + check_status=True) + assert out1 == out2 + + # Now delete the bucket and recreate it with a different user + log.debug('Now delete the bucket and recreate it with a different user') + # within the same window of time and then sync. 
+ bucket.delete() + bucket = connection2.create_bucket(bucket_name2) + rgw_utils.radosgw_agent_sync_all(ctx) + + # compare the metadata for the bucket between different regions, make sure it matches + log.debug('compare the metadata for the bucket between different regions, make sure it matches') + # user2 should own the bucket in both regions + for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): + source_client = c_config['src'] + dest_client = c_config['dest'] + (err1, out1) = rgwadmin(ctx, source_client, + ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)], + check_status=True) + (err2, out2) = rgwadmin(ctx, dest_client, + ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)], + check_status=True) + assert out1 == out2 + assert out1['data']['owner'] == user2 + assert out1['data']['owner'] != user1 + + # now we're going to use this bucket to test meta-data update propagation + log.debug('now we\'re going to use this bucket to test meta-data update propagation') + for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): + source_client = c_config['src'] + dest_client = c_config['dest'] + + # get the metadata so we can tweak it + log.debug('get the metadata so we can tweak it') + (err, orig_data) = rgwadmin(ctx, source_client, + ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)], + check_status=True) + + # manually edit mtime for this bucket to be 300 seconds in the past + log.debug('manually edit mtime for this bucket to be 300 seconds in the past') + new_data = copy.deepcopy(orig_data) + new_data['mtime'] = orig_data['mtime'] - 300 + assert new_data != orig_data + (err, out) = rgwadmin(ctx, source_client, + ['metadata', 'put', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)], + stdin=StringIO(json.dumps(new_data)), + check_status=True) + + # get the metadata and make sure that the 'put' worked + log.debug('get the metadata and make sure that the \'put\' 
worked') + (err, out) = rgwadmin(ctx, source_client, + ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)], + check_status=True) + assert out == new_data + + # sync to propagate the new metadata + log.debug('sync to propagate the new metadata') + rgw_utils.radosgw_agent_sync_all(ctx) + + # get the metadata from the dest and compare it to what we just set + log.debug('get the metadata from the dest and compare it to what we just set') + # and what the source region has. + (err1, out1) = rgwadmin(ctx, source_client, + ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)], + check_status=True) + (err2, out2) = rgwadmin(ctx, dest_client, + ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)], + check_status=True) + # yeah for the transitive property + assert out1 == out2 + assert out1 == new_data + + # now we delete the bucket + log.debug('now we delete the bucket') + bucket.delete() + + log.debug('sync to propagate the deleted bucket') + rgw_utils.radosgw_agent_sync_all(ctx) + + # Delete user2 as later tests do not expect it to exist. + # Verify that it is gone on both regions + for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): + source_client = c_config['src'] + dest_client = c_config['dest'] + (err, out) = rgwadmin(ctx, source_client, + ['user', 'rm', '--uid', user2], check_status=True) + rgw_utils.radosgw_agent_sync_all(ctx) + # The two 'user info' calls should fail and not return any data + # since we just deleted this user. 
+ (err, out) = rgwadmin(ctx, source_client, ['user', 'info', '--uid', user2]) + assert out is None + (err, out) = rgwadmin(ctx, dest_client, ['user', 'info', '--uid', user2]) + assert out is None + + # Test data sync + + # First create a bucket for data sync test purpose + bucket = connection.create_bucket(bucket_name + 'data') + + # Create a tiny file and check if in sync + for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): + if c_config.get('metadata-only'): + continue + + for full in (True, False): + source_client = c_config['src'] + dest_client = c_config['dest'] + k = boto.s3.key.Key(bucket) + k.key = 'tiny_file' + k.set_contents_from_string("123456789") + safety_window = rgw_utils.radosgw_data_log_window(ctx, source_client) + time.sleep(safety_window) + rgw_utils.radosgw_agent_sync_all(ctx, data=True, full=full) + (dest_host, dest_port) = ctx.rgw.role_endpoints[dest_client] + dest_connection = boto.s3.connection.S3Connection( + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + is_secure=False, + port=dest_port, + host=dest_host, + calling_format=boto.s3.connection.OrdinaryCallingFormat(), + ) + dest_k = dest_connection.get_bucket(bucket_name + 'data').get_key('tiny_file') + assert k.get_contents_as_string() == dest_k.get_contents_as_string() + + # check that deleting it removes it from the dest zone + k.delete() + time.sleep(safety_window) + # full sync doesn't handle deleted objects yet + rgw_utils.radosgw_agent_sync_all(ctx, data=True, full=False) + + dest_bucket = dest_connection.get_bucket(bucket_name + 'data') + dest_k = dest_bucket.get_key('tiny_file') + assert dest_k == None, 'object not deleted from destination zone' + + # finally we delete the bucket + bucket.delete() + + bucket = connection.create_bucket(bucket_name + 'data2') + for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): + if c_config.get('metadata-only'): + continue + + for full in (True, False): + source_client = c_config['src'] + 
dest_client = c_config['dest'] + (dest_host, dest_port) = ctx.rgw.role_endpoints[dest_client] + dest_connection = boto.s3.connection.S3Connection( + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + is_secure=False, + port=dest_port, + host=dest_host, + calling_format=boto.s3.connection.OrdinaryCallingFormat(), + ) + for i in range(20): + k = boto.s3.key.Key(bucket) + k.key = 'tiny_file_' + str(i) + k.set_contents_from_string(str(i) * 100) + + safety_window = rgw_utils.radosgw_data_log_window(ctx, source_client) + time.sleep(safety_window) + rgw_utils.radosgw_agent_sync_all(ctx, data=True, full=full) + + for i in range(20): + dest_k = dest_connection.get_bucket(bucket_name + 'data2').get_key('tiny_file_' + str(i)) + assert (str(i) * 100) == dest_k.get_contents_as_string() + k = boto.s3.key.Key(bucket) + k.key = 'tiny_file_' + str(i) + k.delete() + + # check that deleting removes the objects from the dest zone + time.sleep(safety_window) + # full sync doesn't delete deleted objects yet + rgw_utils.radosgw_agent_sync_all(ctx, data=True, full=False) + + for i in range(20): + dest_bucket = dest_connection.get_bucket(bucket_name + 'data2') + dest_k = dest_bucket.get_key('tiny_file_' + str(i)) + assert dest_k == None, 'object %d not deleted from destination zone' % i + bucket.delete() + + # end of 'if multi_region_run:' + + # TESTCASE 'suspend-ok','user','suspend','active user','succeeds' + (err, out) = rgwadmin(ctx, client, ['user', 'suspend', '--uid', user1], + check_status=True) + + # TESTCASE 'suspend-suspended','user','suspend','suspended user','succeeds w/advisory' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) + assert out['suspended'] + + # TESTCASE 're-enable','user','enable','suspended user','succeeds' + (err, out) = rgwadmin(ctx, client, ['user', 'enable', '--uid', user1], check_status=True) + + # TESTCASE 'info-re-enabled','user','info','re-enabled user','no longer suspended' + (err, out) = 
rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) + assert not out['suspended'] + + # TESTCASE 'add-keys','key','create','w/valid info','succeeds' + (err, out) = rgwadmin(ctx, client, [ + 'key', 'create', '--uid', user1, + '--access-key', access_key2, '--secret', secret_key2, + ], check_status=True) + + # TESTCASE 'info-new-key','user','info','after key addition','returns all keys' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], + check_status=True) + assert len(out['keys']) == 2 + assert out['keys'][0]['access_key'] == access_key2 or out['keys'][1]['access_key'] == access_key2 + assert out['keys'][0]['secret_key'] == secret_key2 or out['keys'][1]['secret_key'] == secret_key2 + + # TESTCASE 'rm-key','key','rm','newly added key','succeeds, key is removed' + (err, out) = rgwadmin(ctx, client, [ + 'key', 'rm', '--uid', user1, + '--access-key', access_key2, + ], check_status=True) + assert len(out['keys']) == 1 + assert out['keys'][0]['access_key'] == access_key + assert out['keys'][0]['secret_key'] == secret_key + + # TESTCASE 'add-swift-key','key','create','swift key','succeeds' + subuser_access = 'full' + subuser_perm = 'full-control' + + (err, out) = rgwadmin(ctx, client, [ + 'subuser', 'create', '--subuser', subuser1, + '--access', subuser_access + ], check_status=True) + + # TESTCASE 'add-swift-key','key','create','swift key','succeeds' + (err, out) = rgwadmin(ctx, client, [ + 'subuser', 'modify', '--subuser', subuser1, + '--secret', swift_secret1, + '--key-type', 'swift', + ], check_status=True) + + # TESTCASE 'subuser-perm-mask', 'subuser', 'info', 'test subuser perm mask durability', 'succeeds' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1]) + + assert out['subusers'][0]['permissions'] == subuser_perm + + # TESTCASE 'info-swift-key','user','info','after key addition','returns all keys' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) + assert 
len(out['swift_keys']) == 1 + assert out['swift_keys'][0]['user'] == subuser1 + assert out['swift_keys'][0]['secret_key'] == swift_secret1 + + # TESTCASE 'add-swift-subuser','key','create','swift sub-user key','succeeds' + (err, out) = rgwadmin(ctx, client, [ + 'subuser', 'create', '--subuser', subuser2, + '--secret', swift_secret2, + '--key-type', 'swift', + ], check_status=True) + + # TESTCASE 'info-swift-subuser','user','info','after key addition','returns all sub-users/keys' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) + assert len(out['swift_keys']) == 2 + assert out['swift_keys'][0]['user'] == subuser2 or out['swift_keys'][1]['user'] == subuser2 + assert out['swift_keys'][0]['secret_key'] == swift_secret2 or out['swift_keys'][1]['secret_key'] == swift_secret2 + + # TESTCASE 'rm-swift-key1','key','rm','subuser','succeeds, one key is removed' + (err, out) = rgwadmin(ctx, client, [ + 'key', 'rm', '--subuser', subuser1, + '--key-type', 'swift', + ], check_status=True) + assert len(out['swift_keys']) == 1 + + # TESTCASE 'rm-subuser','subuser','rm','subuser','success, subuser is removed' + (err, out) = rgwadmin(ctx, client, [ + 'subuser', 'rm', '--subuser', subuser1, + ], check_status=True) + assert len(out['subusers']) == 1 + + # TESTCASE 'rm-subuser-with-keys','subuser','rm','subuser','succeeds, second subser and key is removed' + (err, out) = rgwadmin(ctx, client, [ + 'subuser', 'rm', '--subuser', subuser2, + '--key-type', 'swift', '--purge-keys', + ], check_status=True) + assert len(out['swift_keys']) == 0 + assert len(out['subusers']) == 0 + + # TESTCASE 'bucket-stats','bucket','stats','no session/buckets','succeeds, empty list' + (err, out) = rgwadmin(ctx, client, ['bucket', 'stats', '--uid', user1], + check_status=True) + assert len(out) == 0 + + if multi_region_run: + rgw_utils.radosgw_agent_sync_all(ctx) + + # TESTCASE 'bucket-stats2','bucket','stats','no buckets','succeeds, empty list' + (err, out) = 
rgwadmin(ctx, client, ['bucket', 'list', '--uid', user1], check_status=True) + assert len(out) == 0 + + # create a first bucket + bucket = connection.create_bucket(bucket_name) + + # TESTCASE 'bucket-list','bucket','list','one bucket','succeeds, expected list' + (err, out) = rgwadmin(ctx, client, ['bucket', 'list', '--uid', user1], check_status=True) + assert len(out) == 1 + assert out[0] == bucket_name + + # TESTCASE 'bucket-list-all','bucket','list','all buckets','succeeds, expected list' + (err, out) = rgwadmin(ctx, client, ['bucket', 'list'], check_status=True) + assert len(out) >= 1 + assert bucket_name in out; + + # TESTCASE 'max-bucket-limit,'bucket','create','4 buckets','5th bucket fails due to max buckets == 4' + bucket2 = connection.create_bucket(bucket_name + '2') + bucket3 = connection.create_bucket(bucket_name + '3') + bucket4 = connection.create_bucket(bucket_name + '4') + # the 5th should fail. + failed = False + try: + connection.create_bucket(bucket_name + '5') + except Exception: + failed = True + assert failed + + # delete the buckets + bucket2.delete() + bucket3.delete() + bucket4.delete() + + # TESTCASE 'bucket-stats3','bucket','stats','new empty bucket','succeeds, empty list' + (err, out) = rgwadmin(ctx, client, [ + 'bucket', 'stats', '--bucket', bucket_name], check_status=True) + assert out['owner'] == user1 + bucket_id = out['id'] + + # TESTCASE 'bucket-stats4','bucket','stats','new empty bucket','succeeds, expected bucket ID' + (err, out) = rgwadmin(ctx, client, ['bucket', 'stats', '--uid', user1], check_status=True) + assert len(out) == 1 + assert out[0]['id'] == bucket_id # does it return the same ID twice in a row? 
+ + # use some space + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('one') + + # TESTCASE 'bucket-stats5','bucket','stats','after creating key','succeeds, lists one non-empty object' + (err, out) = rgwadmin(ctx, client, [ + 'bucket', 'stats', '--bucket', bucket_name], check_status=True) + assert out['id'] == bucket_id + assert out['usage']['rgw.main']['num_objects'] == 1 + assert out['usage']['rgw.main']['size_kb'] > 0 + + # reclaim it + key.delete() + + # TESTCASE 'bucket unlink', 'bucket', 'unlink', 'unlink bucket from user', 'fails', 'access denied error' + (err, out) = rgwadmin(ctx, client, + ['bucket', 'unlink', '--uid', user1, '--bucket', bucket_name], + check_status=True) + + # create a second user to link the bucket to + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--uid', user2, + '--display-name', display_name2, + '--access-key', access_key2, + '--secret', secret_key2, + '--max-buckets', '1', + ], + check_status=True) + + # try creating an object with the first user before the bucket is relinked + denied = False + key = boto.s3.key.Key(bucket) + + try: + key.set_contents_from_string('two') + except boto.exception.S3ResponseError: + denied = True + + assert not denied + + # delete the object + key.delete() + + # link the bucket to another user + (err, out) = rgwadmin(ctx, client, ['metadata', 'get', 'bucket:{n}'.format(n=bucket_name)], + check_status=True) + + bucket_data = out['data'] + assert bucket_data['bucket']['name'] == bucket_name + + bucket_id = bucket_data['bucket']['bucket_id'] + + # link the bucket to another user + (err, out) = rgwadmin(ctx, client, ['bucket', 'link', '--uid', user2, '--bucket', bucket_name, '--bucket-id', bucket_id], + check_status=True) + + # try to remove user, should fail (has a linked bucket) + (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user2]) + assert err + + # TESTCASE 'bucket unlink', 'bucket', 'unlink', 'unlink bucket from user', 'succeeds, bucket unlinked' + (err, out) 
= rgwadmin(ctx, client, ['bucket', 'unlink', '--uid', user2, '--bucket', bucket_name], + check_status=True) + + # relink the bucket to the first user and delete the second user + (err, out) = rgwadmin(ctx, client, + ['bucket', 'link', '--uid', user1, '--bucket', bucket_name, '--bucket-id', bucket_id], + check_status=True) + + (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user2], + check_status=True) + + # TESTCASE 'object-rm', 'object', 'rm', 'remove object', 'succeeds, object is removed' + + # upload an object + object_name = 'four' + key = boto.s3.key.Key(bucket, object_name) + key.set_contents_from_string(object_name) + + # now delete it + (err, out) = rgwadmin(ctx, client, + ['object', 'rm', '--bucket', bucket_name, '--object', object_name], + check_status=True) + + # TESTCASE 'bucket-stats6','bucket','stats','after deleting key','succeeds, lists one no objects' + (err, out) = rgwadmin(ctx, client, [ + 'bucket', 'stats', '--bucket', bucket_name], + check_status=True) + assert out['id'] == bucket_id + assert out['usage']['rgw.main']['num_objects'] == 0 + + # list log objects + # TESTCASE 'log-list','log','list','after activity','succeeds, lists one no objects' + (err, out) = rgwadmin(ctx, client, ['log', 'list'], check_status=True) + assert len(out) > 0 + + for obj in out: + # TESTCASE 'log-show','log','show','after activity','returns expected info' + if obj[:4] == 'meta' or obj[:4] == 'data': + continue + + (err, rgwlog) = rgwadmin(ctx, client, ['log', 'show', '--object', obj], + check_status=True) + assert len(rgwlog) > 0 + + # exempt bucket_name2 from checking as it was only used for multi-region tests + assert rgwlog['bucket'].find(bucket_name) == 0 or rgwlog['bucket'].find(bucket_name2) == 0 + assert rgwlog['bucket'] != bucket_name or rgwlog['bucket_id'] == bucket_id + assert rgwlog['bucket_owner'] == user1 or rgwlog['bucket'] == bucket_name + '5' or rgwlog['bucket'] == bucket_name2 + for entry in rgwlog['log_entries']: + log.debug('checking 
log entry: ', entry) + assert entry['bucket'] == rgwlog['bucket'] + possible_buckets = [bucket_name + '5', bucket_name2] + user = entry['user'] + assert user == user1 or user.endswith('system-user') or \ + rgwlog['bucket'] in possible_buckets + + # TESTCASE 'log-rm','log','rm','delete log objects','succeeds' + (err, out) = rgwadmin(ctx, client, ['log', 'rm', '--object', obj], + check_status=True) + + # TODO: show log by bucket+date + + # need to wait for all usage data to get flushed, should take up to 30 seconds + timestamp = time.time() + while time.time() - timestamp <= (20 * 60): # wait up to 20 minutes + (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--categories', 'delete_obj']) # last operation we did is delete obj, wait for it to flush + if get_user_successful_ops(out, user1) > 0: + break + time.sleep(1) + + assert time.time() - timestamp <= (20 * 60) + + # TESTCASE 'usage-show' 'usage' 'show' 'all usage' 'succeeds' + (err, out) = rgwadmin(ctx, client, ['usage', 'show'], check_status=True) + assert len(out['entries']) > 0 + assert len(out['summary']) > 0 + + user_summary = get_user_summary(out, user1) + + total = user_summary['total'] + assert total['successful_ops'] > 0 + + # TESTCASE 'usage-show2' 'usage' 'show' 'user usage' 'succeeds' + (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--uid', user1], + check_status=True) + assert len(out['entries']) > 0 + assert len(out['summary']) > 0 + user_summary = out['summary'][0] + for entry in user_summary['categories']: + assert entry['successful_ops'] > 0 + assert user_summary['user'] == user1 + + # TESTCASE 'usage-show3' 'usage' 'show' 'user usage categories' 'succeeds' + test_categories = ['create_bucket', 'put_obj', 'delete_obj', 'delete_bucket'] + for cat in test_categories: + (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--uid', user1, '--categories', cat], + check_status=True) + assert len(out['summary']) > 0 + user_summary = out['summary'][0] + assert user_summary['user'] == user1 + 
assert len(user_summary['categories']) == 1 + entry = user_summary['categories'][0] + assert entry['category'] == cat + assert entry['successful_ops'] > 0 + + # the usage flush interval is 30 seconds, wait that much an then some + # to make sure everything has been flushed + time.sleep(35) + + # TESTCASE 'usage-trim' 'usage' 'trim' 'user usage' 'succeeds, usage removed' + (err, out) = rgwadmin(ctx, client, ['usage', 'trim', '--uid', user1], + check_status=True) + (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--uid', user1], + check_status=True) + assert len(out['entries']) == 0 + assert len(out['summary']) == 0 + + # TESTCASE 'user-suspend2','user','suspend','existing user','succeeds' + (err, out) = rgwadmin(ctx, client, ['user', 'suspend', '--uid', user1], + check_status=True) + + # TESTCASE 'user-suspend3','user','suspend','suspended user','cannot write objects' + try: + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('five') + except boto.exception.S3ResponseError as e: + assert e.status == 403 + + # TESTCASE 'user-renable2','user','enable','suspended user','succeeds' + (err, out) = rgwadmin(ctx, client, ['user', 'enable', '--uid', user1], + check_status=True) + + # TESTCASE 'user-renable3','user','enable','reenabled user','can write objects' + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('six') + + # TESTCASE 'gc-list', 'gc', 'list', 'get list of objects ready for garbage collection' + + # create an object large enough to be split into multiple parts + test_string = 'foo'*10000000 + + big_key = boto.s3.key.Key(bucket) + big_key.set_contents_from_string(test_string) + + # now delete the head + big_key.delete() + + # wait a bit to give the garbage collector time to cycle + time.sleep(15) + + (err, out) = rgwadmin(ctx, client, ['gc', 'list']) + + assert len(out) > 0 + + # TESTCASE 'gc-process', 'gc', 'process', 'manually collect garbage' + (err, out) = rgwadmin(ctx, client, ['gc', 'process'], check_status=True) + + #confirm + 
(err, out) = rgwadmin(ctx, client, ['gc', 'list']) + + assert len(out) == 0 + + # TESTCASE 'rm-user-buckets','user','rm','existing user','fails, still has buckets' + (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user1]) + assert err + + # delete should fail because ``key`` still exists + try: + bucket.delete() + except boto.exception.S3ResponseError as e: + assert e.status == 409 + + key.delete() + bucket.delete() + + # TESTCASE 'policy', 'bucket', 'policy', 'get bucket policy', 'returns S3 policy' + bucket = connection.create_bucket(bucket_name) + + # create an object + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('seven') + + # should be private already but guarantee it + key.set_acl('private') + + (err, out) = rgwadmin(ctx, client, + ['policy', '--bucket', bucket.name, '--object', key.key], + check_status=True) + + acl = get_acl(key) + + assert acl == out.strip('\n') + + # add another grantee by making the object public read + key.set_acl('public-read') + + (err, out) = rgwadmin(ctx, client, + ['policy', '--bucket', bucket.name, '--object', key.key], + check_status=True) + + acl = get_acl(key) + + assert acl == out.strip('\n') + + # TESTCASE 'rm-bucket', 'bucket', 'rm', 'bucket with objects', 'succeeds' + bucket = connection.create_bucket(bucket_name) + key_name = ['eight', 'nine', 'ten', 'eleven'] + for i in range(4): + key = boto.s3.key.Key(bucket) + key.set_contents_from_string(key_name[i]) + + (err, out) = rgwadmin(ctx, client, + ['bucket', 'rm', '--bucket', bucket_name, '--purge-objects'], + check_status=True) + + # TESTCASE 'caps-add', 'caps', 'add', 'add user cap', 'succeeds' + caps='user=read' + (err, out) = rgwadmin(ctx, client, ['caps', 'add', '--uid', user1, '--caps', caps]) + + assert out['caps'][0]['perm'] == 'read' + + # TESTCASE 'caps-rm', 'caps', 'rm', 'remove existing cap from user', 'succeeds' + (err, out) = rgwadmin(ctx, client, ['caps', 'rm', '--uid', user1, '--caps', caps]) + + assert not out['caps'] + + # 
TESTCASE 'rm-user','user','rm','existing user','fails, still has buckets' + bucket = connection.create_bucket(bucket_name) + key = boto.s3.key.Key(bucket) + + (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user1]) + assert err + + # TESTCASE 'rm-user2', 'user', 'rm', 'user with data', 'succeeds' + bucket = connection.create_bucket(bucket_name) + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('twelve') + + (err, out) = rgwadmin(ctx, client, + ['user', 'rm', '--uid', user1, '--purge-data' ], + check_status=True) + + # TESTCASE 'rm-user3','user','rm','deleted user','fails' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1]) + assert err + + # TESTCASE 'zone-info', 'zone', 'get', 'get zone info', 'succeeds, has default placement rule' + # + + (err, out) = rgwadmin(ctx, client, ['zone', 'get']) + orig_placement_pools = len(out['placement_pools']) + + # removed this test, it is not correct to assume that zone has default placement, it really + # depends on how we set it up before + # + # assert len(out) > 0 + # assert len(out['placement_pools']) == 1 + + # default_rule = out['placement_pools'][0] + # assert default_rule['key'] == 'default-placement' + + rule={'key': 'new-placement', 'val': {'data_pool': '.rgw.buckets.2', 'index_pool': '.rgw.buckets.index.2'}} + + out['placement_pools'].append(rule) + + (err, out) = rgwadmin(ctx, client, ['zone', 'set'], + stdin=StringIO(json.dumps(out)), + check_status=True) + + (err, out) = rgwadmin(ctx, client, ['zone', 'get']) + assert len(out) > 0 + assert len(out['placement_pools']) == orig_placement_pools + 1 diff --git a/qa/tasks/radosgw_admin_rest.py b/qa/tasks/radosgw_admin_rest.py new file mode 100644 index 00000000000..7bd72d19536 --- /dev/null +++ b/qa/tasks/radosgw_admin_rest.py @@ -0,0 +1,668 @@ +""" +Run a series of rgw admin commands through the rest interface. + +The test cases in this file have been annotated for inventory. 
def rgwadmin(ctx, client, cmd):
    """
    Run a radosgw-admin command on the remote that hosts ``client``.

    :param ctx: teuthology context; supplies the test directory and cluster.
    :param client: role name used to select the remote.
    :param cmd: list of radosgw-admin arguments appended to the fixed prefix.
    :return: tuple of (exit status, result). The result is the decoded JSON
             on success, the raw stdout text when stdout is not valid JSON,
             and None when the command failed or printed nothing.
    """
    log.info('radosgw-admin: %s' % cmd)
    coverage_dir = '{tdir}/archive/coverage'.format(
        tdir=teuthology.get_testdir(ctx))
    argv = [
        'adjust-ulimits',
        'ceph-coverage',
        coverage_dir,
        'radosgw-admin',
        '--log-to-stderr',
        '--format', 'json',
    ] + list(cmd)

    (remote,) = ctx.cluster.only(client).remotes.iterkeys()
    proc = remote.run(
        args=argv,
        check_status=False,
        stdout=StringIO(),
        stderr=StringIO(),
    )

    status = proc.exitstatus
    text = proc.stdout.getvalue()
    # Nothing to decode when the command failed or produced no output.
    if status or text == '':
        return (status, None)

    try:
        result = json.loads(text)
        log.info(' json result: %s' % result)
    except ValueError:
        # Not JSON: hand the caller the text unchanged.
        result = text
        log.info(' raw result: %s' % result)
    return (status, result)
def rgwadmin_rest(connection, cmd, params=None, headers=None, raw=False):
    """
    Perform a radosgw admin command through the REST interface.

    :param connection: boto S3 connection; its calling format, host and
                       credentials are used to build and sign the request.
    :param cmd: two-element list ``[resource, action]``, for example
                ``['user', 'info']`` or ``['key', 'rm']``.
    :param params: optional dict of query parameters handed to ``requests``.
    :param headers: optional dict of extra HTTP headers to include.
    :param raw: when true, return the response body as text instead of
                decoding it as JSON.
    :return: tuple of (HTTP status code, response body).
    """
    log.info('radosgw-admin-rest: %s %s' % (cmd, params))
    put_cmds = ['create', 'link', 'add']
    post_cmds = ['unlink', 'modify']
    delete_cmds = ['trim', 'rm', 'process']
    get_cmds = ['check', 'info', 'show', 'list']

    bucket_sub_resources = ['object', 'policy', 'index']
    user_sub_resources = ['subuser', 'key', 'caps']
    zone_sub_resources = ['pool', 'log', 'garbage']

    def get_cmd_method_and_handler(cmd):
        """
        Map the action in cmd[1] to an HTTP verb and the matching
        ``requests`` function.
        """
        if cmd[1] in put_cmds:
            return 'PUT', requests.put
        elif cmd[1] in delete_cmds:
            return 'DELETE', requests.delete
        elif cmd[1] in post_cmds:
            return 'POST', requests.post
        elif cmd[1] in get_cmds:
            return 'GET', requests.get

    def get_resource(cmd):
        """
        Map cmd[0] to (top-level resource, sub-resource). The sub-resource
        is '' when cmd[0] is itself a top-level resource.
        """
        if cmd[0] == 'bucket' or cmd[0] in bucket_sub_resources:
            if cmd[0] == 'bucket':
                return 'bucket', ''
            else:
                return 'bucket', cmd[0]
        elif cmd[0] == 'user' or cmd[0] in user_sub_resources:
            if cmd[0] == 'user':
                return 'user', ''
            else:
                return 'user', cmd[0]
        elif cmd[0] == 'usage':
            return 'usage', ''
        elif cmd[0] == 'zone' or cmd[0] in zone_sub_resources:
            if cmd[0] == 'zone':
                return 'zone', ''
            else:
                return 'zone', cmd[0]

    def build_admin_request(conn, method, resource='', headers=None, data='',
                            query_args=None, params=None):
        """
        Build an administrative request, adapted from the build_request()
        method of boto.connection.
        """
        path = conn.calling_format.build_path_base('admin', resource)
        auth_path = conn.calling_format.build_auth_path('admin', resource)
        host = conn.calling_format.build_host(conn.server_name(), 'admin')
        if query_args:
            path += '?' + query_args
            boto.log.debug('path=%s' % path)
            auth_path += '?' + query_args
            boto.log.debug('auth_path=%s' % auth_path)
        return AWSAuthConnection.build_base_http_request(
            conn, method, path, auth_path, params, headers, data, host)

    method, handler = get_cmd_method_and_handler(cmd)
    resource, query_args = get_resource(cmd)
    request = build_admin_request(connection, method, resource,
                                  query_args=query_args, headers=headers)

    url = '{protocol}://{host}{path}'.format(protocol=request.protocol,
                                             host=request.host,
                                             path=request.path)

    # Signing fills in request.headers, which are then sent with requests.
    request.authorize(connection=connection)
    result = handler(url, params=params, headers=request.headers)

    if raw:
        # The requests response exposes the decoded body as ``.text``;
        # the previous ``.txt`` attribute does not exist.
        log.info(' text result: %s' % result.text)
        return result.status_code, result.text

    body = result.json()
    log.info(' json result: %s' % body)
    return result.status_code, body
def task(ctx, config):
    """
    Test radosgw-admin functionality through the RESTful interface.

    :param ctx: teuthology context.
    :param config: None, a list of client roles, or a dict keyed by client
                   role. Only the first client is used.

    An administrative user is created with the radosgw-admin CLI; every
    later operation goes through rgwadmin_rest() signed with that user's
    keys.
    """
    assert config is None or isinstance(config, list) \
        or isinstance(config, dict), \
        "task radosgw_admin_rest only supports a list or dictionary for configuration"
    all_clients = ['client.{id}'.format(id=id_)
                   for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
    if config is None:
        config = all_clients
    if isinstance(config, list):
        config = dict.fromkeys(config)
    clients = config.keys()

    # just use the first client...
    client = clients[0]

    ##
    admin_user = 'ada'
    admin_display_name = 'Ms. Admin User'
    admin_access_key = 'MH1WC2XQ1S8UISFDZC8W'
    admin_secret_key = 'dQyrTPA0s248YeN5bBv4ukvKU0kh54LWWywkrpoG'
    admin_caps = 'users=read, write; usage=read, write; buckets=read, write; zone=read, write'

    user1 = 'foo'
    user2 = 'fud'
    subuser1 = 'foo:foo1'
    subuser2 = 'foo:foo2'
    display_name1 = 'Foo'
    display_name2 = 'Fud'
    email = 'foo@foo.com'
    access_key = '9te6NH5mcdcq0Tc5i8i1'
    secret_key = 'Ny4IOauQoL18Gp2zM7lC1vLmoawgqcYP/YGcWfXu'
    access_key2 = 'p5YnriCv1nAtykxBrupQ'
    secret_key2 = 'Q8Tk6Q/27hfbFSYdSkPtUqhqx1GgzvpXa4WARozh'
    swift_secret1 = 'gpS2G9RREMrnbqlp29PP2D36kgPR1tm72n5fPYfL'
    swift_secret2 = 'ri2VJQcKSYATOY6uaDUX7pxgkW+W1YmC6OCxPHwy'

    bucket_name = 'myfoo'

    # legend (test cases can be easily grep-ed out)
    # TESTCASE 'testname','object','method','operation','assertion'
    # TESTCASE 'create-admin-user','user','create','administrative user','succeeds'
    (err, out) = rgwadmin(ctx, client, [
        'user', 'create',
        '--uid', admin_user,
        '--display-name', admin_display_name,
        '--access-key', admin_access_key,
        '--secret', admin_secret_key,
        '--max-buckets', '0',
        '--caps', admin_caps
    ])
    logging.error(out)
    logging.error(err)
    assert not err

    (remote,) = ctx.cluster.only(client).remotes.iterkeys()
    remote_host = remote.name.split('@')[1]
    admin_conn = boto.s3.connection.S3Connection(
        aws_access_key_id=admin_access_key,
        aws_secret_access_key=admin_secret_key,
        is_secure=False,
        port=7280,
        host=remote_host,
        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
    )

    # TESTCASE 'info-nosuch','user','info','non-existent user','fails'
    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {"uid": user1})
    assert ret == 404

    # TESTCASE 'create-ok','user','create','w/all valid info','succeeds'
    (ret, out) = rgwadmin_rest(admin_conn,
                               ['user', 'create'],
                               {'uid': user1,
                                'display-name': display_name1,
                                'email': email,
                                'access-key': access_key,
                                'secret-key': secret_key,
                                'max-buckets': '4'
                                })

    assert ret == 200

    # TESTCASE 'info-existing','user','info','existing user','returns correct info'
    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid': user1})

    assert out['user_id'] == user1
    assert out['email'] == email
    assert out['display_name'] == display_name1
    assert len(out['keys']) == 1
    assert out['keys'][0]['access_key'] == access_key
    assert out['keys'][0]['secret_key'] == secret_key
    assert not out['suspended']

    # TESTCASE 'suspend-ok','user','suspend','active user','succeeds'
    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid': user1, 'suspended': True})
    assert ret == 200

    # TESTCASE 'suspend-suspended','user','suspend','suspended user','succeeds w/advisory'
    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid': user1})
    assert ret == 200
    assert out['suspended']

    # TESTCASE 're-enable','user','enable','suspended user','succeeds'
    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid': user1, 'suspended': 'false'})
    # Check the REST status of this call; ``err`` still holds the exit
    # status of the admin-user creation above and says nothing about it.
    assert ret == 200

    # TESTCASE 'info-re-enabled','user','info','re-enabled user','no longer suspended'
    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid': user1})
    assert ret == 200
    assert not out['suspended']

    # TESTCASE 'add-keys','key','create','w/valid info','succeeds'
    (ret, out) = rgwadmin_rest(admin_conn,
                               ['key', 'create'],
                               {'uid': user1,
                                'access-key': access_key2,
                                'secret-key': secret_key2
                                })

    assert ret == 200

    # TESTCASE 'info-new-key','user','info','after key addition','returns all keys'
    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid': user1})
    assert ret == 200
    assert len(out['keys']) == 2
    assert out['keys'][0]['access_key'] == access_key2 or out['keys'][1]['access_key'] == access_key2
    assert out['keys'][0]['secret_key'] == secret_key2 or out['keys'][1]['secret_key'] == secret_key2

    # TESTCASE 'rm-key','key','rm','newly added key','succeeds, key is removed'
    (ret, out) = rgwadmin_rest(admin_conn,
                               ['key', 'rm'],
                               {'uid': user1,
                                'access-key': access_key2
                                })

    assert ret == 200

    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid': user1})

    assert len(out['keys']) == 1
    assert out['keys'][0]['access_key'] == access_key
    assert out['keys'][0]['secret_key'] == secret_key

    # TESTCASE 'add-swift-key','key','create','swift key','succeeds'
    (ret, out) = rgwadmin_rest(admin_conn,
                               ['subuser', 'create'],
                               {'subuser': subuser1,
                                'secret-key': swift_secret1,
                                'key-type': 'swift'
                                })

    assert ret == 200

    # TESTCASE 'info-swift-key','user','info','after key addition','returns all keys'
    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid': user1})
    assert ret == 200
    assert len(out['swift_keys']) == 1
    assert out['swift_keys'][0]['user'] == subuser1
    assert out['swift_keys'][0]['secret_key'] == swift_secret1

    # TESTCASE 'add-swift-subuser','key','create','swift sub-user key','succeeds'
    (ret, out) = rgwadmin_rest(admin_conn,
                               ['subuser', 'create'],
                               {'subuser': subuser2,
                                'secret-key': swift_secret2,
                                'key-type': 'swift'
                                })

    assert ret == 200

    # TESTCASE 'info-swift-subuser','user','info','after key addition','returns all sub-users/keys'
    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid': user1})
    assert ret == 200
    assert len(out['swift_keys']) == 2
    assert out['swift_keys'][0]['user'] == subuser2 or out['swift_keys'][1]['user'] == subuser2
    assert out['swift_keys'][0]['secret_key'] == swift_secret2 or out['swift_keys'][1]['secret_key'] == swift_secret2

    # TESTCASE 'rm-swift-key1','key','rm','subuser','succeeds, one key is removed'
    (ret, out) = rgwadmin_rest(admin_conn,
                               ['key', 'rm'],
                               {'subuser': subuser1,
                                'key-type': 'swift'
                                })

    assert ret == 200

    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid': user1})
    assert len(out['swift_keys']) == 1

    # TESTCASE 'rm-subuser','subuser','rm','subuser','success, subuser is removed'
    (ret, out) = rgwadmin_rest(admin_conn,
                               ['subuser', 'rm'],
                               {'subuser': subuser1
                                })

    assert ret == 200

    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid': user1})
    assert len(out['subusers']) == 1

    # TESTCASE 'rm-subuser-with-keys','subuser','rm','subuser','succeeds, second subuser and key is removed'
    (ret, out) = rgwadmin_rest(admin_conn,
                               ['subuser', 'rm'],
                               {'subuser': subuser2,
                                'key-type': 'swift',
                                # was misspelled '{purge-keys', so the flag
                                # was never sent under its real name
                                'purge-keys': True
                                })

    assert ret == 200

    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid': user1})
    assert len(out['swift_keys']) == 0
    assert len(out['subusers']) == 0

    # TESTCASE 'bucket-stats','bucket','info','no session/buckets','succeeds, empty list'
    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid': user1})
    assert ret == 200
    assert len(out) == 0

    # connect to rgw
    connection = boto.s3.connection.S3Connection(
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
        is_secure=False,
        port=7280,
        host=remote_host,
        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
    )

    # TESTCASE 'bucket-stats2','bucket','stats','no buckets','succeeds, empty list'
    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid': user1, 'stats': True})
    assert ret == 200
    assert len(out) == 0

    # create a first bucket
    bucket = connection.create_bucket(bucket_name)

    # TESTCASE 'bucket-list','bucket','list','one bucket','succeeds, expected list'
    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid': user1})
    assert ret == 200
    assert len(out) == 1
    assert out[0] == bucket_name

    # TESTCASE 'bucket-stats3','bucket','stats','new empty bucket','succeeds, empty list'
    (ret, out) = rgwadmin_rest(admin_conn,
                               ['bucket', 'info'], {'bucket': bucket_name, 'stats': True})

    assert ret == 200
    assert out['owner'] == user1
    bucket_id = out['id']

    # TESTCASE 'bucket-stats4','bucket','stats','new empty bucket','succeeds, expected bucket ID'
    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid': user1, 'stats': True})
    assert ret == 200
    assert len(out) == 1
    assert out[0]['id'] == bucket_id    # does it return the same ID twice in a row?

    # use some space
    key = boto.s3.key.Key(bucket)
    key.set_contents_from_string('one')

    # TESTCASE 'bucket-stats5','bucket','stats','after creating key','succeeds, lists one non-empty object'
    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'bucket': bucket_name, 'stats': True})
    assert ret == 200
    assert out['id'] == bucket_id
    assert out['usage']['rgw.main']['num_objects'] == 1
    assert out['usage']['rgw.main']['size_kb'] > 0

    # reclaim it
    key.delete()

    # TESTCASE 'bucket unlink', 'bucket', 'unlink', 'unlink bucket from user', 'fails', 'access denied error'
    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'unlink'], {'uid': user1, 'bucket': bucket_name})

    assert ret == 200

    # create a second user to link the bucket to
    (ret, out) = rgwadmin_rest(admin_conn,
                               ['user', 'create'],
                               {'uid': user2,
                                'display-name': display_name2,
                                'access-key': access_key2,
                                'secret-key': secret_key2,
                                'max-buckets': '1',
                                })

    assert ret == 200

    # try creating an object with the first user before the bucket is relinked
    denied = False
    key = boto.s3.key.Key(bucket)

    try:
        key.set_contents_from_string('two')
    except boto.exception.S3ResponseError:
        denied = True

    assert not denied

    # delete the object
    key.delete()

    # link the bucket to another user
    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'link'], {'uid': user2, 'bucket': bucket_name})

    assert ret == 200

    # try creating an object with the first user which should cause an error
    key = boto.s3.key.Key(bucket)

    try:
        key.set_contents_from_string('three')
    except boto.exception.S3ResponseError:
        denied = True

    assert denied

    # relink the bucket to the first user and delete the second user
    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'link'], {'uid': user1, 'bucket': bucket_name})
    assert ret == 200

    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid': user2})
    assert ret == 200

    # TESTCASE 'object-rm', 'object', 'rm', 'remove object', 'succeeds, object is removed'

    # upload an object
    object_name = 'four'
    key = boto.s3.key.Key(bucket, object_name)
    key.set_contents_from_string(object_name)

    # now delete it
    (ret, out) = rgwadmin_rest(admin_conn, ['object', 'rm'], {'bucket': bucket_name, 'object': object_name})
    assert ret == 200

    # TESTCASE 'bucket-stats6','bucket','stats','after deleting key','succeeds, lists one no objects'
    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'bucket': bucket_name, 'stats': True})
    assert ret == 200
    assert out['id'] == bucket_id
    assert out['usage']['rgw.main']['num_objects'] == 0

    # create a bucket for deletion stats
    useless_bucket = connection.create_bucket('useless_bucket')
    useless_key = useless_bucket.new_key('useless_key')
    useless_key.set_contents_from_string('useless string')

    # delete it
    useless_key.delete()
    useless_bucket.delete()

    # wait for the statistics to flush
    time.sleep(60)

    # need to wait for all usage data to get flushed, should take up to 30 seconds
    timestamp = time.time()
    while time.time() - timestamp <= (20 * 60):      # wait up to 20 minutes
        # last operation we did is delete obj, wait for it to flush
        (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'categories': 'delete_obj'})

        if get_user_successful_ops(out, user1) > 0:
            break
        time.sleep(1)

    assert time.time() - timestamp <= (20 * 60)

    # TESTCASE 'usage-show' 'usage' 'show' 'all usage' 'succeeds'
    (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'])
    assert ret == 200
    assert len(out['entries']) > 0
    assert len(out['summary']) > 0
    user_summary = get_user_summary(out, user1)
    total = user_summary['total']
    assert total['successful_ops'] > 0

    # TESTCASE 'usage-show2' 'usage' 'show' 'user usage' 'succeeds'
    (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid': user1})
    assert ret == 200
    assert len(out['entries']) > 0
    assert len(out['summary']) > 0
    user_summary = out['summary'][0]
    for entry in user_summary['categories']:
        assert entry['successful_ops'] > 0
    assert user_summary['user'] == user1

    # TESTCASE 'usage-show3' 'usage' 'show' 'user usage categories' 'succeeds'
    test_categories = ['create_bucket', 'put_obj', 'delete_obj', 'delete_bucket']
    for cat in test_categories:
        (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid': user1, 'categories': cat})
        assert ret == 200
        assert len(out['summary']) > 0
        user_summary = out['summary'][0]
        assert user_summary['user'] == user1
        assert len(user_summary['categories']) == 1
        entry = user_summary['categories'][0]
        assert entry['category'] == cat
        assert entry['successful_ops'] > 0

    # TESTCASE 'usage-trim' 'usage' 'trim' 'user usage' 'succeeds, usage removed'
    (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'trim'], {'uid': user1})
    assert ret == 200
    (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid': user1})
    assert ret == 200
    assert len(out['entries']) == 0
    assert len(out['summary']) == 0

    # TESTCASE 'user-suspend2','user','suspend','existing user','succeeds'
    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid': user1, 'suspended': True})
    assert ret == 200

    # TESTCASE 'user-suspend3','user','suspend','suspended user','cannot write objects'
    # NOTE(review): a write that unexpectedly succeeds is not flagged here;
    # only the status of a raised error is checked.
    try:
        key = boto.s3.key.Key(bucket)
        key.set_contents_from_string('five')
    except boto.exception.S3ResponseError as e:
        assert e.status == 403

    # TESTCASE 'user-renable2','user','enable','suspended user','succeeds'
    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid': user1, 'suspended': 'false'})
    assert ret == 200

    # TESTCASE 'user-renable3','user','enable','reenabled user','can write objects'
    key = boto.s3.key.Key(bucket)
    key.set_contents_from_string('six')

    # TESTCASE 'garbage-list', 'garbage', 'list', 'get list of objects ready for garbage collection'

    # create an object large enough to be split into multiple parts
    test_string = 'foo'*10000000

    big_key = boto.s3.key.Key(bucket)
    big_key.set_contents_from_string(test_string)

    # now delete the head
    big_key.delete()

    # TESTCASE 'rm-user-buckets','user','rm','existing user','fails, still has buckets'
    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid': user1})
    assert ret == 409

    # delete should fail because ``key`` still exists
    try:
        bucket.delete()
    except boto.exception.S3ResponseError as e:
        assert e.status == 409

    key.delete()
    bucket.delete()

    # TESTCASE 'policy', 'bucket', 'policy', 'get bucket policy', 'returns S3 policy'
    bucket = connection.create_bucket(bucket_name)

    # create an object
    key = boto.s3.key.Key(bucket)
    key.set_contents_from_string('seven')

    # should be private already but guarantee it
    key.set_acl('private')

    # NOTE(review): raw is not set, so ``out`` is the decoded JSON body, yet
    # it is compared to an XML ACL string via .strip() below -- confirm
    # whether raw=True was intended for the two policy requests.
    (ret, out) = rgwadmin_rest(admin_conn, ['policy', 'show'], {'bucket': bucket.name, 'object': key.key})
    assert ret == 200

    acl = key.get_xml_acl()
    assert acl == out.strip('\n')

    # add another grantee by making the object public read
    key.set_acl('public-read')

    (ret, out) = rgwadmin_rest(admin_conn, ['policy', 'show'], {'bucket': bucket.name, 'object': key.key})
    assert ret == 200

    acl = key.get_xml_acl()
    assert acl == out.strip('\n')

    # TESTCASE 'rm-bucket', 'bucket', 'rm', 'bucket with objects', 'succeeds'
    bucket = connection.create_bucket(bucket_name)
    key_name = ['eight', 'nine', 'ten', 'eleven']
    for i in range(4):
        key = boto.s3.key.Key(bucket)
        key.set_contents_from_string(key_name[i])

    (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'rm'], {'bucket': bucket_name, 'purge-objects': True})
    assert ret == 200

    # TESTCASE 'caps-add', 'caps', 'add', 'add user cap', 'succeeds'
    caps = 'usage=read'
    (ret, out) = rgwadmin_rest(admin_conn, ['caps', 'add'], {'uid': user1, 'user-caps': caps})
    assert ret == 200
    assert out[0]['perm'] == 'read'

    # TESTCASE 'caps-rm', 'caps', 'rm', 'remove existing cap from user', 'succeeds'
    (ret, out) = rgwadmin_rest(admin_conn, ['caps', 'rm'], {'uid': user1, 'user-caps': caps})
    assert ret == 200
    assert not out

    # TESTCASE 'rm-user','user','rm','existing user','fails, still has buckets'
    bucket = connection.create_bucket(bucket_name)
    key = boto.s3.key.Key(bucket)

    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid': user1})
    assert ret == 409

    # TESTCASE 'rm-user2', 'user', 'rm', 'user with data', 'succeeds'
    bucket = connection.create_bucket(bucket_name)
    key = boto.s3.key.Key(bucket)
    key.set_contents_from_string('twelve')

    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid': user1, 'purge-data': True})
    assert ret == 200

    # TESTCASE 'rm-user3','user','info','deleted user','fails'
    (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid': user1})
    assert ret == 404
def run_radosgw_agent(ctx, config):
    """
    Start a radosgw-agent for every ``client.*`` entry in config.

    See task() for the config format. For each client the agent source is
    cloned into ``<testdir>/radosgw-agent.<client>``, optionally reset to
    the configured sha1, bootstrapped, and started in the background.

    :param ctx: teuthology context.
    :param config: dict mapping client role to its agent settings.
    :return: list of (client, process) tuples for the started agents.
    """
    return_list = list()
    for (client, cconf) in config.items():
        # don't process entries that are not clients
        if not client.startswith('client.'):
            log.debug('key {data} does not start with \'client.\', moving on'.format(
                      data=client))
            continue

        src_client = cconf['src']
        dest_client = cconf['dest']

        src_zone = rgw_utils.zone_for_client(ctx, src_client)
        dest_zone = rgw_utils.zone_for_client(ctx, dest_client)

        log.info("source is %s", src_zone)
        log.info("dest is %s", dest_zone)

        testdir = teuthology.get_testdir(ctx)
        (remote,) = ctx.cluster.only(client).remotes.keys()
        agent_dir = 'radosgw-agent.{client}'.format(client=client)
        # figure out which branch to pull from
        branch = cconf.get('force-branch', None)
        if not branch:
            branch = cconf.get('branch', 'master')
        sha1 = cconf.get('sha1')
        remote.run(
            args=[
                'cd', testdir, run.Raw('&&'),
                'git', 'clone',
                '-b', branch,
                'git://git.ceph.com/radosgw-agent.git',
                agent_dir,
                ]
            )
        if sha1 is not None:
            # The reset has to run inside the clone. The previous command
            # chained two '&&' in a row and never entered the checkout.
            remote.run(
                args=[
                    'cd', testdir, run.Raw('&&'),
                    'cd', agent_dir, run.Raw('&&'),
                    'git', 'reset', '--hard', sha1,
                    ]
                )
        remote.run(
            args=[
                'cd', testdir, run.Raw('&&'),
                'cd', agent_dir,
                run.Raw('&&'),
                './bootstrap',
                ]
            )

        src_host, src_port = rgw_utils.get_zone_host_and_port(ctx, src_client,
                                                              src_zone)
        dest_host, dest_port = rgw_utils.get_zone_host_and_port(ctx, dest_client,
                                                                dest_zone)
        src_access, src_secret = rgw_utils.get_zone_system_keys(ctx, src_client,
                                                                src_zone)
        dest_access, dest_secret = rgw_utils.get_zone_system_keys(ctx, dest_client,
                                                                  dest_zone)
        sync_scope = cconf.get('sync-scope', None)
        port = cconf.get('port', 8000)
        daemon_name = '{host}.{port}.syncdaemon'.format(host=remote.name, port=port)
        in_args = [
            'daemon-helper',
            'kill',
            '{tdir}/radosgw-agent.{client}/radosgw-agent'.format(tdir=testdir,
                                                                 client=client),
            '-v',
            '--src-access-key', src_access,
            '--src-secret-key', src_secret,
            '--source', "http://{addr}:{port}".format(addr=src_host, port=src_port),
            '--dest-access-key', dest_access,
            '--dest-secret-key', dest_secret,
            '--max-entries', str(cconf.get('max-entries', 1000)),
            '--log-file', '{tdir}/archive/rgw_sync_agent.{client}.log'.format(
                tdir=testdir,
                client=client),
            '--object-sync-timeout', '30',
            ]

        if cconf.get('metadata-only', False):
            in_args.append('--metadata-only')

        # the test server and full/incremental flags are mutually exclusive
        if sync_scope is None:
            in_args.append('--test-server-host')
            in_args.append('0.0.0.0')
            in_args.append('--test-server-port')
            in_args.append(str(port))
            log.debug('Starting a sync test server on {client}'.format(client=client))
            # Stash the radosgw-agent server / port # for use by subsequent tasks
            ctx.radosgw_agent.endpoint = (client, str(port))
        else:
            in_args.append('--sync-scope')
            in_args.append(sync_scope)
            log.debug('Starting a {scope} sync on {client}'.format(scope=sync_scope, client=client))

        # positional arg for destination must come last
        in_args.append("http://{addr}:{port}".format(addr=dest_host,
                                                     port=dest_port))

        # wait=False leaves the agent running; the caller owns the process.
        return_list.append((client, remote.run(
            args=in_args,
            wait=False,
            stdin=run.PIPE,
            logger=log.getChild(daemon_name),
            )))
    return return_list
@contextlib.contextmanager
def task(ctx, config):
    """
    Run radosgw-agents in test mode.

    ``config`` maps each client that should run an agent to its settings:
    the source client, the destination client, and the port to listen on
    (default 8000; the agent binds to 0.0.0.0). The clients must have the
    correct zone root pools and rgw zone set in ceph.conf, otherwise the
    region information cannot be read from the cluster.

    Without ``sync-scope`` the agent starts an HTTP server that triggers
    full or incremental syncs on request. ``sync-scope: full`` triggers a
    single full sync instead, and ``sync-scope: incremental`` loops over
    incremental syncs (sleeping '--incremental-sync-delay' seconds between
    syncs, default 30).

    Data and metadata are both synced unless ``metadata-only: true`` is
    set, for example when syncing between regions.

    An example::

      tasks:
      - ceph:
          conf:
            client.0:
              rgw zone = foo
              rgw zone root pool = .root.pool
            client.1:
              rgw zone = bar
              rgw zone root pool = .root.pool2
      - rgw: # region configuration omitted for brevity
      - radosgw-agent:
          client.0:
            branch: wip-next-feature-branch
            src: client.0
            dest: client.1
            sync-scope: full
            metadata-only: true
            # port: 8000 (default)
          client.1:
            src: client.1
            dest: client.0
            port: 8001
    """
    assert isinstance(config, dict), 'rgw_sync_agent requires a dictionary config'
    log.debug("config is %s", config)

    overrides = ctx.config.get('overrides', {})
    # Only sections present in config receive overrides; this task has no
    # sensible default action for a client with an empty section.
    for client in config.iterkeys():
        section = config[client]
        if not section:
            continue
        log.debug('config[{client}]: {data}'.format(client=client, data=section))
        teuthology.deep_merge(section, overrides.get('radosgw-agent', {}))

    agent_state = argparse.Namespace()
    ctx.radosgw_agent = agent_state
    agent_state.config = config

    procs = run_radosgw_agent(ctx, config)
    agent_state.procs = procs

    try:
        yield
    finally:
        testdir = teuthology.get_testdir(ctx)
        try:
            # Closing stdin asks each agent to stop; then wait for it.
            for client, proc in procs:
                log.info("shutting down sync agent on %s", client)
                proc.stdin.close()
                proc.wait()
        finally:
            # Remove every checkout even if a shutdown above failed.
            for client, _ in procs:
                checkout = '{tdir}/radosgw-agent.{client}'.format(tdir=testdir,
                                                                  client=client)
                ctx.cluster.only(client).run(args=['rm', '-rf', checkout])
@contextlib.contextmanager
def create_image(ctx, config):
    """
    Create an rbd image for each role, and remove it again on exit.

    For example::

        tasks:
        - ceph:
        - rbd.create_image:
            client.0:
                image_name: testimage
                image_size: 100
                image_format: 1
            client.1:

    Image size is expressed as a number of megabytes; default value
    is 10240.

    Image format value must be either 1 or 2; default value is 1.

    :param ctx: teuthology context.
    :param config: list of roles, or dict mapping role to image properties
                   (a value of None means all defaults).
    """
    assert isinstance(config, dict) or isinstance(config, list), \
        "task create_image only supports a list or dictionary for configuration"

    if isinstance(config, dict):
        images = config.items()
    else:
        images = [(role, None) for role in config]

    testdir = teuthology.get_testdir(ctx)
    for role, properties in images:
        if properties is None:
            properties = {}
        name = properties.get('image_name', default_image_name(role))
        size = properties.get('image_size', 10240)
        fmt = properties.get('image_format', 1)
        (remote,) = ctx.cluster.only(role).remotes.keys()
        log.info('Creating image {name} with size {size}'.format(name=name,
                                                                 size=size))
        args = [
            'adjust-ulimits',
            # plain literal: it has no placeholder, so the former
            # .format(tdir=...) call on it did nothing
            'ceph-coverage',
            '{tdir}/archive/coverage'.format(tdir=testdir),
            'rbd',
            '-p', 'rbd',
            'create',
            '--size', str(size),
            name,
            ]
        # omit format option if using the default (format 1)
        # since old versions (presumably of rbd) don't support it
        if int(fmt) != 1:
            args += ['--image-format', str(fmt)]
        remote.run(args=args)
    try:
        yield
    finally:
        log.info('Deleting rbd images...')
        for role, properties in images:
            if properties is None:
                properties = {}
            name = properties.get('image_name', default_image_name(role))
            (remote,) = ctx.cluster.only(role).remotes.keys()
            remote.run(
                args=[
                    'adjust-ulimits',
                    'ceph-coverage',
                    '{tdir}/archive/coverage'.format(tdir=testdir),
                    'rbd',
                    '-p', 'rbd',
                    'rm',
                    name,
                    ],
                )
@contextlib.contextmanager
def clone_image(ctx, config):
    """
    Clone a parent image, and remove the clone and its snapshot on exit.

    For example::

        tasks:
        - ceph:
        - rbd.clone_image:
            client.0:
                parent_name: testimage
                image_name: cloneimage

    The clone is made from a snapshot of the parent that is named after
    the clone image (``<parent_name>@<image_name>``).

    :param ctx: teuthology context.
    :param config: list of roles, or dict mapping role to properties;
                   ``parent_name`` is required for every role.
    """
    assert isinstance(config, dict) or isinstance(config, list), \
        "task clone_image only supports a list or dictionary for configuration"

    if isinstance(config, dict):
        images = config.items()
    else:
        images = [(role, None) for role in config]

    testdir = teuthology.get_testdir(ctx)
    for role, properties in images:
        if properties is None:
            properties = {}

        name = properties.get('image_name', default_image_name(role))
        parent_name = properties.get('parent_name')
        assert parent_name is not None, \
            "parent_name is required"
        parent_spec = '{name}@{snap}'.format(name=parent_name, snap=name)

        (remote,) = ctx.cluster.only(role).remotes.keys()
        log.info('Clone image {parent} to {child}'.format(parent=parent_name,
                                                          child=name))
        # snapshot the parent, protect the snapshot, then clone from it
        for cmd in [('snap', 'create', parent_spec),
                    ('snap', 'protect', parent_spec),
                    ('clone', parent_spec, name)]:
            args = [
                'adjust-ulimits',
                # plain literal: the former .format(tdir=...) was a no-op
                'ceph-coverage',
                '{tdir}/archive/coverage'.format(tdir=testdir),
                'rbd', '-p', 'rbd'
                ]
            args.extend(cmd)
            remote.run(args=args)

    try:
        yield
    finally:
        log.info('Deleting rbd clones...')
        for role, properties in images:
            if properties is None:
                properties = {}
            name = properties.get('image_name', default_image_name(role))
            parent_name = properties.get('parent_name')
            parent_spec = '{name}@{snap}'.format(name=parent_name, snap=name)

            (remote,) = ctx.cluster.only(role).remotes.keys()

            # undo in reverse: drop the clone, unprotect and remove the snap
            for cmd in [('rm', name),
                        ('snap', 'unprotect', parent_spec),
                        ('snap', 'rm', parent_spec)]:
                args = [
                    'adjust-ulimits',
                    'ceph-coverage',
                    '{tdir}/archive/coverage'.format(tdir=testdir),
                    'rbd', '-p', 'rbd'
                    ]
                args.extend(cmd)
                remote.run(args=args)
+ + (remote,) = ctx.cluster.only(role).remotes.keys() + + for cmd in [('rm', name), + ('snap', 'unprotect', parent_spec), + ('snap', 'rm', parent_spec)]: + args = [ + 'adjust-ulimits', + 'ceph-coverage'.format(tdir=testdir), + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rbd', '-p', 'rbd' + ] + args.extend(cmd) + remote.run(args=args) + +@contextlib.contextmanager +def modprobe(ctx, config): + """ + Load the rbd kernel module.. + + For example:: + + tasks: + - ceph: + - rbd.create_image: [client.0] + - rbd.modprobe: [client.0] + """ + log.info('Loading rbd kernel module...') + for role in config: + (remote,) = ctx.cluster.only(role).remotes.keys() + remote.run( + args=[ + 'sudo', + 'modprobe', + 'rbd', + ], + ) + try: + yield + finally: + log.info('Unloading rbd kernel module...') + for role in config: + (remote,) = ctx.cluster.only(role).remotes.keys() + remote.run( + args=[ + 'sudo', + 'modprobe', + '-r', + 'rbd', + # force errors to be ignored; necessary if more + # than one device was created, which may mean + # the module isn't quite ready to go the first + # time through. + run.Raw('||'), + 'true', + ], + ) + +@contextlib.contextmanager +def dev_create(ctx, config): + """ + Map block devices to rbd images. 
+ + For example:: + + tasks: + - ceph: + - rbd.create_image: [client.0] + - rbd.modprobe: [client.0] + - rbd.dev_create: + client.0: testimage.client.0 + """ + assert isinstance(config, dict) or isinstance(config, list), \ + "task dev_create only supports a list or dictionary for configuration" + + if isinstance(config, dict): + role_images = config.items() + else: + role_images = [(role, None) for role in config] + + log.info('Creating rbd block devices...') + + testdir = teuthology.get_testdir(ctx) + + for role, image in role_images: + if image is None: + image = default_image_name(role) + (remote,) = ctx.cluster.only(role).remotes.keys() + + remote.run( + args=[ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rbd', + '--user', role.rsplit('.')[-1], + '-p', 'rbd', + 'map', + image, + run.Raw('&&'), + # wait for the symlink to be created by udev + 'while', 'test', '!', '-e', '/dev/rbd/rbd/{image}'.format(image=image), run.Raw(';'), 'do', + 'sleep', '1', run.Raw(';'), + 'done', + ], + ) + try: + yield + finally: + log.info('Unmapping rbd devices...') + for role, image in role_images: + if image is None: + image = default_image_name(role) + (remote,) = ctx.cluster.only(role).remotes.keys() + remote.run( + args=[ + 'LD_LIBRARY_PATH={tdir}/binary/usr/local/lib'.format(tdir=testdir), + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rbd', + '-p', 'rbd', + 'unmap', + '/dev/rbd/rbd/{imgname}'.format(imgname=image), + run.Raw('&&'), + # wait for the symlink to be deleted by udev + 'while', 'test', '-e', '/dev/rbd/rbd/{image}'.format(image=image), + run.Raw(';'), + 'do', + 'sleep', '1', run.Raw(';'), + 'done', + ], + ) + + +def rbd_devname_rtn(ctx, image): + return '/dev/rbd/rbd/{image}'.format(image=image) + +def canonical_path(ctx, role, path): + """ + Determine the canonical path for a given path on the host + representing the given role. 
A canonical path contains no + . or .. components, and includes no symbolic links. + """ + version_fp = StringIO() + ctx.cluster.only(role).run( + args=[ 'readlink', '-f', path ], + stdout=version_fp, + ) + canonical_path = version_fp.getvalue().rstrip('\n') + version_fp.close() + return canonical_path + +@contextlib.contextmanager +def run_xfstests(ctx, config): + """ + Run xfstests over specified devices. + + Warning: both the test and scratch devices specified will be + overwritten. Normally xfstests modifies (but does not destroy) + the test device, but for now the run script used here re-makes + both filesystems. + + Note: Only one instance of xfstests can run on a single host at + a time, although this is not enforced. + + This task in its current form needs some improvement. For + example, it assumes all roles provided in the config are + clients, and that the config provided is a list of key/value + pairs. For now please use the xfstests() interface, below. + + For example:: + + tasks: + - ceph: + - rbd.run_xfstests: + client.0: + count: 2 + test_dev: 'test_dev' + scratch_dev: 'scratch_dev' + fs_type: 'xfs' + tests: 'generic/100 xfs/003 xfs/005 xfs/006 generic/015' + randomize: true + """ + with parallel() as p: + for role, properties in config.items(): + p.spawn(run_xfstests_one_client, ctx, role, properties) + yield + +def run_xfstests_one_client(ctx, role, properties): + """ + Spawned routine to handle xfs tests for a single client + """ + testdir = teuthology.get_testdir(ctx) + try: + count = properties.get('count') + test_dev = properties.get('test_dev') + assert test_dev is not None, \ + "task run_xfstests requires test_dev to be defined" + test_dev = canonical_path(ctx, role, test_dev) + + scratch_dev = properties.get('scratch_dev') + assert scratch_dev is not None, \ + "task run_xfstests requires scratch_dev to be defined" + scratch_dev = canonical_path(ctx, role, scratch_dev) + + fs_type = properties.get('fs_type') + tests = properties.get('tests') 
+ randomize = properties.get('randomize') + + (remote,) = ctx.cluster.only(role).remotes.keys() + + # Fetch the test script + test_root = teuthology.get_testdir(ctx) + test_script = 'run_xfstests_krbd.sh' + test_path = os.path.join(test_root, test_script) + + git_branch = 'master' + test_url = 'https://raw.github.com/ceph/ceph/{branch}/qa/{script}'.format(branch=git_branch, script=test_script) + + log.info('Fetching {script} for {role} from {url}'.format(script=test_script, + role=role, + url=test_url)) + args = [ 'wget', '-O', test_path, '--', test_url ] + remote.run(args=args) + + log.info('Running xfstests on {role}:'.format(role=role)) + log.info(' iteration count: {count}:'.format(count=count)) + log.info(' test device: {dev}'.format(dev=test_dev)) + log.info(' scratch device: {dev}'.format(dev=scratch_dev)) + log.info(' using fs_type: {fs_type}'.format(fs_type=fs_type)) + log.info(' tests to run: {tests}'.format(tests=tests)) + log.info(' randomize: {randomize}'.format(randomize=randomize)) + + # Note that the device paths are interpreted using + # readlink -f in order to get their canonical + # pathname (so it matches what the kernel remembers). + args = [ + '/usr/bin/sudo', + 'TESTDIR={tdir}'.format(tdir=testdir), + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + '/bin/bash', + test_path, + '-c', str(count), + '-f', fs_type, + '-t', test_dev, + '-s', scratch_dev, + ] + if randomize: + args.append('-r') + if tests: + args.extend(['--', tests]) + remote.run(args=args, logger=log.getChild(role)) + finally: + log.info('Removing {script} on {role}'.format(script=test_script, + role=role)) + remote.run(args=['rm', '-f', test_path]) + +@contextlib.contextmanager +def xfstests(ctx, config): + """ + Run xfstests over rbd devices. This interface sets up all + required configuration automatically if not otherwise specified. + Note that only one instance of xfstests can run on a single host + at a time. 
By default, the set of tests specified is run once. + If a (non-zero) count value is supplied, the complete set of + tests will be run that number of times. + + For example:: + + tasks: + - ceph: + # Image sizes are in MB + - rbd.xfstests: + client.0: + count: 3 + test_image: 'test_image' + test_size: 250 + test_format: 2 + scratch_image: 'scratch_image' + scratch_size: 250 + scratch_format: 1 + fs_type: 'xfs' + tests: 'generic/100 xfs/003 xfs/005 xfs/006 generic/015' + randomize: true + """ + if config is None: + config = { 'all': None } + assert isinstance(config, dict) or isinstance(config, list), \ + "task xfstests only supports a list or dictionary for configuration" + if isinstance(config, dict): + config = teuthology.replace_all_with_clients(ctx.cluster, config) + runs = config.items() + else: + runs = [(role, None) for role in config] + + running_xfstests = {} + for role, properties in runs: + assert role.startswith('client.'), \ + "task xfstests can only run on client nodes" + for host, roles_for_host in ctx.cluster.remotes.items(): + if role in roles_for_host: + assert host not in running_xfstests, \ + "task xfstests allows only one instance at a time per host" + running_xfstests[host] = True + + images_config = {} + scratch_config = {} + modprobe_config = {} + image_map_config = {} + scratch_map_config = {} + xfstests_config = {} + for role, properties in runs: + if properties is None: + properties = {} + + test_image = properties.get('test_image', 'test_image.{role}'.format(role=role)) + test_size = properties.get('test_size', 2000) # 2G + test_fmt = properties.get('test_format', 1) + scratch_image = properties.get('scratch_image', 'scratch_image.{role}'.format(role=role)) + scratch_size = properties.get('scratch_size', 10000) # 10G + scratch_fmt = properties.get('scratch_format', 1) + + images_config[role] = dict( + image_name=test_image, + image_size=test_size, + image_format=test_fmt, + ) + + scratch_config[role] = dict( + image_name=scratch_image, + 
image_size=scratch_size, + image_format=scratch_fmt, + ) + + xfstests_config[role] = dict( + count=properties.get('count', 1), + test_dev='/dev/rbd/rbd/{image}'.format(image=test_image), + scratch_dev='/dev/rbd/rbd/{image}'.format(image=scratch_image), + fs_type=properties.get('fs_type', 'xfs'), + randomize=properties.get('randomize', False), + tests=properties.get('tests'), + ) + + log.info('Setting up xfstests using RBD images:') + log.info(' test ({size} MB): {image}'.format(size=test_size, + image=test_image)) + log.info(' scratch ({size} MB): {image}'.format(size=scratch_size, + image=scratch_image)) + modprobe_config[role] = None + image_map_config[role] = test_image + scratch_map_config[role] = scratch_image + + with contextutil.nested( + lambda: create_image(ctx=ctx, config=images_config), + lambda: create_image(ctx=ctx, config=scratch_config), + lambda: modprobe(ctx=ctx, config=modprobe_config), + lambda: dev_create(ctx=ctx, config=image_map_config), + lambda: dev_create(ctx=ctx, config=scratch_map_config), + lambda: run_xfstests(ctx=ctx, config=xfstests_config), + ): + yield + + +@contextlib.contextmanager +def task(ctx, config): + """ + Create and mount an rbd image. 
+ + For example, you can specify which clients to run on:: + + tasks: + - ceph: + - rbd: [client.0, client.1] + + There are a few image options:: + + tasks: + - ceph: + - rbd: + client.0: # uses defaults + client.1: + image_name: foo + image_size: 2048 + image_format: 2 + fs_type: xfs + + To use default options on all clients:: + + tasks: + - ceph: + - rbd: + all: + + To create 20GiB images and format them with xfs on all clients:: + + tasks: + - ceph: + - rbd: + all: + image_size: 20480 + fs_type: xfs + """ + if config is None: + config = { 'all': None } + norm_config = config + if isinstance(config, dict): + norm_config = teuthology.replace_all_with_clients(ctx.cluster, config) + if isinstance(norm_config, dict): + role_images = {} + for role, properties in norm_config.iteritems(): + if properties is None: + properties = {} + role_images[role] = properties.get('image_name') + else: + role_images = norm_config + + log.debug('rbd config is: %s', norm_config) + + with contextutil.nested( + lambda: create_image(ctx=ctx, config=norm_config), + lambda: modprobe(ctx=ctx, config=norm_config), + lambda: dev_create(ctx=ctx, config=role_images), + lambda: generic_mkfs(ctx=ctx, config=norm_config, + devname_rtn=rbd_devname_rtn), + lambda: generic_mount(ctx=ctx, config=role_images, + devname_rtn=rbd_devname_rtn), + ): + yield diff --git a/qa/tasks/rbd_fsx.py b/qa/tasks/rbd_fsx.py new file mode 100644 index 00000000000..d848a88c566 --- /dev/null +++ b/qa/tasks/rbd_fsx.py @@ -0,0 +1,82 @@ +""" +Run fsx on an rbd image +""" +import contextlib +import logging + +from teuthology.parallel import parallel +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Run fsx on an rbd image. + + Currently this requires running as client.admin + to create a pool. 
+ + Specify which clients to run on as a list:: + + tasks: + ceph: + rbd_fsx: + clients: [client.0, client.1] + + You can optionally change some properties of fsx: + + tasks: + ceph: + rbd_fsx: + clients: + seed: + ops: + size: + """ + log.info('starting rbd_fsx...') + with parallel() as p: + for role in config['clients']: + p.spawn(_run_one_client, ctx, config, role) + yield + +def _run_one_client(ctx, config, role): + """Spawned task that runs the client""" + krbd = config.get('krbd', False) + testdir = teuthology.get_testdir(ctx) + (remote,) = ctx.cluster.only(role).remotes.iterkeys() + + args = [] + if krbd: + args.append('sudo') # rbd map/unmap need privileges + args.extend([ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'ceph_test_librbd_fsx', + '-d', # debug output for all operations + '-W', '-R', # mmap doesn't work with rbd + '-p', str(config.get('progress_interval', 100)), # show progress + '-P', '{tdir}/archive'.format(tdir=testdir), + '-r', str(config.get('readbdy',1)), + '-w', str(config.get('writebdy',1)), + '-t', str(config.get('truncbdy',1)), + '-h', str(config.get('holebdy',1)), + '-l', str(config.get('size', 250000000)), + '-S', str(config.get('seed', 0)), + '-N', str(config.get('ops', 1000)), + ]) + if krbd: + args.append('-K') # -K enables krbd mode + if config.get('direct_io', False): + args.append('-Z') # -Z use direct IO + if not config.get('randomized_striping', True): + args.append('-U') # -U disables randomized striping + if not config.get('punch_holes', True): + args.append('-H') # -H disables discard ops + args.extend([ + 'pool_{pool}'.format(pool=role), + 'image_{image}'.format(image=role), + ]) + + remote.run(args=args) diff --git a/qa/tasks/recovery_bench.py b/qa/tasks/recovery_bench.py new file mode 100644 index 00000000000..1984b97d31e --- /dev/null +++ b/qa/tasks/recovery_bench.py @@ -0,0 +1,208 @@ +""" +Recovery system benchmarking +""" +from cStringIO import StringIO + +import 
contextlib +import gevent +import json +import logging +import random +import time + +import ceph_manager +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Benchmark the recovery system. + + Generates objects with smalliobench, runs it normally to get a + baseline performance measurement, then marks an OSD out and reruns + to measure performance during recovery. + + The config should be as follows: + + recovery_bench: + duration: + num_objects: + io_size: + + example: + + tasks: + - ceph: + - recovery_bench: + duration: 60 + num_objects: 500 + io_size: 4096 + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'recovery_bench task only accepts a dict for configuration' + + log.info('Beginning recovery bench...') + + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') + while len(manager.get_osd_status()['up']) < num_osds: + manager.sleep(10) + + bench_proc = RecoveryBencher( + manager, + config, + ) + try: + yield + finally: + log.info('joining recovery bencher') + bench_proc.do_join() + +class RecoveryBencher: + """ + RecoveryBencher + """ + def __init__(self, manager, config): + self.ceph_manager = manager + self.ceph_manager.wait_for_clean() + + osd_status = self.ceph_manager.get_osd_status() + self.osds = osd_status['up'] + + self.config = config + if self.config is None: + self.config = dict() + + else: + def tmp(x): + """ + Local wrapper to print value. + """ + print x + self.log = tmp + + log.info("spawning thread") + + self.thread = gevent.spawn(self.do_bench) + + def do_join(self): + """ + Join the recovery bencher. This is called after the main + task exits. 
+ """ + self.thread.get() + + def do_bench(self): + """ + Do the benchmarking. + """ + duration = self.config.get("duration", 60) + num_objects = self.config.get("num_objects", 500) + io_size = self.config.get("io_size", 4096) + + osd = str(random.choice(self.osds)) + (osd_remote,) = self.ceph_manager.ctx.cluster.only('osd.%s' % osd).remotes.iterkeys() + + testdir = teuthology.get_testdir(self.ceph_manager.ctx) + + # create the objects + osd_remote.run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'smalliobench'.format(tdir=testdir), + '--use-prefix', 'recovery_bench', + '--init-only', '1', + '--num-objects', str(num_objects), + '--io-size', str(io_size), + ], + wait=True, + ) + + # baseline bench + log.info('non-recovery (baseline)') + p = osd_remote.run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'smalliobench', + '--use-prefix', 'recovery_bench', + '--do-not-init', '1', + '--duration', str(duration), + '--io-size', str(io_size), + ], + stdout=StringIO(), + stderr=StringIO(), + wait=True, + ) + self.process_samples(p.stderr.getvalue()) + + self.ceph_manager.raw_cluster_cmd('osd', 'out', osd) + time.sleep(5) + + # recovery bench + log.info('recovery active') + p = osd_remote.run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'smalliobench', + '--use-prefix', 'recovery_bench', + '--do-not-init', '1', + '--duration', str(duration), + '--io-size', str(io_size), + ], + stdout=StringIO(), + stderr=StringIO(), + wait=True, + ) + self.process_samples(p.stderr.getvalue()) + + self.ceph_manager.raw_cluster_cmd('osd', 'in', osd) + + def process_samples(self, input): + """ + Extract samples from the input and process the results + + :param input: input lines in JSON format + """ + lat = {} + for line in input.split('\n'): + try: + sample = json.loads(line) + samples = lat.setdefault(sample['type'], []) + 
samples.append(float(sample['latency'])) + except Exception: + pass + + for type in lat: + samples = lat[type] + samples.sort() + + num = len(samples) + + # median + if num & 1 == 1: # odd number of samples + median = samples[num / 2] + else: + median = (samples[num / 2] + samples[num / 2 - 1]) / 2 + + # 99% + ninety_nine = samples[int(num * 0.99)] + + log.info("%s: median %f, 99%% %f" % (type, median, ninety_nine)) diff --git a/qa/tasks/reg11184.py b/qa/tasks/reg11184.py new file mode 100644 index 00000000000..6ba39bde898 --- /dev/null +++ b/qa/tasks/reg11184.py @@ -0,0 +1,240 @@ +""" +Special regression test for tracker #11184 + +Synopsis: osd/SnapMapper.cc: 282: FAILED assert(check(oid)) + +This is accomplished by moving a pg that wasn't part of split and still include +divergent priors. +""" +import logging +import time +from cStringIO import StringIO + +from teuthology import misc as teuthology +from util.rados import rados +import os + + +log = logging.getLogger(__name__) + + +def task(ctx, config): + """ + Test handling of divergent entries during export / import + to regression test tracker #11184 + + overrides: + ceph: + conf: + osd: + debug osd: 5 + + Requires 3 osds on a single test node. 
+ """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'divergent_priors task only accepts a dict for configuration' + + while len(ctx.manager.get_osd_status()['up']) < 3: + time.sleep(10) + ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + ctx.manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + ctx.manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + ctx.manager.raw_cluster_cmd('osd', 'set', 'noout') + ctx.manager.raw_cluster_cmd('osd', 'set', 'noin') + ctx.manager.raw_cluster_cmd('osd', 'set', 'nodown') + ctx.manager.wait_for_clean() + + # something that is always there + dummyfile = '/etc/fstab' + dummyfile2 = '/etc/resolv.conf' + testdir = teuthology.get_testdir(ctx) + + # create 1 pg pool + log.info('creating foo') + ctx.manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1') + + osds = [0, 1, 2] + for i in osds: + ctx.manager.set_config(i, osd_min_pg_log_entries=10) + ctx.manager.set_config(i, osd_max_pg_log_entries=10) + ctx.manager.set_config(i, osd_pg_log_trim_min=5) + + # determine primary + divergent = ctx.manager.get_pg_primary('foo', 0) + log.info("primary and soon to be divergent is %d", divergent) + non_divergent = list(osds) + non_divergent.remove(divergent) + + log.info('writing initial objects') + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + # write 100 objects + for i in range(100): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile]) + + ctx.manager.wait_for_clean() + + # blackhole non_divergent + log.info("blackholing osds %s", str(non_divergent)) + for i in non_divergent: + ctx.manager.set_config(i, filestore_blackhole=1) + + DIVERGENT_WRITE = 5 + DIVERGENT_REMOVE = 5 + # Write some soon to be divergent + log.info('writing divergent objects') + for i in range(DIVERGENT_WRITE): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, + dummyfile2], wait=False) + # Remove some soon to be divergent + 
log.info('remove divergent objects') + for i in range(DIVERGENT_REMOVE): + rados(ctx, mon, ['-p', 'foo', 'rm', + 'existing_%d' % (i + DIVERGENT_WRITE)], wait=False) + time.sleep(10) + mon.run( + args=['killall', '-9', 'rados'], + wait=True, + check_status=False) + + # kill all the osds but leave divergent in + log.info('killing all the osds') + for i in osds: + ctx.manager.kill_osd(i) + for i in osds: + ctx.manager.mark_down_osd(i) + for i in non_divergent: + ctx.manager.mark_out_osd(i) + + # bring up non-divergent + log.info("bringing up non_divergent %s", str(non_divergent)) + for i in non_divergent: + ctx.manager.revive_osd(i) + for i in non_divergent: + ctx.manager.mark_in_osd(i) + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE) + log.info('writing non-divergent object ' + objname) + rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2]) + + ctx.manager.wait_for_recovery() + + # ensure no recovery of up osds first + log.info('delay recovery') + for i in non_divergent: + ctx.manager.wait_run_admin_socket( + 'osd', i, ['set_recovery_delay', '100000']) + + # bring in our divergent friend + log.info("revive divergent %d", divergent) + ctx.manager.raw_cluster_cmd('osd', 'set', 'noup') + ctx.manager.revive_osd(divergent) + + log.info('delay recovery divergent') + ctx.manager.wait_run_admin_socket( + 'osd', divergent, ['set_recovery_delay', '100000']) + + ctx.manager.raw_cluster_cmd('osd', 'unset', 'noup') + while len(ctx.manager.get_osd_status()['up']) < 3: + time.sleep(10) + + log.info('wait for peering') + rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) + + # At this point the divergent_priors should have been detected + + log.info("killing divergent %d", divergent) + ctx.manager.kill_osd(divergent) + + # Split pgs for pool foo + ctx.manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'pg_num', '2') + time.sleep(5) + + # Export a pg + (exp_remote,) = ctx.\ + 
cluster.only('osd.{o}'.format(o=divergent)).remotes.iterkeys() + FSPATH = ctx.manager.get_filepath() + JPATH = os.path.join(FSPATH, "journal") + prefix = ("sudo adjust-ulimits ceph-objectstore-tool " + "--data-path {fpath} --journal-path {jpath} " + "--log-file=" + "/var/log/ceph/objectstore_tool.$$.log ". + format(fpath=FSPATH, jpath=JPATH)) + pid = os.getpid() + expfile = os.path.join(testdir, "exp.{pid}.out".format(pid=pid)) + cmd = ((prefix + "--op export --pgid 1.0 --file {file}"). + format(id=divergent, file=expfile)) + proc = exp_remote.run(args=cmd, wait=True, + check_status=False, stdout=StringIO()) + assert proc.exitstatus == 0 + + # Remove the same pg that was exported + cmd = ((prefix + "--op remove --pgid 1.0"). + format(id=divergent, file=expfile)) + proc = exp_remote.run(args=cmd, wait=True, + check_status=False, stdout=StringIO()) + assert proc.exitstatus == 0 + + # Kill one of non-divergent OSDs + log.info('killing osd.%d' % non_divergent[1]) + ctx.manager.kill_osd(non_divergent[1]) + ctx.manager.mark_down_osd(non_divergent[1]) + # ctx.manager.mark_out_osd(non_divergent[1]) + + cmd = ((prefix + "--op import --file {file}"). 
+ format(id=non_divergent[1], file=expfile)) + proc = exp_remote.run(args=cmd, wait=True, + check_status=False, stdout=StringIO()) + assert proc.exitstatus == 0 + + # bring in our divergent friend and other node + log.info("revive divergent %d", divergent) + ctx.manager.revive_osd(divergent) + ctx.manager.mark_in_osd(divergent) + log.info("revive %d", non_divergent[1]) + ctx.manager.revive_osd(non_divergent[1]) + + while len(ctx.manager.get_osd_status()['up']) < 3: + time.sleep(10) + + log.info('delay recovery divergent') + ctx.manager.set_config(divergent, osd_recovery_delay_start=100000) + log.info('mark divergent in') + ctx.manager.mark_in_osd(divergent) + + log.info('wait for peering') + rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) + + log.info("killing divergent %d", divergent) + ctx.manager.kill_osd(divergent) + log.info("reviving divergent %d", divergent) + ctx.manager.revive_osd(divergent) + time.sleep(3) + + log.info('allowing recovery') + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in osds: + ctx.manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug', + 'kick_recovery_wq', ' 0') + + log.info('reading divergent objects') + for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE): + exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i, + '/tmp/existing']) + assert exit_status is 0 + + (remote,) = ctx.\ + cluster.only('osd.{o}'.format(o=divergent)).remotes.iterkeys() + msg = "dirty_divergent_priors: true, divergent_priors: %d" \ + % (DIVERGENT_WRITE + DIVERGENT_REMOVE) + cmd = 'grep "{msg}" /var/log/ceph/ceph-osd.{osd}.log'\ + .format(msg=msg, osd=divergent) + proc = remote.run(args=cmd, wait=True, check_status=False) + assert proc.exitstatus == 0 + + cmd = 'rm {file}'.format(file=expfile) + remote.run(args=cmd, wait=True) + log.info("success") diff --git a/qa/tasks/rep_lost_unfound_delete.py b/qa/tasks/rep_lost_unfound_delete.py new file mode 100644 index 00000000000..b36d260b122 --- /dev/null +++ 
b/qa/tasks/rep_lost_unfound_delete.py @@ -0,0 +1,156 @@ +""" +Lost_unfound +""" +import logging +import ceph_manager +from teuthology import misc as teuthology +from util.rados import rados + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test handling of lost objects. + + A pretty rigid cluseter is brought up andtested by this task + """ + POOL = 'unfounddel_pool' + if config is None: + config = {} + assert isinstance(config, dict), \ + 'lost_unfound task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 3: + manager.sleep(10) + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + manager.wait_for_clean() + + manager.create_pool(POOL) + + # something that is always there + dummyfile = '/etc/fstab' + + # take an osd out until the very end + manager.kill_osd(2) + manager.mark_down_osd(2) + manager.mark_out_osd(2) + + # kludge to make sure they get a map + rados(ctx, mon, ['-p', POOL, 'put', 'dummy', dummyfile]) + + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.wait_for_recovery() + + # create old objects + for f in range(1, 10): + rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'rm', 'existed_%d' % f]) + + # delay recovery, and make the pg log very long (to prevent backfill) + manager.raw_cluster_cmd( + 'tell', 'osd.1', + 'injectargs', + '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000' + ) + + manager.kill_osd(0) + manager.mark_down_osd(0) + + for f in 
range(1, 10): + rados(ctx, mon, ['-p', POOL, 'put', 'new_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) + + # bring osd.0 back up, let it peer, but don't replicate the new + # objects... + log.info('osd.0 command_args is %s' % 'foo') + log.info(ctx.daemons.get_daemon('osd', 0).command_args) + ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend([ + '--osd-recovery-delay-start', '1000' + ]) + manager.revive_osd(0) + manager.mark_in_osd(0) + manager.wait_till_osd_is_up(0) + + manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.wait_till_active() + + # take out osd.1 and the only copy of those objects. + manager.kill_osd(1) + manager.mark_down_osd(1) + manager.mark_out_osd(1) + manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it') + + # bring up osd.2 so that things would otherwise, in theory, recovery fully + manager.revive_osd(2) + manager.mark_in_osd(2) + manager.wait_till_osd_is_up(2) + + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + manager.wait_till_active() + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + + # verify that there are unfound objects + unfound = manager.get_num_unfound_objects() + log.info("there are %d unfound objects" % unfound) + assert unfound + + # mark stuff lost + pgs = manager.get_pg_stats() + for pg in pgs: + if pg['stat_sum']['num_objects_unfound'] > 0: + primary = 'osd.%d' % pg['acting'][0] + + # verify that i can list them direct from the osd + log.info('listing missing/lost in %s state %s', pg['pgid'], + pg['state']); + m = manager.list_pg_missing(pg['pgid']) + #log.info('%s' % m) + assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] + num_unfound=0 + for o in 
m['objects']: + if len(o['locations']) == 0: + num_unfound += 1 + assert m['num_unfound'] == num_unfound + + log.info("reverting unfound in %s on %s", pg['pgid'], primary) + manager.raw_cluster_cmd('pg', pg['pgid'], + 'mark_unfound_lost', 'delete') + else: + log.info("no unfound in %s", pg['pgid']) + + manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5') + manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5') + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + manager.wait_for_recovery() + + # verify result + for f in range(1, 10): + err = rados(ctx, mon, ['-p', POOL, 'get', 'new_%d' % f, '-']) + assert err + err = rados(ctx, mon, ['-p', POOL, 'get', 'existed_%d' % f, '-']) + assert err + err = rados(ctx, mon, ['-p', POOL, 'get', 'existing_%d' % f, '-']) + assert err + + # see if osd.1 can cope + manager.revive_osd(1) + manager.mark_in_osd(1) + manager.wait_till_osd_is_up(1) + manager.wait_for_clean() diff --git a/qa/tasks/repair_test.py b/qa/tasks/repair_test.py new file mode 100644 index 00000000000..f71d99e2fb8 --- /dev/null +++ b/qa/tasks/repair_test.py @@ -0,0 +1,312 @@ +""" +Test pool repairing after objects are damaged. +""" +import logging +import time + +import ceph_manager +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + + +def choose_primary(ctx, pool, num): + """ + Return primary to test on. + """ + log.info("Choosing primary") + return ctx.manager.get_pg_primary(pool, num) + + +def choose_replica(ctx, pool, num): + """ + Return replica to test on. 
+ """ + log.info("Choosing replica") + return ctx.manager.get_pg_replica(pool, num) + + +def trunc(ctx, osd, pool, obj): + """ + truncate an object + """ + log.info("truncating object") + return ctx.manager.osd_admin_socket( + osd, + ['truncobj', pool, obj, '1']) + + +def dataerr(ctx, osd, pool, obj): + """ + cause an error in the data + """ + log.info("injecting data err on object") + return ctx.manager.osd_admin_socket( + osd, + ['injectdataerr', pool, obj]) + + +def mdataerr(ctx, osd, pool, obj): + """ + cause an error in the mdata + """ + log.info("injecting mdata err on object") + return ctx.manager.osd_admin_socket( + osd, + ['injectmdataerr', pool, obj]) + + +def omaperr(ctx, osd, pool, obj): + """ + Cause an omap error. + """ + log.info("injecting omap err on object") + return ctx.manager.osd_admin_socket(osd, ['setomapval', pool, obj, + 'badkey', 'badval']) + + +def repair_test_1(ctx, corrupter, chooser, scrub_type): + """ + Creates an object in the pool, corrupts it, + scrubs it, and verifies that the pool is inconsistent. It then repairs + the pool, rescrubs it, and verifies that the pool is consistent + + :param corrupter: error generating function (truncate, data-error, or + meta-data error, for example). 
+ :param chooser: osd type chooser (primary or replica) + :param scrub_type: regular scrub or deep-scrub + """ + pool = "repair_pool_1" + ctx.manager.wait_for_clean() + with ctx.manager.pool(pool, 1): + + log.info("starting repair test type 1") + victim_osd = chooser(ctx, pool, 0) + + # create object + log.info("doing put") + ctx.manager.do_put(pool, 'repair_test_obj', '/etc/hosts') + + # corrupt object + log.info("corrupting object") + corrupter(ctx, victim_osd, pool, 'repair_test_obj') + + # verify inconsistent + log.info("scrubbing") + ctx.manager.do_pg_scrub(pool, 0, scrub_type) + + assert ctx.manager.pg_inconsistent(pool, 0) + + # repair + log.info("repairing") + ctx.manager.do_pg_scrub(pool, 0, "repair") + + log.info("re-scrubbing") + ctx.manager.do_pg_scrub(pool, 0, scrub_type) + + # verify consistent + assert not ctx.manager.pg_inconsistent(pool, 0) + log.info("done") + + +def repair_test_2(ctx, config, chooser): + """ + First creates a set of objects and + sets the omap value. It then corrupts an object, does both a scrub + and a deep-scrub, and then corrupts more objects. After that, it + repairs the pool and makes sure that the pool is consistent some + time after a deep-scrub. + + :param chooser: primary or replica selection routine. 
+ """ + pool = "repair_pool_2" + ctx.manager.wait_for_clean() + with ctx.manager.pool(pool, 1): + log.info("starting repair test type 2") + victim_osd = chooser(ctx, pool, 0) + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + + # create object + log.info("doing put and setomapval") + ctx.manager.do_put(pool, 'file1', '/etc/hosts') + ctx.manager.do_rados(mon, ['-p', pool, 'setomapval', 'file1', + 'key', 'val']) + ctx.manager.do_put(pool, 'file2', '/etc/hosts') + ctx.manager.do_put(pool, 'file3', '/etc/hosts') + ctx.manager.do_put(pool, 'file4', '/etc/hosts') + ctx.manager.do_put(pool, 'file5', '/etc/hosts') + ctx.manager.do_rados(mon, ['-p', pool, 'setomapval', 'file5', + 'key', 'val']) + ctx.manager.do_put(pool, 'file6', '/etc/hosts') + + # corrupt object + log.info("corrupting object") + omaperr(ctx, victim_osd, pool, 'file1') + + # verify inconsistent + log.info("scrubbing") + ctx.manager.do_pg_scrub(pool, 0, 'deep-scrub') + + assert ctx.manager.pg_inconsistent(pool, 0) + + # Regression test for bug #4778, should still + # be inconsistent after scrub + ctx.manager.do_pg_scrub(pool, 0, 'scrub') + + assert ctx.manager.pg_inconsistent(pool, 0) + + # Additional corruptions including 2 types for file1 + log.info("corrupting more objects") + dataerr(ctx, victim_osd, pool, 'file1') + mdataerr(ctx, victim_osd, pool, 'file2') + trunc(ctx, victim_osd, pool, 'file3') + omaperr(ctx, victim_osd, pool, 'file6') + + # see still inconsistent + log.info("scrubbing") + ctx.manager.do_pg_scrub(pool, 0, 'deep-scrub') + + assert ctx.manager.pg_inconsistent(pool, 0) + + # repair + log.info("repairing") + ctx.manager.do_pg_scrub(pool, 0, "repair") + + # Let repair clear inconsistent flag + time.sleep(10) + + # verify consistent + assert not ctx.manager.pg_inconsistent(pool, 0) + + # In the future repair might determine state of + # inconsistency itself, verify with a deep-scrub + log.info("scrubbing") + 
ctx.manager.do_pg_scrub(pool, 0, 'deep-scrub') + + # verify consistent + assert not ctx.manager.pg_inconsistent(pool, 0) + + log.info("done") + + +def hinfoerr(ctx, victim, pool, obj): + """ + cause an error in the hinfo_key + """ + log.info("remove the hinfo_key") + ctx.manager.objectstore_tool(pool, + options='', + args='rm-attr hinfo_key', + object_name=obj, + osd=victim) + + +def repair_test_erasure_code(ctx, corrupter, victim, scrub_type): + """ + Creates an object in the pool, corrupts it, + scrubs it, and verifies that the pool is inconsistent. It then repairs + the pool, rescrubs it, and verifies that the pool is consistent + + :param corrupter: error generating function. + :param victim: osd to corrupt, passed through to corrupter (task passes 'primary') + :param scrub_type: regular scrub or deep-scrub + """ + pool = "repair_pool_3" + ctx.manager.wait_for_clean() + with ctx.manager.pool(pool_name=pool, pg_num=1, + erasure_code_profile_name='default'): + + log.info("starting repair test for erasure code") + + # create object + log.info("doing put") + ctx.manager.do_put(pool, 'repair_test_obj', '/etc/hosts') + + # corrupt object + log.info("corrupting object") + corrupter(ctx, victim, pool, 'repair_test_obj') + + # verify inconsistent + log.info("scrubbing") + ctx.manager.do_pg_scrub(pool, 0, scrub_type) + + assert ctx.manager.pg_inconsistent(pool, 0) + + # repair + log.info("repairing") + ctx.manager.do_pg_scrub(pool, 0, "repair") + + log.info("re-scrubbing") + ctx.manager.do_pg_scrub(pool, 0, scrub_type) + + # verify consistent + assert not ctx.manager.pg_inconsistent(pool, 0) + log.info("done") + + +def task(ctx, config): + """ + Test [deep] repair in several situations: + Repair [Truncate, Data EIO, MData EIO] on [Primary|Replica] + + The config should be as follows: + + Must include the log-whitelist below + Must enable filestore_debug_inject_read_err config + + example: + + tasks: + - chef: + - install: + - ceph: + log-whitelist: + - 'candidate had a read error' + - 'deep-scrub 0
missing, 1 inconsistent objects' + - 'deep-scrub 0 missing, 4 inconsistent objects' + - 'deep-scrub 1 errors' + - 'deep-scrub 4 errors' + - '!= known omap_digest' + - 'repair 0 missing, 1 inconsistent objects' + - 'repair 0 missing, 4 inconsistent objects' + - 'repair 1 errors, 1 fixed' + - 'repair 4 errors, 4 fixed' + - 'scrub 0 missing, 1 inconsistent' + - 'scrub 1 errors' + - 'size 1 != known size' + conf: + osd: + filestore debug inject read err: true + - repair_test: + + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'repair_test task only accepts a dict for config' + + if not hasattr(ctx, 'manager'): + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + ctx.manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager') + ) + + ctx.manager.wait_for_all_up() + + ctx.manager.raw_cluster_cmd('osd', 'set', 'noscrub') + ctx.manager.raw_cluster_cmd('osd', 'set', 'nodeep-scrub') + + repair_test_1(ctx, mdataerr, choose_primary, "scrub") + repair_test_1(ctx, mdataerr, choose_replica, "scrub") + repair_test_1(ctx, dataerr, choose_primary, "deep-scrub") + repair_test_1(ctx, dataerr, choose_replica, "deep-scrub") + repair_test_1(ctx, trunc, choose_primary, "scrub") + repair_test_1(ctx, trunc, choose_replica, "scrub") + repair_test_2(ctx, config, choose_primary) + repair_test_2(ctx, config, choose_replica) + + repair_test_erasure_code(ctx, hinfoerr, 'primary', "deep-scrub") diff --git a/qa/tasks/rest_api.py b/qa/tasks/rest_api.py new file mode 100644 index 00000000000..0956d00be51 --- /dev/null +++ b/qa/tasks/rest_api.py @@ -0,0 +1,183 @@ +""" +Rest Api +""" +import logging +import contextlib +import time + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run +from teuthology.orchestra.daemon import DaemonGroup + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def 
run_rest_api_daemon(ctx, api_clients): + """ + Wrapper starts the rest api daemons + """ + if not hasattr(ctx, 'daemons'): + ctx.daemons = DaemonGroup() + remotes = ctx.cluster.only(teuthology.is_type('client')).remotes + for rems, roles in remotes.iteritems(): + for whole_id_ in roles: + if whole_id_ in api_clients: + id_ = whole_id_[len('clients'):] + run_cmd = [ + 'sudo', + 'daemon-helper', + 'kill', + 'ceph-rest-api', + '-n', + 'client.rest{id}'.format(id=id_), ] + cl_rest_id = 'client.rest{id}'.format(id=id_) + ctx.daemons.add_daemon(rems, 'restapi', + cl_rest_id, + args=run_cmd, + logger=log.getChild(cl_rest_id), + stdin=run.PIPE, + wait=False, + ) + for i in range(1, 12): + log.info('testing for ceph-rest-api try {0}'.format(i)) + run_cmd = [ + 'wget', + '-O', + '/dev/null', + '-q', + 'http://localhost:5000/api/v0.1/status' + ] + proc = rems.run( + args=run_cmd, + check_status=False + ) + if proc.exitstatus == 0: + break + time.sleep(5) + if proc.exitstatus != 0: + raise RuntimeError('Cannot contact ceph-rest-api') + try: + yield + + finally: + """ + TO DO: destroy daemons started -- modify iter_daemons_of_role + """ + teuthology.stop_daemons_of_type(ctx, 'restapi') + +@contextlib.contextmanager +def task(ctx, config): + """ + Start up rest-api. + + To start on all clients:: + + tasks: + - ceph: + - rest-api: + + To only run on certain clients:: + + tasks: + - ceph: + - rest-api: [client.0, client.3] + + or + + tasks: + - ceph: + - rest-api: + client.0: + client.3: + + The general flow of things here is: + 1. Find clients on which rest-api is supposed to run (api_clients) + 2. Generate keyring values + 3. Start up ceph-rest-api daemons + On cleanup: + 4. Stop the daemons + 5. Delete keyring value files.
+ """ + api_clients = [] + remotes = ctx.cluster.only(teuthology.is_type('client')).remotes + log.info(remotes) + if config == None: + api_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + else: + api_clients = config + log.info(api_clients) + testdir = teuthology.get_testdir(ctx) + coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir) + for rems, roles in remotes.iteritems(): + for whole_id_ in roles: + if whole_id_ in api_clients: + id_ = whole_id_[len('client.'):] + keyring = '/etc/ceph/ceph.client.rest{id}.keyring'.format( + id=id_) + rems.run( + args=[ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-authtool', + '--create-keyring', + '--gen-key', + '--name=client.rest{id}'.format(id=id_), + '--set-uid=0', + '--cap', 'mon', 'allow *', + '--cap', 'osd', 'allow *', + '--cap', 'mds', 'allow', + keyring, + run.Raw('&&'), + 'sudo', + 'chmod', + '0644', + keyring, + ], + ) + rems.run( + args=[ + 'sudo', + 'sh', + '-c', + run.Raw("'"), + "echo", + '[client.rest{id}]'.format(id=id_), + run.Raw('>>'), + "/etc/ceph/ceph.conf", + run.Raw("'") + ] + ) + rems.run( + args=[ + 'sudo', + 'sh', + '-c', + run.Raw("'"), + 'echo', + 'restapi', + 'keyring', + '=', + '/etc/ceph/ceph.client.rest{id}.keyring'.format(id=id_), + run.Raw('>>'), + '/etc/ceph/ceph.conf', + run.Raw("'"), + ] + ) + rems.run( + args=[ + 'ceph', + 'auth', + 'import', + '-i', + '/etc/ceph/ceph.client.rest{id}.keyring'.format(id=id_), + ] + ) + with contextutil.nested( + lambda: run_rest_api_daemon(ctx=ctx, api_clients=api_clients),): + yield + diff --git a/qa/tasks/restart.py b/qa/tasks/restart.py new file mode 100644 index 00000000000..697345a975b --- /dev/null +++ b/qa/tasks/restart.py @@ -0,0 +1,163 @@ +""" +Daemon restart +""" +import logging +import pipes + +from teuthology import misc as teuthology +from teuthology.orchestra import run as tor + +from teuthology.orchestra import run +log = logging.getLogger(__name__) 
+ +def restart_daemon(ctx, config, role, id_, *args): + """ + Handle restart (including the execution of the command parameters passed) + """ + log.info('Restarting {r}.{i} daemon...'.format(r=role, i=id_)) + daemon = ctx.daemons.get_daemon(role, id_) + log.debug('Waiting for exit of {r}.{i} daemon...'.format(r=role, i=id_)) + try: + daemon.wait_for_exit() + except tor.CommandFailedError as e: + log.debug('Command Failed: {e}'.format(e=e)) + if len(args) > 0: + confargs = ['--{k}={v}'.format(k=k, v=v) for k,v in zip(args[0::2], args[1::2])] + log.debug('Doing restart of {r}.{i} daemon with args: {a}...'.format(r=role, i=id_, a=confargs)) + daemon.restart_with_args(confargs) + else: + log.debug('Doing restart of {r}.{i} daemon...'.format(r=role, i=id_)) + daemon.restart() + +def get_tests(ctx, config, role, remote, testdir): + """Download restart tests""" + srcdir = '{tdir}/restart.{role}'.format(tdir=testdir, role=role) + + refspec = config.get('branch') + if refspec is None: + refspec = config.get('sha1') + if refspec is None: + refspec = config.get('tag') + if refspec is None: + refspec = 'HEAD' + log.info('Pulling restart qa/workunits from ref %s', refspec) + + remote.run( + logger=log.getChild(role), + args=[ + 'mkdir', '--', srcdir, + run.Raw('&&'), + 'git', + 'archive', + '--remote=git://git.ceph.com/ceph.git', + '%s:qa/workunits' % refspec, + run.Raw('|'), + 'tar', + '-C', srcdir, + '-x', + '-f-', + run.Raw('&&'), + 'cd', '--', srcdir, + run.Raw('&&'), + 'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi', + run.Raw('&&'), + 'find', '-executable', '-type', 'f', '-printf', r'%P\0'.format(srcdir=srcdir), + run.Raw('>{tdir}/restarts.list'.format(tdir=testdir)), + ], + ) + restarts = sorted(teuthology.get_file( + remote, + '{tdir}/restarts.list'.format(tdir=testdir)).split('\0')) + return (srcdir, restarts) + +def task(ctx, config): + """ + Execute commands and allow daemon restart with config options. 
+ Each process executed can output to stdout restart commands of the form: + restart <role> <id> <conf_key1> <conf_value1> <conf_key2> <conf_value2> + This will restart the daemon <role>.<id> with the specified config values once + by modifying the conf file with those values, and then replacing the old conf file + once the daemon is restarted. + This task does not kill a running daemon, it assumes the daemon will abort on an + assert specified in the config. + + tasks: + - install: + - ceph: + - restart: + exec: + client.0: + - test_backtraces.py + + """ + assert isinstance(config, dict), "task kill got invalid config" + + testdir = teuthology.get_testdir(ctx) + + try: + assert 'exec' in config, "config requires exec key with : entries" + for role, task in config['exec'].iteritems(): + log.info('restart for role {r}'.format(r=role)) + (remote,) = ctx.cluster.only(role).remotes.iterkeys() + srcdir, restarts = get_tests(ctx, config, role, remote, testdir) + log.info('Running command on role %s host %s', role, remote.name) + spec = '{spec}'.format(spec=task[0]) + log.info('Restarts list: %s', restarts) + log.info('Spec is %s', spec) + to_run = [w for w in restarts if w == task or w.find(spec) != -1] + log.info('To run: %s', to_run) + for c in to_run: + log.info('Running restart script %s...', c) + args = [ + run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)), + ] + env = config.get('env') + if env is not None: + for var, val in env.iteritems(): + quoted_val = pipes.quote(val) + env_arg = '{var}={val}'.format(var=var, val=quoted_val) + args.append(run.Raw(env_arg)) + args.extend([ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + '{srcdir}/{c}'.format( + srcdir=srcdir, + c=c, + ), + ]) + proc = remote.run( + args=args, + stdout=tor.PIPE, + stdin=tor.PIPE, + stderr=log, + wait=False, + ) + log.info('waiting for a command from script') + while True: + l = proc.stdout.readline() + if not l or l == '': + break + log.debug('script command: {c}'.format(c=l)) + ll = l.strip() + cmd = ll.split(' ') + if cmd[0]
== "done": + break + assert cmd[0] == 'restart', "script sent invalid command request to kill task" + # cmd should be: restart <role> <id> <conf_key1> <conf_value1> <conf_key2> <conf_value2> + # or to clear, just: restart <role> <id> + restart_daemon(ctx, config, cmd[1], cmd[2], *cmd[3:]) + proc.stdin.writelines(['restarted\n']) + proc.stdin.flush() + try: + proc.wait() + except tor.CommandFailedError: + raise Exception('restart task got non-zero exit status from script: {s}'.format(s=c)) + finally: + log.info('Finishing %s on %s...', task, role) + remote.run( + logger=log.getChild(role), + args=[ + 'rm', '-rf', '--', '{tdir}/restarts.list'.format(tdir=testdir), srcdir, + ], + ) diff --git a/qa/tasks/rgw.py b/qa/tasks/rgw.py new file mode 100644 index 00000000000..591a8a8cc68 --- /dev/null +++ b/qa/tasks/rgw.py @@ -0,0 +1,846 @@ +""" +rgw routines +""" +import argparse +import contextlib +import json +import logging +import os + +from cStringIO import StringIO + +from teuthology.orchestra import run +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra.run import CommandFailedError +from util.rgw import rgwadmin +from util.rados import (rados, create_ec_pool, + create_replicated_pool, + create_cache_pool) + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def create_apache_dirs(ctx, config): + """ + Remotely create apache directories. Delete when finished.
+ """ + log.info('Creating apache directories...') + testdir = teuthology.get_testdir(ctx) + for client in config.iterkeys(): + ctx.cluster.only(client).run( + args=[ + 'mkdir', + '-p', + '{tdir}/apache/htdocs.{client}'.format(tdir=testdir, + client=client), + '{tdir}/apache/tmp.{client}/fastcgi_sock'.format( + tdir=testdir, + client=client), + run.Raw('&&'), + 'mkdir', + '{tdir}/archive/apache.{client}'.format(tdir=testdir, + client=client), + ], + ) + try: + yield + finally: + log.info('Cleaning up apache directories...') + for client in config.iterkeys(): + ctx.cluster.only(client).run( + args=[ + 'rm', + '-rf', + '{tdir}/apache/tmp.{client}'.format(tdir=testdir, + client=client), + run.Raw('&&'), + 'rmdir', + '{tdir}/apache/htdocs.{client}'.format(tdir=testdir, + client=client), + ], + ) + + for client in config.iterkeys(): + ctx.cluster.only(client).run( + args=[ + 'rmdir', + '{tdir}/apache'.format(tdir=testdir), + ], + check_status=False, # only need to remove once per host + ) + + +@contextlib.contextmanager +def ship_apache_configs(ctx, config, role_endpoints): + """ + Ship apache config and rgw.fgci to all clients. 
Clean up on termination + """ + assert isinstance(config, dict) + assert isinstance(role_endpoints, dict) + testdir = teuthology.get_testdir(ctx) + log.info('Shipping apache config and rgw.fcgi...') + src = os.path.join(os.path.dirname(__file__), 'apache.conf.template') + for client, conf in config.iteritems(): + (remote,) = ctx.cluster.only(client).remotes.keys() + system_type = teuthology.get_system_type(remote) + if not conf: + conf = {} + idle_timeout = conf.get('idle_timeout', ctx.rgw.default_idle_timeout) + if system_type == 'deb': + mod_path = '/usr/lib/apache2/modules' + print_continue = 'on' + user = 'www-data' + group = 'www-data' + apache24_modconfig = ''' + IncludeOptional /etc/apache2/mods-available/mpm_event.conf + IncludeOptional /etc/apache2/mods-available/mpm_event.load +''' + else: + mod_path = '/usr/lib64/httpd/modules' + print_continue = 'off' + user = 'apache' + group = 'apache' + apache24_modconfig = \ + 'IncludeOptional /etc/httpd/conf.modules.d/00-mpm.conf' + host, port = role_endpoints[client] + with file(src, 'rb') as f: + conf = f.read().format( + testdir=testdir, + mod_path=mod_path, + print_continue=print_continue, + host=host, + port=port, + client=client, + idle_timeout=idle_timeout, + user=user, + group=group, + apache24_modconfig=apache24_modconfig, + ) + teuthology.write_file( + remote=remote, + path='{tdir}/apache/apache.{client}.conf'.format( + tdir=testdir, + client=client), + data=conf, + ) + teuthology.write_file( + remote=remote, + path='{tdir}/apache/htdocs.{client}/rgw.fcgi'.format( + tdir=testdir, + client=client), + data="""#!/bin/sh +ulimit -c unlimited +exec radosgw -f -n {client} -k /etc/ceph/ceph.{client}.keyring --rgw-socket-path {tdir}/apache/tmp.{client}/fastcgi_sock/rgw_sock + +""".format(tdir=testdir, client=client) + ) + remote.run( + args=[ + 'chmod', + 'a=rx', + '{tdir}/apache/htdocs.{client}/rgw.fcgi'.format(tdir=testdir, + client=client), + ], + ) + try: + yield + finally: + log.info('Removing apache 
config...') + for client in config.iterkeys(): + ctx.cluster.only(client).run( + args=[ + 'rm', + '-f', + '{tdir}/apache/apache.{client}.conf'.format(tdir=testdir, + client=client), + run.Raw('&&'), + 'rm', + '-f', + '{tdir}/apache/htdocs.{client}/rgw.fcgi'.format( + tdir=testdir, + client=client), + ], + ) + + +@contextlib.contextmanager +def start_rgw(ctx, config): + """ + Start rgw on remote sites. + """ + log.info('Starting rgw...') + testdir = teuthology.get_testdir(ctx) + for client in config.iterkeys(): + (remote,) = ctx.cluster.only(client).remotes.iterkeys() + + client_config = config.get(client) + if client_config is None: + client_config = {} + log.info("rgw %s config is %s", client, client_config) + id_ = client.split('.', 1)[1] + log.info('client {client} is id {id}'.format(client=client, id=id_)) + cmd_prefix = [ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'daemon-helper', + 'term', + ] + + rgw_cmd = ['radosgw'] + + if ctx.rgw.frontend == 'apache': + rgw_cmd.extend([ + '--rgw-socket-path', + '{tdir}/apache/tmp.{client}/fastcgi_sock/rgw_sock'.format( + tdir=testdir, + client=client, + ), + ]) + elif ctx.rgw.frontend == 'civetweb': + host, port = ctx.rgw.role_endpoints[client] + rgw_cmd.extend([ + '--rgw-frontends', + 'civetweb port={port}'.format(port=port), + ]) + + rgw_cmd.extend([ + '-n', client, + '-k', '/etc/ceph/ceph.{client}.keyring'.format(client=client), + '--log-file', + '/var/log/ceph/rgw.{client}.log'.format(client=client), + '--rgw_ops_log_socket_path', + '{tdir}/rgw.opslog.{client}.sock'.format(tdir=testdir, + client=client), + '--foreground', + run.Raw('|'), + 'sudo', + 'tee', + '/var/log/ceph/rgw.{client}.stdout'.format(tdir=testdir, + client=client), + run.Raw('2>&1'), + ]) + + if client_config.get('valgrind'): + cmd_prefix = teuthology.get_valgrind_args( + testdir, + client, + cmd_prefix, + client_config.get('valgrind') + ) + + run_cmd = list(cmd_prefix) + 
run_cmd.extend(rgw_cmd) + + ctx.daemons.add_daemon( + remote, 'rgw', client, + args=run_cmd, + logger=log.getChild(client), + stdin=run.PIPE, + wait=False, + ) + + try: + yield + finally: + teuthology.stop_daemons_of_type(ctx, 'rgw') + for client in config.iterkeys(): + ctx.cluster.only(client).run( + args=[ + 'rm', + '-f', + '{tdir}/rgw.opslog.{client}.sock'.format(tdir=testdir, + client=client), + ], + ) + + +@contextlib.contextmanager +def start_apache(ctx, config): + """ + Start apache on remote sites. + """ + log.info('Starting apache...') + testdir = teuthology.get_testdir(ctx) + apaches = {} + for client in config.iterkeys(): + (remote,) = ctx.cluster.only(client).remotes.keys() + system_type = teuthology.get_system_type(remote) + if system_type == 'deb': + apache_name = 'apache2' + else: + try: + remote.run( + args=[ + 'stat', + '/usr/sbin/httpd.worker', + ], + ) + apache_name = '/usr/sbin/httpd.worker' + except CommandFailedError: + apache_name = '/usr/sbin/httpd' + + proc = remote.run( + args=[ + 'adjust-ulimits', + 'daemon-helper', + 'kill', + apache_name, + '-X', + '-f', + '{tdir}/apache/apache.{client}.conf'.format(tdir=testdir, + client=client), + ], + logger=log.getChild(client), + stdin=run.PIPE, + wait=False, + ) + apaches[client] = proc + + try: + yield + finally: + log.info('Stopping apache...') + for client, proc in apaches.iteritems(): + proc.stdin.close() + + run.wait(apaches.itervalues()) + + +def extract_user_info(client_config): + """ + Extract user info from the client config specified. Returns a dict + that includes system key information. 
+ """ + # test if there isn't a system user or if there isn't a name for that + # user, return None + if ('system user' not in client_config or + 'name' not in client_config['system user']): + return None + + user_info = dict() + user_info['system_key'] = dict( + user=client_config['system user']['name'], + access_key=client_config['system user']['access key'], + secret_key=client_config['system user']['secret key'], + ) + return user_info + + +def extract_zone_info(ctx, client, client_config): + """ + Get zone information. + :param client: dictionary of client information + :param client_config: dictionary of client configuration information + :returns: zone extracted from client and client_config information + """ + ceph_config = ctx.ceph.conf.get('global', {}) + ceph_config.update(ctx.ceph.conf.get('client', {})) + ceph_config.update(ctx.ceph.conf.get(client, {})) + for key in ['rgw zone', 'rgw region', 'rgw zone root pool']: + assert key in ceph_config, \ + 'ceph conf must contain {key} for {client}'.format(key=key, + client=client) + region = ceph_config['rgw region'] + zone = ceph_config['rgw zone'] + zone_info = dict() + for key in ['rgw control pool', 'rgw gc pool', 'rgw log pool', + 'rgw intent log pool', 'rgw usage log pool', + 'rgw user keys pool', 'rgw user email pool', + 'rgw user swift pool', 'rgw user uid pool', + 'rgw domain root']: + new_key = key.split(' ', 1)[1] + new_key = new_key.replace(' ', '_') + + if key in ceph_config: + value = ceph_config[key] + log.debug('{key} specified in ceph_config ({val})'.format( + key=key, val=value)) + zone_info[new_key] = value + else: + zone_info[new_key] = '.' + region + '.' + zone + '.' + new_key + + index_pool = '.' + region + '.' + zone + '.' + 'index_pool' + data_pool = '.' + region + '.' + zone + '.' + 'data_pool' + data_extra_pool = '.' + region + '.' + zone + '.' 
+ 'data_extra_pool' + + zone_info['placement_pools'] = [{'key': 'default_placement', + 'val': {'index_pool': index_pool, + 'data_pool': data_pool, + 'data_extra_pool': data_extra_pool} + }] + + # these keys are meant for the zones argument in the region info. We + # insert them into zone_info with a different format and then remove them + # in the fill_in_endpoints() method + for key in ['rgw log meta', 'rgw log data']: + if key in ceph_config: + zone_info[key] = ceph_config[key] + + # these keys are meant for the zones argument in the region info. We + # insert them into zone_info with a different format and then remove them + # in the fill_in_endpoints() method + for key in ['rgw log meta', 'rgw log data']: + if key in ceph_config: + zone_info[key] = ceph_config[key] + + return region, zone, zone_info + + +def extract_region_info(region, region_info): + """ + Extract region information from the region_info parameter, using get + to set default values. + + :param region: name of the region + :param region_info: region information (in dictionary form). + :returns: dictionary of region information set from region_info, using + default values for missing fields. + """ + assert isinstance(region_info['zones'], list) and region_info['zones'], \ + 'zones must be a non-empty list' + return dict( + name=region, + api_name=region_info.get('api name', region), + is_master=region_info.get('is master', False), + log_meta=region_info.get('log meta', False), + log_data=region_info.get('log data', False), + master_zone=region_info.get('master zone', region_info['zones'][0]), + placement_targets=region_info.get('placement targets', + [{'name': 'default_placement', + 'tags': []}]), + default_placement=region_info.get('default placement', + 'default_placement'), + ) + + +def assign_ports(ctx, config): + """ + Assign port numbers starting with port 7280.
+ """ + port = 7280 + role_endpoints = {} + for remote, roles_for_host in ctx.cluster.remotes.iteritems(): + for role in roles_for_host: + if role in config: + role_endpoints[role] = (remote.name.split('@')[1], port) + port += 1 + + return role_endpoints + + +def fill_in_endpoints(region_info, role_zones, role_endpoints): + """ + Iterate through the list of role_endpoints, filling in zone information + + :param region_info: region data + :param role_zones: region and zone information. + :param role_endpoints: endpoints being used + """ + for role, (host, port) in role_endpoints.iteritems(): + region, zone, zone_info, _ = role_zones[role] + host, port = role_endpoints[role] + endpoint = 'http://{host}:{port}/'.format(host=host, port=port) + # check if the region specified under client actually exists + # in region_info (it should, if properly configured). + # If not, throw a reasonable error + if region not in region_info: + raise Exception( + 'Region: {region} was specified but no corresponding' + ' entry was found under \'regions\''.format(region=region)) + + region_conf = region_info[region] + region_conf.setdefault('endpoints', []) + region_conf['endpoints'].append(endpoint) + + # this is the payload for the 'zones' field in the region field + zone_payload = dict() + zone_payload['endpoints'] = [endpoint] + zone_payload['name'] = zone + + # Pull the log meta and log data settings out of zone_info, if they + # exist, then pop them as they don't actually belong in the zone info + for key in ['rgw log meta', 'rgw log data']: + new_key = key.split(' ', 1)[1] + new_key = new_key.replace(' ', '_') + + if key in zone_info: + value = zone_info.pop(key) + else: + value = 'false' + + zone_payload[new_key] = value + + region_conf.setdefault('zones', []) + region_conf['zones'].append(zone_payload) + + +@contextlib.contextmanager +def configure_users(ctx, config, everywhere=False): + """ + Create users by remotely running rgwadmin commands using extracted + user information. 
+ """ + log.info('Configuring users...') + + # extract the user info and append it to the payload tuple for the given + # client + for client, c_config in config.iteritems(): + if not c_config: + continue + user_info = extract_user_info(c_config) + if not user_info: + continue + + # For data sync the master zones and regions must have the + # system users of the secondary zones. To keep this simple, + # just create the system users on every client if regions are + # configured. + clients_to_create_as = [client] + if everywhere: + clients_to_create_as = config.keys() + for client_name in clients_to_create_as: + log.debug('Creating user {user} on {client}'.format( + user=user_info['system_key']['user'], client=client)) + rgwadmin(ctx, client_name, + cmd=[ + 'user', 'create', + '--uid', user_info['system_key']['user'], + '--access-key', user_info['system_key']['access_key'], + '--secret', user_info['system_key']['secret_key'], + '--display-name', user_info['system_key']['user'], + '--system', + ], + check_status=True, + ) + + yield + + +@contextlib.contextmanager +def create_nonregion_pools(ctx, config, regions): + """Create replicated or erasure coded data pools for rgw.""" + if regions: + yield + return + + log.info('creating data pools') + for client in config.keys(): + (remote,) = ctx.cluster.only(client).remotes.iterkeys() + data_pool = '.rgw.buckets' + if ctx.rgw.ec_data_pool: + create_ec_pool(remote, data_pool, client, 64, + ctx.rgw.erasure_code_profile) + else: + create_replicated_pool(remote, data_pool, 64) + if ctx.rgw.cache_pools: + create_cache_pool(remote, data_pool, data_pool + '.cache', 64, + 64*1024*1024) + yield + + +@contextlib.contextmanager +def configure_regions_and_zones(ctx, config, regions, role_endpoints): + """ + Configure regions and zones from rados and rgw. + """ + if not regions: + log.debug( + 'In rgw.configure_regions_and_zones() and regions is None. 
' + 'Bailing') + yield + return + + log.info('Configuring regions and zones...') + + log.debug('config is %r', config) + log.debug('regions are %r', regions) + log.debug('role_endpoints = %r', role_endpoints) + # extract the zone info + role_zones = dict([(client, extract_zone_info(ctx, client, c_config)) + for client, c_config in config.iteritems()]) + log.debug('roles_zones = %r', role_zones) + + # extract the user info and append it to the payload tuple for the given + # client + for client, c_config in config.iteritems(): + if not c_config: + user_info = None + else: + user_info = extract_user_info(c_config) + + (region, zone, zone_info) = role_zones[client] + role_zones[client] = (region, zone, zone_info, user_info) + + region_info = dict([ + (region_name, extract_region_info(region_name, r_config)) + for region_name, r_config in regions.iteritems()]) + + fill_in_endpoints(region_info, role_zones, role_endpoints) + + # clear out the old defaults + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + # removing these objects from .rgw.root and the per-zone root pools + # may or may not matter + rados(ctx, mon, + cmd=['-p', '.rgw.root', 'rm', 'region_info.default']) + rados(ctx, mon, + cmd=['-p', '.rgw.root', 'rm', 'zone_info.default']) + + for client in config.iterkeys(): + for role, (_, zone, zone_info, user_info) in role_zones.iteritems(): + rados(ctx, mon, + cmd=['-p', zone_info['domain_root'], + 'rm', 'region_info.default']) + rados(ctx, mon, + cmd=['-p', zone_info['domain_root'], + 'rm', 'zone_info.default']) + + (remote,) = ctx.cluster.only(role).remotes.keys() + for pool_info in zone_info['placement_pools']: + remote.run(args=['ceph', 'osd', 'pool', 'create', + pool_info['val']['index_pool'], '64', '64']) + if ctx.rgw.ec_data_pool: + create_ec_pool(remote, pool_info['val']['data_pool'], + zone, 64, ctx.rgw.erasure_code_profile) + else: + create_replicated_pool( + remote, 
pool_info['val']['data_pool'], + 64) + + rgwadmin(ctx, client, + cmd=['-n', client, 'zone', 'set', '--rgw-zone', zone], + stdin=StringIO(json.dumps(dict( + zone_info.items() + user_info.items()))), + check_status=True) + + for region, info in region_info.iteritems(): + region_json = json.dumps(info) + log.debug('region info is: %s', region_json) + rgwadmin(ctx, client, + cmd=['-n', client, 'region', 'set'], + stdin=StringIO(region_json), + check_status=True) + if info['is_master']: + rgwadmin(ctx, client, + cmd=['-n', client, + 'region', 'default', + '--rgw-region', region], + check_status=True) + + rgwadmin(ctx, client, cmd=['-n', client, 'regionmap', 'update']) + yield + + +@contextlib.contextmanager +def task(ctx, config): + """ + Either use configure apache to run a rados gateway, or use the built-in + civetweb server. + Only one should be run per machine, since it uses a hard-coded port for + now. + + For example, to run rgw on all clients:: + + tasks: + - ceph: + - rgw: + + To only run on certain clients:: + + tasks: + - ceph: + - rgw: [client.0, client.3] + + or + + tasks: + - ceph: + - rgw: + client.0: + client.3: + + You can adjust the idle timeout for fastcgi (default is 30 seconds): + + tasks: + - ceph: + - rgw: + client.0: + idle_timeout: 90 + + To run radosgw through valgrind: + + tasks: + - ceph: + - rgw: + client.0: + valgrind: [--tool=memcheck] + client.3: + valgrind: [--tool=memcheck] + + To use civetweb instead of apache: + + tasks: + - ceph: + - rgw: + - client.0 + overrides: + rgw: + frontend: civetweb + + Note that without a modified fastcgi module e.g. with the default + one on CentOS, you must have rgw print continue = false in ceph.conf:: + + tasks: + - ceph: + conf: + global: + rgw print continue: false + - rgw: [client.0] + + To run rgws for multiple regions or zones, describe the regions + and their zones in a regions section. The endpoints will be + generated by this task. 
Each client must have a region, zone, + and pools assigned in ceph.conf:: + + tasks: + - install: + - ceph: + conf: + client.0: + rgw region: foo + rgw zone: foo-1 + rgw region root pool: .rgw.rroot.foo + rgw zone root pool: .rgw.zroot.foo + rgw log meta: true + rgw log data: true + client.1: + rgw region: bar + rgw zone: bar-master + rgw region root pool: .rgw.rroot.bar + rgw zone root pool: .rgw.zroot.bar + rgw log meta: true + rgw log data: true + client.2: + rgw region: bar + rgw zone: bar-secondary + rgw region root pool: .rgw.rroot.bar + rgw zone root pool: .rgw.zroot.bar-secondary + - rgw: + default_idle_timeout: 30 + ec-data-pool: true + erasure_code_profile: + k: 2 + m: 1 + ruleset-failure-domain: osd + regions: + foo: + api name: api_name # default: region name + is master: true # default: false + master zone: foo-1 # default: first zone + zones: [foo-1] + log meta: true + log data: true + placement targets: [target1, target2] # default: [] + default placement: target2 # default: '' + bar: + api name: bar-api + zones: [bar-master, bar-secondary] + client.0: + system user: + name: foo-system + access key: X2IYPSTY1072DDY1SJMC + secret key: YIMHICpPvT+MhLTbSsiBJ1jQF15IFvJA8tgwJEcm + client.1: + system user: + name: bar1 + access key: Y2IYPSTY1072DDY1SJMC + secret key: XIMHICpPvT+MhLTbSsiBJ1jQF15IFvJA8tgwJEcm + client.2: + system user: + name: bar2 + access key: Z2IYPSTY1072DDY1SJMC + secret key: ZIMHICpPvT+MhLTbSsiBJ1jQF15IFvJA8tgwJEcm + """ + if config is None: + config = dict(('client.{id}'.format(id=id_), None) + for id_ in teuthology.all_roles_of_type( + ctx.cluster, 'client')) + elif isinstance(config, list): + config = dict((name, None) for name in config) + + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('rgw', {})) + + regions = {} + if 'regions' in config: + # separate region info so only clients are keys in config + regions = config['regions'] + del config['regions'] + + role_endpoints = 
assign_ports(ctx, config) + ctx.rgw = argparse.Namespace() + ctx.rgw.role_endpoints = role_endpoints + # stash the region info for later, since it was deleted from the config + # structure + ctx.rgw.regions = regions + + ctx.rgw.ec_data_pool = False + if 'ec-data-pool' in config: + ctx.rgw.ec_data_pool = bool(config['ec-data-pool']) + del config['ec-data-pool'] + ctx.rgw.erasure_code_profile = {} + if 'erasure_code_profile' in config: + ctx.rgw.erasure_code_profile = config['erasure_code_profile'] + del config['erasure_code_profile'] + ctx.rgw.default_idle_timeout = 30 + if 'default_idle_timeout' in config: + ctx.rgw.default_idle_timeout = int(config['default_idle_timeout']) + del config['default_idle_timeout'] + ctx.rgw.cache_pools = False + if 'cache-pools' in config: + ctx.rgw.cache_pools = bool(config['cache-pools']) + del config['cache-pools'] + + ctx.rgw.frontend = 'apache' + if 'frontend' in config: + ctx.rgw.frontend = config['frontend'] + del config['frontend'] + + subtasks = [ + lambda: configure_regions_and_zones( + ctx=ctx, + config=config, + regions=regions, + role_endpoints=role_endpoints, + ), + lambda: configure_users( + ctx=ctx, + config=config, + everywhere=bool(regions), + ), + lambda: create_nonregion_pools( + ctx=ctx, config=config, regions=regions), + ] + if ctx.rgw.frontend == 'apache': + subtasks.insert(0, lambda: create_apache_dirs(ctx=ctx, config=config)) + subtasks.extend([ + lambda: ship_apache_configs(ctx=ctx, config=config, + role_endpoints=role_endpoints), + lambda: start_rgw(ctx=ctx, config=config), + lambda: start_apache(ctx=ctx, config=config), + ]) + elif ctx.rgw.frontend == 'civetweb': + subtasks.extend([ + lambda: start_rgw(ctx=ctx, config=config), + ]) + else: + raise ValueError("frontend must be 'apache' or 'civetweb'") + + log.info("Using %s as radosgw frontend", ctx.rgw.frontend) + with contextutil.nested(*subtasks): + yield diff --git a/qa/tasks/rgw_logsocket.py b/qa/tasks/rgw_logsocket.py new file mode 100644 index 
00000000000..6f49b00d8a4 --- /dev/null +++ b/qa/tasks/rgw_logsocket.py @@ -0,0 +1,161 @@ +""" +rgw s3tests logging wrappers +""" +from cStringIO import StringIO +from configobj import ConfigObj +import contextlib +import logging +import s3tests + +from teuthology import misc as teuthology +from teuthology import contextutil + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def download(ctx, config): + """ + Run s3tests download function + """ + return s3tests.download(ctx, config) + +def _config_user(s3tests_conf, section, user): + """ + Run s3tests user config function + """ + return s3tests._config_user(s3tests_conf, section, user) + +@contextlib.contextmanager +def create_users(ctx, config): + """ + Run s3tests user create function + """ + return s3tests.create_users(ctx, config) + +@contextlib.contextmanager +def configure(ctx, config): + """ + Run s3tests user configure function + """ + return s3tests.configure(ctx, config) + +@contextlib.contextmanager +def run_tests(ctx, config): + """ + Run remote netcat tests + """ + assert isinstance(config, dict) + testdir = teuthology.get_testdir(ctx) + for client, client_config in config.iteritems(): + client_config['extra_args'] = [ + 's3tests.functional.test_s3:test_bucket_list_return_data', + ] +# args = [ +# 'S3TEST_CONF={tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client), +# '{tdir}/s3-tests/virtualenv/bin/nosetests'.format(tdir=testdir), +# '-w', +# '{tdir}/s3-tests'.format(tdir=testdir), +# '-v', +# 's3tests.functional.test_s3:test_bucket_list_return_data', +# ] +# if client_config is not None and 'extra_args' in client_config: +# args.extend(client_config['extra_args']) +# +# ctx.cluster.only(client).run( +# args=args, +# ) + + s3tests.run_tests(ctx, config) + + netcat_out = StringIO() + + for client, client_config in config.iteritems(): + ctx.cluster.only(client).run( + args = [ + 'netcat', + '-w', '5', + '-U', '{tdir}/rgw.opslog.sock'.format(tdir=testdir), + ], + 
stdout = netcat_out, + ) + + out = netcat_out.getvalue() + + assert len(out) > 100 + + log.info('Received', out) + + yield + + +@contextlib.contextmanager +def task(ctx, config): + """ + Run some s3-tests suite against rgw, verify opslog socket returns data + + Must restrict testing to a particular client:: + + tasks: + - ceph: + - rgw: [client.0] + - s3tests: [client.0] + + To pass extra arguments to nose (e.g. to run a certain test):: + + tasks: + - ceph: + - rgw: [client.0] + - s3tests: + client.0: + extra_args: ['test_s3:test_object_acl_grand_public_read'] + client.1: + extra_args: ['--exclude', 'test_100_continue'] + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task s3tests only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + clients = config.keys() + + overrides = ctx.config.get('overrides', {}) + # merge each client section, not the top level. 
+ for (client, cconf) in config.iteritems(): + teuthology.deep_merge(cconf, overrides.get('rgw-logsocket', {})) + + log.debug('config is %s', config) + + s3tests_conf = {} + for client in clients: + s3tests_conf[client] = ConfigObj( + indent_type='', + infile={ + 'DEFAULT': + { + 'port' : 7280, + 'is_secure' : 'no', + }, + 'fixtures' : {}, + 's3 main' : {}, + 's3 alt' : {}, + } + ) + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: create_users(ctx=ctx, config=dict( + clients=clients, + s3tests_conf=s3tests_conf, + )), + lambda: configure(ctx=ctx, config=dict( + clients=config, + s3tests_conf=s3tests_conf, + )), + lambda: run_tests(ctx=ctx, config=config), + ): + yield diff --git a/qa/tasks/s3readwrite.py b/qa/tasks/s3readwrite.py new file mode 100644 index 00000000000..9f1507ef816 --- /dev/null +++ b/qa/tasks/s3readwrite.py @@ -0,0 +1,346 @@ +""" +Run rgw s3 readwite tests +""" +from cStringIO import StringIO +import base64 +import contextlib +import logging +import os +import random +import string +import yaml + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.config import config as teuth_config +from teuthology.orchestra import run +from teuthology.orchestra.connection import split_user + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def download(ctx, config): + """ + Download the s3 tests from the git builder. + Remove downloaded s3 file upon exit. + + The context passed in should be identical to the context + passed in to the main task. 
+ """ + assert isinstance(config, dict) + log.info('Downloading s3-tests...') + testdir = teuthology.get_testdir(ctx) + for (client, cconf) in config.items(): + branch = cconf.get('force-branch', None) + if not branch: + branch = cconf.get('branch', 'master') + sha1 = cconf.get('sha1') + ctx.cluster.only(client).run( + args=[ + 'git', 'clone', + '-b', branch, + teuth_config.ceph_git_base_url + 's3-tests.git', + '{tdir}/s3-tests'.format(tdir=testdir), + ], + ) + if sha1 is not None: + ctx.cluster.only(client).run( + args=[ + 'cd', '{tdir}/s3-tests'.format(tdir=testdir), + run.Raw('&&'), + 'git', 'reset', '--hard', sha1, + ], + ) + try: + yield + finally: + log.info('Removing s3-tests...') + testdir = teuthology.get_testdir(ctx) + for client in config: + ctx.cluster.only(client).run( + args=[ + 'rm', + '-rf', + '{tdir}/s3-tests'.format(tdir=testdir), + ], + ) + + +def _config_user(s3tests_conf, section, user): + """ + Configure users for this section by stashing away keys, ids, and + email addresses. + """ + s3tests_conf[section].setdefault('user_id', user) + s3tests_conf[section].setdefault('email', '{user}+test@test.test'.format(user=user)) + s3tests_conf[section].setdefault('display_name', 'Mr. {user}'.format(user=user)) + s3tests_conf[section].setdefault('access_key', ''.join(random.choice(string.uppercase) for i in xrange(20))) + s3tests_conf[section].setdefault('secret_key', base64.b64encode(os.urandom(40))) + +@contextlib.contextmanager +def create_users(ctx, config): + """ + Create a default s3 user. 
+ """ + assert isinstance(config, dict) + log.info('Creating rgw users...') + testdir = teuthology.get_testdir(ctx) + users = {'s3': 'foo'} + cached_client_user_names = dict() + for client in config['clients']: + cached_client_user_names[client] = dict() + s3tests_conf = config['s3tests_conf'][client] + s3tests_conf.setdefault('readwrite', {}) + s3tests_conf['readwrite'].setdefault('bucket', 'rwtest-' + client + '-{random}-') + s3tests_conf['readwrite'].setdefault('readers', 10) + s3tests_conf['readwrite'].setdefault('writers', 3) + s3tests_conf['readwrite'].setdefault('duration', 300) + s3tests_conf['readwrite'].setdefault('files', {}) + rwconf = s3tests_conf['readwrite'] + rwconf['files'].setdefault('num', 10) + rwconf['files'].setdefault('size', 2000) + rwconf['files'].setdefault('stddev', 500) + for section, user in users.iteritems(): + _config_user(s3tests_conf, section, '{user}.{client}'.format(user=user, client=client)) + log.debug('creating user {user} on {client}'.format(user=s3tests_conf[section]['user_id'], + client=client)) + + # stash the 'delete_user' flag along with user name for easier cleanup + delete_this_user = True + if 'delete_user' in s3tests_conf['s3']: + delete_this_user = s3tests_conf['s3']['delete_user'] + log.debug('delete_user set to {flag} for {client}'.format(flag=delete_this_user, client=client)) + cached_client_user_names[client][section+user] = (s3tests_conf[section]['user_id'], delete_this_user) + + # skip actual user creation if the create_user flag is set to false for this client + if 'create_user' in s3tests_conf['s3'] and s3tests_conf['s3']['create_user'] == False: + log.debug('create_user set to False, skipping user creation for {client}'.format(client=client)) + continue + else: + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client, + 'user', 'create', + '--uid', s3tests_conf[section]['user_id'], + '--display-name', 
s3tests_conf[section]['display_name'], + '--access-key', s3tests_conf[section]['access_key'], + '--secret', s3tests_conf[section]['secret_key'], + '--email', s3tests_conf[section]['email'], + ], + ) + try: + yield + finally: + for client in config['clients']: + for section, user in users.iteritems(): + #uid = '{user}.{client}'.format(user=user, client=client) + real_uid, delete_this_user = cached_client_user_names[client][section+user] + if delete_this_user: + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client, + 'user', 'rm', + '--uid', real_uid, + '--purge-data', + ], + ) + else: + log.debug('skipping delete for user {uid} on {client}'.format(uid=real_uid, client=client)) + +@contextlib.contextmanager +def configure(ctx, config): + """ + Configure the s3-tests. This includes the running of the + bootstrap code and the updating of local conf files. + """ + assert isinstance(config, dict) + log.info('Configuring s3-readwrite-tests...') + for client, properties in config['clients'].iteritems(): + s3tests_conf = config['s3tests_conf'][client] + if properties is not None and 'rgw_server' in properties: + host = None + for target, roles in zip(ctx.config['targets'].iterkeys(), ctx.config['roles']): + log.info('roles: ' + str(roles)) + log.info('target: ' + str(target)) + if properties['rgw_server'] in roles: + _, host = split_user(target) + assert host is not None, "Invalid client specified as the rgw_server" + s3tests_conf['s3']['host'] = host + else: + s3tests_conf['s3']['host'] = 'localhost' + + def_conf = s3tests_conf['DEFAULT'] + s3tests_conf['s3'].setdefault('port', def_conf['port']) + s3tests_conf['s3'].setdefault('is_secure', def_conf['is_secure']) + + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=[ + 'cd', + '{tdir}/s3-tests'.format(tdir=teuthology.get_testdir(ctx)), + run.Raw('&&'), + './bootstrap', + ], + ) + conf_fp = 
StringIO() + conf = dict( + s3=s3tests_conf['s3'], + readwrite=s3tests_conf['readwrite'], + ) + yaml.safe_dump(conf, conf_fp, default_flow_style=False) + teuthology.write_file( + remote=remote, + path='{tdir}/archive/s3readwrite.{client}.config.yaml'.format(tdir=teuthology.get_testdir(ctx), client=client), + data=conf_fp.getvalue(), + ) + yield + + +@contextlib.contextmanager +def run_tests(ctx, config): + """ + Run the s3readwrite tests after everything is set up. + + :param ctx: Context passed to task + :param config: specific configuration information + """ + assert isinstance(config, dict) + testdir = teuthology.get_testdir(ctx) + for client, client_config in config.iteritems(): + (remote,) = ctx.cluster.only(client).remotes.keys() + conf = teuthology.get_file(remote, '{tdir}/archive/s3readwrite.{client}.config.yaml'.format(tdir=testdir, client=client)) + args = [ + '{tdir}/s3-tests/virtualenv/bin/s3tests-test-readwrite'.format(tdir=testdir), + ] + if client_config is not None and 'extra_args' in client_config: + args.extend(client_config['extra_args']) + + ctx.cluster.only(client).run( + args=args, + stdin=conf, + ) + yield + + +@contextlib.contextmanager +def task(ctx, config): + """ + Run the s3tests-test-readwrite suite against rgw. + + To run all tests on all clients:: + + tasks: + - ceph: + - rgw: + - s3readwrite: + + To restrict testing to particular clients:: + + tasks: + - ceph: + - rgw: [client.0] + - s3readwrite: [client.0] + + To run against a server on client.1:: + + tasks: + - ceph: + - rgw: [client.1] + - s3readwrite: + client.0: + rgw_server: client.1 + + To pass extra test arguments + + tasks: + - ceph: + - rgw: [client.0] + - s3readwrite: + client.0: + readwrite: + bucket: mybucket + readers: 10 + writers: 3 + duration: 600 + files: + num: 10 + size: 2000 + stddev: 500 + client.1: + ... 
+ + To override s3 configuration + + tasks: + - ceph: + - rgw: [client.0] + - s3readwrite: + client.0: + s3: + user_id: myuserid + display_name: myname + email: my@email + access_key: myaccesskey + secret_key: mysecretkey + + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task s3tests only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + clients = config.keys() + + overrides = ctx.config.get('overrides', {}) + # merge each client section, not the top level. + for client in config.iterkeys(): + if not config[client]: + config[client] = {} + teuthology.deep_merge(config[client], overrides.get('s3readwrite', {})) + + log.debug('in s3readwrite, config is %s', config) + + s3tests_conf = {} + for client in clients: + if config[client] is None: + config[client] = {} + config[client].setdefault('s3', {}) + config[client].setdefault('readwrite', {}) + + s3tests_conf[client] = ({ + 'DEFAULT': + { + 'port' : 7280, + 'is_secure' : False, + }, + 'readwrite' : config[client]['readwrite'], + 's3' : config[client]['s3'], + }) + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: create_users(ctx=ctx, config=dict( + clients=clients, + s3tests_conf=s3tests_conf, + )), + lambda: configure(ctx=ctx, config=dict( + clients=config, + s3tests_conf=s3tests_conf, + )), + lambda: run_tests(ctx=ctx, config=config), + ): + pass + yield diff --git a/qa/tasks/s3roundtrip.py b/qa/tasks/s3roundtrip.py new file mode 100644 index 00000000000..4c17144dbae --- /dev/null +++ b/qa/tasks/s3roundtrip.py @@ -0,0 +1,302 @@ +""" +Run rgw roundtrip message tests +""" +from cStringIO import StringIO +import base64 +import contextlib +import logging +import os +import random +import string +import yaml + +from 
teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.config import config as teuth_config +from teuthology.orchestra import run +from teuthology.orchestra.connection import split_user + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def download(ctx, config): + """ + Download the s3 tests from the git builder. + Remove downloaded s3 file upon exit. + + The context passed in should be identical to the context + passed in to the main task. + """ + assert isinstance(config, list) + log.info('Downloading s3-tests...') + testdir = teuthology.get_testdir(ctx) + for client in config: + ctx.cluster.only(client).run( + args=[ + 'git', 'clone', + teuth_config.ceph_git_base_url + 's3-tests.git', + '{tdir}/s3-tests'.format(tdir=testdir), + ], + ) + try: + yield + finally: + log.info('Removing s3-tests...') + for client in config: + ctx.cluster.only(client).run( + args=[ + 'rm', + '-rf', + '{tdir}/s3-tests'.format(tdir=testdir), + ], + ) + +def _config_user(s3tests_conf, section, user): + """ + Configure users for this section by stashing away keys, ids, and + email addresses. + """ + s3tests_conf[section].setdefault('user_id', user) + s3tests_conf[section].setdefault('email', '{user}+test@test.test'.format(user=user)) + s3tests_conf[section].setdefault('display_name', 'Mr. {user}'.format(user=user)) + s3tests_conf[section].setdefault('access_key', ''.join(random.choice(string.uppercase) for i in xrange(20))) + s3tests_conf[section].setdefault('secret_key', base64.b64encode(os.urandom(40))) + +@contextlib.contextmanager +def create_users(ctx, config): + """ + Create a default s3 user. 
+ """ + assert isinstance(config, dict) + log.info('Creating rgw users...') + testdir = teuthology.get_testdir(ctx) + users = {'s3': 'foo'} + for client in config['clients']: + s3tests_conf = config['s3tests_conf'][client] + s3tests_conf.setdefault('roundtrip', {}) + s3tests_conf['roundtrip'].setdefault('bucket', 'rttest-' + client + '-{random}-') + s3tests_conf['roundtrip'].setdefault('readers', 10) + s3tests_conf['roundtrip'].setdefault('writers', 3) + s3tests_conf['roundtrip'].setdefault('duration', 300) + s3tests_conf['roundtrip'].setdefault('files', {}) + rtconf = s3tests_conf['roundtrip'] + rtconf['files'].setdefault('num', 10) + rtconf['files'].setdefault('size', 2000) + rtconf['files'].setdefault('stddev', 500) + for section, user in [('s3', 'foo')]: + _config_user(s3tests_conf, section, '{user}.{client}'.format(user=user, client=client)) + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client, + 'user', 'create', + '--uid', s3tests_conf[section]['user_id'], + '--display-name', s3tests_conf[section]['display_name'], + '--access-key', s3tests_conf[section]['access_key'], + '--secret', s3tests_conf[section]['secret_key'], + '--email', s3tests_conf[section]['email'], + ], + ) + try: + yield + finally: + for client in config['clients']: + for user in users.itervalues(): + uid = '{user}.{client}'.format(user=user, client=client) + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client, + 'user', 'rm', + '--uid', uid, + '--purge-data', + ], + ) + +@contextlib.contextmanager +def configure(ctx, config): + """ + Configure the s3-tests. This includes the running of the + bootstrap code and the updating of local conf files. 
+ """ + assert isinstance(config, dict) + log.info('Configuring s3-roundtrip-tests...') + testdir = teuthology.get_testdir(ctx) + for client, properties in config['clients'].iteritems(): + s3tests_conf = config['s3tests_conf'][client] + if properties is not None and 'rgw_server' in properties: + host = None + for target, roles in zip(ctx.config['targets'].iterkeys(), ctx.config['roles']): + log.info('roles: ' + str(roles)) + log.info('target: ' + str(target)) + if properties['rgw_server'] in roles: + _, host = split_user(target) + assert host is not None, "Invalid client specified as the rgw_server" + s3tests_conf['s3']['host'] = host + else: + s3tests_conf['s3']['host'] = 'localhost' + + def_conf = s3tests_conf['DEFAULT'] + s3tests_conf['s3'].setdefault('port', def_conf['port']) + s3tests_conf['s3'].setdefault('is_secure', def_conf['is_secure']) + + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=[ + 'cd', + '{tdir}/s3-tests'.format(tdir=testdir), + run.Raw('&&'), + './bootstrap', + ], + ) + conf_fp = StringIO() + conf = dict( + s3=s3tests_conf['s3'], + roundtrip=s3tests_conf['roundtrip'], + ) + yaml.safe_dump(conf, conf_fp, default_flow_style=False) + teuthology.write_file( + remote=remote, + path='{tdir}/archive/s3roundtrip.{client}.config.yaml'.format(tdir=testdir, client=client), + data=conf_fp.getvalue(), + ) + yield + + +@contextlib.contextmanager +def run_tests(ctx, config): + """ + Run the s3 roundtrip after everything is set up. 
+ + :param ctx: Context passed to task + :param config: specific configuration information + """ + assert isinstance(config, dict) + testdir = teuthology.get_testdir(ctx) + for client, client_config in config.iteritems(): + (remote,) = ctx.cluster.only(client).remotes.keys() + conf = teuthology.get_file(remote, '{tdir}/archive/s3roundtrip.{client}.config.yaml'.format(tdir=testdir, client=client)) + args = [ + '{tdir}/s3-tests/virtualenv/bin/s3tests-test-roundtrip'.format(tdir=testdir), + ] + if client_config is not None and 'extra_args' in client_config: + args.extend(client_config['extra_args']) + + ctx.cluster.only(client).run( + args=args, + stdin=conf, + ) + yield + + +@contextlib.contextmanager +def task(ctx, config): + """ + Run the s3tests-test-roundtrip suite against rgw. + + To run all tests on all clients:: + + tasks: + - ceph: + - rgw: + - s3roundtrip: + + To restrict testing to particular clients:: + + tasks: + - ceph: + - rgw: [client.0] + - s3roundtrip: [client.0] + + To run against a server on client.1:: + + tasks: + - ceph: + - rgw: [client.1] + - s3roundtrip: + client.0: + rgw_server: client.1 + + To pass extra test arguments + + tasks: + - ceph: + - rgw: [client.0] + - s3roundtrip: + client.0: + roundtrip: + bucket: mybucket + readers: 10 + writers: 3 + duration: 600 + files: + num: 10 + size: 2000 + stddev: 500 + client.1: + ... 
+ + To override s3 configuration + + tasks: + - ceph: + - rgw: [client.0] + - s3roundtrip: + client.0: + s3: + user_id: myuserid + display_name: myname + email: my@email + access_key: myaccesskey + secret_key: mysecretkey + + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task s3tests only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + clients = config.keys() + + s3tests_conf = {} + for client in clients: + if config[client] is None: + config[client] = {} + config[client].setdefault('s3', {}) + config[client].setdefault('roundtrip', {}) + + s3tests_conf[client] = ({ + 'DEFAULT': + { + 'port' : 7280, + 'is_secure' : False, + }, + 'roundtrip' : config[client]['roundtrip'], + 's3' : config[client]['s3'], + }) + + with contextutil.nested( + lambda: download(ctx=ctx, config=clients), + lambda: create_users(ctx=ctx, config=dict( + clients=clients, + s3tests_conf=s3tests_conf, + )), + lambda: configure(ctx=ctx, config=dict( + clients=config, + s3tests_conf=s3tests_conf, + )), + lambda: run_tests(ctx=ctx, config=config), + ): + pass + yield diff --git a/qa/tasks/s3tests.py b/qa/tasks/s3tests.py new file mode 100644 index 00000000000..d0f6431dd5f --- /dev/null +++ b/qa/tasks/s3tests.py @@ -0,0 +1,442 @@ +""" +Run a set of s3 tests on rgw. 
+""" +from cStringIO import StringIO +from configobj import ConfigObj +import base64 +import contextlib +import logging +import os +import random +import string + +import util.rgw as rgw_utils + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.config import config as teuth_config +from teuthology.orchestra import run +from teuthology.orchestra.connection import split_user + +log = logging.getLogger(__name__) + +def extract_sync_client_data(ctx, client_name): + """ + Extract synchronized client rgw zone and rgw region information. + + :param ctx: Context passed to the s3tests task + :param name: Name of client that we are synching with + """ + return_region_name = None + return_dict = None + client = ctx.ceph.conf.get(client_name, None) + if client: + current_client_zone = client.get('rgw zone', None) + if current_client_zone: + (endpoint_host, endpoint_port) = ctx.rgw.role_endpoints.get(client_name, (None, None)) + # pull out the radosgw_agent stuff + regions = ctx.rgw.regions + for region in regions: + log.debug('jbuck, region is {region}'.format(region=region)) + region_data = ctx.rgw.regions[region] + log.debug('region data is {region}'.format(region=region_data)) + zones = region_data['zones'] + for zone in zones: + if current_client_zone in zone: + return_region_name = region + return_dict = dict() + return_dict['api_name'] = region_data['api name'] + return_dict['is_master'] = region_data['is master'] + return_dict['port'] = endpoint_port + return_dict['host'] = endpoint_host + + # The s3tests expect the sync_agent_[addr|port} to be + # set on the non-master node for some reason + if not region_data['is master']: + (rgwagent_host, rgwagent_port) = ctx.radosgw_agent.endpoint + (return_dict['sync_agent_addr'], _) = ctx.rgw.role_endpoints[rgwagent_host] + return_dict['sync_agent_port'] = rgwagent_port + + else: #if client_zone: + log.debug('No zone info for {host}'.format(host=client_name)) + else: # if client + 
log.debug('No ceph conf for {host}'.format(host=client_name)) + + return return_region_name, return_dict + +def update_conf_with_region_info(ctx, config, s3tests_conf): + """ + Scan for a client (passed in s3tests_conf) that is an s3agent + with which we can sync. Update information in local conf file + if such a client is found. + """ + for key in s3tests_conf.keys(): + # we'll assume that there's only one sync relationship (source / destination) with client.X + # as the key for now + + # Iterate through all of the radosgw_agent (rgwa) configs and see if a + # given client is involved in a relationship. + # If a given client isn't, skip it + this_client_in_rgwa_config = False + for rgwa in ctx.radosgw_agent.config.keys(): + rgwa_data = ctx.radosgw_agent.config[rgwa] + + if key in rgwa_data['src'] or key in rgwa_data['dest']: + this_client_in_rgwa_config = True + log.debug('{client} is in an radosgw-agent sync relationship'.format(client=key)) + radosgw_sync_data = ctx.radosgw_agent.config[key] + break + if not this_client_in_rgwa_config: + log.debug('{client} is NOT in an radosgw-agent sync relationship'.format(client=key)) + continue + + source_client = radosgw_sync_data['src'] + dest_client = radosgw_sync_data['dest'] + + # #xtract the pertinent info for the source side + source_region_name, source_region_dict = extract_sync_client_data(ctx, source_client) + log.debug('\t{key} source_region {source_region} source_dict {source_dict}'.format + (key=key,source_region=source_region_name,source_dict=source_region_dict)) + + # The source *should* be the master region, but test anyway and then set it as the default region + if source_region_dict['is_master']: + log.debug('Setting {region} as default_region'.format(region=source_region_name)) + s3tests_conf[key]['fixtures'].setdefault('default_region', source_region_name) + + # Extract the pertinent info for the destination side + dest_region_name, dest_region_dict = extract_sync_client_data(ctx, dest_client) + 
log.debug('\t{key} dest_region {dest_region} dest_dict {dest_dict}'.format + (key=key,dest_region=dest_region_name,dest_dict=dest_region_dict)) + + # now add these regions to the s3tests_conf object + s3tests_conf[key]['region {region_name}'.format(region_name=source_region_name)] = source_region_dict + s3tests_conf[key]['region {region_name}'.format(region_name=dest_region_name)] = dest_region_dict + +@contextlib.contextmanager +def download(ctx, config): + """ + Download the s3 tests from the git builder. + Remove downloaded s3 file upon exit. + + The context passed in should be identical to the context + passed in to the main task. + """ + assert isinstance(config, dict) + log.info('Downloading s3-tests...') + testdir = teuthology.get_testdir(ctx) + for (client, cconf) in config.items(): + branch = cconf.get('force-branch', None) + if not branch: + ceph_branch = ctx.config.get('branch') + suite_branch = ctx.config.get('suite_branch', ceph_branch) + branch = cconf.get('branch', suite_branch) + if not branch: + raise ValueError( + "Could not determine what branch to use for s3tests!") + else: + log.info("Using branch '%s' for s3tests", branch) + sha1 = cconf.get('sha1') + ctx.cluster.only(client).run( + args=[ + 'git', 'clone', + '-b', branch, + teuth_config.ceph_git_base_url + 's3-tests.git', + '{tdir}/s3-tests'.format(tdir=testdir), + ], + ) + if sha1 is not None: + ctx.cluster.only(client).run( + args=[ + 'cd', '{tdir}/s3-tests'.format(tdir=testdir), + run.Raw('&&'), + 'git', 'reset', '--hard', sha1, + ], + ) + try: + yield + finally: + log.info('Removing s3-tests...') + testdir = teuthology.get_testdir(ctx) + for client in config: + ctx.cluster.only(client).run( + args=[ + 'rm', + '-rf', + '{tdir}/s3-tests'.format(tdir=testdir), + ], + ) + + +def _config_user(s3tests_conf, section, user): + """ + Configure users for this section by stashing away keys, ids, and + email addresses. 
+ """ + s3tests_conf[section].setdefault('user_id', user) + s3tests_conf[section].setdefault('email', '{user}+test@test.test'.format(user=user)) + s3tests_conf[section].setdefault('display_name', 'Mr. {user}'.format(user=user)) + s3tests_conf[section].setdefault('access_key', ''.join(random.choice(string.uppercase) for i in xrange(20))) + s3tests_conf[section].setdefault('secret_key', base64.b64encode(os.urandom(40))) + + +@contextlib.contextmanager +def create_users(ctx, config): + """ + Create a main and an alternate s3 user. + """ + assert isinstance(config, dict) + log.info('Creating rgw users...') + testdir = teuthology.get_testdir(ctx) + users = {'s3 main': 'foo', 's3 alt': 'bar'} + for client in config['clients']: + s3tests_conf = config['s3tests_conf'][client] + s3tests_conf.setdefault('fixtures', {}) + s3tests_conf['fixtures'].setdefault('bucket prefix', 'test-' + client + '-{random}-') + for section, user in users.iteritems(): + _config_user(s3tests_conf, section, '{user}.{client}'.format(user=user, client=client)) + log.debug('Creating user {user} on {host}'.format(user=s3tests_conf[section]['user_id'], host=client)) + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client, + 'user', 'create', + '--uid', s3tests_conf[section]['user_id'], + '--display-name', s3tests_conf[section]['display_name'], + '--access-key', s3tests_conf[section]['access_key'], + '--secret', s3tests_conf[section]['secret_key'], + '--email', s3tests_conf[section]['email'], + ], + ) + try: + yield + finally: + for client in config['clients']: + for user in users.itervalues(): + uid = '{user}.{client}'.format(user=user, client=client) + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client, + 'user', 'rm', + '--uid', uid, + '--purge-data', + ], + ) + + 
+@contextlib.contextmanager +def configure(ctx, config): + """ + Configure the s3-tests. This includes the running of the + bootstrap code and the updating of local conf files. + """ + assert isinstance(config, dict) + log.info('Configuring s3-tests...') + testdir = teuthology.get_testdir(ctx) + for client, properties in config['clients'].iteritems(): + s3tests_conf = config['s3tests_conf'][client] + if properties is not None and 'rgw_server' in properties: + host = None + for target, roles in zip(ctx.config['targets'].iterkeys(), ctx.config['roles']): + log.info('roles: ' + str(roles)) + log.info('target: ' + str(target)) + if properties['rgw_server'] in roles: + _, host = split_user(target) + assert host is not None, "Invalid client specified as the rgw_server" + s3tests_conf['DEFAULT']['host'] = host + else: + s3tests_conf['DEFAULT']['host'] = 'localhost' + + if properties is not None and 'slow_backend' in properties: + s3tests_conf['fixtures']['slow backend'] = properties['slow_backend'] + + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=[ + 'cd', + '{tdir}/s3-tests'.format(tdir=testdir), + run.Raw('&&'), + './bootstrap', + ], + ) + conf_fp = StringIO() + s3tests_conf.write(conf_fp) + teuthology.write_file( + remote=remote, + path='{tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client), + data=conf_fp.getvalue(), + ) + + log.info('Configuring boto...') + boto_src = os.path.join(os.path.dirname(__file__), 'boto.cfg.template') + for client, properties in config['clients'].iteritems(): + with file(boto_src, 'rb') as f: + (remote,) = ctx.cluster.only(client).remotes.keys() + conf = f.read().format( + idle_timeout=config.get('idle_timeout', 30) + ) + teuthology.write_file( + remote=remote, + path='{tdir}/boto.cfg'.format(tdir=testdir), + data=conf, + ) + + try: + yield + + finally: + log.info('Cleaning up boto...') + for client, properties in config['clients'].iteritems(): + (remote,) = 
ctx.cluster.only(client).remotes.keys() + remote.run( + args=[ + 'rm', + '{tdir}/boto.cfg'.format(tdir=testdir), + ], + ) + +@contextlib.contextmanager +def sync_users(ctx, config): + """ + Sync this user. + """ + assert isinstance(config, dict) + # do a full sync if this is a multi-region test + if rgw_utils.multi_region_enabled(ctx): + log.debug('Doing a full sync') + rgw_utils.radosgw_agent_sync_all(ctx) + else: + log.debug('Not a multi-region config; skipping the metadata sync') + + yield + +@contextlib.contextmanager +def run_tests(ctx, config): + """ + Run the s3tests after everything is set up. + + :param ctx: Context passed to task + :param config: specific configuration information + """ + assert isinstance(config, dict) + testdir = teuthology.get_testdir(ctx) + for client, client_config in config.iteritems(): + args = [ + 'S3TEST_CONF={tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client), + 'BOTO_CONFIG={tdir}/boto.cfg'.format(tdir=testdir), + '{tdir}/s3-tests/virtualenv/bin/nosetests'.format(tdir=testdir), + '-w', + '{tdir}/s3-tests'.format(tdir=testdir), + '-v', + '-a', '!fails_on_rgw', + ] + if client_config is not None and 'extra_args' in client_config: + args.extend(client_config['extra_args']) + + ctx.cluster.only(client).run( + args=args, + label="s3 tests against rgw" + ) + yield + +@contextlib.contextmanager +def task(ctx, config): + """ + Run the s3-tests suite against rgw. + + To run all tests on all clients:: + + tasks: + - ceph: + - rgw: + - s3tests: + + To restrict testing to particular clients:: + + tasks: + - ceph: + - rgw: [client.0] + - s3tests: [client.0] + + To run against a server on client.1 and increase the boto timeout to 10m:: + + tasks: + - ceph: + - rgw: [client.1] + - s3tests: + client.0: + rgw_server: client.1 + idle_timeout: 600 + + To pass extra arguments to nose (e.g. 
to run a certain test):: + + tasks: + - ceph: + - rgw: [client.0] + - s3tests: + client.0: + extra_args: ['test_s3:test_object_acl_grand_public_read'] + client.1: + extra_args: ['--exclude', 'test_100_continue'] + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task s3tests only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + clients = config.keys() + + overrides = ctx.config.get('overrides', {}) + # merge each client section, not the top level. + for client in config.iterkeys(): + if not config[client]: + config[client] = {} + teuthology.deep_merge(config[client], overrides.get('s3tests', {})) + + log.debug('s3tests config is %s', config) + + s3tests_conf = {} + for client in clients: + s3tests_conf[client] = ConfigObj( + indent_type='', + infile={ + 'DEFAULT': + { + 'port' : 7280, + 'is_secure' : 'no', + }, + 'fixtures' : {}, + 's3 main' : {}, + 's3 alt' : {}, + } + ) + + # Only attempt to add in the region info if there's a radosgw_agent configured + if hasattr(ctx, 'radosgw_agent'): + update_conf_with_region_info(ctx, config, s3tests_conf) + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: create_users(ctx=ctx, config=dict( + clients=clients, + s3tests_conf=s3tests_conf, + )), + lambda: sync_users(ctx=ctx, config=config), + lambda: configure(ctx=ctx, config=dict( + clients=config, + s3tests_conf=s3tests_conf, + )), + lambda: run_tests(ctx=ctx, config=config), + ): + pass + yield diff --git a/qa/tasks/samba.py b/qa/tasks/samba.py new file mode 100644 index 00000000000..38ebe026b4a --- /dev/null +++ b/qa/tasks/samba.py @@ -0,0 +1,245 @@ +""" +Samba +""" +import contextlib +import logging +import sys +import time + +from teuthology import misc as teuthology +from 
teuthology.orchestra import run +from teuthology.orchestra.daemon import DaemonGroup + +log = logging.getLogger(__name__) + + +def get_sambas(ctx, roles): + """ + Scan for roles that are samba. Yield the id of the the samba role + (samba.0, samba.1...) and the associated remote site + + :param ctx: Context + :param roles: roles for this test (extracted from yaml files) + """ + for role in roles: + assert isinstance(role, basestring) + PREFIX = 'samba.' + assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + (remote,) = ctx.cluster.only(role).remotes.iterkeys() + yield (id_, remote) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Setup samba smbd with ceph vfs module. This task assumes the samba + package has already been installed via the install task. + + The config is optional and defaults to starting samba on all nodes. + If a config is given, it is expected to be a list of + samba nodes to start smbd servers on. + + Example that starts smbd on all samba nodes:: + + tasks: + - install: + - install: + project: samba + extra_packages: ['samba'] + - ceph: + - samba: + - interactive: + + Example that starts smbd on just one of the samba nodes and cifs on the other:: + + tasks: + - samba: [samba.0] + - cifs: [samba.1] + + An optional backend can be specified, and requires a path which smbd will + use as the backend storage location: + + roles: + - [osd.0, osd.1, osd.2, mon.0, mon.1, mon.2, mds.a] + - [client.0, samba.0] + + tasks: + - ceph: + - ceph-fuse: [client.0] + - samba: + samba.0: + cephfuse: "{testdir}/mnt.0" + + This mounts ceph to {testdir}/mnt.0 using fuse, and starts smbd with + a UNC of //localhost/cephfuse. Access through that UNC will be on + the ceph fuse mount point. + + If no arguments are specified in the samba + role, the default behavior is to enable the ceph UNC //localhost/ceph + and use the ceph vfs module as the smbd backend. 
+ + :param ctx: Context + :param config: Configuration + """ + log.info("Setting up smbd with ceph vfs...") + assert config is None or isinstance(config, list) or isinstance(config, dict), \ + "task samba got invalid config" + + if config is None: + config = dict(('samba.{id}'.format(id=id_), None) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'samba')) + elif isinstance(config, list): + config = dict((name, None) for name in config) + + samba_servers = list(get_sambas(ctx=ctx, roles=config.keys())) + + testdir = teuthology.get_testdir(ctx) + + if not hasattr(ctx, 'daemons'): + ctx.daemons = DaemonGroup() + + for id_, remote in samba_servers: + + rolestr = "samba.{id_}".format(id_=id_) + + confextras = """vfs objects = ceph + ceph:config_file = /etc/ceph/ceph.conf""" + + unc = "ceph" + backend = "/" + + if config[rolestr] is not None: + # verify that there's just one parameter in role + if len(config[rolestr]) != 1: + log.error("samba config for role samba.{id_} must have only one parameter".format(id_=id_)) + raise Exception('invalid config') + confextras = "" + (unc, backendstr) = config[rolestr].items()[0] + backend = backendstr.format(testdir=testdir) + + # on first samba role, set ownership and permissions of ceph root + # so that samba tests succeed + if config[rolestr] is None and id_ == samba_servers[0][0]: + remote.run( + args=[ + 'mkdir', '-p', '/tmp/cmnt', run.Raw('&&'), + 'sudo', 'ceph-fuse', '/tmp/cmnt', run.Raw('&&'), + 'sudo', 'chown', 'ubuntu:ubuntu', '/tmp/cmnt/', run.Raw('&&'), + 'sudo', 'chmod', '1777', '/tmp/cmnt/', run.Raw('&&'), + 'sudo', 'umount', '/tmp/cmnt/', run.Raw('&&'), + 'rm', '-rf', '/tmp/cmnt', + ], + ) + else: + remote.run( + args=[ + 'sudo', 'chown', 'ubuntu:ubuntu', backend, run.Raw('&&'), + 'sudo', 'chmod', '1777', backend, + ], + ) + + teuthology.sudo_write_file(remote, "/usr/local/samba/etc/smb.conf", """ +[global] + workgroup = WORKGROUP + netbios name = DOMAIN + +[{unc}] + path = {backend} + {extras} + writeable = 
yes + valid users = ubuntu +""".format(extras=confextras, unc=unc, backend=backend)) + + # create ubuntu user + remote.run( + args=[ + 'sudo', '/usr/local/samba/bin/smbpasswd', '-e', 'ubuntu', + run.Raw('||'), + 'printf', run.Raw('"ubuntu\nubuntu\n"'), + run.Raw('|'), + 'sudo', '/usr/local/samba/bin/smbpasswd', '-s', '-a', 'ubuntu' + ]) + + smbd_cmd = [ + 'sudo', + 'daemon-helper', + 'kill', + 'nostdin', + '/usr/local/samba/sbin/smbd', + '-F', + ] + ctx.daemons.add_daemon(remote, 'smbd', id_, + args=smbd_cmd, + logger=log.getChild("smbd.{id_}".format(id_=id_)), + stdin=run.PIPE, + wait=False, + ) + + # let smbd initialize, probably a better way... + seconds_to_sleep = 100 + log.info('Sleeping for %s seconds...' % seconds_to_sleep) + time.sleep(seconds_to_sleep) + log.info('Sleeping stopped...') + + try: + yield + finally: + log.info('Stopping smbd processes...') + exc_info = (None, None, None) + for d in ctx.daemons.iter_daemons_of_role('smbd'): + try: + d.stop() + except (run.CommandFailedError, + run.CommandCrashedError, + run.ConnectionLostError): + exc_info = sys.exc_info() + log.exception('Saw exception from %s.%s', d.role, d.id_) + if exc_info != (None, None, None): + raise exc_info[0], exc_info[1], exc_info[2] + + for id_, remote in samba_servers: + remote.run( + args=[ + 'sudo', + 'rm', '-rf', + '/usr/local/samba/etc/smb.conf', + '/usr/local/samba/private/*', + '/usr/local/samba/var/run/', + '/usr/local/samba/var/locks', + '/usr/local/samba/var/lock', + ], + ) + # make sure daemons are gone + try: + remote.run( + args=[ + 'while', + 'sudo', 'killall', '-9', 'smbd', + run.Raw(';'), + 'do', 'sleep', '1', + run.Raw(';'), + 'done', + ], + ) + + remote.run( + args=[ + 'sudo', + 'lsof', + backend, + ], + check_status=False + ) + remote.run( + args=[ + 'sudo', + 'fuser', + '-M', + backend, + ], + check_status=False + ) + except Exception: + log.exception("Saw exception") + pass diff --git a/qa/tasks/scrub.py b/qa/tasks/scrub.py new file mode 100644 index 
00000000000..7a25300a677 --- /dev/null +++ b/qa/tasks/scrub.py @@ -0,0 +1,117 @@ +""" +Scrub osds +""" +import contextlib +import gevent +import logging +import random +import time + +import ceph_manager +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Run scrub periodically. Randomly chooses an OSD to scrub. + + The config should be as follows: + + scrub: + frequency: + deep: + + example: + + tasks: + - ceph: + - scrub: + frequency: 30 + deep: 0 + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'scrub task only accepts a dict for configuration' + + log.info('Beginning scrub...') + + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') + while len(manager.get_osd_status()['up']) < num_osds: + manager.sleep(10) + + scrub_proc = Scrubber( + manager, + config, + ) + try: + yield + finally: + log.info('joining scrub') + scrub_proc.do_join() + +class Scrubber: + """ + Scrubbing is actually performed during initialzation + """ + def __init__(self, manager, config): + """ + Spawn scrubbing thread upon completion. 
+ """ + self.ceph_manager = manager + self.ceph_manager.wait_for_clean() + + osd_status = self.ceph_manager.get_osd_status() + self.osds = osd_status['up'] + + self.config = config + if self.config is None: + self.config = dict() + + else: + def tmp(x): + """Local display""" + print x + self.log = tmp + + self.stopping = False + + log.info("spawning thread") + + self.thread = gevent.spawn(self.do_scrub) + + def do_join(self): + """Scrubbing thread finished""" + self.stopping = True + self.thread.get() + + def do_scrub(self): + """Perform the scrub operation""" + frequency = self.config.get("frequency", 30) + deep = self.config.get("deep", 0) + + log.info("stopping %s" % self.stopping) + + while not self.stopping: + osd = str(random.choice(self.osds)) + + if deep: + cmd = 'deep-scrub' + else: + cmd = 'scrub' + + log.info('%sbing %s' % (cmd, osd)) + self.ceph_manager.raw_cluster_cmd('osd', cmd, osd) + + time.sleep(frequency) diff --git a/qa/tasks/scrub_test.py b/qa/tasks/scrub_test.py new file mode 100644 index 00000000000..3443ae9f45e --- /dev/null +++ b/qa/tasks/scrub_test.py @@ -0,0 +1,199 @@ +"""Scrub testing""" +from cStringIO import StringIO + +import logging +import os +import time + +import ceph_manager +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test [deep] scrub + + tasks: + - chef: + - install: + - ceph: + log-whitelist: + - '!= known digest' + - '!= known omap_digest' + - deep-scrub 0 missing, 1 inconsistent objects + - deep-scrub 1 errors + - repair 0 missing, 1 inconsistent objects + - repair 1 errors, 1 fixed + - scrub_test: + + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'scrub_test task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() + + num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') + log.info('num_osds is %s' % num_osds) + + 
manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < num_osds: + time.sleep(10) + + for i in range(num_osds): + manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'flush_pg_stats') + manager.wait_for_clean() + + # write some data + p = manager.do_rados(mon, ['-p', 'rbd', 'bench', '--no-cleanup', '1', 'write', '-b', '4096']) + err = p.exitstatus + log.info('err is %d' % err) + + # wait for some PG to have data that we can mess with + victim = None + osd = None + while victim is None: + stats = manager.get_pg_stats() + for pg in stats: + size = pg['stat_sum']['num_bytes'] + if size > 0: + victim = pg['pgid'] + osd = pg['acting'][0] + break + + if victim is None: + time.sleep(3) + + log.info('messing with PG %s on osd %d' % (victim, osd)) + + (osd_remote,) = ctx.cluster.only('osd.%d' % osd).remotes.iterkeys() + data_path = os.path.join( + '/var/lib/ceph/osd', + 'ceph-{id}'.format(id=osd), + 'current', + '{pg}_head'.format(pg=victim) + ) + + # fuzz time + ls_fp = StringIO() + osd_remote.run( + args=[ 'ls', data_path ], + stdout=ls_fp, + ) + ls_out = ls_fp.getvalue() + ls_fp.close() + + # find an object file we can mess with + osdfilename = None + for line in ls_out.split('\n'): + if 'object' in line: + osdfilename = line + break + assert osdfilename is not None + + # Get actual object name from osd stored filename + tmp=osdfilename.split('__') + objname=tmp[0] + objname=objname.replace('\u', '_') + log.info('fuzzing %s' % objname) + + # put a single \0 at the beginning of the file + osd_remote.run( + args=[ 'sudo', 'dd', + 'if=/dev/zero', + 'of=%s' % os.path.join(data_path, osdfilename), + 'bs=1', 'count=1', 'conv=notrunc' + ] + ) + + # scrub, verify inconsistent + manager.raw_cluster_cmd('pg', 'deep-scrub', victim) + # Give deep-scrub a chance to start + time.sleep(60) + + while True: + stats = manager.get_single_pg_stats(victim) + state = stats['state'] + + # wait for the scrub to 
finish + if 'scrubbing' in state: + time.sleep(3) + continue + + inconsistent = stats['state'].find('+inconsistent') != -1 + assert inconsistent + break + + + # repair, verify no longer inconsistent + manager.raw_cluster_cmd('pg', 'repair', victim) + # Give repair a chance to start + time.sleep(60) + + while True: + stats = manager.get_single_pg_stats(victim) + state = stats['state'] + + # wait for the scrub to finish + if 'scrubbing' in state: + time.sleep(3) + continue + + inconsistent = stats['state'].find('+inconsistent') != -1 + assert not inconsistent + break + + # Test deep-scrub with various omap modifications + manager.do_rados(mon, ['-p', 'rbd', 'setomapval', objname, 'key', 'val']) + manager.do_rados(mon, ['-p', 'rbd', 'setomapheader', objname, 'hdr']) + + # Modify omap on specific osd + log.info('fuzzing omap of %s' % objname) + manager.osd_admin_socket(osd, ['rmomapkey', 'rbd', objname, 'key']); + manager.osd_admin_socket(osd, ['setomapval', 'rbd', objname, 'badkey', 'badval']); + manager.osd_admin_socket(osd, ['setomapheader', 'rbd', objname, 'badhdr']); + + # scrub, verify inconsistent + manager.raw_cluster_cmd('pg', 'deep-scrub', victim) + # Give deep-scrub a chance to start + time.sleep(60) + + while True: + stats = manager.get_single_pg_stats(victim) + state = stats['state'] + + # wait for the scrub to finish + if 'scrubbing' in state: + time.sleep(3) + continue + + inconsistent = stats['state'].find('+inconsistent') != -1 + assert inconsistent + break + + # repair, verify no longer inconsistent + manager.raw_cluster_cmd('pg', 'repair', victim) + # Give repair a chance to start + time.sleep(60) + + while True: + stats = manager.get_single_pg_stats(victim) + state = stats['state'] + + # wait for the scrub to finish + if 'scrubbing' in state: + time.sleep(3) + continue + + inconsistent = stats['state'].find('+inconsistent') != -1 + assert not inconsistent + break + + log.info('test successful!') diff --git a/qa/tasks/tests/__init__.py 
b/qa/tasks/tests/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/tasks/tests/test_buildpackages.py b/qa/tasks/tests/test_buildpackages.py new file mode 100644 index 00000000000..fed5aa02b91 --- /dev/null +++ b/qa/tasks/tests/test_buildpackages.py @@ -0,0 +1,170 @@ +# py.test -v -s tests/test_buildpackages.py + +from mock import patch, Mock + +from .. import buildpackages +from teuthology import packaging + +def test_get_tag_branch_sha1(): + gitbuilder = packaging.GitbuilderProject( + 'ceph', + { + 'os_type': 'centos', + 'os_version': '7.0', + }) + (tag, branch, sha1) = buildpackages.get_tag_branch_sha1(gitbuilder) + assert tag == None + assert branch == None + assert sha1 is not None + + gitbuilder = packaging.GitbuilderProject( + 'ceph', + { + 'os_type': 'centos', + 'os_version': '7.0', + 'sha1': 'asha1', + }) + (tag, branch, sha1) = buildpackages.get_tag_branch_sha1(gitbuilder) + assert tag == None + assert branch == None + assert sha1 == 'asha1' + + remote = Mock + remote.arch = 'x86_64' + remote.os = Mock + remote.os.name = 'ubuntu' + remote.os.version = '14.04' + remote.os.codename = 'trusty' + remote.system_type = 'deb' + ctx = Mock + ctx.cluster = Mock + ctx.cluster.remotes = {remote: ['client.0']} + + expected_tag = 'v0.94.1' + expected_sha1 = 'expectedsha1' + def check_output(cmd, shell): + assert shell == True + return expected_sha1 + " refs/tags/" + expected_tag + with patch.multiple( + buildpackages, + check_output=check_output, + ): + gitbuilder = packaging.GitbuilderProject( + 'ceph', + { + 'os_type': 'centos', + 'os_version': '7.0', + 'sha1': 'asha1', + 'all': { + 'tag': tag, + }, + }, + ctx = ctx, + remote = remote) + (tag, branch, sha1) = buildpackages.get_tag_branch_sha1(gitbuilder) + assert tag == expected_tag + assert branch == None + assert sha1 == expected_sha1 + + expected_branch = 'hammer' + expected_sha1 = 'otherexpectedsha1' + def check_output(cmd, shell): + assert shell == True + return expected_sha1 + " 
refs/heads/" + expected_branch + with patch.multiple( + buildpackages, + check_output=check_output, + ): + gitbuilder = packaging.GitbuilderProject( + 'ceph', + { + 'os_type': 'centos', + 'os_version': '7.0', + 'sha1': 'asha1', + 'all': { + 'branch': branch, + }, + }, + ctx = ctx, + remote = remote) + (tag, branch, sha1) = buildpackages.get_tag_branch_sha1(gitbuilder) + assert tag == None + assert branch == expected_branch + assert sha1 == expected_sha1 + +def test_lookup_configs(): + expected_system_type = 'deb' + def make_remote(): + remote = Mock() + remote.arch = 'x86_64' + remote.os = Mock() + remote.os.name = 'ubuntu' + remote.os.version = '14.04' + remote.os.codename = 'trusty' + remote.system_type = expected_system_type + return remote + ctx = Mock() + class cluster: + remote1 = make_remote() + remote2 = make_remote() + remotes = { + remote1: ['client.0'], + remote2: ['mon.a','osd.0'], + } + def only(self, role): + result = Mock() + if role in ('client.0',): + result.remotes = { cluster.remote1: None } + elif role in ('osd.0', 'mon.a'): + result.remotes = { cluster.remote2: None } + else: + result.remotes = None + return result + ctx.cluster = cluster() + ctx.config = { + 'roles': [ ['client.0'], ['mon.a','osd.0'] ], + } + + # nothing -> nothing + assert buildpackages.lookup_configs(ctx, {}) == [] + assert buildpackages.lookup_configs(ctx, {1:[1,2,3]}) == [] + assert buildpackages.lookup_configs(ctx, [[1,2,3]]) == [] + assert buildpackages.lookup_configs(ctx, None) == [] + + # + # the overrides applies to install and to install.upgrade + # that have no tag, branch or sha1 + # + config = { + 'overrides': { + 'install': { + 'ceph': { + 'sha1': 'overridesha1', + 'tag': 'overridetag', + 'branch': 'overridebranch', + }, + }, + }, + 'tasks': [ + { + 'install': { + 'sha1': 'installsha1', + }, + }, + { + 'install.upgrade': { + 'osd.0': { + }, + 'client.0': { + 'sha1': 'client0sha1', + }, + }, + } + ], + } + ctx.config = config + expected_configs = [{'branch': 
'overridebranch', 'sha1': 'overridesha1', 'tag': 'overridetag'}, + {'project': 'ceph', 'branch': 'overridebranch', 'sha1': 'overridesha1', 'tag': 'overridetag'}, + {'project': 'ceph', 'sha1': 'client0sha1'}] + + assert buildpackages.lookup_configs(ctx, config) == expected_configs diff --git a/qa/tasks/tests/test_devstack.py b/qa/tasks/tests/test_devstack.py new file mode 100644 index 00000000000..117b3076818 --- /dev/null +++ b/qa/tasks/tests/test_devstack.py @@ -0,0 +1,48 @@ +from textwrap import dedent + +from .. import devstack + + +class TestDevstack(object): + def test_parse_os_table(self): + table_str = dedent(""" + +---------------------+--------------------------------------+ + | Property | Value | + +---------------------+--------------------------------------+ + | attachments | [] | + | availability_zone | nova | + | bootable | false | + | created_at | 2014-02-21T17:14:47.548361 | + | display_description | None | + | display_name | NAME | + | id | ffdbd1bb-60dc-4d95-acfe-88774c09ad3e | + | metadata | {} | + | size | 1 | + | snapshot_id | None | + | source_volid | None | + | status | creating | + | volume_type | None | + +---------------------+--------------------------------------+ + """).strip() + expected = { + 'Property': 'Value', + 'attachments': '[]', + 'availability_zone': 'nova', + 'bootable': 'false', + 'created_at': '2014-02-21T17:14:47.548361', + 'display_description': 'None', + 'display_name': 'NAME', + 'id': 'ffdbd1bb-60dc-4d95-acfe-88774c09ad3e', + 'metadata': '{}', + 'size': '1', + 'snapshot_id': 'None', + 'source_volid': 'None', + 'status': 'creating', + 'volume_type': 'None'} + + vol_info = devstack.parse_os_table(table_str) + assert vol_info == expected + + + + diff --git a/qa/tasks/tests/test_radosgw_admin.py b/qa/tasks/tests/test_radosgw_admin.py new file mode 100644 index 00000000000..59f357891ca --- /dev/null +++ b/qa/tasks/tests/test_radosgw_admin.py @@ -0,0 +1,31 @@ +from mock import Mock + +from .. 
import radosgw_admin + +acl_with_version = """fooFoofooFooFULL_CONTROL +""" # noqa + + +acl_without_version = """fooFoofooFooFULL_CONTROL +""" # noqa + + +class TestGetAcl(object): + + def setup(self): + self.key = Mock() + + def test_removes_xml_version(self): + self.key.get_xml_acl = Mock(return_value=acl_with_version) + result = radosgw_admin.get_acl(self.key) + assert result.startswith(' +# +# Author: Loic Dachary +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +from .. 
import rados + +class TestRados(object): + + def test_cmd_erasure_code_profile(self): + name = 'NAME' + cmd = rados.cmd_erasure_code_profile(name, {}) + assert 'k=2' in cmd + assert name in cmd + cmd = rados.cmd_erasure_code_profile(name, { 'k': '88' }) + assert 'k=88' in cmd + assert name in cmd diff --git a/qa/tasks/watch_notify_same_primary.py b/qa/tasks/watch_notify_same_primary.py new file mode 100644 index 00000000000..168b6bb6c30 --- /dev/null +++ b/qa/tasks/watch_notify_same_primary.py @@ -0,0 +1,133 @@ + +""" +watch_notify_same_primary task +""" +from cStringIO import StringIO +import contextlib +import logging + +from teuthology.orchestra import run +from teuthology.contextutil import safe_while + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Run watch_notify_same_primary + + The config should be as follows: + + watch_notify_same_primary: + clients: [client list] + + The client list should contain 1 client + + The test requires 3 osds. + + example: + + tasks: + - ceph: + - watch_notify_same_primary: + clients: [client.0] + - interactive: + """ + log.info('Beginning watch_notify_same_primary...') + assert isinstance(config, dict), \ + "please list clients to run on" + + clients = config.get('clients', ['client.0']) + assert len(clients) == 1 + role = clients[0] + assert isinstance(role, basestring) + PREFIX = 'client.' 
+ assert role.startswith(PREFIX) + (remote,) = ctx.cluster.only(role).remotes.iterkeys() + ctx.manager.raw_cluster_cmd('osd', 'set', 'noout') + + pool = ctx.manager.create_pool_with_unique_name() + def obj(n): return "foo-{num}".format(num=n) + def start_watch(n): + remote.run( + args = [ + "rados", + "-p", pool, + "put", + obj(n), + "/etc/resolv.conf"], + logger=log.getChild('watch.{id}'.format(id=n))) + proc = remote.run( + args = [ + "rados", + "-p", pool, + "watch", + obj(n)], + stdin=run.PIPE, + stdout=StringIO(), + stderr=StringIO(), + wait=False) + return proc + + num = 20 + + watches = [start_watch(i) for i in range(num)] + + # wait for them all to register + for i in range(num): + with safe_while() as proceed: + while proceed(): + proc = remote.run( + args = [ + "rados", + "-p", pool, + "listwatchers", + obj(i)], + stdout=StringIO()) + lines = proc.stdout.getvalue() + num_watchers = lines.count('watcher=') + log.info('i see %d watchers for %s', num_watchers, obj(i)) + if num_watchers >= 1: + break + + def notify(n, msg): + remote.run( + args = [ + "rados", + "-p", pool, + "notify", + obj(n), + msg], + logger=log.getChild('notify.{id}'.format(id=n))) + + [notify(n, 'notify1') for n in range(len(watches))] + + ctx.manager.kill_osd(0) + ctx.manager.mark_down_osd(0) + + [notify(n, 'notify2') for n in range(len(watches))] + + try: + yield + finally: + log.info('joining watch_notify_stress') + for watch in watches: + watch.stdin.write("\n") + + run.wait(watches) + + for watch in watches: + lines = watch.stdout.getvalue().split("\n") + got1 = False + got2 = False + for l in lines: + if 'notify1' in l: + got1 = True + if 'notify2' in l: + got2 = True + log.info(lines) + assert got1 and got2 + + ctx.manager.revive_osd(0) + ctx.manager.remove_pool(pool) diff --git a/qa/tasks/watch_notify_stress.py b/qa/tasks/watch_notify_stress.py new file mode 100644 index 00000000000..6db313fea6d --- /dev/null +++ b/qa/tasks/watch_notify_stress.py @@ -0,0 +1,69 @@ +""" 
+test_stress_watch task +""" +import contextlib +import logging +import proc_thrasher + +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Run test_stress_watch + + The config should be as follows: + + test_stress_watch: + clients: [client list] + + example: + + tasks: + - ceph: + - test_stress_watch: + clients: [client.0] + - interactive: + """ + log.info('Beginning test_stress_watch...') + assert isinstance(config, dict), \ + "please list clients to run on" + testwatch = {} + + remotes = [] + + for role in config.get('clients', ['client.0']): + assert isinstance(role, basestring) + PREFIX = 'client.' + assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + (remote,) = ctx.cluster.only(role).remotes.iterkeys() + remotes.append(remote) + + args =['CEPH_CLIENT_ID={id_}'.format(id_=id_), + 'CEPH_ARGS="{flags}"'.format(flags=config.get('flags', '')), + 'daemon-helper', + 'kill', + 'multi_stress_watch foo foo' + ] + + log.info("args are %s" % (args,)) + + proc = proc_thrasher.ProcThrasher({}, remote, + args=[run.Raw(i) for i in args], + logger=log.getChild('testwatch.{id}'.format(id=id_)), + stdin=run.PIPE, + wait=False + ) + proc.start() + testwatch[id_] = proc + + try: + yield + finally: + log.info('joining watch_notify_stress') + for i in testwatch.itervalues(): + i.join() diff --git a/qa/tasks/workunit.py b/qa/tasks/workunit.py new file mode 100644 index 00000000000..833d81df217 --- /dev/null +++ b/qa/tasks/workunit.py @@ -0,0 +1,380 @@ +""" +Workunit task -- Run ceph on sets of specific clients +""" +import logging +import pipes +import os + +from teuthology import misc +from teuthology.config import config as teuth_config +from teuthology.orchestra.run import CommandFailedError +from teuthology.parallel import parallel +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +CLIENT_PREFIX = 'client.' 
+ + +def task(ctx, config): + """ + Run ceph on all workunits found under the specified path. + + For example:: + + tasks: + - ceph: + - ceph-fuse: [client.0] + - workunit: + clients: + client.0: [direct_io, xattrs.sh] + client.1: [snaps] + branch: foo + + You can also run a list of workunits on all clients: + tasks: + - ceph: + - ceph-fuse: + - workunit: + tag: v0.47 + clients: + all: [direct_io, xattrs.sh, snaps] + + If you have an "all" section it will run all the workunits + on each client simultaneously, AFTER running any workunits specified + for individual clients. (This prevents unintended simultaneous runs.) + + To customize tests, you can specify environment variables as a dict. You + can also specify a time limit for each work unit (defaults to 3h): + + tasks: + - ceph: + - ceph-fuse: + - workunit: + sha1: 9b28948635b17165d17c1cf83d4a870bd138ddf6 + clients: + all: [snaps] + env: + FOO: bar + BAZ: quux + timeout: 3h + + :param ctx: Context + :param config: Configuration + """ + assert isinstance(config, dict) + assert isinstance(config.get('clients'), dict), \ + 'configuration must contain a dictionary of clients' + + overrides = ctx.config.get('overrides', {}) + misc.deep_merge(config, overrides.get('workunit', {})) + + refspec = config.get('branch') + if refspec is None: + refspec = config.get('tag') + if refspec is None: + refspec = config.get('sha1') + if refspec is None: + refspec = 'HEAD' + + timeout = config.get('timeout', '3h') + + log.info('Pulling workunits from ref %s', refspec) + + created_mountpoint = {} + + if config.get('env') is not None: + assert isinstance(config['env'], dict), 'env must be a dictionary' + clients = config['clients'] + + # Create scratch dirs for any non-all workunits + log.info('Making a separate scratch dir for every client...') + for role in clients.iterkeys(): + assert isinstance(role, basestring) + if role == "all": + continue + + assert role.startswith(CLIENT_PREFIX) + created_mnt_dir = _make_scratch_dir(ctx, role, 
config.get('subdir')) + created_mountpoint[role] = created_mnt_dir + + # Execute any non-all workunits + with parallel() as p: + for role, tests in clients.iteritems(): + if role != "all": + p.spawn(_run_tests, ctx, refspec, role, tests, + config.get('env'), timeout=timeout) + + # Clean up dirs from any non-all workunits + for role, created in created_mountpoint.items(): + _delete_dir(ctx, role, created) + + # Execute any 'all' workunits + if 'all' in clients: + all_tasks = clients["all"] + _spawn_on_all_clients(ctx, refspec, all_tasks, config.get('env'), + config.get('subdir'), timeout=timeout) + + +def _delete_dir(ctx, role, created_mountpoint): + """ + Delete file used by this role, and delete the directory that this + role appeared in. + + :param ctx: Context + :param role: "role.#" where # is used for the role id. + """ + testdir = misc.get_testdir(ctx) + id_ = role[len(CLIENT_PREFIX):] + (remote,) = ctx.cluster.only(role).remotes.iterkeys() + mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_)) + # Is there any reason why this is not: join(mnt, role) ? + client = os.path.join(mnt, 'client.{id}'.format(id=id_)) + + # Remove the directory inside the mount where the workunit ran + remote.run( + args=[ + 'sudo', + 'rm', + '-rf', + '--', + client, + ], + ) + log.info("Deleted dir {dir}".format(dir=client)) + + # If the mount was an artificially created dir, delete that too + if created_mountpoint: + remote.run( + args=[ + 'rmdir', + '--', + mnt, + ], + ) + log.info("Deleted artificial mount point {dir}".format(dir=client)) + + +def _make_scratch_dir(ctx, role, subdir): + """ + Make scratch directories for this role. This also makes the mount + point if that directory does not exist. + + :param ctx: Context + :param role: "role.#" where # is used for the role id. 
+ :param subdir: use this subdir (False if not used) + """ + created_mountpoint = False + id_ = role[len(CLIENT_PREFIX):] + log.debug("getting remote for {id} role {role_}".format(id=id_, role_=role)) + (remote,) = ctx.cluster.only(role).remotes.iterkeys() + dir_owner = remote.user + mnt = os.path.join(misc.get_testdir(ctx), 'mnt.{id}'.format(id=id_)) + # if neither kclient nor ceph-fuse are required for a workunit, + # mnt may not exist. Stat and create the directory if it doesn't. + try: + remote.run( + args=[ + 'stat', + '--', + mnt, + ], + ) + log.info('Did not need to create dir {dir}'.format(dir=mnt)) + except CommandFailedError: + remote.run( + args=[ + 'mkdir', + '--', + mnt, + ], + ) + log.info('Created dir {dir}'.format(dir=mnt)) + created_mountpoint = True + + if not subdir: + subdir = 'client.{id}'.format(id=id_) + + if created_mountpoint: + remote.run( + args=[ + 'cd', + '--', + mnt, + run.Raw('&&'), + 'mkdir', + '--', + subdir, + ], + ) + else: + remote.run( + args=[ + # cd first so this will fail if the mount point does + # not exist; pure install -d will silently do the + # wrong thing + 'cd', + '--', + mnt, + run.Raw('&&'), + 'sudo', + 'install', + '-d', + '-m', '0755', + '--owner={user}'.format(user=dir_owner), + '--', + subdir, + ], + ) + + return created_mountpoint + + +def _spawn_on_all_clients(ctx, refspec, tests, env, subdir, timeout=None): + """ + Make a scratch directory for each client in the cluster, and then for each + test spawn _run_tests() for each role. + + See run_tests() for parameter documentation. 
+ """ + client_generator = misc.all_roles_of_type(ctx.cluster, 'client') + client_remotes = list() + + created_mountpoint = {} + for client in client_generator: + (client_remote,) = ctx.cluster.only('client.{id}'.format(id=client)).remotes.iterkeys() + client_remotes.append((client_remote, 'client.{id}'.format(id=client))) + created_mountpoint[client] = _make_scratch_dir(ctx, "client.{id}".format(id=client), subdir) + + for unit in tests: + with parallel() as p: + for remote, role in client_remotes: + p.spawn(_run_tests, ctx, refspec, role, [unit], env, subdir, + timeout=timeout) + + # cleanup the generated client directories + client_generator = misc.all_roles_of_type(ctx.cluster, 'client') + for client in client_generator: + _delete_dir(ctx, 'client.{id}'.format(id=client), created_mountpoint[client]) + + +def _run_tests(ctx, refspec, role, tests, env, subdir=None, timeout=None): + """ + Run the individual test. Create a scratch directory and then extract the + workunits from git. Make the executables, and then run the tests. + Clean up (remove files created) after the tests are finished. + + :param ctx: Context + :param refspec: branch, sha1, or version tag used to identify this + build + :param tests: specific tests specified. + :param env: environment set in yaml file. Could be None. + :param subdir: subdirectory set in yaml file. Could be None + :param timeout: If present, use the 'timeout' command on the remote host + to limit execution time. Must be specified by a number + followed by 's' for seconds, 'm' for minutes, 'h' for + hours, or 'd' for days. If '0' or anything that evaluates + to False is passed, the 'timeout' command is not used. 
+ """ + testdir = misc.get_testdir(ctx) + assert isinstance(role, basestring) + assert role.startswith(CLIENT_PREFIX) + id_ = role[len(CLIENT_PREFIX):] + (remote,) = ctx.cluster.only(role).remotes.iterkeys() + mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_)) + # subdir so we can remove and recreate this a lot without sudo + if subdir is None: + scratch_tmp = os.path.join(mnt, 'client.{id}'.format(id=id_), 'tmp') + else: + scratch_tmp = os.path.join(mnt, subdir) + srcdir = '{tdir}/workunit.{role}'.format(tdir=testdir, role=role) + clonedir = '{tdir}/clone.{role}'.format(tdir=testdir, role=role) + + git_url = teuth_config.get_ceph_git_url() + remote.run( + logger=log.getChild(role), + args=[ + 'git', + 'clone', + git_url, + clonedir, + run.Raw(';'), + 'cd', '--', clonedir, + run.Raw('&&'), + 'git', 'checkout', refspec, + run.Raw('&&'), + 'mv', 'qa/workunits', srcdir, + ], + ) + + remote.run( + logger=log.getChild(role), + args=[ + 'cd', '--', srcdir, + run.Raw('&&'), + 'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi', + run.Raw('&&'), + 'find', '-executable', '-type', 'f', '-printf', r'%P\0'.format(srcdir=srcdir), + run.Raw('>{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)), + ], + ) + + workunits_file = '{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role) + workunits = sorted(misc.get_file(remote, workunits_file).split('\0')) + assert workunits + + try: + assert isinstance(tests, list) + for spec in tests: + log.info('Running workunits matching %s on %s...', spec, role) + prefix = '{spec}/'.format(spec=spec) + to_run = [w for w in workunits if w == spec or w.startswith(prefix)] + if not to_run: + raise RuntimeError('Spec did not match any workunits: {spec!r}'.format(spec=spec)) + for workunit in to_run: + log.info('Running workunit %s...', workunit) + args = [ + 'mkdir', '-p', '--', scratch_tmp, + run.Raw('&&'), + 'cd', '--', scratch_tmp, + run.Raw('&&'), + run.Raw('CEPH_CLI_TEST_DUP_COMMAND=1'), + 
run.Raw('CEPH_REF={ref}'.format(ref=refspec)), + run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)), + run.Raw('CEPH_ID="{id}"'.format(id=id_)), + run.Raw('PATH=$PATH:/usr/sbin') + ] + if env is not None: + for var, val in env.iteritems(): + quoted_val = pipes.quote(val) + env_arg = '{var}={val}'.format(var=var, val=quoted_val) + args.append(run.Raw(env_arg)) + args.extend([ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir)]) + if timeout and timeout != '0': + args.extend(['timeout', timeout]) + args.extend([ + '{srcdir}/{workunit}'.format( + srcdir=srcdir, + workunit=workunit, + ), + ]) + remote.run( + logger=log.getChild(role), + args=args, + label="workunit test {workunit}".format(workunit=workunit) + ) + remote.run( + logger=log.getChild(role), + args=['sudo', 'rm', '-rf', '--', scratch_tmp], + ) + finally: + log.info('Stopping %s on %s...', tests, role) + remote.run( + logger=log.getChild(role), + args=[ + 'rm', '-rf', '--', workunits_file, srcdir, clonedir, + ], + ) diff --git a/qa/tox.ini b/qa/tox.ini new file mode 100644 index 00000000000..c5826ecb6ec --- /dev/null +++ b/qa/tox.ini @@ -0,0 +1,8 @@ +[tox] +envlist = flake8 +skipsdist = True + +[testenv:flake8] +deps= + flake8 +commands=flake8 --select=F,E9 --exclude=venv,.tox diff --git a/rgw_pool_type/ec-cache.yaml b/rgw_pool_type/ec-cache.yaml deleted file mode 100644 index 6462fbe8862..00000000000 --- a/rgw_pool_type/ec-cache.yaml +++ /dev/null @@ -1,6 +0,0 @@ -overrides: - rgw: - ec-data-pool: true - cache-pools: true - s3tests: - slow_backend: true diff --git a/rgw_pool_type/ec-profile.yaml b/rgw_pool_type/ec-profile.yaml deleted file mode 100644 index 52798f85e72..00000000000 --- a/rgw_pool_type/ec-profile.yaml +++ /dev/null @@ -1,10 +0,0 @@ -overrides: - rgw: - ec-data-pool: true - erasure_code_profile: - name: testprofile - k: 3 - m: 1 - ruleset-failure-domain: osd - s3tests: - slow_backend: true diff --git a/rgw_pool_type/ec.yaml b/rgw_pool_type/ec.yaml deleted 
file mode 100644 index 7c99b7f85c8..00000000000 --- a/rgw_pool_type/ec.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - rgw: - ec-data-pool: true - s3tests: - slow_backend: true diff --git a/rgw_pool_type/replicated.yaml b/rgw_pool_type/replicated.yaml deleted file mode 100644 index c91709eaae7..00000000000 --- a/rgw_pool_type/replicated.yaml +++ /dev/null @@ -1,3 +0,0 @@ -overrides: - rgw: - ec-data-pool: false diff --git a/suites/big/rados-thrash/% b/suites/big/rados-thrash/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/big/rados-thrash/ceph/ceph.yaml b/suites/big/rados-thrash/ceph/ceph.yaml deleted file mode 100644 index 2030acb9083..00000000000 --- a/suites/big/rados-thrash/ceph/ceph.yaml +++ /dev/null @@ -1,3 +0,0 @@ -tasks: -- install: -- ceph: diff --git a/suites/big/rados-thrash/clusters/big.yaml b/suites/big/rados-thrash/clusters/big.yaml deleted file mode 100644 index 18197ad8571..00000000000 --- a/suites/big/rados-thrash/clusters/big.yaml +++ /dev/null @@ -1,68 +0,0 @@ -roles: -- [osd.0, osd.1, osd.2, client.0, mon.a] -- [osd.3, osd.4, osd.5, client.1, mon.b] -- [osd.6, osd.7, osd.8, client.2, mon.c] -- [osd.9, osd.10, osd.11, client.3, mon.d] -- [osd.12, osd.13, osd.14, client.4, mon.e] -- [osd.15, osd.16, osd.17, client.5] -- [osd.18, osd.19, osd.20, client.6] -- [osd.21, osd.22, osd.23, client.7] -- [osd.24, osd.25, osd.26, client.8] -- [osd.27, osd.28, osd.29, client.9] -- [osd.30, osd.31, osd.32, client.10] -- [osd.33, osd.34, osd.35, client.11] -- [osd.36, osd.37, osd.38, client.12] -- [osd.39, osd.40, osd.41, client.13] -- [osd.42, osd.43, osd.44, client.14] -- [osd.45, osd.46, osd.47, client.15] -- [osd.48, osd.49, osd.50, client.16] -- [osd.51, osd.52, osd.53, client.17] -- [osd.54, osd.55, osd.56, client.18] -- [osd.57, osd.58, osd.59, client.19] -- [osd.60, osd.61, osd.62, client.20] -- [osd.63, osd.64, osd.65, client.21] -- [osd.66, osd.67, osd.68, client.22] -- [osd.69, osd.70, osd.71, client.23] -- [osd.72, 
osd.73, osd.74, client.24] -- [osd.75, osd.76, osd.77, client.25] -- [osd.78, osd.79, osd.80, client.26] -- [osd.81, osd.82, osd.83, client.27] -- [osd.84, osd.85, osd.86, client.28] -- [osd.87, osd.88, osd.89, client.29] -- [osd.90, osd.91, osd.92, client.30] -- [osd.93, osd.94, osd.95, client.31] -- [osd.96, osd.97, osd.98, client.32] -- [osd.99, osd.100, osd.101, client.33] -- [osd.102, osd.103, osd.104, client.34] -- [osd.105, osd.106, osd.107, client.35] -- [osd.108, osd.109, osd.110, client.36] -- [osd.111, osd.112, osd.113, client.37] -- [osd.114, osd.115, osd.116, client.38] -- [osd.117, osd.118, osd.119, client.39] -- [osd.120, osd.121, osd.122, client.40] -- [osd.123, osd.124, osd.125, client.41] -- [osd.126, osd.127, osd.128, client.42] -- [osd.129, osd.130, osd.131, client.43] -- [osd.132, osd.133, osd.134, client.44] -- [osd.135, osd.136, osd.137, client.45] -- [osd.138, osd.139, osd.140, client.46] -- [osd.141, osd.142, osd.143, client.47] -- [osd.144, osd.145, osd.146, client.48] -- [osd.147, osd.148, osd.149, client.49] -- [osd.150, osd.151, osd.152, client.50] -#- [osd.153, osd.154, osd.155, client.51] -#- [osd.156, osd.157, osd.158, client.52] -#- [osd.159, osd.160, osd.161, client.53] -#- [osd.162, osd.163, osd.164, client.54] -#- [osd.165, osd.166, osd.167, client.55] -#- [osd.168, osd.169, osd.170, client.56] -#- [osd.171, osd.172, osd.173, client.57] -#- [osd.174, osd.175, osd.176, client.58] -#- [osd.177, osd.178, osd.179, client.59] -#- [osd.180, osd.181, osd.182, client.60] -#- [osd.183, osd.184, osd.185, client.61] -#- [osd.186, osd.187, osd.188, client.62] -#- [osd.189, osd.190, osd.191, client.63] -#- [osd.192, osd.193, osd.194, client.64] -#- [osd.195, osd.196, osd.197, client.65] -#- [osd.198, osd.199, osd.200, client.66] diff --git a/suites/big/rados-thrash/clusters/medium.yaml b/suites/big/rados-thrash/clusters/medium.yaml deleted file mode 100644 index 48b66dd5ca3..00000000000 --- a/suites/big/rados-thrash/clusters/medium.yaml +++ 
/dev/null @@ -1,22 +0,0 @@ -roles: -- [osd.0, osd.1, osd.2, client.0, mon.a] -- [osd.3, osd.4, osd.5, client.1, mon.b] -- [osd.6, osd.7, osd.8, client.2, mon.c] -- [osd.9, osd.10, osd.11, client.3, mon.d] -- [osd.12, osd.13, osd.14, client.4, mon.e] -- [osd.15, osd.16, osd.17, client.5] -- [osd.18, osd.19, osd.20, client.6] -- [osd.21, osd.22, osd.23, client.7] -- [osd.24, osd.25, osd.26, client.8] -- [osd.27, osd.28, osd.29, client.9] -- [osd.30, osd.31, osd.32, client.10] -- [osd.33, osd.34, osd.35, client.11] -- [osd.36, osd.37, osd.38, client.12] -- [osd.39, osd.40, osd.41, client.13] -- [osd.42, osd.43, osd.44, client.14] -- [osd.45, osd.46, osd.47, client.15] -- [osd.48, osd.49, osd.50, client.16] -- [osd.51, osd.52, osd.53, client.17] -- [osd.54, osd.55, osd.56, client.18] -- [osd.57, osd.58, osd.59, client.19] -- [osd.60, osd.61, osd.62, client.20] diff --git a/suites/big/rados-thrash/clusters/small.yaml b/suites/big/rados-thrash/clusters/small.yaml deleted file mode 100644 index b5a79906c69..00000000000 --- a/suites/big/rados-thrash/clusters/small.yaml +++ /dev/null @@ -1,6 +0,0 @@ -roles: -- [osd.0, osd.1, osd.2, client.0, mon.a] -- [osd.3, osd.4, osd.5, client.1, mon.b] -- [osd.6, osd.7, osd.8, client.2, mon.c] -- [osd.9, osd.10, osd.11, client.3, mon.d] -- [osd.12, osd.13, osd.14, client.4, mon.e] diff --git a/suites/big/rados-thrash/fs/btrfs.yaml b/suites/big/rados-thrash/fs/btrfs.yaml deleted file mode 100644 index 0b3f6fac7a5..00000000000 --- a/suites/big/rados-thrash/fs/btrfs.yaml +++ /dev/null @@ -1,7 +0,0 @@ -overrides: - ceph: - fs: btrfs - conf: - osd: - osd sloppy crc: true - osd op thread timeout: 60 diff --git a/suites/big/rados-thrash/fs/xfs.yaml b/suites/big/rados-thrash/fs/xfs.yaml deleted file mode 100644 index b4a82911a2f..00000000000 --- a/suites/big/rados-thrash/fs/xfs.yaml +++ /dev/null @@ -1,6 +0,0 @@ -overrides: - ceph: - fs: xfs - conf: - osd: - osd sloppy crc: true diff --git a/suites/big/rados-thrash/thrashers/default.yaml 
b/suites/big/rados-thrash/thrashers/default.yaml deleted file mode 100644 index d67ff20a693..00000000000 --- a/suites/big/rados-thrash/thrashers/default.yaml +++ /dev/null @@ -1,10 +0,0 @@ -overrides: - ceph: - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost -tasks: -- thrashosds: - timeout: 1200 - chance_pgnum_grow: 1 - chance_pgpnum_fix: 1 diff --git a/suites/big/rados-thrash/workloads/snaps-few-objects.yaml b/suites/big/rados-thrash/workloads/snaps-few-objects.yaml deleted file mode 100644 index b73bb6781dc..00000000000 --- a/suites/big/rados-thrash/workloads/snaps-few-objects.yaml +++ /dev/null @@ -1,13 +0,0 @@ -tasks: -- rados: - ops: 4000 - max_seconds: 3600 - objects: 50 - op_weights: - read: 100 - write: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 - copy_from: 50 diff --git a/suites/buildpackages/any/% b/suites/buildpackages/any/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/buildpackages/any/distros b/suites/buildpackages/any/distros deleted file mode 120000 index 1ce8f29bf14..00000000000 --- a/suites/buildpackages/any/distros +++ /dev/null @@ -1 +0,0 @@ -../../../distros/all \ No newline at end of file diff --git a/suites/buildpackages/any/tasks/release.yaml b/suites/buildpackages/any/tasks/release.yaml deleted file mode 100644 index d7a3b62c8cd..00000000000 --- a/suites/buildpackages/any/tasks/release.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# --suite buildpackages/any --ceph v10.0.1 --filter centos_7,ubuntu_14.04 -roles: - - [client.0] -tasks: - - install: - - exec: - client.0: - - ceph --version | grep 'version ' diff --git a/suites/buildpackages/tests/% b/suites/buildpackages/tests/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/buildpackages/tests/distros b/suites/buildpackages/tests/distros deleted file mode 120000 index 1ce8f29bf14..00000000000 --- a/suites/buildpackages/tests/distros +++ /dev/null @@ -1 +0,0 @@ -../../../distros/all \ 
No newline at end of file diff --git a/suites/buildpackages/tests/tasks/release.yaml b/suites/buildpackages/tests/tasks/release.yaml deleted file mode 100644 index 05e87789d71..00000000000 --- a/suites/buildpackages/tests/tasks/release.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# --suite buildpackages/tests --ceph v10.0.1 --filter centos_7.2,ubuntu_14.04 -overrides: - ansible.cephlab: - playbook: users.yml - buildpackages: - good_machine: - disk: 20 # GB - ram: 2000 # MB - cpus: 2 - min_machine: - disk: 10 # GB - ram: 1000 # MB - cpus: 1 -roles: - - [client.0] -tasks: - - install: - - exec: - client.0: - - ceph --version | grep 'version ' diff --git a/suites/calamari/% b/suites/calamari/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/calamari/clusters/osd-3.yaml b/suites/calamari/clusters/osd-3.yaml deleted file mode 100644 index 66f4fe59850..00000000000 --- a/suites/calamari/clusters/osd-3.yaml +++ /dev/null @@ -1,5 +0,0 @@ -roles: -- [client.0] -- [mon.0, osd.0] -- [osd.1] -- [osd.2] diff --git a/suites/calamari/distros/centos6.4.yaml b/suites/calamari/distros/centos6.4.yaml deleted file mode 100644 index 2240054be17..00000000000 --- a/suites/calamari/distros/centos6.4.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: centos -os_version: '6.4' diff --git a/suites/calamari/distros/centos6.5.yaml b/suites/calamari/distros/centos6.5.yaml deleted file mode 100644 index e2ee6b36f2e..00000000000 --- a/suites/calamari/distros/centos6.5.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: centos -os_version: '6.5' diff --git a/suites/calamari/distros/precise.yaml b/suites/calamari/distros/precise.yaml deleted file mode 100644 index 7aaa31b660c..00000000000 --- a/suites/calamari/distros/precise.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: ubuntu -os_version: precise diff --git a/suites/calamari/distros/rhel6.4.yaml b/suites/calamari/distros/rhel6.4.yaml deleted file mode 100644 index 72dd4d13109..00000000000 --- a/suites/calamari/distros/rhel6.4.yaml +++ 
/dev/null @@ -1,2 +0,0 @@ -os_type: rhel -os_version: '6.4' diff --git a/suites/calamari/distros/rhel6.5.yaml b/suites/calamari/distros/rhel6.5.yaml deleted file mode 100644 index 4294d98d6f3..00000000000 --- a/suites/calamari/distros/rhel6.5.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: rhel -os_version: '6.5' diff --git a/suites/calamari/distros/rhel7.0.yaml b/suites/calamari/distros/rhel7.0.yaml deleted file mode 100644 index 1571f9477ad..00000000000 --- a/suites/calamari/distros/rhel7.0.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: rhel -os_version: '7.0' diff --git a/suites/calamari/distros/trusty.yaml b/suites/calamari/distros/trusty.yaml deleted file mode 100644 index cef9fd0037e..00000000000 --- a/suites/calamari/distros/trusty.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: ubuntu -os_version: trusty diff --git a/suites/calamari/distros/wheezy.yaml.disabled b/suites/calamari/distros/wheezy.yaml.disabled deleted file mode 100644 index 47c54de4649..00000000000 --- a/suites/calamari/distros/wheezy.yaml.disabled +++ /dev/null @@ -1,2 +0,0 @@ -os_type: debian -os_version: '7.0' diff --git a/suites/calamari/tasks/calamari.yaml b/suites/calamari/tasks/calamari.yaml deleted file mode 100644 index 70e1129a032..00000000000 --- a/suites/calamari/tasks/calamari.yaml +++ /dev/null @@ -1,10 +0,0 @@ -machine_type: vps - -tasks: -- ssh_keys: -- calamari_setup: - iceball_location: http://download.inktank.com/enterprise-testing - ice_version: 1.2.2 - email: calamari@inktank.com -- calamari_nosetests: - calamari_branch: wip-testing-1.2.2 diff --git a/suites/ceph-deploy/basic/% b/suites/ceph-deploy/basic/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/ceph-deploy/basic/ceph-deploy-overrides b/suites/ceph-deploy/basic/ceph-deploy-overrides deleted file mode 120000 index 3954c75a789..00000000000 --- a/suites/ceph-deploy/basic/ceph-deploy-overrides +++ /dev/null @@ -1 +0,0 @@ -../../../ceph-deploy-overrides \ No newline at end of file diff --git 
a/suites/ceph-deploy/basic/config_options b/suites/ceph-deploy/basic/config_options deleted file mode 120000 index 50bcdbe600e..00000000000 --- a/suites/ceph-deploy/basic/config_options +++ /dev/null @@ -1 +0,0 @@ -../../../config_options \ No newline at end of file diff --git a/suites/ceph-deploy/basic/distros b/suites/ceph-deploy/basic/distros deleted file mode 120000 index c5d59352cb5..00000000000 --- a/suites/ceph-deploy/basic/distros +++ /dev/null @@ -1 +0,0 @@ -../../../distros/supported \ No newline at end of file diff --git a/suites/ceph-deploy/basic/tasks/ceph-deploy_hello_world.yaml b/suites/ceph-deploy/basic/tasks/ceph-deploy_hello_world.yaml deleted file mode 100644 index 1e090fad379..00000000000 --- a/suites/ceph-deploy/basic/tasks/ceph-deploy_hello_world.yaml +++ /dev/null @@ -1,37 +0,0 @@ -overrides: - ceph-deploy: - conf: - global: - debug ms: 1 - osd: - debug osd: 10 - mon: - debug mon: 10 -roles: -- - mon.a - - mds.0 - - osd.0 -- - osd.1 - - mon.b - - client.0 -openstack: - - machine: - disk: 10 # GB - ram: 2000 # MB - cpus: 1 - volumes: # attached to each instance - count: 3 - size: 10 # GB -tasks: -- install: - extras: yes -- print: "**** done install extras" -- ssh_keys: -- print: "**** done ssh_keys" -- ceph-deploy: -- print: "**** done ceph-deploy" -- workunit: - clients: - client.0: - - ceph-deploy/ceph-deploy_hello_world.sh -- print: "**** done ceph-deploy/ceph-deploy_hello_world.sh" diff --git a/suites/dummy/% b/suites/dummy/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/dummy/all/nop.yaml b/suites/dummy/all/nop.yaml deleted file mode 100644 index cef190df58e..00000000000 --- a/suites/dummy/all/nop.yaml +++ /dev/null @@ -1,9 +0,0 @@ -overrides: - ansible.cephlab: - playbook: users.yml -roles: - - [mon.a, mds.a, osd.0, osd.1, client.0] - -tasks: - - nop: - diff --git a/suites/experimental/multimds/% b/suites/experimental/multimds/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git 
a/suites/experimental/multimds/clusters/7-multimds.yaml b/suites/experimental/multimds/clusters/7-multimds.yaml deleted file mode 100644 index 17cfd7b3d79..00000000000 --- a/suites/experimental/multimds/clusters/7-multimds.yaml +++ /dev/null @@ -1,8 +0,0 @@ -roles: -- [mon.a, mds.a, mds.a-s] -- [mon.b, mds.b, mds.b-s] -- [mon.c, mds.c, mds.c-s] -- [osd.0] -- [osd.1] -- [osd.2] -- [client.0] diff --git a/suites/experimental/multimds/tasks/fsstress_thrash_subtrees.yaml b/suites/experimental/multimds/tasks/fsstress_thrash_subtrees.yaml deleted file mode 100644 index bee01a83586..00000000000 --- a/suites/experimental/multimds/tasks/fsstress_thrash_subtrees.yaml +++ /dev/null @@ -1,15 +0,0 @@ -tasks: -- install: -- ceph: - conf: - mds: - mds thrash exports: 1 - mds debug subtrees: 1 - mds debug scatterstat: 1 - mds verify scatter: 1 -- ceph-fuse: -- workunit: - clients: - client.0: - - suites/fsstress.sh - diff --git a/suites/fs/basic/% b/suites/fs/basic/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/fs/basic/clusters/fixed-3-cephfs.yaml b/suites/fs/basic/clusters/fixed-3-cephfs.yaml deleted file mode 120000 index a482e650421..00000000000 --- a/suites/fs/basic/clusters/fixed-3-cephfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../clusters/fixed-3-cephfs.yaml \ No newline at end of file diff --git a/suites/fs/basic/debug/mds_client.yaml b/suites/fs/basic/debug/mds_client.yaml deleted file mode 120000 index 335c1cafed7..00000000000 --- a/suites/fs/basic/debug/mds_client.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../debug/mds_client.yaml \ No newline at end of file diff --git a/suites/fs/basic/fs/btrfs.yaml b/suites/fs/basic/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/fs/basic/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/fs/basic/inline/no.yaml b/suites/fs/basic/inline/no.yaml deleted file mode 100644 index 2030acb9083..00000000000 
--- a/suites/fs/basic/inline/no.yaml +++ /dev/null @@ -1,3 +0,0 @@ -tasks: -- install: -- ceph: diff --git a/suites/fs/basic/inline/yes.yaml b/suites/fs/basic/inline/yes.yaml deleted file mode 100644 index 72a285c590f..00000000000 --- a/suites/fs/basic/inline/yes.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- install: -- ceph: -- exec: - client.0: - - ceph mds set inline_data true --yes-i-really-mean-it diff --git a/suites/fs/basic/overrides/whitelist_wrongly_marked_down.yaml b/suites/fs/basic/overrides/whitelist_wrongly_marked_down.yaml deleted file mode 120000 index 08f746bf894..00000000000 --- a/suites/fs/basic/overrides/whitelist_wrongly_marked_down.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../overrides/whitelist_wrongly_marked_down.yaml \ No newline at end of file diff --git a/suites/fs/basic/tasks/cephfs_journal_tool.yaml b/suites/fs/basic/tasks/cephfs_journal_tool.yaml deleted file mode 100644 index f24890857a8..00000000000 --- a/suites/fs/basic/tasks/cephfs_journal_tool.yaml +++ /dev/null @@ -1,20 +0,0 @@ - -tasks: -- ceph-fuse: -- workunit: - clients: - all: [fs/misc/trivial_sync.sh] -- ceph-fuse: - client.0: - mounted: false -- ceph.stop: [mds.*] -- workunit: - clients: - client.0: [suites/cephfs_journal_tool_smoke.sh] -- ceph.restart: [mds.*] -- ceph-fuse: - client.0: - mounted: true -- workunit: - clients: - all: [fs/misc/trivial_sync.sh] diff --git a/suites/fs/basic/tasks/cephfs_scrub_tests.yaml b/suites/fs/basic/tasks/cephfs_scrub_tests.yaml deleted file mode 100644 index b08dcdd0c0f..00000000000 --- a/suites/fs/basic/tasks/cephfs_scrub_tests.yaml +++ /dev/null @@ -1,21 +0,0 @@ -overrides: - ceph: - conf: - mds: - mds log max segments: 1 - mds cache max size: 1000 -tasks: -- ceph-fuse: -- mds_scrub_checks: - mds_rank: 0 - path: /scrub/test/path - client: 0 - run_seq: 0 -- workunit: - clients: - client.0: [suites/pjd.sh] -- mds_scrub_checks: - mds_rank: 0 - path: /scrub/test/path - client: 0 - run_seq: 1 diff --git 
a/suites/fs/basic/tasks/cfuse_workunit_kernel_untar_build.yaml b/suites/fs/basic/tasks/cfuse_workunit_kernel_untar_build.yaml deleted file mode 100644 index 0531b3a2578..00000000000 --- a/suites/fs/basic/tasks/cfuse_workunit_kernel_untar_build.yaml +++ /dev/null @@ -1,14 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - all: - - kernel_untar_build.sh -openstack: - - machine: - disk: 100 # GB - ram: 8000 # MB - cpus: 1 - volumes: # attached to each instance - count: 3 - size: 30 # GB diff --git a/suites/fs/basic/tasks/cfuse_workunit_misc.yaml b/suites/fs/basic/tasks/cfuse_workunit_misc.yaml deleted file mode 100644 index 6dfec976eec..00000000000 --- a/suites/fs/basic/tasks/cfuse_workunit_misc.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - timeout: 6h - clients: - all: - - fs/misc - diff --git a/suites/fs/basic/tasks/cfuse_workunit_misc_test_o_trunc.yaml b/suites/fs/basic/tasks/cfuse_workunit_misc_test_o_trunc.yaml deleted file mode 100644 index c9720a2fd48..00000000000 --- a/suites/fs/basic/tasks/cfuse_workunit_misc_test_o_trunc.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - all: - - fs/test_o_trunc.sh diff --git a/suites/fs/basic/tasks/cfuse_workunit_quota.yaml b/suites/fs/basic/tasks/cfuse_workunit_quota.yaml deleted file mode 100644 index a6d35ab8ece..00000000000 --- a/suites/fs/basic/tasks/cfuse_workunit_quota.yaml +++ /dev/null @@ -1,13 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - timeout: 6h - clients: - all: - - fs/quota - -overrides: - ceph: - conf: - client: - client quota: true diff --git a/suites/fs/basic/tasks/cfuse_workunit_suites_blogbench.yaml b/suites/fs/basic/tasks/cfuse_workunit_suites_blogbench.yaml deleted file mode 100644 index 09898e16bda..00000000000 --- a/suites/fs/basic/tasks/cfuse_workunit_suites_blogbench.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - all: - - suites/blogbench.sh diff --git 
a/suites/fs/basic/tasks/cfuse_workunit_suites_dbench.yaml b/suites/fs/basic/tasks/cfuse_workunit_suites_dbench.yaml deleted file mode 100644 index ad96b4c5e7f..00000000000 --- a/suites/fs/basic/tasks/cfuse_workunit_suites_dbench.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - all: - - suites/dbench.sh diff --git a/suites/fs/basic/tasks/cfuse_workunit_suites_ffsb.yaml b/suites/fs/basic/tasks/cfuse_workunit_suites_ffsb.yaml deleted file mode 100644 index 86008160034..00000000000 --- a/suites/fs/basic/tasks/cfuse_workunit_suites_ffsb.yaml +++ /dev/null @@ -1,11 +0,0 @@ -overrides: - ceph: - conf: - osd: - filestore flush min: 0 -tasks: -- ceph-fuse: -- workunit: - clients: - all: - - suites/ffsb.sh diff --git a/suites/fs/basic/tasks/cfuse_workunit_suites_fsstress.yaml b/suites/fs/basic/tasks/cfuse_workunit_suites_fsstress.yaml deleted file mode 100644 index 5908d951b2d..00000000000 --- a/suites/fs/basic/tasks/cfuse_workunit_suites_fsstress.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - all: - - suites/fsstress.sh diff --git a/suites/fs/basic/tasks/cfuse_workunit_suites_fsx.yaml b/suites/fs/basic/tasks/cfuse_workunit_suites_fsx.yaml deleted file mode 100644 index 3c11ed74fc7..00000000000 --- a/suites/fs/basic/tasks/cfuse_workunit_suites_fsx.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - all: - - suites/fsx.sh diff --git a/suites/fs/basic/tasks/cfuse_workunit_suites_fsync.yaml b/suites/fs/basic/tasks/cfuse_workunit_suites_fsync.yaml deleted file mode 100644 index c6043e209bd..00000000000 --- a/suites/fs/basic/tasks/cfuse_workunit_suites_fsync.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - all: - - suites/fsync-tester.sh diff --git a/suites/fs/basic/tasks/cfuse_workunit_suites_iogen.yaml b/suites/fs/basic/tasks/cfuse_workunit_suites_iogen.yaml deleted file mode 100644 index 6989990e22a..00000000000 --- 
a/suites/fs/basic/tasks/cfuse_workunit_suites_iogen.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - all: - - suites/iogen.sh - diff --git a/suites/fs/basic/tasks/cfuse_workunit_suites_iozone.yaml b/suites/fs/basic/tasks/cfuse_workunit_suites_iozone.yaml deleted file mode 100644 index 1e23f670e28..00000000000 --- a/suites/fs/basic/tasks/cfuse_workunit_suites_iozone.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph-fuse: [client.0] -- workunit: - clients: - all: - - suites/iozone.sh diff --git a/suites/fs/basic/tasks/cfuse_workunit_suites_pjd.yaml b/suites/fs/basic/tasks/cfuse_workunit_suites_pjd.yaml deleted file mode 100644 index 65bcd0d0333..00000000000 --- a/suites/fs/basic/tasks/cfuse_workunit_suites_pjd.yaml +++ /dev/null @@ -1,15 +0,0 @@ -overrides: - ceph: - conf: - client: - debug ms: 1 - debug client: 20 - mds: - debug ms: 1 - debug mds: 20 -tasks: -- ceph-fuse: -- workunit: - clients: - all: - - suites/pjd.sh diff --git a/suites/fs/basic/tasks/cfuse_workunit_suites_truncate_delay.yaml b/suites/fs/basic/tasks/cfuse_workunit_suites_truncate_delay.yaml deleted file mode 100644 index 911026e13bb..00000000000 --- a/suites/fs/basic/tasks/cfuse_workunit_suites_truncate_delay.yaml +++ /dev/null @@ -1,15 +0,0 @@ -overrides: - ceph: - conf: - client: - ms_inject_delay_probability: 1 - ms_inject_delay_type: osd - ms_inject_delay_max: 5 - client_oc_max_dirty_age: 1 -tasks: -- ceph-fuse: -- exec: - client.0: - - cd $TESTDIR/mnt.* && dd if=/dev/zero of=./foo count=100 - - sleep 2 - - cd $TESTDIR/mnt.* && truncate --size 0 ./foo diff --git a/suites/fs/basic/tasks/cfuse_workunit_trivial_sync.yaml b/suites/fs/basic/tasks/cfuse_workunit_trivial_sync.yaml deleted file mode 100644 index 9509650c76c..00000000000 --- a/suites/fs/basic/tasks/cfuse_workunit_trivial_sync.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - all: [fs/misc/trivial_sync.sh] diff --git 
a/suites/fs/basic/tasks/libcephfs_interface_tests.yaml b/suites/fs/basic/tasks/libcephfs_interface_tests.yaml deleted file mode 100644 index 0b1d41fea5c..00000000000 --- a/suites/fs/basic/tasks/libcephfs_interface_tests.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - client.0: - - libcephfs/test.sh diff --git a/suites/fs/basic/tasks/libcephfs_java.yaml b/suites/fs/basic/tasks/libcephfs_java.yaml deleted file mode 100644 index 4330d50965e..00000000000 --- a/suites/fs/basic/tasks/libcephfs_java.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - client.0: - - libcephfs-java/test.sh diff --git a/suites/fs/basic/tasks/mds_creation_retry.yaml b/suites/fs/basic/tasks/mds_creation_retry.yaml deleted file mode 100644 index 76ceeafa8e7..00000000000 --- a/suites/fs/basic/tasks/mds_creation_retry.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: --mds_creation_failure: --ceph-fuse: -- workunit: - clients: - all: [fs/misc/trivial_sync.sh] - diff --git a/suites/fs/multiclient/% b/suites/fs/multiclient/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/fs/multiclient/clusters/three_clients.yaml b/suites/fs/multiclient/clusters/three_clients.yaml deleted file mode 100644 index fd2535fd4a0..00000000000 --- a/suites/fs/multiclient/clusters/three_clients.yaml +++ /dev/null @@ -1,5 +0,0 @@ -roles: -- [mon.a, mon.b, mon.c, mds.a, osd.0, osd.1, osd.2] -- [client.2] -- [client.1] -- [client.0] diff --git a/suites/fs/multiclient/clusters/two_clients.yaml b/suites/fs/multiclient/clusters/two_clients.yaml deleted file mode 100644 index 2258befd8bf..00000000000 --- a/suites/fs/multiclient/clusters/two_clients.yaml +++ /dev/null @@ -1,4 +0,0 @@ -roles: -- [mon.a, mon.b, mon.c, mds.a, osd.0, osd.1, osd.2] -- [client.1] -- [client.0] diff --git a/suites/fs/multiclient/debug/mds_client.yaml b/suites/fs/multiclient/debug/mds_client.yaml deleted file mode 120000 index 335c1cafed7..00000000000 --- 
a/suites/fs/multiclient/debug/mds_client.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../debug/mds_client.yaml \ No newline at end of file diff --git a/suites/fs/multiclient/fs/btrfs.yaml b/suites/fs/multiclient/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/fs/multiclient/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/fs/multiclient/mount/ceph-fuse.yaml b/suites/fs/multiclient/mount/ceph-fuse.yaml deleted file mode 100644 index 37ac5b69e61..00000000000 --- a/suites/fs/multiclient/mount/ceph-fuse.yaml +++ /dev/null @@ -1,4 +0,0 @@ -tasks: -- install: -- ceph: -- ceph-fuse: diff --git a/suites/fs/multiclient/mount/kclient.yaml.disabled b/suites/fs/multiclient/mount/kclient.yaml.disabled deleted file mode 100644 index 04adb48b63f..00000000000 --- a/suites/fs/multiclient/mount/kclient.yaml.disabled +++ /dev/null @@ -1,9 +0,0 @@ -overrides: - ceph: - conf: - global: - ms die on skipped message: false -tasks: -- install: -- ceph: -- kclient: diff --git a/suites/fs/multiclient/tasks/fsx-mpi.yaml.disabled b/suites/fs/multiclient/tasks/fsx-mpi.yaml.disabled deleted file mode 100644 index e486c44c51e..00000000000 --- a/suites/fs/multiclient/tasks/fsx-mpi.yaml.disabled +++ /dev/null @@ -1,20 +0,0 @@ -# make sure we get the same MPI version on all hosts -os_type: ubuntu -os_version: "14.04" - -tasks: -- pexec: - clients: - - cd $TESTDIR - - wget http://ceph.com/qa/fsx-mpi.c - - mpicc fsx-mpi.c -o fsx-mpi - - rm fsx-mpi.c - - ln -s $TESTDIR/mnt.* $TESTDIR/gmnt -- ssh_keys: -- mpi: - exec: $TESTDIR/fsx-mpi 1MB -N 50000 -p 10000 -l 1048576 - workdir: $TESTDIR/gmnt -- pexec: - all: - - rm $TESTDIR/gmnt - - rm $TESTDIR/fsx-mpi diff --git a/suites/fs/multiclient/tasks/ior-shared-file.yaml b/suites/fs/multiclient/tasks/ior-shared-file.yaml deleted file mode 100644 index dcf24247a92..00000000000 --- a/suites/fs/multiclient/tasks/ior-shared-file.yaml +++ /dev/null @@ -1,26 +0,0 @@ 
-# make sure we get the same MPI version on all hosts -os_type: ubuntu -os_version: "14.04" - -tasks: -- pexec: - clients: - - cd $TESTDIR - - wget http://ceph.com/qa/ior.tbz2 - - tar xvfj ior.tbz2 - - cd ior - - ./configure - - make - - make install DESTDIR=$TESTDIR/binary/ - - cd $TESTDIR/ - - rm ior.tbz2 - - rm -r ior - - ln -s $TESTDIR/mnt.* $TESTDIR/gmnt -- ssh_keys: -- mpi: - exec: $TESTDIR/binary/usr/local/bin/ior -e -w -r -W -b 10m -a POSIX -o $TESTDIR/gmnt/ior.testfile -- pexec: - all: - - rm -f $TESTDIR/gmnt/ior.testfile - - rm -f $TESTDIR/gmnt - - rm -rf $TESTDIR/binary diff --git a/suites/fs/multiclient/tasks/mdtest.yaml b/suites/fs/multiclient/tasks/mdtest.yaml deleted file mode 100644 index 1dd95d954fb..00000000000 --- a/suites/fs/multiclient/tasks/mdtest.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# make sure we get the same MPI version on all hosts -os_type: ubuntu -os_version: "14.04" - -tasks: -- pexec: - clients: - - cd $TESTDIR - - wget http://ceph.com/qa/mdtest-1.9.3.tgz - - mkdir mdtest-1.9.3 - - cd mdtest-1.9.3 - - tar xvfz $TESTDIR/mdtest-1.9.3.tgz - - rm $TESTDIR/mdtest-1.9.3.tgz - - MPI_CC=mpicc make - - ln -s $TESTDIR/mnt.* $TESTDIR/gmnt -- ssh_keys: -- mpi: - exec: $TESTDIR/mdtest-1.9.3/mdtest -d $TESTDIR/gmnt -I 20 -z 5 -b 2 -R -- pexec: - all: - - rm -f $TESTDIR/gmnt - - rm -rf $TESTDIR/mdtest-1.9.3 - - rm -rf $TESTDIR/._mdtest-1.9.3 \ No newline at end of file diff --git a/suites/fs/recovery/% b/suites/fs/recovery/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/fs/recovery/clusters/2-remote-clients.yaml b/suites/fs/recovery/clusters/2-remote-clients.yaml deleted file mode 100644 index d8af6b6ae12..00000000000 --- a/suites/fs/recovery/clusters/2-remote-clients.yaml +++ /dev/null @@ -1,3 +0,0 @@ -roles: -- [mon.a, osd.0, mds.a] -- [client.0, client.1, osd.1, osd.2] diff --git a/suites/fs/recovery/debug/mds_client.yaml b/suites/fs/recovery/debug/mds_client.yaml deleted file mode 100644 index 
cf5995fdda4..00000000000 --- a/suites/fs/recovery/debug/mds_client.yaml +++ /dev/null @@ -1,9 +0,0 @@ -overrides: - ceph: - conf: - mds: - debug ms: 1 - debug mds: 20 - client: - debug ms: 1 - debug client: 20 diff --git a/suites/fs/recovery/mounts/ceph-fuse.yaml b/suites/fs/recovery/mounts/ceph-fuse.yaml deleted file mode 100644 index 8092598f404..00000000000 --- a/suites/fs/recovery/mounts/ceph-fuse.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- install: -- ceph: -- ceph-fuse: - client.0: - mounted: false - client.1: - mounted: false diff --git a/suites/fs/recovery/tasks/client-limits.yaml b/suites/fs/recovery/tasks/client-limits.yaml deleted file mode 100644 index 4cf874d5df8..00000000000 --- a/suites/fs/recovery/tasks/client-limits.yaml +++ /dev/null @@ -1,8 +0,0 @@ - -overrides: - ceph: - log-whitelist: - - responding to mclientcaps\(revoke\) - -tasks: -- mds_client_limits: diff --git a/suites/fs/recovery/tasks/client-recovery.yaml b/suites/fs/recovery/tasks/client-recovery.yaml deleted file mode 100644 index 2cd39510f35..00000000000 --- a/suites/fs/recovery/tasks/client-recovery.yaml +++ /dev/null @@ -1,11 +0,0 @@ - -# The task interferes with the network, so we need -# to permit OSDs to complain about that. 
-overrides: - ceph: - log-whitelist: - - wrongly marked me down - - slow request - -tasks: -- mds_client_recovery: diff --git a/suites/fs/recovery/tasks/mds-flush.yaml b/suites/fs/recovery/tasks/mds-flush.yaml deleted file mode 100644 index 8e3021e0060..00000000000 --- a/suites/fs/recovery/tasks/mds-flush.yaml +++ /dev/null @@ -1,3 +0,0 @@ - -tasks: -- mds_flush: diff --git a/suites/fs/recovery/tasks/mds-full.yaml b/suites/fs/recovery/tasks/mds-full.yaml deleted file mode 100644 index fe52ae32043..00000000000 --- a/suites/fs/recovery/tasks/mds-full.yaml +++ /dev/null @@ -1,13 +0,0 @@ - -overrides: - ceph: - log-whitelist: - - OSD full dropping all updates - - OSD near full - conf: - osd: - osd objectstore: memstore - memstore device bytes: 100000000 - -tasks: - - mds_full: diff --git a/suites/fs/snaps/% b/suites/fs/snaps/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/fs/snaps/clusters/fixed-3-cephfs.yaml b/suites/fs/snaps/clusters/fixed-3-cephfs.yaml deleted file mode 120000 index a482e650421..00000000000 --- a/suites/fs/snaps/clusters/fixed-3-cephfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../clusters/fixed-3-cephfs.yaml \ No newline at end of file diff --git a/suites/fs/snaps/fs/btrfs.yaml b/suites/fs/snaps/fs/btrfs.yaml deleted file mode 100644 index 4c7af311538..00000000000 --- a/suites/fs/snaps/fs/btrfs.yaml +++ /dev/null @@ -1,6 +0,0 @@ -overrides: - ceph: - fs: btrfs - conf: - osd: - osd op thread timeout: 60 diff --git a/suites/fs/snaps/mount/ceph-fuse.yaml b/suites/fs/snaps/mount/ceph-fuse.yaml deleted file mode 100644 index 37ac5b69e61..00000000000 --- a/suites/fs/snaps/mount/ceph-fuse.yaml +++ /dev/null @@ -1,4 +0,0 @@ -tasks: -- install: -- ceph: -- ceph-fuse: diff --git a/suites/fs/snaps/tasks/snaptests.yaml b/suites/fs/snaps/tasks/snaptests.yaml deleted file mode 100644 index 7f7b0f21569..00000000000 --- a/suites/fs/snaps/tasks/snaptests.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- workunit: - clients: - all: - - 
snaps/snaptest-0.sh - - snaps/snaptest-1.sh - - snaps/snaptest-2.sh diff --git a/suites/fs/standbyreplay/% b/suites/fs/standbyreplay/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/fs/standbyreplay/clusters/standby-replay.yaml b/suites/fs/standbyreplay/clusters/standby-replay.yaml deleted file mode 100644 index 5d21765ca18..00000000000 --- a/suites/fs/standbyreplay/clusters/standby-replay.yaml +++ /dev/null @@ -1,11 +0,0 @@ - -overrides: - ceph: - conf: - mds: - mds standby replay: true - -roles: -- [mon.a, mds.a, mds.b-s-0, osd.0, osd.1] -- [mon.b, mds.c-s-0, mds.d-s-0, mon.c, osd.2, osd.3] -- [client.0] diff --git a/suites/fs/standbyreplay/mount/fuse.yaml b/suites/fs/standbyreplay/mount/fuse.yaml deleted file mode 100644 index 5769caaaaac..00000000000 --- a/suites/fs/standbyreplay/mount/fuse.yaml +++ /dev/null @@ -1,5 +0,0 @@ - -tasks: - - install: - - ceph: - - ceph_fuse: diff --git a/suites/fs/standbyreplay/tasks/migration.yaml b/suites/fs/standbyreplay/tasks/migration.yaml deleted file mode 100644 index 09181ac1150..00000000000 --- a/suites/fs/standbyreplay/tasks/migration.yaml +++ /dev/null @@ -1,7 +0,0 @@ - -tasks: - - mds_journal_migration: - - workunit: - clients: - all: [fs/misc/trivial_sync.sh] - diff --git a/suites/fs/thrash/% b/suites/fs/thrash/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/fs/thrash/ceph-thrash/default.yaml b/suites/fs/thrash/ceph-thrash/default.yaml deleted file mode 100644 index aefdf826ce7..00000000000 --- a/suites/fs/thrash/ceph-thrash/default.yaml +++ /dev/null @@ -1,2 +0,0 @@ -tasks: -- mds_thrash: diff --git a/suites/fs/thrash/ceph/base.yaml b/suites/fs/thrash/ceph/base.yaml deleted file mode 100644 index 2030acb9083..00000000000 --- a/suites/fs/thrash/ceph/base.yaml +++ /dev/null @@ -1,3 +0,0 @@ -tasks: -- install: -- ceph: diff --git a/suites/fs/thrash/clusters/mds-1active-1standby.yaml b/suites/fs/thrash/clusters/mds-1active-1standby.yaml deleted file mode 100644 
index 7e951b95889..00000000000 --- a/suites/fs/thrash/clusters/mds-1active-1standby.yaml +++ /dev/null @@ -1,4 +0,0 @@ -roles: -- [mon.a, mon.c, osd.0, osd.1, osd.2] -- [mon.b, mds.a, osd.3, osd.4, osd.5] -- [client.0, mds.b-s-a] diff --git a/suites/fs/thrash/debug/mds_client.yaml b/suites/fs/thrash/debug/mds_client.yaml deleted file mode 120000 index 335c1cafed7..00000000000 --- a/suites/fs/thrash/debug/mds_client.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../debug/mds_client.yaml \ No newline at end of file diff --git a/suites/fs/thrash/fs/btrfs.yaml b/suites/fs/thrash/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/fs/thrash/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/fs/thrash/msgr-failures/none.yaml b/suites/fs/thrash/msgr-failures/none.yaml deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/fs/thrash/msgr-failures/osd-mds-delay.yaml b/suites/fs/thrash/msgr-failures/osd-mds-delay.yaml deleted file mode 100644 index adcebc0baac..00000000000 --- a/suites/fs/thrash/msgr-failures/osd-mds-delay.yaml +++ /dev/null @@ -1,8 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 2500 - mds inject delay type: osd mds - ms inject delay probability: .005 - ms inject delay max: 1 diff --git a/suites/fs/thrash/overrides/whitelist_wrongly_marked_down.yaml b/suites/fs/thrash/overrides/whitelist_wrongly_marked_down.yaml deleted file mode 120000 index 08f746bf894..00000000000 --- a/suites/fs/thrash/overrides/whitelist_wrongly_marked_down.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../overrides/whitelist_wrongly_marked_down.yaml \ No newline at end of file diff --git a/suites/fs/thrash/tasks/cfuse_workunit_suites_fsstress.yaml b/suites/fs/thrash/tasks/cfuse_workunit_suites_fsstress.yaml deleted file mode 100644 index 5908d951b2d..00000000000 --- a/suites/fs/thrash/tasks/cfuse_workunit_suites_fsstress.yaml +++ /dev/null @@ -1,6 
+0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - all: - - suites/fsstress.sh diff --git a/suites/fs/thrash/tasks/cfuse_workunit_suites_pjd.yaml b/suites/fs/thrash/tasks/cfuse_workunit_suites_pjd.yaml deleted file mode 100644 index 930bf4a671d..00000000000 --- a/suites/fs/thrash/tasks/cfuse_workunit_suites_pjd.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - all: - - suites/pjd.sh diff --git a/suites/fs/thrash/tasks/cfuse_workunit_trivial_sync.yaml b/suites/fs/thrash/tasks/cfuse_workunit_trivial_sync.yaml deleted file mode 100644 index 9509650c76c..00000000000 --- a/suites/fs/thrash/tasks/cfuse_workunit_trivial_sync.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - all: [fs/misc/trivial_sync.sh] diff --git a/suites/fs/traceless/% b/suites/fs/traceless/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/fs/traceless/clusters/fixed-3-cephfs.yaml b/suites/fs/traceless/clusters/fixed-3-cephfs.yaml deleted file mode 120000 index a482e650421..00000000000 --- a/suites/fs/traceless/clusters/fixed-3-cephfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../clusters/fixed-3-cephfs.yaml \ No newline at end of file diff --git a/suites/fs/traceless/debug/mds_client.yaml b/suites/fs/traceless/debug/mds_client.yaml deleted file mode 120000 index 335c1cafed7..00000000000 --- a/suites/fs/traceless/debug/mds_client.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../debug/mds_client.yaml \ No newline at end of file diff --git a/suites/fs/traceless/fs/btrfs.yaml b/suites/fs/traceless/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/fs/traceless/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/fs/traceless/overrides/whitelist_wrongly_marked_down.yaml b/suites/fs/traceless/overrides/whitelist_wrongly_marked_down.yaml deleted file mode 120000 index 08f746bf894..00000000000 --- 
a/suites/fs/traceless/overrides/whitelist_wrongly_marked_down.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../overrides/whitelist_wrongly_marked_down.yaml \ No newline at end of file diff --git a/suites/fs/traceless/tasks/cfuse_workunit_suites_blogbench.yaml b/suites/fs/traceless/tasks/cfuse_workunit_suites_blogbench.yaml deleted file mode 100644 index ed9d92d5bda..00000000000 --- a/suites/fs/traceless/tasks/cfuse_workunit_suites_blogbench.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- install: -- ceph: -- ceph-fuse: -- workunit: - clients: - all: - - suites/blogbench.sh diff --git a/suites/fs/traceless/tasks/cfuse_workunit_suites_dbench.yaml b/suites/fs/traceless/tasks/cfuse_workunit_suites_dbench.yaml deleted file mode 100644 index e678ed47cc6..00000000000 --- a/suites/fs/traceless/tasks/cfuse_workunit_suites_dbench.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- install: -- ceph: -- ceph-fuse: -- workunit: - clients: - all: - - suites/dbench.sh diff --git a/suites/fs/traceless/tasks/cfuse_workunit_suites_ffsb.yaml b/suites/fs/traceless/tasks/cfuse_workunit_suites_ffsb.yaml deleted file mode 100644 index 652a3a62f59..00000000000 --- a/suites/fs/traceless/tasks/cfuse_workunit_suites_ffsb.yaml +++ /dev/null @@ -1,11 +0,0 @@ -tasks: -- install: -- ceph: - conf: - osd: - filestore flush min: 0 -- ceph-fuse: -- workunit: - clients: - all: - - suites/ffsb.sh diff --git a/suites/fs/traceless/tasks/cfuse_workunit_suites_fsstress.yaml b/suites/fs/traceless/tasks/cfuse_workunit_suites_fsstress.yaml deleted file mode 100644 index b58487c0785..00000000000 --- a/suites/fs/traceless/tasks/cfuse_workunit_suites_fsstress.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- install: -- ceph: -- ceph-fuse: -- workunit: - clients: - all: - - suites/fsstress.sh diff --git a/suites/fs/traceless/traceless/50pc.yaml b/suites/fs/traceless/traceless/50pc.yaml deleted file mode 100644 index e0418bcb2be..00000000000 --- a/suites/fs/traceless/traceless/50pc.yaml +++ /dev/null @@ -1,5 +0,0 @@ 
-overrides: - ceph: - conf: - mds: - mds inject traceless reply probability: .5 diff --git a/suites/fs/verify/% b/suites/fs/verify/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/fs/verify/clusters/fixed-3-cephfs.yaml b/suites/fs/verify/clusters/fixed-3-cephfs.yaml deleted file mode 120000 index a482e650421..00000000000 --- a/suites/fs/verify/clusters/fixed-3-cephfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../clusters/fixed-3-cephfs.yaml \ No newline at end of file diff --git a/suites/fs/verify/debug/mds_client.yaml b/suites/fs/verify/debug/mds_client.yaml deleted file mode 120000 index 335c1cafed7..00000000000 --- a/suites/fs/verify/debug/mds_client.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../debug/mds_client.yaml \ No newline at end of file diff --git a/suites/fs/verify/fs/btrfs.yaml b/suites/fs/verify/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/fs/verify/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/fs/verify/overrides/whitelist_wrongly_marked_down.yaml b/suites/fs/verify/overrides/whitelist_wrongly_marked_down.yaml deleted file mode 120000 index 08f746bf894..00000000000 --- a/suites/fs/verify/overrides/whitelist_wrongly_marked_down.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../overrides/whitelist_wrongly_marked_down.yaml \ No newline at end of file diff --git a/suites/fs/verify/tasks/cfuse_workunit_suites_dbench.yaml b/suites/fs/verify/tasks/cfuse_workunit_suites_dbench.yaml deleted file mode 100644 index 73319776f03..00000000000 --- a/suites/fs/verify/tasks/cfuse_workunit_suites_dbench.yaml +++ /dev/null @@ -1,12 +0,0 @@ -tasks: -- install: -- ceph: - conf: - client: - debug client: 1/20 - debug ms: 0/10 -- ceph-fuse: -- workunit: - clients: - all: - - suites/dbench.sh diff --git a/suites/fs/verify/tasks/cfuse_workunit_suites_fsstress.yaml b/suites/fs/verify/tasks/cfuse_workunit_suites_fsstress.yaml deleted file mode 
100644 index b58487c0785..00000000000 --- a/suites/fs/verify/tasks/cfuse_workunit_suites_fsstress.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- install: -- ceph: -- ceph-fuse: -- workunit: - clients: - all: - - suites/fsstress.sh diff --git a/suites/fs/verify/tasks/libcephfs_interface_tests.yaml b/suites/fs/verify/tasks/libcephfs_interface_tests.yaml deleted file mode 100644 index 22d1f142161..00000000000 --- a/suites/fs/verify/tasks/libcephfs_interface_tests.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- install: -- ceph: -- ceph-fuse: -- workunit: - clients: - client.0: - - libcephfs/test.sh diff --git a/suites/fs/verify/validater/lockdep.yaml b/suites/fs/verify/validater/lockdep.yaml deleted file mode 100644 index 25f84355c0b..00000000000 --- a/suites/fs/verify/validater/lockdep.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - lockdep: true diff --git a/suites/fs/verify/validater/valgrind.yaml b/suites/fs/verify/validater/valgrind.yaml deleted file mode 100644 index 973f460ad47..00000000000 --- a/suites/fs/verify/validater/valgrind.yaml +++ /dev/null @@ -1,15 +0,0 @@ -overrides: - install: - ceph: - flavor: notcmalloc - ceph: - conf: - global: - osd heartbeat grace: 40 - valgrind: - mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes] - osd: [--tool=memcheck] - mds: [--tool=memcheck] - ceph-fuse: - client.0: - valgrind: [--tool=memcheck, --leak-check=full, --show-reachable=yes] diff --git a/suites/hadoop/basic/% b/suites/hadoop/basic/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/hadoop/basic/clusters/fixed-3.yaml b/suites/hadoop/basic/clusters/fixed-3.yaml deleted file mode 100644 index 708d751178c..00000000000 --- a/suites/hadoop/basic/clusters/fixed-3.yaml +++ /dev/null @@ -1,5 +0,0 @@ -roles: -- [mon.0, mds.0, osd.0, hadoop.master.0] -- [mon.1, osd.1, hadoop.slave.0] -- [mon.2, hadoop.slave.1, client.0] - diff --git a/suites/hadoop/basic/tasks/repl.yaml b/suites/hadoop/basic/tasks/repl.yaml 
deleted file mode 100644 index 60cdcca327e..00000000000 --- a/suites/hadoop/basic/tasks/repl.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- ssh_keys: -- install: -- ceph: -- hadoop: -- workunit: - clients: - client.0: [hadoop/repl.sh] diff --git a/suites/hadoop/basic/tasks/wordcount.yaml b/suites/hadoop/basic/tasks/wordcount.yaml deleted file mode 100644 index b84941b81ed..00000000000 --- a/suites/hadoop/basic/tasks/wordcount.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- ssh_keys: -- install: -- ceph: -- hadoop: -- workunit: - clients: - client.0: [hadoop/wordcount.sh] diff --git a/suites/kcephfs/cephfs/% b/suites/kcephfs/cephfs/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/kcephfs/cephfs/clusters/fixed-3-cephfs.yaml b/suites/kcephfs/cephfs/clusters/fixed-3-cephfs.yaml deleted file mode 120000 index a482e650421..00000000000 --- a/suites/kcephfs/cephfs/clusters/fixed-3-cephfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../clusters/fixed-3-cephfs.yaml \ No newline at end of file diff --git a/suites/kcephfs/cephfs/conf.yaml b/suites/kcephfs/cephfs/conf.yaml deleted file mode 100644 index 30da870b25d..00000000000 --- a/suites/kcephfs/cephfs/conf.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms die on skipped message: false diff --git a/suites/kcephfs/cephfs/fs/btrfs.yaml b/suites/kcephfs/cephfs/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/kcephfs/cephfs/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/kcephfs/cephfs/inline/no.yaml b/suites/kcephfs/cephfs/inline/no.yaml deleted file mode 100644 index 2030acb9083..00000000000 --- a/suites/kcephfs/cephfs/inline/no.yaml +++ /dev/null @@ -1,3 +0,0 @@ -tasks: -- install: -- ceph: diff --git a/suites/kcephfs/cephfs/inline/yes.yaml b/suites/kcephfs/cephfs/inline/yes.yaml deleted file mode 100644 index 72a285c590f..00000000000 --- 
a/suites/kcephfs/cephfs/inline/yes.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- install: -- ceph: -- exec: - client.0: - - ceph mds set inline_data true --yes-i-really-mean-it diff --git a/suites/kcephfs/cephfs/tasks/kclient_workunit_direct_io.yaml b/suites/kcephfs/cephfs/tasks/kclient_workunit_direct_io.yaml deleted file mode 100644 index cc4b32a441a..00000000000 --- a/suites/kcephfs/cephfs/tasks/kclient_workunit_direct_io.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- kclient: -- workunit: - clients: - all: - - direct_io - diff --git a/suites/kcephfs/cephfs/tasks/kclient_workunit_kernel_untar_build.yaml b/suites/kcephfs/cephfs/tasks/kclient_workunit_kernel_untar_build.yaml deleted file mode 100644 index 84d15f66d6c..00000000000 --- a/suites/kcephfs/cephfs/tasks/kclient_workunit_kernel_untar_build.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- kclient: -- workunit: - clients: - all: - - kernel_untar_build.sh diff --git a/suites/kcephfs/cephfs/tasks/kclient_workunit_misc.yaml b/suites/kcephfs/cephfs/tasks/kclient_workunit_misc.yaml deleted file mode 100644 index e3f4fb17672..00000000000 --- a/suites/kcephfs/cephfs/tasks/kclient_workunit_misc.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- kclient: -- workunit: - clients: - all: - - fs/misc diff --git a/suites/kcephfs/cephfs/tasks/kclient_workunit_o_trunc.yaml b/suites/kcephfs/cephfs/tasks/kclient_workunit_o_trunc.yaml deleted file mode 100644 index 5219fc929ef..00000000000 --- a/suites/kcephfs/cephfs/tasks/kclient_workunit_o_trunc.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- kclient: -- workunit: - clients: - all: - - fs/test_o_trunc.sh - diff --git a/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_dbench.yaml b/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_dbench.yaml deleted file mode 100644 index 8dd810a3765..00000000000 --- a/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_dbench.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- kclient: -- workunit: - clients: - all: - - suites/dbench.sh diff --git 
a/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_ffsb.yaml b/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_ffsb.yaml deleted file mode 100644 index 059ffe1ea4a..00000000000 --- a/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_ffsb.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- kclient: -- workunit: - clients: - all: - - suites/ffsb.sh diff --git a/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_fsstress.yaml b/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_fsstress.yaml deleted file mode 100644 index bc49fc9086c..00000000000 --- a/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_fsstress.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- kclient: -- workunit: - clients: - all: - - suites/fsstress.sh diff --git a/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_fsx.yaml b/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_fsx.yaml deleted file mode 100644 index 38d9604fcac..00000000000 --- a/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_fsx.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- kclient: -- workunit: - clients: - all: - - suites/fsx.sh diff --git a/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_fsync.yaml b/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_fsync.yaml deleted file mode 100644 index 452641cfc9e..00000000000 --- a/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_fsync.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- kclient: -- workunit: - clients: - all: - - suites/fsync-tester.sh diff --git a/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_iozone.yaml b/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_iozone.yaml deleted file mode 100644 index 832e0241b27..00000000000 --- a/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_iozone.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- kclient: -- workunit: - clients: - all: - - suites/iozone.sh diff --git a/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_pjd.yaml b/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_pjd.yaml deleted file mode 100644 
index 09abaeb6eec..00000000000 --- a/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_pjd.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- kclient: -- workunit: - clients: - all: - - suites/pjd.sh diff --git a/suites/kcephfs/cephfs/tasks/kclient_workunit_trivial_sync.yaml b/suites/kcephfs/cephfs/tasks/kclient_workunit_trivial_sync.yaml deleted file mode 100644 index d317a392983..00000000000 --- a/suites/kcephfs/cephfs/tasks/kclient_workunit_trivial_sync.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- kclient: -- workunit: - clients: - all: [fs/misc/trivial_sync.sh] diff --git a/suites/kcephfs/mixed-clients/% b/suites/kcephfs/mixed-clients/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/kcephfs/mixed-clients/clusters/2-clients.yaml b/suites/kcephfs/mixed-clients/clusters/2-clients.yaml deleted file mode 100644 index 067ce1a8278..00000000000 --- a/suites/kcephfs/mixed-clients/clusters/2-clients.yaml +++ /dev/null @@ -1,5 +0,0 @@ -roles: -- [mon.a, mds.a, osd.0, osd.1] -- [mon.b, mon.c, osd.2, osd.3] -- [client.0] -- [client.1] diff --git a/suites/kcephfs/mixed-clients/conf.yaml b/suites/kcephfs/mixed-clients/conf.yaml deleted file mode 100644 index 30da870b25d..00000000000 --- a/suites/kcephfs/mixed-clients/conf.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms die on skipped message: false diff --git a/suites/kcephfs/mixed-clients/fs/btrfs.yaml b/suites/kcephfs/mixed-clients/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/kcephfs/mixed-clients/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/kcephfs/mixed-clients/tasks/kernel_cfuse_workunits_dbench_iozone.yaml b/suites/kcephfs/mixed-clients/tasks/kernel_cfuse_workunits_dbench_iozone.yaml deleted file mode 100644 index 0121a01c538..00000000000 --- a/suites/kcephfs/mixed-clients/tasks/kernel_cfuse_workunits_dbench_iozone.yaml +++ /dev/null @@ -1,20 +0,0 @@ 
-tasks: -- install: -- ceph: -- parallel: - - user-workload - - kclient-workload -user-workload: - sequential: - - ceph-fuse: [client.0] - - workunit: - clients: - client.0: - - suites/iozone.sh -kclient-workload: - sequential: - - kclient: [client.1] - - workunit: - clients: - client.1: - - suites/dbench.sh diff --git a/suites/kcephfs/mixed-clients/tasks/kernel_cfuse_workunits_untarbuild_blogbench.yaml b/suites/kcephfs/mixed-clients/tasks/kernel_cfuse_workunits_untarbuild_blogbench.yaml deleted file mode 100644 index 7b0ce5b5d58..00000000000 --- a/suites/kcephfs/mixed-clients/tasks/kernel_cfuse_workunits_untarbuild_blogbench.yaml +++ /dev/null @@ -1,20 +0,0 @@ -tasks: -- install: -- ceph: -- parallel: - - user-workload - - kclient-workload -user-workload: - sequential: - - ceph-fuse: [client.0] - - workunit: - clients: - client.0: - - suites/blogbench.sh -kclient-workload: - sequential: - - kclient: [client.1] - - workunit: - clients: - client.1: - - kernel_untar_build.sh diff --git a/suites/kcephfs/thrash/% b/suites/kcephfs/thrash/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/kcephfs/thrash/clusters/fixed-3-cephfs.yaml b/suites/kcephfs/thrash/clusters/fixed-3-cephfs.yaml deleted file mode 120000 index a482e650421..00000000000 --- a/suites/kcephfs/thrash/clusters/fixed-3-cephfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../clusters/fixed-3-cephfs.yaml \ No newline at end of file diff --git a/suites/kcephfs/thrash/conf.yaml b/suites/kcephfs/thrash/conf.yaml deleted file mode 100644 index 30da870b25d..00000000000 --- a/suites/kcephfs/thrash/conf.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms die on skipped message: false diff --git a/suites/kcephfs/thrash/fs/btrfs.yaml b/suites/kcephfs/thrash/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/kcephfs/thrash/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git 
a/suites/kcephfs/thrash/thrashers/default.yaml b/suites/kcephfs/thrash/thrashers/default.yaml deleted file mode 100644 index 14d772583cf..00000000000 --- a/suites/kcephfs/thrash/thrashers/default.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- install: -- ceph: - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost -- thrashosds: diff --git a/suites/kcephfs/thrash/thrashers/mds.yaml b/suites/kcephfs/thrash/thrashers/mds.yaml deleted file mode 100644 index cab4a01a5fd..00000000000 --- a/suites/kcephfs/thrash/thrashers/mds.yaml +++ /dev/null @@ -1,4 +0,0 @@ -tasks: -- install: -- ceph: -- mds_thrash: diff --git a/suites/kcephfs/thrash/thrashers/mon.yaml b/suites/kcephfs/thrash/thrashers/mon.yaml deleted file mode 100644 index 90612f21865..00000000000 --- a/suites/kcephfs/thrash/thrashers/mon.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- install: -- ceph: -- mon_thrash: - revive_delay: 20 - thrash_delay: 1 diff --git a/suites/kcephfs/thrash/workloads/kclient_workunit_suites_ffsb.yaml b/suites/kcephfs/thrash/workloads/kclient_workunit_suites_ffsb.yaml deleted file mode 100644 index 0c4a1528d08..00000000000 --- a/suites/kcephfs/thrash/workloads/kclient_workunit_suites_ffsb.yaml +++ /dev/null @@ -1,11 +0,0 @@ -overrides: - ceph: - conf: - osd: - filestore flush min: 0 -tasks: -- kclient: -- workunit: - clients: - all: - - suites/ffsb.sh diff --git a/suites/kcephfs/thrash/workloads/kclient_workunit_suites_iozone.yaml b/suites/kcephfs/thrash/workloads/kclient_workunit_suites_iozone.yaml deleted file mode 100644 index 832e0241b27..00000000000 --- a/suites/kcephfs/thrash/workloads/kclient_workunit_suites_iozone.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- kclient: -- workunit: - clients: - all: - - suites/iozone.sh diff --git a/suites/knfs/basic/% b/suites/knfs/basic/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/knfs/basic/ceph/base.yaml b/suites/knfs/basic/ceph/base.yaml deleted file mode 100644 index 
7e80c462c37..00000000000 --- a/suites/knfs/basic/ceph/base.yaml +++ /dev/null @@ -1,13 +0,0 @@ -overrides: - ceph: - conf: - global: - ms die on skipped message: false - -tasks: -- install: -- ceph: -- kclient: [client.0] -- knfsd: - client.0: - options: [rw,no_root_squash,async] diff --git a/suites/knfs/basic/clusters/extra-client.yaml b/suites/knfs/basic/clusters/extra-client.yaml deleted file mode 120000 index 1582e308945..00000000000 --- a/suites/knfs/basic/clusters/extra-client.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../clusters/extra-client.yaml \ No newline at end of file diff --git a/suites/knfs/basic/fs/btrfs.yaml b/suites/knfs/basic/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/knfs/basic/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/knfs/basic/mount/v3.yaml b/suites/knfs/basic/mount/v3.yaml deleted file mode 100644 index 1b61119242b..00000000000 --- a/suites/knfs/basic/mount/v3.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- nfs: - client.1: - server: client.0 - options: [rw,hard,intr,nfsvers=3] diff --git a/suites/knfs/basic/mount/v4.yaml b/suites/knfs/basic/mount/v4.yaml deleted file mode 100644 index 88405666bfb..00000000000 --- a/suites/knfs/basic/mount/v4.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- nfs: - client.1: - server: client.0 - options: [rw,hard,intr,nfsvers=4] diff --git a/suites/knfs/basic/tasks/nfs-workunit-kernel-untar-build.yaml b/suites/knfs/basic/tasks/nfs-workunit-kernel-untar-build.yaml deleted file mode 100644 index b9c0a5e05a3..00000000000 --- a/suites/knfs/basic/tasks/nfs-workunit-kernel-untar-build.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- workunit: - timeout: 6h - clients: - client.1: - - kernel_untar_build.sh diff --git a/suites/knfs/basic/tasks/nfs_workunit_misc.yaml b/suites/knfs/basic/tasks/nfs_workunit_misc.yaml deleted file mode 100644 index 135c4a74009..00000000000 --- 
a/suites/knfs/basic/tasks/nfs_workunit_misc.yaml +++ /dev/null @@ -1,11 +0,0 @@ -tasks: -- workunit: - clients: - client.1: - - fs/misc/chmod.sh - - fs/misc/i_complete_vs_rename.sh - - fs/misc/trivial_sync.sh - #- fs/misc/multiple_rsync.sh - #- fs/misc/xattrs.sh -# Once we can run multiple_rsync.sh and xattrs.sh we can change to this -# - misc diff --git a/suites/knfs/basic/tasks/nfs_workunit_suites_blogbench.yaml b/suites/knfs/basic/tasks/nfs_workunit_suites_blogbench.yaml deleted file mode 100644 index e554a3d9a06..00000000000 --- a/suites/knfs/basic/tasks/nfs_workunit_suites_blogbench.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - client.1: - - suites/blogbench.sh diff --git a/suites/knfs/basic/tasks/nfs_workunit_suites_dbench.yaml b/suites/knfs/basic/tasks/nfs_workunit_suites_dbench.yaml deleted file mode 100644 index 1da1b768d02..00000000000 --- a/suites/knfs/basic/tasks/nfs_workunit_suites_dbench.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - client.1: - - suites/dbench-short.sh diff --git a/suites/knfs/basic/tasks/nfs_workunit_suites_ffsb.yaml b/suites/knfs/basic/tasks/nfs_workunit_suites_ffsb.yaml deleted file mode 100644 index 3090f91ea43..00000000000 --- a/suites/knfs/basic/tasks/nfs_workunit_suites_ffsb.yaml +++ /dev/null @@ -1,10 +0,0 @@ -overrides: - ceph: - conf: - osd: - filestore flush min: 0 -tasks: -- workunit: - clients: - client.1: - - suites/ffsb.sh diff --git a/suites/knfs/basic/tasks/nfs_workunit_suites_fsstress.yaml b/suites/knfs/basic/tasks/nfs_workunit_suites_fsstress.yaml deleted file mode 100644 index bbe7b7a4045..00000000000 --- a/suites/knfs/basic/tasks/nfs_workunit_suites_fsstress.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - client.1: - - suites/fsstress.sh diff --git a/suites/knfs/basic/tasks/nfs_workunit_suites_iozone.yaml b/suites/knfs/basic/tasks/nfs_workunit_suites_iozone.yaml deleted file mode 100644 index 7c3eec2ff3e..00000000000 --- 
a/suites/knfs/basic/tasks/nfs_workunit_suites_iozone.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - client.1: - - suites/iozone.sh diff --git a/suites/krbd/rbd-nomount/% b/suites/krbd/rbd-nomount/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/krbd/rbd-nomount/clusters/fixed-3.yaml b/suites/krbd/rbd-nomount/clusters/fixed-3.yaml deleted file mode 120000 index a3ac9fc4dec..00000000000 --- a/suites/krbd/rbd-nomount/clusters/fixed-3.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../clusters/fixed-3.yaml \ No newline at end of file diff --git a/suites/krbd/rbd-nomount/conf.yaml b/suites/krbd/rbd-nomount/conf.yaml deleted file mode 100644 index 30da870b25d..00000000000 --- a/suites/krbd/rbd-nomount/conf.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms die on skipped message: false diff --git a/suites/krbd/rbd-nomount/fs/btrfs.yaml b/suites/krbd/rbd-nomount/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/krbd/rbd-nomount/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/krbd/rbd-nomount/install/ceph.yaml b/suites/krbd/rbd-nomount/install/ceph.yaml deleted file mode 100644 index 2030acb9083..00000000000 --- a/suites/krbd/rbd-nomount/install/ceph.yaml +++ /dev/null @@ -1,3 +0,0 @@ -tasks: -- install: -- ceph: diff --git a/suites/krbd/rbd-nomount/msgr-failures/few.yaml b/suites/krbd/rbd-nomount/msgr-failures/few.yaml deleted file mode 100644 index 0de320d46b8..00000000000 --- a/suites/krbd/rbd-nomount/msgr-failures/few.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 5000 diff --git a/suites/krbd/rbd-nomount/msgr-failures/many.yaml b/suites/krbd/rbd-nomount/msgr-failures/many.yaml deleted file mode 100644 index 86f8dde8a0e..00000000000 --- a/suites/krbd/rbd-nomount/msgr-failures/many.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: 
- global: - ms inject socket failures: 500 diff --git a/suites/krbd/rbd-nomount/tasks/rbd_concurrent.yaml b/suites/krbd/rbd-nomount/tasks/rbd_concurrent.yaml deleted file mode 100644 index 675b98e73a5..00000000000 --- a/suites/krbd/rbd-nomount/tasks/rbd_concurrent.yaml +++ /dev/null @@ -1,10 +0,0 @@ -tasks: -- workunit: - clients: - all: - - rbd/concurrent.sh -# Options for rbd/concurrent.sh (default values shown) -# env: -# RBD_CONCURRENT_ITER: 100 -# RBD_CONCURRENT_COUNT: 5 -# RBD_CONCURRENT_DELAY: 5 diff --git a/suites/krbd/rbd-nomount/tasks/rbd_huge_tickets.yaml b/suites/krbd/rbd-nomount/tasks/rbd_huge_tickets.yaml deleted file mode 100644 index ea421eec16e..00000000000 --- a/suites/krbd/rbd-nomount/tasks/rbd_huge_tickets.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - all: - - rbd/huge-tickets.sh diff --git a/suites/krbd/rbd-nomount/tasks/rbd_image_read.yaml b/suites/krbd/rbd-nomount/tasks/rbd_image_read.yaml deleted file mode 100644 index e5017e118d1..00000000000 --- a/suites/krbd/rbd-nomount/tasks/rbd_image_read.yaml +++ /dev/null @@ -1,15 +0,0 @@ -tasks: -- workunit: - clients: - all: - - rbd/image_read.sh -# Options for rbd/image_read.sh (default values shown) -# env: -# IMAGE_READ_LOCAL_FILES: 'false' -# IMAGE_READ_FORMAT: '2' -# IMAGE_READ_VERBOSE: 'true' -# IMAGE_READ_PAGE_SIZE: '4096' -# IMAGE_READ_OBJECT_ORDER: '22' -# IMAGE_READ_TEST_CLONES: 'true' -# IMAGE_READ_DOUBLE_ORDER: 'true' -# IMAGE_READ_HALF_ORDER: 'false' diff --git a/suites/krbd/rbd-nomount/tasks/rbd_kernel.yaml b/suites/krbd/rbd-nomount/tasks/rbd_kernel.yaml deleted file mode 100644 index aa155827c69..00000000000 --- a/suites/krbd/rbd-nomount/tasks/rbd_kernel.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - all: - - rbd/kernel.sh diff --git a/suites/krbd/rbd-nomount/tasks/rbd_kfsx.yaml b/suites/krbd/rbd-nomount/tasks/rbd_kfsx.yaml deleted file mode 100644 index 0f4b24aa64a..00000000000 --- a/suites/krbd/rbd-nomount/tasks/rbd_kfsx.yaml +++ 
/dev/null @@ -1,11 +0,0 @@ -tasks: -- rbd_fsx: - clients: [client.0] - ops: 10000 - krbd: true - readbdy: 512 - writebdy: 512 - truncbdy: 512 - holebdy: 512 - punch_holes: true - randomized_striping: false diff --git a/suites/krbd/rbd-nomount/tasks/rbd_map_snapshot_io.yaml b/suites/krbd/rbd-nomount/tasks/rbd_map_snapshot_io.yaml deleted file mode 100644 index c1529398b9e..00000000000 --- a/suites/krbd/rbd-nomount/tasks/rbd_map_snapshot_io.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - all: - - rbd/map-snapshot-io.sh diff --git a/suites/krbd/rbd-nomount/tasks/rbd_map_unmap.yaml b/suites/krbd/rbd-nomount/tasks/rbd_map_unmap.yaml deleted file mode 100644 index c2160997c81..00000000000 --- a/suites/krbd/rbd-nomount/tasks/rbd_map_unmap.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - all: - - rbd/map-unmap.sh diff --git a/suites/krbd/rbd-nomount/tasks/rbd_simple_big.yaml b/suites/krbd/rbd-nomount/tasks/rbd_simple_big.yaml deleted file mode 100644 index c493cfaf420..00000000000 --- a/suites/krbd/rbd-nomount/tasks/rbd_simple_big.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- workunit: - clients: - all: - - rbd/simple_big.sh - diff --git a/suites/krbd/rbd/% b/suites/krbd/rbd/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/krbd/rbd/clusters/fixed-3.yaml b/suites/krbd/rbd/clusters/fixed-3.yaml deleted file mode 120000 index a3ac9fc4dec..00000000000 --- a/suites/krbd/rbd/clusters/fixed-3.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../clusters/fixed-3.yaml \ No newline at end of file diff --git a/suites/krbd/rbd/conf.yaml b/suites/krbd/rbd/conf.yaml deleted file mode 100644 index 30da870b25d..00000000000 --- a/suites/krbd/rbd/conf.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms die on skipped message: false diff --git a/suites/krbd/rbd/fs/btrfs.yaml b/suites/krbd/rbd/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/krbd/rbd/fs/btrfs.yaml +++ 
/dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/krbd/rbd/msgr-failures/few.yaml b/suites/krbd/rbd/msgr-failures/few.yaml deleted file mode 100644 index 0de320d46b8..00000000000 --- a/suites/krbd/rbd/msgr-failures/few.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 5000 diff --git a/suites/krbd/rbd/msgr-failures/many.yaml b/suites/krbd/rbd/msgr-failures/many.yaml deleted file mode 100644 index 86f8dde8a0e..00000000000 --- a/suites/krbd/rbd/msgr-failures/many.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 500 diff --git a/suites/krbd/rbd/tasks/rbd_workunit_kernel_untar_build.yaml b/suites/krbd/rbd/tasks/rbd_workunit_kernel_untar_build.yaml deleted file mode 100644 index ef2a35dcc1d..00000000000 --- a/suites/krbd/rbd/tasks/rbd_workunit_kernel_untar_build.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: -- ceph: -- rbd: - all: -- workunit: - clients: - all: - - kernel_untar_build.sh diff --git a/suites/krbd/rbd/tasks/rbd_workunit_suites_dbench.yaml b/suites/krbd/rbd/tasks/rbd_workunit_suites_dbench.yaml deleted file mode 100644 index d779eea23ca..00000000000 --- a/suites/krbd/rbd/tasks/rbd_workunit_suites_dbench.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: -- ceph: -- rbd: - all: -- workunit: - clients: - all: - - suites/dbench.sh diff --git a/suites/krbd/rbd/tasks/rbd_workunit_suites_ffsb.yaml b/suites/krbd/rbd/tasks/rbd_workunit_suites_ffsb.yaml deleted file mode 100644 index 5204bb87ffe..00000000000 --- a/suites/krbd/rbd/tasks/rbd_workunit_suites_ffsb.yaml +++ /dev/null @@ -1,10 +0,0 @@ -tasks: -- install: -- ceph: -- rbd: - all: - image_size: 20480 -- workunit: - clients: - all: - - suites/ffsb.sh diff --git a/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress.yaml b/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress.yaml deleted file mode 100644 index f9d62fefcac..00000000000 --- 
a/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: -- ceph: -- rbd: - all: -- workunit: - clients: - all: - - suites/fsstress.sh diff --git a/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress_btrfs.yaml b/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress_btrfs.yaml deleted file mode 100644 index f3930a8986a..00000000000 --- a/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress_btrfs.yaml +++ /dev/null @@ -1,10 +0,0 @@ -tasks: -- install: -- ceph: -- rbd: - all: - fs_type: btrfs -- workunit: - clients: - all: - - suites/fsstress.sh diff --git a/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress_ext4.yaml b/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress_ext4.yaml deleted file mode 100644 index f765b74a6c7..00000000000 --- a/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress_ext4.yaml +++ /dev/null @@ -1,10 +0,0 @@ -tasks: -- install: -- ceph: -- rbd: - all: - fs_type: ext4 -- workunit: - clients: - all: - - suites/fsstress.sh diff --git a/suites/krbd/rbd/tasks/rbd_workunit_suites_fsx.yaml b/suites/krbd/rbd/tasks/rbd_workunit_suites_fsx.yaml deleted file mode 100644 index 98c0849c57e..00000000000 --- a/suites/krbd/rbd/tasks/rbd_workunit_suites_fsx.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: -- ceph: -- rbd: - all: -- workunit: - clients: - all: - - suites/fsx.sh diff --git a/suites/krbd/rbd/tasks/rbd_workunit_suites_iozone.yaml b/suites/krbd/rbd/tasks/rbd_workunit_suites_iozone.yaml deleted file mode 100644 index eb8f18d60de..00000000000 --- a/suites/krbd/rbd/tasks/rbd_workunit_suites_iozone.yaml +++ /dev/null @@ -1,10 +0,0 @@ -tasks: -- install: -- ceph: -- rbd: - all: - image_size: 20480 -- workunit: - clients: - all: - - suites/iozone.sh diff --git a/suites/krbd/rbd/tasks/rbd_workunit_trivial_sync.yaml b/suites/krbd/rbd/tasks/rbd_workunit_trivial_sync.yaml deleted file mode 100644 index 7c2796b2a88..00000000000 --- a/suites/krbd/rbd/tasks/rbd_workunit_trivial_sync.yaml +++ /dev/null @@ -1,8 
+0,0 @@ -tasks: -- install: -- ceph: -- rbd: - all: -- workunit: - clients: - all: [fs/misc/trivial_sync.sh] diff --git a/suites/krbd/singleton/% b/suites/krbd/singleton/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/krbd/singleton/conf.yaml b/suites/krbd/singleton/conf.yaml deleted file mode 100644 index 30da870b25d..00000000000 --- a/suites/krbd/singleton/conf.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms die on skipped message: false diff --git a/suites/krbd/singleton/fs/btrfs.yaml b/suites/krbd/singleton/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/krbd/singleton/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/krbd/singleton/msgr-failures/few.yaml b/suites/krbd/singleton/msgr-failures/few.yaml deleted file mode 100644 index 0de320d46b8..00000000000 --- a/suites/krbd/singleton/msgr-failures/few.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 5000 diff --git a/suites/krbd/singleton/msgr-failures/many.yaml b/suites/krbd/singleton/msgr-failures/many.yaml deleted file mode 100644 index 86f8dde8a0e..00000000000 --- a/suites/krbd/singleton/msgr-failures/many.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 500 diff --git a/suites/krbd/singleton/tasks/rbd_xfstests.yaml b/suites/krbd/singleton/tasks/rbd_xfstests.yaml deleted file mode 100644 index 47a8199c190..00000000000 --- a/suites/krbd/singleton/tasks/rbd_xfstests.yaml +++ /dev/null @@ -1,19 +0,0 @@ -roles: -- [mon.a, mon.c, osd.0, osd.1, osd.2] -- [mon.b, mds.a, osd.3, osd.4, osd.5] -- [client.0] -- [client.1] -tasks: -- install: -- ceph: -- rbd.xfstests: - client.0: - test_image: 'test_image-0' - scratch_image: 'scratch_image-0' - tests: '-g auto' - randomize: true - client.1: - test_image: 'test_image-1' - scratch_image: 'scratch_image-1' - 
tests: '-g auto' - randomize: true diff --git a/suites/krbd/thrash/% b/suites/krbd/thrash/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/krbd/thrash/clusters/fixed-3.yaml b/suites/krbd/thrash/clusters/fixed-3.yaml deleted file mode 120000 index a3ac9fc4dec..00000000000 --- a/suites/krbd/thrash/clusters/fixed-3.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../clusters/fixed-3.yaml \ No newline at end of file diff --git a/suites/krbd/thrash/conf.yaml b/suites/krbd/thrash/conf.yaml deleted file mode 100644 index 30da870b25d..00000000000 --- a/suites/krbd/thrash/conf.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms die on skipped message: false diff --git a/suites/krbd/thrash/fs/btrfs.yaml b/suites/krbd/thrash/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/krbd/thrash/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/krbd/thrash/thrashers/default.yaml b/suites/krbd/thrash/thrashers/default.yaml deleted file mode 100644 index 14d772583cf..00000000000 --- a/suites/krbd/thrash/thrashers/default.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- install: -- ceph: - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost -- thrashosds: diff --git a/suites/krbd/thrash/thrashers/mon-thrasher.yaml b/suites/krbd/thrash/thrashers/mon-thrasher.yaml deleted file mode 100644 index 90612f21865..00000000000 --- a/suites/krbd/thrash/thrashers/mon-thrasher.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- install: -- ceph: -- mon_thrash: - revive_delay: 20 - thrash_delay: 1 diff --git a/suites/krbd/thrash/workloads/rbd_workunit_suites_ffsb.yaml b/suites/krbd/thrash/workloads/rbd_workunit_suites_ffsb.yaml deleted file mode 100644 index 4ae7d690905..00000000000 --- a/suites/krbd/thrash/workloads/rbd_workunit_suites_ffsb.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- rbd: - all: - image_size: 20480 -- workunit: - 
clients: - all: - - suites/ffsb.sh diff --git a/suites/krbd/thrash/workloads/rbd_workunit_suites_iozone.yaml.disabled b/suites/krbd/thrash/workloads/rbd_workunit_suites_iozone.yaml.disabled deleted file mode 100644 index d61ede1bd66..00000000000 --- a/suites/krbd/thrash/workloads/rbd_workunit_suites_iozone.yaml.disabled +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- rbd: - all: - image_size: 20480 -- workunit: - clients: - all: - - suites/iozone.sh diff --git a/suites/marginal/basic/% b/suites/marginal/basic/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/marginal/basic/clusters/fixed-3.yaml b/suites/marginal/basic/clusters/fixed-3.yaml deleted file mode 100644 index 0038432afa7..00000000000 --- a/suites/marginal/basic/clusters/fixed-3.yaml +++ /dev/null @@ -1,4 +0,0 @@ -roles: -- [mon.a, mon.c, osd.0, osd.1, osd.2] -- [mon.b, mds.a, osd.3, osd.4, osd.5] -- [client.0] diff --git a/suites/marginal/basic/fs/btrfs.yaml b/suites/marginal/basic/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/marginal/basic/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/marginal/basic/tasks/kclient_workunit_suites_blogbench.yaml b/suites/marginal/basic/tasks/kclient_workunit_suites_blogbench.yaml deleted file mode 100644 index 4f25d806313..00000000000 --- a/suites/marginal/basic/tasks/kclient_workunit_suites_blogbench.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- install: -- ceph: -- kclient: -- workunit: - clients: - all: - - suites/blogbench.sh diff --git a/suites/marginal/basic/tasks/kclient_workunit_suites_fsx.yaml b/suites/marginal/basic/tasks/kclient_workunit_suites_fsx.yaml deleted file mode 100644 index a0d2e765bdb..00000000000 --- a/suites/marginal/basic/tasks/kclient_workunit_suites_fsx.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- install: -- ceph: -- kclient: -- workunit: - clients: - all: - - suites/fsx.sh diff --git a/suites/marginal/fs-misc/% 
b/suites/marginal/fs-misc/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/marginal/fs-misc/clusters/two_clients.yaml b/suites/marginal/fs-misc/clusters/two_clients.yaml deleted file mode 100644 index 2258befd8bf..00000000000 --- a/suites/marginal/fs-misc/clusters/two_clients.yaml +++ /dev/null @@ -1,4 +0,0 @@ -roles: -- [mon.a, mon.b, mon.c, mds.a, osd.0, osd.1, osd.2] -- [client.1] -- [client.0] diff --git a/suites/marginal/fs-misc/fs/btrfs.yaml b/suites/marginal/fs-misc/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/marginal/fs-misc/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/marginal/fs-misc/tasks/locktest.yaml b/suites/marginal/fs-misc/tasks/locktest.yaml deleted file mode 100644 index 444bb1f19b3..00000000000 --- a/suites/marginal/fs-misc/tasks/locktest.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- install: -- ceph: -- kclient: -- locktest: [client.0, client.1] diff --git a/suites/marginal/mds_restart/% b/suites/marginal/mds_restart/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/marginal/mds_restart/clusters/one_mds.yaml b/suites/marginal/mds_restart/clusters/one_mds.yaml deleted file mode 100644 index 9e11c02a36c..00000000000 --- a/suites/marginal/mds_restart/clusters/one_mds.yaml +++ /dev/null @@ -1,4 +0,0 @@ -roles: -- [mon.a, mon.b, mon.c, osd.0, osd.1, osd.2] -- [mds.a] -- [client.0] diff --git a/suites/marginal/mds_restart/tasks/restart-workunit-backtraces.yaml b/suites/marginal/mds_restart/tasks/restart-workunit-backtraces.yaml deleted file mode 100644 index d086d4cf8d3..00000000000 --- a/suites/marginal/mds_restart/tasks/restart-workunit-backtraces.yaml +++ /dev/null @@ -1,11 +0,0 @@ -tasks: -- install: -- ceph: - conf: - mds: - mds log segment size: 16384 - mds log max segments: 1 -- restart: - exec: - client.0: - - test-backtraces.py diff --git a/suites/marginal/multimds/% 
b/suites/marginal/multimds/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/marginal/multimds/clusters/3-node-3-mds.yaml b/suites/marginal/multimds/clusters/3-node-3-mds.yaml deleted file mode 100644 index 088d9f0d31d..00000000000 --- a/suites/marginal/multimds/clusters/3-node-3-mds.yaml +++ /dev/null @@ -1,5 +0,0 @@ -roles: -- [mon.a, mon.c, mds.a, osd.0, osd.1, osd.2] -- [mon.b, mds.b, mds.c, osd.3, osd.4, osd.5] -- [client.0] -- [client.1] diff --git a/suites/marginal/multimds/clusters/3-node-9-mds.yaml b/suites/marginal/multimds/clusters/3-node-9-mds.yaml deleted file mode 100644 index be824f0f554..00000000000 --- a/suites/marginal/multimds/clusters/3-node-9-mds.yaml +++ /dev/null @@ -1,5 +0,0 @@ -roles: -- [mon.a, mon.c, mds.a, mds.b, mds.c, mds.d, osd.0, osd.1, osd.2] -- [mon.b, mds.e, mds.f, mds.g, mds.h, mds.i, osd.3, osd.4, osd.5] -- [client.0] -- [client.1] diff --git a/suites/marginal/multimds/fs/btrfs.yaml b/suites/marginal/multimds/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/marginal/multimds/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/marginal/multimds/mounts/ceph-fuse.yaml b/suites/marginal/multimds/mounts/ceph-fuse.yaml deleted file mode 100644 index 55d8beb00e9..00000000000 --- a/suites/marginal/multimds/mounts/ceph-fuse.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- install: -- ceph: - conf: - client: - fuse_default_permissions: 0 -- ceph-fuse: diff --git a/suites/marginal/multimds/mounts/kclient.yaml b/suites/marginal/multimds/mounts/kclient.yaml deleted file mode 100644 index c18db8f5ea6..00000000000 --- a/suites/marginal/multimds/mounts/kclient.yaml +++ /dev/null @@ -1,4 +0,0 @@ -tasks: -- install: -- ceph: -- kclient: diff --git a/suites/marginal/multimds/tasks/workunit_misc.yaml b/suites/marginal/multimds/tasks/workunit_misc.yaml deleted file mode 100644 index aa62b9e8c3a..00000000000 --- 
a/suites/marginal/multimds/tasks/workunit_misc.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - all: - - fs/misc diff --git a/suites/marginal/multimds/tasks/workunit_suites_blogbench.yaml b/suites/marginal/multimds/tasks/workunit_suites_blogbench.yaml deleted file mode 100644 index 4c1fcc11ed9..00000000000 --- a/suites/marginal/multimds/tasks/workunit_suites_blogbench.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - all: - - suites/blogbench.sh diff --git a/suites/marginal/multimds/tasks/workunit_suites_dbench.yaml b/suites/marginal/multimds/tasks/workunit_suites_dbench.yaml deleted file mode 100644 index 41b2bc8edaa..00000000000 --- a/suites/marginal/multimds/tasks/workunit_suites_dbench.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - all: - - suites/dbench.sh diff --git a/suites/marginal/multimds/tasks/workunit_suites_fsstress.yaml b/suites/marginal/multimds/tasks/workunit_suites_fsstress.yaml deleted file mode 100644 index ddb18fb791a..00000000000 --- a/suites/marginal/multimds/tasks/workunit_suites_fsstress.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - all: - - suites/fsstress.sh diff --git a/suites/marginal/multimds/tasks/workunit_suites_fsync.yaml b/suites/marginal/multimds/tasks/workunit_suites_fsync.yaml deleted file mode 100644 index 7efa1adb82d..00000000000 --- a/suites/marginal/multimds/tasks/workunit_suites_fsync.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - all: - - suites/fsync-tester.sh diff --git a/suites/marginal/multimds/tasks/workunit_suites_pjd.yaml b/suites/marginal/multimds/tasks/workunit_suites_pjd.yaml deleted file mode 100644 index dfb3abe23a7..00000000000 --- a/suites/marginal/multimds/tasks/workunit_suites_pjd.yaml +++ /dev/null @@ -1,10 +0,0 @@ -overrides: - ceph: - conf: - client: - fuse_default_permissions: 1 -tasks: -- workunit: - clients: - all: - - suites/pjd.sh diff --git 
a/suites/marginal/multimds/tasks/workunit_suites_truncate_delay.yaml b/suites/marginal/multimds/tasks/workunit_suites_truncate_delay.yaml deleted file mode 100644 index 3aa5f8825ac..00000000000 --- a/suites/marginal/multimds/tasks/workunit_suites_truncate_delay.yaml +++ /dev/null @@ -1,15 +0,0 @@ -tasks: -- install: -- ceph: - conf: - client: - ms_inject_delay_probability: 1 - ms_inject_delay_type: osd - ms_inject_delay_max: 5 - client_oc_max_dirty_age: 1 -- ceph-fuse: -- exec: - client.0: - - dd if=/dev/zero of=./foo count=100 - - sleep 2 - - truncate --size 0 ./foo diff --git a/suites/marginal/multimds/thrash/exports.yaml b/suites/marginal/multimds/thrash/exports.yaml deleted file mode 100644 index 240b46dfd8a..00000000000 --- a/suites/marginal/multimds/thrash/exports.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - mds: - mds thrash exports: 1 diff --git a/suites/marginal/multimds/thrash/normal.yaml b/suites/marginal/multimds/thrash/normal.yaml deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/mixed-clients/basic/clusters/fixed-3.yaml b/suites/mixed-clients/basic/clusters/fixed-3.yaml deleted file mode 100644 index e1d3c7b7932..00000000000 --- a/suites/mixed-clients/basic/clusters/fixed-3.yaml +++ /dev/null @@ -1,4 +0,0 @@ -roles: -- [mon.a, mds.a, osd.0, osd.1] -- [mon.b, mon.c, osd.2, osd.3, client.0] -- [client.1] diff --git a/suites/mixed-clients/basic/fs/btrfs.yaml b/suites/mixed-clients/basic/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/mixed-clients/basic/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/mixed-clients/basic/tasks/kernel_cfuse_workunits_dbench_iozone.yaml b/suites/mixed-clients/basic/tasks/kernel_cfuse_workunits_dbench_iozone.yaml deleted file mode 100644 index bb347be7fd7..00000000000 --- a/suites/mixed-clients/basic/tasks/kernel_cfuse_workunits_dbench_iozone.yaml +++ /dev/null @@ -1,26 +0,0 @@ 
-overrides: - ceph: - conf: - global: - ms die on skipped message: false -tasks: -- install: - branch: dumpling -- ceph: -- parallel: - - user-workload - - kclient-workload -user-workload: - sequential: - - ceph-fuse: [client.0] - - workunit: - clients: - client.0: - - suites/iozone.sh -kclient-workload: - sequential: - - kclient: [client.1] - - workunit: - clients: - client.1: - - suites/dbench.sh diff --git a/suites/mixed-clients/basic/tasks/kernel_cfuse_workunits_untarbuild_blogbench.yaml b/suites/mixed-clients/basic/tasks/kernel_cfuse_workunits_untarbuild_blogbench.yaml deleted file mode 100644 index 2c32a61e864..00000000000 --- a/suites/mixed-clients/basic/tasks/kernel_cfuse_workunits_untarbuild_blogbench.yaml +++ /dev/null @@ -1,26 +0,0 @@ -overrides: - ceph: - conf: - global: - ms die on skipped message: false -tasks: -- install: - branch: dumpling -- ceph: -- parallel: - - user-workload - - kclient-workload -user-workload: - sequential: - - ceph-fuse: [client.0] - - workunit: - clients: - client.0: - - suites/blogbench.sh -kclient-workload: - sequential: - - kclient: [client.1] - - workunit: - clients: - client.1: - - kernel_untar_build.sh diff --git a/suites/multimds/basic/% b/suites/multimds/basic/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/multimds/basic/ceph/base.yaml b/suites/multimds/basic/ceph/base.yaml deleted file mode 100644 index 50b60b5152b..00000000000 --- a/suites/multimds/basic/ceph/base.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- install: -- ceph: - conf: - client: - fuse_default_permissions: 0 diff --git a/suites/multimds/basic/clusters/3-mds.yaml b/suites/multimds/basic/clusters/3-mds.yaml deleted file mode 100644 index c655b90c81c..00000000000 --- a/suites/multimds/basic/clusters/3-mds.yaml +++ /dev/null @@ -1,4 +0,0 @@ -roles: -- [mon.a, mon.c, mds.a, osd.0, osd.1, osd.2] -- [mon.b, mds.b, mds.c, osd.3, osd.4, osd.5] -- [client.0] diff --git a/suites/multimds/basic/clusters/9-mds.yaml 
b/suites/multimds/basic/clusters/9-mds.yaml deleted file mode 100644 index ed554c9fe3c..00000000000 --- a/suites/multimds/basic/clusters/9-mds.yaml +++ /dev/null @@ -1,4 +0,0 @@ -roles: -- [mon.a, mon.c, mds.a, mds.b, mds.c, mds.d, osd.0, osd.1, osd.2] -- [mon.b, mds.e, mds.f, mds.g, mds.h, mds.i, osd.3, osd.4, osd.5] -- [client.0] diff --git a/suites/multimds/basic/debug/mds_client.yaml b/suites/multimds/basic/debug/mds_client.yaml deleted file mode 120000 index 335c1cafed7..00000000000 --- a/suites/multimds/basic/debug/mds_client.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../debug/mds_client.yaml \ No newline at end of file diff --git a/suites/multimds/basic/fs/btrfs.yaml b/suites/multimds/basic/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/multimds/basic/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/multimds/basic/inline/no.yaml b/suites/multimds/basic/inline/no.yaml deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/multimds/basic/inline/yes.yaml b/suites/multimds/basic/inline/yes.yaml deleted file mode 100644 index 4b2c1d9cf49..00000000000 --- a/suites/multimds/basic/inline/yes.yaml +++ /dev/null @@ -1,4 +0,0 @@ -tasks: -- exec: - client.0: - - ceph mds set inline_data true --yes-i-really-mean-it diff --git a/suites/multimds/basic/mount/cfuse.yaml b/suites/multimds/basic/mount/cfuse.yaml deleted file mode 100644 index e3c34a1f604..00000000000 --- a/suites/multimds/basic/mount/cfuse.yaml +++ /dev/null @@ -1,2 +0,0 @@ -tasks: -- ceph-fuse: diff --git a/suites/multimds/basic/mount/kclient.yaml b/suites/multimds/basic/mount/kclient.yaml deleted file mode 100644 index f00f16aea22..00000000000 --- a/suites/multimds/basic/mount/kclient.yaml +++ /dev/null @@ -1,7 +0,0 @@ -overrides: - ceph: - conf: - global: - ms die on skipped message: false -tasks: -- kclient: diff --git 
a/suites/multimds/basic/overrides/whitelist_wrongly_marked_down.yaml b/suites/multimds/basic/overrides/whitelist_wrongly_marked_down.yaml deleted file mode 120000 index 08f746bf894..00000000000 --- a/suites/multimds/basic/overrides/whitelist_wrongly_marked_down.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../overrides/whitelist_wrongly_marked_down.yaml \ No newline at end of file diff --git a/suites/multimds/basic/tasks/kernel_untar_build.yaml b/suites/multimds/basic/tasks/kernel_untar_build.yaml deleted file mode 100644 index 8dbc24a9feb..00000000000 --- a/suites/multimds/basic/tasks/kernel_untar_build.yaml +++ /dev/null @@ -1,10 +0,0 @@ -overrides: - ceph: - conf: - client: - fuse_default_permissions: 0 -tasks: -- workunit: - clients: - all: - - kernel_untar_build.sh diff --git a/suites/multimds/basic/tasks/misc.yaml b/suites/multimds/basic/tasks/misc.yaml deleted file mode 100644 index 6c8327bb0d7..00000000000 --- a/suites/multimds/basic/tasks/misc.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- workunit: - timeout: 5h - clients: - all: - - fs/misc diff --git a/suites/multimds/basic/tasks/misc_test_o_trunc.yaml b/suites/multimds/basic/tasks/misc_test_o_trunc.yaml deleted file mode 100644 index c9de5c38637..00000000000 --- a/suites/multimds/basic/tasks/misc_test_o_trunc.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - all: - - fs/test_o_trunc.sh diff --git a/suites/multimds/basic/tasks/suites_blogbench.yaml b/suites/multimds/basic/tasks/suites_blogbench.yaml deleted file mode 100644 index 4c1fcc11ed9..00000000000 --- a/suites/multimds/basic/tasks/suites_blogbench.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - all: - - suites/blogbench.sh diff --git a/suites/multimds/basic/tasks/suites_dbench.yaml b/suites/multimds/basic/tasks/suites_dbench.yaml deleted file mode 100644 index 41b2bc8edaa..00000000000 --- a/suites/multimds/basic/tasks/suites_dbench.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - all: - - 
suites/dbench.sh diff --git a/suites/multimds/basic/tasks/suites_ffsb.yaml b/suites/multimds/basic/tasks/suites_ffsb.yaml deleted file mode 100644 index 4a2a627fe5d..00000000000 --- a/suites/multimds/basic/tasks/suites_ffsb.yaml +++ /dev/null @@ -1,10 +0,0 @@ -overrides: - ceph: - conf: - osd: - filestore flush min: 0 -tasks: -- workunit: - clients: - all: - - suites/ffsb.sh diff --git a/suites/multimds/basic/tasks/suites_fsstress.yaml b/suites/multimds/basic/tasks/suites_fsstress.yaml deleted file mode 100644 index ddb18fb791a..00000000000 --- a/suites/multimds/basic/tasks/suites_fsstress.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - all: - - suites/fsstress.sh diff --git a/suites/multimds/basic/tasks/suites_fsx.yaml b/suites/multimds/basic/tasks/suites_fsx.yaml deleted file mode 100644 index 8b2b1ab5c14..00000000000 --- a/suites/multimds/basic/tasks/suites_fsx.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - all: - - suites/fsx.sh diff --git a/suites/multimds/basic/tasks/suites_fsync.yaml b/suites/multimds/basic/tasks/suites_fsync.yaml deleted file mode 100644 index 7efa1adb82d..00000000000 --- a/suites/multimds/basic/tasks/suites_fsync.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - all: - - suites/fsync-tester.sh diff --git a/suites/multimds/basic/tasks/suites_iogen.yaml b/suites/multimds/basic/tasks/suites_iogen.yaml deleted file mode 100644 index d45d4ea3c3f..00000000000 --- a/suites/multimds/basic/tasks/suites_iogen.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - all: - - suites/iogen.sh diff --git a/suites/multimds/basic/tasks/suites_iozone.yaml b/suites/multimds/basic/tasks/suites_iozone.yaml deleted file mode 100644 index 9270f3c51e2..00000000000 --- a/suites/multimds/basic/tasks/suites_iozone.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - all: - - suites/iozone.sh diff --git a/suites/multimds/basic/tasks/suites_pjd.yaml 
b/suites/multimds/basic/tasks/suites_pjd.yaml deleted file mode 100644 index de21f7c3464..00000000000 --- a/suites/multimds/basic/tasks/suites_pjd.yaml +++ /dev/null @@ -1,14 +0,0 @@ -overrides: - ceph: - conf: - client: - debug ms: 1 - debug client: 20 - mds: - debug ms: 1 - debug mds: 20 -tasks: -- workunit: - clients: - all: - - suites/pjd.sh diff --git a/suites/multimds/basic/tasks/suites_truncate_delay.yaml b/suites/multimds/basic/tasks/suites_truncate_delay.yaml deleted file mode 100644 index ac5c9b13901..00000000000 --- a/suites/multimds/basic/tasks/suites_truncate_delay.yaml +++ /dev/null @@ -1,14 +0,0 @@ -overrides: - ceph: - conf: - client: - ms_inject_delay_probability: 1 - ms_inject_delay_type: osd - ms_inject_delay_max: 5 - client_oc_max_dirty_age: 1 -tasks: -- exec: - client.0: - - dd if=/dev/zero of=./foo count=100 - - sleep 2 - - truncate --size 0 ./foo diff --git a/suites/multimds/basic/tasks/trivial_sync.yaml b/suites/multimds/basic/tasks/trivial_sync.yaml deleted file mode 100644 index 36e7411b638..00000000000 --- a/suites/multimds/basic/tasks/trivial_sync.yaml +++ /dev/null @@ -1,4 +0,0 @@ -tasks: -- workunit: - clients: - all: [fs/misc/trivial_sync.sh] diff --git a/suites/multimds/libcephfs/% b/suites/multimds/libcephfs/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/multimds/libcephfs/ceph/base.yaml b/suites/multimds/libcephfs/ceph/base.yaml deleted file mode 100644 index 50b60b5152b..00000000000 --- a/suites/multimds/libcephfs/ceph/base.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- install: -- ceph: - conf: - client: - fuse_default_permissions: 0 diff --git a/suites/multimds/libcephfs/clusters/3-mds.yaml b/suites/multimds/libcephfs/clusters/3-mds.yaml deleted file mode 100644 index c655b90c81c..00000000000 --- a/suites/multimds/libcephfs/clusters/3-mds.yaml +++ /dev/null @@ -1,4 +0,0 @@ -roles: -- [mon.a, mon.c, mds.a, osd.0, osd.1, osd.2] -- [mon.b, mds.b, mds.c, osd.3, osd.4, osd.5] -- [client.0] diff --git 
a/suites/multimds/libcephfs/clusters/9-mds.yaml b/suites/multimds/libcephfs/clusters/9-mds.yaml deleted file mode 100644 index ed554c9fe3c..00000000000 --- a/suites/multimds/libcephfs/clusters/9-mds.yaml +++ /dev/null @@ -1,4 +0,0 @@ -roles: -- [mon.a, mon.c, mds.a, mds.b, mds.c, mds.d, osd.0, osd.1, osd.2] -- [mon.b, mds.e, mds.f, mds.g, mds.h, mds.i, osd.3, osd.4, osd.5] -- [client.0] diff --git a/suites/multimds/libcephfs/debug/mds_client.yaml b/suites/multimds/libcephfs/debug/mds_client.yaml deleted file mode 120000 index 335c1cafed7..00000000000 --- a/suites/multimds/libcephfs/debug/mds_client.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../debug/mds_client.yaml \ No newline at end of file diff --git a/suites/multimds/libcephfs/fs/btrfs.yaml b/suites/multimds/libcephfs/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/multimds/libcephfs/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/multimds/libcephfs/inline/no.yaml b/suites/multimds/libcephfs/inline/no.yaml deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/multimds/libcephfs/inline/yes.yaml b/suites/multimds/libcephfs/inline/yes.yaml deleted file mode 100644 index 4b2c1d9cf49..00000000000 --- a/suites/multimds/libcephfs/inline/yes.yaml +++ /dev/null @@ -1,4 +0,0 @@ -tasks: -- exec: - client.0: - - ceph mds set inline_data true --yes-i-really-mean-it diff --git a/suites/multimds/libcephfs/overrides/whitelist_wrongly_marked_down.yaml b/suites/multimds/libcephfs/overrides/whitelist_wrongly_marked_down.yaml deleted file mode 120000 index 08f746bf894..00000000000 --- a/suites/multimds/libcephfs/overrides/whitelist_wrongly_marked_down.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../overrides/whitelist_wrongly_marked_down.yaml \ No newline at end of file diff --git a/suites/multimds/libcephfs/tasks/libcephfs_interface_tests.yaml b/suites/multimds/libcephfs/tasks/libcephfs_interface_tests.yaml 
deleted file mode 100644 index 0b1d41fea5c..00000000000 --- a/suites/multimds/libcephfs/tasks/libcephfs_interface_tests.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - client.0: - - libcephfs/test.sh diff --git a/suites/multimds/libcephfs/tasks/libcephfs_java.yaml b/suites/multimds/libcephfs/tasks/libcephfs_java.yaml deleted file mode 100644 index 4330d50965e..00000000000 --- a/suites/multimds/libcephfs/tasks/libcephfs_java.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - client.0: - - libcephfs-java/test.sh diff --git a/suites/multimds/libcephfs/tasks/mds_creation_retry.yaml b/suites/multimds/libcephfs/tasks/mds_creation_retry.yaml deleted file mode 100644 index cd87f28ad08..00000000000 --- a/suites/multimds/libcephfs/tasks/mds_creation_retry.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: --mds_creation_failure: --ceph-fuse: -- workunit: - clients: - all: [fs/misc/trivial_sync.sh] diff --git a/suites/multimds/verify/% b/suites/multimds/verify/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/multimds/verify/ceph/base.yaml b/suites/multimds/verify/ceph/base.yaml deleted file mode 100644 index 50b60b5152b..00000000000 --- a/suites/multimds/verify/ceph/base.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- install: -- ceph: - conf: - client: - fuse_default_permissions: 0 diff --git a/suites/multimds/verify/clusters/3-mds.yaml b/suites/multimds/verify/clusters/3-mds.yaml deleted file mode 100644 index c655b90c81c..00000000000 --- a/suites/multimds/verify/clusters/3-mds.yaml +++ /dev/null @@ -1,4 +0,0 @@ -roles: -- [mon.a, mon.c, mds.a, osd.0, osd.1, osd.2] -- [mon.b, mds.b, mds.c, osd.3, osd.4, osd.5] -- [client.0] diff --git a/suites/multimds/verify/clusters/9-mds.yaml b/suites/multimds/verify/clusters/9-mds.yaml deleted file mode 100644 index ed554c9fe3c..00000000000 --- a/suites/multimds/verify/clusters/9-mds.yaml +++ /dev/null @@ -1,4 +0,0 @@ -roles: -- [mon.a, mon.c, mds.a, 
mds.b, mds.c, mds.d, osd.0, osd.1, osd.2] -- [mon.b, mds.e, mds.f, mds.g, mds.h, mds.i, osd.3, osd.4, osd.5] -- [client.0] diff --git a/suites/multimds/verify/debug/mds_client.yaml b/suites/multimds/verify/debug/mds_client.yaml deleted file mode 120000 index 335c1cafed7..00000000000 --- a/suites/multimds/verify/debug/mds_client.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../debug/mds_client.yaml \ No newline at end of file diff --git a/suites/multimds/verify/fs/btrfs.yaml b/suites/multimds/verify/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/multimds/verify/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/multimds/verify/overrides/whitelist_wrongly_marked_down.yaml b/suites/multimds/verify/overrides/whitelist_wrongly_marked_down.yaml deleted file mode 120000 index 08f746bf894..00000000000 --- a/suites/multimds/verify/overrides/whitelist_wrongly_marked_down.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../overrides/whitelist_wrongly_marked_down.yaml \ No newline at end of file diff --git a/suites/multimds/verify/tasks/cfuse_workunit_suites_dbench.yaml b/suites/multimds/verify/tasks/cfuse_workunit_suites_dbench.yaml deleted file mode 100644 index ad96b4c5e7f..00000000000 --- a/suites/multimds/verify/tasks/cfuse_workunit_suites_dbench.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - all: - - suites/dbench.sh diff --git a/suites/multimds/verify/tasks/cfuse_workunit_suites_fsstress.yaml b/suites/multimds/verify/tasks/cfuse_workunit_suites_fsstress.yaml deleted file mode 100644 index 5908d951b2d..00000000000 --- a/suites/multimds/verify/tasks/cfuse_workunit_suites_fsstress.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - all: - - suites/fsstress.sh diff --git a/suites/multimds/verify/tasks/libcephfs_interface_tests.yaml b/suites/multimds/verify/tasks/libcephfs_interface_tests.yaml deleted file mode 100644 
index 0b1d41fea5c..00000000000 --- a/suites/multimds/verify/tasks/libcephfs_interface_tests.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - client.0: - - libcephfs/test.sh diff --git a/suites/multimds/verify/validater/lockdep.yaml b/suites/multimds/verify/validater/lockdep.yaml deleted file mode 100644 index 25f84355c0b..00000000000 --- a/suites/multimds/verify/validater/lockdep.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - lockdep: true diff --git a/suites/multimds/verify/validater/valgrind.yaml b/suites/multimds/verify/validater/valgrind.yaml deleted file mode 100644 index 973f460ad47..00000000000 --- a/suites/multimds/verify/validater/valgrind.yaml +++ /dev/null @@ -1,15 +0,0 @@ -overrides: - install: - ceph: - flavor: notcmalloc - ceph: - conf: - global: - osd heartbeat grace: 40 - valgrind: - mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes] - osd: [--tool=memcheck] - mds: [--tool=memcheck] - ceph-fuse: - client.0: - valgrind: [--tool=memcheck, --leak-check=full, --show-reachable=yes] diff --git a/suites/powercycle/osd/% b/suites/powercycle/osd/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/powercycle/osd/clusters/3osd-1per-target.yaml b/suites/powercycle/osd/clusters/3osd-1per-target.yaml deleted file mode 100644 index d5503a40c86..00000000000 --- a/suites/powercycle/osd/clusters/3osd-1per-target.yaml +++ /dev/null @@ -1,5 +0,0 @@ -roles: -- [mon.0, mon.1, mon.2, mds.0, client.0] -- [osd.0] -- [osd.1] -- [osd.2] diff --git a/suites/powercycle/osd/fs/btrfs.yaml b/suites/powercycle/osd/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/powercycle/osd/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/powercycle/osd/fs/ext4.yaml b/suites/powercycle/osd/fs/ext4.yaml deleted file mode 120000 index 65d71886933..00000000000 --- 
a/suites/powercycle/osd/fs/ext4.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/ext4.yaml \ No newline at end of file diff --git a/suites/powercycle/osd/fs/xfs.yaml b/suites/powercycle/osd/fs/xfs.yaml deleted file mode 120000 index 4c28d731f6b..00000000000 --- a/suites/powercycle/osd/fs/xfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/xfs.yaml \ No newline at end of file diff --git a/suites/powercycle/osd/powercycle/default.yaml b/suites/powercycle/osd/powercycle/default.yaml deleted file mode 100644 index b632e83e621..00000000000 --- a/suites/powercycle/osd/powercycle/default.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- install: -- ceph: -- thrashosds: - chance_down: 1.0 - powercycle: true - timeout: 600 diff --git a/suites/powercycle/osd/tasks/admin_socket_objecter_requests.yaml b/suites/powercycle/osd/tasks/admin_socket_objecter_requests.yaml deleted file mode 100644 index b1ddad8d3b0..00000000000 --- a/suites/powercycle/osd/tasks/admin_socket_objecter_requests.yaml +++ /dev/null @@ -1,13 +0,0 @@ -overrides: - ceph: - conf: - client.0: - admin socket: /var/run/ceph/ceph-$name.asok -tasks: -- radosbench: - clients: [client.0] - time: 60 -- admin_socket: - client.0: - objecter_requests: - test: "http://git.ceph.com/?p=ceph.git;a=blob_plain;f=src/test/admin_socket/objecter_requests;hb={branch}" diff --git a/suites/powercycle/osd/tasks/cfuse_workunit_kernel_untar_build.yaml b/suites/powercycle/osd/tasks/cfuse_workunit_kernel_untar_build.yaml deleted file mode 100644 index 87f8f57cc7b..00000000000 --- a/suites/powercycle/osd/tasks/cfuse_workunit_kernel_untar_build.yaml +++ /dev/null @@ -1,12 +0,0 @@ -overrides: - ceph: - conf: - client: - fuse_default_permissions: 0 -tasks: -- ceph-fuse: -- workunit: - timeout: 6h - clients: - all: - - kernel_untar_build.sh diff --git a/suites/powercycle/osd/tasks/cfuse_workunit_misc.yaml b/suites/powercycle/osd/tasks/cfuse_workunit_misc.yaml deleted file mode 100644 index 683d3f592c2..00000000000 --- 
a/suites/powercycle/osd/tasks/cfuse_workunit_misc.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - timeout: 6h - clients: - all: - - fs/misc diff --git a/suites/powercycle/osd/tasks/cfuse_workunit_suites_ffsb.yaml b/suites/powercycle/osd/tasks/cfuse_workunit_suites_ffsb.yaml deleted file mode 100644 index 9f3fa7b1887..00000000000 --- a/suites/powercycle/osd/tasks/cfuse_workunit_suites_ffsb.yaml +++ /dev/null @@ -1,14 +0,0 @@ -overrides: - ceph: - conf: - osd: - filestore flush min: 0 - mds: - debug ms: 1 - debug mds: 20 -tasks: -- ceph-fuse: -- workunit: - clients: - all: - - suites/ffsb.sh diff --git a/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsstress.yaml b/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsstress.yaml deleted file mode 100644 index 5908d951b2d..00000000000 --- a/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsstress.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - all: - - suites/fsstress.sh diff --git a/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsx.yaml b/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsx.yaml deleted file mode 100644 index 94031518ea5..00000000000 --- a/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsx.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - timeout: 6h - clients: - all: - - suites/fsx.sh diff --git a/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsync.yaml b/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsync.yaml deleted file mode 100644 index c6043e209bd..00000000000 --- a/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsync.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - all: - - suites/fsync-tester.sh diff --git a/suites/powercycle/osd/tasks/cfuse_workunit_suites_pjd.yaml b/suites/powercycle/osd/tasks/cfuse_workunit_suites_pjd.yaml deleted file mode 100644 index 930bf4a671d..00000000000 --- a/suites/powercycle/osd/tasks/cfuse_workunit_suites_pjd.yaml +++ 
/dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - all: - - suites/pjd.sh diff --git a/suites/powercycle/osd/tasks/cfuse_workunit_suites_truncate_delay.yaml b/suites/powercycle/osd/tasks/cfuse_workunit_suites_truncate_delay.yaml deleted file mode 100644 index f3efafa2e9d..00000000000 --- a/suites/powercycle/osd/tasks/cfuse_workunit_suites_truncate_delay.yaml +++ /dev/null @@ -1,15 +0,0 @@ -overrides: - ceph: - conf: - client: - ms_inject_delay_probability: 1 - ms_inject_delay_type: osd - ms_inject_delay_max: 5 - client_oc_max_dirty_age: 1 -tasks: -- ceph-fuse: -- exec: - client.0: - - dd if=/dev/zero of=./foo count=100 - - sleep 2 - - truncate --size 0 ./foo diff --git a/suites/powercycle/osd/tasks/rados_api_tests.yaml b/suites/powercycle/osd/tasks/rados_api_tests.yaml deleted file mode 100644 index b4708ebd7c0..00000000000 --- a/suites/powercycle/osd/tasks/rados_api_tests.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - client.0: - - rados/test.sh diff --git a/suites/powercycle/osd/tasks/radosbench.yaml b/suites/powercycle/osd/tasks/radosbench.yaml deleted file mode 100644 index 68e933028a2..00000000000 --- a/suites/powercycle/osd/tasks/radosbench.yaml +++ /dev/null @@ -1,4 +0,0 @@ -tasks: -- radosbench: - clients: [client.0] - time: 600 diff --git a/suites/powercycle/osd/tasks/readwrite.yaml b/suites/powercycle/osd/tasks/readwrite.yaml deleted file mode 100644 index c53e52b0872..00000000000 --- a/suites/powercycle/osd/tasks/readwrite.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 500 - op_weights: - read: 45 - write: 45 - delete: 10 diff --git a/suites/powercycle/osd/tasks/snaps-few-objects.yaml b/suites/powercycle/osd/tasks/snaps-few-objects.yaml deleted file mode 100644 index aa82d973ae1..00000000000 --- a/suites/powercycle/osd/tasks/snaps-few-objects.yaml +++ /dev/null @@ -1,13 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 50 - 
op_weights: - read: 100 - write: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 - copy_from: 50 diff --git a/suites/powercycle/osd/tasks/snaps-many-objects.yaml b/suites/powercycle/osd/tasks/snaps-many-objects.yaml deleted file mode 100644 index 1ffe4e14888..00000000000 --- a/suites/powercycle/osd/tasks/snaps-many-objects.yaml +++ /dev/null @@ -1,13 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 500 - op_weights: - read: 100 - write: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 - copy_from: 50 diff --git a/suites/rados/basic/% b/suites/rados/basic/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/rados/basic/clusters/fixed-2.yaml b/suites/rados/basic/clusters/fixed-2.yaml deleted file mode 120000 index cd0791a1486..00000000000 --- a/suites/rados/basic/clusters/fixed-2.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../clusters/fixed-2.yaml \ No newline at end of file diff --git a/suites/rados/basic/fs/xfs.yaml b/suites/rados/basic/fs/xfs.yaml deleted file mode 120000 index 4c28d731f6b..00000000000 --- a/suites/rados/basic/fs/xfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/xfs.yaml \ No newline at end of file diff --git a/suites/rados/basic/msgr-failures/few.yaml b/suites/rados/basic/msgr-failures/few.yaml deleted file mode 100644 index 0de320d46b8..00000000000 --- a/suites/rados/basic/msgr-failures/few.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 5000 diff --git a/suites/rados/basic/msgr-failures/many.yaml b/suites/rados/basic/msgr-failures/many.yaml deleted file mode 100644 index 038c3a79908..00000000000 --- a/suites/rados/basic/msgr-failures/many.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 1500 diff --git a/suites/rados/basic/tasks/rados_api_tests.yaml b/suites/rados/basic/tasks/rados_api_tests.yaml deleted file mode 100644 index acfc597dec3..00000000000 
--- a/suites/rados/basic/tasks/rados_api_tests.yaml +++ /dev/null @@ -1,14 +0,0 @@ -overrides: - ceph: - log-whitelist: - - reached quota - - wrongly marked me down -tasks: -- install: -- ceph: -- workunit: - clients: - client.0: - - rados/test.sh - - rados/test_pool_quota.sh - diff --git a/suites/rados/basic/tasks/rados_cls_all.yaml b/suites/rados/basic/tasks/rados_cls_all.yaml deleted file mode 100644 index 34f7cbbb4a0..00000000000 --- a/suites/rados/basic/tasks/rados_cls_all.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- install: -- ceph: -- workunit: - clients: - client.0: - - cls diff --git a/suites/rados/basic/tasks/rados_python.yaml b/suites/rados/basic/tasks/rados_python.yaml deleted file mode 100644 index 00320538ff7..00000000000 --- a/suites/rados/basic/tasks/rados_python.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: -- ceph: - log-whitelist: - - wrongly marked me down -- workunit: - clients: - client.0: - - rados/test_python.sh diff --git a/suites/rados/basic/tasks/rados_stress_watch.yaml b/suites/rados/basic/tasks/rados_stress_watch.yaml deleted file mode 100644 index ae2e5fd0083..00000000000 --- a/suites/rados/basic/tasks/rados_stress_watch.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- install: -- ceph: -- workunit: - clients: - client.0: - - rados/stress_watch.sh diff --git a/suites/rados/basic/tasks/rados_workunit_loadgen_big.yaml b/suites/rados/basic/tasks/rados_workunit_loadgen_big.yaml deleted file mode 100644 index 9432367e356..00000000000 --- a/suites/rados/basic/tasks/rados_workunit_loadgen_big.yaml +++ /dev/null @@ -1,11 +0,0 @@ -overrides: - ceph: - log-whitelist: - - wrongly marked me down -tasks: -- install: -- ceph: -- workunit: - clients: - all: - - rados/load-gen-big.sh diff --git a/suites/rados/basic/tasks/rados_workunit_loadgen_mix.yaml b/suites/rados/basic/tasks/rados_workunit_loadgen_mix.yaml deleted file mode 100644 index 7d882cac9c9..00000000000 --- a/suites/rados/basic/tasks/rados_workunit_loadgen_mix.yaml +++ /dev/null @@ 
-1,11 +0,0 @@ -overrides: - ceph: - log-whitelist: - - wrongly marked me down -tasks: -- install: -- ceph: -- workunit: - clients: - all: - - rados/load-gen-mix.sh diff --git a/suites/rados/basic/tasks/rados_workunit_loadgen_mostlyread.yaml b/suites/rados/basic/tasks/rados_workunit_loadgen_mostlyread.yaml deleted file mode 100644 index 69c06b7b049..00000000000 --- a/suites/rados/basic/tasks/rados_workunit_loadgen_mostlyread.yaml +++ /dev/null @@ -1,11 +0,0 @@ -overrides: - ceph: - log-whitelist: - - wrongly marked me down -tasks: -- install: -- ceph: -- workunit: - clients: - all: - - rados/load-gen-mostlyread.sh diff --git a/suites/rados/basic/tasks/repair_test.yaml b/suites/rados/basic/tasks/repair_test.yaml deleted file mode 100644 index 1fd037bed21..00000000000 --- a/suites/rados/basic/tasks/repair_test.yaml +++ /dev/null @@ -1,11 +0,0 @@ -overrides: - ceph: - log-whitelist: ['candidate had a read error', 'deep-scrub 0 missing, 1 inconsistent objects', 'deep-scrub 0 missing, 4 inconsistent objects', 'deep-scrub 1 errors', 'deep-scrub 4 errors', '!= known omap_digest', '!= known data_digest', 'repair 0 missing, 1 inconsistent objects', 'repair 0 missing, 4 inconsistent objects', 'repair 1 errors, 1 fixed', 'repair 4 errors, 4 fixed', 'scrub 0 missing, 1 inconsistent', 'scrub 1 errors', 'size 1 != known size', '!= best guess'] - conf: - osd: - filestore debug inject read err : true -tasks: -- install: -- ceph: -- repair_test: - diff --git a/suites/rados/basic/tasks/scrub_test.yaml b/suites/rados/basic/tasks/scrub_test.yaml deleted file mode 100644 index 2b87c3f0dff..00000000000 --- a/suites/rados/basic/tasks/scrub_test.yaml +++ /dev/null @@ -1,17 +0,0 @@ -overrides: - ceph: - log-whitelist: - - '!= best guess digest' - - '!= best guess data_digest' - - '!= best guess omap_digest' - - '!= known digest' - - '!= known data_digest' - - '!= known omap_digest' - - deep-scrub 0 missing, 1 inconsistent objects - - deep-scrub 1 errors - - repair 0 missing, 1 inconsistent 
objects - - repair 1 errors, 1 fixed -tasks: -- install: -- ceph: -- scrub_test: diff --git a/suites/rados/monthrash/% b/suites/rados/monthrash/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/rados/monthrash/ceph/ceph.yaml b/suites/rados/monthrash/ceph/ceph.yaml deleted file mode 100644 index a2c0efc7779..00000000000 --- a/suites/rados/monthrash/ceph/ceph.yaml +++ /dev/null @@ -1,9 +0,0 @@ -overrides: - ceph: - conf: - mon: - mon min osdmap epochs: 25 - paxos service trim min: 5 -tasks: -- install: -- ceph: diff --git a/suites/rados/monthrash/clusters/3-mons.yaml b/suites/rados/monthrash/clusters/3-mons.yaml deleted file mode 100644 index b36db6592bd..00000000000 --- a/suites/rados/monthrash/clusters/3-mons.yaml +++ /dev/null @@ -1,3 +0,0 @@ -roles: -- [mon.a, mon.c, osd.0, osd.1, osd.2] -- [mon.b, osd.3, osd.4, osd.5, client.0] diff --git a/suites/rados/monthrash/clusters/9-mons.yaml b/suites/rados/monthrash/clusters/9-mons.yaml deleted file mode 100644 index fdb87c6b324..00000000000 --- a/suites/rados/monthrash/clusters/9-mons.yaml +++ /dev/null @@ -1,3 +0,0 @@ -roles: -- [mon.a, mon.b, mon.c, mon.d, mon.e, osd.0, osd.1, osd.2] -- [mon.f, mon.g, mon.h, mon.i, osd.3, osd.4, osd.5, client.0] diff --git a/suites/rados/monthrash/fs/xfs.yaml b/suites/rados/monthrash/fs/xfs.yaml deleted file mode 120000 index 4c28d731f6b..00000000000 --- a/suites/rados/monthrash/fs/xfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/xfs.yaml \ No newline at end of file diff --git a/suites/rados/monthrash/msgr-failures/few.yaml b/suites/rados/monthrash/msgr-failures/few.yaml deleted file mode 100644 index 0de320d46b8..00000000000 --- a/suites/rados/monthrash/msgr-failures/few.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 5000 diff --git a/suites/rados/monthrash/msgr-failures/mon-delay.yaml b/suites/rados/monthrash/msgr-failures/mon-delay.yaml deleted file mode 100644 index 03b7e37f842..00000000000 --- 
a/suites/rados/monthrash/msgr-failures/mon-delay.yaml +++ /dev/null @@ -1,9 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 2500 - ms inject delay type: mon - ms inject delay probability: .005 - ms inject delay max: 1 - ms inject internal delays: .002 diff --git a/suites/rados/monthrash/thrashers/force-sync-many.yaml b/suites/rados/monthrash/thrashers/force-sync-many.yaml deleted file mode 100644 index 2867f2db5ec..00000000000 --- a/suites/rados/monthrash/thrashers/force-sync-many.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- mon_thrash: - revive_delay: 90 - thrash_delay: 1 - thrash_store: true - thrash_many: true diff --git a/suites/rados/monthrash/thrashers/many.yaml b/suites/rados/monthrash/thrashers/many.yaml deleted file mode 100644 index fe52bb2bbeb..00000000000 --- a/suites/rados/monthrash/thrashers/many.yaml +++ /dev/null @@ -1,13 +0,0 @@ -overrides: - ceph: - conf: - osd: - mon client ping interval: 4 - mon client ping timeout: 12 -tasks: -- mon_thrash: - revive_delay: 20 - thrash_delay: 1 - thrash_many: true - freeze_mon_duration: 20 - freeze_mon_probability: 10 diff --git a/suites/rados/monthrash/thrashers/one.yaml b/suites/rados/monthrash/thrashers/one.yaml deleted file mode 100644 index 2ce44c8601f..00000000000 --- a/suites/rados/monthrash/thrashers/one.yaml +++ /dev/null @@ -1,4 +0,0 @@ -tasks: -- mon_thrash: - revive_delay: 20 - thrash_delay: 1 diff --git a/suites/rados/monthrash/thrashers/sync-many.yaml b/suites/rados/monthrash/thrashers/sync-many.yaml deleted file mode 100644 index 9868f18159f..00000000000 --- a/suites/rados/monthrash/thrashers/sync-many.yaml +++ /dev/null @@ -1,11 +0,0 @@ -overrides: - ceph: - conf: - mon: - paxos min: 10 - paxos trim min: 10 -tasks: -- mon_thrash: - revive_delay: 90 - thrash_delay: 1 - thrash_many: true diff --git a/suites/rados/monthrash/thrashers/sync.yaml b/suites/rados/monthrash/thrashers/sync.yaml deleted file mode 100644 index 1e7054c271d..00000000000 --- 
a/suites/rados/monthrash/thrashers/sync.yaml +++ /dev/null @@ -1,10 +0,0 @@ -overrides: - ceph: - conf: - mon: - paxos min: 10 - paxos trim min: 10 -tasks: -- mon_thrash: - revive_delay: 90 - thrash_delay: 1 diff --git a/suites/rados/monthrash/workloads/pool-create-delete.yaml b/suites/rados/monthrash/workloads/pool-create-delete.yaml deleted file mode 100644 index c0f0f2e35b4..00000000000 --- a/suites/rados/monthrash/workloads/pool-create-delete.yaml +++ /dev/null @@ -1,56 +0,0 @@ -overrides: - ceph: - log-whitelist: - - slow request -tasks: -- exec: - client.0: - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - 
- ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel - - ceph_test_rados_delete_pools_parallel diff --git a/suites/rados/monthrash/workloads/rados_5925.yaml b/suites/rados/monthrash/workloads/rados_5925.yaml deleted file mode 100644 index b49937f76df..00000000000 --- a/suites/rados/monthrash/workloads/rados_5925.yaml +++ /dev/null @@ -1,4 +0,0 @@ -tasks: -- exec: - client.0: - - ceph_test_rados_delete_pools_parallel --debug_objecter 20 --debug_ms 1 --debug_rados 20 --debug_monc 20 diff --git a/suites/rados/monthrash/workloads/rados_api_tests.yaml b/suites/rados/monthrash/workloads/rados_api_tests.yaml deleted file mode 100644 index cd11ae6ca0c..00000000000 --- a/suites/rados/monthrash/workloads/rados_api_tests.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - client.0: - - rados/test.sh diff --git a/suites/rados/monthrash/workloads/rados_mon_workunits.yaml b/suites/rados/monthrash/workloads/rados_mon_workunits.yaml deleted file mode 100644 index 31465cffe71..00000000000 --- a/suites/rados/monthrash/workloads/rados_mon_workunits.yaml +++ /dev/null @@ -1,13 +0,0 @@ -overrides: - ceph: - log-whitelist: - - wrongly marked me down -tasks: -- workunit: - clients: - client.0: - - mon/pool_ops.sh - - mon/crush_ops.sh - - mon/osd.sh - - mon/caps.sh - diff --git a/suites/rados/monthrash/workloads/snaps-few-objects.yaml b/suites/rados/monthrash/workloads/snaps-few-objects.yaml deleted file mode 100644 index aa82d973ae1..00000000000 
--- a/suites/rados/monthrash/workloads/snaps-few-objects.yaml +++ /dev/null @@ -1,13 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 50 - op_weights: - read: 100 - write: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 - copy_from: 50 diff --git a/suites/rados/multimon/% b/suites/rados/multimon/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/rados/multimon/clusters/21.yaml b/suites/rados/multimon/clusters/21.yaml deleted file mode 100644 index 646ff15d45e..00000000000 --- a/suites/rados/multimon/clusters/21.yaml +++ /dev/null @@ -1,8 +0,0 @@ -roles: -- [mon.a, mon.d, mon.g, mon.j, mon.m, mon.p, mon.s, osd.0] -- [mon.b, mon.e, mon.h, mon.k, mon.n, mon.q, mon.t] -- [mon.c, mon.f, mon.i, mon.l, mon.o, mon.r, mon.u, osd.1] -openstack: -- volumes: # attached to each instance - count: 1 - size: 10 # GB diff --git a/suites/rados/multimon/clusters/3.yaml b/suites/rados/multimon/clusters/3.yaml deleted file mode 100644 index e30dc76f381..00000000000 --- a/suites/rados/multimon/clusters/3.yaml +++ /dev/null @@ -1,6 +0,0 @@ -roles: -- [mon.a, mon.b, mon.c, osd.0, osd.1] -openstack: -- volumes: # attached to each instance - count: 2 - size: 10 # GB diff --git a/suites/rados/multimon/clusters/6.yaml b/suites/rados/multimon/clusters/6.yaml deleted file mode 100644 index b16e3267c06..00000000000 --- a/suites/rados/multimon/clusters/6.yaml +++ /dev/null @@ -1,7 +0,0 @@ -roles: -- [mon.a, mon.c, mon.e, osd.0] -- [mon.b, mon.d, mon.f, osd.1] -openstack: -- volumes: # attached to each instance - count: 1 - size: 10 # GB diff --git a/suites/rados/multimon/clusters/9.yaml b/suites/rados/multimon/clusters/9.yaml deleted file mode 100644 index c2c7b494ed8..00000000000 --- a/suites/rados/multimon/clusters/9.yaml +++ /dev/null @@ -1,8 +0,0 @@ -roles: -- [mon.a, mon.d, mon.g, osd.0] -- [mon.b, mon.e, mon.h] -- [mon.c, mon.f, mon.i, osd.1] -openstack: -- volumes: # attached to each instance - count: 1 - size: 10 # GB 
diff --git a/suites/rados/multimon/msgr-failures/few.yaml b/suites/rados/multimon/msgr-failures/few.yaml deleted file mode 100644 index 0de320d46b8..00000000000 --- a/suites/rados/multimon/msgr-failures/few.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 5000 diff --git a/suites/rados/multimon/msgr-failures/many.yaml b/suites/rados/multimon/msgr-failures/many.yaml deleted file mode 100644 index 86f8dde8a0e..00000000000 --- a/suites/rados/multimon/msgr-failures/many.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 500 diff --git a/suites/rados/multimon/tasks/mon_clock_no_skews.yaml b/suites/rados/multimon/tasks/mon_clock_no_skews.yaml deleted file mode 100644 index e86bdde1d7d..00000000000 --- a/suites/rados/multimon/tasks/mon_clock_no_skews.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: -- ceph: - log-whitelist: - - slow request - - .*clock.*skew.* - - clocks not synchronized -- mon_clock_skew_check: - expect-skew: false diff --git a/suites/rados/multimon/tasks/mon_clock_with_skews.yaml b/suites/rados/multimon/tasks/mon_clock_with_skews.yaml deleted file mode 100644 index 2953e0d6dc2..00000000000 --- a/suites/rados/multimon/tasks/mon_clock_with_skews.yaml +++ /dev/null @@ -1,15 +0,0 @@ -overrides: - ceph: - conf: - mon.b: - clock offset: 10 -tasks: -- install: -- ceph: - wait-for-healthy: false - log-whitelist: - - slow request - - .*clock.*skew.* - - clocks not synchronized -- mon_clock_skew_check: - expect-skew: true diff --git a/suites/rados/multimon/tasks/mon_recovery.yaml b/suites/rados/multimon/tasks/mon_recovery.yaml deleted file mode 100644 index 94721ea53a4..00000000000 --- a/suites/rados/multimon/tasks/mon_recovery.yaml +++ /dev/null @@ -1,4 +0,0 @@ -tasks: -- install: -- ceph: -- mon_recovery: diff --git a/suites/rados/objectstore/alloc-hint.yaml b/suites/rados/objectstore/alloc-hint.yaml deleted file mode 100644 index 
8f8d4841eb0..00000000000 --- a/suites/rados/objectstore/alloc-hint.yaml +++ /dev/null @@ -1,25 +0,0 @@ -roles: -- [mon.a, osd.0, osd.1, osd.2, client.0] - -overrides: - ceph: - fs: xfs - conf: - osd: - filestore xfs extsize: true - -tasks: -- install: -- ceph: -- workunit: - clients: - all: - - rados/test_alloc_hint.sh -openstack: - - machine: - disk: 40 # GB - ram: 8000 # MB - cpus: 1 - volumes: # attached to each instance - count: 3 - size: 10 # GB diff --git a/suites/rados/objectstore/ceph_objectstore_tool.yaml b/suites/rados/objectstore/ceph_objectstore_tool.yaml deleted file mode 100644 index 881d908f06b..00000000000 --- a/suites/rados/objectstore/ceph_objectstore_tool.yaml +++ /dev/null @@ -1,11 +0,0 @@ -roles: -- [mon.0, osd.0, osd.1, osd.2, osd.3, osd.4, osd.5, client.0] -openstack: -- volumes: # attached to each instance - count: 6 - size: 10 # GB -tasks: -- install: -- ceph: -- ceph_objectstore_tool: - objects: 20 diff --git a/suites/rados/objectstore/filejournal.yaml b/suites/rados/objectstore/filejournal.yaml deleted file mode 100644 index 69ffab2092c..00000000000 --- a/suites/rados/objectstore/filejournal.yaml +++ /dev/null @@ -1,12 +0,0 @@ -roles: -- [mon.0, osd.0, osd.1, client.0] -openstack: -- volumes: # attached to each instance - count: 2 - size: 10 # GB -tasks: -- install: -- ceph: -- exec: - client.0: - - ceph_test_filejournal diff --git a/suites/rados/objectstore/filestore-idempotent-aio-journal.yaml b/suites/rados/objectstore/filestore-idempotent-aio-journal.yaml deleted file mode 100644 index c97d7cd8cb7..00000000000 --- a/suites/rados/objectstore/filestore-idempotent-aio-journal.yaml +++ /dev/null @@ -1,13 +0,0 @@ -roles: -- [mon.0, osd.0, osd.1, client.0] -openstack: -- volumes: # attached to each instance - count: 2 - size: 10 # GB -tasks: -- install: -- ceph: - conf: - global: - journal aio: true -- filestore_idempotent: diff --git a/suites/rados/objectstore/filestore-idempotent.yaml b/suites/rados/objectstore/filestore-idempotent.yaml 
deleted file mode 100644 index 39b2f0fb006..00000000000 --- a/suites/rados/objectstore/filestore-idempotent.yaml +++ /dev/null @@ -1,10 +0,0 @@ -roles: -- [mon.0, osd.0, osd.1, client.0] -openstack: -- volumes: # attached to each instance - count: 2 - size: 10 # GB -tasks: -- install: -- ceph: -- filestore_idempotent: diff --git a/suites/rados/objectstore/objectcacher-stress.yaml b/suites/rados/objectstore/objectcacher-stress.yaml deleted file mode 100644 index 7cb78a76e7e..00000000000 --- a/suites/rados/objectstore/objectcacher-stress.yaml +++ /dev/null @@ -1,13 +0,0 @@ -roles: -- [mon.0, osd.0, osd.1, client.0] -openstack: -- volumes: # attached to each instance - count: 2 - size: 10 # GB -tasks: -- install: -- ceph: -- workunit: - clients: - all: - - osdc/stress_objectcacher.sh diff --git a/suites/rados/objectstore/objectstore.yaml b/suites/rados/objectstore/objectstore.yaml deleted file mode 100644 index 23d650b9d2e..00000000000 --- a/suites/rados/objectstore/objectstore.yaml +++ /dev/null @@ -1,12 +0,0 @@ -roles: -- [mon.0, osd.0, osd.1, client.0] -openstack: -- volumes: # attached to each instance - count: 2 - size: 10 # GB -tasks: -- install: -- exec: - client.0: - - mkdir $TESTDIR/ostest && cd $TESTDIR/ostest && ceph_test_objectstore - - rm -rf $TESTDIR/ostest diff --git a/suites/rados/singleton-nomsgr/% b/suites/rados/singleton-nomsgr/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/rados/singleton-nomsgr/all/11429.yaml b/suites/rados/singleton-nomsgr/all/11429.yaml deleted file mode 100644 index 7076e048dc2..00000000000 --- a/suites/rados/singleton-nomsgr/all/11429.yaml +++ /dev/null @@ -1,106 +0,0 @@ -overrides: - ceph: - conf: - mon: - debug mon: 20 - debug ms: 1 - debug paxos: 20 - mon warn on legacy crush tunables: false - mon min osdmap epochs: 3 - osd: - osd map cache size: 2 - osd map max advance: 1 - debug filestore: 20 - debug journal: 20 - debug ms: 1 - debug osd: 20 - log-whitelist: - - osd_map_cache_size - - slow 
request - - scrub mismatch - - ScrubResult - - failed to encode -roles: -- - mon.a - - mds.a - - osd.0 - - osd.1 - - mon.b - - mon.c - - osd.2 - - client.0 -tasks: -- install: - branch: v0.80.8 -- print: '**** done installing firefly' -- ceph: - fs: xfs -- print: '**** done ceph' -- full_sequential: - - ceph_manager.create_pool: - args: ['toremove'] - kwargs: - pg_num: 4096 - - sleep: - duration: 30 - - ceph_manager.wait_for_clean: null - - radosbench: - clients: [client.0] - time: 120 - size: 1 - pool: toremove - create_pool: false - - ceph_manager.remove_pool: - args: ['toremove'] - - sleep: - duration: 10 - - ceph.restart: - daemons: - - osd.0 - - osd.1 - - osd.2 - - sleep: - duration: 30 - - ceph_manager.wait_for_clean: null - - radosbench: - clients: [client.0] - time: 60 - size: 1 - - ceph_manager.create_pool: - args: ['newpool'] - - loop: - count: 100 - body: - - ceph_manager.set_pool_property: - args: ['newpool', 'min_size', 2] - - ceph_manager.set_pool_property: - args: ['newpool', 'min_size', 1] - - sleep: - duration: 30 - - ceph_manager.wait_for_clean: null - - loop: - count: 100 - body: - - ceph_manager.set_pool_property: - args: ['newpool', 'min_size', 2] - - ceph_manager.set_pool_property: - args: ['newpool', 'min_size', 1] - - sleep: - duration: 30 - - ceph_manager.wait_for_clean: null - - sleep: - duration: 30 - - install.upgrade: - mon.a: null - - ceph.restart: - daemons: - - osd.0 - - osd.1 - - osd.2 - - sleep: - duration: 30 - - radosbench: - clients: [client.0] - time: 30 - size: 1 - - ceph_manager.wait_for_clean: null diff --git a/suites/rados/singleton-nomsgr/all/13234.yaml b/suites/rados/singleton-nomsgr/all/13234.yaml deleted file mode 100644 index 3b3602fb5ec..00000000000 --- a/suites/rados/singleton-nomsgr/all/13234.yaml +++ /dev/null @@ -1,130 +0,0 @@ -overrides: - ceph: - conf: - mon: - debug mon: 20 - debug ms: 1 - debug paxos: 20 - mon warn on legacy crush tunables: false - mon min osdmap epochs: 3 - osd: - osd map cache size: 2 - osd 
map max advance: 1 - debug filestore: 20 - debug journal: 20 - debug ms: 1 - debug osd: 20 - log-whitelist: - - osd_map_cache_size - - slow request - - scrub mismatch - - ScrubResult - - failed to encode -roles: -- - mon.a - - mds.a - - osd.0 - - osd.1 - - mon.b - - mon.c - - osd.2 - - client.0 -tasks: -- install: - tag: v0.67.10 -- print: '**** done installing dumpling' -- ceph: - fs: xfs -- print: '**** done ceph' -- full_sequential: - - ceph_manager.create_pool: - args: - - newpool - kwargs: - pg_num: 32 - - sleep: - duration: 30 - - ceph_manager.wait_for_clean: null - - ceph_manager.kill_osd: - kwargs: - osd: 0 - - ceph_manager.kill_osd: - kwargs: - osd: 1 - - ceph_manager.kill_osd: - kwargs: - osd: 2 - - print: '**** done killing osds' - - loop: - body: - - ceph_manager.set_pool_property: - args: - - newpool - - min_size - - 2 - - ceph_manager.set_pool_property: - args: - - newpool - - min_size - - 1 - count: 10 - - install.upgrade: - mon.a: - branch: firefly - - print: '**** done upgrading to firefly' - - ceph.restart: - - mon.a - - mon.b - - mon.c - - print: '**** done upgrading restarting mons' - - loop: - body: - - ceph_manager.set_pool_property: - args: - - newpool - - min_size - - 2 - - ceph_manager.set_pool_property: - args: - - newpool - - min_size - - 1 - count: 10 - - sleep: - duration: 10 - - install.upgrade: - mon.a: null - - print: '**** done upgrading to branch' - - ceph.restart: - - mon.a - - mon.b - - mon.c - - loop: - body: - - ceph_manager.set_pool_property: - args: - - newpool - - min_size - - 2 - - ceph_manager.set_pool_property: - args: - - newpool - - min_size - - 1 - count: 10 - - sleep: - duration: 10 - - print: '**** about to start osds' - - ceph_manager.revive_osd: - kwargs: - osd: 0 - - ceph_manager.revive_osd: - kwargs: - osd: 1 - - ceph_manager.revive_osd: - kwargs: - osd: 2 - - sleep: - duration: 30 - - ceph_manager.wait_for_clean: null - - print: '**** done!' 
diff --git a/suites/rados/singleton-nomsgr/all/ceph-post-file.yaml b/suites/rados/singleton-nomsgr/all/ceph-post-file.yaml deleted file mode 100644 index d0a4db067cf..00000000000 --- a/suites/rados/singleton-nomsgr/all/ceph-post-file.yaml +++ /dev/null @@ -1,8 +0,0 @@ -roles: -- [mon.a, osd.0, osd.1, osd.2, client.0] -tasks: -- install: -- workunit: - clients: - all: - - post-file.sh diff --git a/suites/rados/singleton-nomsgr/all/export-after-evict.yaml b/suites/rados/singleton-nomsgr/all/export-after-evict.yaml deleted file mode 100644 index f8e4b405f19..00000000000 --- a/suites/rados/singleton-nomsgr/all/export-after-evict.yaml +++ /dev/null @@ -1,25 +0,0 @@ -roles: -- - mon.a - - osd.0 - - osd.1 - - osd.2 - - client.0 -tasks: -- install: -- ceph: -- exec: - client.0: - - ceph osd pool create base-pool 4 - - ceph osd pool create cache-pool 4 - - ceph osd tier add base-pool cache-pool - - ceph osd tier cache-mode cache-pool writeback - - ceph osd tier set-overlay base-pool cache-pool - - dd if=/dev/urandom of=$TESTDIR/foo bs=1M count=1 - - rbd import --image-format 2 $TESTDIR/foo base-pool/bar - - rbd snap create base-pool/bar@snap - - rados -p base-pool cache-flush-evict-all - - rbd export base-pool/bar $TESTDIR/bar - - rbd export base-pool/bar@snap $TESTDIR/snap - - cmp $TESTDIR/foo $TESTDIR/bar - - cmp $TESTDIR/foo $TESTDIR/snap - - rm $TESTDIR/foo $TESTDIR/bar $TESTDIR/snap diff --git a/suites/rados/singleton-nomsgr/all/msgr.yaml b/suites/rados/singleton-nomsgr/all/msgr.yaml deleted file mode 100644 index 86c717f6686..00000000000 --- a/suites/rados/singleton-nomsgr/all/msgr.yaml +++ /dev/null @@ -1,16 +0,0 @@ -roles: -- [mon.0, osd.0, osd.1, client.0] -tasks: -- install: -- exec: - client.0: - - ceph_test_async_driver - - ceph_test_msgr -openstack: - - machine: - disk: 40 # GB - ram: 15000 # MB - cpus: 1 - volumes: # attached to each instance - count: 0 - size: 1 # GB diff --git a/suites/rados/singleton-nomsgr/all/multi-backfill-reject.yaml 
b/suites/rados/singleton-nomsgr/all/multi-backfill-reject.yaml deleted file mode 100644 index a9ec78a763a..00000000000 --- a/suites/rados/singleton-nomsgr/all/multi-backfill-reject.yaml +++ /dev/null @@ -1,30 +0,0 @@ -roles: -- - mon.a - - osd.0 - - osd.1 - - osd.2 - - client.0 -- - osd.3 - - osd.4 - - osd.5 -tasks: -- install: -- ceph: - conf: - osd: - osd debug reject backfill probability: .3 - osd min pg log entries: 25 - osd max pg log entries: 100 -- exec: - client.0: - - ceph osd pool create foo 64 - - rados -p foo bench 60 write -b 1024 --no-cleanup - - ceph osd pool set foo size 3 - - ceph osd out 0 1 -- sleep: - duration: 60 -- exec: - client.0: - - ceph osd in 0 1 -- sleep: - duration: 60 diff --git a/suites/rados/singleton/% b/suites/rados/singleton/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/rados/singleton/all/admin-socket.yaml b/suites/rados/singleton/all/admin-socket.yaml deleted file mode 100644 index 0dbf9b219b2..00000000000 --- a/suites/rados/singleton/all/admin-socket.yaml +++ /dev/null @@ -1,17 +0,0 @@ -roles: -- - mon.a - - osd.0 - - osd.1 - - client.a -tasks: -- install: -- ceph: -- admin_socket: - osd.0: - version: - git_version: - help: - config show: - config set filestore_dump_file /tmp/foo: - perf dump: - perf schema: diff --git a/suites/rados/singleton/all/cephtool.yaml b/suites/rados/singleton/all/cephtool.yaml deleted file mode 100644 index 114073baa4e..00000000000 --- a/suites/rados/singleton/all/cephtool.yaml +++ /dev/null @@ -1,21 +0,0 @@ -roles: -- - mon.a - - mon.b - - mon.c - - osd.0 - - osd.1 - - osd.2 - - client.0 -tasks: -- install: -- ceph: - log-whitelist: - - wrongly marked me down - - had wrong client addr - - had wrong cluster addr - - must scrub before tier agent can activate -- workunit: - clients: - all: - - cephtool - - mon/pool_ops.sh diff --git a/suites/rados/singleton/all/divergent_priors.yaml b/suites/rados/singleton/all/divergent_priors.yaml deleted file mode 100644 index 
a01dd122a19..00000000000 --- a/suites/rados/singleton/all/divergent_priors.yaml +++ /dev/null @@ -1,17 +0,0 @@ -roles: -- - mon.0 - - osd.0 - - osd.1 - - osd.2 - - client.0 - -overrides: - ceph: - conf: - osd: - debug osd: 5 - -tasks: -- install: -- ceph: -- divergent_priors: diff --git a/suites/rados/singleton/all/divergent_priors2.yaml b/suites/rados/singleton/all/divergent_priors2.yaml deleted file mode 100644 index aecbc0a4011..00000000000 --- a/suites/rados/singleton/all/divergent_priors2.yaml +++ /dev/null @@ -1,17 +0,0 @@ -roles: -- - mon.0 - - osd.0 - - osd.1 - - osd.2 - - client.0 - -overrides: - ceph: - conf: - osd: - debug osd: 5 - -tasks: -- install: -- ceph: -- divergent_priors2: diff --git a/suites/rados/singleton/all/dump-stuck.yaml b/suites/rados/singleton/all/dump-stuck.yaml deleted file mode 100644 index 2752a38fd45..00000000000 --- a/suites/rados/singleton/all/dump-stuck.yaml +++ /dev/null @@ -1,10 +0,0 @@ -roles: -- - mon.a - - osd.0 - - osd.1 -tasks: -- install: -- ceph: - log-whitelist: - - wrongly marked me down -- dump_stuck: diff --git a/suites/rados/singleton/all/ec-lost-unfound.yaml b/suites/rados/singleton/all/ec-lost-unfound.yaml deleted file mode 100644 index 7006d7f3783..00000000000 --- a/suites/rados/singleton/all/ec-lost-unfound.yaml +++ /dev/null @@ -1,14 +0,0 @@ -roles: -- - mon.a - - mon.b - - mon.c - - osd.0 - - osd.1 - - osd.2 - - osd.3 -tasks: -- install: -- ceph: - log-whitelist: - - objects unfound and apparently lost -- ec_lost_unfound: diff --git a/suites/rados/singleton/all/lost-unfound-delete.yaml b/suites/rados/singleton/all/lost-unfound-delete.yaml deleted file mode 100644 index e6e09d320d3..00000000000 --- a/suites/rados/singleton/all/lost-unfound-delete.yaml +++ /dev/null @@ -1,13 +0,0 @@ -roles: -- - mon.a - - mon.b - - mon.c - - osd.0 - - osd.1 - - osd.2 -tasks: -- install: -- ceph: - log-whitelist: - - objects unfound and apparently lost -- rep_lost_unfound_delete: diff --git 
a/suites/rados/singleton/all/lost-unfound.yaml b/suites/rados/singleton/all/lost-unfound.yaml deleted file mode 100644 index 0597e43713c..00000000000 --- a/suites/rados/singleton/all/lost-unfound.yaml +++ /dev/null @@ -1,13 +0,0 @@ -roles: -- - mon.a - - mon.b - - mon.c - - osd.0 - - osd.1 - - osd.2 -tasks: -- install: -- ceph: - log-whitelist: - - objects unfound and apparently lost -- lost_unfound: diff --git a/suites/rados/singleton/all/mon-config-keys.yaml b/suites/rados/singleton/all/mon-config-keys.yaml deleted file mode 100644 index f81070fc770..00000000000 --- a/suites/rados/singleton/all/mon-config-keys.yaml +++ /dev/null @@ -1,15 +0,0 @@ -roles: -- - mon.0 - - mon.1 - - mon.2 - - osd.0 - - osd.1 - - osd.2 - - client.0 -tasks: -- install: -- ceph: -- workunit: - clients: - all: - - mon/test_mon_config_key.py diff --git a/suites/rados/singleton/all/mon-thrasher.yaml b/suites/rados/singleton/all/mon-thrasher.yaml deleted file mode 100644 index e69198c85e5..00000000000 --- a/suites/rados/singleton/all/mon-thrasher.yaml +++ /dev/null @@ -1,21 +0,0 @@ -roles: -- - mon.a - - mon.b - - mon.c - - osd.0 - - osd.1 - - client.0 -tasks: -- install: -- ceph: -- mon_thrash: - revive_delay: 20 - thrash_delay: 1 -- workunit: - clients: - all: - - mon/workloadgen.sh - env: - LOADGEN_NUM_OSDS: "5" - VERBOSE: "1" - DURATION: "600" diff --git a/suites/rados/singleton/all/osd-backfill.yaml b/suites/rados/singleton/all/osd-backfill.yaml deleted file mode 100644 index 781a63eaf62..00000000000 --- a/suites/rados/singleton/all/osd-backfill.yaml +++ /dev/null @@ -1,16 +0,0 @@ -roles: -- - mon.a - - mon.b - - mon.c - - osd.0 - - osd.1 - - osd.2 -tasks: -- install: -- ceph: - log-whitelist: - - wrongly marked me down - conf: - osd: - osd min pg log entries: 5 -- osd_backfill: diff --git a/suites/rados/singleton/all/osd-recovery-incomplete.yaml b/suites/rados/singleton/all/osd-recovery-incomplete.yaml deleted file mode 100644 index 123f4d44fbe..00000000000 --- 
a/suites/rados/singleton/all/osd-recovery-incomplete.yaml +++ /dev/null @@ -1,17 +0,0 @@ -roles: -- - mon.a - - mon.b - - mon.c - - osd.0 - - osd.1 - - osd.2 - - osd.3 -tasks: -- install: -- ceph: - log-whitelist: - - wrongly marked me down - conf: - osd: - osd min pg log entries: 5 -- osd_recovery.test_incomplete_pgs: diff --git a/suites/rados/singleton/all/osd-recovery.yaml b/suites/rados/singleton/all/osd-recovery.yaml deleted file mode 100644 index a6e1d99f6fe..00000000000 --- a/suites/rados/singleton/all/osd-recovery.yaml +++ /dev/null @@ -1,16 +0,0 @@ -roles: -- - mon.a - - mon.b - - mon.c - - osd.0 - - osd.1 - - osd.2 -tasks: -- install: -- ceph: - log-whitelist: - - wrongly marked me down - conf: - osd: - osd min pg log entries: 5 -- osd_recovery: diff --git a/suites/rados/singleton/all/peer.yaml b/suites/rados/singleton/all/peer.yaml deleted file mode 100644 index 655ea685584..00000000000 --- a/suites/rados/singleton/all/peer.yaml +++ /dev/null @@ -1,16 +0,0 @@ -roles: -- - mon.0 - - mon.1 - - mon.2 - - osd.0 - - osd.1 - - osd.2 -tasks: -- install: -- ceph: - config: - global: - osd pool default min size : 1 - log-whitelist: - - objects unfound and apparently lost -- peer: diff --git a/suites/rados/singleton/all/pg-removal-interruption.yaml b/suites/rados/singleton/all/pg-removal-interruption.yaml deleted file mode 100644 index 95cfef76681..00000000000 --- a/suites/rados/singleton/all/pg-removal-interruption.yaml +++ /dev/null @@ -1,27 +0,0 @@ -roles: -- - mon.a - - osd.0 - - osd.1 - - osd.2 - - client.0 -tasks: -- install: -- ceph: - log-whitelist: - - wrongly marked me down - - slow request -- exec: - client.0: - - ceph osd pool create foo 128 128 - - sleep 5 - - ceph tell osd.0 injectargs -- --osd-inject-failure-on-pg-removal - - ceph osd pool delete foo foo --yes-i-really-really-mean-it -- ceph.wait_for_failure: [osd.0] -- exec: - client.0: - - sudo ceph osd down 0 -- ceph.restart: [osd.0] -- exec: - client.0: - - ceph tell osd.0 flush_pg_stats -- 
ceph.healthy: diff --git a/suites/rados/singleton/all/radostool.yaml b/suites/rados/singleton/all/radostool.yaml deleted file mode 100644 index 05ab4a3f7c2..00000000000 --- a/suites/rados/singleton/all/radostool.yaml +++ /dev/null @@ -1,16 +0,0 @@ -roles: -- - mon.a - - osd.0 - - osd.1 - - client.0 -tasks: -- install: -- ceph: - log-whitelist: - - wrongly marked me down - - had wrong client addr - - had wrong cluster addr -- workunit: - clients: - all: - - rados/test_rados_tool.sh diff --git a/suites/rados/singleton/all/reg11184.yaml b/suites/rados/singleton/all/reg11184.yaml deleted file mode 100644 index 54361a4e3d5..00000000000 --- a/suites/rados/singleton/all/reg11184.yaml +++ /dev/null @@ -1,17 +0,0 @@ -roles: -- - mon.0 - - osd.0 - - osd.1 - - osd.2 - - client.0 - -overrides: - ceph: - conf: - osd: - debug osd: 5 - -tasks: -- install: -- ceph: -- reg11184: diff --git a/suites/rados/singleton/all/rest-api.yaml b/suites/rados/singleton/all/rest-api.yaml deleted file mode 100644 index 133840a5dc1..00000000000 --- a/suites/rados/singleton/all/rest-api.yaml +++ /dev/null @@ -1,25 +0,0 @@ -roles: -- - mon.0 - - mon.1 - - mon.2 - - osd.0 - - osd.1 - - osd.2 - - mds.a - - client.0 -tasks: -- install: -- ceph: - log-whitelist: - - wrongly marked me down - - had wrong client addr - conf: - client.rest0: - debug ms: 1 - debug objecter: 20 - debug rados: 20 -- rest-api: [client.0] -- workunit: - clients: - all: - - rest/test.py diff --git a/suites/rados/singleton/all/thrash-rados.yaml b/suites/rados/singleton/all/thrash-rados.yaml deleted file mode 100644 index 82c47bf3fe7..00000000000 --- a/suites/rados/singleton/all/thrash-rados.yaml +++ /dev/null @@ -1,22 +0,0 @@ -roles: -- - mon.a - - osd.0 - - osd.1 - - osd.2 -- - osd.3 - - osd.4 - - osd.5 - - client.0 -tasks: -- install: -- ceph: - log-whitelist: - - wrongly marked me down -- thrashosds: - op_delay: 30 - clean_interval: 120 - chance_down: .5 -- workunit: - clients: - all: - - rados/load-gen-mix-small.sh diff --git 
a/suites/rados/singleton/all/watch-notify-same-primary.yaml b/suites/rados/singleton/all/watch-notify-same-primary.yaml deleted file mode 100644 index 3d2d683f63c..00000000000 --- a/suites/rados/singleton/all/watch-notify-same-primary.yaml +++ /dev/null @@ -1,22 +0,0 @@ -roles: -- - mon.0 - - mon.1 - - mon.2 - - osd.0 - - osd.1 - - osd.2 - - client.0 -tasks: -- install: -- ceph: - config: - global: - osd pool default min size : 1 - client: - debug ms: 1 - debug objecter: 20 - debug rados: 20 - log-whitelist: - - objects unfound and apparently lost -- watch_notify_same_primary: - clients: [client.0] diff --git a/suites/rados/singleton/fs/xfs.yaml b/suites/rados/singleton/fs/xfs.yaml deleted file mode 120000 index 4c28d731f6b..00000000000 --- a/suites/rados/singleton/fs/xfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/xfs.yaml \ No newline at end of file diff --git a/suites/rados/singleton/msgr-failures/few.yaml b/suites/rados/singleton/msgr-failures/few.yaml deleted file mode 100644 index 0de320d46b8..00000000000 --- a/suites/rados/singleton/msgr-failures/few.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 5000 diff --git a/suites/rados/singleton/msgr-failures/many.yaml b/suites/rados/singleton/msgr-failures/many.yaml deleted file mode 100644 index 86f8dde8a0e..00000000000 --- a/suites/rados/singleton/msgr-failures/many.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 500 diff --git a/suites/rados/thrash-erasure-code-isa/% b/suites/rados/thrash-erasure-code-isa/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/rados/thrash-erasure-code-isa/arch/x86_64.yaml b/suites/rados/thrash-erasure-code-isa/arch/x86_64.yaml deleted file mode 100644 index c2409f5d0dc..00000000000 --- a/suites/rados/thrash-erasure-code-isa/arch/x86_64.yaml +++ /dev/null @@ -1 +0,0 @@ -arch: x86_64 diff --git a/suites/rados/thrash-erasure-code-isa/clusters 
b/suites/rados/thrash-erasure-code-isa/clusters deleted file mode 120000 index 7aac47be3e6..00000000000 --- a/suites/rados/thrash-erasure-code-isa/clusters +++ /dev/null @@ -1 +0,0 @@ -../thrash/clusters \ No newline at end of file diff --git a/suites/rados/thrash-erasure-code-isa/fs b/suites/rados/thrash-erasure-code-isa/fs deleted file mode 120000 index c11782e0462..00000000000 --- a/suites/rados/thrash-erasure-code-isa/fs +++ /dev/null @@ -1 +0,0 @@ -../thrash/fs \ No newline at end of file diff --git a/suites/rados/thrash-erasure-code-isa/msgr-failures b/suites/rados/thrash-erasure-code-isa/msgr-failures deleted file mode 120000 index 03689aa44a3..00000000000 --- a/suites/rados/thrash-erasure-code-isa/msgr-failures +++ /dev/null @@ -1 +0,0 @@ -../thrash/msgr-failures \ No newline at end of file diff --git a/suites/rados/thrash-erasure-code-isa/supported b/suites/rados/thrash-erasure-code-isa/supported deleted file mode 120000 index c5d59352cb5..00000000000 --- a/suites/rados/thrash-erasure-code-isa/supported +++ /dev/null @@ -1 +0,0 @@ -../../../distros/supported \ No newline at end of file diff --git a/suites/rados/thrash-erasure-code-isa/thrashers b/suites/rados/thrash-erasure-code-isa/thrashers deleted file mode 120000 index f461dadc3f2..00000000000 --- a/suites/rados/thrash-erasure-code-isa/thrashers +++ /dev/null @@ -1 +0,0 @@ -../thrash/thrashers \ No newline at end of file diff --git a/suites/rados/thrash-erasure-code-isa/workloads/ec-rados-plugin=isa-k=2-m=1.yaml b/suites/rados/thrash-erasure-code-isa/workloads/ec-rados-plugin=isa-k=2-m=1.yaml deleted file mode 120000 index 9d32cd811c1..00000000000 --- a/suites/rados/thrash-erasure-code-isa/workloads/ec-rados-plugin=isa-k=2-m=1.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../erasure-code/ec-rados-plugin=isa-k=2-m=1.yaml \ No newline at end of file diff --git a/suites/rados/thrash-erasure-code/% b/suites/rados/thrash-erasure-code/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git 
a/suites/rados/thrash-erasure-code/clusters b/suites/rados/thrash-erasure-code/clusters deleted file mode 120000 index 7aac47be3e6..00000000000 --- a/suites/rados/thrash-erasure-code/clusters +++ /dev/null @@ -1 +0,0 @@ -../thrash/clusters \ No newline at end of file diff --git a/suites/rados/thrash-erasure-code/fs b/suites/rados/thrash-erasure-code/fs deleted file mode 120000 index c11782e0462..00000000000 --- a/suites/rados/thrash-erasure-code/fs +++ /dev/null @@ -1 +0,0 @@ -../thrash/fs \ No newline at end of file diff --git a/suites/rados/thrash-erasure-code/msgr-failures b/suites/rados/thrash-erasure-code/msgr-failures deleted file mode 120000 index 03689aa44a3..00000000000 --- a/suites/rados/thrash-erasure-code/msgr-failures +++ /dev/null @@ -1 +0,0 @@ -../thrash/msgr-failures \ No newline at end of file diff --git a/suites/rados/thrash-erasure-code/thrashers/default.yaml b/suites/rados/thrash-erasure-code/thrashers/default.yaml deleted file mode 100644 index fade054b1b7..00000000000 --- a/suites/rados/thrash-erasure-code/thrashers/default.yaml +++ /dev/null @@ -1,17 +0,0 @@ -tasks: -- install: -- ceph: - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost - conf: - osd: - osd debug reject backfill probability: .3 - osd max backfills: 1 - osd scrub min interval: 60 - osd scrub max interval: 120 -- thrashosds: - timeout: 1200 - chance_pgnum_grow: 1 - chance_pgpnum_fix: 1 - min_in: 4 diff --git a/suites/rados/thrash-erasure-code/thrashers/mapgap.yaml b/suites/rados/thrash-erasure-code/thrashers/mapgap.yaml deleted file mode 100644 index c37147fda22..00000000000 --- a/suites/rados/thrash-erasure-code/thrashers/mapgap.yaml +++ /dev/null @@ -1,22 +0,0 @@ -overrides: - ceph: - conf: - mon: - mon min osdmap epochs: 2 - osd: - osd map cache size: 1 - osd scrub min interval: 60 - osd scrub max interval: 120 -tasks: -- install: -- ceph: - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost - - 
osd_map_cache_size -- thrashosds: - timeout: 1800 - chance_pgnum_grow: 1 - chance_pgpnum_fix: 1 - chance_test_map_discontinuity: 0.5 - min_in: 4 diff --git a/suites/rados/thrash-erasure-code/thrashers/morepggrow.yaml b/suites/rados/thrash-erasure-code/thrashers/morepggrow.yaml deleted file mode 100644 index 9ba1b9e5867..00000000000 --- a/suites/rados/thrash-erasure-code/thrashers/morepggrow.yaml +++ /dev/null @@ -1,16 +0,0 @@ -tasks: -- install: -- ceph: - conf: - osd: - osd max backfills: 1 - osd scrub min interval: 60 - osd scrub max interval: 120 - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost -- thrashosds: - timeout: 1200 - chance_pgnum_grow: 3 - chance_pgpnum_fix: 1 - min_in: 4 diff --git a/suites/rados/thrash-erasure-code/thrashers/pggrow.yaml b/suites/rados/thrash-erasure-code/thrashers/pggrow.yaml deleted file mode 100644 index 744761d8cce..00000000000 --- a/suites/rados/thrash-erasure-code/thrashers/pggrow.yaml +++ /dev/null @@ -1,15 +0,0 @@ -tasks: -- install: -- ceph: - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost - conf: - osd: - osd scrub min interval: 60 - osd scrub max interval: 120 -- thrashosds: - timeout: 1200 - chance_pgnum_grow: 2 - chance_pgpnum_fix: 1 - min_in: 4 diff --git a/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=jerasure-k=2-m=1.yaml b/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=jerasure-k=2-m=1.yaml deleted file mode 120000 index f11eddb7f56..00000000000 --- a/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=jerasure-k=2-m=1.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../erasure-code/ec-rados-plugin=jerasure-k=2-m=1.yaml \ No newline at end of file diff --git a/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=jerasure-k=3-m=1.yaml b/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=jerasure-k=3-m=1.yaml deleted file mode 120000 index b1407aef7e1..00000000000 --- 
a/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=jerasure-k=3-m=1.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml \ No newline at end of file diff --git a/suites/rados/thrash-erasure-code/workloads/ec-radosbench.yaml b/suites/rados/thrash-erasure-code/workloads/ec-radosbench.yaml deleted file mode 100644 index 7aaf0e1c30b..00000000000 --- a/suites/rados/thrash-erasure-code/workloads/ec-radosbench.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- radosbench: - clients: [client.0] - time: 300 - unique_pool: true - ec_pool: true diff --git a/suites/rados/thrash-erasure-code/workloads/ec-small-objects.yaml b/suites/rados/thrash-erasure-code/workloads/ec-small-objects.yaml deleted file mode 100644 index a8ac39716e5..00000000000 --- a/suites/rados/thrash-erasure-code/workloads/ec-small-objects.yaml +++ /dev/null @@ -1,20 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 400000 - max_seconds: 600 - max_in_flight: 64 - objects: 1024 - size: 16384 - ec_pool: true - op_weights: - read: 100 - write: 0 - append: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 - copy_from: 50 - setattr: 25 - rmattr: 25 diff --git a/suites/rados/thrash/% b/suites/rados/thrash/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/rados/thrash/0-size-min-size-overrides/2-size-1-min-size.yaml b/suites/rados/thrash/0-size-min-size-overrides/2-size-1-min-size.yaml deleted file mode 120000 index 4c817a6fecf..00000000000 --- a/suites/rados/thrash/0-size-min-size-overrides/2-size-1-min-size.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../overrides/2-size-1-min-size.yaml \ No newline at end of file diff --git a/suites/rados/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml b/suites/rados/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml deleted file mode 120000 index c429b07b999..00000000000 --- a/suites/rados/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml +++ /dev/null @@ -1 +0,0 
@@ -../../../../overrides/2-size-2-min-size.yaml \ No newline at end of file diff --git a/suites/rados/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml b/suites/rados/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml deleted file mode 120000 index 8d529f0a9ad..00000000000 --- a/suites/rados/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../overrides/3-size-2-min-size.yaml \ No newline at end of file diff --git a/suites/rados/thrash/1-pg-log-overrides/normal_pg_log.yaml b/suites/rados/thrash/1-pg-log-overrides/normal_pg_log.yaml deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/rados/thrash/1-pg-log-overrides/short_pg_log.yaml b/suites/rados/thrash/1-pg-log-overrides/short_pg_log.yaml deleted file mode 120000 index 62010f4f7fb..00000000000 --- a/suites/rados/thrash/1-pg-log-overrides/short_pg_log.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../overrides/short_pg_log.yaml \ No newline at end of file diff --git a/suites/rados/thrash/clusters/+ b/suites/rados/thrash/clusters/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/rados/thrash/clusters/fixed-2.yaml b/suites/rados/thrash/clusters/fixed-2.yaml deleted file mode 120000 index cd0791a1486..00000000000 --- a/suites/rados/thrash/clusters/fixed-2.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../clusters/fixed-2.yaml \ No newline at end of file diff --git a/suites/rados/thrash/clusters/openstack.yaml b/suites/rados/thrash/clusters/openstack.yaml deleted file mode 100644 index 39e43d021ac..00000000000 --- a/suites/rados/thrash/clusters/openstack.yaml +++ /dev/null @@ -1,8 +0,0 @@ -openstack: - - machine: - disk: 40 # GB - ram: 8000 # MB - cpus: 1 - volumes: # attached to each instance - count: 3 - size: 30 # GB diff --git a/suites/rados/thrash/fs/ext4.yaml b/suites/rados/thrash/fs/ext4.yaml deleted file mode 120000 index 65d71886933..00000000000 --- a/suites/rados/thrash/fs/ext4.yaml +++ /dev/null @@ -1 +0,0 @@ 
-../../../../fs/ext4.yaml \ No newline at end of file diff --git a/suites/rados/thrash/fs/xfs.yaml b/suites/rados/thrash/fs/xfs.yaml deleted file mode 120000 index 4c28d731f6b..00000000000 --- a/suites/rados/thrash/fs/xfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/xfs.yaml \ No newline at end of file diff --git a/suites/rados/thrash/msgr-failures/fastclose.yaml b/suites/rados/thrash/msgr-failures/fastclose.yaml deleted file mode 100644 index 77fd730aff7..00000000000 --- a/suites/rados/thrash/msgr-failures/fastclose.yaml +++ /dev/null @@ -1,6 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 2500 - ms tcp read timeout: 5 diff --git a/suites/rados/thrash/msgr-failures/few.yaml b/suites/rados/thrash/msgr-failures/few.yaml deleted file mode 100644 index 477bffe619b..00000000000 --- a/suites/rados/thrash/msgr-failures/few.yaml +++ /dev/null @@ -1,7 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 5000 - osd: - osd heartbeat use min delay socket: true diff --git a/suites/rados/thrash/msgr-failures/osd-delay.yaml b/suites/rados/thrash/msgr-failures/osd-delay.yaml deleted file mode 100644 index a33ba89e14f..00000000000 --- a/suites/rados/thrash/msgr-failures/osd-delay.yaml +++ /dev/null @@ -1,9 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 2500 - ms inject delay type: osd - ms inject delay probability: .005 - ms inject delay max: 1 - ms inject internal delays: .002 diff --git a/suites/rados/thrash/thrashers/default.yaml b/suites/rados/thrash/thrashers/default.yaml deleted file mode 100644 index fabfc4f8c40..00000000000 --- a/suites/rados/thrash/thrashers/default.yaml +++ /dev/null @@ -1,16 +0,0 @@ -tasks: -- install: -- ceph: - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost - conf: - osd: - osd debug reject backfill probability: .3 - osd max backfills: 1 - osd scrub min interval: 60 - osd scrub max interval: 120 -- thrashosds: - timeout: 1200 - 
chance_pgnum_grow: 1 - chance_pgpnum_fix: 1 diff --git a/suites/rados/thrash/thrashers/mapgap.yaml b/suites/rados/thrash/thrashers/mapgap.yaml deleted file mode 100644 index 016563bd8ea..00000000000 --- a/suites/rados/thrash/thrashers/mapgap.yaml +++ /dev/null @@ -1,21 +0,0 @@ -overrides: - ceph: - conf: - mon: - mon min osdmap epochs: 2 - osd: - osd map cache size: 1 - osd scrub min interval: 60 - osd scrub max interval: 120 -tasks: -- install: -- ceph: - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost - - osd_map_cache_size -- thrashosds: - timeout: 1800 - chance_pgnum_grow: 1 - chance_pgpnum_fix: 1 - chance_test_map_discontinuity: 0.5 diff --git a/suites/rados/thrash/thrashers/morepggrow.yaml b/suites/rados/thrash/thrashers/morepggrow.yaml deleted file mode 100644 index 0bb136ddfea..00000000000 --- a/suites/rados/thrash/thrashers/morepggrow.yaml +++ /dev/null @@ -1,15 +0,0 @@ -tasks: -- install: -- ceph: - conf: - osd: - osd max backfills: 1 - osd scrub min interval: 60 - osd scrub max interval: 120 - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost -- thrashosds: - timeout: 1200 - chance_pgnum_grow: 3 - chance_pgpnum_fix: 1 diff --git a/suites/rados/thrash/thrashers/pggrow.yaml b/suites/rados/thrash/thrashers/pggrow.yaml deleted file mode 100644 index 4a94f50d829..00000000000 --- a/suites/rados/thrash/thrashers/pggrow.yaml +++ /dev/null @@ -1,14 +0,0 @@ -tasks: -- install: -- ceph: - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost - conf: - osd: - osd scrub min interval: 60 - osd scrub max interval: 120 -- thrashosds: - timeout: 1200 - chance_pgnum_grow: 2 - chance_pgpnum_fix: 1 diff --git a/suites/rados/thrash/workloads/admin_socket_objecter_requests.yaml b/suites/rados/thrash/workloads/admin_socket_objecter_requests.yaml deleted file mode 100644 index b1ddad8d3b0..00000000000 --- a/suites/rados/thrash/workloads/admin_socket_objecter_requests.yaml +++ /dev/null 
@@ -1,13 +0,0 @@ -overrides: - ceph: - conf: - client.0: - admin socket: /var/run/ceph/ceph-$name.asok -tasks: -- radosbench: - clients: [client.0] - time: 60 -- admin_socket: - client.0: - objecter_requests: - test: "http://git.ceph.com/?p=ceph.git;a=blob_plain;f=src/test/admin_socket/objecter_requests;hb={branch}" diff --git a/suites/rados/thrash/workloads/cache-agent-big.yaml b/suites/rados/thrash/workloads/cache-agent-big.yaml deleted file mode 100644 index bafc08df69f..00000000000 --- a/suites/rados/thrash/workloads/cache-agent-big.yaml +++ /dev/null @@ -1,31 +0,0 @@ -overrides: - ceph: - log-whitelist: - - must scrub before tier agent can activate -tasks: -- exec: - client.0: - - ceph osd erasure-code-profile set teuthologyprofile ruleset-failure-domain=osd - m=1 k=2 - - ceph osd pool create base 4 4 erasure teuthologyprofile - - ceph osd pool create cache 4 - - ceph osd tier add base cache - - ceph osd tier cache-mode cache writeback - - ceph osd tier set-overlay base cache - - ceph osd pool set cache hit_set_type bloom - - ceph osd pool set cache hit_set_count 8 - - ceph osd pool set cache hit_set_period 60 - - ceph osd pool set cache target_max_objects 5000 - - ceph osd pool set cache min_read_recency_for_promote 2 -- rados: - clients: [client.0] - pools: [base] - ops: 10000 - objects: 6600 - max_seconds: 1200 - size: 1024 - op_weights: - read: 100 - write: 100 - delete: 50 - copy_from: 50 diff --git a/suites/rados/thrash/workloads/cache-agent-small.yaml b/suites/rados/thrash/workloads/cache-agent-small.yaml deleted file mode 100644 index 9cbc435815e..00000000000 --- a/suites/rados/thrash/workloads/cache-agent-small.yaml +++ /dev/null @@ -1,28 +0,0 @@ -overrides: - ceph: - crush_tunables: firefly - log-whitelist: - - must scrub before tier agent can activate -tasks: -- exec: - client.0: - - ceph osd pool create base 4 - - ceph osd pool create cache 4 - - ceph osd tier add base cache - - ceph osd tier cache-mode cache writeback - - ceph osd tier set-overlay 
base cache - - ceph osd pool set cache hit_set_type bloom - - ceph osd pool set cache hit_set_count 8 - - ceph osd pool set cache hit_set_period 60 - - ceph osd pool set cache target_max_objects 250 - - ceph osd pool set cache min_read_recency_for_promote 0 -- rados: - clients: [client.0] - pools: [base] - ops: 4000 - objects: 500 - op_weights: - read: 100 - write: 100 - delete: 50 - copy_from: 50 diff --git a/suites/rados/thrash/workloads/cache-pool-snaps.yaml b/suites/rados/thrash/workloads/cache-pool-snaps.yaml deleted file mode 100644 index 17dfe33dd57..00000000000 --- a/suites/rados/thrash/workloads/cache-pool-snaps.yaml +++ /dev/null @@ -1,34 +0,0 @@ -overrides: - ceph: - log-whitelist: - - must scrub before tier agent can activate -tasks: -- exec: - client.0: - - ceph osd pool create base 4 - - ceph osd pool create cache 4 - - ceph osd tier add base cache - - ceph osd tier cache-mode cache writeback - - ceph osd tier set-overlay base cache - - ceph osd pool set cache hit_set_type bloom - - ceph osd pool set cache hit_set_count 8 - - ceph osd pool set cache hit_set_period 3600 - - ceph osd pool set cache target_max_objects 250 - - ceph osd pool set cache min_read_recency_for_promote 2 -- rados: - clients: [client.0] - pools: [base] - ops: 4000 - objects: 500 - pool_snaps: true - op_weights: - read: 100 - write: 100 - delete: 50 - copy_from: 50 - flush: 50 - try_flush: 50 - evict: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 diff --git a/suites/rados/thrash/workloads/cache-snaps.yaml b/suites/rados/thrash/workloads/cache-snaps.yaml deleted file mode 100644 index 21d963d7f60..00000000000 --- a/suites/rados/thrash/workloads/cache-snaps.yaml +++ /dev/null @@ -1,33 +0,0 @@ -overrides: - ceph: - log-whitelist: - - must scrub before tier agent can activate -tasks: -- exec: - client.0: - - ceph osd pool create base 4 - - ceph osd pool create cache 4 - - ceph osd tier add base cache - - ceph osd tier cache-mode cache writeback - - ceph osd tier set-overlay 
base cache - - ceph osd pool set cache hit_set_type bloom - - ceph osd pool set cache hit_set_count 8 - - ceph osd pool set cache hit_set_period 3600 - - ceph osd pool set cache target_max_objects 250 - - ceph osd pool set cache min_read_recency_for_promote 0 -- rados: - clients: [client.0] - pools: [base] - ops: 4000 - objects: 500 - op_weights: - read: 100 - write: 100 - delete: 50 - copy_from: 50 - flush: 50 - try_flush: 50 - evict: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 diff --git a/suites/rados/thrash/workloads/cache.yaml b/suites/rados/thrash/workloads/cache.yaml deleted file mode 100644 index 0f15cad6763..00000000000 --- a/suites/rados/thrash/workloads/cache.yaml +++ /dev/null @@ -1,28 +0,0 @@ -overrides: - ceph: - log-whitelist: - - must scrub before tier agent can activate -tasks: -- exec: - client.0: - - ceph osd pool create base 4 - - ceph osd pool create cache 4 - - ceph osd tier add base cache - - ceph osd tier cache-mode cache writeback - - ceph osd tier set-overlay base cache - - ceph osd pool set cache hit_set_type bloom - - ceph osd pool set cache hit_set_count 8 - - ceph osd pool set cache hit_set_period 3600 -- rados: - clients: [client.0] - pools: [base] - ops: 4000 - objects: 500 - op_weights: - read: 100 - write: 100 - delete: 50 - copy_from: 50 - flush: 50 - try_flush: 50 - evict: 50 diff --git a/suites/rados/thrash/workloads/pool-snaps-few-objects.yaml b/suites/rados/thrash/workloads/pool-snaps-few-objects.yaml deleted file mode 100644 index b5f6dca6c50..00000000000 --- a/suites/rados/thrash/workloads/pool-snaps-few-objects.yaml +++ /dev/null @@ -1,14 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 50 - pool_snaps: true - op_weights: - read: 100 - write: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 - copy_from: 50 diff --git a/suites/rados/thrash/workloads/rados_api_tests.yaml b/suites/rados/thrash/workloads/rados_api_tests.yaml deleted file mode 100644 index 
265649e2b0c..00000000000 --- a/suites/rados/thrash/workloads/rados_api_tests.yaml +++ /dev/null @@ -1,13 +0,0 @@ -overrides: - ceph: - crush_tunables: hammer - conf: - client: - debug ms: 1 - debug objecter: 20 - rebug rados: 20 -tasks: -- workunit: - clients: - client.0: - - rados/test.sh diff --git a/suites/rados/thrash/workloads/radosbench.yaml b/suites/rados/thrash/workloads/radosbench.yaml deleted file mode 100644 index 03a8ecbdb5b..00000000000 --- a/suites/rados/thrash/workloads/radosbench.yaml +++ /dev/null @@ -1,11 +0,0 @@ -overrides: - ceph: - conf: - client.0: - debug ms: 1 - debug objecter: 20 - debug rados: 20 -tasks: -- radosbench: - clients: [client.0] - time: 300 diff --git a/suites/rados/thrash/workloads/readwrite.yaml b/suites/rados/thrash/workloads/readwrite.yaml deleted file mode 100644 index 84290905d14..00000000000 --- a/suites/rados/thrash/workloads/readwrite.yaml +++ /dev/null @@ -1,12 +0,0 @@ -overrides: - ceph: - crush_tunables: optimal -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 500 - op_weights: - read: 45 - write: 45 - delete: 10 diff --git a/suites/rados/thrash/workloads/small-objects.yaml b/suites/rados/thrash/workloads/small-objects.yaml deleted file mode 100644 index d8545b9bece..00000000000 --- a/suites/rados/thrash/workloads/small-objects.yaml +++ /dev/null @@ -1,21 +0,0 @@ -overrides: - ceph: - crush_tunables: legacy -tasks: -- rados: - clients: [client.0] - ops: 400000 - max_seconds: 600 - max_in_flight: 64 - objects: 1024 - size: 16384 - op_weights: - read: 100 - write: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 - copy_from: 50 - setattr: 25 - rmattr: 25 diff --git a/suites/rados/thrash/workloads/snaps-few-objects.yaml b/suites/rados/thrash/workloads/snaps-few-objects.yaml deleted file mode 100644 index aa82d973ae1..00000000000 --- a/suites/rados/thrash/workloads/snaps-few-objects.yaml +++ /dev/null @@ -1,13 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 50 - 
op_weights: - read: 100 - write: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 - copy_from: 50 diff --git a/suites/rados/thrash/workloads/write_fadvise_dontneed.yaml b/suites/rados/thrash/workloads/write_fadvise_dontneed.yaml deleted file mode 100644 index 606dcae6922..00000000000 --- a/suites/rados/thrash/workloads/write_fadvise_dontneed.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 500 - write_fadvise_dontneed: true - op_weights: - write: 100 diff --git a/suites/rados/verify/% b/suites/rados/verify/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/rados/verify/1thrash/default.yaml b/suites/rados/verify/1thrash/default.yaml deleted file mode 100644 index 9435b146af6..00000000000 --- a/suites/rados/verify/1thrash/default.yaml +++ /dev/null @@ -1,10 +0,0 @@ -tasks: -- install: -- ceph: - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost -- thrashosds: - timeout: 1200 - chance_pgnum_grow: 1 - chance_pgpnum_fix: 1 diff --git a/suites/rados/verify/1thrash/none.yaml b/suites/rados/verify/1thrash/none.yaml deleted file mode 100644 index 2030acb9083..00000000000 --- a/suites/rados/verify/1thrash/none.yaml +++ /dev/null @@ -1,3 +0,0 @@ -tasks: -- install: -- ceph: diff --git a/suites/rados/verify/clusters/fixed-2.yaml b/suites/rados/verify/clusters/fixed-2.yaml deleted file mode 120000 index cd0791a1486..00000000000 --- a/suites/rados/verify/clusters/fixed-2.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../clusters/fixed-2.yaml \ No newline at end of file diff --git a/suites/rados/verify/fs/xfs.yaml b/suites/rados/verify/fs/xfs.yaml deleted file mode 120000 index 4c28d731f6b..00000000000 --- a/suites/rados/verify/fs/xfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/xfs.yaml \ No newline at end of file diff --git a/suites/rados/verify/msgr-failures/few.yaml b/suites/rados/verify/msgr-failures/few.yaml deleted file mode 100644 index 
0de320d46b8..00000000000 --- a/suites/rados/verify/msgr-failures/few.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 5000 diff --git a/suites/rados/verify/tasks/mon_recovery.yaml b/suites/rados/verify/tasks/mon_recovery.yaml deleted file mode 100644 index 6986303409e..00000000000 --- a/suites/rados/verify/tasks/mon_recovery.yaml +++ /dev/null @@ -1,2 +0,0 @@ -tasks: -- mon_recovery: diff --git a/suites/rados/verify/tasks/rados_api_tests.yaml b/suites/rados/verify/tasks/rados_api_tests.yaml deleted file mode 100644 index 0031704784e..00000000000 --- a/suites/rados/verify/tasks/rados_api_tests.yaml +++ /dev/null @@ -1,14 +0,0 @@ -overrides: - ceph: - conf: - client: - debug ms: 1 - debug objecter: 20 - debug rados: 20 - debug monc: 20 -tasks: -- workunit: - timeout: 6h - clients: - client.0: - - rados/test.sh diff --git a/suites/rados/verify/tasks/rados_cls_all.yaml b/suites/rados/verify/tasks/rados_cls_all.yaml deleted file mode 100644 index 853da39ad99..00000000000 --- a/suites/rados/verify/tasks/rados_cls_all.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - client.0: - - cls diff --git a/suites/rados/verify/validater/lockdep.yaml b/suites/rados/verify/validater/lockdep.yaml deleted file mode 100644 index 25f84355c0b..00000000000 --- a/suites/rados/verify/validater/lockdep.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - lockdep: true diff --git a/suites/rados/verify/validater/valgrind.yaml b/suites/rados/verify/validater/valgrind.yaml deleted file mode 100644 index 0b28c183065..00000000000 --- a/suites/rados/verify/validater/valgrind.yaml +++ /dev/null @@ -1,12 +0,0 @@ -overrides: - install: - ceph: - flavor: notcmalloc - ceph: - conf: - global: - osd heartbeat grace: 40 - valgrind: - mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes] - osd: [--tool=memcheck] - mds: [--tool=memcheck] diff --git a/suites/rbd/basic/% b/suites/rbd/basic/% deleted file mode 
100644 index e69de29bb2d..00000000000 diff --git a/suites/rbd/basic/base/install.yaml b/suites/rbd/basic/base/install.yaml deleted file mode 100644 index 2030acb9083..00000000000 --- a/suites/rbd/basic/base/install.yaml +++ /dev/null @@ -1,3 +0,0 @@ -tasks: -- install: -- ceph: diff --git a/suites/rbd/basic/cachepool/none.yaml b/suites/rbd/basic/cachepool/none.yaml deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/rbd/basic/cachepool/small.yaml b/suites/rbd/basic/cachepool/small.yaml deleted file mode 100644 index f8ed11040fa..00000000000 --- a/suites/rbd/basic/cachepool/small.yaml +++ /dev/null @@ -1,11 +0,0 @@ -tasks: -- exec: - client.0: - - ceph osd pool create cache 4 - - ceph osd tier add rbd cache - - ceph osd tier cache-mode cache writeback - - ceph osd tier set-overlay rbd cache - - ceph osd pool set cache hit_set_type bloom - - ceph osd pool set cache hit_set_count 8 - - ceph osd pool set cache hit_set_period 60 - - ceph osd pool set cache target_max_objects 250 diff --git a/suites/rbd/basic/clusters/fixed-1.yaml b/suites/rbd/basic/clusters/fixed-1.yaml deleted file mode 120000 index 435ea3c7546..00000000000 --- a/suites/rbd/basic/clusters/fixed-1.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../clusters/fixed-1.yaml \ No newline at end of file diff --git a/suites/rbd/basic/fs/btrfs.yaml b/suites/rbd/basic/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/rbd/basic/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/rbd/basic/msgr-failures/few.yaml b/suites/rbd/basic/msgr-failures/few.yaml deleted file mode 100644 index 0de320d46b8..00000000000 --- a/suites/rbd/basic/msgr-failures/few.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 5000 diff --git a/suites/rbd/basic/msgr-failures/many.yaml b/suites/rbd/basic/msgr-failures/many.yaml deleted file mode 100644 index 
86f8dde8a0e..00000000000 --- a/suites/rbd/basic/msgr-failures/many.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 500 diff --git a/suites/rbd/basic/tasks/rbd_api_tests_old_format.yaml b/suites/rbd/basic/tasks/rbd_api_tests_old_format.yaml deleted file mode 100644 index a98768540ba..00000000000 --- a/suites/rbd/basic/tasks/rbd_api_tests_old_format.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - client.0: - - rbd/test_librbd.sh diff --git a/suites/rbd/basic/tasks/rbd_cli_tests.yaml b/suites/rbd/basic/tasks/rbd_cli_tests.yaml deleted file mode 100644 index a37db057b5d..00000000000 --- a/suites/rbd/basic/tasks/rbd_cli_tests.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- workunit: - clients: - client.0: - - rbd/run_cli_tests.sh - diff --git a/suites/rbd/basic/tasks/rbd_cls_tests.yaml b/suites/rbd/basic/tasks/rbd_cls_tests.yaml deleted file mode 100644 index 9ccd57c4a82..00000000000 --- a/suites/rbd/basic/tasks/rbd_cls_tests.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - client.0: - - cls/test_cls_rbd.sh diff --git a/suites/rbd/basic/tasks/rbd_lock_and_fence.yaml b/suites/rbd/basic/tasks/rbd_lock_and_fence.yaml deleted file mode 100644 index d2c80ad6585..00000000000 --- a/suites/rbd/basic/tasks/rbd_lock_and_fence.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - client.0: - - rbd/test_lock_fence.sh diff --git a/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml b/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml deleted file mode 100644 index 263b784e27d..00000000000 --- a/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - client.0: - - rbd/test_librbd_python.sh diff --git a/suites/rbd/cli/% b/suites/rbd/cli/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/rbd/cli/base/install.yaml b/suites/rbd/cli/base/install.yaml deleted file mode 
100644 index 2030acb9083..00000000000 --- a/suites/rbd/cli/base/install.yaml +++ /dev/null @@ -1,3 +0,0 @@ -tasks: -- install: -- ceph: diff --git a/suites/rbd/cli/cachepool/none.yaml b/suites/rbd/cli/cachepool/none.yaml deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/rbd/cli/cachepool/small.yaml b/suites/rbd/cli/cachepool/small.yaml deleted file mode 100644 index f8ed11040fa..00000000000 --- a/suites/rbd/cli/cachepool/small.yaml +++ /dev/null @@ -1,11 +0,0 @@ -tasks: -- exec: - client.0: - - ceph osd pool create cache 4 - - ceph osd tier add rbd cache - - ceph osd tier cache-mode cache writeback - - ceph osd tier set-overlay rbd cache - - ceph osd pool set cache hit_set_type bloom - - ceph osd pool set cache hit_set_count 8 - - ceph osd pool set cache hit_set_period 60 - - ceph osd pool set cache target_max_objects 250 diff --git a/suites/rbd/cli/clusters/fixed-1.yaml b/suites/rbd/cli/clusters/fixed-1.yaml deleted file mode 120000 index 435ea3c7546..00000000000 --- a/suites/rbd/cli/clusters/fixed-1.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../clusters/fixed-1.yaml \ No newline at end of file diff --git a/suites/rbd/cli/features/layering.yaml b/suites/rbd/cli/features/layering.yaml deleted file mode 100644 index 233dd53667c..00000000000 --- a/suites/rbd/cli/features/layering.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - client: - rbd default format: 2 diff --git a/suites/rbd/cli/features/none.yaml b/suites/rbd/cli/features/none.yaml deleted file mode 100644 index 8b137891791..00000000000 --- a/suites/rbd/cli/features/none.yaml +++ /dev/null @@ -1 +0,0 @@ - diff --git a/suites/rbd/cli/features/object_map.yaml b/suites/rbd/cli/features/object_map.yaml deleted file mode 100644 index 4e31f21cc25..00000000000 --- a/suites/rbd/cli/features/object_map.yaml +++ /dev/null @@ -1,6 +0,0 @@ -overrides: - ceph: - conf: - client: - rbd default format: 2 - rbd default features: 13 diff --git a/suites/rbd/cli/fs b/suites/rbd/cli/fs 
deleted file mode 120000 index 3658920363d..00000000000 --- a/suites/rbd/cli/fs +++ /dev/null @@ -1 +0,0 @@ -../basic/fs \ No newline at end of file diff --git a/suites/rbd/cli/msgr-failures/few.yaml b/suites/rbd/cli/msgr-failures/few.yaml deleted file mode 100644 index 0de320d46b8..00000000000 --- a/suites/rbd/cli/msgr-failures/few.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 5000 diff --git a/suites/rbd/cli/msgr-failures/many.yaml b/suites/rbd/cli/msgr-failures/many.yaml deleted file mode 100644 index 86f8dde8a0e..00000000000 --- a/suites/rbd/cli/msgr-failures/many.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 500 diff --git a/suites/rbd/cli/workloads/rbd_cli_copy.yaml b/suites/rbd/cli/workloads/rbd_cli_copy.yaml deleted file mode 100644 index 2f99f8990de..00000000000 --- a/suites/rbd/cli/workloads/rbd_cli_copy.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - client.0: - - rbd/copy.sh diff --git a/suites/rbd/cli/workloads/rbd_cli_import_export.yaml b/suites/rbd/cli/workloads/rbd_cli_import_export.yaml deleted file mode 100644 index b08f2612f7a..00000000000 --- a/suites/rbd/cli/workloads/rbd_cli_import_export.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - client.0: - - rbd/import_export.sh diff --git a/suites/rbd/librbd/% b/suites/rbd/librbd/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/rbd/librbd/cache/none.yaml b/suites/rbd/librbd/cache/none.yaml deleted file mode 100644 index 42fd9c95562..00000000000 --- a/suites/rbd/librbd/cache/none.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- install: -- ceph: - conf: - client: - rbd cache: false diff --git a/suites/rbd/librbd/cache/writeback.yaml b/suites/rbd/librbd/cache/writeback.yaml deleted file mode 100644 index 86fe06afa05..00000000000 --- a/suites/rbd/librbd/cache/writeback.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- install: -- 
ceph: - conf: - client: - rbd cache: true diff --git a/suites/rbd/librbd/cache/writethrough.yaml b/suites/rbd/librbd/cache/writethrough.yaml deleted file mode 100644 index 6dc29e16c02..00000000000 --- a/suites/rbd/librbd/cache/writethrough.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- install: -- ceph: - conf: - client: - rbd cache: true - rbd cache max dirty: 0 diff --git a/suites/rbd/librbd/cachepool/none.yaml b/suites/rbd/librbd/cachepool/none.yaml deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/rbd/librbd/cachepool/small.yaml b/suites/rbd/librbd/cachepool/small.yaml deleted file mode 100644 index f8ed11040fa..00000000000 --- a/suites/rbd/librbd/cachepool/small.yaml +++ /dev/null @@ -1,11 +0,0 @@ -tasks: -- exec: - client.0: - - ceph osd pool create cache 4 - - ceph osd tier add rbd cache - - ceph osd tier cache-mode cache writeback - - ceph osd tier set-overlay rbd cache - - ceph osd pool set cache hit_set_type bloom - - ceph osd pool set cache hit_set_count 8 - - ceph osd pool set cache hit_set_period 60 - - ceph osd pool set cache target_max_objects 250 diff --git a/suites/rbd/librbd/clusters/fixed-3.yaml b/suites/rbd/librbd/clusters/fixed-3.yaml deleted file mode 120000 index a3ac9fc4dec..00000000000 --- a/suites/rbd/librbd/clusters/fixed-3.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../clusters/fixed-3.yaml \ No newline at end of file diff --git a/suites/rbd/librbd/copy-on-read/off.yaml b/suites/rbd/librbd/copy-on-read/off.yaml deleted file mode 100644 index 638d14aa12a..00000000000 --- a/suites/rbd/librbd/copy-on-read/off.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - client: - rbd clone copy on read: false diff --git a/suites/rbd/librbd/copy-on-read/on.yaml b/suites/rbd/librbd/copy-on-read/on.yaml deleted file mode 100644 index ce99e7ec0a9..00000000000 --- a/suites/rbd/librbd/copy-on-read/on.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - client: - rbd clone copy on read: true diff --git 
a/suites/rbd/librbd/fs b/suites/rbd/librbd/fs deleted file mode 120000 index 3658920363d..00000000000 --- a/suites/rbd/librbd/fs +++ /dev/null @@ -1 +0,0 @@ -../basic/fs \ No newline at end of file diff --git a/suites/rbd/librbd/msgr-failures/few.yaml b/suites/rbd/librbd/msgr-failures/few.yaml deleted file mode 100644 index a8bc68355ea..00000000000 --- a/suites/rbd/librbd/msgr-failures/few.yaml +++ /dev/null @@ -1,7 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 5000 - log-whitelist: - - wrongly marked me down diff --git a/suites/rbd/librbd/workloads/c_api_tests.yaml b/suites/rbd/librbd/workloads/c_api_tests.yaml deleted file mode 100644 index 188ddc56c60..00000000000 --- a/suites/rbd/librbd/workloads/c_api_tests.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- workunit: - clients: - client.0: - - rbd/test_librbd.sh - env: - RBD_FEATURES: "1" diff --git a/suites/rbd/librbd/workloads/c_api_tests_with_object_map.yaml b/suites/rbd/librbd/workloads/c_api_tests_with_object_map.yaml deleted file mode 100644 index fef368b9bc4..00000000000 --- a/suites/rbd/librbd/workloads/c_api_tests_with_object_map.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- workunit: - clients: - client.0: - - rbd/test_librbd.sh - env: - RBD_FEATURES: "13" diff --git a/suites/rbd/librbd/workloads/fsx.yaml b/suites/rbd/librbd/workloads/fsx.yaml deleted file mode 100644 index ef512d8a9b4..00000000000 --- a/suites/rbd/librbd/workloads/fsx.yaml +++ /dev/null @@ -1,4 +0,0 @@ -tasks: -- rbd_fsx: - clients: [client.0] - ops: 5000 diff --git a/suites/rbd/librbd/workloads/python_api_tests.yaml b/suites/rbd/librbd/workloads/python_api_tests.yaml deleted file mode 100644 index a7b3ce7d3e6..00000000000 --- a/suites/rbd/librbd/workloads/python_api_tests.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- workunit: - clients: - client.0: - - rbd/test_librbd_python.sh - env: - RBD_FEATURES: "1" diff --git a/suites/rbd/librbd/workloads/python_api_tests_with_object_map.yaml 
b/suites/rbd/librbd/workloads/python_api_tests_with_object_map.yaml deleted file mode 100644 index ede74cd8d59..00000000000 --- a/suites/rbd/librbd/workloads/python_api_tests_with_object_map.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- workunit: - clients: - client.0: - - rbd/test_librbd_python.sh - env: - RBD_FEATURES: "13" diff --git a/suites/rbd/qemu/% b/suites/rbd/qemu/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/rbd/qemu/cache/none.yaml b/suites/rbd/qemu/cache/none.yaml deleted file mode 100644 index 42fd9c95562..00000000000 --- a/suites/rbd/qemu/cache/none.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- install: -- ceph: - conf: - client: - rbd cache: false diff --git a/suites/rbd/qemu/cache/writeback.yaml b/suites/rbd/qemu/cache/writeback.yaml deleted file mode 100644 index 86fe06afa05..00000000000 --- a/suites/rbd/qemu/cache/writeback.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- install: -- ceph: - conf: - client: - rbd cache: true diff --git a/suites/rbd/qemu/cache/writethrough.yaml b/suites/rbd/qemu/cache/writethrough.yaml deleted file mode 100644 index 6dc29e16c02..00000000000 --- a/suites/rbd/qemu/cache/writethrough.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- install: -- ceph: - conf: - client: - rbd cache: true - rbd cache max dirty: 0 diff --git a/suites/rbd/qemu/cachepool/none.yaml b/suites/rbd/qemu/cachepool/none.yaml deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/rbd/qemu/cachepool/small.yaml b/suites/rbd/qemu/cachepool/small.yaml deleted file mode 100644 index f8ed11040fa..00000000000 --- a/suites/rbd/qemu/cachepool/small.yaml +++ /dev/null @@ -1,11 +0,0 @@ -tasks: -- exec: - client.0: - - ceph osd pool create cache 4 - - ceph osd tier add rbd cache - - ceph osd tier cache-mode cache writeback - - ceph osd tier set-overlay rbd cache - - ceph osd pool set cache hit_set_type bloom - - ceph osd pool set cache hit_set_count 8 - - ceph osd pool set cache hit_set_period 60 - - ceph osd pool 
set cache target_max_objects 250 diff --git a/suites/rbd/qemu/clusters/+ b/suites/rbd/qemu/clusters/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/rbd/qemu/clusters/fixed-3.yaml b/suites/rbd/qemu/clusters/fixed-3.yaml deleted file mode 120000 index a3ac9fc4dec..00000000000 --- a/suites/rbd/qemu/clusters/fixed-3.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../clusters/fixed-3.yaml \ No newline at end of file diff --git a/suites/rbd/qemu/clusters/openstack.yaml b/suites/rbd/qemu/clusters/openstack.yaml deleted file mode 100644 index f87995808a1..00000000000 --- a/suites/rbd/qemu/clusters/openstack.yaml +++ /dev/null @@ -1,8 +0,0 @@ -openstack: - - machine: - disk: 40 # GB - ram: 30000 # MB - cpus: 1 - volumes: # attached to each instance - count: 3 - size: 30 # GB diff --git a/suites/rbd/qemu/fs b/suites/rbd/qemu/fs deleted file mode 120000 index 3658920363d..00000000000 --- a/suites/rbd/qemu/fs +++ /dev/null @@ -1 +0,0 @@ -../basic/fs \ No newline at end of file diff --git a/suites/rbd/qemu/msgr-failures/few.yaml b/suites/rbd/qemu/msgr-failures/few.yaml deleted file mode 100644 index a8bc68355ea..00000000000 --- a/suites/rbd/qemu/msgr-failures/few.yaml +++ /dev/null @@ -1,7 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 5000 - log-whitelist: - - wrongly marked me down diff --git a/suites/rbd/qemu/workloads/qemu_bonnie.yaml b/suites/rbd/qemu/workloads/qemu_bonnie.yaml deleted file mode 100644 index 3e523d61b77..00000000000 --- a/suites/rbd/qemu/workloads/qemu_bonnie.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- qemu: - all: - clone: true - test: http://git.ceph.com/?p=ceph.git;a=blob_plain;f=qa/workunits/suites/bonnie.sh -exclude_arch: armv7l diff --git a/suites/rbd/qemu/workloads/qemu_fsstress.yaml b/suites/rbd/qemu/workloads/qemu_fsstress.yaml deleted file mode 100644 index 040dc5567b7..00000000000 --- a/suites/rbd/qemu/workloads/qemu_fsstress.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- qemu: - all: - 
clone: true - test: http://git.ceph.com/?p=ceph.git;a=blob_plain;f=qa/workunits/suites/fsstress.sh -exclude_arch: armv7l diff --git a/suites/rbd/qemu/workloads/qemu_iozone.yaml.disabled b/suites/rbd/qemu/workloads/qemu_iozone.yaml.disabled deleted file mode 100644 index 3dae6e78a2f..00000000000 --- a/suites/rbd/qemu/workloads/qemu_iozone.yaml.disabled +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- qemu: - all: - test: http://git.ceph.com/?p=ceph.git;a=blob_plain;f=qa/workunits/suites/iozone.sh - image_size: 20480 -exclude_arch: armv7l diff --git a/suites/rbd/qemu/workloads/qemu_xfstests.yaml b/suites/rbd/qemu/workloads/qemu_xfstests.yaml deleted file mode 100644 index b6112d1bb27..00000000000 --- a/suites/rbd/qemu/workloads/qemu_xfstests.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- qemu: - all: - clone: true - type: block - num_rbd: 2 - test: http://git.ceph.com/?p=ceph.git;a=blob_plain;f=qa/run_xfstests_qemu.sh -exclude_arch: armv7l diff --git a/suites/rbd/singleton/% b/suites/rbd/singleton/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/rbd/singleton/all/formatted-output.yaml b/suites/rbd/singleton/all/formatted-output.yaml deleted file mode 100644 index 8b118b18928..00000000000 --- a/suites/rbd/singleton/all/formatted-output.yaml +++ /dev/null @@ -1,9 +0,0 @@ -roles: -- [mon.a, osd.0, osd.1, client.0] -tasks: -- install: -- ceph: -- cram: - clients: - client.0: - - http://git.ceph.com/?p=ceph.git;a=blob_plain;hb=hammer;f=src/test/cli-integration/rbd/formatted-output.t diff --git a/suites/rbd/singleton/all/merge_diff.yaml b/suites/rbd/singleton/all/merge_diff.yaml deleted file mode 100644 index fc58d827b0b..00000000000 --- a/suites/rbd/singleton/all/merge_diff.yaml +++ /dev/null @@ -1,8 +0,0 @@ -roles: -- [mon.a, osd.0, osd.1, client.0] -tasks: -- install: -- ceph: -- workunit: - clients: - all: [rbd/merge_diff.sh] diff --git a/suites/rbd/singleton/all/qemu-iotests-no-cache.yaml b/suites/rbd/singleton/all/qemu-iotests-no-cache.yaml 
deleted file mode 100644 index 2771d4e8db1..00000000000 --- a/suites/rbd/singleton/all/qemu-iotests-no-cache.yaml +++ /dev/null @@ -1,12 +0,0 @@ -exclude_arch: armv7l -roles: -- [mon.a, osd.0, osd.1, client.0] -tasks: -- install: -- ceph: - conf: - client: - rbd cache: false -- workunit: - clients: - all: [rbd/qemu-iotests.sh] diff --git a/suites/rbd/singleton/all/qemu-iotests-writeback.yaml b/suites/rbd/singleton/all/qemu-iotests-writeback.yaml deleted file mode 100644 index f6768df5a22..00000000000 --- a/suites/rbd/singleton/all/qemu-iotests-writeback.yaml +++ /dev/null @@ -1,12 +0,0 @@ -exclude_arch: armv7l -roles: -- [mon.a, osd.0, osd.1, client.0] -tasks: -- install: -- ceph: - conf: - client: - rbd cache: true -- workunit: - clients: - all: [rbd/qemu-iotests.sh] diff --git a/suites/rbd/singleton/all/qemu-iotests-writethrough.yaml b/suites/rbd/singleton/all/qemu-iotests-writethrough.yaml deleted file mode 100644 index 287509e4953..00000000000 --- a/suites/rbd/singleton/all/qemu-iotests-writethrough.yaml +++ /dev/null @@ -1,13 +0,0 @@ -exclude_arch: armv7l -roles: -- [mon.a, osd.0, osd.1, client.0] -tasks: -- install: -- ceph: - conf: - client: - rbd cache: true - rbd cache max dirty: 0 -- workunit: - clients: - all: [rbd/qemu-iotests.sh] diff --git a/suites/rbd/singleton/all/rbd-vs-unmanaged-snaps.yaml b/suites/rbd/singleton/all/rbd-vs-unmanaged-snaps.yaml deleted file mode 100644 index f00153f8464..00000000000 --- a/suites/rbd/singleton/all/rbd-vs-unmanaged-snaps.yaml +++ /dev/null @@ -1,13 +0,0 @@ -roles: -- [mon.a, osd.0, osd.1, client.0] -tasks: -- install: -- ceph: - conf: - client: - rbd validate pool: false -- workunit: - clients: - all: - - mon/rbd_snaps_ops.sh - diff --git a/suites/rbd/singleton/all/read-flags-no-cache.yaml b/suites/rbd/singleton/all/read-flags-no-cache.yaml deleted file mode 100644 index f7d44456d3b..00000000000 --- a/suites/rbd/singleton/all/read-flags-no-cache.yaml +++ /dev/null @@ -1,11 +0,0 @@ -roles: -- [mon.a, osd.0, osd.1, 
client.0] -tasks: -- install: -- ceph: - conf: - client: - rbd cache: false -- workunit: - clients: - all: [rbd/read-flags.sh] diff --git a/suites/rbd/singleton/all/read-flags-writeback.yaml b/suites/rbd/singleton/all/read-flags-writeback.yaml deleted file mode 100644 index f25be79e0b6..00000000000 --- a/suites/rbd/singleton/all/read-flags-writeback.yaml +++ /dev/null @@ -1,11 +0,0 @@ -roles: -- [mon.a, osd.0, osd.1, client.0] -tasks: -- install: -- ceph: - conf: - client: - rbd cache: true -- workunit: - clients: - all: [rbd/read-flags.sh] diff --git a/suites/rbd/singleton/all/read-flags-writethrough.yaml b/suites/rbd/singleton/all/read-flags-writethrough.yaml deleted file mode 100644 index 80d7b4254b6..00000000000 --- a/suites/rbd/singleton/all/read-flags-writethrough.yaml +++ /dev/null @@ -1,12 +0,0 @@ -roles: -- [mon.a, osd.0, osd.1, client.0] -tasks: -- install: -- ceph: - conf: - client: - rbd cache: true - rbd cache max dirty: 0 -- workunit: - clients: - all: [rbd/read-flags.sh] diff --git a/suites/rbd/thrash/% b/suites/rbd/thrash/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/rbd/thrash/base/install.yaml b/suites/rbd/thrash/base/install.yaml deleted file mode 100644 index 2030acb9083..00000000000 --- a/suites/rbd/thrash/base/install.yaml +++ /dev/null @@ -1,3 +0,0 @@ -tasks: -- install: -- ceph: diff --git a/suites/rbd/thrash/clusters/+ b/suites/rbd/thrash/clusters/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/rbd/thrash/clusters/fixed-2.yaml b/suites/rbd/thrash/clusters/fixed-2.yaml deleted file mode 120000 index cd0791a1486..00000000000 --- a/suites/rbd/thrash/clusters/fixed-2.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../clusters/fixed-2.yaml \ No newline at end of file diff --git a/suites/rbd/thrash/clusters/openstack.yaml b/suites/rbd/thrash/clusters/openstack.yaml deleted file mode 100644 index 39e43d021ac..00000000000 --- a/suites/rbd/thrash/clusters/openstack.yaml +++ /dev/null @@ -1,8 
+0,0 @@ -openstack: - - machine: - disk: 40 # GB - ram: 8000 # MB - cpus: 1 - volumes: # attached to each instance - count: 3 - size: 30 # GB diff --git a/suites/rbd/thrash/fs/btrfs.yaml b/suites/rbd/thrash/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/rbd/thrash/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/rbd/thrash/fs/xfs.yaml b/suites/rbd/thrash/fs/xfs.yaml deleted file mode 120000 index 4c28d731f6b..00000000000 --- a/suites/rbd/thrash/fs/xfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/xfs.yaml \ No newline at end of file diff --git a/suites/rbd/thrash/msgr-failures/few.yaml b/suites/rbd/thrash/msgr-failures/few.yaml deleted file mode 100644 index 0de320d46b8..00000000000 --- a/suites/rbd/thrash/msgr-failures/few.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 5000 diff --git a/suites/rbd/thrash/thrashers/cache.yaml b/suites/rbd/thrash/thrashers/cache.yaml deleted file mode 100644 index 5bab78ee840..00000000000 --- a/suites/rbd/thrash/thrashers/cache.yaml +++ /dev/null @@ -1,18 +0,0 @@ -overrides: - ceph: - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost -tasks: -- exec: - client.0: - - ceph osd pool create cache 4 - - ceph osd tier add rbd cache - - ceph osd tier cache-mode cache writeback - - ceph osd tier set-overlay rbd cache - - ceph osd pool set cache hit_set_type bloom - - ceph osd pool set cache hit_set_count 8 - - ceph osd pool set cache hit_set_period 60 - - ceph osd pool set cache target_max_objects 250 -- thrashosds: - timeout: 1200 diff --git a/suites/rbd/thrash/thrashers/default.yaml b/suites/rbd/thrash/thrashers/default.yaml deleted file mode 100644 index 89c9bdfb0e5..00000000000 --- a/suites/rbd/thrash/thrashers/default.yaml +++ /dev/null @@ -1,8 +0,0 @@ -overrides: - ceph: - log-whitelist: - - wrongly marked me down - - objects unfound and 
apparently lost -tasks: -- thrashosds: - timeout: 1200 diff --git a/suites/rbd/thrash/workloads/rbd_api_tests.yaml b/suites/rbd/thrash/workloads/rbd_api_tests.yaml deleted file mode 100644 index fef368b9bc4..00000000000 --- a/suites/rbd/thrash/workloads/rbd_api_tests.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- workunit: - clients: - client.0: - - rbd/test_librbd.sh - env: - RBD_FEATURES: "13" diff --git a/suites/rbd/thrash/workloads/rbd_api_tests_copy_on_read.yaml b/suites/rbd/thrash/workloads/rbd_api_tests_copy_on_read.yaml deleted file mode 100644 index c013ac5ca13..00000000000 --- a/suites/rbd/thrash/workloads/rbd_api_tests_copy_on_read.yaml +++ /dev/null @@ -1,12 +0,0 @@ -tasks: -- workunit: - clients: - client.0: - - rbd/test_librbd.sh - env: - RBD_FEATURES: "13" -overrides: - ceph: - conf: - client: - rbd clone copy on read: true diff --git a/suites/rbd/thrash/workloads/rbd_api_tests_no_locking.yaml b/suites/rbd/thrash/workloads/rbd_api_tests_no_locking.yaml deleted file mode 100644 index 188ddc56c60..00000000000 --- a/suites/rbd/thrash/workloads/rbd_api_tests_no_locking.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- workunit: - clients: - client.0: - - rbd/test_librbd.sh - env: - RBD_FEATURES: "1" diff --git a/suites/rbd/thrash/workloads/rbd_fsx_cache_writeback.yaml b/suites/rbd/thrash/workloads/rbd_fsx_cache_writeback.yaml deleted file mode 100644 index bd812695c83..00000000000 --- a/suites/rbd/thrash/workloads/rbd_fsx_cache_writeback.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- rbd_fsx: - clients: [client.0] - ops: 2000 -overrides: - ceph: - conf: - client: - rbd cache: true diff --git a/suites/rbd/thrash/workloads/rbd_fsx_cache_writethrough.yaml b/suites/rbd/thrash/workloads/rbd_fsx_cache_writethrough.yaml deleted file mode 100644 index 56895298025..00000000000 --- a/suites/rbd/thrash/workloads/rbd_fsx_cache_writethrough.yaml +++ /dev/null @@ -1,10 +0,0 @@ -tasks: -- rbd_fsx: - clients: [client.0] - ops: 2000 -overrides: - ceph: - conf: - client: - rbd 
cache: true - rbd cache max dirty: 0 diff --git a/suites/rbd/thrash/workloads/rbd_fsx_copy_on_read.yaml b/suites/rbd/thrash/workloads/rbd_fsx_copy_on_read.yaml deleted file mode 100644 index a4860728141..00000000000 --- a/suites/rbd/thrash/workloads/rbd_fsx_copy_on_read.yaml +++ /dev/null @@ -1,10 +0,0 @@ -tasks: -- rbd_fsx: - clients: [client.0] - ops: 2000 -overrides: - ceph: - conf: - client: - rbd cache: true - rbd clone copy on read: true diff --git a/suites/rbd/thrash/workloads/rbd_fsx_nocache.yaml b/suites/rbd/thrash/workloads/rbd_fsx_nocache.yaml deleted file mode 100644 index 6c5e0e45707..00000000000 --- a/suites/rbd/thrash/workloads/rbd_fsx_nocache.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- rbd_fsx: - clients: [client.0] - ops: 2000 -overrides: - ceph: - conf: - client: - rbd cache: false diff --git a/suites/rest/basic/tasks/rest_test.yaml b/suites/rest/basic/tasks/rest_test.yaml deleted file mode 100644 index a5ef6a6e597..00000000000 --- a/suites/rest/basic/tasks/rest_test.yaml +++ /dev/null @@ -1,26 +0,0 @@ -roles: -- - mon.a - - mds.a - - osd.0 - - osd.1 -- - mon.b - - mon.c - - osd.2 - - osd.3 - - client.0 - -tasks: -- install: -- ceph: - log-whitelist: - - wrongly marked me down - conf: - client.rest0: - debug ms: 1 - debug objecter: 20 - debug rados: 20 -- rest-api: [client.0] -- workunit: - clients: - client.0: - - rest/test.py diff --git a/suites/rgw/multifs/% b/suites/rgw/multifs/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/rgw/multifs/clusters/fixed-2.yaml b/suites/rgw/multifs/clusters/fixed-2.yaml deleted file mode 120000 index cd0791a1486..00000000000 --- a/suites/rgw/multifs/clusters/fixed-2.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../clusters/fixed-2.yaml \ No newline at end of file diff --git a/suites/rgw/multifs/frontend/apache.yaml b/suites/rgw/multifs/frontend/apache.yaml deleted file mode 100644 index 53ebf758ed3..00000000000 --- a/suites/rgw/multifs/frontend/apache.yaml +++ /dev/null @@ -1,3 +0,0 
@@ -overrides: - rgw: - frontend: apache diff --git a/suites/rgw/multifs/frontend/civetweb.yaml b/suites/rgw/multifs/frontend/civetweb.yaml deleted file mode 100644 index 5845a0e6c15..00000000000 --- a/suites/rgw/multifs/frontend/civetweb.yaml +++ /dev/null @@ -1,3 +0,0 @@ -overrides: - rgw: - frontend: civetweb diff --git a/suites/rgw/multifs/fs/btrfs.yaml b/suites/rgw/multifs/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/rgw/multifs/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/rgw/multifs/fs/ext4.yaml b/suites/rgw/multifs/fs/ext4.yaml deleted file mode 120000 index 65d71886933..00000000000 --- a/suites/rgw/multifs/fs/ext4.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/ext4.yaml \ No newline at end of file diff --git a/suites/rgw/multifs/fs/xfs.yaml b/suites/rgw/multifs/fs/xfs.yaml deleted file mode 120000 index 4c28d731f6b..00000000000 --- a/suites/rgw/multifs/fs/xfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/xfs.yaml \ No newline at end of file diff --git a/suites/rgw/multifs/overrides.yaml b/suites/rgw/multifs/overrides.yaml deleted file mode 100644 index 9b2063f0699..00000000000 --- a/suites/rgw/multifs/overrides.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - client: - debug rgw: 20 diff --git a/suites/rgw/multifs/rgw_pool_type b/suites/rgw/multifs/rgw_pool_type deleted file mode 120000 index 0506f616ce2..00000000000 --- a/suites/rgw/multifs/rgw_pool_type +++ /dev/null @@ -1 +0,0 @@ -../../../rgw_pool_type \ No newline at end of file diff --git a/suites/rgw/multifs/tasks/rgw_bucket_quota.yaml b/suites/rgw/multifs/tasks/rgw_bucket_quota.yaml deleted file mode 100644 index 767debdf3c8..00000000000 --- a/suites/rgw/multifs/tasks/rgw_bucket_quota.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- install: -- ceph: -- rgw: [client.0] -- workunit: - clients: - client.0: - - rgw/s3_bucket_quota.pl diff --git 
a/suites/rgw/multifs/tasks/rgw_multipart_upload.yaml b/suites/rgw/multifs/tasks/rgw_multipart_upload.yaml deleted file mode 100644 index 1781dee096b..00000000000 --- a/suites/rgw/multifs/tasks/rgw_multipart_upload.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- install: -- ceph: -- rgw: [client.0] -- workunit: - clients: - client.0: - - rgw/s3_multipart_upload.pl diff --git a/suites/rgw/multifs/tasks/rgw_readwrite.yaml b/suites/rgw/multifs/tasks/rgw_readwrite.yaml deleted file mode 100644 index c7efaa1c757..00000000000 --- a/suites/rgw/multifs/tasks/rgw_readwrite.yaml +++ /dev/null @@ -1,16 +0,0 @@ -tasks: -- install: -- ceph: -- rgw: [client.0] -- s3readwrite: - client.0: - rgw_server: client.0 - readwrite: - bucket: rwtest - readers: 10 - writers: 3 - duration: 300 - files: - num: 10 - size: 2000 - stddev: 500 diff --git a/suites/rgw/multifs/tasks/rgw_roundtrip.yaml b/suites/rgw/multifs/tasks/rgw_roundtrip.yaml deleted file mode 100644 index 47b3c1894a2..00000000000 --- a/suites/rgw/multifs/tasks/rgw_roundtrip.yaml +++ /dev/null @@ -1,16 +0,0 @@ -tasks: -- install: -- ceph: -- rgw: [client.0] -- s3roundtrip: - client.0: - rgw_server: client.0 - roundtrip: - bucket: rttest - readers: 10 - writers: 3 - duration: 300 - files: - num: 10 - size: 2000 - stddev: 500 diff --git a/suites/rgw/multifs/tasks/rgw_s3tests.yaml b/suites/rgw/multifs/tasks/rgw_s3tests.yaml deleted file mode 100644 index 62608773a2a..00000000000 --- a/suites/rgw/multifs/tasks/rgw_s3tests.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- install: -- ceph: -- rgw: [client.0] -- s3tests: - client.0: - rgw_server: client.0 diff --git a/suites/rgw/multifs/tasks/rgw_swift.yaml b/suites/rgw/multifs/tasks/rgw_swift.yaml deleted file mode 100644 index 569741b0e15..00000000000 --- a/suites/rgw/multifs/tasks/rgw_swift.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- install: -- ceph: -- rgw: [client.0] -- swift: - client.0: - rgw_server: client.0 diff --git a/suites/rgw/multifs/tasks/rgw_user_quota.yaml 
b/suites/rgw/multifs/tasks/rgw_user_quota.yaml deleted file mode 100644 index c2c38a816cc..00000000000 --- a/suites/rgw/multifs/tasks/rgw_user_quota.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- install: -- ceph: -- rgw: [client.0] -- workunit: - clients: - client.0: - - rgw/s3_user_quota.pl diff --git a/suites/rgw/singleton/% b/suites/rgw/singleton/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/rgw/singleton/all/radosgw-admin-data-sync.yaml b/suites/rgw/singleton/all/radosgw-admin-data-sync.yaml deleted file mode 100644 index a619f225d9c..00000000000 --- a/suites/rgw/singleton/all/radosgw-admin-data-sync.yaml +++ /dev/null @@ -1,56 +0,0 @@ -roles: -- [mon.a, osd.0, osd.1, osd.2, osd.3, client.0, client.1] -tasks: -- install: -- ceph: - conf: - client: - debug ms: 1 - rgw gc obj min wait: 15 - rgw data log window: 30 - osd: - debug ms: 1 - debug objclass : 20 - client.0: - rgw region: region0 - rgw zone: r0z0 - rgw region root pool: .rgw.region.0 - rgw zone root pool: .rgw.zone.0 - rgw gc pool: .rgw.gc.0 - rgw user uid pool: .users.uid.0 - rgw user keys pool: .users.0 - rgw log data: True - rgw log meta: True - client.1: - rgw region: region0 - rgw zone: r0z1 - rgw region root pool: .rgw.region.0 - rgw zone root pool: .rgw.zone.1 - rgw gc pool: .rgw.gc.1 - rgw user uid pool: .users.uid.1 - rgw user keys pool: .users.1 - rgw log data: False - rgw log meta: False -- rgw: - regions: - region0: - api name: api1 - is master: True - master zone: r0z0 - zones: [r0z0, r0z1] - client.0: - system user: - name: client0-system-user - access key: 0te6NH5mcdcq0Tc5i8i2 - secret key: Oy4IOauQoL18Gp2zM7lC1vLmoawgqcYPbYGcWfXv - client.1: - system user: - name: client1-system-user - access key: 1te6NH5mcdcq0Tc5i8i3 - secret key: Py4IOauQoL18Gp2zM7lC1vLmoawgqcYPbYGcWfXw -- radosgw-agent: - client.0: - max-entries: 10 - src: client.0 - dest: client.1 -- radosgw-admin: diff --git a/suites/rgw/singleton/all/radosgw-admin-multi-region.yaml 
b/suites/rgw/singleton/all/radosgw-admin-multi-region.yaml deleted file mode 100644 index 5ac26c288b3..00000000000 --- a/suites/rgw/singleton/all/radosgw-admin-multi-region.yaml +++ /dev/null @@ -1,61 +0,0 @@ -roles: -- [mon.a, osd.0, osd.1, osd.2, client.0] -- [mon.b, mon.c, osd.3, osd.4, osd.5, client.1] -tasks: -- install: -- ceph: - conf: - client: - debug ms: 1 - rgw gc obj min wait: 15 - osd: - debug ms: 1 - debug objclass : 20 - client.0: - rgw region: region0 - rgw zone: r0z1 - rgw region root pool: .rgw.region.0 - rgw zone root pool: .rgw.zone.0 - rgw gc pool: .rgw.gc.0 - rgw user uid pool: .users.uid.0 - rgw user keys pool: .users.0 - rgw log data: True - rgw log meta: True - client.1: - rgw region: region1 - rgw zone: r1z1 - rgw region root pool: .rgw.region.1 - rgw zone root pool: .rgw.zone.1 - rgw gc pool: .rgw.gc.1 - rgw user uid pool: .users.uid.1 - rgw user keys pool: .users.1 - rgw log data: False - rgw log meta: False -- rgw: - regions: - region0: - api name: api1 - is master: True - master zone: r0z1 - zones: [r0z1] - region1: - api name: api1 - is master: False - master zone: r1z1 - zones: [r1z1] - client.0: - system user: - name: client0-system-user - access key: 0te6NH5mcdcq0Tc5i8i2 - secret key: Oy4IOauQoL18Gp2zM7lC1vLmoawgqcYPbYGcWfXv - client.1: - system user: - name: client1-system-user - access key: 1te6NH5mcdcq0Tc5i8i3 - secret key: Py4IOauQoL18Gp2zM7lC1vLmoawgqcYPbYGcWfXw -- radosgw-agent: - client.0: - src: client.0 - dest: client.1 - metadata-only: true -- radosgw-admin: diff --git a/suites/rgw/singleton/all/radosgw-admin.yaml b/suites/rgw/singleton/all/radosgw-admin.yaml deleted file mode 100644 index fd2a131fdbb..00000000000 --- a/suites/rgw/singleton/all/radosgw-admin.yaml +++ /dev/null @@ -1,15 +0,0 @@ -roles: -- [mon.a, osd.0, client.0, osd.1, osd.2, osd.3] -tasks: -- install: -- ceph: - conf: - client: - debug ms: 1 - rgw gc obj min wait: 15 - osd: - debug ms: 1 - debug objclass : 20 -- rgw: - client.0: -- radosgw-admin: diff 
--git a/suites/rgw/singleton/all/radosgw-convert-to-region.yaml b/suites/rgw/singleton/all/radosgw-convert-to-region.yaml deleted file mode 100644 index 292a1d2ae01..00000000000 --- a/suites/rgw/singleton/all/radosgw-convert-to-region.yaml +++ /dev/null @@ -1,73 +0,0 @@ -overrides: - s3readwrite: - s3: - user_id: s3readwrite-test-user - display_name: test user for the s3readwrite tests - email: tester@inktank - access_key: 2te6NH5mcdcq0Tc5i8i4 - secret_key: Qy4IOauQoL18Gp2zM7lC1vLmoawgqcYPbYGcWfXx - readwrite: - deterministic_file_names: True - duration: 30 - bucket: testbucket - files: - num: 10 - size: 2000 - stddev: 500 -roles: -- [mon.a, osd.0, osd.1, osd.2, client.0] -- [mon.b, mon.c, osd.3, osd.4, osd.5, client.1] - -tasks: -- install: -- ceph: - conf: - client: - rgw region: default - rgw zone: r1z1 - rgw region root pool: .rgw - rgw zone root pool: .rgw - rgw domain root: .rgw - rgw gc pool: .rgw.gc - rgw user uid pool: .users.uid - rgw user keys pool: .users -- rgw: - regions: - default: - api name: api1 - is master: true - master zone: r1z1 - zones: [r1z1] - client.0: - system user: - name: nr-system - access key: 0te6NH5mcdcq0Tc5i8i2 - secret key: Oy4IOauQoL18Gp2zM7lC1vLmoawgqcYPbYGcWfXv -- s3readwrite: - client.0: - extra_args: ['--no-cleanup'] - s3: - delete_user: False - readwrite: - writers: 1 - readers: 0 -- rgw: - regions: - default: - api name: api1 - is master: true - master zone: r1z1 - zones: [r1z1] - client.1: - system user: - name: r2-system - access key: 1te6NH5mcdcq0Tc5i8i3 - secret key: Py4IOauQoL18Gp2zM7lC1vLmoawgqcYPbYGcWfXw -- s3readwrite: - client.1: - s3: - create_user: False - readwrite: - writers: 0 - readers: 2 - diff --git a/suites/rgw/singleton/frontend/apache.yaml b/suites/rgw/singleton/frontend/apache.yaml deleted file mode 100644 index 53ebf758ed3..00000000000 --- a/suites/rgw/singleton/frontend/apache.yaml +++ /dev/null @@ -1,3 +0,0 @@ -overrides: - rgw: - frontend: apache diff --git 
a/suites/rgw/singleton/frontend/civetweb.yaml b/suites/rgw/singleton/frontend/civetweb.yaml deleted file mode 100644 index 5845a0e6c15..00000000000 --- a/suites/rgw/singleton/frontend/civetweb.yaml +++ /dev/null @@ -1,3 +0,0 @@ -overrides: - rgw: - frontend: civetweb diff --git a/suites/rgw/singleton/overrides.yaml b/suites/rgw/singleton/overrides.yaml deleted file mode 100644 index 9b2063f0699..00000000000 --- a/suites/rgw/singleton/overrides.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - client: - debug rgw: 20 diff --git a/suites/rgw/singleton/rgw_pool_type b/suites/rgw/singleton/rgw_pool_type deleted file mode 120000 index 77fa7e71b78..00000000000 --- a/suites/rgw/singleton/rgw_pool_type +++ /dev/null @@ -1 +0,0 @@ -../../../rgw_pool_type/ \ No newline at end of file diff --git a/suites/rgw/verify/% b/suites/rgw/verify/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/rgw/verify/clusters/fixed-2.yaml b/suites/rgw/verify/clusters/fixed-2.yaml deleted file mode 120000 index cd0791a1486..00000000000 --- a/suites/rgw/verify/clusters/fixed-2.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../clusters/fixed-2.yaml \ No newline at end of file diff --git a/suites/rgw/verify/frontend/apache.yaml b/suites/rgw/verify/frontend/apache.yaml deleted file mode 100644 index 53ebf758ed3..00000000000 --- a/suites/rgw/verify/frontend/apache.yaml +++ /dev/null @@ -1,3 +0,0 @@ -overrides: - rgw: - frontend: apache diff --git a/suites/rgw/verify/frontend/civetweb.yaml b/suites/rgw/verify/frontend/civetweb.yaml deleted file mode 100644 index 5845a0e6c15..00000000000 --- a/suites/rgw/verify/frontend/civetweb.yaml +++ /dev/null @@ -1,3 +0,0 @@ -overrides: - rgw: - frontend: civetweb diff --git a/suites/rgw/verify/fs/btrfs.yaml b/suites/rgw/verify/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/rgw/verify/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git 
a/suites/rgw/verify/msgr-failures/few.yaml b/suites/rgw/verify/msgr-failures/few.yaml deleted file mode 100644 index 0de320d46b8..00000000000 --- a/suites/rgw/verify/msgr-failures/few.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 5000 diff --git a/suites/rgw/verify/overrides.yaml b/suites/rgw/verify/overrides.yaml deleted file mode 100644 index 9b2063f0699..00000000000 --- a/suites/rgw/verify/overrides.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - client: - debug rgw: 20 diff --git a/suites/rgw/verify/rgw_pool_type b/suites/rgw/verify/rgw_pool_type deleted file mode 120000 index 77fa7e71b78..00000000000 --- a/suites/rgw/verify/rgw_pool_type +++ /dev/null @@ -1 +0,0 @@ -../../../rgw_pool_type/ \ No newline at end of file diff --git a/suites/rgw/verify/tasks/rgw_s3tests.yaml b/suites/rgw/verify/tasks/rgw_s3tests.yaml deleted file mode 100644 index c23a2cbf4ec..00000000000 --- a/suites/rgw/verify/tasks/rgw_s3tests.yaml +++ /dev/null @@ -1,10 +0,0 @@ -tasks: -- install: - flavor: notcmalloc -- ceph: -- rgw: - client.0: - valgrind: [--tool=memcheck] -- s3tests: - client.0: - rgw_server: client.0 diff --git a/suites/rgw/verify/tasks/rgw_s3tests_multiregion.yaml b/suites/rgw/verify/tasks/rgw_s3tests_multiregion.yaml deleted file mode 100644 index 399f4aac2f6..00000000000 --- a/suites/rgw/verify/tasks/rgw_s3tests_multiregion.yaml +++ /dev/null @@ -1,59 +0,0 @@ -tasks: -- install: - flavor: notcmalloc -- ceph: - conf: - client.0: - rgw region: zero - rgw zone: r0z1 - rgw region root pool: .rgw.region.0 - rgw zone root pool: .rgw.zone.0 - rgw gc pool: .rgw.gc.0 - rgw user uid pool: .users.uid.0 - rgw user keys pool: .users.0 - rgw log data: True - rgw log meta: True - client.1: - rgw region: one - rgw zone: r1z1 - rgw region root pool: .rgw.region.1 - rgw zone root pool: .rgw.zone.1 - rgw gc pool: .rgw.gc.1 - rgw user uid pool: .users.uid.1 - rgw user keys pool: .users.1 - rgw log data: False - rgw 
log meta: False -- rgw: - default_idle_timeout: 300 - regions: - zero: - api name: api1 - is master: True - master zone: r0z1 - zones: [r0z1] - one: - api name: api1 - is master: False - master zone: r1z1 - zones: [r1z1] - client.0: - valgrind: [--tool=memcheck] - system user: - name: client0-system-user - access key: 1te6NH5mcdcq0Tc5i8i2 - secret key: 1y4IOauQoL18Gp2zM7lC1vLmoawgqcYPbYGcWfXv - client.1: - valgrind: [--tool=memcheck] - system user: - name: client1-system-user - access key: 0te6NH5mcdcq0Tc5i8i2 - secret key: Oy4IOauQoL18Gp2zM7lC1vLmoawgqcYPbYGcWfXv -- radosgw-agent: - client.0: - src: client.0 - dest: client.1 - metadata-only: true -- s3tests: - client.0: - idle_timeout: 300 - rgw_server: client.0 diff --git a/suites/rgw/verify/tasks/rgw_swift.yaml b/suites/rgw/verify/tasks/rgw_swift.yaml deleted file mode 100644 index 792fb848a9e..00000000000 --- a/suites/rgw/verify/tasks/rgw_swift.yaml +++ /dev/null @@ -1,10 +0,0 @@ -tasks: -- install: - flavor: notcmalloc -- ceph: -- rgw: - client.0: - valgrind: [--tool=memcheck] -- swift: - client.0: - rgw_server: client.0 diff --git a/suites/rgw/verify/validater/lockdep.yaml b/suites/rgw/verify/validater/lockdep.yaml deleted file mode 100644 index 941fe12b1e4..00000000000 --- a/suites/rgw/verify/validater/lockdep.yaml +++ /dev/null @@ -1,7 +0,0 @@ -overrides: - ceph: - conf: - osd: - lockdep: true - mon: - lockdep: true diff --git a/suites/rgw/verify/validater/valgrind.yaml b/suites/rgw/verify/validater/valgrind.yaml deleted file mode 100644 index 0b28c183065..00000000000 --- a/suites/rgw/verify/validater/valgrind.yaml +++ /dev/null @@ -1,12 +0,0 @@ -overrides: - install: - ceph: - flavor: notcmalloc - ceph: - conf: - global: - osd heartbeat grace: 40 - valgrind: - mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes] - osd: [--tool=memcheck] - mds: [--tool=memcheck] diff --git a/suites/samba/% b/suites/samba/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git 
a/suites/samba/clusters/samba-basic.yaml b/suites/samba/clusters/samba-basic.yaml deleted file mode 100644 index caced4a26d1..00000000000 --- a/suites/samba/clusters/samba-basic.yaml +++ /dev/null @@ -1,3 +0,0 @@ -roles: -- [mon.a, mon.b, mon.c, mds.a, osd.0, osd.1] -- [samba.0, client.0, client.1] diff --git a/suites/samba/debug/mds_client.yaml b/suites/samba/debug/mds_client.yaml deleted file mode 120000 index 2550b024ded..00000000000 --- a/suites/samba/debug/mds_client.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../debug/mds_client.yaml \ No newline at end of file diff --git a/suites/samba/fs/btrfs.yaml b/suites/samba/fs/btrfs.yaml deleted file mode 120000 index ea693ab0b42..00000000000 --- a/suites/samba/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/samba/install/install.yaml b/suites/samba/install/install.yaml deleted file mode 100644 index c53f9c55b17..00000000000 --- a/suites/samba/install/install.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# we currently can't install Samba on RHEL; need a gitbuilder and code updates -os_type: ubuntu - -tasks: -- install: -- install: - project: samba - extra_packages: ['samba'] -- ceph: diff --git a/suites/samba/mount/fuse.yaml b/suites/samba/mount/fuse.yaml deleted file mode 100644 index d00ffdb4804..00000000000 --- a/suites/samba/mount/fuse.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph-fuse: [client.0] -- samba: - samba.0: - ceph: "{testdir}/mnt.0" - diff --git a/suites/samba/mount/kclient.yaml b/suites/samba/mount/kclient.yaml deleted file mode 100644 index 14fee85d266..00000000000 --- a/suites/samba/mount/kclient.yaml +++ /dev/null @@ -1,11 +0,0 @@ -overrides: - ceph: - conf: - global: - ms die on skipped message: false -tasks: -- kclient: [client.0] -- samba: - samba.0: - ceph: "{testdir}/mnt.0" - diff --git a/suites/samba/mount/native.yaml b/suites/samba/mount/native.yaml deleted file mode 100644 index 09b8c1c4e3d..00000000000 --- 
a/suites/samba/mount/native.yaml +++ /dev/null @@ -1,2 +0,0 @@ -tasks: -- samba: diff --git a/suites/samba/mount/noceph.yaml b/suites/samba/mount/noceph.yaml deleted file mode 100644 index 3cad4740d8b..00000000000 --- a/suites/samba/mount/noceph.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- localdir: [client.0] -- samba: - samba.0: - ceph: "{testdir}/mnt.0" diff --git a/suites/samba/workload/cifs-dbench.yaml b/suites/samba/workload/cifs-dbench.yaml deleted file mode 100644 index c13c1c099e5..00000000000 --- a/suites/samba/workload/cifs-dbench.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- cifs-mount: - client.1: - share: ceph -- workunit: - clients: - client.1: - - suites/dbench.sh diff --git a/suites/samba/workload/cifs-fsstress.yaml b/suites/samba/workload/cifs-fsstress.yaml deleted file mode 100644 index ff003af3433..00000000000 --- a/suites/samba/workload/cifs-fsstress.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- cifs-mount: - client.1: - share: ceph -- workunit: - clients: - client.1: - - suites/fsstress.sh diff --git a/suites/samba/workload/cifs-kernel-build.yaml.disabled b/suites/samba/workload/cifs-kernel-build.yaml.disabled deleted file mode 100644 index ab9ff8ac731..00000000000 --- a/suites/samba/workload/cifs-kernel-build.yaml.disabled +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- cifs-mount: - client.1: - share: ceph -- workunit: - clients: - client.1: - - kernel_untar_build.sh - diff --git a/suites/samba/workload/smbtorture.yaml b/suites/samba/workload/smbtorture.yaml deleted file mode 100644 index 823489a2082..00000000000 --- a/suites/samba/workload/smbtorture.yaml +++ /dev/null @@ -1,39 +0,0 @@ -tasks: -- pexec: - client.1: - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.lock - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.fdpass - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.unlink - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.attr - - 
/usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.trans2 - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.negnowait - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.dir1 - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.deny1 - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.deny2 - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.deny3 - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.denydos - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.ntdeny1 - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.ntdeny2 - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.tcon - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.tcondev - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.vuid - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.rw1 - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.open - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.defer_open - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.xcopy - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.rename - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.properties - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.mangle - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.openattr - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.chkpath - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.secleak - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.disconnect - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph 
base.samba3error - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.smb -# - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.bench-holdcon -# - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.bench-holdopen - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.bench-readwrite - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.bench-torture - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.scan-pipe_number - - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.scan-ioctl -# - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.scan-maxfid diff --git a/suites/smoke/basic/% b/suites/smoke/basic/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/smoke/basic/clusters/fixed-3-cephfs.yaml b/suites/smoke/basic/clusters/fixed-3-cephfs.yaml deleted file mode 120000 index a482e650421..00000000000 --- a/suites/smoke/basic/clusters/fixed-3-cephfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../clusters/fixed-3-cephfs.yaml \ No newline at end of file diff --git a/suites/smoke/basic/fs/btrfs.yaml b/suites/smoke/basic/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/smoke/basic/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/smoke/basic/tasks/cfuse_workunit_suites_blogbench.yaml b/suites/smoke/basic/tasks/cfuse_workunit_suites_blogbench.yaml deleted file mode 100644 index 2ee417723b0..00000000000 --- a/suites/smoke/basic/tasks/cfuse_workunit_suites_blogbench.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: -- ceph: - fs: xfs -- ceph-fuse: -- workunit: - clients: - all: - - suites/blogbench.sh diff --git a/suites/smoke/basic/tasks/cfuse_workunit_suites_fsstress.yaml b/suites/smoke/basic/tasks/cfuse_workunit_suites_fsstress.yaml deleted file mode 100644 index 
cd12eaef570..00000000000 --- a/suites/smoke/basic/tasks/cfuse_workunit_suites_fsstress.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: -- ceph: - fs: btrfs -- ceph-fuse: -- workunit: - clients: - all: - - suites/fsstress.sh diff --git a/suites/smoke/basic/tasks/cfuse_workunit_suites_iozone.yaml b/suites/smoke/basic/tasks/cfuse_workunit_suites_iozone.yaml deleted file mode 100644 index c4be4cd1de9..00000000000 --- a/suites/smoke/basic/tasks/cfuse_workunit_suites_iozone.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: -- ceph: - fs: btrfs -- ceph-fuse: [client.0] -- workunit: - clients: - all: - - suites/iozone.sh diff --git a/suites/smoke/basic/tasks/cfuse_workunit_suites_pjd.yaml b/suites/smoke/basic/tasks/cfuse_workunit_suites_pjd.yaml deleted file mode 100644 index d042daa716d..00000000000 --- a/suites/smoke/basic/tasks/cfuse_workunit_suites_pjd.yaml +++ /dev/null @@ -1,16 +0,0 @@ -tasks: -- install: -- ceph: - fs: xfs - conf: - mds: - debug mds: 20 - debug ms: 1 - client: - debug client: 20 - debug ms: 1 -- ceph-fuse: -- workunit: - clients: - all: - - suites/pjd.sh diff --git a/suites/smoke/basic/tasks/kclient_workunit_direct_io.yaml b/suites/smoke/basic/tasks/kclient_workunit_direct_io.yaml deleted file mode 100644 index 29ccf463051..00000000000 --- a/suites/smoke/basic/tasks/kclient_workunit_direct_io.yaml +++ /dev/null @@ -1,14 +0,0 @@ -overrides: - ceph: - conf: - global: - ms die on skipped message: false -tasks: -- install: -- ceph: - fs: btrfs -- kclient: -- workunit: - clients: - all: - - direct_io diff --git a/suites/smoke/basic/tasks/kclient_workunit_suites_dbench.yaml b/suites/smoke/basic/tasks/kclient_workunit_suites_dbench.yaml deleted file mode 100644 index 01d7470a50e..00000000000 --- a/suites/smoke/basic/tasks/kclient_workunit_suites_dbench.yaml +++ /dev/null @@ -1,14 +0,0 @@ -overrides: - ceph: - conf: - global: - ms die on skipped message: false -tasks: -- install: -- ceph: - fs: xfs -- kclient: -- workunit: - clients: - all: - - 
suites/dbench.sh diff --git a/suites/smoke/basic/tasks/kclient_workunit_suites_fsstress.yaml b/suites/smoke/basic/tasks/kclient_workunit_suites_fsstress.yaml deleted file mode 100644 index 42d6b97c5b5..00000000000 --- a/suites/smoke/basic/tasks/kclient_workunit_suites_fsstress.yaml +++ /dev/null @@ -1,14 +0,0 @@ -overrides: - ceph: - conf: - global: - ms die on skipped message: false -tasks: -- install: -- ceph: - fs: xfs -- kclient: -- workunit: - clients: - all: - - suites/fsstress.sh diff --git a/suites/smoke/basic/tasks/kclient_workunit_suites_pjd.yaml b/suites/smoke/basic/tasks/kclient_workunit_suites_pjd.yaml deleted file mode 100644 index 6818a2a6833..00000000000 --- a/suites/smoke/basic/tasks/kclient_workunit_suites_pjd.yaml +++ /dev/null @@ -1,14 +0,0 @@ -overrides: - ceph: - conf: - global: - ms die on skipped message: false -tasks: -- install: -- ceph: - fs: xfs -- kclient: -- workunit: - clients: - all: - - suites/pjd.sh diff --git a/suites/smoke/basic/tasks/libcephfs_interface_tests.yaml b/suites/smoke/basic/tasks/libcephfs_interface_tests.yaml deleted file mode 100644 index 5273c931ec7..00000000000 --- a/suites/smoke/basic/tasks/libcephfs_interface_tests.yaml +++ /dev/null @@ -1,14 +0,0 @@ -overrides: - ceph: - client: - debug ms: 1 - debug client: 20 -tasks: -- install: -- ceph: - fs: btrfs -- ceph-fuse: -- workunit: - clients: - client.0: - - libcephfs/test.sh diff --git a/suites/smoke/basic/tasks/mon_thrash.yaml b/suites/smoke/basic/tasks/mon_thrash.yaml deleted file mode 100644 index 0ce6d61a6ff..00000000000 --- a/suites/smoke/basic/tasks/mon_thrash.yaml +++ /dev/null @@ -1,21 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject delay max: 1 - ms inject delay probability: 0.005 - ms inject delay type: mon - ms inject internal delays: 0.002 - ms inject socket failures: 2500 -tasks: -- install: null -- ceph: - fs: xfs -- mon_thrash: - revive_delay: 90 - thrash_delay: 1 - thrash_many: true -- workunit: - clients: - client.0: - - rados/test.sh 
diff --git a/suites/smoke/basic/tasks/rados_api_tests.yaml b/suites/smoke/basic/tasks/rados_api_tests.yaml deleted file mode 100644 index 7049319ea8b..00000000000 --- a/suites/smoke/basic/tasks/rados_api_tests.yaml +++ /dev/null @@ -1,15 +0,0 @@ -tasks: -- install: null -- ceph: - fs: ext4 - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost -- thrashosds: - chance_pgnum_grow: 2 - chance_pgpnum_fix: 1 - timeout: 1200 -- workunit: - clients: - client.0: - - rados/test.sh diff --git a/suites/smoke/basic/tasks/rados_bench.yaml b/suites/smoke/basic/tasks/rados_bench.yaml deleted file mode 100644 index f93b4a610ce..00000000000 --- a/suites/smoke/basic/tasks/rados_bench.yaml +++ /dev/null @@ -1,24 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject delay max: 1 - ms inject delay probability: 0.005 - ms inject delay type: osd - ms inject internal delays: 0.002 - ms inject socket failures: 2500 -tasks: -- install: null -- ceph: - fs: xfs - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost -- thrashosds: - chance_pgnum_grow: 2 - chance_pgpnum_fix: 1 - timeout: 1200 -- radosbench: - clients: - - client.0 - time: 1800 diff --git a/suites/smoke/basic/tasks/rados_cache_snaps.yaml b/suites/smoke/basic/tasks/rados_cache_snaps.yaml deleted file mode 100644 index 37ef5092f02..00000000000 --- a/suites/smoke/basic/tasks/rados_cache_snaps.yaml +++ /dev/null @@ -1,41 +0,0 @@ -tasks: -- install: null -- ceph: - fs: btrfs - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost -- thrashosds: - chance_pgnum_grow: 2 - chance_pgpnum_fix: 1 - timeout: 1200 -- exec: - client.0: - - ceph osd pool create base 4 - - ceph osd pool create cache 4 - - ceph osd tier add base cache - - ceph osd tier cache-mode cache writeback - - ceph osd tier set-overlay base cache - - ceph osd pool set cache hit_set_type bloom - - ceph osd pool set cache hit_set_count 8 - - ceph osd pool set cache hit_set_period 3600 - 
- ceph osd pool set cache target_max_objects 250 -- rados: - clients: - - client.0 - objects: 500 - op_weights: - copy_from: 50 - delete: 50 - evict: 50 - flush: 50 - read: 100 - rollback: 50 - snap_create: 50 - snap_remove: 50 - try_flush: 50 - write: 100 - ops: 4000 - pool_snaps: true - pools: - - base diff --git a/suites/smoke/basic/tasks/rados_cls_all.yaml b/suites/smoke/basic/tasks/rados_cls_all.yaml deleted file mode 100644 index 7f18a7e95bd..00000000000 --- a/suites/smoke/basic/tasks/rados_cls_all.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- install: -- ceph: - fs: xfs -- workunit: - clients: - client.0: - - cls diff --git a/suites/smoke/basic/tasks/rados_ec_snaps.yaml b/suites/smoke/basic/tasks/rados_ec_snaps.yaml deleted file mode 100644 index d9282bfa41d..00000000000 --- a/suites/smoke/basic/tasks/rados_ec_snaps.yaml +++ /dev/null @@ -1,31 +0,0 @@ -tasks: -- install: null -- ceph: - fs: xfs - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost -- thrashosds: - chance_pgnum_grow: 3 - chance_pgpnum_fix: 1 - timeout: 1200 -- rados: - clients: - - client.0 - ec_pool: true - max_in_flight: 64 - max_seconds: 600 - objects: 1024 - op_weights: - append: 100 - copy_from: 50 - delete: 50 - read: 100 - rmattr: 25 - rollback: 50 - setattr: 25 - snap_create: 50 - snap_remove: 50 - write: 0 - ops: 400000 - size: 16384 diff --git a/suites/smoke/basic/tasks/rados_python.yaml b/suites/smoke/basic/tasks/rados_python.yaml deleted file mode 100644 index 399967cc1c9..00000000000 --- a/suites/smoke/basic/tasks/rados_python.yaml +++ /dev/null @@ -1,11 +0,0 @@ -tasks: -- install: -- ceph: - fs: btrfs - log-whitelist: - - wrongly marked me down -- ceph-fuse: -- workunit: - clients: - client.0: - - rados/test_python.sh diff --git a/suites/smoke/basic/tasks/rados_workunit_loadgen_mix.yaml b/suites/smoke/basic/tasks/rados_workunit_loadgen_mix.yaml deleted file mode 100644 index 0d472a33b79..00000000000 --- 
a/suites/smoke/basic/tasks/rados_workunit_loadgen_mix.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: -- ceph: - fs: ext4 -- ceph-fuse: -- workunit: - clients: - all: - - rados/load-gen-mix.sh diff --git a/suites/smoke/basic/tasks/rbd_api_tests.yaml b/suites/smoke/basic/tasks/rbd_api_tests.yaml deleted file mode 100644 index a0dda21a51f..00000000000 --- a/suites/smoke/basic/tasks/rbd_api_tests.yaml +++ /dev/null @@ -1,11 +0,0 @@ -tasks: -- install: -- ceph: - fs: xfs -- ceph-fuse: -- workunit: - clients: - client.0: - - rbd/test_librbd.sh - env: - RBD_FEATURES: "1" diff --git a/suites/smoke/basic/tasks/rbd_cli_import_export.yaml b/suites/smoke/basic/tasks/rbd_cli_import_export.yaml deleted file mode 100644 index e9f38d3a9ff..00000000000 --- a/suites/smoke/basic/tasks/rbd_cli_import_export.yaml +++ /dev/null @@ -1,11 +0,0 @@ -tasks: -- install: -- ceph: - fs: xfs -- ceph-fuse: -- workunit: - clients: - client.0: - - rbd/import_export.sh - env: - RBD_CREATE_ARGS: --new-format diff --git a/suites/smoke/basic/tasks/rbd_fsx.yaml b/suites/smoke/basic/tasks/rbd_fsx.yaml deleted file mode 100644 index ed737a3333e..00000000000 --- a/suites/smoke/basic/tasks/rbd_fsx.yaml +++ /dev/null @@ -1,17 +0,0 @@ -overrides: - ceph: - conf: - client: - rbd cache: true - global: - ms inject socket failures: 5000 -tasks: -- install: null -- ceph: - fs: xfs -- thrashosds: - timeout: 1200 -- rbd_fsx: - clients: - - client.0 - ops: 2000 diff --git a/suites/smoke/basic/tasks/rbd_python_api_tests.yaml b/suites/smoke/basic/tasks/rbd_python_api_tests.yaml deleted file mode 100644 index 7ed61d0a339..00000000000 --- a/suites/smoke/basic/tasks/rbd_python_api_tests.yaml +++ /dev/null @@ -1,11 +0,0 @@ -tasks: -- install: -- ceph: - fs: btrfs -- ceph-fuse: -- workunit: - clients: - client.0: - - rbd/test_librbd_python.sh - env: - RBD_FEATURES: "1" diff --git a/suites/smoke/basic/tasks/rbd_workunit_suites_iozone.yaml b/suites/smoke/basic/tasks/rbd_workunit_suites_iozone.yaml deleted file mode 
100644 index d3591686c03..00000000000 --- a/suites/smoke/basic/tasks/rbd_workunit_suites_iozone.yaml +++ /dev/null @@ -1,16 +0,0 @@ -overrides: - ceph: - conf: - global: - ms die on skipped message: false -tasks: -- install: -- ceph: - fs: btrfs -- rbd: - all: - image_size: 20480 -- workunit: - clients: - all: - - suites/iozone.sh diff --git a/suites/smoke/basic/tasks/rgw_s3tests.yaml b/suites/smoke/basic/tasks/rgw_s3tests.yaml deleted file mode 100644 index 73218919e03..00000000000 --- a/suites/smoke/basic/tasks/rgw_s3tests.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- install: -- ceph: - fs: xfs -- rgw: [client.0] -- s3tests: - client.0: - rgw_server: client.0 diff --git a/suites/smoke/basic/tasks/rgw_swift.yaml b/suites/smoke/basic/tasks/rgw_swift.yaml deleted file mode 100644 index 57c7226e341..00000000000 --- a/suites/smoke/basic/tasks/rgw_swift.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- install: -- ceph: - fs: ext4 -- rgw: [client.0] -- swift: - client.0: - rgw_server: client.0 diff --git a/suites/stress/bench/% b/suites/stress/bench/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/stress/bench/clusters/fixed-3-cephfs.yaml b/suites/stress/bench/clusters/fixed-3-cephfs.yaml deleted file mode 120000 index a482e650421..00000000000 --- a/suites/stress/bench/clusters/fixed-3-cephfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../clusters/fixed-3-cephfs.yaml \ No newline at end of file diff --git a/suites/stress/bench/tasks/cfuse_workunit_snaps.yaml b/suites/stress/bench/tasks/cfuse_workunit_snaps.yaml deleted file mode 100644 index eafec39e3d0..00000000000 --- a/suites/stress/bench/tasks/cfuse_workunit_snaps.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- install: -- ceph: -- ceph-fuse: -- workunit: - clients: - all: - - snaps diff --git a/suites/stress/bench/tasks/kclient_workunit_suites_fsx.yaml b/suites/stress/bench/tasks/kclient_workunit_suites_fsx.yaml deleted file mode 100644 index a0d2e765bdb..00000000000 --- 
a/suites/stress/bench/tasks/kclient_workunit_suites_fsx.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- install: -- ceph: -- kclient: -- workunit: - clients: - all: - - suites/fsx.sh diff --git a/suites/stress/thrash/% b/suites/stress/thrash/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/stress/thrash/clusters/16-osd.yaml b/suites/stress/thrash/clusters/16-osd.yaml deleted file mode 100644 index 373dd4052c3..00000000000 --- a/suites/stress/thrash/clusters/16-osd.yaml +++ /dev/null @@ -1,18 +0,0 @@ -roles: -- [mon.0, mds.a, osd.0] -- [mon.1, osd.1] -- [mon.2, osd.2] -- [osd.3] -- [osd.4] -- [osd.5] -- [osd.6] -- [osd.7] -- [osd.8] -- [osd.9] -- [osd.10] -- [osd.11] -- [osd.12] -- [osd.13] -- [osd.14] -- [osd.15] -- [client.0] diff --git a/suites/stress/thrash/clusters/3-osd-1-machine.yaml b/suites/stress/thrash/clusters/3-osd-1-machine.yaml deleted file mode 100644 index d8ff594b95d..00000000000 --- a/suites/stress/thrash/clusters/3-osd-1-machine.yaml +++ /dev/null @@ -1,3 +0,0 @@ -roles: -- [mon.0, mds.a, osd.0, osd.1, osd.2] -- [mon.1, mon.2, client.0] diff --git a/suites/stress/thrash/clusters/8-osd.yaml b/suites/stress/thrash/clusters/8-osd.yaml deleted file mode 100644 index 3b131054e95..00000000000 --- a/suites/stress/thrash/clusters/8-osd.yaml +++ /dev/null @@ -1,10 +0,0 @@ -roles: -- [mon.0, mds.a, osd.0] -- [mon.1, osd.1] -- [mon.2, osd.2] -- [osd.3] -- [osd.4] -- [osd.5] -- [osd.6] -- [osd.7] -- [client.0] diff --git a/suites/stress/thrash/fs/btrfs.yaml b/suites/stress/thrash/fs/btrfs.yaml deleted file mode 120000 index 10d0c3f1266..00000000000 --- a/suites/stress/thrash/fs/btrfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/btrfs.yaml \ No newline at end of file diff --git a/suites/stress/thrash/fs/none.yaml b/suites/stress/thrash/fs/none.yaml deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/stress/thrash/fs/xfs.yaml b/suites/stress/thrash/fs/xfs.yaml deleted file mode 120000 index 
4c28d731f6b..00000000000 --- a/suites/stress/thrash/fs/xfs.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../fs/xfs.yaml \ No newline at end of file diff --git a/suites/stress/thrash/thrashers/default.yaml b/suites/stress/thrash/thrashers/default.yaml deleted file mode 100644 index 14d772583cf..00000000000 --- a/suites/stress/thrash/thrashers/default.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- install: -- ceph: - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost -- thrashosds: diff --git a/suites/stress/thrash/thrashers/fast.yaml b/suites/stress/thrash/thrashers/fast.yaml deleted file mode 100644 index eea9c06cd90..00000000000 --- a/suites/stress/thrash/thrashers/fast.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: -- ceph: - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost -- thrashosds: - op_delay: 1 - chance_down: 10 diff --git a/suites/stress/thrash/thrashers/more-down.yaml b/suites/stress/thrash/thrashers/more-down.yaml deleted file mode 100644 index e39098b1cb6..00000000000 --- a/suites/stress/thrash/thrashers/more-down.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- install: -- ceph: - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost -- thrashosds: - chance_down: 50 diff --git a/suites/stress/thrash/workloads/bonnie_cfuse.yaml b/suites/stress/thrash/workloads/bonnie_cfuse.yaml deleted file mode 100644 index 912f12d6ce7..00000000000 --- a/suites/stress/thrash/workloads/bonnie_cfuse.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - all: - - suites/bonnie.sh diff --git a/suites/stress/thrash/workloads/iozone_cfuse.yaml b/suites/stress/thrash/workloads/iozone_cfuse.yaml deleted file mode 100644 index 18a6051be39..00000000000 --- a/suites/stress/thrash/workloads/iozone_cfuse.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph-fuse: -- workunit: - clients: - all: - - suites/iozone.sh diff --git 
a/suites/stress/thrash/workloads/radosbench.yaml b/suites/stress/thrash/workloads/radosbench.yaml deleted file mode 100644 index 3940870fce0..00000000000 --- a/suites/stress/thrash/workloads/radosbench.yaml +++ /dev/null @@ -1,4 +0,0 @@ -tasks: -- radosbench: - clients: [client.0] - time: 1800 diff --git a/suites/stress/thrash/workloads/readwrite.yaml b/suites/stress/thrash/workloads/readwrite.yaml deleted file mode 100644 index c53e52b0872..00000000000 --- a/suites/stress/thrash/workloads/readwrite.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 500 - op_weights: - read: 45 - write: 45 - delete: 10 diff --git a/suites/teuthology/buildpackages/% b/suites/teuthology/buildpackages/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/teuthology/buildpackages/distros b/suites/teuthology/buildpackages/distros deleted file mode 120000 index c5d59352cb5..00000000000 --- a/suites/teuthology/buildpackages/distros +++ /dev/null @@ -1 +0,0 @@ -../../../distros/supported \ No newline at end of file diff --git a/suites/teuthology/buildpackages/tasks/branch.yaml b/suites/teuthology/buildpackages/tasks/branch.yaml deleted file mode 100644 index b93c5d07c4a..00000000000 --- a/suites/teuthology/buildpackages/tasks/branch.yaml +++ /dev/null @@ -1,10 +0,0 @@ -roles: - - [mon.0, client.0] -tasks: - - install: - # branch has precedence over sha1 - branch: hammer - sha1: e5b6eea91cc37434f78a987d2dd1d3edd4a23f3f # dumpling - - exec: - client.0: - - ceph --version | grep 'version 0.94' diff --git a/suites/teuthology/buildpackages/tasks/default.yaml b/suites/teuthology/buildpackages/tasks/default.yaml deleted file mode 100644 index cb583c7634a..00000000000 --- a/suites/teuthology/buildpackages/tasks/default.yaml +++ /dev/null @@ -1,14 +0,0 @@ -roles: - - [client.0] -tasks: - - install: - tag: v0.94.1 - - exec: - client.0: - - ceph --version | grep 'version 0.94.1' - - install.upgrade: - client.0: - tag: v0.94.3 - - 
exec: - client.0: - - ceph --version | grep 'version 0.94.3' diff --git a/suites/teuthology/buildpackages/tasks/tag.yaml b/suites/teuthology/buildpackages/tasks/tag.yaml deleted file mode 100644 index 126749c9bb0..00000000000 --- a/suites/teuthology/buildpackages/tasks/tag.yaml +++ /dev/null @@ -1,11 +0,0 @@ -roles: - - [mon.0, client.0] -tasks: - - install: - # tag has precedence over branch and sha1 - tag: v0.94.1 - branch: firefly - sha1: e5b6eea91cc37434f78a987d2dd1d3edd4a23f3f # dumpling - - exec: - client.0: - - ceph --version | grep 'version 0.94.1' diff --git a/suites/teuthology/ceph/% b/suites/teuthology/ceph/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/teuthology/ceph/clusters/single.yaml b/suites/teuthology/ceph/clusters/single.yaml deleted file mode 100644 index beba3972ce9..00000000000 --- a/suites/teuthology/ceph/clusters/single.yaml +++ /dev/null @@ -1,2 +0,0 @@ -roles: - - [mon.0, client.0] diff --git a/suites/teuthology/ceph/distros/rhel.yaml b/suites/teuthology/ceph/distros/rhel.yaml deleted file mode 100644 index 7f9778d770c..00000000000 --- a/suites/teuthology/ceph/distros/rhel.yaml +++ /dev/null @@ -1 +0,0 @@ -os_type: rhel diff --git a/suites/teuthology/ceph/distros/ubuntu.yaml b/suites/teuthology/ceph/distros/ubuntu.yaml deleted file mode 100644 index 150451be028..00000000000 --- a/suites/teuthology/ceph/distros/ubuntu.yaml +++ /dev/null @@ -1 +0,0 @@ -os_type: ubuntu diff --git a/suites/teuthology/ceph/tasks/teuthology.yaml b/suites/teuthology/ceph/tasks/teuthology.yaml deleted file mode 100644 index d3d7ddd8728..00000000000 --- a/suites/teuthology/ceph/tasks/teuthology.yaml +++ /dev/null @@ -1,2 +0,0 @@ -tasks: - - install: diff --git a/suites/teuthology/no-ceph/% b/suites/teuthology/no-ceph/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/teuthology/no-ceph/clusters/single.yaml b/suites/teuthology/no-ceph/clusters/single.yaml deleted file mode 100644 index 
beba3972ce9..00000000000 --- a/suites/teuthology/no-ceph/clusters/single.yaml +++ /dev/null @@ -1,2 +0,0 @@ -roles: - - [mon.0, client.0] diff --git a/suites/teuthology/no-ceph/distros/baremetal.yaml b/suites/teuthology/no-ceph/distros/baremetal.yaml deleted file mode 100644 index 59b9779b905..00000000000 --- a/suites/teuthology/no-ceph/distros/baremetal.yaml +++ /dev/null @@ -1 +0,0 @@ -# left blank so we'll take the default baremetal machine_type and os_type / os_version diff --git a/suites/teuthology/no-ceph/distros/rhel7.0.yaml b/suites/teuthology/no-ceph/distros/rhel7.0.yaml deleted file mode 100644 index c87c0bc135b..00000000000 --- a/suites/teuthology/no-ceph/distros/rhel7.0.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: rhel -os_version: "7.0" diff --git a/suites/teuthology/no-ceph/distros/ubuntu14.04.yaml b/suites/teuthology/no-ceph/distros/ubuntu14.04.yaml deleted file mode 100644 index 309e989feeb..00000000000 --- a/suites/teuthology/no-ceph/distros/ubuntu14.04.yaml +++ /dev/null @@ -1,2 +0,0 @@ -os_type: ubuntu -os_version: "14.04" diff --git a/suites/teuthology/no-ceph/distros/vps.yaml b/suites/teuthology/no-ceph/distros/vps.yaml deleted file mode 100644 index 5d7ba673c94..00000000000 --- a/suites/teuthology/no-ceph/distros/vps.yaml +++ /dev/null @@ -1 +0,0 @@ -machine_type: vps diff --git a/suites/teuthology/no-ceph/distros/vps_rhel7.0.yaml b/suites/teuthology/no-ceph/distros/vps_rhel7.0.yaml deleted file mode 100644 index d19bff22d0b..00000000000 --- a/suites/teuthology/no-ceph/distros/vps_rhel7.0.yaml +++ /dev/null @@ -1,3 +0,0 @@ -machine_type: vps -os_type: rhel -os_version: "7.0" diff --git a/suites/teuthology/no-ceph/distros/vps_ubuntu14.04.yaml b/suites/teuthology/no-ceph/distros/vps_ubuntu14.04.yaml deleted file mode 100644 index 3c3b2500197..00000000000 --- a/suites/teuthology/no-ceph/distros/vps_ubuntu14.04.yaml +++ /dev/null @@ -1,3 +0,0 @@ -machine_type: vps -os_type: ubuntu -os_version: "14.04" diff --git 
a/suites/teuthology/no-ceph/tasks/teuthology.yaml b/suites/teuthology/no-ceph/tasks/teuthology.yaml deleted file mode 100644 index 1391458b5e7..00000000000 --- a/suites/teuthology/no-ceph/tasks/teuthology.yaml +++ /dev/null @@ -1,2 +0,0 @@ -tasks: - - tests: diff --git a/suites/teuthology/workunits/yes.yaml b/suites/teuthology/workunits/yes.yaml deleted file mode 100644 index 45098dbb811..00000000000 --- a/suites/teuthology/workunits/yes.yaml +++ /dev/null @@ -1,8 +0,0 @@ -roles: - - [client.0] -tasks: -- install: -- workunit: - clients: - all: - - true.sh diff --git a/suites/tgt/basic/% b/suites/tgt/basic/% deleted file mode 100644 index 8b137891791..00000000000 --- a/suites/tgt/basic/% +++ /dev/null @@ -1 +0,0 @@ - diff --git a/suites/tgt/basic/clusters/fixed-3.yaml b/suites/tgt/basic/clusters/fixed-3.yaml deleted file mode 100644 index 0038432afa7..00000000000 --- a/suites/tgt/basic/clusters/fixed-3.yaml +++ /dev/null @@ -1,4 +0,0 @@ -roles: -- [mon.a, mon.c, osd.0, osd.1, osd.2] -- [mon.b, mds.a, osd.3, osd.4, osd.5] -- [client.0] diff --git a/suites/tgt/basic/fs/btrfs.yaml b/suites/tgt/basic/fs/btrfs.yaml deleted file mode 100644 index 4c7af311538..00000000000 --- a/suites/tgt/basic/fs/btrfs.yaml +++ /dev/null @@ -1,6 +0,0 @@ -overrides: - ceph: - fs: btrfs - conf: - osd: - osd op thread timeout: 60 diff --git a/suites/tgt/basic/msgr-failures/few.yaml b/suites/tgt/basic/msgr-failures/few.yaml deleted file mode 100644 index 0de320d46b8..00000000000 --- a/suites/tgt/basic/msgr-failures/few.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 5000 diff --git a/suites/tgt/basic/msgr-failures/many.yaml b/suites/tgt/basic/msgr-failures/many.yaml deleted file mode 100644 index 86f8dde8a0e..00000000000 --- a/suites/tgt/basic/msgr-failures/many.yaml +++ /dev/null @@ -1,5 +0,0 @@ -overrides: - ceph: - conf: - global: - ms inject socket failures: 500 diff --git a/suites/tgt/basic/tasks/blogbench.yaml 
b/suites/tgt/basic/tasks/blogbench.yaml deleted file mode 100644 index f77a78b6bc0..00000000000 --- a/suites/tgt/basic/tasks/blogbench.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: -- ceph: -- tgt: -- iscsi: -- workunit: - clients: - all: - - suites/blogbench.sh diff --git a/suites/tgt/basic/tasks/bonnie.yaml b/suites/tgt/basic/tasks/bonnie.yaml deleted file mode 100644 index 2cbfcf8872e..00000000000 --- a/suites/tgt/basic/tasks/bonnie.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: -- ceph: -- tgt: -- iscsi: -- workunit: - clients: - all: - - suites/bonnie.sh diff --git a/suites/tgt/basic/tasks/dbench-short.yaml b/suites/tgt/basic/tasks/dbench-short.yaml deleted file mode 100644 index fcb721a4d14..00000000000 --- a/suites/tgt/basic/tasks/dbench-short.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: -- ceph: -- tgt: -- iscsi: -- workunit: - clients: - all: - - suites/dbench-short.sh diff --git a/suites/tgt/basic/tasks/dbench.yaml b/suites/tgt/basic/tasks/dbench.yaml deleted file mode 100644 index 7f732175faa..00000000000 --- a/suites/tgt/basic/tasks/dbench.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: -- ceph: -- tgt: -- iscsi: -- workunit: - clients: - all: - - suites/dbench.sh diff --git a/suites/tgt/basic/tasks/ffsb.yaml b/suites/tgt/basic/tasks/ffsb.yaml deleted file mode 100644 index f50a3a19647..00000000000 --- a/suites/tgt/basic/tasks/ffsb.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: -- ceph: -- tgt: -- iscsi: -- workunit: - clients: - all: - - suites/ffsb.sh diff --git a/suites/tgt/basic/tasks/fio.yaml b/suites/tgt/basic/tasks/fio.yaml deleted file mode 100644 index e7346ce528e..00000000000 --- a/suites/tgt/basic/tasks/fio.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: -- ceph: -- tgt: -- iscsi: -- workunit: - clients: - all: - - suites/fio.sh diff --git a/suites/tgt/basic/tasks/fsstress.yaml b/suites/tgt/basic/tasks/fsstress.yaml deleted file mode 100644 index c77f511c0f6..00000000000 --- 
a/suites/tgt/basic/tasks/fsstress.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: -- ceph: -- tgt: -- iscsi: -- workunit: - clients: - all: - - suites/fsstress.sh diff --git a/suites/tgt/basic/tasks/fsx.yaml b/suites/tgt/basic/tasks/fsx.yaml deleted file mode 100644 index 04732c84009..00000000000 --- a/suites/tgt/basic/tasks/fsx.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: -- ceph: -- tgt: -- iscsi: -- workunit: - clients: - all: - - suites/fsx.sh diff --git a/suites/tgt/basic/tasks/fsync-tester.yaml b/suites/tgt/basic/tasks/fsync-tester.yaml deleted file mode 100644 index ea627b7d184..00000000000 --- a/suites/tgt/basic/tasks/fsync-tester.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: -- ceph: -- tgt: -- iscsi: -- workunit: - clients: - all: - - suites/fsync-tester.sh diff --git a/suites/tgt/basic/tasks/iogen.yaml b/suites/tgt/basic/tasks/iogen.yaml deleted file mode 100644 index 1065c74daba..00000000000 --- a/suites/tgt/basic/tasks/iogen.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: -- ceph: -- tgt: -- iscsi: -- workunit: - clients: - all: - - suites/iogen.sh diff --git a/suites/tgt/basic/tasks/iozone-sync.yaml b/suites/tgt/basic/tasks/iozone-sync.yaml deleted file mode 100644 index ac241a417e8..00000000000 --- a/suites/tgt/basic/tasks/iozone-sync.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: -- ceph: -- tgt: -- iscsi: -- workunit: - clients: - all: - - suites/iozone-sync.sh diff --git a/suites/tgt/basic/tasks/iozone.yaml b/suites/tgt/basic/tasks/iozone.yaml deleted file mode 100644 index cf5604c21a7..00000000000 --- a/suites/tgt/basic/tasks/iozone.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: -- ceph: -- tgt: -- iscsi: -- workunit: - clients: - all: - - suites/iozone.sh diff --git a/suites/tgt/basic/tasks/pjd.yaml b/suites/tgt/basic/tasks/pjd.yaml deleted file mode 100644 index ba5c631f157..00000000000 --- a/suites/tgt/basic/tasks/pjd.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: -- ceph: -- tgt: -- 
iscsi: -- workunit: - clients: - all: - - suites/pjd.sh diff --git a/suites/upgrade/client-upgrade/dumpling-client-x/basic/% b/suites/upgrade/client-upgrade/dumpling-client-x/basic/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/client-upgrade/dumpling-client-x/basic/0-cluster/start.yaml b/suites/upgrade/client-upgrade/dumpling-client-x/basic/0-cluster/start.yaml deleted file mode 100644 index 1f53351b201..00000000000 --- a/suites/upgrade/client-upgrade/dumpling-client-x/basic/0-cluster/start.yaml +++ /dev/null @@ -1,15 +0,0 @@ -roles: -- - mon.a - - mds.a - - osd.0 - - osd.1 -- - mon.b - - mon.c - - osd.2 - - osd.3 -- - client.0 -overrides: - ceph: - log-whitelist: - - failed to encode map - diff --git a/suites/upgrade/client-upgrade/dumpling-client-x/basic/1-install/dumpling-client-x.yaml b/suites/upgrade/client-upgrade/dumpling-client-x/basic/1-install/dumpling-client-x.yaml deleted file mode 100644 index 2335a5f8298..00000000000 --- a/suites/upgrade/client-upgrade/dumpling-client-x/basic/1-install/dumpling-client-x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- install: - branch: dumpling -- print: "**** done install dumpling" -- install.upgrade: - client.0: -- print: "**** done install.upgrade client.0" -- ceph: -- print: "**** done ceph" diff --git a/suites/upgrade/client-upgrade/dumpling-client-x/basic/2-workload/rados_loadgen_big.yaml b/suites/upgrade/client-upgrade/dumpling-client-x/basic/2-workload/rados_loadgen_big.yaml deleted file mode 100644 index 98df38c7191..00000000000 --- a/suites/upgrade/client-upgrade/dumpling-client-x/basic/2-workload/rados_loadgen_big.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- workunit: - clients: - client.0: - - rados/load-gen-big.sh -- print: "**** done rados/load-gen-big.sh" diff --git a/suites/upgrade/client-upgrade/dumpling-client-x/basic/2-workload/rbd_import_export.yaml b/suites/upgrade/client-upgrade/dumpling-client-x/basic/2-workload/rbd_import_export.yaml deleted file mode 
100644 index 57589e90cfb..00000000000 --- a/suites/upgrade/client-upgrade/dumpling-client-x/basic/2-workload/rbd_import_export.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- workunit: - branch: dumpling - clients: - client.0: - - rbd/import_export.sh -- print: "**** done rbd/import_export.sh" diff --git a/suites/upgrade/client-upgrade/firefly-client-x/basic/% b/suites/upgrade/client-upgrade/firefly-client-x/basic/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/client-upgrade/firefly-client-x/basic/0-cluster/start.yaml b/suites/upgrade/client-upgrade/firefly-client-x/basic/0-cluster/start.yaml deleted file mode 100644 index db6f5e2fe99..00000000000 --- a/suites/upgrade/client-upgrade/firefly-client-x/basic/0-cluster/start.yaml +++ /dev/null @@ -1,14 +0,0 @@ -roles: -- - mon.a - - mds.a - - osd.0 - - osd.1 -- - mon.b - - mon.c - - osd.2 - - osd.3 -- - client.0 -overrides: - ceph: - log-whitelist: - - failed to encode map diff --git a/suites/upgrade/client-upgrade/firefly-client-x/basic/1-install/firefly-client-x.yaml b/suites/upgrade/client-upgrade/firefly-client-x/basic/1-install/firefly-client-x.yaml deleted file mode 100644 index 39430daa088..00000000000 --- a/suites/upgrade/client-upgrade/firefly-client-x/basic/1-install/firefly-client-x.yaml +++ /dev/null @@ -1,10 +0,0 @@ -tasks: -- install: - branch: firefly -- print: "**** done install firefly" -- install.upgrade: - exclude_packages: ['ceph-test', 'ceph-test-dbg'] - client.0: -- print: "**** done install.upgrade client.0" -- ceph: -- print: "**** done ceph" diff --git a/suites/upgrade/client-upgrade/firefly-client-x/basic/2-workload/rbd_cli_import_export.yaml b/suites/upgrade/client-upgrade/firefly-client-x/basic/2-workload/rbd_cli_import_export.yaml deleted file mode 100644 index 6d4fd41aff3..00000000000 --- a/suites/upgrade/client-upgrade/firefly-client-x/basic/2-workload/rbd_cli_import_export.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- workunit: - branch: firefly - 
clients: - client.0: - - rbd/import_export.sh - env: - RBD_CREATE_ARGS: --new-format -- print: "**** done rbd/import_export.sh" diff --git a/suites/upgrade/client-upgrade/hammer-client-x/basic/% b/suites/upgrade/client-upgrade/hammer-client-x/basic/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/client-upgrade/hammer-client-x/basic/0-cluster/start.yaml b/suites/upgrade/client-upgrade/hammer-client-x/basic/0-cluster/start.yaml deleted file mode 100644 index db6f5e2fe99..00000000000 --- a/suites/upgrade/client-upgrade/hammer-client-x/basic/0-cluster/start.yaml +++ /dev/null @@ -1,14 +0,0 @@ -roles: -- - mon.a - - mds.a - - osd.0 - - osd.1 -- - mon.b - - mon.c - - osd.2 - - osd.3 -- - client.0 -overrides: - ceph: - log-whitelist: - - failed to encode map diff --git a/suites/upgrade/client-upgrade/hammer-client-x/basic/1-install/hammer-client-x.yaml b/suites/upgrade/client-upgrade/hammer-client-x/basic/1-install/hammer-client-x.yaml deleted file mode 100644 index c6dd4ed9391..00000000000 --- a/suites/upgrade/client-upgrade/hammer-client-x/basic/1-install/hammer-client-x.yaml +++ /dev/null @@ -1,10 +0,0 @@ -tasks: -- install: - branch: hammer -- print: "**** done install hammer" -- install.upgrade: - exclude_packages: ['ceph-test', 'ceph-test-dbg'] - client.0: -- print: "**** done install.upgrade client.0" -- ceph: -- print: "**** done ceph" diff --git a/suites/upgrade/client-upgrade/hammer-client-x/basic/2-workload/rbd_cli_import_export.yaml b/suites/upgrade/client-upgrade/hammer-client-x/basic/2-workload/rbd_cli_import_export.yaml deleted file mode 100644 index 9bb4f94f73f..00000000000 --- a/suites/upgrade/client-upgrade/hammer-client-x/basic/2-workload/rbd_cli_import_export.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- workunit: - branch: hammer - clients: - client.0: - - rbd/import_export.sh - env: - RBD_CREATE_ARGS: --new-format -- print: "**** done rbd/import_export.sh" diff --git a/suites/upgrade/dumpling-emperor-x/parallel/% 
b/suites/upgrade/dumpling-emperor-x/parallel/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/dumpling-emperor-x/parallel/0-cluster/start.yaml b/suites/upgrade/dumpling-emperor-x/parallel/0-cluster/start.yaml deleted file mode 100644 index e3d7f85f9ff..00000000000 --- a/suites/upgrade/dumpling-emperor-x/parallel/0-cluster/start.yaml +++ /dev/null @@ -1,19 +0,0 @@ -overrides: - ceph: - conf: - mon: - mon warn on legacy crush tunables: false - log-whitelist: - - scrub mismatch - - ScrubResult -roles: -- - mon.a - - mds.a - - osd.0 - - osd.1 -- - mon.b - - mon.c - - osd.2 - - osd.3 -- - client.0 - - client.1 diff --git a/suites/upgrade/dumpling-emperor-x/parallel/1-dumpling-install/dumpling.yaml b/suites/upgrade/dumpling-emperor-x/parallel/1-dumpling-install/dumpling.yaml deleted file mode 100644 index 92df8cebc5f..00000000000 --- a/suites/upgrade/dumpling-emperor-x/parallel/1-dumpling-install/dumpling.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- install: - branch: dumpling -- ceph: - fs: xfs -- parallel: - - workload - - upgrade-sequence diff --git a/suites/upgrade/dumpling-emperor-x/parallel/2-workload/+ b/suites/upgrade/dumpling-emperor-x/parallel/2-workload/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/dumpling-emperor-x/parallel/2-workload/rados_api.yaml b/suites/upgrade/dumpling-emperor-x/parallel/2-workload/rados_api.yaml deleted file mode 100644 index 96d656e4932..00000000000 --- a/suites/upgrade/dumpling-emperor-x/parallel/2-workload/rados_api.yaml +++ /dev/null @@ -1,8 +0,0 @@ -workload: - sequential: - - workunit: - branch: dumpling - clients: - client.0: - - rados/test.sh - - cls diff --git a/suites/upgrade/dumpling-emperor-x/parallel/2-workload/rados_loadgenbig.yaml b/suites/upgrade/dumpling-emperor-x/parallel/2-workload/rados_loadgenbig.yaml deleted file mode 100644 index 16241b3bed6..00000000000 --- a/suites/upgrade/dumpling-emperor-x/parallel/2-workload/rados_loadgenbig.yaml 
+++ /dev/null @@ -1,7 +0,0 @@ -workload: - sequential: - - workunit: - branch: dumpling - clients: - client.0: - - rados/load-gen-big.sh diff --git a/suites/upgrade/dumpling-emperor-x/parallel/2-workload/test_rbd_api.yaml b/suites/upgrade/dumpling-emperor-x/parallel/2-workload/test_rbd_api.yaml deleted file mode 100644 index 7584f0e1ff0..00000000000 --- a/suites/upgrade/dumpling-emperor-x/parallel/2-workload/test_rbd_api.yaml +++ /dev/null @@ -1,7 +0,0 @@ -workload: - sequential: - - workunit: - branch: dumpling - clients: - client.0: - - rbd/test_librbd.sh diff --git a/suites/upgrade/dumpling-emperor-x/parallel/2-workload/test_rbd_python.yaml b/suites/upgrade/dumpling-emperor-x/parallel/2-workload/test_rbd_python.yaml deleted file mode 100644 index 09c5326592b..00000000000 --- a/suites/upgrade/dumpling-emperor-x/parallel/2-workload/test_rbd_python.yaml +++ /dev/null @@ -1,7 +0,0 @@ -workload: - sequential: - - workunit: - branch: dumpling - clients: - client.0: - - rbd/test_librbd_python.sh diff --git a/suites/upgrade/dumpling-emperor-x/parallel/3-emperor-upgrade/emperor.yaml b/suites/upgrade/dumpling-emperor-x/parallel/3-emperor-upgrade/emperor.yaml deleted file mode 100644 index 626bc161cbd..00000000000 --- a/suites/upgrade/dumpling-emperor-x/parallel/3-emperor-upgrade/emperor.yaml +++ /dev/null @@ -1,10 +0,0 @@ -tasks: - - install.upgrade: - mon.a: - branch: emperor - mon.b: - branch: emperor - - ceph.restart: - - parallel: - - workload2 - - upgrade-sequence diff --git a/suites/upgrade/dumpling-emperor-x/parallel/4-workload/+ b/suites/upgrade/dumpling-emperor-x/parallel/4-workload/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/dumpling-emperor-x/parallel/4-workload/rados_api.yaml b/suites/upgrade/dumpling-emperor-x/parallel/4-workload/rados_api.yaml deleted file mode 100644 index b6bb42048a4..00000000000 --- a/suites/upgrade/dumpling-emperor-x/parallel/4-workload/rados_api.yaml +++ /dev/null @@ -1,8 +0,0 @@ -workload2: - 
sequential: - - workunit: - branch: dumpling - clients: - client.0: - - rados/test.sh - - cls diff --git a/suites/upgrade/dumpling-emperor-x/parallel/4-workload/rados_loadgenbig.yaml b/suites/upgrade/dumpling-emperor-x/parallel/4-workload/rados_loadgenbig.yaml deleted file mode 100644 index fd5c31dc477..00000000000 --- a/suites/upgrade/dumpling-emperor-x/parallel/4-workload/rados_loadgenbig.yaml +++ /dev/null @@ -1,7 +0,0 @@ -workload2: - sequential: - - workunit: - branch: dumpling - clients: - client.0: - - rados/load-gen-big.sh diff --git a/suites/upgrade/dumpling-emperor-x/parallel/4-workload/test_rbd_api.yaml b/suites/upgrade/dumpling-emperor-x/parallel/4-workload/test_rbd_api.yaml deleted file mode 100644 index 8c8c97a4bf3..00000000000 --- a/suites/upgrade/dumpling-emperor-x/parallel/4-workload/test_rbd_api.yaml +++ /dev/null @@ -1,7 +0,0 @@ -workload2: - sequential: - - workunit: - branch: dumpling - clients: - client.0: - - rbd/test_librbd.sh diff --git a/suites/upgrade/dumpling-emperor-x/parallel/4-workload/test_rbd_python.yaml b/suites/upgrade/dumpling-emperor-x/parallel/4-workload/test_rbd_python.yaml deleted file mode 100644 index 1edb13cf907..00000000000 --- a/suites/upgrade/dumpling-emperor-x/parallel/4-workload/test_rbd_python.yaml +++ /dev/null @@ -1,7 +0,0 @@ -workload2: - sequential: - - workunit: - branch: dumpling - clients: - client.0: - - rbd/test_librbd_python.sh diff --git a/suites/upgrade/dumpling-emperor-x/parallel/5-upgrade-sequence/upgrade-all.yaml b/suites/upgrade/dumpling-emperor-x/parallel/5-upgrade-sequence/upgrade-all.yaml deleted file mode 100644 index f5d10cdfcab..00000000000 --- a/suites/upgrade/dumpling-emperor-x/parallel/5-upgrade-sequence/upgrade-all.yaml +++ /dev/null @@ -1,6 +0,0 @@ -upgrade-sequence: - sequential: - - install.upgrade: - mon.a: - mon.b: - - ceph.restart: [mon.a, mon.b, mon.c, mds.a, osd.0, osd.1, osd.2, osd.3] diff --git a/suites/upgrade/dumpling-emperor-x/parallel/5-upgrade-sequence/upgrade-mon-osd-mds.yaml 
b/suites/upgrade/dumpling-emperor-x/parallel/5-upgrade-sequence/upgrade-mon-osd-mds.yaml deleted file mode 100644 index fcb61b1cef2..00000000000 --- a/suites/upgrade/dumpling-emperor-x/parallel/5-upgrade-sequence/upgrade-mon-osd-mds.yaml +++ /dev/null @@ -1,33 +0,0 @@ -upgrade-sequence: - sequential: - - install.upgrade: - mon.a: - mon.b: - - ceph.restart: - daemons: [mon.a] - wait-for-healthy: false - wait-for-osds-up: true - - sleep: - duration: 60 - - ceph.restart: - daemons: [mon.b] - wait-for-healthy: false - wait-for-osds-up: true - - sleep: - duration: 60 - - ceph.restart: [mon.c] - - sleep: - duration: 60 - - ceph.restart: [osd.0] - - sleep: - duration: 60 - - ceph.restart: [osd.1] - - sleep: - duration: 60 - - ceph.restart: [osd.2] - - sleep: - duration: 60 - - ceph.restart: [osd.3] - - sleep: - duration: 60 - - ceph.restart: [mds.a] diff --git a/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/+ b/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/ec-rados-plugin=jerasure-k=2-m=1.yaml b/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/ec-rados-plugin=jerasure-k=2-m=1.yaml deleted file mode 120000 index 4baff9bdd2f..00000000000 --- a/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/ec-rados-plugin=jerasure-k=2-m=1.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../../erasure-code/ec-rados-plugin=jerasure-k=2-m=1.yaml \ No newline at end of file diff --git a/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml b/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml deleted file mode 120000 index 4b9d9a44a24..00000000000 --- a/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml +++ /dev/null @@ -1 +0,0 @@ 
-../../../../../erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml \ No newline at end of file diff --git a/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rados-snaps-few-objects.yaml b/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rados-snaps-few-objects.yaml deleted file mode 100644 index bf85020d8d9..00000000000 --- a/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rados-snaps-few-objects.yaml +++ /dev/null @@ -1,12 +0,0 @@ -tasks: -- rados: - clients: [client.1] - ops: 4000 - objects: 50 - op_weights: - read: 100 - write: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 diff --git a/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rados_loadgenmix.yaml b/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rados_loadgenmix.yaml deleted file mode 100644 index 0bddda0ab84..00000000000 --- a/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rados_loadgenmix.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: - - workunit: - branch: dumpling - clients: - client.1: - - rados/load-gen-mix.sh diff --git a/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rados_mon_thrash.yaml b/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rados_mon_thrash.yaml deleted file mode 100644 index 1a932e059f0..00000000000 --- a/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rados_mon_thrash.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- mon_thrash: - revive_delay: 20 - thrash_delay: 1 -- workunit: - branch: dumpling - clients: - client.1: - - rados/test.sh diff --git a/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rbd_cls.yaml b/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rbd_cls.yaml deleted file mode 100644 index 9407ab48916..00000000000 --- a/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rbd_cls.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- workunit: - branch: dumpling - clients: - client.1: - - cls/test_cls_rbd.sh - diff --git 
a/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rbd_import_export.yaml b/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rbd_import_export.yaml deleted file mode 100644 index 185cd1ab32a..00000000000 --- a/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rbd_import_export.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- workunit: - branch: dumpling - clients: - client.1: - - rbd/import_export.sh - env: - RBD_CREATE_ARGS: --new-format diff --git a/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rgw_s3tests.yaml b/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rgw_s3tests.yaml deleted file mode 100644 index 22c3a3f821a..00000000000 --- a/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rgw_s3tests.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- rgw: [client.1] -- s3tests: - client.1: - rgw_server: client.1 diff --git a/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rgw_swift.yaml b/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rgw_swift.yaml deleted file mode 100644 index 0ab9febd2fc..00000000000 --- a/suites/upgrade/dumpling-emperor-x/parallel/6-final-workload/rgw_swift.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -# Uncomment the next line if you have not already included rgw_s3tests.yaml in your test. 
-# - rgw: [client.1] -- swift: - client.1: - rgw_server: client.1 diff --git a/suites/upgrade/dumpling-emperor-x/parallel/distros b/suites/upgrade/dumpling-emperor-x/parallel/distros deleted file mode 120000 index 79010c36a59..00000000000 --- a/suites/upgrade/dumpling-emperor-x/parallel/distros +++ /dev/null @@ -1 +0,0 @@ -../../../../distros/supported \ No newline at end of file diff --git a/suites/upgrade/dumpling-firefly-x/parallel/% b/suites/upgrade/dumpling-firefly-x/parallel/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/dumpling-firefly-x/parallel/0-cluster/start.yaml b/suites/upgrade/dumpling-firefly-x/parallel/0-cluster/start.yaml deleted file mode 100644 index 3eb17b675f3..00000000000 --- a/suites/upgrade/dumpling-firefly-x/parallel/0-cluster/start.yaml +++ /dev/null @@ -1,21 +0,0 @@ -overrides: - ceph: - conf: - mon: - mon warn on legacy crush tunables: false - mon debug unsafe allow tier with nonempty snaps: true - log-whitelist: - - scrub mismatch - - ScrubResult - - failed to encode map -roles: -- - mon.a - - mds.a - - osd.0 - - osd.1 -- - mon.b - - mon.c - - osd.2 - - osd.3 -- - client.0 - - client.1 diff --git a/suites/upgrade/dumpling-firefly-x/parallel/1-dumpling-install/dumpling.yaml b/suites/upgrade/dumpling-firefly-x/parallel/1-dumpling-install/dumpling.yaml deleted file mode 100644 index 60b2f13c985..00000000000 --- a/suites/upgrade/dumpling-firefly-x/parallel/1-dumpling-install/dumpling.yaml +++ /dev/null @@ -1,14 +0,0 @@ -tasks: -- install: - branch: dumpling -- print: "**** done dumpling install" -- ceph: - fs: xfs -- parallel: - - workload - - upgrade-sequence -- print: "**** done parallel" -- install.upgrade: - client.0: - branch: firefly -- print: "*** client.0 upgraded to firefly" diff --git a/suites/upgrade/dumpling-firefly-x/parallel/2-workload/+ b/suites/upgrade/dumpling-firefly-x/parallel/2-workload/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git 
a/suites/upgrade/dumpling-firefly-x/parallel/2-workload/rados_api.yaml b/suites/upgrade/dumpling-firefly-x/parallel/2-workload/rados_api.yaml deleted file mode 100644 index 3c7c90ef81f..00000000000 --- a/suites/upgrade/dumpling-firefly-x/parallel/2-workload/rados_api.yaml +++ /dev/null @@ -1,10 +0,0 @@ -workload: - sequential: - - workunit: - branch: dumpling - clients: - client.0: - # make sure to run test-upgrade-firefly.sh when running the cluster is mixed mode between firefly and dumpling - - rados/test-upgrade-firefly.sh - - cls - - print: "**** done rados/test.sh & cls" diff --git a/suites/upgrade/dumpling-firefly-x/parallel/2-workload/rados_loadgenbig.yaml b/suites/upgrade/dumpling-firefly-x/parallel/2-workload/rados_loadgenbig.yaml deleted file mode 100644 index 3bcf62b0cb7..00000000000 --- a/suites/upgrade/dumpling-firefly-x/parallel/2-workload/rados_loadgenbig.yaml +++ /dev/null @@ -1,8 +0,0 @@ -workload: - sequential: - - workunit: - branch: dumpling - clients: - client.0: - - rados/load-gen-big.sh - - print: "**** done rados/load-gen-big.sh" diff --git a/suites/upgrade/dumpling-firefly-x/parallel/2-workload/test_rbd_api.yaml b/suites/upgrade/dumpling-firefly-x/parallel/2-workload/test_rbd_api.yaml deleted file mode 100644 index d5b07c15de8..00000000000 --- a/suites/upgrade/dumpling-firefly-x/parallel/2-workload/test_rbd_api.yaml +++ /dev/null @@ -1,8 +0,0 @@ -workload: - sequential: - - workunit: - branch: dumpling - clients: - client.0: - - rbd/test_librbd.sh - - print: "**** done rbd/test_librbd.sh" diff --git a/suites/upgrade/dumpling-firefly-x/parallel/2-workload/test_rbd_python.yaml b/suites/upgrade/dumpling-firefly-x/parallel/2-workload/test_rbd_python.yaml deleted file mode 100644 index 4063ad7f915..00000000000 --- a/suites/upgrade/dumpling-firefly-x/parallel/2-workload/test_rbd_python.yaml +++ /dev/null @@ -1,8 +0,0 @@ -workload: - sequential: - - workunit: - branch: dumpling - clients: - client.0: - - rbd/test_librbd_python.sh - - print: "**** 
done rbd/test_librbd_python.sh" diff --git a/suites/upgrade/dumpling-firefly-x/parallel/3-upgrade-sequence/upgrade-all.yaml b/suites/upgrade/dumpling-firefly-x/parallel/3-upgrade-sequence/upgrade-all.yaml deleted file mode 100644 index 7c057dbca99..00000000000 --- a/suites/upgrade/dumpling-firefly-x/parallel/3-upgrade-sequence/upgrade-all.yaml +++ /dev/null @@ -1,10 +0,0 @@ -upgrade-sequence: - sequential: - - install.upgrade: - mon.a: - branch: firefly - mon.b: - branch: firefly - - print: "**** done install.upgrade firefly for mon.a and mon.b" - - ceph.restart: [mon.a, mon.b, mon.c, mds.a, osd.0, osd.1, osd.2, osd.3] - - print: "**** done ceph.restart the cluster" diff --git a/suites/upgrade/dumpling-firefly-x/parallel/4-firefly-upgrade/firefly.yaml b/suites/upgrade/dumpling-firefly-x/parallel/4-firefly-upgrade/firefly.yaml deleted file mode 100644 index bb530565107..00000000000 --- a/suites/upgrade/dumpling-firefly-x/parallel/4-firefly-upgrade/firefly.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: - - parallel: - - workload2 - - upgrade-sequence2 - - print: "**** done parallel" - - install.upgrade: - client.0: - - print: "**** done install.upgrade client.0 to the version from teuthology-suite arg" diff --git a/suites/upgrade/dumpling-firefly-x/parallel/5-workload/+ b/suites/upgrade/dumpling-firefly-x/parallel/5-workload/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/dumpling-firefly-x/parallel/5-workload/rados_api.yaml b/suites/upgrade/dumpling-firefly-x/parallel/5-workload/rados_api.yaml deleted file mode 100644 index 47573726b0f..00000000000 --- a/suites/upgrade/dumpling-firefly-x/parallel/5-workload/rados_api.yaml +++ /dev/null @@ -1,9 +0,0 @@ -workload2: - sequential: - - workunit: - branch: firefly - clients: - client.0: - - rados/test.sh - - cls - - print: "**** done #rados/test.sh and cls 2" diff --git a/suites/upgrade/dumpling-firefly-x/parallel/5-workload/rados_loadgenbig.yaml 
b/suites/upgrade/dumpling-firefly-x/parallel/5-workload/rados_loadgenbig.yaml deleted file mode 100644 index 451130e2e38..00000000000 --- a/suites/upgrade/dumpling-firefly-x/parallel/5-workload/rados_loadgenbig.yaml +++ /dev/null @@ -1,8 +0,0 @@ -workload2: - sequential: - - workunit: - branch: firefly - clients: - client.0: - - rados/load-gen-big.sh - - print: "**** done rados/load-gen-big.sh 2" diff --git a/suites/upgrade/dumpling-firefly-x/parallel/5-workload/test_rbd_api.yaml b/suites/upgrade/dumpling-firefly-x/parallel/5-workload/test_rbd_api.yaml deleted file mode 100644 index 1cf824d99bf..00000000000 --- a/suites/upgrade/dumpling-firefly-x/parallel/5-workload/test_rbd_api.yaml +++ /dev/null @@ -1,8 +0,0 @@ -workload2: - sequential: - - workunit: - branch: firefly - clients: - client.0: - - rbd/test_librbd.sh - - print: "**** done rbd/test_librbd.sh 2" diff --git a/suites/upgrade/dumpling-firefly-x/parallel/5-workload/test_rbd_python.yaml b/suites/upgrade/dumpling-firefly-x/parallel/5-workload/test_rbd_python.yaml deleted file mode 100644 index 9409329b0a4..00000000000 --- a/suites/upgrade/dumpling-firefly-x/parallel/5-workload/test_rbd_python.yaml +++ /dev/null @@ -1,8 +0,0 @@ -workload2: - sequential: - - workunit: - branch: firefly - clients: - client.0: - - rbd/test_librbd_python.sh - - print: "**** done rbd/test_librbd_python.sh 2" diff --git a/suites/upgrade/dumpling-firefly-x/parallel/6-upgrade-sequence/upgrade-by-daemon.yaml b/suites/upgrade/dumpling-firefly-x/parallel/6-upgrade-sequence/upgrade-by-daemon.yaml deleted file mode 100644 index 082a63c36fd..00000000000 --- a/suites/upgrade/dumpling-firefly-x/parallel/6-upgrade-sequence/upgrade-by-daemon.yaml +++ /dev/null @@ -1,39 +0,0 @@ -upgrade-sequence2: - sequential: - - install.upgrade: - mon.a: - - print: "**** done install.upgrade mon.a to the version from teuthology-suite arg" - - ceph.restart: - daemons: [mon.a] - wait-for-healthy: true - - sleep: - duration: 60 - - ceph.restart: - daemons: 
[osd.0, osd.1] - wait-for-healthy: true - - sleep: - duration: 60 - - ceph.restart: [mds.a] - - sleep: - duration: 60 - - print: "**** running mixed versions of osds and mons" - - exec: - mon.b: - - ceph osd crush tunables firefly - - install.upgrade: - mon.b: - - print: "**** done install.upgrade mon.b to the version from teuthology-suite arg" - - ceph.restart: - daemons: [mon.b, mon.c] - wait-for-healthy: true - - sleep: - duration: 60 - - ceph.restart: - daemons: [osd.2, osd.3] - wait-for-healthy: true - - sleep: - duration: 60 -### removed to fix #9642 -# - install.upgrade: -# client.0: -# - print: "*** client.0 upgraded" diff --git a/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/+ b/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/ec-rados-plugin=jerasure-k=2-m=1.yaml b/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/ec-rados-plugin=jerasure-k=2-m=1.yaml deleted file mode 120000 index 4baff9bdd2f..00000000000 --- a/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/ec-rados-plugin=jerasure-k=2-m=1.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../../erasure-code/ec-rados-plugin=jerasure-k=2-m=1.yaml \ No newline at end of file diff --git a/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml b/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml deleted file mode 120000 index 4b9d9a44a24..00000000000 --- a/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../../erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml \ No newline at end of file diff --git a/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rados-snaps-few-objects.yaml 
b/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rados-snaps-few-objects.yaml deleted file mode 100644 index bf85020d8d9..00000000000 --- a/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rados-snaps-few-objects.yaml +++ /dev/null @@ -1,12 +0,0 @@ -tasks: -- rados: - clients: [client.1] - ops: 4000 - objects: 50 - op_weights: - read: 100 - write: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 diff --git a/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rados_loadgenmix.yaml b/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rados_loadgenmix.yaml deleted file mode 100644 index 879b7b6d189..00000000000 --- a/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rados_loadgenmix.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: - - workunit: - clients: - client.1: - - rados/load-gen-mix.sh diff --git a/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rados_mon_thrash.yaml b/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rados_mon_thrash.yaml deleted file mode 100644 index 5c4c6297cfe..00000000000 --- a/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rados_mon_thrash.yaml +++ /dev/null @@ -1,10 +0,0 @@ -tasks: - - sequential: - - mon_thrash: - revive_delay: 20 - thrash_delay: 1 - - workunit: - clients: - client.1: - - rados/test.sh - - print: "**** done rados/test.sh - 6-final-workload" diff --git a/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rbd_cls.yaml b/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rbd_cls.yaml deleted file mode 100644 index 908b79e869a..00000000000 --- a/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rbd_cls.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- workunit: - clients: - client.1: - - cls/test_cls_rbd.sh - diff --git a/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rbd_import_export.yaml b/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rbd_import_export.yaml deleted file 
mode 100644 index a7ce2141ee5..00000000000 --- a/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rbd_import_export.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- workunit: - clients: - client.1: - - rbd/import_export.sh - env: - RBD_CREATE_ARGS: --new-format diff --git a/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rgw_s3tests.yaml b/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rgw_s3tests.yaml deleted file mode 100644 index 22c3a3f821a..00000000000 --- a/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rgw_s3tests.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- rgw: [client.1] -- s3tests: - client.1: - rgw_server: client.1 diff --git a/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rgw_swift.yaml b/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rgw_swift.yaml deleted file mode 100644 index 0ab9febd2fc..00000000000 --- a/suites/upgrade/dumpling-firefly-x/parallel/7-final-workload/rgw_swift.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -# Uncomment the next line if you have not already included rgw_s3tests.yaml in your test. -# - rgw: [client.1] -- swift: - client.1: - rgw_server: client.1 diff --git a/suites/upgrade/dumpling-firefly-x/parallel/README b/suites/upgrade/dumpling-firefly-x/parallel/README deleted file mode 100644 index 8eff0ed5c94..00000000000 --- a/suites/upgrade/dumpling-firefly-x/parallel/README +++ /dev/null @@ -1,72 +0,0 @@ -Purpose -======= - -This suite tests upgrades of a ceph cluster from dumpling (current -branch) to firefly (current branch), and then to a later specified -version (version x, e.g. for example ‘next’ or ‘master’). It runs the -last upgrade (steps in 3-firefly-upgrade) in parallel with firefly -client tests. 
- - -Structure -========= - -Generally the flow is: -- install dumpling -- test it -- upgrade cluster and clients to firefly -- test firefly while upgrading cluster to version x -- upgrade clients -- test with version x clients - -0-cluster ---------- - -Defines the cluster layout - two nodes run ceph daemons, and a third -acts as a client. (This is under 'roles:' section in the final yaml) - -1-dumpling-install ------------------- - -Installs dumpling and runs correctness tests from the 'workload' section - -2-workload ----------- - -Defines the 'workload' section - correctness tests to run on dumpling -“+” is used to construct a single yaml load from all tests in this directory - -3-firefly-upgrade ------------------ - -First upgrades everything to firefly. Then upgrades the cluster to -version x while running correctness tests (from the 'workload2' -section) on firefly clients in parallel. This upgrade is done by the -'upgrade-sequence' section, defined later. Once the cluster is -upgraded and these tests complete, upgrades the clients to version x -as well. - -Clients are upgraded last to avoid running newer tests that don't work -against firefly, and to verify that firefly clients can continue -working with a newer cluster. - -4-workload ----------- - -Defines the 'workload2' section - correctness tests to run during the -upgrade from firefly to version x. -“+” is used to construct a single yaml load from all tests in this directory - -5-upgrade-sequence ------------------- - -Defines the 'upgrade-sequence' section - the order in which the -upgrade from firefly to version x is done. Note that leaving the -version unspecified here is what makes it upgrade to version x, -which is set as an override when this suite is scheduled. - -6-final-workload ----------------- - -Runs some final correctness tests of version x clients. 
-“+” is used to construct a single yaml load from all tests in this directory diff --git a/suites/upgrade/dumpling-firefly-x/parallel/distros b/suites/upgrade/dumpling-firefly-x/parallel/distros deleted file mode 120000 index 79010c36a59..00000000000 --- a/suites/upgrade/dumpling-firefly-x/parallel/distros +++ /dev/null @@ -1 +0,0 @@ -../../../../distros/supported \ No newline at end of file diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/% b/suites/upgrade/dumpling-firefly-x/stress-split/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/00-cluster/start.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/00-cluster/start.yaml deleted file mode 100644 index 129635fa52a..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/00-cluster/start.yaml +++ /dev/null @@ -1,19 +0,0 @@ -overrides: - ceph: - log-whitelist: - - failed to encode map - conf: - mon: - mon warn on legacy crush tunables: false -roles: -- - mon.a - - mon.b - - mds.a - - osd.0 - - osd.1 - - osd.2 - - mon.c -- - osd.3 - - osd.4 - - osd.5 -- - client.0 diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/01-dumpling-install/dumpling.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/01-dumpling-install/dumpling.yaml deleted file mode 100644 index c98631e2bbd..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/01-dumpling-install/dumpling.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- install: - branch: dumpling -- ceph: - fs: xfs diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/02-partial-upgrade-firefly/firsthalf.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/02-partial-upgrade-firefly/firsthalf.yaml deleted file mode 100644 index 1098b089d88..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/02-partial-upgrade-firefly/firsthalf.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- install.upgrade: - osd.0: - branch: firefly -- ceph.restart: - daemons: [osd.0, 
osd.1, osd.2] diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/03-workload/rbd.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/03-workload/rbd.yaml deleted file mode 100644 index 9ccd57c4a82..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/03-workload/rbd.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - client.0: - - cls/test_cls_rbd.sh diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/04-mona-upgrade-firefly/mona.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/04-mona-upgrade-firefly/mona.yaml deleted file mode 100644 index b6ffb3323d1..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/04-mona-upgrade-firefly/mona.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- ceph.restart: - daemons: [mon.a] - wait-for-healthy: false - wait-for-osds-up: true diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/05-workload/+ b/suites/upgrade/dumpling-firefly-x/stress-split/05-workload/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/05-workload/rbd-cls.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/05-workload/rbd-cls.yaml deleted file mode 100644 index 9ccd57c4a82..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/05-workload/rbd-cls.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - client.0: - - cls/test_cls_rbd.sh diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/05-workload/readwrite.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/05-workload/readwrite.yaml deleted file mode 100644 index c53e52b0872..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/05-workload/readwrite.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 500 - op_weights: - read: 45 - write: 45 - delete: 10 diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/06-monb-upgrade-firefly/monb.yaml 
b/suites/upgrade/dumpling-firefly-x/stress-split/06-monb-upgrade-firefly/monb.yaml deleted file mode 100644 index 513890c41c0..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/06-monb-upgrade-firefly/monb.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- ceph.restart: - daemons: [mon.b] - wait-for-healthy: false - wait-for-osds-up: true diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/07-workload/+ b/suites/upgrade/dumpling-firefly-x/stress-split/07-workload/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/07-workload/radosbench.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/07-workload/radosbench.yaml deleted file mode 100644 index 3940870fce0..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/07-workload/radosbench.yaml +++ /dev/null @@ -1,4 +0,0 @@ -tasks: -- radosbench: - clients: [client.0] - time: 1800 diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/07-workload/rbd_api.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/07-workload/rbd_api.yaml deleted file mode 100644 index 1ecaee4bf78..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/07-workload/rbd_api.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - client.0: - - rbd/test_librbd.sh diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/08-monc-upgrade-firefly/monc.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/08-monc-upgrade-firefly/monc.yaml deleted file mode 100644 index e9273236ba3..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/08-monc-upgrade-firefly/monc.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph.restart: - daemons: [mon.c] - wait-for-healthy: false - wait-for-osds-up: true -- ceph.wait_for_mon_quorum: [a, b, c] diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/09-workload/rbd-python.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/09-workload/rbd-python.yaml deleted file mode 100644 
index 8273c40457a..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/09-workload/rbd-python.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- workunit: - #fixes #10577 - branch: dumpling - clients: - client.0: - - rbd/test_librbd_python.sh diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/10-osds-upgrade-firefly/secondhalf.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/10-osds-upgrade-firefly/secondhalf.yaml deleted file mode 100644 index 917894b2fe4..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/10-osds-upgrade-firefly/secondhalf.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- install.upgrade: - osd.3: - branch: firefly -- ceph.restart: - daemons: [osd.3, osd.4, osd.5] diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/11-workload/snaps-few-objects.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/11-workload/snaps-few-objects.yaml deleted file mode 100644 index c54039766c0..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/11-workload/snaps-few-objects.yaml +++ /dev/null @@ -1,12 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 50 - op_weights: - read: 100 - write: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/12-partial-upgrade-x/first.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/12-partial-upgrade-x/first.yaml deleted file mode 100644 index 68c9d44b7c3..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/12-partial-upgrade-x/first.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- install.upgrade: - osd.0: -- ceph.restart: - daemons: [osd.0, osd.1, osd.2] diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/13-workload/rados_loadgen_big.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/13-workload/rados_loadgen_big.yaml deleted file mode 100644 index b0030093e72..00000000000 --- 
a/suites/upgrade/dumpling-firefly-x/stress-split/13-workload/rados_loadgen_big.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - client.0: - - rados/load-gen-big.sh diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/14-mona-upgrade-x/mona.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/14-mona-upgrade-x/mona.yaml deleted file mode 100644 index b6ffb3323d1..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/14-mona-upgrade-x/mona.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- ceph.restart: - daemons: [mon.a] - wait-for-healthy: false - wait-for-osds-up: true diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/15-workload/rbd-import-export.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/15-workload/rbd-import-export.yaml deleted file mode 100644 index 49070827be0..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/15-workload/rbd-import-export.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- workunit: - clients: - client.0: - - rbd/import_export.sh - env: - RBD_CREATE_ARGS: --new-format diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/16-monb-upgrade-x/monb.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/16-monb-upgrade-x/monb.yaml deleted file mode 100644 index 513890c41c0..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/16-monb-upgrade-x/monb.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- ceph.restart: - daemons: [mon.b] - wait-for-healthy: false - wait-for-osds-up: true diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/17-workload/readwrite.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/17-workload/readwrite.yaml deleted file mode 100644 index c53e52b0872..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/17-workload/readwrite.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 500 - op_weights: - read: 45 - write: 45 - delete: 10 diff --git 
a/suites/upgrade/dumpling-firefly-x/stress-split/18-monc-upgrade-x/monc.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/18-monc-upgrade-x/monc.yaml deleted file mode 100644 index e9273236ba3..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/18-monc-upgrade-x/monc.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph.restart: - daemons: [mon.c] - wait-for-healthy: false - wait-for-osds-up: true -- ceph.wait_for_mon_quorum: [a, b, c] diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/19-workload/radosbench.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/19-workload/radosbench.yaml deleted file mode 100644 index 3940870fce0..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/19-workload/radosbench.yaml +++ /dev/null @@ -1,4 +0,0 @@ -tasks: -- radosbench: - clients: [client.0] - time: 1800 diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/20-osds-upgrade-x/osds_secondhalf.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/20-osds-upgrade-x/osds_secondhalf.yaml deleted file mode 100644 index 88d4bb5ec98..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/20-osds-upgrade-x/osds_secondhalf.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- install.upgrade: - osd.3: -- ceph.restart: - daemons: [osd.3, osd.4, osd.5] diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/21-final-workload/+ b/suites/upgrade/dumpling-firefly-x/stress-split/21-final-workload/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/21-final-workload/rados_stress_watch.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/21-final-workload/rados_stress_watch.yaml deleted file mode 100644 index 0e1ba010c5b..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/21-final-workload/rados_stress_watch.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - client.0: - - rados/stress_watch.sh diff --git 
a/suites/upgrade/dumpling-firefly-x/stress-split/21-final-workload/rbd_cls_tests.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/21-final-workload/rbd_cls_tests.yaml deleted file mode 100644 index 9ccd57c4a82..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/21-final-workload/rbd_cls_tests.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - client.0: - - cls/test_cls_rbd.sh diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/21-final-workload/rgw-swift.yaml b/suites/upgrade/dumpling-firefly-x/stress-split/21-final-workload/rgw-swift.yaml deleted file mode 100644 index 0d79fb621ea..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/21-final-workload/rgw-swift.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- rgw: - client.0: - default_idle_timeout: 300 -- swift: - client.0: - rgw_server: client.0 - diff --git a/suites/upgrade/dumpling-firefly-x/stress-split/distros b/suites/upgrade/dumpling-firefly-x/stress-split/distros deleted file mode 120000 index 79010c36a59..00000000000 --- a/suites/upgrade/dumpling-firefly-x/stress-split/distros +++ /dev/null @@ -1 +0,0 @@ -../../../../distros/supported \ No newline at end of file diff --git a/suites/upgrade/dumpling-giant-x/parallel/6-final-workload/ec-rados-plugin=jerasure-k=2-m=1.yaml b/suites/upgrade/dumpling-giant-x/parallel/6-final-workload/ec-rados-plugin=jerasure-k=2-m=1.yaml deleted file mode 120000 index 4baff9bdd2f..00000000000 --- a/suites/upgrade/dumpling-giant-x/parallel/6-final-workload/ec-rados-plugin=jerasure-k=2-m=1.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../../erasure-code/ec-rados-plugin=jerasure-k=2-m=1.yaml \ No newline at end of file diff --git a/suites/upgrade/dumpling-x/parallel/% b/suites/upgrade/dumpling-x/parallel/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/dumpling-x/parallel/0-cluster/start.yaml b/suites/upgrade/dumpling-x/parallel/0-cluster/start.yaml deleted file mode 100644 index 
c39d9dfeca9..00000000000 --- a/suites/upgrade/dumpling-x/parallel/0-cluster/start.yaml +++ /dev/null @@ -1,20 +0,0 @@ -roles: -- - mon.a - - mds.a - - osd.0 - - osd.1 -- - mon.b - - mon.c - - osd.2 - - osd.3 -- - client.0 - - client.1 -overrides: - ceph: - conf: - mon: - mon warn on legacy crush tunables: false - log-whitelist: - - scrub mismatch - - ScrubResult - - failed to encode map diff --git a/suites/upgrade/dumpling-x/parallel/1-dumpling-install/dumpling.yaml b/suites/upgrade/dumpling-x/parallel/1-dumpling-install/dumpling.yaml deleted file mode 100644 index adbdedee518..00000000000 --- a/suites/upgrade/dumpling-x/parallel/1-dumpling-install/dumpling.yaml +++ /dev/null @@ -1,11 +0,0 @@ -tasks: -- install: - branch: dumpling -- print: "**** done install" -- ceph: - fs: xfs -- print: "**** done ceph" -- parallel: - - workload - - upgrade-sequence -- print: "**** done parallel" diff --git a/suites/upgrade/dumpling-x/parallel/2-workload/rados_api.yaml b/suites/upgrade/dumpling-x/parallel/2-workload/rados_api.yaml deleted file mode 100644 index cd820a8a711..00000000000 --- a/suites/upgrade/dumpling-x/parallel/2-workload/rados_api.yaml +++ /dev/null @@ -1,9 +0,0 @@ -workload: - sequential: - - workunit: - branch: dumpling - clients: - client.0: - - rados/test-upgrade-firefly.sh - - cls - diff --git a/suites/upgrade/dumpling-x/parallel/2-workload/rados_loadgenbig.yaml b/suites/upgrade/dumpling-x/parallel/2-workload/rados_loadgenbig.yaml deleted file mode 100644 index cc1ef874cb0..00000000000 --- a/suites/upgrade/dumpling-x/parallel/2-workload/rados_loadgenbig.yaml +++ /dev/null @@ -1,7 +0,0 @@ -workload: - sequential: - - workunit: - branch: dumpling - clients: - client.0: - - rados/load-gen-big.sh diff --git a/suites/upgrade/dumpling-x/parallel/2-workload/test_rbd_api.yaml b/suites/upgrade/dumpling-x/parallel/2-workload/test_rbd_api.yaml deleted file mode 100644 index 36ffa27ec3f..00000000000 --- a/suites/upgrade/dumpling-x/parallel/2-workload/test_rbd_api.yaml 
+++ /dev/null @@ -1,7 +0,0 @@ -workload: - sequential: - - workunit: - branch: dumpling - clients: - client.0: - - rbd/test_librbd.sh diff --git a/suites/upgrade/dumpling-x/parallel/2-workload/test_rbd_python.yaml b/suites/upgrade/dumpling-x/parallel/2-workload/test_rbd_python.yaml deleted file mode 100644 index e704a9794b9..00000000000 --- a/suites/upgrade/dumpling-x/parallel/2-workload/test_rbd_python.yaml +++ /dev/null @@ -1,7 +0,0 @@ -workload: - sequential: - - workunit: - branch: dumpling - clients: - client.0: - - rbd/test_librbd_python.sh diff --git a/suites/upgrade/dumpling-x/parallel/3-upgrade-sequence/upgrade-all.yaml b/suites/upgrade/dumpling-x/parallel/3-upgrade-sequence/upgrade-all.yaml deleted file mode 100644 index f5d10cdfcab..00000000000 --- a/suites/upgrade/dumpling-x/parallel/3-upgrade-sequence/upgrade-all.yaml +++ /dev/null @@ -1,6 +0,0 @@ -upgrade-sequence: - sequential: - - install.upgrade: - mon.a: - mon.b: - - ceph.restart: [mon.a, mon.b, mon.c, mds.a, osd.0, osd.1, osd.2, osd.3] diff --git a/suites/upgrade/dumpling-x/parallel/3-upgrade-sequence/upgrade-mon-osd-mds.yaml b/suites/upgrade/dumpling-x/parallel/3-upgrade-sequence/upgrade-mon-osd-mds.yaml deleted file mode 100644 index fcb61b1cef2..00000000000 --- a/suites/upgrade/dumpling-x/parallel/3-upgrade-sequence/upgrade-mon-osd-mds.yaml +++ /dev/null @@ -1,33 +0,0 @@ -upgrade-sequence: - sequential: - - install.upgrade: - mon.a: - mon.b: - - ceph.restart: - daemons: [mon.a] - wait-for-healthy: false - wait-for-osds-up: true - - sleep: - duration: 60 - - ceph.restart: - daemons: [mon.b] - wait-for-healthy: false - wait-for-osds-up: true - - sleep: - duration: 60 - - ceph.restart: [mon.c] - - sleep: - duration: 60 - - ceph.restart: [osd.0] - - sleep: - duration: 60 - - ceph.restart: [osd.1] - - sleep: - duration: 60 - - ceph.restart: [osd.2] - - sleep: - duration: 60 - - ceph.restart: [osd.3] - - sleep: - duration: 60 - - ceph.restart: [mds.a] diff --git 
a/suites/upgrade/dumpling-x/parallel/4-final-upgrade/client.yaml b/suites/upgrade/dumpling-x/parallel/4-final-upgrade/client.yaml deleted file mode 100644 index cf35d41e6c5..00000000000 --- a/suites/upgrade/dumpling-x/parallel/4-final-upgrade/client.yaml +++ /dev/null @@ -1,4 +0,0 @@ -tasks: - - install.upgrade: - client.0: - - print: "**** done install.upgrade" diff --git a/suites/upgrade/dumpling-x/parallel/5-final-workload/ec-rados-plugin=jerasure-k=2-m=1.yaml b/suites/upgrade/dumpling-x/parallel/5-final-workload/ec-rados-plugin=jerasure-k=2-m=1.yaml deleted file mode 120000 index 4baff9bdd2f..00000000000 --- a/suites/upgrade/dumpling-x/parallel/5-final-workload/ec-rados-plugin=jerasure-k=2-m=1.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../../erasure-code/ec-rados-plugin=jerasure-k=2-m=1.yaml \ No newline at end of file diff --git a/suites/upgrade/dumpling-x/parallel/5-final-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml b/suites/upgrade/dumpling-x/parallel/5-final-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml deleted file mode 120000 index 4b9d9a44a24..00000000000 --- a/suites/upgrade/dumpling-x/parallel/5-final-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../../erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml \ No newline at end of file diff --git a/suites/upgrade/dumpling-x/parallel/5-final-workload/rados-snaps-few-objects.yaml b/suites/upgrade/dumpling-x/parallel/5-final-workload/rados-snaps-few-objects.yaml deleted file mode 100644 index 40f66da37f2..00000000000 --- a/suites/upgrade/dumpling-x/parallel/5-final-workload/rados-snaps-few-objects.yaml +++ /dev/null @@ -1,12 +0,0 @@ -tasks: - - rados: - clients: [client.1] - ops: 4000 - objects: 50 - op_weights: - read: 100 - write: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 diff --git a/suites/upgrade/dumpling-x/parallel/5-final-workload/rados_loadgenmix.yaml b/suites/upgrade/dumpling-x/parallel/5-final-workload/rados_loadgenmix.yaml deleted 
file mode 100644 index faa96ed24d5..00000000000 --- a/suites/upgrade/dumpling-x/parallel/5-final-workload/rados_loadgenmix.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: - - workunit: - clients: - client.1: - - rados/load-gen-mix.sh diff --git a/suites/upgrade/dumpling-x/parallel/5-final-workload/rados_mon_thrash.yaml b/suites/upgrade/dumpling-x/parallel/5-final-workload/rados_mon_thrash.yaml deleted file mode 100644 index 88019bef17a..00000000000 --- a/suites/upgrade/dumpling-x/parallel/5-final-workload/rados_mon_thrash.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: - - mon_thrash: - revive_delay: 20 - thrash_delay: 1 - - workunit: - clients: - client.1: - - rados/test.sh diff --git a/suites/upgrade/dumpling-x/parallel/5-final-workload/rbd_cls.yaml b/suites/upgrade/dumpling-x/parallel/5-final-workload/rbd_cls.yaml deleted file mode 100644 index 4ef47768237..00000000000 --- a/suites/upgrade/dumpling-x/parallel/5-final-workload/rbd_cls.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: - - workunit: - clients: - client.1: - - cls/test_cls_rbd.sh - diff --git a/suites/upgrade/dumpling-x/parallel/5-final-workload/rbd_import_export.yaml b/suites/upgrade/dumpling-x/parallel/5-final-workload/rbd_import_export.yaml deleted file mode 100644 index 6c40377324d..00000000000 --- a/suites/upgrade/dumpling-x/parallel/5-final-workload/rbd_import_export.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: - - workunit: - clients: - client.1: - - rbd/import_export.sh - env: - RBD_CREATE_ARGS: --new-format diff --git a/suites/upgrade/dumpling-x/parallel/5-final-workload/rgw_s3tests.yaml b/suites/upgrade/dumpling-x/parallel/5-final-workload/rgw_s3tests.yaml deleted file mode 100644 index 53ceb786ba0..00000000000 --- a/suites/upgrade/dumpling-x/parallel/5-final-workload/rgw_s3tests.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: - - rgw: [client.1] - - s3tests: - client.1: - rgw_server: client.1 - branch: dumpling diff --git a/suites/upgrade/dumpling-x/parallel/5-final-workload/rgw_swift.yaml 
b/suites/upgrade/dumpling-x/parallel/5-final-workload/rgw_swift.yaml deleted file mode 100644 index 445224cef2d..00000000000 --- a/suites/upgrade/dumpling-x/parallel/5-final-workload/rgw_swift.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: - - rgw: [client.1] - - swift: - client.1: - rgw_server: client.1 diff --git a/suites/upgrade/dumpling-x/parallel/distros b/suites/upgrade/dumpling-x/parallel/distros deleted file mode 120000 index 79010c36a59..00000000000 --- a/suites/upgrade/dumpling-x/parallel/distros +++ /dev/null @@ -1 +0,0 @@ -../../../../distros/supported \ No newline at end of file diff --git a/suites/upgrade/dumpling-x/stress-split/% b/suites/upgrade/dumpling-x/stress-split/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/dumpling-x/stress-split/0-cluster/start.yaml b/suites/upgrade/dumpling-x/stress-split/0-cluster/start.yaml deleted file mode 100644 index a1b6b303f33..00000000000 --- a/suites/upgrade/dumpling-x/stress-split/0-cluster/start.yaml +++ /dev/null @@ -1,19 +0,0 @@ -overrides: - ceph: - conf: - mon: - mon warn on legacy crush tunables: false - log-whitelist: - - failed to encode map -roles: -- - mon.a - - mon.b - - mds.a - - osd.0 - - osd.1 - - osd.2 - - mon.c -- - osd.3 - - osd.4 - - osd.5 -- - client.0 diff --git a/suites/upgrade/dumpling-x/stress-split/1-dumpling-install/dumpling.yaml b/suites/upgrade/dumpling-x/stress-split/1-dumpling-install/dumpling.yaml deleted file mode 100644 index c98631e2bbd..00000000000 --- a/suites/upgrade/dumpling-x/stress-split/1-dumpling-install/dumpling.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- install: - branch: dumpling -- ceph: - fs: xfs diff --git a/suites/upgrade/dumpling-x/stress-split/2-partial-upgrade/firsthalf.yaml b/suites/upgrade/dumpling-x/stress-split/2-partial-upgrade/firsthalf.yaml deleted file mode 100644 index 312df6e21c6..00000000000 --- a/suites/upgrade/dumpling-x/stress-split/2-partial-upgrade/firsthalf.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- 
install.upgrade: - osd.0: -- ceph.restart: - daemons: [osd.0, osd.1, osd.2] diff --git a/suites/upgrade/dumpling-x/stress-split/3-thrash/default.yaml b/suites/upgrade/dumpling-x/stress-split/3-thrash/default.yaml deleted file mode 100644 index a85510eb6fa..00000000000 --- a/suites/upgrade/dumpling-x/stress-split/3-thrash/default.yaml +++ /dev/null @@ -1,12 +0,0 @@ -overrides: - ceph: - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost - - log bound mismatch -tasks: -- thrashosds: - timeout: 1200 - chance_pgnum_grow: 1 - chance_pgpnum_fix: 1 - thrash_primary_affinity: false diff --git a/suites/upgrade/dumpling-x/stress-split/4-mon/mona.yaml b/suites/upgrade/dumpling-x/stress-split/4-mon/mona.yaml deleted file mode 100644 index b6ffb3323d1..00000000000 --- a/suites/upgrade/dumpling-x/stress-split/4-mon/mona.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- ceph.restart: - daemons: [mon.a] - wait-for-healthy: false - wait-for-osds-up: true diff --git a/suites/upgrade/dumpling-x/stress-split/5-workload/rados_api_tests.yaml b/suites/upgrade/dumpling-x/stress-split/5-workload/rados_api_tests.yaml deleted file mode 100644 index 7b2c72cbb2e..00000000000 --- a/suites/upgrade/dumpling-x/stress-split/5-workload/rados_api_tests.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- workunit: - branch: dumpling - clients: - client.0: - - rados/test-upgrade-firefly.sh diff --git a/suites/upgrade/dumpling-x/stress-split/5-workload/rbd-cls.yaml b/suites/upgrade/dumpling-x/stress-split/5-workload/rbd-cls.yaml deleted file mode 100644 index db3dff7fc5c..00000000000 --- a/suites/upgrade/dumpling-x/stress-split/5-workload/rbd-cls.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- workunit: - branch: dumpling - clients: - client.0: - - cls/test_cls_rbd.sh diff --git a/suites/upgrade/dumpling-x/stress-split/5-workload/rbd-import-export.yaml b/suites/upgrade/dumpling-x/stress-split/5-workload/rbd-import-export.yaml deleted file mode 100644 index a5a964ce13b..00000000000 --- 
a/suites/upgrade/dumpling-x/stress-split/5-workload/rbd-import-export.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- workunit: - branch: dumpling - clients: - client.0: - - rbd/import_export.sh - env: - RBD_CREATE_ARGS: --new-format diff --git a/suites/upgrade/dumpling-x/stress-split/5-workload/readwrite.yaml b/suites/upgrade/dumpling-x/stress-split/5-workload/readwrite.yaml deleted file mode 100644 index c53e52b0872..00000000000 --- a/suites/upgrade/dumpling-x/stress-split/5-workload/readwrite.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 500 - op_weights: - read: 45 - write: 45 - delete: 10 diff --git a/suites/upgrade/dumpling-x/stress-split/5-workload/snaps-few-objects.yaml b/suites/upgrade/dumpling-x/stress-split/5-workload/snaps-few-objects.yaml deleted file mode 100644 index c54039766c0..00000000000 --- a/suites/upgrade/dumpling-x/stress-split/5-workload/snaps-few-objects.yaml +++ /dev/null @@ -1,12 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 50 - op_weights: - read: 100 - write: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 diff --git a/suites/upgrade/dumpling-x/stress-split/6-next-mon/monb.yaml b/suites/upgrade/dumpling-x/stress-split/6-next-mon/monb.yaml deleted file mode 100644 index 513890c41c0..00000000000 --- a/suites/upgrade/dumpling-x/stress-split/6-next-mon/monb.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- ceph.restart: - daemons: [mon.b] - wait-for-healthy: false - wait-for-osds-up: true diff --git a/suites/upgrade/dumpling-x/stress-split/7-workload/rados_api_tests.yaml b/suites/upgrade/dumpling-x/stress-split/7-workload/rados_api_tests.yaml deleted file mode 100644 index 7b2c72cbb2e..00000000000 --- a/suites/upgrade/dumpling-x/stress-split/7-workload/rados_api_tests.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- workunit: - branch: dumpling - clients: - client.0: - - rados/test-upgrade-firefly.sh diff --git 
a/suites/upgrade/dumpling-x/stress-split/7-workload/radosbench.yaml b/suites/upgrade/dumpling-x/stress-split/7-workload/radosbench.yaml deleted file mode 100644 index 3940870fce0..00000000000 --- a/suites/upgrade/dumpling-x/stress-split/7-workload/radosbench.yaml +++ /dev/null @@ -1,4 +0,0 @@ -tasks: -- radosbench: - clients: [client.0] - time: 1800 diff --git a/suites/upgrade/dumpling-x/stress-split/7-workload/rbd_api.yaml b/suites/upgrade/dumpling-x/stress-split/7-workload/rbd_api.yaml deleted file mode 100644 index bbcde3e1559..00000000000 --- a/suites/upgrade/dumpling-x/stress-split/7-workload/rbd_api.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- workunit: - branch: dumpling - clients: - client.0: - - rbd/test_librbd.sh diff --git a/suites/upgrade/dumpling-x/stress-split/8-next-mon/monc.yaml b/suites/upgrade/dumpling-x/stress-split/8-next-mon/monc.yaml deleted file mode 100644 index 73f22bd5f7c..00000000000 --- a/suites/upgrade/dumpling-x/stress-split/8-next-mon/monc.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- install.upgrade: - mon.c: -- ceph.restart: - daemons: [mon.c] - wait-for-healthy: false - wait-for-osds-up: true -- ceph.wait_for_mon_quorum: [a, b, c] diff --git a/suites/upgrade/dumpling-x/stress-split/9-workload/+ b/suites/upgrade/dumpling-x/stress-split/9-workload/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/dumpling-x/stress-split/9-workload/rados_api_tests.yaml b/suites/upgrade/dumpling-x/stress-split/9-workload/rados_api_tests.yaml deleted file mode 100644 index 7b2c72cbb2e..00000000000 --- a/suites/upgrade/dumpling-x/stress-split/9-workload/rados_api_tests.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- workunit: - branch: dumpling - clients: - client.0: - - rados/test-upgrade-firefly.sh diff --git a/suites/upgrade/dumpling-x/stress-split/9-workload/rbd-python.yaml b/suites/upgrade/dumpling-x/stress-split/9-workload/rbd-python.yaml deleted file mode 100644 index 1c5e53906f8..00000000000 --- 
a/suites/upgrade/dumpling-x/stress-split/9-workload/rbd-python.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- workunit: - branch: dumpling - clients: - client.0: - - rbd/test_librbd_python.sh diff --git a/suites/upgrade/dumpling-x/stress-split/9-workload/rgw-s3tests.yaml b/suites/upgrade/dumpling-x/stress-split/9-workload/rgw-s3tests.yaml deleted file mode 100644 index e44546dbcaa..00000000000 --- a/suites/upgrade/dumpling-x/stress-split/9-workload/rgw-s3tests.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- rgw: - default_idle_timeout: 300 - client.0: -- swift: - client.0: - rgw_server: client.0 - diff --git a/suites/upgrade/dumpling-x/stress-split/9-workload/snaps-many-objects.yaml b/suites/upgrade/dumpling-x/stress-split/9-workload/snaps-many-objects.yaml deleted file mode 100644 index 9e311c946e1..00000000000 --- a/suites/upgrade/dumpling-x/stress-split/9-workload/snaps-many-objects.yaml +++ /dev/null @@ -1,12 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 500 - op_weights: - read: 100 - write: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 diff --git a/suites/upgrade/dumpling-x/stress-split/distros b/suites/upgrade/dumpling-x/stress-split/distros deleted file mode 120000 index 79010c36a59..00000000000 --- a/suites/upgrade/dumpling-x/stress-split/distros +++ /dev/null @@ -1 +0,0 @@ -../../../../distros/supported \ No newline at end of file diff --git a/suites/upgrade/firefly-x/parallel/% b/suites/upgrade/firefly-x/parallel/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/firefly-x/parallel/0-cluster/start.yaml b/suites/upgrade/firefly-x/parallel/0-cluster/start.yaml deleted file mode 100644 index 27e08f394e8..00000000000 --- a/suites/upgrade/firefly-x/parallel/0-cluster/start.yaml +++ /dev/null @@ -1,24 +0,0 @@ -roles: -- - mon.a - - mds.a - - osd.0 - - osd.1 -- - mon.b - - mon.c - - osd.2 - - osd.3 -- - client.0 - - client.1 - - client.2 - - client.3 - - client.4 -overrides: - ceph: 
- log-whitelist: - - scrub mismatch - - ScrubResult - - failed to encode map - conf: - mon: - mon warn on legacy crush tunables: false - mon debug unsafe allow tier with nonempty snaps: true diff --git a/suites/upgrade/firefly-x/parallel/1-firefly-install/firefly.yaml b/suites/upgrade/firefly-x/parallel/1-firefly-install/firefly.yaml deleted file mode 100644 index 9f281319604..00000000000 --- a/suites/upgrade/firefly-x/parallel/1-firefly-install/firefly.yaml +++ /dev/null @@ -1,11 +0,0 @@ -tasks: -- install: - branch: firefly -- print: "**** done installing firefly" -- ceph: - fs: xfs -- print: "**** done ceph" -- parallel: - - workload - - upgrade-sequence -- print: "**** done parallel" diff --git a/suites/upgrade/firefly-x/parallel/2-workload/+ b/suites/upgrade/firefly-x/parallel/2-workload/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/firefly-x/parallel/2-workload/ec-rados-parallel.yaml b/suites/upgrade/firefly-x/parallel/2-workload/ec-rados-parallel.yaml deleted file mode 120000 index c4de249ed96..00000000000 --- a/suites/upgrade/firefly-x/parallel/2-workload/ec-rados-parallel.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../../erasure-code/ec-rados-parallel.yaml \ No newline at end of file diff --git a/suites/upgrade/firefly-x/parallel/2-workload/rados_api.yaml b/suites/upgrade/firefly-x/parallel/2-workload/rados_api.yaml deleted file mode 100644 index f8c18a3cb7e..00000000000 --- a/suites/upgrade/firefly-x/parallel/2-workload/rados_api.yaml +++ /dev/null @@ -1,8 +0,0 @@ -workload: - parallel: - - workunit: - branch: firefly - clients: - client.1: - - cls - - print: "**** done cls 2-workload" diff --git a/suites/upgrade/firefly-x/parallel/2-workload/rados_loadgenbig.yaml b/suites/upgrade/firefly-x/parallel/2-workload/rados_loadgenbig.yaml deleted file mode 100644 index 6f1429acb77..00000000000 --- a/suites/upgrade/firefly-x/parallel/2-workload/rados_loadgenbig.yaml +++ /dev/null @@ -1,8 +0,0 @@ -workload: - parallel: - - 
workunit: - branch: firefly - clients: - client.2: - - rados/load-gen-big.sh - - print: "**** done rados/load-gen-big.sh 2-workload" diff --git a/suites/upgrade/firefly-x/parallel/2-workload/test_rbd_api.yaml b/suites/upgrade/firefly-x/parallel/2-workload/test_rbd_api.yaml deleted file mode 100644 index 0339c575096..00000000000 --- a/suites/upgrade/firefly-x/parallel/2-workload/test_rbd_api.yaml +++ /dev/null @@ -1,8 +0,0 @@ -workload: - parallel: - - workunit: - branch: firefly - clients: - client.3: - - rbd/test_librbd.sh - - print: "**** done rbd/test_librbd.sh 2-workload" diff --git a/suites/upgrade/firefly-x/parallel/2-workload/test_rbd_python.yaml b/suites/upgrade/firefly-x/parallel/2-workload/test_rbd_python.yaml deleted file mode 100644 index fce9039ed41..00000000000 --- a/suites/upgrade/firefly-x/parallel/2-workload/test_rbd_python.yaml +++ /dev/null @@ -1,8 +0,0 @@ -workload: - parallel: - - workunit: - branch: firefly - clients: - client.4: - - rbd/test_librbd_python.sh - - print: "**** done rbd/test_librbd_python.sh 2-workload" diff --git a/suites/upgrade/firefly-x/parallel/3-upgrade-sequence/upgrade-all.yaml b/suites/upgrade/firefly-x/parallel/3-upgrade-sequence/upgrade-all.yaml deleted file mode 100644 index f5d10cdfcab..00000000000 --- a/suites/upgrade/firefly-x/parallel/3-upgrade-sequence/upgrade-all.yaml +++ /dev/null @@ -1,6 +0,0 @@ -upgrade-sequence: - sequential: - - install.upgrade: - mon.a: - mon.b: - - ceph.restart: [mon.a, mon.b, mon.c, mds.a, osd.0, osd.1, osd.2, osd.3] diff --git a/suites/upgrade/firefly-x/parallel/3-upgrade-sequence/upgrade-mon-osd-mds.yaml b/suites/upgrade/firefly-x/parallel/3-upgrade-sequence/upgrade-mon-osd-mds.yaml deleted file mode 100644 index 0a87823a8d0..00000000000 --- a/suites/upgrade/firefly-x/parallel/3-upgrade-sequence/upgrade-mon-osd-mds.yaml +++ /dev/null @@ -1,35 +0,0 @@ -upgrade-sequence: - sequential: - - install.upgrade: - mon.a: - - print: "**** done install.upgrade mon.a to the version from 
teuthology-suite arg" - - ceph.restart: - daemons: [mon.a] - wait-for-healthy: true - - sleep: - duration: 60 - - ceph.restart: - daemons: [osd.0, osd.1] - wait-for-healthy: true - - sleep: - duration: 60 - - ceph.restart: [mds.a] - - sleep: - duration: 60 - - print: "**** running mixed versions of osds and mons" - - exec: - mon.b: - - ceph osd crush tunables firefly - - install.upgrade: - mon.b: - - print: "**** done install.upgrade mon.b to the version from teuthology-suite arg" - - ceph.restart: - daemons: [mon.b, mon.c] - wait-for-healthy: true - - sleep: - duration: 60 - - ceph.restart: - daemons: [osd.2, osd.3] - wait-for-healthy: true - - sleep: - duration: 60 diff --git a/suites/upgrade/firefly-x/parallel/4-final-workload/+ b/suites/upgrade/firefly-x/parallel/4-final-workload/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/firefly-x/parallel/4-final-workload/rados-snaps-few-objects.yaml b/suites/upgrade/firefly-x/parallel/4-final-workload/rados-snaps-few-objects.yaml deleted file mode 100644 index 112260734b0..00000000000 --- a/suites/upgrade/firefly-x/parallel/4-final-workload/rados-snaps-few-objects.yaml +++ /dev/null @@ -1,13 +0,0 @@ -tasks: - - rados: - clients: [client.1] - ops: 4000 - objects: 50 - op_weights: - read: 100 - write: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 - - print: "**** done rados 4-final-workload" diff --git a/suites/upgrade/firefly-x/parallel/4-final-workload/rados_loadgenmix.yaml b/suites/upgrade/firefly-x/parallel/4-final-workload/rados_loadgenmix.yaml deleted file mode 100644 index d4a8006e906..00000000000 --- a/suites/upgrade/firefly-x/parallel/4-final-workload/rados_loadgenmix.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: - - workunit: - clients: - client.1: - - rados/load-gen-mix.sh - - print: "**** done rados/load-gen-mix.sh 4-final-workload" diff --git a/suites/upgrade/firefly-x/parallel/4-final-workload/rados_mon_thrash.yaml 
b/suites/upgrade/firefly-x/parallel/4-final-workload/rados_mon_thrash.yaml deleted file mode 100644 index f1e30f2419d..00000000000 --- a/suites/upgrade/firefly-x/parallel/4-final-workload/rados_mon_thrash.yaml +++ /dev/null @@ -1,10 +0,0 @@ -tasks: - - mon_thrash: - revive_delay: 20 - thrash_delay: 1 - - print: "**** done mon_thrash 4-final-workload" - - workunit: - clients: - client.1: - - rados/test.sh - - print: "**** done rados/test.sh 4-final-workload" diff --git a/suites/upgrade/firefly-x/parallel/4-final-workload/rbd_cls.yaml b/suites/upgrade/firefly-x/parallel/4-final-workload/rbd_cls.yaml deleted file mode 100644 index ed75230497d..00000000000 --- a/suites/upgrade/firefly-x/parallel/4-final-workload/rbd_cls.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: - - workunit: - clients: - client.1: - - cls/test_cls_rbd.sh - - print: "**** done cls/test_cls_rbd.sh 4-final-workload" diff --git a/suites/upgrade/firefly-x/parallel/4-final-workload/rbd_import_export.yaml b/suites/upgrade/firefly-x/parallel/4-final-workload/rbd_import_export.yaml deleted file mode 100644 index 2c66c28a276..00000000000 --- a/suites/upgrade/firefly-x/parallel/4-final-workload/rbd_import_export.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: - - workunit: - clients: - client.1: - - rbd/import_export.sh - env: - RBD_CREATE_ARGS: --new-format - - print: "**** done rbd/import_export.sh 4-final-workload" diff --git a/suites/upgrade/firefly-x/parallel/4-final-workload/rgw_swift.yaml b/suites/upgrade/firefly-x/parallel/4-final-workload/rgw_swift.yaml deleted file mode 100644 index 18089bec6dd..00000000000 --- a/suites/upgrade/firefly-x/parallel/4-final-workload/rgw_swift.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: - - rgw: [client.1] - - print: "**** done rgw 4-final-workload" - - swift: - client.1: - rgw_server: client.1 - - print: "**** done swift 4-final-workload" diff --git a/suites/upgrade/firefly-x/parallel/distros b/suites/upgrade/firefly-x/parallel/distros deleted file mode 120000 index 
79010c36a59..00000000000 --- a/suites/upgrade/firefly-x/parallel/distros +++ /dev/null @@ -1 +0,0 @@ -../../../../distros/supported \ No newline at end of file diff --git a/suites/upgrade/firefly-x/stress-split-erasure-code/% b/suites/upgrade/firefly-x/stress-split-erasure-code/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/firefly-x/stress-split-erasure-code/0-cluster b/suites/upgrade/firefly-x/stress-split-erasure-code/0-cluster deleted file mode 120000 index 6dceffa2f5b..00000000000 --- a/suites/upgrade/firefly-x/stress-split-erasure-code/0-cluster +++ /dev/null @@ -1 +0,0 @@ -../stress-split/0-cluster \ No newline at end of file diff --git a/suites/upgrade/firefly-x/stress-split-erasure-code/1-firefly-install b/suites/upgrade/firefly-x/stress-split-erasure-code/1-firefly-install deleted file mode 120000 index fad6450f52d..00000000000 --- a/suites/upgrade/firefly-x/stress-split-erasure-code/1-firefly-install +++ /dev/null @@ -1 +0,0 @@ -../stress-split/1-firefly-install \ No newline at end of file diff --git a/suites/upgrade/firefly-x/stress-split-erasure-code/2-partial-upgrade b/suites/upgrade/firefly-x/stress-split-erasure-code/2-partial-upgrade deleted file mode 120000 index c3a4e2c28e5..00000000000 --- a/suites/upgrade/firefly-x/stress-split-erasure-code/2-partial-upgrade +++ /dev/null @@ -1 +0,0 @@ -../stress-split/2-partial-upgrade \ No newline at end of file diff --git a/suites/upgrade/firefly-x/stress-split-erasure-code/3-thrash/default.yaml b/suites/upgrade/firefly-x/stress-split-erasure-code/3-thrash/default.yaml deleted file mode 100644 index f41b9be4b02..00000000000 --- a/suites/upgrade/firefly-x/stress-split-erasure-code/3-thrash/default.yaml +++ /dev/null @@ -1,12 +0,0 @@ -overrides: - ceph: - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost - - log bound mismatch -tasks: -- thrashosds: - timeout: 1200 - chance_pgnum_grow: 1 - chance_pgpnum_fix: 1 - min_in: 4 diff --git 
a/suites/upgrade/firefly-x/stress-split-erasure-code/4-mon b/suites/upgrade/firefly-x/stress-split-erasure-code/4-mon deleted file mode 120000 index 0ea7dd5aa8d..00000000000 --- a/suites/upgrade/firefly-x/stress-split-erasure-code/4-mon +++ /dev/null @@ -1 +0,0 @@ -../stress-split/4-mon \ No newline at end of file diff --git a/suites/upgrade/firefly-x/stress-split-erasure-code/5-workload/ec-rados-default.yaml b/suites/upgrade/firefly-x/stress-split-erasure-code/5-workload/ec-rados-default.yaml deleted file mode 120000 index a8a0ae69405..00000000000 --- a/suites/upgrade/firefly-x/stress-split-erasure-code/5-workload/ec-rados-default.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../../erasure-code/ec-rados-default.yaml \ No newline at end of file diff --git a/suites/upgrade/firefly-x/stress-split-erasure-code/6-next-mon b/suites/upgrade/firefly-x/stress-split-erasure-code/6-next-mon deleted file mode 120000 index 0f00e43e5fd..00000000000 --- a/suites/upgrade/firefly-x/stress-split-erasure-code/6-next-mon +++ /dev/null @@ -1 +0,0 @@ -../stress-split/6-next-mon \ No newline at end of file diff --git a/suites/upgrade/firefly-x/stress-split-erasure-code/8-next-mon b/suites/upgrade/firefly-x/stress-split-erasure-code/8-next-mon deleted file mode 120000 index 726c6a47a2e..00000000000 --- a/suites/upgrade/firefly-x/stress-split-erasure-code/8-next-mon +++ /dev/null @@ -1 +0,0 @@ -../stress-split/8-next-mon \ No newline at end of file diff --git a/suites/upgrade/firefly-x/stress-split-erasure-code/9-workload/ec-no-lrc.yaml b/suites/upgrade/firefly-x/stress-split-erasure-code/9-workload/ec-no-lrc.yaml deleted file mode 100644 index 9814a3fa18d..00000000000 --- a/suites/upgrade/firefly-x/stress-split-erasure-code/9-workload/ec-no-lrc.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# -# The lrc plugin cannot be used because some OSD are not upgraded -# yet and would crash. 
-# -tasks: -- exec: - mon.a: - - |- - ceph osd erasure-code-profile set profile-lrc plugin=lrc 2>&1 | grep "unsupported by:" diff --git a/suites/upgrade/firefly-x/stress-split-erasure-code/9-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml b/suites/upgrade/firefly-x/stress-split-erasure-code/9-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml deleted file mode 120000 index 4b9d9a44a24..00000000000 --- a/suites/upgrade/firefly-x/stress-split-erasure-code/9-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../../erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml \ No newline at end of file diff --git a/suites/upgrade/firefly-x/stress-split-erasure-code/distros b/suites/upgrade/firefly-x/stress-split-erasure-code/distros deleted file mode 120000 index 8d4309788e0..00000000000 --- a/suites/upgrade/firefly-x/stress-split-erasure-code/distros +++ /dev/null @@ -1 +0,0 @@ -../stress-split/distros \ No newline at end of file diff --git a/suites/upgrade/firefly-x/stress-split/% b/suites/upgrade/firefly-x/stress-split/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/firefly-x/stress-split/0-cluster/start.yaml b/suites/upgrade/firefly-x/stress-split/0-cluster/start.yaml deleted file mode 100644 index b106b212e6c..00000000000 --- a/suites/upgrade/firefly-x/stress-split/0-cluster/start.yaml +++ /dev/null @@ -1,30 +0,0 @@ -overrides: - ceph: - log-whitelist: - - Missing health data for MDS - - failed to encode map - - soft lockup - - detected stalls on CPUs - conf: - mon: - mon warn on legacy crush tunables: false -roles: -- - mon.a - - mon.b - - mon.c - - mds.a - - osd.0 - - osd.1 - - osd.2 - - osd.3 - - osd.4 - - osd.5 - - osd.6 -- - osd.7 - - osd.8 - - osd.9 - - osd.10 - - osd.11 - - osd.12 - - osd.13 -- - client.0 diff --git a/suites/upgrade/firefly-x/stress-split/1-firefly-install/firefly.yaml b/suites/upgrade/firefly-x/stress-split/1-firefly-install/firefly.yaml deleted file mode 100644 index 
a3573817aa0..00000000000 --- a/suites/upgrade/firefly-x/stress-split/1-firefly-install/firefly.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- install: - branch: firefly -- ceph: - fs: xfs diff --git a/suites/upgrade/firefly-x/stress-split/2-partial-upgrade/firsthalf.yaml b/suites/upgrade/firefly-x/stress-split/2-partial-upgrade/firsthalf.yaml deleted file mode 100644 index 52ab10fe780..00000000000 --- a/suites/upgrade/firefly-x/stress-split/2-partial-upgrade/firsthalf.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- install.upgrade: - osd.0: -- ceph.restart: - daemons: [osd.0, osd.1, osd.2, osd.3, osd.4, osd.5, osd.6] diff --git a/suites/upgrade/firefly-x/stress-split/3-thrash/default.yaml b/suites/upgrade/firefly-x/stress-split/3-thrash/default.yaml deleted file mode 100644 index 21d4c752075..00000000000 --- a/suites/upgrade/firefly-x/stress-split/3-thrash/default.yaml +++ /dev/null @@ -1,11 +0,0 @@ -overrides: - ceph: - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost - - log bound mismatch -tasks: -- thrashosds: - timeout: 1200 - chance_pgnum_grow: 1 - chance_pgpnum_fix: 1 diff --git a/suites/upgrade/firefly-x/stress-split/4-mon/mona.yaml b/suites/upgrade/firefly-x/stress-split/4-mon/mona.yaml deleted file mode 100644 index b6ffb3323d1..00000000000 --- a/suites/upgrade/firefly-x/stress-split/4-mon/mona.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- ceph.restart: - daemons: [mon.a] - wait-for-healthy: false - wait-for-osds-up: true diff --git a/suites/upgrade/firefly-x/stress-split/5-workload/+ b/suites/upgrade/firefly-x/stress-split/5-workload/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/firefly-x/stress-split/5-workload/rbd-cls.yaml b/suites/upgrade/firefly-x/stress-split/5-workload/rbd-cls.yaml deleted file mode 100644 index 46c61b49a76..00000000000 --- a/suites/upgrade/firefly-x/stress-split/5-workload/rbd-cls.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- workunit: - branch: firefly - 
clients: - client.0: - - cls/test_cls_rbd.sh diff --git a/suites/upgrade/firefly-x/stress-split/5-workload/rbd-import-export.yaml b/suites/upgrade/firefly-x/stress-split/5-workload/rbd-import-export.yaml deleted file mode 100644 index 7fddb418a56..00000000000 --- a/suites/upgrade/firefly-x/stress-split/5-workload/rbd-import-export.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- workunit: - branch: firefly - clients: - client.0: - - rbd/import_export.sh - env: - RBD_CREATE_ARGS: --new-format diff --git a/suites/upgrade/firefly-x/stress-split/5-workload/readwrite.yaml b/suites/upgrade/firefly-x/stress-split/5-workload/readwrite.yaml deleted file mode 100644 index c53e52b0872..00000000000 --- a/suites/upgrade/firefly-x/stress-split/5-workload/readwrite.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 500 - op_weights: - read: 45 - write: 45 - delete: 10 diff --git a/suites/upgrade/firefly-x/stress-split/5-workload/snaps-few-objects.yaml b/suites/upgrade/firefly-x/stress-split/5-workload/snaps-few-objects.yaml deleted file mode 100644 index c54039766c0..00000000000 --- a/suites/upgrade/firefly-x/stress-split/5-workload/snaps-few-objects.yaml +++ /dev/null @@ -1,12 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 50 - op_weights: - read: 100 - write: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 diff --git a/suites/upgrade/firefly-x/stress-split/6-next-mon/monb.yaml b/suites/upgrade/firefly-x/stress-split/6-next-mon/monb.yaml deleted file mode 100644 index 513890c41c0..00000000000 --- a/suites/upgrade/firefly-x/stress-split/6-next-mon/monb.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- ceph.restart: - daemons: [mon.b] - wait-for-healthy: false - wait-for-osds-up: true diff --git a/suites/upgrade/firefly-x/stress-split/7-workload/+ b/suites/upgrade/firefly-x/stress-split/7-workload/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git 
a/suites/upgrade/firefly-x/stress-split/7-workload/radosbench.yaml b/suites/upgrade/firefly-x/stress-split/7-workload/radosbench.yaml deleted file mode 100644 index 3940870fce0..00000000000 --- a/suites/upgrade/firefly-x/stress-split/7-workload/radosbench.yaml +++ /dev/null @@ -1,4 +0,0 @@ -tasks: -- radosbench: - clients: [client.0] - time: 1800 diff --git a/suites/upgrade/firefly-x/stress-split/7-workload/rbd_api.yaml b/suites/upgrade/firefly-x/stress-split/7-workload/rbd_api.yaml deleted file mode 100644 index be46ba0a25c..00000000000 --- a/suites/upgrade/firefly-x/stress-split/7-workload/rbd_api.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- workunit: - branch: firefly - clients: - client.0: - - rbd/test_librbd.sh diff --git a/suites/upgrade/firefly-x/stress-split/8-next-mon/monc.yaml b/suites/upgrade/firefly-x/stress-split/8-next-mon/monc.yaml deleted file mode 100644 index e9273236ba3..00000000000 --- a/suites/upgrade/firefly-x/stress-split/8-next-mon/monc.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph.restart: - daemons: [mon.c] - wait-for-healthy: false - wait-for-osds-up: true -- ceph.wait_for_mon_quorum: [a, b, c] diff --git a/suites/upgrade/firefly-x/stress-split/9-workload/+ b/suites/upgrade/firefly-x/stress-split/9-workload/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/firefly-x/stress-split/9-workload/rbd-python.yaml b/suites/upgrade/firefly-x/stress-split/9-workload/rbd-python.yaml deleted file mode 100644 index 5b5412661ee..00000000000 --- a/suites/upgrade/firefly-x/stress-split/9-workload/rbd-python.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- workunit: - branch: firefly - clients: - client.0: - - rbd/test_librbd_python.sh diff --git a/suites/upgrade/firefly-x/stress-split/9-workload/rgw-swift.yaml b/suites/upgrade/firefly-x/stress-split/9-workload/rgw-swift.yaml deleted file mode 100644 index bfaae1a943f..00000000000 --- a/suites/upgrade/firefly-x/stress-split/9-workload/rgw-swift.yaml +++ /dev/null @@ 
-1,7 +0,0 @@ -tasks: -- rgw: - client.0: - default_idle_timeout: 300 -- swift: - client.0: - rgw_server: client.0 diff --git a/suites/upgrade/firefly-x/stress-split/9-workload/snaps-many-objects.yaml b/suites/upgrade/firefly-x/stress-split/9-workload/snaps-many-objects.yaml deleted file mode 100644 index 9e311c946e1..00000000000 --- a/suites/upgrade/firefly-x/stress-split/9-workload/snaps-many-objects.yaml +++ /dev/null @@ -1,12 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 500 - op_weights: - read: 100 - write: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 diff --git a/suites/upgrade/firefly-x/stress-split/distros b/suites/upgrade/firefly-x/stress-split/distros deleted file mode 120000 index 79010c36a59..00000000000 --- a/suites/upgrade/firefly-x/stress-split/distros +++ /dev/null @@ -1 +0,0 @@ -../../../../distros/supported \ No newline at end of file diff --git a/suites/upgrade/giant-x/parallel/% b/suites/upgrade/giant-x/parallel/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/giant-x/parallel/0-cluster/start.yaml b/suites/upgrade/giant-x/parallel/0-cluster/start.yaml deleted file mode 100644 index 27e08f394e8..00000000000 --- a/suites/upgrade/giant-x/parallel/0-cluster/start.yaml +++ /dev/null @@ -1,24 +0,0 @@ -roles: -- - mon.a - - mds.a - - osd.0 - - osd.1 -- - mon.b - - mon.c - - osd.2 - - osd.3 -- - client.0 - - client.1 - - client.2 - - client.3 - - client.4 -overrides: - ceph: - log-whitelist: - - scrub mismatch - - ScrubResult - - failed to encode map - conf: - mon: - mon warn on legacy crush tunables: false - mon debug unsafe allow tier with nonempty snaps: true diff --git a/suites/upgrade/giant-x/parallel/1-giant-install/giant.yaml b/suites/upgrade/giant-x/parallel/1-giant-install/giant.yaml deleted file mode 100644 index b09a6b74b3d..00000000000 --- a/suites/upgrade/giant-x/parallel/1-giant-install/giant.yaml +++ /dev/null @@ -1,11 +0,0 @@ -tasks: -- install: - 
branch: giant -- print: "**** done installing giant" -- ceph: - fs: xfs -- print: "**** done ceph" -- parallel: - - workload - - upgrade-sequence -- print: "**** done parallel" diff --git a/suites/upgrade/giant-x/parallel/2-workload/parallel_run/+ b/suites/upgrade/giant-x/parallel/2-workload/parallel_run/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/giant-x/parallel/2-workload/parallel_run/ec-rados-parallel.yaml b/suites/upgrade/giant-x/parallel/2-workload/parallel_run/ec-rados-parallel.yaml deleted file mode 120000 index 64b3cabfb0c..00000000000 --- a/suites/upgrade/giant-x/parallel/2-workload/parallel_run/ec-rados-parallel.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../../../erasure-code/ec-rados-parallel.yaml \ No newline at end of file diff --git a/suites/upgrade/giant-x/parallel/2-workload/parallel_run/rados_api.yaml b/suites/upgrade/giant-x/parallel/2-workload/parallel_run/rados_api.yaml deleted file mode 100644 index 3fa120f11f9..00000000000 --- a/suites/upgrade/giant-x/parallel/2-workload/parallel_run/rados_api.yaml +++ /dev/null @@ -1,8 +0,0 @@ -workload: - parallel: - - workunit: - branch: giant - clients: - client.1: - - cls - - print: "**** done cls 2-workload parallel" diff --git a/suites/upgrade/giant-x/parallel/2-workload/parallel_run/rados_loadgenbig.yaml b/suites/upgrade/giant-x/parallel/2-workload/parallel_run/rados_loadgenbig.yaml deleted file mode 100644 index 976ef726bf7..00000000000 --- a/suites/upgrade/giant-x/parallel/2-workload/parallel_run/rados_loadgenbig.yaml +++ /dev/null @@ -1,8 +0,0 @@ -workload: - parallel: - - workunit: - branch: giant - clients: - client.2: - - rados/load-gen-big.sh - - print: "**** done rados/load-gen-big.sh 2-workload parallel" diff --git a/suites/upgrade/giant-x/parallel/2-workload/parallel_run/test_cache-pool-snaps.yaml b/suites/upgrade/giant-x/parallel/2-workload/parallel_run/test_cache-pool-snaps.yaml deleted file mode 100644 index 676a9e8e89c..00000000000 --- 
a/suites/upgrade/giant-x/parallel/2-workload/parallel_run/test_cache-pool-snaps.yaml +++ /dev/null @@ -1,36 +0,0 @@ -overrides: - ceph: - log-whitelist: - - must scrub before tier agent can activate -workload: - parallel: - - sequential: - - exec: - client.0: - - ceph osd pool create base 4 - - ceph osd pool create cache 4 - - ceph osd tier add base cache - - ceph osd tier cache-mode cache writeback - - ceph osd tier set-overlay base cache - - ceph osd pool set cache hit_set_type bloom - - ceph osd pool set cache hit_set_count 8 - - ceph osd pool set cache hit_set_period 3600 - - ceph osd pool set cache target_max_objects 250 - - rados: - clients: [client.0] - pools: [base] - ops: 4000 - objects: 500 - pool_snaps: true - op_weights: - read: 100 - write: 100 - delete: 50 - copy_from: 50 - flush: 50 - try_flush: 50 - evict: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 - - print: "**** done test_cache-pool-snaps 2-workload parallel_run" diff --git a/suites/upgrade/giant-x/parallel/2-workload/parallel_run/test_rbd_api.yaml b/suites/upgrade/giant-x/parallel/2-workload/parallel_run/test_rbd_api.yaml deleted file mode 100644 index 8db5561196c..00000000000 --- a/suites/upgrade/giant-x/parallel/2-workload/parallel_run/test_rbd_api.yaml +++ /dev/null @@ -1,8 +0,0 @@ -workload: - parallel: - - workunit: - branch: giant - clients: - client.3: - - rbd/test_librbd.sh - - print: "**** done rbd/test_librbd.sh 2-workload parallel" diff --git a/suites/upgrade/giant-x/parallel/2-workload/parallel_run/test_rbd_python.yaml b/suites/upgrade/giant-x/parallel/2-workload/parallel_run/test_rbd_python.yaml deleted file mode 100644 index 0e9b05974f9..00000000000 --- a/suites/upgrade/giant-x/parallel/2-workload/parallel_run/test_rbd_python.yaml +++ /dev/null @@ -1,8 +0,0 @@ -workload: - parallel: - - workunit: - branch: giant - clients: - client.4: - - rbd/test_librbd_python.sh - - print: "**** done rbd/test_librbd_python.sh 2-workload parallel" diff --git 
a/suites/upgrade/giant-x/parallel/2-workload/sequential_run/ec-rados-default.yaml b/suites/upgrade/giant-x/parallel/2-workload/sequential_run/ec-rados-default.yaml deleted file mode 120000 index fc05f580a28..00000000000 --- a/suites/upgrade/giant-x/parallel/2-workload/sequential_run/ec-rados-default.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../../../erasure-code/ec-rados-default.yaml \ No newline at end of file diff --git a/suites/upgrade/giant-x/parallel/2-workload/sequential_run/rados_api.yaml b/suites/upgrade/giant-x/parallel/2-workload/sequential_run/rados_api.yaml deleted file mode 100644 index f9330607158..00000000000 --- a/suites/upgrade/giant-x/parallel/2-workload/sequential_run/rados_api.yaml +++ /dev/null @@ -1,8 +0,0 @@ -workload: - sequential: - - workunit: - branch: giant - clients: - client.1: - - cls - - print: "**** done cls 2-workload sequential" diff --git a/suites/upgrade/giant-x/parallel/2-workload/sequential_run/rados_loadgenbig.yaml b/suites/upgrade/giant-x/parallel/2-workload/sequential_run/rados_loadgenbig.yaml deleted file mode 100644 index 7330f84fb27..00000000000 --- a/suites/upgrade/giant-x/parallel/2-workload/sequential_run/rados_loadgenbig.yaml +++ /dev/null @@ -1,8 +0,0 @@ -workload: - sequential: - - workunit: - branch: giant - clients: - client.2: - - rados/load-gen-big.sh - - print: "**** done rados/load-gen-big.sh 2-workload sequential" diff --git a/suites/upgrade/giant-x/parallel/2-workload/sequential_run/test_cache-pool-snaps.yaml b/suites/upgrade/giant-x/parallel/2-workload/sequential_run/test_cache-pool-snaps.yaml deleted file mode 100644 index d3b83fa7d95..00000000000 --- a/suites/upgrade/giant-x/parallel/2-workload/sequential_run/test_cache-pool-snaps.yaml +++ /dev/null @@ -1,35 +0,0 @@ -overrides: - ceph: - log-whitelist: - - must scrub before tier agent can activate -workload: - sequential: - - exec: - client.0: - - ceph osd pool create base 4 - - ceph osd pool create cache 4 - - ceph osd tier add base cache - - ceph osd 
tier cache-mode cache writeback - - ceph osd tier set-overlay base cache - - ceph osd pool set cache hit_set_type bloom - - ceph osd pool set cache hit_set_count 8 - - ceph osd pool set cache hit_set_period 3600 - - ceph osd pool set cache target_max_objects 250 - - rados: - clients: [client.0] - pools: [base] - ops: 4000 - objects: 500 - pool_snaps: true - op_weights: - read: 100 - write: 100 - delete: 50 - copy_from: 50 - flush: 50 - try_flush: 50 - evict: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 - - print: "**** done test_cache-pool-snaps 2-workload sequential_run" diff --git a/suites/upgrade/giant-x/parallel/2-workload/sequential_run/test_rbd_api.yaml b/suites/upgrade/giant-x/parallel/2-workload/sequential_run/test_rbd_api.yaml deleted file mode 100644 index d8c1539e8b4..00000000000 --- a/suites/upgrade/giant-x/parallel/2-workload/sequential_run/test_rbd_api.yaml +++ /dev/null @@ -1,8 +0,0 @@ -workload: - sequential: - - workunit: - branch: giant - clients: - client.3: - - rbd/test_librbd.sh - - print: "**** done rbd/test_librbd.sh 2-workload sequential" diff --git a/suites/upgrade/giant-x/parallel/2-workload/sequential_run/test_rbd_python.yaml b/suites/upgrade/giant-x/parallel/2-workload/sequential_run/test_rbd_python.yaml deleted file mode 100644 index e62bcb5515e..00000000000 --- a/suites/upgrade/giant-x/parallel/2-workload/sequential_run/test_rbd_python.yaml +++ /dev/null @@ -1,8 +0,0 @@ -workload: - sequential: - - workunit: - branch: giant - clients: - client.4: - - rbd/test_librbd_python.sh - - print: "**** done rbd/test_librbd_python.sh 2-workload sequential" diff --git a/suites/upgrade/giant-x/parallel/3-upgrade-sequence/upgrade-all.yaml b/suites/upgrade/giant-x/parallel/3-upgrade-sequence/upgrade-all.yaml deleted file mode 100644 index 3f74d7d14a3..00000000000 --- a/suites/upgrade/giant-x/parallel/3-upgrade-sequence/upgrade-all.yaml +++ /dev/null @@ -1,8 +0,0 @@ -upgrade-sequence: - sequential: - - install.upgrade: - mon.a: - mon.b: - - 
print: "**** done install.upgrade mon.a and mon.b" - - ceph.restart: [mon.a, mon.b, mon.c, mds.a, osd.0, osd.1, osd.2, osd.3] - - print: "**** done ceph.restart all" diff --git a/suites/upgrade/giant-x/parallel/3-upgrade-sequence/upgrade-mon-osd-mds.yaml b/suites/upgrade/giant-x/parallel/3-upgrade-sequence/upgrade-mon-osd-mds.yaml deleted file mode 100644 index c39a5687e26..00000000000 --- a/suites/upgrade/giant-x/parallel/3-upgrade-sequence/upgrade-mon-osd-mds.yaml +++ /dev/null @@ -1,37 +0,0 @@ -upgrade-sequence: - sequential: - - install.upgrade: - mon.a: - - print: "**** done install.upgrade mon.a to the version from teuthology-suite arg" - - ceph.restart: - daemons: [mon.a] - wait-for-healthy: true - - sleep: - duration: 60 - - ceph.restart: - daemons: [osd.0, osd.1] - wait-for-healthy: true - - sleep: - duration: 60 - - ceph.restart: [mds.a] - - sleep: - duration: 60 - - print: "**** running mixed versions of osds and mons" -#do we need to use "ceph osd crush tunables giant" ? - - exec: - mon.b: - - ceph osd crush tunables firefly - - print: "**** done ceph osd crush tunables firefly" - - install.upgrade: - mon.b: - - print: "**** done install.upgrade mon.b to the version from teuthology-suite arg" - - ceph.restart: - daemons: [mon.b, mon.c] - wait-for-healthy: true - - sleep: - duration: 60 - - ceph.restart: - daemons: [osd.2, osd.3] - wait-for-healthy: true - - sleep: - duration: 60 diff --git a/suites/upgrade/giant-x/parallel/4-final-workload/+ b/suites/upgrade/giant-x/parallel/4-final-workload/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/giant-x/parallel/4-final-workload/rados-snaps-few-objects.yaml b/suites/upgrade/giant-x/parallel/4-final-workload/rados-snaps-few-objects.yaml deleted file mode 100644 index 112260734b0..00000000000 --- a/suites/upgrade/giant-x/parallel/4-final-workload/rados-snaps-few-objects.yaml +++ /dev/null @@ -1,13 +0,0 @@ -tasks: - - rados: - clients: [client.1] - ops: 4000 - objects: 50 - 
op_weights: - read: 100 - write: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 - - print: "**** done rados 4-final-workload" diff --git a/suites/upgrade/giant-x/parallel/4-final-workload/rados_loadgenmix.yaml b/suites/upgrade/giant-x/parallel/4-final-workload/rados_loadgenmix.yaml deleted file mode 100644 index d4a8006e906..00000000000 --- a/suites/upgrade/giant-x/parallel/4-final-workload/rados_loadgenmix.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: - - workunit: - clients: - client.1: - - rados/load-gen-mix.sh - - print: "**** done rados/load-gen-mix.sh 4-final-workload" diff --git a/suites/upgrade/giant-x/parallel/4-final-workload/rados_mon_thrash.yaml b/suites/upgrade/giant-x/parallel/4-final-workload/rados_mon_thrash.yaml deleted file mode 100644 index f1e30f2419d..00000000000 --- a/suites/upgrade/giant-x/parallel/4-final-workload/rados_mon_thrash.yaml +++ /dev/null @@ -1,10 +0,0 @@ -tasks: - - mon_thrash: - revive_delay: 20 - thrash_delay: 1 - - print: "**** done mon_thrash 4-final-workload" - - workunit: - clients: - client.1: - - rados/test.sh - - print: "**** done rados/test.sh 4-final-workload" diff --git a/suites/upgrade/giant-x/parallel/4-final-workload/rbd_cls.yaml b/suites/upgrade/giant-x/parallel/4-final-workload/rbd_cls.yaml deleted file mode 100644 index ed75230497d..00000000000 --- a/suites/upgrade/giant-x/parallel/4-final-workload/rbd_cls.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: - - workunit: - clients: - client.1: - - cls/test_cls_rbd.sh - - print: "**** done cls/test_cls_rbd.sh 4-final-workload" diff --git a/suites/upgrade/giant-x/parallel/4-final-workload/rbd_import_export.yaml b/suites/upgrade/giant-x/parallel/4-final-workload/rbd_import_export.yaml deleted file mode 100644 index 2c66c28a276..00000000000 --- a/suites/upgrade/giant-x/parallel/4-final-workload/rbd_import_export.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: - - workunit: - clients: - client.1: - - rbd/import_export.sh - env: - RBD_CREATE_ARGS: --new-format - - 
print: "**** done rbd/import_export.sh 4-final-workload" diff --git a/suites/upgrade/giant-x/parallel/4-final-workload/rgw_swift.yaml b/suites/upgrade/giant-x/parallel/4-final-workload/rgw_swift.yaml deleted file mode 100644 index 18089bec6dd..00000000000 --- a/suites/upgrade/giant-x/parallel/4-final-workload/rgw_swift.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: - - rgw: [client.1] - - print: "**** done rgw 4-final-workload" - - swift: - client.1: - rgw_server: client.1 - - print: "**** done swift 4-final-workload" diff --git a/suites/upgrade/giant-x/parallel/distros b/suites/upgrade/giant-x/parallel/distros deleted file mode 120000 index 79010c36a59..00000000000 --- a/suites/upgrade/giant-x/parallel/distros +++ /dev/null @@ -1 +0,0 @@ -../../../../distros/supported \ No newline at end of file diff --git a/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/% b/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/0-cluster b/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/0-cluster deleted file mode 120000 index 6dceffa2f5b..00000000000 --- a/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/0-cluster +++ /dev/null @@ -1 +0,0 @@ -../stress-split/0-cluster \ No newline at end of file diff --git a/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/1-giant-install b/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/1-giant-install deleted file mode 120000 index 2e0b946d2d2..00000000000 --- a/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/1-giant-install +++ /dev/null @@ -1 +0,0 @@ -../stress-split/1-giant-install/ \ No newline at end of file diff --git a/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/2-partial-upgrade b/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/2-partial-upgrade deleted file mode 120000 index c3a4e2c28e5..00000000000 --- 
a/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/2-partial-upgrade +++ /dev/null @@ -1 +0,0 @@ -../stress-split/2-partial-upgrade \ No newline at end of file diff --git a/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/3-thrash/default.yaml b/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/3-thrash/default.yaml deleted file mode 100644 index a33d4e3f4e6..00000000000 --- a/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/3-thrash/default.yaml +++ /dev/null @@ -1,13 +0,0 @@ -overrides: - ceph: - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost - - log bound mismatch -tasks: -- thrashosds: - timeout: 1200 - chance_pgnum_grow: 1 - chance_pgpnum_fix: 1 - min_in: 4 -- print: "**** done thrashosds 3-thrash" diff --git a/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/4-mon b/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/4-mon deleted file mode 120000 index 0ea7dd5aa8d..00000000000 --- a/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/4-mon +++ /dev/null @@ -1 +0,0 @@ -../stress-split/4-mon \ No newline at end of file diff --git a/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/5-workload/ec-rados-default.yaml b/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/5-workload/ec-rados-default.yaml deleted file mode 120000 index a8a0ae69405..00000000000 --- a/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/5-workload/ec-rados-default.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../../erasure-code/ec-rados-default.yaml \ No newline at end of file diff --git a/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/6-next-mon b/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/6-next-mon deleted file mode 120000 index 0f00e43e5fd..00000000000 --- a/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/6-next-mon +++ /dev/null @@ -1 +0,0 @@ -../stress-split/6-next-mon \ No newline at end of file diff --git 
a/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/8-next-mon b/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/8-next-mon deleted file mode 120000 index 726c6a47a2e..00000000000 --- a/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/8-next-mon +++ /dev/null @@ -1 +0,0 @@ -../stress-split/8-next-mon \ No newline at end of file diff --git a/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/9-workload/ec-rados-plugin=isa-k=2-m=1.yaml b/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/9-workload/ec-rados-plugin=isa-k=2-m=1.yaml deleted file mode 100644 index 75c6275fdf7..00000000000 --- a/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/9-workload/ec-rados-plugin=isa-k=2-m=1.yaml +++ /dev/null @@ -1,25 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 50 - ec_pool: true - erasure_code_profile: - name: isaprofile - plugin: isa - k: 2 - m: 1 - technique: reed_sol_van - ruleset-failure-domain: osd - op_weights: - read: 100 - write: 0 - append: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 - copy_from: 50 - setattr: 25 - rmattr: 25 -- print: "**** done ec-rados-plugin=isa-k=2-m=1 9-workload" diff --git a/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/distros/ubuntu_14.04.yaml b/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/distros/ubuntu_14.04.yaml deleted file mode 100644 index 12d479fc32c..00000000000 --- a/suites/upgrade/giant-x/stress-split-erasure-code-x86_64/distros/ubuntu_14.04.yaml +++ /dev/null @@ -1,3 +0,0 @@ -os_type: ubuntu -os_version: "14.04" -arch: x86_64 diff --git a/suites/upgrade/giant-x/stress-split-erasure-code/% b/suites/upgrade/giant-x/stress-split-erasure-code/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/giant-x/stress-split-erasure-code/0-cluster b/suites/upgrade/giant-x/stress-split-erasure-code/0-cluster deleted file mode 120000 index 6dceffa2f5b..00000000000 --- 
a/suites/upgrade/giant-x/stress-split-erasure-code/0-cluster +++ /dev/null @@ -1 +0,0 @@ -../stress-split/0-cluster \ No newline at end of file diff --git a/suites/upgrade/giant-x/stress-split-erasure-code/1-giant-install b/suites/upgrade/giant-x/stress-split-erasure-code/1-giant-install deleted file mode 120000 index 2e0b946d2d2..00000000000 --- a/suites/upgrade/giant-x/stress-split-erasure-code/1-giant-install +++ /dev/null @@ -1 +0,0 @@ -../stress-split/1-giant-install/ \ No newline at end of file diff --git a/suites/upgrade/giant-x/stress-split-erasure-code/2-partial-upgrade b/suites/upgrade/giant-x/stress-split-erasure-code/2-partial-upgrade deleted file mode 120000 index c3a4e2c28e5..00000000000 --- a/suites/upgrade/giant-x/stress-split-erasure-code/2-partial-upgrade +++ /dev/null @@ -1 +0,0 @@ -../stress-split/2-partial-upgrade \ No newline at end of file diff --git a/suites/upgrade/giant-x/stress-split-erasure-code/3-thrash/default.yaml b/suites/upgrade/giant-x/stress-split-erasure-code/3-thrash/default.yaml deleted file mode 100644 index a33d4e3f4e6..00000000000 --- a/suites/upgrade/giant-x/stress-split-erasure-code/3-thrash/default.yaml +++ /dev/null @@ -1,13 +0,0 @@ -overrides: - ceph: - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost - - log bound mismatch -tasks: -- thrashosds: - timeout: 1200 - chance_pgnum_grow: 1 - chance_pgpnum_fix: 1 - min_in: 4 -- print: "**** done thrashosds 3-thrash" diff --git a/suites/upgrade/giant-x/stress-split-erasure-code/4-mon b/suites/upgrade/giant-x/stress-split-erasure-code/4-mon deleted file mode 120000 index 0ea7dd5aa8d..00000000000 --- a/suites/upgrade/giant-x/stress-split-erasure-code/4-mon +++ /dev/null @@ -1 +0,0 @@ -../stress-split/4-mon \ No newline at end of file diff --git a/suites/upgrade/giant-x/stress-split-erasure-code/5-workload/ec-rados-default.yaml b/suites/upgrade/giant-x/stress-split-erasure-code/5-workload/ec-rados-default.yaml deleted file mode 120000 index 
a8a0ae69405..00000000000 --- a/suites/upgrade/giant-x/stress-split-erasure-code/5-workload/ec-rados-default.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../../erasure-code/ec-rados-default.yaml \ No newline at end of file diff --git a/suites/upgrade/giant-x/stress-split-erasure-code/6-next-mon b/suites/upgrade/giant-x/stress-split-erasure-code/6-next-mon deleted file mode 120000 index 0f00e43e5fd..00000000000 --- a/suites/upgrade/giant-x/stress-split-erasure-code/6-next-mon +++ /dev/null @@ -1 +0,0 @@ -../stress-split/6-next-mon \ No newline at end of file diff --git a/suites/upgrade/giant-x/stress-split-erasure-code/8-next-mon b/suites/upgrade/giant-x/stress-split-erasure-code/8-next-mon deleted file mode 120000 index 726c6a47a2e..00000000000 --- a/suites/upgrade/giant-x/stress-split-erasure-code/8-next-mon +++ /dev/null @@ -1 +0,0 @@ -../stress-split/8-next-mon \ No newline at end of file diff --git a/suites/upgrade/giant-x/stress-split-erasure-code/9-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml b/suites/upgrade/giant-x/stress-split-erasure-code/9-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml deleted file mode 120000 index 4b9d9a44a24..00000000000 --- a/suites/upgrade/giant-x/stress-split-erasure-code/9-workload/ec-rados-plugin=jerasure-k=3-m=1.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../../erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml \ No newline at end of file diff --git a/suites/upgrade/giant-x/stress-split-erasure-code/distros b/suites/upgrade/giant-x/stress-split-erasure-code/distros deleted file mode 120000 index 8d4309788e0..00000000000 --- a/suites/upgrade/giant-x/stress-split-erasure-code/distros +++ /dev/null @@ -1 +0,0 @@ -../stress-split/distros \ No newline at end of file diff --git a/suites/upgrade/giant-x/stress-split/% b/suites/upgrade/giant-x/stress-split/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/giant-x/stress-split/0-cluster/start.yaml 
b/suites/upgrade/giant-x/stress-split/0-cluster/start.yaml deleted file mode 100644 index c27322a7e94..00000000000 --- a/suites/upgrade/giant-x/stress-split/0-cluster/start.yaml +++ /dev/null @@ -1,27 +0,0 @@ -overrides: - ceph: - log-whitelist: - - failed to encode map - conf: - mon: - mon warn on legacy crush tunables: false -roles: -- - mon.a - - mon.b - - mon.c - - mds.a - - osd.0 - - osd.1 - - osd.2 - - osd.3 - - osd.4 - - osd.5 - - osd.6 -- - osd.7 - - osd.8 - - osd.9 - - osd.10 - - osd.11 - - osd.12 - - osd.13 -- - client.0 diff --git a/suites/upgrade/giant-x/stress-split/1-giant-install/giant.yaml b/suites/upgrade/giant-x/stress-split/1-giant-install/giant.yaml deleted file mode 100644 index 3ce313299f7..00000000000 --- a/suites/upgrade/giant-x/stress-split/1-giant-install/giant.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- install: - branch: giant -- print: "**** done install giant" -- ceph: - fs: xfs -- print: "**** done ceph" diff --git a/suites/upgrade/giant-x/stress-split/2-partial-upgrade/firsthalf.yaml b/suites/upgrade/giant-x/stress-split/2-partial-upgrade/firsthalf.yaml deleted file mode 100644 index d42633e6dcb..00000000000 --- a/suites/upgrade/giant-x/stress-split/2-partial-upgrade/firsthalf.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- install.upgrade: - osd.0: -- print: "**** done install.upgrade osd.0" -- ceph.restart: - daemons: [osd.0, osd.1, osd.2, osd.3, osd.4, osd.5, osd.6] -- print: "**** done ceph.restart 1st half" diff --git a/suites/upgrade/giant-x/stress-split/3-thrash/default.yaml b/suites/upgrade/giant-x/stress-split/3-thrash/default.yaml deleted file mode 100644 index d99d0c87420..00000000000 --- a/suites/upgrade/giant-x/stress-split/3-thrash/default.yaml +++ /dev/null @@ -1,12 +0,0 @@ -overrides: - ceph: - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost - - log bound mismatch -tasks: -- thrashosds: - timeout: 1200 - chance_pgnum_grow: 1 - chance_pgpnum_fix: 1 -- print: "**** done thrashosds 
3-thrash" diff --git a/suites/upgrade/giant-x/stress-split/4-mon/mona.yaml b/suites/upgrade/giant-x/stress-split/4-mon/mona.yaml deleted file mode 100644 index 7c75c102fc4..00000000000 --- a/suites/upgrade/giant-x/stress-split/4-mon/mona.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph.restart: - daemons: [mon.a] - wait-for-healthy: false - wait-for-osds-up: true -- print: "**** done ceph.restart mon.a" diff --git a/suites/upgrade/giant-x/stress-split/5-workload/+ b/suites/upgrade/giant-x/stress-split/5-workload/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/giant-x/stress-split/5-workload/rbd-cls.yaml b/suites/upgrade/giant-x/stress-split/5-workload/rbd-cls.yaml deleted file mode 100644 index 9122be01659..00000000000 --- a/suites/upgrade/giant-x/stress-split/5-workload/rbd-cls.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- workunit: - branch: giant - clients: - client.0: - - cls/test_cls_rbd.sh -- print: "**** done cls/test_cls_rbd.sh 5-workload" diff --git a/suites/upgrade/giant-x/stress-split/5-workload/rbd-import-export.yaml b/suites/upgrade/giant-x/stress-split/5-workload/rbd-import-export.yaml deleted file mode 100644 index 3c39990dea6..00000000000 --- a/suites/upgrade/giant-x/stress-split/5-workload/rbd-import-export.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- workunit: - branch: giant - clients: - client.0: - - rbd/import_export.sh - env: - RBD_CREATE_ARGS: --new-format -- print: "**** done rbd/import_export.sh 5-workload" diff --git a/suites/upgrade/giant-x/stress-split/5-workload/readwrite.yaml b/suites/upgrade/giant-x/stress-split/5-workload/readwrite.yaml deleted file mode 100644 index 37c21483288..00000000000 --- a/suites/upgrade/giant-x/stress-split/5-workload/readwrite.yaml +++ /dev/null @@ -1,10 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 500 - op_weights: - read: 45 - write: 45 - delete: 10 -- print: "**** done rados/readwrite 5-workload" diff --git 
a/suites/upgrade/giant-x/stress-split/5-workload/snaps-few-objects.yaml b/suites/upgrade/giant-x/stress-split/5-workload/snaps-few-objects.yaml deleted file mode 100644 index f01232a3cbd..00000000000 --- a/suites/upgrade/giant-x/stress-split/5-workload/snaps-few-objects.yaml +++ /dev/null @@ -1,13 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 50 - op_weights: - read: 100 - write: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 -- print: "**** done rados/snaps-few-objects 5-workload" diff --git a/suites/upgrade/giant-x/stress-split/6-next-mon/monb.yaml b/suites/upgrade/giant-x/stress-split/6-next-mon/monb.yaml deleted file mode 100644 index 22e87c7ad21..00000000000 --- a/suites/upgrade/giant-x/stress-split/6-next-mon/monb.yaml +++ /dev/null @@ -1,6 +0,0 @@ -tasks: -- ceph.restart: - daemons: [mon.b] - wait-for-healthy: false - wait-for-osds-up: true -- print: "**** done ceph.restart mon.b 6-next-mon" diff --git a/suites/upgrade/giant-x/stress-split/7-workload/+ b/suites/upgrade/giant-x/stress-split/7-workload/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/giant-x/stress-split/7-workload/radosbench.yaml b/suites/upgrade/giant-x/stress-split/7-workload/radosbench.yaml deleted file mode 100644 index 3d87bb1c2c5..00000000000 --- a/suites/upgrade/giant-x/stress-split/7-workload/radosbench.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- radosbench: - clients: [client.0] - time: 1800 -- print: "**** done radosbench 7-workload" diff --git a/suites/upgrade/giant-x/stress-split/7-workload/rbd_api.yaml b/suites/upgrade/giant-x/stress-split/7-workload/rbd_api.yaml deleted file mode 100644 index 85536da0c1b..00000000000 --- a/suites/upgrade/giant-x/stress-split/7-workload/rbd_api.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- workunit: - branch: giant - clients: - client.0: - - rbd/test_librbd.sh -- print: "**** done rbd/test_librbd.sh 7-workload" diff --git 
a/suites/upgrade/giant-x/stress-split/8-next-mon/monc.yaml b/suites/upgrade/giant-x/stress-split/8-next-mon/monc.yaml deleted file mode 100644 index 61253685890..00000000000 --- a/suites/upgrade/giant-x/stress-split/8-next-mon/monc.yaml +++ /dev/null @@ -1,8 +0,0 @@ -tasks: -- ceph.restart: - daemons: [mon.c] - wait-for-healthy: false - wait-for-osds-up: true -- print: "**** done ceph.restart mon.c 8-next-mon" -- ceph.wait_for_mon_quorum: [a, b, c] -- print: "**** done wait_for_mon_quorum 8-next-mon" diff --git a/suites/upgrade/giant-x/stress-split/9-workload/+ b/suites/upgrade/giant-x/stress-split/9-workload/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/giant-x/stress-split/9-workload/rbd-python.yaml b/suites/upgrade/giant-x/stress-split/9-workload/rbd-python.yaml deleted file mode 100644 index 34ece2940c7..00000000000 --- a/suites/upgrade/giant-x/stress-split/9-workload/rbd-python.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- workunit: - branch: giant - clients: - client.0: - - rbd/test_librbd_python.sh -- print: "**** done rbd/test_librbd_python.sh 9-workload" diff --git a/suites/upgrade/giant-x/stress-split/9-workload/rgw-swift.yaml b/suites/upgrade/giant-x/stress-split/9-workload/rgw-swift.yaml deleted file mode 100644 index 8f1416082bb..00000000000 --- a/suites/upgrade/giant-x/stress-split/9-workload/rgw-swift.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- rgw: - client.0: - default_idle_timeout: 300 -- print: "**** done rgw 9-workload" -- swift: - client.0: - rgw_server: client.0 -- print: "**** done swift 9-workload" diff --git a/suites/upgrade/giant-x/stress-split/9-workload/snaps-many-objects.yaml b/suites/upgrade/giant-x/stress-split/9-workload/snaps-many-objects.yaml deleted file mode 100644 index 9e311c946e1..00000000000 --- a/suites/upgrade/giant-x/stress-split/9-workload/snaps-many-objects.yaml +++ /dev/null @@ -1,12 +0,0 @@ -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 500 - op_weights: - read: 
100 - write: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 diff --git a/suites/upgrade/giant-x/stress-split/distros b/suites/upgrade/giant-x/stress-split/distros deleted file mode 120000 index 79010c36a59..00000000000 --- a/suites/upgrade/giant-x/stress-split/distros +++ /dev/null @@ -1 +0,0 @@ -../../../../distros/supported \ No newline at end of file diff --git a/suites/upgrade/giant/% b/suites/upgrade/giant/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/giant/0-cluster/start.yaml b/suites/upgrade/giant/0-cluster/start.yaml deleted file mode 100644 index 5f2f9715929..00000000000 --- a/suites/upgrade/giant/0-cluster/start.yaml +++ /dev/null @@ -1,20 +0,0 @@ -overrides: - ceph: - log-whitelist: - - scrub - - scrub mismatch - - ScrubResult - fs: xfs -roles: -- - mon.a - - mds.a - - osd.0 - - osd.1 - - osd.2 -- - mon.b - - mon.c - - osd.3 - - osd.4 - - osd.5 - - client.0 - - client.1 diff --git a/suites/upgrade/giant/1-install/latest_firefly_release.yaml b/suites/upgrade/giant/1-install/latest_firefly_release.yaml deleted file mode 100644 index 9ff4a900bf4..00000000000 --- a/suites/upgrade/giant/1-install/latest_firefly_release.yaml +++ /dev/null @@ -1,11 +0,0 @@ -tasks: -# change tag to the latest firefly released version -- install: - tag: v0.80.6 -- print: "**** done latest firefly install" -- ceph: - fs: xfs -- parallel: - - workload - - upgrade-sequence -- print: "**** done parallel" diff --git a/suites/upgrade/giant/2-workload/blogbench.yaml b/suites/upgrade/giant/2-workload/blogbench.yaml deleted file mode 100644 index 93efdf1430e..00000000000 --- a/suites/upgrade/giant/2-workload/blogbench.yaml +++ /dev/null @@ -1,7 +0,0 @@ -workload: - sequential: - - workunit: - clients: - client.0: - - suites/blogbench.sh - - print: "**** done suites/blogbench.sh" diff --git a/suites/upgrade/giant/2-workload/rbd.yaml b/suites/upgrade/giant/2-workload/rbd.yaml deleted file mode 100644 index 
8ffb0ea5269..00000000000 --- a/suites/upgrade/giant/2-workload/rbd.yaml +++ /dev/null @@ -1,14 +0,0 @@ -workload: - sequential: - - workunit: - clients: - client.0: - - rbd/import_export.sh - env: - RBD_CREATE_ARGS: --new-format - - print: "**** done rbd/import_export.sh" - - workunit: - clients: - client.0: - - cls/test_cls_rbd.sh - - print: "**** done cls/test_cls_rbd.sh" diff --git a/suites/upgrade/giant/2-workload/s3tests.yaml b/suites/upgrade/giant/2-workload/s3tests.yaml deleted file mode 100644 index a5882acba43..00000000000 --- a/suites/upgrade/giant/2-workload/s3tests.yaml +++ /dev/null @@ -1,9 +0,0 @@ -workload: - sequential: - - rgw: [client.0] - - print: "**** done rgw: [client.0]" - - s3tests: - client.0: - force-branch: firefly-original - rgw_server: client.0 - - print: "**** done s3tests" diff --git a/suites/upgrade/giant/2-workload/testrados.yaml b/suites/upgrade/giant/2-workload/testrados.yaml deleted file mode 100644 index 49339ecd044..00000000000 --- a/suites/upgrade/giant/2-workload/testrados.yaml +++ /dev/null @@ -1,12 +0,0 @@ -workload: - rados: - clients: [client.0] - ops: 2000 - objects: 50 - op_weights: - read: 100 - write: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 diff --git a/suites/upgrade/giant/3-upgrade-sequence/upgrade-mon-osd-mds.yaml b/suites/upgrade/giant/3-upgrade-sequence/upgrade-mon-osd-mds.yaml deleted file mode 100644 index e87a8d1c37e..00000000000 --- a/suites/upgrade/giant/3-upgrade-sequence/upgrade-mon-osd-mds.yaml +++ /dev/null @@ -1,37 +0,0 @@ -upgrade-sequence: - sequential: - - install.upgrade: - all: - branch: giant - - print: "**** done install.upgrade giant" - - ceph.restart: [mon.a] - - sleep: - duration: 60 - - ceph.restart: [mon.b] - - sleep: - duration: 60 - - ceph.restart: [mon.c] - - sleep: - duration: 60 - - ceph.restart: [mds.a] - - sleep: - duration: 60 - - ceph.restart: [osd.0] - - sleep: - duration: 30 - - ceph.restart: [osd.1] - - sleep: - duration: 30 - - ceph.restart: [osd.2] - 
- sleep: - duration: 30 - - ceph.restart: [osd.3] - - sleep: - duration: 30 - - ceph.restart: [osd.4] - - sleep: - duration: 30 - - ceph.restart: [osd.5] - - sleep: - duration: 30 - - print: "**** done ceph.restart all" diff --git a/suites/upgrade/giant/3-upgrade-sequence/upgrade-osd-mon-mds.yaml b/suites/upgrade/giant/3-upgrade-sequence/upgrade-osd-mon-mds.yaml deleted file mode 100644 index adcf1b6e83a..00000000000 --- a/suites/upgrade/giant/3-upgrade-sequence/upgrade-osd-mon-mds.yaml +++ /dev/null @@ -1,37 +0,0 @@ -upgrade-sequence: - sequential: - - install.upgrade: - all: - branch: giant - - print: "**** done install.upgrade giant" - - ceph.restart: [osd.0] - - sleep: - duration: 30 - - ceph.restart: [osd.1] - - sleep: - duration: 30 - - ceph.restart: [osd.2] - - sleep: - duration: 30 - - ceph.restart: [osd.3] - - sleep: - duration: 30 - - ceph.restart: [osd.4] - - sleep: - duration: 30 - - ceph.restart: [osd.5] - - sleep: - duration: 60 - - ceph.restart: [mon.a] - - sleep: - duration: 60 - - ceph.restart: [mon.b] - - sleep: - duration: 60 - - ceph.restart: [mon.c] - - sleep: - duration: 60 - - ceph.restart: [mds.a] - - sleep: - duration: 60 - - print: "**** done ceph.restart all" diff --git a/suites/upgrade/giant/4-final/+ b/suites/upgrade/giant/4-final/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/giant/4-final/monthrash.yaml b/suites/upgrade/giant/4-final/monthrash.yaml deleted file mode 100644 index 7833c1c7734..00000000000 --- a/suites/upgrade/giant/4-final/monthrash.yaml +++ /dev/null @@ -1,13 +0,0 @@ -tasks: -- mon_thrash: - revive_delay: 20 - thrash_delay: 1 -- print: "**** done mon_thrash" -- ceph-fuse: -- print: "**** done ceph-fuse" -- workunit: - clients: - client.0: - - suites/dbench.sh -- print: "**** done suites/dbench.sh" - diff --git a/suites/upgrade/giant/4-final/osdthrash.yaml b/suites/upgrade/giant/4-final/osdthrash.yaml deleted file mode 100644 index 44b5bcedbb6..00000000000 --- 
a/suites/upgrade/giant/4-final/osdthrash.yaml +++ /dev/null @@ -1,19 +0,0 @@ -overrides: - ceph: - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost - - log bound mismatch -tasks: -- sequential: - - thrashosds: - timeout: 1200 - chance_pgnum_grow: 1 - chance_pgpnum_fix: 1 - - print: "**** done thrashosds" - - workunit: - clients: - client.0: - - suites/iogen.sh - - print: "**** done suites/iogen.sh" - diff --git a/suites/upgrade/giant/4-final/testrgw.yaml b/suites/upgrade/giant/4-final/testrgw.yaml deleted file mode 100644 index 054b0ff2612..00000000000 --- a/suites/upgrade/giant/4-final/testrgw.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tasks: -- sequential: - - rgw: [client.1] - - s3tests: - client.1: - rgw_server: client.1 - - print: "**** done s3tests" diff --git a/suites/upgrade/giant/distros b/suites/upgrade/giant/distros deleted file mode 120000 index dd0d7f1d5bd..00000000000 --- a/suites/upgrade/giant/distros +++ /dev/null @@ -1 +0,0 @@ -../../../distros/supported/ \ No newline at end of file diff --git a/suites/upgrade/hammer/newer/% b/suites/upgrade/hammer/newer/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/hammer/newer/0-cluster/start.yaml b/suites/upgrade/hammer/newer/0-cluster/start.yaml deleted file mode 100644 index 2f00028f52e..00000000000 --- a/suites/upgrade/hammer/newer/0-cluster/start.yaml +++ /dev/null @@ -1,24 +0,0 @@ -overrides: - ceph: - log-whitelist: - - scrub - - scrub mismatch - - ScrubResult - - failed to encode - - soft lockup - - detected stalls on CPUs - fs: xfs -roles: -- - mon.a - - mds.a - - osd.0 - - osd.1 - - osd.2 -- - mon.b - - mon.c - - osd.3 - - osd.4 - - osd.5 - - client.0 -- - client.1 - - client.2 diff --git a/suites/upgrade/hammer/newer/1-install/v0.94.2.yaml b/suites/upgrade/hammer/newer/1-install/v0.94.2.yaml deleted file mode 100644 index c380658c3a0..00000000000 --- a/suites/upgrade/hammer/newer/1-install/v0.94.2.yaml +++ /dev/null @@ -1,10 +0,0 @@ 
-tasks: -- install: - tag: v0.94.2 -- print: "**** done v0.94.2 install" -- ceph: - fs: xfs -- parallel: - - workload - - upgrade-sequence -- print: "**** done parallel v0.94.2" diff --git a/suites/upgrade/hammer/newer/1-install/v0.94.3.yaml b/suites/upgrade/hammer/newer/1-install/v0.94.3.yaml deleted file mode 100644 index 9d3fdc0fecd..00000000000 --- a/suites/upgrade/hammer/newer/1-install/v0.94.3.yaml +++ /dev/null @@ -1,10 +0,0 @@ -tasks: -- install: - tag: v0.94.3 -- print: "**** done v0.94.3 install" -- ceph: - fs: xfs -- parallel: - - workload - - upgrade-sequence -- print: "**** done parallel v0.94.3" diff --git a/suites/upgrade/hammer/newer/1-install/v0.94.4.yaml b/suites/upgrade/hammer/newer/1-install/v0.94.4.yaml deleted file mode 100644 index 9bea9f6bf6c..00000000000 --- a/suites/upgrade/hammer/newer/1-install/v0.94.4.yaml +++ /dev/null @@ -1,10 +0,0 @@ -tasks: -- install: - tag: v0.94.4 -- print: "**** done v0.94.4 install" -- ceph: - fs: xfs -- parallel: - - workload - - upgrade-sequence -- print: "**** done parallel v0.94.4" diff --git a/suites/upgrade/hammer/newer/1-install/v0.94.6.yaml b/suites/upgrade/hammer/newer/1-install/v0.94.6.yaml deleted file mode 100644 index 419bdadacb5..00000000000 --- a/suites/upgrade/hammer/newer/1-install/v0.94.6.yaml +++ /dev/null @@ -1,10 +0,0 @@ -tasks: -- install: - tag: v0.94.6 -- print: "**** done v0.94.6 install" -- ceph: - fs: xfs -- parallel: - - workload - - upgrade-sequence -- print: "**** done parallel v0.94.6" diff --git a/suites/upgrade/hammer/newer/2-workload/blogbench.yaml b/suites/upgrade/hammer/newer/2-workload/blogbench.yaml deleted file mode 100644 index e3d652eafd5..00000000000 --- a/suites/upgrade/hammer/newer/2-workload/blogbench.yaml +++ /dev/null @@ -1,7 +0,0 @@ -workload: - sequential: - - workunit: - clients: - client.0: - - suites/blogbench.sh - - print: "**** done suites/blogbench.sh 2-workload" diff --git a/suites/upgrade/hammer/newer/2-workload/rbd.yaml 
b/suites/upgrade/hammer/newer/2-workload/rbd.yaml deleted file mode 100644 index d37b294e15d..00000000000 --- a/suites/upgrade/hammer/newer/2-workload/rbd.yaml +++ /dev/null @@ -1,9 +0,0 @@ -workload: - sequential: - - workunit: - clients: - client.1: - - rbd/import_export.sh - env: - RBD_CREATE_ARGS: --new-format - - print: "**** done rbd/import_export.sh 2-workload" diff --git a/suites/upgrade/hammer/newer/2-workload/s3tests.yaml b/suites/upgrade/hammer/newer/2-workload/s3tests.yaml deleted file mode 100644 index 086346a9ace..00000000000 --- a/suites/upgrade/hammer/newer/2-workload/s3tests.yaml +++ /dev/null @@ -1,9 +0,0 @@ -workload: - sequential: - - rgw: [client.1] - - print: "**** done rgw: [client.1] 2-workload" - - s3tests: - client.1: - force-branch: hammer - rgw_server: client.1 - - print: "**** done s3tests 2-workload off hammer branch" diff --git a/suites/upgrade/hammer/newer/2-workload/testrados.yaml b/suites/upgrade/hammer/newer/2-workload/testrados.yaml deleted file mode 100644 index 49339ecd044..00000000000 --- a/suites/upgrade/hammer/newer/2-workload/testrados.yaml +++ /dev/null @@ -1,12 +0,0 @@ -workload: - rados: - clients: [client.0] - ops: 2000 - objects: 50 - op_weights: - read: 100 - write: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 diff --git a/suites/upgrade/hammer/newer/3-upgrade-sequence/upgrade-mon-osd-mds.yaml b/suites/upgrade/hammer/newer/3-upgrade-sequence/upgrade-mon-osd-mds.yaml deleted file mode 100644 index f0c62fda007..00000000000 --- a/suites/upgrade/hammer/newer/3-upgrade-sequence/upgrade-mon-osd-mds.yaml +++ /dev/null @@ -1,37 +0,0 @@ -upgrade-sequence: - sequential: - - install.upgrade: - mon.a: - mon.b: - - print: "**** done install.upgrade hammer" - - ceph.restart: [mon.a] - - sleep: - duration: 60 - - ceph.restart: [mon.b] - - sleep: - duration: 60 - - ceph.restart: [mon.c] - - sleep: - duration: 60 - - ceph.restart: [mds.a] - - sleep: - duration: 60 - - ceph.restart: [osd.0] - - sleep: - duration: 
30 - - ceph.restart: [osd.1] - - sleep: - duration: 30 - - ceph.restart: [osd.2] - - sleep: - duration: 30 - - ceph.restart: [osd.3] - - sleep: - duration: 30 - - ceph.restart: [osd.4] - - sleep: - duration: 30 - - ceph.restart: [osd.5] - - sleep: - duration: 30 - - print: "**** done ceph.restart all" diff --git a/suites/upgrade/hammer/newer/3-upgrade-sequence/upgrade-osd-mon-mds.yaml b/suites/upgrade/hammer/newer/3-upgrade-sequence/upgrade-osd-mon-mds.yaml deleted file mode 100644 index e4df6c8a111..00000000000 --- a/suites/upgrade/hammer/newer/3-upgrade-sequence/upgrade-osd-mon-mds.yaml +++ /dev/null @@ -1,37 +0,0 @@ -upgrade-sequence: - sequential: - - install.upgrade: - mon.a: - mon.b: - - print: "**** done install.upgrade hammer" - - ceph.restart: [osd.0] - - sleep: - duration: 30 - - ceph.restart: [osd.1] - - sleep: - duration: 30 - - ceph.restart: [osd.2] - - sleep: - duration: 30 - - ceph.restart: [osd.3] - - sleep: - duration: 30 - - ceph.restart: [osd.4] - - sleep: - duration: 30 - - ceph.restart: [osd.5] - - sleep: - duration: 60 - - ceph.restart: [mon.a] - - sleep: - duration: 60 - - ceph.restart: [mon.b] - - sleep: - duration: 60 - - ceph.restart: [mon.c] - - sleep: - duration: 60 - - ceph.restart: [mds.a] - - sleep: - duration: 60 - - print: "**** done ceph.restart all" diff --git a/suites/upgrade/hammer/newer/4-final/+ b/suites/upgrade/hammer/newer/4-final/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/hammer/newer/4-final/monthrash.yaml b/suites/upgrade/hammer/newer/4-final/monthrash.yaml deleted file mode 100644 index 3774702f08a..00000000000 --- a/suites/upgrade/hammer/newer/4-final/monthrash.yaml +++ /dev/null @@ -1,13 +0,0 @@ -tasks: -- mon_thrash: - revive_delay: 20 - thrash_delay: 1 -- print: "**** done mon_thrash 4-workload" -- ceph-fuse: -- print: "**** done ceph-fuse 4-workload" -- workunit: - clients: - client.0: - - suites/dbench.sh -- print: "**** done suites/dbench.sh 4-workload" - diff --git 
a/suites/upgrade/hammer/newer/4-final/osdthrash.yaml b/suites/upgrade/hammer/newer/4-final/osdthrash.yaml deleted file mode 100644 index b4740ada16c..00000000000 --- a/suites/upgrade/hammer/newer/4-final/osdthrash.yaml +++ /dev/null @@ -1,20 +0,0 @@ -overrides: - ceph: - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost - - log bound mismatch - - failed to encode -tasks: -- sequential: - - thrashosds: - timeout: 1200 - chance_pgnum_grow: 1 - chance_pgpnum_fix: 1 - - print: "**** done thrashosds 4-workload" - - workunit: - clients: - client.0: - - suites/iogen.sh - - print: "**** done suites/iogen.sh 4-workload" - diff --git a/suites/upgrade/hammer/newer/4-final/testrgw.yaml b/suites/upgrade/hammer/newer/4-final/testrgw.yaml deleted file mode 100644 index 85689decb1c..00000000000 --- a/suites/upgrade/hammer/newer/4-final/testrgw.yaml +++ /dev/null @@ -1,9 +0,0 @@ -tasks: -- sequential: - - rgw: [client.2] - - print: "**** done rgw: [client.2] 4-workload" - - s3tests: - client.2: - force-branch: hammer - rgw_server: client.2 - - print: "**** done s3tests 4-workload" diff --git a/suites/upgrade/hammer/newer/distros b/suites/upgrade/hammer/newer/distros deleted file mode 120000 index ca99fee94fa..00000000000 --- a/suites/upgrade/hammer/newer/distros +++ /dev/null @@ -1 +0,0 @@ -../../../../distros/supported/ \ No newline at end of file diff --git a/suites/upgrade/hammer/older/% b/suites/upgrade/hammer/older/% deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/hammer/older/0-cluster/start.yaml b/suites/upgrade/hammer/older/0-cluster/start.yaml deleted file mode 100644 index c1e3ea21410..00000000000 --- a/suites/upgrade/hammer/older/0-cluster/start.yaml +++ /dev/null @@ -1,23 +0,0 @@ -overrides: - ceph: - log-whitelist: - - scrub - - scrub mismatch - - ScrubResult - - failed to encode - - soft lockup - - detected stalls on CPUs - fs: xfs -roles: -- - mon.a - - mds.a - - osd.0 - - osd.1 - - osd.2 -- - mon.b 
- - mon.c - - osd.3 - - osd.4 - - osd.5 - - client.0 -- - client.1 diff --git a/suites/upgrade/hammer/older/1-install/v0.94.1.yaml b/suites/upgrade/hammer/older/1-install/v0.94.1.yaml deleted file mode 100644 index f2578e33dc6..00000000000 --- a/suites/upgrade/hammer/older/1-install/v0.94.1.yaml +++ /dev/null @@ -1,10 +0,0 @@ -tasks: -- install: - tag: v0.94.1 -- print: "**** done v0.94.1 install" -- ceph: - fs: xfs -- parallel: - - workload - - upgrade-sequence -- print: "**** done parallel v0.94.1" diff --git a/suites/upgrade/hammer/older/1-install/v0.94.yaml b/suites/upgrade/hammer/older/1-install/v0.94.yaml deleted file mode 100644 index 7cab6c37f19..00000000000 --- a/suites/upgrade/hammer/older/1-install/v0.94.yaml +++ /dev/null @@ -1,10 +0,0 @@ -tasks: -- install: - tag: v0.94 -- print: "**** done v0.94 install" -- ceph: - fs: xfs -- parallel: - - workload - - upgrade-sequence -- print: "**** done parallel v0.94" diff --git a/suites/upgrade/hammer/older/2-workload/blogbench.yaml b/suites/upgrade/hammer/older/2-workload/blogbench.yaml deleted file mode 100644 index df5c9a7d7df..00000000000 --- a/suites/upgrade/hammer/older/2-workload/blogbench.yaml +++ /dev/null @@ -1,9 +0,0 @@ -workload: - sequential: - - ceph-fuse: - - print: "**** done ceph-fuse 2-workload" - - workunit: - clients: - client.0: - - suites/blogbench.sh - - print: "**** done suites/blogbench.sh 2-workload" diff --git a/suites/upgrade/hammer/older/2-workload/rbd.yaml b/suites/upgrade/hammer/older/2-workload/rbd.yaml deleted file mode 100644 index d37b294e15d..00000000000 --- a/suites/upgrade/hammer/older/2-workload/rbd.yaml +++ /dev/null @@ -1,9 +0,0 @@ -workload: - sequential: - - workunit: - clients: - client.1: - - rbd/import_export.sh - env: - RBD_CREATE_ARGS: --new-format - - print: "**** done rbd/import_export.sh 2-workload" diff --git a/suites/upgrade/hammer/older/2-workload/testrados.yaml b/suites/upgrade/hammer/older/2-workload/testrados.yaml deleted file mode 100644 index 
49339ecd044..00000000000 --- a/suites/upgrade/hammer/older/2-workload/testrados.yaml +++ /dev/null @@ -1,12 +0,0 @@ -workload: - rados: - clients: [client.0] - ops: 2000 - objects: 50 - op_weights: - read: 100 - write: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 diff --git a/suites/upgrade/hammer/older/3-upgrade-sequence/upgrade-mon-osd-mds.yaml b/suites/upgrade/hammer/older/3-upgrade-sequence/upgrade-mon-osd-mds.yaml deleted file mode 100644 index f0c62fda007..00000000000 --- a/suites/upgrade/hammer/older/3-upgrade-sequence/upgrade-mon-osd-mds.yaml +++ /dev/null @@ -1,37 +0,0 @@ -upgrade-sequence: - sequential: - - install.upgrade: - mon.a: - mon.b: - - print: "**** done install.upgrade hammer" - - ceph.restart: [mon.a] - - sleep: - duration: 60 - - ceph.restart: [mon.b] - - sleep: - duration: 60 - - ceph.restart: [mon.c] - - sleep: - duration: 60 - - ceph.restart: [mds.a] - - sleep: - duration: 60 - - ceph.restart: [osd.0] - - sleep: - duration: 30 - - ceph.restart: [osd.1] - - sleep: - duration: 30 - - ceph.restart: [osd.2] - - sleep: - duration: 30 - - ceph.restart: [osd.3] - - sleep: - duration: 30 - - ceph.restart: [osd.4] - - sleep: - duration: 30 - - ceph.restart: [osd.5] - - sleep: - duration: 30 - - print: "**** done ceph.restart all" diff --git a/suites/upgrade/hammer/older/3-upgrade-sequence/upgrade-osd-mon-mds.yaml b/suites/upgrade/hammer/older/3-upgrade-sequence/upgrade-osd-mon-mds.yaml deleted file mode 100644 index e4df6c8a111..00000000000 --- a/suites/upgrade/hammer/older/3-upgrade-sequence/upgrade-osd-mon-mds.yaml +++ /dev/null @@ -1,37 +0,0 @@ -upgrade-sequence: - sequential: - - install.upgrade: - mon.a: - mon.b: - - print: "**** done install.upgrade hammer" - - ceph.restart: [osd.0] - - sleep: - duration: 30 - - ceph.restart: [osd.1] - - sleep: - duration: 30 - - ceph.restart: [osd.2] - - sleep: - duration: 30 - - ceph.restart: [osd.3] - - sleep: - duration: 30 - - ceph.restart: [osd.4] - - sleep: - duration: 30 - - 
ceph.restart: [osd.5] - - sleep: - duration: 60 - - ceph.restart: [mon.a] - - sleep: - duration: 60 - - ceph.restart: [mon.b] - - sleep: - duration: 60 - - ceph.restart: [mon.c] - - sleep: - duration: 60 - - ceph.restart: [mds.a] - - sleep: - duration: 60 - - print: "**** done ceph.restart all" diff --git a/suites/upgrade/hammer/older/4-final/+ b/suites/upgrade/hammer/older/4-final/+ deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/suites/upgrade/hammer/older/4-final/monthrash.yaml b/suites/upgrade/hammer/older/4-final/monthrash.yaml deleted file mode 100644 index 3774702f08a..00000000000 --- a/suites/upgrade/hammer/older/4-final/monthrash.yaml +++ /dev/null @@ -1,13 +0,0 @@ -tasks: -- mon_thrash: - revive_delay: 20 - thrash_delay: 1 -- print: "**** done mon_thrash 4-workload" -- ceph-fuse: -- print: "**** done ceph-fuse 4-workload" -- workunit: - clients: - client.0: - - suites/dbench.sh -- print: "**** done suites/dbench.sh 4-workload" - diff --git a/suites/upgrade/hammer/older/4-final/osdthrash.yaml b/suites/upgrade/hammer/older/4-final/osdthrash.yaml deleted file mode 100644 index b4740ada16c..00000000000 --- a/suites/upgrade/hammer/older/4-final/osdthrash.yaml +++ /dev/null @@ -1,20 +0,0 @@ -overrides: - ceph: - log-whitelist: - - wrongly marked me down - - objects unfound and apparently lost - - log bound mismatch - - failed to encode -tasks: -- sequential: - - thrashosds: - timeout: 1200 - chance_pgnum_grow: 1 - chance_pgpnum_fix: 1 - - print: "**** done thrashosds 4-workload" - - workunit: - clients: - client.0: - - suites/iogen.sh - - print: "**** done suites/iogen.sh 4-workload" - diff --git a/suites/upgrade/hammer/older/4-final/testrados.yaml b/suites/upgrade/hammer/older/4-final/testrados.yaml deleted file mode 100644 index 71865207eb0..00000000000 --- a/suites/upgrade/hammer/older/4-final/testrados.yaml +++ /dev/null @@ -1,13 +0,0 @@ -tasks: -- sequential: - - rados: - clients: [client.0] - ops: 2000 - objects: 50 - op_weights: - 
read: 100 - write: 100 - delete: 50 - snap_create: 50 - snap_remove: 50 - rollback: 50 diff --git a/suites/upgrade/hammer/older/distros b/suites/upgrade/hammer/older/distros deleted file mode 120000 index ca99fee94fa..00000000000 --- a/suites/upgrade/hammer/older/distros +++ /dev/null @@ -1 +0,0 @@ -../../../../distros/supported/ \ No newline at end of file diff --git a/tasks/__init__.py b/tasks/__init__.py deleted file mode 100644 index 9a7949a001e..00000000000 --- a/tasks/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -import logging - -# Inherit teuthology's log level -teuthology_log = logging.getLogger('teuthology') -log = logging.getLogger(__name__) -log.setLevel(teuthology_log.level) diff --git a/tasks/admin_socket.py b/tasks/admin_socket.py deleted file mode 100644 index 44235385a8a..00000000000 --- a/tasks/admin_socket.py +++ /dev/null @@ -1,192 +0,0 @@ -""" -Admin Socket task -- used in rados, powercycle, and smoke testing -""" -from cStringIO import StringIO - -import json -import logging -import os -import time - -from teuthology.orchestra import run -from teuthology import misc as teuthology -from teuthology.parallel import parallel - -log = logging.getLogger(__name__) - - -def task(ctx, config): - """ - Run an admin socket command, make sure the output is json, and run - a test program on it. The test program should read json from - stdin. This task succeeds if the test program exits with status 0. 
- - To run the same test on all clients:: - - tasks: - - ceph: - - rados: - - admin_socket: - all: - dump_requests: - test: http://example.com/script - - To restrict it to certain clients:: - - tasks: - - ceph: - - rados: [client.1] - - admin_socket: - client.1: - dump_requests: - test: http://example.com/script - - If an admin socket command has arguments, they can be specified as - a list:: - - tasks: - - ceph: - - rados: [client.0] - - admin_socket: - client.0: - dump_requests: - test: http://example.com/script - help: - test: http://example.com/test_help_version - args: [version] - - Note that there must be a ceph client with an admin socket running - before this task is run. The tests are parallelized at the client - level. Tests for a single client are run serially. - - :param ctx: Context - :param config: Configuration - """ - assert isinstance(config, dict), \ - 'admin_socket task requires a dict for configuration' - teuthology.replace_all_with_clients(ctx.cluster, config) - - with parallel() as ptask: - for client, tests in config.iteritems(): - ptask.spawn(_run_tests, ctx, client, tests) - - -def _socket_command(ctx, remote, socket_path, command, args): - """ - Run an admin socket command and return the result as a string. 
- - :param ctx: Context - :param remote: Remote site - :param socket_path: path to socket - :param command: command to be run remotely - :param args: command arguments - - :returns: output of command in json format - """ - json_fp = StringIO() - testdir = teuthology.get_testdir(ctx) - max_tries = 120 - while True: - proc = remote.run( - args=[ - 'sudo', - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'ceph', - '--admin-daemon', socket_path, - ] + command.split(' ') + args, - stdout=json_fp, - check_status=False, - ) - if proc.exitstatus == 0: - break - assert max_tries > 0 - max_tries -= 1 - log.info('ceph cli returned an error, command not registered yet?') - log.info('sleeping and retrying ...') - time.sleep(1) - out = json_fp.getvalue() - json_fp.close() - log.debug('admin socket command %s returned %s', command, out) - return json.loads(out) - -def _run_tests(ctx, client, tests): - """ - Create a temp directory and wait for a client socket to be created. - For each test, copy the executable locally and run the test. - Remove temp directory when finished. 
- - :param ctx: Context - :param client: client machine to run the test - :param tests: list of tests to run - """ - testdir = teuthology.get_testdir(ctx) - log.debug('Running admin socket tests on %s', client) - (remote,) = ctx.cluster.only(client).remotes.iterkeys() - socket_path = '/var/run/ceph/ceph-{name}.asok'.format(name=client) - overrides = ctx.config.get('overrides', {}).get('admin_socket', {}) - - try: - tmp_dir = os.path.join( - testdir, - 'admin_socket_{client}'.format(client=client), - ) - remote.run( - args=[ - 'mkdir', - '--', - tmp_dir, - run.Raw('&&'), - # wait for client process to create the socket - 'while', 'test', '!', '-e', socket_path, run.Raw(';'), - 'do', 'sleep', '1', run.Raw(';'), 'done', - ], - ) - - for command, config in tests.iteritems(): - if config is None: - config = {} - teuthology.deep_merge(config, overrides) - log.debug('Testing %s with config %s', command, str(config)) - - test_path = None - if 'test' in config: - url = config['test'].format( - branch=config.get('branch', 'master') - ) - test_path = os.path.join(tmp_dir, command) - remote.run( - args=[ - 'wget', - '-q', - '-O', - test_path, - '--', - url, - run.Raw('&&'), - 'chmod', - 'u=rx', - '--', - test_path, - ], - ) - - args = config.get('args', []) - assert isinstance(args, list), \ - 'admin socket command args must be a list' - sock_out = _socket_command(ctx, remote, socket_path, command, args) - if test_path is not None: - remote.run( - args=[ - test_path, - ], - stdin=json.dumps(sock_out), - ) - - finally: - remote.run( - args=[ - 'rm', '-rf', '--', tmp_dir, - ], - ) diff --git a/tasks/apache.conf.template b/tasks/apache.conf.template deleted file mode 100644 index 87426f67ca5..00000000000 --- a/tasks/apache.conf.template +++ /dev/null @@ -1,60 +0,0 @@ - - LoadModule version_module {mod_path}/mod_version.so - - - LoadModule env_module {mod_path}/mod_env.so - - - LoadModule rewrite_module {mod_path}/mod_rewrite.so - - - LoadModule fastcgi_module 
{mod_path}/mod_fastcgi.so - - - LoadModule log_config_module {mod_path}/mod_log_config.so - - -Listen {port} -ServerName {host} - -= 2.4> - - LoadModule unixd_module {mod_path}/mod_unixd.so - - - LoadModule authz_core_module {mod_path}/mod_authz_core.so - - - LoadModule mpm_worker_module {mod_path}/mod_mpm_worker.so - - User {user} - Group {group} - - -ServerRoot {testdir}/apache -ErrorLog {testdir}/archive/apache.{client}/error.log -LogFormat "%h l %u %t \"%r\" %>s %b \"{{Referer}}i\" \"%{{User-agent}}i\"" combined -CustomLog {testdir}/archive/apache.{client}/access.log combined -PidFile {testdir}/apache/tmp.{client}/apache.pid -DocumentRoot {testdir}/apache/htdocs.{client} -FastCgiIPCDir {testdir}/apache/tmp.{client}/fastcgi_sock -FastCgiExternalServer {testdir}/apache/htdocs.{client}/rgw.fcgi -socket rgw_sock -idle-timeout {idle_timeout} -RewriteEngine On - -RewriteRule ^/([a-zA-Z0-9-_.]*)([/]?.*) /rgw.fcgi?page=$1¶ms=$2&%{{QUERY_STRING}} [E=HTTP_AUTHORIZATION:%{{HTTP:Authorization}},L] - -# Set fastcgi environment variables. -# Note that this is separate from Unix environment variables! -SetEnv RGW_LOG_LEVEL 20 -SetEnv RGW_SHOULD_LOG yes -SetEnv RGW_PRINT_CONTINUE {print_continue} - - - Options +ExecCGI - AllowOverride All - SetHandler fastcgi-script - - -AllowEncodedSlashes On -ServerSignature Off -MaxRequestsPerChild 0 diff --git a/tasks/autotest.py b/tasks/autotest.py deleted file mode 100644 index efa972123d2..00000000000 --- a/tasks/autotest.py +++ /dev/null @@ -1,166 +0,0 @@ -""" -Run an autotest test on the ceph cluster. -""" -import json -import logging -import os - -from teuthology import misc as teuthology -from teuthology.parallel import parallel -from teuthology.orchestra import run - -log = logging.getLogger(__name__) - -def task(ctx, config): - """ - Run an autotest test on the ceph cluster. - - Only autotest client tests are supported. - - The config is a mapping from role name to list of tests to run on - that client. 
- - For example:: - - tasks: - - ceph: - - ceph-fuse: [client.0, client.1] - - autotest: - client.0: [dbench] - client.1: [bonnie] - - You can also specify a list of tests to run on all clients:: - - tasks: - - ceph: - - ceph-fuse: - - autotest: - all: [dbench] - """ - assert isinstance(config, dict) - config = teuthology.replace_all_with_clients(ctx.cluster, config) - log.info('Setting up autotest...') - testdir = teuthology.get_testdir(ctx) - with parallel() as p: - for role in config.iterkeys(): - (remote,) = ctx.cluster.only(role).remotes.keys() - p.spawn(_download, testdir, remote) - - log.info('Making a separate scratch dir for every client...') - for role in config.iterkeys(): - assert isinstance(role, basestring) - PREFIX = 'client.' - assert role.startswith(PREFIX) - id_ = role[len(PREFIX):] - (remote,) = ctx.cluster.only(role).remotes.iterkeys() - mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_)) - scratch = os.path.join(mnt, 'client.{id}'.format(id=id_)) - remote.run( - args=[ - 'sudo', - 'install', - '-d', - '-m', '0755', - '--owner={user}'.format(user='ubuntu'), #TODO - '--', - scratch, - ], - ) - - with parallel() as p: - for role, tests in config.iteritems(): - (remote,) = ctx.cluster.only(role).remotes.keys() - p.spawn(_run_tests, testdir, remote, role, tests) - -def _download(testdir, remote): - """ - Download. Does not explicitly support muliple tasks in a single run. 
- """ - remote.run( - args=[ - # explicitly does not support multiple autotest tasks - # in a single run; the result archival would conflict - 'mkdir', '{tdir}/archive/autotest'.format(tdir=testdir), - run.Raw('&&'), - 'mkdir', '{tdir}/autotest'.format(tdir=testdir), - run.Raw('&&'), - 'wget', - '-nv', - '--no-check-certificate', - 'https://github.com/ceph/autotest/tarball/ceph', - '-O-', - run.Raw('|'), - 'tar', - '-C', '{tdir}/autotest'.format(tdir=testdir), - '-x', - '-z', - '-f-', - '--strip-components=1', - ], - ) - -def _run_tests(testdir, remote, role, tests): - """ - Spawned to run test on remote site - """ - assert isinstance(role, basestring) - PREFIX = 'client.' - assert role.startswith(PREFIX) - id_ = role[len(PREFIX):] - mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_)) - scratch = os.path.join(mnt, 'client.{id}'.format(id=id_)) - - assert isinstance(tests, list) - for idx, testname in enumerate(tests): - log.info('Running autotest client test #%d: %s...', idx, testname) - - tag = 'client.{id}.num{idx}.{testname}'.format( - idx=idx, - testname=testname, - id=id_, - ) - control = '{tdir}/control.{tag}'.format(tdir=testdir, tag=tag) - teuthology.write_file( - remote=remote, - path=control, - data='import json; data=json.loads({data!r}); job.run_test(**data)'.format( - data=json.dumps(dict( - url=testname, - dir=scratch, - # TODO perhaps tag - # results will be in {testdir}/autotest/client/results/dbench - # or {testdir}/autotest/client/results/dbench.{tag} - )), - ), - ) - remote.run( - args=[ - '{tdir}/autotest/client/bin/autotest'.format(tdir=testdir), - '--verbose', - '--harness=simple', - '--tag={tag}'.format(tag=tag), - control, - run.Raw('3>&1'), - ], - ) - - remote.run( - args=[ - 'rm', '-rf', '--', control, - ], - ) - - remote.run( - args=[ - 'mv', - '--', - '{tdir}/autotest/client/results/{tag}'.format(tdir=testdir, tag=tag), - '{tdir}/archive/autotest/{tag}'.format(tdir=testdir, tag=tag), - ], - ) - - remote.run( - args=[ - 'rm', '-rf', 
'--', '{tdir}/autotest'.format(tdir=testdir), - ], - ) diff --git a/tasks/blktrace.py b/tasks/blktrace.py deleted file mode 100644 index 401f9e39f64..00000000000 --- a/tasks/blktrace.py +++ /dev/null @@ -1,93 +0,0 @@ -""" -Run blktrace program through teuthology -""" -import contextlib -import logging - -from teuthology import misc as teuthology -from teuthology import contextutil -from teuthology.orchestra import run - -log = logging.getLogger(__name__) -blktrace = '/usr/sbin/blktrace' -daemon_signal = 'term' - -@contextlib.contextmanager -def setup(ctx, config): - """ - Setup all the remotes - """ - osds = ctx.cluster.only(teuthology.is_type('osd')) - log_dir = '{tdir}/archive/performance/blktrace'.format(tdir=teuthology.get_testdir(ctx)) - - for remote, roles_for_host in osds.remotes.iteritems(): - log.info('Creating %s on %s' % (log_dir, remote.name)) - remote.run( - args=['mkdir', '-p', '-m0755', '--', log_dir], - wait=False, - ) - yield - -@contextlib.contextmanager -def execute(ctx, config): - """ - Run the blktrace program on remote machines. 
- """ - procs = [] - testdir = teuthology.get_testdir(ctx) - log_dir = '{tdir}/archive/performance/blktrace'.format(tdir=testdir) - - osds = ctx.cluster.only(teuthology.is_type('osd')) - for remote, roles_for_host in osds.remotes.iteritems(): - roles_to_devs = ctx.disk_config.remote_to_roles_to_dev[remote] - for id_ in teuthology.roles_of_type(roles_for_host, 'osd'): - if roles_to_devs.get(id_): - dev = roles_to_devs[id_] - log.info("running blktrace on %s: %s" % (remote.name, dev)) - - proc = remote.run( - args=[ - 'cd', - log_dir, - run.Raw(';'), - 'daemon-helper', - daemon_signal, - 'sudo', - blktrace, - '-o', - dev.rsplit("/", 1)[1], - '-d', - dev, - ], - wait=False, - stdin=run.PIPE, - ) - procs.append(proc) - try: - yield - finally: - osds = ctx.cluster.only(teuthology.is_type('osd')) - log.info('stopping blktrace processs') - for proc in procs: - proc.stdin.close() - -@contextlib.contextmanager -def task(ctx, config): - """ - Usage: - blktrace: - - Runs blktrace on all clients. - """ - if config is None: - config = dict(('client.{id}'.format(id=id_), None) - for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')) - elif isinstance(config, list): - config = dict.fromkeys(config) - - with contextutil.nested( - lambda: setup(ctx=ctx, config=config), - lambda: execute(ctx=ctx, config=config), - ): - yield - diff --git a/tasks/boto.cfg.template b/tasks/boto.cfg.template deleted file mode 100644 index cdfe8873b42..00000000000 --- a/tasks/boto.cfg.template +++ /dev/null @@ -1,2 +0,0 @@ -[Boto] -http_socket_timeout = {idle_timeout} diff --git a/tasks/buildpackages.py b/tasks/buildpackages.py deleted file mode 100644 index b7d6cee4f01..00000000000 --- a/tasks/buildpackages.py +++ /dev/null @@ -1,223 +0,0 @@ -""" -Build ceph packages - -Unit tests: - -py.test -v -s tests/test_buildpackages.py - -Integration tests: - -teuthology-openstack --verbose --key-name myself --key-filename ~/Downloads/myself --ceph infernalis --suite teuthology/buildpackages - -""" 
-import copy -import logging -import os -import types -from teuthology import packaging -from teuthology import misc -from teuthology.config import config as teuth_config -from teuthology.openstack import OpenStack - -log = logging.getLogger(__name__) - -class LocalGitbuilderProject(packaging.GitbuilderProject): - - def __init__(self): - pass - - -def get_pkg_type(os_type): - if os_type in ('centos', 'fedora', 'opensuse', 'rhel', 'sles'): - return 'rpm' - else: - return 'deb' - -def apply_overrides(ctx, config): - if config is None: - config = {} - else: - config = copy.deepcopy(config) - - assert isinstance(config, dict), \ - "task install only supports a dictionary for configuration" - - project, = config.get('project', 'ceph'), - log.debug('project %s' % project) - overrides = ctx.config.get('overrides') - if overrides: - install_overrides = overrides.get('install', {}) - misc.deep_merge(config, install_overrides.get(project, {})) - return config - -def get_config_install(ctx, config): - config = apply_overrides(ctx, config) - log.debug('install config %s' % config) - return [(config.get('flavor', 'basic'), - config.get('tag', ''), - config.get('branch', ''), - config.get('sha1'))] - -def get_config_install_upgrade(ctx, config): - log.debug('install.upgrade config before override %s' % config) - configs = [] - for (role, role_config) in config.iteritems(): - if role_config is None: - role_config = {} - o = apply_overrides(ctx, role_config) - - log.debug('install.upgrade config ' + str(role_config) + - ' and with overrides ' + str(o)) - # for install.upgrade overrides are actually defaults - configs.append((o.get('flavor', 'basic'), - role_config.get('tag', o.get('tag', '')), - role_config.get('branch', o.get('branch', '')), - role_config.get('sha1', o.get('sha1')))) - return configs - -GET_CONFIG_FUNCTIONS = { - 'install': get_config_install, - 'install.upgrade': get_config_install_upgrade, -} - -def lookup_configs(ctx, node): - configs = [] - if type(node) is 
types.ListType: - for leaf in node: - configs.extend(lookup_configs(ctx, leaf)) - elif type(node) is types.DictType: - for (key, value) in node.iteritems(): - if key in ('install', 'install.upgrade'): - configs.extend(GET_CONFIG_FUNCTIONS[key](ctx, value)) - elif key in ('overrides',): - pass - else: - configs.extend(lookup_configs(ctx, value)) - return configs - -def get_sha1(ref): - url = teuth_config.get_ceph_git_url() - ls_remote = misc.sh("git ls-remote " + url + " " + ref) - return ls_remote.split()[0] - -def task(ctx, config): - """ - Build Ceph packages. This task will automagically be run - before the task that need to install packages (this is taken - care of by the internal teuthology task). - - The config should be as follows: - - buildpackages: - good_machine: - disk: 40 # GB - ram: 48000 # MB - cpus: 16 - min_machine: - disk: 40 # GB - ram: 8000 # MB - cpus: 1 - - example: - - tasks: - - buildpackages: - good_machine: - disk: 40 # GB - ram: 15000 # MB - cpus: 16 - min_machine: - disk: 40 # GB - ram: 8000 # MB - cpus: 1 - - install: - - When a buildpackages task is already included, the values it contains can be - overriden with: - - overrides: - buildpackages: - good_machine: - disk: 20 # GB - ram: 2000 # MB - cpus: 2 - min_machine: - disk: 10 # GB - ram: 1000 # MB - cpus: 1 - - """ - log.info('Beginning buildpackages...') - if config is None: - config = {} - assert isinstance(config, dict), \ - 'task only accepts a dict for config not ' + str(config) - overrides = ctx.config.get('overrides', {}) - misc.deep_merge(config, overrides.get('buildpackages', {})) - d = os.path.join(os.path.dirname(__file__), 'buildpackages') - os_type = misc.get_distro(ctx) - os_version = misc.get_distro_version(ctx) - arch = ctx.config.get('arch', OpenStack().get_default_arch()) - dist = LocalGitbuilderProject()._get_distro(distro=os_type, - version=os_version) - pkg_type = get_pkg_type(os_type) - misc.sh( - "flock --close /tmp/buildpackages " + - "make -C " + d + " " + 
os.environ['HOME'] + "/.ssh_agent") - for (flavor, tag, branch, sha1) in lookup_configs(ctx, ctx.config): - if tag: - sha1 = get_sha1(tag) - elif branch: - sha1 = get_sha1(branch) - log.info("building flavor = " + flavor + "," + - " tag = " + tag + "," + - " branch = " + branch + "," + - " sha1 = " + sha1) - target = ('ceph-' + - pkg_type + '-' + - dist + '-' + - arch + '-' + - flavor + '-' + - sha1) - openstack = OpenStack() - openstack.set_provider() - if openstack.provider == 'ovh': - select = '^(vps|hg)-.*ssd' - else: - select = '' - network = openstack.net() - if network != "": - network = " OPENSTACK_NETWORK='" + network + "' " - openstack.image(os_type, os_version, arch) # create if it does not exist - build_flavor = openstack.flavor_range( - config['min_machine'], config['good_machine'], arch, select) - default_arch = openstack.get_default_arch() - http_flavor = openstack.flavor({ - 'disk': 30, # GB - 'ram': 1024, # MB - 'cpus': 1, - }, default_arch, select) - lock = "/tmp/buildpackages-" + sha1 + "-" + os_type + "-" + os_version - cmd = (". 
" + os.environ['HOME'] + "/.ssh_agent ; " + - " flock --close " + lock + - " make -C " + d + - network + - " CEPH_GIT_URL=" + teuth_config.get_ceph_git_url() + - " CEPH_PKG_TYPE=" + pkg_type + - " CEPH_OS_TYPE=" + os_type + - " CEPH_OS_VERSION=" + os_version + - " CEPH_DIST=" + dist + - " CEPH_ARCH=" + arch + - " CEPH_SHA1=" + sha1 + - " CEPH_TAG=" + tag + - " CEPH_BRANCH=" + branch + - " CEPH_FLAVOR=" + flavor + - " BUILD_FLAVOR=" + build_flavor + - " HTTP_FLAVOR=" + http_flavor + - " HTTP_ARCH=" + default_arch + - " " + target + - " ") - log.info("buildpackages: " + cmd) - misc.sh(cmd) - teuth_config.gitbuilder_host = openstack.get_ip('packages-repository', '') - log.info('Finished buildpackages') diff --git a/tasks/buildpackages/Makefile b/tasks/buildpackages/Makefile deleted file mode 100644 index de20fbb7551..00000000000 --- a/tasks/buildpackages/Makefile +++ /dev/null @@ -1,81 +0,0 @@ -SHELL=/bin/bash -D=/tmp/stampsdir -VPATH=${D} -TIMEOUT_SERVER_CREATE = 30m -TIMEOUT_BUILD = 220m # 20 minutes short of 4 hours -PKG_REPO=packages-repository -PKG_REPO_OS_TYPE=ubuntu -PKG_REPO_OS_VERSION=14.04 -PKG_REPO_USER_DATA=${PKG_REPO_OS_TYPE}-${PKG_REPO_OS_VERSION}-user-data.txt - -# We want to extract the first listed IPv4 address! 
-# Openstack will provide the addresses field in this format: -# "net1-name=ip(, ip)+(; net2-name=ip(, ip)+)+" -# Each IP may be v4 or v6 (including shortened forms and IPv4-mapped-IPv6 forms) -# 1.2.3.4 -# 2001:db8:6050:ed4d:f816:3eff:fe48:3b36 -# 2001:db8::fe48:3b36 -# 2001:db8::1.2.3.4 -# Example long-form input: -# private-network=10.10.10.69, 2001:db8:6050:ed4d:f816:3eff:fed1:d9f8;net-name2=2001:db8::fe48:3b36, 2001:db8::1.2.3.4, 1.2.3.4; -# TODO: allow selection of the network instead of taking the first network -# TODO: Support IPv6 in future -define get_ip -$$(openstack server show -f value -c addresses $(1) |perl -pe 's/^[^=]+=([^;]+).*/\1/g; s/[ ,]/\n/g; ' |grep -v -e ':' -e '^$$' |head -n1) -endef - -MY_IP=$(shell hostname -I | cut -f1 -d' ') - -${HOME}/.ssh_agent: - ssh-agent -s > ${HOME}/.ssh_agent - source ${HOME}/.ssh_agent ; ssh-add ; ssh-add -l - grep -q ssh_agent ~/.bashrc_teuthology || echo 'source ${HOME}/.ssh_agent' >> ~/.bashrc_teuthology - -flock-${PKG_REPO}: - timeout $(TIMEOUT_SERVER_CREATE) openstack server create --image 'teuthology-ubuntu-14.04-${HTTP_ARCH}' ${OPENSTACK_NETWORK} --flavor ${HTTP_FLAVOR} --key-name teuthology --security-group teuthology --property ownedby=${MY_IP} --user-data ${PKG_REPO_USER_DATA} --wait ${PKG_REPO} - sleep 30 - set -ex ; \ - ip=$(call get_ip,${PKG_REPO}) ; \ - for delay in 1 2 4 8 8 8 8 8 8 8 8 8 16 16 16 16 16 32 32 32 64 128 256 512 ; do if ssh -o 'ConnectTimeout=3' $$ip bash -c '"grep -q READYTORUN /var/log/cloud-init*.log"' ; then break ; else sleep $$delay ; fi ; done ; \ - ssh $$ip sudo apt-get update ; \ - ssh $$ip sudo apt-get install -y nginx rsync && \ - ssh $$ip sudo chown -R ubuntu /usr/share/nginx/html && \ - ssh $$ip sudo rm /usr/share/nginx/html/\* && \ - ssh $$ip sudo perl -pi -e '"s|location / {|location / { autoindex on;|"' /etc/nginx/sites-available/default && \ - ssh $$ip sudo /etc/init.d/nginx restart && \ - perl -pi -e "s/^gitbuilder_host:.*/gitbuilder_host: $$ip/" ~/.teuthology.yaml 
- touch ${D}/$@ - -${PKG_REPO}: - mkdir -p ${D} - flock --close ${D}/flock-$@.lock ${MAKE} flock-$@ - touch ${D}/$@ - -# Just because 'server create' return success does not mean it actually succeeded! -# Check the server status before we proceed. -# If it's a weird status, bail out and let the delete fire -# eg: ERROR status can happen if there is no VM host without enough capacity for the request. -ceph-${CEPH_PKG_TYPE}-${CEPH_DIST}-${CEPH_ARCH}-${CEPH_FLAVOR}-${CEPH_SHA1}: ${PKG_REPO} - timeout $(TIMEOUT_SERVER_CREATE) openstack server create --image 'teuthology-${CEPH_OS_TYPE}-${CEPH_OS_VERSION}-${CEPH_ARCH}' ${OPENSTACK_NETWORK} --flavor ${BUILD_FLAVOR} --key-name teuthology --security-group teuthology --property ownedby=${MY_IP} --user-data ${CEPH_OS_TYPE}-${CEPH_OS_VERSION}-user-data.txt --wait $@ - set -ex ; \ - trap "openstack server delete --wait $@" EXIT ; \ - for delay in 30 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 ; do \ - status=$$(openstack server show -c status -f value $@) ; \ - case $$status in \ - ACTIVE) break ;; \ - NOSTATE|*BUILD|*BOOT|*RESIZE) sleep $$delay ;; \ - *) exit 1 ;; \ - esac ; \ - done ; \ - ip=$(call get_ip,$@) ; \ - test -n "$$ip" || exit ; \ - for delay in 1 2 4 8 8 8 8 8 8 8 8 8 16 16 16 16 16 32 32 32 64 128 256 512 ; do if ssh -o 'ConnectTimeout=3' $$ip bash -c '"grep -q READYTORUN /var/log/cloud-init*.log"' ; then break ; else sleep $$delay ; fi ; done ; \ - scp make-${CEPH_PKG_TYPE}.sh common.sh ubuntu@$$ip: ; \ - packages_repository=$(call get_ip,${> /etc/ssh/sshd_config - - ( echo 'Defaults !requiretty' ; echo 'Defaults visiblepw' ) | tee /etc/sudoers.d/cephlab_sudo -preserve_hostname: true -system_info: - default_user: - name: ubuntu -packages: - - dracut-modules-growroot -runcmd: - - mkinitrd --force /boot/initramfs-2.6.32-573.3.1.el6.x86_64.img 2.6.32-573.3.1.el6.x86_64 - - reboot -final_message: "READYTORUN" diff --git a/tasks/buildpackages/centos-7.0-user-data.txt 
b/tasks/buildpackages/centos-7.0-user-data.txt deleted file mode 120000 index 2eb0e3c88dd..00000000000 --- a/tasks/buildpackages/centos-7.0-user-data.txt +++ /dev/null @@ -1 +0,0 @@ -user-data.txt \ No newline at end of file diff --git a/tasks/buildpackages/centos-7.1-user-data.txt b/tasks/buildpackages/centos-7.1-user-data.txt deleted file mode 120000 index 2eb0e3c88dd..00000000000 --- a/tasks/buildpackages/centos-7.1-user-data.txt +++ /dev/null @@ -1 +0,0 @@ -user-data.txt \ No newline at end of file diff --git a/tasks/buildpackages/centos-7.2-user-data.txt b/tasks/buildpackages/centos-7.2-user-data.txt deleted file mode 120000 index 2eb0e3c88dd..00000000000 --- a/tasks/buildpackages/centos-7.2-user-data.txt +++ /dev/null @@ -1 +0,0 @@ -user-data.txt \ No newline at end of file diff --git a/tasks/buildpackages/common.sh b/tasks/buildpackages/common.sh deleted file mode 100644 index eb9bc646678..00000000000 --- a/tasks/buildpackages/common.sh +++ /dev/null @@ -1,163 +0,0 @@ -#!/bin/bash -# -# Copyright (C) 2015 Red Hat -# -# Author: Loic Dachary -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU Library Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Library Public License for more details. 
-# -function install_deps() { - git archive --remote=git://git.ceph.com/ceph.git master install-deps.sh | tar -xvf - - # - # drop the following hack when trusty is not supported anymore - # there is no other way as long as we maintain a debian directory that tries - # to be the same for all distributions - # - if grep --quiet 14.04 /etc/issue 2>/dev/null && sudo apt-get install --force-yes -qq -y dpkg-dev && test "$(dpkg-architecture -qDEB_BUILD_GNU_CPU 2>/dev/null)" = aarch64 ; then - sed -i -e '/libgoogle-perftools-dev/d' debian/control - fi - bash -x install-deps.sh -} - -function git_submodules() { - # see http://tracker.ceph.com/issues/13426 - perl -pi -e 's|git://ceph.com/git/ceph-object-corpus.git|https://github.com/ceph/ceph-object-corpus.git|' .gitmodules - local force=$(if git submodule usage 2>&1 | grep --quiet 'update.*--force'; then echo --force ; fi) - git submodule sync || return 1 - git submodule update $force --init --recursive || return 1 -} - -function get_ceph() { - local git_ceph_url=$1 - local sha1=$2 - - test -d ceph || git clone ${git_ceph_url} ceph - cd ceph - if test -d src ; then # so we don't try to fetch when using a fixture - git fetch --tags http://github.com/ceph/ceph - fi - git fetch --tags ${git_ceph_url} - git checkout ${sha1} -} - -function init_ceph() { - local git_ceph_url=$1 - local sha1=$2 - get_ceph $git_ceph_url $sha1 || return 1 - git_submodules || return 1 - install_deps || return 1 -} - -function flavor2configure() { - local flavor=$1 - - eval $(dpkg-architecture) - - if test $flavor = notcmalloc || test "$DEB_HOST_GNU_CPU" = aarch64 ; then - echo --without-tcmalloc --without-cryptopp - fi -} - -# -# for a given $sha1 in the $ceph_dir repository, lookup all references -# from the remote origin and tags matching the sha1. Add a symbolic -# link in $ref_dir to the $sha1 for each reference found. If the -# reference is a tag, also add a symbolic link to the commit to which -# the tag points, if it is an annotated tag. 
-# -function link_same() { - local ref_dir=$1 - local ceph_dir=$2 - local sha1=$3 - - mkdir -p $ref_dir - ( - cd ${ceph_dir} - git for-each-ref refs/tags/** refs/remotes/origin/** | grep $sha1 | \ - while read sha1 type ref ; do - if test $type = 'tag' ; then - commit_sha1=$(git rev-parse $ref^{commit}) - if test $commit_sha1 != $sha1 ; then - echo ../sha1/$sha1 ../sha1/$commit_sha1 - fi - fi - echo ../sha1/$sha1 $(basename $ref) - done - ) | while read from to ; do - ( cd $ref_dir ; ln -sf $from $to ) - done -} - -function test_link_same() { - local d=/tmp/link_same$$ - mkdir -p $d/primary - cd $d/primary - git init - touch a ; git add a ; git commit -m 'm' a - git tag tag1 - tag1=$(git rev-parse HEAD) - git branch branch1 - touch b ; git add b ; git commit -m 'm' b - git tag --annotate -m 'a' tag2 - tag2=$(git rev-parse tag2) - sha1_tag2=$(git rev-parse tag2^{commit}) - git branch branch2 - touch c ; git add c ; git commit -m 'm' c - git branch branch3 - sha1_branch3=$(git rev-parse branch3) - - git clone $d/primary $d/secondary - cd $d/secondary - mkdir $d/ref $d/sha1 - - touch $d/sha1/$sha1_branch3 - link_same $d/ref $d/secondary $sha1_branch3 - test $(readlink --canonicalize $d/ref/branch3) = $d/sha1/$sha1_branch3 || return 1 - test $(readlink --canonicalize $d/ref/master) = $d/sha1/$sha1_branch3 || return 1 - - touch $d/sha1/$tag2 - link_same $d/ref $d/secondary $tag2 - test $(readlink --canonicalize $d/ref/tag2) = $d/sha1/$tag2 || return 1 - test $(readlink --canonicalize $d/sha1/$sha1_tag2) = $d/sha1/$tag2 || return 1 - - touch $d/sha1/$tag1 - link_same $d/ref $d/secondary $tag1 - test $(readlink --canonicalize $d/ref/tag1) = $d/sha1/$tag1 || return 1 - test $(readlink --canonicalize $d/ref/branch1) = $d/sha1/$tag1 || return 1 - - rm -fr $d -} - -function maybe_parallel() { - local nproc=$1 - local vers=$2 - - if echo $vers | grep --quiet '0\.67' ; then - return - fi - - if test $nproc -gt 1 ; then - echo -j${nproc} - fi -} - -function test_maybe_parallel() 
{ - test "$(maybe_parallel 1 0.72)" = "" || return 1 - test "$(maybe_parallel 8 0.67)" = "" || return 1 - test "$(maybe_parallel 8 0.72)" = "-j8" || return 1 -} - -if test "$1" = "TEST" ; then - shopt -s -o xtrace - PS4='${BASH_SOURCE[0]}:$LINENO: ${FUNCNAME[0]}: ' - test_link_same - test_maybe_parallel -fi diff --git a/tasks/buildpackages/debian-8.0-user-data.txt b/tasks/buildpackages/debian-8.0-user-data.txt deleted file mode 100644 index 13aba98763a..00000000000 --- a/tasks/buildpackages/debian-8.0-user-data.txt +++ /dev/null @@ -1,12 +0,0 @@ -#cloud-config -bootcmd: - - echo 'APT::Get::AllowUnauthenticated "true";' | tee /etc/apt/apt.conf.d/99disablesigs - - echo nameserver 8.8.8.8 | tee -a /etc/resolv.conf # last resort, in case the DHCP server does not provide a resolver -manage_etc_hosts: true -preserve_hostname: true -system_info: - default_user: - name: ubuntu -runcmd: - - echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers -final_message: "READYTORUN" diff --git a/tasks/buildpackages/make-deb.sh b/tasks/buildpackages/make-deb.sh deleted file mode 100755 index db9df06ffb5..00000000000 --- a/tasks/buildpackages/make-deb.sh +++ /dev/null @@ -1,150 +0,0 @@ -#!/bin/bash -# -# Copyright (C) 2015 Red Hat -# -# Author: Loic Dachary -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU Library Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Library Public License for more details. 
-# - -# -# Create and upload a deb repository with the same naming conventions -# as https://github.com/ceph/autobuild-ceph/blob/master/build-ceph-deb.sh -# -set -xe - -base=/tmp/release -gitbuilder_host=$1 -codename=$2 -git_ceph_url=$3 -sha1=$4 -flavor=$5 -arch=$6 - -sudo apt-get update -sudo apt-get install -y git - -source $(dirname $0)/common.sh - -init_ceph $git_ceph_url $sha1 - -#codename=$(lsb_release -sc) -releasedir=$base/$(lsb_release -si)/WORKDIR -# -# git describe provides a version that is -# a) human readable -# b) is unique for each commit -# c) compares higher than any previous commit -# d) contains the short hash of the commit -# -vers=$(git describe --match "v*" | sed s/^v//) -# -# always set the debian version to 1 which is ok because the debian -# directory is included in the sources and the upstream version will -# change each time it is modified. -# -dvers="$vers-1" -: ${NPROC:=$(nproc)} -ceph_dir=$(pwd) - -function build_package() { - - rm -fr $releasedir - mkdir -p $releasedir - # - # remove all files not under git so they are not - # included in the distribution. - # - git clean -qdxff - # - # creating the distribution tarbal requires some configure - # options (otherwise parts of the source tree will be left out). - # - ./autogen.sh - # Building with LTTNG on Ubuntu Precise is not possible. 
- # It fails the LTTNG-is-sane check (it misses headers) - # And the Debian rules files leave it out anyway - case $codename in - precise) lttng_opt="--without-lttng" ;; - *) lttng_opt="--with-lttng" ;; - esac - ./configure $(flavor2configure $flavor) \ - --with-rocksdb --with-ocf \ - --with-nss --with-debug --enable-cephfs-java \ - $lttng_opt --with-babeltrace - # - # use distdir= to set the name of the top level directory of the - # tarbal to match the desired version - # - make distdir=ceph-$vers dist - # - # rename the tarbal to match debian conventions and extract it - # - mv ceph-$vers.tar.gz $releasedir/ceph_$vers.orig.tar.gz - tar -C $releasedir -zxf $releasedir/ceph_$vers.orig.tar.gz - # - # copy the debian directory over - # - cp -a debian $releasedir/ceph-$vers/debian - cd $releasedir - # - # uncomment to remove -dbg packages - # because they are large and take time to build - # - #perl -ni -e 'print if(!(/^Package: .*-dbg$/../^$/))' ceph-$vers/debian/control - #perl -pi -e 's/--dbg-package.*//' ceph-$vers/debian/rules - # - # update the changelog to match the desired version - # - cd ceph-$vers - local chvers=$(head -1 debian/changelog | perl -ne 's/.*\(//; s/\).*//; print') - if [ "$chvers" != "$dvers" ]; then - DEBEMAIL="contact@ceph.com" dch -D $codename --force-distribution -b -v "$dvers" "new version" - fi - # - # create the packages (with ccache) - # - export CEPH_EXTRA_CONFIGURE_ARGS=$(flavor2configure $flavor) - j=$(maybe_parallel $NPROC $vers) - PATH=/usr/lib/ccache:$PATH dpkg-buildpackage $j -uc -us -sa -} - -function build_repo() { - local gitbuilder_host=$1 - - sudo apt-get install -y reprepro - cd ${releasedir}/.. 
- # - # Create a repository in a directory with a name structured - # as - # - base=ceph-deb-$codename-$arch-$flavor - sha1_dir=$codename/$base/sha1/$sha1 - mkdir -p $sha1_dir/conf - cat > $sha1_dir/conf/distributions < $sha1_dir/version - echo $sha1 > $sha1_dir/sha1 - link_same $codename/$base/ref $ceph_dir $sha1 - if test "$gitbuilder_host" ; then - cd $codename - sudo apt-get install -y rsync - RSYNC_RSH='ssh -o StrictHostKeyChecking=false' rsync -av $base/ $gitbuilder_host:/usr/share/nginx/html/$base/ - fi -} - -build_package -build_repo $gitbuilder_host diff --git a/tasks/buildpackages/make-rpm.sh b/tasks/buildpackages/make-rpm.sh deleted file mode 100755 index f44efc43207..00000000000 --- a/tasks/buildpackages/make-rpm.sh +++ /dev/null @@ -1,264 +0,0 @@ -#!/bin/bash -# -# Copyright (C) 2015 Red Hat -# -# Author: Loic Dachary -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU Library Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Library Public License for more details. 
-# - -# -# Create and upload a RPM repository with the same naming conventions -# as https://github.com/ceph/autobuild-ceph/blob/master/build-ceph-rpm.sh -# - -set -xe - -base=/tmp/release -gitbuilder_host=$1 -codename=$2 -git_ceph_url=$3 -sha1=$4 -flavor=$5 -arch=$6 - -suse=false -[[ $codename =~ suse ]] && suse=true - -if [ "$suse" = true ] ; then - sudo zypper -n install git -else - sudo yum install -y git -fi - -source $(dirname $0)/common.sh - -init_ceph $git_ceph_url $sha1 - -#id=$(lsb_release -s -i | tr A-Z a-z) -#major=$(lsb_release -s -r | sed -s "s;\..*;;g") -#codename="${id}${major}" -releasedir=$base/$(lsb_release -si | tr ' ' '_')/WORKDIR -# -# git describe provides a version that is -# a) human readable -# b) is unique for each commit -# c) compares higher than any previous commit -# d) contains the short hash of the commit -# -vers=$(git describe --match "v*" | sed s/^v//) -ceph_dir=$(pwd) - -# -# Create a repository in a directory with a name structured -# as -# -base=ceph-rpm-$codename-$arch-$flavor - -function setup_rpmmacros() { - if ! grep -q find_debuginfo_dwz_opts $HOME/.rpmmacros ; then - echo '%_find_debuginfo_dwz_opts %{nil}' >> $HOME/.rpmmacros - fi - if lsb_release -d -s | grep CentOS | grep -q 'release 7' ; then - if ! grep -q '%dist .el7' $HOME/.rpmmacros ; then - echo '%dist .el7' >> $HOME/.rpmmacros - fi - fi -} - -function build_package() { - rm -fr $releasedir - mkdir -p $releasedir - # - # remove all files not under git so they are not - # included in the distribution. - # - git clean -qdxff - # - # creating the distribution tarbal requires some configure - # options (otherwise parts of the source tree will be left out). 
- # - if [ "$suse" = true ] ; then - sudo zypper -n install bzip2 - else - sudo yum install -y bzip2 - fi - ./autogen.sh - ./configure $(flavor2configure $flavor) --with-debug --with-radosgw --with-fuse --with-libatomic-ops --with-gtk2 --with-nss - # - # use distdir= to set the name of the top level directory of the - # tarbal to match the desired version - # - make dist-bzip2 - # Set up build area - setup_rpmmacros - if [ "$suse" = true ] ; then - sudo zypper -n install rpm-build - else - sudo yum install -y rpm-build - fi - local buildarea=$releasedir - mkdir -p ${buildarea}/SOURCES - mkdir -p ${buildarea}/SRPMS - mkdir -p ${buildarea}/SPECS - cp ceph.spec ${buildarea}/SPECS - mkdir -p ${buildarea}/RPMS - mkdir -p ${buildarea}/BUILD - cp -a ceph-*.tar.bz2 ${buildarea}/SOURCES/. - cp -a rpm/*.patch ${buildarea}/SOURCES || true - ( - cd ${buildarea}/SPECS - ccache=$(echo /usr/lib*/ccache) - # Build RPMs - buildarea=`readlink -fn ${releasedir}` ### rpm wants absolute path - PATH=$ccache:$PATH rpmbuild -ba --define "_unpackaged_files_terminate_build 0" --define "_topdir ${buildarea}" ceph.spec - ) -} - -function build_rpm_release() { - local buildarea=$1 - local sha1=$2 - local gitbuilder_host=$3 - local base=$4 - - cat < ${buildarea}/SPECS/ceph-release.spec -Name: ceph-release -Version: 1 -Release: 0%{?dist} -Summary: Ceph repository configuration -Group: System Environment/Base -License: GPLv2 -URL: http://gitbuilder.ceph.com/$dist -Source0: ceph.repo -#Source0: RPM-GPG-KEY-CEPH -#Source1: ceph.repo -BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) -BuildArch: noarch - -%description -This package contains the Ceph repository GPG key as well as configuration -for yum and up2date. - -%prep - -%setup -q -c -T -install -pm 644 %{SOURCE0} . -#install -pm 644 %{SOURCE1} . 
- -%build - -%install -rm -rf %{buildroot} -#install -Dpm 644 %{SOURCE0} \ -# %{buildroot}/%{_sysconfdir}/pki/rpm-gpg/RPM-GPG-KEY-CEPH -%if 0%{defined suse_version} -install -dm 755 %{buildroot}/%{_sysconfdir}/zypp -install -dm 755 %{buildroot}/%{_sysconfdir}/zypp/repos.d -install -pm 644 %{SOURCE0} \ - %{buildroot}/%{_sysconfdir}/zypp/repos.d -%else -install -dm 755 %{buildroot}/%{_sysconfdir}/yum.repos.d -install -pm 644 %{SOURCE0} \ - %{buildroot}/%{_sysconfdir}/yum.repos.d -%endif - -%clean -#rm -rf %{buildroot} - -%post - -%postun - -%files -%defattr(-,root,root,-) -#%doc GPL -%if 0%{defined suse_version} -/etc/zypp/repos.d/* -%else -/etc/yum.repos.d/* -%endif -#/etc/pki/rpm-gpg/* - -%changelog -* Tue Mar 10 2013 Gary Lowell - 1-0 -- Handle both yum and zypper -- Use URL to ceph git repo for key -- remove config attribute from repo file -* Tue Aug 27 2012 Gary Lowell - 1-0 -- Initial Package -EOF - - cat < $buildarea/SOURCES/ceph.repo -[Ceph] -name=Ceph packages for \$basearch -baseurl=http://${gitbuilder_host}/${base}/sha1/${sha1}/\$basearch -enabled=1 -gpgcheck=0 -type=rpm-md - -[Ceph-noarch] -name=Ceph noarch packages -baseurl=http://${gitbuilder_host}/${base}/sha1/${sha1}/noarch -enabled=1 -gpgcheck=0 -type=rpm-md - -[ceph-source] -name=Ceph source packages -baseurl=http://${gitbuilder_host}/${base}/sha1/${sha1}/SRPMS -enabled=1 -gpgcheck=0 -type=rpm-md -EOF - - rpmbuild -bb --define "_topdir ${buildarea}" ${buildarea}/SPECS/ceph-release.spec -} - -function build_rpm_repo() { - local buildarea=$1 - local gitbuilder_host=$2 - local base=$3 - - if [ "$suse" = true ] ; then - sudo zypper -n install createrepo - else - sudo yum install -y createrepo - fi - - for dir in ${buildarea}/SRPMS ${buildarea}/RPMS/* - do - createrepo ${dir} - done - - local sha1_dir=${buildarea}/../$codename/$base/sha1/$sha1 - mkdir -p $sha1_dir - echo $vers > $sha1_dir/version - echo $sha1 > $sha1_dir/sha1 - echo ceph > $sha1_dir/name - - for dir in ${buildarea}/SRPMS 
${buildarea}/RPMS/* - do - cp -fla ${dir} $sha1_dir - done - - link_same ${buildarea}/../$codename/$base/ref $ceph_dir $sha1 - if test "$gitbuilder_host" ; then - ( - cd ${buildarea}/../$codename - RSYNC_RSH='ssh -o StrictHostKeyChecking=false' rsync -av $base/ ubuntu@$gitbuilder_host:/usr/share/nginx/html/$base/ - ) - fi -} - -setup_rpmmacros -build_package -build_rpm_release $releasedir $sha1 $gitbuilder_host $base -build_rpm_repo $releasedir $gitbuilder_host $base diff --git a/tasks/buildpackages/opensuse-42.1-user-data.txt b/tasks/buildpackages/opensuse-42.1-user-data.txt deleted file mode 100644 index 190cac2b1eb..00000000000 --- a/tasks/buildpackages/opensuse-42.1-user-data.txt +++ /dev/null @@ -1,13 +0,0 @@ -#cloud-config -bootcmd: - - echo nameserver 8.8.8.8 | tee -a /etc/resolv.conf # last resort, in case the DHCP server does not provide a resolver -manage_etc_hosts: true -preserve_hostname: true -users: - - name: ubuntu - gecos: User - sudo: ["ALL=(ALL) NOPASSWD:ALL"] - groups: users -runcmd: - - ( MYHOME=/home/ubuntu ; mkdir $MYHOME/.ssh ; chmod 700 $MYHOME/.ssh ; cp /root/.ssh/authorized_keys $MYHOME/.ssh ; chown -R ubuntu.users $MYHOME/.ssh ) -final_message: "READYTORUN" diff --git a/tasks/buildpackages/ubuntu-12.04-user-data.txt b/tasks/buildpackages/ubuntu-12.04-user-data.txt deleted file mode 120000 index 2eb0e3c88dd..00000000000 --- a/tasks/buildpackages/ubuntu-12.04-user-data.txt +++ /dev/null @@ -1 +0,0 @@ -user-data.txt \ No newline at end of file diff --git a/tasks/buildpackages/ubuntu-14.04-user-data.txt b/tasks/buildpackages/ubuntu-14.04-user-data.txt deleted file mode 120000 index 2eb0e3c88dd..00000000000 --- a/tasks/buildpackages/ubuntu-14.04-user-data.txt +++ /dev/null @@ -1 +0,0 @@ -user-data.txt \ No newline at end of file diff --git a/tasks/buildpackages/ubuntu-16.04-user-data.txt b/tasks/buildpackages/ubuntu-16.04-user-data.txt deleted file mode 120000 index 2eb0e3c88dd..00000000000 --- a/tasks/buildpackages/ubuntu-16.04-user-data.txt 
+++ /dev/null @@ -1 +0,0 @@ -user-data.txt \ No newline at end of file diff --git a/tasks/buildpackages/user-data.txt b/tasks/buildpackages/user-data.txt deleted file mode 100644 index d5016929dac..00000000000 --- a/tasks/buildpackages/user-data.txt +++ /dev/null @@ -1,10 +0,0 @@ -#cloud-config -bootcmd: - - echo 'APT::Get::AllowUnauthenticated "true";' | tee /etc/apt/apt.conf.d/99disablesigs - - echo nameserver 8.8.8.8 | tee -a /etc/resolv.conf # last resort, in case the DHCP server does not provide a resolver -manage_etc_hosts: true -preserve_hostname: true -system_info: - default_user: - name: ubuntu -final_message: "READYTORUN" diff --git a/tasks/calamari_nosetests.py b/tasks/calamari_nosetests.py deleted file mode 100644 index 5c5b15dbecb..00000000000 --- a/tasks/calamari_nosetests.py +++ /dev/null @@ -1,281 +0,0 @@ -import contextlib -import logging -import os -import textwrap -import yaml - -from teuthology import contextutil -from teuthology import misc -from teuthology import packaging -from teuthology.orchestra import run - -log = logging.getLogger(__name__) - -# extra stuff we need to do our job here -EXTRA_PKGS = [ - 'git', -] - -# stuff that would be in a devmode install, but should be -# installed in the system for running nosetests against -# a production install. 
-EXTRA_NOSETEST_PKGS = [ - 'python-psutil', - 'python-mock', -] - - -def find_client0(cluster): - ''' Find remote that has client.0 role, or None ''' - for rem, roles in cluster.remotes.iteritems(): - if 'client.0' in roles: - return rem - return None - - -def pip(remote, package, venv=None, uninstall=False, force=False): - ''' {un}install a package with pip, possibly in a virtualenv ''' - if venv: - pip = os.path.join(venv, 'bin', 'pip') - args = ['sudo', pip] - else: - args = ['sudo', 'pip'] - - if uninstall: - args.extend(['uninstall', '-y']) - else: - args.append('install') - if force: - args.append('-I') - - args.append(package) - remote.run(args=args) - - -@contextlib.contextmanager -def install_epel(remote): - ''' install a disabled-by-default epel repo config file ''' - remove = False - try: - if remote.os.package_type == 'deb': - yield - else: - remove = True - distromajor = remote.os.version.split('.')[0] - - repofiledata = textwrap.dedent(''' - [epel] - name=epel{version} - metalink=http://mirrors.fedoraproject.org/metalink?repo=epel-{version}&arch=$basearch - enabled=0 - gpgcheck=0 - ''').format(version=distromajor) - - misc.create_file(remote, '/etc/yum.repos.d/epel.repo', - data=repofiledata, sudo=True) - remote.run(args='sudo yum clean all') - yield - - finally: - if remove: - misc.delete_file(remote, '/etc/yum.repos.d/epel.repo', sudo=True) - - -def enable_epel(remote, enable=True): - ''' enable/disable the epel repo ''' - args = 'sudo sed -i'.split() - if enable: - args.extend(['s/enabled=0/enabled=1/']) - else: - args.extend(['s/enabled=1/enabled=0/']) - args.extend(['/etc/yum.repos.d/epel.repo']) - - remote.run(args=args) - remote.run(args='sudo yum clean all') - - -@contextlib.contextmanager -def install_extra_pkgs(client): - ''' Install EXTRA_PKGS ''' - try: - for pkg in EXTRA_PKGS: - packaging.install_package(pkg, client) - yield - - finally: - for pkg in EXTRA_PKGS: - packaging.remove_package(pkg, client) - - -@contextlib.contextmanager -def 
clone_calamari(config, client): - ''' clone calamari source into current directory on remote ''' - branch = config.get('calamari_branch', 'master') - url = config.get('calamari_giturl', 'git://github.com/ceph/calamari') - try: - cmd = 'git clone -b {branch} {giturl}'.format( - branch=branch, giturl=url - ) - client.run(args=cmd) - yield - finally: - # sudo python setup.py develop may have left some root files around - client.run(args='sudo rm -rf calamari') - - -@contextlib.contextmanager -def write_info_yaml(cluster, client): - ''' write info.yaml to client for nosetests ''' - try: - info = { - 'cluster': { - rem.name: {'roles': roles} - for rem, roles in cluster.remotes.iteritems() - } - } - misc.create_file(client, 'calamari/info.yaml', - data=yaml.safe_dump(info, default_flow_style=False)) - yield - finally: - misc.delete_file(client, 'calamari/info.yaml') - - -@contextlib.contextmanager -def write_test_conf(client): - ''' write calamari/tests/test.conf to client for nosetests ''' - try: - testconf = textwrap.dedent(''' - [testing] - - calamari_control = external - ceph_control = external - bootstrap = False - api_username = admin - api_password = admin - embedded_timeout_factor = 1 - external_timeout_factor = 3 - external_cluster_path = info.yaml - ''') - misc.create_file(client, 'calamari/tests/test.conf', data=testconf) - yield - - finally: - misc.delete_file(client, 'calamari/tests/test.conf') - - -@contextlib.contextmanager -def prepare_nosetest_env(client): - try: - # extra dependencies that would be in the devmode venv - if client.os.package_type == 'rpm': - enable_epel(client, enable=True) - for package in EXTRA_NOSETEST_PKGS: - packaging.install_package(package, client) - if client.os.package_type == 'rpm': - enable_epel(client, enable=False) - - # install nose itself into the calamari venv, force it in case it's - # already installed in the system, so we can invoke it by path without - # fear that it's not present - pip(client, 'nose', 
venv='/opt/calamari/venv', force=True) - - # install a later version of requests into the venv as well - # (for precise) - pip(client, 'requests', venv='/opt/calamari/venv', force=True) - - # link (setup.py develop) calamari/rest-api into the production venv - # because production does not include calamari_rest.management, needed - # for test_rest_api.py's ApiIntrospection - args = 'cd calamari/rest-api'.split() + [run.Raw(';')] + \ - 'sudo /opt/calamari/venv/bin/python setup.py develop'.split() - client.run(args=args) - - # because, at least in Python 2.6/Centos, site.py uses - # 'os.path.exists()' to process .pth file entries, and exists() uses - # access(2) to check for existence, all the paths leading up to - # $HOME/calamari/rest-api need to be searchable by all users of - # the package, which will include the WSGI/Django app, running - # as the Apache user. So make them all world-read-and-execute. - args = 'sudo chmod a+x'.split() + \ - ['.', './calamari', './calamari/rest-api'] - client.run(args=args) - - # make one dummy request just to get the WSGI app to do - # all its log creation here, before the chmod below (I'm - # looking at you, graphite -- /var/log/calamari/info.log and - # /var/log/calamari/exception.log) - client.run(args='wget -q -O /dev/null http://localhost') - - # /var/log/calamari/* is root-or-apache write-only - client.run(args='sudo chmod a+w /var/log/calamari/*') - - yield - - finally: - args = 'cd calamari/rest-api'.split() + [run.Raw(';')] + \ - 'sudo /opt/calamari/venv/bin/python setup.py develop -u'.split() - client.run(args=args) - for pkg in ('nose', 'requests'): - pip(client, pkg, venv='/opt/calamari/venv', uninstall=True) - for package in EXTRA_NOSETEST_PKGS: - packaging.remove_package(package, client) - - -@contextlib.contextmanager -def run_nosetests(client): - ''' Actually run the tests ''' - args = [ - 'cd', - 'calamari', - run.Raw(';'), - 'CALAMARI_CONFIG=/etc/calamari/calamari.conf', - '/opt/calamari/venv/bin/nosetests', - 
'-v', - 'tests/', - ] - client.run(args=args) - yield - - -@contextlib.contextmanager -def task(ctx, config): - """ - Run Calamari tests against an instance set up by 'calamari_server'. - - -- clone the Calamari source into $HOME (see options) - -- write calamari/info.yaml describing the cluster - -- write calamari/tests/test.conf containing - 'external' for calamari_control and ceph_control - 'bootstrap = False' to disable test bootstrapping (installing minions) - no api_url necessary (inferred from client.0) - 'external_cluster_path = info.yaml' - -- modify the production Calamari install to allow test runs: - install nose in the venv - install EXTRA_NOSETEST_PKGS - link in, with setup.py develop, calamari_rest (for ApiIntrospection) - -- set CALAMARI_CONFIG to point to /etc/calamari/calamari.conf - -- nosetests -v tests/ - - Options are: - calamari_giturl: url from which to git clone calamari - (default: git://github.com/ceph/calamari) - calamari_branch: git branch of calamari to check out - (default: master) - - Note: the tests must find a clean cluster, so don't forget to - set the crush default type appropriately, or install min_size OSD hosts - """ - client0 = find_client0(ctx.cluster) - if client0 is None: - raise RuntimeError("must have client.0 role") - - with contextutil.nested( - lambda: install_epel(client0), - lambda: install_extra_pkgs(client0), - lambda: clone_calamari(config, client0), - lambda: write_info_yaml(ctx.cluster, client0), - lambda: write_test_conf(client0), - lambda: prepare_nosetest_env(client0), - lambda: run_nosetests(client0), - ): - yield diff --git a/tasks/calamari_setup.py b/tasks/calamari_setup.py deleted file mode 100644 index 09d3d40894e..00000000000 --- a/tasks/calamari_setup.py +++ /dev/null @@ -1,393 +0,0 @@ -""" -Calamari setup task -""" -import contextlib -import logging -import os -import requests -import shutil -import subprocess -import webbrowser - -from cStringIO import StringIO -from teuthology.orchestra import run 
-from teuthology import contextutil -from teuthology import misc - -log = logging.getLogger(__name__) - -ICE_VERSION_DEFAULT = '1.2.2' - - -@contextlib.contextmanager -def task(ctx, config): - """ - Do the setup of a calamari server. - - - calamari_setup: - version: 'v80.1' - ice_tool_dir: - iceball_location: - - Options are: - - version -- ceph version we are testing against (defaults to 80.1) - ice_tool_dir -- optional local directory where ice-tool exists or will - be loaded (defaults to src in home directory) - ice_version -- version of ICE we're testing (with default) - iceball_location -- Can be an HTTP URL, in which case fetch from this - location, using 'ice_version' and distro information - to select the right tarball. Can also be a local - path. If local path is '.', and iceball is - not already present, then we try to build - an iceball using the ice_tool_dir commands. - ice_git_location -- location of ice tool on git - start_browser -- If True, start a browser. To be used by runs that will - bring up a browser quickly for human use. Set to False - for overnight suites that are testing for problems in - the installation itself (defaults to False). - email -- email address for the user (defaults to x@y.com) - no_epel -- indicates if we should remove epel files prior to yum - installations. Defaults to True. 
- calamari_user -- user name to log into gui (defaults to admin) - calamari_password -- calamari user password (defaults to admin) - """ - cal_svr = None - start_browser = config.get('start_browser', False) - no_epel = config.get('no_epel', True) - for remote_, roles in ctx.cluster.remotes.items(): - if 'client.0' in roles: - cal_svr = remote_ - break - if not cal_svr: - raise RuntimeError('client.0 not found in roles') - with contextutil.nested( - lambda: adjust_yum_repos(ctx, cal_svr, no_epel), - lambda: calamari_install(config, cal_svr), - lambda: ceph_install(ctx, cal_svr), - lambda: calamari_connect(ctx, cal_svr), - lambda: browser(start_browser, cal_svr.hostname), - ): - yield - - -@contextlib.contextmanager -def adjust_yum_repos(ctx, cal_svr, no_epel): - """ - For each remote machine, fix the repos if yum is used. - """ - ice_distro = str(cal_svr.os) - if ice_distro.startswith('rhel') or ice_distro.startswith('centos'): - if no_epel: - for remote in ctx.cluster.remotes: - fix_yum_repos(remote, ice_distro) - try: - yield - finally: - if ice_distro.startswith('rhel') or ice_distro.startswith('centos'): - if no_epel: - for remote in ctx.cluster.remotes: - restore_yum_repos(remote) - - -def restore_yum_repos(remote): - """ - Copy the old saved repo back in. 
- """ - if remote.run(args=['sudo', 'rm', '-rf', '/etc/yum.repos.d']).exitstatus: - return False - if remote.run(args=['sudo', 'mv', '/etc/yum.repos.d.old', - '/etc/yum.repos.d']).exitstatus: - return False - - -def fix_yum_repos(remote, distro): - """ - For yum calamari installations, the repos.d directory should only - contain a repo file named rhel.repo - """ - if distro.startswith('centos'): - cmds = [ - 'sudo mkdir /etc/yum.repos.d.old'.split(), - ['sudo', 'cp', run.Raw('/etc/yum.repos.d/*'), - '/etc/yum.repos.d.old'], - ['sudo', 'rm', run.Raw('/etc/yum.repos.d/epel*')], - ] - for cmd in cmds: - if remote.run(args=cmd).exitstatus: - return False - else: - cmds = [ - 'sudo mv /etc/yum.repos.d /etc/yum.repos.d.old'.split(), - 'sudo mkdir /etc/yum.repos.d'.split(), - ] - for cmd in cmds: - if remote.run(args=cmd).exitstatus: - return False - - # map "distroversion" from Remote.os to a tuple of - # (repo title, repo name descriptor, apt-mirror repo path chunk) - yum_repo_params = { - 'rhel 6.4': ('rhel6-server', 'RHEL', 'rhel6repo-server'), - 'rhel 6.5': ('rhel6-server', 'RHEL', 'rhel6repo-server'), - 'rhel 7.0': ('rhel7-server', 'RHEL', 'rhel7repo/server'), - } - repotitle, reponame, path = yum_repo_params[distro] - repopath = '/etc/yum.repos.d/%s.repo' % repotitle - # TO DO: Make this data configurable too - repo_contents = '\n'.join( - ('[%s]' % repotitle, - 'name=%s $releasever - $basearch' % reponame, - 'baseurl=http://apt-mirror.front.sepia.ceph.com/' + path, - 'gpgcheck=0', - 'enabled=1') - ) - misc.sudo_write_file(remote, repopath, repo_contents) - cmds = [ - 'sudo yum clean all'.split(), - 'sudo yum makecache'.split(), - ] - for cmd in cmds: - if remote.run(args=cmd).exitstatus: - return False - return True - - -def get_iceball_with_http(urlbase, ice_version, ice_distro, destdir): - ''' - Copy iceball with http to destdir - ''' - url = '/'.join(( - urlbase, - '{ver}/ICE-{ver}-{distro}.tar.gz'.format( - ver=ice_version, distro=ice_distro - ) - )) - # 
@contextlib.contextmanager
def calamari_install(config, cal_svr):
    """
    Install calamari on the calamari server.

    The steps here are:
        -- Get the iceball, building it if necessary.
        -- Copy the iceball to the calamari server, and untar it.
        -- Run ice_setup.py on the calamari server.
        -- Run calamari-ctl initialize.

    On exit, the local iceball is removed if this function downloaded or
    built it.

    :param config: Task configuration dictionary.  Keys read here are
                   version, email, ice_tool_dir, calamari_user,
                   calamari_passwd, ice_git_location, iceball_location and
                   ice_version.
    :param cal_svr: Remote on which calamari gets installed.
    :raises RuntimeError: if cloning or building ice-tools fails, or if any
                          of the remote installation steps returns a
                          non-zero exit status.
    """
    ice_distro = str(cal_svr.os)
    ice_distro = ice_distro.replace(" ", "")
    # str(cal_svr) looks like user@host; only the host part is handed to
    # ice_setup.py below.
    client_id = str(cal_svr)
    at_loc = client_id.find('@')
    if at_loc > 0:
        client_id = client_id[at_loc + 1:]
    convert = {'ubuntu12.04': 'precise', 'ubuntu14.04': 'trusty',
               'rhel7.0': 'rhel7', 'debian7': 'wheezy'}
    version = config.get('version', 'v0.80.1')
    email = config.get('email', 'x@x.com')
    ice_tool_dir = config.get('ice_tool_dir', '%s%s%s' %
                              (os.environ['HOME'], os.sep, 'src'))
    calamari_user = config.get('calamari_user', 'admin')
    calamari_password = config.get('calamari_passwd', 'admin')
    git_icetool_loc = config.get('ice_git_location',
                                 'git@github.com:inktankstorage')
    if ice_distro in convert:
        ice_distro = convert[ice_distro]
    log.info('calamari server on %s' % ice_distro)
    iceball_loc = config.get('iceball_location', '.')
    ice_version = config.get('ice_version', ICE_VERSION_DEFAULT)
    delete_iceball = False
    if iceball_loc.startswith('http'):
        get_iceball_with_http(iceball_loc, ice_version, ice_distro, '/tmp')
        iceball_loc = '/tmp'
        delete_iceball = True
    elif iceball_loc == '.':
        ice_tool_loc = os.path.join(ice_tool_dir, 'ice-tools')
        if not os.path.isdir(ice_tool_loc):
            try:
                # A git location is a URL, not a filesystem path, so it is
                # joined with '/' rather than os.sep.
                subprocess.check_call(['git', 'clone',
                                       git_icetool_loc + '/' +
                                       'ice-tools.git',
                                       ice_tool_loc])
            except subprocess.CalledProcessError:
                raise RuntimeError('git clone of ice-tools failed')
        exec_ice = os.path.join(ice_tool_loc,
                                'teuth-virtenv/bin/make_iceball')
        try:
            subprocess.check_call('virtualenv teuth-virtenv'.split(),
                                  cwd=ice_tool_loc)
            subprocess.check_call(
                'teuth-virtenv/bin/python setup.py develop'.split(),
                cwd=ice_tool_loc
            )
            subprocess.check_call(
                'teuth-virtenv/bin/pip install -r requirements.txt'.split(),
                cwd=ice_tool_loc
            )
            subprocess.check_call([exec_ice, '-I', ice_version,
                                   '-b', version, '-o', ice_distro])
            delete_iceball = True
        except subprocess.CalledProcessError:
            raise RuntimeError('%s failed for %s distro' %
                               (exec_ice, ice_distro))
        subprocess.check_call('rm -rf teuth-virtenv'.split(),
                              cwd=ice_tool_loc)

    gz_file = 'ICE-{0}-{1}.tar.gz'.format(ice_version, ice_distro)
    lgz_file = os.path.join(iceball_loc, gz_file)
    cal_svr.put_file(lgz_file, os.path.join('/tmp/', gz_file))
    ret = cal_svr.run(args=['gunzip', run.Raw('<'), "/tmp/%s" % gz_file,
                            run.Raw('|'), 'tar', 'xvf', run.Raw('-')])
    if ret.exitstatus:
        raise RuntimeError('remote tar failed')
    # Answers fed to the interactive ice_setup.py prompts.
    icesetdata = 'yes\n\n%s\nhttp\n' % client_id
    ice_in = StringIO(icesetdata)
    ice_setup_io = StringIO()
    ret = cal_svr.run(args=['sudo', 'python', 'ice_setup.py'], stdin=ice_in,
                      stdout=ice_setup_io)
    log.debug(ice_setup_io.getvalue())
    # Run Calamari-ceph connect.
    if ret.exitstatus:
        raise RuntimeError('ice_setup.py failed')
    # Answers fed to calamari-ctl initialize: user, email, password twice.
    icesetdata = '%s\n%s\n%s\n%s\n' % (calamari_user, email, calamari_password,
                                       calamari_password)
    ice_in = StringIO(icesetdata)
    ret = cal_svr.run(args=['sudo', 'calamari-ctl', 'initialize'],
                      stdin=ice_in, stdout=ice_setup_io)
    log.debug(ice_setup_io.getvalue())
    if ret.exitstatus:
        raise RuntimeError('calamari-ctl initialize failed')
    try:
        yield
    finally:
        log.info('Cleaning up after Calamari installation')
        if delete_iceball:
            # Remove the file that was actually uploaded.  The bare file
            # name only matches it when the iceball lives in the current
            # working directory.
            os.unlink(lgz_file)
def deploy_ceph(ctx, cal_svr):
    """
    Perform the ceph-deploy actions needed to bring up a Ceph cluster.  This
    test is needed to check the ceph-deploy that comes with the calamari
    package.

    Every command is attempted even if an earlier one failed.

    :param ctx: Context; ctx.cluster.remotes maps each remote to its roles.
    :param cal_svr: Remote on which the ceph-deploy commands are run.
    :returns: A false value if every command exited with status 0, otherwise
              the first non-zero exit status seen.
    """
    osd_to_name = {}
    all_machines = set()
    all_mons = set()
    for remote in ctx.cluster.remotes:
        all_machines.add(remote.shortname)
        roles = ctx.cluster.remotes[remote]
        for role in roles:
            daemon_type, number = role.split('.', 1)
            if daemon_type == 'osd':
                osd_to_name[number] = remote.shortname
            if daemon_type == 'mon':
                all_mons.add(remote.shortname)
    # Sorted so the generated command lines are reproducible between runs.
    first_cmds = [['new'] + sorted(all_mons),
                  ['install'] + sorted(all_machines),
                  ['mon', 'create-initial']]
    # Accumulate failures.  Starting from True and AND-ing exit statuses
    # would drop to 0 on the first success and hide every other failure.
    ret = 0
    for entry in first_cmds:
        arg_list = ['ceph-deploy'] + entry
        log.info('Running: %s' % ' '.join(arg_list))
        status = cal_svr.run(args=arg_list).exitstatus
        ret = ret or status
    # Index 1 is the first osd on a machine (vdd), index 2 the second (vdc),
    # and so on; index 0 is a placeholder that is never used.
    disk_labels = '_dcba'
    # NEEDS WORK assumes disks start with vd (need to check this somewhere)
    for cmd_pts in [['disk', 'zap'], ['osd', 'prepare'], ['osd', 'activate']]:
        mach_osd_cnt = {}
        # The same osd order is used for every command so that each osd
        # keeps the same disk across zap, prepare and activate.
        for osdn in sorted(osd_to_name):
            osd_mac = osd_to_name[osdn]
            mach_osd_cnt[osd_mac] = mach_osd_cnt.get(osd_mac, 0) + 1
            arg_list = ['ceph-deploy']
            arg_list.extend(cmd_pts)
            disk_id = '%s:vd%s' % (osd_mac,
                                   disk_labels[mach_osd_cnt[osd_mac]])
            if 'activate' in cmd_pts:
                # activate works on the first partition of the disk
                disk_id += '1'
            arg_list.append(disk_id)
            log.info('Running: %s' % ' '.join(arg_list))
            status = cal_svr.run(args=arg_list).exitstatus
            ret = ret or status
    return ret
@contextlib.contextmanager
def calamari_connect(ctx, cal_svr):
    """
    Run 'ceph-deploy calamari connect' from the calamari server against
    every remote that does not carry the client.0 role.

    :param ctx: Context; ctx.cluster.remotes maps each remote to its roles.
    :param cal_svr: Remote on which the ceph-deploy command is run.
    :raises RuntimeError: if the connect command exits with non-zero status.
    """
    remotes = ctx.cluster.remotes
    targets = [node.shortname for node in remotes
               if 'client.0' not in remotes[node]]
    result = cal_svr.run(
        args=['ceph-deploy', 'calamari', 'connect'] + targets)
    if result.exitstatus:
        raise RuntimeError('calamari connect failed')
    try:
        yield
    finally:
        log.info('Calamari test terminating')
def generate_caps(type_):
    """
    Yield the ceph-authtool capability arguments for one system type.

    For every capability configured for the type, three items are produced
    in a row: the literal '--cap', the subsystem name and the capability
    string.  Valid types are osd, mds and client; any other type raises
    KeyError once the generator is first advanced.
    """
    caps_by_type = {
        'osd': dict(
            mon='allow *',
            osd='allow *',
        ),
        'mds': dict(
            mon='allow *',
            osd='allow *',
            mds='allow',
        ),
        'client': dict(
            mon='allow rw',
            osd='allow rwx',
            mds='allow',
        ),
    }
    for daemon, grant in caps_by_type[type_].items():
        for token in ('--cap', daemon, grant):
            yield token
def assign_devs(roles, devs):
    """
    Pair each role with the device at the same position.

    Pairing stops at the end of the shorter of the two sequences, and a
    role that appears more than once keeps the last device paired with it.

    :param roles: List of roles
    :param devs: Corresponding list of devices.
    :returns: Dictionary of devs indexed by roles.
    """
    role_to_dev = {}
    for role, dev in zip(roles, devs):
        role_to_dev[role] = dev
    return role_to_dev
@contextlib.contextmanager
def crush_setup(ctx, config):
    """
    Apply the configured crush tunables profile before yielding.

    The profile is read from the 'crush_tunables' config key and falls back
    to 'default'.  The command is run on the remote hosting the first
    monitor.  Nothing is undone on exit.

    :param ctx: Context
    :param config: Configuration
    """
    mon_role = teuthology.get_first_mon(ctx, config)
    # Exactly one remote is expected to host the first monitor.
    [mon_host] = ctx.cluster.only(mon_role).remotes.iterkeys()

    tunables = config.get('crush_tunables', 'default')
    log.info('Setting crush tunables to %s', tunables)
    command = ['sudo', 'ceph', 'osd', 'crush', 'tunables', tunables]
    mon_host.run(args=command)
    yield
@contextlib.contextmanager
def cluster(ctx, config):
    """
    Handle the creation and removal of a ceph cluster.

    On startup:
        Create directories needed for the cluster.
        Create remote journals for all osds.
        Create and set keyring.
        Copy the monmap to the test systems.
        Setup mon nodes.
        Setup mds nodes.
        Mkfs osd nodes.
        Add keyring information to monmaps
        Mkfs mon nodes.

    On exit:
        If errors occurred, extract a failure message and store in ctx.summary.
        Unmount all test files and temporary journaling files.
        Save the monitor information and archive all ceph logs.
        Cleanup the keyring setup, and remove all monitor map and data files left over.

    If ctx.config has 'use_existing_cluster' set to True, nothing is created
    or cleaned up.

    :param ctx: Context
    :param config: Configuration
    """
    if ctx.config.get('use_existing_cluster', False) is True:
        log.info("'use_existing_cluster' is true; skipping cluster creation")
        yield
        # A context manager generator must yield exactly once.  Without this
        # return the code below would build a cluster and yield again.
        return

    testdir = teuthology.get_testdir(ctx)
    log.info('Creating ceph cluster...')
    run.wait(
        ctx.cluster.run(
            args=[
                'install', '-d', '-m0755', '--',
                '{tdir}/data'.format(tdir=testdir),
            ],
            wait=False,
        )
    )

    run.wait(
        ctx.cluster.run(
            args=[
                'sudo',
                'install', '-d', '-m0777', '--', '/var/run/ceph',
            ],
            wait=False,
        )
    )

    # remote -> list of osd mount points to unmount on exit
    devs_to_clean = {}
    remote_to_roles_to_devs = {}
    remote_to_roles_to_journals = {}
    osds = ctx.cluster.only(teuthology.is_type('osd'))
    for remote, roles_for_host in osds.remotes.iteritems():
        devs = teuthology.get_scratch_devices(remote)
        roles_to_devs = {}
        roles_to_journals = {}
        if config.get('fs'):
            log.info('fs option selected, checking for scratch devs')
            log.info('found devs: %s' % (str(devs),))
            devs_id_map = teuthology.get_wwn_id_map(remote, devs)
            iddevs = devs_id_map.values()
            roles_to_devs = assign_devs(
                teuthology.roles_of_type(roles_for_host, 'osd'), iddevs
            )
            # Devices not consumed as osd data disks stay available for
            # block journals below.
            if len(roles_to_devs) < len(iddevs):
                iddevs = iddevs[len(roles_to_devs):]
            devs_to_clean[remote] = []

        if config.get('block_journal'):
            log.info('block journal enabled')
            # NOTE(review): iddevs is only assigned when 'fs' is set;
            # block_journal without fs looks unsupported -- confirm.
            roles_to_journals = assign_devs(
                teuthology.roles_of_type(roles_for_host, 'osd'), iddevs
            )
            log.info('journal map: %s', roles_to_journals)

        if config.get('tmpfs_journal'):
            log.info('tmpfs journal enabled')
            roles_to_journals = {}
            remote.run(args=['sudo', 'mount', '-t', 'tmpfs', 'tmpfs', '/mnt'])
            for osd in teuthology.roles_of_type(roles_for_host, 'osd'):
                tmpfs = '/mnt/osd.%s' % osd
                roles_to_journals[osd] = tmpfs
                remote.run(args=['truncate', '-s', '1500M', tmpfs])
            log.info('journal map: %s', roles_to_journals)

        log.info('dev map: %s' % (str(roles_to_devs),))
        remote_to_roles_to_devs[remote] = roles_to_devs
        remote_to_roles_to_journals[remote] = roles_to_journals

    log.info('Generating config...')
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [role_list for (remote, role_list) in remotes_and_roles]
    ips = [host for (host, port) in
           (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)]
    conf = teuthology.skeleton_config(ctx, roles=roles, ips=ips)
    for remote, roles_to_journals in remote_to_roles_to_journals.iteritems():
        for role, journal in roles_to_journals.iteritems():
            key = "osd." + str(role)
            if key not in conf:
                conf[key] = {}
            conf[key]['osd journal'] = journal
    # Options from the task configuration override the skeleton config.
    for section, keys in config['conf'].iteritems():
        for key, value in keys.iteritems():
            log.info("[%s] %s = %s" % (section, key, value))
            if section not in conf:
                conf[section] = {}
            conf[section][key] = value

    if config.get('tmpfs_journal'):
        conf['journal dio'] = False

    ctx.ceph = argparse.Namespace()
    ctx.ceph.conf = conf

    keyring_path = config.get('keyring_path', '/etc/ceph/ceph.keyring')

    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)

    firstmon = teuthology.get_first_mon(ctx, config)

    log.info('Setting up %s...' % firstmon)
    ctx.cluster.only(firstmon).run(
        args=[
            'sudo',
            'adjust-ulimits',
            'ceph-coverage',
            coverage_dir,
            'ceph-authtool',
            '--create-keyring',
            keyring_path,
        ],
    )
    ctx.cluster.only(firstmon).run(
        args=[
            'sudo',
            'adjust-ulimits',
            'ceph-coverage',
            coverage_dir,
            'ceph-authtool',
            '--gen-key',
            '--name=mon.',
            keyring_path,
        ],
    )
    ctx.cluster.only(firstmon).run(
        args=[
            'sudo',
            'chmod',
            '0644',
            keyring_path,
        ],
    )
    (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
    fsid = teuthology.create_simple_monmap(
        ctx,
        remote=mon0_remote,
        conf=conf,
    )
    if 'global' not in conf:
        conf['global'] = {}
    conf['global']['fsid'] = fsid

    log.info('Writing ceph.conf for FSID %s...' % fsid)
    conf_path = config.get('conf_path', DEFAULT_CONF_PATH)
    write_conf(ctx, conf_path)

    log.info('Creating admin key on %s...' % firstmon)
    ctx.cluster.only(firstmon).run(
        args=[
            'sudo',
            'adjust-ulimits',
            'ceph-coverage',
            coverage_dir,
            'ceph-authtool',
            '--gen-key',
            '--name=client.admin',
            '--set-uid=0',
            '--cap', 'mon', 'allow *',
            '--cap', 'osd', 'allow *',
            '--cap', 'mds', 'allow *',
            keyring_path,
        ],
    )

    log.info('Copying monmap to all nodes...')
    keyring = teuthology.get_file(
        remote=mon0_remote,
        path=keyring_path,
    )
    monmap = teuthology.get_file(
        remote=mon0_remote,
        path='{tdir}/monmap'.format(tdir=testdir),
    )

    for rem in ctx.cluster.remotes.iterkeys():
        # copy mon key and initial monmap
        log.info('Sending monmap to node {remote}'.format(remote=rem))
        teuthology.sudo_write_file(
            remote=rem,
            path=keyring_path,
            data=keyring,
            perms='0644'
        )
        teuthology.write_file(
            remote=rem,
            path='{tdir}/monmap'.format(tdir=testdir),
            data=monmap,
        )

    log.info('Setting up mon nodes...')
    mons = ctx.cluster.only(teuthology.is_type('mon'))
    run.wait(
        mons.run(
            args=[
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'osdmaptool',
                '-c', conf_path,
                '--clobber',
                '--createsimple', '{num:d}'.format(
                    num=teuthology.num_instances_of_type(ctx.cluster, 'osd'),
                ),
                '{tdir}/osdmap'.format(tdir=testdir),
                '--pg_bits', '2',
                '--pgp_bits', '4',
            ],
            wait=False,
        ),
    )

    log.info('Setting up mds nodes...')
    mdss = ctx.cluster.only(teuthology.is_type('mds'))
    for remote, roles_for_host in mdss.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, 'mds'):
            remote.run(
                args=[
                    'sudo',
                    'mkdir',
                    '-p',
                    '/var/lib/ceph/mds/ceph-{id}'.format(id=id_),
                    run.Raw('&&'),
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-authtool',
                    '--create-keyring',
                    '--gen-key',
                    '--name=mds.{id}'.format(id=id_),
                    '/var/lib/ceph/mds/ceph-{id}/keyring'.format(id=id_),
                ],
            )

    cclient.create_keyring(ctx)
    log.info('Running mkfs on osd nodes...')

    ctx.disk_config = argparse.Namespace()
    ctx.disk_config.remote_to_roles_to_dev = remote_to_roles_to_devs
    ctx.disk_config.remote_to_roles_to_journals = remote_to_roles_to_journals
    ctx.disk_config.remote_to_roles_to_dev_mount_options = {}
    ctx.disk_config.remote_to_roles_to_dev_fstype = {}

    log.info("ctx.disk_config.remote_to_roles_to_dev: {r}".format(r=str(ctx.disk_config.remote_to_roles_to_dev)))
    for remote, roles_for_host in osds.remotes.iteritems():
        roles_to_devs = remote_to_roles_to_devs[remote]
        roles_to_journals = remote_to_roles_to_journals[remote]

        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
            mnt_point = os.path.join('/var/lib/ceph/osd',
                                     'ceph-{id}'.format(id=id_))
            remote.run(
                args=[
                    'sudo',
                    'mkdir',
                    '-p',
                    mnt_point,
                ])
            log.info(str(roles_to_journals))
            log.info(id_)
            if roles_to_devs.get(id_):
                dev = roles_to_devs[id_]
                fs = config.get('fs')
                package = None
                mkfs_options = config.get('mkfs_options')
                mount_options = config.get('mount_options')
                if fs == 'btrfs':
                    # package = 'btrfs-tools'
                    if mount_options is None:
                        mount_options = ['noatime', 'user_subvol_rm_allowed']
                    if mkfs_options is None:
                        mkfs_options = ['-m', 'single',
                                        '-l', '32768',
                                        '-n', '32768']
                if fs == 'xfs':
                    # package = 'xfsprogs'
                    if mount_options is None:
                        mount_options = ['noatime']
                    if mkfs_options is None:
                        mkfs_options = ['-f', '-i', 'size=2048']
                if fs == 'ext4' or fs == 'ext3':
                    if mount_options is None:
                        mount_options = ['noatime', 'user_xattr']

                if mount_options is None:
                    mount_options = []
                if mkfs_options is None:
                    mkfs_options = []
                mkfs = ['mkfs.%s' % fs] + mkfs_options
                log.info('%s on %s on %s' % (mkfs, dev, remote))
                if package is not None:
                    remote.run(
                        args=[
                            'sudo',
                            'apt-get', 'install', '-y', package
                        ],
                        stdout=StringIO(),
                    )

                try:
                    remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev])
                except run.CommandFailedError:
                    # Newer btfs-tools doesn't prompt for overwrite, use -f.
                    # -f is an mkfs flag, so the mkfs options are the list
                    # to inspect, not the mount options.
                    if '-f' not in mkfs_options:
                        mkfs_options.append('-f')
                        mkfs = ['mkfs.%s' % fs] + mkfs_options
                        log.info('%s on %s on %s' % (mkfs, dev, remote))
                    remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev])

                log.info('mount %s on %s -o %s' % (dev, remote,
                                                   ','.join(mount_options)))
                remote.run(
                    args=[
                        'sudo',
                        'mount',
                        '-t', fs,
                        '-o', ','.join(mount_options),
                        dev,
                        mnt_point,
                    ]
                )
                # Best effort: restorecon may be missing on the host, so
                # its exit status is ignored.
                remote.run(
                    args=[
                        'sudo', '/sbin/restorecon', mnt_point,
                    ],
                    check_status=False,
                )
                if remote not in ctx.disk_config.remote_to_roles_to_dev_mount_options:
                    ctx.disk_config.remote_to_roles_to_dev_mount_options[remote] = {}
                ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][id_] = mount_options
                if remote not in ctx.disk_config.remote_to_roles_to_dev_fstype:
                    ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {}
                ctx.disk_config.remote_to_roles_to_dev_fstype[remote][id_] = fs
                devs_to_clean[remote].append(mnt_point)

        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
            remote.run(
                args=[
                    'sudo',
                    'MALLOC_CHECK_=3',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-osd',
                    '--mkfs',
                    '--mkkey',
                    '-i', id_,
                    '--monmap', '{tdir}/monmap'.format(tdir=testdir),
                ],
            )

    log.info('Reading keys from all nodes...')
    keys_fp = StringIO()
    keys = []
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for type_ in ['mds', 'osd']:
            for id_ in teuthology.roles_of_type(roles_for_host, type_):
                data = teuthology.get_file(
                    remote=remote,
                    path='/var/lib/ceph/{type}/ceph-{id}/keyring'.format(
                        type=type_,
                        id=id_,
                    ),
                    sudo=True,
                )
                keys.append((type_, id_, data))
                keys_fp.write(data)
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for type_ in ['client']:
            for id_ in teuthology.roles_of_type(roles_for_host, type_):
                data = teuthology.get_file(
                    remote=remote,
                    path='/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                )
                keys.append((type_, id_, data))
                keys_fp.write(data)

    log.info('Adding keys to all mons...')
    writes = mons.run(
        args=[
            'sudo', 'tee', '-a',
            keyring_path,
        ],
        stdin=run.PIPE,
        wait=False,
        stdout=StringIO(),
    )
    keys_fp.seek(0)
    teuthology.feed_many_stdins_and_close(keys_fp, writes)
    run.wait(writes)
    for type_, id_, data in keys:
        run.wait(
            mons.run(
                args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-authtool',
                    keyring_path,
                    '--name={type}.{id}'.format(
                        type=type_,
                        id=id_,
                    ),
                ] + list(generate_caps(type_)),
                wait=False,
            ),
        )

    log.info('Running mkfs on mon nodes...')
    for remote, roles_for_host in mons.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, 'mon'):
            remote.run(
                args=[
                    'sudo',
                    'mkdir',
                    '-p',
                    '/var/lib/ceph/mon/ceph-{id}'.format(id=id_),
                ],
            )
            remote.run(
                args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-mon',
                    '--mkfs',
                    '-i', id_,
                    '--monmap={tdir}/monmap'.format(tdir=testdir),
                    '--osdmap={tdir}/osdmap'.format(tdir=testdir),
                    '--keyring={kpath}'.format(kpath=keyring_path),
                ],
            )

    run.wait(
        mons.run(
            args=[
                'rm',
                '--',
                '{tdir}/monmap'.format(tdir=testdir),
                '{tdir}/osdmap'.format(tdir=testdir),
            ],
            wait=False,
        ),
    )

    try:
        yield
    except Exception:
        # we need to know this below
        ctx.summary['success'] = False
        raise
    finally:
        (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()

        log.info('Checking cluster log for badness...')

        def first_in_ceph_log(pattern, excludes):
            """
            Find the first occurrence of the pattern specified in the Ceph log,
            Returns None if none found.

            :param pattern: Pattern scanned for.
            :param excludes: Patterns to ignore.
            :return: First line of text (or None if not found)
            """
            args = [
                'sudo',
                'egrep', pattern,
                '/var/log/ceph/ceph.log',
            ]
            for exclude in excludes:
                args.extend([run.Raw('|'), 'egrep', '-v', exclude])
            args.extend([
                run.Raw('|'), 'head', '-n', '1',
            ])
            r = mon0_remote.run(
                stdout=StringIO(),
                args=args,
            )
            stdout = r.stdout.getvalue()
            if stdout != '':
                return stdout
            return None

        # Raw strings keep the backslashes for egrep without relying on
        # Python passing unknown escape sequences through unchanged.
        if first_in_ceph_log(r'\[ERR\]|\[WRN\]|\[SEC\]',
                             config['log_whitelist']) is not None:
            log.warning('Found errors (ERR|WRN|SEC) in cluster log')
            ctx.summary['success'] = False
            # use the most severe problem as the failure reason
            if 'failure_reason' not in ctx.summary:
                for pattern in [r'\[SEC\]', r'\[ERR\]', r'\[WRN\]']:
                    match = first_in_ceph_log(pattern, config['log_whitelist'])
                    if match is not None:
                        ctx.summary['failure_reason'] = \
                            '"{match}" in cluster log'.format(
                                match=match.rstrip('\n'),
                            )
                        break

        for remote, dirs in devs_to_clean.iteritems():
            for dir_ in dirs:
                log.info('Unmounting %s on %s' % (dir_, remote))
                try:
                    remote.run(
                        args=[
                            'sync',
                            run.Raw('&&'),
                            'sudo',
                            'umount',
                            '-f',
                            dir_
                        ]
                    )
                except Exception as e:
                    # Dump open files and the process tree to help find
                    # what is keeping the mount busy, then fail.
                    remote.run(args=[
                        'sudo',
                        run.Raw('PATH=/usr/sbin:$PATH'),
                        'lsof',
                        run.Raw(';'),
                        'ps', 'auxf',
                    ])
                    raise e

        if config.get('tmpfs_journal'):
            log.info('tmpfs journal enabled - unmounting tmpfs at /mnt')
            for remote, roles_for_host in osds.remotes.iteritems():
                remote.run(
                    args=['sudo', 'umount', '-f', '/mnt'],
                    check_status=False,
                )

        if ctx.archive is not None and \
                not (ctx.config.get('archive-on-error') and ctx.summary['success']):

            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

        log.info('Cleaning ceph cluster...')
        run.wait(
            ctx.cluster.run(
                args=[
                    'sudo',
                    'rm',
                    '-rf',
                    '--',
                    conf_path,
                    keyring_path,
                    '{tdir}/data'.format(tdir=testdir),
                    '{tdir}/monmap'.format(tdir=testdir),
                ],
                wait=False,
            ),
        )
@contextlib.contextmanager
def run_daemon(ctx, config, type_):
    """
    Run daemons for a role type.  Handle the startup and termination of a daemon.
    On startup -- build the command line (coverage, cpu_profile and valgrind
    settings) for every daemon of this type and register it with ctx.daemons.
    On cleanup -- Stop all existing daemons of this type.

    :param ctx: Context
    :param config: Configuration
    :param type_: Role type
    """
    log.info('Starting %s daemons...' % type_)
    testdir = teuthology.get_testdir(ctx)
    daemons = ctx.cluster.only(teuthology.is_type(type_))

    # check whether any daemons of this type are configured
    if daemons is None:
        # A context manager generator has to yield exactly once; returning
        # without yielding makes the enclosing 'with' raise RuntimeError.
        yield
        return
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)

    # Signal name handed to daemon-helper; 'term' is used instead of 'kill'
    # when coverage or valgrind is configured.
    daemon_signal = 'kill'
    if config.get('coverage') or config.get('valgrind') is not None:
        daemon_signal = 'term'

    for remote, roles_for_host in daemons.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, type_):
            name = '%s.%s' % (type_, id_)

            run_cmd = [
                'sudo',
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'daemon-helper',
                daemon_signal,
            ]
            run_cmd_tail = [
                'ceph-%s' % (type_),
                '-f',
                '-i', id_]

            if type_ in config.get('cpu_profile', []):
                profile_path = '/var/log/ceph/profiling-logger/%s.%s.prof' % (type_, id_)
                run_cmd.extend(['env', 'CPUPROFILE=%s' % profile_path])

            if config.get('valgrind') is not None:
                valgrind_args = None
                # Settings for a specific daemon (e.g. osd.1) take
                # precedence over settings for the whole type.
                if type_ in config['valgrind']:
                    valgrind_args = config['valgrind'][type_]
                if name in config['valgrind']:
                    valgrind_args = config['valgrind'][name]
                run_cmd = teuthology.get_valgrind_args(testdir, name,
                                                       run_cmd,
                                                       valgrind_args)

            run_cmd.extend(run_cmd_tail)

            ctx.daemons.add_daemon(remote, type_, id_,
                                   args=run_cmd,
                                   logger=log.getChild(name),
                                   stdin=run.PIPE,
                                   wait=False,
                                   )

    try:
        yield
    finally:
        teuthology.stop_daemons_of_type(ctx, type_)
@contextlib.contextmanager
def restart(ctx, config):
    """
    Restart ceph daemons, then optionally wait for the cluster.

    The configuration is either a list of daemon roles::

        tasks:
        - ceph.restart: [all]

    ::

        tasks:
        - ceph.restart: [osd.0, mon.1, mds.*]

    or a dictionary carrying the list plus wait options::

        tasks:
        - ceph.restart:
            daemons: [osd.0, mon.1]
            wait-for-healthy: false
            wait-for-osds-up: true

    wait-for-healthy defaults to true and wait-for-osds-up to false.

    :param ctx: Context
    :param config: Configuration
    """
    if isinstance(config, list):
        config = {'daemons': config}
    elif config is None:
        config = {}

    requested = config.get('daemons', None)
    for role in ctx.daemons.resolve_role_list(requested, CEPH_ROLE_TYPES):
        parts = role.split('.', 1)
        type_ = parts[0]
        id_ = parts[1]
        daemon = ctx.daemons.get_daemon(type_, id_)
        daemon.restart()

    if config.get('wait-for-healthy', True):
        healthy(ctx=ctx, config=None)
    if config.get('wait-for-osds-up', False):
        wait_for_osds_up(ctx=ctx, config=None)
    yield
ctx.daemons.resolve_role_list(config.get('daemons', None), CEPH_ROLE_TYPES) - for i in daemons: - type_ = i.split('.', 1)[0] - id_ = i.split('.', 1)[1] - try: - ctx.daemons.get_daemon(type_, id_).wait() - except: - log.info('Saw expected daemon failure. Continuing.') - pass - else: - raise RuntimeError('daemon %s did not fail' % i) - - yield - - -@contextlib.contextmanager -def task(ctx, config): - """ - Set up and tear down a Ceph cluster. - - For example:: - - tasks: - - ceph: - - interactive: - - You can also specify what branch to run:: - - tasks: - - ceph: - branch: foo - - Or a tag:: - - tasks: - - ceph: - tag: v0.42.13 - - Or a sha1:: - - tasks: - - ceph: - sha1: 1376a5ab0c89780eab39ffbbe436f6a6092314ed - - Or a local source dir:: - - tasks: - - ceph: - path: /home/sage/ceph - - To capture code coverage data, use:: - - tasks: - - ceph: - coverage: true - - To use btrfs, ext4, or xfs on the target's scratch disks, use:: - - tasks: - - ceph: - fs: xfs - mkfs_options: [-b,size=65536,-l,logdev=/dev/sdc1] - mount_options: [nobarrier, inode64] - - Note, this will cause the task to check the /scratch_devs file on each node - for available devices. If no such file is found, /dev/sdb will be used. - - To run some daemons under valgrind, include their names - and the tool/args to use in a valgrind section:: - - tasks: - - ceph: - valgrind: - mds.1: --tool=memcheck - osd.1: [--tool=memcheck, --leak-check=no] - - Those nodes which are using memcheck or valgrind will get - checked for bad results. - - To adjust or modify config options, use:: - - tasks: - - ceph: - conf: - section: - key: value - - For example:: - - tasks: - - ceph: - conf: - mds.0: - some option: value - other key: other value - client.0: - debug client: 10 - debug ms: 1 - - By default, the cluster log is checked for errors and warnings, - and the run marked failed if any appear. 
You can ignore log - entries by giving a list of egrep compatible regexes, i.e.: - - tasks: - - ceph: - log-whitelist: ['foo.*bar', 'bad message'] - - :param ctx: Context - :param config: Configuration - """ - if config is None: - config = {} - assert isinstance(config, dict), \ - "task ceph only supports a dictionary for configuration" - - overrides = ctx.config.get('overrides', {}) - teuthology.deep_merge(config, overrides.get('ceph', {})) - - ctx.daemons = DaemonGroup() - - testdir = teuthology.get_testdir(ctx) - if config.get('coverage'): - coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir) - log.info('Creating coverage directory...') - run.wait( - ctx.cluster.run( - args=[ - 'install', '-d', '-m0755', '--', - coverage_dir, - ], - wait=False, - ) - ) - - with contextutil.nested( - lambda: ceph_log(ctx=ctx, config=None), - lambda: valgrind_post(ctx=ctx, config=config), - lambda: cluster(ctx=ctx, config=dict( - conf=config.get('conf', {}), - fs=config.get('fs', None), - mkfs_options=config.get('mkfs_options', None), - mount_options=config.get('mount_options', None), - block_journal=config.get('block_journal', None), - tmpfs_journal=config.get('tmpfs_journal', None), - log_whitelist=config.get('log-whitelist', []), - cpu_profile=set(config.get('cpu_profile', [])), - )), - lambda: run_daemon(ctx=ctx, config=config, type_='mon'), - lambda: crush_setup(ctx=ctx, config=config), - lambda: run_daemon(ctx=ctx, config=config, type_='osd'), - lambda: cephfs_setup(ctx=ctx, config=config), - lambda: run_daemon(ctx=ctx, config=config, type_='mds'), - ): - try: - if config.get('wait-for-healthy', True): - healthy(ctx=ctx, config=None) - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - ctx.manager = CephManager( - mon, - ctx=ctx, - logger=log.getChild('ceph_manager'), - ) - yield - finally: - if config.get('wait-for-scrub', True): - osd_scrub_pgs(ctx, config) diff --git a/tasks/ceph_client.py 
b/tasks/ceph_client.py deleted file mode 100644 index d7cfd00be3e..00000000000 --- a/tasks/ceph_client.py +++ /dev/null @@ -1,40 +0,0 @@ -""" -Set up client keyring -""" -import logging - -from teuthology import misc as teuthology -from teuthology.orchestra import run - -log = logging.getLogger(__name__) - -def create_keyring(ctx): - """ - Set up key ring on remote sites - """ - log.info('Setting up client nodes...') - clients = ctx.cluster.only(teuthology.is_type('client')) - testdir = teuthology.get_testdir(ctx) - coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir) - for remote, roles_for_host in clients.remotes.iteritems(): - for id_ in teuthology.roles_of_type(roles_for_host, 'client'): - client_keyring = '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_) - remote.run( - args=[ - 'sudo', - 'adjust-ulimits', - 'ceph-coverage', - coverage_dir, - 'ceph-authtool', - '--create-keyring', - '--gen-key', - # TODO this --name= is not really obeyed, all unknown "types" are munged to "client" - '--name=client.{id}'.format(id=id_), - client_keyring, - run.Raw('&&'), - 'sudo', - 'chmod', - '0644', - client_keyring, - ], - ) diff --git a/tasks/ceph_deploy.py b/tasks/ceph_deploy.py deleted file mode 100644 index da9f0b713f9..00000000000 --- a/tasks/ceph_deploy.py +++ /dev/null @@ -1,510 +0,0 @@ -""" -Execute ceph-deploy as a task -""" -from cStringIO import StringIO - -import contextlib -import os -import time -import logging -import traceback - -from teuthology import misc as teuthology -from teuthology import contextutil -from teuthology.config import config as teuth_config -from teuthology.task import install as install_fn -from teuthology.orchestra import run -from tasks.cephfs.filesystem import Filesystem - -log = logging.getLogger(__name__) - - -@contextlib.contextmanager -def download_ceph_deploy(ctx, config): - """ - Downloads ceph-deploy from the ceph.com git mirror and (by default) - switches to the master branch. 
If the `ceph-deploy-branch` is specified, it - will use that instead. - """ - log.info('Downloading ceph-deploy...') - testdir = teuthology.get_testdir(ctx) - ceph_admin = ctx.cluster.only(teuthology.get_first_mon(ctx, config)) - ceph_deploy_branch = config.get('ceph-deploy-branch', 'master') - - ceph_admin.run( - args=[ - 'git', 'clone', '-b', ceph_deploy_branch, - teuth_config.ceph_git_base_url + 'ceph-deploy.git', - '{tdir}/ceph-deploy'.format(tdir=testdir), - ], - ) - ceph_admin.run( - args=[ - 'cd', - '{tdir}/ceph-deploy'.format(tdir=testdir), - run.Raw('&&'), - './bootstrap', - ], - ) - - try: - yield - finally: - log.info('Removing ceph-deploy ...') - ceph_admin.run( - args=[ - 'rm', - '-rf', - '{tdir}/ceph-deploy'.format(tdir=testdir), - ], - ) - - -def is_healthy(ctx, config): - """Wait until a Ceph cluster is healthy.""" - testdir = teuthology.get_testdir(ctx) - ceph_admin = teuthology.get_first_mon(ctx, config) - (remote,) = ctx.cluster.only(ceph_admin).remotes.keys() - max_tries = 90 # 90 tries * 10 secs --> 15 minutes - tries = 0 - while True: - tries += 1 - if tries >= max_tries: - msg = "ceph health was unable to get 'HEALTH_OK' after waiting 15 minutes" - raise RuntimeError(msg) - - r = remote.run( - args=[ - 'cd', - '{tdir}'.format(tdir=testdir), - run.Raw('&&'), - 'sudo', 'ceph', - 'health', - ], - stdout=StringIO(), - logger=log.getChild('health'), - ) - out = r.stdout.getvalue() - log.info('Ceph health: %s', out.rstrip('\n')) - if out.split(None, 1)[0] == 'HEALTH_OK': - break - time.sleep(10) - - -def get_nodes_using_role(ctx, target_role): - """ - Extract the names of nodes that match a given role from a cluster, and modify the - cluster's service IDs to match the resulting node-based naming scheme that ceph-deploy - uses, such that if "mon.a" is on host "foo23", it'll be renamed to "mon.foo23". 
- """ - - # Nodes containing a service of the specified role - nodes_of_interest = [] - - # Prepare a modified version of cluster.remotes with ceph-deploy-ized names - modified_remotes = {} - - for _remote, roles_for_host in ctx.cluster.remotes.iteritems(): - modified_remotes[_remote] = [] - for svc_id in roles_for_host: - if svc_id.startswith("{0}.".format(target_role)): - fqdn = str(_remote).split('@')[-1] - nodename = str(str(_remote).split('.')[0]).split('@')[1] - if target_role == 'mon': - nodes_of_interest.append(fqdn) - else: - nodes_of_interest.append(nodename) - - modified_remotes[_remote].append("{0}.{1}".format(target_role, nodename)) - else: - modified_remotes[_remote].append(svc_id) - - ctx.cluster.remotes = modified_remotes - - return nodes_of_interest - -def get_dev_for_osd(ctx, config): - """Get a list of all osd device names.""" - osd_devs = [] - for remote, roles_for_host in ctx.cluster.remotes.iteritems(): - host = remote.name.split('@')[-1] - shortname = host.split('.')[0] - devs = teuthology.get_scratch_devices(remote) - num_osd_per_host = list(teuthology.roles_of_type(roles_for_host, 'osd')) - num_osds = len(num_osd_per_host) - if config.get('separate_journal_disk') is not None: - num_devs_reqd = 2 * num_osds - assert num_devs_reqd <= len(devs), 'fewer data and journal disks than required ' + shortname - for dindex in range(0,num_devs_reqd,2): - jd_index = dindex + 1 - dev_short = devs[dindex].split('/')[-1] - jdev_short = devs[jd_index].split('/')[-1] - osd_devs.append((shortname, dev_short, jdev_short)) - else: - assert num_osds <= len(devs), 'fewer disks than osds ' + shortname - for dev in devs[:num_osds]: - dev_short = dev.split('/')[-1] - osd_devs.append((shortname, dev_short)) - return osd_devs - -def get_all_nodes(ctx, config): - """Return a string of node names separated by blanks""" - nodelist = [] - for t, k in ctx.config['targets'].iteritems(): - host = t.split('@')[-1] - simple_host = host.split('.')[0] - 
nodelist.append(simple_host) - nodelist = " ".join(nodelist) - return nodelist - -@contextlib.contextmanager -def build_ceph_cluster(ctx, config): - """Build a ceph cluster""" - - # Expect to find ceph_admin on the first mon by ID, same place that the download task - # puts it. Remember this here, because subsequently IDs will change from those in - # the test config to those that ceph-deploy invents. - (ceph_admin,) = ctx.cluster.only(teuthology.get_first_mon(ctx, config)).remotes.iterkeys() - - def execute_ceph_deploy(cmd): - """Remotely execute a ceph_deploy command""" - return ceph_admin.run( - args=[ - 'cd', - '{tdir}/ceph-deploy'.format(tdir=testdir), - run.Raw('&&'), - run.Raw(cmd), - ], - check_status=False, - ).exitstatus - - try: - log.info('Building ceph cluster using ceph-deploy...') - testdir = teuthology.get_testdir(ctx) - ceph_branch = None - if config.get('branch') is not None: - cbranch = config.get('branch') - for var, val in cbranch.iteritems(): - ceph_branch = '--{var}={val}'.format(var=var, val=val) - - ceph_sha = ctx.config['sha1'] - devcommit = '--dev-commit={sha}'.format(sha=ceph_sha) - if ceph_branch: - option = ceph_branch - else: - option = devcommit - all_nodes = get_all_nodes(ctx, config) - mds_nodes = get_nodes_using_role(ctx, 'mds') - mds_nodes = " ".join(mds_nodes) - mon_node = get_nodes_using_role(ctx, 'mon') - mon_nodes = " ".join(mon_node) - new_mon = './ceph-deploy new'+" "+mon_nodes - install_nodes = './ceph-deploy install ' + option + " " + all_nodes - mon_hostname = mon_nodes.split(' ')[0] - mon_hostname = str(mon_hostname) - gather_keys = './ceph-deploy gatherkeys'+" "+mon_hostname - deploy_mds = './ceph-deploy mds create'+" "+mds_nodes - no_of_osds = 0 - estatus_install = execute_ceph_deploy(install_nodes) - if estatus_install != 0: - raise RuntimeError("ceph-deploy: Failed to install ceph") - # install ceph-test package too - install_nodes2 = './ceph-deploy install --tests ' + option + \ - " " + all_nodes - estatus_install 
= execute_ceph_deploy(install_nodes2) - if estatus_install != 0: - raise RuntimeError("ceph-deploy: Failed to install ceph-test") - - if mon_nodes is None: - raise RuntimeError("no monitor nodes in the config file") - - estatus_new = execute_ceph_deploy(new_mon) - if estatus_new != 0: - raise RuntimeError("ceph-deploy: new command failed") - - log.info('adding config inputs...') - testdir = teuthology.get_testdir(ctx) - conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir) - - if config.get('conf') is not None: - confp = config.get('conf') - for section, keys in confp.iteritems(): - lines = '[{section}]\n'.format(section=section) - teuthology.append_lines_to_file(ceph_admin, conf_path, lines, - sudo=True) - for key, value in keys.iteritems(): - log.info("[%s] %s = %s" % (section, key, value)) - lines = '{key} = {value}\n'.format(key=key, value=value) - teuthology.append_lines_to_file(ceph_admin, conf_path, lines, - sudo=True) - - mon_create_nodes = './ceph-deploy mon create-initial' - # If the following fails, it is OK, it might just be that the monitors - # are taking way more than a minute/monitor to form quorum, so lets - # try the next block which will wait up to 15 minutes to gatherkeys. 
- execute_ceph_deploy(mon_create_nodes) - - estatus_gather = execute_ceph_deploy(gather_keys) - max_gather_tries = 90 - gather_tries = 0 - while (estatus_gather != 0): - gather_tries += 1 - if gather_tries >= max_gather_tries: - msg = 'ceph-deploy was not able to gatherkeys after 15 minutes' - raise RuntimeError(msg) - estatus_gather = execute_ceph_deploy(gather_keys) - time.sleep(10) - - if mds_nodes: - estatus_mds = execute_ceph_deploy(deploy_mds) - if estatus_mds != 0: - raise RuntimeError("ceph-deploy: Failed to deploy mds") - - if config.get('test_mon_destroy') is not None: - for d in range(1, len(mon_node)): - mon_destroy_nodes = './ceph-deploy mon destroy'+" "+mon_node[d] - estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes) - if estatus_mon_d != 0: - raise RuntimeError("ceph-deploy: Failed to delete monitor") - - node_dev_list = get_dev_for_osd(ctx, config) - for d in node_dev_list: - node = d[0] - for disk in d[1:]: - zap = './ceph-deploy disk zap ' + node + ':' + disk - estatus = execute_ceph_deploy(zap) - if estatus != 0: - raise RuntimeError("ceph-deploy: Failed to zap osds") - osd_create_cmd = './ceph-deploy osd create ' - if config.get('dmcrypt') is not None: - osd_create_cmd += '--dmcrypt ' - osd_create_cmd += ":".join(d) - estatus_osd = execute_ceph_deploy(osd_create_cmd) - if estatus_osd == 0: - log.info('successfully created osd') - no_of_osds += 1 - else: - raise RuntimeError("ceph-deploy: Failed to create osds") - - if config.get('wait-for-healthy', True) and no_of_osds >= 2: - is_healthy(ctx=ctx, config=None) - - log.info('Setting up client nodes...') - conf_path = '/etc/ceph/ceph.conf' - admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring' - first_mon = teuthology.get_first_mon(ctx, config) - (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys() - conf_data = teuthology.get_file( - remote=mon0_remote, - path=conf_path, - sudo=True, - ) - admin_keyring = teuthology.get_file( - remote=mon0_remote, - path=admin_keyring_path, - 
sudo=True, - ) - - clients = ctx.cluster.only(teuthology.is_type('client')) - for remot, roles_for_host in clients.remotes.iteritems(): - for id_ in teuthology.roles_of_type(roles_for_host, 'client'): - client_keyring = \ - '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_) - mon0_remote.run( - args=[ - 'cd', - '{tdir}'.format(tdir=testdir), - run.Raw('&&'), - 'sudo', 'bash', '-c', - run.Raw('"'), 'ceph', - 'auth', - 'get-or-create', - 'client.{id}'.format(id=id_), - 'mds', 'allow', - 'mon', 'allow *', - 'osd', 'allow *', - run.Raw('>'), - client_keyring, - run.Raw('"'), - ], - ) - key_data = teuthology.get_file( - remote=mon0_remote, - path=client_keyring, - sudo=True, - ) - teuthology.sudo_write_file( - remote=remot, - path=client_keyring, - data=key_data, - perms='0644' - ) - teuthology.sudo_write_file( - remote=remot, - path=admin_keyring_path, - data=admin_keyring, - perms='0644' - ) - teuthology.sudo_write_file( - remote=remot, - path=conf_path, - data=conf_data, - perms='0644' - ) - - log.info('Configuring CephFS...') - ceph_fs = Filesystem(ctx, admin_remote=clients.remotes.keys()[0]) - if not ceph_fs.legacy_configured(): - ceph_fs.create() - else: - raise RuntimeError( - "The cluster is NOT operational due to insufficient OSDs") - yield - - except Exception: - log.info("Error encountered, logging exception before tearing down ceph-deploy") - log.info(traceback.format_exc()) - raise - finally: - log.info('Stopping ceph...') - ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'), - 'sudo', 'service', 'ceph', 'stop' ]) - - # Are you really not running anymore? - # try first with the init tooling - # ignoring the status so this becomes informational only - ctx.cluster.run(args=['sudo', 'status', 'ceph-all', run.Raw('||'), - 'sudo', 'service', 'ceph', 'status'], - check_status=False) - - # and now just check for the processes themselves, as if upstart/sysvinit - # is lying to us. 
Ignore errors if the grep fails - ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'), - 'grep', '-v', 'grep', run.Raw('|'), - 'grep', 'ceph'], check_status=False) - - if ctx.archive is not None: - # archive mon data, too - log.info('Archiving mon data...') - path = os.path.join(ctx.archive, 'data') - os.makedirs(path) - mons = ctx.cluster.only(teuthology.is_type('mon')) - for remote, roles in mons.remotes.iteritems(): - for role in roles: - if role.startswith('mon.'): - teuthology.pull_directory_tarball( - remote, - '/var/lib/ceph/mon', - path + '/' + role + '.tgz') - - log.info('Compressing logs...') - run.wait( - ctx.cluster.run( - args=[ - 'sudo', - 'find', - '/var/log/ceph', - '-name', - '*.log', - '-print0', - run.Raw('|'), - 'sudo', - 'xargs', - '-0', - '--no-run-if-empty', - '--', - 'gzip', - '--', - ], - wait=False, - ), - ) - - log.info('Archiving logs...') - path = os.path.join(ctx.archive, 'remote') - os.makedirs(path) - for remote in ctx.cluster.remotes.iterkeys(): - sub = os.path.join(path, remote.shortname) - os.makedirs(sub) - teuthology.pull_directory(remote, '/var/log/ceph', - os.path.join(sub, 'log')) - - # Prevent these from being undefined if the try block fails - all_nodes = get_all_nodes(ctx, config) - purge_nodes = './ceph-deploy purge'+" "+all_nodes - purgedata_nodes = './ceph-deploy purgedata'+" "+all_nodes - - log.info('Purging package...') - execute_ceph_deploy(purge_nodes) - log.info('Purging data...') - execute_ceph_deploy(purgedata_nodes) - - -@contextlib.contextmanager -def task(ctx, config): - """ - Set up and tear down a Ceph cluster. 
- - For example:: - - tasks: - - install: - extras: yes - - ssh_keys: - - ceph-deploy: - branch: - stable: bobtail - mon_initial_members: 1 - - tasks: - - install: - extras: yes - - ssh_keys: - - ceph-deploy: - branch: - dev: master - conf: - mon: - debug mon = 20 - - tasks: - - install: - extras: yes - - ssh_keys: - - ceph-deploy: - branch: - testing: - dmcrypt: yes - separate_journal_disk: yes - - """ - if config is None: - config = {} - - overrides = ctx.config.get('overrides', {}) - teuthology.deep_merge(config, overrides.get('ceph-deploy', {})) - - assert isinstance(config, dict), \ - "task ceph-deploy only supports a dictionary for configuration" - - overrides = ctx.config.get('overrides', {}) - teuthology.deep_merge(config, overrides.get('ceph-deploy', {})) - - if config.get('branch') is not None: - assert isinstance(config['branch'], dict), 'branch must be a dictionary' - - log.info('task ceph-deploy with config ' + str(config)) - - with contextutil.nested( - lambda: install_fn.ship_utilities(ctx=ctx, config=None), - lambda: download_ceph_deploy(ctx=ctx, config=config), - lambda: build_ceph_cluster(ctx=ctx, config=dict( - conf=config.get('conf', {}), - branch=config.get('branch',{}), - dmcrypt=config.get('dmcrypt',None), - separate_journal_disk=config.get('separate_journal_disk',None), - mon_initial_members=config.get('mon_initial_members', None), - test_mon_destroy=config.get('test_mon_destroy', None), - )), - ): - yield diff --git a/tasks/ceph_fuse.py b/tasks/ceph_fuse.py deleted file mode 100644 index 78dafeda370..00000000000 --- a/tasks/ceph_fuse.py +++ /dev/null @@ -1,144 +0,0 @@ -""" -Ceph FUSE client task -""" - -import contextlib -import logging - -from teuthology import misc as teuthology -from cephfs.fuse_mount import FuseMount - -log = logging.getLogger(__name__) - - -def get_client_configs(ctx, config): - """ - Get a map of the configuration for each FUSE client in the configuration - by combining the configuration of the current task with any 
global overrides. - - :param ctx: Context instance - :param config: configuration for this task - :return: dict of client name to config or to None - """ - if config is None: - config = dict(('client.{id}'.format(id=id_), None) - for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')) - elif isinstance(config, list): - config = dict((name, None) for name in config) - - overrides = ctx.config.get('overrides', {}) - teuthology.deep_merge(config, overrides.get('ceph-fuse', {})) - - return config - - -@contextlib.contextmanager -def task(ctx, config): - """ - Mount/unmount a ``ceph-fuse`` client. - - The config is optional and defaults to mounting on all clients. If - a config is given, it is expected to be a list of clients to do - this operation on. This lets you e.g. set up one client with - ``ceph-fuse`` and another with ``kclient``. - - Example that mounts all clients:: - - tasks: - - ceph: - - ceph-fuse: - - interactive: - - Example that uses both ``kclient` and ``ceph-fuse``:: - - tasks: - - ceph: - - ceph-fuse: [client.0] - - kclient: [client.1] - - interactive: - - Example that enables valgrind: - - tasks: - - ceph: - - ceph-fuse: - client.0: - valgrind: [--tool=memcheck, --leak-check=full, --show-reachable=yes] - - interactive: - - Example that stops an already-mounted client: - - :: - - tasks: - - ceph: - - ceph-fuse: [client.0] - - ... do something that requires the FS mounted ... - - ceph-fuse: - client.0: - mounted: false - - ... do something that requires the FS unmounted ... 
- - Example that adds more generous wait time for mount (for virtual machines): - - tasks: - - ceph: - - ceph-fuse: - client.0: - mount_wait: 60 # default is 0, do not wait before checking /sys/ - mount_timeout: 120 # default is 30, give up if /sys/ is not populated - - interactive: - - :param ctx: Context - :param config: Configuration - """ - log.info('Mounting ceph-fuse clients...') - - testdir = teuthology.get_testdir(ctx) - config = get_client_configs(ctx, config) - - # List clients we will configure mounts for, default is all clients - clients = list(teuthology.get_clients(ctx=ctx, roles=config.keys())) - - all_mounts = getattr(ctx, 'mounts', {}) - mounted_by_me = {} - - # Construct any new FuseMount instances - for id_, remote in clients: - client_config = config.get("client.%s" % id_) - if client_config is None: - client_config = {} - - if id_ not in all_mounts: - fuse_mount = FuseMount(client_config, testdir, id_, remote) - all_mounts[id_] = fuse_mount - else: - # Catch bad configs where someone has e.g. 
tried to use ceph-fuse and kcephfs for the same client - assert isinstance(all_mounts[id_], FuseMount) - - if client_config.get('mounted', True): - mounted_by_me[id_] = all_mounts[id_] - - # Mount any clients we have been asked to (default to mount all) - for mount in mounted_by_me.values(): - mount.mount() - - for mount in mounted_by_me.values(): - mount.wait_until_mounted() - - # Umount any pre-existing clients that we have not been asked to mount - for client_id in set(all_mounts.keys()) - set(mounted_by_me.keys()): - mount = all_mounts[client_id] - if mount.is_mounted(): - mount.umount_wait() - - ctx.mounts = all_mounts - try: - yield all_mounts - finally: - log.info('Unmounting ceph-fuse clients...') - - for mount in mounted_by_me.values(): - # Conditional because an inner context might have umounted it - if mount.is_mounted(): - mount.umount_wait() diff --git a/tasks/ceph_manager.py b/tasks/ceph_manager.py deleted file mode 100644 index d7f3c10afa6..00000000000 --- a/tasks/ceph_manager.py +++ /dev/null @@ -1,1990 +0,0 @@ -""" -ceph manager -- Thrasher and CephManager objects -""" -from cStringIO import StringIO -from functools import wraps -import contextlib -import random -import time -import gevent -import base64 -import json -import logging -import threading -import traceback -import os -from teuthology import misc as teuthology -from tasks.scrub import Scrubber -from util.rados import cmd_erasure_code_profile -from teuthology.orchestra.remote import Remote -from teuthology.orchestra import run - - -DEFAULT_CONF_PATH = '/etc/ceph/ceph.conf' - -log = logging.getLogger(__name__) - - -def write_conf(ctx, conf_path=DEFAULT_CONF_PATH): - conf_fp = StringIO() - ctx.ceph.conf.write(conf_fp) - conf_fp.seek(0) - writes = ctx.cluster.run( - args=[ - 'sudo', 'mkdir', '-p', '/etc/ceph', run.Raw('&&'), - 'sudo', 'chmod', '0755', '/etc/ceph', run.Raw('&&'), - 'sudo', 'python', - '-c', - ('import shutil, sys; ' - 'shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))'), 
- conf_path, - run.Raw('&&'), - 'sudo', 'chmod', '0644', conf_path, - ], - stdin=run.PIPE, - wait=False) - teuthology.feed_many_stdins_and_close(conf_fp, writes) - run.wait(writes) - - -def make_admin_daemon_dir(ctx, remote): - """ - Create /var/run/ceph directory on remote site. - - :param ctx: Context - :param remote: Remote site - """ - remote.run(args=['sudo', - 'install', '-d', '-m0777', '--', '/var/run/ceph', ], ) - - -def mount_osd_data(ctx, remote, osd): - """ - Mount a remote OSD - - :param ctx: Context - :param remote: Remote site - :param ods: Osd name - """ - log.debug('Mounting data for osd.{o} on {r}'.format(o=osd, r=remote)) - if (remote in ctx.disk_config.remote_to_roles_to_dev and - osd in ctx.disk_config.remote_to_roles_to_dev[remote]): - dev = ctx.disk_config.remote_to_roles_to_dev[remote][osd] - mount_options = ctx.disk_config.\ - remote_to_roles_to_dev_mount_options[remote][osd] - fstype = ctx.disk_config.remote_to_roles_to_dev_fstype[remote][osd] - mnt = os.path.join('/var/lib/ceph/osd', 'ceph-{id}'.format(id=osd)) - - log.info('Mounting osd.{o}: dev: {n}, ' - 'mountpoint: {p}, type: {t}, options: {v}'.format( - o=osd, n=remote.name, p=mnt, t=fstype, v=mount_options)) - - remote.run( - args=[ - 'sudo', - 'mount', - '-t', fstype, - '-o', ','.join(mount_options), - dev, - mnt, - ] - ) - - -class Thrasher: - """ - Object used to thrash Ceph - """ - def __init__(self, manager, config, logger=None): - self.ceph_manager = manager - self.ceph_manager.wait_for_clean() - osd_status = self.ceph_manager.get_osd_status() - self.in_osds = osd_status['in'] - self.live_osds = osd_status['live'] - self.out_osds = osd_status['out'] - self.dead_osds = osd_status['dead'] - self.stopping = False - self.logger = logger - self.config = config - self.revive_timeout = self.config.get("revive_timeout", 150) - if self.config.get('powercycle'): - self.revive_timeout += 120 - self.clean_wait = self.config.get('clean_wait', 0) - self.minin = self.config.get("min_in", 3) - 
self.chance_move_pg = self.config.get('chance_move_pg', 1.0) - self.dump_ops_enable = self.config.get('dump_ops_enable') - - num_osds = self.in_osds + self.out_osds - self.max_pgs = self.config.get("max_pgs_per_pool_osd", 1200) * num_osds - if self.logger is not None: - self.log = lambda x: self.logger.info(x) - else: - def tmp(x): - """ - Implement log behavior - """ - print x - self.log = tmp - if self.config is None: - self.config = dict() - # prevent monitor from auto-marking things out while thrasher runs - # try both old and new tell syntax, in case we are testing old code - try: - manager.raw_cluster_cmd('--', 'tell', 'mon.*', 'injectargs', - '--mon-osd-down-out-interval 0') - except Exception: - manager.raw_cluster_cmd('--', 'mon', 'tell', '*', 'injectargs', - '--mon-osd-down-out-interval 0') - self.thread = gevent.spawn(self.do_thrash) - if self.dump_ops_enable == "true": - self.dump_ops_thread = gevent.spawn(self.do_dump_ops) - if self.config.get('powercycle') or not self.cmd_exists_on_osds("ceph-objectstore-tool"): - self.ceph_objectstore_tool = False - self.test_rm_past_intervals = False - if self.config.get('powercycle'): - self.log("Unable to test ceph-objectstore-tool, " - "powercycle testing") - else: - self.log("Unable to test ceph-objectstore-tool, " - "not available on all OSD nodes") - else: - self.ceph_objectstore_tool = \ - self.config.get('ceph_objectstore_tool', True) - self.test_rm_past_intervals = \ - self.config.get('test_rm_past_intervals', True) - - def cmd_exists_on_osds(self, cmd): - allremotes = self.ceph_manager.ctx.cluster.only(\ - teuthology.is_type('osd')).remotes.keys() - allremotes = list(set(allremotes)) - for remote in allremotes: - proc = remote.run(args=['type', cmd], wait=True, - check_status=False, stdout=StringIO(), - stderr=StringIO()) - if proc.exitstatus != 0: - return False; - return True; - - def kill_osd(self, osd=None, mark_down=False, mark_out=False): - """ - :param osd: Osd to be killed. 
- :mark_down: Mark down if true. - :mark_out: Mark out if true. - """ - if osd is None: - osd = random.choice(self.live_osds) - self.log("Killing osd %s, live_osds are %s" % (str(osd), - str(self.live_osds))) - self.live_osds.remove(osd) - self.dead_osds.append(osd) - self.ceph_manager.kill_osd(osd) - if mark_down: - self.ceph_manager.mark_down_osd(osd) - if mark_out and osd in self.in_osds: - self.out_osd(osd) - if self.ceph_objectstore_tool: - self.log("Testing ceph-objectstore-tool on down osd") - (remote,) = self.ceph_manager.ctx.\ - cluster.only('osd.{o}'.format(o=osd)).remotes.iterkeys() - FSPATH = self.ceph_manager.get_filepath() - JPATH = os.path.join(FSPATH, "journal") - exp_osd = imp_osd = osd - exp_remote = imp_remote = remote - # If an older osd is available we'll move a pg from there - if (len(self.dead_osds) > 1 and - random.random() < self.chance_move_pg): - exp_osd = random.choice(self.dead_osds[:-1]) - (exp_remote,) = self.ceph_manager.ctx.\ - cluster.only('osd.{o}'.format(o=exp_osd)).\ - remotes.iterkeys() - if ('keyvaluestore_backend' in - self.ceph_manager.ctx.ceph.conf['osd']): - prefix = ("sudo adjust-ulimits ceph-objectstore-tool " - "--data-path {fpath} --journal-path {jpath} " - "--type keyvaluestore " - "--log-file=" - "/var/log/ceph/objectstore_tool.\\$pid.log ". - format(fpath=FSPATH, jpath=JPATH)) - else: - prefix = ("sudo adjust-ulimits ceph-objectstore-tool " - "--data-path {fpath} --journal-path {jpath} " - "--log-file=" - "/var/log/ceph/objectstore_tool.\\$pid.log ". - format(fpath=FSPATH, jpath=JPATH)) - cmd = (prefix + "--op list-pgs").format(id=exp_osd) - proc = exp_remote.run(args=cmd, wait=True, - check_status=False, stdout=StringIO()) - if proc.exitstatus: - raise Exception("ceph-objectstore-tool: " - "exp list-pgs failure with status {ret}". 
- format(ret=proc.exitstatus)) - pgs = proc.stdout.getvalue().split('\n')[:-1] - if len(pgs) == 0: - self.log("No PGs found for osd.{osd}".format(osd=exp_osd)) - return - pg = random.choice(pgs) - exp_path = teuthology.get_testdir(self.ceph_manager.ctx) - exp_path = os.path.join(exp_path, "data") - exp_path = os.path.join(exp_path, - "exp.{pg}.{id}".format(pg=pg, id=exp_osd)) - # export - cmd = prefix + "--op export --pgid {pg} --file {file}" - cmd = cmd.format(id=exp_osd, pg=pg, file=exp_path) - proc = exp_remote.run(args=cmd) - if proc.exitstatus: - raise Exception("ceph-objectstore-tool: " - "export failure with status {ret}". - format(ret=proc.exitstatus)) - # remove - cmd = prefix + "--op remove --pgid {pg}" - cmd = cmd.format(id=exp_osd, pg=pg) - proc = exp_remote.run(args=cmd) - if proc.exitstatus: - raise Exception("ceph-objectstore-tool: " - "remove failure with status {ret}". - format(ret=proc.exitstatus)) - # If there are at least 2 dead osds we might move the pg - if exp_osd != imp_osd: - # If pg isn't already on this osd, then we will move it there - cmd = (prefix + "--op list-pgs").format(id=imp_osd) - proc = imp_remote.run(args=cmd, wait=True, - check_status=False, stdout=StringIO()) - if proc.exitstatus: - raise Exception("ceph-objectstore-tool: " - "imp list-pgs failure with status {ret}". - format(ret=proc.exitstatus)) - pgs = proc.stdout.getvalue().split('\n')[:-1] - if pg not in pgs: - self.log("Moving pg {pg} from osd.{fosd} to osd.{tosd}". - format(pg=pg, fosd=exp_osd, tosd=imp_osd)) - if imp_remote != exp_remote: - # Copy export file to the other machine - self.log("Transfer export file from {srem} to {trem}". 
- format(srem=exp_remote, trem=imp_remote)) - tmpexport = Remote.get_file(exp_remote, exp_path) - Remote.put_file(imp_remote, tmpexport, exp_path) - os.remove(tmpexport) - else: - # Can't move the pg after all - imp_osd = exp_osd - imp_remote = exp_remote - # import - cmd = (prefix + "--op import --file {file}") - cmd = cmd.format(id=imp_osd, file=exp_path) - proc = imp_remote.run(args=cmd, wait=True, check_status=False) - if proc.exitstatus == 10: - self.log("Pool went away before processing an import" - "...ignored") - elif proc.exitstatus == 11: - self.log("Attempt to import an incompatible export" - "...ignored") - elif proc.exitstatus: - raise Exception("ceph-objectstore-tool: " - "import failure with status {ret}". - format(ret=proc.exitstatus)) - cmd = "rm -f {file}".format(file=exp_path) - exp_remote.run(args=cmd) - if imp_remote != exp_remote: - imp_remote.run(args=cmd) - - # apply low split settings to each pool - for pool in self.ceph_manager.list_pools(): - no_sudo_prefix = prefix[5:] - cmd = ("CEPH_ARGS='--filestore-merge-threshold 1 " - "--filestore-split-multiple 1' sudo -E " - + no_sudo_prefix + "--op apply-layout-settings --pool " + pool).format(id=osd) - proc = remote.run(args=cmd, wait=True, check_status=False, stderr=StringIO()) - output = proc.stderr.getvalue() - if 'Couldn\'t find pool' in output: - continue - if proc.exitstatus: - raise Exception("ceph-objectstore-tool apply-layout-settings" - " failed with {status}".format(status=proc.exitstatus)) - - def rm_past_intervals(self, osd=None): - """ - :param osd: Osd to find pg to remove past intervals - """ - if self.test_rm_past_intervals: - if osd is None: - osd = random.choice(self.dead_osds) - self.log("Use ceph_objectstore_tool to remove past intervals") - (remote,) = self.ceph_manager.ctx.\ - cluster.only('osd.{o}'.format(o=osd)).remotes.iterkeys() - FSPATH = self.ceph_manager.get_filepath() - JPATH = os.path.join(FSPATH, "journal") - if ('keyvaluestore_backend' in - 
self.ceph_manager.ctx.ceph.conf['osd']): - prefix = ("sudo adjust-ulimits ceph-objectstore-tool " - "--data-path {fpath} --journal-path {jpath} " - "--type keyvaluestore " - "--log-file=" - "/var/log/ceph/objectstore_tool.\\$pid.log ". - format(fpath=FSPATH, jpath=JPATH)) - else: - prefix = ("sudo adjust-ulimits ceph-objectstore-tool " - "--data-path {fpath} --journal-path {jpath} " - "--log-file=" - "/var/log/ceph/objectstore_tool.\\$pid.log ". - format(fpath=FSPATH, jpath=JPATH)) - cmd = (prefix + "--op list-pgs").format(id=osd) - proc = remote.run(args=cmd, wait=True, - check_status=False, stdout=StringIO()) - if proc.exitstatus: - raise Exception("ceph_objectstore_tool: " - "exp list-pgs failure with status {ret}". - format(ret=proc.exitstatus)) - pgs = proc.stdout.getvalue().split('\n')[:-1] - if len(pgs) == 0: - self.log("No PGs found for osd.{osd}".format(osd=osd)) - return - pg = random.choice(pgs) - cmd = (prefix + "--op rm-past-intervals --pgid {pg}").\ - format(id=osd, pg=pg) - proc = remote.run(args=cmd) - if proc.exitstatus: - raise Exception("ceph_objectstore_tool: " - "rm-past-intervals failure with status {ret}". - format(ret=proc.exitstatus)) - - def blackhole_kill_osd(self, osd=None): - """ - If all else fails, kill the osd. - :param osd: Osd to be killed. - """ - if osd is None: - osd = random.choice(self.live_osds) - self.log("Blackholing and then killing osd %s, live_osds are %s" % - (str(osd), str(self.live_osds))) - self.live_osds.remove(osd) - self.dead_osds.append(osd) - self.ceph_manager.blackhole_kill_osd(osd) - - def revive_osd(self, osd=None): - """ - Revive the osd. - :param osd: Osd to be revived. - """ - if osd is None: - osd = random.choice(self.dead_osds) - self.log("Reviving osd %s" % (str(osd),)) - self.live_osds.append(osd) - self.dead_osds.remove(osd) - self.ceph_manager.revive_osd(osd, self.revive_timeout) - - def out_osd(self, osd=None): - """ - Mark the osd out - :param osd: Osd to be marked. 
- """ - if osd is None: - osd = random.choice(self.in_osds) - self.log("Removing osd %s, in_osds are: %s" % - (str(osd), str(self.in_osds))) - self.ceph_manager.mark_out_osd(osd) - self.in_osds.remove(osd) - self.out_osds.append(osd) - - def in_osd(self, osd=None): - """ - Mark the osd out - :param osd: Osd to be marked. - """ - if osd is None: - osd = random.choice(self.out_osds) - if osd in self.dead_osds: - return self.revive_osd(osd) - self.log("Adding osd %s" % (str(osd),)) - self.out_osds.remove(osd) - self.in_osds.append(osd) - self.ceph_manager.mark_in_osd(osd) - self.log("Added osd %s" % (str(osd),)) - - def reweight_osd(self, osd=None): - """ - Reweight an osd that is in - :param osd: Osd to be marked. - """ - if osd is None: - osd = random.choice(self.in_osds) - val = random.uniform(.1, 1.0) - self.log("Reweighting osd %s to %s" % (str(osd), str(val))) - self.ceph_manager.raw_cluster_cmd('osd', 'reweight', - str(osd), str(val)) - - def primary_affinity(self, osd=None): - if osd is None: - osd = random.choice(self.in_osds) - if random.random() >= .5: - pa = random.random() - elif random.random() >= .5: - pa = 1 - else: - pa = 0 - self.log('Setting osd %s primary_affinity to %f' % (str(osd), pa)) - self.ceph_manager.raw_cluster_cmd('osd', 'primary-affinity', - str(osd), str(pa)) - - def all_up(self): - """ - Make sure all osds are up and not out. 
- """ - while len(self.dead_osds) > 0: - self.log("reviving osd") - self.revive_osd() - while len(self.out_osds) > 0: - self.log("inning osd") - self.in_osd() - - def do_join(self): - """ - Break out of this Ceph loop - """ - self.stopping = True - self.thread.get() - if self.dump_ops_enable == "true": - self.log("joining the do_dump_ops greenlet") - self.dump_ops_thread.join() - - def grow_pool(self): - """ - Increase the size of the pool - """ - pool = self.ceph_manager.get_pool() - self.log("Growing pool %s" % (pool,)) - self.ceph_manager.expand_pool(pool, - self.config.get('pool_grow_by', 10), - self.max_pgs) - - def fix_pgp_num(self): - """ - Fix number of pgs in pool. - """ - pool = self.ceph_manager.get_pool() - self.log("fixing pg num pool %s" % (pool,)) - self.ceph_manager.set_pool_pgpnum(pool) - - def test_pool_min_size(self): - """ - Kill and revive all osds except one. - """ - self.log("test_pool_min_size") - self.all_up() - self.ceph_manager.wait_for_recovery( - timeout=self.config.get('timeout') - ) - the_one = random.choice(self.in_osds) - self.log("Killing everyone but %s", the_one) - to_kill = filter(lambda x: x != the_one, self.in_osds) - [self.kill_osd(i) for i in to_kill] - [self.out_osd(i) for i in to_kill] - time.sleep(self.config.get("test_pool_min_size_time", 10)) - self.log("Killing %s" % (the_one,)) - self.kill_osd(the_one) - self.out_osd(the_one) - self.log("Reviving everyone but %s" % (the_one,)) - [self.revive_osd(i) for i in to_kill] - [self.in_osd(i) for i in to_kill] - self.log("Revived everyone but %s" % (the_one,)) - self.log("Waiting for clean") - self.ceph_manager.wait_for_recovery( - timeout=self.config.get('timeout') - ) - - def inject_pause(self, conf_key, duration, check_after, should_be_down): - """ - Pause injection testing. Check for osd being down when finished. 
- """ - the_one = random.choice(self.live_osds) - self.log("inject_pause on {osd}".format(osd=the_one)) - self.log( - "Testing {key} pause injection for duration {duration}".format( - key=conf_key, - duration=duration - )) - self.log( - "Checking after {after}, should_be_down={shouldbedown}".format( - after=check_after, - shouldbedown=should_be_down - )) - self.ceph_manager.set_config(the_one, **{conf_key: duration}) - if not should_be_down: - return - time.sleep(check_after) - status = self.ceph_manager.get_osd_status() - assert the_one in status['down'] - time.sleep(duration - check_after + 20) - status = self.ceph_manager.get_osd_status() - assert not the_one in status['down'] - - def test_backfill_full(self): - """ - Test backfills stopping when the replica fills up. - - First, use osd_backfill_full_ratio to simulate a now full - osd by setting it to 0 on all of the OSDs. - - Second, on a random subset, set - osd_debug_skip_full_check_in_backfill_reservation to force - the more complicated check in do_scan to be exercised. - - Then, verify that all backfills stop. 
- """ - self.log("injecting osd_backfill_full_ratio = 0") - for i in self.live_osds: - self.ceph_manager.set_config( - i, - osd_debug_skip_full_check_in_backfill_reservation= - random.choice(['false', 'true']), - osd_backfill_full_ratio=0) - for i in range(30): - status = self.ceph_manager.compile_pg_status() - if 'backfill' not in status.keys(): - break - self.log( - "waiting for {still_going} backfills".format( - still_going=status.get('backfill'))) - time.sleep(1) - assert('backfill' not in self.ceph_manager.compile_pg_status().keys()) - for i in self.live_osds: - self.ceph_manager.set_config( - i, - osd_debug_skip_full_check_in_backfill_reservation='false', - osd_backfill_full_ratio=0.85) - - def test_map_discontinuity(self): - """ - 1) Allows the osds to recover - 2) kills an osd - 3) allows the remaining osds to recover - 4) waits for some time - 5) revives the osd - This sequence should cause the revived osd to have to handle - a map gap since the mons would have trimmed - """ - while len(self.in_osds) < (self.minin + 1): - self.in_osd() - self.log("Waiting for recovery") - self.ceph_manager.wait_for_all_up( - timeout=self.config.get('timeout') - ) - # now we wait 20s for the pg status to change, if it takes longer, - # the test *should* fail! - time.sleep(20) - self.ceph_manager.wait_for_clean( - timeout=self.config.get('timeout') - ) - - # now we wait 20s for the backfill replicas to hear about the clean - time.sleep(20) - self.log("Recovered, killing an osd") - self.kill_osd(mark_down=True, mark_out=True) - self.log("Waiting for clean again") - self.ceph_manager.wait_for_clean( - timeout=self.config.get('timeout') - ) - self.log("Waiting for trim") - time.sleep(int(self.config.get("map_discontinuity_sleep_time", 40))) - self.revive_osd() - - def choose_action(self): - """ - Random action selector. 
- """ - chance_down = self.config.get('chance_down', 0.4) - chance_test_min_size = self.config.get('chance_test_min_size', 0) - chance_test_backfill_full = \ - self.config.get('chance_test_backfill_full', 0) - if isinstance(chance_down, int): - chance_down = float(chance_down) / 100 - minin = self.minin - minout = self.config.get("min_out", 0) - minlive = self.config.get("min_live", 2) - mindead = self.config.get("min_dead", 0) - - self.log('choose_action: min_in %d min_out ' - '%d min_live %d min_dead %d' % - (minin, minout, minlive, mindead)) - actions = [] - if len(self.in_osds) > minin: - actions.append((self.out_osd, 1.0,)) - if len(self.live_osds) > minlive and chance_down > 0: - actions.append((self.kill_osd, chance_down,)) - if len(self.dead_osds) > 1: - actions.append((self.rm_past_intervals, 1.0,)) - if len(self.out_osds) > minout: - actions.append((self.in_osd, 1.7,)) - if len(self.dead_osds) > mindead: - actions.append((self.revive_osd, 1.0,)) - if self.config.get('thrash_primary_affinity', True): - actions.append((self.primary_affinity, 1.0,)) - actions.append((self.reweight_osd, - self.config.get('reweight_osd', .5),)) - actions.append((self.grow_pool, - self.config.get('chance_pgnum_grow', 0),)) - actions.append((self.fix_pgp_num, - self.config.get('chance_pgpnum_fix', 0),)) - actions.append((self.test_pool_min_size, - chance_test_min_size,)) - actions.append((self.test_backfill_full, - chance_test_backfill_full,)) - for key in ['heartbeat_inject_failure', 'filestore_inject_stall']: - for scenario in [ - (lambda: - self.inject_pause(key, - self.config.get('pause_short', 3), - 0, - False), - self.config.get('chance_inject_pause_short', 1),), - (lambda: - self.inject_pause(key, - self.config.get('pause_long', 80), - self.config.get('pause_check_after', 70), - True), - self.config.get('chance_inject_pause_long', 0),)]: - actions.append(scenario) - - total = sum([y for (x, y) in actions]) - val = random.uniform(0, total) - for (action, prob) in actions: 
- if val < prob: - return action - val -= prob - return None - - def log_exc(func): - @wraps(func) - def wrapper(self): - try: - return func(self) - except: - self.log(traceback.format_exc()) - raise - return wrapper - - @log_exc - def do_dump_ops(self): - """ - Loops and does op dumps on all osds - """ - self.log("starting do_dump_ops") - while not self.stopping: - for osd in self.live_osds: - # Ignore errors because live_osds is in flux - self.ceph_manager.osd_admin_socket(osd, command=['dump_ops_in_flight'], - check_status=False, timeout=30) - self.ceph_manager.osd_admin_socket(osd, command=['dump_blocked_ops'], - check_status=False, timeout=30) - self.ceph_manager.osd_admin_socket(osd, command=['dump_historic_ops'], - check_status=False, timeout=30) - gevent.sleep(0) - - @log_exc - def do_thrash(self): - """ - Loop to select random actions to thrash ceph manager with. - """ - cleanint = self.config.get("clean_interval", 60) - scrubint = self.config.get("scrub_interval", -1) - maxdead = self.config.get("max_dead", 0) - delay = self.config.get("op_delay", 5) - self.log("starting do_thrash") - while not self.stopping: - to_log = [str(x) for x in ["in_osds: ", self.in_osds, - "out_osds: ", self.out_osds, - "dead_osds: ", self.dead_osds, - "live_osds: ", self.live_osds]] - self.log(" ".join(to_log)) - if random.uniform(0, 1) < (float(delay) / cleanint): - while len(self.dead_osds) > maxdead: - self.revive_osd() - for osd in self.in_osds: - self.ceph_manager.raw_cluster_cmd('osd', 'reweight', - str(osd), str(1)) - if random.uniform(0, 1) < float( - self.config.get('chance_test_map_discontinuity', 0)): - self.test_map_discontinuity() - else: - self.ceph_manager.wait_for_recovery( - timeout=self.config.get('timeout') - ) - time.sleep(self.clean_wait) - if scrubint > 0: - if random.uniform(0, 1) < (float(delay) / scrubint): - self.log('Scrubbing while thrashing being performed') - Scrubber(self.ceph_manager, self.config) - self.choose_action()() - time.sleep(delay) - 
self.all_up() - - -class ObjectStoreTool: - - def __init__(self, manager, pool, **kwargs): - self.manager = manager - self.pool = pool - self.osd = kwargs.get('osd', None) - self.object_name = kwargs.get('object_name', None) - if self.osd and self.pool and self.object_name: - if self.osd == "primary": - self.osd = self.manager.get_object_primary(self.pool, - self.object_name) - assert self.osd - if self.object_name: - self.pgid = self.manager.get_object_pg_with_shard(self.pool, - self.object_name, - self.osd) - self.remote = self.manager.ctx.\ - cluster.only('osd.{o}'.format(o=self.osd)).remotes.keys()[0] - path = self.manager.get_filepath().format(id=self.osd) - self.paths = ("--data-path {path} --journal-path {path}/journal". - format(path=path)) - - def build_cmd(self, options, args, stdin): - lines = [] - if self.object_name: - lines.append("object=$(sudo adjust-ulimits ceph-objectstore-tool " - "{paths} --pgid {pgid} --op list |" - "grep '\"oid\":\"{name}\"')". - format(paths=self.paths, - pgid=self.pgid, - name=self.object_name)) - args = '"$object" ' + args - options += " --pgid {pgid}".format(pgid=self.pgid) - cmd = ("sudo adjust-ulimits ceph-objectstore-tool {paths} {options} {args}". - format(paths=self.paths, - args=args, - options=options)) - if stdin: - cmd = ("echo {payload} | base64 --decode | {cmd}". 
- format(payload=base64.encode(stdin), - cmd=cmd)) - lines.append(cmd) - return "\n".join(lines) - - def run(self, options, args, stdin=None): - self.manager.kill_osd(self.osd) - cmd = self.build_cmd(options, args, stdin) - self.manager.log(cmd) - try: - proc = self.remote.run(args=['bash', '-e', '-x', '-c', cmd], - check_status=False, - stdout=StringIO(), - stderr=StringIO()) - proc.wait() - if proc.exitstatus != 0: - self.manager.log("failed with " + str(proc.exitstatus)) - error = proc.stdout.getvalue() + " " + proc.stderr.getvalue() - raise Exception(error) - finally: - self.manager.revive_osd(self.osd) - - -class CephManager: - """ - Ceph manager object. - Contains several local functions that form a bulk of this module. - """ - - REPLICATED_POOL = 1 - ERASURE_CODED_POOL = 3 - - def __init__(self, controller, ctx=None, config=None, logger=None): - self.lock = threading.RLock() - self.ctx = ctx - self.config = config - self.controller = controller - self.next_pool_id = 0 - if (logger): - self.log = lambda x: logger.info(x) - else: - def tmp(x): - """ - implement log behavior. - """ - print x - self.log = tmp - if self.config is None: - self.config = dict() - pools = self.list_pools() - self.pools = {} - for pool in pools: - self.pools[pool] = self.get_pool_property(pool, 'pg_num') - - def raw_cluster_cmd(self, *args): - """ - Start ceph on a raw cluster. Return count - """ - testdir = teuthology.get_testdir(self.ctx) - ceph_args = [ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'ceph', - ] - ceph_args.extend(args) - proc = self.controller.run( - args=ceph_args, - stdout=StringIO(), - ) - return proc.stdout.getvalue() - - def raw_cluster_cmd_result(self, *args): - """ - Start ceph on a cluster. Return success or failure information. 
- """ - testdir = teuthology.get_testdir(self.ctx) - ceph_args = [ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'ceph', - ] - ceph_args.extend(args) - proc = self.controller.run( - args=ceph_args, - check_status=False, - ) - return proc.exitstatus - - def do_rados(self, remote, cmd): - """ - Execute a remote rados command. - """ - testdir = teuthology.get_testdir(self.ctx) - pre = [ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'rados', - ] - pre.extend(cmd) - proc = remote.run( - args=pre, - wait=True, - ) - return proc - - def rados_write_objects(self, pool, num_objects, size, - timelimit, threads, cleanup=False): - """ - Write rados objects - Threads not used yet. - """ - args = [ - '-p', pool, - '--num-objects', num_objects, - '-b', size, - 'bench', timelimit, - 'write' - ] - if not cleanup: - args.append('--no-cleanup') - return self.do_rados(self.controller, map(str, args)) - - def do_put(self, pool, obj, fname): - """ - Implement rados put operation - """ - return self.do_rados( - self.controller, - [ - '-p', - pool, - 'put', - obj, - fname - ] - ) - - def do_get(self, pool, obj, fname='/dev/null'): - """ - Implement rados get operation - """ - return self.do_rados( - self.controller, - [ - '-p', - pool, - 'stat', - obj, - fname - ] - ) - - def osd_admin_socket(self, osd_id, command, check_status=True, timeout=0): - return self.admin_socket('osd', osd_id, command, check_status, timeout) - - def find_remote(self, service_type, service_id): - """ - Get the Remote for the host where a particular service runs. - - :param service_type: 'mds', 'osd', 'client' - :param service_id: The second part of a role, e.g. 
'0' for - the role 'client.0' - :return: a Remote instance for the host where the - requested role is placed - """ - for _remote, roles_for_host in self.ctx.cluster.remotes.iteritems(): - for id_ in teuthology.roles_of_type(roles_for_host, service_type): - if id_ == str(service_id): - return _remote - - raise KeyError("Service {0}.{1} not found".format(service_type, - service_id)) - - def admin_socket(self, service_type, service_id, - command, check_status=True, timeout=0): - """ - Remotely start up ceph specifying the admin socket - :param command: a list of words to use as the command - to the admin socket - """ - testdir = teuthology.get_testdir(self.ctx) - remote = self.find_remote(service_type, service_id) - args = [ - 'sudo', - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'timeout', - str(timeout), - 'ceph', - '--admin-daemon', - '/var/run/ceph/ceph-{type}.{id}.asok'.format( - type=service_type, - id=service_id), - ] - args.extend(command) - return remote.run( - args=args, - stdout=StringIO(), - wait=True, - check_status=check_status - ) - - def objectstore_tool(self, pool, options, args, **kwargs): - return ObjectStoreTool(self, pool, **kwargs).run(options, args) - - def get_pgid(self, pool, pgnum): - """ - :param pool: pool name - :param pgnum: pg number - :returns: a string representing this pg. - """ - poolnum = self.get_pool_num(pool) - pg_str = "{poolnum}.{pgnum}".format( - poolnum=poolnum, - pgnum=pgnum) - return pg_str - - def get_pg_replica(self, pool, pgnum): - """ - get replica for pool, pgnum (e.g. (data, 0)->0 - """ - output = self.raw_cluster_cmd("pg", "dump", '--format=json') - j = json.loads('\n'.join(output.split('\n')[1:])) - pg_str = self.get_pgid(pool, pgnum) - for pg in j['pg_stats']: - if pg['pgid'] == pg_str: - return int(pg['acting'][-1]) - assert False - - def get_pg_primary(self, pool, pgnum): - """ - get primary for pool, pgnum (e.g. 
(data, 0)->0 - """ - output = self.raw_cluster_cmd("pg", "dump", '--format=json') - j = json.loads('\n'.join(output.split('\n')[1:])) - pg_str = self.get_pgid(pool, pgnum) - for pg in j['pg_stats']: - if pg['pgid'] == pg_str: - return int(pg['acting'][0]) - assert False - - def get_pool_num(self, pool): - """ - get number for pool (e.g., data -> 2) - """ - return int(self.get_pool_dump(pool)['pool']) - - def list_pools(self): - """ - list all pool names - """ - osd_dump = self.get_osd_dump_json() - self.log(osd_dump['pools']) - return [str(i['pool_name']) for i in osd_dump['pools']] - - def clear_pools(self): - """ - remove all pools - """ - [self.remove_pool(i) for i in self.list_pools()] - - def kick_recovery_wq(self, osdnum): - """ - Run kick_recovery_wq on cluster. - """ - return self.raw_cluster_cmd( - 'tell', "osd.%d" % (int(osdnum),), - 'debug', - 'kick_recovery_wq', - '0') - - def wait_run_admin_socket(self, service_type, - service_id, args=['version'], timeout=75): - """ - If osd_admin_socket call suceeds, return. Otherwise wait - five seconds and try again. - """ - tries = 0 - while True: - proc = self.admin_socket(service_type, service_id, - args, check_status=False) - if proc.exitstatus is 0: - break - else: - tries += 1 - if (tries * 5) > timeout: - raise Exception('timed out waiting for admin_socket ' - 'to appear after {type}.{id} restart'. - format(type=service_type, - id=service_id)) - self.log("waiting on admin_socket for {type}-{id}, " - "{command}".format(type=service_type, - id=service_id, - command=args)) - time.sleep(5) - - def get_pool_dump(self, pool): - """ - get the osd dump part of a pool - """ - osd_dump = self.get_osd_dump_json() - for i in osd_dump['pools']: - if i['pool_name'] == pool: - return i - assert False - - def set_config(self, osdnum, **argdict): - """ - :param osdnum: osd number - :param argdict: dictionary containing values to set. 
- """ - for k, v in argdict.iteritems(): - self.wait_run_admin_socket( - 'osd', osdnum, - ['config', 'set', str(k), str(v)]) - - def raw_cluster_status(self): - """ - Get status from cluster - """ - status = self.raw_cluster_cmd('status', '--format=json-pretty') - return json.loads(status) - - def raw_osd_status(self): - """ - Get osd status from cluster - """ - return self.raw_cluster_cmd('osd', 'dump') - - def get_osd_status(self): - """ - Get osd statuses sorted by states that the osds are in. - """ - osd_lines = filter( - lambda x: x.startswith('osd.') and (("up" in x) or ("down" in x)), - self.raw_osd_status().split('\n')) - self.log(osd_lines) - in_osds = [int(i[4:].split()[0]) - for i in filter(lambda x: " in " in x, osd_lines)] - out_osds = [int(i[4:].split()[0]) - for i in filter(lambda x: " out " in x, osd_lines)] - up_osds = [int(i[4:].split()[0]) - for i in filter(lambda x: " up " in x, osd_lines)] - down_osds = [int(i[4:].split()[0]) - for i in filter(lambda x: " down " in x, osd_lines)] - dead_osds = [int(x.id_) - for x in filter(lambda x: - not x.running(), - self.ctx.daemons. - iter_daemons_of_role('osd'))] - live_osds = [int(x.id_) for x in - filter(lambda x: - x.running(), - self.ctx.daemons.iter_daemons_of_role('osd'))] - return {'in': in_osds, 'out': out_osds, 'up': up_osds, - 'down': down_osds, 'dead': dead_osds, 'live': live_osds, - 'raw': osd_lines} - - def get_num_pgs(self): - """ - Check cluster status for the number of pgs - """ - status = self.raw_cluster_status() - self.log(status) - return status['pgmap']['num_pgs'] - - def create_erasure_code_profile(self, profile_name, profile): - """ - Create an erasure code profile name that can be used as a parameter - when creating an erasure coded pool. 
- """ - with self.lock: - args = cmd_erasure_code_profile(profile_name, profile) - self.raw_cluster_cmd(*args) - - def create_pool_with_unique_name(self, pg_num=16, - erasure_code_profile_name=None): - """ - Create a pool named unique_pool_X where X is unique. - """ - name = "" - with self.lock: - name = "unique_pool_%s" % (str(self.next_pool_id),) - self.next_pool_id += 1 - self.create_pool( - name, - pg_num, - erasure_code_profile_name=erasure_code_profile_name) - return name - - @contextlib.contextmanager - def pool(self, pool_name, pg_num=16, erasure_code_profile_name=None): - self.create_pool(pool_name, pg_num, erasure_code_profile_name) - yield - self.remove_pool(pool_name) - - def create_pool(self, pool_name, pg_num=16, - erasure_code_profile_name=None): - """ - Create a pool named from the pool_name parameter. - :param pool_name: name of the pool being created. - :param pg_num: initial number of pgs. - :param erasure_code_profile_name: if set and !None create an - erasure coded pool using the profile - """ - with self.lock: - assert isinstance(pool_name, str) - assert isinstance(pg_num, int) - assert pool_name not in self.pools - self.log("creating pool_name %s" % (pool_name,)) - if erasure_code_profile_name: - self.raw_cluster_cmd('osd', 'pool', 'create', - pool_name, str(pg_num), str(pg_num), - 'erasure', erasure_code_profile_name) - else: - self.raw_cluster_cmd('osd', 'pool', 'create', - pool_name, str(pg_num)) - self.pools[pool_name] = pg_num - - def remove_pool(self, pool_name): - """ - Remove the indicated pool - :param pool_name: Pool to be removed - """ - with self.lock: - assert isinstance(pool_name, str) - assert pool_name in self.pools - self.log("removing pool_name %s" % (pool_name,)) - del self.pools[pool_name] - self.do_rados(self.controller, - ['rmpool', pool_name, pool_name, - "--yes-i-really-really-mean-it"]) - - def get_pool(self): - """ - Pick a random pool - """ - with self.lock: - return random.choice(self.pools.keys()) - - def 
get_pool_pg_num(self, pool_name): - """ - Return the number of pgs in the pool specified. - """ - with self.lock: - assert isinstance(pool_name, str) - if pool_name in self.pools: - return self.pools[pool_name] - return 0 - - def get_pool_property(self, pool_name, prop): - """ - :param pool_name: pool - :param prop: property to be checked. - :returns: property as an int value. - """ - with self.lock: - assert isinstance(pool_name, str) - assert isinstance(prop, str) - output = self.raw_cluster_cmd( - 'osd', - 'pool', - 'get', - pool_name, - prop) - return int(output.split()[1]) - - def set_pool_property(self, pool_name, prop, val): - """ - :param pool_name: pool - :param prop: property to be set. - :param val: value to set. - - This routine retries if set operation fails. - """ - with self.lock: - assert isinstance(pool_name, str) - assert isinstance(prop, str) - assert isinstance(val, int) - tries = 0 - while True: - r = self.raw_cluster_cmd_result( - 'osd', - 'pool', - 'set', - pool_name, - prop, - str(val)) - if r != 11: # EAGAIN - break - tries += 1 - if tries > 50: - raise Exception('timed out getting EAGAIN ' - 'when setting pool property %s %s = %s' % - (pool_name, prop, val)) - self.log('got EAGAIN setting pool property, ' - 'waiting a few seconds...') - time.sleep(2) - - def expand_pool(self, pool_name, by, max_pgs): - """ - Increase the number of pgs in a pool - """ - with self.lock: - assert isinstance(pool_name, str) - assert isinstance(by, int) - assert pool_name in self.pools - if self.get_num_creating() > 0: - return - if (self.pools[pool_name] + by) > max_pgs: - return - self.log("increase pool size by %d" % (by,)) - new_pg_num = self.pools[pool_name] + by - self.set_pool_property(pool_name, "pg_num", new_pg_num) - self.pools[pool_name] = new_pg_num - - def set_pool_pgpnum(self, pool_name): - """ - Set pgpnum property of pool_name pool. 
- """ - with self.lock: - assert isinstance(pool_name, str) - assert pool_name in self.pools - if self.get_num_creating() > 0: - return - self.set_pool_property(pool_name, 'pgp_num', self.pools[pool_name]) - - def list_pg_missing(self, pgid): - """ - return list of missing pgs with the id specified - """ - r = None - offset = {} - while True: - out = self.raw_cluster_cmd('--', 'pg', pgid, 'list_missing', - json.dumps(offset)) - j = json.loads(out) - if r is None: - r = j - else: - r['objects'].extend(j['objects']) - if not 'more' in j: - break - if j['more'] == 0: - break - offset = j['objects'][-1]['oid'] - if 'more' in r: - del r['more'] - return r - - def get_pg_stats(self): - """ - Dump the cluster and get pg stats - """ - out = self.raw_cluster_cmd('pg', 'dump', '--format=json') - j = json.loads('\n'.join(out.split('\n')[1:])) - return j['pg_stats'] - - def compile_pg_status(self): - """ - Return a histogram of pg state values - """ - ret = {} - j = self.get_pg_stats() - for pg in j: - for status in pg['state'].split('+'): - if status not in ret: - ret[status] = 0 - ret[status] += 1 - return ret - - def pg_scrubbing(self, pool, pgnum): - """ - pg scrubbing wrapper - """ - pgstr = self.get_pgid(pool, pgnum) - stats = self.get_single_pg_stats(pgstr) - return 'scrub' in stats['state'] - - def pg_repairing(self, pool, pgnum): - """ - pg repairing wrapper - """ - pgstr = self.get_pgid(pool, pgnum) - stats = self.get_single_pg_stats(pgstr) - return 'repair' in stats['state'] - - def pg_inconsistent(self, pool, pgnum): - """ - pg inconsistent wrapper - """ - pgstr = self.get_pgid(pool, pgnum) - stats = self.get_single_pg_stats(pgstr) - return 'inconsistent' in stats['state'] - - def get_last_scrub_stamp(self, pool, pgnum): - """ - Get the timestamp of the last scrub. 
- """ - stats = self.get_single_pg_stats(self.get_pgid(pool, pgnum)) - return stats["last_scrub_stamp"] - - def do_pg_scrub(self, pool, pgnum, stype): - """ - Scrub pg and wait for scrubbing to finish - """ - init = self.get_last_scrub_stamp(pool, pgnum) - while init == self.get_last_scrub_stamp(pool, pgnum): - self.log("waiting for scrub type %s" % (stype,)) - self.raw_cluster_cmd('pg', stype, self.get_pgid(pool, pgnum)) - time.sleep(10) - - def get_single_pg_stats(self, pgid): - """ - Return pg for the pgid specified. - """ - all_stats = self.get_pg_stats() - - for pg in all_stats: - if pg['pgid'] == pgid: - return pg - - return None - - def get_object_pg_with_shard(self, pool, name, osdid): - """ - """ - pool_dump = self.get_pool_dump(pool) - object_map = self.get_object_map(pool, name) - if pool_dump["type"] == CephManager.ERASURE_CODED_POOL: - shard = object_map['acting'].index(osdid) - return "{pgid}s{shard}".format(pgid=object_map['pgid'], - shard=shard) - else: - return object_map['pgid'] - - def get_object_primary(self, pool, name): - """ - """ - object_map = self.get_object_map(pool, name) - return object_map['acting_primary'] - - def get_object_map(self, pool, name): - """ - osd map --format=json converted to a python object - :returns: the python object - """ - out = self.raw_cluster_cmd('--format=json', 'osd', 'map', pool, name) - return json.loads('\n'.join(out.split('\n')[1:])) - - def get_osd_dump_json(self): - """ - osd dump --format=json converted to a python object - :returns: the python object - """ - out = self.raw_cluster_cmd('osd', 'dump', '--format=json') - return json.loads('\n'.join(out.split('\n')[1:])) - - def get_osd_dump(self): - """ - Dump osds - :returns: all osds - """ - return self.get_osd_dump_json()['osds'] - - def get_stuck_pgs(self, type_, threshold): - """ - :returns: stuck pg information from the cluster - """ - out = self.raw_cluster_cmd('pg', 'dump_stuck', type_, str(threshold), - '--format=json') - return json.loads(out) - 
- def get_num_unfound_objects(self): - """ - Check cluster status to get the number of unfound objects - """ - status = self.raw_cluster_status() - self.log(status) - return status['pgmap'].get('unfound_objects', 0) - - def get_num_creating(self): - """ - Find the number of pgs in creating mode. - """ - pgs = self.get_pg_stats() - num = 0 - for pg in pgs: - if 'creating' in pg['state']: - num += 1 - return num - - def get_num_active_clean(self): - """ - Find the number of active and clean pgs. - """ - pgs = self.get_pg_stats() - num = 0 - for pg in pgs: - if (pg['state'].count('active') and - pg['state'].count('clean') and - not pg['state'].count('stale')): - num += 1 - return num - - def get_num_active_recovered(self): - """ - Find the number of active and recovered pgs. - """ - pgs = self.get_pg_stats() - num = 0 - for pg in pgs: - if (pg['state'].count('active') and - not pg['state'].count('recover') and - not pg['state'].count('backfill') and - not pg['state'].count('stale')): - num += 1 - return num - - def get_is_making_recovery_progress(self): - """ - Return whether there is recovery progress discernable in the - raw cluster status - """ - status = self.raw_cluster_status() - kps = status['pgmap'].get('recovering_keys_per_sec', 0) - bps = status['pgmap'].get('recovering_bytes_per_sec', 0) - ops = status['pgmap'].get('recovering_objects_per_sec', 0) - return kps > 0 or bps > 0 or ops > 0 - - def get_num_active(self): - """ - Find the number of active pgs. - """ - pgs = self.get_pg_stats() - num = 0 - for pg in pgs: - if pg['state'].count('active') and not pg['state'].count('stale'): - num += 1 - return num - - def get_num_down(self): - """ - Find the number of pgs that are down. 
- """ - pgs = self.get_pg_stats() - num = 0 - for pg in pgs: - if ((pg['state'].count('down') and not - pg['state'].count('stale')) or - (pg['state'].count('incomplete') and not - pg['state'].count('stale'))): - num += 1 - return num - - def get_num_active_down(self): - """ - Find the number of pgs that are either active or down. - """ - pgs = self.get_pg_stats() - num = 0 - for pg in pgs: - if ((pg['state'].count('active') and not - pg['state'].count('stale')) or - (pg['state'].count('down') and not - pg['state'].count('stale')) or - (pg['state'].count('incomplete') and not - pg['state'].count('stale'))): - num += 1 - return num - - def is_clean(self): - """ - True if all pgs are clean - """ - return self.get_num_active_clean() == self.get_num_pgs() - - def is_recovered(self): - """ - True if all pgs have recovered - """ - return self.get_num_active_recovered() == self.get_num_pgs() - - def is_active_or_down(self): - """ - True if all pgs are active or down - """ - return self.get_num_active_down() == self.get_num_pgs() - - def wait_for_clean(self, timeout=None): - """ - Returns true when all pgs are clean. - """ - self.log("waiting for clean") - start = time.time() - num_active_clean = self.get_num_active_clean() - while not self.is_clean(): - if timeout is not None: - if self.get_is_making_recovery_progress(): - self.log("making progress, resetting timeout") - start = time.time() - else: - self.log("no progress seen, keeping timeout for now") - if time.time() - start >= timeout: - self.log('dumping pgs') - out = self.raw_cluster_cmd('pg', 'dump') - self.log(out) - assert time.time() - start < timeout, \ - 'failed to become clean before timeout expired' - cur_active_clean = self.get_num_active_clean() - if cur_active_clean != num_active_clean: - start = time.time() - num_active_clean = cur_active_clean - time.sleep(3) - self.log("clean!") - - def are_all_osds_up(self): - """ - Returns true if all osds are up. 
- """ - x = self.get_osd_dump() - return (len(x) == sum([(y['up'] > 0) for y in x])) - - def wait_for_all_up(self, timeout=None): - """ - When this exits, either the timeout has expired, or all - osds are up. - """ - self.log("waiting for all up") - start = time.time() - while not self.are_all_osds_up(): - if timeout is not None: - assert time.time() - start < timeout, \ - 'timeout expired in wait_for_all_up' - time.sleep(3) - self.log("all up!") - - def wait_for_recovery(self, timeout=None): - """ - Check peering. When this exists, we have recovered. - """ - self.log("waiting for recovery to complete") - start = time.time() - num_active_recovered = self.get_num_active_recovered() - while not self.is_recovered(): - now = time.time() - if timeout is not None: - if self.get_is_making_recovery_progress(): - self.log("making progress, resetting timeout") - start = time.time() - else: - self.log("no progress seen, keeping timeout for now") - if now - start >= timeout: - self.log('dumping pgs') - out = self.raw_cluster_cmd('pg', 'dump') - self.log(out) - assert now - start < timeout, \ - 'failed to recover before timeout expired' - cur_active_recovered = self.get_num_active_recovered() - if cur_active_recovered != num_active_recovered: - start = time.time() - num_active_recovered = cur_active_recovered - time.sleep(3) - self.log("recovered!") - - def wait_for_active(self, timeout=None): - """ - Check peering. 
When this exists, we are definitely active - """ - self.log("waiting for peering to complete") - start = time.time() - num_active = self.get_num_active() - while not self.is_active(): - if timeout is not None: - if time.time() - start >= timeout: - self.log('dumping pgs') - out = self.raw_cluster_cmd('pg', 'dump') - self.log(out) - assert time.time() - start < timeout, \ - 'failed to recover before timeout expired' - cur_active = self.get_num_active() - if cur_active != num_active: - start = time.time() - num_active = cur_active - time.sleep(3) - self.log("active!") - - def wait_for_active_or_down(self, timeout=None): - """ - Check peering. When this exists, we are definitely either - active or down - """ - self.log("waiting for peering to complete or become blocked") - start = time.time() - num_active_down = self.get_num_active_down() - while not self.is_active_or_down(): - if timeout is not None: - if time.time() - start >= timeout: - self.log('dumping pgs') - out = self.raw_cluster_cmd('pg', 'dump') - self.log(out) - assert time.time() - start < timeout, \ - 'failed to recover before timeout expired' - cur_active_down = self.get_num_active_down() - if cur_active_down != num_active_down: - start = time.time() - num_active_down = cur_active_down - time.sleep(3) - self.log("active or down!") - - def osd_is_up(self, osd): - """ - Wrapper for osd check - """ - osds = self.get_osd_dump() - return osds[osd]['up'] > 0 - - def wait_till_osd_is_up(self, osd, timeout=None): - """ - Loop waiting for osd. 
- """ - self.log('waiting for osd.%d to be up' % osd) - start = time.time() - while not self.osd_is_up(osd): - if timeout is not None: - assert time.time() - start < timeout, \ - 'osd.%d failed to come up before timeout expired' % osd - time.sleep(3) - self.log('osd.%d is up' % osd) - - def is_active(self): - """ - Wrapper to check if all pgs are active - """ - return self.get_num_active() == self.get_num_pgs() - - def wait_till_active(self, timeout=None): - """ - Wait until all pgs are active. - """ - self.log("waiting till active") - start = time.time() - while not self.is_active(): - if timeout is not None: - if time.time() - start >= timeout: - self.log('dumping pgs') - out = self.raw_cluster_cmd('pg', 'dump') - self.log(out) - assert time.time() - start < timeout, \ - 'failed to become active before timeout expired' - time.sleep(3) - self.log("active!") - - def mark_out_osd(self, osd): - """ - Wrapper to mark osd out. - """ - self.raw_cluster_cmd('osd', 'out', str(osd)) - - def kill_osd(self, osd): - """ - Kill osds by either power cycling (if indicated by the config) - or by stopping. - """ - if self.config.get('powercycle'): - (remote,) = (self.ctx.cluster.only('osd.{o}'.format(o=osd)). - remotes.iterkeys()) - self.log('kill_osd on osd.{o} ' - 'doing powercycle of {s}'.format(o=osd, s=remote.name)) - assert remote.console is not None, ("powercycling requested " - "but RemoteConsole is not " - "initialized. " - "Check ipmi config.") - remote.console.power_off() - else: - self.ctx.daemons.get_daemon('osd', osd).stop() - - def blackhole_kill_osd(self, osd): - """ - Stop osd if nothing else works. - """ - self.raw_cluster_cmd('--', 'tell', 'osd.%d' % osd, - 'injectargs', '--filestore-blackhole') - time.sleep(2) - self.ctx.daemons.get_daemon('osd', osd).stop() - - def revive_osd(self, osd, timeout=150): - """ - Revive osds by either power cycling (if indicated by the config) - or by restarting. 
- """ - if self.config.get('powercycle'): - (remote,) = (self.ctx.cluster.only('osd.{o}'.format(o=osd)). - remotes.iterkeys()) - self.log('kill_osd on osd.{o} doing powercycle of {s}'. - format(o=osd, s=remote.name)) - assert remote.console is not None, ("powercycling requested " - "but RemoteConsole is not " - "initialized. " - "Check ipmi config.") - remote.console.power_on() - if not remote.console.check_status(300): - raise Exception('Failed to revive osd.{o} via ipmi'. - format(o=osd)) - teuthology.reconnect(self.ctx, 60, [remote]) - mount_osd_data(self.ctx, remote, str(osd)) - make_admin_daemon_dir(self.ctx, remote) - self.ctx.daemons.get_daemon('osd', osd).reset() - self.ctx.daemons.get_daemon('osd', osd).restart() - # wait for dump_ops_in_flight; this command doesn't appear - # until after the signal handler is installed and it is safe - # to stop the osd again without making valgrind leak checks - # unhappy. see #5924. - self.wait_run_admin_socket('osd', osd, - args=['dump_ops_in_flight'], - timeout=timeout) - - def mark_down_osd(self, osd): - """ - Cluster command wrapper - """ - self.raw_cluster_cmd('osd', 'down', str(osd)) - - def mark_in_osd(self, osd): - """ - Cluster command wrapper - """ - self.raw_cluster_cmd('osd', 'in', str(osd)) - - ## monitors - def signal_mon(self, mon, sig): - """ - Wrapper to local get_deamon call - """ - self.ctx.daemons.get_daemon('mon', mon).signal(sig) - - def kill_mon(self, mon): - """ - Kill the monitor by either power cycling (if the config says so), - or by doing a stop. - """ - if self.config.get('powercycle'): - (remote,) = (self.ctx.cluster.only('mon.{m}'.format(m=mon)). - remotes.iterkeys()) - self.log('kill_mon on mon.{m} doing powercycle of {s}'. - format(m=mon, s=remote.name)) - assert remote.console is not None, ("powercycling requested " - "but RemoteConsole is not " - "initialized. 
" - "Check ipmi config.") - - remote.console.power_off() - else: - self.ctx.daemons.get_daemon('mon', mon).stop() - - def revive_mon(self, mon): - """ - Restart by either power cycling (if the config says so), - or by doing a normal restart. - """ - if self.config.get('powercycle'): - (remote,) = (self.ctx.cluster.only('mon.{m}'.format(m=mon)). - remotes.iterkeys()) - self.log('revive_mon on mon.{m} doing powercycle of {s}'. - format(m=mon, s=remote.name)) - assert remote.console is not None, ("powercycling requested " - "but RemoteConsole is not " - "initialized. " - "Check ipmi config.") - - remote.console.power_on() - make_admin_daemon_dir(self.ctx, remote) - self.ctx.daemons.get_daemon('mon', mon).restart() - - def get_mon_status(self, mon): - """ - Extract all the monitor status information from the cluster - """ - addr = self.ctx.ceph.conf['mon.%s' % mon]['mon addr'] - out = self.raw_cluster_cmd('-m', addr, 'mon_status') - return json.loads(out) - - def get_mon_quorum(self): - """ - Extract monitor quorum information from the cluster - """ - out = self.raw_cluster_cmd('quorum_status') - j = json.loads(out) - self.log('quorum_status is %s' % out) - return j['quorum'] - - def wait_for_mon_quorum_size(self, size, timeout=300): - """ - Loop until quorum size is reached. - """ - self.log('waiting for quorum size %d' % size) - start = time.time() - while not len(self.get_mon_quorum()) == size: - if timeout is not None: - assert time.time() - start < timeout, \ - ('failed to reach quorum size %d ' - 'before timeout expired' % size) - time.sleep(3) - self.log("quorum is size %d" % size) - - def get_mon_health(self, debug=False): - """ - Extract all the monitor health information. - """ - out = self.raw_cluster_cmd('health', '--format=json') - if debug: - self.log('health:\n{h}'.format(h=out)) - return json.loads(out) - - ## metadata servers - - def kill_mds(self, mds): - """ - Powercyle if set in config, otherwise just stop. 
- """ - if self.config.get('powercycle'): - (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)). - remotes.iterkeys()) - self.log('kill_mds on mds.{m} doing powercycle of {s}'. - format(m=mds, s=remote.name)) - assert remote.console is not None, ("powercycling requested " - "but RemoteConsole is not " - "initialized. " - "Check ipmi config.") - remote.console.power_off() - else: - self.ctx.daemons.get_daemon('mds', mds).stop() - - def kill_mds_by_rank(self, rank): - """ - kill_mds wrapper to kill based on rank passed. - """ - status = self.get_mds_status_by_rank(rank) - self.kill_mds(status['name']) - - def revive_mds(self, mds, standby_for_rank=None): - """ - Revive mds -- do an ipmpi powercycle (if indicated by the config) - and then restart (using --hot-standby if specified. - """ - if self.config.get('powercycle'): - (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)). - remotes.iterkeys()) - self.log('revive_mds on mds.{m} doing powercycle of {s}'. - format(m=mds, s=remote.name)) - assert remote.console is not None, ("powercycling requested " - "but RemoteConsole is not " - "initialized. " - "Check ipmi config.") - remote.console.power_on() - make_admin_daemon_dir(self.ctx, remote) - args = [] - if standby_for_rank: - args.extend(['--hot-standby', standby_for_rank]) - self.ctx.daemons.get_daemon('mds', mds).restart(*args) - - def revive_mds_by_rank(self, rank, standby_for_rank=None): - """ - revive_mds wrapper to revive based on rank passed. - """ - status = self.get_mds_status_by_rank(rank) - self.revive_mds(status['name'], standby_for_rank) - - def get_mds_status(self, mds): - """ - Run cluster commands for the mds in order to get mds information - """ - out = self.raw_cluster_cmd('mds', 'dump', '--format=json') - j = json.loads(' '.join(out.splitlines()[1:])) - # collate; for dup ids, larger gid wins. 
- for info in j['info'].itervalues(): - if info['name'] == mds: - return info - return None - - def get_mds_status_by_rank(self, rank): - """ - Run cluster commands for the mds in order to get mds information - check rank. - """ - out = self.raw_cluster_cmd('mds', 'dump', '--format=json') - j = json.loads(' '.join(out.splitlines()[1:])) - # collate; for dup ids, larger gid wins. - for info in j['info'].itervalues(): - if info['rank'] == rank: - return info - return None - - def get_mds_status_all(self): - """ - Run cluster command to extract all the mds status. - """ - out = self.raw_cluster_cmd('mds', 'dump', '--format=json') - j = json.loads(' '.join(out.splitlines()[1:])) - return j - - def get_filepath(self): - """ - Return path to osd data with {id} needing to be replaced - """ - return "/var/lib/ceph/osd/ceph-{id}" - -def utility_task(name): - """ - Generate ceph_manager subtask corresponding to ceph_manager - method name - """ - def task(ctx, config): - if config is None: - config = {} - args = config.get('args', []) - kwargs = config.get('kwargs', {}) - fn = getattr(ctx.manager, name) - fn(*args, **kwargs) - return task - -revive_osd = utility_task("revive_osd") -kill_osd = utility_task("kill_osd") -create_pool = utility_task("create_pool") -remove_pool = utility_task("remove_pool") -wait_for_clean = utility_task("wait_for_clean") -set_pool_property = utility_task("set_pool_property") diff --git a/tasks/ceph_objectstore_tool.py b/tasks/ceph_objectstore_tool.py deleted file mode 100644 index 3b899de33b8..00000000000 --- a/tasks/ceph_objectstore_tool.py +++ /dev/null @@ -1,679 +0,0 @@ -""" -ceph_objectstore_tool - Simple test of ceph-objectstore-tool utility -""" -from cStringIO import StringIO -import contextlib -import logging -import ceph_manager -from teuthology import misc as teuthology -import time -import os -import string -from teuthology.orchestra import run -import sys -import tempfile -import json -from util.rados import (rados, 
create_replicated_pool, create_ec_pool) -# from util.rados import (rados, create_ec_pool, -# create_replicated_pool, -# create_cache_pool) - -log = logging.getLogger(__name__) - -# Should get cluster name "ceph" from somewhere -# and normal path from osd_data and osd_journal in conf -FSPATH = "/var/lib/ceph/osd/ceph-{id}" -JPATH = "/var/lib/ceph/osd/ceph-{id}/journal" - - -def cod_setup_local_data(log, ctx, NUM_OBJECTS, DATADIR, - BASE_NAME, DATALINECOUNT): - objects = range(1, NUM_OBJECTS + 1) - for i in objects: - NAME = BASE_NAME + "{num}".format(num=i) - LOCALNAME = os.path.join(DATADIR, NAME) - - dataline = range(DATALINECOUNT) - fd = open(LOCALNAME, "w") - data = "This is the data for " + NAME + "\n" - for _ in dataline: - fd.write(data) - fd.close() - - -def cod_setup_remote_data(log, ctx, remote, NUM_OBJECTS, DATADIR, - BASE_NAME, DATALINECOUNT): - - objects = range(1, NUM_OBJECTS + 1) - for i in objects: - NAME = BASE_NAME + "{num}".format(num=i) - DDNAME = os.path.join(DATADIR, NAME) - - remote.run(args=['rm', '-f', DDNAME]) - - dataline = range(DATALINECOUNT) - data = "This is the data for " + NAME + "\n" - DATA = "" - for _ in dataline: - DATA += data - teuthology.write_file(remote, DDNAME, DATA) - - -def cod_setup(log, ctx, remote, NUM_OBJECTS, DATADIR, - BASE_NAME, DATALINECOUNT, POOL, db, ec): - ERRORS = 0 - log.info("Creating {objs} objects in pool".format(objs=NUM_OBJECTS)) - - objects = range(1, NUM_OBJECTS + 1) - for i in objects: - NAME = BASE_NAME + "{num}".format(num=i) - DDNAME = os.path.join(DATADIR, NAME) - - proc = rados(ctx, remote, ['-p', POOL, 'put', NAME, DDNAME], - wait=False) - # proc = remote.run(args=['rados', '-p', POOL, 'put', NAME, DDNAME]) - ret = proc.wait() - if ret != 0: - log.critical("Rados put failed with status {ret}". 
- format(ret=proc.exitstatus)) - sys.exit(1) - - db[NAME] = {} - - keys = range(i) - db[NAME]["xattr"] = {} - for k in keys: - if k == 0: - continue - mykey = "key{i}-{k}".format(i=i, k=k) - myval = "val{i}-{k}".format(i=i, k=k) - proc = remote.run(args=['rados', '-p', POOL, 'setxattr', - NAME, mykey, myval]) - ret = proc.wait() - if ret != 0: - log.error("setxattr failed with {ret}".format(ret=ret)) - ERRORS += 1 - db[NAME]["xattr"][mykey] = myval - - # Erasure coded pools don't support omap - if ec: - continue - - # Create omap header in all objects but REPobject1 - if i != 1: - myhdr = "hdr{i}".format(i=i) - proc = remote.run(args=['rados', '-p', POOL, 'setomapheader', - NAME, myhdr]) - ret = proc.wait() - if ret != 0: - log.critical("setomapheader failed with {ret}".format(ret=ret)) - ERRORS += 1 - db[NAME]["omapheader"] = myhdr - - db[NAME]["omap"] = {} - for k in keys: - if k == 0: - continue - mykey = "okey{i}-{k}".format(i=i, k=k) - myval = "oval{i}-{k}".format(i=i, k=k) - proc = remote.run(args=['rados', '-p', POOL, 'setomapval', - NAME, mykey, myval]) - ret = proc.wait() - if ret != 0: - log.critical("setomapval failed with {ret}".format(ret=ret)) - db[NAME]["omap"][mykey] = myval - - return ERRORS - - -def get_lines(filename): - tmpfd = open(filename, "r") - line = True - lines = [] - while line: - line = tmpfd.readline().rstrip('\n') - if line: - lines += [line] - tmpfd.close() - os.unlink(filename) - return lines - - -@contextlib.contextmanager -def task(ctx, config): - """ - Run ceph_objectstore_tool test - - The config should be as follows:: - - ceph_objectstore_tool: - objects: 20 # - pgnum: 12 - """ - - if config is None: - config = {} - assert isinstance(config, dict), \ - 'ceph_objectstore_tool task only accepts a dict for configuration' - - log.info('Beginning ceph_objectstore_tool...') - - log.debug(config) - log.debug(ctx) - clients = ctx.cluster.only(teuthology.is_type('client')) - assert len(clients.remotes) > 0, 'Must specify at least 1 
client' - (cli_remote, _) = clients.remotes.popitem() - log.debug(cli_remote) - - # clients = dict(teuthology.get_clients(ctx=ctx, roles=config.keys())) - # client = clients.popitem() - # log.info(client) - osds = ctx.cluster.only(teuthology.is_type('osd')) - log.info("OSDS") - log.info(osds) - log.info(osds.remotes) - - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - manager = ceph_manager.CephManager( - mon, - ctx=ctx, - config=config, - logger=log.getChild('ceph_manager'), - ) - ctx.manager = manager - - while (len(manager.get_osd_status()['up']) != - len(manager.get_osd_status()['raw'])): - time.sleep(10) - while (len(manager.get_osd_status()['in']) != - len(manager.get_osd_status()['up'])): - time.sleep(10) - manager.raw_cluster_cmd('osd', 'set', 'noout') - manager.raw_cluster_cmd('osd', 'set', 'nodown') - - PGNUM = config.get('pgnum', 12) - log.info("pgnum: {num}".format(num=PGNUM)) - - ERRORS = 0 - - REP_POOL = "rep_pool" - REP_NAME = "REPobject" - create_replicated_pool(cli_remote, REP_POOL, PGNUM) - ERRORS += test_objectstore(ctx, config, cli_remote, REP_POOL, REP_NAME) - - EC_POOL = "ec_pool" - EC_NAME = "ECobject" - create_ec_pool(cli_remote, EC_POOL, 'default', PGNUM) - ERRORS += test_objectstore(ctx, config, cli_remote, - EC_POOL, EC_NAME, ec=True) - - if ERRORS == 0: - log.info("TEST PASSED") - else: - log.error("TEST FAILED WITH {errcount} ERRORS".format(errcount=ERRORS)) - - assert ERRORS == 0 - - try: - yield - finally: - log.info('Ending ceph_objectstore_tool') - - -def test_objectstore(ctx, config, cli_remote, REP_POOL, REP_NAME, ec=False): - manager = ctx.manager - - osds = ctx.cluster.only(teuthology.is_type('osd')) - - TEUTHDIR = teuthology.get_testdir(ctx) - DATADIR = os.path.join(TEUTHDIR, "data") - DATALINECOUNT = 10000 - ERRORS = 0 - NUM_OBJECTS = config.get('objects', 10) - log.info("objects: {num}".format(num=NUM_OBJECTS)) - - pool_dump = manager.get_pool_dump(REP_POOL) - 
REPID = pool_dump['pool'] - - log.debug("repid={num}".format(num=REPID)) - - db = {} - - LOCALDIR = tempfile.mkdtemp("cod") - - cod_setup_local_data(log, ctx, NUM_OBJECTS, LOCALDIR, - REP_NAME, DATALINECOUNT) - allremote = [] - allremote.append(cli_remote) - allremote += osds.remotes.keys() - allremote = list(set(allremote)) - for remote in allremote: - cod_setup_remote_data(log, ctx, remote, NUM_OBJECTS, DATADIR, - REP_NAME, DATALINECOUNT) - - ERRORS += cod_setup(log, ctx, cli_remote, NUM_OBJECTS, DATADIR, - REP_NAME, DATALINECOUNT, REP_POOL, db, ec) - - pgs = {} - for stats in manager.get_pg_stats(): - if stats["pgid"].find(str(REPID) + ".") != 0: - continue - if pool_dump["type"] == ceph_manager.CephManager.REPLICATED_POOL: - for osd in stats["acting"]: - pgs.setdefault(osd, []).append(stats["pgid"]) - elif pool_dump["type"] == ceph_manager.CephManager.ERASURE_CODED_POOL: - shard = 0 - for osd in stats["acting"]: - pgs.setdefault(osd, []).append("{pgid}s{shard}". - format(pgid=stats["pgid"], - shard=shard)) - shard += 1 - else: - raise Exception("{pool} has an unexpected type {type}". - format(pool=REP_POOL, type=pool_dump["type"])) - - log.info(pgs) - log.info(db) - - for osd in manager.get_osd_status()['up']: - manager.kill_osd(osd) - time.sleep(5) - - pgswithobjects = set() - objsinpg = {} - - # Test --op list and generate json for all objects - log.info("Test --op list by generating json for all objects") - prefix = ("sudo ceph-objectstore-tool " - "--data-path {fpath} " - "--journal-path {jpath} ").format(fpath=FSPATH, jpath=JPATH) - for remote in osds.remotes.iterkeys(): - log.debug(remote) - log.debug(osds.remotes[remote]) - for role in osds.remotes[remote]: - if string.find(role, "osd.") != 0: - continue - osdid = int(role.split('.')[1]) - log.info("process osd.{id} on {remote}". 
- format(id=osdid, remote=remote)) - cmd = (prefix + "--op list").format(id=osdid) - proc = remote.run(args=cmd.split(), check_status=False, - stdout=StringIO()) - if proc.exitstatus != 0: - log.error("Bad exit status {ret} from --op list request". - format(ret=proc.exitstatus)) - ERRORS += 1 - else: - for pgline in proc.stdout.getvalue().splitlines(): - if not pgline: - continue - (pg, obj) = json.loads(pgline) - name = obj['oid'] - if name in db: - pgswithobjects.add(pg) - objsinpg.setdefault(pg, []).append(name) - db[name].setdefault("pg2json", - {})[pg] = json.dumps(obj) - - log.info(db) - log.info(pgswithobjects) - log.info(objsinpg) - - if pool_dump["type"] == ceph_manager.CephManager.REPLICATED_POOL: - # Test get-bytes - log.info("Test get-bytes and set-bytes") - for basename in db.keys(): - file = os.path.join(DATADIR, basename) - GETNAME = os.path.join(DATADIR, "get") - SETNAME = os.path.join(DATADIR, "set") - - for remote in osds.remotes.iterkeys(): - for role in osds.remotes[remote]: - if string.find(role, "osd.") != 0: - continue - osdid = int(role.split('.')[1]) - if osdid not in pgs: - continue - - for pg, JSON in db[basename]["pg2json"].iteritems(): - if pg in pgs[osdid]: - cmd = ((prefix + "--pgid {pg}"). - format(id=osdid, pg=pg).split()) - cmd.append(run.Raw("'{json}'".format(json=JSON))) - cmd += ("get-bytes {fname}". - format(fname=GETNAME).split()) - proc = remote.run(args=cmd, check_status=False) - if proc.exitstatus != 0: - remote.run(args="rm -f {getfile}". - format(getfile=GETNAME).split()) - log.error("Bad exit status {ret}". - format(ret=proc.exitstatus)) - ERRORS += 1 - continue - cmd = ("diff -q {file} {getfile}". - format(file=file, getfile=GETNAME)) - proc = remote.run(args=cmd.split()) - if proc.exitstatus != 0: - log.error("Data from get-bytes differ") - # log.debug("Got:") - # cat_file(logging.DEBUG, GETNAME) - # log.debug("Expected:") - # cat_file(logging.DEBUG, file) - ERRORS += 1 - remote.run(args="rm -f {getfile}". 
- format(getfile=GETNAME).split()) - - data = ("put-bytes going into {file}\n". - format(file=file)) - teuthology.write_file(remote, SETNAME, data) - cmd = ((prefix + "--pgid {pg}"). - format(id=osdid, pg=pg).split()) - cmd.append(run.Raw("'{json}'".format(json=JSON))) - cmd += ("set-bytes {fname}". - format(fname=SETNAME).split()) - proc = remote.run(args=cmd, check_status=False) - proc.wait() - if proc.exitstatus != 0: - log.info("set-bytes failed for object {obj} " - "in pg {pg} osd.{id} ret={ret}". - format(obj=basename, pg=pg, - id=osdid, ret=proc.exitstatus)) - ERRORS += 1 - - cmd = ((prefix + "--pgid {pg}"). - format(id=osdid, pg=pg).split()) - cmd.append(run.Raw("'{json}'".format(json=JSON))) - cmd += "get-bytes -".split() - proc = remote.run(args=cmd, check_status=False, - stdout=StringIO()) - proc.wait() - if proc.exitstatus != 0: - log.error("get-bytes after " - "set-bytes ret={ret}". - format(ret=proc.exitstatus)) - ERRORS += 1 - else: - if data != proc.stdout.getvalue(): - log.error("Data inconsistent after " - "set-bytes, got:") - log.error(proc.stdout.getvalue()) - ERRORS += 1 - - cmd = ((prefix + "--pgid {pg}"). - format(id=osdid, pg=pg).split()) - cmd.append(run.Raw("'{json}'".format(json=JSON))) - cmd += ("set-bytes {fname}". - format(fname=file).split()) - proc = remote.run(args=cmd, check_status=False) - proc.wait() - if proc.exitstatus != 0: - log.info("set-bytes failed for object {obj} " - "in pg {pg} osd.{id} ret={ret}". 
- format(obj=basename, pg=pg, - id=osdid, ret=proc.exitstatus)) - ERRORS += 1 - - log.info("Test list-attrs get-attr") - for basename in db.keys(): - file = os.path.join(DATADIR, basename) - GETNAME = os.path.join(DATADIR, "get") - SETNAME = os.path.join(DATADIR, "set") - - for remote in osds.remotes.iterkeys(): - for role in osds.remotes[remote]: - if string.find(role, "osd.") != 0: - continue - osdid = int(role.split('.')[1]) - if osdid not in pgs: - continue - - for pg, JSON in db[basename]["pg2json"].iteritems(): - if pg in pgs[osdid]: - cmd = ((prefix + "--pgid {pg}"). - format(id=osdid, pg=pg).split()) - cmd.append(run.Raw("'{json}'".format(json=JSON))) - cmd += ["list-attrs"] - proc = remote.run(args=cmd, check_status=False, - stdout=StringIO(), stderr=StringIO()) - proc.wait() - if proc.exitstatus != 0: - log.error("Bad exit status {ret}". - format(ret=proc.exitstatus)) - ERRORS += 1 - continue - keys = proc.stdout.getvalue().split() - values = dict(db[basename]["xattr"]) - - for key in keys: - if (key == "_" or - key == "snapset" or - key == "hinfo_key"): - continue - key = key.strip("_") - if key not in values: - log.error("The key {key} should be present". - format(key=key)) - ERRORS += 1 - continue - exp = values.pop(key) - cmd = ((prefix + "--pgid {pg}"). - format(id=osdid, pg=pg).split()) - cmd.append(run.Raw("'{json}'".format(json=JSON))) - cmd += ("get-attr {key}". - format(key="_" + key).split()) - proc = remote.run(args=cmd, check_status=False, - stdout=StringIO()) - proc.wait() - if proc.exitstatus != 0: - log.error("get-attr failed with {ret}". - format(ret=proc.exitstatus)) - ERRORS += 1 - continue - val = proc.stdout.getvalue() - if exp != val: - log.error("For key {key} got value {got} " - "instead of {expected}". 
- format(key=key, got=val, - expected=exp)) - ERRORS += 1 - if "hinfo_key" in keys: - cmd_prefix = prefix.format(id=osdid) - cmd = """ - expected=$({prefix} --pgid {pg} '{json}' get-attr {key} | base64) - echo placeholder | {prefix} --pgid {pg} '{json}' set-attr {key} - - test $({prefix} --pgid {pg} '{json}' get-attr {key}) = placeholder - echo $expected | base64 --decode | \ - {prefix} --pgid {pg} '{json}' set-attr {key} - - test $({prefix} --pgid {pg} '{json}' get-attr {key} | base64) = $expected - """.format(prefix=cmd_prefix, pg=pg, json=JSON, - key="hinfo_key") - log.debug(cmd) - proc = remote.run(args=['bash', '-e', '-x', - '-c', cmd], - check_status=False, - stdout=StringIO(), - stderr=StringIO()) - proc.wait() - if proc.exitstatus != 0: - log.error("failed with " + - str(proc.exitstatus)) - log.error(proc.stdout.getvalue() + " " + - proc.stderr.getvalue()) - ERRORS += 1 - - if len(values) != 0: - log.error("Not all keys found, remaining keys:") - log.error(values) - - log.info("Test pg info") - for remote in osds.remotes.iterkeys(): - for role in osds.remotes[remote]: - if string.find(role, "osd.") != 0: - continue - osdid = int(role.split('.')[1]) - if osdid not in pgs: - continue - - for pg in pgs[osdid]: - cmd = ((prefix + "--op info --pgid {pg}"). - format(id=osdid, pg=pg).split()) - proc = remote.run(args=cmd, check_status=False, - stdout=StringIO()) - proc.wait() - if proc.exitstatus != 0: - log.error("Failure of --op info command with {ret}". - format(proc.exitstatus)) - ERRORS += 1 - continue - info = proc.stdout.getvalue() - if not str(pg) in info: - log.error("Bad data from info: {info}".format(info=info)) - ERRORS += 1 - - log.info("Test pg logging") - for remote in osds.remotes.iterkeys(): - for role in osds.remotes[remote]: - if string.find(role, "osd.") != 0: - continue - osdid = int(role.split('.')[1]) - if osdid not in pgs: - continue - - for pg in pgs[osdid]: - cmd = ((prefix + "--op log --pgid {pg}"). 
- format(id=osdid, pg=pg).split()) - proc = remote.run(args=cmd, check_status=False, - stdout=StringIO()) - proc.wait() - if proc.exitstatus != 0: - log.error("Getting log failed for pg {pg} " - "from osd.{id} with {ret}". - format(pg=pg, id=osdid, ret=proc.exitstatus)) - ERRORS += 1 - continue - HASOBJ = pg in pgswithobjects - MODOBJ = "modify" in proc.stdout.getvalue() - if HASOBJ != MODOBJ: - log.error("Bad log for pg {pg} from osd.{id}". - format(pg=pg, id=osdid)) - MSG = (HASOBJ and [""] or ["NOT "])[0] - log.error("Log should {msg}have a modify entry". - format(msg=MSG)) - ERRORS += 1 - - log.info("Test pg export") - EXP_ERRORS = 0 - for remote in osds.remotes.iterkeys(): - for role in osds.remotes[remote]: - if string.find(role, "osd.") != 0: - continue - osdid = int(role.split('.')[1]) - if osdid not in pgs: - continue - - for pg in pgs[osdid]: - fpath = os.path.join(DATADIR, "osd{id}.{pg}". - format(id=osdid, pg=pg)) - - cmd = ((prefix + "--op export --pgid {pg} --file {file}"). - format(id=osdid, pg=pg, file=fpath)) - proc = remote.run(args=cmd, check_status=False, - stdout=StringIO()) - proc.wait() - if proc.exitstatus != 0: - log.error("Exporting failed for pg {pg} " - "on osd.{id} with {ret}". - format(pg=pg, id=osdid, ret=proc.exitstatus)) - EXP_ERRORS += 1 - - ERRORS += EXP_ERRORS - - log.info("Test pg removal") - RM_ERRORS = 0 - for remote in osds.remotes.iterkeys(): - for role in osds.remotes[remote]: - if string.find(role, "osd.") != 0: - continue - osdid = int(role.split('.')[1]) - if osdid not in pgs: - continue - - for pg in pgs[osdid]: - cmd = ((prefix + "--op remove --pgid {pg}"). - format(pg=pg, id=osdid)) - proc = remote.run(args=cmd, check_status=False, - stdout=StringIO()) - proc.wait() - if proc.exitstatus != 0: - log.error("Removing failed for pg {pg} " - "on osd.{id} with {ret}". 
- format(pg=pg, id=osdid, ret=proc.exitstatus)) - RM_ERRORS += 1 - - ERRORS += RM_ERRORS - - IMP_ERRORS = 0 - if EXP_ERRORS == 0 and RM_ERRORS == 0: - log.info("Test pg import") - - for remote in osds.remotes.iterkeys(): - for role in osds.remotes[remote]: - if string.find(role, "osd.") != 0: - continue - osdid = int(role.split('.')[1]) - if osdid not in pgs: - continue - - for pg in pgs[osdid]: - fpath = os.path.join(DATADIR, "osd{id}.{pg}". - format(id=osdid, pg=pg)) - - cmd = ((prefix + "--op import --file {file}"). - format(id=osdid, file=fpath)) - proc = remote.run(args=cmd, check_status=False, - stdout=StringIO()) - proc.wait() - if proc.exitstatus != 0: - log.error("Import failed from {file} with {ret}". - format(file=fpath, ret=proc.exitstatus)) - IMP_ERRORS += 1 - else: - log.warning("SKIPPING IMPORT TESTS DUE TO PREVIOUS FAILURES") - - ERRORS += IMP_ERRORS - - if EXP_ERRORS == 0 and RM_ERRORS == 0 and IMP_ERRORS == 0: - log.info("Restarting OSDs....") - # They are still look to be up because of setting nodown - for osd in manager.get_osd_status()['up']: - manager.revive_osd(osd) - # Wait for health? - time.sleep(5) - # Let scrub after test runs verify consistency of all copies - log.info("Verify replicated import data") - objects = range(1, NUM_OBJECTS + 1) - for i in objects: - NAME = REP_NAME + "{num}".format(num=i) - TESTNAME = os.path.join(DATADIR, "gettest") - REFNAME = os.path.join(DATADIR, NAME) - - proc = rados(ctx, cli_remote, - ['-p', REP_POOL, 'get', NAME, TESTNAME], wait=False) - - ret = proc.wait() - if ret != 0: - log.error("After import, rados get failed with {ret}". 
- format(ret=proc.exitstatus)) - ERRORS += 1 - continue - - cmd = "diff -q {gettest} {ref}".format(gettest=TESTNAME, - ref=REFNAME) - proc = cli_remote.run(args=cmd, check_status=False) - proc.wait() - if proc.exitstatus != 0: - log.error("Data comparison failed for {obj}".format(obj=NAME)) - ERRORS += 1 - - return ERRORS diff --git a/tasks/cephfs/__init__.py b/tasks/cephfs/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tasks/cephfs/cephfs_test_case.py b/tasks/cephfs/cephfs_test_case.py deleted file mode 100644 index 8700bc51c54..00000000000 --- a/tasks/cephfs/cephfs_test_case.py +++ /dev/null @@ -1,223 +0,0 @@ -import logging -import unittest -import time -from teuthology.task import interactive - - -log = logging.getLogger(__name__) - - -class CephFSTestCase(unittest.TestCase): - """ - Test case for Ceph FS, requires caller to populate Filesystem and Mounts, - into the fs, mount_a, mount_b class attributes (setting mount_b is optional) - - Handles resetting the cluster under test between tests. - """ - # Environment references - mount_a = None - mount_b = None - fs = None - - def setUp(self): - self.fs.clear_firewall() - - # Unmount in order to start each test on a fresh mount, such - # that test_barrier can have a firm expectation of what OSD - # epoch the clients start with. - if self.mount_a.is_mounted(): - self.mount_a.umount_wait() - - if self.mount_b: - if self.mount_b.is_mounted(): - self.mount_b.umount_wait() - - # To avoid any issues with e.g. unlink bugs, we destroy and recreate - # the filesystem rather than just doing a rm -rf of files - self.fs.mds_stop() - self.fs.mds_fail() - self.fs.delete() - self.fs.create() - - # In case the previous filesystem had filled up the RADOS cluster, wait for that - # flag to pass. 
- osd_mon_report_interval_max = int(self.fs.get_config("osd_mon_report_interval_max", service_type='osd')) - self.wait_until_true(lambda: not self.fs.is_full(), - timeout=osd_mon_report_interval_max * 5) - - self.fs.mds_restart() - self.fs.wait_for_daemons() - if not self.mount_a.is_mounted(): - self.mount_a.mount() - self.mount_a.wait_until_mounted() - - if self.mount_b: - if not self.mount_b.is_mounted(): - self.mount_b.mount() - self.mount_b.wait_until_mounted() - - self.configs_set = set() - - def tearDown(self): - self.fs.clear_firewall() - self.mount_a.teardown() - if self.mount_b: - self.mount_b.teardown() - - for subsys, key in self.configs_set: - self.fs.clear_ceph_conf(subsys, key) - - def set_conf(self, subsys, key, value): - self.configs_set.add((subsys, key)) - self.fs.set_ceph_conf(subsys, key, value) - - def assert_session_count(self, expected, ls_data=None): - if ls_data is None: - ls_data = self.fs.mds_asok(['session', 'ls']) - - self.assertEqual(expected, len(ls_data), "Expected {0} sessions, found {1}".format( - expected, len(ls_data) - )) - - def assert_session_state(self, client_id, expected_state): - self.assertEqual( - self._session_by_id( - self.fs.mds_asok(['session', 'ls'])).get(client_id, {'state': None})['state'], - expected_state) - - def get_session_data(self, client_id): - return self._session_by_id(client_id) - - def _session_list(self): - ls_data = self.fs.mds_asok(['session', 'ls']) - ls_data = [s for s in ls_data if s['state'] not in ['stale', 'closed']] - return ls_data - - def get_session(self, client_id, session_ls=None): - if session_ls is None: - session_ls = self.fs.mds_asok(['session', 'ls']) - - return self._session_by_id(session_ls)[client_id] - - def _session_by_id(self, session_ls): - return dict([(s['id'], s) for s in session_ls]) - - def wait_until_equal(self, get_fn, expect_val, timeout, reject_fn=None): - period = 5 - elapsed = 0 - while True: - val = get_fn() - if val == expect_val: - return - elif reject_fn and 
reject_fn(val): - raise RuntimeError("wait_until_equal: forbidden value {0} seen".format(val)) - else: - if elapsed >= timeout: - raise RuntimeError("Timed out after {0} seconds waiting for {1} (currently {2})".format( - elapsed, expect_val, val - )) - else: - log.debug("wait_until_equal: {0} != {1}, waiting...".format(val, expect_val)) - time.sleep(period) - elapsed += period - - log.debug("wait_until_equal: success") - - def wait_until_true(self, condition, timeout): - period = 5 - elapsed = 0 - while True: - if condition(): - return - else: - if elapsed >= timeout: - raise RuntimeError("Timed out after {0} seconds".format(elapsed)) - else: - log.debug("wait_until_true: waiting...") - time.sleep(period) - elapsed += period - - log.debug("wait_until_true: success") - - -class LogStream(object): - def __init__(self): - self.buffer = "" - - def write(self, data): - self.buffer += data - if "\n" in self.buffer: - lines = self.buffer.split("\n") - for line in lines[:-1]: - log.info(line) - self.buffer = lines[-1] - - def flush(self): - pass - - -class InteractiveFailureResult(unittest.TextTestResult): - """ - Specialization that implements interactive-on-error style - behavior. 
- """ - ctx = None - - def addFailure(self, test, err): - log.error(self._exc_info_to_string(err, test)) - log.error("Failure in test '{0}', going interactive".format( - self.getDescription(test) - )) - interactive.task(ctx=self.ctx, config=None) - - def addError(self, test, err): - log.error(self._exc_info_to_string(err, test)) - log.error("Error in test '{0}', going interactive".format( - self.getDescription(test) - )) - interactive.task(ctx=self.ctx, config=None) - - -def run_tests(ctx, config, test_klass, params): - for k, v in params.items(): - setattr(test_klass, k, v) - - # Execute test suite - # ================== - if config and 'test_name' in config: - # Test names like TestCase.this_test - suite = unittest.TestLoader().loadTestsFromName( - "{0}.{1}".format(test_klass.__module__, config['test_name'])) - else: - suite = unittest.TestLoader().loadTestsFromTestCase(test_klass) - - if ctx.config.get("interactive-on-error", False): - InteractiveFailureResult.ctx = ctx - result_class = InteractiveFailureResult - else: - result_class = unittest.TextTestResult - - # Unmount all clients not involved - for mount in ctx.mounts.values(): - if mount is not params.get('mount_a') and mount is not params.get('mount_b'): - if mount.is_mounted(): - log.info("Unmounting unneeded client {0}".format(mount.client_id)) - mount.umount_wait() - - # Execute! 
- result = unittest.TextTestRunner( - stream=LogStream(), - resultclass=result_class, - verbosity=2, - failfast=True).run(suite) - - if not result.wasSuccessful(): - result.printErrors() # duplicate output at end for convenience - - bad_tests = [] - for test, error in result.errors: - bad_tests.append(str(test)) - for test, failure in result.failures: - bad_tests.append(str(test)) - - raise RuntimeError("Test failure: {0}".format(", ".join(bad_tests))) diff --git a/tasks/cephfs/filesystem.py b/tasks/cephfs/filesystem.py deleted file mode 100644 index 5023c143823..00000000000 --- a/tasks/cephfs/filesystem.py +++ /dev/null @@ -1,571 +0,0 @@ - -from StringIO import StringIO -import json -import logging -import time -import datetime -import re - -from teuthology.exceptions import CommandFailedError -from teuthology.orchestra import run -from teuthology import misc -from teuthology.nuke import clear_firewall -from teuthology.parallel import parallel -from tasks.ceph_manager import write_conf -from tasks import ceph_manager - - -log = logging.getLogger(__name__) - - -DAEMON_WAIT_TIMEOUT = 120 -ROOT_INO = 1 - - -class ObjectNotFound(Exception): - def __init__(self, object_name): - self._object_name = object_name - - def __str__(self): - return "Object not found: '{0}'".format(self._object_name) - - -class Filesystem(object): - """ - This object is for driving a CephFS filesystem. 
- - Limitations: - * Assume a single filesystem+cluster - * Assume a single MDS - """ - def __init__(self, ctx, admin_remote=None): - self._ctx = ctx - - self.mds_ids = list(misc.all_roles_of_type(ctx.cluster, 'mds')) - if len(self.mds_ids) == 0: - raise RuntimeError("This task requires at least one MDS") - - first_mon = misc.get_first_mon(ctx, None) - if admin_remote is None: - (self.admin_remote,) = ctx.cluster.only(first_mon).remotes.iterkeys() - else: - self.admin_remote = admin_remote - self.mon_manager = ceph_manager.CephManager(self.admin_remote, ctx=ctx, logger=log.getChild('ceph_manager')) - if hasattr(self._ctx, "daemons"): - # Presence of 'daemons' attribute implies ceph task rather than ceph_deploy task - self.mds_daemons = dict([(mds_id, self._ctx.daemons.get_daemon('mds', mds_id)) for mds_id in self.mds_ids]) - - client_list = list(misc.all_roles_of_type(self._ctx.cluster, 'client')) - self.client_id = client_list[0] - self.client_remote = list(misc.get_clients(ctx=ctx, roles=["client.{0}".format(self.client_id)]))[0][1] - - def create(self): - pg_warn_min_per_osd = int(self.get_config('mon_pg_warn_min_per_osd')) - osd_count = len(list(misc.all_roles_of_type(self._ctx.cluster, 'osd'))) - pgs_per_fs_pool = pg_warn_min_per_osd * osd_count - - self.admin_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'create', 'metadata', pgs_per_fs_pool.__str__()]) - self.admin_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'create', 'data', pgs_per_fs_pool.__str__()]) - self.admin_remote.run(args=['sudo', 'ceph', 'fs', 'new', 'default', 'metadata', 'data']) - - def delete(self): - self.admin_remote.run(args=['sudo', 'ceph', 'fs', 'rm', 'default', '--yes-i-really-mean-it']) - self.admin_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'delete', - 'metadata', 'metadata', '--yes-i-really-really-mean-it']) - self.admin_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'delete', - 'data', 'data', '--yes-i-really-really-mean-it']) - - def legacy_configured(self): - """ - 
Check if a legacy (i.e. pre "fs new") filesystem configuration is present. If this is - the case, the caller should avoid using Filesystem.create - """ - try: - proc = self.admin_remote.run(args=['sudo', 'ceph', '--format=json-pretty', 'osd', 'lspools'], - stdout=StringIO()) - pools = json.loads(proc.stdout.getvalue()) - metadata_pool_exists = 'metadata' in [p['poolname'] for p in pools] - except CommandFailedError as e: - # For use in upgrade tests, Ceph cuttlefish and earlier don't support - # structured output (--format) from the CLI. - if e.exitstatus == 22: - metadata_pool_exists = True - else: - raise - - return metadata_pool_exists - - def _df(self): - return json.loads(self.mon_manager.raw_cluster_cmd("df", "--format=json-pretty")) - - def _fs_ls(self): - fs_list = json.loads(self.mon_manager.raw_cluster_cmd("fs", "ls", "--format=json-pretty")) - assert len(fs_list) == 1 # we don't handle multiple filesystems yet - return fs_list[0] - - def get_data_pool_name(self): - """ - Return the name of the data pool if there is only one, else raise exception -- call - this in tests where there will only be one data pool. 
- """ - names = self.get_data_pool_names() - if len(names) > 1: - raise RuntimeError("Multiple data pools found") - else: - return names[0] - - def get_data_pool_names(self): - return self._fs_ls()['data_pools'] - - def get_metadata_pool_name(self): - return self._fs_ls()['metadata_pool'] - - def get_pool_df(self, pool_name): - """ - Return a dict like: - {u'bytes_used': 0, u'max_avail': 83848701, u'objects': 0, u'kb_used': 0} - """ - for pool_df in self._df()['pools']: - if pool_df['name'] == pool_name: - return pool_df['stats'] - - raise RuntimeError("Pool name '{0}' not found".format(pool_name)) - - def get_usage(self): - return self._df()['stats']['total_used_bytes'] - - def get_mds_hostnames(self): - result = set() - for mds_id in self.mds_ids: - mds_remote = self.mon_manager.find_remote('mds', mds_id) - result.add(mds_remote.hostname) - - return list(result) - - def get_config(self, key, service_type=None): - """ - Get config from mon by default, or a specific service if caller asks for it - """ - if service_type is None: - service_type = 'mon' - - service_id = sorted(misc.all_roles_of_type(self._ctx.cluster, service_type))[0] - return self.json_asok(['config', 'get', key], service_type, service_id)[key] - - def set_ceph_conf(self, subsys, key, value): - if subsys not in self._ctx.ceph.conf: - self._ctx.ceph.conf[subsys] = {} - self._ctx.ceph.conf[subsys][key] = value - write_conf(self._ctx) # XXX because we don't have the ceph task's config object, if they - # used a different config path this won't work. 
- - def clear_ceph_conf(self, subsys, key): - del self._ctx.ceph.conf[subsys][key] - write_conf(self._ctx) - - def are_daemons_healthy(self): - """ - Return true if all daemons are in one of active, standby, standby-replay - :return: - """ - status = self.mon_manager.get_mds_status_all() - for mds_id, mds_status in status['info'].items(): - if mds_status['state'] not in ["up:active", "up:standby", "up:standby-replay"]: - log.warning("Unhealthy mds state {0}:{1}".format(mds_id, mds_status['state'])) - return False - - return True - - def get_active_names(self): - """ - Return MDS daemon names of those daemons holding ranks - in state up:active - - :return: list of strings like ['a', 'b'], sorted by rank - """ - status = self.mon_manager.get_mds_status_all() - result = [] - for mds_status in sorted(status['info'].values(), lambda a, b: cmp(a['rank'], b['rank'])): - if mds_status['state'] == 'up:active': - result.append(mds_status['name']) - - return result - - def wait_for_daemons(self, timeout=None): - """ - Wait until all daemons are healthy - :return: - """ - - if timeout is None: - timeout = DAEMON_WAIT_TIMEOUT - - elapsed = 0 - while True: - if self.are_daemons_healthy(): - return - else: - time.sleep(1) - elapsed += 1 - - if elapsed > timeout: - raise RuntimeError("Timed out waiting for MDS daemons to become healthy") - - def get_lone_mds_id(self): - if len(self.mds_ids) != 1: - raise ValueError("Explicit MDS argument required when multiple MDSs in use") - else: - return self.mds_ids[0] - - def _one_or_all(self, mds_id, cb): - """ - Call a callback for a single named MDS, or for all - - :param mds_id: MDS daemon name, or None - :param cb: Callback taking single argument of MDS daemon name - """ - if mds_id is None: - with parallel() as p: - for mds_id in self.mds_ids: - p.spawn(cb, mds_id) - else: - cb(mds_id) - - def mds_stop(self, mds_id=None): - """ - Stop the MDS daemon process(se). If it held a rank, that rank - will eventually go laggy. 
- """ - self._one_or_all(mds_id, lambda id_: self.mds_daemons[id_].stop()) - - def mds_fail(self, mds_id=None): - """ - Inform MDSMonitor of the death of the daemon process(es). If it held - a rank, that rank will be relinquished. - """ - self._one_or_all(mds_id, lambda id_: self.mon_manager.raw_cluster_cmd("mds", "fail", id_)) - - def mds_restart(self, mds_id=None): - self._one_or_all(mds_id, lambda id_: self.mds_daemons[id_].restart()) - - def mds_fail_restart(self, mds_id=None): - """ - Variation on restart that includes marking MDSs as failed, so that doing this - operation followed by waiting for healthy daemon states guarantees that they - have gone down and come up, rather than potentially seeing the healthy states - that existed before the restart. - """ - def _fail_restart(id_): - self.mds_daemons[id_].stop() - self.mon_manager.raw_cluster_cmd("mds", "fail", id_) - self.mds_daemons[id_].restart() - - self._one_or_all(mds_id, _fail_restart) - - def reset(self): - log.info("Creating new filesystem") - - self.mon_manager.raw_cluster_cmd_result('mds', 'set', "max_mds", "0") - for mds_id in self.mds_ids: - assert not self._ctx.daemons.get_daemon('mds', mds_id).running() - self.mon_manager.raw_cluster_cmd_result('mds', 'fail', mds_id) - self.mon_manager.raw_cluster_cmd_result('fs', 'rm', "default", "--yes-i-really-mean-it") - self.mon_manager.raw_cluster_cmd_result('fs', 'new', "default", "metadata", "data") - - def get_metadata_object(self, object_type, object_id): - """ - Retrieve an object from the metadata pool, pass it through - ceph-dencoder to dump it to JSON, and return the decoded object. 
- """ - temp_bin_path = '/tmp/out.bin' - - # FIXME get the metadata pool name from mdsmap instead of hardcoding - self.client_remote.run(args=[ - 'sudo', 'rados', '-p', 'metadata', 'get', object_id, temp_bin_path - ]) - - stdout = StringIO() - self.client_remote.run(args=[ - 'sudo', 'ceph-dencoder', 'type', object_type, 'import', temp_bin_path, 'decode', 'dump_json' - ], stdout=stdout) - dump_json = stdout.getvalue().strip() - try: - dump = json.loads(dump_json) - except (TypeError, ValueError): - log.error("Failed to decode JSON: '{0}'".format(dump_json)) - raise - - return dump - - def get_journal_version(self): - """ - Read the JournalPointer and Journal::Header objects to learn the version of - encoding in use. - """ - journal_pointer_object = '400.00000000' - journal_pointer_dump = self.get_metadata_object("JournalPointer", journal_pointer_object) - journal_ino = journal_pointer_dump['journal_pointer']['front'] - - journal_header_object = "{0:x}.00000000".format(journal_ino) - journal_header_dump = self.get_metadata_object('Journaler::Header', journal_header_object) - - version = journal_header_dump['journal_header']['stream_format'] - log.info("Read journal version {0}".format(version)) - - return version - - def json_asok(self, command, service_type, service_id): - proc = self.mon_manager.admin_socket(service_type, service_id, command) - response_data = proc.stdout.getvalue() - log.info("_json_asok output: {0}".format(response_data)) - if response_data.strip(): - return json.loads(response_data) - else: - return None - - def mds_asok(self, command, mds_id=None): - if mds_id is None: - mds_id = self.get_lone_mds_id() - - return self.json_asok(command, 'mds', mds_id) - - def get_mds_map(self): - """ - Return the MDS map, as a JSON-esque dict from 'mds dump' - """ - return json.loads(self.mon_manager.raw_cluster_cmd('mds', 'dump', '--format=json-pretty')) - - def get_mds_addr(self, mds_id): - """ - Return the instance addr as a string, like 
"10.214.133.138:6807\/10825" - """ - mds_map = self.get_mds_map() - for gid_string, mds_info in mds_map['info'].items(): - # For some reason - if mds_info['name'] == mds_id: - return mds_info['addr'] - - log.warn(json.dumps(mds_map, indent=2)) # dump map for debugging - raise RuntimeError("MDS id '{0}' not found in MDS map".format(mds_id)) - - def set_clients_block(self, blocked, mds_id=None): - """ - Block (using iptables) client communications to this MDS. Be careful: if - other services are running on this MDS, or other MDSs try to talk to this - MDS, their communications may also be blocked as collatoral damage. - - :param mds_id: Optional ID of MDS to block, default to all - :return: - """ - da_flag = "-A" if blocked else "-D" - - def set_block(_mds_id): - remote = self.mon_manager.find_remote('mds', _mds_id) - - addr = self.get_mds_addr(_mds_id) - ip_str, port_str, inst_str = re.match("(.+):(.+)/(.+)", addr).groups() - - remote.run( - args=["sudo", "iptables", da_flag, "OUTPUT", "-p", "tcp", "--sport", port_str, "-j", "REJECT", "-m", - "comment", "--comment", "teuthology"]) - remote.run( - args=["sudo", "iptables", da_flag, "INPUT", "-p", "tcp", "--dport", port_str, "-j", "REJECT", "-m", - "comment", "--comment", "teuthology"]) - - self._one_or_all(mds_id, set_block) - - def clear_firewall(self): - clear_firewall(self._ctx) - - def is_full(self): - flags = json.loads(self.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['flags'] - return 'full' in flags - - def wait_for_state(self, goal_state, reject=None, timeout=None, mds_id=None): - """ - Block until the MDS reaches a particular state, or a failure condition - is met. 
- - :param goal_state: Return once the MDS is in this state - :param reject: Fail if the MDS enters this state before the goal state - :param timeout: Fail if this many seconds pass before reaching goal - :return: number of seconds waited, rounded down to integer - """ - - if mds_id is None: - mds_id = self.get_lone_mds_id() - - elapsed = 0 - while True: - # mds_info is None if no daemon currently claims this rank - mds_info = self.mon_manager.get_mds_status(mds_id) - current_state = mds_info['state'] if mds_info else None - - if current_state == goal_state: - log.info("reached state '{0}' in {1}s".format(current_state, elapsed)) - return elapsed - elif reject is not None and current_state == reject: - raise RuntimeError("MDS in reject state {0}".format(current_state)) - elif timeout is not None and elapsed > timeout: - raise RuntimeError( - "Reached timeout after {0} seconds waiting for state {1}, while in state {2}".format( - elapsed, goal_state, current_state - )) - else: - time.sleep(1) - elapsed += 1 - - def read_backtrace(self, ino_no): - """ - Read the backtrace from the data pool, return a dict in the format - given by inode_backtrace_t::dump, which is something like: - - :: - - rados -p cephfs_data getxattr 10000000002.00000000 parent > out.bin - ceph-dencoder type inode_backtrace_t import out.bin decode dump_json - - { "ino": 1099511627778, - "ancestors": [ - { "dirino": 1, - "dname": "blah", - "version": 11}], - "pool": 1, - "old_pools": []} - - """ - mds_id = self.mds_ids[0] - remote = self.mds_daemons[mds_id].remote - - obj_name = "{0:x}.00000000".format(ino_no) - - temp_file = "/tmp/{0}_{1}".format(obj_name, datetime.datetime.now().isoformat()) - - args = [ - "rados", "-p", self.get_data_pool_name(), "getxattr", obj_name, "parent", - run.Raw(">"), temp_file - ] - try: - remote.run( - args=args, - stdout=StringIO()) - except CommandFailedError as e: - log.error(e.__str__()) - raise ObjectNotFound(obj_name) - - p = remote.run( - args=["ceph-dencoder", 
"type", "inode_backtrace_t", "import", temp_file, "decode", "dump_json"], - stdout=StringIO() - ) - - return json.loads(p.stdout.getvalue().strip()) - - def rados(self, args, pool=None): - """ - Call into the `rados` CLI from an MDS - """ - - if pool is None: - pool = self.get_metadata_pool_name() - - # Doesn't matter which MDS we use to run rados commands, they all - # have access to the pools - mds_id = self.mds_ids[0] - remote = self.mds_daemons[mds_id].remote - - # NB we could alternatively use librados pybindings for this, but it's a one-liner - # using the `rados` CLI - args = ["rados", "-p", pool] + args - p = remote.run( - args=args, - stdout=StringIO()) - return p.stdout.getvalue().strip() - - def list_dirfrag(self, dir_ino): - """ - Read the named object and return the list of omap keys - - :return a list of 0 or more strings - """ - - dirfrag_obj_name = "{0:x}.00000000".format(dir_ino) - - try: - key_list_str = self.rados(["listomapkeys", dirfrag_obj_name]) - except CommandFailedError as e: - log.error(e.__str__()) - raise ObjectNotFound(dirfrag_obj_name) - - return key_list_str.split("\n") if key_list_str else [] - - def erase_metadata_objects(self, prefix): - """ - For all objects in the metadata pool matching the prefix, - erase them. - - This O(N) with the number of objects in the pool, so only suitable - for use on toy test filesystems. - """ - all_objects = self.rados(["ls"]).split("\n") - matching_objects = [o for o in all_objects if o.startswith(prefix)] - for o in matching_objects: - self.rados(["rm", o]) - - def erase_mds_objects(self, rank): - """ - Erase all the per-MDS objects for a particular rank. This includes - inotable, sessiontable, journal - """ - - def obj_prefix(multiplier): - """ - MDS object naming conventions like rank 1's - journal is at 201.*** - """ - return "%x." 
% (multiplier * 0x100 + rank) - - # MDS_INO_LOG_OFFSET - self.erase_metadata_objects(obj_prefix(2)) - # MDS_INO_LOG_BACKUP_OFFSET - self.erase_metadata_objects(obj_prefix(3)) - # MDS_INO_LOG_POINTER_OFFSET - self.erase_metadata_objects(obj_prefix(4)) - # MDSTables & SessionMap - self.erase_metadata_objects("mds{rank:d}_".format(rank=rank)) - - def _run_tool(self, tool, args, rank=None, quiet=False): - mds_id = self.mds_ids[0] - remote = self.mds_daemons[mds_id].remote - - # Tests frequently have [client] configuration that jacks up - # the objecter log level (unlikely to be interesting here) - # and does not set the mds log level (very interesting here) - if quiet: - base_args = [tool, '--debug-mds=1', '--debug-objecter=1'] - else: - base_args = [tool, '--debug-mds=4', '--debug-objecter=1'] - - if rank is not None: - base_args.extend(["--rank", "%d" % rank]) - - t1 = datetime.datetime.now() - r = remote.run( - args=base_args + args, - stdout=StringIO()).stdout.getvalue().strip() - duration = datetime.datetime.now() - t1 - log.info("Ran {0} in time {1}, result:\n{2}".format( - base_args + args, duration, r - )) - return r - - def journal_tool(self, args, rank=None, quiet=False): - """ - Invoke cephfs-journal-tool with the passed arguments, and return its stdout - """ - return self._run_tool("cephfs-journal-tool", args, rank, quiet) - - def table_tool(self, args, quiet=False): - """ - Invoke cephfs-table-tool with the passed arguments, and return its stdout - """ - return self._run_tool("cephfs-table-tool", args, None, quiet) diff --git a/tasks/cephfs/fuse_mount.py b/tasks/cephfs/fuse_mount.py deleted file mode 100644 index 1a6f5074d78..00000000000 --- a/tasks/cephfs/fuse_mount.py +++ /dev/null @@ -1,354 +0,0 @@ - -from StringIO import StringIO -import json -import time -import os -import logging -from textwrap import dedent - -from teuthology import misc -from teuthology.orchestra import run -from teuthology.orchestra.run import CommandFailedError -from .mount 
import CephFSMount - -log = logging.getLogger(__name__) - - -class FuseMount(CephFSMount): - def __init__(self, client_config, test_dir, client_id, client_remote): - super(FuseMount, self).__init__(test_dir, client_id, client_remote) - - self.client_config = client_config if client_config else {} - self.fuse_daemon = None - self._fuse_conn = None - - def mount(self): - log.info("Client client.%s config is %s" % (self.client_id, self.client_config)) - - daemon_signal = 'kill' - if self.client_config.get('coverage') or self.client_config.get('valgrind') is not None: - daemon_signal = 'term' - - mnt = os.path.join(self.test_dir, 'mnt.{id}'.format(id=self.client_id)) - log.info('Mounting ceph-fuse client.{id} at {remote} {mnt}...'.format( - id=self.client_id, remote=self.client_remote, mnt=mnt)) - - self.client_remote.run( - args=[ - 'mkdir', - '--', - mnt, - ], - ) - - run_cmd = [ - 'sudo', - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=self.test_dir), - 'daemon-helper', - daemon_signal, - ] - run_cmd_tail = [ - 'ceph-fuse', - '-f', - '--name', 'client.{id}'.format(id=self.client_id), - # TODO ceph-fuse doesn't understand dash dash '--', - mnt, - ] - - if self.client_config.get('valgrind') is not None: - run_cmd = misc.get_valgrind_args( - self.test_dir, - 'client.{id}'.format(id=self.client_id), - run_cmd, - self.client_config.get('valgrind'), - ) - - run_cmd.extend(run_cmd_tail) - - def list_connections(): - self.client_remote.run( - args=["sudo", "mount", "-t", "fusectl", "/sys/fs/fuse/connections", "/sys/fs/fuse/connections"], - check_status=False - ) - p = self.client_remote.run( - args=["ls", "/sys/fs/fuse/connections"], - stdout=StringIO(), - check_status=False - ) - if p.exitstatus != 0: - return [] - - ls_str = p.stdout.getvalue().strip() - if ls_str: - return [int(n) for n in ls_str.split("\n")] - else: - return [] - - # Before starting ceph-fuse process, note the contents of - # /sys/fs/fuse/connections - pre_mount_conns = 
list_connections() - log.info("Pre-mount connections: {0}".format(pre_mount_conns)) - - proc = self.client_remote.run( - args=run_cmd, - logger=log.getChild('ceph-fuse.{id}'.format(id=self.client_id)), - stdin=run.PIPE, - wait=False, - ) - self.fuse_daemon = proc - - # Wait for the connection reference to appear in /sys - mount_wait = self.client_config.get('mount_wait', 0) - if mount_wait > 0: - log.info("Fuse mount waits {0} seconds before checking /sys/".format(mount_wait)) - time.sleep(mount_wait) - timeout = int(self.client_config.get('mount_timeout', 30)) - waited = 0 - while list_connections() == pre_mount_conns: - time.sleep(1) - waited += 1 - if waited > timeout: - raise RuntimeError("Fuse mount failed to populate /sys/ after {0} seconds".format( - waited - )) - - post_mount_conns = list_connections() - log.info("Post-mount connections: {0}".format(post_mount_conns)) - - # Record our fuse connection number so that we can use it when - # forcing an unmount - new_conns = list(set(post_mount_conns) - set(pre_mount_conns)) - if len(new_conns) == 0: - raise RuntimeError("New fuse connection directory not found ({0})".format(new_conns)) - elif len(new_conns) > 1: - raise RuntimeError("Unexpectedly numerous fuse connections {0}".format(new_conns)) - else: - self._fuse_conn = new_conns[0] - - def is_mounted(self): - try: - proc = self.client_remote.run( - args=[ - 'stat', - '--file-system', - '--printf=%T\n', - '--', - self.mountpoint, - ], - stdout=StringIO(), - ) - except CommandFailedError: - # This happens if the mount directory doesn't exist - log.info('mount point does not exist: %s', self.mountpoint) - return False - - fstype = proc.stdout.getvalue().rstrip('\n') - if fstype == 'fuseblk': - log.info('ceph-fuse is mounted on %s', self.mountpoint) - return True - else: - log.debug('ceph-fuse not mounted, got fs type {fstype!r}'.format( - fstype=fstype)) - return False - - def wait_until_mounted(self): - """ - Check to make sure that fuse is mounted on 
mountpoint. If not, - sleep for 5 seconds and check again. - """ - - while not self.is_mounted(): - # Even if it's not mounted, it should at least - # be running: catch simple failures where it has terminated. - assert not self.fuse_daemon.poll() - - time.sleep(5) - - # Now that we're mounted, set permissions so that the rest of the test will have - # unrestricted access to the filesystem mount. - self.client_remote.run( - args=['sudo', 'chmod', '1777', self.mountpoint]) - - def _mountpoint_exists(self): - return self.client_remote.run(args=["ls", "-d", self.mountpoint], check_status=False).exitstatus == 0 - - def umount(self): - try: - log.info('Running fusermount -u on {name}...'.format(name=self.client_remote.name)) - self.client_remote.run( - args=[ - 'sudo', - 'fusermount', - '-u', - self.mountpoint, - ], - ) - except run.CommandFailedError: - log.info('Failed to unmount ceph-fuse on {name}, aborting...'.format(name=self.client_remote.name)) - - # abort the fuse mount, killing all hung processes - if self._fuse_conn: - self.run_python(dedent(""" - import os - path = "/sys/fs/fuse/connections/{0}/abort" - if os.path.exists(path): - open(path, "w").write("1") - """).format(self._fuse_conn)) - self._fuse_conn = None - - stderr = StringIO() - try: - # make sure its unmounted - self.client_remote.run( - args=[ - 'sudo', - 'umount', - '-l', - '-f', - self.mountpoint, - ], - stderr=stderr - ) - except CommandFailedError: - if "not found" in stderr.getvalue(): - # Missing mount point, so we are unmounted already, yay. - pass - else: - raise - - assert not self.is_mounted() - self._fuse_conn = None - - def umount_wait(self, force=False): - """ - :param force: Complete cleanly even if the MDS is offline - """ - if force: - # When we expect to be forcing, kill the ceph-fuse process directly. - # This should avoid hitting the more aggressive fallback killing - # in umount() which can affect other mounts too. 
- self.fuse_daemon.stdin.close() - - # However, we will still hit the aggressive wait if there is an ongoing - # mount -o remount (especially if the remount is stuck because MDSs - # are unavailable) - - self.umount() - - try: - if self.fuse_daemon: - self.fuse_daemon.wait() - except CommandFailedError: - pass - - self.cleanup() - - def cleanup(self): - """ - Remove the mount point. - - Prerequisite: the client is not mounted. - """ - stderr = StringIO() - try: - self.client_remote.run( - args=[ - 'rmdir', - '--', - self.mountpoint, - ], - stderr=stderr - ) - except CommandFailedError: - if "No such file or directory" in stderr.getvalue(): - pass - else: - raise - - def kill(self): - """ - Terminate the client without removing the mount point. - """ - self.fuse_daemon.stdin.close() - try: - self.fuse_daemon.wait() - except CommandFailedError: - pass - - def kill_cleanup(self): - """ - Follow up ``kill`` to get to a clean unmounted state. - """ - self.umount() - self.cleanup() - - def teardown(self): - """ - Whatever the state of the mount, get it gone. 
- """ - super(FuseMount, self).teardown() - - self.umount() - - if not self.fuse_daemon.finished: - self.fuse_daemon.stdin.close() - try: - self.fuse_daemon.wait() - except CommandFailedError: - pass - - # Indiscriminate, unlike the touchier cleanup() - self.client_remote.run( - args=[ - 'rm', - '-rf', - self.mountpoint, - ], - ) - - def _admin_socket(self, args): - pyscript = """ -import glob -import re -import os -import subprocess - -def find_socket(client_name): - files = glob.glob("/var/run/ceph/ceph-{{client_name}}.*.asok".format(client_name=client_name)) - for f in files: - pid = re.match(".*\.(\d+)\.asok$", f).group(1) - if os.path.exists("/proc/{{0}}".format(pid)): - return f - raise RuntimeError("Client socket {{0}} not found".format(client_name)) - -print find_socket("{client_name}") -""".format(client_name="client.{0}".format(self.client_id)) - - # Find the admin socket - p = self.client_remote.run(args=[ - 'python', '-c', pyscript - ], stdout=StringIO()) - asok_path = p.stdout.getvalue().strip() - log.info("Found client admin socket at {0}".format(asok_path)) - - # Query client ID from admin socket - p = self.client_remote.run( - args=['sudo', 'ceph', '--admin-daemon', asok_path] + args, - stdout=StringIO()) - return json.loads(p.stdout.getvalue()) - - def get_global_id(self): - """ - Look up the CephFS client ID for this mount - """ - - return self._admin_socket(['mds_sessions'])['id'] - - def get_osd_epoch(self): - """ - Return 2-tuple of osd_epoch, osd_epoch_barrier - """ - status = self._admin_socket(['status']) - return status['osd_epoch'], status['osd_epoch_barrier'] diff --git a/tasks/cephfs/kernel_mount.py b/tasks/cephfs/kernel_mount.py deleted file mode 100644 index 9ff4fdbd58d..00000000000 --- a/tasks/cephfs/kernel_mount.py +++ /dev/null @@ -1,243 +0,0 @@ -from StringIO import StringIO -import json -import logging -from textwrap import dedent -from teuthology.orchestra.run import CommandFailedError -from teuthology import misc - -from 
teuthology.orchestra import remote as orchestra_remote -from teuthology.orchestra import run -from .mount import CephFSMount - -log = logging.getLogger(__name__) - - -class KernelMount(CephFSMount): - def __init__(self, mons, test_dir, client_id, client_remote, - ipmi_user, ipmi_password, ipmi_domain): - super(KernelMount, self).__init__(test_dir, client_id, client_remote) - self.mons = mons - - self.mounted = False - self.ipmi_user = ipmi_user - self.ipmi_password = ipmi_password - self.ipmi_domain = ipmi_domain - - def write_secret_file(self, remote, role, keyring, filename): - """ - Stash the keyring in the filename specified. - """ - remote.run( - args=[ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=self.test_dir), - 'ceph-authtool', - '--name={role}'.format(role=role), - '--print-key', - keyring, - run.Raw('>'), - filename, - ], - ) - - def mount(self): - log.info('Mounting kclient client.{id} at {remote} {mnt}...'.format( - id=self.client_id, remote=self.client_remote, mnt=self.mountpoint)) - - keyring = '/etc/ceph/ceph.client.{id}.keyring'.format(id=self.client_id) - secret = '{tdir}/data/client.{id}.secret'.format(tdir=self.test_dir, id=self.client_id) - self.write_secret_file(self.client_remote, 'client.{id}'.format(id=self.client_id), - keyring, secret) - - self.client_remote.run( - args=[ - 'mkdir', - '--', - self.mountpoint, - ], - ) - - self.client_remote.run( - args=[ - 'sudo', - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=self.test_dir), - '/sbin/mount.ceph', - '{mons}:/'.format(mons=','.join(self.mons)), - self.mountpoint, - '-v', - '-o', - 'name={id},secretfile={secret}'.format(id=self.client_id, - secret=secret), - ], - ) - - self.client_remote.run( - args=['sudo', 'chmod', '1777', self.mountpoint]) - - self.mounted = True - - def umount(self): - log.debug('Unmounting client client.{id}...'.format(id=self.client_id)) - self.client_remote.run( - args=[ - 'sudo', - 'umount', - 
self.mountpoint, - ], - ) - self.client_remote.run( - args=[ - 'rmdir', - '--', - self.mountpoint, - ], - ) - self.mounted = False - - def cleanup(self): - pass - - def umount_wait(self, force=False): - """ - Unlike the fuse client, the kernel client's umount is immediate - """ - try: - self.umount() - except CommandFailedError: - if not force: - raise - - self.kill() - self.kill_cleanup() - - self.mounted = False - - def is_mounted(self): - return self.mounted - - def wait_until_mounted(self): - """ - Unlike the fuse client, the kernel client is up and running as soon - as the initial mount() function returns. - """ - assert self.mounted - - def teardown(self): - super(KernelMount, self).teardown() - if self.mounted: - self.umount() - - def kill(self): - """ - The Ceph kernel client doesn't have a mechanism to kill itself (doing - that in side the kernel would be weird anyway), so we reboot the whole node - to get the same effect. - - We use IPMI to reboot, because we don't want the client to send any - releases of capabilities. 
- """ - - con = orchestra_remote.getRemoteConsole(self.client_remote.hostname, - self.ipmi_user, - self.ipmi_password, - self.ipmi_domain) - con.power_off() - - self.mounted = False - - def kill_cleanup(self): - assert not self.mounted - - con = orchestra_remote.getRemoteConsole(self.client_remote.hostname, - self.ipmi_user, - self.ipmi_password, - self.ipmi_domain) - con.power_on() - - # Wait for node to come back up after reboot - misc.reconnect(None, 300, [self.client_remote]) - - # Remove mount directory - self.client_remote.run( - args=[ - 'rmdir', - '--', - self.mountpoint, - ], - ) - - def _find_debug_dir(self): - """ - Find the debugfs folder for this mount - """ - pyscript = dedent(""" - import glob - import os - import json - - def get_id_to_dir(): - result = {} - for dir in glob.glob("/sys/kernel/debug/ceph/*"): - mds_sessions_lines = open(os.path.join(dir, "mds_sessions")).readlines() - client_id = mds_sessions_lines[1].split()[1].strip('"') - - result[client_id] = dir - return result - - print json.dumps(get_id_to_dir()) - """) - - p = self.client_remote.run(args=[ - 'sudo', 'python', '-c', pyscript - ], stdout=StringIO()) - client_id_to_dir = json.loads(p.stdout.getvalue()) - - try: - return client_id_to_dir[self.client_id] - except KeyError: - log.error("Client id '{0}' debug dir not found (clients seen were: {1})".format( - self.client_id, ",".join(client_id_to_dir.keys()) - )) - raise - - def _read_debug_file(self, filename): - debug_dir = self._find_debug_dir() - - pyscript = dedent(""" - import os - - print open(os.path.join("{debug_dir}", "{filename}")).read() - """).format(debug_dir=debug_dir, filename=filename) - - p = self.client_remote.run(args=[ - 'sudo', 'python', '-c', pyscript - ], stdout=StringIO()) - return p.stdout.getvalue() - - def get_global_id(self): - """ - Look up the CephFS client ID for this mount, using debugfs. 
- """ - - assert self.mounted - - mds_sessions = self._read_debug_file("mds_sessions") - lines = mds_sessions.split("\n") - return int(lines[0].split()[1]) - - def get_osd_epoch(self): - """ - Return 2-tuple of osd_epoch, osd_epoch_barrier - """ - osd_map = self._read_debug_file("osdmap") - lines = osd_map.split("\n") - epoch = int(lines[0].split()[1]) - - mds_sessions = self._read_debug_file("mds_sessions") - lines = mds_sessions.split("\n") - epoch_barrier = int(lines[2].split()[1].strip('"')) - - return epoch, epoch_barrier \ No newline at end of file diff --git a/tasks/cephfs/mount.py b/tasks/cephfs/mount.py deleted file mode 100644 index 1413b818f3b..00000000000 --- a/tasks/cephfs/mount.py +++ /dev/null @@ -1,367 +0,0 @@ -from contextlib import contextmanager -import logging -import datetime -import time -from textwrap import dedent -import os -from StringIO import StringIO -from teuthology.orchestra import run -from teuthology.orchestra.run import CommandFailedError, ConnectionLostError - -log = logging.getLogger(__name__) - - -class CephFSMount(object): - def __init__(self, test_dir, client_id, client_remote): - """ - :param test_dir: Global teuthology test dir - :param client_id: Client ID, the 'foo' in client.foo - :param client_remote: Remote instance for the host where client will run - """ - - self.test_dir = test_dir - self.client_id = client_id - self.client_remote = client_remote - - self.mountpoint = os.path.join(self.test_dir, 'mnt.{id}'.format(id=self.client_id)) - self.test_files = ['a', 'b', 'c'] - - self.background_procs = [] - - def is_mounted(self): - raise NotImplementedError() - - def mount(self): - raise NotImplementedError() - - def umount(self): - raise NotImplementedError() - - def umount_wait(self, force=False): - raise NotImplementedError() - - def kill_cleanup(self): - raise NotImplementedError() - - def kill(self): - raise NotImplementedError() - - def cleanup(self): - raise NotImplementedError() - - def wait_until_mounted(self): - 
raise NotImplementedError() - - @contextmanager - def mounted(self): - """ - A context manager, from an initially unmounted state, to mount - this, yield, and then unmount and clean up. - """ - self.mount() - self.wait_until_mounted() - try: - yield - finally: - self.umount_wait() - - def create_files(self): - assert(self.is_mounted()) - - for suffix in self.test_files: - log.info("Creating file {0}".format(suffix)) - self.client_remote.run(args=[ - 'sudo', 'touch', os.path.join(self.mountpoint, suffix) - ]) - - def check_files(self): - assert(self.is_mounted()) - - for suffix in self.test_files: - log.info("Checking file {0}".format(suffix)) - r = self.client_remote.run(args=[ - 'sudo', 'ls', os.path.join(self.mountpoint, suffix) - ], check_status=False) - if r.exitstatus != 0: - raise RuntimeError("Expected file {0} not found".format(suffix)) - - def create_destroy(self): - assert(self.is_mounted()) - - filename = "{0} {1}".format(datetime.datetime.now(), self.client_id) - log.debug("Creating test file {0}".format(filename)) - self.client_remote.run(args=[ - 'sudo', 'touch', os.path.join(self.mountpoint, filename) - ]) - log.debug("Deleting test file {0}".format(filename)) - self.client_remote.run(args=[ - 'sudo', 'rm', '-f', os.path.join(self.mountpoint, filename) - ]) - - def _run_python(self, pyscript): - return self.client_remote.run(args=[ - 'sudo', 'adjust-ulimits', 'daemon-helper', 'kill', 'python', '-c', pyscript - ], wait=False, stdin=run.PIPE, stdout=StringIO()) - - def run_python(self, pyscript): - p = self._run_python(pyscript) - p.wait() - - def run_shell(self, args, wait=True): - args = ["cd", self.mountpoint, run.Raw('&&')] + args - return self.client_remote.run(args=args, stdout=StringIO(), wait=wait) - - def open_no_data(self, basename): - """ - A pure metadata operation - """ - assert(self.is_mounted()) - - path = os.path.join(self.mountpoint, basename) - - p = self._run_python(dedent( - """ - f = open("{path}", 'w') - """.format(path=path) - )) 
- p.wait() - - def open_background(self, basename="background_file"): - """ - Open a file for writing, then block such that the client - will hold a capability - """ - assert(self.is_mounted()) - - path = os.path.join(self.mountpoint, basename) - - pyscript = dedent(""" - import time - - f = open("{path}", 'w') - f.write('content') - f.flush() - f.write('content2') - while True: - time.sleep(1) - """).format(path=path) - - rproc = self._run_python(pyscript) - self.background_procs.append(rproc) - return rproc - - def wait_for_visible(self, basename="background_file", timeout=30): - i = 0 - while i < timeout: - r = self.client_remote.run(args=[ - 'sudo', 'ls', os.path.join(self.mountpoint, basename) - ], check_status=False) - if r.exitstatus == 0: - log.debug("File {0} became visible from {1} after {2}s".format( - basename, self.client_id, i)) - return - else: - time.sleep(1) - i += 1 - - raise RuntimeError("Timed out after {0}s waiting for {1} to become visible from {2}".format( - i, basename, self.client_id)) - - def lock_background(self, basename="background_file"): - """ - Open and lock a files for writing, hold the lock in a background process - """ - assert(self.is_mounted()) - - path = os.path.join(self.mountpoint, basename) - - pyscript = dedent(""" - import time - import fcntl - import struct - - f1 = open("{path}-1", 'w') - fcntl.flock(f1, fcntl.LOCK_EX | fcntl.LOCK_NB) - - f2 = open("{path}-2", 'w') - lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0) - fcntl.fcntl(f2, fcntl.F_SETLK, lockdata) - while True: - time.sleep(1) - """).format(path=path) - - log.info("lock file {0}".format(basename)) - rproc = self._run_python(pyscript) - self.background_procs.append(rproc) - return rproc - - def check_filelock(self, basename="background_file"): - assert(self.is_mounted()) - - path = os.path.join(self.mountpoint, basename) - - pyscript = dedent(""" - import fcntl - import errno - import struct - - f1 = open("{path}-1", 'r') - try: - fcntl.flock(f1, 
fcntl.LOCK_EX | fcntl.LOCK_NB) - except IOError, e: - if e.errno == errno.EAGAIN: - pass - else: - raise RuntimeError("flock on file {path}-1 not found") - - f2 = open("{path}-2", 'r') - try: - lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0) - fcntl.fcntl(f2, fcntl.F_SETLK, lockdata) - except IOError, e: - if e.errno == errno.EAGAIN: - pass - else: - raise RuntimeError("posix lock on file {path}-2 not found") - """).format(path=path) - - log.info("check lock on file {0}".format(basename)) - self.client_remote.run(args=[ - 'sudo', 'python', '-c', pyscript - ]) - - def write_background(self, basename="background_file", loop=False): - """ - Open a file for writing, complete as soon as you can - :param basename: - :return: - """ - assert(self.is_mounted()) - - path = os.path.join(self.mountpoint, basename) - - pyscript = dedent(""" - import os - import time - - fd = os.open("{path}", os.O_RDWR | os.O_CREAT, 0644) - try: - while True: - os.write(fd, 'content') - time.sleep(1) - if not {loop}: - break - except IOError, e: - pass - os.close(fd) - """).format(path=path, loop=str(loop)) - - rproc = self._run_python(pyscript) - self.background_procs.append(rproc) - return rproc - - def write_n_mb(self, filename, n_mb, seek=0): - """ - Write the requested number of megabytes to a file - """ - assert(self.is_mounted()) - - self.run_shell(["dd", "if=/dev/urandom", "of={0}".format(filename), - "bs=1M", - "count={0}".format(n_mb), - "seek={0}".format(seek) - ]) - - def open_n_background(self, fs_path, count): - """ - Open N files for writing, hold them open in a background process - - :param fs_path: Path relative to CephFS root, e.g. 
"foo/bar" - :return: a RemoteProcess - """ - assert(self.is_mounted()) - - abs_path = os.path.join(self.mountpoint, fs_path) - - pyscript = dedent(""" - import sys - import time - import os - - n = {count} - abs_path = "{abs_path}" - - if not os.path.exists(os.path.dirname(abs_path)): - os.makedirs(os.path.dirname(abs_path)) - - handles = [] - for i in range(0, n): - fname = "{{0}}_{{1}}".format(abs_path, i) - handles.append(open(fname, 'w')) - - while True: - time.sleep(1) - """).format(abs_path=abs_path, count=count) - - rproc = self._run_python(pyscript) - self.background_procs.append(rproc) - return rproc - - def teardown(self): - for p in self.background_procs: - log.info("Terminating background process") - if p.stdin: - p.stdin.close() - try: - p.wait() - except (CommandFailedError, ConnectionLostError): - pass - - def spam_dir_background(self, path): - """ - Create directory `path` and do lots of metadata operations - in it until further notice. - """ - assert(self.is_mounted()) - abs_path = os.path.join(self.mountpoint, path) - - pyscript = dedent(""" - import sys - import time - import os - - abs_path = "{abs_path}" - - if not os.path.exists(abs_path): - os.makedirs(abs_path) - - n = 0 - while True: - file_path = os.path.join(abs_path, "tmp%d" % n) - f = open(file_path, 'w') - f.close() - n = n + 1 - """).format(abs_path=abs_path) - - rproc = self._run_python(pyscript) - self.background_procs.append(rproc) - return rproc - - def get_global_id(self): - raise NotImplementedError() - - def get_osd_epoch(self): - raise NotImplementedError() - - def path_to_ino(self, fs_path): - abs_path = os.path.join(self.mountpoint, fs_path) - - pyscript = dedent(""" - import os - import stat - - print os.stat("{path}").st_ino - """).format(path=abs_path) - proc = self._run_python(pyscript) - proc.wait() - return int(proc.stdout.getvalue().strip()) diff --git a/tasks/chef.py b/tasks/chef.py deleted file mode 100644 index 9a9f1bc2c82..00000000000 --- a/tasks/chef.py +++ 
/dev/null @@ -1,35 +0,0 @@ -""" -Chef-solo task -""" -import logging - -from teuthology.orchestra import run -from teuthology import misc - -log = logging.getLogger(__name__) - -def task(ctx, config): - """ - Run chef-solo on all nodes. - """ - log.info('Running chef-solo...') - - run.wait( - ctx.cluster.run( - args=[ - 'wget', -# '-q', - '-O-', -# 'https://raw.github.com/ceph/ceph-qa-chef/master/solo/solo-from-scratch', - 'http://git.ceph.com/?p=ceph-qa-chef.git;a=blob_plain;f=solo/solo-from-scratch;hb=HEAD', - run.Raw('|'), - 'sh', - '-x', - ], - wait=False, - ) - ) - - log.info('Reconnecting after ceph-qa-chef run') - misc.reconnect(ctx, 10) #Reconnect for ulimit and other ceph-qa-chef changes - diff --git a/tasks/cifs_mount.py b/tasks/cifs_mount.py deleted file mode 100644 index b282b0b7dfb..00000000000 --- a/tasks/cifs_mount.py +++ /dev/null @@ -1,137 +0,0 @@ -""" -Mount cifs clients. Unmount when finished. -""" -import contextlib -import logging -import os - -from teuthology import misc as teuthology -from teuthology.orchestra import run - -log = logging.getLogger(__name__) - -@contextlib.contextmanager -def task(ctx, config): - """ - Mount/unmount a cifs client. - - The config is optional and defaults to mounting on all clients. If - a config is given, it is expected to be a list of clients to do - this operation on. 
- - Example that starts smbd and mounts cifs on all nodes:: - - tasks: - - ceph: - - samba: - - cifs-mount: - - interactive: - - Example that splits smbd and cifs: - - tasks: - - ceph: - - samba: [samba.0] - - cifs-mount: [client.0] - - ceph-fuse: [client.1] - - interactive: - - Example that specifies the share name: - - tasks: - - ceph: - - ceph-fuse: - - samba: - samba.0: - cephfuse: "{testdir}/mnt.0" - - cifs-mount: - client.0: - share: cephfuse - - :param ctx: Context - :param config: Configuration - """ - log.info('Mounting cifs clients...') - - if config is None: - config = dict(('client.{id}'.format(id=id_), None) - for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')) - elif isinstance(config, list): - config = dict((name, None) for name in config) - - clients = list(teuthology.get_clients(ctx=ctx, roles=config.keys())) - - from .samba import get_sambas - samba_roles = ['samba.{id_}'.format(id_=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'samba')] - sambas = list(get_sambas(ctx=ctx, roles=samba_roles)) - (ip, _) = sambas[0][1].ssh.get_transport().getpeername() - log.info('samba ip: {ip}'.format(ip=ip)) - - for id_, remote in clients: - mnt = os.path.join(teuthology.get_testdir(ctx), 'mnt.{id}'.format(id=id_)) - log.info('Mounting cifs client.{id} at {remote} {mnt}...'.format( - id=id_, remote=remote,mnt=mnt)) - - remote.run( - args=[ - 'mkdir', - '--', - mnt, - ], - ) - - rolestr = 'client.{id_}'.format(id_=id_) - unc = "ceph" - log.info("config: {c}".format(c=config)) - if config[rolestr] is not None and 'share' in config[rolestr]: - unc = config[rolestr]['share'] - - remote.run( - args=[ - 'sudo', - 'mount', - '-t', - 'cifs', - '//{sambaip}/{unc}'.format(sambaip=ip, unc=unc), - '-o', - 'username=ubuntu,password=ubuntu', - mnt, - ], - ) - - remote.run( - args=[ - 'sudo', - 'chown', - 'ubuntu:ubuntu', - '{m}/'.format(m=mnt), - ], - ) - - try: - yield - finally: - log.info('Unmounting cifs clients...') - for id_, remote in clients: - 
remote.run( - args=[ - 'sudo', - 'umount', - mnt, - ], - ) - for id_, remote in clients: - while True: - try: - remote.run( - args=[ - 'rmdir', '--', mnt, - run.Raw('2>&1'), - run.Raw('|'), - 'grep', 'Device or resource busy', - ], - ) - import time - time.sleep(1) - except Exception: - break diff --git a/tasks/cram.py b/tasks/cram.py deleted file mode 100644 index 05138af4de0..00000000000 --- a/tasks/cram.py +++ /dev/null @@ -1,135 +0,0 @@ -""" -Cram tests -""" -import logging -import os - -from teuthology import misc as teuthology -from teuthology.parallel import parallel -from teuthology.orchestra import run - -log = logging.getLogger(__name__) - -def task(ctx, config): - """ - Run all cram tests from the specified urls on the specified - clients. Each client runs tests in parallel. - - Limitations: - Tests must have a .t suffix. Tests with duplicate names will - overwrite each other, so only the last one will run. - - For example:: - - tasks: - - ceph: - - cram: - clients: - client.0: - - http://ceph.com/qa/test.t - - http://ceph.com/qa/test2.t] - client.1: [http://ceph.com/qa/test.t] - - You can also run a list of cram tests on all clients:: - - tasks: - - ceph: - - cram: - clients: - all: [http://ceph.com/qa/test.t] - - :param ctx: Context - :param config: Configuration - """ - assert isinstance(config, dict) - assert 'clients' in config and isinstance(config['clients'], dict), \ - 'configuration must contain a dictionary of clients' - - clients = teuthology.replace_all_with_clients(ctx.cluster, - config['clients']) - testdir = teuthology.get_testdir(ctx) - - try: - for client, tests in clients.iteritems(): - (remote,) = ctx.cluster.only(client).remotes.iterkeys() - client_dir = '{tdir}/archive/cram.{role}'.format(tdir=testdir, role=client) - remote.run( - args=[ - 'mkdir', '--', client_dir, - run.Raw('&&'), - 'virtualenv', '{tdir}/virtualenv'.format(tdir=testdir), - run.Raw('&&'), - '{tdir}/virtualenv/bin/pip'.format(tdir=testdir), - 'install', 'cram==0.6', 
- ], - ) - for test in tests: - log.info('fetching test %s for %s', test, client) - assert test.endswith('.t'), 'tests must end in .t' - remote.run( - args=[ - 'wget', '-nc', '-nv', '-P', client_dir, '--', test, - ], - ) - - with parallel() as p: - for role in clients.iterkeys(): - p.spawn(_run_tests, ctx, role) - finally: - for client, tests in clients.iteritems(): - (remote,) = ctx.cluster.only(client).remotes.iterkeys() - client_dir = '{tdir}/archive/cram.{role}'.format(tdir=testdir, role=client) - test_files = set([test.rsplit('/', 1)[1] for test in tests]) - - # remove test files unless they failed - for test_file in test_files: - abs_file = os.path.join(client_dir, test_file) - remote.run( - args=[ - 'test', '-f', abs_file + '.err', - run.Raw('||'), - 'rm', '-f', '--', abs_file, - ], - ) - - # ignore failure since more than one client may - # be run on a host, and the client dir should be - # non-empty if the test failed - remote.run( - args=[ - 'rm', '-rf', '--', - '{tdir}/virtualenv'.format(tdir=testdir), - run.Raw(';'), - 'rmdir', '--ignore-fail-on-non-empty', client_dir, - ], - ) - -def _run_tests(ctx, role): - """ - For each role, check to make sure it's a client, then run the cram on that client - - :param ctx: Context - :param role: Roles - """ - assert isinstance(role, basestring) - PREFIX = 'client.' 
- assert role.startswith(PREFIX) - id_ = role[len(PREFIX):] - (remote,) = ctx.cluster.only(role).remotes.iterkeys() - ceph_ref = ctx.summary.get('ceph-sha1', 'master') - - testdir = teuthology.get_testdir(ctx) - log.info('Running tests for %s...', role) - remote.run( - args=[ - run.Raw('CEPH_REF={ref}'.format(ref=ceph_ref)), - run.Raw('CEPH_ID="{id}"'.format(id=id_)), - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - '{tdir}/virtualenv/bin/cram'.format(tdir=testdir), - '-v', '--', - run.Raw('{tdir}/archive/cram.{role}/*.t'.format(tdir=testdir, role=role)), - ], - logger=log.getChild(role), - ) diff --git a/tasks/devstack.py b/tasks/devstack.py deleted file mode 100644 index c5cd41b06bd..00000000000 --- a/tasks/devstack.py +++ /dev/null @@ -1,382 +0,0 @@ -#!/usr/bin/env python -import contextlib -import logging -from cStringIO import StringIO -import textwrap -from configparser import ConfigParser -import time - -from teuthology.orchestra import run -from teuthology import misc -from teuthology.contextutil import nested - -log = logging.getLogger(__name__) - -DEVSTACK_GIT_REPO = 'https://github.com/openstack-dev/devstack.git' -DS_STABLE_BRANCHES = ("havana", "grizzly") - -is_devstack_node = lambda role: role.startswith('devstack') -is_osd_node = lambda role: role.startswith('osd') - - -@contextlib.contextmanager -def task(ctx, config): - if config is None: - config = {} - if not isinstance(config, dict): - raise TypeError("config must be a dict") - with nested(lambda: install(ctx=ctx, config=config), - lambda: smoke(ctx=ctx, config=config), - ): - yield - - -@contextlib.contextmanager -def install(ctx, config): - """ - Install OpenStack DevStack and configure it to use a Ceph cluster for - Glance and Cinder. - - Requires one node with a role 'devstack' - - Since devstack runs rampant on the system it's used on, typically you will - want to reprovision that machine after using devstack on it. 
- - Also, the default 2GB of RAM that is given to vps nodes is insufficient. I - recommend 4GB. Downburst can be instructed to give 4GB to a vps node by - adding this to the yaml: - - downburst: - ram: 4G - - This was created using documentation found here: - https://github.com/openstack-dev/devstack/blob/master/README.md - http://ceph.com/docs/master/rbd/rbd-openstack/ - """ - if config is None: - config = {} - if not isinstance(config, dict): - raise TypeError("config must be a dict") - - devstack_node = ctx.cluster.only(is_devstack_node).remotes.keys()[0] - an_osd_node = ctx.cluster.only(is_osd_node).remotes.keys()[0] - - devstack_branch = config.get("branch", "master") - install_devstack(devstack_node, devstack_branch) - try: - configure_devstack_and_ceph(ctx, config, devstack_node, an_osd_node) - yield - finally: - pass - - -def install_devstack(devstack_node, branch="master"): - log.info("Cloning DevStack repo...") - - args = ['git', 'clone', DEVSTACK_GIT_REPO] - devstack_node.run(args=args) - - if branch != "master": - if branch in DS_STABLE_BRANCHES and not branch.startswith("stable"): - branch = "stable/" + branch - log.info("Checking out {branch} branch...".format(branch=branch)) - cmd = "cd devstack && git checkout " + branch - devstack_node.run(args=cmd) - - log.info("Installing DevStack...") - args = ['cd', 'devstack', run.Raw('&&'), './stack.sh'] - devstack_node.run(args=args) - - -def configure_devstack_and_ceph(ctx, config, devstack_node, ceph_node): - pool_size = config.get('pool_size', '128') - create_pools(ceph_node, pool_size) - distribute_ceph_conf(devstack_node, ceph_node) - # This is where we would install python-ceph and ceph-common but it appears - # the ceph task does that for us. 
- generate_ceph_keys(ceph_node) - distribute_ceph_keys(devstack_node, ceph_node) - secret_uuid = set_libvirt_secret(devstack_node, ceph_node) - update_devstack_config_files(devstack_node, secret_uuid) - set_apache_servername(devstack_node) - # Rebooting is the most-often-used method of restarting devstack services - misc.reboot(devstack_node) - start_devstack(devstack_node) - restart_apache(devstack_node) - - -def create_pools(ceph_node, pool_size): - log.info("Creating pools on Ceph cluster...") - - for pool_name in ['volumes', 'images', 'backups']: - args = ['ceph', 'osd', 'pool', 'create', pool_name, pool_size] - ceph_node.run(args=args) - - -def distribute_ceph_conf(devstack_node, ceph_node): - log.info("Copying ceph.conf to DevStack node...") - - ceph_conf_path = '/etc/ceph/ceph.conf' - ceph_conf = misc.get_file(ceph_node, ceph_conf_path, sudo=True) - misc.sudo_write_file(devstack_node, ceph_conf_path, ceph_conf) - - -def generate_ceph_keys(ceph_node): - log.info("Generating Ceph keys...") - - ceph_auth_cmds = [ - ['ceph', 'auth', 'get-or-create', 'client.cinder', 'mon', - 'allow r', 'osd', 'allow class-read object_prefix rbd_children, allow rwx pool=volumes, allow rx pool=images'], # noqa - ['ceph', 'auth', 'get-or-create', 'client.glance', 'mon', - 'allow r', 'osd', 'allow class-read object_prefix rbd_children, allow rwx pool=images'], # noqa - ['ceph', 'auth', 'get-or-create', 'client.cinder-backup', 'mon', - 'allow r', 'osd', 'allow class-read object_prefix rbd_children, allow rwx pool=backups'], # noqa - ] - for cmd in ceph_auth_cmds: - ceph_node.run(args=cmd) - - -def distribute_ceph_keys(devstack_node, ceph_node): - log.info("Copying Ceph keys to DevStack node...") - - def copy_key(from_remote, key_name, to_remote, dest_path, owner): - key_stringio = StringIO() - from_remote.run( - args=['ceph', 'auth', 'get-or-create', key_name], - stdout=key_stringio) - key_stringio.seek(0) - misc.sudo_write_file(to_remote, dest_path, - key_stringio, owner=owner) - 
keys = [ - dict(name='client.glance', - path='/etc/ceph/ceph.client.glance.keyring', - # devstack appears to just want root:root - #owner='glance:glance', - ), - dict(name='client.cinder', - path='/etc/ceph/ceph.client.cinder.keyring', - # devstack appears to just want root:root - #owner='cinder:cinder', - ), - dict(name='client.cinder-backup', - path='/etc/ceph/ceph.client.cinder-backup.keyring', - # devstack appears to just want root:root - #owner='cinder:cinder', - ), - ] - for key_dict in keys: - copy_key(ceph_node, key_dict['name'], devstack_node, - key_dict['path'], key_dict.get('owner')) - - -def set_libvirt_secret(devstack_node, ceph_node): - log.info("Setting libvirt secret...") - - cinder_key_stringio = StringIO() - ceph_node.run(args=['ceph', 'auth', 'get-key', 'client.cinder'], - stdout=cinder_key_stringio) - cinder_key = cinder_key_stringio.getvalue().strip() - - uuid_stringio = StringIO() - devstack_node.run(args=['uuidgen'], stdout=uuid_stringio) - uuid = uuid_stringio.getvalue().strip() - - secret_path = '/tmp/secret.xml' - secret_template = textwrap.dedent(""" - - {uuid} - - client.cinder secret - - """) - misc.sudo_write_file(devstack_node, secret_path, - secret_template.format(uuid=uuid)) - devstack_node.run(args=['sudo', 'virsh', 'secret-define', '--file', - secret_path]) - devstack_node.run(args=['sudo', 'virsh', 'secret-set-value', '--secret', - uuid, '--base64', cinder_key]) - return uuid - - -def update_devstack_config_files(devstack_node, secret_uuid): - log.info("Updating DevStack config files to use Ceph...") - - def backup_config(node, file_name, backup_ext='.orig.teuth'): - node.run(args=['cp', '-f', file_name, file_name + backup_ext]) - - def update_config(config_name, config_stream, update_dict, - section='DEFAULT'): - parser = ConfigParser() - parser.read_file(config_stream) - for (key, value) in update_dict.items(): - parser.set(section, key, value) - out_stream = StringIO() - parser.write(out_stream) - out_stream.seek(0) - return 
out_stream - - updates = [ - dict(name='/etc/glance/glance-api.conf', options=dict( - default_store='rbd', - rbd_store_user='glance', - rbd_store_pool='images', - show_image_direct_url='True',)), - dict(name='/etc/cinder/cinder.conf', options=dict( - volume_driver='cinder.volume.drivers.rbd.RBDDriver', - rbd_pool='volumes', - rbd_ceph_conf='/etc/ceph/ceph.conf', - rbd_flatten_volume_from_snapshot='false', - rbd_max_clone_depth='5', - glance_api_version='2', - rbd_user='cinder', - rbd_secret_uuid=secret_uuid, - backup_driver='cinder.backup.drivers.ceph', - backup_ceph_conf='/etc/ceph/ceph.conf', - backup_ceph_user='cinder-backup', - backup_ceph_chunk_size='134217728', - backup_ceph_pool='backups', - backup_ceph_stripe_unit='0', - backup_ceph_stripe_count='0', - restore_discard_excess_bytes='true', - )), - dict(name='/etc/nova/nova.conf', options=dict( - libvirt_images_type='rbd', - libvirt_images_rbd_pool='volumes', - libvirt_images_rbd_ceph_conf='/etc/ceph/ceph.conf', - rbd_user='cinder', - rbd_secret_uuid=secret_uuid, - libvirt_inject_password='false', - libvirt_inject_key='false', - libvirt_inject_partition='-2', - )), - ] - - for update in updates: - file_name = update['name'] - options = update['options'] - config_str = misc.get_file(devstack_node, file_name, sudo=True) - config_stream = StringIO(config_str) - backup_config(devstack_node, file_name) - new_config_stream = update_config(file_name, config_stream, options) - misc.sudo_write_file(devstack_node, file_name, new_config_stream) - - -def set_apache_servername(node): - # Apache complains: "Could not reliably determine the server's fully - # qualified domain name, using 127.0.0.1 for ServerName" - # So, let's make sure it knows its name. 
- log.info("Setting Apache ServerName...") - - hostname = node.hostname - config_file = '/etc/apache2/conf.d/servername' - misc.sudo_write_file(node, config_file, - "ServerName {name}".format(name=hostname)) - - -def start_devstack(devstack_node): - log.info("Patching devstack start script...") - # This causes screen to start headless - otherwise rejoin-stack.sh fails - # because there is no terminal attached. - cmd = "cd devstack && sed -ie 's/screen -c/screen -dm -c/' rejoin-stack.sh" - devstack_node.run(args=cmd) - - log.info("Starting devstack...") - cmd = "cd devstack && ./rejoin-stack.sh" - devstack_node.run(args=cmd) - - # This was added because I was getting timeouts on Cinder requests - which - # were trying to access Keystone on port 5000. A more robust way to handle - # this would be to introduce a wait-loop on devstack_node that checks to - # see if a service is listening on port 5000. - log.info("Waiting 30s for devstack to start...") - time.sleep(30) - - -def restart_apache(node): - node.run(args=['sudo', '/etc/init.d/apache2', 'restart'], wait=True) - - -@contextlib.contextmanager -def exercise(ctx, config): - log.info("Running devstack exercises...") - - if config is None: - config = {} - if not isinstance(config, dict): - raise TypeError("config must be a dict") - - devstack_node = ctx.cluster.only(is_devstack_node).remotes.keys()[0] - - # TODO: save the log *and* preserve failures - #devstack_archive_dir = create_devstack_archive(ctx, devstack_node) - - try: - #cmd = "cd devstack && ./exercise.sh 2>&1 | tee {dir}/exercise.log".format( # noqa - # dir=devstack_archive_dir) - cmd = "cd devstack && ./exercise.sh" - devstack_node.run(args=cmd, wait=True) - yield - finally: - pass - - -def create_devstack_archive(ctx, devstack_node): - test_dir = misc.get_testdir(ctx) - devstack_archive_dir = "{test_dir}/archive/devstack".format( - test_dir=test_dir) - devstack_node.run(args="mkdir -p " + devstack_archive_dir) - return devstack_archive_dir - - 
-@contextlib.contextmanager -def smoke(ctx, config): - log.info("Running a basic smoketest...") - - devstack_node = ctx.cluster.only(is_devstack_node).remotes.keys()[0] - an_osd_node = ctx.cluster.only(is_osd_node).remotes.keys()[0] - - try: - create_volume(devstack_node, an_osd_node, 'smoke0', 1) - yield - finally: - pass - - -def create_volume(devstack_node, ceph_node, vol_name, size): - """ - :param size: The size of the volume, in GB - """ - size = str(size) - log.info("Creating a {size}GB volume named {name}...".format( - name=vol_name, - size=size)) - args = ['source', 'devstack/openrc', run.Raw('&&'), 'cinder', 'create', - '--display-name', vol_name, size] - out_stream = StringIO() - devstack_node.run(args=args, stdout=out_stream, wait=True) - vol_info = parse_os_table(out_stream.getvalue()) - log.debug("Volume info: %s", str(vol_info)) - - out_stream = StringIO() - try: - ceph_node.run(args="rbd --id cinder ls -l volumes", stdout=out_stream, - wait=True) - except run.CommandFailedError: - log.debug("Original rbd call failed; retrying without '--id cinder'") - ceph_node.run(args="rbd ls -l volumes", stdout=out_stream, - wait=True) - - assert vol_info['id'] in out_stream.getvalue(), \ - "Volume not found on Ceph cluster" - assert vol_info['size'] == size, \ - "Volume size on Ceph cluster is different than specified" - return vol_info['id'] - - -def parse_os_table(table_str): - out_dict = dict() - for line in table_str.split('\n'): - if line.startswith('|'): - items = line.split() - out_dict[items[1]] = items[3] - return out_dict diff --git a/tasks/die_on_err.py b/tasks/die_on_err.py deleted file mode 100644 index bf422ae547d..00000000000 --- a/tasks/die_on_err.py +++ /dev/null @@ -1,70 +0,0 @@ -""" -Raise exceptions on osd coredumps or test err directories -""" -import contextlib -import logging -import time -from teuthology.orchestra import run - -import ceph_manager -from teuthology import misc as teuthology - -log = logging.getLogger(__name__) - 
-@contextlib.contextmanager -def task(ctx, config): - """ - Die if {testdir}/err exists or if an OSD dumps core - """ - if config is None: - config = {} - - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - - num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') - log.info('num_osds is %s' % num_osds) - - manager = ceph_manager.CephManager( - mon, - ctx=ctx, - logger=log.getChild('ceph_manager'), - ) - - while len(manager.get_osd_status()['up']) < num_osds: - time.sleep(10) - - testdir = teuthology.get_testdir(ctx) - - while True: - for i in range(num_osds): - (osd_remote,) = ctx.cluster.only('osd.%d' % i).remotes.iterkeys() - p = osd_remote.run( - args = [ 'test', '-e', '{tdir}/err'.format(tdir=testdir) ], - wait=True, - check_status=False, - ) - exit_status = p.exitstatus - - if exit_status == 0: - log.info("osd %d has an error" % i) - raise Exception("osd %d error" % i) - - log_path = '/var/log/ceph/osd.%d.log' % (i) - - p = osd_remote.run( - args = [ - 'tail', '-1', log_path, - run.Raw('|'), - 'grep', '-q', 'end dump' - ], - wait=True, - check_status=False, - ) - exit_status = p.exitstatus - - if exit_status == 0: - log.info("osd %d dumped core" % i) - raise Exception("osd %d dumped core" % i) - - time.sleep(5) diff --git a/tasks/divergent_priors.py b/tasks/divergent_priors.py deleted file mode 100644 index d81ea472734..00000000000 --- a/tasks/divergent_priors.py +++ /dev/null @@ -1,169 +0,0 @@ -""" -Special case divergence test -""" -import logging -import time - -from teuthology import misc as teuthology -from util.rados import rados - - -log = logging.getLogger(__name__) - - -def task(ctx, config): - """ - Test handling of divergent entries with prior_version - prior to log_tail - - overrides: - ceph: - conf: - osd: - debug osd: 5 - - Requires 3 osds on a single test node. 
- """ - if config is None: - config = {} - assert isinstance(config, dict), \ - 'divergent_priors task only accepts a dict for configuration' - - while len(ctx.manager.get_osd_status()['up']) < 3: - time.sleep(10) - ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - ctx.manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - ctx.manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - ctx.manager.raw_cluster_cmd('osd', 'set', 'noout') - ctx.manager.raw_cluster_cmd('osd', 'set', 'noin') - ctx.manager.raw_cluster_cmd('osd', 'set', 'nodown') - ctx.manager.wait_for_clean() - - # something that is always there - dummyfile = '/etc/fstab' - dummyfile2 = '/etc/resolv.conf' - - # create 1 pg pool - log.info('creating foo') - ctx.manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1') - - osds = [0, 1, 2] - for i in osds: - ctx.manager.set_config(i, osd_min_pg_log_entries=10) - ctx.manager.set_config(i, osd_max_pg_log_entries=10) - ctx.manager.set_config(i, osd_pg_log_trim_min=5) - - # determine primary - divergent = ctx.manager.get_pg_primary('foo', 0) - log.info("primary and soon to be divergent is %d", divergent) - non_divergent = list(osds) - non_divergent.remove(divergent) - - log.info('writing initial objects') - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - # write 100 objects - for i in range(100): - rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile]) - - ctx.manager.wait_for_clean() - - # blackhole non_divergent - log.info("blackholing osds %s", str(non_divergent)) - for i in non_divergent: - ctx.manager.set_config(i, filestore_blackhole=1) - - DIVERGENT_WRITE = 5 - DIVERGENT_REMOVE = 5 - # Write some soon to be divergent - log.info('writing divergent objects') - for i in range(DIVERGENT_WRITE): - rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, - dummyfile2], wait=False) - # Remove some soon to be divergent - log.info('remove divergent objects') - for 
i in range(DIVERGENT_REMOVE): - rados(ctx, mon, ['-p', 'foo', 'rm', - 'existing_%d' % (i + DIVERGENT_WRITE)], wait=False) - time.sleep(10) - mon.run( - args=['killall', '-9', 'rados'], - wait=True, - check_status=False) - - # kill all the osds but leave divergent in - log.info('killing all the osds') - for i in osds: - ctx.manager.kill_osd(i) - for i in osds: - ctx.manager.mark_down_osd(i) - for i in non_divergent: - ctx.manager.mark_out_osd(i) - - # bring up non-divergent - log.info("bringing up non_divergent %s", str(non_divergent)) - for i in non_divergent: - ctx.manager.revive_osd(i) - for i in non_divergent: - ctx.manager.mark_in_osd(i) - - # write 1 non-divergent object (ensure that old divergent one is divergent) - objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE) - log.info('writing non-divergent object ' + objname) - rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2]) - - ctx.manager.wait_for_recovery() - - # ensure no recovery of up osds first - log.info('delay recovery') - for i in non_divergent: - ctx.manager.wait_run_admin_socket( - 'osd', i, ['set_recovery_delay', '100000']) - - # bring in our divergent friend - log.info("revive divergent %d", divergent) - ctx.manager.raw_cluster_cmd('osd', 'set', 'noup') - ctx.manager.revive_osd(divergent) - - log.info('delay recovery divergent') - ctx.manager.wait_run_admin_socket( - 'osd', divergent, ['set_recovery_delay', '100000']) - - ctx.manager.raw_cluster_cmd('osd', 'unset', 'noup') - while len(ctx.manager.get_osd_status()['up']) < 3: - time.sleep(10) - - log.info('wait for peering') - rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) - - # At this point the divergent_priors should have been detected - - log.info("killing divergent %d", divergent) - ctx.manager.kill_osd(divergent) - log.info("reviving divergent %d", divergent) - ctx.manager.revive_osd(divergent) - - time.sleep(20) - - log.info('allowing recovery') - # Set osd_recovery_delay_start back to 0 and kick the queue - for i 
in osds: - ctx.manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug', - 'kick_recovery_wq', ' 0') - - log.info('reading divergent objects') - for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE): - exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i, - '/tmp/existing']) - assert exit_status is 0 - - (remote,) = ctx.\ - cluster.only('osd.{o}'.format(o=divergent)).remotes.iterkeys() - msg = "dirty_divergent_priors: true, divergent_priors: %d" \ - % (DIVERGENT_WRITE + DIVERGENT_REMOVE) - cmd = 'grep "{msg}" /var/log/ceph/ceph-osd.{osd}.log'\ - .format(msg=msg, osd=divergent) - proc = remote.run(args=cmd, wait=True, check_status=False) - assert proc.exitstatus == 0 - - log.info("success") diff --git a/tasks/divergent_priors2.py b/tasks/divergent_priors2.py deleted file mode 100644 index 78d6043a536..00000000000 --- a/tasks/divergent_priors2.py +++ /dev/null @@ -1,205 +0,0 @@ -""" -Special case divergence test with ceph-objectstore-tool export/remove/import -""" -import logging -import time -from cStringIO import StringIO - -from teuthology import misc as teuthology -from util.rados import rados -import os - - -log = logging.getLogger(__name__) - - -def task(ctx, config): - """ - Test handling of divergent entries with prior_version - prior to log_tail and a ceph-objectstore-tool export/import - - overrides: - ceph: - conf: - osd: - debug osd: 5 - - Requires 3 osds on a single test node. 
- """ - if config is None: - config = {} - assert isinstance(config, dict), \ - 'divergent_priors task only accepts a dict for configuration' - - while len(ctx.manager.get_osd_status()['up']) < 3: - time.sleep(10) - ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - ctx.manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - ctx.manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - ctx.manager.raw_cluster_cmd('osd', 'set', 'noout') - ctx.manager.raw_cluster_cmd('osd', 'set', 'noin') - ctx.manager.raw_cluster_cmd('osd', 'set', 'nodown') - ctx.manager.wait_for_clean() - - # something that is always there - dummyfile = '/etc/fstab' - dummyfile2 = '/etc/resolv.conf' - testdir = teuthology.get_testdir(ctx) - - # create 1 pg pool - log.info('creating foo') - ctx.manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1') - - osds = [0, 1, 2] - for i in osds: - ctx.manager.set_config(i, osd_min_pg_log_entries=10) - ctx.manager.set_config(i, osd_max_pg_log_entries=10) - ctx.manager.set_config(i, osd_pg_log_trim_min=5) - - # determine primary - divergent = ctx.manager.get_pg_primary('foo', 0) - log.info("primary and soon to be divergent is %d", divergent) - non_divergent = list(osds) - non_divergent.remove(divergent) - - log.info('writing initial objects') - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - # write 100 objects - for i in range(100): - rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile]) - - ctx.manager.wait_for_clean() - - # blackhole non_divergent - log.info("blackholing osds %s", str(non_divergent)) - for i in non_divergent: - ctx.manager.set_config(i, filestore_blackhole=1) - - DIVERGENT_WRITE = 5 - DIVERGENT_REMOVE = 5 - # Write some soon to be divergent - log.info('writing divergent objects') - for i in range(DIVERGENT_WRITE): - rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, - dummyfile2], wait=False) - # Remove some soon to be divergent - 
log.info('remove divergent objects') - for i in range(DIVERGENT_REMOVE): - rados(ctx, mon, ['-p', 'foo', 'rm', - 'existing_%d' % (i + DIVERGENT_WRITE)], wait=False) - time.sleep(10) - mon.run( - args=['killall', '-9', 'rados'], - wait=True, - check_status=False) - - # kill all the osds but leave divergent in - log.info('killing all the osds') - for i in osds: - ctx.manager.kill_osd(i) - for i in osds: - ctx.manager.mark_down_osd(i) - for i in non_divergent: - ctx.manager.mark_out_osd(i) - - # bring up non-divergent - log.info("bringing up non_divergent %s", str(non_divergent)) - for i in non_divergent: - ctx.manager.revive_osd(i) - for i in non_divergent: - ctx.manager.mark_in_osd(i) - - # write 1 non-divergent object (ensure that old divergent one is divergent) - objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE) - log.info('writing non-divergent object ' + objname) - rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2]) - - ctx.manager.wait_for_recovery() - - # ensure no recovery of up osds first - log.info('delay recovery') - for i in non_divergent: - ctx.manager.wait_run_admin_socket( - 'osd', i, ['set_recovery_delay', '100000']) - - # bring in our divergent friend - log.info("revive divergent %d", divergent) - ctx.manager.raw_cluster_cmd('osd', 'set', 'noup') - ctx.manager.revive_osd(divergent) - - log.info('delay recovery divergent') - ctx.manager.wait_run_admin_socket( - 'osd', divergent, ['set_recovery_delay', '100000']) - - ctx.manager.raw_cluster_cmd('osd', 'unset', 'noup') - while len(ctx.manager.get_osd_status()['up']) < 3: - time.sleep(10) - - log.info('wait for peering') - rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) - - # At this point the divergent_priors should have been detected - - log.info("killing divergent %d", divergent) - ctx.manager.kill_osd(divergent) - - # Export a pg - (exp_remote,) = ctx.\ - cluster.only('osd.{o}'.format(o=divergent)).remotes.iterkeys() - FSPATH = ctx.manager.get_filepath() - JPATH = 
os.path.join(FSPATH, "journal") - prefix = ("sudo adjust-ulimits ceph-objectstore-tool " - "--data-path {fpath} --journal-path {jpath} " - "--log-file=" - "/var/log/ceph/objectstore_tool.$$.log ". - format(fpath=FSPATH, jpath=JPATH)) - pid = os.getpid() - expfile = os.path.join(testdir, "exp.{pid}.out".format(pid=pid)) - cmd = ((prefix + "--op export --pgid 1.0 --file {file}"). - format(id=divergent, file=expfile)) - proc = exp_remote.run(args=cmd, wait=True, - check_status=False, stdout=StringIO()) - assert proc.exitstatus == 0 - - cmd = ((prefix + "--op remove --pgid 1.0"). - format(id=divergent, file=expfile)) - proc = exp_remote.run(args=cmd, wait=True, - check_status=False, stdout=StringIO()) - assert proc.exitstatus == 0 - - cmd = ((prefix + "--op import --file {file}"). - format(id=divergent, file=expfile)) - proc = exp_remote.run(args=cmd, wait=True, - check_status=False, stdout=StringIO()) - assert proc.exitstatus == 0 - - log.info("reviving divergent %d", divergent) - ctx.manager.revive_osd(divergent) - ctx.manager.wait_run_admin_socket('osd', divergent, ['dump_ops_in_flight']) - time.sleep(20); - - log.info('allowing recovery') - # Set osd_recovery_delay_start back to 0 and kick the queue - for i in osds: - ctx.manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug', - 'kick_recovery_wq', ' 0') - - log.info('reading divergent objects') - for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE): - exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i, - '/tmp/existing']) - assert exit_status is 0 - - (remote,) = ctx.\ - cluster.only('osd.{o}'.format(o=divergent)).remotes.iterkeys() - msg = "dirty_divergent_priors: true, divergent_priors: %d" \ - % (DIVERGENT_WRITE + DIVERGENT_REMOVE) - cmd = 'grep "{msg}" /var/log/ceph/ceph-osd.{osd}.log'\ - .format(msg=msg, osd=divergent) - proc = remote.run(args=cmd, wait=True, check_status=False) - assert proc.exitstatus == 0 - - cmd = 'rm {file}'.format(file=expfile) - remote.run(args=cmd, wait=True) - 
log.info("success") diff --git a/tasks/dump_stuck.py b/tasks/dump_stuck.py deleted file mode 100644 index 9e1780f0156..00000000000 --- a/tasks/dump_stuck.py +++ /dev/null @@ -1,146 +0,0 @@ -""" -Dump_stuck command -""" -import logging -import re -import time - -import ceph_manager -from teuthology import misc as teuthology - - -log = logging.getLogger(__name__) - -def check_stuck(manager, num_inactive, num_unclean, num_stale, timeout=10): - """ - Do checks. Make sure get_stuck_pgs return the right amout of information, then - extract health information from the raw_cluster_cmd and compare the results with - values passed in. This passes if all asserts pass. - - :param num_manager: Ceph manager - :param num_inactive: number of inaactive pages that are stuck - :param num_unclean: number of unclean pages that are stuck - :paran num_stale: number of stale pages that are stuck - :param timeout: timeout value for get_stuck_pgs calls - """ - inactive = manager.get_stuck_pgs('inactive', timeout) - assert len(inactive) == num_inactive - unclean = manager.get_stuck_pgs('unclean', timeout) - assert len(unclean) == num_unclean - stale = manager.get_stuck_pgs('stale', timeout) - assert len(stale) == num_stale - - # check health output as well - health = manager.raw_cluster_cmd('health') - log.debug('ceph health is: %s', health) - if num_inactive > 0: - m = re.search('(\d+) pgs stuck inactive', health) - assert int(m.group(1)) == num_inactive - if num_unclean > 0: - m = re.search('(\d+) pgs stuck unclean', health) - assert int(m.group(1)) == num_unclean - if num_stale > 0: - m = re.search('(\d+) pgs stuck stale', health) - assert int(m.group(1)) == num_stale - -def task(ctx, config): - """ - Test the dump_stuck command. 
- - :param ctx: Context - :param config: Configuration - """ - assert config is None, \ - 'dump_stuck requires no configuration' - assert teuthology.num_instances_of_type(ctx.cluster, 'osd') == 2, \ - 'dump_stuck requires exactly 2 osds' - - timeout = 60 - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - - manager = ceph_manager.CephManager( - mon, - ctx=ctx, - logger=log.getChild('ceph_manager'), - ) - - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.wait_for_clean(timeout) - - manager.raw_cluster_cmd('tell', 'mon.0', 'injectargs', '--', -# '--mon-osd-report-timeout 90', - '--mon-pg-stuck-threshold 10') - - check_stuck( - manager, - num_inactive=0, - num_unclean=0, - num_stale=0, - ) - num_pgs = manager.get_num_pgs() - - manager.mark_out_osd(0) - time.sleep(timeout) - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.wait_for_recovery(timeout) - - check_stuck( - manager, - num_inactive=0, - num_unclean=num_pgs, - num_stale=0, - ) - - manager.mark_in_osd(0) - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.wait_for_clean(timeout) - - check_stuck( - manager, - num_inactive=0, - num_unclean=0, - num_stale=0, - ) - - for id_ in teuthology.all_roles_of_type(ctx.cluster, 'osd'): - manager.kill_osd(id_) - manager.mark_down_osd(id_) - - starttime = time.time() - done = False - while not done: - try: - check_stuck( - manager, - num_inactive=0, - num_unclean=0, - num_stale=num_pgs, - ) - done = True - except AssertionError: - # wait up to 15 minutes to become stale - if time.time() - starttime > 900: - raise - - for id_ in teuthology.all_roles_of_type(ctx.cluster, 'osd'): - manager.revive_osd(id_) - manager.mark_in_osd(id_) - while True: - try: - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - 
manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - break - except Exception: - log.exception('osds must not be started yet, waiting...') - time.sleep(1) - manager.wait_for_clean(timeout) - - check_stuck( - manager, - num_inactive=0, - num_unclean=0, - num_stale=0, - ) diff --git a/tasks/ec_lost_unfound.py b/tasks/ec_lost_unfound.py deleted file mode 100644 index 5a9678d8c27..00000000000 --- a/tasks/ec_lost_unfound.py +++ /dev/null @@ -1,137 +0,0 @@ -""" -Lost_unfound -""" -import logging -import ceph_manager -from teuthology import misc as teuthology -from util.rados import rados - -log = logging.getLogger(__name__) - -def task(ctx, config): - """ - Test handling of lost objects on an ec pool. - - A pretty rigid cluster is brought up andtested by this task - """ - if config is None: - config = {} - assert isinstance(config, dict), \ - 'lost_unfound task only accepts a dict for configuration' - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - - manager = ceph_manager.CephManager( - mon, - ctx=ctx, - logger=log.getChild('ceph_manager'), - ) - - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats') - manager.wait_for_clean() - - profile = config.get('erasure_code_profile', { - 'k': '2', - 'm': '2', - 'ruleset-failure-domain': 'osd' - }) - profile_name = profile.get('name', 'lost_unfound') - manager.create_erasure_code_profile(profile_name, profile) - pool = manager.create_pool_with_unique_name(erasure_code_profile_name=profile_name) - - # something that is always there, readable and never empty - dummyfile = '/etc/group' - - # kludge to make sure they get a map - rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile]) - - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 
'osd.1', 'flush_pg_stats') - manager.wait_for_recovery() - - # create old objects - for f in range(1, 10): - rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile]) - rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile]) - rados(ctx, mon, ['-p', pool, 'rm', 'existed_%d' % f]) - - # delay recovery, and make the pg log very long (to prevent backfill) - manager.raw_cluster_cmd( - 'tell', 'osd.1', - 'injectargs', - '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000' - ) - - manager.kill_osd(0) - manager.mark_down_osd(0) - manager.kill_osd(3) - manager.mark_down_osd(3) - - for f in range(1, 10): - rados(ctx, mon, ['-p', pool, 'put', 'new_%d' % f, dummyfile]) - rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile]) - rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile]) - - # take out osd.1 and a necessary shard of those objects. - manager.kill_osd(1) - manager.mark_down_osd(1) - manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it') - manager.revive_osd(0) - manager.wait_till_osd_is_up(0) - manager.revive_osd(3) - manager.wait_till_osd_is_up(3) - - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats') - manager.wait_till_active() - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats') - - # verify that there are unfound objects - unfound = manager.get_num_unfound_objects() - log.info("there are %d unfound objects" % unfound) - assert unfound - - # mark stuff lost - pgs = manager.get_pg_stats() - for pg in pgs: - if pg['stat_sum']['num_objects_unfound'] > 0: - # verify that i can list them direct from the osd - log.info('listing missing/lost in %s state %s', pg['pgid'], - pg['state']); - m = manager.list_pg_missing(pg['pgid']) - log.info('%s' % 
m) - assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] - - log.info("reverting unfound in %s", pg['pgid']) - manager.raw_cluster_cmd('pg', pg['pgid'], - 'mark_unfound_lost', 'delete') - else: - log.info("no unfound in %s", pg['pgid']) - - manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5') - manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5') - manager.raw_cluster_cmd('tell', 'osd.3', 'debug', 'kick_recovery_wq', '5') - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats') - manager.wait_for_recovery() - - # verify result - for f in range(1, 10): - err = rados(ctx, mon, ['-p', pool, 'get', 'new_%d' % f, '-']) - assert err - err = rados(ctx, mon, ['-p', pool, 'get', 'existed_%d' % f, '-']) - assert err - err = rados(ctx, mon, ['-p', pool, 'get', 'existing_%d' % f, '-']) - assert err - - # see if osd.1 can cope - manager.revive_osd(1) - manager.wait_till_osd_is_up(1) - manager.wait_for_clean() diff --git a/tasks/filestore_idempotent.py b/tasks/filestore_idempotent.py deleted file mode 100644 index da3995eafcb..00000000000 --- a/tasks/filestore_idempotent.py +++ /dev/null @@ -1,81 +0,0 @@ -""" -Filestore/filejournal handler -""" -import logging -from teuthology.orchestra import run -import random - -from teuthology import misc as teuthology - -log = logging.getLogger(__name__) - -def task(ctx, config): - """ - Test filestore/filejournal handling of non-idempotent events. - - Currently this is a kludge; we require the ceph task preceeds us just - so that we get the tarball installed to run the test binary. 
- - :param ctx: Context - :param config: Configuration - """ - assert config is None or isinstance(config, list) \ - or isinstance(config, dict), \ - "task only supports a list or dictionary for configuration" - all_clients = ['client.{id}'.format(id=id_) - for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] - if config is None: - config = all_clients - if isinstance(config, list): - config = dict.fromkeys(config) - clients = config.keys() - - # just use the first client... - client = clients[0]; - (remote,) = ctx.cluster.only(client).remotes.iterkeys() - - testdir = teuthology.get_testdir(ctx) - - dir = '%s/data/test.%s' % (testdir, client) - - seed = str(int(random.uniform(1,100))) - - try: - log.info('creating a working dir') - remote.run(args=['mkdir', dir]) - remote.run( - args=[ - 'cd', dir, - run.Raw('&&'), - 'wget','-q', '-Orun_seed_to.sh', - 'http://git.ceph.com/?p=ceph.git;a=blob_plain;f=src/test/objectstore/run_seed_to.sh;hb=HEAD', - run.Raw('&&'), - 'wget','-q', '-Orun_seed_to_range.sh', - 'http://git.ceph.com/?p=ceph.git;a=blob_plain;f=src/test/objectstore/run_seed_to_range.sh;hb=HEAD', - run.Raw('&&'), - 'chmod', '+x', 'run_seed_to.sh', 'run_seed_to_range.sh', - ]); - - log.info('running a series of tests') - proc = remote.run( - args=[ - 'cd', dir, - run.Raw('&&'), - './run_seed_to_range.sh', seed, '50', '300', - ], - wait=False, - check_status=False) - result = proc.wait() - - if result != 0: - remote.run( - args=[ - 'cp', '-a', dir, '{tdir}/archive/idempotent_failure'.format(tdir=testdir), - ]) - raise Exception("./run_seed_to_range.sh errored out") - - finally: - remote.run(args=[ - 'rm', '-rf', '--', dir - ]) - diff --git a/tasks/kclient.py b/tasks/kclient.py deleted file mode 100644 index ca1fb3ba716..00000000000 --- a/tasks/kclient.py +++ /dev/null @@ -1,107 +0,0 @@ -""" -Mount/unmount a ``kernel`` client. 
-""" -import contextlib -import logging - -from teuthology.misc import deep_merge -from teuthology import misc -from cephfs.kernel_mount import KernelMount - -log = logging.getLogger(__name__) - -@contextlib.contextmanager -def task(ctx, config): - """ - Mount/unmount a ``kernel`` client. - - The config is optional and defaults to mounting on all clients. If - a config is given, it is expected to be a list of clients to do - this operation on. This lets you e.g. set up one client with - ``ceph-fuse`` and another with ``kclient``. - - Example that mounts all clients:: - - tasks: - - ceph: - - kclient: - - interactive: - - Example that uses both ``kclient` and ``ceph-fuse``:: - - tasks: - - ceph: - - ceph-fuse: [client.0] - - kclient: [client.1] - - interactive: - - - Pass a dictionary instead of lists to specify per-client config: - - tasks: - -kclient: - client.0: - debug: true - - :param ctx: Context - :param config: Configuration - """ - log.info('Mounting kernel clients...') - assert config is None or isinstance(config, list) or isinstance(config, dict), \ - "task kclient got invalid config" - - if config is None: - config = ['client.{id}'.format(id=id_) - for id_ in misc.all_roles_of_type(ctx.cluster, 'client')] - - if isinstance(config, list): - client_roles = config - config = dict([r, dict()] for r in client_roles) - elif isinstance(config, dict): - client_roles = config.keys() - else: - raise ValueError("Invalid config object: {0} ({1})".format(config, config.__class__)) - - # config has been converted to a dict by this point - overrides = ctx.config.get('overrides', {}) - deep_merge(config, overrides.get('kclient', {})) - - clients = list(misc.get_clients(ctx=ctx, roles=client_roles)) - - test_dir = misc.get_testdir(ctx) - - # Assemble mon addresses - remotes_and_roles = ctx.cluster.remotes.items() - roles = [roles for (remote_, roles) in remotes_and_roles] - ips = [remote_.ssh.get_transport().getpeername()[0] - for (remote_, _) in remotes_and_roles] - 
mons = misc.get_mons(roles, ips).values() - - mounts = {} - for id_, remote in clients: - kernel_mount = KernelMount( - mons, - test_dir, - id_, - remote, - ctx.teuthology_config.get('ipmi_user', None), - ctx.teuthology_config.get('ipmi_password', None), - ctx.teuthology_config.get('ipmi_domain', None) - ) - - mounts[id_] = kernel_mount - - client_config = config["client.{0}".format(id_)] - if client_config.get('debug', False): - remote.run(args=["sudo", "bash", "-c", "echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control"]) - remote.run(args=["sudo", "bash", "-c", "echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control"]) - - kernel_mount.mount() - - ctx.mounts = mounts - try: - yield mounts - finally: - log.info('Unmounting kernel clients...') - for mount in mounts.values(): - mount.umount() diff --git a/tasks/locktest.py b/tasks/locktest.py deleted file mode 100755 index 9de5ba40c5b..00000000000 --- a/tasks/locktest.py +++ /dev/null @@ -1,134 +0,0 @@ -""" -locktests -""" -import logging - -from teuthology.orchestra import run -from teuthology import misc as teuthology - -log = logging.getLogger(__name__) - -def task(ctx, config): - """ - Run locktests, from the xfstests suite, on the given - clients. Whether the clients are ceph-fuse or kernel does not - matter, and the two clients can refer to the same mount. - - The config is a list of two clients to run the locktest on. The - first client will be the host. - - For example: - tasks: - - ceph: - - ceph-fuse: [client.0, client.1] - - locktest: - [client.0, client.1] - - This task does not yield; there would be little point. 
- - :param ctx: Context - :param config: Configuration - """ - - assert isinstance(config, list) - log.info('fetching and building locktests...') - (host,) = ctx.cluster.only(config[0]).remotes - (client,) = ctx.cluster.only(config[1]).remotes - ( _, _, host_id) = config[0].partition('.') - ( _, _, client_id) = config[1].partition('.') - testdir = teuthology.get_testdir(ctx) - hostmnt = '{tdir}/mnt.{id}'.format(tdir=testdir, id=host_id) - clientmnt = '{tdir}/mnt.{id}'.format(tdir=testdir, id=client_id) - - try: - for client_name in config: - log.info('building on {client_}'.format(client_=client_name)) - ctx.cluster.only(client_name).run( - args=[ - # explicitly does not support multiple autotest tasks - # in a single run; the result archival would conflict - 'mkdir', '{tdir}/archive/locktest'.format(tdir=testdir), - run.Raw('&&'), - 'mkdir', '{tdir}/locktest'.format(tdir=testdir), - run.Raw('&&'), - 'wget', - '-nv', - 'https://raw.github.com/gregsfortytwo/xfstests-ceph/master/src/locktest.c', - '-O', '{tdir}/locktest/locktest.c'.format(tdir=testdir), - run.Raw('&&'), - 'g++', '{tdir}/locktest/locktest.c'.format(tdir=testdir), - '-o', '{tdir}/locktest/locktest'.format(tdir=testdir) - ], - logger=log.getChild('locktest_client.{id}'.format(id=client_name)), - ) - - log.info('built locktest on each client') - - host.run(args=['sudo', 'touch', - '{mnt}/locktestfile'.format(mnt=hostmnt), - run.Raw('&&'), - 'sudo', 'chown', 'ubuntu.ubuntu', - '{mnt}/locktestfile'.format(mnt=hostmnt) - ] - ) - - log.info('starting on host') - hostproc = host.run( - args=[ - '{tdir}/locktest/locktest'.format(tdir=testdir), - '-p', '6788', - '-d', - '{mnt}/locktestfile'.format(mnt=hostmnt), - ], - wait=False, - logger=log.getChild('locktest.host'), - ) - log.info('starting on client') - (_,_,hostaddr) = host.name.partition('@') - clientproc = client.run( - args=[ - '{tdir}/locktest/locktest'.format(tdir=testdir), - '-p', '6788', - '-d', - '-h', hostaddr, - 
'{mnt}/locktestfile'.format(mnt=clientmnt), - ], - logger=log.getChild('locktest.client'), - wait=False - ) - - hostresult = hostproc.wait() - clientresult = clientproc.wait() - if (hostresult != 0) or (clientresult != 0): - raise Exception("Did not pass locking test!") - log.info('finished locktest executable with results {r} and {s}'. \ - format(r=hostresult, s=clientresult)) - - finally: - log.info('cleaning up host dir') - host.run( - args=[ - 'mkdir', '-p', '{tdir}/locktest'.format(tdir=testdir), - run.Raw('&&'), - 'rm', '-f', '{tdir}/locktest/locktest.c'.format(tdir=testdir), - run.Raw('&&'), - 'rm', '-f', '{tdir}/locktest/locktest'.format(tdir=testdir), - run.Raw('&&'), - 'rmdir', '{tdir}/locktest' - ], - logger=log.getChild('.{id}'.format(id=config[0])), - ) - log.info('cleaning up client dir') - client.run( - args=[ - 'mkdir', '-p', '{tdir}/locktest'.format(tdir=testdir), - run.Raw('&&'), - 'rm', '-f', '{tdir}/locktest/locktest.c'.format(tdir=testdir), - run.Raw('&&'), - 'rm', '-f', '{tdir}/locktest/locktest'.format(tdir=testdir), - run.Raw('&&'), - 'rmdir', '{tdir}/locktest'.format(tdir=testdir) - ], - logger=log.getChild('.{id}'.format(\ - id=config[1])), - ) diff --git a/tasks/lost_unfound.py b/tasks/lost_unfound.py deleted file mode 100644 index af1df4de0cb..00000000000 --- a/tasks/lost_unfound.py +++ /dev/null @@ -1,157 +0,0 @@ -""" -Lost_unfound -""" -import logging -import time -import ceph_manager -from teuthology import misc as teuthology -from util.rados import rados - -log = logging.getLogger(__name__) - -def task(ctx, config): - """ - Test handling of lost objects. 
- - A pretty rigid cluseter is brought up andtested by this task - """ - POOL = 'unfound_pool' - if config is None: - config = {} - assert isinstance(config, dict), \ - 'lost_unfound task only accepts a dict for configuration' - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - - manager = ceph_manager.CephManager( - mon, - ctx=ctx, - logger=log.getChild('ceph_manager'), - ) - - while len(manager.get_osd_status()['up']) < 3: - time.sleep(10) - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - manager.wait_for_clean() - - manager.create_pool(POOL) - - # something that is always there - dummyfile = '/etc/fstab' - - # take an osd out until the very end - manager.kill_osd(2) - manager.mark_down_osd(2) - manager.mark_out_osd(2) - - # kludge to make sure they get a map - rados(ctx, mon, ['-p', POOL, 'put', 'dummy', dummyfile]) - - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.wait_for_recovery() - - # create old objects - for f in range(1, 10): - rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) - rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) - rados(ctx, mon, ['-p', POOL, 'rm', 'existed_%d' % f]) - - # delay recovery, and make the pg log very long (to prevent backfill) - manager.raw_cluster_cmd( - 'tell', 'osd.1', - 'injectargs', - '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000' - ) - - manager.kill_osd(0) - manager.mark_down_osd(0) - - for f in range(1, 10): - rados(ctx, mon, ['-p', POOL, 'put', 'new_%d' % f, dummyfile]) - rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) - rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) - - # bring osd.0 back up, let it peer, but don't replicate the new - # objects... 
- log.info('osd.0 command_args is %s' % 'foo') - log.info(ctx.daemons.get_daemon('osd', 0).command_args) - ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend([ - '--osd-recovery-delay-start', '1000' - ]) - manager.revive_osd(0) - manager.mark_in_osd(0) - manager.wait_till_osd_is_up(0) - - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.wait_till_active() - - # take out osd.1 and the only copy of those objects. - manager.kill_osd(1) - manager.mark_down_osd(1) - manager.mark_out_osd(1) - manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it') - - # bring up osd.2 so that things would otherwise, in theory, recovery fully - manager.revive_osd(2) - manager.mark_in_osd(2) - manager.wait_till_osd_is_up(2) - - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - manager.wait_till_active() - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - - # verify that there are unfound objects - unfound = manager.get_num_unfound_objects() - log.info("there are %d unfound objects" % unfound) - assert unfound - - # mark stuff lost - pgs = manager.get_pg_stats() - for pg in pgs: - if pg['stat_sum']['num_objects_unfound'] > 0: - primary = 'osd.%d' % pg['acting'][0] - - # verify that i can list them direct from the osd - log.info('listing missing/lost in %s state %s', pg['pgid'], - pg['state']); - m = manager.list_pg_missing(pg['pgid']) - #log.info('%s' % m) - assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] - num_unfound=0 - for o in m['objects']: - if len(o['locations']) == 0: - num_unfound += 1 - assert m['num_unfound'] == num_unfound - - log.info("reverting unfound in %s on %s", pg['pgid'], primary) - manager.raw_cluster_cmd('pg', pg['pgid'], - 'mark_unfound_lost', 'revert') - else: - log.info("no unfound in %s", pg['pgid']) - 
- manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5') - manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5') - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - manager.wait_for_recovery() - - # verify result - for f in range(1, 10): - err = rados(ctx, mon, ['-p', POOL, 'get', 'new_%d' % f, '-']) - assert err - err = rados(ctx, mon, ['-p', POOL, 'get', 'existed_%d' % f, '-']) - assert err - err = rados(ctx, mon, ['-p', POOL, 'get', 'existing_%d' % f, '-']) - assert not err - - # see if osd.1 can cope - manager.revive_osd(1) - manager.mark_in_osd(1) - manager.wait_till_osd_is_up(1) - manager.wait_for_clean() diff --git a/tasks/manypools.py b/tasks/manypools.py deleted file mode 100644 index 1ddcba5c8a9..00000000000 --- a/tasks/manypools.py +++ /dev/null @@ -1,73 +0,0 @@ -""" -Force pg creation on all osds -""" -from teuthology import misc as teuthology -from teuthology.orchestra import run -import logging - -log = logging.getLogger(__name__) - -def task(ctx, config): - """ - Create the specified number of pools and write 16 objects to them (thereby forcing - the PG creation on each OSD). This task creates pools from all the clients, - in parallel. It is easy to add other daemon types which have the appropriate - permissions, but I don't think anything else does. - The config is just the number of pools to create. I recommend setting - "mon create pg interval" to a very low value in your ceph config to speed - this up. - - You probably want to do this to look at memory consumption, and - maybe to test how performance changes with the number of PGs. 
For example: - - tasks: - - ceph: - config: - mon: - mon create pg interval: 1 - - manypools: 3000 - - radosbench: - clients: [client.0] - time: 360 - """ - - log.info('creating {n} pools'.format(n=config)) - - poolnum = int(config) - creator_remotes = [] - client_roles = teuthology.all_roles_of_type(ctx.cluster, 'client') - log.info('got client_roles={client_roles_}'.format(client_roles_=client_roles)) - for role in client_roles: - log.info('role={role_}'.format(role_=role)) - (creator_remote, ) = ctx.cluster.only('client.{id}'.format(id=role)).remotes.iterkeys() - creator_remotes.append((creator_remote, 'client.{id}'.format(id=role))) - - remaining_pools = poolnum - poolprocs=dict() - while (remaining_pools > 0): - log.info('{n} pools remaining to create'.format(n=remaining_pools)) - for remote, role_ in creator_remotes: - poolnum = remaining_pools - remaining_pools -= 1 - if remaining_pools < 0: - continue - log.info('creating pool{num} on {role}'.format(num=poolnum, role=role_)) - proc = remote.run( - args=[ - 'rados', - '--name', role_, - 'mkpool', 'pool{num}'.format(num=poolnum), '-1', - run.Raw('&&'), - 'rados', - '--name', role_, - '--pool', 'pool{num}'.format(num=poolnum), - 'bench', '0', 'write', '-t', '16', '--block-size', '1' - ], - wait = False - ) - log.info('waiting for pool and object creates') - poolprocs[remote] = proc - - run.wait(poolprocs.itervalues()) - - log.info('created all {n} pools and wrote 16 objects to each'.format(n=poolnum)) diff --git a/tasks/mds_auto_repair.py b/tasks/mds_auto_repair.py deleted file mode 100644 index e7580613b2a..00000000000 --- a/tasks/mds_auto_repair.py +++ /dev/null @@ -1,122 +0,0 @@ - -""" -Exercise the MDS's auto repair functions -""" - -import contextlib -import logging -import time - -from teuthology.orchestra.run import CommandFailedError - -from tasks.cephfs.filesystem import Filesystem -from tasks.cephfs.cephfs_test_case import CephFSTestCase, run_tests - - -log = logging.getLogger(__name__) - - -# 
Arbitrary timeouts for operations involving restarting -# an MDS or waiting for it to come up -MDS_RESTART_GRACE = 60 - - -class TestMDSAutoRepair(CephFSTestCase): - def test_backtrace_repair(self): - """ - MDS should verify/fix backtrace on fetch dirfrag - """ - - # trim log segment as fast as possible - self.set_conf('mds', 'mds cache size', 100) - self.set_conf('mds', 'mds verify backtrace', 1) - self.fs.mds_restart() - self.fs.wait_for_daemons() - - create_script = "mkdir {0}; for i in `seq 0 500`; do touch {0}/file$i; done" - # create main test directory - self.mount_a.run_shell(["sudo", "bash", "-c", create_script.format("testdir1")]) - - # create more files in another directory. make sure MDS trim dentries in testdir1 - self.mount_a.run_shell(["sudo", "bash", "-c", create_script.format("testdir2")]) - - # flush journal entries to dirfrag objects - self.fs.mds_asok(['flush', 'journal']) - - # drop inodes caps - self.mount_a.umount_wait() - self.mount_a.mount() - self.mount_a.wait_until_mounted() - - # wait MDS to trim dentries in testdir1. 60 seconds should be long enough. 
- time.sleep(60) - - # remove testdir1's backtrace - proc = self.mount_a.run_shell(["sudo", "ls", "-id", "testdir1"]) - self.assertEqual(proc.exitstatus, 0) - objname = "{:x}.00000000".format(long(proc.stdout.getvalue().split()[0])) - proc = self.mount_a.run_shell(["sudo", "rados", "-p", "metadata", "rmxattr", objname, "parent"]) - self.assertEqual(proc.exitstatus, 0) - - # readdir (fetch dirfrag) should fix testdir1's backtrace - self.mount_a.run_shell(["sudo", "ls", "testdir1"]) - - # add more entries to journal - self.mount_a.run_shell(["sudo", "rm", "-rf", " testdir2"]) - - # flush journal entries to dirfrag objects - self.fs.mds_asok(['flush', 'journal']) - - # check if backtrace exists - proc = self.mount_a.run_shell(["sudo", "rados", "-p", "metadata", "getxattr", objname, "parent"]) - self.assertEqual(proc.exitstatus, 0) - - def test_mds_readonly(self): - """ - test if MDS behave correct when it's readonly - """ - # operation should successd when MDS is not readonly - self.mount_a.run_shell(["sudo", "touch", "test_file1"]) - writer = self.mount_a.write_background(loop=True) - - time.sleep(10) - self.assertFalse(writer.finished) - - # force MDS to read-only mode - self.fs.mds_asok(['force_readonly']) - time.sleep(10) - - # touching test file should fail - try: - self.mount_a.run_shell(["sudo", "touch", "test_file1"]) - except CommandFailedError: - pass - else: - self.assertTrue(False) - - # background writer also should fail - self.assertTrue(writer.finished) - - # restart mds to make it writable - self.fs.mds_restart() - self.fs.wait_for_daemons() - - -@contextlib.contextmanager -def task(ctx, config): - fs = Filesystem(ctx) - mount_a = ctx.mounts.values()[0] - - # Stash references on ctx so that we can easily debug in interactive mode - # ======================================================================= - ctx.filesystem = fs - ctx.mount_a = mount_a - - run_tests(ctx, config, TestMDSAutoRepair, { - 'fs': fs, - 'mount_a': mount_a, - }) - - # Continue to 
any downstream tasks - # ================================ - yield diff --git a/tasks/mds_client_limits.py b/tasks/mds_client_limits.py deleted file mode 100644 index ae722886753..00000000000 --- a/tasks/mds_client_limits.py +++ /dev/null @@ -1,183 +0,0 @@ - -""" -Exercise the MDS's behaviour when clients and the MDCache reach or -exceed the limits of how many caps/inodes they should hold. -""" - -import contextlib -import logging -from unittest import SkipTest -from teuthology.orchestra.run import CommandFailedError - -from tasks.cephfs.filesystem import Filesystem -from tasks.cephfs.fuse_mount import FuseMount -from tasks.cephfs.cephfs_test_case import CephFSTestCase, run_tests - - -log = logging.getLogger(__name__) - - -# Arbitrary timeouts for operations involving restarting -# an MDS or waiting for it to come up -MDS_RESTART_GRACE = 60 - -# Hardcoded values from Server::recall_client_state -CAP_RECALL_RATIO = 0.8 -CAP_RECALL_MIN = 100 - - -class TestClientLimits(CephFSTestCase): - # Environment references - mds_session_timeout = None - mds_reconnect_timeout = None - ms_max_backoff = None - - def wait_for_health(self, pattern, timeout): - """ - Wait until 'ceph health' contains a single message matching the pattern - """ - def seen_health_warning(): - health = self.fs.mon_manager.get_mon_health() - summary_strings = [s['summary'] for s in health['summary']] - if len(summary_strings) == 0: - log.debug("Not expected number of summary strings ({0})".format(summary_strings)) - return False - elif len(summary_strings) == 1 and pattern in summary_strings[0]: - return True - else: - raise RuntimeError("Unexpected health messages: {0}".format(summary_strings)) - - self.wait_until_true(seen_health_warning, timeout) - - def _test_client_pin(self, use_subdir): - """ - When a client pins an inode in its cache, for example because the file is held open, - it should reject requests from the MDS to trim these caps. 
The MDS should complain - to the user that it is unable to enforce its cache size limits because of this - objectionable client. - - :param use_subdir: whether to put test files in a subdir or use root - """ - - cache_size = 200 - open_files = 250 - - self.set_conf('mds', 'mds cache size', cache_size) - self.fs.mds_restart() - self.fs.wait_for_daemons() - - mount_a_client_id = self.mount_a.get_global_id() - path = "subdir/mount_a" if use_subdir else "mount_a" - open_proc = self.mount_a.open_n_background(path, open_files) - - # Client should now hold: - # `open_files` caps for the open files - # 1 cap for root - # 1 cap for subdir - self.wait_until_equal(lambda: self.get_session(mount_a_client_id)['num_caps'], - open_files + (2 if use_subdir else 1), - timeout=600, - reject_fn=lambda x: x > open_files + 2) - - # MDS should not be happy about that, as the client is failing to comply - # with the SESSION_RECALL messages it is being sent - mds_recall_state_timeout = int(self.fs.get_config("mds_recall_state_timeout")) - self.wait_for_health("failing to respond to cache pressure", mds_recall_state_timeout + 10) - - # When the client closes the files, it should retain only as many caps as allowed - # under the SESSION_RECALL policy - log.info("Terminating process holding files open") - open_proc.stdin.close() - try: - open_proc.wait() - except CommandFailedError: - # We killed it, so it raises an error - pass - - # The remaining caps should comply with the numbers sent from MDS in SESSION_RECALL message, - # which depend on the cache size and overall ratio - self.wait_until_equal( - lambda: self.get_session(mount_a_client_id)['num_caps'], - int(cache_size * 0.8), - timeout=600, - reject_fn=lambda x: x < int(cache_size*.8)) - - def test_client_pin_root(self): - self._test_client_pin(False) - - def test_client_pin(self): - self._test_client_pin(True) - - def test_client_release_bug(self): - """ - When a client has a bug (which we will simulate) preventing it from releasing 
caps, - the MDS should notice that releases are not being sent promptly, and generate a health - metric to that effect. - """ - - # The debug hook to inject the failure only exists in the fuse client - if not isinstance(self.mount_a, FuseMount): - raise SkipTest("Require FUSE client to inject client release failure") - - self.set_conf('client.{0}'.format(self.mount_a.client_id), 'client inject release failure', 'true') - self.mount_a.teardown() - self.mount_a.mount() - self.mount_a.wait_until_mounted() - mount_a_client_id = self.mount_a.get_global_id() - - # Client A creates a file. He will hold the write caps on the file, and later (simulated bug) fail - # to comply with the MDSs request to release that cap - self.mount_a.run_shell(["touch", "file1"]) - - # Client B tries to stat the file that client A created - rproc = self.mount_b.write_background("file1") - - # After mds_revoke_cap_timeout, we should see a health warning (extra lag from - # MDS beacon period) - mds_revoke_cap_timeout = int(self.fs.get_config("mds_revoke_cap_timeout")) - self.wait_for_health("failing to respond to capability release", mds_revoke_cap_timeout + 10) - - # Client B should still be stuck - self.assertFalse(rproc.finished) - - # Kill client A - self.mount_a.kill() - self.mount_a.kill_cleanup() - - # Client B should complete - self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) - rproc.wait() - - -@contextlib.contextmanager -def task(ctx, config): - fs = Filesystem(ctx) - - # Pick out the clients we will use from the configuration - # ======================================================= - if len(ctx.mounts) < 2: - raise RuntimeError("Need at least two clients") - mount_a = ctx.mounts.values()[0] - mount_b = ctx.mounts.values()[1] - - if not isinstance(mount_a, FuseMount) or not isinstance(mount_b, FuseMount): - # kclient kill() power cycles nodes, so requires clients to each be on - # their own node - if mount_a.client_remote.hostname == mount_b.client_remote.hostname: 
- raise RuntimeError("kclient clients must be on separate nodes") - - # Stash references on ctx so that we can easily debug in interactive mode - # ======================================================================= - ctx.filesystem = fs - ctx.mount_a = mount_a - ctx.mount_b = mount_b - - run_tests(ctx, config, TestClientLimits, { - 'fs': fs, - 'mount_a': mount_a, - 'mount_b': mount_b - }) - - # Continue to any downstream tasks - # ================================ - yield diff --git a/tasks/mds_client_recovery.py b/tasks/mds_client_recovery.py deleted file mode 100644 index 7dd7402e8e8..00000000000 --- a/tasks/mds_client_recovery.py +++ /dev/null @@ -1,429 +0,0 @@ - -""" -Teuthology task for exercising CephFS client recovery -""" - -import contextlib -import logging -import time -import unittest - -from teuthology.orchestra.run import CommandFailedError, ConnectionLostError -from teuthology.task import interactive - -from tasks.cephfs.cephfs_test_case import CephFSTestCase, run_tests -from tasks.cephfs.filesystem import Filesystem -from tasks.cephfs.fuse_mount import FuseMount - - -log = logging.getLogger(__name__) - - -# Arbitrary timeouts for operations involving restarting -# an MDS or waiting for it to come up -MDS_RESTART_GRACE = 60 - - -class TestClientRecovery(CephFSTestCase): - # Environment references - mds_session_timeout = None - mds_reconnect_timeout = None - ms_max_backoff = None - - def test_basic(self): - # Check that two clients come up healthy and see each others' files - # ===================================================== - self.mount_a.create_files() - self.mount_a.check_files() - self.mount_a.umount_wait() - - self.mount_b.check_files() - - self.mount_a.mount() - self.mount_a.wait_until_mounted() - - # Check that the admin socket interface is correctly reporting - # two sessions - # ===================================================== - ls_data = self._session_list() - self.assert_session_count(2, ls_data) - - self.assertSetEqual( - 
set([l['id'] for l in ls_data]), - {self.mount_a.get_global_id(), self.mount_b.get_global_id()} - ) - - def test_restart(self): - # Check that after an MDS restart both clients reconnect and continue - # to handle I/O - # ===================================================== - self.fs.mds_stop() - self.fs.mds_fail() - self.fs.mds_restart() - self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) - - self.mount_a.create_destroy() - self.mount_b.create_destroy() - - def _session_num_caps(self, client_id): - ls_data = self.fs.mds_asok(['session', 'ls']) - return int(self._session_by_id(ls_data).get(client_id, {'num_caps': None})['num_caps']) - - def test_reconnect_timeout(self): - # Reconnect timeout - # ================= - # Check that if I stop an MDS and a client goes away, the MDS waits - # for the reconnect period - self.fs.mds_stop() - self.fs.mds_fail() - - mount_a_client_id = self.mount_a.get_global_id() - self.mount_a.umount_wait(force=True) - - self.fs.mds_restart() - - self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE) - - ls_data = self._session_list() - self.assert_session_count(2, ls_data) - - # The session for the dead client should have the 'reconnect' flag set - self.assertTrue(self.get_session(mount_a_client_id)['reconnecting']) - - # Wait for the reconnect state to clear, this should take the - # reconnect timeout period. - in_reconnect_for = self.fs.wait_for_state('up:active', timeout=self.mds_reconnect_timeout * 2) - # Check that the period we waited to enter active is within a factor - # of two of the reconnect timeout. 
- self.assertGreater(in_reconnect_for, self.mds_reconnect_timeout / 2, - "Should have been in reconnect phase for {0} but only took {1}".format( - self.mds_reconnect_timeout, in_reconnect_for - )) - - self.assert_session_count(1) - - # Check that the client that timed out during reconnect can - # mount again and do I/O - self.mount_a.mount() - self.mount_a.wait_until_mounted() - self.mount_a.create_destroy() - - self.assert_session_count(2) - - def test_reconnect_eviction(self): - # Eviction during reconnect - # ========================= - self.fs.mds_stop() - self.fs.mds_fail() - - mount_a_client_id = self.mount_a.get_global_id() - self.mount_a.umount_wait(force=True) - - self.fs.mds_restart() - - # Enter reconnect phase - self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE) - self.assert_session_count(2) - - # Evict the stuck client - self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) - self.assert_session_count(1) - - # Observe that we proceed to active phase without waiting full reconnect timeout - evict_til_active = self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) - # Once we evict the troublemaker, the reconnect phase should complete - # in well under the reconnect timeout. 
- self.assertLess(evict_til_active, self.mds_reconnect_timeout * 0.5, - "reconnect did not complete soon enough after eviction, took {0}".format( - evict_til_active - )) - - # Bring the client back - self.mount_a.mount() - self.mount_a.wait_until_mounted() - self.mount_a.create_destroy() - - def test_stale_caps(self): - # Capability release from stale session - # ===================================== - cap_holder = self.mount_a.open_background() - - # Wait for the file to be visible from another client, indicating - # that mount_a has completed its network ops - self.mount_b.wait_for_visible() - - # Simulate client death - self.mount_a.kill() - - try: - # Now, after mds_session_timeout seconds, the waiter should - # complete their operation when the MDS marks the holder's - # session stale. - cap_waiter = self.mount_b.write_background() - a = time.time() - cap_waiter.wait() - b = time.time() - - # Should have succeeded - self.assertEqual(cap_waiter.exitstatus, 0) - - cap_waited = b - a - log.info("cap_waiter waited {0}s".format(cap_waited)) - self.assertTrue(self.mds_session_timeout / 2.0 <= cap_waited <= self.mds_session_timeout * 2.0, - "Capability handover took {0}, expected approx {1}".format( - cap_waited, self.mds_session_timeout - )) - - cap_holder.stdin.close() - try: - cap_holder.wait() - except (CommandFailedError, ConnectionLostError): - # We killed it (and possibly its node), so it raises an error - pass - finally: - # teardown() doesn't quite handle this case cleanly, so help it out - self.mount_a.kill_cleanup() - - self.mount_a.mount() - self.mount_a.wait_until_mounted() - - def test_evicted_caps(self): - # Eviction while holding a capability - # =================================== - - # Take out a write capability on a file on client A, - # and then immediately kill it. 
- cap_holder = self.mount_a.open_background() - mount_a_client_id = self.mount_a.get_global_id() - - # Wait for the file to be visible from another client, indicating - # that mount_a has completed its network ops - self.mount_b.wait_for_visible() - - # Simulate client death - self.mount_a.kill() - - try: - # The waiter should get stuck waiting for the capability - # held on the MDS by the now-dead client A - cap_waiter = self.mount_b.write_background() - time.sleep(5) - self.assertFalse(cap_waiter.finished) - - self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) - # Now, because I evicted the old holder of the capability, it should - # immediately get handed over to the waiter - a = time.time() - cap_waiter.wait() - b = time.time() - cap_waited = b - a - log.info("cap_waiter waited {0}s".format(cap_waited)) - # This is the check that it happened 'now' rather than waiting - # for the session timeout - self.assertLess(cap_waited, self.mds_session_timeout / 2.0, - "Capability handover took {0}, expected less than {1}".format( - cap_waited, self.mds_session_timeout / 2.0 - )) - - cap_holder.stdin.close() - try: - cap_holder.wait() - except (CommandFailedError, ConnectionLostError): - # We killed it (and possibly its node), so it raises an error - pass - finally: - self.mount_a.kill_cleanup() - - self.mount_a.mount() - self.mount_a.wait_until_mounted() - - def test_trim_caps(self): - # Trim capability when reconnecting MDS - # =================================== - - count = 500 - # Create lots of files - for i in range(count): - self.mount_a.run_shell(["sudo", "touch", "f{0}".format(i)]) - - # Populate mount_b's cache - self.mount_b.run_shell(["sudo", "ls"]) - - client_id = self.mount_b.get_global_id() - num_caps = self._session_num_caps(client_id) - self.assertGreaterEqual(num_caps, count) - - # Restart MDS. 
client should trim its cache when reconnecting to the MDS - self.fs.mds_stop() - self.fs.mds_fail() - self.fs.mds_restart() - self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) - - num_caps = self._session_num_caps(client_id) - self.assertLess(num_caps, count, - "should have less than {0} capabilities, have {1}".format( - count, num_caps - )) - - def test_network_death(self): - """ - Simulate software freeze or temporary network failure. - - Check that the client blocks I/O during failure, and completes - I/O after failure. - """ - - # We only need one client - self.mount_b.umount_wait() - - # Initially our one client session should be visible - client_id = self.mount_a.get_global_id() - ls_data = self._session_list() - self.assert_session_count(1, ls_data) - self.assertEqual(ls_data[0]['id'], client_id) - self.assert_session_state(client_id, "open") - - # ...and capable of doing I/O without blocking - self.mount_a.create_files() - - # ...but if we turn off the network - self.fs.set_clients_block(True) - - # ...and try and start an I/O - write_blocked = self.mount_a.write_background() - - # ...then it should block - self.assertFalse(write_blocked.finished) - self.assert_session_state(client_id, "open") - time.sleep(self.mds_session_timeout * 1.5) # Long enough for MDS to consider session stale - self.assertFalse(write_blocked.finished) - self.assert_session_state(client_id, "stale") - - # ...until we re-enable I/O - self.fs.set_clients_block(False) - - # ...when it should complete promptly - a = time.time() - write_blocked.wait() - b = time.time() - recovery_time = b - a - log.info("recovery time: {0}".format(recovery_time)) - self.assertLess(recovery_time, self.ms_max_backoff * 2) - self.assert_session_state(client_id, "open") - - def test_filelock(self): - """ - Check that file lock doesn't get lost after an MDS restart - """ - lock_holder = self.mount_a.lock_background() - - self.mount_b.wait_for_visible("background_file-2") - 
self.mount_b.check_filelock() - - self.fs.mds_stop() - self.fs.mds_fail() - self.fs.mds_restart() - self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) - - self.mount_b.check_filelock() - - # Tear down the background process - lock_holder.stdin.close() - try: - lock_holder.wait() - except (CommandFailedError, ConnectionLostError): - # We killed it, so it raises an error - pass - - -class LogStream(object): - def __init__(self): - self.buffer = "" - - def write(self, data): - self.buffer += data - if "\n" in self.buffer: - lines = self.buffer.split("\n") - for line in lines[:-1]: - log.info(line) - self.buffer = lines[-1] - - def flush(self): - pass - - -class InteractiveFailureResult(unittest.TextTestResult): - """ - Specialization that implements interactive-on-error style - behavior. - """ - ctx = None - - def addFailure(self, test, err): - log.error(self._exc_info_to_string(err, test)) - log.error("Failure in test '{0}', going interactive".format( - self.getDescription(test) - )) - interactive.task(ctx=self.ctx, config=None) - - def addError(self, test, err): - log.error(self._exc_info_to_string(err, test)) - log.error("Error in test '{0}', going interactive".format( - self.getDescription(test) - )) - interactive.task(ctx=self.ctx, config=None) - - -@contextlib.contextmanager -def task(ctx, config): - """ - Execute CephFS client recovery test suite. 
- - Requires: - - An outer ceph_fuse task with at least two clients - - That the clients are on a separate host to the MDS - """ - fs = Filesystem(ctx) - - # Pick out the clients we will use from the configuration - # ======================================================= - if len(ctx.mounts) < 2: - raise RuntimeError("Need at least two clients") - mount_a = ctx.mounts.values()[0] - mount_b = ctx.mounts.values()[1] - - if not isinstance(mount_a, FuseMount) or not isinstance(mount_b, FuseMount): - # kclient kill() power cycles nodes, so requires clients to each be on - # their own node - if mount_a.client_remote.hostname == mount_b.client_remote.hostname: - raise RuntimeError("kclient clients must be on separate nodes") - - # Check we have at least one remote client for use with network-dependent tests - # ============================================================================= - if mount_a.client_remote.hostname in fs.get_mds_hostnames(): - raise RuntimeError("Require first client to on separate server from MDSs") - - # Stash references on ctx so that we can easily debug in interactive mode - # ======================================================================= - ctx.filesystem = fs - ctx.mount_a = mount_a - ctx.mount_b = mount_b - - run_tests(ctx, config, TestClientRecovery, { - "mds_reconnect_timeout": int(fs.mds_asok( - ['config', 'get', 'mds_reconnect_timeout'] - )['mds_reconnect_timeout']), - "mds_session_timeout": int(fs.mds_asok( - ['config', 'get', 'mds_session_timeout'] - )['mds_session_timeout']), - "ms_max_backoff": int(fs.mds_asok( - ['config', 'get', 'ms_max_backoff'] - )['ms_max_backoff']), - "fs": fs, - "mount_a": mount_a, - "mount_b": mount_b - }) - - # Continue to any downstream tasks - # ================================ - yield diff --git a/tasks/mds_creation_failure.py b/tasks/mds_creation_failure.py deleted file mode 100644 index d1de1569442..00000000000 --- a/tasks/mds_creation_failure.py +++ /dev/null @@ -1,85 +0,0 @@ - -import 
logging -import contextlib -import time -import ceph_manager -from teuthology import misc -from teuthology.orchestra.run import CommandFailedError, Raw - -log = logging.getLogger(__name__) - - -@contextlib.contextmanager -def task(ctx, config): - """ - Go through filesystem creation with a synthetic failure in an MDS - in its 'up:creating' state, to exercise the retry behaviour. - """ - # Grab handles to the teuthology objects of interest - mdslist = list(misc.all_roles_of_type(ctx.cluster, 'mds')) - if len(mdslist) != 1: - # Require exactly one MDS, the code path for creation failure when - # a standby is available is different - raise RuntimeError("This task requires exactly one MDS") - - mds_id = mdslist[0] - (mds_remote,) = ctx.cluster.only('mds.{_id}'.format(_id=mds_id)).remotes.iterkeys() - manager = ceph_manager.CephManager( - mds_remote, ctx=ctx, logger=log.getChild('ceph_manager'), - ) - - # Stop MDS - manager.raw_cluster_cmd('mds', 'set', "max_mds", "0") - mds = ctx.daemons.get_daemon('mds', mds_id) - mds.stop() - manager.raw_cluster_cmd('mds', 'fail', mds_id) - - # Reset the filesystem so that next start will go into CREATING - manager.raw_cluster_cmd('fs', 'rm', "default", "--yes-i-really-mean-it") - manager.raw_cluster_cmd('fs', 'new', "default", "metadata", "data") - - # Start the MDS with mds_kill_create_at set, it will crash during creation - mds.restart_with_args(["--mds_kill_create_at=1"]) - try: - mds.wait_for_exit() - except CommandFailedError as e: - if e.exitstatus == 1: - log.info("MDS creation killed as expected") - else: - log.error("Unexpected status code %s" % e.exitstatus) - raise - - # Since I have intentionally caused a crash, I will clean up the resulting core - # file to avoid task.internal.coredump seeing it as a failure. 
- log.info("Removing core file from synthetic MDS failure") - mds_remote.run(args=['rm', '-f', Raw("{archive}/coredump/*.core".format(archive=misc.get_archive_dir(ctx)))]) - - # It should have left the MDS map state still in CREATING - status = manager.get_mds_status(mds_id) - assert status['state'] == 'up:creating' - - # Start the MDS again without the kill flag set, it should proceed with creation successfully - mds.restart() - - # Wait for state ACTIVE - t = 0 - create_timeout = 120 - while True: - status = manager.get_mds_status(mds_id) - if status['state'] == 'up:active': - log.info("MDS creation completed successfully") - break - elif status['state'] == 'up:creating': - log.info("MDS still in creating state") - if t > create_timeout: - log.error("Creating did not complete within %ss" % create_timeout) - raise RuntimeError("Creating did not complete within %ss" % create_timeout) - t += 1 - time.sleep(1) - else: - log.error("Unexpected MDS state: %s" % status['state']) - assert(status['state'] in ['up:active', 'up:creating']) - - # The system should be back up in a happy healthy state, go ahead and run any further tasks - # inside this context. 
- yield diff --git a/tasks/mds_flush.py b/tasks/mds_flush.py deleted file mode 100644 index 458de83aefa..00000000000 --- a/tasks/mds_flush.py +++ /dev/null @@ -1,136 +0,0 @@ -import contextlib -from textwrap import dedent -from tasks.cephfs.cephfs_test_case import run_tests, CephFSTestCase -from tasks.cephfs.filesystem import Filesystem, ObjectNotFound, ROOT_INO - - -class TestFlush(CephFSTestCase): - def test_flush(self): - self.mount_a.run_shell(["mkdir", "mydir"]) - self.mount_a.run_shell(["touch", "mydir/alpha"]) - dir_ino = self.mount_a.path_to_ino("mydir") - file_ino = self.mount_a.path_to_ino("mydir/alpha") - - # Unmount the client so that it isn't still holding caps - self.mount_a.umount_wait() - - # Before flush, the dirfrag object does not exist - with self.assertRaises(ObjectNotFound): - self.fs.list_dirfrag(dir_ino) - - # Before flush, the file's backtrace has not been written - with self.assertRaises(ObjectNotFound): - self.fs.read_backtrace(file_ino) - - # Before flush, there are no dentries in the root - self.assertEqual(self.fs.list_dirfrag(ROOT_INO), []) - - # Execute flush - flush_data = self.fs.mds_asok(["flush", "journal"]) - self.assertEqual(flush_data['return_code'], 0) - - # After flush, the dirfrag object has been created - dir_list = self.fs.list_dirfrag(dir_ino) - self.assertEqual(dir_list, ["alpha_head"]) - - # And the 'mydir' dentry is in the root - self.assertEqual(self.fs.list_dirfrag(ROOT_INO), ['mydir_head']) - - # ...and the data object has its backtrace - backtrace = self.fs.read_backtrace(file_ino) - self.assertEqual(['alpha', 'mydir'], [a['dname'] for a in backtrace['ancestors']]) - self.assertEqual([dir_ino, 1], [a['dirino'] for a in backtrace['ancestors']]) - self.assertEqual(file_ino, backtrace['ino']) - - # ...and the journal is truncated to just a single subtreemap from the - # newly created segment - summary_output = self.fs.journal_tool(["event", "get", "summary"]) - try: - self.assertEqual(summary_output, - dedent( - """ 
- Events by type: - SUBTREEMAP: 1 - Errors: 0 - """ - ).strip()) - except AssertionError: - # In some states, flushing the journal will leave you - # an extra event from locks a client held. This is - # correct behaviour: the MDS is flushing the journal, - # it's just that new events are getting added too. - # In this case, we should nevertheless see a fully - # empty journal after a second flush. - self.assertEqual(summary_output, - dedent( - """ - Events by type: - SUBTREEMAP: 1 - UPDATE: 1 - Errors: 0 - """ - ).strip()) - flush_data = self.fs.mds_asok(["flush", "journal"]) - self.assertEqual(flush_data['return_code'], 0) - self.assertEqual(self.fs.journal_tool(["event", "get", "summary"]), - dedent( - """ - Events by type: - SUBTREEMAP: 1 - Errors: 0 - """ - ).strip()) - - # Now for deletion! - self.mount_a.mount() - self.mount_a.wait_until_mounted() - self.mount_a.run_shell(["rm", "-rf", "mydir"]) - - # We will count the RADOS deletions and MDS file purges, to verify that - # the expected behaviour is happening as a result of the purge - initial_dels = self.fs.mds_asok(['perf', 'dump'])['objecter']['osdop_delete'] - initial_purges = self.fs.mds_asok(['perf', 'dump'])['mds_cache']['strays_purged'] - - flush_data = self.fs.mds_asok(["flush", "journal"]) - self.assertEqual(flush_data['return_code'], 0) - - # We expect to see a single file purge - self.wait_until_true( - lambda: self.fs.mds_asok(['perf', 'dump'])['mds_cache']['strays_purged'] - initial_purges >= 1, - 60) - - # We expect two deletions, one of the dirfrag and one of the backtrace - self.wait_until_true( - lambda: self.fs.mds_asok(['perf', 'dump'])['objecter']['osdop_delete'] - initial_dels >= 2, - 60) # timeout is fairly long to allow for tick+rados latencies - - with self.assertRaises(ObjectNotFound): - self.fs.list_dirfrag(dir_ino) - with self.assertRaises(ObjectNotFound): - self.fs.read_backtrace(file_ino) - self.assertEqual(self.fs.list_dirfrag(ROOT_INO), []) - - -@contextlib.contextmanager -def 
task(ctx, config): - fs = Filesystem(ctx) - - # Pick out the clients we will use from the configuration - # ======================================================= - if len(ctx.mounts) < 1: - raise RuntimeError("Need at least one client") - mount = ctx.mounts.values()[0] - - # Stash references on ctx so that we can easily debug in interactive mode - # ======================================================================= - ctx.filesystem = fs - ctx.mount = mount - - run_tests(ctx, config, TestFlush, { - 'fs': fs, - 'mount_a': mount, - }) - - # Continue to any downstream tasks - # ================================ - yield diff --git a/tasks/mds_full.py b/tasks/mds_full.py deleted file mode 100644 index c984d1a9bf7..00000000000 --- a/tasks/mds_full.py +++ /dev/null @@ -1,363 +0,0 @@ - -""" -Exercise the MDS and Client behaviour when the cluster fills up. -""" - -import contextlib -import json -import logging -import os -from textwrap import dedent -import time -from teuthology.orchestra.run import CommandFailedError - -from tasks.cephfs.filesystem import Filesystem -from tasks.cephfs.cephfs_test_case import CephFSTestCase, run_tests - - -log = logging.getLogger(__name__) - - -class TestClusterFull(CephFSTestCase): - # Persist-between-tests constants - pool_capacity = None - - def setUp(self): - super(TestClusterFull, self).setUp() - - if self.pool_capacity is None: - # This is a hack to overcome weird fluctuations in the reported - # `max_avail` attribute of pools that sometimes occurs in between - # tests (reason as yet unclear, but this dodges the issue) - TestClusterFull.pool_capacity = self.fs.get_pool_df(self._data_pool_name())['max_avail'] - - def test_barrier(self): - """ - That when an OSD epoch barrier is set on an MDS, subsequently - issued capabilities cause clients to update their OSD map to that - epoch. 
- """ - - # Check the initial barrier epoch on the MDS: this should be - # set to the latest map at MDS startup - initial_osd_epoch = json.loads( - self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json").strip() - )['epoch'] - self.assertGreaterEqual(self.fs.mds_asok(["status"])['osdmap_epoch_barrier'], initial_osd_epoch) - - # Sync up clients with initial MDS OSD map barrier - self.mount_a.open_no_data("foo") - self.mount_b.open_no_data("bar") - - # Grab mount_a's initial OSD epoch: later we will check that - # it hasn't advanced beyond this point. - mount_a_initial_epoch = self.mount_a.get_osd_epoch()[0] - - # Freshly mounted at start of test, should be up to date with OSD map - self.assertGreaterEqual(mount_a_initial_epoch, initial_osd_epoch) - self.assertGreaterEqual(self.mount_b.get_osd_epoch()[0], initial_osd_epoch) - - # Set and unset a flag to cause OSD epoch to increment - self.fs.mon_manager.raw_cluster_cmd("osd", "set", "pause") - self.fs.mon_manager.raw_cluster_cmd("osd", "unset", "pause") - - out = self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json").strip() - new_epoch = json.loads(out)['epoch'] - self.assertNotEqual(initial_osd_epoch, new_epoch) - - # Do a metadata operation on client A, witness that it ends up with - # the old OSD map from startup time (nothing has prompted it - # to update its map) - self.mount_a.open_no_data("alpha") - - # Sleep long enough that if the OSD map was propagating it would - # have done so (this is arbitrary because we are 'waiting' for something - # to *not* happen). 
- time.sleep(30) - - mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch() - self.assertEqual(mount_a_epoch, mount_a_initial_epoch) - - # Set a barrier on the MDS - self.fs.mds_asok(["osdmap", "barrier", new_epoch.__str__()]) - - # Do an operation on client B, witness that it ends up with - # the latest OSD map from the barrier - self.mount_b.run_shell(["touch", "bravo"]) - self.mount_b.open_no_data("bravo") - - # Some time passes here because the metadata part of the operation - # completes immediately, while the resulting OSD map update happens - # asynchronously (it's an Objecter::_maybe_request_map) as a result - # of seeing the new epoch barrier. - self.wait_until_equal( - lambda: self.mount_b.get_osd_epoch(), - (new_epoch, new_epoch), - 30, - lambda x: x[0] > new_epoch or x[1] > new_epoch) - - # ...and none of this should have affected the oblivious mount a, - # because it wasn't doing any data or metadata IO - mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch() - self.assertEqual(mount_a_epoch, mount_a_initial_epoch) - - def _data_pool_name(self): - data_pool_names = self.fs.get_data_pool_names() - if len(data_pool_names) > 1: - raise RuntimeError("This test can't handle multiple data pools") - else: - return data_pool_names[0] - - def _test_full(self, easy_case): - """ - - That a client trying to write data to a file is prevented - from doing so with an -EFULL result - - That they are also prevented from creating new files by the MDS. - - That they may delete another file to get the system healthy again - - :param easy_case: if true, delete a successfully written file to - free up space. else, delete the file that experienced - the failed write. 
- """ - - osd_mon_report_interval_max = int(self.fs.get_config("osd_mon_report_interval_max", service_type='osd')) - mon_osd_full_ratio = float(self.fs.get_config("mon_osd_full_ratio")) - - pool_capacity = self.pool_capacity - fill_mb = int(1.05 * mon_osd_full_ratio * (pool_capacity / (1024.0 * 1024.0))) + 2 - - log.info("Writing {0}MB should fill this cluster".format(fill_mb)) - - # Fill up the cluster. This dd may or may not fail, as it depends on - # how soon the cluster recognises its own fullness - self.mount_a.write_n_mb("large_file_a", fill_mb / 2) - try: - self.mount_a.write_n_mb("large_file_b", fill_mb / 2) - except CommandFailedError: - log.info("Writing file B failed (full status happened already)") - assert self.fs.is_full() - else: - log.info("Writing file B succeeded (full status will happen soon)") - self.wait_until_true(lambda: self.fs.is_full(), - timeout=osd_mon_report_interval_max * 5) - - # Attempting to write more data should give me ENOSPC - with self.assertRaises(CommandFailedError) as ar: - self.mount_a.write_n_mb("large_file_b", 50, seek=fill_mb / 2) - self.assertEqual(ar.exception.exitstatus, 1) # dd returns 1 on "No space" - - # Wait for the MDS to see the latest OSD map so that it will reliably - # be applying the policy of rejecting non-deletion metadata operations - # while in the full state. - osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch'] - self.wait_until_true( - lambda: self.fs.mds_asok(['status'])['osdmap_epoch'] >= osd_epoch, - timeout=10) - - with self.assertRaises(CommandFailedError): - self.mount_a.write_n_mb("small_file_1", 0) - - # Clear out some space - if easy_case: - self.mount_a.run_shell(['rm', '-f', 'large_file_a']) - self.mount_a.run_shell(['rm', '-f', 'large_file_b']) - else: - # In the hard case it is the file that filled the system. 
- # Before the new #7317 (ENOSPC, epoch barrier) changes, this - # would fail because the last objects written would be - # stuck in the client cache as objecter operations. - self.mount_a.run_shell(['rm', '-f', 'large_file_b']) - self.mount_a.run_shell(['rm', '-f', 'large_file_a']) - - # Here we are waiting for two things to happen: - # * The MDS to purge the stray folder and execute object deletions - # * The OSDs to inform the mon that they are no longer full - self.wait_until_true(lambda: not self.fs.is_full(), - timeout=osd_mon_report_interval_max * 5) - - # Wait for the MDS to see the latest OSD map so that it will reliably - # be applying the free space policy - osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch'] - self.wait_until_true( - lambda: self.fs.mds_asok(['status'])['osdmap_epoch'] >= osd_epoch, - timeout=10) - - # Now I should be able to write again - self.mount_a.write_n_mb("large_file", 50, seek=0) - - # Ensure that the MDS keeps its OSD epoch barrier across a restart - - def test_full_different_file(self): - self._test_full(True) - - def test_full_same_file(self): - self._test_full(False) - - def _remote_write_test(self, template): - """ - Run some remote python in a way that's useful for - testing free space behaviour (see test_* methods using this) - """ - file_path = os.path.join(self.mount_a.mountpoint, "full_test_file") - - # Enough to trip the full flag - osd_mon_report_interval_max = int(self.fs.get_config("osd_mon_report_interval_max", service_type='osd')) - mon_osd_full_ratio = float(self.fs.get_config("mon_osd_full_ratio")) - pool_capacity = self.pool_capacity - - # Sufficient data to cause RADOS cluster to go 'full' - fill_mb = int(1.05 * mon_osd_full_ratio * (pool_capacity / (1024.0 * 1024.0))) - log.info("pool capacity {0}, {1}MB should be enough to fill it".format(pool_capacity, fill_mb)) - - # Long enough for RADOS cluster to notice it is full and set flag on mons - full_wait 
= osd_mon_report_interval_max * 1.5 - - # Configs for this test should bring this setting down in order to - # run reasonably quickly - if osd_mon_report_interval_max > 10: - log.warn("This test may run rather slowly unless you decrease" - "osd_mon_report_interval_max (5 is a good setting)!") - - self.mount_a.run_python(template.format( - fill_mb=fill_mb, - file_path=file_path, - full_wait=full_wait - )) - - def test_full_fclose(self): - # A remote script which opens a file handle, fills up the filesystem, and then - # checks that ENOSPC errors on buffered writes appear correctly as errors in fsync - remote_script = dedent(""" - import time - import datetime - import subprocess - import os - - # Write some buffered data through before going full, all should be well - bytes = 0 - f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT) - bytes += os.write(f, 'a' * 4096) - os.fsync(f) - - # Okay, now we're going to fill up the filesystem, and then keep - # writing until we see an error from fsync. 
As long as we're doing - # buffered IO, the error should always only appear from fsync and not - # from write - full = False - - for n in range(0, {fill_mb}): - bytes += os.write(f, 'x' * 1024 * 1024) - - # OK, now we should sneak in under the full condition - # due to the time it takes the OSDs to report to the - # mons, and get a successful fsync on our full-making data - os.fsync(f) - - # Now wait for the full flag to get set so that our - # next flush IO will fail - time.sleep(30) - - # A buffered IO, should succeed - os.write(f, 'x' * 4096) - - # Wait long enough for a background flush that should fail - time.sleep(30) - - # ...and check that the failed background flush is reflected in fclose - try: - os.close(f) - except OSError: - print "close() returned an error as expected" - else: - raise RuntimeError("close() failed to raise error") - - os.unlink("{file_path}") - """) - self._remote_write_test(remote_script) - - def test_full_fsync(self): - """ - That when the full flag is encountered during asynchronous - flushes, such that an fwrite() succeeds but an fsync/fclose() - should return the ENOSPC error. - """ - - # A remote script which opens a file handle, fills up the filesystem, and then - # checks that ENOSPC errors on buffered writes appear correctly as errors in fsync - remote_script = dedent(""" - import time - import datetime - import subprocess - import os - - # Write some buffered data through before going full, all should be well - bytes = 0 - f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT) - bytes += os.write(f, 'a' * 4096) - os.fsync(f) - - # Okay, now we're going to fill up the filesystem, and then keep - # writing until we see an error from fsync. 
As long as we're doing - # buffered IO, the error should always only appear from fsync and not - # from write - full = False - - for n in range(0, {fill_mb} + 1): - bytes += os.write(f, 'x' * 1024 * 1024) - try: - os.fsync(f) - except OSError as e: - print "Reached fullness after %.2f MB" % (bytes / (1024.0 * 1024.0)) - full = True - break - else: - print "Not full yet after %.2f MB" % (bytes / (1024.0 * 1024.0)) - - if n > {fill_mb} * 0.8: - # Be cautious in the last region where we expect to hit - # the full condition, so that we don't overshoot too dramatically - time.sleep({full_wait}) - - if not full: - raise RuntimeError("Failed to reach fullness after writing %d bytes" % bytes) - - # The error sticks to the inode until we dispose of it - try: - os.close(f) - except OSError: - print "Saw error from close() as expected" - else: - raise RuntimeError("Did not see expected error from close()") - - os.unlink("{file_path}") - """) - - self._remote_write_test(remote_script) - - -@contextlib.contextmanager -def task(ctx, config): - fs = Filesystem(ctx) - - # Pick out the clients we will use from the configuration - # ======================================================= - if len(ctx.mounts) < 2: - raise RuntimeError("Need at least two clients") - mount_a = ctx.mounts.values()[0] - mount_b = ctx.mounts.values()[1] - - # Stash references on ctx so that we can easily debug in interactive mode - # ======================================================================= - ctx.filesystem = fs - ctx.mount_a = mount_a - ctx.mount_b = mount_b - - run_tests(ctx, config, TestClusterFull, { - 'fs': fs, - 'mount_a': mount_a, - 'mount_b': mount_b - }) - - # Continue to any downstream tasks - # ================================ - yield diff --git a/tasks/mds_journal_migration.py b/tasks/mds_journal_migration.py deleted file mode 100644 index 992186e67c5..00000000000 --- a/tasks/mds_journal_migration.py +++ /dev/null @@ -1,132 +0,0 @@ -from StringIO import StringIO -import 
@contextlib.contextmanager
def task(ctx, config):
    """
    Given a Ceph cluster has already been set up, exercise the migration
    of the CephFS journal from an older format to the latest format.  On
    successful completion the filesystem will be running with a journal
    in the new format.

    Optionally specify which client to use like this:

    - mds-journal_migration:
        client: client.0

    :param ctx: teuthology run context; must already carry ``ceph`` and
                ``mounts`` attributes (i.e. be nested in those tasks).
    :param config: optional dict; only the ``client`` key is read.
    :raises RuntimeError: if the task is not nested correctly, no client can
                          be found, the journal was not upgraded, or the
                          journal tool output is not as expected.
    """
    if not hasattr(ctx, 'ceph'):
        raise RuntimeError("This task must be nested in 'ceph' task")

    if not hasattr(ctx, 'mounts'):
        raise RuntimeError("This task must be nested inside 'kclient' or 'ceph_fuse' task")

    # Determine which client we will use
    if config and 'client' in config:
        # Use client specified in config
        client_role = config['client']
        client_list = list(misc.get_clients(ctx, [client_role]))
        try:
            client_id = client_list[0][0]
        except IndexError:
            raise RuntimeError("Client role '{0}' not found".format(client_role))
    else:
        # Pick one arbitrary client to use
        client_list = list(misc.all_roles_of_type(ctx.cluster, 'client'))
        try:
            client_id = client_list[0]
        except IndexError:
            raise RuntimeError("This task requires at least one client")

    fs = Filesystem(ctx)
    ctx.fs = fs
    old_journal_version = JOURNAL_FORMAT_LEGACY
    new_journal_version = JOURNAL_FORMAT_RESILIENT

    fs.set_ceph_conf('mds', 'mds journal format', old_journal_version)

    # Create a filesystem using the older journal format.
    for mount in ctx.mounts.values():
        mount.umount_wait()
    fs.mds_stop()
    fs.reset()
    fs.mds_restart()

    # Do some client work so that the log is populated with something.
    mount = ctx.mounts[client_id]
    with mount.mounted():
        mount.create_files()
        mount.check_files()  # sanity, this should always pass

        # Run a more substantial workunit so that the length of the log to be
        # converted is going to span at least a few segments.  This stays
        # inside the ``with`` because the workunit needs the client mounted.
        workunit(ctx, {
            'clients': {
                "client.{0}".format(client_id): ["suites/fsstress.sh"],
            },
            "timeout": "3h"
        })

    # Modify the ceph.conf to ask the MDS to use the new journal format.
    fs.set_ceph_conf('mds', 'mds journal format', new_journal_version)

    # Restart the MDS and make sure all daemons come up into a valid state.
    fs.mds_fail_restart()
    fs.wait_for_daemons()

    # Check that files created in the initial client workload are still visible
    # in a client mount.
    with mount.mounted():
        mount.check_files()

    # Verify that the journal really has been rewritten.
    journal_version = fs.get_journal_version()
    if journal_version != new_journal_version:
        # journal_version is a plain value here; calling it (as the code
        # used to) raised TypeError and masked this diagnostic.
        raise RuntimeError("Journal was not upgraded, version should be {0} but is {1}".format(
            new_journal_version, journal_version
        ))

    # Verify that cephfs-journal-tool can now read the rewritten journal
    proc = mount.client_remote.run(
        args=["cephfs-journal-tool", "journal", "inspect"],
        stdout=StringIO())
    if not proc.stdout.getvalue().strip().endswith(": OK"):
        raise RuntimeError("Unexpected journal-tool result: '{0}'".format(
            proc.stdout.getvalue()
        ))

    mount.client_remote.run(
        args=["sudo", "cephfs-journal-tool", "event", "get", "json", "--path", "/tmp/journal.json"])
    proc = mount.client_remote.run(
        args=[
            "python",
            "-c",
            # print(...) with a single argument behaves the same under a
            # Python 2 or Python 3 interpreter on the remote host.
            "import json; print(len(json.load(open('/tmp/journal.json'))))"
        ],
        stdout=StringIO())
    event_count = int(proc.stdout.getvalue().strip())
    if event_count < 1000:
        # Approximate value of "lots", expected from having run fsstress
        raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count))

    # Leave all MDSs and clients running for any child tasks
    for mount in ctx.mounts.values():
        mount.mount()
        mount.wait_until_mounted()

    yield
"linkdir/link3"]) - self.mount_a.run_shell(["rm", "-f", "linkdir/link3"]) - self.mount_a.run_shell(["ln", "rootfile", "linkdir/link3"]) - - # Before we unmount, make a note of the inode numbers, later we will - # check that they match what we recover from the journal - rootfile_ino = self.mount_a.path_to_ino("rootfile") - subdir_ino = self.mount_a.path_to_ino("subdir") - linkdir_ino = self.mount_a.path_to_ino("linkdir") - subdirfile_ino = self.mount_a.path_to_ino("subdir/subdirfile") - - self.mount_a.umount_wait() - - # Stop the MDS - self.fs.mds_stop() - self.fs.mds_fail() - - # Now, the journal should contain the operations, but the backing - # store shouldn't - with self.assertRaises(ObjectNotFound): - self.fs.list_dirfrag(subdir_ino) - self.assertEqual(self.fs.list_dirfrag(ROOT_INO), []) - - # Execute the dentry recovery, this should populate the backing store - self.fs.journal_tool(['event', 'recover_dentries', 'list']) - - # Dentries in ROOT_INO are present - self.assertEqual(sorted(self.fs.list_dirfrag(ROOT_INO)), sorted(['rootfile_head', 'subdir_head', 'linkdir_head'])) - self.assertEqual(self.fs.list_dirfrag(subdir_ino), ['subdirfile_head']) - self.assertEqual(sorted(self.fs.list_dirfrag(linkdir_ino)), - sorted(['link0_head', 'link1_head', 'link2_head', 'link3_head'])) - - # Now check the MDS can read what we wrote: truncate the journal - # and start the mds. - self.fs.journal_tool(['journal', 'reset']) - self.fs.mds_restart() - self.fs.wait_for_daemons() - - # List files - self.mount_a.mount() - self.mount_a.wait_until_mounted() - - # First ls -R to populate MDCache, such that hardlinks will - # resolve properly (recover_dentries does not create backtraces, - # so ordinarily hardlinks to inodes that happen not to have backtraces - # will be invisible in readdir). 
- # FIXME: hook in forward scrub here to regenerate backtraces - proc = self.mount_a.run_shell(['ls', '-R']) - - proc = self.mount_a.run_shell(['ls', '-R']) - self.assertEqual(proc.stdout.getvalue().strip(), - dedent(""" - .: - linkdir - rootfile - subdir - - ./linkdir: - link0 - link1 - link2 - link3 - - ./subdir: - subdirfile - """).strip()) - - # Check the correct inos were preserved by path - self.assertEqual(rootfile_ino, self.mount_a.path_to_ino("rootfile")) - self.assertEqual(subdir_ino, self.mount_a.path_to_ino("subdir")) - self.assertEqual(subdirfile_ino, self.mount_a.path_to_ino("subdir/subdirfile")) - - # Check that the hard link handling came out correctly - self.assertEqual(self.mount_a.path_to_ino("linkdir/link0"), subdirfile_ino) - self.assertEqual(self.mount_a.path_to_ino("linkdir/link1"), subdirfile_ino) - self.assertNotEqual(self.mount_a.path_to_ino("linkdir/link2"), subdirfile_ino) - self.assertEqual(self.mount_a.path_to_ino("linkdir/link3"), rootfile_ino) - - # Create a new file, ensure it is not issued the same ino as one of the - # recovered ones - self.mount_a.run_shell(["touch", "afterwards"]) - new_ino = self.mount_a.path_to_ino("afterwards") - self.assertNotIn(new_ino, [rootfile_ino, subdir_ino, subdirfile_ino]) - - def test_reset(self): - """ - That after forcibly modifying the backing store, we can get back into - a good state by resetting the MDSMap. - - The scenario is that we have two active MDSs, and we lose the journals. Once - we have completely lost confidence in the integrity of the metadata, we want to - return the system to a single-MDS state to go into a scrub to recover what we - can. 
- """ - - # Set max_mds to 2 - self.fs.mon_manager.raw_cluster_cmd_result('mds', 'set', "max_mds", "2") - - # See that we have two active MDSs - self.wait_until_equal(lambda: len(self.fs.get_active_names()), 2, 30, - reject_fn=lambda v: v > 2 or v < 1) - active_mds_names = self.fs.get_active_names() - - # Do a bunch of I/O such that at least some will hit the second MDS: create - # lots of directories so that the balancer should find it easy to make a decision - # to allocate some of them to the second mds. - spammers = [] - for n in range(0, 16): - dir_name = "spam_{0}".format(n) - spammers.append(self.mount_a.spam_dir_background(dir_name)) - - def subtrees_assigned(): - got_subtrees = self.fs.mds_asok(["get", "subtrees"], mds_id=active_mds_names[0]) - rank_1_count = len([s for s in got_subtrees if s['auth_first'] == 1]) - - # Greater than 1, because there is typically 1 for ~mds1, and once it - # has been assigned something in addition to that it means it has been - # assigned a "real" subtree. - return rank_1_count > 1 - - # We are waiting for the MDS to respond to hot directories, which - # is not guaranteed to happen at a particular time, so a lengthy timeout here. - self.wait_until_true(subtrees_assigned, 600) - - # Flush the journals so that we have some backing store data - # belonging to one MDS, and some to the other MDS. 
- for mds_name in active_mds_names: - self.fs.mds_asok(["flush", "journal"], mds_name) - - # Stop (hard) the second MDS daemon - self.fs.mds_stop(active_mds_names[1]) - - # Wipe out the tables for MDS rank 1 so that it is broken and can't start - # (this is the simulated failure that we will demonstrate that the disaster - # recovery tools can get us back from) - self.fs.erase_metadata_objects(prefix="mds1_") - - # Try to access files from the client - blocked_ls = self.mount_a.run_shell(["ls", "-R"], wait=False) - - # Check that this "ls -R" blocked rather than completing: indicates - # it got stuck trying to access subtrees which were on the now-dead MDS. - log.info("Sleeping to check ls is blocked...") - time.sleep(60) - self.assertFalse(blocked_ls.finished) - - # This mount is now useless because it will depend on MDS rank 1, and MDS rank 1 - # is not coming back. Kill it. - log.info("Killing mount, it's blocked on the MDS we killed") - self.mount_a.kill() - self.mount_a.kill_cleanup() - try: - # Now that the mount is dead, the ls -R should error out. 
- blocked_ls.wait() - except CommandFailedError: - pass - - log.info("Terminating spammer processes...") - for spammer_proc in spammers: - spammer_proc.stdin.close() - try: - spammer_proc.wait() - except CommandFailedError: - pass - - # See that the second MDS will crash when it starts and tries to - # acquire rank 1 - self.fs.mds_restart(active_mds_names[1]) - crasher = self.fs.mds_daemons[active_mds_names[1]].proc - - try: - crasher.wait() - except CommandFailedError as e: - log.info("MDS '{0}' crashed with status {1} as expected".format(active_mds_names[1], e.exitstatus)) - self.fs.mds_daemons[active_mds_names[1]].proc = None - else: - raise RuntimeError("MDS daemon '{0}' did not crash as expected".format(active_mds_names[1])) - - # Now it's crashed, let the MDSMonitor know that it's not coming back - self.fs.mds_fail(active_mds_names[1]) - - # Now give up and go through a disaster recovery procedure - self.fs.mds_stop(active_mds_names[0]) - self.fs.mds_fail(active_mds_names[0]) - # Invoke recover_dentries quietly, because otherwise log spews millions of lines - self.fs.journal_tool(["event", "recover_dentries", "summary"], rank=0, quiet=True) - self.fs.journal_tool(["event", "recover_dentries", "summary"], rank=1, quiet=True) - self.fs.table_tool(["0", "reset", "session"]) - self.fs.journal_tool(["journal", "reset"], rank=0) - self.fs.erase_mds_objects(1) - self.fs.admin_remote.run(args=['sudo', 'ceph', 'fs', 'reset', 'default', '--yes-i-really-mean-it']) - - # Bring an MDS back online, mount a client, and see that we can walk the full - # filesystem tree again - self.fs.mds_restart(active_mds_names[0]) - self.wait_until_equal(lambda: self.fs.get_active_names(), [active_mds_names[0]], 30, - reject_fn=lambda v: len(v) > 1) - self.mount_a.mount() - self.mount_a.run_shell(["ls", "-R"], wait=True) - - def test_table_tool(self): - active_mdss = self.fs.get_active_names() - self.assertEqual(len(active_mdss), 1) - mds_name = active_mdss[0] - - 
self.mount_a.run_shell(["touch", "foo"]) - self.fs.mds_asok(["flush", "journal"], mds_name) - - log.info(self.fs.table_tool(["all", "show", "inode"])) - log.info(self.fs.table_tool(["all", "show", "snap"])) - log.info(self.fs.table_tool(["all", "show", "session"])) - - # Inode table should always be the same because initial state - # and choice of inode are deterministic. - # Should see one inode consumed - self.assertEqual( - json.loads(self.fs.table_tool(["all", "show", "inode"])), - {"0": { - "data": { - "version": 2, - "inotable": { - "projected_free": [ - {"start": 1099511628777, - "len": 1099511626775}], - "free": [ - {"start": 1099511628777, - "len": 1099511626775}]}}, - "result": 0}} - - ) - - # Should see one session - session_data = json.loads(self.fs.table_tool( - ["all", "show", "session"])) - self.assertEqual(len(session_data["0"]["data"]["Sessions"]), 1) - self.assertEqual(session_data["0"]["result"], 0) - - # Should see no snaps - self.assertEqual( - json.loads(self.fs.table_tool(["all", "show", "snap"])), - {"version": 0, - "snapserver": {"last_snap": 1, - "pending_noop": [], - "snaps": [], - "need_to_purge": {}, - "pending_create": [], - "pending_destroy": []}, - "result": 0} - ) - - # Reset everything - for table in ["session", "inode", "snap"]: - self.fs.table_tool(["all", "reset", table]) - - log.info(self.fs.table_tool(["all", "show", "inode"])) - log.info(self.fs.table_tool(["all", "show", "snap"])) - log.info(self.fs.table_tool(["all", "show", "session"])) - - # Should see 0 sessions - session_data = json.loads(self.fs.table_tool( - ["all", "show", "session"])) - self.assertEqual(len(session_data["0"]["data"]["Sessions"]), 0) - self.assertEqual(session_data["0"]["result"], 0) - - # Should see entire inode range now marked free - self.assertEqual( - json.loads(self.fs.table_tool(["all", "show", "inode"])), - {"0": {"data": {"version": 1, - "inotable": {"projected_free": [ - {"start": 1099511627776, - "len": 1099511627776}], - "free": [ - 
@contextlib.contextmanager
def task(ctx, config):
    """
    Run the TestJournalRepair test case against the running cluster.

    :param ctx: teuthology run context; ``ctx.mounts`` must contain at least
                one client mount.
    :param config: task configuration, passed through to ``run_tests``.
    :raises RuntimeError: if no client mount is available.
    """
    fs = Filesystem(ctx)

    # Pick out the clients we will use from the configuration
    # =======================================================
    if not ctx.mounts:
        raise RuntimeError("Need at least one client")
    # next(iter(...)) yields the same first value as values()[0] did, but
    # also works where dict.values() returns a non-subscriptable view.
    mount_a = next(iter(ctx.mounts.values()))

    # Stash references on ctx so that we can easily debug in interactive mode
    # =======================================================================
    ctx.filesystem = fs
    ctx.mount_a = mount_a

    run_tests(ctx, config, TestJournalRepair, {
        'fs': fs,
        'mount_a': mount_a
    })

    # Continue to any downstream tasks
    # ================================
    yield
- - Usage: - mds_scrub_checks: - mds_rank: 0 - path: path/to/test/dir - client: 0 - run_seq: [0-9]+ - - Increment the run_seq on subsequent invocations within a single test run; - it uses that value to generate unique folder and file names. - """ - - mds_rank = config.get("mds_rank") - test_path = config.get("path") - run_seq = config.get("run_seq") - client_id = config.get("client") - - if mds_rank is None or test_path is None or run_seq is None: - raise ValueError("Must specify each of mds_rank, test_path, run_seq," - "client_id in config!") - - teuthdir = teuthology.get_testdir(ctx) - client_path = "{teuthdir}/mnt.{id_}/{test_path}".\ - format(teuthdir=teuthdir, - id_=client_id, - test_path=test_path) - - log.info("Cloning repo into place (if not present)") - repo_path = clone_repo(ctx, client_id, client_path) - - log.info("Initiating mds_scrub_checks on mds.{id_}, " - "test_path {path}, run_seq {seq}".format( - id_=mds_rank, path=test_path, seq=run_seq)) - - def json_validator(json, rc, element, expected_value): - if (rc != 0): - return False, "asok command returned error {rc}".format(rc=str(rc)) - element_value = json.get(element) - if element_value != expected_value: - return False, "unexpectedly got {jv} instead of {ev}!".format( - jv=element_value, ev=expected_value) - return True, "Succeeded" - - success_validator = lambda j, r: json_validator(j, r, "return_code", 0) - - nep = "{test_path}/i/dont/exist".format(test_path=test_path) - command = "flush_path {nep}".format(nep=nep) - asok_command(ctx, mds_rank, command, - lambda j, r: json_validator(j, r, "return_code", -2), - filesystem) - - command = "scrub_path {nep}".format(nep=nep) - asok_command(ctx, mds_rank, command, - lambda j, r: json_validator(j, r, "return_code", -2), - filesystem) - - test_repo_path = "{test_path}/ceph-qa-suite".format(test_path=test_path) - dirpath = "{repo_path}/suites".format(repo_path=test_repo_path) - - if (run_seq == 0): - log.info("First run: flushing 
{dirpath}".format(dirpath=dirpath)) - command = "flush_path {dirpath}".format(dirpath=dirpath) - asok_command(ctx, mds_rank, command, success_validator, filesystem) - command = "scrub_path {dirpath}".format(dirpath=dirpath) - asok_command(ctx, mds_rank, command, success_validator, filesystem) - - filepath = "{repo_path}/suites/fs/verify/validater/valgrind.yaml".format( - repo_path=test_repo_path) - if (run_seq == 0): - log.info("First run: flushing {filepath}".format(filepath=filepath)) - command = "flush_path {filepath}".format(filepath=filepath) - asok_command(ctx, mds_rank, command, success_validator, filesystem) - command = "scrub_path {filepath}".format(filepath=filepath) - asok_command(ctx, mds_rank, command, success_validator, filesystem) - - filepath = "{repo_path}/suites/fs/basic/clusters/fixed-3-cephfs.yaml".\ - format(repo_path=test_repo_path) - command = "scrub_path {filepath}".format(filepath=filepath) - asok_command(ctx, mds_rank, command, - lambda j, r: json_validator(j, r, "performed_validation", - False), - filesystem) - - if (run_seq == 0): - log.info("First run: flushing base dir /") - command = "flush_path /" - asok_command(ctx, mds_rank, command, success_validator, filesystem) - command = "scrub_path /" - asok_command(ctx, mds_rank, command, success_validator, filesystem) - - client = ctx.manager.find_remote("client", client_id) - new_dir = "{repo_path}/new_dir_{i}".format(repo_path=repo_path, i=run_seq) - test_new_dir = "{repo_path}/new_dir_{i}".format(repo_path=test_repo_path, - i=run_seq) - client.run(args=[ - "mkdir", new_dir]) - command = "flush_path {dir}".format(dir=test_new_dir) - asok_command(ctx, mds_rank, command, success_validator, filesystem) - - new_file = "{repo_path}/new_file_{i}".format(repo_path=repo_path, - i=run_seq) - test_new_file = "{repo_path}/new_file_{i}".format(repo_path=test_repo_path, - i=run_seq) - client.run(args=[ - "echo", "hello", run.Raw('>'), new_file]) - command = "flush_path 
class AsokCommandFailedError(Exception):
    """
    Exception thrown when we get an unexpected response
    on an admin socket command

    Attributes mirror the constructor arguments: the command string, the
    process return code, the decoded JSON output (or None) and a string
    explaining why the response was rejected.
    """
    def __init__(self, command, rc, json, errstring):
        # Hand the fields to Exception as well so that ``args`` and the
        # default repr carry them.
        super(AsokCommandFailedError, self).__init__(
            command, rc, json, errstring)
        self.command = command
        self.rc = rc
        self.json = json
        self.errstring = errstring

    def __str__(self):
        # The two literals must be joined *before* .format() is applied.
        # Previously the first literal was returned verbatim and the
        # formatted continuation was an unreachable statement.
        template = ("Admin socket: {command} failed with rc={rc}, "
                    "json output={json}, because '{es}'")
        return template.format(
            command=self.command, rc=self.rc,
            json=self.json, es=self.errstring)
def clone_repo(ctx, client_id, path, branch='giant'):
    """
    Make sure a checkout of ceph-qa-suite exists under *path* on a client.

    Creates *path* if needed, then clones the repository into it unless a
    directory listing of the target already succeeds.

    :param ctx: teuthology run context (``ctx.manager`` locates the remote).
    :param client_id: id of the client role whose remote runs the commands.
    :param path: directory on the client that will contain the checkout.
    :param branch: branch to clone; defaults to the previously hard-coded
                   'giant'.
    :returns: path of the checkout, ``<path>/ceph-qa-suite``.
    """
    repo = "ceph-qa-suite"
    repo_path = "{path}/{repo}".format(path=path, repo=repo)

    client = ctx.manager.find_remote("client", client_id)
    client.run(
        args=[
            "mkdir", "-p", path
        ]
    )
    client.run(
        args=[
            # Only clone when listing the target fails, i.e. it is absent.
            "ls", repo_path, run.Raw('||'),
            "git", "clone", '--branch', branch,
            "https://github.com/ceph/{repo}".format(repo=repo),
            repo_path
        ]
    )

    return repo_path
- - max_revive_delay: [default: 10] maximum number of seconds to delay before - bringing back a thrashed MDS - - thrash_in_replay: [default: 0.0] likelihood that the MDS will be thrashed - during replay. Value should be between 0.0 and 1.0 - - max_replay_thrash_delay: [default: 4] maximum number of seconds to delay while in - the replay state before thrashing - - thrash_weights: allows specific MDSs to be thrashed more/less frequently. This option - overrides anything specified by max_thrash. This option is a dict containing - mds.x: weight pairs. For example, [mds.a: 0.7, mds.b: 0.3, mds.c: 0.0]. Each weight - is a value from 0.0 to 1.0. Any MDSs not specified will be automatically - given a weight of 0.0. For a given MDS, by default the thrasher delays for up - to max_thrash_delay, thrashes, waits for the MDS to recover, and iterates. If a non-zero - weight is specified for an MDS, for each iteration the thrasher chooses whether to thrash - during that iteration based on a random value [0-1] not exceeding the weight of that MDS. - - Examples:: - - - The following example sets the likelihood that mds.a will be thrashed - to 80%, mds.b to 20%, and other MDSs will not be thrashed. It also sets the - likelihood that an MDS will be thrashed in replay to 40%. - Thrash weights do not have to sum to 1.
def __init__(self, ctx, manager, config, logger, failure_group, weight):
    """
    Set up a thrasher for one failure group.

    :param ctx: teuthology run context.
    :param manager: cluster manager; must report ``is_clean()``.
    :param config: dict of thrasher options (see the class docstring).
    :param logger: logger used by ``log()`` and for exception reports.
    :param failure_group: MDS ids making up the group to thrash.
    :param weight: thrash weight for this group, compared against a random
                   value on every iteration of ``do_thrash``.
    """
    super(MDSThrasher, self).__init__()

    self.ctx = ctx
    self.manager = manager
    assert self.manager.is_clean()

    self.stopping = Event()
    self.logger = logger
    self.config = config

    self.randomize = bool(self.config.get('randomize', True))
    # The documented option is 'max_thrash_delay'.  'thrash_delay' is the
    # key this code used to read, so keep honouring it as a fallback.
    self.max_thrash_delay = float(self.config.get(
        'max_thrash_delay', self.config.get('thrash_delay', 30.0)))
    self.thrash_in_replay = float(self.config.get('thrash_in_replay', 0.0))
    assert self.thrash_in_replay >= 0.0 and self.thrash_in_replay <= 1.0, 'thrash_in_replay ({v}) must be between [0.0, 1.0]'.format(
        v=self.thrash_in_replay)

    self.max_replay_thrash_delay = float(self.config.get('max_replay_thrash_delay', 4.0))

    self.max_revive_delay = float(self.config.get('max_revive_delay', 10.0))

    self.failure_group = failure_group
    self.weight = weight
self.log('waiting for {delay} secs before thrashing'.format(delay=delay)) - self.stopping.wait(delay) - if self.stopping.is_set(): - continue - - skip = random.randrange(0.0, 1.0) - if self.weight < 1.0 and skip > self.weight: - self.log('skipping thrash iteration with skip ({skip}) > weight ({weight})'.format(skip=skip, - weight=self.weight)) - continue - - # find the active mds in the failure group - statuses = [self.manager.get_mds_status(m) for m in self.failure_group] - actives = filter(lambda s: s and s['state'] == 'up:active', statuses) - assert len(actives) == 1, 'Can only have one active in a failure group' - - active_mds = actives[0]['name'] - active_rank = actives[0]['rank'] - - self.log('kill mds.{id} (rank={r})'.format(id=active_mds, r=active_rank)) - self.manager.kill_mds_by_rank(active_rank) - - # wait for mon to report killed mds as crashed - last_laggy_since = None - itercount = 0 - while True: - failed = self.manager.get_mds_status_all()['failed'] - status = self.manager.get_mds_status(active_mds) - if not status: - break - if 'laggy_since' in status: - last_laggy_since = status['laggy_since'] - break - if any([(f == active_mds) for f in failed]): - break - self.log( - 'waiting till mds map indicates mds.{_id} is laggy/crashed, in failed state, or mds.{_id} is removed from mdsmap'.format( - _id=active_mds)) - itercount = itercount + 1 - if itercount > 10: - self.log('mds map: {status}'.format(status=self.manager.get_mds_status_all())) - time.sleep(2) - if last_laggy_since: - self.log( - 'mds.{_id} reported laggy/crashed since: {since}'.format(_id=active_mds, since=last_laggy_since)) - else: - self.log('mds.{_id} down, removed from mdsmap'.format(_id=active_mds, since=last_laggy_since)) - - # wait for a standby mds to takeover and become active - takeover_mds = None - takeover_rank = None - itercount = 0 - while True: - statuses = [self.manager.get_mds_status(m) for m in self.failure_group] - actives = filter(lambda s: s and s['state'] == 
'up:active', statuses) - if len(actives) > 0: - assert len(actives) == 1, 'Can only have one active in failure group' - takeover_mds = actives[0]['name'] - takeover_rank = actives[0]['rank'] - break - itercount = itercount + 1 - if itercount > 10: - self.log('mds map: {status}'.format(status=self.manager.get_mds_status_all())) - - self.log('New active mds is mds.{_id}'.format(_id=takeover_mds)) - - # wait for a while before restarting old active to become new - # standby - delay = self.max_revive_delay - if self.randomize: - delay = random.randrange(0.0, self.max_revive_delay) - - self.log('waiting for {delay} secs before reviving mds.{id}'.format( - delay=delay, id=active_mds)) - time.sleep(delay) - - self.log('reviving mds.{id}'.format(id=active_mds)) - self.manager.revive_mds(active_mds, standby_for_rank=takeover_rank) - - status = {} - while True: - status = self.manager.get_mds_status(active_mds) - if status and (status['state'] == 'up:standby' or status['state'] == 'up:standby-replay'): - break - self.log( - 'waiting till mds map indicates mds.{_id} is in standby or standby-replay'.format(_id=active_mds)) - time.sleep(2) - self.log('mds.{_id} reported in {state} state'.format(_id=active_mds, state=status['state'])) - - # don't do replay thrashing right now - continue - # this might race with replay -> active transition... 
@contextlib.contextmanager
def task(ctx, config):
    """
    Stress test the mds by thrashing while another task/workunit
    is running.

    Please refer to MDSThrasher class for further information on the
    available options.

    :param ctx: teuthology run context; the cluster needs at least two MDS
                roles.
    :param config: dict of options, or None for defaults.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'mds_thrash task only accepts a dict for configuration'
    mdslist = list(teuthology.all_roles_of_type(ctx.cluster, 'mds'))
    assert len(mdslist) > 1, \
        'mds_thrash task requires at least 2 metadata servers'

    # choose random seed
    if 'seed' in config:
        seed = int(config['seed'])
    else:
        seed = int(time.time())
    log.info('mds thrasher using random seed: {seed}'.format(seed=seed))
    random.seed(seed)

    max_thrashers = config.get('max_thrash', 1)
    thrashers = {}

    (first,) = ctx.cluster.only('mds.{_id}'.format(_id=mdslist[0])).remotes.keys()
    manager = ceph_manager.CephManager(
        first, ctx=ctx, logger=log.getChild('ceph_manager'),
    )

    # make sure everyone is in active, standby, or standby-replay
    log.info('Wait for all MDSs to reach steady state...')
    steady_states = ('up:active', 'up:standby', 'up:standby-replay')
    statuses = None
    statuses_by_rank = None
    while True:
        statuses = {m: manager.get_mds_status(m) for m in mdslist}
        statuses_by_rank = {}
        for s in statuses.values():
            if isinstance(s, dict):
                statuses_by_rank[s['rank']] = s

        ready = [m for (m, s) in statuses.items()
                 if s is not None and s['state'] in steady_states]
        if len(ready) == len(statuses):
            break
        time.sleep(2)
    log.info('Ready to start thrashing')

    # setup failure groups: each active MDS plus the daemons standing by for it
    failure_groups = {}
    actives = {s['name']: s for s in statuses.values() if s['state'] == 'up:active'}
    log.info('Actives is: {d}'.format(d=actives))
    log.info('Statuses is: {d}'.format(d=statuses_by_rank))
    for active in actives:
        for (r, s) in statuses.items():
            if s['standby_for_name'] == active:
                log.info('Assigning mds rank {r} to failure group {g}'.format(r=r, g=active))
                failure_groups.setdefault(active, []).append(r)

    manager.wait_for_clean()
    for (active, standbys) in failure_groups.items():
        weight = 1.0
        if 'thrash_weights' in config:
            # Weights are fractions in [0.0, 1.0].  int() truncated them to
            # 0 and raised ValueError on the '0.0' default string.
            weight = float(config['thrash_weights'].get('mds.{_id}'.format(_id=active), '0.0'))

        failure_group = [active]
        failure_group.extend(standbys)

        thrasher = MDSThrasher(
            ctx, manager, config,
            logger=log.getChild('mds_thrasher.failure_group.[{a}, {sbs}]'.format(
                a=active,
                sbs=', '.join(standbys)
                )
            ),
            failure_group=failure_group,
            weight=weight)
        thrasher.start()
        thrashers[active] = thrasher

        # if thrash_weights isn't specified and we've reached max_thrash,
        # we're done
        if 'thrash_weights' not in config and len(thrashers) == max_thrashers:
            break

    try:
        log.debug('Yielding')
        yield
    finally:
        log.info('joining mds_thrashers')
        for thrasher in thrashers.values():
            # Report the group this thrasher was built with, not whatever
            # the setup loop variable last held (unbound if no groups).
            log.info('join thrasher for failure group [{fg}]'.format(
                fg=', '.join(thrasher.failure_group)))
            thrasher.stop()
            thrasher.join()
        log.info('done joining')
-local-hostname: test diff --git a/tasks/mon_clock_skew_check.py b/tasks/mon_clock_skew_check.py deleted file mode 100644 index 891e6ec484e..00000000000 --- a/tasks/mon_clock_skew_check.py +++ /dev/null @@ -1,261 +0,0 @@ -""" -Handle clock skews in monitors. -""" -import logging -import contextlib -import ceph_manager -import time -import gevent -from StringIO import StringIO -from teuthology import misc as teuthology - -log = logging.getLogger(__name__) - -class ClockSkewCheck: - """ - Periodically check if there are any clock skews among the monitors in the - quorum. By default, assume no skews are supposed to exist; that can be - changed using the 'expect-skew' option. If 'fail-on-skew' is set to false, - then we will always succeed and only report skews if any are found. - - This class does not spawn a thread. It assumes that, if that is indeed - wanted, it should be done by a third party (for instance, the task using - this class). We intend it as such in order to reuse this class if need be. - - This task accepts the following options: - - interval amount of seconds to wait in-between checks. (default: 30.0) - max-skew maximum skew, in seconds, that is considered tolerable before - issuing a warning. (default: 0.05) - expect-skew 'true' or 'false', to indicate whether to expect a skew during - the run or not. If 'true', the test will fail if no skew is - found, and succeed if a skew is indeed found; if 'false', it's - the other way around. (default: false) - never-fail Don't fail the run if a skew is detected and we weren't - expecting it, or if no skew is detected and we were expecting - it. (default: False) - - at-least-once Runs at least once, even if we are told to stop. - (default: True) - at-least-once-timeout If we were told to stop but we are attempting to - run at least once, timeout after this many seconds. - (default: 600) - - Example: - Expect a skew higher than 0.05 seconds, but only report it without - failing the teuthology run. 
- - - mon_clock_skew_check: - interval: 30 - max-skew: 0.05 - expect_skew: true - never-fail: true - """ - - def __init__(self, ctx, manager, config, logger): - self.ctx = ctx - self.manager = manager - - self.stopping = False - self.logger = logger - self.config = config - - if self.config is None: - self.config = dict() - - self.check_interval = float(self.config.get('interval', 30.0)) - - first_mon = teuthology.get_first_mon(ctx, config) - remote = ctx.cluster.only(first_mon).remotes.keys()[0] - proc = remote.run( - args=[ - 'sudo', - 'ceph-mon', - '-i', first_mon[4:], - '--show-config-value', 'mon_clock_drift_allowed' - ], stdout=StringIO(), wait=True - ) - self.max_skew = self.config.get('max-skew', float(proc.stdout.getvalue())) - - self.expect_skew = self.config.get('expect-skew', False) - self.never_fail = self.config.get('never-fail', False) - self.at_least_once = self.config.get('at-least-once', True) - self.at_least_once_timeout = self.config.get('at-least-once-timeout', 600.0) - - def info(self, x): - """ - locally define logger for info messages - """ - self.logger.info(x) - - def warn(self, x): - """ - locally define logger for warnings - """ - self.logger.warn(x) - - def debug(self, x): - """ - locally define logger for debug messages - """ - self.logger.info(x) - self.logger.debug(x) - - def finish(self): - """ - Break out of the do_check loop. - """ - self.stopping = True - - def sleep_interval(self): - """ - If a sleep interval is set, sleep for that amount of time. - """ - if self.check_interval > 0.0: - self.debug('sleeping for {s} seconds'.format( - s=self.check_interval)) - time.sleep(self.check_interval) - - def print_skews(self, skews): - """ - Display skew values. 
- """ - total = len(skews) - if total > 0: - self.info('---------- found {n} skews ----------'.format(n=total)) - for mon_id, values in skews.iteritems(): - self.info('mon.{id}: {v}'.format(id=mon_id, v=values)) - self.info('-------------------------------------') - else: - self.info('---------- no skews were found ----------') - - def do_check(self): - """ - Clock skew checker. Loops until finish() is called. - """ - self.info('start checking for clock skews') - skews = dict() - ran_once = False - - started_on = None - - while not self.stopping or (self.at_least_once and not ran_once): - - if self.at_least_once and not ran_once and self.stopping: - if started_on is None: - self.info('kicking-off timeout (if any)') - started_on = time.time() - elif self.at_least_once_timeout > 0.0: - assert time.time() - started_on < self.at_least_once_timeout, \ - 'failed to obtain a timecheck before timeout expired' - - quorum_size = len(teuthology.get_mon_names(self.ctx)) - self.manager.wait_for_mon_quorum_size(quorum_size) - - health = self.manager.get_mon_health(True) - timechecks = health['timechecks'] - - clean_check = False - - if timechecks['round_status'] == 'finished': - assert (timechecks['round'] % 2) == 0, \ - 'timecheck marked as finished but round ' \ - 'disagrees (r {r})'.format( - r=timechecks['round']) - clean_check = True - else: - assert timechecks['round_status'] == 'on-going', \ - 'timecheck status expected \'on-going\' ' \ - 'but found \'{s}\' instead'.format( - s=timechecks['round_status']) - if 'mons' in timechecks.keys() and len(timechecks['mons']) > 1: - self.info('round still on-going, but there are available reports') - else: - self.info('no timechecks available just yet') - self.sleep_interval() - continue - - assert len(timechecks['mons']) > 1, \ - 'there are not enough reported timechecks; ' \ - 'expected > 1 found {n}'.format(n=len(timechecks['mons'])) - - for check in timechecks['mons']: - mon_skew = float(check['skew']) - mon_health = 
check['health'] - mon_id = check['name'] - if abs(mon_skew) > self.max_skew: - assert mon_health == 'HEALTH_WARN', \ - 'mon.{id} health is \'{health}\' but skew {s} > max {ms}'.format( - id=mon_id,health=mon_health,s=abs(mon_skew),ms=self.max_skew) - - log_str = 'mon.{id} with skew {s} > max {ms}'.format( - id=mon_id,s=abs(mon_skew),ms=self.max_skew) - - """ add to skew list """ - details = check['details'] - skews[mon_id] = {'skew': mon_skew, 'details': details} - - if self.expect_skew: - self.info('expected skew: {str}'.format(str=log_str)) - else: - self.warn('unexpected skew: {str}'.format(str=log_str)) - - if clean_check or (self.expect_skew and len(skews) > 0): - ran_once = True - self.print_skews(skews) - self.sleep_interval() - - total = len(skews) - self.print_skews(skews) - - error_str = '' - found_error = False - - if self.expect_skew: - if total == 0: - error_str = 'We were expecting a skew, but none was found!' - found_error = True - else: - if total > 0: - error_str = 'We were not expecting a skew, but we did find it!' - found_error = True - - if found_error: - self.info(error_str) - if not self.never_fail: - assert False, error_str - -@contextlib.contextmanager -def task(ctx, config): - """ - Use clas ClockSkewCheck to check for clock skews on the monitors. - This task will spawn a thread running ClockSkewCheck's do_check(). - - All the configuration will be directly handled by ClockSkewCheck, - so please refer to the class documentation for further information. 
- """ - if config is None: - config = {} - assert isinstance(config, dict), \ - 'mon_clock_skew_check task only accepts a dict for configuration' - log.info('Beginning mon_clock_skew_check...') - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - manager = ceph_manager.CephManager( - mon, - ctx=ctx, - logger=log.getChild('ceph_manager'), - ) - - skew_check = ClockSkewCheck(ctx, - manager, config, - logger=log.getChild('mon_clock_skew_check')) - skew_check_thread = gevent.spawn(skew_check.do_check) - try: - yield - finally: - log.info('joining mon_clock_skew_check') - skew_check.finish() - skew_check_thread.get() - - diff --git a/tasks/mon_recovery.py b/tasks/mon_recovery.py deleted file mode 100644 index bfa2cdf78f1..00000000000 --- a/tasks/mon_recovery.py +++ /dev/null @@ -1,80 +0,0 @@ -""" -Monitor recovery -""" -import logging -import ceph_manager -from teuthology import misc as teuthology - - -log = logging.getLogger(__name__) - -def task(ctx, config): - """ - Test monitor recovery. 
- """ - if config is None: - config = {} - assert isinstance(config, dict), \ - 'task only accepts a dict for configuration' - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - - manager = ceph_manager.CephManager( - mon, - ctx=ctx, - logger=log.getChild('ceph_manager'), - ) - - mons = [f.split('.')[1] for f in teuthology.get_mon_names(ctx)] - log.info("mon ids = %s" % mons) - - manager.wait_for_mon_quorum_size(len(mons)) - - log.info('verifying all monitors are in the quorum') - for m in mons: - s = manager.get_mon_status(m) - assert s['state'] == 'leader' or s['state'] == 'peon' - assert len(s['quorum']) == len(mons) - - log.info('restarting each monitor in turn') - for m in mons: - # stop a monitor - manager.kill_mon(m) - manager.wait_for_mon_quorum_size(len(mons) - 1) - - # restart - manager.revive_mon(m) - manager.wait_for_mon_quorum_size(len(mons)) - - # in forward and reverse order, - rmons = mons - rmons.reverse() - for mons in mons, rmons: - log.info('stopping all monitors') - for m in mons: - manager.kill_mon(m) - - log.info('forming a minimal quorum for %s, then adding monitors' % mons) - qnum = (len(mons) / 2) + 1 - num = 0 - for m in mons: - manager.revive_mon(m) - num += 1 - if num >= qnum: - manager.wait_for_mon_quorum_size(num) - - # on both leader and non-leader ranks... 
- for rank in [0, 1]: - # take one out - log.info('removing mon %s' % mons[rank]) - manager.kill_mon(mons[rank]) - manager.wait_for_mon_quorum_size(len(mons) - 1) - - log.info('causing some monitor log activity') - m = 30 - for n in range(1, m): - manager.raw_cluster_cmd('log', '%d of %d' % (n, m)) - - log.info('adding mon %s back in' % mons[rank]) - manager.revive_mon(mons[rank]) - manager.wait_for_mon_quorum_size(len(mons)) diff --git a/tasks/mon_thrash.py b/tasks/mon_thrash.py deleted file mode 100644 index b45aaa99978..00000000000 --- a/tasks/mon_thrash.py +++ /dev/null @@ -1,343 +0,0 @@ -""" -Monitor thrash -""" -import logging -import contextlib -import ceph_manager -import random -import time -import gevent -import json -import math -from teuthology import misc as teuthology - -log = logging.getLogger(__name__) - -def _get_mons(ctx): - """ - Get monitor names from the context value. - """ - mons = [f[len('mon.'):] for f in teuthology.get_mon_names(ctx)] - return mons - -class MonitorThrasher: - """ - How it works:: - - - pick a monitor - - kill it - - wait for quorum to be formed - - sleep for 'revive_delay' seconds - - revive monitor - - wait for quorum to be formed - - sleep for 'thrash_delay' seconds - - Options:: - - seed Seed to use on the RNG to reproduce a previous - behaviour (default: None; i.e., not set) - revive_delay Number of seconds to wait before reviving - the monitor (default: 10) - thrash_delay Number of seconds to wait in-between - test iterations (default: 0) - thrash_store Thrash monitor store before killing the monitor being thrashed (default: False) - thrash_store_probability Probability of thrashing a monitor's store - (default: 50) - thrash_many Thrash multiple monitors instead of just one. If - 'maintain-quorum' is set to False, then we will - thrash up to as many monitors as there are - available. (default: False) - maintain_quorum Always maintain quorum, taking care on how many - monitors we kill during the thrashing. 
If we - happen to only have one or two monitors configured, - if this option is set to True, then we won't run - this task as we cannot guarantee maintenance of - quorum. Setting it to false however would allow the - task to run with as many as just one single monitor. - (default: True) - freeze_mon_probability: how often to freeze the mon instead of killing it, - in % (default: 0) - freeze_mon_duration: how many seconds to freeze the mon (default: 15) - scrub Scrub after each iteration (default: True) - - Note: if 'store-thrash' is set to True, then 'maintain-quorum' must also - be set to True. - - For example:: - - tasks: - - ceph: - - mon_thrash: - revive_delay: 20 - thrash_delay: 1 - thrash_store: true - thrash_store_probability: 40 - seed: 31337 - maintain_quorum: true - thrash_many: true - - ceph-fuse: - - workunit: - clients: - all: - - mon/workloadgen.sh - """ - def __init__(self, ctx, manager, config, logger): - self.ctx = ctx - self.manager = manager - self.manager.wait_for_clean() - - self.stopping = False - self.logger = logger - self.config = config - - if self.config is None: - self.config = dict() - - """ Test reproducibility """ - self.random_seed = self.config.get('seed', None) - - if self.random_seed is None: - self.random_seed = int(time.time()) - - self.rng = random.Random() - self.rng.seed(int(self.random_seed)) - - """ Monitor thrashing """ - self.revive_delay = float(self.config.get('revive_delay', 10.0)) - self.thrash_delay = float(self.config.get('thrash_delay', 0.0)) - - self.thrash_many = self.config.get('thrash_many', False) - self.maintain_quorum = self.config.get('maintain_quorum', True) - - self.scrub = self.config.get('scrub', True) - - self.freeze_mon_probability = float(self.config.get('freeze_mon_probability', 10)) - self.freeze_mon_duration = float(self.config.get('freeze_mon_duration', 15.0)) - - assert self.max_killable() > 0, \ - 'Unable to kill at least one monitor with the current config.' 
- - """ Store thrashing """ - self.store_thrash = self.config.get('store_thrash', False) - self.store_thrash_probability = int( - self.config.get('store_thrash_probability', 50)) - if self.store_thrash: - assert self.store_thrash_probability > 0, \ - 'store_thrash is set, probability must be > 0' - assert self.maintain_quorum, \ - 'store_thrash = true must imply maintain_quorum = true' - - self.thread = gevent.spawn(self.do_thrash) - - def log(self, x): - """ - locally log info messages - """ - self.logger.info(x) - - def do_join(self): - """ - Break out of this processes thrashing loop. - """ - self.stopping = True - self.thread.get() - - def should_thrash_store(self): - """ - If allowed, indicate that we should thrash a certain percentage of - the time as determined by the store_thrash_probability value. - """ - if not self.store_thrash: - return False - return self.rng.randrange(0, 101) < self.store_thrash_probability - - def thrash_store(self, mon): - """ - Thrash the monitor specified. - :param mon: monitor to thrash - """ - addr = self.ctx.ceph.conf['mon.%s' % mon]['mon addr'] - self.log('thrashing mon.{id}@{addr} store'.format(id=mon, addr=addr)) - out = self.manager.raw_cluster_cmd('-m', addr, 'sync', 'force') - j = json.loads(out) - assert j['ret'] == 0, \ - 'error forcing store sync on mon.{id}:\n{ret}'.format( - id=mon,ret=out) - - def should_freeze_mon(self): - """ - Indicate that we should freeze a certain percentago of the time - as determined by the freeze_mon_probability value. - """ - return self.rng.randrange(0, 101) < self.freeze_mon_probability - - def freeze_mon(self, mon): - """ - Send STOP signal to freeze the monitor. - """ - log.info('Sending STOP to mon %s', mon) - self.manager.signal_mon(mon, 19) # STOP - - def unfreeze_mon(self, mon): - """ - Send CONT signal to unfreeze the monitor. 
- """ - log.info('Sending CONT to mon %s', mon) - self.manager.signal_mon(mon, 18) # CONT - - def kill_mon(self, mon): - """ - Kill the monitor specified - """ - self.log('killing mon.{id}'.format(id=mon)) - self.manager.kill_mon(mon) - - def revive_mon(self, mon): - """ - Revive the monitor specified - """ - self.log('killing mon.{id}'.format(id=mon)) - self.log('reviving mon.{id}'.format(id=mon)) - self.manager.revive_mon(mon) - - def max_killable(self): - """ - Return the maximum number of monitors we can kill. - """ - m = len(_get_mons(self.ctx)) - if self.maintain_quorum: - return max(math.ceil(m/2.0)-1, 0) - else: - return m - - def do_thrash(self): - """ - Cotinuously loop and thrash the monitors. - """ - self.log('start thrashing') - self.log('seed: {s}, revive delay: {r}, thrash delay: {t} '\ - 'thrash many: {tm}, maintain quorum: {mq} '\ - 'store thrash: {st}, probability: {stp} '\ - 'freeze mon: prob {fp} duration {fd}'.format( - s=self.random_seed,r=self.revive_delay,t=self.thrash_delay, - tm=self.thrash_many, mq=self.maintain_quorum, - st=self.store_thrash,stp=self.store_thrash_probability, - fp=self.freeze_mon_probability,fd=self.freeze_mon_duration, - )) - - while not self.stopping: - mons = _get_mons(self.ctx) - self.manager.wait_for_mon_quorum_size(len(mons)) - self.log('making sure all monitors are in the quorum') - for m in mons: - s = self.manager.get_mon_status(m) - assert s['state'] == 'leader' or s['state'] == 'peon' - assert len(s['quorum']) == len(mons) - - kill_up_to = self.rng.randrange(1, self.max_killable()+1) - mons_to_kill = self.rng.sample(mons, kill_up_to) - self.log('monitors to thrash: {m}'.format(m=mons_to_kill)) - - mons_to_freeze = [] - for mon in mons: - if mon in mons_to_kill: - continue - if self.should_freeze_mon(): - mons_to_freeze.append(mon) - self.log('monitors to freeze: {m}'.format(m=mons_to_freeze)) - - for mon in mons_to_kill: - self.log('thrashing mon.{m}'.format(m=mon)) - - """ we only thrash stores if we are 
maintaining quorum """ - if self.should_thrash_store() and self.maintain_quorum: - self.thrash_store(mon) - - self.kill_mon(mon) - - if mons_to_freeze: - for mon in mons_to_freeze: - self.freeze_mon(mon) - self.log('waiting for {delay} secs to unfreeze mons'.format( - delay=self.freeze_mon_duration)) - time.sleep(self.freeze_mon_duration) - for mon in mons_to_freeze: - self.unfreeze_mon(mon) - - if self.maintain_quorum: - self.manager.wait_for_mon_quorum_size(len(mons)-len(mons_to_kill)) - for m in mons: - if m in mons_to_kill: - continue - s = self.manager.get_mon_status(m) - assert s['state'] == 'leader' or s['state'] == 'peon' - assert len(s['quorum']) == len(mons)-len(mons_to_kill) - - self.log('waiting for {delay} secs before reviving monitors'.format( - delay=self.revive_delay)) - time.sleep(self.revive_delay) - - for mon in mons_to_kill: - self.revive_mon(mon) - # do more freezes - if mons_to_freeze: - for mon in mons_to_freeze: - self.freeze_mon(mon) - self.log('waiting for {delay} secs to unfreeze mons'.format( - delay=self.freeze_mon_duration)) - time.sleep(self.freeze_mon_duration) - for mon in mons_to_freeze: - self.unfreeze_mon(mon) - - self.manager.wait_for_mon_quorum_size(len(mons)) - for m in mons: - s = self.manager.get_mon_status(m) - assert s['state'] == 'leader' or s['state'] == 'peon' - assert len(s['quorum']) == len(mons) - - if self.scrub: - self.log('triggering scrub') - try: - self.manager.raw_cluster_cmd('scrub') - except Exception: - log.exception("Saw exception while triggering scrub") - - if self.thrash_delay > 0.0: - self.log('waiting for {delay} secs before continuing thrashing'.format( - delay=self.thrash_delay)) - time.sleep(self.thrash_delay) - -@contextlib.contextmanager -def task(ctx, config): - """ - Stress test the monitor by thrashing them while another task/workunit - is running. - - Please refer to MonitorThrasher class for further information on the - available options. 
- """ - if config is None: - config = {} - assert isinstance(config, dict), \ - 'mon_thrash task only accepts a dict for configuration' - assert len(_get_mons(ctx)) > 2, \ - 'mon_thrash task requires at least 3 monitors' - log.info('Beginning mon_thrash...') - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - manager = ceph_manager.CephManager( - mon, - ctx=ctx, - logger=log.getChild('ceph_manager'), - ) - thrash_proc = MonitorThrasher(ctx, - manager, config, - logger=log.getChild('mon_thrasher')) - try: - log.debug('Yielding') - yield - finally: - log.info('joining mon_thrasher') - thrash_proc.do_join() - mons = _get_mons(ctx) - manager.wait_for_mon_quorum_size(len(mons)) diff --git a/tasks/multibench.py b/tasks/multibench.py deleted file mode 100644 index 13b5ffe2cf8..00000000000 --- a/tasks/multibench.py +++ /dev/null @@ -1,57 +0,0 @@ -""" -Multibench testing -""" -import contextlib -import logging -import radosbench -import time -import copy -import gevent - -log = logging.getLogger(__name__) - -@contextlib.contextmanager -def task(ctx, config): - """ - Run multibench - - The config should be as follows: - - multibench: - time: - segments: - radosbench: - - example: - - tasks: - - ceph: - - multibench: - clients: [client.0] - time: 360 - - interactive: - """ - log.info('Beginning multibench...') - assert isinstance(config, dict), \ - "please list clients to run on" - - def run_one(num): - """Run test spawn from gevent""" - start = time.time() - benchcontext = copy.copy(config.get('radosbench')) - iterations = 0 - while time.time() - start < int(config.get('time', 600)): - log.info("Starting iteration %s of segment %s"%(iterations, num)) - benchcontext['pool'] = str(num) + "-" + str(iterations) - with radosbench.task(ctx, benchcontext): - time.sleep() - iterations += 1 - log.info("Starting %s threads"%(str(config.get('segments', 3)),)) - segments = [ - gevent.spawn(run_one, i) - for i in range(0, 
int(config.get('segments', 3)))] - - try: - yield - finally: - [i.get() for i in segments] diff --git a/tasks/object_source_down.py b/tasks/object_source_down.py deleted file mode 100644 index 17b94490668..00000000000 --- a/tasks/object_source_down.py +++ /dev/null @@ -1,103 +0,0 @@ -""" -Test Object locations going down -""" -import logging -import ceph_manager -from teuthology import misc as teuthology -from util.rados import rados - -log = logging.getLogger(__name__) - -def task(ctx, config): - """ - Test handling of object location going down - """ - if config is None: - config = {} - assert isinstance(config, dict), \ - 'lost_unfound task only accepts a dict for configuration' - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - - manager = ceph_manager.CephManager( - mon, - ctx=ctx, - logger=log.getChild('ceph_manager'), - ) - - while len(manager.get_osd_status()['up']) < 3: - manager.sleep(10) - manager.wait_for_clean() - - # something that is always there - dummyfile = '/etc/fstab' - - # take 0, 1 out - manager.mark_out_osd(0) - manager.mark_out_osd(1) - manager.wait_for_clean() - - # delay recovery, and make the pg log very long (to prevent backfill) - manager.raw_cluster_cmd( - 'tell', 'osd.0', - 'injectargs', - '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000' - ) - # delay recovery, and make the pg log very long (to prevent backfill) - manager.raw_cluster_cmd( - 'tell', 'osd.1', - 'injectargs', - '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000' - ) - # delay recovery, and make the pg log very long (to prevent backfill) - manager.raw_cluster_cmd( - 'tell', 'osd.2', - 'injectargs', - '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000' - ) - # delay recovery, and make the pg log very long (to prevent backfill) - manager.raw_cluster_cmd( - 'tell', 'osd.3', - 'injectargs', - '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 
100000000' - ) - - # kludge to make sure they get a map - rados(ctx, mon, ['-p', 'data', 'put', 'dummy', dummyfile]) - - # create old objects - for f in range(1, 10): - rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile]) - - manager.mark_out_osd(3) - manager.wait_till_active() - - manager.mark_in_osd(0) - manager.wait_till_active() - - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - - manager.mark_out_osd(2) - manager.wait_till_active() - - # bring up 1 - manager.mark_in_osd(1) - manager.wait_till_active() - - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - log.info("Getting unfound objects") - unfound = manager.get_num_unfound_objects() - assert not unfound - - manager.kill_osd(2) - manager.mark_down_osd(2) - manager.kill_osd(3) - manager.mark_down_osd(3) - - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - log.info("Getting unfound objects") - unfound = manager.get_num_unfound_objects() - assert unfound diff --git a/tasks/omapbench.py b/tasks/omapbench.py deleted file mode 100644 index e026c74dbc0..00000000000 --- a/tasks/omapbench.py +++ /dev/null @@ -1,83 +0,0 @@ -""" -Run omapbench executable within teuthology -""" -import contextlib -import logging - -from teuthology.orchestra import run -from teuthology import misc as teuthology - -log = logging.getLogger(__name__) - -@contextlib.contextmanager -def task(ctx, config): - """ - Run omapbench - - The config should be as follows:: - - omapbench: - clients: [client list] - threads: - objects: - entries: - keysize: - valsize: - increment: - omaptype: - - example:: - - tasks: - - ceph: - - omapbench: - clients: [client.0] - threads: 30 - objects: 1000 - entries: 10 - keysize: 10 - valsize: 100 - increment: 100 - omaptype: uniform - - interactive: - """ - log.info('Beginning 
omapbench...') - assert isinstance(config, dict), \ - "please list clients to run on" - omapbench = {} - testdir = teuthology.get_testdir(ctx) - print(str(config.get('increment',-1))) - for role in config.get('clients', ['client.0']): - assert isinstance(role, basestring) - PREFIX = 'client.' - assert role.startswith(PREFIX) - id_ = role[len(PREFIX):] - (remote,) = ctx.cluster.only(role).remotes.iterkeys() - proc = remote.run( - args=[ - "/bin/sh", "-c", - " ".join(['adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage', - 'omapbench', - '--name', role[len(PREFIX):], - '-t', str(config.get('threads', 30)), - '-o', str(config.get('objects', 1000)), - '--entries', str(config.get('entries',10)), - '--keysize', str(config.get('keysize',10)), - '--valsize', str(config.get('valsize',1000)), - '--inc', str(config.get('increment',10)), - '--omaptype', str(config.get('omaptype','uniform')) - ]).format(tdir=testdir), - ], - logger=log.getChild('omapbench.{id}'.format(id=id_)), - stdin=run.PIPE, - wait=False - ) - omapbench[id_] = proc - - try: - yield - finally: - log.info('joining omapbench') - run.wait(omapbench.itervalues()) diff --git a/tasks/osd_backfill.py b/tasks/osd_backfill.py deleted file mode 100644 index f3b59e398cb..00000000000 --- a/tasks/osd_backfill.py +++ /dev/null @@ -1,105 +0,0 @@ -""" -Osd backfill test -""" -import logging -import ceph_manager -import time -from teuthology import misc as teuthology - - -log = logging.getLogger(__name__) - - -def rados_start(ctx, remote, cmd): - """ - Run a remote rados command (currently used to only write data) - """ - log.info("rados %s" % ' '.join(cmd)) - testdir = teuthology.get_testdir(ctx) - pre = [ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'rados', - ]; - pre.extend(cmd) - proc = remote.run( - args=pre, - wait=False, - ) - return proc - -def task(ctx, config): - """ - Test backfill - """ - if config is None: - config = {} - assert isinstance(config, dict), 
\ - 'thrashosds task only accepts a dict for configuration' - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - - num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') - log.info('num_osds is %s' % num_osds) - assert num_osds == 3 - - manager = ceph_manager.CephManager( - mon, - ctx=ctx, - logger=log.getChild('ceph_manager'), - ) - - while len(manager.get_osd_status()['up']) < 3: - manager.sleep(10) - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - manager.wait_for_clean() - - # write some data - p = rados_start(ctx, mon, ['-p', 'rbd', 'bench', '15', 'write', '-b', '4096', - '--no-cleanup']) - err = p.wait() - log.info('err is %d' % err) - - # mark osd.0 out to trigger a rebalance/backfill - manager.mark_out_osd(0) - - # also mark it down to it won't be included in pg_temps - manager.kill_osd(0) - manager.mark_down_osd(0) - - # wait for everything to peer and be happy... 
- manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - manager.wait_for_recovery() - - # write some new data - p = rados_start(ctx, mon, ['-p', 'rbd', 'bench', '30', 'write', '-b', '4096', - '--no-cleanup']) - - time.sleep(15) - - # blackhole + restart osd.1 - # this triggers a divergent backfill target - manager.blackhole_kill_osd(1) - time.sleep(2) - manager.revive_osd(1) - - # wait for our writes to complete + succeed - err = p.wait() - log.info('err is %d' % err) - - # cluster must recover - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - manager.wait_for_recovery() - - # re-add osd.0 - manager.revive_osd(0) - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - manager.wait_for_clean() - - diff --git a/tasks/osd_failsafe_enospc.py b/tasks/osd_failsafe_enospc.py deleted file mode 100644 index 2af94cd58e4..00000000000 --- a/tasks/osd_failsafe_enospc.py +++ /dev/null @@ -1,211 +0,0 @@ -""" -Handle osdfailsafe configuration settings (nearfull ratio and full ratio) -""" -from cStringIO import StringIO -import logging -import time - -from teuthology.orchestra import run -from util.rados import rados -from teuthology import misc as teuthology - -log = logging.getLogger(__name__) - -def task(ctx, config): - """ - Test handling of osd_failsafe_nearfull_ratio and osd_failsafe_full_ratio - configuration settings - - In order for test to pass must use log-whitelist as follows - - tasks: - - chef: - - install: - - ceph: - log-whitelist: ['OSD near full', 'OSD full dropping all updates'] - - osd_failsafe_enospc: - - """ - if config is None: - config = {} - assert isinstance(config, dict), \ - 'osd_failsafe_enospc task only accepts a dict for configuration' - - # Give 2 seconds for injectargs + osd_op_complaint_time (30) + 2 * osd_heartbeat_interval (6) + 6 padding - 
sleep_time = 50 - - # something that is always there - dummyfile = '/etc/fstab' - dummyfile2 = '/etc/resolv.conf' - - # create 1 pg pool with 1 rep which can only be on osd.0 - osds = ctx.manager.get_osd_dump() - for osd in osds: - if osd['osd'] != 0: - ctx.manager.mark_out_osd(osd['osd']) - - log.info('creating pool foo') - ctx.manager.create_pool("foo") - ctx.manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'size', '1') - - # State NONE -> NEAR - log.info('1. Verify warning messages when exceeding nearfull_ratio') - - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - - proc = mon.run( - args=[ - 'daemon-helper', - 'kill', - 'ceph', '-w' - ], - stdin=run.PIPE, - stdout=StringIO(), - wait=False, - ) - - ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .00001') - - time.sleep(sleep_time) - proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w - proc.wait() - - lines = proc.stdout.getvalue().split('\n') - - count = len(filter(lambda line: '[WRN] OSD near full' in line, lines)) - assert count == 2, 'Incorrect number of warning messages expected 2 got %d' % count - count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) - assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count - - # State NEAR -> FULL - log.info('2. 
Verify error messages when exceeding full_ratio') - - proc = mon.run( - args=[ - 'daemon-helper', - 'kill', - 'ceph', '-w' - ], - stdin=run.PIPE, - stdout=StringIO(), - wait=False, - ) - - ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .00001') - - time.sleep(sleep_time) - proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w - proc.wait() - - lines = proc.stdout.getvalue().split('\n') - - count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) - assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count - - log.info('3. Verify write failure when exceeding full_ratio') - - # Write data should fail - ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile1', dummyfile]) - assert ret != 0, 'Expected write failure but it succeeded with exit status 0' - - # Put back default - ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97') - time.sleep(10) - - # State FULL -> NEAR - log.info('4. Verify write success when NOT exceeding full_ratio') - - # Write should succeed - ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile2', dummyfile2]) - assert ret == 0, 'Expected write to succeed, but got exit status %d' % ret - - log.info('5. 
Verify warning messages again when exceeding nearfull_ratio') - - proc = mon.run( - args=[ - 'daemon-helper', - 'kill', - 'ceph', '-w' - ], - stdin=run.PIPE, - stdout=StringIO(), - wait=False, - ) - - time.sleep(sleep_time) - proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w - proc.wait() - - lines = proc.stdout.getvalue().split('\n') - - count = len(filter(lambda line: '[WRN] OSD near full' in line, lines)) - assert count == 1 or count == 2, 'Incorrect number of warning messages expected 1 or 2 got %d' % count - count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) - assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count - - ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .90') - time.sleep(10) - - # State NONE -> FULL - log.info('6. Verify error messages again when exceeding full_ratio') - - proc = mon.run( - args=[ - 'daemon-helper', - 'kill', - 'ceph', '-w' - ], - stdin=run.PIPE, - stdout=StringIO(), - wait=False, - ) - - ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .00001') - - time.sleep(sleep_time) - proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w - proc.wait() - - lines = proc.stdout.getvalue().split('\n') - - count = len(filter(lambda line: '[WRN] OSD near full' in line, lines)) - assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count - count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) - assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count - - # State FULL -> NONE - log.info('7. 
Verify no messages settings back to default') - - ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97') - time.sleep(10) - - proc = mon.run( - args=[ - 'daemon-helper', - 'kill', - 'ceph', '-w' - ], - stdin=run.PIPE, - stdout=StringIO(), - wait=False, - ) - - time.sleep(sleep_time) - proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w - proc.wait() - - lines = proc.stdout.getvalue().split('\n') - - count = len(filter(lambda line: '[WRN] OSD near full' in line, lines)) - assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count - count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) - assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count - - log.info('Test Passed') - - # Bring all OSDs back in - ctx.manager.remove_pool("foo") - for osd in osds: - if osd['osd'] != 0: - ctx.manager.mark_in_osd(osd['osd']) diff --git a/tasks/osd_recovery.py b/tasks/osd_recovery.py deleted file mode 100644 index 450384aa2fe..00000000000 --- a/tasks/osd_recovery.py +++ /dev/null @@ -1,208 +0,0 @@ -""" -osd recovery -""" -import logging -import ceph_manager -import time -from teuthology import misc as teuthology - - -log = logging.getLogger(__name__) - - -def rados_start(testdir, remote, cmd): - """ - Run a remote rados command (currently used to only write data) - """ - log.info("rados %s" % ' '.join(cmd)) - pre = [ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'rados', - ]; - pre.extend(cmd) - proc = remote.run( - args=pre, - wait=False, - ) - return proc - -def task(ctx, config): - """ - Test (non-backfill) recovery - """ - if config is None: - config = {} - assert isinstance(config, dict), \ - 'task only accepts a dict for configuration' - testdir = teuthology.get_testdir(ctx) - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - - num_osds = 
teuthology.num_instances_of_type(ctx.cluster, 'osd') - log.info('num_osds is %s' % num_osds) - assert num_osds == 3 - - manager = ceph_manager.CephManager( - mon, - ctx=ctx, - logger=log.getChild('ceph_manager'), - ) - - while len(manager.get_osd_status()['up']) < 3: - manager.sleep(10) - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - manager.wait_for_clean() - - # test some osdmap flags - manager.raw_cluster_cmd('osd', 'set', 'noin') - manager.raw_cluster_cmd('osd', 'set', 'noout') - manager.raw_cluster_cmd('osd', 'set', 'noup') - manager.raw_cluster_cmd('osd', 'set', 'nodown') - manager.raw_cluster_cmd('osd', 'unset', 'noin') - manager.raw_cluster_cmd('osd', 'unset', 'noout') - manager.raw_cluster_cmd('osd', 'unset', 'noup') - manager.raw_cluster_cmd('osd', 'unset', 'nodown') - - # write some new data - p = rados_start(testdir, mon, ['-p', 'rbd', 'bench', '60', 'write', '-b', '4096', - '--no-cleanup']) - - time.sleep(15) - - # trigger a divergent target: - # blackhole + restart osd.1 (shorter log) - manager.blackhole_kill_osd(1) - # kill osd.2 (longer log... 
we'll make it divergent below) - manager.kill_osd(2) - time.sleep(2) - manager.revive_osd(1) - - # wait for our writes to complete + succeed - err = p.wait() - log.info('err is %d' % err) - - # cluster must repeer - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.wait_for_active_or_down() - - # write some more (make sure osd.2 really is divergent) - p = rados_start(testdir, mon, ['-p', 'rbd', 'bench', '15', 'write', '-b', '4096']) - p.wait() - - # revive divergent osd - manager.revive_osd(2) - - while len(manager.get_osd_status()['up']) < 3: - log.info('waiting a bit...') - time.sleep(2) - log.info('3 are up!') - - # cluster must recover - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - manager.wait_for_clean() - - -def test_incomplete_pgs(ctx, config): - """ - Test handling of incomplete pgs. Requires 4 osds. 
- """ - testdir = teuthology.get_testdir(ctx) - if config is None: - config = {} - assert isinstance(config, dict), \ - 'task only accepts a dict for configuration' - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - - num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') - log.info('num_osds is %s' % num_osds) - assert num_osds == 4 - - manager = ceph_manager.CephManager( - mon, - ctx=ctx, - logger=log.getChild('ceph_manager'), - ) - - while len(manager.get_osd_status()['up']) < 4: - time.sleep(10) - - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats') - manager.wait_for_clean() - - log.info('Testing incomplete pgs...') - - for i in range(4): - manager.set_config( - i, - osd_recovery_delay_start=1000) - - # move data off of osd.0, osd.1 - manager.raw_cluster_cmd('osd', 'out', '0', '1') - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats') - manager.wait_for_clean() - - # lots of objects in rbd (no pg log, will backfill) - p = rados_start(testdir, mon, - ['-p', 'rbd', 'bench', '60', 'write', '-b', '1', - '--no-cleanup']) - p.wait() - - # few objects in rbd pool (with pg log, normal recovery) - for f in range(1, 20): - p = rados_start(testdir, mon, ['-p', 'rbd', 'put', - 'foo.%d' % f, '/etc/passwd']) - p.wait() - - # move it back - manager.raw_cluster_cmd('osd', 'in', '0', '1') - manager.raw_cluster_cmd('osd', 'out', '2', '3') - time.sleep(10) - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') 
- manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats') - time.sleep(10) - manager.wait_for_active() - - assert not manager.is_clean() - assert not manager.is_recovered() - - # kill 2 + 3 - log.info('stopping 2,3') - manager.kill_osd(2) - manager.kill_osd(3) - log.info('...') - manager.raw_cluster_cmd('osd', 'down', '2', '3') - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.wait_for_active_or_down() - - assert manager.get_num_down() > 0 - - # revive 2 + 3 - manager.revive_osd(2) - manager.revive_osd(3) - while len(manager.get_osd_status()['up']) < 4: - log.info('waiting a bit...') - time.sleep(2) - log.info('all are up!') - - for i in range(4): - manager.kick_recovery_wq(i) - - # cluster must recover - manager.wait_for_clean() diff --git a/tasks/peer.py b/tasks/peer.py deleted file mode 100644 index f1789cf12d6..00000000000 --- a/tasks/peer.py +++ /dev/null @@ -1,96 +0,0 @@ -""" -Peer test (Single test, not much configurable here) -""" -import logging -import json - -import ceph_manager -from teuthology import misc as teuthology -from util.rados import rados - -log = logging.getLogger(__name__) - -def task(ctx, config): - """ - Test peering. 
- """ - if config is None: - config = {} - assert isinstance(config, dict), \ - 'peer task only accepts a dict for configuration' - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - - manager = ceph_manager.CephManager( - mon, - ctx=ctx, - logger=log.getChild('ceph_manager'), - ) - - while len(manager.get_osd_status()['up']) < 3: - manager.sleep(10) - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - manager.wait_for_clean() - - for i in range(3): - manager.set_config( - i, - osd_recovery_delay_start=120) - - # take on osd down - manager.kill_osd(2) - manager.mark_down_osd(2) - - # kludge to make sure they get a map - rados(ctx, mon, ['-p', 'data', 'get', 'dummy', '-']) - - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.wait_for_recovery() - - # kill another and revive 2, so that some pgs can't peer. 
- manager.kill_osd(1) - manager.mark_down_osd(1) - manager.revive_osd(2) - manager.wait_till_osd_is_up(2) - - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - - manager.wait_for_active_or_down() - - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - - # look for down pgs - num_down_pgs = 0 - pgs = manager.get_pg_stats() - for pg in pgs: - out = manager.raw_cluster_cmd('pg', pg['pgid'], 'query') - log.debug("out string %s",out) - j = json.loads(out) - log.info("pg is %s, query json is %s", pg, j) - - if pg['state'].count('down'): - num_down_pgs += 1 - # verify that it is blocked on osd.1 - rs = j['recovery_state'] - assert len(rs) > 0 - assert rs[0]['name'] == 'Started/Primary/Peering/GetInfo' - assert rs[1]['name'] == 'Started/Primary/Peering' - assert rs[1]['blocked'] - assert rs[1]['down_osds_we_would_probe'] == [1] - assert len(rs[1]['peering_blocked_by']) == 1 - assert rs[1]['peering_blocked_by'][0]['osd'] == 1 - - assert num_down_pgs > 0 - - # bring it all back - manager.revive_osd(1) - manager.wait_till_osd_is_up(1) - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - manager.wait_for_clean() diff --git a/tasks/peering_speed_test.py b/tasks/peering_speed_test.py deleted file mode 100644 index 602a7da3066..00000000000 --- a/tasks/peering_speed_test.py +++ /dev/null @@ -1,84 +0,0 @@ -""" -Remotely run peering tests. 
-""" -import logging -import time - -log = logging.getLogger(__name__) - -from args import argify - -POOLNAME = "POOLNAME" -ARGS = [ - ('num_pgs', 'number of pgs to create', 256, int), - ('max_time', 'seconds to complete peering', 0, int), - ('runs', 'trials to run', 10, int), - ('num_objects', 'objects to create', 256 * 1024, int), - ('object_size', 'size in bytes for objects', 64, int), - ('creation_time_limit', 'time limit for pool population', 60*60, int), - ('create_threads', 'concurrent writes for create', 256, int) - ] - -def setup(ctx, config): - """ - Setup peering test on remotes. - """ - ctx.manager.clear_pools() - ctx.manager.create_pool(POOLNAME, config.num_pgs) - log.info("populating pool") - ctx.manager.rados_write_objects( - POOLNAME, - config.num_objects, - config.object_size, - config.creation_time_limit, - config.create_threads) - log.info("done populating pool") - -def do_run(ctx, config): - """ - Perform the test. - """ - start = time.time() - # mark in osd - ctx.manager.mark_in_osd(0) - log.info("writing out objects") - ctx.manager.rados_write_objects( - POOLNAME, - config.num_pgs, # write 1 object per pg or so - 1, - config.creation_time_limit, - config.num_pgs, # lots of concurrency - cleanup = True) - peering_end = time.time() - - log.info("peering done, waiting on recovery") - ctx.manager.wait_for_clean() - - log.info("recovery done") - recovery_end = time.time() - if config.max_time: - assert(peering_end - start < config.max_time) - ctx.manager.mark_out_osd(0) - ctx.manager.wait_for_clean() - return { - 'time_to_active': peering_end - start, - 'time_to_clean': recovery_end - start - } - -@argify("peering_speed_test", ARGS) -def task(ctx, config): - """ - Peering speed test - """ - setup(ctx, config) - ctx.manager.mark_out_osd(0) - ctx.manager.wait_for_clean() - ret = [] - for i in range(config.runs): - log.info("Run {i}".format(i = i)) - ret.append(do_run(ctx, config)) - - ctx.manager.mark_in_osd(0) - ctx.summary['recovery_times'] = { - 
'runs': ret - } diff --git a/tasks/populate_rbd_pool.py b/tasks/populate_rbd_pool.py deleted file mode 100644 index 059a33fc112..00000000000 --- a/tasks/populate_rbd_pool.py +++ /dev/null @@ -1,82 +0,0 @@ -""" -Populate rbd pools -""" -import contextlib -import logging - -log = logging.getLogger(__name__) - -@contextlib.contextmanager -def task(ctx, config): - """ - Populate pools with prefix with - rbd images at snaps - - The config could be as follows:: - - populate_rbd_pool: - client: - pool_prefix: foo - num_pools: 5 - num_images: 10 - num_snaps: 3 - image_size: 10737418240 - """ - if config is None: - config = {} - client = config.get("client", "client.0") - pool_prefix = config.get("pool_prefix", "foo") - num_pools = config.get("num_pools", 2) - num_images = config.get("num_images", 20) - num_snaps = config.get("num_snaps", 4) - image_size = config.get("image_size", 100) - write_size = config.get("write_size", 1024*1024) - write_threads = config.get("write_threads", 10) - write_total_per_snap = config.get("write_total_per_snap", 1024*1024*30) - - (remote,) = ctx.cluster.only(client).remotes.iterkeys() - - for poolid in range(num_pools): - poolname = "%s-%s" % (pool_prefix, str(poolid)) - log.info("Creating pool %s" % (poolname,)) - ctx.manager.create_pool(poolname) - for imageid in range(num_images): - imagename = "rbd-%s" % (str(imageid),) - log.info("Creating imagename %s" % (imagename,)) - remote.run( - args = [ - "rbd", - "create", - imagename, - "--image-format", "1", - "--size", str(image_size), - "--pool", str(poolname)]) - def bench_run(): - remote.run( - args = [ - "rbd", - "bench-write", - imagename, - "--pool", poolname, - "--io-size", str(write_size), - "--io-threads", str(write_threads), - "--io-total", str(write_total_per_snap), - "--io-pattern", "rand"]) - log.info("imagename %s first bench" % (imagename,)) - bench_run() - for snapid in range(num_snaps): - snapname = "snap-%s" % (str(snapid),) - log.info("imagename %s creating snap %s" % 
(imagename, snapname)) - remote.run( - args = [ - "rbd", "snap", "create", - "--pool", poolname, - "--snap", snapname, - imagename - ]) - bench_run() - - try: - yield - finally: - log.info('done') diff --git a/tasks/qemu.py b/tasks/qemu.py deleted file mode 100644 index 44591a12003..00000000000 --- a/tasks/qemu.py +++ /dev/null @@ -1,449 +0,0 @@ -""" -Qemu task -""" -from cStringIO import StringIO - -import contextlib -import logging -import os - -from teuthology import misc as teuthology -from teuthology import contextutil -from tasks import rbd -from teuthology.orchestra import run - -log = logging.getLogger(__name__) - -DEFAULT_NUM_RBD = 1 -DEFAULT_IMAGE_URL = 'http://ceph.com/qa/ubuntu-12.04.qcow2' -DEFAULT_MEM = 4096 # in megabytes - -def create_images(ctx, config, managers): - for client, client_config in config.iteritems(): - num_rbd = client_config.get('num_rbd', 1) - clone = client_config.get('clone', False) - assert num_rbd > 0, 'at least one rbd device must be used' - for i in xrange(num_rbd): - create_config = { - client: { - 'image_name': '{client}.{num}'.format(client=client, num=i), - 'image_format': 2 if clone else 1, - } - } - managers.append( - lambda create_config=create_config: - rbd.create_image(ctx=ctx, config=create_config) - ) - -def create_clones(ctx, config, managers): - for client, client_config in config.iteritems(): - num_rbd = client_config.get('num_rbd', 1) - clone = client_config.get('clone', False) - if clone: - for i in xrange(num_rbd): - create_config = { - client: { - 'image_name': - '{client}.{num}-clone'.format(client=client, num=i), - 'parent_name': - '{client}.{num}'.format(client=client, num=i), - } - } - managers.append( - lambda create_config=create_config: - rbd.clone_image(ctx=ctx, config=create_config) - ) - -@contextlib.contextmanager -def create_dirs(ctx, config): - """ - Handle directory creation and cleanup - """ - testdir = teuthology.get_testdir(ctx) - for client, client_config in config.iteritems(): - assert 
'test' in client_config, 'You must specify a test to run' - (remote,) = ctx.cluster.only(client).remotes.keys() - remote.run( - args=[ - 'install', '-d', '-m0755', '--', - '{tdir}/qemu'.format(tdir=testdir), - '{tdir}/archive/qemu'.format(tdir=testdir), - ] - ) - try: - yield - finally: - for client, client_config in config.iteritems(): - assert 'test' in client_config, 'You must specify a test to run' - (remote,) = ctx.cluster.only(client).remotes.keys() - remote.run( - args=[ - 'rmdir', '{tdir}/qemu'.format(tdir=testdir), run.Raw('||'), 'true', - ] - ) - -@contextlib.contextmanager -def generate_iso(ctx, config): - """Execute system commands to generate iso""" - log.info('generating iso...') - testdir = teuthology.get_testdir(ctx) - for client, client_config in config.iteritems(): - assert 'test' in client_config, 'You must specify a test to run' - (remote,) = ctx.cluster.only(client).remotes.keys() - src_dir = os.path.dirname(__file__) - userdata_path = os.path.join(testdir, 'qemu', 'userdata.' + client) - metadata_path = os.path.join(testdir, 'qemu', 'metadata.' + client) - - with file(os.path.join(src_dir, 'userdata_setup.yaml'), 'rb') as f: - test_setup = ''.join(f.readlines()) - # configuring the commands to setup the nfs mount - mnt_dir = "/export/{client}".format(client=client) - test_setup = test_setup.format( - mnt_dir=mnt_dir - ) - - with file(os.path.join(src_dir, 'userdata_teardown.yaml'), 'rb') as f: - test_teardown = ''.join(f.readlines()) - - user_data = test_setup - if client_config.get('type', 'filesystem') == 'filesystem': - for i in xrange(0, client_config.get('num_rbd', DEFAULT_NUM_RBD)): - dev_letter = chr(ord('b') + i) - user_data += """ -- | - #!/bin/bash - mkdir /mnt/test_{dev_letter} - mkfs -t xfs /dev/vd{dev_letter} - mount -t xfs /dev/vd{dev_letter} /mnt/test_{dev_letter} -""".format(dev_letter=dev_letter) - - # this may change later to pass the directories as args to the - # script or something. xfstests needs that. 
- user_data += """ -- | - #!/bin/bash - test -d /mnt/test_b && cd /mnt/test_b - /mnt/cdrom/test.sh > /mnt/log/test.log 2>&1 && touch /mnt/log/success -""" + test_teardown - - teuthology.write_file(remote, userdata_path, StringIO(user_data)) - - with file(os.path.join(src_dir, 'metadata.yaml'), 'rb') as f: - teuthology.write_file(remote, metadata_path, f) - - test_file = '{tdir}/qemu/{client}.test.sh'.format(tdir=testdir, client=client) - remote.run( - args=[ - 'wget', '-nv', '-O', test_file, - client_config['test'], - run.Raw('&&'), - 'chmod', '755', test_file, - ], - ) - remote.run( - args=[ - 'genisoimage', '-quiet', '-input-charset', 'utf-8', - '-volid', 'cidata', '-joliet', '-rock', - '-o', '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client), - '-graft-points', - 'user-data={userdata}'.format(userdata=userdata_path), - 'meta-data={metadata}'.format(metadata=metadata_path), - 'test.sh={file}'.format(file=test_file), - ], - ) - try: - yield - finally: - for client in config.iterkeys(): - (remote,) = ctx.cluster.only(client).remotes.keys() - remote.run( - args=[ - 'rm', '-f', - '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client), - os.path.join(testdir, 'qemu', 'userdata.' + client), - os.path.join(testdir, 'qemu', 'metadata.' 
+ client), - '{tdir}/qemu/{client}.test.sh'.format(tdir=testdir, client=client), - ], - ) - -@contextlib.contextmanager -def download_image(ctx, config): - """Downland base image, remove image file when done""" - log.info('downloading base image') - testdir = teuthology.get_testdir(ctx) - for client, client_config in config.iteritems(): - (remote,) = ctx.cluster.only(client).remotes.keys() - base_file = '{tdir}/qemu/base.{client}.qcow2'.format(tdir=testdir, client=client) - remote.run( - args=[ - 'wget', '-nv', '-O', base_file, DEFAULT_IMAGE_URL, - ] - ) - try: - yield - finally: - log.debug('cleaning up base image files') - for client in config.iterkeys(): - base_file = '{tdir}/qemu/base.{client}.qcow2'.format( - tdir=testdir, - client=client, - ) - (remote,) = ctx.cluster.only(client).remotes.keys() - remote.run( - args=[ - 'rm', '-f', base_file, - ], - ) - - -def _setup_nfs_mount(remote, client, mount_dir): - """ - Sets up an nfs mount on the remote that the guest can use to - store logs. This nfs mount is also used to touch a file - at the end of the test to indiciate if the test was successful - or not. 
- """ - export_dir = "/export/{client}".format(client=client) - log.info("Creating the nfs export directory...") - remote.run(args=[ - 'sudo', 'mkdir', '-p', export_dir, - ]) - log.info("Mounting the test directory...") - remote.run(args=[ - 'sudo', 'mount', '--bind', mount_dir, export_dir, - ]) - log.info("Adding mount to /etc/exports...") - export = "{dir} *(rw,no_root_squash,no_subtree_check,insecure)".format( - dir=export_dir - ) - remote.run(args=[ - 'echo', export, run.Raw("|"), - 'sudo', 'tee', '-a', "/etc/exports", - ]) - log.info("Restarting NFS...") - if remote.os.package_type == "deb": - remote.run(args=['sudo', 'service', 'nfs-kernel-server', 'restart']) - else: - remote.run(args=['sudo', 'systemctl', 'restart', 'nfs']) - - -def _teardown_nfs_mount(remote, client): - """ - Tears down the nfs mount on the remote used for logging and reporting the - status of the tests being ran in the guest. - """ - log.info("Tearing down the nfs mount for {remote}".format(remote=remote)) - export_dir = "/export/{client}".format(client=client) - log.info("Stopping NFS...") - if remote.os.package_type == "deb": - remote.run(args=[ - 'sudo', 'service', 'nfs-kernel-server', 'stop' - ]) - else: - remote.run(args=[ - 'sudo', 'systemctl', 'stop', 'nfs' - ]) - log.info("Unmounting exported directory...") - remote.run(args=[ - 'sudo', 'umount', export_dir - ]) - log.info("Deleting exported directory...") - remote.run(args=[ - 'sudo', 'rm', '-r', '/export' - ]) - log.info("Deleting export from /etc/exports...") - remote.run(args=[ - 'sudo', 'sed', '-i', '$ d', '/etc/exports' - ]) - log.info("Starting NFS...") - if remote.os.package_type == "deb": - remote.run(args=[ - 'sudo', 'service', 'nfs-kernel-server', 'start' - ]) - else: - remote.run(args=[ - 'sudo', 'systemctl', 'start', 'nfs' - ]) - - -@contextlib.contextmanager -def run_qemu(ctx, config): - """Setup kvm environment and start qemu""" - procs = [] - testdir = teuthology.get_testdir(ctx) - for client, client_config in 
config.iteritems(): - (remote,) = ctx.cluster.only(client).remotes.keys() - log_dir = '{tdir}/archive/qemu/{client}'.format(tdir=testdir, client=client) - remote.run( - args=[ - 'mkdir', log_dir, run.Raw('&&'), - 'sudo', 'modprobe', 'kvm', - ] - ) - - # make an nfs mount to use for logging and to - # allow to test to tell teuthology the tests outcome - _setup_nfs_mount(remote, client, log_dir) - - base_file = '{tdir}/qemu/base.{client}.qcow2'.format( - tdir=testdir, - client=client - ) - qemu_cmd = 'qemu-system-x86_64' - if remote.os.package_type == "rpm": - qemu_cmd = "/usr/libexec/qemu-kvm" - args=[ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'daemon-helper', - 'term', - qemu_cmd, '-enable-kvm', '-nographic', - '-m', str(client_config.get('memory', DEFAULT_MEM)), - # base OS device - '-drive', - 'file={base},format=qcow2,if=virtio'.format(base=base_file), - # cd holding metadata for cloud-init - '-cdrom', '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client), - ] - - cachemode = 'none' - ceph_config = ctx.ceph.conf.get('global', {}) - ceph_config.update(ctx.ceph.conf.get('client', {})) - ceph_config.update(ctx.ceph.conf.get(client, {})) - if ceph_config.get('rbd cache'): - if ceph_config.get('rbd cache max dirty', 1) > 0: - cachemode = 'writeback' - else: - cachemode = 'writethrough' - - clone = client_config.get('clone', False) - for i in xrange(client_config.get('num_rbd', DEFAULT_NUM_RBD)): - suffix = '-clone' if clone else '' - args.extend([ - '-drive', - 'file=rbd:rbd/{img}:id={id},format=raw,if=virtio,cache={cachemode}'.format( - img='{client}.{num}{suffix}'.format(client=client, num=i, - suffix=suffix), - id=client[len('client.'):], - cachemode=cachemode, - ), - ]) - - log.info('starting qemu...') - procs.append( - remote.run( - args=args, - logger=log.getChild(client), - stdin=run.PIPE, - wait=False, - ) - ) - - try: - yield - finally: - log.info('waiting for qemu tests to finish...') - 
run.wait(procs) - - log.debug('checking that qemu tests succeeded...') - for client in config.iterkeys(): - (remote,) = ctx.cluster.only(client).remotes.keys() - # teardown nfs mount - _teardown_nfs_mount(remote, client) - # check for test status - remote.run( - args=[ - 'test', '-f', - '{tdir}/archive/qemu/{client}/success'.format( - tdir=testdir, - client=client - ), - ], - ) - - -@contextlib.contextmanager -def task(ctx, config): - """ - Run a test inside of QEMU on top of rbd. Only one test - is supported per client. - - For example, you can specify which clients to run on:: - - tasks: - - ceph: - - qemu: - client.0: - test: http://ceph.com/qa/test.sh - client.1: - test: http://ceph.com/qa/test2.sh - - Or use the same settings on all clients: - - tasks: - - ceph: - - qemu: - all: - test: http://ceph.com/qa/test.sh - - For tests that don't need a filesystem, set type to block:: - - tasks: - - ceph: - - qemu: - client.0: - test: http://ceph.com/qa/test.sh - type: block - - The test should be configured to run on /dev/vdb and later - devices. 
- - If you want to run a test that uses more than one rbd image, - specify how many images to use:: - - tasks: - - ceph: - - qemu: - client.0: - test: http://ceph.com/qa/test.sh - type: block - num_rbd: 2 - - You can set the amount of memory the VM has (default is 1024 MB):: - - tasks: - - ceph: - - qemu: - client.0: - test: http://ceph.com/qa/test.sh - memory: 512 # megabytes - - If you want to run a test against a cloned rbd image, set clone to true:: - - tasks: - - ceph: - - qemu: - client.0: - test: http://ceph.com/qa/test.sh - clone: true - """ - assert isinstance(config, dict), \ - "task qemu only supports a dictionary for configuration" - - config = teuthology.replace_all_with_clients(ctx.cluster, config) - - managers = [] - create_images(ctx=ctx, config=config, managers=managers) - managers.extend([ - lambda: create_dirs(ctx=ctx, config=config), - lambda: generate_iso(ctx=ctx, config=config), - lambda: download_image(ctx=ctx, config=config), - ]) - create_clones(ctx=ctx, config=config, managers=managers) - managers.append( - lambda: run_qemu(ctx=ctx, config=config), - ) - - with contextutil.nested(*managers): - yield diff --git a/tasks/rados.py b/tasks/rados.py deleted file mode 100644 index 720da159630..00000000000 --- a/tasks/rados.py +++ /dev/null @@ -1,196 +0,0 @@ -""" -Rados modle-based integration tests -""" -import contextlib -import logging -import gevent -from teuthology import misc as teuthology - -from teuthology.orchestra import run - -log = logging.getLogger(__name__) - -@contextlib.contextmanager -def task(ctx, config): - """ - Run RadosModel-based integration tests. 
- - The config should be as follows:: - - rados: - clients: [client list] - ops: - objects: - max_in_flight: - object_size: - min_stride_size: - max_stride_size: - op_weights: - runs: - the pool is remade between runs - ec_pool: use an ec pool - erasure_code_profile: profile to use with the erasure coded pool - pool_snaps: use pool snapshots instead of selfmanaged snapshots - write_fadvise_dontneed: write behavior like with LIBRADOS_OP_FLAG_FADVISE_DONTNEED. - This mean data don't access in the near future. - Let osd backend don't keep data in cache. - - For example:: - - tasks: - - ceph: - - rados: - clients: [client.0] - ops: 1000 - max_seconds: 0 # 0 for no limit - objects: 25 - max_in_flight: 16 - object_size: 4000000 - min_stride_size: 1024 - max_stride_size: 4096 - op_weights: - read: 20 - write: 10 - delete: 2 - snap_create: 3 - rollback: 2 - snap_remove: 0 - ec_pool: create an ec pool, defaults to False - erasure_code_profile: - name: teuthologyprofile - k: 2 - m: 1 - ruleset-failure-domain: osd - pool_snaps: true - write_fadvise_dontneed: true - runs: 10 - - interactive: - - Optionally, you can provide the pool name to run against: - - tasks: - - ceph: - - exec: - client.0: - - ceph osd pool create foo - - rados: - clients: [client.0] - pools: [foo] - ... - - Alternatively, you can provide a pool prefix: - - tasks: - - ceph: - - exec: - client.0: - - ceph osd pool create foo.client.0 - - rados: - clients: [client.0] - pool_prefix: foo - ... 
- - """ - log.info('Beginning rados...') - assert isinstance(config, dict), \ - "please list clients to run on" - - object_size = int(config.get('object_size', 4000000)) - op_weights = config.get('op_weights', {}) - testdir = teuthology.get_testdir(ctx) - args = [ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'ceph_test_rados'] - if config.get('ec_pool', False): - args.extend(['--ec-pool']) - if config.get('write_fadvise_dontneed', False): - args.extend(['--write-fadvise-dontneed']) - if config.get('pool_snaps', False): - args.extend(['--pool-snaps']) - args.extend([ - '--op', 'read', str(op_weights.get('read', 100)), - '--op', 'write', str(op_weights.get('write', 100)), - '--op', 'delete', str(op_weights.get('delete', 10)), - '--max-ops', str(config.get('ops', 10000)), - '--objects', str(config.get('objects', 500)), - '--max-in-flight', str(config.get('max_in_flight', 16)), - '--size', str(object_size), - '--min-stride-size', str(config.get('min_stride_size', object_size / 10)), - '--max-stride-size', str(config.get('max_stride_size', object_size / 5)), - '--max-seconds', str(config.get('max_seconds', 0)) - ]) - # Parallel of the op_types in test/osd/TestRados.cc - for field in [ - # read handled above - # write handled above - # delete handled above - "snap_create", - "snap_remove", - "rollback", - "setattr", - "rmattr", - "watch", - "copy_from", - "hit_set_list", - "is_dirty", - "undirty", - "cache_flush", - "cache_try_flush", - "cache_evict", - "append", - ]: - if field in op_weights: - args.extend([ - '--op', field, str(op_weights[field]), - ]) - - def thread(): - """Thread spawned by gevent""" - clients = ['client.{id}'.format(id=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] - log.info('clients are %s' % clients) - if config.get('ec_pool', False): - profile = config.get('erasure_code_profile', {}) - profile_name = profile.get('name', 'teuthologyprofile') - 
ctx.manager.create_erasure_code_profile(profile_name, profile) - else: - profile_name = None - for i in range(int(config.get('runs', '1'))): - log.info("starting run %s out of %s", str(i), config.get('runs', '1')) - tests = {} - existing_pools = config.get('pools', []) - created_pools = [] - for role in config.get('clients', clients): - assert isinstance(role, basestring) - PREFIX = 'client.' - assert role.startswith(PREFIX) - id_ = role[len(PREFIX):] - - pool = config.get('pool', None) - if not pool and existing_pools: - pool = existing_pools.pop() - else: - pool = ctx.manager.create_pool_with_unique_name(erasure_code_profile_name=profile_name) - created_pools.append(pool) - - (remote,) = ctx.cluster.only(role).remotes.iterkeys() - proc = remote.run( - args=["CEPH_CLIENT_ID={id_}".format(id_=id_)] + args + - ["--pool", pool], - logger=log.getChild("rados.{id}".format(id=id_)), - stdin=run.PIPE, - wait=False - ) - tests[id_] = proc - run.wait(tests.itervalues()) - - for pool in created_pools: - ctx.manager.remove_pool(pool) - - running = gevent.spawn(thread) - - try: - yield - finally: - log.info('joining rados') - running.get() diff --git a/tasks/radosbench.py b/tasks/radosbench.py deleted file mode 100644 index 73c54372857..00000000000 --- a/tasks/radosbench.py +++ /dev/null @@ -1,100 +0,0 @@ -""" -Rados benchmarking -""" -import contextlib -import logging - -from teuthology.orchestra import run -from teuthology import misc as teuthology - -log = logging.getLogger(__name__) - -@contextlib.contextmanager -def task(ctx, config): - """ - Run radosbench - - The config should be as follows: - - radosbench: - clients: [client list] - time: - pool: - size: write size to use - unique_pool: use a unique pool, defaults to False - ec_pool: create an ec pool, defaults to False - create_pool: create pool, defaults to False - erasure_code_profile: - name: teuthologyprofile - k: 2 - m: 1 - ruleset-failure-domain: osd - - example: - - tasks: - - ceph: - - radosbench: - clients: 
[client.0] - time: 360 - - interactive: - """ - log.info('Beginning radosbench...') - assert isinstance(config, dict), \ - "please list clients to run on" - radosbench = {} - - testdir = teuthology.get_testdir(ctx) - - for role in config.get('clients', ['client.0']): - assert isinstance(role, basestring) - PREFIX = 'client.' - assert role.startswith(PREFIX) - id_ = role[len(PREFIX):] - (remote,) = ctx.cluster.only(role).remotes.iterkeys() - - if config.get('ec_pool', False): - profile = config.get('erasure_code_profile', {}) - profile_name = profile.get('name', 'teuthologyprofile') - ctx.manager.create_erasure_code_profile(profile_name, profile) - else: - profile_name = None - - pool = 'data' - if config.get('create_pool', True): - if config.get('pool'): - pool = config.get('pool') - if pool != 'data': - ctx.manager.create_pool(pool, erasure_code_profile_name=profile_name) - else: - pool = ctx.manager.create_pool_with_unique_name(erasure_code_profile_name=profile_name) - - proc = remote.run( - args=[ - "/bin/sh", "-c", - " ".join(['adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage', - 'rados', - '--no-log-to-stderr', - '--name', role, - '-b', str(config.get('size', 4<<20)), - '-p' , pool, - 'bench', str(config.get('time', 360)), 'write', - ]).format(tdir=testdir), - ], - logger=log.getChild('radosbench.{id}'.format(id=id_)), - stdin=run.PIPE, - wait=False - ) - radosbench[id_] = proc - - try: - yield - finally: - timeout = config.get('time', 360) * 5 + 180 - log.info('joining radosbench (timing out after %ss)', timeout) - run.wait(radosbench.itervalues(), timeout=timeout) - - if pool is not 'data': - ctx.manager.remove_pool(pool) diff --git a/tasks/radosgw_admin.py b/tasks/radosgw_admin.py deleted file mode 100644 index b6baa4cd4e5..00000000000 --- a/tasks/radosgw_admin.py +++ /dev/null @@ -1,1018 +0,0 @@ -""" -Rgw admin testing against a running instance -""" -# The test cases in this file have been annotated for inventory. 
-# To extract the inventory (in csv format) use the command: -# -# grep '^ *# TESTCASE' | sed 's/^ *# TESTCASE //' -# - -import copy -import json -import logging -import time - -from cStringIO import StringIO - -import boto.exception -import boto.s3.connection -import boto.s3.acl - -import httplib2 - -import util.rgw as rgw_utils - -from teuthology import misc as teuthology -from util.rgw import rgwadmin, get_user_summary, get_user_successful_ops - -log = logging.getLogger(__name__) - -def create_presigned_url(conn, method, bucket_name, key_name, expiration): - return conn.generate_url(expires_in=expiration, - method=method, - bucket=bucket_name, - key=key_name, - query_auth=True, - ) - -def send_raw_http_request(conn, method, bucket_name, key_name, follow_redirects = False): - url = create_presigned_url(conn, method, bucket_name, key_name, 3600) - print url - h = httplib2.Http() - h.follow_redirects = follow_redirects - return h.request(url, method) - - -def get_acl(key): - """ - Helper function to get the xml acl from a key, ensuring that the xml - version tag is removed from the acl response - """ - raw_acl = key.get_xml_acl() - - def remove_version(string): - return string.split( - '' - )[-1] - - def remove_newlines(string): - return string.strip('\n') - - return remove_version( - remove_newlines(raw_acl) - ) - - -def task(ctx, config): - """ - Test radosgw-admin functionality against a running rgw instance. 
- """ - global log - assert config is None or isinstance(config, list) \ - or isinstance(config, dict), \ - "task s3tests only supports a list or dictionary for configuration" - all_clients = ['client.{id}'.format(id=id_) - for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] - if config is None: - config = all_clients - if isinstance(config, list): - config = dict.fromkeys(config) - clients = config.keys() - - multi_region_run = rgw_utils.multi_region_enabled(ctx) - - client = clients[0]; # default choice, multi-region code may overwrite this - if multi_region_run: - client = rgw_utils.get_master_client(ctx, clients) - - # once the client is chosen, pull the host name and assigned port out of - # the role_endpoints that were assigned by the rgw task - (remote_host, remote_port) = ctx.rgw.role_endpoints[client] - - ## - user1='foo' - user2='fud' - subuser1='foo:foo1' - subuser2='foo:foo2' - display_name1='Foo' - display_name2='Fud' - email='foo@foo.com' - email2='bar@bar.com' - access_key='9te6NH5mcdcq0Tc5i8i1' - secret_key='Ny4IOauQoL18Gp2zM7lC1vLmoawgqcYP/YGcWfXu' - access_key2='p5YnriCv1nAtykxBrupQ' - secret_key2='Q8Tk6Q/27hfbFSYdSkPtUqhqx1GgzvpXa4WARozh' - swift_secret1='gpS2G9RREMrnbqlp29PP2D36kgPR1tm72n5fPYfL' - swift_secret2='ri2VJQcKSYATOY6uaDUX7pxgkW+W1YmC6OCxPHwy' - - bucket_name='myfoo' - bucket_name2='mybar' - - # connect to rgw - connection = boto.s3.connection.S3Connection( - aws_access_key_id=access_key, - aws_secret_access_key=secret_key, - is_secure=False, - port=remote_port, - host=remote_host, - calling_format=boto.s3.connection.OrdinaryCallingFormat(), - ) - connection2 = boto.s3.connection.S3Connection( - aws_access_key_id=access_key2, - aws_secret_access_key=secret_key2, - is_secure=False, - port=remote_port, - host=remote_host, - calling_format=boto.s3.connection.OrdinaryCallingFormat(), - ) - - # legend (test cases can be easily grep-ed out) - # TESTCASE 'testname','object','method','operation','assertion' - # TESTCASE 
'info-nosuch','user','info','non-existent user','fails' - (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1]) - assert err - - # TESTCASE 'create-ok','user','create','w/all valid info','succeeds' - (err, out) = rgwadmin(ctx, client, [ - 'user', 'create', - '--uid', user1, - '--display-name', display_name1, - '--email', email, - '--access-key', access_key, - '--secret', secret_key, - '--max-buckets', '4' - ], - check_status=True) - - # TESTCASE 'duplicate email','user','create','existing user email','fails' - (err, out) = rgwadmin(ctx, client, [ - 'user', 'create', - '--uid', user2, - '--display-name', display_name2, - '--email', email, - ]) - assert err - - # TESTCASE 'info-existing','user','info','existing user','returns correct info' - (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) - assert out['user_id'] == user1 - assert out['email'] == email - assert out['display_name'] == display_name1 - assert len(out['keys']) == 1 - assert out['keys'][0]['access_key'] == access_key - assert out['keys'][0]['secret_key'] == secret_key - assert not out['suspended'] - - # this whole block should only be run if regions have been configured - if multi_region_run: - rgw_utils.radosgw_agent_sync_all(ctx) - # post-sync, validate that user1 exists on the sync destination host - for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): - dest_client = c_config['dest'] - (err, out) = rgwadmin(ctx, dest_client, ['metadata', 'list', 'user']) - (err, out) = rgwadmin(ctx, dest_client, ['user', 'info', '--uid', user1], check_status=True) - assert out['user_id'] == user1 - assert out['email'] == email - assert out['display_name'] == display_name1 - assert len(out['keys']) == 1 - assert out['keys'][0]['access_key'] == access_key - assert out['keys'][0]['secret_key'] == secret_key - assert not out['suspended'] - - # compare the metadata between different regions, make sure it matches - log.debug('compare the metadata between 
different regions, make sure it matches') - for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): - source_client = c_config['src'] - dest_client = c_config['dest'] - (err1, out1) = rgwadmin(ctx, source_client, - ['metadata', 'get', 'user:{uid}'.format(uid=user1)], check_status=True) - (err2, out2) = rgwadmin(ctx, dest_client, - ['metadata', 'get', 'user:{uid}'.format(uid=user1)], check_status=True) - assert out1 == out2 - - # suspend a user on the master, then check the status on the destination - log.debug('suspend a user on the master, then check the status on the destination') - for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): - source_client = c_config['src'] - dest_client = c_config['dest'] - (err, out) = rgwadmin(ctx, source_client, ['user', 'suspend', '--uid', user1]) - rgw_utils.radosgw_agent_sync_all(ctx) - (err, out) = rgwadmin(ctx, dest_client, ['user', 'info', '--uid', user1], check_status=True) - assert out['suspended'] - - # delete a user on the master, then check that it's gone on the destination - log.debug('delete a user on the master, then check that it\'s gone on the destination') - for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): - source_client = c_config['src'] - dest_client = c_config['dest'] - (err, out) = rgwadmin(ctx, source_client, ['user', 'rm', '--uid', user1], check_status=True) - rgw_utils.radosgw_agent_sync_all(ctx) - (err, out) = rgwadmin(ctx, source_client, ['user', 'info', '--uid', user1]) - assert out is None - (err, out) = rgwadmin(ctx, dest_client, ['user', 'info', '--uid', user1]) - assert out is None - - # then recreate it so later tests pass - (err, out) = rgwadmin(ctx, client, [ - 'user', 'create', - '--uid', user1, - '--display-name', display_name1, - '--email', email, - '--access-key', access_key, - '--secret', secret_key, - '--max-buckets', '4' - ], - check_status=True) - - # now do the multi-region bucket tests - log.debug('now do the multi-region bucket tests') - - # 
Create a second user for the following tests - log.debug('Create a second user for the following tests') - (err, out) = rgwadmin(ctx, client, [ - 'user', 'create', - '--uid', user2, - '--display-name', display_name2, - '--email', email2, - '--access-key', access_key2, - '--secret', secret_key2, - '--max-buckets', '4' - ], - check_status=True) - (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user2], check_status=True) - assert out is not None - - # create a bucket and do a sync - log.debug('create a bucket and do a sync') - bucket = connection.create_bucket(bucket_name2) - rgw_utils.radosgw_agent_sync_all(ctx) - - # compare the metadata for the bucket between different regions, make sure it matches - log.debug('compare the metadata for the bucket between different regions, make sure it matches') - for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): - source_client = c_config['src'] - dest_client = c_config['dest'] - (err1, out1) = rgwadmin(ctx, source_client, - ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)], - check_status=True) - (err2, out2) = rgwadmin(ctx, dest_client, - ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)], - check_status=True) - assert out1 == out2 - - # get the bucket.instance info and compare that - src_bucket_id = out1['data']['bucket']['bucket_id'] - dest_bucket_id = out2['data']['bucket']['bucket_id'] - (err1, out1) = rgwadmin(ctx, source_client, ['metadata', 'get', - 'bucket.instance:{bucket_name}:{bucket_instance}'.format( - bucket_name=bucket_name2,bucket_instance=src_bucket_id)], - check_status=True) - (err2, out2) = rgwadmin(ctx, dest_client, ['metadata', 'get', - 'bucket.instance:{bucket_name}:{bucket_instance}'.format( - bucket_name=bucket_name2,bucket_instance=dest_bucket_id)], - check_status=True) - del out1['data']['bucket_info']['bucket']['pool'] - del out1['data']['bucket_info']['bucket']['index_pool'] - del 
out2['data']['bucket_info']['bucket']['pool'] - del out2['data']['bucket_info']['bucket']['index_pool'] - assert out1 == out2 - - same_region = 0 - for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): - source_client = c_config['src'] - dest_client = c_config['dest'] - - source_region = rgw_utils.region_for_client(ctx, source_client) - dest_region = rgw_utils.region_for_client(ctx, dest_client) - - # 301 is only returned for requests to something in a different region - if source_region == dest_region: - log.debug('301 is only returned for requests to something in a different region') - same_region += 1 - continue - - # Attempt to create a new connection with user1 to the destination RGW - log.debug('Attempt to create a new connection with user1 to the destination RGW') - # and use that to attempt a delete (that should fail) - - (dest_remote_host, dest_remote_port) = ctx.rgw.role_endpoints[dest_client] - connection_dest = boto.s3.connection.S3Connection( - aws_access_key_id=access_key, - aws_secret_access_key=secret_key, - is_secure=False, - port=dest_remote_port, - host=dest_remote_host, - calling_format=boto.s3.connection.OrdinaryCallingFormat(), - ) - - # this should fail - r, content = send_raw_http_request(connection_dest, 'DELETE', bucket_name2, '', follow_redirects = False) - assert r.status == 301 - - # now delete the bucket on the source RGW and do another sync - log.debug('now delete the bucket on the source RGW and do another sync') - bucket.delete() - rgw_utils.radosgw_agent_sync_all(ctx) - - if same_region == len(ctx.radosgw_agent.config): - bucket.delete() - rgw_utils.radosgw_agent_sync_all(ctx) - - # make sure that the bucket no longer exists in either region - log.debug('make sure that the bucket no longer exists in either region') - for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): - source_client = c_config['src'] - dest_client = c_config['dest'] - (err1, out1) = rgwadmin(ctx, source_client, ['metadata', 'get', - 
'bucket:{bucket_name}'.format(bucket_name=bucket_name2)]) - (err2, out2) = rgwadmin(ctx, dest_client, ['metadata', 'get', - 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)]) - # Both of the previous calls should have errors due to requesting - # metadata for non-existent buckets - assert err1 - assert err2 - - # create a bucket and then sync it - log.debug('create a bucket and then sync it') - bucket = connection.create_bucket(bucket_name2) - rgw_utils.radosgw_agent_sync_all(ctx) - - # compare the metadata for the bucket between different regions, make sure it matches - log.debug('compare the metadata for the bucket between different regions, make sure it matches') - for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): - source_client = c_config['src'] - dest_client = c_config['dest'] - (err1, out1) = rgwadmin(ctx, source_client, - ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)], - check_status=True) - (err2, out2) = rgwadmin(ctx, dest_client, - ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)], - check_status=True) - assert out1 == out2 - - # Now delete the bucket and recreate it with a different user - log.debug('Now delete the bucket and recreate it with a different user') - # within the same window of time and then sync. 
- bucket.delete() - bucket = connection2.create_bucket(bucket_name2) - rgw_utils.radosgw_agent_sync_all(ctx) - - # compare the metadata for the bucket between different regions, make sure it matches - log.debug('compare the metadata for the bucket between different regions, make sure it matches') - # user2 should own the bucket in both regions - for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): - source_client = c_config['src'] - dest_client = c_config['dest'] - (err1, out1) = rgwadmin(ctx, source_client, - ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)], - check_status=True) - (err2, out2) = rgwadmin(ctx, dest_client, - ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)], - check_status=True) - assert out1 == out2 - assert out1['data']['owner'] == user2 - assert out1['data']['owner'] != user1 - - # now we're going to use this bucket to test meta-data update propagation - log.debug('now we\'re going to use this bucket to test meta-data update propagation') - for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): - source_client = c_config['src'] - dest_client = c_config['dest'] - - # get the metadata so we can tweak it - log.debug('get the metadata so we can tweak it') - (err, orig_data) = rgwadmin(ctx, source_client, - ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)], - check_status=True) - - # manually edit mtime for this bucket to be 300 seconds in the past - log.debug('manually edit mtime for this bucket to be 300 seconds in the past') - new_data = copy.deepcopy(orig_data) - new_data['mtime'] = orig_data['mtime'] - 300 - assert new_data != orig_data - (err, out) = rgwadmin(ctx, source_client, - ['metadata', 'put', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)], - stdin=StringIO(json.dumps(new_data)), - check_status=True) - - # get the metadata and make sure that the 'put' worked - log.debug('get the metadata and make sure that the \'put\' 
worked') - (err, out) = rgwadmin(ctx, source_client, - ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)], - check_status=True) - assert out == new_data - - # sync to propagate the new metadata - log.debug('sync to propagate the new metadata') - rgw_utils.radosgw_agent_sync_all(ctx) - - # get the metadata from the dest and compare it to what we just set - log.debug('get the metadata from the dest and compare it to what we just set') - # and what the source region has. - (err1, out1) = rgwadmin(ctx, source_client, - ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)], - check_status=True) - (err2, out2) = rgwadmin(ctx, dest_client, - ['metadata', 'get', 'bucket:{bucket_name}'.format(bucket_name=bucket_name2)], - check_status=True) - # yeah for the transitive property - assert out1 == out2 - assert out1 == new_data - - # now we delete the bucket - log.debug('now we delete the bucket') - bucket.delete() - - log.debug('sync to propagate the deleted bucket') - rgw_utils.radosgw_agent_sync_all(ctx) - - # Delete user2 as later tests do not expect it to exist. - # Verify that it is gone on both regions - for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): - source_client = c_config['src'] - dest_client = c_config['dest'] - (err, out) = rgwadmin(ctx, source_client, - ['user', 'rm', '--uid', user2], check_status=True) - rgw_utils.radosgw_agent_sync_all(ctx) - # The two 'user info' calls should fail and not return any data - # since we just deleted this user. 
- (err, out) = rgwadmin(ctx, source_client, ['user', 'info', '--uid', user2]) - assert out is None - (err, out) = rgwadmin(ctx, dest_client, ['user', 'info', '--uid', user2]) - assert out is None - - # Test data sync - - # First create a bucket for data sync test purpose - bucket = connection.create_bucket(bucket_name + 'data') - - # Create a tiny file and check if in sync - for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): - if c_config.get('metadata-only'): - continue - - for full in (True, False): - source_client = c_config['src'] - dest_client = c_config['dest'] - k = boto.s3.key.Key(bucket) - k.key = 'tiny_file' - k.set_contents_from_string("123456789") - safety_window = rgw_utils.radosgw_data_log_window(ctx, source_client) - time.sleep(safety_window) - rgw_utils.radosgw_agent_sync_all(ctx, data=True, full=full) - (dest_host, dest_port) = ctx.rgw.role_endpoints[dest_client] - dest_connection = boto.s3.connection.S3Connection( - aws_access_key_id=access_key, - aws_secret_access_key=secret_key, - is_secure=False, - port=dest_port, - host=dest_host, - calling_format=boto.s3.connection.OrdinaryCallingFormat(), - ) - dest_k = dest_connection.get_bucket(bucket_name + 'data').get_key('tiny_file') - assert k.get_contents_as_string() == dest_k.get_contents_as_string() - - # check that deleting it removes it from the dest zone - k.delete() - time.sleep(safety_window) - # full sync doesn't handle deleted objects yet - rgw_utils.radosgw_agent_sync_all(ctx, data=True, full=False) - - dest_bucket = dest_connection.get_bucket(bucket_name + 'data') - dest_k = dest_bucket.get_key('tiny_file') - assert dest_k == None, 'object not deleted from destination zone' - - # finally we delete the bucket - bucket.delete() - - bucket = connection.create_bucket(bucket_name + 'data2') - for agent_client, c_config in ctx.radosgw_agent.config.iteritems(): - if c_config.get('metadata-only'): - continue - - for full in (True, False): - source_client = c_config['src'] - 
dest_client = c_config['dest'] - (dest_host, dest_port) = ctx.rgw.role_endpoints[dest_client] - dest_connection = boto.s3.connection.S3Connection( - aws_access_key_id=access_key, - aws_secret_access_key=secret_key, - is_secure=False, - port=dest_port, - host=dest_host, - calling_format=boto.s3.connection.OrdinaryCallingFormat(), - ) - for i in range(20): - k = boto.s3.key.Key(bucket) - k.key = 'tiny_file_' + str(i) - k.set_contents_from_string(str(i) * 100) - - safety_window = rgw_utils.radosgw_data_log_window(ctx, source_client) - time.sleep(safety_window) - rgw_utils.radosgw_agent_sync_all(ctx, data=True, full=full) - - for i in range(20): - dest_k = dest_connection.get_bucket(bucket_name + 'data2').get_key('tiny_file_' + str(i)) - assert (str(i) * 100) == dest_k.get_contents_as_string() - k = boto.s3.key.Key(bucket) - k.key = 'tiny_file_' + str(i) - k.delete() - - # check that deleting removes the objects from the dest zone - time.sleep(safety_window) - # full sync doesn't delete deleted objects yet - rgw_utils.radosgw_agent_sync_all(ctx, data=True, full=False) - - for i in range(20): - dest_bucket = dest_connection.get_bucket(bucket_name + 'data2') - dest_k = dest_bucket.get_key('tiny_file_' + str(i)) - assert dest_k == None, 'object %d not deleted from destination zone' % i - bucket.delete() - - # end of 'if multi_region_run:' - - # TESTCASE 'suspend-ok','user','suspend','active user','succeeds' - (err, out) = rgwadmin(ctx, client, ['user', 'suspend', '--uid', user1], - check_status=True) - - # TESTCASE 'suspend-suspended','user','suspend','suspended user','succeeds w/advisory' - (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) - assert out['suspended'] - - # TESTCASE 're-enable','user','enable','suspended user','succeeds' - (err, out) = rgwadmin(ctx, client, ['user', 'enable', '--uid', user1], check_status=True) - - # TESTCASE 'info-re-enabled','user','info','re-enabled user','no longer suspended' - (err, out) = 
rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) - assert not out['suspended'] - - # TESTCASE 'add-keys','key','create','w/valid info','succeeds' - (err, out) = rgwadmin(ctx, client, [ - 'key', 'create', '--uid', user1, - '--access-key', access_key2, '--secret', secret_key2, - ], check_status=True) - - # TESTCASE 'info-new-key','user','info','after key addition','returns all keys' - (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], - check_status=True) - assert len(out['keys']) == 2 - assert out['keys'][0]['access_key'] == access_key2 or out['keys'][1]['access_key'] == access_key2 - assert out['keys'][0]['secret_key'] == secret_key2 or out['keys'][1]['secret_key'] == secret_key2 - - # TESTCASE 'rm-key','key','rm','newly added key','succeeds, key is removed' - (err, out) = rgwadmin(ctx, client, [ - 'key', 'rm', '--uid', user1, - '--access-key', access_key2, - ], check_status=True) - assert len(out['keys']) == 1 - assert out['keys'][0]['access_key'] == access_key - assert out['keys'][0]['secret_key'] == secret_key - - # TESTCASE 'add-swift-key','key','create','swift key','succeeds' - subuser_access = 'full' - subuser_perm = 'full-control' - - (err, out) = rgwadmin(ctx, client, [ - 'subuser', 'create', '--subuser', subuser1, - '--access', subuser_access - ], check_status=True) - - # TESTCASE 'add-swift-key','key','create','swift key','succeeds' - (err, out) = rgwadmin(ctx, client, [ - 'subuser', 'modify', '--subuser', subuser1, - '--secret', swift_secret1, - '--key-type', 'swift', - ], check_status=True) - - # TESTCASE 'subuser-perm-mask', 'subuser', 'info', 'test subuser perm mask durability', 'succeeds' - (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1]) - - assert out['subusers'][0]['permissions'] == subuser_perm - - # TESTCASE 'info-swift-key','user','info','after key addition','returns all keys' - (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) - assert 
len(out['swift_keys']) == 1 - assert out['swift_keys'][0]['user'] == subuser1 - assert out['swift_keys'][0]['secret_key'] == swift_secret1 - - # TESTCASE 'add-swift-subuser','key','create','swift sub-user key','succeeds' - (err, out) = rgwadmin(ctx, client, [ - 'subuser', 'create', '--subuser', subuser2, - '--secret', swift_secret2, - '--key-type', 'swift', - ], check_status=True) - - # TESTCASE 'info-swift-subuser','user','info','after key addition','returns all sub-users/keys' - (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) - assert len(out['swift_keys']) == 2 - assert out['swift_keys'][0]['user'] == subuser2 or out['swift_keys'][1]['user'] == subuser2 - assert out['swift_keys'][0]['secret_key'] == swift_secret2 or out['swift_keys'][1]['secret_key'] == swift_secret2 - - # TESTCASE 'rm-swift-key1','key','rm','subuser','succeeds, one key is removed' - (err, out) = rgwadmin(ctx, client, [ - 'key', 'rm', '--subuser', subuser1, - '--key-type', 'swift', - ], check_status=True) - assert len(out['swift_keys']) == 1 - - # TESTCASE 'rm-subuser','subuser','rm','subuser','success, subuser is removed' - (err, out) = rgwadmin(ctx, client, [ - 'subuser', 'rm', '--subuser', subuser1, - ], check_status=True) - assert len(out['subusers']) == 1 - - # TESTCASE 'rm-subuser-with-keys','subuser','rm','subuser','succeeds, second subser and key is removed' - (err, out) = rgwadmin(ctx, client, [ - 'subuser', 'rm', '--subuser', subuser2, - '--key-type', 'swift', '--purge-keys', - ], check_status=True) - assert len(out['swift_keys']) == 0 - assert len(out['subusers']) == 0 - - # TESTCASE 'bucket-stats','bucket','stats','no session/buckets','succeeds, empty list' - (err, out) = rgwadmin(ctx, client, ['bucket', 'stats', '--uid', user1], - check_status=True) - assert len(out) == 0 - - if multi_region_run: - rgw_utils.radosgw_agent_sync_all(ctx) - - # TESTCASE 'bucket-stats2','bucket','stats','no buckets','succeeds, empty list' - (err, out) = 
rgwadmin(ctx, client, ['bucket', 'list', '--uid', user1], check_status=True) - assert len(out) == 0 - - # create a first bucket - bucket = connection.create_bucket(bucket_name) - - # TESTCASE 'bucket-list','bucket','list','one bucket','succeeds, expected list' - (err, out) = rgwadmin(ctx, client, ['bucket', 'list', '--uid', user1], check_status=True) - assert len(out) == 1 - assert out[0] == bucket_name - - # TESTCASE 'bucket-list-all','bucket','list','all buckets','succeeds, expected list' - (err, out) = rgwadmin(ctx, client, ['bucket', 'list'], check_status=True) - assert len(out) >= 1 - assert bucket_name in out; - - # TESTCASE 'max-bucket-limit,'bucket','create','4 buckets','5th bucket fails due to max buckets == 4' - bucket2 = connection.create_bucket(bucket_name + '2') - bucket3 = connection.create_bucket(bucket_name + '3') - bucket4 = connection.create_bucket(bucket_name + '4') - # the 5th should fail. - failed = False - try: - connection.create_bucket(bucket_name + '5') - except Exception: - failed = True - assert failed - - # delete the buckets - bucket2.delete() - bucket3.delete() - bucket4.delete() - - # TESTCASE 'bucket-stats3','bucket','stats','new empty bucket','succeeds, empty list' - (err, out) = rgwadmin(ctx, client, [ - 'bucket', 'stats', '--bucket', bucket_name], check_status=True) - assert out['owner'] == user1 - bucket_id = out['id'] - - # TESTCASE 'bucket-stats4','bucket','stats','new empty bucket','succeeds, expected bucket ID' - (err, out) = rgwadmin(ctx, client, ['bucket', 'stats', '--uid', user1], check_status=True) - assert len(out) == 1 - assert out[0]['id'] == bucket_id # does it return the same ID twice in a row? 
- - # use some space - key = boto.s3.key.Key(bucket) - key.set_contents_from_string('one') - - # TESTCASE 'bucket-stats5','bucket','stats','after creating key','succeeds, lists one non-empty object' - (err, out) = rgwadmin(ctx, client, [ - 'bucket', 'stats', '--bucket', bucket_name], check_status=True) - assert out['id'] == bucket_id - assert out['usage']['rgw.main']['num_objects'] == 1 - assert out['usage']['rgw.main']['size_kb'] > 0 - - # reclaim it - key.delete() - - # TESTCASE 'bucket unlink', 'bucket', 'unlink', 'unlink bucket from user', 'fails', 'access denied error' - (err, out) = rgwadmin(ctx, client, - ['bucket', 'unlink', '--uid', user1, '--bucket', bucket_name], - check_status=True) - - # create a second user to link the bucket to - (err, out) = rgwadmin(ctx, client, [ - 'user', 'create', - '--uid', user2, - '--display-name', display_name2, - '--access-key', access_key2, - '--secret', secret_key2, - '--max-buckets', '1', - ], - check_status=True) - - # try creating an object with the first user before the bucket is relinked - denied = False - key = boto.s3.key.Key(bucket) - - try: - key.set_contents_from_string('two') - except boto.exception.S3ResponseError: - denied = True - - assert not denied - - # delete the object - key.delete() - - # link the bucket to another user - (err, out) = rgwadmin(ctx, client, ['metadata', 'get', 'bucket:{n}'.format(n=bucket_name)], - check_status=True) - - bucket_data = out['data'] - assert bucket_data['bucket']['name'] == bucket_name - - bucket_id = bucket_data['bucket']['bucket_id'] - - # link the bucket to another user - (err, out) = rgwadmin(ctx, client, ['bucket', 'link', '--uid', user2, '--bucket', bucket_name, '--bucket-id', bucket_id], - check_status=True) - - # try to remove user, should fail (has a linked bucket) - (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user2]) - assert err - - # TESTCASE 'bucket unlink', 'bucket', 'unlink', 'unlink bucket from user', 'succeeds, bucket unlinked' - (err, out) 
= rgwadmin(ctx, client, ['bucket', 'unlink', '--uid', user2, '--bucket', bucket_name], - check_status=True) - - # relink the bucket to the first user and delete the second user - (err, out) = rgwadmin(ctx, client, - ['bucket', 'link', '--uid', user1, '--bucket', bucket_name, '--bucket-id', bucket_id], - check_status=True) - - (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user2], - check_status=True) - - # TESTCASE 'object-rm', 'object', 'rm', 'remove object', 'succeeds, object is removed' - - # upload an object - object_name = 'four' - key = boto.s3.key.Key(bucket, object_name) - key.set_contents_from_string(object_name) - - # now delete it - (err, out) = rgwadmin(ctx, client, - ['object', 'rm', '--bucket', bucket_name, '--object', object_name], - check_status=True) - - # TESTCASE 'bucket-stats6','bucket','stats','after deleting key','succeeds, lists one no objects' - (err, out) = rgwadmin(ctx, client, [ - 'bucket', 'stats', '--bucket', bucket_name], - check_status=True) - assert out['id'] == bucket_id - assert out['usage']['rgw.main']['num_objects'] == 0 - - # list log objects - # TESTCASE 'log-list','log','list','after activity','succeeds, lists one no objects' - (err, out) = rgwadmin(ctx, client, ['log', 'list'], check_status=True) - assert len(out) > 0 - - for obj in out: - # TESTCASE 'log-show','log','show','after activity','returns expected info' - if obj[:4] == 'meta' or obj[:4] == 'data': - continue - - (err, rgwlog) = rgwadmin(ctx, client, ['log', 'show', '--object', obj], - check_status=True) - assert len(rgwlog) > 0 - - # exempt bucket_name2 from checking as it was only used for multi-region tests - assert rgwlog['bucket'].find(bucket_name) == 0 or rgwlog['bucket'].find(bucket_name2) == 0 - assert rgwlog['bucket'] != bucket_name or rgwlog['bucket_id'] == bucket_id - assert rgwlog['bucket_owner'] == user1 or rgwlog['bucket'] == bucket_name + '5' or rgwlog['bucket'] == bucket_name2 - for entry in rgwlog['log_entries']: - log.debug('checking 
log entry: ', entry) - assert entry['bucket'] == rgwlog['bucket'] - possible_buckets = [bucket_name + '5', bucket_name2] - user = entry['user'] - assert user == user1 or user.endswith('system-user') or \ - rgwlog['bucket'] in possible_buckets - - # TESTCASE 'log-rm','log','rm','delete log objects','succeeds' - (err, out) = rgwadmin(ctx, client, ['log', 'rm', '--object', obj], - check_status=True) - - # TODO: show log by bucket+date - - # need to wait for all usage data to get flushed, should take up to 30 seconds - timestamp = time.time() - while time.time() - timestamp <= (20 * 60): # wait up to 20 minutes - (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--categories', 'delete_obj']) # last operation we did is delete obj, wait for it to flush - if get_user_successful_ops(out, user1) > 0: - break - time.sleep(1) - - assert time.time() - timestamp <= (20 * 60) - - # TESTCASE 'usage-show' 'usage' 'show' 'all usage' 'succeeds' - (err, out) = rgwadmin(ctx, client, ['usage', 'show'], check_status=True) - assert len(out['entries']) > 0 - assert len(out['summary']) > 0 - - user_summary = get_user_summary(out, user1) - - total = user_summary['total'] - assert total['successful_ops'] > 0 - - # TESTCASE 'usage-show2' 'usage' 'show' 'user usage' 'succeeds' - (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--uid', user1], - check_status=True) - assert len(out['entries']) > 0 - assert len(out['summary']) > 0 - user_summary = out['summary'][0] - for entry in user_summary['categories']: - assert entry['successful_ops'] > 0 - assert user_summary['user'] == user1 - - # TESTCASE 'usage-show3' 'usage' 'show' 'user usage categories' 'succeeds' - test_categories = ['create_bucket', 'put_obj', 'delete_obj', 'delete_bucket'] - for cat in test_categories: - (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--uid', user1, '--categories', cat], - check_status=True) - assert len(out['summary']) > 0 - user_summary = out['summary'][0] - assert user_summary['user'] == user1 - 
assert len(user_summary['categories']) == 1 - entry = user_summary['categories'][0] - assert entry['category'] == cat - assert entry['successful_ops'] > 0 - - # the usage flush interval is 30 seconds, wait that much an then some - # to make sure everything has been flushed - time.sleep(35) - - # TESTCASE 'usage-trim' 'usage' 'trim' 'user usage' 'succeeds, usage removed' - (err, out) = rgwadmin(ctx, client, ['usage', 'trim', '--uid', user1], - check_status=True) - (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--uid', user1], - check_status=True) - assert len(out['entries']) == 0 - assert len(out['summary']) == 0 - - # TESTCASE 'user-suspend2','user','suspend','existing user','succeeds' - (err, out) = rgwadmin(ctx, client, ['user', 'suspend', '--uid', user1], - check_status=True) - - # TESTCASE 'user-suspend3','user','suspend','suspended user','cannot write objects' - try: - key = boto.s3.key.Key(bucket) - key.set_contents_from_string('five') - except boto.exception.S3ResponseError as e: - assert e.status == 403 - - # TESTCASE 'user-renable2','user','enable','suspended user','succeeds' - (err, out) = rgwadmin(ctx, client, ['user', 'enable', '--uid', user1], - check_status=True) - - # TESTCASE 'user-renable3','user','enable','reenabled user','can write objects' - key = boto.s3.key.Key(bucket) - key.set_contents_from_string('six') - - # TESTCASE 'gc-list', 'gc', 'list', 'get list of objects ready for garbage collection' - - # create an object large enough to be split into multiple parts - test_string = 'foo'*10000000 - - big_key = boto.s3.key.Key(bucket) - big_key.set_contents_from_string(test_string) - - # now delete the head - big_key.delete() - - # wait a bit to give the garbage collector time to cycle - time.sleep(15) - - (err, out) = rgwadmin(ctx, client, ['gc', 'list']) - - assert len(out) > 0 - - # TESTCASE 'gc-process', 'gc', 'process', 'manually collect garbage' - (err, out) = rgwadmin(ctx, client, ['gc', 'process'], check_status=True) - - #confirm - 
(err, out) = rgwadmin(ctx, client, ['gc', 'list']) - - assert len(out) == 0 - - # TESTCASE 'rm-user-buckets','user','rm','existing user','fails, still has buckets' - (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user1]) - assert err - - # delete should fail because ``key`` still exists - try: - bucket.delete() - except boto.exception.S3ResponseError as e: - assert e.status == 409 - - key.delete() - bucket.delete() - - # TESTCASE 'policy', 'bucket', 'policy', 'get bucket policy', 'returns S3 policy' - bucket = connection.create_bucket(bucket_name) - - # create an object - key = boto.s3.key.Key(bucket) - key.set_contents_from_string('seven') - - # should be private already but guarantee it - key.set_acl('private') - - (err, out) = rgwadmin(ctx, client, - ['policy', '--bucket', bucket.name, '--object', key.key], - check_status=True) - - acl = get_acl(key) - - assert acl == out.strip('\n') - - # add another grantee by making the object public read - key.set_acl('public-read') - - (err, out) = rgwadmin(ctx, client, - ['policy', '--bucket', bucket.name, '--object', key.key], - check_status=True) - - acl = get_acl(key) - - assert acl == out.strip('\n') - - # TESTCASE 'rm-bucket', 'bucket', 'rm', 'bucket with objects', 'succeeds' - bucket = connection.create_bucket(bucket_name) - key_name = ['eight', 'nine', 'ten', 'eleven'] - for i in range(4): - key = boto.s3.key.Key(bucket) - key.set_contents_from_string(key_name[i]) - - (err, out) = rgwadmin(ctx, client, - ['bucket', 'rm', '--bucket', bucket_name, '--purge-objects'], - check_status=True) - - # TESTCASE 'caps-add', 'caps', 'add', 'add user cap', 'succeeds' - caps='user=read' - (err, out) = rgwadmin(ctx, client, ['caps', 'add', '--uid', user1, '--caps', caps]) - - assert out['caps'][0]['perm'] == 'read' - - # TESTCASE 'caps-rm', 'caps', 'rm', 'remove existing cap from user', 'succeeds' - (err, out) = rgwadmin(ctx, client, ['caps', 'rm', '--uid', user1, '--caps', caps]) - - assert not out['caps'] - - # 
TESTCASE 'rm-user','user','rm','existing user','fails, still has buckets' - bucket = connection.create_bucket(bucket_name) - key = boto.s3.key.Key(bucket) - - (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user1]) - assert err - - # TESTCASE 'rm-user2', 'user', 'rm', 'user with data', 'succeeds' - bucket = connection.create_bucket(bucket_name) - key = boto.s3.key.Key(bucket) - key.set_contents_from_string('twelve') - - (err, out) = rgwadmin(ctx, client, - ['user', 'rm', '--uid', user1, '--purge-data' ], - check_status=True) - - # TESTCASE 'rm-user3','user','rm','deleted user','fails' - (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1]) - assert err - - # TESTCASE 'zone-info', 'zone', 'get', 'get zone info', 'succeeds, has default placement rule' - # - - (err, out) = rgwadmin(ctx, client, ['zone', 'get']) - orig_placement_pools = len(out['placement_pools']) - - # removed this test, it is not correct to assume that zone has default placement, it really - # depends on how we set it up before - # - # assert len(out) > 0 - # assert len(out['placement_pools']) == 1 - - # default_rule = out['placement_pools'][0] - # assert default_rule['key'] == 'default-placement' - - rule={'key': 'new-placement', 'val': {'data_pool': '.rgw.buckets.2', 'index_pool': '.rgw.buckets.index.2'}} - - out['placement_pools'].append(rule) - - (err, out) = rgwadmin(ctx, client, ['zone', 'set'], - stdin=StringIO(json.dumps(out)), - check_status=True) - - (err, out) = rgwadmin(ctx, client, ['zone', 'get']) - assert len(out) > 0 - assert len(out['placement_pools']) == orig_placement_pools + 1 diff --git a/tasks/radosgw_admin_rest.py b/tasks/radosgw_admin_rest.py deleted file mode 100644 index 7bd72d19536..00000000000 --- a/tasks/radosgw_admin_rest.py +++ /dev/null @@ -1,668 +0,0 @@ -""" -Run a series of rgw admin commands through the rest interface. - -The test cases in this file have been annotated for inventory. 
-To extract the inventory (in csv format) use the command: - - grep '^ *# TESTCASE' | sed 's/^ *# TESTCASE //' - -""" -from cStringIO import StringIO -import logging -import json - -import boto.exception -import boto.s3.connection -import boto.s3.acl - -import requests -import time - -from boto.connection import AWSAuthConnection -from teuthology import misc as teuthology -from util.rgw import get_user_summary, get_user_successful_ops - -log = logging.getLogger(__name__) - -def rgwadmin(ctx, client, cmd): - """ - Perform rgw admin command - - :param client: client - :param cmd: command to execute. - :return: command exit status, json result. - """ - log.info('radosgw-admin: %s' % cmd) - testdir = teuthology.get_testdir(ctx) - pre = [ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'radosgw-admin', - '--log-to-stderr', - '--format', 'json', - ] - pre.extend(cmd) - (remote,) = ctx.cluster.only(client).remotes.iterkeys() - proc = remote.run( - args=pre, - check_status=False, - stdout=StringIO(), - stderr=StringIO(), - ) - r = proc.exitstatus - out = proc.stdout.getvalue() - j = None - if not r and out != '': - try: - j = json.loads(out) - log.info(' json result: %s' % j) - except ValueError: - j = out - log.info(' raw result: %s' % j) - return (r, j) - - -def rgwadmin_rest(connection, cmd, params=None, headers=None, raw=False): - """ - perform a rest command - """ - log.info('radosgw-admin-rest: %s %s' % (cmd, params)) - put_cmds = ['create', 'link', 'add'] - post_cmds = ['unlink', 'modify'] - delete_cmds = ['trim', 'rm', 'process'] - get_cmds = ['check', 'info', 'show', 'list'] - - bucket_sub_resources = ['object', 'policy', 'index'] - user_sub_resources = ['subuser', 'key', 'caps'] - zone_sub_resources = ['pool', 'log', 'garbage'] - - def get_cmd_method_and_handler(cmd): - """ - Get the rest command and handler from information in cmd and - from the imported requests object. 
- """ - if cmd[1] in put_cmds: - return 'PUT', requests.put - elif cmd[1] in delete_cmds: - return 'DELETE', requests.delete - elif cmd[1] in post_cmds: - return 'POST', requests.post - elif cmd[1] in get_cmds: - return 'GET', requests.get - - def get_resource(cmd): - """ - Get the name of the resource from information in cmd. - """ - if cmd[0] == 'bucket' or cmd[0] in bucket_sub_resources: - if cmd[0] == 'bucket': - return 'bucket', '' - else: - return 'bucket', cmd[0] - elif cmd[0] == 'user' or cmd[0] in user_sub_resources: - if cmd[0] == 'user': - return 'user', '' - else: - return 'user', cmd[0] - elif cmd[0] == 'usage': - return 'usage', '' - elif cmd[0] == 'zone' or cmd[0] in zone_sub_resources: - if cmd[0] == 'zone': - return 'zone', '' - else: - return 'zone', cmd[0] - - def build_admin_request(conn, method, resource = '', headers=None, data='', - query_args=None, params=None): - """ - Build an administative request adapted from the build_request() - method of boto.connection - """ - - path = conn.calling_format.build_path_base('admin', resource) - auth_path = conn.calling_format.build_auth_path('admin', resource) - host = conn.calling_format.build_host(conn.server_name(), 'admin') - if query_args: - path += '?' + query_args - boto.log.debug('path=%s' % path) - auth_path += '?' 
+ query_args - boto.log.debug('auth_path=%s' % auth_path) - return AWSAuthConnection.build_base_http_request(conn, method, path, - auth_path, params, headers, data, host) - - method, handler = get_cmd_method_and_handler(cmd) - resource, query_args = get_resource(cmd) - request = build_admin_request(connection, method, resource, - query_args=query_args, headers=headers) - - url = '{protocol}://{host}{path}'.format(protocol=request.protocol, - host=request.host, path=request.path) - - request.authorize(connection=connection) - result = handler(url, params=params, headers=request.headers) - - if raw: - log.info(' text result: %s' % result.txt) - return result.status_code, result.txt - else: - log.info(' json result: %s' % result.json()) - return result.status_code, result.json() - - -def task(ctx, config): - """ - Test radosgw-admin functionality through the RESTful interface - """ - assert config is None or isinstance(config, list) \ - or isinstance(config, dict), \ - "task s3tests only supports a list or dictionary for configuration" - all_clients = ['client.{id}'.format(id=id_) - for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] - if config is None: - config = all_clients - if isinstance(config, list): - config = dict.fromkeys(config) - clients = config.keys() - - # just use the first client... - client = clients[0] - - ## - admin_user = 'ada' - admin_display_name = 'Ms. 
Admin User' - admin_access_key = 'MH1WC2XQ1S8UISFDZC8W' - admin_secret_key = 'dQyrTPA0s248YeN5bBv4ukvKU0kh54LWWywkrpoG' - admin_caps = 'users=read, write; usage=read, write; buckets=read, write; zone=read, write' - - user1 = 'foo' - user2 = 'fud' - subuser1 = 'foo:foo1' - subuser2 = 'foo:foo2' - display_name1 = 'Foo' - display_name2 = 'Fud' - email = 'foo@foo.com' - access_key = '9te6NH5mcdcq0Tc5i8i1' - secret_key = 'Ny4IOauQoL18Gp2zM7lC1vLmoawgqcYP/YGcWfXu' - access_key2 = 'p5YnriCv1nAtykxBrupQ' - secret_key2 = 'Q8Tk6Q/27hfbFSYdSkPtUqhqx1GgzvpXa4WARozh' - swift_secret1 = 'gpS2G9RREMrnbqlp29PP2D36kgPR1tm72n5fPYfL' - swift_secret2 = 'ri2VJQcKSYATOY6uaDUX7pxgkW+W1YmC6OCxPHwy' - - bucket_name = 'myfoo' - - # legend (test cases can be easily grep-ed out) - # TESTCASE 'testname','object','method','operation','assertion' - # TESTCASE 'create-admin-user','user','create','administrative user','succeeds' - (err, out) = rgwadmin(ctx, client, [ - 'user', 'create', - '--uid', admin_user, - '--display-name', admin_display_name, - '--access-key', admin_access_key, - '--secret', admin_secret_key, - '--max-buckets', '0', - '--caps', admin_caps - ]) - logging.error(out) - logging.error(err) - assert not err - - (remote,) = ctx.cluster.only(client).remotes.iterkeys() - remote_host = remote.name.split('@')[1] - admin_conn = boto.s3.connection.S3Connection( - aws_access_key_id=admin_access_key, - aws_secret_access_key=admin_secret_key, - is_secure=False, - port=7280, - host=remote_host, - calling_format=boto.s3.connection.OrdinaryCallingFormat(), - ) - - # TESTCASE 'info-nosuch','user','info','non-existent user','fails' - (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {"uid": user1}) - assert ret == 404 - - # TESTCASE 'create-ok','user','create','w/all valid info','succeeds' - (ret, out) = rgwadmin_rest(admin_conn, - ['user', 'create'], - {'uid' : user1, - 'display-name' : display_name1, - 'email' : email, - 'access-key' : access_key, - 'secret-key' : secret_key, - 
'max-buckets' : '4' - }) - - assert ret == 200 - - # TESTCASE 'info-existing','user','info','existing user','returns correct info' - (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) - - assert out['user_id'] == user1 - assert out['email'] == email - assert out['display_name'] == display_name1 - assert len(out['keys']) == 1 - assert out['keys'][0]['access_key'] == access_key - assert out['keys'][0]['secret_key'] == secret_key - assert not out['suspended'] - - # TESTCASE 'suspend-ok','user','suspend','active user','succeeds' - (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : True}) - assert ret == 200 - - # TESTCASE 'suspend-suspended','user','suspend','suspended user','succeeds w/advisory' - (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) - assert ret == 200 - assert out['suspended'] - - # TESTCASE 're-enable','user','enable','suspended user','succeeds' - (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : 'false'}) - assert not err - - # TESTCASE 'info-re-enabled','user','info','re-enabled user','no longer suspended' - (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) - assert ret == 200 - assert not out['suspended'] - - # TESTCASE 'add-keys','key','create','w/valid info','succeeds' - (ret, out) = rgwadmin_rest(admin_conn, - ['key', 'create'], - {'uid' : user1, - 'access-key' : access_key2, - 'secret-key' : secret_key2 - }) - - - assert ret == 200 - - # TESTCASE 'info-new-key','user','info','after key addition','returns all keys' - (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) - assert ret == 200 - assert len(out['keys']) == 2 - assert out['keys'][0]['access_key'] == access_key2 or out['keys'][1]['access_key'] == access_key2 - assert out['keys'][0]['secret_key'] == secret_key2 or out['keys'][1]['secret_key'] == secret_key2 - - # TESTCASE 'rm-key','key','rm','newly added key','succeeds, 
key is removed' - (ret, out) = rgwadmin_rest(admin_conn, - ['key', 'rm'], - {'uid' : user1, - 'access-key' : access_key2 - }) - - assert ret == 200 - - (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) - - assert len(out['keys']) == 1 - assert out['keys'][0]['access_key'] == access_key - assert out['keys'][0]['secret_key'] == secret_key - - # TESTCASE 'add-swift-key','key','create','swift key','succeeds' - (ret, out) = rgwadmin_rest(admin_conn, - ['subuser', 'create'], - {'subuser' : subuser1, - 'secret-key' : swift_secret1, - 'key-type' : 'swift' - }) - - assert ret == 200 - - # TESTCASE 'info-swift-key','user','info','after key addition','returns all keys' - (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) - assert ret == 200 - assert len(out['swift_keys']) == 1 - assert out['swift_keys'][0]['user'] == subuser1 - assert out['swift_keys'][0]['secret_key'] == swift_secret1 - - # TESTCASE 'add-swift-subuser','key','create','swift sub-user key','succeeds' - (ret, out) = rgwadmin_rest(admin_conn, - ['subuser', 'create'], - {'subuser' : subuser2, - 'secret-key' : swift_secret2, - 'key-type' : 'swift' - }) - - assert ret == 200 - - # TESTCASE 'info-swift-subuser','user','info','after key addition','returns all sub-users/keys' - (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) - assert ret == 200 - assert len(out['swift_keys']) == 2 - assert out['swift_keys'][0]['user'] == subuser2 or out['swift_keys'][1]['user'] == subuser2 - assert out['swift_keys'][0]['secret_key'] == swift_secret2 or out['swift_keys'][1]['secret_key'] == swift_secret2 - - # TESTCASE 'rm-swift-key1','key','rm','subuser','succeeds, one key is removed' - (ret, out) = rgwadmin_rest(admin_conn, - ['key', 'rm'], - {'subuser' : subuser1, - 'key-type' :'swift' - }) - - assert ret == 200 - - (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) - assert len(out['swift_keys']) == 1 - - # TESTCASE 
'rm-subuser','subuser','rm','subuser','success, subuser is removed' - (ret, out) = rgwadmin_rest(admin_conn, - ['subuser', 'rm'], - {'subuser' : subuser1 - }) - - assert ret == 200 - - (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) - assert len(out['subusers']) == 1 - - # TESTCASE 'rm-subuser-with-keys','subuser','rm','subuser','succeeds, second subser and key is removed' - (ret, out) = rgwadmin_rest(admin_conn, - ['subuser', 'rm'], - {'subuser' : subuser2, - 'key-type' : 'swift', - '{purge-keys' :True - }) - - assert ret == 200 - - (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) - assert len(out['swift_keys']) == 0 - assert len(out['subusers']) == 0 - - # TESTCASE 'bucket-stats','bucket','info','no session/buckets','succeeds, empty list' - (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1}) - assert ret == 200 - assert len(out) == 0 - - # connect to rgw - connection = boto.s3.connection.S3Connection( - aws_access_key_id=access_key, - aws_secret_access_key=secret_key, - is_secure=False, - port=7280, - host=remote_host, - calling_format=boto.s3.connection.OrdinaryCallingFormat(), - ) - - # TESTCASE 'bucket-stats2','bucket','stats','no buckets','succeeds, empty list' - (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1, 'stats' : True}) - assert ret == 200 - assert len(out) == 0 - - # create a first bucket - bucket = connection.create_bucket(bucket_name) - - # TESTCASE 'bucket-list','bucket','list','one bucket','succeeds, expected list' - (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1}) - assert ret == 200 - assert len(out) == 1 - assert out[0] == bucket_name - - # TESTCASE 'bucket-stats3','bucket','stats','new empty bucket','succeeds, empty list' - (ret, out) = rgwadmin_rest(admin_conn, - ['bucket', 'info'], {'bucket' : bucket_name, 'stats' : True}) - - assert ret == 200 - assert out['owner'] == user1 - bucket_id = out['id'] - - # TESTCASE 
'bucket-stats4','bucket','stats','new empty bucket','succeeds, expected bucket ID' - (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1, 'stats' : True}) - assert ret == 200 - assert len(out) == 1 - assert out[0]['id'] == bucket_id # does it return the same ID twice in a row? - - # use some space - key = boto.s3.key.Key(bucket) - key.set_contents_from_string('one') - - # TESTCASE 'bucket-stats5','bucket','stats','after creating key','succeeds, lists one non-empty object' - (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'bucket' : bucket_name, 'stats' : True}) - assert ret == 200 - assert out['id'] == bucket_id - assert out['usage']['rgw.main']['num_objects'] == 1 - assert out['usage']['rgw.main']['size_kb'] > 0 - - # reclaim it - key.delete() - - # TESTCASE 'bucket unlink', 'bucket', 'unlink', 'unlink bucket from user', 'fails', 'access denied error' - (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'unlink'], {'uid' : user1, 'bucket' : bucket_name}) - - assert ret == 200 - - # create a second user to link the bucket to - (ret, out) = rgwadmin_rest(admin_conn, - ['user', 'create'], - {'uid' : user2, - 'display-name' : display_name2, - 'access-key' : access_key2, - 'secret-key' : secret_key2, - 'max-buckets' : '1', - }) - - assert ret == 200 - - # try creating an object with the first user before the bucket is relinked - denied = False - key = boto.s3.key.Key(bucket) - - try: - key.set_contents_from_string('two') - except boto.exception.S3ResponseError: - denied = True - - assert not denied - - # delete the object - key.delete() - - # link the bucket to another user - (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'link'], {'uid' : user2, 'bucket' : bucket_name}) - - assert ret == 200 - - # try creating an object with the first user which should cause an error - key = boto.s3.key.Key(bucket) - - try: - key.set_contents_from_string('three') - except boto.exception.S3ResponseError: - denied = True - - assert denied - - # relink 
the bucket to the first user and delete the second user - (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'link'], {'uid' : user1, 'bucket' : bucket_name}) - assert ret == 200 - - (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user2}) - assert ret == 200 - - # TESTCASE 'object-rm', 'object', 'rm', 'remove object', 'succeeds, object is removed' - - # upload an object - object_name = 'four' - key = boto.s3.key.Key(bucket, object_name) - key.set_contents_from_string(object_name) - - # now delete it - (ret, out) = rgwadmin_rest(admin_conn, ['object', 'rm'], {'bucket' : bucket_name, 'object' : object_name}) - assert ret == 200 - - # TESTCASE 'bucket-stats6','bucket','stats','after deleting key','succeeds, lists one no objects' - (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'bucket' : bucket_name, 'stats' : True}) - assert ret == 200 - assert out['id'] == bucket_id - assert out['usage']['rgw.main']['num_objects'] == 0 - - # create a bucket for deletion stats - useless_bucket = connection.create_bucket('useless_bucket') - useless_key = useless_bucket.new_key('useless_key') - useless_key.set_contents_from_string('useless string') - - # delete it - useless_key.delete() - useless_bucket.delete() - - # wait for the statistics to flush - time.sleep(60) - - # need to wait for all usage data to get flushed, should take up to 30 seconds - timestamp = time.time() - while time.time() - timestamp <= (20 * 60): # wait up to 20 minutes - (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'categories' : 'delete_obj'}) # last operation we did is delete obj, wait for it to flush - - if get_user_successful_ops(out, user1) > 0: - break - time.sleep(1) - - assert time.time() - timestamp <= (20 * 60) - - # TESTCASE 'usage-show' 'usage' 'show' 'all usage' 'succeeds' - (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show']) - assert ret == 200 - assert len(out['entries']) > 0 - assert len(out['summary']) > 0 - user_summary = get_user_summary(out, 
user1) - total = user_summary['total'] - assert total['successful_ops'] > 0 - - # TESTCASE 'usage-show2' 'usage' 'show' 'user usage' 'succeeds' - (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid' : user1}) - assert ret == 200 - assert len(out['entries']) > 0 - assert len(out['summary']) > 0 - user_summary = out['summary'][0] - for entry in user_summary['categories']: - assert entry['successful_ops'] > 0 - assert user_summary['user'] == user1 - - # TESTCASE 'usage-show3' 'usage' 'show' 'user usage categories' 'succeeds' - test_categories = ['create_bucket', 'put_obj', 'delete_obj', 'delete_bucket'] - for cat in test_categories: - (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid' : user1, 'categories' : cat}) - assert ret == 200 - assert len(out['summary']) > 0 - user_summary = out['summary'][0] - assert user_summary['user'] == user1 - assert len(user_summary['categories']) == 1 - entry = user_summary['categories'][0] - assert entry['category'] == cat - assert entry['successful_ops'] > 0 - - # TESTCASE 'usage-trim' 'usage' 'trim' 'user usage' 'succeeds, usage removed' - (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'trim'], {'uid' : user1}) - assert ret == 200 - (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid' : user1}) - assert ret == 200 - assert len(out['entries']) == 0 - assert len(out['summary']) == 0 - - # TESTCASE 'user-suspend2','user','suspend','existing user','succeeds' - (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : True}) - assert ret == 200 - - # TESTCASE 'user-suspend3','user','suspend','suspended user','cannot write objects' - try: - key = boto.s3.key.Key(bucket) - key.set_contents_from_string('five') - except boto.exception.S3ResponseError as e: - assert e.status == 403 - - # TESTCASE 'user-renable2','user','enable','suspended user','succeeds' - (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : 'false'}) - assert ret == 
200 - - # TESTCASE 'user-renable3','user','enable','reenabled user','can write objects' - key = boto.s3.key.Key(bucket) - key.set_contents_from_string('six') - - # TESTCASE 'garbage-list', 'garbage', 'list', 'get list of objects ready for garbage collection' - - # create an object large enough to be split into multiple parts - test_string = 'foo'*10000000 - - big_key = boto.s3.key.Key(bucket) - big_key.set_contents_from_string(test_string) - - # now delete the head - big_key.delete() - - # TESTCASE 'rm-user-buckets','user','rm','existing user','fails, still has buckets' - (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user1}) - assert ret == 409 - - # delete should fail because ``key`` still exists - try: - bucket.delete() - except boto.exception.S3ResponseError as e: - assert e.status == 409 - - key.delete() - bucket.delete() - - # TESTCASE 'policy', 'bucket', 'policy', 'get bucket policy', 'returns S3 policy' - bucket = connection.create_bucket(bucket_name) - - # create an object - key = boto.s3.key.Key(bucket) - key.set_contents_from_string('seven') - - # should be private already but guarantee it - key.set_acl('private') - - (ret, out) = rgwadmin_rest(admin_conn, ['policy', 'show'], {'bucket' : bucket.name, 'object' : key.key}) - assert ret == 200 - - acl = key.get_xml_acl() - assert acl == out.strip('\n') - - # add another grantee by making the object public read - key.set_acl('public-read') - - (ret, out) = rgwadmin_rest(admin_conn, ['policy', 'show'], {'bucket' : bucket.name, 'object' : key.key}) - assert ret == 200 - - acl = key.get_xml_acl() - assert acl == out.strip('\n') - - # TESTCASE 'rm-bucket', 'bucket', 'rm', 'bucket with objects', 'succeeds' - bucket = connection.create_bucket(bucket_name) - key_name = ['eight', 'nine', 'ten', 'eleven'] - for i in range(4): - key = boto.s3.key.Key(bucket) - key.set_contents_from_string(key_name[i]) - - (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'rm'], {'bucket' : bucket_name, 'purge-objects' 
: True}) - assert ret == 200 - - # TESTCASE 'caps-add', 'caps', 'add', 'add user cap', 'succeeds' - caps = 'usage=read' - (ret, out) = rgwadmin_rest(admin_conn, ['caps', 'add'], {'uid' : user1, 'user-caps' : caps}) - assert ret == 200 - assert out[0]['perm'] == 'read' - - # TESTCASE 'caps-rm', 'caps', 'rm', 'remove existing cap from user', 'succeeds' - (ret, out) = rgwadmin_rest(admin_conn, ['caps', 'rm'], {'uid' : user1, 'user-caps' : caps}) - assert ret == 200 - assert not out - - # TESTCASE 'rm-user','user','rm','existing user','fails, still has buckets' - bucket = connection.create_bucket(bucket_name) - key = boto.s3.key.Key(bucket) - - (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user1}) - assert ret == 409 - - # TESTCASE 'rm-user2', 'user', 'rm', user with data', 'succeeds' - bucket = connection.create_bucket(bucket_name) - key = boto.s3.key.Key(bucket) - key.set_contents_from_string('twelve') - - (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user1, 'purge-data' : True}) - assert ret == 200 - - # TESTCASE 'rm-user3','user','info','deleted user','fails' - (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) - assert ret == 404 - diff --git a/tasks/radosgw_agent.py b/tasks/radosgw_agent.py deleted file mode 100644 index 0254805d2af..00000000000 --- a/tasks/radosgw_agent.py +++ /dev/null @@ -1,211 +0,0 @@ -""" -Run rados gateway agent in test mode -""" -import contextlib -import logging -import argparse - -from teuthology.orchestra import run -from teuthology import misc as teuthology -import util.rgw as rgw_utils - -log = logging.getLogger(__name__) - -def run_radosgw_agent(ctx, config): - """ - Run a single radosgw-agent. See task() for config format. 
- """ - return_list = list() - for (client, cconf) in config.items(): - # don't process entries that are not clients - if not client.startswith('client.'): - log.debug('key {data} does not start with \'client.\', moving on'.format( - data=client)) - continue - - src_client = cconf['src'] - dest_client = cconf['dest'] - - src_zone = rgw_utils.zone_for_client(ctx, src_client) - dest_zone = rgw_utils.zone_for_client(ctx, dest_client) - - log.info("source is %s", src_zone) - log.info("dest is %s", dest_zone) - - testdir = teuthology.get_testdir(ctx) - (remote,) = ctx.cluster.only(client).remotes.keys() - # figure out which branch to pull from - branch = cconf.get('force-branch', None) - if not branch: - branch = cconf.get('branch', 'master') - sha1 = cconf.get('sha1') - remote.run( - args=[ - 'cd', testdir, run.Raw('&&'), - 'git', 'clone', - '-b', branch, -# 'https://github.com/ceph/radosgw-agent.git', - 'git://git.ceph.com/radosgw-agent.git', - 'radosgw-agent.{client}'.format(client=client), - ] - ) - if sha1 is not None: - remote.run( - args=[ - 'cd', testdir, run.Raw('&&'), - run.Raw('&&'), - 'git', 'reset', '--hard', sha1, - ] - ) - remote.run( - args=[ - 'cd', testdir, run.Raw('&&'), - 'cd', 'radosgw-agent.{client}'.format(client=client), - run.Raw('&&'), - './bootstrap', - ] - ) - - src_host, src_port = rgw_utils.get_zone_host_and_port(ctx, src_client, - src_zone) - dest_host, dest_port = rgw_utils.get_zone_host_and_port(ctx, dest_client, - dest_zone) - src_access, src_secret = rgw_utils.get_zone_system_keys(ctx, src_client, - src_zone) - dest_access, dest_secret = rgw_utils.get_zone_system_keys(ctx, dest_client, - dest_zone) - sync_scope = cconf.get('sync-scope', None) - port = cconf.get('port', 8000) - daemon_name = '{host}.{port}.syncdaemon'.format(host=remote.name, port=port) - in_args=[ - 'daemon-helper', - 'kill', - '{tdir}/radosgw-agent.{client}/radosgw-agent'.format(tdir=testdir, - client=client), - '-v', - '--src-access-key', src_access, - 
'--src-secret-key', src_secret, - '--source', "http://{addr}:{port}".format(addr=src_host, port=src_port), - '--dest-access-key', dest_access, - '--dest-secret-key', dest_secret, - '--max-entries', str(cconf.get('max-entries', 1000)), - '--log-file', '{tdir}/archive/rgw_sync_agent.{client}.log'.format( - tdir=testdir, - client=client), - '--object-sync-timeout', '30', - ] - - if cconf.get('metadata-only', False): - in_args.append('--metadata-only') - - # the test server and full/incremental flags are mutually exclusive - if sync_scope is None: - in_args.append('--test-server-host') - in_args.append('0.0.0.0') - in_args.append('--test-server-port') - in_args.append(str(port)) - log.debug('Starting a sync test server on {client}'.format(client=client)) - # Stash the radosgw-agent server / port # for use by subsequent tasks - ctx.radosgw_agent.endpoint = (client, str(port)) - else: - in_args.append('--sync-scope') - in_args.append(sync_scope) - log.debug('Starting a {scope} sync on {client}'.format(scope=sync_scope,client=client)) - - # positional arg for destination must come last - in_args.append("http://{addr}:{port}".format(addr=dest_host, - port=dest_port)) - - return_list.append((client, remote.run( - args=in_args, - wait=False, - stdin=run.PIPE, - logger=log.getChild(daemon_name), - ))) - return return_list - - -@contextlib.contextmanager -def task(ctx, config): - """ - Run radosgw-agents in test mode. - - Configuration is clients to run the agents on, with settings for - source client, destination client, and port to listen on. Binds - to 0.0.0.0. Port defaults to 8000. This must be run on clients - that have the correct zone root pools and rgw zone set in - ceph.conf, or the task cannot read the region information from the - cluster. - - By default, this task will start an HTTP server that will trigger full - or incremental syncs based on requests made to it. 
- Alternatively, a single full sync can be triggered by - specifying 'sync-scope: full' or a loop of incremental syncs can be triggered - by specifying 'sync-scope: incremental' (the loop will sleep - '--incremental-sync-delay' seconds between each sync, default is 30 seconds). - - By default, both data and metadata are synced. To only sync - metadata, for example because you want to sync between regions, - set metadata-only: true. - - An example:: - - tasks: - - ceph: - conf: - client.0: - rgw zone = foo - rgw zone root pool = .root.pool - client.1: - rgw zone = bar - rgw zone root pool = .root.pool2 - - rgw: # region configuration omitted for brevity - - radosgw-agent: - client.0: - branch: wip-next-feature-branch - src: client.0 - dest: client.1 - sync-scope: full - metadata-only: true - # port: 8000 (default) - client.1: - src: client.1 - dest: client.0 - port: 8001 - """ - assert isinstance(config, dict), 'rgw_sync_agent requires a dictionary config' - log.debug("config is %s", config) - - overrides = ctx.config.get('overrides', {}) - # merge each client section, but only if it exists in config since there isn't - # a sensible default action for this task - for client in config.iterkeys(): - if config[client]: - log.debug('config[{client}]: {data}'.format(client=client, data=config[client])) - teuthology.deep_merge(config[client], overrides.get('radosgw-agent', {})) - - ctx.radosgw_agent = argparse.Namespace() - ctx.radosgw_agent.config = config - - procs = run_radosgw_agent(ctx, config) - - ctx.radosgw_agent.procs = procs - - try: - yield - finally: - testdir = teuthology.get_testdir(ctx) - try: - for client, proc in procs: - log.info("shutting down sync agent on %s", client) - proc.stdin.close() - proc.wait() - finally: - for client, proc in procs: - ctx.cluster.only(client).run( - args=[ - 'rm', '-rf', - '{tdir}/radosgw-agent.{client}'.format(tdir=testdir, - client=client) - ] - ) diff --git a/tasks/rbd.py b/tasks/rbd.py deleted file mode 100644 index 
4bf529373a1..00000000000 --- a/tasks/rbd.py +++ /dev/null @@ -1,585 +0,0 @@ -""" -Rbd testing task -""" -import contextlib -import logging -import os - -from cStringIO import StringIO -from teuthology.orchestra import run -from teuthology import misc as teuthology -from teuthology import contextutil -from teuthology.parallel import parallel -from teuthology.task.common_fs_utils import generic_mkfs -from teuthology.task.common_fs_utils import generic_mount -from teuthology.task.common_fs_utils import default_image_name - -log = logging.getLogger(__name__) - -@contextlib.contextmanager -def create_image(ctx, config): - """ - Create an rbd image. - - For example:: - - tasks: - - ceph: - - rbd.create_image: - client.0: - image_name: testimage - image_size: 100 - image_format: 1 - client.1: - - Image size is expressed as a number of megabytes; default value - is 10240. - - Image format value must be either 1 or 2; default value is 1. - - """ - assert isinstance(config, dict) or isinstance(config, list), \ - "task create_image only supports a list or dictionary for configuration" - - if isinstance(config, dict): - images = config.items() - else: - images = [(role, None) for role in config] - - testdir = teuthology.get_testdir(ctx) - for role, properties in images: - if properties is None: - properties = {} - name = properties.get('image_name', default_image_name(role)) - size = properties.get('image_size', 10240) - fmt = properties.get('image_format', 1) - (remote,) = ctx.cluster.only(role).remotes.keys() - log.info('Creating image {name} with size {size}'.format(name=name, - size=size)) - args = [ - 'adjust-ulimits', - 'ceph-coverage'.format(tdir=testdir), - '{tdir}/archive/coverage'.format(tdir=testdir), - 'rbd', - '-p', 'rbd', - 'create', - '--size', str(size), - name, - ] - # omit format option if using the default (format 1) - # since old versions of don't support it - if int(fmt) != 1: - args += ['--image-format', str(fmt)] - remote.run(args=args) - try: - yield - 
finally: - log.info('Deleting rbd images...') - for role, properties in images: - if properties is None: - properties = {} - name = properties.get('image_name', default_image_name(role)) - (remote,) = ctx.cluster.only(role).remotes.keys() - remote.run( - args=[ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'rbd', - '-p', 'rbd', - 'rm', - name, - ], - ) - -@contextlib.contextmanager -def clone_image(ctx, config): - """ - Clones a parent imag - - For example:: - - tasks: - - ceph: - - rbd.clone_image: - client.0: - parent_name: testimage - image_name: cloneimage - """ - assert isinstance(config, dict) or isinstance(config, list), \ - "task clone_image only supports a list or dictionary for configuration" - - if isinstance(config, dict): - images = config.items() - else: - images = [(role, None) for role in config] - - testdir = teuthology.get_testdir(ctx) - for role, properties in images: - if properties is None: - properties = {} - - name = properties.get('image_name', default_image_name(role)) - parent_name = properties.get('parent_name') - assert parent_name is not None, \ - "parent_name is required" - parent_spec = '{name}@{snap}'.format(name=parent_name, snap=name) - - (remote,) = ctx.cluster.only(role).remotes.keys() - log.info('Clone image {parent} to {child}'.format(parent=parent_name, - child=name)) - for cmd in [('snap', 'create', parent_spec), - ('snap', 'protect', parent_spec), - ('clone', parent_spec, name)]: - args = [ - 'adjust-ulimits', - 'ceph-coverage'.format(tdir=testdir), - '{tdir}/archive/coverage'.format(tdir=testdir), - 'rbd', '-p', 'rbd' - ] - args.extend(cmd) - remote.run(args=args) - - try: - yield - finally: - log.info('Deleting rbd clones...') - for role, properties in images: - if properties is None: - properties = {} - name = properties.get('image_name', default_image_name(role)) - parent_name = properties.get('parent_name') - parent_spec = '{name}@{snap}'.format(name=parent_name, snap=name) - 
- (remote,) = ctx.cluster.only(role).remotes.keys() - - for cmd in [('rm', name), - ('snap', 'unprotect', parent_spec), - ('snap', 'rm', parent_spec)]: - args = [ - 'adjust-ulimits', - 'ceph-coverage'.format(tdir=testdir), - '{tdir}/archive/coverage'.format(tdir=testdir), - 'rbd', '-p', 'rbd' - ] - args.extend(cmd) - remote.run(args=args) - -@contextlib.contextmanager -def modprobe(ctx, config): - """ - Load the rbd kernel module.. - - For example:: - - tasks: - - ceph: - - rbd.create_image: [client.0] - - rbd.modprobe: [client.0] - """ - log.info('Loading rbd kernel module...') - for role in config: - (remote,) = ctx.cluster.only(role).remotes.keys() - remote.run( - args=[ - 'sudo', - 'modprobe', - 'rbd', - ], - ) - try: - yield - finally: - log.info('Unloading rbd kernel module...') - for role in config: - (remote,) = ctx.cluster.only(role).remotes.keys() - remote.run( - args=[ - 'sudo', - 'modprobe', - '-r', - 'rbd', - # force errors to be ignored; necessary if more - # than one device was created, which may mean - # the module isn't quite ready to go the first - # time through. - run.Raw('||'), - 'true', - ], - ) - -@contextlib.contextmanager -def dev_create(ctx, config): - """ - Map block devices to rbd images. 
- - For example:: - - tasks: - - ceph: - - rbd.create_image: [client.0] - - rbd.modprobe: [client.0] - - rbd.dev_create: - client.0: testimage.client.0 - """ - assert isinstance(config, dict) or isinstance(config, list), \ - "task dev_create only supports a list or dictionary for configuration" - - if isinstance(config, dict): - role_images = config.items() - else: - role_images = [(role, None) for role in config] - - log.info('Creating rbd block devices...') - - testdir = teuthology.get_testdir(ctx) - - for role, image in role_images: - if image is None: - image = default_image_name(role) - (remote,) = ctx.cluster.only(role).remotes.keys() - - remote.run( - args=[ - 'sudo', - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'rbd', - '--user', role.rsplit('.')[-1], - '-p', 'rbd', - 'map', - image, - run.Raw('&&'), - # wait for the symlink to be created by udev - 'while', 'test', '!', '-e', '/dev/rbd/rbd/{image}'.format(image=image), run.Raw(';'), 'do', - 'sleep', '1', run.Raw(';'), - 'done', - ], - ) - try: - yield - finally: - log.info('Unmapping rbd devices...') - for role, image in role_images: - if image is None: - image = default_image_name(role) - (remote,) = ctx.cluster.only(role).remotes.keys() - remote.run( - args=[ - 'LD_LIBRARY_PATH={tdir}/binary/usr/local/lib'.format(tdir=testdir), - 'sudo', - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'rbd', - '-p', 'rbd', - 'unmap', - '/dev/rbd/rbd/{imgname}'.format(imgname=image), - run.Raw('&&'), - # wait for the symlink to be deleted by udev - 'while', 'test', '-e', '/dev/rbd/rbd/{image}'.format(image=image), - run.Raw(';'), - 'do', - 'sleep', '1', run.Raw(';'), - 'done', - ], - ) - - -def rbd_devname_rtn(ctx, image): - return '/dev/rbd/rbd/{image}'.format(image=image) - -def canonical_path(ctx, role, path): - """ - Determine the canonical path for a given path on the host - representing the given role. 
A canonical path contains no - . or .. components, and includes no symbolic links. - """ - version_fp = StringIO() - ctx.cluster.only(role).run( - args=[ 'readlink', '-f', path ], - stdout=version_fp, - ) - canonical_path = version_fp.getvalue().rstrip('\n') - version_fp.close() - return canonical_path - -@contextlib.contextmanager -def run_xfstests(ctx, config): - """ - Run xfstests over specified devices. - - Warning: both the test and scratch devices specified will be - overwritten. Normally xfstests modifies (but does not destroy) - the test device, but for now the run script used here re-makes - both filesystems. - - Note: Only one instance of xfstests can run on a single host at - a time, although this is not enforced. - - This task in its current form needs some improvement. For - example, it assumes all roles provided in the config are - clients, and that the config provided is a list of key/value - pairs. For now please use the xfstests() interface, below. - - For example:: - - tasks: - - ceph: - - rbd.run_xfstests: - client.0: - count: 2 - test_dev: 'test_dev' - scratch_dev: 'scratch_dev' - fs_type: 'xfs' - tests: 'generic/100 xfs/003 xfs/005 xfs/006 generic/015' - randomize: true - """ - with parallel() as p: - for role, properties in config.items(): - p.spawn(run_xfstests_one_client, ctx, role, properties) - yield - -def run_xfstests_one_client(ctx, role, properties): - """ - Spawned routine to handle xfs tests for a single client - """ - testdir = teuthology.get_testdir(ctx) - try: - count = properties.get('count') - test_dev = properties.get('test_dev') - assert test_dev is not None, \ - "task run_xfstests requires test_dev to be defined" - test_dev = canonical_path(ctx, role, test_dev) - - scratch_dev = properties.get('scratch_dev') - assert scratch_dev is not None, \ - "task run_xfstests requires scratch_dev to be defined" - scratch_dev = canonical_path(ctx, role, scratch_dev) - - fs_type = properties.get('fs_type') - tests = properties.get('tests') 
- randomize = properties.get('randomize') - - (remote,) = ctx.cluster.only(role).remotes.keys() - - # Fetch the test script - test_root = teuthology.get_testdir(ctx) - test_script = 'run_xfstests_krbd.sh' - test_path = os.path.join(test_root, test_script) - - git_branch = 'master' - test_url = 'https://raw.github.com/ceph/ceph/{branch}/qa/{script}'.format(branch=git_branch, script=test_script) - - log.info('Fetching {script} for {role} from {url}'.format(script=test_script, - role=role, - url=test_url)) - args = [ 'wget', '-O', test_path, '--', test_url ] - remote.run(args=args) - - log.info('Running xfstests on {role}:'.format(role=role)) - log.info(' iteration count: {count}:'.format(count=count)) - log.info(' test device: {dev}'.format(dev=test_dev)) - log.info(' scratch device: {dev}'.format(dev=scratch_dev)) - log.info(' using fs_type: {fs_type}'.format(fs_type=fs_type)) - log.info(' tests to run: {tests}'.format(tests=tests)) - log.info(' randomize: {randomize}'.format(randomize=randomize)) - - # Note that the device paths are interpreted using - # readlink -f in order to get their canonical - # pathname (so it matches what the kernel remembers). - args = [ - '/usr/bin/sudo', - 'TESTDIR={tdir}'.format(tdir=testdir), - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - '/bin/bash', - test_path, - '-c', str(count), - '-f', fs_type, - '-t', test_dev, - '-s', scratch_dev, - ] - if randomize: - args.append('-r') - if tests: - args.extend(['--', tests]) - remote.run(args=args, logger=log.getChild(role)) - finally: - log.info('Removing {script} on {role}'.format(script=test_script, - role=role)) - remote.run(args=['rm', '-f', test_path]) - -@contextlib.contextmanager -def xfstests(ctx, config): - """ - Run xfstests over rbd devices. This interface sets up all - required configuration automatically if not otherwise specified. - Note that only one instance of xfstests can run on a single host - at a time. 
By default, the set of tests specified is run once. - If a (non-zero) count value is supplied, the complete set of - tests will be run that number of times. - - For example:: - - tasks: - - ceph: - # Image sizes are in MB - - rbd.xfstests: - client.0: - count: 3 - test_image: 'test_image' - test_size: 250 - test_format: 2 - scratch_image: 'scratch_image' - scratch_size: 250 - scratch_format: 1 - fs_type: 'xfs' - tests: 'generic/100 xfs/003 xfs/005 xfs/006 generic/015' - randomize: true - """ - if config is None: - config = { 'all': None } - assert isinstance(config, dict) or isinstance(config, list), \ - "task xfstests only supports a list or dictionary for configuration" - if isinstance(config, dict): - config = teuthology.replace_all_with_clients(ctx.cluster, config) - runs = config.items() - else: - runs = [(role, None) for role in config] - - running_xfstests = {} - for role, properties in runs: - assert role.startswith('client.'), \ - "task xfstests can only run on client nodes" - for host, roles_for_host in ctx.cluster.remotes.items(): - if role in roles_for_host: - assert host not in running_xfstests, \ - "task xfstests allows only one instance at a time per host" - running_xfstests[host] = True - - images_config = {} - scratch_config = {} - modprobe_config = {} - image_map_config = {} - scratch_map_config = {} - xfstests_config = {} - for role, properties in runs: - if properties is None: - properties = {} - - test_image = properties.get('test_image', 'test_image.{role}'.format(role=role)) - test_size = properties.get('test_size', 2000) # 2G - test_fmt = properties.get('test_format', 1) - scratch_image = properties.get('scratch_image', 'scratch_image.{role}'.format(role=role)) - scratch_size = properties.get('scratch_size', 10000) # 10G - scratch_fmt = properties.get('scratch_format', 1) - - images_config[role] = dict( - image_name=test_image, - image_size=test_size, - image_format=test_fmt, - ) - - scratch_config[role] = dict( - image_name=scratch_image, - 
image_size=scratch_size, - image_format=scratch_fmt, - ) - - xfstests_config[role] = dict( - count=properties.get('count', 1), - test_dev='/dev/rbd/rbd/{image}'.format(image=test_image), - scratch_dev='/dev/rbd/rbd/{image}'.format(image=scratch_image), - fs_type=properties.get('fs_type', 'xfs'), - randomize=properties.get('randomize', False), - tests=properties.get('tests'), - ) - - log.info('Setting up xfstests using RBD images:') - log.info(' test ({size} MB): {image}'.format(size=test_size, - image=test_image)) - log.info(' scratch ({size} MB): {image}'.format(size=scratch_size, - image=scratch_image)) - modprobe_config[role] = None - image_map_config[role] = test_image - scratch_map_config[role] = scratch_image - - with contextutil.nested( - lambda: create_image(ctx=ctx, config=images_config), - lambda: create_image(ctx=ctx, config=scratch_config), - lambda: modprobe(ctx=ctx, config=modprobe_config), - lambda: dev_create(ctx=ctx, config=image_map_config), - lambda: dev_create(ctx=ctx, config=scratch_map_config), - lambda: run_xfstests(ctx=ctx, config=xfstests_config), - ): - yield - - -@contextlib.contextmanager -def task(ctx, config): - """ - Create and mount an rbd image. 
- - For example, you can specify which clients to run on:: - - tasks: - - ceph: - - rbd: [client.0, client.1] - - There are a few image options:: - - tasks: - - ceph: - - rbd: - client.0: # uses defaults - client.1: - image_name: foo - image_size: 2048 - image_format: 2 - fs_type: xfs - - To use default options on all clients:: - - tasks: - - ceph: - - rbd: - all: - - To create 20GiB images and format them with xfs on all clients:: - - tasks: - - ceph: - - rbd: - all: - image_size: 20480 - fs_type: xfs - """ - if config is None: - config = { 'all': None } - norm_config = config - if isinstance(config, dict): - norm_config = teuthology.replace_all_with_clients(ctx.cluster, config) - if isinstance(norm_config, dict): - role_images = {} - for role, properties in norm_config.iteritems(): - if properties is None: - properties = {} - role_images[role] = properties.get('image_name') - else: - role_images = norm_config - - log.debug('rbd config is: %s', norm_config) - - with contextutil.nested( - lambda: create_image(ctx=ctx, config=norm_config), - lambda: modprobe(ctx=ctx, config=norm_config), - lambda: dev_create(ctx=ctx, config=role_images), - lambda: generic_mkfs(ctx=ctx, config=norm_config, - devname_rtn=rbd_devname_rtn), - lambda: generic_mount(ctx=ctx, config=role_images, - devname_rtn=rbd_devname_rtn), - ): - yield diff --git a/tasks/rbd_fsx.py b/tasks/rbd_fsx.py deleted file mode 100644 index d848a88c566..00000000000 --- a/tasks/rbd_fsx.py +++ /dev/null @@ -1,82 +0,0 @@ -""" -Run fsx on an rbd image -""" -import contextlib -import logging - -from teuthology.parallel import parallel -from teuthology import misc as teuthology - -log = logging.getLogger(__name__) - -@contextlib.contextmanager -def task(ctx, config): - """ - Run fsx on an rbd image. - - Currently this requires running as client.admin - to create a pool. 
- - Specify which clients to run on as a list:: - - tasks: - ceph: - rbd_fsx: - clients: [client.0, client.1] - - You can optionally change some properties of fsx: - - tasks: - ceph: - rbd_fsx: - clients: - seed: - ops: - size: - """ - log.info('starting rbd_fsx...') - with parallel() as p: - for role in config['clients']: - p.spawn(_run_one_client, ctx, config, role) - yield - -def _run_one_client(ctx, config, role): - """Spawned task that runs the client""" - krbd = config.get('krbd', False) - testdir = teuthology.get_testdir(ctx) - (remote,) = ctx.cluster.only(role).remotes.iterkeys() - - args = [] - if krbd: - args.append('sudo') # rbd map/unmap need privileges - args.extend([ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'ceph_test_librbd_fsx', - '-d', # debug output for all operations - '-W', '-R', # mmap doesn't work with rbd - '-p', str(config.get('progress_interval', 100)), # show progress - '-P', '{tdir}/archive'.format(tdir=testdir), - '-r', str(config.get('readbdy',1)), - '-w', str(config.get('writebdy',1)), - '-t', str(config.get('truncbdy',1)), - '-h', str(config.get('holebdy',1)), - '-l', str(config.get('size', 250000000)), - '-S', str(config.get('seed', 0)), - '-N', str(config.get('ops', 1000)), - ]) - if krbd: - args.append('-K') # -K enables krbd mode - if config.get('direct_io', False): - args.append('-Z') # -Z use direct IO - if not config.get('randomized_striping', True): - args.append('-U') # -U disables randomized striping - if not config.get('punch_holes', True): - args.append('-H') # -H disables discard ops - args.extend([ - 'pool_{pool}'.format(pool=role), - 'image_{image}'.format(image=role), - ]) - - remote.run(args=args) diff --git a/tasks/recovery_bench.py b/tasks/recovery_bench.py deleted file mode 100644 index 1984b97d31e..00000000000 --- a/tasks/recovery_bench.py +++ /dev/null @@ -1,208 +0,0 @@ -""" -Recovery system benchmarking -""" -from cStringIO import StringIO - -import contextlib 
-import gevent -import json -import logging -import random -import time - -import ceph_manager -from teuthology import misc as teuthology - -log = logging.getLogger(__name__) - -@contextlib.contextmanager -def task(ctx, config): - """ - Benchmark the recovery system. - - Generates objects with smalliobench, runs it normally to get a - baseline performance measurement, then marks an OSD out and reruns - to measure performance during recovery. - - The config should be as follows: - - recovery_bench: - duration: - num_objects: - io_size: - - example: - - tasks: - - ceph: - - recovery_bench: - duration: 60 - num_objects: 500 - io_size: 4096 - """ - if config is None: - config = {} - assert isinstance(config, dict), \ - 'recovery_bench task only accepts a dict for configuration' - - log.info('Beginning recovery bench...') - - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - - manager = ceph_manager.CephManager( - mon, - ctx=ctx, - logger=log.getChild('ceph_manager'), - ) - - num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') - while len(manager.get_osd_status()['up']) < num_osds: - manager.sleep(10) - - bench_proc = RecoveryBencher( - manager, - config, - ) - try: - yield - finally: - log.info('joining recovery bencher') - bench_proc.do_join() - -class RecoveryBencher: - """ - RecoveryBencher - """ - def __init__(self, manager, config): - self.ceph_manager = manager - self.ceph_manager.wait_for_clean() - - osd_status = self.ceph_manager.get_osd_status() - self.osds = osd_status['up'] - - self.config = config - if self.config is None: - self.config = dict() - - else: - def tmp(x): - """ - Local wrapper to print value. - """ - print x - self.log = tmp - - log.info("spawning thread") - - self.thread = gevent.spawn(self.do_bench) - - def do_join(self): - """ - Join the recovery bencher. This is called after the main - task exits. 
- """ - self.thread.get() - - def do_bench(self): - """ - Do the benchmarking. - """ - duration = self.config.get("duration", 60) - num_objects = self.config.get("num_objects", 500) - io_size = self.config.get("io_size", 4096) - - osd = str(random.choice(self.osds)) - (osd_remote,) = self.ceph_manager.ctx.cluster.only('osd.%s' % osd).remotes.iterkeys() - - testdir = teuthology.get_testdir(self.ceph_manager.ctx) - - # create the objects - osd_remote.run( - args=[ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'smalliobench'.format(tdir=testdir), - '--use-prefix', 'recovery_bench', - '--init-only', '1', - '--num-objects', str(num_objects), - '--io-size', str(io_size), - ], - wait=True, - ) - - # baseline bench - log.info('non-recovery (baseline)') - p = osd_remote.run( - args=[ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'smalliobench', - '--use-prefix', 'recovery_bench', - '--do-not-init', '1', - '--duration', str(duration), - '--io-size', str(io_size), - ], - stdout=StringIO(), - stderr=StringIO(), - wait=True, - ) - self.process_samples(p.stderr.getvalue()) - - self.ceph_manager.raw_cluster_cmd('osd', 'out', osd) - time.sleep(5) - - # recovery bench - log.info('recovery active') - p = osd_remote.run( - args=[ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'smalliobench', - '--use-prefix', 'recovery_bench', - '--do-not-init', '1', - '--duration', str(duration), - '--io-size', str(io_size), - ], - stdout=StringIO(), - stderr=StringIO(), - wait=True, - ) - self.process_samples(p.stderr.getvalue()) - - self.ceph_manager.raw_cluster_cmd('osd', 'in', osd) - - def process_samples(self, input): - """ - Extract samples from the input and process the results - - :param input: input lines in JSON format - """ - lat = {} - for line in input.split('\n'): - try: - sample = json.loads(line) - samples = lat.setdefault(sample['type'], []) - 
samples.append(float(sample['latency'])) - except Exception: - pass - - for type in lat: - samples = lat[type] - samples.sort() - - num = len(samples) - - # median - if num & 1 == 1: # odd number of samples - median = samples[num / 2] - else: - median = (samples[num / 2] + samples[num / 2 - 1]) / 2 - - # 99% - ninety_nine = samples[int(num * 0.99)] - - log.info("%s: median %f, 99%% %f" % (type, median, ninety_nine)) diff --git a/tasks/reg11184.py b/tasks/reg11184.py deleted file mode 100644 index 6ba39bde898..00000000000 --- a/tasks/reg11184.py +++ /dev/null @@ -1,240 +0,0 @@ -""" -Special regression test for tracker #11184 - -Synopsis: osd/SnapMapper.cc: 282: FAILED assert(check(oid)) - -This is accomplished by moving a pg that wasn't part of split and still include -divergent priors. -""" -import logging -import time -from cStringIO import StringIO - -from teuthology import misc as teuthology -from util.rados import rados -import os - - -log = logging.getLogger(__name__) - - -def task(ctx, config): - """ - Test handling of divergent entries during export / import - to regression test tracker #11184 - - overrides: - ceph: - conf: - osd: - debug osd: 5 - - Requires 3 osds on a single test node. 
- """ - if config is None: - config = {} - assert isinstance(config, dict), \ - 'divergent_priors task only accepts a dict for configuration' - - while len(ctx.manager.get_osd_status()['up']) < 3: - time.sleep(10) - ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - ctx.manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - ctx.manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - ctx.manager.raw_cluster_cmd('osd', 'set', 'noout') - ctx.manager.raw_cluster_cmd('osd', 'set', 'noin') - ctx.manager.raw_cluster_cmd('osd', 'set', 'nodown') - ctx.manager.wait_for_clean() - - # something that is always there - dummyfile = '/etc/fstab' - dummyfile2 = '/etc/resolv.conf' - testdir = teuthology.get_testdir(ctx) - - # create 1 pg pool - log.info('creating foo') - ctx.manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1') - - osds = [0, 1, 2] - for i in osds: - ctx.manager.set_config(i, osd_min_pg_log_entries=10) - ctx.manager.set_config(i, osd_max_pg_log_entries=10) - ctx.manager.set_config(i, osd_pg_log_trim_min=5) - - # determine primary - divergent = ctx.manager.get_pg_primary('foo', 0) - log.info("primary and soon to be divergent is %d", divergent) - non_divergent = list(osds) - non_divergent.remove(divergent) - - log.info('writing initial objects') - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - # write 100 objects - for i in range(100): - rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile]) - - ctx.manager.wait_for_clean() - - # blackhole non_divergent - log.info("blackholing osds %s", str(non_divergent)) - for i in non_divergent: - ctx.manager.set_config(i, filestore_blackhole=1) - - DIVERGENT_WRITE = 5 - DIVERGENT_REMOVE = 5 - # Write some soon to be divergent - log.info('writing divergent objects') - for i in range(DIVERGENT_WRITE): - rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, - dummyfile2], wait=False) - # Remove some soon to be divergent - 
log.info('remove divergent objects') - for i in range(DIVERGENT_REMOVE): - rados(ctx, mon, ['-p', 'foo', 'rm', - 'existing_%d' % (i + DIVERGENT_WRITE)], wait=False) - time.sleep(10) - mon.run( - args=['killall', '-9', 'rados'], - wait=True, - check_status=False) - - # kill all the osds but leave divergent in - log.info('killing all the osds') - for i in osds: - ctx.manager.kill_osd(i) - for i in osds: - ctx.manager.mark_down_osd(i) - for i in non_divergent: - ctx.manager.mark_out_osd(i) - - # bring up non-divergent - log.info("bringing up non_divergent %s", str(non_divergent)) - for i in non_divergent: - ctx.manager.revive_osd(i) - for i in non_divergent: - ctx.manager.mark_in_osd(i) - - # write 1 non-divergent object (ensure that old divergent one is divergent) - objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE) - log.info('writing non-divergent object ' + objname) - rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2]) - - ctx.manager.wait_for_recovery() - - # ensure no recovery of up osds first - log.info('delay recovery') - for i in non_divergent: - ctx.manager.wait_run_admin_socket( - 'osd', i, ['set_recovery_delay', '100000']) - - # bring in our divergent friend - log.info("revive divergent %d", divergent) - ctx.manager.raw_cluster_cmd('osd', 'set', 'noup') - ctx.manager.revive_osd(divergent) - - log.info('delay recovery divergent') - ctx.manager.wait_run_admin_socket( - 'osd', divergent, ['set_recovery_delay', '100000']) - - ctx.manager.raw_cluster_cmd('osd', 'unset', 'noup') - while len(ctx.manager.get_osd_status()['up']) < 3: - time.sleep(10) - - log.info('wait for peering') - rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) - - # At this point the divergent_priors should have been detected - - log.info("killing divergent %d", divergent) - ctx.manager.kill_osd(divergent) - - # Split pgs for pool foo - ctx.manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'pg_num', '2') - time.sleep(5) - - # Export a pg - (exp_remote,) = ctx.\ - 
cluster.only('osd.{o}'.format(o=divergent)).remotes.iterkeys() - FSPATH = ctx.manager.get_filepath() - JPATH = os.path.join(FSPATH, "journal") - prefix = ("sudo adjust-ulimits ceph-objectstore-tool " - "--data-path {fpath} --journal-path {jpath} " - "--log-file=" - "/var/log/ceph/objectstore_tool.$$.log ". - format(fpath=FSPATH, jpath=JPATH)) - pid = os.getpid() - expfile = os.path.join(testdir, "exp.{pid}.out".format(pid=pid)) - cmd = ((prefix + "--op export --pgid 1.0 --file {file}"). - format(id=divergent, file=expfile)) - proc = exp_remote.run(args=cmd, wait=True, - check_status=False, stdout=StringIO()) - assert proc.exitstatus == 0 - - # Remove the same pg that was exported - cmd = ((prefix + "--op remove --pgid 1.0"). - format(id=divergent, file=expfile)) - proc = exp_remote.run(args=cmd, wait=True, - check_status=False, stdout=StringIO()) - assert proc.exitstatus == 0 - - # Kill one of non-divergent OSDs - log.info('killing osd.%d' % non_divergent[1]) - ctx.manager.kill_osd(non_divergent[1]) - ctx.manager.mark_down_osd(non_divergent[1]) - # ctx.manager.mark_out_osd(non_divergent[1]) - - cmd = ((prefix + "--op import --file {file}"). 
- format(id=non_divergent[1], file=expfile)) - proc = exp_remote.run(args=cmd, wait=True, - check_status=False, stdout=StringIO()) - assert proc.exitstatus == 0 - - # bring in our divergent friend and other node - log.info("revive divergent %d", divergent) - ctx.manager.revive_osd(divergent) - ctx.manager.mark_in_osd(divergent) - log.info("revive %d", non_divergent[1]) - ctx.manager.revive_osd(non_divergent[1]) - - while len(ctx.manager.get_osd_status()['up']) < 3: - time.sleep(10) - - log.info('delay recovery divergent') - ctx.manager.set_config(divergent, osd_recovery_delay_start=100000) - log.info('mark divergent in') - ctx.manager.mark_in_osd(divergent) - - log.info('wait for peering') - rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) - - log.info("killing divergent %d", divergent) - ctx.manager.kill_osd(divergent) - log.info("reviving divergent %d", divergent) - ctx.manager.revive_osd(divergent) - time.sleep(3) - - log.info('allowing recovery') - # Set osd_recovery_delay_start back to 0 and kick the queue - for i in osds: - ctx.manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug', - 'kick_recovery_wq', ' 0') - - log.info('reading divergent objects') - for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE): - exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i, - '/tmp/existing']) - assert exit_status is 0 - - (remote,) = ctx.\ - cluster.only('osd.{o}'.format(o=divergent)).remotes.iterkeys() - msg = "dirty_divergent_priors: true, divergent_priors: %d" \ - % (DIVERGENT_WRITE + DIVERGENT_REMOVE) - cmd = 'grep "{msg}" /var/log/ceph/ceph-osd.{osd}.log'\ - .format(msg=msg, osd=divergent) - proc = remote.run(args=cmd, wait=True, check_status=False) - assert proc.exitstatus == 0 - - cmd = 'rm {file}'.format(file=expfile) - remote.run(args=cmd, wait=True) - log.info("success") diff --git a/tasks/rep_lost_unfound_delete.py b/tasks/rep_lost_unfound_delete.py deleted file mode 100644 index b36d260b122..00000000000 --- 
a/tasks/rep_lost_unfound_delete.py +++ /dev/null @@ -1,156 +0,0 @@ -""" -Lost_unfound -""" -import logging -import ceph_manager -from teuthology import misc as teuthology -from util.rados import rados - -log = logging.getLogger(__name__) - -def task(ctx, config): - """ - Test handling of lost objects. - - A pretty rigid cluseter is brought up andtested by this task - """ - POOL = 'unfounddel_pool' - if config is None: - config = {} - assert isinstance(config, dict), \ - 'lost_unfound task only accepts a dict for configuration' - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - - manager = ceph_manager.CephManager( - mon, - ctx=ctx, - logger=log.getChild('ceph_manager'), - ) - - while len(manager.get_osd_status()['up']) < 3: - manager.sleep(10) - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - manager.wait_for_clean() - - manager.create_pool(POOL) - - # something that is always there - dummyfile = '/etc/fstab' - - # take an osd out until the very end - manager.kill_osd(2) - manager.mark_down_osd(2) - manager.mark_out_osd(2) - - # kludge to make sure they get a map - rados(ctx, mon, ['-p', POOL, 'put', 'dummy', dummyfile]) - - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.wait_for_recovery() - - # create old objects - for f in range(1, 10): - rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) - rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) - rados(ctx, mon, ['-p', POOL, 'rm', 'existed_%d' % f]) - - # delay recovery, and make the pg log very long (to prevent backfill) - manager.raw_cluster_cmd( - 'tell', 'osd.1', - 'injectargs', - '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000' - ) - - manager.kill_osd(0) - manager.mark_down_osd(0) - - 
for f in range(1, 10): - rados(ctx, mon, ['-p', POOL, 'put', 'new_%d' % f, dummyfile]) - rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) - rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) - - # bring osd.0 back up, let it peer, but don't replicate the new - # objects... - log.info('osd.0 command_args is %s' % 'foo') - log.info(ctx.daemons.get_daemon('osd', 0).command_args) - ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend([ - '--osd-recovery-delay-start', '1000' - ]) - manager.revive_osd(0) - manager.mark_in_osd(0) - manager.wait_till_osd_is_up(0) - - manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.wait_till_active() - - # take out osd.1 and the only copy of those objects. - manager.kill_osd(1) - manager.mark_down_osd(1) - manager.mark_out_osd(1) - manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it') - - # bring up osd.2 so that things would otherwise, in theory, recovery fully - manager.revive_osd(2) - manager.mark_in_osd(2) - manager.wait_till_osd_is_up(2) - - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - manager.wait_till_active() - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - - # verify that there are unfound objects - unfound = manager.get_num_unfound_objects() - log.info("there are %d unfound objects" % unfound) - assert unfound - - # mark stuff lost - pgs = manager.get_pg_stats() - for pg in pgs: - if pg['stat_sum']['num_objects_unfound'] > 0: - primary = 'osd.%d' % pg['acting'][0] - - # verify that i can list them direct from the osd - log.info('listing missing/lost in %s state %s', pg['pgid'], - pg['state']); - m = manager.list_pg_missing(pg['pgid']) - #log.info('%s' % m) - assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] - num_unfound=0 - for 
o in m['objects']: - if len(o['locations']) == 0: - num_unfound += 1 - assert m['num_unfound'] == num_unfound - - log.info("reverting unfound in %s on %s", pg['pgid'], primary) - manager.raw_cluster_cmd('pg', pg['pgid'], - 'mark_unfound_lost', 'delete') - else: - log.info("no unfound in %s", pg['pgid']) - - manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5') - manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5') - manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') - manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') - manager.wait_for_recovery() - - # verify result - for f in range(1, 10): - err = rados(ctx, mon, ['-p', POOL, 'get', 'new_%d' % f, '-']) - assert err - err = rados(ctx, mon, ['-p', POOL, 'get', 'existed_%d' % f, '-']) - assert err - err = rados(ctx, mon, ['-p', POOL, 'get', 'existing_%d' % f, '-']) - assert err - - # see if osd.1 can cope - manager.revive_osd(1) - manager.mark_in_osd(1) - manager.wait_till_osd_is_up(1) - manager.wait_for_clean() diff --git a/tasks/repair_test.py b/tasks/repair_test.py deleted file mode 100644 index f71d99e2fb8..00000000000 --- a/tasks/repair_test.py +++ /dev/null @@ -1,312 +0,0 @@ -""" -Test pool repairing after objects are damaged. -""" -import logging -import time - -import ceph_manager -from teuthology import misc as teuthology - -log = logging.getLogger(__name__) - - -def choose_primary(ctx, pool, num): - """ - Return primary to test on. - """ - log.info("Choosing primary") - return ctx.manager.get_pg_primary(pool, num) - - -def choose_replica(ctx, pool, num): - """ - Return replica to test on. 
- """ - log.info("Choosing replica") - return ctx.manager.get_pg_replica(pool, num) - - -def trunc(ctx, osd, pool, obj): - """ - truncate an object - """ - log.info("truncating object") - return ctx.manager.osd_admin_socket( - osd, - ['truncobj', pool, obj, '1']) - - -def dataerr(ctx, osd, pool, obj): - """ - cause an error in the data - """ - log.info("injecting data err on object") - return ctx.manager.osd_admin_socket( - osd, - ['injectdataerr', pool, obj]) - - -def mdataerr(ctx, osd, pool, obj): - """ - cause an error in the mdata - """ - log.info("injecting mdata err on object") - return ctx.manager.osd_admin_socket( - osd, - ['injectmdataerr', pool, obj]) - - -def omaperr(ctx, osd, pool, obj): - """ - Cause an omap error. - """ - log.info("injecting omap err on object") - return ctx.manager.osd_admin_socket(osd, ['setomapval', pool, obj, - 'badkey', 'badval']) - - -def repair_test_1(ctx, corrupter, chooser, scrub_type): - """ - Creates an object in the pool, corrupts it, - scrubs it, and verifies that the pool is inconsistent. It then repairs - the pool, rescrubs it, and verifies that the pool is consistent - - :param corrupter: error generating function (truncate, data-error, or - meta-data error, for example). 
- :param chooser: osd type chooser (primary or replica) - :param scrub_type: regular scrub or deep-scrub - """ - pool = "repair_pool_1" - ctx.manager.wait_for_clean() - with ctx.manager.pool(pool, 1): - - log.info("starting repair test type 1") - victim_osd = chooser(ctx, pool, 0) - - # create object - log.info("doing put") - ctx.manager.do_put(pool, 'repair_test_obj', '/etc/hosts') - - # corrupt object - log.info("corrupting object") - corrupter(ctx, victim_osd, pool, 'repair_test_obj') - - # verify inconsistent - log.info("scrubbing") - ctx.manager.do_pg_scrub(pool, 0, scrub_type) - - assert ctx.manager.pg_inconsistent(pool, 0) - - # repair - log.info("repairing") - ctx.manager.do_pg_scrub(pool, 0, "repair") - - log.info("re-scrubbing") - ctx.manager.do_pg_scrub(pool, 0, scrub_type) - - # verify consistent - assert not ctx.manager.pg_inconsistent(pool, 0) - log.info("done") - - -def repair_test_2(ctx, config, chooser): - """ - First creates a set of objects and - sets the omap value. It then corrupts an object, does both a scrub - and a deep-scrub, and then corrupts more objects. After that, it - repairs the pool and makes sure that the pool is consistent some - time after a deep-scrub. - - :param chooser: primary or replica selection routine. 
- """ - pool = "repair_pool_2" - ctx.manager.wait_for_clean() - with ctx.manager.pool(pool, 1): - log.info("starting repair test type 2") - victim_osd = chooser(ctx, pool, 0) - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - - # create object - log.info("doing put and setomapval") - ctx.manager.do_put(pool, 'file1', '/etc/hosts') - ctx.manager.do_rados(mon, ['-p', pool, 'setomapval', 'file1', - 'key', 'val']) - ctx.manager.do_put(pool, 'file2', '/etc/hosts') - ctx.manager.do_put(pool, 'file3', '/etc/hosts') - ctx.manager.do_put(pool, 'file4', '/etc/hosts') - ctx.manager.do_put(pool, 'file5', '/etc/hosts') - ctx.manager.do_rados(mon, ['-p', pool, 'setomapval', 'file5', - 'key', 'val']) - ctx.manager.do_put(pool, 'file6', '/etc/hosts') - - # corrupt object - log.info("corrupting object") - omaperr(ctx, victim_osd, pool, 'file1') - - # verify inconsistent - log.info("scrubbing") - ctx.manager.do_pg_scrub(pool, 0, 'deep-scrub') - - assert ctx.manager.pg_inconsistent(pool, 0) - - # Regression test for bug #4778, should still - # be inconsistent after scrub - ctx.manager.do_pg_scrub(pool, 0, 'scrub') - - assert ctx.manager.pg_inconsistent(pool, 0) - - # Additional corruptions including 2 types for file1 - log.info("corrupting more objects") - dataerr(ctx, victim_osd, pool, 'file1') - mdataerr(ctx, victim_osd, pool, 'file2') - trunc(ctx, victim_osd, pool, 'file3') - omaperr(ctx, victim_osd, pool, 'file6') - - # see still inconsistent - log.info("scrubbing") - ctx.manager.do_pg_scrub(pool, 0, 'deep-scrub') - - assert ctx.manager.pg_inconsistent(pool, 0) - - # repair - log.info("repairing") - ctx.manager.do_pg_scrub(pool, 0, "repair") - - # Let repair clear inconsistent flag - time.sleep(10) - - # verify consistent - assert not ctx.manager.pg_inconsistent(pool, 0) - - # In the future repair might determine state of - # inconsistency itself, verify with a deep-scrub - log.info("scrubbing") - 
ctx.manager.do_pg_scrub(pool, 0, 'deep-scrub') - - # verify consistent - assert not ctx.manager.pg_inconsistent(pool, 0) - - log.info("done") - - -def hinfoerr(ctx, victim, pool, obj): - """ - cause an error in the hinfo_key - """ - log.info("remove the hinfo_key") - ctx.manager.objectstore_tool(pool, - options='', - args='rm-attr hinfo_key', - object_name=obj, - osd=victim) - - -def repair_test_erasure_code(ctx, corrupter, victim, scrub_type): - """ - Creates an object in the pool, corrupts it, - scrubs it, and verifies that the pool is inconsistent. It then repairs - the pool, rescrubs it, and verifies that the pool is consistent - - :param corrupter: error generating function. - :param chooser: osd type chooser (primary or replica) - :param scrub_type: regular scrub or deep-scrub - """ - pool = "repair_pool_3" - ctx.manager.wait_for_clean() - with ctx.manager.pool(pool_name=pool, pg_num=1, - erasure_code_profile_name='default'): - - log.info("starting repair test for erasure code") - - # create object - log.info("doing put") - ctx.manager.do_put(pool, 'repair_test_obj', '/etc/hosts') - - # corrupt object - log.info("corrupting object") - corrupter(ctx, victim, pool, 'repair_test_obj') - - # verify inconsistent - log.info("scrubbing") - ctx.manager.do_pg_scrub(pool, 0, scrub_type) - - assert ctx.manager.pg_inconsistent(pool, 0) - - # repair - log.info("repairing") - ctx.manager.do_pg_scrub(pool, 0, "repair") - - log.info("re-scrubbing") - ctx.manager.do_pg_scrub(pool, 0, scrub_type) - - # verify consistent - assert not ctx.manager.pg_inconsistent(pool, 0) - log.info("done") - - -def task(ctx, config): - """ - Test [deep] repair in several situations: - Repair [Truncate, Data EIO, MData EIO] on [Primary|Replica] - - The config should be as follows: - - Must include the log-whitelist below - Must enable filestore_debug_inject_read_err config - - example: - - tasks: - - chef: - - install: - - ceph: - log-whitelist: - - 'candidate had a read error' - - 'deep-scrub 0 
missing, 1 inconsistent objects' - - 'deep-scrub 0 missing, 4 inconsistent objects' - - 'deep-scrub 1 errors' - - 'deep-scrub 4 errors' - - '!= known omap_digest' - - 'repair 0 missing, 1 inconsistent objects' - - 'repair 0 missing, 4 inconsistent objects' - - 'repair 1 errors, 1 fixed' - - 'repair 4 errors, 4 fixed' - - 'scrub 0 missing, 1 inconsistent' - - 'scrub 1 errors' - - 'size 1 != known size' - conf: - osd: - filestore debug inject read err: true - - repair_test: - - """ - if config is None: - config = {} - assert isinstance(config, dict), \ - 'repair_test task only accepts a dict for config' - - if not hasattr(ctx, 'manager'): - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - ctx.manager = ceph_manager.CephManager( - mon, - ctx=ctx, - logger=log.getChild('ceph_manager') - ) - - ctx.manager.wait_for_all_up() - - ctx.manager.raw_cluster_cmd('osd', 'set', 'noscrub') - ctx.manager.raw_cluster_cmd('osd', 'set', 'nodeep-scrub') - - repair_test_1(ctx, mdataerr, choose_primary, "scrub") - repair_test_1(ctx, mdataerr, choose_replica, "scrub") - repair_test_1(ctx, dataerr, choose_primary, "deep-scrub") - repair_test_1(ctx, dataerr, choose_replica, "deep-scrub") - repair_test_1(ctx, trunc, choose_primary, "scrub") - repair_test_1(ctx, trunc, choose_replica, "scrub") - repair_test_2(ctx, config, choose_primary) - repair_test_2(ctx, config, choose_replica) - - repair_test_erasure_code(ctx, hinfoerr, 'primary', "deep-scrub") diff --git a/tasks/rest_api.py b/tasks/rest_api.py deleted file mode 100644 index 0956d00be51..00000000000 --- a/tasks/rest_api.py +++ /dev/null @@ -1,183 +0,0 @@ -""" -Rest Api -""" -import logging -import contextlib -import time - -from teuthology import misc as teuthology -from teuthology import contextutil -from teuthology.orchestra import run -from teuthology.orchestra.daemon import DaemonGroup - -log = logging.getLogger(__name__) - - -@contextlib.contextmanager -def 
run_rest_api_daemon(ctx, api_clients): - """ - Wrapper starts the rest api daemons - """ - if not hasattr(ctx, 'daemons'): - ctx.daemons = DaemonGroup() - remotes = ctx.cluster.only(teuthology.is_type('client')).remotes - for rems, roles in remotes.iteritems(): - for whole_id_ in roles: - if whole_id_ in api_clients: - id_ = whole_id_[len('clients'):] - run_cmd = [ - 'sudo', - 'daemon-helper', - 'kill', - 'ceph-rest-api', - '-n', - 'client.rest{id}'.format(id=id_), ] - cl_rest_id = 'client.rest{id}'.format(id=id_) - ctx.daemons.add_daemon(rems, 'restapi', - cl_rest_id, - args=run_cmd, - logger=log.getChild(cl_rest_id), - stdin=run.PIPE, - wait=False, - ) - for i in range(1, 12): - log.info('testing for ceph-rest-api try {0}'.format(i)) - run_cmd = [ - 'wget', - '-O', - '/dev/null', - '-q', - 'http://localhost:5000/api/v0.1/status' - ] - proc = rems.run( - args=run_cmd, - check_status=False - ) - if proc.exitstatus == 0: - break - time.sleep(5) - if proc.exitstatus != 0: - raise RuntimeError('Cannot contact ceph-rest-api') - try: - yield - - finally: - """ - TO DO: destroy daemons started -- modify iter_daemons_of_role - """ - teuthology.stop_daemons_of_type(ctx, 'restapi') - -@contextlib.contextmanager -def task(ctx, config): - """ - Start up rest-api. - - To start on on all clients:: - - tasks: - - ceph: - - rest-api: - - To only run on certain clients:: - - tasks: - - ceph: - - rest-api: [client.0, client.3] - - or - - tasks: - - ceph: - - rest-api: - client.0: - client.3: - - The general flow of things here is: - 1. Find clients on which rest-api is supposed to run (api_clients) - 2. Generate keyring values - 3. Start up ceph-rest-api daemons - On cleanup: - 4. Stop the daemons - 5. Delete keyring value files. 
- """ - api_clients = [] - remotes = ctx.cluster.only(teuthology.is_type('client')).remotes - log.info(remotes) - if config == None: - api_clients = ['client.{id}'.format(id=id_) - for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] - else: - api_clients = config - log.info(api_clients) - testdir = teuthology.get_testdir(ctx) - coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir) - for rems, roles in remotes.iteritems(): - for whole_id_ in roles: - if whole_id_ in api_clients: - id_ = whole_id_[len('client.'):] - keyring = '/etc/ceph/ceph.client.rest{id}.keyring'.format( - id=id_) - rems.run( - args=[ - 'sudo', - 'adjust-ulimits', - 'ceph-coverage', - coverage_dir, - 'ceph-authtool', - '--create-keyring', - '--gen-key', - '--name=client.rest{id}'.format(id=id_), - '--set-uid=0', - '--cap', 'mon', 'allow *', - '--cap', 'osd', 'allow *', - '--cap', 'mds', 'allow', - keyring, - run.Raw('&&'), - 'sudo', - 'chmod', - '0644', - keyring, - ], - ) - rems.run( - args=[ - 'sudo', - 'sh', - '-c', - run.Raw("'"), - "echo", - '[client.rest{id}]'.format(id=id_), - run.Raw('>>'), - "/etc/ceph/ceph.conf", - run.Raw("'") - ] - ) - rems.run( - args=[ - 'sudo', - 'sh', - '-c', - run.Raw("'"), - 'echo', - 'restapi', - 'keyring', - '=', - '/etc/ceph/ceph.client.rest{id}.keyring'.format(id=id_), - run.Raw('>>'), - '/etc/ceph/ceph.conf', - run.Raw("'"), - ] - ) - rems.run( - args=[ - 'ceph', - 'auth', - 'import', - '-i', - '/etc/ceph/ceph.client.rest{id}.keyring'.format(id=id_), - ] - ) - with contextutil.nested( - lambda: run_rest_api_daemon(ctx=ctx, api_clients=api_clients),): - yield - diff --git a/tasks/restart.py b/tasks/restart.py deleted file mode 100644 index 697345a975b..00000000000 --- a/tasks/restart.py +++ /dev/null @@ -1,163 +0,0 @@ -""" -Daemon restart -""" -import logging -import pipes - -from teuthology import misc as teuthology -from teuthology.orchestra import run as tor - -from teuthology.orchestra import run -log = logging.getLogger(__name__) - 
-def restart_daemon(ctx, config, role, id_, *args): - """ - Handle restart (including the execution of the command parameters passed) - """ - log.info('Restarting {r}.{i} daemon...'.format(r=role, i=id_)) - daemon = ctx.daemons.get_daemon(role, id_) - log.debug('Waiting for exit of {r}.{i} daemon...'.format(r=role, i=id_)) - try: - daemon.wait_for_exit() - except tor.CommandFailedError as e: - log.debug('Command Failed: {e}'.format(e=e)) - if len(args) > 0: - confargs = ['--{k}={v}'.format(k=k, v=v) for k,v in zip(args[0::2], args[1::2])] - log.debug('Doing restart of {r}.{i} daemon with args: {a}...'.format(r=role, i=id_, a=confargs)) - daemon.restart_with_args(confargs) - else: - log.debug('Doing restart of {r}.{i} daemon...'.format(r=role, i=id_)) - daemon.restart() - -def get_tests(ctx, config, role, remote, testdir): - """Download restart tests""" - srcdir = '{tdir}/restart.{role}'.format(tdir=testdir, role=role) - - refspec = config.get('branch') - if refspec is None: - refspec = config.get('sha1') - if refspec is None: - refspec = config.get('tag') - if refspec is None: - refspec = 'HEAD' - log.info('Pulling restart qa/workunits from ref %s', refspec) - - remote.run( - logger=log.getChild(role), - args=[ - 'mkdir', '--', srcdir, - run.Raw('&&'), - 'git', - 'archive', - '--remote=git://git.ceph.com/ceph.git', - '%s:qa/workunits' % refspec, - run.Raw('|'), - 'tar', - '-C', srcdir, - '-x', - '-f-', - run.Raw('&&'), - 'cd', '--', srcdir, - run.Raw('&&'), - 'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi', - run.Raw('&&'), - 'find', '-executable', '-type', 'f', '-printf', r'%P\0'.format(srcdir=srcdir), - run.Raw('>{tdir}/restarts.list'.format(tdir=testdir)), - ], - ) - restarts = sorted(teuthology.get_file( - remote, - '{tdir}/restarts.list'.format(tdir=testdir)).split('\0')) - return (srcdir, restarts) - -def task(ctx, config): - """ - Execute commands and allow daemon restart with config options. 
- Each process executed can output to stdout restart commands of the form: - restart - This will restart the daemon . with the specified config values once - by modifying the conf file with those values, and then replacing the old conf file - once the daemon is restarted. - This task does not kill a running daemon, it assumes the daemon will abort on an - assert specified in the config. - - tasks: - - install: - - ceph: - - restart: - exec: - client.0: - - test_backtraces.py - - """ - assert isinstance(config, dict), "task kill got invalid config" - - testdir = teuthology.get_testdir(ctx) - - try: - assert 'exec' in config, "config requires exec key with : entries" - for role, task in config['exec'].iteritems(): - log.info('restart for role {r}'.format(r=role)) - (remote,) = ctx.cluster.only(role).remotes.iterkeys() - srcdir, restarts = get_tests(ctx, config, role, remote, testdir) - log.info('Running command on role %s host %s', role, remote.name) - spec = '{spec}'.format(spec=task[0]) - log.info('Restarts list: %s', restarts) - log.info('Spec is %s', spec) - to_run = [w for w in restarts if w == task or w.find(spec) != -1] - log.info('To run: %s', to_run) - for c in to_run: - log.info('Running restart script %s...', c) - args = [ - run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)), - ] - env = config.get('env') - if env is not None: - for var, val in env.iteritems(): - quoted_val = pipes.quote(val) - env_arg = '{var}={val}'.format(var=var, val=quoted_val) - args.append(run.Raw(env_arg)) - args.extend([ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - '{srcdir}/{c}'.format( - srcdir=srcdir, - c=c, - ), - ]) - proc = remote.run( - args=args, - stdout=tor.PIPE, - stdin=tor.PIPE, - stderr=log, - wait=False, - ) - log.info('waiting for a command from script') - while True: - l = proc.stdout.readline() - if not l or l == '': - break - log.debug('script command: {c}'.format(c=l)) - ll = l.strip() - cmd = ll.split(' ') - if cmd[0] 
== "done": - break - assert cmd[0] == 'restart', "script sent invalid command request to kill task" - # cmd should be: restart - # or to clear, just: restart - restart_daemon(ctx, config, cmd[1], cmd[2], *cmd[3:]) - proc.stdin.writelines(['restarted\n']) - proc.stdin.flush() - try: - proc.wait() - except tor.CommandFailedError: - raise Exception('restart task got non-zero exit status from script: {s}'.format(s=c)) - finally: - log.info('Finishing %s on %s...', task, role) - remote.run( - logger=log.getChild(role), - args=[ - 'rm', '-rf', '--', '{tdir}/restarts.list'.format(tdir=testdir), srcdir, - ], - ) diff --git a/tasks/rgw.py b/tasks/rgw.py deleted file mode 100644 index 591a8a8cc68..00000000000 --- a/tasks/rgw.py +++ /dev/null @@ -1,846 +0,0 @@ -""" -rgw routines -""" -import argparse -import contextlib -import json -import logging -import os - -from cStringIO import StringIO - -from teuthology.orchestra import run -from teuthology import misc as teuthology -from teuthology import contextutil -from teuthology.orchestra.run import CommandFailedError -from util.rgw import rgwadmin -from util.rados import (rados, create_ec_pool, - create_replicated_pool, - create_cache_pool) - -log = logging.getLogger(__name__) - - -@contextlib.contextmanager -def create_apache_dirs(ctx, config): - """ - Remotely create apache directories. Delete when finished. 
- """ - log.info('Creating apache directories...') - testdir = teuthology.get_testdir(ctx) - for client in config.iterkeys(): - ctx.cluster.only(client).run( - args=[ - 'mkdir', - '-p', - '{tdir}/apache/htdocs.{client}'.format(tdir=testdir, - client=client), - '{tdir}/apache/tmp.{client}/fastcgi_sock'.format( - tdir=testdir, - client=client), - run.Raw('&&'), - 'mkdir', - '{tdir}/archive/apache.{client}'.format(tdir=testdir, - client=client), - ], - ) - try: - yield - finally: - log.info('Cleaning up apache directories...') - for client in config.iterkeys(): - ctx.cluster.only(client).run( - args=[ - 'rm', - '-rf', - '{tdir}/apache/tmp.{client}'.format(tdir=testdir, - client=client), - run.Raw('&&'), - 'rmdir', - '{tdir}/apache/htdocs.{client}'.format(tdir=testdir, - client=client), - ], - ) - - for client in config.iterkeys(): - ctx.cluster.only(client).run( - args=[ - 'rmdir', - '{tdir}/apache'.format(tdir=testdir), - ], - check_status=False, # only need to remove once per host - ) - - -@contextlib.contextmanager -def ship_apache_configs(ctx, config, role_endpoints): - """ - Ship apache config and rgw.fgci to all clients. 
Clean up on termination - """ - assert isinstance(config, dict) - assert isinstance(role_endpoints, dict) - testdir = teuthology.get_testdir(ctx) - log.info('Shipping apache config and rgw.fcgi...') - src = os.path.join(os.path.dirname(__file__), 'apache.conf.template') - for client, conf in config.iteritems(): - (remote,) = ctx.cluster.only(client).remotes.keys() - system_type = teuthology.get_system_type(remote) - if not conf: - conf = {} - idle_timeout = conf.get('idle_timeout', ctx.rgw.default_idle_timeout) - if system_type == 'deb': - mod_path = '/usr/lib/apache2/modules' - print_continue = 'on' - user = 'www-data' - group = 'www-data' - apache24_modconfig = ''' - IncludeOptional /etc/apache2/mods-available/mpm_event.conf - IncludeOptional /etc/apache2/mods-available/mpm_event.load -''' - else: - mod_path = '/usr/lib64/httpd/modules' - print_continue = 'off' - user = 'apache' - group = 'apache' - apache24_modconfig = \ - 'IncludeOptional /etc/httpd/conf.modules.d/00-mpm.conf' - host, port = role_endpoints[client] - with file(src, 'rb') as f: - conf = f.read().format( - testdir=testdir, - mod_path=mod_path, - print_continue=print_continue, - host=host, - port=port, - client=client, - idle_timeout=idle_timeout, - user=user, - group=group, - apache24_modconfig=apache24_modconfig, - ) - teuthology.write_file( - remote=remote, - path='{tdir}/apache/apache.{client}.conf'.format( - tdir=testdir, - client=client), - data=conf, - ) - teuthology.write_file( - remote=remote, - path='{tdir}/apache/htdocs.{client}/rgw.fcgi'.format( - tdir=testdir, - client=client), - data="""#!/bin/sh -ulimit -c unlimited -exec radosgw -f -n {client} -k /etc/ceph/ceph.{client}.keyring --rgw-socket-path {tdir}/apache/tmp.{client}/fastcgi_sock/rgw_sock - -""".format(tdir=testdir, client=client) - ) - remote.run( - args=[ - 'chmod', - 'a=rx', - '{tdir}/apache/htdocs.{client}/rgw.fcgi'.format(tdir=testdir, - client=client), - ], - ) - try: - yield - finally: - log.info('Removing apache 
config...') - for client in config.iterkeys(): - ctx.cluster.only(client).run( - args=[ - 'rm', - '-f', - '{tdir}/apache/apache.{client}.conf'.format(tdir=testdir, - client=client), - run.Raw('&&'), - 'rm', - '-f', - '{tdir}/apache/htdocs.{client}/rgw.fcgi'.format( - tdir=testdir, - client=client), - ], - ) - - -@contextlib.contextmanager -def start_rgw(ctx, config): - """ - Start rgw on remote sites. - """ - log.info('Starting rgw...') - testdir = teuthology.get_testdir(ctx) - for client in config.iterkeys(): - (remote,) = ctx.cluster.only(client).remotes.iterkeys() - - client_config = config.get(client) - if client_config is None: - client_config = {} - log.info("rgw %s config is %s", client, client_config) - id_ = client.split('.', 1)[1] - log.info('client {client} is id {id}'.format(client=client, id=id_)) - cmd_prefix = [ - 'sudo', - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'daemon-helper', - 'term', - ] - - rgw_cmd = ['radosgw'] - - if ctx.rgw.frontend == 'apache': - rgw_cmd.extend([ - '--rgw-socket-path', - '{tdir}/apache/tmp.{client}/fastcgi_sock/rgw_sock'.format( - tdir=testdir, - client=client, - ), - ]) - elif ctx.rgw.frontend == 'civetweb': - host, port = ctx.rgw.role_endpoints[client] - rgw_cmd.extend([ - '--rgw-frontends', - 'civetweb port={port}'.format(port=port), - ]) - - rgw_cmd.extend([ - '-n', client, - '-k', '/etc/ceph/ceph.{client}.keyring'.format(client=client), - '--log-file', - '/var/log/ceph/rgw.{client}.log'.format(client=client), - '--rgw_ops_log_socket_path', - '{tdir}/rgw.opslog.{client}.sock'.format(tdir=testdir, - client=client), - '--foreground', - run.Raw('|'), - 'sudo', - 'tee', - '/var/log/ceph/rgw.{client}.stdout'.format(tdir=testdir, - client=client), - run.Raw('2>&1'), - ]) - - if client_config.get('valgrind'): - cmd_prefix = teuthology.get_valgrind_args( - testdir, - client, - cmd_prefix, - client_config.get('valgrind') - ) - - run_cmd = list(cmd_prefix) - 
run_cmd.extend(rgw_cmd) - - ctx.daemons.add_daemon( - remote, 'rgw', client, - args=run_cmd, - logger=log.getChild(client), - stdin=run.PIPE, - wait=False, - ) - - try: - yield - finally: - teuthology.stop_daemons_of_type(ctx, 'rgw') - for client in config.iterkeys(): - ctx.cluster.only(client).run( - args=[ - 'rm', - '-f', - '{tdir}/rgw.opslog.{client}.sock'.format(tdir=testdir, - client=client), - ], - ) - - -@contextlib.contextmanager -def start_apache(ctx, config): - """ - Start apache on remote sites. - """ - log.info('Starting apache...') - testdir = teuthology.get_testdir(ctx) - apaches = {} - for client in config.iterkeys(): - (remote,) = ctx.cluster.only(client).remotes.keys() - system_type = teuthology.get_system_type(remote) - if system_type == 'deb': - apache_name = 'apache2' - else: - try: - remote.run( - args=[ - 'stat', - '/usr/sbin/httpd.worker', - ], - ) - apache_name = '/usr/sbin/httpd.worker' - except CommandFailedError: - apache_name = '/usr/sbin/httpd' - - proc = remote.run( - args=[ - 'adjust-ulimits', - 'daemon-helper', - 'kill', - apache_name, - '-X', - '-f', - '{tdir}/apache/apache.{client}.conf'.format(tdir=testdir, - client=client), - ], - logger=log.getChild(client), - stdin=run.PIPE, - wait=False, - ) - apaches[client] = proc - - try: - yield - finally: - log.info('Stopping apache...') - for client, proc in apaches.iteritems(): - proc.stdin.close() - - run.wait(apaches.itervalues()) - - -def extract_user_info(client_config): - """ - Extract user info from the client config specified. Returns a dict - that includes system key information. 
- """ - # test if there isn't a system user or if there isn't a name for that - # user, return None - if ('system user' not in client_config or - 'name' not in client_config['system user']): - return None - - user_info = dict() - user_info['system_key'] = dict( - user=client_config['system user']['name'], - access_key=client_config['system user']['access key'], - secret_key=client_config['system user']['secret key'], - ) - return user_info - - -def extract_zone_info(ctx, client, client_config): - """ - Get zone information. - :param client: dictionary of client information - :param client_config: dictionary of client configuration information - :returns: zone extracted from client and client_config information - """ - ceph_config = ctx.ceph.conf.get('global', {}) - ceph_config.update(ctx.ceph.conf.get('client', {})) - ceph_config.update(ctx.ceph.conf.get(client, {})) - for key in ['rgw zone', 'rgw region', 'rgw zone root pool']: - assert key in ceph_config, \ - 'ceph conf must contain {key} for {client}'.format(key=key, - client=client) - region = ceph_config['rgw region'] - zone = ceph_config['rgw zone'] - zone_info = dict() - for key in ['rgw control pool', 'rgw gc pool', 'rgw log pool', - 'rgw intent log pool', 'rgw usage log pool', - 'rgw user keys pool', 'rgw user email pool', - 'rgw user swift pool', 'rgw user uid pool', - 'rgw domain root']: - new_key = key.split(' ', 1)[1] - new_key = new_key.replace(' ', '_') - - if key in ceph_config: - value = ceph_config[key] - log.debug('{key} specified in ceph_config ({val})'.format( - key=key, val=value)) - zone_info[new_key] = value - else: - zone_info[new_key] = '.' + region + '.' + zone + '.' + new_key - - index_pool = '.' + region + '.' + zone + '.' + 'index_pool' - data_pool = '.' + region + '.' + zone + '.' + 'data_pool' - data_extra_pool = '.' + region + '.' + zone + '.' 
+ 'data_extra_pool' - - zone_info['placement_pools'] = [{'key': 'default_placement', - 'val': {'index_pool': index_pool, - 'data_pool': data_pool, - 'data_extra_pool': data_extra_pool} - }] - - # these keys are meant for the zones argument in the region info. We - # insert them into zone_info with a different format and then remove them - # in the fill_in_endpoints() method - for key in ['rgw log meta', 'rgw log data']: - if key in ceph_config: - zone_info[key] = ceph_config[key] - - # these keys are meant for the zones argument in the region info. We - # insert them into zone_info with a different format and then remove them - # in the fill_in_endpoints() method - for key in ['rgw log meta', 'rgw log data']: - if key in ceph_config: - zone_info[key] = ceph_config[key] - - return region, zone, zone_info - - -def extract_region_info(region, region_info): - """ - Extract region information from the region_info parameter, using get - to set default values. - - :param region: name of the region - :param region_info: region information (in dictionary form). - :returns: dictionary of region information set from region_info, using - default values for missing fields. - """ - assert isinstance(region_info['zones'], list) and region_info['zones'], \ - 'zones must be a non-empty list' - return dict( - name=region, - api_name=region_info.get('api name', region), - is_master=region_info.get('is master', False), - log_meta=region_info.get('log meta', False), - log_data=region_info.get('log data', False), - master_zone=region_info.get('master zone', region_info['zones'][0]), - placement_targets=region_info.get('placement targets', - [{'name': 'default_placement', - 'tags': []}]), - default_placement=region_info.get('default placement', - 'default_placement'), - ) - - -def assign_ports(ctx, config): - """ - Assign port numberst starting with port 7280. 
- """ - port = 7280 - role_endpoints = {} - for remote, roles_for_host in ctx.cluster.remotes.iteritems(): - for role in roles_for_host: - if role in config: - role_endpoints[role] = (remote.name.split('@')[1], port) - port += 1 - - return role_endpoints - - -def fill_in_endpoints(region_info, role_zones, role_endpoints): - """ - Iterate through the list of role_endpoints, filling in zone information - - :param region_info: region data - :param role_zones: region and zone information. - :param role_endpoints: endpoints being used - """ - for role, (host, port) in role_endpoints.iteritems(): - region, zone, zone_info, _ = role_zones[role] - host, port = role_endpoints[role] - endpoint = 'http://{host}:{port}/'.format(host=host, port=port) - # check if the region specified under client actually exists - # in region_info (it should, if properly configured). - # If not, throw a reasonable error - if region not in region_info: - raise Exception( - 'Region: {region} was specified but no corresponding' - ' entry was found under \'regions\''.format(region=region)) - - region_conf = region_info[region] - region_conf.setdefault('endpoints', []) - region_conf['endpoints'].append(endpoint) - - # this is the payload for the 'zones' field in the region field - zone_payload = dict() - zone_payload['endpoints'] = [endpoint] - zone_payload['name'] = zone - - # Pull the log meta and log data settings out of zone_info, if they - # exist, then pop them as they don't actually belong in the zone info - for key in ['rgw log meta', 'rgw log data']: - new_key = key.split(' ', 1)[1] - new_key = new_key.replace(' ', '_') - - if key in zone_info: - value = zone_info.pop(key) - else: - value = 'false' - - zone_payload[new_key] = value - - region_conf.setdefault('zones', []) - region_conf['zones'].append(zone_payload) - - -@contextlib.contextmanager -def configure_users(ctx, config, everywhere=False): - """ - Create users by remotely running rgwadmin commands using extracted - user information. 
- """ - log.info('Configuring users...') - - # extract the user info and append it to the payload tuple for the given - # client - for client, c_config in config.iteritems(): - if not c_config: - continue - user_info = extract_user_info(c_config) - if not user_info: - continue - - # For data sync the master zones and regions must have the - # system users of the secondary zones. To keep this simple, - # just create the system users on every client if regions are - # configured. - clients_to_create_as = [client] - if everywhere: - clients_to_create_as = config.keys() - for client_name in clients_to_create_as: - log.debug('Creating user {user} on {client}'.format( - user=user_info['system_key']['user'], client=client)) - rgwadmin(ctx, client_name, - cmd=[ - 'user', 'create', - '--uid', user_info['system_key']['user'], - '--access-key', user_info['system_key']['access_key'], - '--secret', user_info['system_key']['secret_key'], - '--display-name', user_info['system_key']['user'], - '--system', - ], - check_status=True, - ) - - yield - - -@contextlib.contextmanager -def create_nonregion_pools(ctx, config, regions): - """Create replicated or erasure coded data pools for rgw.""" - if regions: - yield - return - - log.info('creating data pools') - for client in config.keys(): - (remote,) = ctx.cluster.only(client).remotes.iterkeys() - data_pool = '.rgw.buckets' - if ctx.rgw.ec_data_pool: - create_ec_pool(remote, data_pool, client, 64, - ctx.rgw.erasure_code_profile) - else: - create_replicated_pool(remote, data_pool, 64) - if ctx.rgw.cache_pools: - create_cache_pool(remote, data_pool, data_pool + '.cache', 64, - 64*1024*1024) - yield - - -@contextlib.contextmanager -def configure_regions_and_zones(ctx, config, regions, role_endpoints): - """ - Configure regions and zones from rados and rgw. - """ - if not regions: - log.debug( - 'In rgw.configure_regions_and_zones() and regions is None. 
' - 'Bailing') - yield - return - - log.info('Configuring regions and zones...') - - log.debug('config is %r', config) - log.debug('regions are %r', regions) - log.debug('role_endpoints = %r', role_endpoints) - # extract the zone info - role_zones = dict([(client, extract_zone_info(ctx, client, c_config)) - for client, c_config in config.iteritems()]) - log.debug('roles_zones = %r', role_zones) - - # extract the user info and append it to the payload tuple for the given - # client - for client, c_config in config.iteritems(): - if not c_config: - user_info = None - else: - user_info = extract_user_info(c_config) - - (region, zone, zone_info) = role_zones[client] - role_zones[client] = (region, zone, zone_info, user_info) - - region_info = dict([ - (region_name, extract_region_info(region_name, r_config)) - for region_name, r_config in regions.iteritems()]) - - fill_in_endpoints(region_info, role_zones, role_endpoints) - - # clear out the old defaults - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - # removing these objects from .rgw.root and the per-zone root pools - # may or may not matter - rados(ctx, mon, - cmd=['-p', '.rgw.root', 'rm', 'region_info.default']) - rados(ctx, mon, - cmd=['-p', '.rgw.root', 'rm', 'zone_info.default']) - - for client in config.iterkeys(): - for role, (_, zone, zone_info, user_info) in role_zones.iteritems(): - rados(ctx, mon, - cmd=['-p', zone_info['domain_root'], - 'rm', 'region_info.default']) - rados(ctx, mon, - cmd=['-p', zone_info['domain_root'], - 'rm', 'zone_info.default']) - - (remote,) = ctx.cluster.only(role).remotes.keys() - for pool_info in zone_info['placement_pools']: - remote.run(args=['ceph', 'osd', 'pool', 'create', - pool_info['val']['index_pool'], '64', '64']) - if ctx.rgw.ec_data_pool: - create_ec_pool(remote, pool_info['val']['data_pool'], - zone, 64, ctx.rgw.erasure_code_profile) - else: - create_replicated_pool( - remote, 
pool_info['val']['data_pool'], - 64) - - rgwadmin(ctx, client, - cmd=['-n', client, 'zone', 'set', '--rgw-zone', zone], - stdin=StringIO(json.dumps(dict( - zone_info.items() + user_info.items()))), - check_status=True) - - for region, info in region_info.iteritems(): - region_json = json.dumps(info) - log.debug('region info is: %s', region_json) - rgwadmin(ctx, client, - cmd=['-n', client, 'region', 'set'], - stdin=StringIO(region_json), - check_status=True) - if info['is_master']: - rgwadmin(ctx, client, - cmd=['-n', client, - 'region', 'default', - '--rgw-region', region], - check_status=True) - - rgwadmin(ctx, client, cmd=['-n', client, 'regionmap', 'update']) - yield - - -@contextlib.contextmanager -def task(ctx, config): - """ - Either use configure apache to run a rados gateway, or use the built-in - civetweb server. - Only one should be run per machine, since it uses a hard-coded port for - now. - - For example, to run rgw on all clients:: - - tasks: - - ceph: - - rgw: - - To only run on certain clients:: - - tasks: - - ceph: - - rgw: [client.0, client.3] - - or - - tasks: - - ceph: - - rgw: - client.0: - client.3: - - You can adjust the idle timeout for fastcgi (default is 30 seconds): - - tasks: - - ceph: - - rgw: - client.0: - idle_timeout: 90 - - To run radosgw through valgrind: - - tasks: - - ceph: - - rgw: - client.0: - valgrind: [--tool=memcheck] - client.3: - valgrind: [--tool=memcheck] - - To use civetweb instead of apache: - - tasks: - - ceph: - - rgw: - - client.0 - overrides: - rgw: - frontend: civetweb - - Note that without a modified fastcgi module e.g. with the default - one on CentOS, you must have rgw print continue = false in ceph.conf:: - - tasks: - - ceph: - conf: - global: - rgw print continue: false - - rgw: [client.0] - - To run rgws for multiple regions or zones, describe the regions - and their zones in a regions section. The endpoints will be - generated by this task. 
Each client must have a region, zone, - and pools assigned in ceph.conf:: - - tasks: - - install: - - ceph: - conf: - client.0: - rgw region: foo - rgw zone: foo-1 - rgw region root pool: .rgw.rroot.foo - rgw zone root pool: .rgw.zroot.foo - rgw log meta: true - rgw log data: true - client.1: - rgw region: bar - rgw zone: bar-master - rgw region root pool: .rgw.rroot.bar - rgw zone root pool: .rgw.zroot.bar - rgw log meta: true - rgw log data: true - client.2: - rgw region: bar - rgw zone: bar-secondary - rgw region root pool: .rgw.rroot.bar - rgw zone root pool: .rgw.zroot.bar-secondary - - rgw: - default_idle_timeout: 30 - ec-data-pool: true - erasure_code_profile: - k: 2 - m: 1 - ruleset-failure-domain: osd - regions: - foo: - api name: api_name # default: region name - is master: true # default: false - master zone: foo-1 # default: first zone - zones: [foo-1] - log meta: true - log data: true - placement targets: [target1, target2] # default: [] - default placement: target2 # default: '' - bar: - api name: bar-api - zones: [bar-master, bar-secondary] - client.0: - system user: - name: foo-system - access key: X2IYPSTY1072DDY1SJMC - secret key: YIMHICpPvT+MhLTbSsiBJ1jQF15IFvJA8tgwJEcm - client.1: - system user: - name: bar1 - access key: Y2IYPSTY1072DDY1SJMC - secret key: XIMHICpPvT+MhLTbSsiBJ1jQF15IFvJA8tgwJEcm - client.2: - system user: - name: bar2 - access key: Z2IYPSTY1072DDY1SJMC - secret key: ZIMHICpPvT+MhLTbSsiBJ1jQF15IFvJA8tgwJEcm - """ - if config is None: - config = dict(('client.{id}'.format(id=id_), None) - for id_ in teuthology.all_roles_of_type( - ctx.cluster, 'client')) - elif isinstance(config, list): - config = dict((name, None) for name in config) - - overrides = ctx.config.get('overrides', {}) - teuthology.deep_merge(config, overrides.get('rgw', {})) - - regions = {} - if 'regions' in config: - # separate region info so only clients are keys in config - regions = config['regions'] - del config['regions'] - - role_endpoints = 
assign_ports(ctx, config) - ctx.rgw = argparse.Namespace() - ctx.rgw.role_endpoints = role_endpoints - # stash the region info for later, since it was deleted from the config - # structure - ctx.rgw.regions = regions - - ctx.rgw.ec_data_pool = False - if 'ec-data-pool' in config: - ctx.rgw.ec_data_pool = bool(config['ec-data-pool']) - del config['ec-data-pool'] - ctx.rgw.erasure_code_profile = {} - if 'erasure_code_profile' in config: - ctx.rgw.erasure_code_profile = config['erasure_code_profile'] - del config['erasure_code_profile'] - ctx.rgw.default_idle_timeout = 30 - if 'default_idle_timeout' in config: - ctx.rgw.default_idle_timeout = int(config['default_idle_timeout']) - del config['default_idle_timeout'] - ctx.rgw.cache_pools = False - if 'cache-pools' in config: - ctx.rgw.cache_pools = bool(config['cache-pools']) - del config['cache-pools'] - - ctx.rgw.frontend = 'apache' - if 'frontend' in config: - ctx.rgw.frontend = config['frontend'] - del config['frontend'] - - subtasks = [ - lambda: configure_regions_and_zones( - ctx=ctx, - config=config, - regions=regions, - role_endpoints=role_endpoints, - ), - lambda: configure_users( - ctx=ctx, - config=config, - everywhere=bool(regions), - ), - lambda: create_nonregion_pools( - ctx=ctx, config=config, regions=regions), - ] - if ctx.rgw.frontend == 'apache': - subtasks.insert(0, lambda: create_apache_dirs(ctx=ctx, config=config)) - subtasks.extend([ - lambda: ship_apache_configs(ctx=ctx, config=config, - role_endpoints=role_endpoints), - lambda: start_rgw(ctx=ctx, config=config), - lambda: start_apache(ctx=ctx, config=config), - ]) - elif ctx.rgw.frontend == 'civetweb': - subtasks.extend([ - lambda: start_rgw(ctx=ctx, config=config), - ]) - else: - raise ValueError("frontend must be 'apache' or 'civetweb'") - - log.info("Using %s as radosgw frontend", ctx.rgw.frontend) - with contextutil.nested(*subtasks): - yield diff --git a/tasks/rgw_logsocket.py b/tasks/rgw_logsocket.py deleted file mode 100644 index 
6f49b00d8a4..00000000000 --- a/tasks/rgw_logsocket.py +++ /dev/null @@ -1,161 +0,0 @@ -""" -rgw s3tests logging wrappers -""" -from cStringIO import StringIO -from configobj import ConfigObj -import contextlib -import logging -import s3tests - -from teuthology import misc as teuthology -from teuthology import contextutil - -log = logging.getLogger(__name__) - - -@contextlib.contextmanager -def download(ctx, config): - """ - Run s3tests download function - """ - return s3tests.download(ctx, config) - -def _config_user(s3tests_conf, section, user): - """ - Run s3tests user config function - """ - return s3tests._config_user(s3tests_conf, section, user) - -@contextlib.contextmanager -def create_users(ctx, config): - """ - Run s3tests user create function - """ - return s3tests.create_users(ctx, config) - -@contextlib.contextmanager -def configure(ctx, config): - """ - Run s3tests user configure function - """ - return s3tests.configure(ctx, config) - -@contextlib.contextmanager -def run_tests(ctx, config): - """ - Run remote netcat tests - """ - assert isinstance(config, dict) - testdir = teuthology.get_testdir(ctx) - for client, client_config in config.iteritems(): - client_config['extra_args'] = [ - 's3tests.functional.test_s3:test_bucket_list_return_data', - ] -# args = [ -# 'S3TEST_CONF={tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client), -# '{tdir}/s3-tests/virtualenv/bin/nosetests'.format(tdir=testdir), -# '-w', -# '{tdir}/s3-tests'.format(tdir=testdir), -# '-v', -# 's3tests.functional.test_s3:test_bucket_list_return_data', -# ] -# if client_config is not None and 'extra_args' in client_config: -# args.extend(client_config['extra_args']) -# -# ctx.cluster.only(client).run( -# args=args, -# ) - - s3tests.run_tests(ctx, config) - - netcat_out = StringIO() - - for client, client_config in config.iteritems(): - ctx.cluster.only(client).run( - args = [ - 'netcat', - '-w', '5', - '-U', '{tdir}/rgw.opslog.sock'.format(tdir=testdir), - ], - stdout 
= netcat_out, - ) - - out = netcat_out.getvalue() - - assert len(out) > 100 - - log.info('Received', out) - - yield - - -@contextlib.contextmanager -def task(ctx, config): - """ - Run some s3-tests suite against rgw, verify opslog socket returns data - - Must restrict testing to a particular client:: - - tasks: - - ceph: - - rgw: [client.0] - - s3tests: [client.0] - - To pass extra arguments to nose (e.g. to run a certain test):: - - tasks: - - ceph: - - rgw: [client.0] - - s3tests: - client.0: - extra_args: ['test_s3:test_object_acl_grand_public_read'] - client.1: - extra_args: ['--exclude', 'test_100_continue'] - """ - assert config is None or isinstance(config, list) \ - or isinstance(config, dict), \ - "task s3tests only supports a list or dictionary for configuration" - all_clients = ['client.{id}'.format(id=id_) - for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] - if config is None: - config = all_clients - if isinstance(config, list): - config = dict.fromkeys(config) - clients = config.keys() - - overrides = ctx.config.get('overrides', {}) - # merge each client section, not the top level. 
- for (client, cconf) in config.iteritems(): - teuthology.deep_merge(cconf, overrides.get('rgw-logsocket', {})) - - log.debug('config is %s', config) - - s3tests_conf = {} - for client in clients: - s3tests_conf[client] = ConfigObj( - indent_type='', - infile={ - 'DEFAULT': - { - 'port' : 7280, - 'is_secure' : 'no', - }, - 'fixtures' : {}, - 's3 main' : {}, - 's3 alt' : {}, - } - ) - - with contextutil.nested( - lambda: download(ctx=ctx, config=config), - lambda: create_users(ctx=ctx, config=dict( - clients=clients, - s3tests_conf=s3tests_conf, - )), - lambda: configure(ctx=ctx, config=dict( - clients=config, - s3tests_conf=s3tests_conf, - )), - lambda: run_tests(ctx=ctx, config=config), - ): - yield diff --git a/tasks/s3readwrite.py b/tasks/s3readwrite.py deleted file mode 100644 index 9f1507ef816..00000000000 --- a/tasks/s3readwrite.py +++ /dev/null @@ -1,346 +0,0 @@ -""" -Run rgw s3 readwite tests -""" -from cStringIO import StringIO -import base64 -import contextlib -import logging -import os -import random -import string -import yaml - -from teuthology import misc as teuthology -from teuthology import contextutil -from teuthology.config import config as teuth_config -from teuthology.orchestra import run -from teuthology.orchestra.connection import split_user - -log = logging.getLogger(__name__) - - -@contextlib.contextmanager -def download(ctx, config): - """ - Download the s3 tests from the git builder. - Remove downloaded s3 file upon exit. - - The context passed in should be identical to the context - passed in to the main task. 
- """ - assert isinstance(config, dict) - log.info('Downloading s3-tests...') - testdir = teuthology.get_testdir(ctx) - for (client, cconf) in config.items(): - branch = cconf.get('force-branch', None) - if not branch: - branch = cconf.get('branch', 'master') - sha1 = cconf.get('sha1') - ctx.cluster.only(client).run( - args=[ - 'git', 'clone', - '-b', branch, - teuth_config.ceph_git_base_url + 's3-tests.git', - '{tdir}/s3-tests'.format(tdir=testdir), - ], - ) - if sha1 is not None: - ctx.cluster.only(client).run( - args=[ - 'cd', '{tdir}/s3-tests'.format(tdir=testdir), - run.Raw('&&'), - 'git', 'reset', '--hard', sha1, - ], - ) - try: - yield - finally: - log.info('Removing s3-tests...') - testdir = teuthology.get_testdir(ctx) - for client in config: - ctx.cluster.only(client).run( - args=[ - 'rm', - '-rf', - '{tdir}/s3-tests'.format(tdir=testdir), - ], - ) - - -def _config_user(s3tests_conf, section, user): - """ - Configure users for this section by stashing away keys, ids, and - email addresses. - """ - s3tests_conf[section].setdefault('user_id', user) - s3tests_conf[section].setdefault('email', '{user}+test@test.test'.format(user=user)) - s3tests_conf[section].setdefault('display_name', 'Mr. {user}'.format(user=user)) - s3tests_conf[section].setdefault('access_key', ''.join(random.choice(string.uppercase) for i in xrange(20))) - s3tests_conf[section].setdefault('secret_key', base64.b64encode(os.urandom(40))) - -@contextlib.contextmanager -def create_users(ctx, config): - """ - Create a default s3 user. 
- """ - assert isinstance(config, dict) - log.info('Creating rgw users...') - testdir = teuthology.get_testdir(ctx) - users = {'s3': 'foo'} - cached_client_user_names = dict() - for client in config['clients']: - cached_client_user_names[client] = dict() - s3tests_conf = config['s3tests_conf'][client] - s3tests_conf.setdefault('readwrite', {}) - s3tests_conf['readwrite'].setdefault('bucket', 'rwtest-' + client + '-{random}-') - s3tests_conf['readwrite'].setdefault('readers', 10) - s3tests_conf['readwrite'].setdefault('writers', 3) - s3tests_conf['readwrite'].setdefault('duration', 300) - s3tests_conf['readwrite'].setdefault('files', {}) - rwconf = s3tests_conf['readwrite'] - rwconf['files'].setdefault('num', 10) - rwconf['files'].setdefault('size', 2000) - rwconf['files'].setdefault('stddev', 500) - for section, user in users.iteritems(): - _config_user(s3tests_conf, section, '{user}.{client}'.format(user=user, client=client)) - log.debug('creating user {user} on {client}'.format(user=s3tests_conf[section]['user_id'], - client=client)) - - # stash the 'delete_user' flag along with user name for easier cleanup - delete_this_user = True - if 'delete_user' in s3tests_conf['s3']: - delete_this_user = s3tests_conf['s3']['delete_user'] - log.debug('delete_user set to {flag} for {client}'.format(flag=delete_this_user, client=client)) - cached_client_user_names[client][section+user] = (s3tests_conf[section]['user_id'], delete_this_user) - - # skip actual user creation if the create_user flag is set to false for this client - if 'create_user' in s3tests_conf['s3'] and s3tests_conf['s3']['create_user'] == False: - log.debug('create_user set to False, skipping user creation for {client}'.format(client=client)) - continue - else: - ctx.cluster.only(client).run( - args=[ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'radosgw-admin', - '-n', client, - 'user', 'create', - '--uid', s3tests_conf[section]['user_id'], - '--display-name', 
s3tests_conf[section]['display_name'], - '--access-key', s3tests_conf[section]['access_key'], - '--secret', s3tests_conf[section]['secret_key'], - '--email', s3tests_conf[section]['email'], - ], - ) - try: - yield - finally: - for client in config['clients']: - for section, user in users.iteritems(): - #uid = '{user}.{client}'.format(user=user, client=client) - real_uid, delete_this_user = cached_client_user_names[client][section+user] - if delete_this_user: - ctx.cluster.only(client).run( - args=[ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'radosgw-admin', - '-n', client, - 'user', 'rm', - '--uid', real_uid, - '--purge-data', - ], - ) - else: - log.debug('skipping delete for user {uid} on {client}'.format(uid=real_uid, client=client)) - -@contextlib.contextmanager -def configure(ctx, config): - """ - Configure the s3-tests. This includes the running of the - bootstrap code and the updating of local conf files. - """ - assert isinstance(config, dict) - log.info('Configuring s3-readwrite-tests...') - for client, properties in config['clients'].iteritems(): - s3tests_conf = config['s3tests_conf'][client] - if properties is not None and 'rgw_server' in properties: - host = None - for target, roles in zip(ctx.config['targets'].iterkeys(), ctx.config['roles']): - log.info('roles: ' + str(roles)) - log.info('target: ' + str(target)) - if properties['rgw_server'] in roles: - _, host = split_user(target) - assert host is not None, "Invalid client specified as the rgw_server" - s3tests_conf['s3']['host'] = host - else: - s3tests_conf['s3']['host'] = 'localhost' - - def_conf = s3tests_conf['DEFAULT'] - s3tests_conf['s3'].setdefault('port', def_conf['port']) - s3tests_conf['s3'].setdefault('is_secure', def_conf['is_secure']) - - (remote,) = ctx.cluster.only(client).remotes.keys() - remote.run( - args=[ - 'cd', - '{tdir}/s3-tests'.format(tdir=teuthology.get_testdir(ctx)), - run.Raw('&&'), - './bootstrap', - ], - ) - conf_fp = 
StringIO() - conf = dict( - s3=s3tests_conf['s3'], - readwrite=s3tests_conf['readwrite'], - ) - yaml.safe_dump(conf, conf_fp, default_flow_style=False) - teuthology.write_file( - remote=remote, - path='{tdir}/archive/s3readwrite.{client}.config.yaml'.format(tdir=teuthology.get_testdir(ctx), client=client), - data=conf_fp.getvalue(), - ) - yield - - -@contextlib.contextmanager -def run_tests(ctx, config): - """ - Run the s3readwrite tests after everything is set up. - - :param ctx: Context passed to task - :param config: specific configuration information - """ - assert isinstance(config, dict) - testdir = teuthology.get_testdir(ctx) - for client, client_config in config.iteritems(): - (remote,) = ctx.cluster.only(client).remotes.keys() - conf = teuthology.get_file(remote, '{tdir}/archive/s3readwrite.{client}.config.yaml'.format(tdir=testdir, client=client)) - args = [ - '{tdir}/s3-tests/virtualenv/bin/s3tests-test-readwrite'.format(tdir=testdir), - ] - if client_config is not None and 'extra_args' in client_config: - args.extend(client_config['extra_args']) - - ctx.cluster.only(client).run( - args=args, - stdin=conf, - ) - yield - - -@contextlib.contextmanager -def task(ctx, config): - """ - Run the s3tests-test-readwrite suite against rgw. - - To run all tests on all clients:: - - tasks: - - ceph: - - rgw: - - s3readwrite: - - To restrict testing to particular clients:: - - tasks: - - ceph: - - rgw: [client.0] - - s3readwrite: [client.0] - - To run against a server on client.1:: - - tasks: - - ceph: - - rgw: [client.1] - - s3readwrite: - client.0: - rgw_server: client.1 - - To pass extra test arguments - - tasks: - - ceph: - - rgw: [client.0] - - s3readwrite: - client.0: - readwrite: - bucket: mybucket - readers: 10 - writers: 3 - duration: 600 - files: - num: 10 - size: 2000 - stddev: 500 - client.1: - ... 
- - To override s3 configuration - - tasks: - - ceph: - - rgw: [client.0] - - s3readwrite: - client.0: - s3: - user_id: myuserid - display_name: myname - email: my@email - access_key: myaccesskey - secret_key: mysecretkey - - """ - assert config is None or isinstance(config, list) \ - or isinstance(config, dict), \ - "task s3tests only supports a list or dictionary for configuration" - all_clients = ['client.{id}'.format(id=id_) - for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] - if config is None: - config = all_clients - if isinstance(config, list): - config = dict.fromkeys(config) - clients = config.keys() - - overrides = ctx.config.get('overrides', {}) - # merge each client section, not the top level. - for client in config.iterkeys(): - if not config[client]: - config[client] = {} - teuthology.deep_merge(config[client], overrides.get('s3readwrite', {})) - - log.debug('in s3readwrite, config is %s', config) - - s3tests_conf = {} - for client in clients: - if config[client] is None: - config[client] = {} - config[client].setdefault('s3', {}) - config[client].setdefault('readwrite', {}) - - s3tests_conf[client] = ({ - 'DEFAULT': - { - 'port' : 7280, - 'is_secure' : False, - }, - 'readwrite' : config[client]['readwrite'], - 's3' : config[client]['s3'], - }) - - with contextutil.nested( - lambda: download(ctx=ctx, config=config), - lambda: create_users(ctx=ctx, config=dict( - clients=clients, - s3tests_conf=s3tests_conf, - )), - lambda: configure(ctx=ctx, config=dict( - clients=config, - s3tests_conf=s3tests_conf, - )), - lambda: run_tests(ctx=ctx, config=config), - ): - pass - yield diff --git a/tasks/s3roundtrip.py b/tasks/s3roundtrip.py deleted file mode 100644 index 4c17144dbae..00000000000 --- a/tasks/s3roundtrip.py +++ /dev/null @@ -1,302 +0,0 @@ -""" -Run rgw roundtrip message tests -""" -from cStringIO import StringIO -import base64 -import contextlib -import logging -import os -import random -import string -import yaml - -from teuthology 
import misc as teuthology -from teuthology import contextutil -from teuthology.config import config as teuth_config -from teuthology.orchestra import run -from teuthology.orchestra.connection import split_user - -log = logging.getLogger(__name__) - - -@contextlib.contextmanager -def download(ctx, config): - """ - Download the s3 tests from the git builder. - Remove downloaded s3 file upon exit. - - The context passed in should be identical to the context - passed in to the main task. - """ - assert isinstance(config, list) - log.info('Downloading s3-tests...') - testdir = teuthology.get_testdir(ctx) - for client in config: - ctx.cluster.only(client).run( - args=[ - 'git', 'clone', - teuth_config.ceph_git_base_url + 's3-tests.git', - '{tdir}/s3-tests'.format(tdir=testdir), - ], - ) - try: - yield - finally: - log.info('Removing s3-tests...') - for client in config: - ctx.cluster.only(client).run( - args=[ - 'rm', - '-rf', - '{tdir}/s3-tests'.format(tdir=testdir), - ], - ) - -def _config_user(s3tests_conf, section, user): - """ - Configure users for this section by stashing away keys, ids, and - email addresses. - """ - s3tests_conf[section].setdefault('user_id', user) - s3tests_conf[section].setdefault('email', '{user}+test@test.test'.format(user=user)) - s3tests_conf[section].setdefault('display_name', 'Mr. {user}'.format(user=user)) - s3tests_conf[section].setdefault('access_key', ''.join(random.choice(string.uppercase) for i in xrange(20))) - s3tests_conf[section].setdefault('secret_key', base64.b64encode(os.urandom(40))) - -@contextlib.contextmanager -def create_users(ctx, config): - """ - Create a default s3 user. 
- """ - assert isinstance(config, dict) - log.info('Creating rgw users...') - testdir = teuthology.get_testdir(ctx) - users = {'s3': 'foo'} - for client in config['clients']: - s3tests_conf = config['s3tests_conf'][client] - s3tests_conf.setdefault('roundtrip', {}) - s3tests_conf['roundtrip'].setdefault('bucket', 'rttest-' + client + '-{random}-') - s3tests_conf['roundtrip'].setdefault('readers', 10) - s3tests_conf['roundtrip'].setdefault('writers', 3) - s3tests_conf['roundtrip'].setdefault('duration', 300) - s3tests_conf['roundtrip'].setdefault('files', {}) - rtconf = s3tests_conf['roundtrip'] - rtconf['files'].setdefault('num', 10) - rtconf['files'].setdefault('size', 2000) - rtconf['files'].setdefault('stddev', 500) - for section, user in [('s3', 'foo')]: - _config_user(s3tests_conf, section, '{user}.{client}'.format(user=user, client=client)) - ctx.cluster.only(client).run( - args=[ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'radosgw-admin', - '-n', client, - 'user', 'create', - '--uid', s3tests_conf[section]['user_id'], - '--display-name', s3tests_conf[section]['display_name'], - '--access-key', s3tests_conf[section]['access_key'], - '--secret', s3tests_conf[section]['secret_key'], - '--email', s3tests_conf[section]['email'], - ], - ) - try: - yield - finally: - for client in config['clients']: - for user in users.itervalues(): - uid = '{user}.{client}'.format(user=user, client=client) - ctx.cluster.only(client).run( - args=[ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'radosgw-admin', - '-n', client, - 'user', 'rm', - '--uid', uid, - '--purge-data', - ], - ) - -@contextlib.contextmanager -def configure(ctx, config): - """ - Configure the s3-tests. This includes the running of the - bootstrap code and the updating of local conf files. 
- """ - assert isinstance(config, dict) - log.info('Configuring s3-roundtrip-tests...') - testdir = teuthology.get_testdir(ctx) - for client, properties in config['clients'].iteritems(): - s3tests_conf = config['s3tests_conf'][client] - if properties is not None and 'rgw_server' in properties: - host = None - for target, roles in zip(ctx.config['targets'].iterkeys(), ctx.config['roles']): - log.info('roles: ' + str(roles)) - log.info('target: ' + str(target)) - if properties['rgw_server'] in roles: - _, host = split_user(target) - assert host is not None, "Invalid client specified as the rgw_server" - s3tests_conf['s3']['host'] = host - else: - s3tests_conf['s3']['host'] = 'localhost' - - def_conf = s3tests_conf['DEFAULT'] - s3tests_conf['s3'].setdefault('port', def_conf['port']) - s3tests_conf['s3'].setdefault('is_secure', def_conf['is_secure']) - - (remote,) = ctx.cluster.only(client).remotes.keys() - remote.run( - args=[ - 'cd', - '{tdir}/s3-tests'.format(tdir=testdir), - run.Raw('&&'), - './bootstrap', - ], - ) - conf_fp = StringIO() - conf = dict( - s3=s3tests_conf['s3'], - roundtrip=s3tests_conf['roundtrip'], - ) - yaml.safe_dump(conf, conf_fp, default_flow_style=False) - teuthology.write_file( - remote=remote, - path='{tdir}/archive/s3roundtrip.{client}.config.yaml'.format(tdir=testdir, client=client), - data=conf_fp.getvalue(), - ) - yield - - -@contextlib.contextmanager -def run_tests(ctx, config): - """ - Run the s3 roundtrip after everything is set up. 
- - :param ctx: Context passed to task - :param config: specific configuration information - """ - assert isinstance(config, dict) - testdir = teuthology.get_testdir(ctx) - for client, client_config in config.iteritems(): - (remote,) = ctx.cluster.only(client).remotes.keys() - conf = teuthology.get_file(remote, '{tdir}/archive/s3roundtrip.{client}.config.yaml'.format(tdir=testdir, client=client)) - args = [ - '{tdir}/s3-tests/virtualenv/bin/s3tests-test-roundtrip'.format(tdir=testdir), - ] - if client_config is not None and 'extra_args' in client_config: - args.extend(client_config['extra_args']) - - ctx.cluster.only(client).run( - args=args, - stdin=conf, - ) - yield - - -@contextlib.contextmanager -def task(ctx, config): - """ - Run the s3tests-test-roundtrip suite against rgw. - - To run all tests on all clients:: - - tasks: - - ceph: - - rgw: - - s3roundtrip: - - To restrict testing to particular clients:: - - tasks: - - ceph: - - rgw: [client.0] - - s3roundtrip: [client.0] - - To run against a server on client.1:: - - tasks: - - ceph: - - rgw: [client.1] - - s3roundtrip: - client.0: - rgw_server: client.1 - - To pass extra test arguments - - tasks: - - ceph: - - rgw: [client.0] - - s3roundtrip: - client.0: - roundtrip: - bucket: mybucket - readers: 10 - writers: 3 - duration: 600 - files: - num: 10 - size: 2000 - stddev: 500 - client.1: - ... 
- - To override s3 configuration - - tasks: - - ceph: - - rgw: [client.0] - - s3roundtrip: - client.0: - s3: - user_id: myuserid - display_name: myname - email: my@email - access_key: myaccesskey - secret_key: mysecretkey - - """ - assert config is None or isinstance(config, list) \ - or isinstance(config, dict), \ - "task s3tests only supports a list or dictionary for configuration" - all_clients = ['client.{id}'.format(id=id_) - for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] - if config is None: - config = all_clients - if isinstance(config, list): - config = dict.fromkeys(config) - clients = config.keys() - - s3tests_conf = {} - for client in clients: - if config[client] is None: - config[client] = {} - config[client].setdefault('s3', {}) - config[client].setdefault('roundtrip', {}) - - s3tests_conf[client] = ({ - 'DEFAULT': - { - 'port' : 7280, - 'is_secure' : False, - }, - 'roundtrip' : config[client]['roundtrip'], - 's3' : config[client]['s3'], - }) - - with contextutil.nested( - lambda: download(ctx=ctx, config=clients), - lambda: create_users(ctx=ctx, config=dict( - clients=clients, - s3tests_conf=s3tests_conf, - )), - lambda: configure(ctx=ctx, config=dict( - clients=config, - s3tests_conf=s3tests_conf, - )), - lambda: run_tests(ctx=ctx, config=config), - ): - pass - yield diff --git a/tasks/s3tests.py b/tasks/s3tests.py deleted file mode 100644 index d0f6431dd5f..00000000000 --- a/tasks/s3tests.py +++ /dev/null @@ -1,442 +0,0 @@ -""" -Run a set of s3 tests on rgw. 
-""" -from cStringIO import StringIO -from configobj import ConfigObj -import base64 -import contextlib -import logging -import os -import random -import string - -import util.rgw as rgw_utils - -from teuthology import misc as teuthology -from teuthology import contextutil -from teuthology.config import config as teuth_config -from teuthology.orchestra import run -from teuthology.orchestra.connection import split_user - -log = logging.getLogger(__name__) - -def extract_sync_client_data(ctx, client_name): - """ - Extract synchronized client rgw zone and rgw region information. - - :param ctx: Context passed to the s3tests task - :param name: Name of client that we are synching with - """ - return_region_name = None - return_dict = None - client = ctx.ceph.conf.get(client_name, None) - if client: - current_client_zone = client.get('rgw zone', None) - if current_client_zone: - (endpoint_host, endpoint_port) = ctx.rgw.role_endpoints.get(client_name, (None, None)) - # pull out the radosgw_agent stuff - regions = ctx.rgw.regions - for region in regions: - log.debug('jbuck, region is {region}'.format(region=region)) - region_data = ctx.rgw.regions[region] - log.debug('region data is {region}'.format(region=region_data)) - zones = region_data['zones'] - for zone in zones: - if current_client_zone in zone: - return_region_name = region - return_dict = dict() - return_dict['api_name'] = region_data['api name'] - return_dict['is_master'] = region_data['is master'] - return_dict['port'] = endpoint_port - return_dict['host'] = endpoint_host - - # The s3tests expect the sync_agent_[addr|port} to be - # set on the non-master node for some reason - if not region_data['is master']: - (rgwagent_host, rgwagent_port) = ctx.radosgw_agent.endpoint - (return_dict['sync_agent_addr'], _) = ctx.rgw.role_endpoints[rgwagent_host] - return_dict['sync_agent_port'] = rgwagent_port - - else: #if client_zone: - log.debug('No zone info for {host}'.format(host=client_name)) - else: # if client - 
log.debug('No ceph conf for {host}'.format(host=client_name)) - - return return_region_name, return_dict - -def update_conf_with_region_info(ctx, config, s3tests_conf): - """ - Scan for a client (passed in s3tests_conf) that is an s3agent - with which we can sync. Update information in local conf file - if such a client is found. - """ - for key in s3tests_conf.keys(): - # we'll assume that there's only one sync relationship (source / destination) with client.X - # as the key for now - - # Iterate through all of the radosgw_agent (rgwa) configs and see if a - # given client is involved in a relationship. - # If a given client isn't, skip it - this_client_in_rgwa_config = False - for rgwa in ctx.radosgw_agent.config.keys(): - rgwa_data = ctx.radosgw_agent.config[rgwa] - - if key in rgwa_data['src'] or key in rgwa_data['dest']: - this_client_in_rgwa_config = True - log.debug('{client} is in an radosgw-agent sync relationship'.format(client=key)) - radosgw_sync_data = ctx.radosgw_agent.config[key] - break - if not this_client_in_rgwa_config: - log.debug('{client} is NOT in an radosgw-agent sync relationship'.format(client=key)) - continue - - source_client = radosgw_sync_data['src'] - dest_client = radosgw_sync_data['dest'] - - # #xtract the pertinent info for the source side - source_region_name, source_region_dict = extract_sync_client_data(ctx, source_client) - log.debug('\t{key} source_region {source_region} source_dict {source_dict}'.format - (key=key,source_region=source_region_name,source_dict=source_region_dict)) - - # The source *should* be the master region, but test anyway and then set it as the default region - if source_region_dict['is_master']: - log.debug('Setting {region} as default_region'.format(region=source_region_name)) - s3tests_conf[key]['fixtures'].setdefault('default_region', source_region_name) - - # Extract the pertinent info for the destination side - dest_region_name, dest_region_dict = extract_sync_client_data(ctx, dest_client) - 
log.debug('\t{key} dest_region {dest_region} dest_dict {dest_dict}'.format - (key=key,dest_region=dest_region_name,dest_dict=dest_region_dict)) - - # now add these regions to the s3tests_conf object - s3tests_conf[key]['region {region_name}'.format(region_name=source_region_name)] = source_region_dict - s3tests_conf[key]['region {region_name}'.format(region_name=dest_region_name)] = dest_region_dict - -@contextlib.contextmanager -def download(ctx, config): - """ - Download the s3 tests from the git builder. - Remove downloaded s3 file upon exit. - - The context passed in should be identical to the context - passed in to the main task. - """ - assert isinstance(config, dict) - log.info('Downloading s3-tests...') - testdir = teuthology.get_testdir(ctx) - for (client, cconf) in config.items(): - branch = cconf.get('force-branch', None) - if not branch: - ceph_branch = ctx.config.get('branch') - suite_branch = ctx.config.get('suite_branch', ceph_branch) - branch = cconf.get('branch', suite_branch) - if not branch: - raise ValueError( - "Could not determine what branch to use for s3tests!") - else: - log.info("Using branch '%s' for s3tests", branch) - sha1 = cconf.get('sha1') - ctx.cluster.only(client).run( - args=[ - 'git', 'clone', - '-b', branch, - teuth_config.ceph_git_base_url + 's3-tests.git', - '{tdir}/s3-tests'.format(tdir=testdir), - ], - ) - if sha1 is not None: - ctx.cluster.only(client).run( - args=[ - 'cd', '{tdir}/s3-tests'.format(tdir=testdir), - run.Raw('&&'), - 'git', 'reset', '--hard', sha1, - ], - ) - try: - yield - finally: - log.info('Removing s3-tests...') - testdir = teuthology.get_testdir(ctx) - for client in config: - ctx.cluster.only(client).run( - args=[ - 'rm', - '-rf', - '{tdir}/s3-tests'.format(tdir=testdir), - ], - ) - - -def _config_user(s3tests_conf, section, user): - """ - Configure users for this section by stashing away keys, ids, and - email addresses. 
- """ - s3tests_conf[section].setdefault('user_id', user) - s3tests_conf[section].setdefault('email', '{user}+test@test.test'.format(user=user)) - s3tests_conf[section].setdefault('display_name', 'Mr. {user}'.format(user=user)) - s3tests_conf[section].setdefault('access_key', ''.join(random.choice(string.uppercase) for i in xrange(20))) - s3tests_conf[section].setdefault('secret_key', base64.b64encode(os.urandom(40))) - - -@contextlib.contextmanager -def create_users(ctx, config): - """ - Create a main and an alternate s3 user. - """ - assert isinstance(config, dict) - log.info('Creating rgw users...') - testdir = teuthology.get_testdir(ctx) - users = {'s3 main': 'foo', 's3 alt': 'bar'} - for client in config['clients']: - s3tests_conf = config['s3tests_conf'][client] - s3tests_conf.setdefault('fixtures', {}) - s3tests_conf['fixtures'].setdefault('bucket prefix', 'test-' + client + '-{random}-') - for section, user in users.iteritems(): - _config_user(s3tests_conf, section, '{user}.{client}'.format(user=user, client=client)) - log.debug('Creating user {user} on {host}'.format(user=s3tests_conf[section]['user_id'], host=client)) - ctx.cluster.only(client).run( - args=[ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'radosgw-admin', - '-n', client, - 'user', 'create', - '--uid', s3tests_conf[section]['user_id'], - '--display-name', s3tests_conf[section]['display_name'], - '--access-key', s3tests_conf[section]['access_key'], - '--secret', s3tests_conf[section]['secret_key'], - '--email', s3tests_conf[section]['email'], - ], - ) - try: - yield - finally: - for client in config['clients']: - for user in users.itervalues(): - uid = '{user}.{client}'.format(user=user, client=client) - ctx.cluster.only(client).run( - args=[ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'radosgw-admin', - '-n', client, - 'user', 'rm', - '--uid', uid, - '--purge-data', - ], - ) - - 
-@contextlib.contextmanager -def configure(ctx, config): - """ - Configure the s3-tests. This includes the running of the - bootstrap code and the updating of local conf files. - """ - assert isinstance(config, dict) - log.info('Configuring s3-tests...') - testdir = teuthology.get_testdir(ctx) - for client, properties in config['clients'].iteritems(): - s3tests_conf = config['s3tests_conf'][client] - if properties is not None and 'rgw_server' in properties: - host = None - for target, roles in zip(ctx.config['targets'].iterkeys(), ctx.config['roles']): - log.info('roles: ' + str(roles)) - log.info('target: ' + str(target)) - if properties['rgw_server'] in roles: - _, host = split_user(target) - assert host is not None, "Invalid client specified as the rgw_server" - s3tests_conf['DEFAULT']['host'] = host - else: - s3tests_conf['DEFAULT']['host'] = 'localhost' - - if properties is not None and 'slow_backend' in properties: - s3tests_conf['fixtures']['slow backend'] = properties['slow_backend'] - - (remote,) = ctx.cluster.only(client).remotes.keys() - remote.run( - args=[ - 'cd', - '{tdir}/s3-tests'.format(tdir=testdir), - run.Raw('&&'), - './bootstrap', - ], - ) - conf_fp = StringIO() - s3tests_conf.write(conf_fp) - teuthology.write_file( - remote=remote, - path='{tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client), - data=conf_fp.getvalue(), - ) - - log.info('Configuring boto...') - boto_src = os.path.join(os.path.dirname(__file__), 'boto.cfg.template') - for client, properties in config['clients'].iteritems(): - with file(boto_src, 'rb') as f: - (remote,) = ctx.cluster.only(client).remotes.keys() - conf = f.read().format( - idle_timeout=config.get('idle_timeout', 30) - ) - teuthology.write_file( - remote=remote, - path='{tdir}/boto.cfg'.format(tdir=testdir), - data=conf, - ) - - try: - yield - - finally: - log.info('Cleaning up boto...') - for client, properties in config['clients'].iteritems(): - (remote,) = 
ctx.cluster.only(client).remotes.keys() - remote.run( - args=[ - 'rm', - '{tdir}/boto.cfg'.format(tdir=testdir), - ], - ) - -@contextlib.contextmanager -def sync_users(ctx, config): - """ - Sync this user. - """ - assert isinstance(config, dict) - # do a full sync if this is a multi-region test - if rgw_utils.multi_region_enabled(ctx): - log.debug('Doing a full sync') - rgw_utils.radosgw_agent_sync_all(ctx) - else: - log.debug('Not a multi-region config; skipping the metadata sync') - - yield - -@contextlib.contextmanager -def run_tests(ctx, config): - """ - Run the s3tests after everything is set up. - - :param ctx: Context passed to task - :param config: specific configuration information - """ - assert isinstance(config, dict) - testdir = teuthology.get_testdir(ctx) - for client, client_config in config.iteritems(): - args = [ - 'S3TEST_CONF={tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client), - 'BOTO_CONFIG={tdir}/boto.cfg'.format(tdir=testdir), - '{tdir}/s3-tests/virtualenv/bin/nosetests'.format(tdir=testdir), - '-w', - '{tdir}/s3-tests'.format(tdir=testdir), - '-v', - '-a', '!fails_on_rgw', - ] - if client_config is not None and 'extra_args' in client_config: - args.extend(client_config['extra_args']) - - ctx.cluster.only(client).run( - args=args, - label="s3 tests against rgw" - ) - yield - -@contextlib.contextmanager -def task(ctx, config): - """ - Run the s3-tests suite against rgw. - - To run all tests on all clients:: - - tasks: - - ceph: - - rgw: - - s3tests: - - To restrict testing to particular clients:: - - tasks: - - ceph: - - rgw: [client.0] - - s3tests: [client.0] - - To run against a server on client.1 and increase the boto timeout to 10m:: - - tasks: - - ceph: - - rgw: [client.1] - - s3tests: - client.0: - rgw_server: client.1 - idle_timeout: 600 - - To pass extra arguments to nose (e.g. 
to run a certain test):: - - tasks: - - ceph: - - rgw: [client.0] - - s3tests: - client.0: - extra_args: ['test_s3:test_object_acl_grand_public_read'] - client.1: - extra_args: ['--exclude', 'test_100_continue'] - """ - assert config is None or isinstance(config, list) \ - or isinstance(config, dict), \ - "task s3tests only supports a list or dictionary for configuration" - all_clients = ['client.{id}'.format(id=id_) - for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] - if config is None: - config = all_clients - if isinstance(config, list): - config = dict.fromkeys(config) - clients = config.keys() - - overrides = ctx.config.get('overrides', {}) - # merge each client section, not the top level. - for client in config.iterkeys(): - if not config[client]: - config[client] = {} - teuthology.deep_merge(config[client], overrides.get('s3tests', {})) - - log.debug('s3tests config is %s', config) - - s3tests_conf = {} - for client in clients: - s3tests_conf[client] = ConfigObj( - indent_type='', - infile={ - 'DEFAULT': - { - 'port' : 7280, - 'is_secure' : 'no', - }, - 'fixtures' : {}, - 's3 main' : {}, - 's3 alt' : {}, - } - ) - - # Only attempt to add in the region info if there's a radosgw_agent configured - if hasattr(ctx, 'radosgw_agent'): - update_conf_with_region_info(ctx, config, s3tests_conf) - - with contextutil.nested( - lambda: download(ctx=ctx, config=config), - lambda: create_users(ctx=ctx, config=dict( - clients=clients, - s3tests_conf=s3tests_conf, - )), - lambda: sync_users(ctx=ctx, config=config), - lambda: configure(ctx=ctx, config=dict( - clients=config, - s3tests_conf=s3tests_conf, - )), - lambda: run_tests(ctx=ctx, config=config), - ): - pass - yield diff --git a/tasks/samba.py b/tasks/samba.py deleted file mode 100644 index 38ebe026b4a..00000000000 --- a/tasks/samba.py +++ /dev/null @@ -1,245 +0,0 @@ -""" -Samba -""" -import contextlib -import logging -import sys -import time - -from teuthology import misc as teuthology -from 
teuthology.orchestra import run -from teuthology.orchestra.daemon import DaemonGroup - -log = logging.getLogger(__name__) - - -def get_sambas(ctx, roles): - """ - Scan for roles that are samba. Yield the id of the the samba role - (samba.0, samba.1...) and the associated remote site - - :param ctx: Context - :param roles: roles for this test (extracted from yaml files) - """ - for role in roles: - assert isinstance(role, basestring) - PREFIX = 'samba.' - assert role.startswith(PREFIX) - id_ = role[len(PREFIX):] - (remote,) = ctx.cluster.only(role).remotes.iterkeys() - yield (id_, remote) - - -@contextlib.contextmanager -def task(ctx, config): - """ - Setup samba smbd with ceph vfs module. This task assumes the samba - package has already been installed via the install task. - - The config is optional and defaults to starting samba on all nodes. - If a config is given, it is expected to be a list of - samba nodes to start smbd servers on. - - Example that starts smbd on all samba nodes:: - - tasks: - - install: - - install: - project: samba - extra_packages: ['samba'] - - ceph: - - samba: - - interactive: - - Example that starts smbd on just one of the samba nodes and cifs on the other:: - - tasks: - - samba: [samba.0] - - cifs: [samba.1] - - An optional backend can be specified, and requires a path which smbd will - use as the backend storage location: - - roles: - - [osd.0, osd.1, osd.2, mon.0, mon.1, mon.2, mds.a] - - [client.0, samba.0] - - tasks: - - ceph: - - ceph-fuse: [client.0] - - samba: - samba.0: - cephfuse: "{testdir}/mnt.0" - - This mounts ceph to {testdir}/mnt.0 using fuse, and starts smbd with - a UNC of //localhost/cephfuse. Access through that UNC will be on - the ceph fuse mount point. - - If no arguments are specified in the samba - role, the default behavior is to enable the ceph UNC //localhost/ceph - and use the ceph vfs module as the smbd backend. 
- - :param ctx: Context - :param config: Configuration - """ - log.info("Setting up smbd with ceph vfs...") - assert config is None or isinstance(config, list) or isinstance(config, dict), \ - "task samba got invalid config" - - if config is None: - config = dict(('samba.{id}'.format(id=id_), None) - for id_ in teuthology.all_roles_of_type(ctx.cluster, 'samba')) - elif isinstance(config, list): - config = dict((name, None) for name in config) - - samba_servers = list(get_sambas(ctx=ctx, roles=config.keys())) - - testdir = teuthology.get_testdir(ctx) - - if not hasattr(ctx, 'daemons'): - ctx.daemons = DaemonGroup() - - for id_, remote in samba_servers: - - rolestr = "samba.{id_}".format(id_=id_) - - confextras = """vfs objects = ceph - ceph:config_file = /etc/ceph/ceph.conf""" - - unc = "ceph" - backend = "/" - - if config[rolestr] is not None: - # verify that there's just one parameter in role - if len(config[rolestr]) != 1: - log.error("samba config for role samba.{id_} must have only one parameter".format(id_=id_)) - raise Exception('invalid config') - confextras = "" - (unc, backendstr) = config[rolestr].items()[0] - backend = backendstr.format(testdir=testdir) - - # on first samba role, set ownership and permissions of ceph root - # so that samba tests succeed - if config[rolestr] is None and id_ == samba_servers[0][0]: - remote.run( - args=[ - 'mkdir', '-p', '/tmp/cmnt', run.Raw('&&'), - 'sudo', 'ceph-fuse', '/tmp/cmnt', run.Raw('&&'), - 'sudo', 'chown', 'ubuntu:ubuntu', '/tmp/cmnt/', run.Raw('&&'), - 'sudo', 'chmod', '1777', '/tmp/cmnt/', run.Raw('&&'), - 'sudo', 'umount', '/tmp/cmnt/', run.Raw('&&'), - 'rm', '-rf', '/tmp/cmnt', - ], - ) - else: - remote.run( - args=[ - 'sudo', 'chown', 'ubuntu:ubuntu', backend, run.Raw('&&'), - 'sudo', 'chmod', '1777', backend, - ], - ) - - teuthology.sudo_write_file(remote, "/usr/local/samba/etc/smb.conf", """ -[global] - workgroup = WORKGROUP - netbios name = DOMAIN - -[{unc}] - path = {backend} - {extras} - writeable = 
yes - valid users = ubuntu -""".format(extras=confextras, unc=unc, backend=backend)) - - # create ubuntu user - remote.run( - args=[ - 'sudo', '/usr/local/samba/bin/smbpasswd', '-e', 'ubuntu', - run.Raw('||'), - 'printf', run.Raw('"ubuntu\nubuntu\n"'), - run.Raw('|'), - 'sudo', '/usr/local/samba/bin/smbpasswd', '-s', '-a', 'ubuntu' - ]) - - smbd_cmd = [ - 'sudo', - 'daemon-helper', - 'kill', - 'nostdin', - '/usr/local/samba/sbin/smbd', - '-F', - ] - ctx.daemons.add_daemon(remote, 'smbd', id_, - args=smbd_cmd, - logger=log.getChild("smbd.{id_}".format(id_=id_)), - stdin=run.PIPE, - wait=False, - ) - - # let smbd initialize, probably a better way... - seconds_to_sleep = 100 - log.info('Sleeping for %s seconds...' % seconds_to_sleep) - time.sleep(seconds_to_sleep) - log.info('Sleeping stopped...') - - try: - yield - finally: - log.info('Stopping smbd processes...') - exc_info = (None, None, None) - for d in ctx.daemons.iter_daemons_of_role('smbd'): - try: - d.stop() - except (run.CommandFailedError, - run.CommandCrashedError, - run.ConnectionLostError): - exc_info = sys.exc_info() - log.exception('Saw exception from %s.%s', d.role, d.id_) - if exc_info != (None, None, None): - raise exc_info[0], exc_info[1], exc_info[2] - - for id_, remote in samba_servers: - remote.run( - args=[ - 'sudo', - 'rm', '-rf', - '/usr/local/samba/etc/smb.conf', - '/usr/local/samba/private/*', - '/usr/local/samba/var/run/', - '/usr/local/samba/var/locks', - '/usr/local/samba/var/lock', - ], - ) - # make sure daemons are gone - try: - remote.run( - args=[ - 'while', - 'sudo', 'killall', '-9', 'smbd', - run.Raw(';'), - 'do', 'sleep', '1', - run.Raw(';'), - 'done', - ], - ) - - remote.run( - args=[ - 'sudo', - 'lsof', - backend, - ], - check_status=False - ) - remote.run( - args=[ - 'sudo', - 'fuser', - '-M', - backend, - ], - check_status=False - ) - except Exception: - log.exception("Saw exception") - pass diff --git a/tasks/scrub.py b/tasks/scrub.py deleted file mode 100644 index 
7a25300a677..00000000000 --- a/tasks/scrub.py +++ /dev/null @@ -1,117 +0,0 @@ -""" -Scrub osds -""" -import contextlib -import gevent -import logging -import random -import time - -import ceph_manager -from teuthology import misc as teuthology - -log = logging.getLogger(__name__) - -@contextlib.contextmanager -def task(ctx, config): - """ - Run scrub periodically. Randomly chooses an OSD to scrub. - - The config should be as follows: - - scrub: - frequency: - deep: - - example: - - tasks: - - ceph: - - scrub: - frequency: 30 - deep: 0 - """ - if config is None: - config = {} - assert isinstance(config, dict), \ - 'scrub task only accepts a dict for configuration' - - log.info('Beginning scrub...') - - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - - manager = ceph_manager.CephManager( - mon, - ctx=ctx, - logger=log.getChild('ceph_manager'), - ) - - num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') - while len(manager.get_osd_status()['up']) < num_osds: - manager.sleep(10) - - scrub_proc = Scrubber( - manager, - config, - ) - try: - yield - finally: - log.info('joining scrub') - scrub_proc.do_join() - -class Scrubber: - """ - Scrubbing is actually performed during initialzation - """ - def __init__(self, manager, config): - """ - Spawn scrubbing thread upon completion. 
- """ - self.ceph_manager = manager - self.ceph_manager.wait_for_clean() - - osd_status = self.ceph_manager.get_osd_status() - self.osds = osd_status['up'] - - self.config = config - if self.config is None: - self.config = dict() - - else: - def tmp(x): - """Local display""" - print x - self.log = tmp - - self.stopping = False - - log.info("spawning thread") - - self.thread = gevent.spawn(self.do_scrub) - - def do_join(self): - """Scrubbing thread finished""" - self.stopping = True - self.thread.get() - - def do_scrub(self): - """Perform the scrub operation""" - frequency = self.config.get("frequency", 30) - deep = self.config.get("deep", 0) - - log.info("stopping %s" % self.stopping) - - while not self.stopping: - osd = str(random.choice(self.osds)) - - if deep: - cmd = 'deep-scrub' - else: - cmd = 'scrub' - - log.info('%sbing %s' % (cmd, osd)) - self.ceph_manager.raw_cluster_cmd('osd', cmd, osd) - - time.sleep(frequency) diff --git a/tasks/scrub_test.py b/tasks/scrub_test.py deleted file mode 100644 index 3443ae9f45e..00000000000 --- a/tasks/scrub_test.py +++ /dev/null @@ -1,199 +0,0 @@ -"""Scrub testing""" -from cStringIO import StringIO - -import logging -import os -import time - -import ceph_manager -from teuthology import misc as teuthology - -log = logging.getLogger(__name__) - -def task(ctx, config): - """ - Test [deep] scrub - - tasks: - - chef: - - install: - - ceph: - log-whitelist: - - '!= known digest' - - '!= known omap_digest' - - deep-scrub 0 missing, 1 inconsistent objects - - deep-scrub 1 errors - - repair 0 missing, 1 inconsistent objects - - repair 1 errors, 1 fixed - - scrub_test: - - """ - if config is None: - config = {} - assert isinstance(config, dict), \ - 'scrub_test task only accepts a dict for configuration' - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - - num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') - log.info('num_osds is %s' % num_osds) - - manager = 
ceph_manager.CephManager( - mon, - ctx=ctx, - logger=log.getChild('ceph_manager'), - ) - - while len(manager.get_osd_status()['up']) < num_osds: - time.sleep(10) - - for i in range(num_osds): - manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'flush_pg_stats') - manager.wait_for_clean() - - # write some data - p = manager.do_rados(mon, ['-p', 'rbd', 'bench', '--no-cleanup', '1', 'write', '-b', '4096']) - err = p.exitstatus - log.info('err is %d' % err) - - # wait for some PG to have data that we can mess with - victim = None - osd = None - while victim is None: - stats = manager.get_pg_stats() - for pg in stats: - size = pg['stat_sum']['num_bytes'] - if size > 0: - victim = pg['pgid'] - osd = pg['acting'][0] - break - - if victim is None: - time.sleep(3) - - log.info('messing with PG %s on osd %d' % (victim, osd)) - - (osd_remote,) = ctx.cluster.only('osd.%d' % osd).remotes.iterkeys() - data_path = os.path.join( - '/var/lib/ceph/osd', - 'ceph-{id}'.format(id=osd), - 'current', - '{pg}_head'.format(pg=victim) - ) - - # fuzz time - ls_fp = StringIO() - osd_remote.run( - args=[ 'ls', data_path ], - stdout=ls_fp, - ) - ls_out = ls_fp.getvalue() - ls_fp.close() - - # find an object file we can mess with - osdfilename = None - for line in ls_out.split('\n'): - if 'object' in line: - osdfilename = line - break - assert osdfilename is not None - - # Get actual object name from osd stored filename - tmp=osdfilename.split('__') - objname=tmp[0] - objname=objname.replace('\u', '_') - log.info('fuzzing %s' % objname) - - # put a single \0 at the beginning of the file - osd_remote.run( - args=[ 'sudo', 'dd', - 'if=/dev/zero', - 'of=%s' % os.path.join(data_path, osdfilename), - 'bs=1', 'count=1', 'conv=notrunc' - ] - ) - - # scrub, verify inconsistent - manager.raw_cluster_cmd('pg', 'deep-scrub', victim) - # Give deep-scrub a chance to start - time.sleep(60) - - while True: - stats = manager.get_single_pg_stats(victim) - state = stats['state'] - - # wait for the scrub to finish - 
if 'scrubbing' in state: - time.sleep(3) - continue - - inconsistent = stats['state'].find('+inconsistent') != -1 - assert inconsistent - break - - - # repair, verify no longer inconsistent - manager.raw_cluster_cmd('pg', 'repair', victim) - # Give repair a chance to start - time.sleep(60) - - while True: - stats = manager.get_single_pg_stats(victim) - state = stats['state'] - - # wait for the scrub to finish - if 'scrubbing' in state: - time.sleep(3) - continue - - inconsistent = stats['state'].find('+inconsistent') != -1 - assert not inconsistent - break - - # Test deep-scrub with various omap modifications - manager.do_rados(mon, ['-p', 'rbd', 'setomapval', objname, 'key', 'val']) - manager.do_rados(mon, ['-p', 'rbd', 'setomapheader', objname, 'hdr']) - - # Modify omap on specific osd - log.info('fuzzing omap of %s' % objname) - manager.osd_admin_socket(osd, ['rmomapkey', 'rbd', objname, 'key']); - manager.osd_admin_socket(osd, ['setomapval', 'rbd', objname, 'badkey', 'badval']); - manager.osd_admin_socket(osd, ['setomapheader', 'rbd', objname, 'badhdr']); - - # scrub, verify inconsistent - manager.raw_cluster_cmd('pg', 'deep-scrub', victim) - # Give deep-scrub a chance to start - time.sleep(60) - - while True: - stats = manager.get_single_pg_stats(victim) - state = stats['state'] - - # wait for the scrub to finish - if 'scrubbing' in state: - time.sleep(3) - continue - - inconsistent = stats['state'].find('+inconsistent') != -1 - assert inconsistent - break - - # repair, verify no longer inconsistent - manager.raw_cluster_cmd('pg', 'repair', victim) - # Give repair a chance to start - time.sleep(60) - - while True: - stats = manager.get_single_pg_stats(victim) - state = stats['state'] - - # wait for the scrub to finish - if 'scrubbing' in state: - time.sleep(3) - continue - - inconsistent = stats['state'].find('+inconsistent') != -1 - assert not inconsistent - break - - log.info('test successful!') diff --git a/tasks/tests/__init__.py b/tasks/tests/__init__.py 
deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tasks/tests/test_buildpackages.py b/tasks/tests/test_buildpackages.py deleted file mode 100644 index fed5aa02b91..00000000000 --- a/tasks/tests/test_buildpackages.py +++ /dev/null @@ -1,170 +0,0 @@ -# py.test -v -s tests/test_buildpackages.py - -from mock import patch, Mock - -from .. import buildpackages -from teuthology import packaging - -def test_get_tag_branch_sha1(): - gitbuilder = packaging.GitbuilderProject( - 'ceph', - { - 'os_type': 'centos', - 'os_version': '7.0', - }) - (tag, branch, sha1) = buildpackages.get_tag_branch_sha1(gitbuilder) - assert tag == None - assert branch == None - assert sha1 is not None - - gitbuilder = packaging.GitbuilderProject( - 'ceph', - { - 'os_type': 'centos', - 'os_version': '7.0', - 'sha1': 'asha1', - }) - (tag, branch, sha1) = buildpackages.get_tag_branch_sha1(gitbuilder) - assert tag == None - assert branch == None - assert sha1 == 'asha1' - - remote = Mock - remote.arch = 'x86_64' - remote.os = Mock - remote.os.name = 'ubuntu' - remote.os.version = '14.04' - remote.os.codename = 'trusty' - remote.system_type = 'deb' - ctx = Mock - ctx.cluster = Mock - ctx.cluster.remotes = {remote: ['client.0']} - - expected_tag = 'v0.94.1' - expected_sha1 = 'expectedsha1' - def check_output(cmd, shell): - assert shell == True - return expected_sha1 + " refs/tags/" + expected_tag - with patch.multiple( - buildpackages, - check_output=check_output, - ): - gitbuilder = packaging.GitbuilderProject( - 'ceph', - { - 'os_type': 'centos', - 'os_version': '7.0', - 'sha1': 'asha1', - 'all': { - 'tag': tag, - }, - }, - ctx = ctx, - remote = remote) - (tag, branch, sha1) = buildpackages.get_tag_branch_sha1(gitbuilder) - assert tag == expected_tag - assert branch == None - assert sha1 == expected_sha1 - - expected_branch = 'hammer' - expected_sha1 = 'otherexpectedsha1' - def check_output(cmd, shell): - assert shell == True - return expected_sha1 + " refs/heads/" + expected_branch - 
with patch.multiple( - buildpackages, - check_output=check_output, - ): - gitbuilder = packaging.GitbuilderProject( - 'ceph', - { - 'os_type': 'centos', - 'os_version': '7.0', - 'sha1': 'asha1', - 'all': { - 'branch': branch, - }, - }, - ctx = ctx, - remote = remote) - (tag, branch, sha1) = buildpackages.get_tag_branch_sha1(gitbuilder) - assert tag == None - assert branch == expected_branch - assert sha1 == expected_sha1 - -def test_lookup_configs(): - expected_system_type = 'deb' - def make_remote(): - remote = Mock() - remote.arch = 'x86_64' - remote.os = Mock() - remote.os.name = 'ubuntu' - remote.os.version = '14.04' - remote.os.codename = 'trusty' - remote.system_type = expected_system_type - return remote - ctx = Mock() - class cluster: - remote1 = make_remote() - remote2 = make_remote() - remotes = { - remote1: ['client.0'], - remote2: ['mon.a','osd.0'], - } - def only(self, role): - result = Mock() - if role in ('client.0',): - result.remotes = { cluster.remote1: None } - elif role in ('osd.0', 'mon.a'): - result.remotes = { cluster.remote2: None } - else: - result.remotes = None - return result - ctx.cluster = cluster() - ctx.config = { - 'roles': [ ['client.0'], ['mon.a','osd.0'] ], - } - - # nothing -> nothing - assert buildpackages.lookup_configs(ctx, {}) == [] - assert buildpackages.lookup_configs(ctx, {1:[1,2,3]}) == [] - assert buildpackages.lookup_configs(ctx, [[1,2,3]]) == [] - assert buildpackages.lookup_configs(ctx, None) == [] - - # - # the overrides applies to install and to install.upgrade - # that have no tag, branch or sha1 - # - config = { - 'overrides': { - 'install': { - 'ceph': { - 'sha1': 'overridesha1', - 'tag': 'overridetag', - 'branch': 'overridebranch', - }, - }, - }, - 'tasks': [ - { - 'install': { - 'sha1': 'installsha1', - }, - }, - { - 'install.upgrade': { - 'osd.0': { - }, - 'client.0': { - 'sha1': 'client0sha1', - }, - }, - } - ], - } - ctx.config = config - expected_configs = [{'branch': 'overridebranch', 'sha1': 
'overridesha1', 'tag': 'overridetag'}, - {'project': 'ceph', 'branch': 'overridebranch', 'sha1': 'overridesha1', 'tag': 'overridetag'}, - {'project': 'ceph', 'sha1': 'client0sha1'}] - - assert buildpackages.lookup_configs(ctx, config) == expected_configs diff --git a/tasks/tests/test_devstack.py b/tasks/tests/test_devstack.py deleted file mode 100644 index 117b3076818..00000000000 --- a/tasks/tests/test_devstack.py +++ /dev/null @@ -1,48 +0,0 @@ -from textwrap import dedent - -from .. import devstack - - -class TestDevstack(object): - def test_parse_os_table(self): - table_str = dedent(""" - +---------------------+--------------------------------------+ - | Property | Value | - +---------------------+--------------------------------------+ - | attachments | [] | - | availability_zone | nova | - | bootable | false | - | created_at | 2014-02-21T17:14:47.548361 | - | display_description | None | - | display_name | NAME | - | id | ffdbd1bb-60dc-4d95-acfe-88774c09ad3e | - | metadata | {} | - | size | 1 | - | snapshot_id | None | - | source_volid | None | - | status | creating | - | volume_type | None | - +---------------------+--------------------------------------+ - """).strip() - expected = { - 'Property': 'Value', - 'attachments': '[]', - 'availability_zone': 'nova', - 'bootable': 'false', - 'created_at': '2014-02-21T17:14:47.548361', - 'display_description': 'None', - 'display_name': 'NAME', - 'id': 'ffdbd1bb-60dc-4d95-acfe-88774c09ad3e', - 'metadata': '{}', - 'size': '1', - 'snapshot_id': 'None', - 'source_volid': 'None', - 'status': 'creating', - 'volume_type': 'None'} - - vol_info = devstack.parse_os_table(table_str) - assert vol_info == expected - - - - diff --git a/tasks/tests/test_radosgw_admin.py b/tasks/tests/test_radosgw_admin.py deleted file mode 100644 index 59f357891ca..00000000000 --- a/tasks/tests/test_radosgw_admin.py +++ /dev/null @@ -1,31 +0,0 @@ -from mock import Mock - -from .. 
import radosgw_admin - -acl_with_version = """fooFoofooFooFULL_CONTROL -""" # noqa - - -acl_without_version = """fooFoofooFooFULL_CONTROL -""" # noqa - - -class TestGetAcl(object): - - def setup(self): - self.key = Mock() - - def test_removes_xml_version(self): - self.key.get_xml_acl = Mock(return_value=acl_with_version) - result = radosgw_admin.get_acl(self.key) - assert result.startswith(' -# -# Author: Loic Dachary -# -# Permission is hereby granted, free of charge, to any person -# obtaining a copy of this software and associated documentation -# files (the "Software"), to deal in the Software without -# restriction, including without limitation the rights to use, -# copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following -# conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -from .. 
import rados - -class TestRados(object): - - def test_cmd_erasure_code_profile(self): - name = 'NAME' - cmd = rados.cmd_erasure_code_profile(name, {}) - assert 'k=2' in cmd - assert name in cmd - cmd = rados.cmd_erasure_code_profile(name, { 'k': '88' }) - assert 'k=88' in cmd - assert name in cmd diff --git a/tasks/watch_notify_same_primary.py b/tasks/watch_notify_same_primary.py deleted file mode 100644 index 168b6bb6c30..00000000000 --- a/tasks/watch_notify_same_primary.py +++ /dev/null @@ -1,133 +0,0 @@ - -""" -watch_notify_same_primary task -""" -from cStringIO import StringIO -import contextlib -import logging - -from teuthology.orchestra import run -from teuthology.contextutil import safe_while - -log = logging.getLogger(__name__) - - -@contextlib.contextmanager -def task(ctx, config): - """ - Run watch_notify_same_primary - - The config should be as follows: - - watch_notify_same_primary: - clients: [client list] - - The client list should contain 1 client - - The test requires 3 osds. - - example: - - tasks: - - ceph: - - watch_notify_same_primary: - clients: [client.0] - - interactive: - """ - log.info('Beginning watch_notify_same_primary...') - assert isinstance(config, dict), \ - "please list clients to run on" - - clients = config.get('clients', ['client.0']) - assert len(clients) == 1 - role = clients[0] - assert isinstance(role, basestring) - PREFIX = 'client.' 
- assert role.startswith(PREFIX) - (remote,) = ctx.cluster.only(role).remotes.iterkeys() - ctx.manager.raw_cluster_cmd('osd', 'set', 'noout') - - pool = ctx.manager.create_pool_with_unique_name() - def obj(n): return "foo-{num}".format(num=n) - def start_watch(n): - remote.run( - args = [ - "rados", - "-p", pool, - "put", - obj(n), - "/etc/resolv.conf"], - logger=log.getChild('watch.{id}'.format(id=n))) - proc = remote.run( - args = [ - "rados", - "-p", pool, - "watch", - obj(n)], - stdin=run.PIPE, - stdout=StringIO(), - stderr=StringIO(), - wait=False) - return proc - - num = 20 - - watches = [start_watch(i) for i in range(num)] - - # wait for them all to register - for i in range(num): - with safe_while() as proceed: - while proceed(): - proc = remote.run( - args = [ - "rados", - "-p", pool, - "listwatchers", - obj(i)], - stdout=StringIO()) - lines = proc.stdout.getvalue() - num_watchers = lines.count('watcher=') - log.info('i see %d watchers for %s', num_watchers, obj(i)) - if num_watchers >= 1: - break - - def notify(n, msg): - remote.run( - args = [ - "rados", - "-p", pool, - "notify", - obj(n), - msg], - logger=log.getChild('notify.{id}'.format(id=n))) - - [notify(n, 'notify1') for n in range(len(watches))] - - ctx.manager.kill_osd(0) - ctx.manager.mark_down_osd(0) - - [notify(n, 'notify2') for n in range(len(watches))] - - try: - yield - finally: - log.info('joining watch_notify_stress') - for watch in watches: - watch.stdin.write("\n") - - run.wait(watches) - - for watch in watches: - lines = watch.stdout.getvalue().split("\n") - got1 = False - got2 = False - for l in lines: - if 'notify1' in l: - got1 = True - if 'notify2' in l: - got2 = True - log.info(lines) - assert got1 and got2 - - ctx.manager.revive_osd(0) - ctx.manager.remove_pool(pool) diff --git a/tasks/watch_notify_stress.py b/tasks/watch_notify_stress.py deleted file mode 100644 index 6db313fea6d..00000000000 --- a/tasks/watch_notify_stress.py +++ /dev/null @@ -1,69 +0,0 @@ -""" 
-test_stress_watch task -""" -import contextlib -import logging -import proc_thrasher - -from teuthology.orchestra import run - -log = logging.getLogger(__name__) - - -@contextlib.contextmanager -def task(ctx, config): - """ - Run test_stress_watch - - The config should be as follows: - - test_stress_watch: - clients: [client list] - - example: - - tasks: - - ceph: - - test_stress_watch: - clients: [client.0] - - interactive: - """ - log.info('Beginning test_stress_watch...') - assert isinstance(config, dict), \ - "please list clients to run on" - testwatch = {} - - remotes = [] - - for role in config.get('clients', ['client.0']): - assert isinstance(role, basestring) - PREFIX = 'client.' - assert role.startswith(PREFIX) - id_ = role[len(PREFIX):] - (remote,) = ctx.cluster.only(role).remotes.iterkeys() - remotes.append(remote) - - args =['CEPH_CLIENT_ID={id_}'.format(id_=id_), - 'CEPH_ARGS="{flags}"'.format(flags=config.get('flags', '')), - 'daemon-helper', - 'kill', - 'multi_stress_watch foo foo' - ] - - log.info("args are %s" % (args,)) - - proc = proc_thrasher.ProcThrasher({}, remote, - args=[run.Raw(i) for i in args], - logger=log.getChild('testwatch.{id}'.format(id=id_)), - stdin=run.PIPE, - wait=False - ) - proc.start() - testwatch[id_] = proc - - try: - yield - finally: - log.info('joining watch_notify_stress') - for i in testwatch.itervalues(): - i.join() diff --git a/tasks/workunit.py b/tasks/workunit.py deleted file mode 100644 index 833d81df217..00000000000 --- a/tasks/workunit.py +++ /dev/null @@ -1,380 +0,0 @@ -""" -Workunit task -- Run ceph on sets of specific clients -""" -import logging -import pipes -import os - -from teuthology import misc -from teuthology.config import config as teuth_config -from teuthology.orchestra.run import CommandFailedError -from teuthology.parallel import parallel -from teuthology.orchestra import run - -log = logging.getLogger(__name__) - -CLIENT_PREFIX = 'client.' 
- - -def task(ctx, config): - """ - Run ceph on all workunits found under the specified path. - - For example:: - - tasks: - - ceph: - - ceph-fuse: [client.0] - - workunit: - clients: - client.0: [direct_io, xattrs.sh] - client.1: [snaps] - branch: foo - - You can also run a list of workunits on all clients: - tasks: - - ceph: - - ceph-fuse: - - workunit: - tag: v0.47 - clients: - all: [direct_io, xattrs.sh, snaps] - - If you have an "all" section it will run all the workunits - on each client simultaneously, AFTER running any workunits specified - for individual clients. (This prevents unintended simultaneous runs.) - - To customize tests, you can specify environment variables as a dict. You - can also specify a time limit for each work unit (defaults to 3h): - - tasks: - - ceph: - - ceph-fuse: - - workunit: - sha1: 9b28948635b17165d17c1cf83d4a870bd138ddf6 - clients: - all: [snaps] - env: - FOO: bar - BAZ: quux - timeout: 3h - - :param ctx: Context - :param config: Configuration - """ - assert isinstance(config, dict) - assert isinstance(config.get('clients'), dict), \ - 'configuration must contain a dictionary of clients' - - overrides = ctx.config.get('overrides', {}) - misc.deep_merge(config, overrides.get('workunit', {})) - - refspec = config.get('branch') - if refspec is None: - refspec = config.get('tag') - if refspec is None: - refspec = config.get('sha1') - if refspec is None: - refspec = 'HEAD' - - timeout = config.get('timeout', '3h') - - log.info('Pulling workunits from ref %s', refspec) - - created_mountpoint = {} - - if config.get('env') is not None: - assert isinstance(config['env'], dict), 'env must be a dictionary' - clients = config['clients'] - - # Create scratch dirs for any non-all workunits - log.info('Making a separate scratch dir for every client...') - for role in clients.iterkeys(): - assert isinstance(role, basestring) - if role == "all": - continue - - assert role.startswith(CLIENT_PREFIX) - created_mnt_dir = _make_scratch_dir(ctx, role, 
config.get('subdir')) - created_mountpoint[role] = created_mnt_dir - - # Execute any non-all workunits - with parallel() as p: - for role, tests in clients.iteritems(): - if role != "all": - p.spawn(_run_tests, ctx, refspec, role, tests, - config.get('env'), timeout=timeout) - - # Clean up dirs from any non-all workunits - for role, created in created_mountpoint.items(): - _delete_dir(ctx, role, created) - - # Execute any 'all' workunits - if 'all' in clients: - all_tasks = clients["all"] - _spawn_on_all_clients(ctx, refspec, all_tasks, config.get('env'), - config.get('subdir'), timeout=timeout) - - -def _delete_dir(ctx, role, created_mountpoint): - """ - Delete file used by this role, and delete the directory that this - role appeared in. - - :param ctx: Context - :param role: "role.#" where # is used for the role id. - """ - testdir = misc.get_testdir(ctx) - id_ = role[len(CLIENT_PREFIX):] - (remote,) = ctx.cluster.only(role).remotes.iterkeys() - mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_)) - # Is there any reason why this is not: join(mnt, role) ? - client = os.path.join(mnt, 'client.{id}'.format(id=id_)) - - # Remove the directory inside the mount where the workunit ran - remote.run( - args=[ - 'sudo', - 'rm', - '-rf', - '--', - client, - ], - ) - log.info("Deleted dir {dir}".format(dir=client)) - - # If the mount was an artificially created dir, delete that too - if created_mountpoint: - remote.run( - args=[ - 'rmdir', - '--', - mnt, - ], - ) - log.info("Deleted artificial mount point {dir}".format(dir=client)) - - -def _make_scratch_dir(ctx, role, subdir): - """ - Make scratch directories for this role. This also makes the mount - point if that directory does not exist. - - :param ctx: Context - :param role: "role.#" where # is used for the role id. 
- :param subdir: use this subdir (False if not used) - """ - created_mountpoint = False - id_ = role[len(CLIENT_PREFIX):] - log.debug("getting remote for {id} role {role_}".format(id=id_, role_=role)) - (remote,) = ctx.cluster.only(role).remotes.iterkeys() - dir_owner = remote.user - mnt = os.path.join(misc.get_testdir(ctx), 'mnt.{id}'.format(id=id_)) - # if neither kclient nor ceph-fuse are required for a workunit, - # mnt may not exist. Stat and create the directory if it doesn't. - try: - remote.run( - args=[ - 'stat', - '--', - mnt, - ], - ) - log.info('Did not need to create dir {dir}'.format(dir=mnt)) - except CommandFailedError: - remote.run( - args=[ - 'mkdir', - '--', - mnt, - ], - ) - log.info('Created dir {dir}'.format(dir=mnt)) - created_mountpoint = True - - if not subdir: - subdir = 'client.{id}'.format(id=id_) - - if created_mountpoint: - remote.run( - args=[ - 'cd', - '--', - mnt, - run.Raw('&&'), - 'mkdir', - '--', - subdir, - ], - ) - else: - remote.run( - args=[ - # cd first so this will fail if the mount point does - # not exist; pure install -d will silently do the - # wrong thing - 'cd', - '--', - mnt, - run.Raw('&&'), - 'sudo', - 'install', - '-d', - '-m', '0755', - '--owner={user}'.format(user=dir_owner), - '--', - subdir, - ], - ) - - return created_mountpoint - - -def _spawn_on_all_clients(ctx, refspec, tests, env, subdir, timeout=None): - """ - Make a scratch directory for each client in the cluster, and then for each - test spawn _run_tests() for each role. - - See run_tests() for parameter documentation. 
- """ - client_generator = misc.all_roles_of_type(ctx.cluster, 'client') - client_remotes = list() - - created_mountpoint = {} - for client in client_generator: - (client_remote,) = ctx.cluster.only('client.{id}'.format(id=client)).remotes.iterkeys() - client_remotes.append((client_remote, 'client.{id}'.format(id=client))) - created_mountpoint[client] = _make_scratch_dir(ctx, "client.{id}".format(id=client), subdir) - - for unit in tests: - with parallel() as p: - for remote, role in client_remotes: - p.spawn(_run_tests, ctx, refspec, role, [unit], env, subdir, - timeout=timeout) - - # cleanup the generated client directories - client_generator = misc.all_roles_of_type(ctx.cluster, 'client') - for client in client_generator: - _delete_dir(ctx, 'client.{id}'.format(id=client), created_mountpoint[client]) - - -def _run_tests(ctx, refspec, role, tests, env, subdir=None, timeout=None): - """ - Run the individual test. Create a scratch directory and then extract the - workunits from git. Make the executables, and then run the tests. - Clean up (remove files created) after the tests are finished. - - :param ctx: Context - :param refspec: branch, sha1, or version tag used to identify this - build - :param tests: specific tests specified. - :param env: environment set in yaml file. Could be None. - :param subdir: subdirectory set in yaml file. Could be None - :param timeout: If present, use the 'timeout' command on the remote host - to limit execution time. Must be specified by a number - followed by 's' for seconds, 'm' for minutes, 'h' for - hours, or 'd' for days. If '0' or anything that evaluates - to False is passed, the 'timeout' command is not used. 
- """ - testdir = misc.get_testdir(ctx) - assert isinstance(role, basestring) - assert role.startswith(CLIENT_PREFIX) - id_ = role[len(CLIENT_PREFIX):] - (remote,) = ctx.cluster.only(role).remotes.iterkeys() - mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_)) - # subdir so we can remove and recreate this a lot without sudo - if subdir is None: - scratch_tmp = os.path.join(mnt, 'client.{id}'.format(id=id_), 'tmp') - else: - scratch_tmp = os.path.join(mnt, subdir) - srcdir = '{tdir}/workunit.{role}'.format(tdir=testdir, role=role) - clonedir = '{tdir}/clone.{role}'.format(tdir=testdir, role=role) - - git_url = teuth_config.get_ceph_git_url() - remote.run( - logger=log.getChild(role), - args=[ - 'git', - 'clone', - git_url, - clonedir, - run.Raw(';'), - 'cd', '--', clonedir, - run.Raw('&&'), - 'git', 'checkout', refspec, - run.Raw('&&'), - 'mv', 'qa/workunits', srcdir, - ], - ) - - remote.run( - logger=log.getChild(role), - args=[ - 'cd', '--', srcdir, - run.Raw('&&'), - 'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi', - run.Raw('&&'), - 'find', '-executable', '-type', 'f', '-printf', r'%P\0'.format(srcdir=srcdir), - run.Raw('>{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)), - ], - ) - - workunits_file = '{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role) - workunits = sorted(misc.get_file(remote, workunits_file).split('\0')) - assert workunits - - try: - assert isinstance(tests, list) - for spec in tests: - log.info('Running workunits matching %s on %s...', spec, role) - prefix = '{spec}/'.format(spec=spec) - to_run = [w for w in workunits if w == spec or w.startswith(prefix)] - if not to_run: - raise RuntimeError('Spec did not match any workunits: {spec!r}'.format(spec=spec)) - for workunit in to_run: - log.info('Running workunit %s...', workunit) - args = [ - 'mkdir', '-p', '--', scratch_tmp, - run.Raw('&&'), - 'cd', '--', scratch_tmp, - run.Raw('&&'), - run.Raw('CEPH_CLI_TEST_DUP_COMMAND=1'), - 
run.Raw('CEPH_REF={ref}'.format(ref=refspec)), - run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)), - run.Raw('CEPH_ID="{id}"'.format(id=id_)), - run.Raw('PATH=$PATH:/usr/sbin') - ] - if env is not None: - for var, val in env.iteritems(): - quoted_val = pipes.quote(val) - env_arg = '{var}={val}'.format(var=var, val=quoted_val) - args.append(run.Raw(env_arg)) - args.extend([ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir)]) - if timeout and timeout != '0': - args.extend(['timeout', timeout]) - args.extend([ - '{srcdir}/{workunit}'.format( - srcdir=srcdir, - workunit=workunit, - ), - ]) - remote.run( - logger=log.getChild(role), - args=args, - label="workunit test {workunit}".format(workunit=workunit) - ) - remote.run( - logger=log.getChild(role), - args=['sudo', 'rm', '-rf', '--', scratch_tmp], - ) - finally: - log.info('Stopping %s on %s...', tests, role) - remote.run( - logger=log.getChild(role), - args=[ - 'rm', '-rf', '--', workunits_file, srcdir, clonedir, - ], - ) diff --git a/tox.ini b/tox.ini deleted file mode 100644 index c5826ecb6ec..00000000000 --- a/tox.ini +++ /dev/null @@ -1,8 +0,0 @@ -[tox] -envlist = flake8 -skipsdist = True - -[testenv:flake8] -deps= - flake8 -commands=flake8 --select=F,E9 --exclude=venv,.tox