From 6f67a72c423801ebf936002adcec42d08200b66d Mon Sep 17 00:00:00 2001 From: Nathan Cutler Date: Tue, 22 Oct 2019 14:35:11 +0200 Subject: [PATCH] qa/deepsea: forward-port basic (tier{0,1}) tests from SES6 Fixes: https://jira.suse.com/browse/SES-1049 Signed-off-by: Nathan Cutler --- qa/deepsea/.qa | 1 + qa/deepsea/boilerplate/+ | 0 qa/deepsea/boilerplate/ceph_cm_salt.yaml | 2 + qa/deepsea/boilerplate/disable-tuned.yaml | 6 + qa/deepsea/boilerplate/zypper-dup.yaml | 4 + qa/deepsea/cli/.qa | 1 + qa/deepsea/cli/off.yaml | 3 + qa/deepsea/cli/on.yaml | 3 + qa/deepsea/deepsea-services.yaml | 4 + qa/deepsea/deepsea.yaml | 14 + qa/deepsea/disks/0disks.yaml | 4 + qa/deepsea/disks/1disk.yaml | 4 + qa/deepsea/disks/2disks.yaml | 4 + qa/deepsea/disks/3disks.yaml | 4 + qa/deepsea/disks/4disks.yaml | 4 + qa/deepsea/disks/5disks.yaml | 4 + qa/deepsea/distros/.qa | 1 + qa/deepsea/distros/opensuse_15.1.yaml | 1 + qa/deepsea/distros/sle_15.1.yaml | 1 + qa/deepsea/health-ok/common/common.sh | 457 ++++ qa/deepsea/health-ok/common/deploy.sh | 229 ++ qa/deepsea/health-ok/common/helper.sh | 184 ++ qa/deepsea/health-ok/common/json.sh | 24 + qa/deepsea/health-ok/common/nfs-ganesha.sh | 177 ++ qa/deepsea/health-ok/common/policy.sh | 271 +++ qa/deepsea/health-ok/common/pool.sh | 64 + qa/deepsea/health-ok/common/rbd.sh | 29 + qa/deepsea/health-ok/common/rgw.sh | 129 ++ qa/deepsea/health-ok/common/zypper.sh | 24 + qa/deepsea/health-ok/health-ok.sh | 202 ++ qa/deepsea/health-ok/stage-5.sh | 112 + qa/deepsea/nodes/1node.yaml | 2 + qa/deepsea/nodes/20nodes.yaml | 21 + qa/deepsea/nodes/2nodes.yaml | 3 + qa/deepsea/nodes/3nodes.yaml | 4 + qa/deepsea/nodes/4nodes.yaml | 5 + qa/deepsea/nodes/5nodes.yaml | 6 + qa/deepsea/salt.yaml | 6 + .../storage-profiles/bs_dedicated_db.yaml | 15 + .../bs_dedicated_db_crypt.yaml | 18 + .../bs_dedicated_db_sizes.yaml | 18 + .../bs_dedicated_db_sizes_crypt.yaml | 21 + .../bs_dedicated_db_sizes_mixed.yaml | 17 + .../bs_dedicated_db_sizes_mixed_crypt.yaml | 20 + .../storage-profiles/bs_dedicated_wal.yaml | 15 + .../bs_dedicated_wal_crypt.yaml | 18 + .../storage-profiles/bs_dedicated_wal_db.yaml | 14 + .../bs_dedicated_wal_db_crypt.yaml | 16 + .../bs_dedicated_wal_db_sizes_all.yaml | 18 + .../bs_dedicated_wal_db_sizes_all_crypt.yaml | 20 + .../bs_dedicated_wal_db_sizes_mixed.yaml | 16 + ...bs_dedicated_wal_db_sizes_mixed_crypt.yaml | 18 + .../bs_dedicated_wal_sizes.yaml | 18 + .../bs_dedicated_wal_sizes_crypt.yaml | 21 + .../bs_dedicated_wal_sizes_mixed.yaml | 17 + .../bs_dedicated_wal_sizes_mixed_crypt.yaml | 20 + .../fs_dedicated_journal.yaml | 15 + .../fs_dedicated_journal_crypt.yaml | 18 + qa/suites/deepsea/.qa | 1 + qa/suites/deepsea/tier0/.qa | 1 + qa/suites/deepsea/tier0/salt/% | 0 qa/suites/deepsea/tier0/salt/.qa | 1 + qa/suites/deepsea/tier0/salt/0-salt.yaml | 1 + qa/suites/deepsea/tier0/salt/boilerplate | 1 + qa/suites/deepsea/tier0/salt/cluster/+ | 0 qa/suites/deepsea/tier0/salt/cluster/.qa | 1 + .../deepsea/tier0/salt/cluster/1disk.yaml | 1 + .../deepsea/tier0/salt/cluster/1node.yaml | 1 + qa/suites/deepsea/tier0/salt/distros | 1 + qa/suites/deepsea/tier1/.qa | 1 + qa/suites/deepsea/tier1/health-ok/% | 0 qa/suites/deepsea/tier1/health-ok/.qa | 1 + qa/suites/deepsea/tier1/health-ok/0-salt.yaml | 1 + .../tier1/health-ok/1-deploy-phase.yaml | 1 + .../deepsea/tier1/health-ok/2-test-phase.yaml | 8 + qa/suites/deepsea/tier1/health-ok/boilerplate | 1 + qa/suites/deepsea/tier1/health-ok/cluster/+ | 0 qa/suites/deepsea/tier1/health-ok/cluster/.qa | 1 + .../tier1/health-ok/cluster/4disks.yaml 
| 1 + .../tier1/health-ok/cluster/roles.yaml | 2 + .../tier1/health-ok/deepsea_cli_off.yaml | 3 + qa/suites/deepsea/tier1/health-ok/distros | 1 + qa/suites/suse/.qa | 1 + qa/suites/suse/tier0 | 1 + qa/suites/suse/tier1 | 1 + qa/tasks/deepsea.py | 2019 +++++++++++++++++ qa/tasks/salt.py | 300 +++ qa/tasks/salt_manager.py | 275 +++ qa/tasks/scripts.py | 40 + qa/tasks/scripts/ceph_cluster_status.sh | 13 + qa/tasks/scripts/ceph_version_sanity.sh | 21 + qa/tasks/scripts/create_all_pools_at_once.sh | 89 + qa/tasks/scripts/lvm_status.sh | 10 + qa/tasks/scripts/rados_write_test.sh | 19 + qa/tasks/scripts/rgw_init.sh | 9 + qa/tasks/scripts/rgw_init_ssl.sh | 22 + qa/tasks/scripts/salt_api_test.sh | 10 + qa/tasks/ses_qa.py | 183 ++ qa/tasks/util/__init__.py | 261 +++ 99 files changed, 5671 insertions(+) create mode 120000 qa/deepsea/.qa create mode 100644 qa/deepsea/boilerplate/+ create mode 100644 qa/deepsea/boilerplate/ceph_cm_salt.yaml create mode 100644 qa/deepsea/boilerplate/disable-tuned.yaml create mode 100644 qa/deepsea/boilerplate/zypper-dup.yaml create mode 120000 qa/deepsea/cli/.qa create mode 100644 qa/deepsea/cli/off.yaml create mode 100644 qa/deepsea/cli/on.yaml create mode 100644 qa/deepsea/deepsea-services.yaml create mode 100644 qa/deepsea/deepsea.yaml create mode 100644 qa/deepsea/disks/0disks.yaml create mode 100644 qa/deepsea/disks/1disk.yaml create mode 100644 qa/deepsea/disks/2disks.yaml create mode 100644 qa/deepsea/disks/3disks.yaml create mode 100644 qa/deepsea/disks/4disks.yaml create mode 100644 qa/deepsea/disks/5disks.yaml create mode 120000 qa/deepsea/distros/.qa create mode 120000 qa/deepsea/distros/opensuse_15.1.yaml create mode 120000 qa/deepsea/distros/sle_15.1.yaml create mode 100644 qa/deepsea/health-ok/common/common.sh create mode 100644 qa/deepsea/health-ok/common/deploy.sh create mode 100644 qa/deepsea/health-ok/common/helper.sh create mode 100644 qa/deepsea/health-ok/common/json.sh create mode 100644 qa/deepsea/health-ok/common/nfs-ganesha.sh create mode 100644 qa/deepsea/health-ok/common/policy.sh create mode 100644 qa/deepsea/health-ok/common/pool.sh create mode 100644 qa/deepsea/health-ok/common/rbd.sh create mode 100644 qa/deepsea/health-ok/common/rgw.sh create mode 100644 qa/deepsea/health-ok/common/zypper.sh create mode 100755 qa/deepsea/health-ok/health-ok.sh create mode 100755 qa/deepsea/health-ok/stage-5.sh create mode 100644 qa/deepsea/nodes/1node.yaml create mode 100644 qa/deepsea/nodes/20nodes.yaml create mode 100644 qa/deepsea/nodes/2nodes.yaml create mode 100644 qa/deepsea/nodes/3nodes.yaml create mode 100644 qa/deepsea/nodes/4nodes.yaml create mode 100644 qa/deepsea/nodes/5nodes.yaml create mode 100644 qa/deepsea/salt.yaml create mode 100644 qa/deepsea/storage-profiles/bs_dedicated_db.yaml create mode 100644 qa/deepsea/storage-profiles/bs_dedicated_db_crypt.yaml create mode 100644 qa/deepsea/storage-profiles/bs_dedicated_db_sizes.yaml create mode 100644 qa/deepsea/storage-profiles/bs_dedicated_db_sizes_crypt.yaml create mode 100644 qa/deepsea/storage-profiles/bs_dedicated_db_sizes_mixed.yaml create mode 100644 qa/deepsea/storage-profiles/bs_dedicated_db_sizes_mixed_crypt.yaml create mode 100644 qa/deepsea/storage-profiles/bs_dedicated_wal.yaml create mode 100644 qa/deepsea/storage-profiles/bs_dedicated_wal_crypt.yaml create mode 100644 qa/deepsea/storage-profiles/bs_dedicated_wal_db.yaml create mode 100644 qa/deepsea/storage-profiles/bs_dedicated_wal_db_crypt.yaml create mode 100644 qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_all.yaml 
create mode 100644 qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_all_crypt.yaml create mode 100644 qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_mixed.yaml create mode 100644 qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_mixed_crypt.yaml create mode 100644 qa/deepsea/storage-profiles/bs_dedicated_wal_sizes.yaml create mode 100644 qa/deepsea/storage-profiles/bs_dedicated_wal_sizes_crypt.yaml create mode 100644 qa/deepsea/storage-profiles/bs_dedicated_wal_sizes_mixed.yaml create mode 100644 qa/deepsea/storage-profiles/bs_dedicated_wal_sizes_mixed_crypt.yaml create mode 100644 qa/deepsea/storage-profiles/fs_dedicated_journal.yaml create mode 100644 qa/deepsea/storage-profiles/fs_dedicated_journal_crypt.yaml create mode 120000 qa/suites/deepsea/.qa create mode 120000 qa/suites/deepsea/tier0/.qa create mode 100644 qa/suites/deepsea/tier0/salt/% create mode 120000 qa/suites/deepsea/tier0/salt/.qa create mode 120000 qa/suites/deepsea/tier0/salt/0-salt.yaml create mode 120000 qa/suites/deepsea/tier0/salt/boilerplate create mode 100644 qa/suites/deepsea/tier0/salt/cluster/+ create mode 120000 qa/suites/deepsea/tier0/salt/cluster/.qa create mode 120000 qa/suites/deepsea/tier0/salt/cluster/1disk.yaml create mode 120000 qa/suites/deepsea/tier0/salt/cluster/1node.yaml create mode 120000 qa/suites/deepsea/tier0/salt/distros create mode 120000 qa/suites/deepsea/tier1/.qa create mode 100644 qa/suites/deepsea/tier1/health-ok/% create mode 120000 qa/suites/deepsea/tier1/health-ok/.qa create mode 120000 qa/suites/deepsea/tier1/health-ok/0-salt.yaml create mode 120000 qa/suites/deepsea/tier1/health-ok/1-deploy-phase.yaml create mode 100644 qa/suites/deepsea/tier1/health-ok/2-test-phase.yaml create mode 120000 qa/suites/deepsea/tier1/health-ok/boilerplate create mode 100644 qa/suites/deepsea/tier1/health-ok/cluster/+ create mode 120000 qa/suites/deepsea/tier1/health-ok/cluster/.qa create mode 120000 qa/suites/deepsea/tier1/health-ok/cluster/4disks.yaml create mode 100644 qa/suites/deepsea/tier1/health-ok/cluster/roles.yaml create mode 100644 qa/suites/deepsea/tier1/health-ok/deepsea_cli_off.yaml create mode 120000 qa/suites/deepsea/tier1/health-ok/distros create mode 120000 qa/suites/suse/.qa create mode 120000 qa/suites/suse/tier0 create mode 120000 qa/suites/suse/tier1 create mode 100644 qa/tasks/deepsea.py create mode 100644 qa/tasks/salt.py create mode 100644 qa/tasks/salt_manager.py create mode 100644 qa/tasks/scripts.py create mode 100644 qa/tasks/scripts/ceph_cluster_status.sh create mode 100644 qa/tasks/scripts/ceph_version_sanity.sh create mode 100644 qa/tasks/scripts/create_all_pools_at_once.sh create mode 100644 qa/tasks/scripts/lvm_status.sh create mode 100644 qa/tasks/scripts/rados_write_test.sh create mode 100644 qa/tasks/scripts/rgw_init.sh create mode 100644 qa/tasks/scripts/rgw_init_ssl.sh create mode 100644 qa/tasks/scripts/salt_api_test.sh create mode 100644 qa/tasks/ses_qa.py diff --git a/qa/deepsea/.qa b/qa/deepsea/.qa new file mode 120000 index 0000000000000..a96aa0ea9d8c4 --- /dev/null +++ b/qa/deepsea/.qa @@ -0,0 +1 @@ +.. 
\ No newline at end of file diff --git a/qa/deepsea/boilerplate/+ b/qa/deepsea/boilerplate/+ new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/qa/deepsea/boilerplate/ceph_cm_salt.yaml b/qa/deepsea/boilerplate/ceph_cm_salt.yaml new file mode 100644 index 0000000000000..bcae3d98cc986 --- /dev/null +++ b/qa/deepsea/boilerplate/ceph_cm_salt.yaml @@ -0,0 +1,2 @@ +ceph_cm: salt +ceph_cm_ansible: false diff --git a/qa/deepsea/boilerplate/disable-tuned.yaml b/qa/deepsea/boilerplate/disable-tuned.yaml new file mode 100644 index 0000000000000..21e22528864dd --- /dev/null +++ b/qa/deepsea/boilerplate/disable-tuned.yaml @@ -0,0 +1,6 @@ +overrides: + deepsea: + alternative_defaults: + tuned_mgr_init: default-off + tuned_mon_init: default-off + tuned_osd_init: default-off diff --git a/qa/deepsea/boilerplate/zypper-dup.yaml b/qa/deepsea/boilerplate/zypper-dup.yaml new file mode 100644 index 0000000000000..049604d9e7b5a --- /dev/null +++ b/qa/deepsea/boilerplate/zypper-dup.yaml @@ -0,0 +1,4 @@ +overrides: + deepsea: + alternative_defaults: + upgrade_init: zypper-dup diff --git a/qa/deepsea/cli/.qa b/qa/deepsea/cli/.qa new file mode 120000 index 0000000000000..fea2489fdf6d9 --- /dev/null +++ b/qa/deepsea/cli/.qa @@ -0,0 +1 @@ +../.qa \ No newline at end of file diff --git a/qa/deepsea/cli/off.yaml b/qa/deepsea/cli/off.yaml new file mode 100644 index 0000000000000..a2beb7f3e0796 --- /dev/null +++ b/qa/deepsea/cli/off.yaml @@ -0,0 +1,3 @@ +overrides: + deepsea: + cli: false diff --git a/qa/deepsea/cli/on.yaml b/qa/deepsea/cli/on.yaml new file mode 100644 index 0000000000000..739b017c5398f --- /dev/null +++ b/qa/deepsea/cli/on.yaml @@ -0,0 +1,3 @@ +overrides: + deepsea: + cli: true diff --git a/qa/deepsea/deepsea-services.yaml b/qa/deepsea/deepsea-services.yaml new file mode 100644 index 0000000000000..ebad49e526cd0 --- /dev/null +++ b/qa/deepsea/deepsea-services.yaml @@ -0,0 +1,4 @@ +tasks: + - deepsea.create_pools: + - deepsea.orch: + stage: 4 diff --git a/qa/deepsea/deepsea.yaml b/qa/deepsea/deepsea.yaml new file mode 100644 index 0000000000000..3d5fec30cbde9 --- /dev/null +++ b/qa/deepsea/deepsea.yaml @@ -0,0 +1,14 @@ +tasks: + - deepsea: + allow_python2: false + drive_group: default + - deepsea.orch: + stage: prep + - deepsea.orch: + stage: 1 + - deepsea.policy: + - deepsea.orch: + stage: 2 + - deepsea.ceph_conf: + - deepsea.orch: + stage: 3 diff --git a/qa/deepsea/disks/0disks.yaml b/qa/deepsea/disks/0disks.yaml new file mode 100644 index 0000000000000..dc8605a64d6fd --- /dev/null +++ b/qa/deepsea/disks/0disks.yaml @@ -0,0 +1,4 @@ +openstack: +- volumes: # attached to each instance + count: 0 + size: 10 # GB diff --git a/qa/deepsea/disks/1disk.yaml b/qa/deepsea/disks/1disk.yaml new file mode 100644 index 0000000000000..1654bdaf20b2e --- /dev/null +++ b/qa/deepsea/disks/1disk.yaml @@ -0,0 +1,4 @@ +openstack: +- volumes: # attached to each instance + count: 1 + size: 10 # GB diff --git a/qa/deepsea/disks/2disks.yaml b/qa/deepsea/disks/2disks.yaml new file mode 100644 index 0000000000000..f794a6f90ecfa --- /dev/null +++ b/qa/deepsea/disks/2disks.yaml @@ -0,0 +1,4 @@ +openstack: +- volumes: # attached to each instance + count: 2 + size: 10 # GB diff --git a/qa/deepsea/disks/3disks.yaml b/qa/deepsea/disks/3disks.yaml new file mode 100644 index 0000000000000..8da92ca9628df --- /dev/null +++ b/qa/deepsea/disks/3disks.yaml @@ -0,0 +1,4 @@ +openstack: +- volumes: # attached to each instance + count: 3 + size: 10 # GB diff --git a/qa/deepsea/disks/4disks.yaml b/qa/deepsea/disks/4disks.yaml new 
file mode 100644 index 0000000000000..2054da95e8b08 --- /dev/null +++ b/qa/deepsea/disks/4disks.yaml @@ -0,0 +1,4 @@ +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/deepsea/disks/5disks.yaml b/qa/deepsea/disks/5disks.yaml new file mode 100644 index 0000000000000..a5bf871fb394c --- /dev/null +++ b/qa/deepsea/disks/5disks.yaml @@ -0,0 +1,4 @@ +openstack: +- volumes: # attached to each instance + count: 5 + size: 10 # GB diff --git a/qa/deepsea/distros/.qa b/qa/deepsea/distros/.qa new file mode 120000 index 0000000000000..fea2489fdf6d9 --- /dev/null +++ b/qa/deepsea/distros/.qa @@ -0,0 +1 @@ +../.qa \ No newline at end of file diff --git a/qa/deepsea/distros/opensuse_15.1.yaml b/qa/deepsea/distros/opensuse_15.1.yaml new file mode 120000 index 0000000000000..570c8cfde67ea --- /dev/null +++ b/qa/deepsea/distros/opensuse_15.1.yaml @@ -0,0 +1 @@ +.qa/distros/all/opensuse_15.1.yaml \ No newline at end of file diff --git a/qa/deepsea/distros/sle_15.1.yaml b/qa/deepsea/distros/sle_15.1.yaml new file mode 120000 index 0000000000000..c6791f58126dd --- /dev/null +++ b/qa/deepsea/distros/sle_15.1.yaml @@ -0,0 +1 @@ +.qa/distros/all/sle_15.1.yaml \ No newline at end of file diff --git a/qa/deepsea/health-ok/common/common.sh b/qa/deepsea/health-ok/common/common.sh new file mode 100644 index 0000000000000..3c6d9bcbc9644 --- /dev/null +++ b/qa/deepsea/health-ok/common/common.sh @@ -0,0 +1,457 @@ +# +# This file is part of the DeepSea integration test suite +# + +# BASEDIR is set by the calling script +source $BASEDIR/common/deploy.sh +source $BASEDIR/common/helper.sh +source $BASEDIR/common/json.sh +source $BASEDIR/common/nfs-ganesha.sh +source $BASEDIR/common/policy.sh +source $BASEDIR/common/pool.sh +source $BASEDIR/common/rbd.sh +source $BASEDIR/common/rgw.sh +source $BASEDIR/common/zypper.sh + + +# +# functions that process command-line arguments +# + +function assert_enhanced_getopt { + set +e + echo -n "Running 'getopt --test'... " + getopt --test > /dev/null + if [ $? -ne 4 ]; then + echo "FAIL" + echo "This script requires enhanced getopt. Bailing out." 
+ exit 1 + fi + echo "PASS" + set -e +} + + +# +# functions that run the DeepSea stages +# + +function _disable_update_in_stage_0 { + cp /srv/salt/ceph/stage/prep/master/default.sls /srv/salt/ceph/stage/prep/master/default-orig.sls + cp /srv/salt/ceph/stage/prep/master/default-no-update-no-reboot.sls /srv/salt/ceph/stage/prep/master/default.sls + cp /srv/salt/ceph/stage/prep/minion/default.sls /srv/salt/ceph/stage/prep/minion/default-orig.sls + cp /srv/salt/ceph/stage/prep/minion/default-no-update-no-reboot.sls /srv/salt/ceph/stage/prep/minion/default.sls +} + +function run_stage_0 { + test "$NO_UPDATE" && _disable_update_in_stage_0 + _run_stage 0 "$@" + if _root_fs_is_btrfs ; then + echo "Root filesystem is btrfs: creating subvolumes for /var/lib/ceph" + salt-run state.orch ceph.migrate.subvolume + else + echo "Root filesystem is *not* btrfs: skipping subvolume creation" + fi + test "$STAGE_SUCCEEDED" +} + +function run_stage_1 { + _run_stage 1 "$@" + test "$STAGE_SUCCEEDED" +} + +function run_stage_2 { + # This was needed with SCC repos + #salt '*' cmd.run "for delay in 60 60 60 60 ; do sudo zypper --non-interactive --gpg-auto-import-keys refresh && break ; sleep $delay ; done" + _run_stage 2 "$@" + salt_pillar_items 2>/dev/null + test "$STAGE_SUCCEEDED" +} + +function _disable_tuned { + local prefix=/srv/salt/ceph/tuned + mv $prefix/mgr/default.sls $prefix/mgr/default.sls-MOVED + mv $prefix/mon/default.sls $prefix/mon/default.sls-MOVED + mv $prefix/osd/default.sls $prefix/osd/default.sls-MOVED + mv $prefix/mgr/default-off.sls $prefix/mgr/default.sls + mv $prefix/mon/default-off.sls $prefix/mon/default.sls + mv $prefix/osd/default-off.sls $prefix/osd/default.sls +} + +function run_stage_3 { + cat_global_conf + lsblk_on_storage_node + if [ "$TUNED" ] ; then + echo "WWWW: tuned will be deployed as usual" + else + echo "WWWW: tuned will NOT be deployed" + _disable_tuned + fi + _run_stage 3 "$@" + lsblk_on_storage_node + ceph osd tree + cat_ceph_conf + admin_auth_status + test "$STAGE_SUCCEEDED" +} + +function run_stage_4 { + _run_stage 4 "$@" + test "$STAGE_SUCCEEDED" +} + +function run_stage_5 { + _run_stage 5 "$@" + test "$STAGE_SUCCEEDED" +} + + +# +# functions that generate /etc/ceph/ceph.conf +# see https://github.com/SUSE/DeepSea/tree/master/srv/salt/ceph/configuration/files/ceph.conf.d +# + +function change_rgw_conf { + cat <<'EOF' >> /srv/salt/ceph/configuration/files/ceph.conf.d/rgw.conf +foo = bar +EOF +} + +function change_osd_conf { + cat <<'EOF' >> /srv/salt/ceph/configuration/files/ceph.conf.d/osd.conf +foo = bar +EOF +} + +function change_mon_conf { + cat <<'EOF' >> /srv/salt/ceph/configuration/files/ceph.conf.d/mon.conf +foo = bar +EOF +} + +function ceph_conf_small_cluster { + local STORAGENODES=$(json_storage_nodes) + test -n "$STORAGENODES" + if [ "$STORAGENODES" -eq 1 ] ; then + echo "Adjusting ceph.conf for operation with 1 storage node" + cat <<'EOF' >> /srv/salt/ceph/configuration/files/ceph.conf.d/global.conf +mon pg warn min per osd = 16 +osd pool default size = 2 +osd crush chooseleaf type = 0 # failure domain == osd +EOF + elif [ "$STORAGENODES" -eq 2 -o "$STORAGENODES" -eq 3 ] ; then + echo "Adjusting ceph.conf for operation with 2 or 3 storage nodes" + cat <<'EOF' >> /srv/salt/ceph/configuration/files/ceph.conf.d/global.conf +mon pg warn min per osd = 8 +osd pool default size = 2 +EOF + else + echo "Four or more storage nodes; not adjusting ceph.conf" + fi +} + +function ceph_conf_mon_allow_pool_delete { + echo "Adjusting ceph.conf to allow pool deletes" + cat 
<<'EOF' >> /srv/salt/ceph/configuration/files/ceph.conf.d/global.conf +mon allow pool delete = true +EOF +} + +function ceph_conf_dashboard { + echo "Adjusting ceph.conf for deployment of dashboard MGR module" + cat <<'EOF' >> /srv/salt/ceph/configuration/files/ceph.conf.d/mon.conf +mgr initial modules = dashboard +EOF +} + + +# +# functions that print status information +# + +function cat_deepsea_log { + cat /var/log/deepsea.log +} + +function cat_salt_config { + cat /etc/salt/master + cat /etc/salt/minion +} + +function cat_policy_cfg { + cat /srv/pillar/ceph/proposals/policy.cfg +} + +function salt_pillar_items { + salt '*' pillar.items +} + +function salt_pillar_get_roles { + salt '*' pillar.get roles +} + +function salt_cmd_run_lsblk { + salt '*' cmd.run lsblk +} + +function cat_global_conf { + cat /srv/salt/ceph/configuration/files/ceph.conf.d/global.conf || true +} + +function cat_ceph_conf { + salt '*' cmd.run "cat /etc/ceph/ceph.conf" 2>/dev/null +} + +function admin_auth_status { + ceph auth get client.admin + ls -l /etc/ceph/ceph.client.admin.keyring + cat /etc/ceph/ceph.client.admin.keyring +} + +function number_of_hosts_in_ceph_osd_tree { + ceph osd tree -f json-pretty | jq '[.nodes[] | select(.type == "host")] | length' +} + +function number_of_osds_in_ceph_osd_tree { + ceph osd tree -f json-pretty | jq '[.nodes[] | select(.type == "osd")] | length' +} + +function ceph_cluster_status { + ceph pg stat -f json-pretty + _grace_period 1 + ceph health detail -f json-pretty + _grace_period 1 + ceph osd tree + _grace_period 1 + ceph osd pool ls detail -f json-pretty + _grace_period 1 + ceph -s +} + +function ceph_log_grep_enoent_eaccess { + set +e + grep -rH "Permission denied" /var/log/ceph + grep -rH "No such file or directory" /var/log/ceph + set -e +} + + +# +# core validation tests +# + +function ceph_version_test { +# test that ceph RPM version matches "ceph --version" +# for a loose definition of "matches" + rpm -q ceph + local RPM_NAME=$(rpm -q ceph) + local RPM_CEPH_VERSION=$(perl -e '"'"$RPM_NAME"'" =~ m/ceph-(\d+\.\d+\.\d+)/; print "$1\n";') + echo "According to RPM, the ceph upstream version is ->$RPM_CEPH_VERSION<-" + test -n "$RPM_CEPH_VERSION" + ceph --version + local BUFFER=$(ceph --version) + local CEPH_CEPH_VERSION=$(perl -e '"'"$BUFFER"'" =~ m/ceph version (\d+\.\d+\.\d+)/; print "$1\n";') + echo "According to \"ceph --version\", the ceph upstream version is ->$CEPH_CEPH_VERSION<-" + test -n "$RPM_CEPH_VERSION" + test "$RPM_CEPH_VERSION" = "$CEPH_CEPH_VERSION" +} + +function ceph_health_test { + local LOGFILE=/tmp/ceph_health_test.log + echo "Waiting up to 15 minutes for HEALTH_OK..." + salt -C 'I@roles:master' wait.until status=HEALTH_OK timeout=900 check=1 2>/dev/null | tee $LOGFILE + # last line: determines return value of function + ! grep -q 'Timeout expired' $LOGFILE +} + +function rados_write_test { + # + # NOTE: function assumes the pool "write_test" already exists. Pool can be + # created by calling e.g. "create_all_pools_at_once write_test" immediately + # before calling this function. 
+ # + ceph osd pool application enable write_test deepsea_qa + echo "dummy_content" > verify.txt + rados -p write_test put test_object verify.txt + rados -p write_test get test_object verify_returned.txt + test "x$(cat verify.txt)" = "x$(cat verify_returned.txt)" +} + +function lsblk_on_storage_node { + local TESTSCRIPT=/tmp/lsblk_test.sh + local STORAGENODE=$(_first_x_node storage) + cat << 'EOF' > $TESTSCRIPT +set -ex +trap 'echo "Result: NOT_OK"' ERR +echo "running lsblk as $(whoami) on $(hostname --fqdn)" +lsblk +echo "Result: OK" +EOF + _run_test_script_on_node $TESTSCRIPT $STORAGENODE +} + +function cephfs_mount_and_sanity_test { + # + # run cephfs mount test script on the client node + # mounts cephfs in /mnt, touches a file, asserts that it exists + # + local TESTSCRIPT=/tmp/cephfs_test.sh + local CLIENTNODE=$(_client_node) + cat << 'EOF' > $TESTSCRIPT +set -ex +trap 'echo "Result: NOT_OK"' ERR +echo "cephfs mount test script running as $(whoami) on $(hostname --fqdn)" +TESTMONS=$(ceph-conf --lookup 'mon_initial_members' | tr -d '[:space:]') +TESTSECR=$(grep 'key =' /etc/ceph/ceph.client.admin.keyring | awk '{print $NF}') +echo "MONs: $TESTMONS" +echo "admin secret: $TESTSECR" +test -d /mnt +mount -t ceph ${TESTMONS}:/ /mnt -o name=admin,secret="$TESTSECR" +touch /mnt/bubba +test -f /mnt/bubba +umount /mnt +echo "Result: OK" +EOF + # FIXME: assert no MDS running on $CLIENTNODE + _run_test_script_on_node $TESTSCRIPT $CLIENTNODE +} + +function iscsi_kludge { + # + # apply kludge to work around bsc#1049669 + # + local TESTSCRIPT=/tmp/iscsi_kludge.sh + local IGWNODE=$(_first_x_node igw) + cat << 'EOF' > $TESTSCRIPT +set -ex +trap 'echo "Result: NOT_OK"' ERR +echo "igw kludge script running as $(whoami) on $(hostname --fqdn)" +sed -i -e 's/\("host": "target[[:digit:]]\+\)"/\1.teuthology"/' /tmp/lrbd.conf +cat /tmp/lrbd.conf +source /etc/sysconfig/lrbd; lrbd -v $LRBD_OPTIONS -f /tmp/lrbd.conf +systemctl restart lrbd.service +systemctl --no-pager --full status lrbd.service +echo "Result: OK" +EOF + _run_test_script_on_node $TESTSCRIPT $IGWNODE +} + +function igw_info { + # + # peek at igw information on the igw node + # + local TESTSCRIPT=/tmp/igw_info.sh + local IGWNODE=$(_first_x_node igw) + cat << 'EOF' > $TESTSCRIPT +set -ex +trap 'echo "Result: NOT_OK"' ERR +echo "igw info script running as $(whoami) on $(hostname --fqdn)" +rpm -q lrbd || true +lrbd --output || true +ls -lR /sys/kernel/config/target/ || true +ss --tcp --numeric state listening +echo "See 3260 there?" 
+echo "Result: OK" +EOF + _run_test_script_on_node $TESTSCRIPT $IGWNODE +} + +function iscsi_mount_and_sanity_test { + # + # run iscsi mount test script on the client node + # mounts iscsi in /mnt, touches a file, asserts that it exists + # + local TESTSCRIPT=/tmp/iscsi_test.sh + local CLIENTNODE=$(_client_node) + local IGWNODE=$(_first_x_node igw) + cat << EOF > $TESTSCRIPT +set -e +trap 'echo "Result: NOT_OK"' ERR +for delay in 60 60 60 60 ; do + sudo zypper --non-interactive --gpg-auto-import-keys refresh && break + sleep $delay +done +set -x +zypper --non-interactive install --no-recommends open-iscsi multipath-tools +systemctl start iscsid.service +sleep 5 +systemctl --no-pager --full status iscsid.service +iscsiadm -m discovery -t st -p $IGWNODE +iscsiadm -m node -L all +systemctl start multipathd.service +sleep 5 +systemctl --no-pager --full status multipathd.service +ls -lR /dev/mapper +ls -l /dev/disk/by-path +ls -l /dev/disk/by-*id +multipath -ll +mkfs -t xfs /dev/dm-0 +test -d /mnt +mount /dev/dm-0 /mnt +df -h /mnt +touch /mnt/bubba +test -f /mnt/bubba +umount /mnt +echo "Result: OK" +EOF + # FIXME: assert script not running on the iSCSI gateway node + _run_test_script_on_node $TESTSCRIPT $CLIENTNODE +} + +function test_systemd_ceph_osd_target_wants { + # + # see bsc#1051598 in which ceph-disk was omitting --runtime when it enabled + # ceph-osd@$ID.service units + # + local TESTSCRIPT=/tmp/test_systemd_ceph_osd_target_wants.sh + local STORAGENODE=$(_first_x_node storage) + cat << 'EOF' > $TESTSCRIPT +set -x +CEPH_OSD_WANTS="/systemd/system/ceph-osd.target.wants" +ETC_CEPH_OSD_WANTS="/etc$CEPH_OSD_WANTS" +RUN_CEPH_OSD_WANTS="/run$CEPH_OSD_WANTS" +ls -l $ETC_CEPH_OSD_WANTS +ls -l $RUN_CEPH_OSD_WANTS +set -e +trap 'echo "Result: NOT_OK"' ERR +echo "Asserting that there is no directory $ETC_CEPH_OSD_WANTS" +test -d "$ETC_CEPH_OSD_WANTS" && false +echo "Asserting that $RUN_CEPH_OSD_WANTS exists, is a directory, and is not empty" +test -d "$RUN_CEPH_OSD_WANTS" +test -n "$(ls --almost-all $RUN_CEPH_OSD_WANTS)" +echo "Result: OK" +EOF + _run_test_script_on_node $TESTSCRIPT $STORAGENODE +} + +function configure_all_OSDs_to_filestore { + salt-run proposal.populate format=filestore name=filestore 2>/dev/null + chown salt:salt /srv/pillar/ceph/proposals/policy.cfg + sed -i 's/profile-default/profile-filestore/g' /srv/pillar/ceph/proposals/policy.cfg +} + +function verify_OSD_type { + # checking with 'ceph osd metadata' command + # 1st input argument: type 'filestore' or 'bluestore' + # 2nd input argument: OSD ID + osd_type=$(ceph osd metadata $2 -f json-pretty | jq '.osd_objectstore') + if [[ $osd_type != \"$1\" ]] + then + echo "Error: Object store type is not $1 for OSD.ID : $2" + exit 1 + else + echo OSD.${2} $osd_type + fi +} + +function check_OSD_type { + # expecting as argument 'filestore' or 'bluestore' + for i in $(ceph osd ls);do verify_OSD_type $1 $i;done +} + +function migrate_to_bluestore { + salt-run state.orch ceph.migrate.policy 2>/dev/null + sed -i 's/profile-filestore/migrated-profile-filestore/g' /srv/pillar/ceph/proposals/policy.cfg + salt-run disengage.safety 2>/dev/null + salt-run state.orch ceph.migrate.osds 2>/dev/null +} diff --git a/qa/deepsea/health-ok/common/deploy.sh b/qa/deepsea/health-ok/common/deploy.sh new file mode 100644 index 0000000000000..92729c25183f3 --- /dev/null +++ b/qa/deepsea/health-ok/common/deploy.sh @@ -0,0 +1,229 @@ +# This file is part of the DeepSea integration test suite + +# +# separate file to house the deploy_ceph function +# + 
+DEPLOY_PHASE_COMPLETE_MESSAGE="deploy phase complete!" + + +function _os_specific_install_deps { + echo "Installing dependencies on the Salt Master node" + local DEPENDENCIES="jq + " + _zypper_ref_on_master + for d in $DEPENDENCIES ; do + _zypper_install_on_master $d + done +} + +function _determine_master_minion { + type hostname + MASTER_MINION=$(hostname --fqdn) + salt $MASTER_MINION test.ping +} + +function _os_specific_repos_and_packages_info { + _dump_salt_master_zypper_repos + type rpm + rpm -q salt-master + rpm -q salt-minion + rpm -q salt-api + rpm -q deepsea || true +} + +function _set_deepsea_minions { + # + # set deepsea_minions to * - see https://github.com/SUSE/DeepSea/pull/526 + # (otherwise we would have to set deepsea grain on all minions) + echo "deepsea_minions: '*'" > /srv/pillar/ceph/deepsea_minions.sls + cat /srv/pillar/ceph/deepsea_minions.sls +} + +function _initialize_minion_array { + local m= + local i=0 + if type salt-key > /dev/null 2>&1; then + MINION_LIST=$(salt-key -L -l acc | grep -v '^Accepted Keys') + for m in $MINION_LIST ; do + MINION_ARRAY[0]=$m + i=$((i+1)) + done + else + echo "Cannot find salt-key. Is Salt installed? Is this running on the Salt Master?" + exit 1 + fi + echo $i +} + +function _initialize_storage_profile { + test "$STORAGE_PROFILE" + case "$STORAGE_PROFILE" in + default) echo "Storage profile: bluestore OSDs (default)" ;; + dmcrypt) echo "Storage profile: encrypted bluestore OSDs" ;; + filestore) echo "Storage profile: filestore OSDs" ;; + random) echo "Storage profile will be chosen randomly" ;; + *) + CUSTOM_STORAGE_PROFILE="$STORAGE_PROFILE" + STORAGE_PROFILE="custom" + echo "Storage profile: custom ($CUSTOM_STORAGE_PROFILE)" + ;; + esac +} + +function _initialize_and_vet_nodes { + if [ -n "$MIN_NODES" ] ; then + echo "MIN_NODES is set to $MIN_NODES" + PROPOSED_MIN_NODES="$MIN_NODES" + else + echo "MIN_NODES was not set. Default is 1" + PROPOSED_MIN_NODES=1 + fi + if [ -n "$CLIENT_NODES" ] ; then + echo "CLIENT_NODES is set to $CLIENT_NODES" + else + echo "CLIENT_NODES was not set. Default is 0" + CLIENT_NODES=0 + fi + MIN_NODES=$(($CLIENT_NODES + 1)) + if [ "$PROPOSED_MIN_NODES" -lt "$MIN_NODES" ] ; then + echo "Proposed MIN_NODES value is too low. Need at least 1 + CLIENT_NODES" + exit 1 + fi + test "$PROPOSED_MIN_NODES" -gt "$MIN_NODES" && MIN_NODES="$PROPOSED_MIN_NODES" + echo "Final MIN_NODES is $MIN_NODES" + echo "TOTAL_NODES is $TOTAL_NODES" + test "$TOTAL_NODES" + test "$TOTAL_NODES" -ge "$MIN_NODES" + STORAGE_NODES=$((TOTAL_NODES - CLIENT_NODES)) + echo "WWWW" + echo "This script will use DeepSea with a cluster of $TOTAL_NODES nodes total (including Salt Master)." + echo "Of these, $CLIENT_NODES are assumed to be clients (nodes without any DeepSea roles except \"admin\")." 
+} + +function _zypper_ps { + salt '*' cmd.run 'zypper ps -s' 2>/dev/null || true +} + +function _python_versions { + type python2 > /dev/null 2>&1 && python2 --version || echo "Python 2 not installed" + type python3 > /dev/null 2>&1 && python3 --version || echo "Python 3 not installed" +} + +function initialization_sequence { + set +x + _determine_master_minion + _os_specific_install_deps + _os_specific_repos_and_packages_info + set +e + _python_versions + type deepsea > /dev/null 2>&1 && deepsea --version || echo "deepsea CLI not installed" + TOTAL_MINIONS=$(_initialize_minion_array) + echo "There are $TOTAL_MINIONS minions in this Salt cluster" + set -e + _set_deepsea_minions + salt '*' saltutil.sync_all 2>/dev/null + TOTAL_NODES=$(json_total_nodes) + test "$TOTAL_NODES" = "$TOTAL_MINIONS" + _ping_minions_until_all_respond + cat_salt_config + _initialize_storage_profile + _initialize_and_vet_nodes + set -x + test $STORAGE_NODES -lt 4 && export DEV_ENV="true" +} + +function pared_down_init_sequence { + test "$ALREADY_INITIALIZED" && return 0 + set +x + TOTAL_MINIONS=$(_initialize_minion_array) + TOTAL_NODES=$(json_total_nodes) + test "$TOTAL_NODES" = "$TOTAL_MINIONS" + _ping_minions_until_all_respond + _initialize_and_vet_nodes + set -x + test "$STORAGE_NODES" -lt "4" -a -z "$DEV_ENV" && export DEV_ENV="true" || true +} + +function salt_api_test { + local tmpfile=$(mktemp) + echo "Salt API test: BEGIN" + systemctl --no-pager --full status salt-api.service + curl http://$(hostname):8000/ | tee $tmpfile # show curl output in log + test -s $tmpfile + jq . $tmpfile >/dev/null + echo -en "\n" # this is just for log readability + rm $tmpfile + echo "Salt API test: END" +} + +function deploy_ceph { + if [ "$START_STAGE" -lt "0" -o "$START_STAGE" -gt "4" ] ; then + echo "Received bad --start-stage value ->$START_STAGE<- (must be 0-4 inclusive)" + exit 1 + fi + if _ceph_cluster_running ; then + echo "Running ceph cluster detected: skipping deploy phase" + pared_down_init_sequence + return 0 + fi + if [ "$START_STAGE" = "0" ] ; then + if [ -z "$TEUTHOLOGY" ] ; then + initialization_sequence + fi + run_stage_0 "$CLI" + _zypper_ps + salt_api_test + fi + if [ "$START_STAGE" -le "1" ] ; then + test -n "$RGW" -a -n "$SSL" && rgw_ssl_init + run_stage_1 "$CLI" + policy_cfg_base + policy_cfg_mon_flex + test -n "$MDS" && policy_cfg_mds + policy_cfg_openattic_rgw_igw_ganesha + test "$RGW" && rgw_demo_users + case "$STORAGE_PROFILE" in + dmcrypt) proposal_populate_dmcrypt ;; + filestore) proposal_populate_filestore ;; + random) random_or_custom_storage_profile ;; + custom) random_or_custom_storage_profile ;; + default) ;; + *) echo "Bad storage profile ->$STORAGE_PROFILE<-. Bailing out!" 
; exit 1 ;; + esac + policy_cfg_storage + cat_policy_cfg + fi + if [ "$START_STAGE" -le "2" ] ; then + run_stage_2 "$CLI" + ceph_conf_small_cluster + ceph_conf_mon_allow_pool_delete + ceph_conf_dashboard + test "$RBD" && ceph_conf_upstream_rbd_default_features + fi + if [ "$START_STAGE" -le "3" ] ; then + run_stage_3 "$CLI" + pre_create_pools + ceph_cluster_status + test "$RBD" && ceph_test_librbd_can_be_run + if [ -z "$MDS" -a -z "$NFS_GANESHA" -a -z "$RGW" ] ; then + echo "WWWW" + echo "Stage 3 OK, no roles requiring Stage 4: $DEPLOY_PHASE_COMPLETE_MESSAGE" + return 0 + fi + test -n "$NFS_GANESHA" && nfs_ganesha_no_root_squash + fi + if [ "$START_STAGE" -le "4" ] ; then + run_stage_4 "$CLI" + if [ -n "$NFS_GANESHA" ] ; then + nfs_ganesha_cat_config_file + nfs_ganesha_debug_log + echo "WWWW" + echo "NFS-Ganesha set to debug logging" + fi + ceph_cluster_status + _zypper_ps + echo "Stage 4 OK: $DEPLOY_PHASE_COMPLETE_MESSAGE" + fi + return 0 +} diff --git a/qa/deepsea/health-ok/common/helper.sh b/qa/deepsea/health-ok/common/helper.sh new file mode 100644 index 0000000000000..fdc135b016d50 --- /dev/null +++ b/qa/deepsea/health-ok/common/helper.sh @@ -0,0 +1,184 @@ +# This file is part of the DeepSea integration test suite + +# +# helper functions (not to be called directly from test scripts) +# + +STAGE_TIMEOUT_DURATION="60m" + +function _report_stage_failure { + STAGE_SUCCEEDED="" + local stage_num=$1 + local stage_status=$2 + + echo "********** Stage $stage_num failed **********" + test "$stage_status" = "124" && echo "Stage $stage_num timed out after $STAGE_TIMEOUT_DURATION" + set -ex + journalctl -r | head -n 2000 + echo "WWWW" + echo "Finished dumping up to 2000 lines of journalctl" +} + +function _run_stage { + local stage_num=$1 + + set +x + echo "" + echo "*********************************************" + echo "********** Running DeepSea Stage $stage_num **********" + echo "*********************************************" + + STAGE_SUCCEEDED="non-empty string" + test -n "$CLI" && _run_stage_cli $stage_num || _run_stage_non_cli $stage_num +} + +function _run_stage_cli { + local stage_num=$1 + local deepsea_cli_output_path="/tmp/deepsea.${stage_num}.log" + + set +e + set -x + timeout $STAGE_TIMEOUT_DURATION \ + deepsea \ + --log-file=/var/log/salt/deepsea.log \ + --log-level=debug \ + stage \ + run \ + ceph.stage.${stage_num} \ + --simple-output \ + 2>&1 | tee $deepsea_cli_output_path + local stage_status="${PIPESTATUS[0]}" + set +x + echo "deepsea exit status: $stage_status" + echo "WWWW" + if [ "$stage_status" != "0" ] ; then + _report_stage_failure $stage_num $stage_status + return 0 + fi + if grep -q -F "failed=0" $deepsea_cli_output_path ; then + echo "********** Stage $stage_num completed successfully **********" + else + echo "ERROR: deepsea stage returned exit status 0, yet one or more steps failed. Bailing out!" 
+ _report_stage_failure $stage_num $stage_status + fi + set -ex +} + +function _run_stage_non_cli { + local stage_num=$1 + local stage_log_path="/tmp/stage.${stage_num}.log" + + set +e + set -x + timeout $STAGE_TIMEOUT_DURATION \ + salt-run \ + --no-color \ + state.orch \ + ceph.stage.${stage_num} \ + 2>/dev/null | tee $stage_log_path + local stage_status="${PIPESTATUS[0]}" + set +x + echo "WWWW" + if [ "$stage_status" != "0" ] ; then + _report_stage_failure $stage_num $stage_status + return 0 + fi + STAGE_FINISHED=$(grep -F 'Total states run' $stage_log_path) + if [ "$STAGE_FINISHED" ]; then + FAILED=$(grep -F 'Failed: ' $stage_log_path | sed 's/.*Failed:\s*//g' | head -1) + if [ "$FAILED" -gt "0" ]; then + echo "ERROR: salt-run returned exit status 0, yet one or more steps failed. Bailing out!" + _report_stage_failure $stage_num $stage_status + else + echo "********** Stage $stage_num completed successfully **********" + fi + else + echo "ERROR: salt-run returned exit status 0, yet Stage did not complete. Bailing out!" + _report_stage_failure $stage_num $stage_status + fi + set -ex +} + +function _client_node { + salt --static --out json -C 'not I@roles:storage' test.ping 2>/dev/null | jq -r 'keys[0]' +} + +function _master_has_role { + local ROLE=$1 + echo "Asserting that master minion has role ->$ROLE<-" + salt $MASTER_MINION pillar.get roles 2>/dev/null + salt $MASTER_MINION pillar.get roles 2>/dev/null | grep -q "$ROLE" + echo "Yes, it does." +} + +function _first_x_node { + local ROLE=$1 + salt --static --out json -C "I@roles:$ROLE" test.ping 2>/dev/null | jq -r 'keys[0]' +} + +function _first_storage_only_node { + local COMPOUND_TARGET="I@roles:storage" + local NOT_ROLES="mon +mgr +mds +rgw +igw +ganesha +" + local ROLE= + for ROLE in $NOT_ROLES ; do + COMPOUND_TARGET="$COMPOUND_TARGET and not I@roles:$ROLE" + done + local MAYBEJSON=$(salt --static --out json -C "$COMPOUND_TARGET" test.ping 2>/dev/null) + echo $MAYBEJSON | jq --raw-output 'keys[0]' +} + +function _run_test_script_on_node { + local TESTSCRIPT=$1 # on success, TESTSCRIPT must output the exact string + # "Result: OK" on a line by itself, otherwise it will + # be considered to have failed + local TESTNODE=$2 + local ASUSER=$3 + salt-cp $TESTNODE $TESTSCRIPT $TESTSCRIPT 2>/dev/null + local LOGFILE=/tmp/test_script.log + local STDERR_LOGFILE=/tmp/test_script_stderr.log + local stage_status= + if [ -z "$ASUSER" -o "x$ASUSER" = "xroot" ] ; then + salt $TESTNODE cmd.run "sh $TESTSCRIPT" 2>$STDERR_LOGFILE | tee $LOGFILE + stage_status="${PIPESTATUS[0]}" + else + salt $TESTNODE cmd.run "sudo su $ASUSER -c \"bash $TESTSCRIPT\"" 2>$STDERR_LOGFILE | tee $LOGFILE + stage_status="${PIPESTATUS[0]}" + fi + local RESULT=$(grep -o -P '(?<=Result: )(OK)$' $LOGFILE) # since the script + # is run by salt, the output appears indented + test "x$RESULT" = "xOK" && return + echo "The test script that ran on $TESTNODE failed. 
The stderr output was as follows:"
+    cat $STDERR_LOGFILE
+    exit 1
+}
+
+function _grace_period {
+    local SECONDS=$1
+    echo "${SECONDS}-second grace period"
+    sleep $SECONDS
+}
+
+function _root_fs_is_btrfs {
+    stat -f / | grep -q 'Type: btrfs'
+}
+
+function _ping_minions_until_all_respond {
+    local RESPONDING=""
+    for i in {1..20} ; do
+        sleep 10
+        RESPONDING=$(salt '*' test.ping 2>/dev/null | grep True 2>/dev/null | wc --lines)
+        echo "Of $TOTAL_NODES total minions, $RESPONDING are responding"
+        test "$TOTAL_NODES" -eq "$RESPONDING" && break
+    done
+}
+
+function _ceph_cluster_running {
+    ceph status >/dev/null 2>&1
+}
+
diff --git a/qa/deepsea/health-ok/common/json.sh b/qa/deepsea/health-ok/common/json.sh
new file mode 100644
index 0000000000000..99a2d22e32aef
--- /dev/null
+++ b/qa/deepsea/health-ok/common/json.sh
@@ -0,0 +1,24 @@
+#
+# This file is part of the DeepSea integration test suite.
+# It contains various cluster introspection functions.
+#
+
+function json_total_nodes {
+    # total number of nodes in the cluster
+    salt --static --out json '*' test.ping 2>/dev/null | jq '. | length'
+}
+
+function _json_nodes_of_role_x {
+    local ROLE=$1
+    salt --static --out json -C "I@roles:$ROLE" test.ping 2>/dev/null | jq '. | length'
+}
+
+function json_storage_nodes {
+    # number of storage nodes in the cluster
+    _json_nodes_of_role_x storage
+}
+
+function json_total_osds {
+    # total number of OSDs in the cluster
+    ceph osd ls --format json | jq '. | length'
+}
diff --git a/qa/deepsea/health-ok/common/nfs-ganesha.sh b/qa/deepsea/health-ok/common/nfs-ganesha.sh
new file mode 100644
index 0000000000000..61cf3826778e8
--- /dev/null
+++ b/qa/deepsea/health-ok/common/nfs-ganesha.sh
@@ -0,0 +1,177 @@
+#
+# This file is part of the DeepSea integration test suite
+#
+
+NFS_MOUNTPOINT=/root/mnt
+
+function _nfs_ganesha_node {
+    _first_x_node ganesha
+}
+
+function nfs_ganesha_no_root_squash {
+    local GANESHAJ2=/srv/salt/ceph/ganesha/files/ganesha.conf.j2
+    sed -i '/Access_Type = RW;/a \\tSquash = No_root_squash;' $GANESHAJ2
+}
+
+function nfs_ganesha_no_grace_period {
+    local GANESHAJ2=/srv/salt/ceph/ganesha/files/ganesha.conf.j2
+    cat <<EOF >>$GANESHAJ2
+NFSv4 {Graceless = True}
+EOF
+}
+
+function nfs_ganesha_debug_log {
+    local GANESHANODE=$(_nfs_ganesha_node)
+    local TESTSCRIPT=/tmp/test-nfs-ganesha.sh
+    cat <<EOF > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+echo "nfs-ganesha debug log script running as $(whoami) on $(hostname --fqdn)"
+sed -i 's/NIV_EVENT/NIV_DEBUG/g' /etc/sysconfig/nfs-ganesha
+cat /etc/sysconfig/nfs-ganesha
+rm -rf /var/log/ganesha/ganesha.log
+systemctl restart nfs-ganesha.service
+systemctl is-active nfs-ganesha.service
+rpm -q nfs-ganesha
+echo "Result: OK"
+EOF
+    _run_test_script_on_node $TESTSCRIPT $GANESHANODE
+}
+
+function nfs_ganesha_cat_config_file {
+    salt -C 'I@roles:ganesha' cmd.run 'cat /etc/ganesha/ganesha.conf'
+}
+
+#function nfs_ganesha_showmount_loop {
+#    local TESTSCRIPT=/tmp/test-nfs-ganesha.sh
+#    salt -C 'I@roles:ganesha' cmd.run "while true ; do showmount -e $GANESHANODE | tee /tmp/showmount.log || true ; grep -q 'Timed out' /tmp/showmount.log || break ; done"
+#}
+
+function nfs_ganesha_mount {
+    #
+    # creates a mount point and mounts NFS-Ganesha export in it
+    #
+    local NFSVERSION=$1 # can be "3", "4", or ""
+    local ASUSER=$2
+    local CLIENTNODE=$(_client_node)
+    local GANESHANODE=$(_nfs_ganesha_node)
+    local TESTSCRIPT=/tmp/test-nfs-ganesha.sh
+    salt "$CLIENTNODE" pillar.get roles
+    salt "$CLIENTNODE" pkg.install nfs-client # FIXME: only works on SUSE
+    cat <<EOF > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+echo "nfs-ganesha mount test script"
+test ! -e $NFS_MOUNTPOINT
+mkdir $NFS_MOUNTPOINT
+test -d $NFS_MOUNTPOINT
+#mount -t nfs -o nfsvers=4 ${GANESHANODE}:/ $NFS_MOUNTPOINT
+mount -t nfs -o ##OPTIONS## ${GANESHANODE}:/ $NFS_MOUNTPOINT
+ls -lR $NFS_MOUNTPOINT
+echo "Result: OK"
+EOF
+    if test -z $NFSVERSION ; then
+        sed -i 's/##OPTIONS##/sync/' $TESTSCRIPT
+    elif [ "$NFSVERSION" = "3" -o "$NFSVERSION" = "4" ] ; then
+        sed -i 's/##OPTIONS##/sync,nfsvers='$NFSVERSION'/' $TESTSCRIPT
+    else
+        echo "Bad NFS version ->$NFS_VERSION<- Bailing out!"
+        exit 1
+    fi
+    _run_test_script_on_node $TESTSCRIPT $CLIENTNODE $ASUSER
+}
+
+function nfs_ganesha_umount {
+    local ASUSER=$1
+    local CLIENTNODE=$(_client_node)
+    local TESTSCRIPT=/tmp/test-nfs-ganesha-umount.sh
+    cat <<EOF > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+echo "nfs-ganesha umount test script running as $(whoami) on $(hostname --fqdn)"
+umount $NFS_MOUNTPOINT
+rm -rf $NFS_MOUNTPOINT
+echo "Result: OK"
+EOF
+    _run_test_script_on_node $TESTSCRIPT $CLIENTNODE $ASUSER
+}
+
+function nfs_ganesha_write_test {
+    #
+    # NFS-Ganesha FSAL write test
+    #
+    local FSAL=$1
+    local NFSVERSION=$2
+    local CLIENTNODE=$(_client_node)
+    local TESTSCRIPT=/tmp/test-nfs-ganesha-write.sh
+    local APPENDAGE=""
+    if [ "$FSAL" = "cephfs" ] ; then
+        if [ "$NFSVERSION" = "3" ] ; then
+            APPENDAGE=""
+        else
+            APPENDAGE="/cephfs"
+        fi
+    else
+        APPENDAGE="/demo/demo-demo"
+    fi
+    local TOUCHFILE=$NFS_MOUNTPOINT$APPENDAGE/saturn
+    cat <<EOF > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+echo "nfs-ganesha write test script"
+! test -e $TOUCHFILE
+touch $TOUCHFILE
+test -f $TOUCHFILE
+rm -f $TOUCHFILE
+echo "Result: OK"
+EOF
+    _run_test_script_on_node $TESTSCRIPT $CLIENTNODE
+}
+
+function nfs_ganesha_pynfs_test {
+    #
+    # NFS-Ganesha PyNFS test
+    #
+    local CLIENTNODE=$(_client_node)
+    local GANESHANODE=$(_nfs_ganesha_node)
+    local TESTSCRIPT=/tmp/test-nfs-ganesha-pynfs.sh
+    cat <<'EOF' > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+
+function assert_success {
+    local PYNFS_OUTPUT=$1
+    test -s $PYNFS_OUTPUT
+    # last line: determined return value of function
+    ! grep -q FAILURE $PYNFS_OUTPUT
+}
+
+echo "nfs-ganesha PyNFS test script running as $(whoami) on $(hostname --fqdn)"
+set +x
+for delay in 60 60 60 60 ; do
+    sudo zypper --non-interactive --gpg-auto-import-keys refresh && break
+    sleep $delay
+done
+set -x
+zypper --non-interactive install --no-recommends krb5-devel python3-devel
+git clone --depth 1 https://github.com/supriti/Pynfs
+cd Pynfs
+./setup.py build
+cd nfs4.0
+sleep 90 # NFSv4 grace period
+LOGFILE="PyNFS.out"
+./testserver.py -v \
+    --outfile RESULTS.out \
+    --maketree GANESHANODE:/cephfs/ \
+    --showomit \
+    --secure \
+    --rundeps \
+    all \
+    ganesha 2>&1 | tee $LOGFILE
+#./showresults.py RESULTS.out
+assert_success $LOGFILE
+echo "Result: OK"
+EOF
+    sed -i 's/GANESHANODE/'$GANESHANODE'/' $TESTSCRIPT
+    _run_test_script_on_node $TESTSCRIPT $CLIENTNODE
+}
diff --git a/qa/deepsea/health-ok/common/policy.sh b/qa/deepsea/health-ok/common/policy.sh
new file mode 100644
index 0000000000000..6bda313d90c24
--- /dev/null
+++ b/qa/deepsea/health-ok/common/policy.sh
@@ -0,0 +1,271 @@
+# This file is part of the DeepSea integration test suite
+
+#
+# functions for generating storage proposals
+#
+
+PROPOSALSDIR="/srv/pillar/ceph/proposals"
+POLICY_CFG="$PROPOSALSDIR/policy.cfg"
+
+function proposal_populate_dmcrypt {
+    salt-run proposal.populate encryption='dmcrypt' name='dmcrypt'
+}
+
+function proposal_populate_filestore {
+    salt-run proposal.populate format='filestore' name='filestore'
+}
+
+
+#
+# functions for generating policy.cfg
+#
+
+function policy_cfg_base {
+    cat <<EOF > $POLICY_CFG
+# Cluster assignment
+cluster-ceph/cluster/*.sls
+# Common configuration
+config/stack/default/global.yml
+config/stack/default/ceph/cluster.yml
+# Role assignment - master
+role-master/cluster/${MASTER_MINION}.sls
+# Role assignment - admin
+role-admin/cluster/*.sls
+EOF
+}
+
+function policy_cfg_mon_flex {
+    test -n "$STORAGE_NODES" # set in initialization_sequence
+    test "$STORAGE_NODES" -gt 0
+    if [ "$STORAGE_NODES" -lt 4 ] ; then
+        echo "Undersized cluster ($STORAGE_NODES nodes)"
+        policy_cfg_one_mon
+    else
+        policy_cfg_three_mons
+    fi
+}
+
+function policy_cfg_one_mon {
+    cat <<EOF >> $POLICY_CFG
+# Role assignment - 1 mon, 1 mgr
+role-mon/cluster/*.sls slice=[:1]
+role-mgr/cluster/*.sls slice=[:1]
+EOF
+}
+
+function policy_cfg_three_mons {
+    cat <<EOF >> $POLICY_CFG
+# Role assignment - 3 mons, 3 mgrs
+role-mon/cluster/*.sls slice=[:3]
+role-mgr/cluster/*.sls slice=[:3]
+EOF
+}
+
+function _initialize_minion_configs_array {
+    local DIR=$1
+
+    shopt -s nullglob
+    pushd $DIR >/dev/null
+    MINION_CONFIGS_ARRAY=(*.yaml *.yml)
+    echo "Made global array containing the following files (from ->$DIR<-):"
+    printf '%s\n' "${MINION_CONFIGS_ARRAY[@]}"
+    popd >/dev/null
+    shopt -u nullglob
+}
+
+function _initialize_osd_configs_array {
+    local DIR=$1
+
+    shopt -s nullglob
+    pushd $DIR >/dev/null
+    OSD_CONFIGS_ARRAY=(*.yaml *.yml)
+    echo "Made global array containing the following OSD configs (from ->$DIR<-):"
+    printf '%s\n' "${OSD_CONFIGS_ARRAY[@]}"
+    popd >/dev/null
+    shopt -u nullglob
+}
+
+function _custom_osd_config {
+    local PROFILE=$1
+    local FILENAME=""
+    for i in "${OSD_CONFIGS_ARRAY[@]}" ; do
+        case "$i" in
+            $PROFILE) FILENAME=$i ; break ;;
+            ${PROFILE}.yaml) FILENAME=$i ; break ;;
+            ${PROFILE}.yml) FILENAME=$i ; break ;;
+        esac
+    done
+    if [ -z "$FILENAME" ] ; then
+        echo "Custom OSD profile $PROFILE not found. Bailing out!"
+        exit 1
+    fi
+    echo "$FILENAME"
+}
+
+function _random_osd_config {
+    # the bare config file names are assumed to already be in OSD_CONFIGS_ARRAY
+    # (accomplished by calling _initialize_osd_configs_array first)
+    OSD_CONFIGS_ARRAY_LENGTH="${#OSD_CONFIGS_ARRAY[@]}"
+    local INDEX=$((RANDOM % OSD_CONFIGS_ARRAY_LENGTH))
+    echo "${OSD_CONFIGS_ARRAY[$INDEX]}"
+
+}
+
+function random_or_custom_storage_profile {
+    test "$STORAGE_PROFILE"
+    test "$STORAGE_PROFILE" = "random" -o "$STORAGE_PROFILE" = "custom"
+    #
+    # choose OSD configuration from osd-config/ovh
+    #
+    local SOURCEDIR="$BASEDIR/osd-config/ovh"
+    _initialize_osd_configs_array $SOURCEDIR
+    local SOURCEFILE=""
+    case "$STORAGE_PROFILE" in
+        random) SOURCEFILE=$(_random_osd_config) ;;
+        custom) SOURCEFILE=$(_custom_osd_config $CUSTOM_STORAGE_PROFILE) ;;
+    esac
+    test "$SOURCEFILE"
+    file $SOURCEDIR/$SOURCEFILE
+    #
+    # prepare new profile, which will be exactly the same as the default
+    # profile except the files in stack/default/ceph/minions/ will be
+    # overwritten with our chosen OSD configuration
+    #
+    cp -a $PROPOSALSDIR/profile-default $PROPOSALSDIR/profile-$STORAGE_PROFILE
+    local DESTDIR="$PROPOSALSDIR/profile-$STORAGE_PROFILE/stack/default/ceph/minions"
+    _initialize_minion_configs_array $DESTDIR
+    for DESTFILE in "${MINION_CONFIGS_ARRAY[@]}" ; do
+        cp $SOURCEDIR/$SOURCEFILE $DESTDIR/$DESTFILE
+    done
+    echo "Your $STORAGE_PROFILE storage profile $SOURCEFILE has the following contents:"
+    cat $DESTDIR/$DESTFILE
+    ls -lR $PROPOSALSDIR
+}
+
+function policy_cfg_storage {
+    test -n "$CLIENT_NODES"
+    test -n "$STORAGE_PROFILE"
+
+    if [ "$CLIENT_NODES" -eq 0 ] ; then
+        cat <<EOF >> $POLICY_CFG
+# Hardware Profile
+profile-$STORAGE_PROFILE/cluster/*.sls
+profile-$STORAGE_PROFILE/stack/default/ceph/minions/*yml
+EOF
+    elif [ "$CLIENT_NODES" -ge 1 ] ; then
+        cat <<EOF >> $POLICY_CFG
+# Hardware Profile
+profile-$STORAGE_PROFILE/cluster/*.sls slice=[:-$CLIENT_NODES]
+profile-$STORAGE_PROFILE/stack/default/ceph/minions/*yml slice=[:-$CLIENT_NODES]
+EOF
+    else
+        echo "Unexpected number of client nodes ->$CLIENT_NODES<-; bailing out!"
+        exit 1
+    fi
+}
+
+function storage_profile_from_policy_cfg {
+    local BUFFER=$(grep --max-count 1 '^profile-' $POLICY_CFG)
+    perl -e '"'"$BUFFER"'" =~ m/profile-(\w+)/; print "$1\n";'
+}
+
+function policy_remove_storage_node {
+    local NODE_TO_DELETE=$1
+
+    echo "Before"
+    ls -1 $PROPOSALSDIR/profile-$STORAGE_PROFILE/cluster/
+    ls -1 $PROPOSALSDIR/profile-$STORAGE_PROFILE/stack/default/ceph/minions/
+
+    local basedirsls=$PROPOSALSDIR/profile-$STORAGE_PROFILE/cluster
+    local basediryml=$PROPOSALSDIR/profile-$STORAGE_PROFILE/stack/default/ceph/minions
+    mv $basedirsls/${NODE_TO_DELETE}.sls $basedirsls/${NODE_TO_DELETE}.sls-DISABLED
+    mv $basediryml/${NODE_TO_DELETE}.yml $basedirsls/${NODE_TO_DELETE}.yml-DISABLED
+
+    echo "After"
+    ls -1 $PROPOSALSDIR/profile-$STORAGE_PROFILE/cluster/
+    ls -1 $PROPOSALSDIR/profile-$STORAGE_PROFILE/stack/default/ceph/minions/
+}
+
+function policy_cfg_mds {
+    test -n "$STORAGE_NODES"
+    # MDS on up to 3 storage nodes
+    if [ "$STORAGE_NODES" -le 3 ] ; then
+        cat <<EOF >> $POLICY_CFG
+# Role assignment - mds
+role-mds/cluster/*.sls slice=[:$STORAGE_NODES]
+EOF
+    else
+        cat <<EOF >> $POLICY_CFG
+# Role assignment - mds
+role-mds/cluster/*.sls slice=[:3]
+EOF
+    fi
+}
+
+function policy_cfg_openattic_rgw_igw_ganesha {
+    # first, determine the slices
+    local slice_openattic=""
+    local slice_rgw=""
+    local slice_igw=""
+    local slice_ganesha=""
+    # lest we become confused, "storage nodes" is a synonym for "cluster nodes"
+    test -n "$STORAGE_NODES"
+    if [ "$STORAGE_NODES" -eq 1 ] ; then
+        slice_openattic="[:1]"
+        slice_rgw="[:1]"
+        slice_igw="[:1]"
+        slice_ganesha="[:1]"
+    elif [ "$STORAGE_NODES" -eq 2 ] ; then
+        slice_openattic="[:1]"
+        slice_rgw="[1:2]"
+        slice_igw="[1:2]"
+        slice_ganesha="[1:2]"
+    elif [ "$STORAGE_NODES" -eq 3 ] ; then
+        slice_openattic="[:1]"
+        slice_rgw="[1:2]"
+        slice_igw="[2:3]"
+        slice_ganesha="[2:3]"
+    elif [ "$STORAGE_NODES" -ge 4 ] ; then
+        slice_openattic="[:1]"
+        slice_rgw="[1:2]"
+        slice_igw="[2:3]"
+        slice_ganesha="[3:4]"
+    else
+        echo "Unexpected number of cluster/storage nodes ->$STORAGE_NODES<-: bailing out!"
+        exit 1
+    fi
+    # then, populate policy.cfg
+    if [ "$OPENATTIC" ] ; then
+        cat <<EOF >> $POLICY_CFG
+# Role assignment - openattic
+role-openattic/cluster/*.sls slice=$slice_openattic
+EOF
+    fi
+    if [ "$RGW" ] ; then
+        if [ -z "$SSL" ] ; then
+            cat <<EOF >> $POLICY_CFG
+# Role assignment - rgw
+role-rgw/cluster/*.sls slice=$slice_rgw
+EOF
+        else
+            cat <<EOF >> $POLICY_CFG
+# Role assignment - rgw
+role-rgw/cluster/*.sls slice=$slice_rgw
+role-rgw-ssl/cluster/*.sls slice=$slice_rgw
+EOF
+        fi
+    fi
+    if [ "$IGW" ] ; then
+        cat <<EOF >> $POLICY_CFG
+# Role assignment - igw
+role-igw/cluster/*.sls slice=$slice_igw
+EOF
+    fi
+    if [ "$NFS_GANESHA" ] ; then
+        cat <<EOF >> $POLICY_CFG
+# Role assignment - ganesha
+role-ganesha/cluster/*.sls slice=$slice_ganesha
+EOF
+    fi
+}
+
diff --git a/qa/deepsea/health-ok/common/pool.sh b/qa/deepsea/health-ok/common/pool.sh
new file mode 100644
index 0000000000000..5baf3199d9582
--- /dev/null
+++ b/qa/deepsea/health-ok/common/pool.sh
@@ -0,0 +1,64 @@
+# This file is part of the DeepSea integration test suite
+
+#
+# separate file to house the pool creation functions
+#
+
+
+function pgs_per_pool {
+    local TOTALPOOLS=$1
+    test -n "$TOTALPOOLS"
+    local TOTALOSDS=$(json_total_osds)
+    test -n "$TOTALOSDS"
+    # given the total number of pools and OSDs,
+    # assume triple replication and equal number of PGs per pool
+    # and aim for 100 PGs per OSD
+    let "TOTALPGS = $TOTALOSDS * 100"
+    let "PGSPEROSD = $TOTALPGS / $TOTALPOOLS / 3"
+    echo $PGSPEROSD
+}
+
+function create_pool_incrementally {
+    # Special-purpose function for creating pools incrementally. For example,
+    # if your test case needs 2 pools "foo" and "bar", but you cannot create
+    # them all at once for some reason. Otherwise, use create_all_pools_at_once.
+    #
+    # sample usage:
+    #
+    # create_pool foo 2
+    # ... do something ...
+    # create_pool bar 2
+    # ... do something else ...
+    #
+    local POOLNAME=$1
+    test -n "$POOLNAME"
+    local TOTALPOOLS=$2
+    test -n "$TOTALPOOLS"
+    local PGSPERPOOL=$(pgs_per_pool $TOTALPOOLS)
+    ceph osd pool create $POOLNAME $PGSPERPOOL $PGSPERPOOL replicated
+}
+
+function create_all_pools_at_once {
+    # sample usage: create_all_pools_at_once foo bar
+    local TOTALPOOLS="${#@}"
+    local PGSPERPOOL=$(pgs_per_pool $TOTALPOOLS)
+    for POOLNAME in "$@"
+    do
+        ceph osd pool create $POOLNAME $PGSPERPOOL $PGSPERPOOL replicated
+    done
+    ceph osd pool ls detail
+}
+
+function pre_create_pools {
+    # pre-create pools with calculated number of PGs so we don't get health
+    # warnings after Stage 4 due to "too few" or "too many" PGs per OSD
+    # (the "write_test" pool is used in common/sanity-basic.sh)
+    sleep 10
+    POOLS="write_test"
+    test "$MDS" && POOLS+=" cephfs_data cephfs_metadata"
+    test "$OPENSTACK" && POOLS+=" smoketest-cloud-backups smoketest-cloud-volumes smoketest-cloud-images smoketest-cloud-vms cloud-backups cloud-volumes cloud-images cloud-vms"
+    test "$RBD" && POOLS+=" rbd"
+    create_all_pools_at_once $POOLS
+    ceph osd pool application enable write_test deepsea_qa
+    sleep 10
+}
diff --git a/qa/deepsea/health-ok/common/rbd.sh b/qa/deepsea/health-ok/common/rbd.sh
new file mode 100644
index 0000000000000..9204d3f8bae1b
--- /dev/null
+++ b/qa/deepsea/health-ok/common/rbd.sh
@@ -0,0 +1,29 @@
+#
+# This file is part of the DeepSea integration test suite
+#
+
+function ceph_conf_upstream_rbd_default_features {
+    #
+    # by removing this line, we ensure that there will be no "rbd default
+    # features" setting in ceph.conf, so the default value will be used
+    #
+    sed -i '/^rbd default features =/d' \
+        /srv/salt/ceph/configuration/files/rbd.conf
+}
+
+function ceph_test_librbd_can_be_run {
+    local TESTSCRIPT=/tmp/rbd_script.sh
+    local CLIENTNODE=$(_client_node)
+    cat << 'EOF' > $TESTSCRIPT
+set -e
+trap 'echo "Result: NOT_OK"' ERR
+set -x
+chmod a+r /etc/ceph/ceph.client.admin.keyring
+rpm -V ceph-test
+type ceph_test_librbd
+echo "Result: OK"
+EOF
+    _run_test_script_on_node $TESTSCRIPT $CLIENTNODE
+    echo "You can now run ceph_test_librbd on the client node"
+}
+
diff --git a/qa/deepsea/health-ok/common/rgw.sh b/qa/deepsea/health-ok/common/rgw.sh
new file mode 100644
index 0000000000000..b21db4f36f74b
--- /dev/null
+++ b/qa/deepsea/health-ok/common/rgw.sh
@@ -0,0 +1,129 @@
+#
+# This file is part of the DeepSea integration test suite
+#
+RGW_ROLE=rgw
+
+function rgw_demo_users {
+    local RGWSLS=/srv/salt/ceph/rgw/users/users.d/users.yml
+    cat << EOF >> $RGWSLS
+- { uid: "demo", name: "Demo", email: "demo@demo.nil" }
+- { uid: "demo1", name: "Demo1", email: "demo1@demo.nil" }
+EOF
+    cat $RGWSLS
+}
+
+function rgw_user_and_bucket_list {
+    #
+    # just list rgw users and buckets
+    #
+    local TESTSCRIPT=/tmp/rgw_user_and_bucket_list.sh
+    local RGWNODE=$(_first_x_node $RGW_ROLE)
+    cat << EOF > $TESTSCRIPT
+set -ex
+radosgw-admin user list
+radosgw-admin bucket list
+echo "Result: OK"
+EOF
+    _run_test_script_on_node $TESTSCRIPT $RGWNODE
+}
+
+function rgw_validate_system_user {
+    #
+    # prove the system user "admin" was really set up
+    #
+    local TESTSCRIPT=/tmp/rgw_validate_system_user.sh
+    local RGWNODE=$(_first_x_node $RGW_ROLE)
+    cat << EOF > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+radosgw-admin user info --uid=admin
+radosgw-admin user info --uid=admin | grep system | grep -q true
+echo "Result: OK"
+EOF
+    _run_test_script_on_node $TESTSCRIPT $RGWNODE
+}
+
+function rgw_validate_demo_users {
+    #
+    # prove the demo users from rgw_demo_users were really set up
really set up + # + local TESTSCRIPT=/tmp/rgw_validate_demo_users.sh + local RGWNODE=$(_first_x_node $RGW_ROLE) + cat << EOF > $TESTSCRIPT +set -ex +trap 'echo "Result: NOT_OK"' ERR +radosgw-admin user info --uid=demo +radosgw-admin user info --uid=demo1 +echo "Result: OK" +EOF + _run_test_script_on_node $TESTSCRIPT $RGWNODE +} + +function rgw_curl_test { + local RGWNODE=$(_first_x_node $RGW_ROLE) + test -n "$SSL" && PROTOCOL="https" || PROTOCOL="http" + test -n "$SSL" && CURL_OPTS="--insecure" + set +x + for delay in 60 60 60 60 ; do + sudo zypper --non-interactive --gpg-auto-import-keys refresh && break + sleep $delay + done + set -x + zypper --non-interactive install --no-recommends curl libxml2-tools + # installing curl RPM causes ceph-radosgw and rsyslog services to need restart + salt-run state.orch ceph.restart.rgw 2>/dev/null + systemctl restart rsyslog.service + _zypper_ps + salt --no-color -C "I@roles:$RGW_ROLE" cmd.run 'systemctl | grep radosgw' + #RGWNODE=$(salt --no-color -C "I@roles:$RGW_ROLE" test.ping | grep -o -P '^\S+(?=:)' | head -1) + RGWXMLOUT=/tmp/rgw_test.xml + curl $CURL_OPTS "${PROTOCOL}://$RGWNODE" > $RGWXMLOUT + test -f $RGWXMLOUT + xmllint $RGWXMLOUT + grep anonymous $RGWXMLOUT + rm -f $RGWXMLOUT +} + +function rgw_add_ssl_global { + local GLOBALYML=/srv/pillar/ceph/stack/global.yml + cat <<EOF >> $GLOBALYML +rgw_init: default-ssl +rgw_configurations: + rgw: + users: + - { uid: "admin", name: "Admin", email: "admin@demo.nil", system: True } + # when using only RGW and not Ganesha, SSL will have all the RGW users already, + # but to be consistent we define at least one user + rgw-ssl: + users: + - { uid: "admin", name: "Admin", email: "admin@demo.nil", system: True } +EOF + cat $GLOBALYML +} + +function rgw_ssl_init { + local CERTDIR=/srv/salt/ceph/rgw/cert + mkdir -p $CERTDIR + pushd $CERTDIR + openssl req -x509 -nodes -days 1095 -newkey rsa:4096 -keyout rgw.key -out rgw.crt -subj "/C=DE" + cat rgw.key > rgw.pem && cat rgw.crt >> rgw.pem + popd + rgw_add_ssl_global +} + +function validate_rgw_cert_perm { + local TESTSCRIPT=/tmp/test_validate_rgw_cert_perm.sh + local RGWNODE=$(_first_x_node $RGW_ROLE) + cat << 'EOF' > $TESTSCRIPT +set -ex +trap 'echo "Result: NOT_OK"' ERR +RGW_PEM=/etc/ceph/rgw.pem +test -f "$RGW_PEM" +test "$(stat -c'%U' $RGW_PEM)" == "ceph" +test "$(stat -c'%G' $RGW_PEM)" == "ceph" +test "$(stat -c'%a' $RGW_PEM)" -eq 600 +echo "Result: OK" +EOF + _run_test_script_on_node $TESTSCRIPT $RGWNODE +} + diff --git a/qa/deepsea/health-ok/common/zypper.sh b/qa/deepsea/health-ok/common/zypper.sh new file mode 100644 index 0000000000000..0abadb2d6b1ef --- /dev/null +++ b/qa/deepsea/health-ok/common/zypper.sh @@ -0,0 +1,24 @@ +# This file is part of the DeepSea integration test suite + +# +# zypper-specific helper functions +# + +function _dump_salt_master_zypper_repos { + zypper lr -upEP +} + +function _zypper_ref_on_master { + set +x + for delay in 60 60 60 60 ; do + zypper --non-interactive --gpg-auto-import-keys refresh && break + sleep $delay + done + set -x +} + +function _zypper_install_on_master { + local PACKAGE=$1 + zypper --non-interactive install --no-recommends $PACKAGE +} + diff --git a/qa/deepsea/health-ok/health-ok.sh b/qa/deepsea/health-ok/health-ok.sh new file mode 100755 index 0000000000000..159303a01973b --- /dev/null +++ b/qa/deepsea/health-ok/health-ok.sh @@ -0,0 +1,202 @@ +#!/bin/bash +# +# DeepSea integration test "suites/basic/health-ok.sh" +# +# This script runs DeepSea stages 0-3 (or 0-4, depending on options) to deploy +# a Ceph cluster 
(with various options to control the cluster configuration). +# After the last stage completes, the script checks for HEALTH_OK. +# +# The script makes no assumptions beyond those listed in README. +# +# After HEALTH_OK is reached, the script also runs various sanity tests +# depending on the options provided. +# +# On success (HEALTH_OK is reached, sanity tests pass), the script returns 0. +# On failure, for whatever reason, the script returns non-zero. +# +# The script produces verbose output on stdout, which can be captured for later +# forensic analysis. +# + +set -e +set +x + +SCRIPTNAME=$(basename ${0}) +BASEDIR=$(readlink -f "$(dirname ${0})") +test -d $BASEDIR +[[ $BASEDIR =~ \/health-ok$ ]] + +source $BASEDIR/common/common.sh + +function usage { + set +x + echo "$SCRIPTNAME - script for testing HEALTH_OK deployment" + echo "for use in SUSE Enterprise Storage testing" + echo + echo "Usage:" + echo " $SCRIPTNAME [-h,--help] [--cli] [--client-nodes=X]" + echo " [--mds] [--min-nodes=X] [--nfs-ganesha] [--no-update]" + echo " [--openstack] [--profile=X] [--rbd] [--rgw] [--ssl]" + echo " [--tuned=X]" + echo + echo "Options:" + echo " --cli Use DeepSea CLI" + echo " --client-nodes Number of client (non-cluster) nodes" + echo " --help Display this usage message" + echo " --mds Deploy MDS" + echo " --min-nodes Minimum number of nodes" + echo " --nfs-ganesha Deploy NFS-Ganesha" + echo " --no-update Use no-update-no-reboot Stage 0 alt default" + echo " --openstack Pre-create pools for OpenStack functests" + echo " --profile Storage/OSD profile (see below)" + echo " --rbd Modify ceph.conf for rbd integration testing" + echo " --rgw Deploy RGW" + echo " --ssl Deploy RGW with SSL" + echo " --start-stage Run stages from (defaults to 0)" + echo " --teuthology Provide this option when running via teuthology" + echo " --tuned=on/off Deploy tuned in Stage 3 (default: off)" + echo + echo "Supported storage/OSD profiles:" + echo " default Whatever is generated by Stage 1 (bluestore)" + echo " dmcrypt All encrypted OSDs" + echo " filestore All filestore OSDs" + echo " random A randomly chosen profile (teuthology/OVH only)" + echo " Any other value will be assumed to be the name" + echo " of an OSD profile in qa/osd-config/ovh" + exit 1 +} + +assert_enhanced_getopt + +TEMP=$(getopt -o h \ +--long "cli,client-nodes:,help,igw,mds,min-nodes:,nfs-ganesha,no-update,openstack,profile:,rbd,rgw,ssl,start-stage:,teuthology,tuned:" \ +-n 'health-ok.sh' -- "$@") + +if [ $? != 0 ] ; then echo "Terminating..." >&2 ; exit 1 ; fi + +# Note the quotes around TEMP': they are essential! 
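+# (eval re-parses getopt's quoted output back into this script's positional
+# parameters so that the while/case loop below can consume them)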
+eval set -- "$TEMP" + +# process command-line options +CLI="" +CLIENT_NODES=0 +STORAGE_PROFILE="default" +CUSTOM_STORAGE_PROFILE="" +MDS="" +MIN_NODES=1 +OPENSTACK="" +NFS_GANESHA="" +NO_UPDATE="" +RBD="" +RGW="" +SSL="" +START_STAGE="0" +TEUTHOLOGY="" +TUNED="off" +while true ; do + case "$1" in + --cli) CLI="$1" ; shift ;; + --client-nodes) shift ; CLIENT_NODES=$1 ; shift ;; + -h|--help) usage ;; # does not return + --mds) MDS="$1" ; shift ;; + --min-nodes) shift ; MIN_NODES=$1 ; shift ;; + --nfs-ganesha) NFS_GANESHA="$1" ; shift ;; + --no-update) NO_UPDATE="$1" ; shift ;; + --openstack) OPENSTACK="$1" ; shift ;; + --profile) shift ; STORAGE_PROFILE=$1 ; shift ;; + --rbd) RBD="$1" ; shift ;; + --rgw) RGW="$1" ; shift ;; + --ssl) SSL="$1" ; shift ;; + --start-stage) shift ; START_STAGE=$1 ; shift ;; + --teuthology) TEUTHOLOGY="$1" ; shift ;; + --tuned) shift ; TUNED=$1 ; shift ;; + --) shift ; break ;; + *) echo "Internal error" ; exit 1 ;; + esac +done +if [ "$NFS_GANESHA" ] ; then + if [ -z "$MDS" -a -z "$RGW" ] ; then + echo "NFS-Ganesha requires either mds or rgw role, but neither was specified. Bailing out!" + exit 1 + fi +fi +TUNED=${TUNED,,} +case "$TUNED" in + on) ;; + off) TUNED='' ;; + *) echo "Bad value ->$TUNED<- passed with --tuned. Bailing out!" ; exit 1 ;; +esac +echo "WWWW" +echo "health-ok.sh running with the following configuration:" +test -n "$CLI" && echo "- CLI" +echo "- CLIENT_NODES ->$CLIENT_NODES<-" +echo "- MIN_NODES ->$MIN_NODES<-" +test -n "$MDS" && echo "- MDS" +test -n "$NFS_GANESHA" && echo "- NFS-Ganesha" +test -n "$OPENSTACK" && echo "- OpenStack test pools will be pre-created" +echo "- PROFILE ->$STORAGE_PROFILE<-" +test -n "$RBD" && echo "- RBD" +test -n "$RGW" && echo "- RGW" +test -n "$SSL" && echo "- SSL" +echo "- Start Stage ->$START_STAGE<-" +test -n "$TEUTHOLOGY" && echo "- TEUTHOLOGY" +echo -n "- TUNED: " +test -n "$TUNED" && echo "ON" +test -z "$TUNED" && echo "OFF" +echo -n "Stage 0 update: " +test -n "$NO_UPDATE" && echo "disabled" || echo "enabled" +set -x + +# deploy phase +deploy_ceph + +# verification phase +ceph_health_test +test "$STORAGE_NODES" = "$(number_of_hosts_in_ceph_osd_tree)" +#salt -I roles:storage osd.report 2>/dev/null + +# test phase +REPEAT_STAGE_0="" +ceph_log_grep_enoent_eaccess +test_systemd_ceph_osd_target_wants +#rados_write_test +#ceph_version_test +if [ -n "$RGW" ] ; then + rgw_curl_test + test -n "$SSL" && validate_rgw_cert_perm + rgw_user_and_bucket_list + rgw_validate_system_user + rgw_validate_demo_users +fi +test -n "$MDS" -a "$CLIENT_NODES" -ge 1 && cephfs_mount_and_sanity_test +if [ "$NFS_GANESHA" ] ; then + for v in "" "3" "4" ; do + echo "Testing NFS-Ganesha with NFS version ->$v<-" + if [ "$RGW" -a "$v" = "3" ] ; then + echo "Not testing RGW FSAL on NFSv3" + continue + else + nfs_ganesha_mount "$v" + fi + if [ "$MDS" ] ; then + nfs_ganesha_write_test cephfs "$v" + fi + if [ "$RGW" ] ; then + if [ "$v" = "3" ] ; then + echo "Not testing RGW FSAL on NFSv3" + else + rgw_curl_test + rgw_user_and_bucket_list + rgw_validate_demo_users + nfs_ganesha_write_test rgw "$v" + fi + fi + nfs_ganesha_umount + sleep 10 + done + REPEAT_STAGE_0="yes, please" +fi +test "$REPEAT_STAGE_0" && run_stage_0 "$CLI" # exercise ceph.restart orchestration + +echo "YYYY" +echo "health-ok test result: PASS" diff --git a/qa/deepsea/health-ok/stage-5.sh b/qa/deepsea/health-ok/stage-5.sh new file mode 100755 index 0000000000000..fa63516edddc6 --- /dev/null +++ b/qa/deepsea/health-ok/stage-5.sh @@ -0,0 +1,112 @@ +#!/bin/bash +# +# DeepSea 
integration test "suites/basic/stage-5.sh" +# +# This script runs DeepSea stages 2 and 5 to remove a storage-only node from +# an existing Ceph cluster. +# +# In addition to the assumptions contained in README, this script assumes +# that: +# 1. DeepSea has already been used to deploy a cluster, +# 2. the cluster has at least one "storage-only" node (i.e. a node with role +# "storage" and no other roles (except possibly "admin")), and +# 3. the cluster will be able to reach HEALTH_OK after one storage-only node +# is dropped (typically this means the cluster needs at least 3 storage +# nodes to start with). +# +# On success (HEALTH_OK is reached, number of storage nodes went down by 1, +# number of OSDs decreased), the script returns 0. On failure, for whatever +# reason, the script returns non-zero. +# +# The script produces verbose output on stdout, which can be captured for later +# forensic analysis. +# + +set -e +set +x + +SCRIPTNAME=$(basename ${0}) +BASEDIR=$(readlink -f "$(dirname ${0})") +test -d $BASEDIR +[[ $BASEDIR =~ \/health-ok$ ]] + +source $BASEDIR/common/common.sh + +function usage { + set +x + echo "$SCRIPTNAME - script for testing storage-only node removal (DeepSea Stages 2 and 5)" + echo "for use in SUSE Enterprise Storage testing" + echo + echo "Usage:" + echo " $SCRIPTNAME [-h,--help] [--cli]" + echo + echo "Options:" + echo " --cli Use DeepSea CLI" + echo " --help Display this usage message" + exit 1 +} + +assert_enhanced_getopt + +TEMP=$(getopt -o h \ +--long "cli,help" \ +-n 'stage-5.sh' -- "$@") + +if [ $? != 0 ] ; then echo "Terminating..." >&2 ; exit 1 ; fi + +# Note the quotes around TEMP': they are essential! +eval set -- "$TEMP" + +# process command-line options +CLI="" +while true ; do + case "$1" in + --cli) CLI="$1" ; shift ;; + -h|--help) usage ;; # does not return + --) shift ; break ;; + *) echo "Internal error" ; exit 1 ;; + esac +done +echo "WWWW" +echo "stage-5.sh running with the following configuration:" +test -n "$CLI" && echo "- CLI" +set -x + +# double-check there is a healthy cluster +ceph_health_test +STORAGE_NODES_BEFORE=$(number_of_hosts_in_ceph_osd_tree) +OSDS_BEFORE=$(number_of_osds_in_ceph_osd_tree) +test "$STORAGE_NODES_BEFORE" +test "$OSDS_BEFORE" +test "$STORAGE_NODES_BEFORE" -gt 1 +test "$OSDS_BEFORE" -gt 0 + +# modify storage profile +STORAGE_PROFILE=$(storage_profile_from_policy_cfg) +FIRST_STORAGE_ONLY_NODE=$(_first_storage_only_node) +ls -lR $PROPOSALSDIR +PROPOSALS_BEFORE=$(find $PROPOSALSDIR -name \*$FIRST_STORAGE_ONLY_NODE\* | wc --lines) +policy_remove_storage_node $FIRST_STORAGE_ONLY_NODE +ls -lR $PROPOSALSDIR +PROPOSALS_AFTER=$(find $PROPOSALSDIR -name \*$FIRST_STORAGE_ONLY_NODE\* | wc --lines) + +# run stages 2 and 5 +run_stage_2 "$CLI" +ceph_cluster_status +run_stage_5 "$CLI" +ceph_cluster_status + +# verification phase +ceph_health_test +STORAGE_NODES_AFTER=$(number_of_hosts_in_ceph_osd_tree) +OSDS_AFTER=$(number_of_osds_in_ceph_osd_tree) +test "$STORAGE_NODES_BEFORE" +test "$OSDS_BEFORE" +test "$STORAGE_NODES_AFTER" -eq "$((STORAGE_NODES_BEFORE - 1))" +test "$OSDS_AFTER" -lt "$OSDS_BEFORE" + +## osd.report for good measure +#salt -I roles:storage osd.report 2>/dev/null + +echo "YYYY" +echo "stage-5 test result: PASS" diff --git a/qa/deepsea/nodes/1node.yaml b/qa/deepsea/nodes/1node.yaml new file mode 100644 index 0000000000000..aaaf43d31d45c --- /dev/null +++ b/qa/deepsea/nodes/1node.yaml @@ -0,0 +1,2 @@ +roles: +- [client.salt_master] diff --git a/qa/deepsea/nodes/20nodes.yaml b/qa/deepsea/nodes/20nodes.yaml new file mode 100644 index 
0000000000000..1704ce75a45ff --- /dev/null +++ b/qa/deepsea/nodes/20nodes.yaml @@ -0,0 +1,21 @@ +roles: +- [client.salt_master, node.0] +- [node.1] +- [node.2] +- [node.3] +- [node.4] +- [node.5] +- [node.6] +- [node.7] +- [node.8] +- [node.9] +- [node.10] +- [node.11] +- [node.12] +- [node.13] +- [node.14] +- [node.15] +- [node.16] +- [node.17] +- [node.18] +- [node.19] diff --git a/qa/deepsea/nodes/2nodes.yaml b/qa/deepsea/nodes/2nodes.yaml new file mode 100644 index 0000000000000..c71b410ead025 --- /dev/null +++ b/qa/deepsea/nodes/2nodes.yaml @@ -0,0 +1,3 @@ +roles: +- [client.salt_master] +- [node.1] diff --git a/qa/deepsea/nodes/3nodes.yaml b/qa/deepsea/nodes/3nodes.yaml new file mode 100644 index 0000000000000..32d7cb21f8758 --- /dev/null +++ b/qa/deepsea/nodes/3nodes.yaml @@ -0,0 +1,4 @@ +roles: +- [client.salt_master] +- [node.1] +- [node.2] diff --git a/qa/deepsea/nodes/4nodes.yaml b/qa/deepsea/nodes/4nodes.yaml new file mode 100644 index 0000000000000..fbda8eeb0e357 --- /dev/null +++ b/qa/deepsea/nodes/4nodes.yaml @@ -0,0 +1,5 @@ +roles: +- [client.salt_master] +- [node.1] +- [node.2] +- [node.3] diff --git a/qa/deepsea/nodes/5nodes.yaml b/qa/deepsea/nodes/5nodes.yaml new file mode 100644 index 0000000000000..33c023a886ced --- /dev/null +++ b/qa/deepsea/nodes/5nodes.yaml @@ -0,0 +1,6 @@ +roles: +- [client.salt_master, node.0] +- [node.1] +- [node.2] +- [node.3] +- [node.4] diff --git a/qa/deepsea/salt.yaml b/qa/deepsea/salt.yaml new file mode 100644 index 0000000000000..addcc5ae5cdac --- /dev/null +++ b/qa/deepsea/salt.yaml @@ -0,0 +1,6 @@ +tasks: +- clock: +- install: + install_ceph_packages: false + extra_system_packages: [salt, salt-master, salt-minion, salt-api] +- salt: diff --git a/qa/deepsea/storage-profiles/bs_dedicated_db.yaml b/qa/deepsea/storage-profiles/bs_dedicated_db.yaml new file mode 100644 index 0000000000000..78114bf003ddb --- /dev/null +++ b/qa/deepsea/storage-profiles/bs_dedicated_db.yaml @@ -0,0 +1,15 @@ +overrides: + deepsea: + storage_profile: + ceph: + storage: + osds: + /dev/vdb: + format: bluestore + db: /dev/vde + /dev/vdc: + format: bluestore + db: /dev/vde + /dev/vdd: + format: bluestore + db: /dev/vde diff --git a/qa/deepsea/storage-profiles/bs_dedicated_db_crypt.yaml b/qa/deepsea/storage-profiles/bs_dedicated_db_crypt.yaml new file mode 100644 index 0000000000000..a9ea7b7a6fd35 --- /dev/null +++ b/qa/deepsea/storage-profiles/bs_dedicated_db_crypt.yaml @@ -0,0 +1,18 @@ +overrides: + deepsea: + storage_profile: + ceph: + storage: + osds: + /dev/vdb: + format: bluestore + encryption: dmcrypt + db: /dev/vde + /dev/vdc: + format: bluestore + encryption: dmcrypt + db: /dev/vde + /dev/vdd: + format: bluestore + encryption: dmcrypt + db: /dev/vde diff --git a/qa/deepsea/storage-profiles/bs_dedicated_db_sizes.yaml b/qa/deepsea/storage-profiles/bs_dedicated_db_sizes.yaml new file mode 100644 index 0000000000000..34a8d5f926ef2 --- /dev/null +++ b/qa/deepsea/storage-profiles/bs_dedicated_db_sizes.yaml @@ -0,0 +1,18 @@ +overrides: + deepsea: + storage_profile: + ceph: + storage: + osds: + /dev/vdb: + format: bluestore + db: /dev/vde + db_size: 1G + /dev/vdc: + format: bluestore + db: /dev/vde + db_size: 2G + /dev/vdd: + format: bluestore + db: /dev/vde + db_size: 3G diff --git a/qa/deepsea/storage-profiles/bs_dedicated_db_sizes_crypt.yaml b/qa/deepsea/storage-profiles/bs_dedicated_db_sizes_crypt.yaml new file mode 100644 index 0000000000000..3e08f56189ec2 --- /dev/null +++ b/qa/deepsea/storage-profiles/bs_dedicated_db_sizes_crypt.yaml @@ -0,0 +1,21 @@ 
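+# storage profile: three bluestore OSDs (vdb, vdc, vdd), all dmcrypt-encrypted,
+# each with a dedicated DB partition of a different size on /dev/vde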
+overrides: + deepsea: + storage_profile: + ceph: + storage: + osds: + /dev/vdb: + format: bluestore + encryption: dmcrypt + db: /dev/vde + db_size: 1G + /dev/vdc: + format: bluestore + encryption: dmcrypt + db: /dev/vde + db_size: 2G + /dev/vdd: + format: bluestore + encryption: dmcrypt + db: /dev/vde + db_size: 3G diff --git a/qa/deepsea/storage-profiles/bs_dedicated_db_sizes_mixed.yaml b/qa/deepsea/storage-profiles/bs_dedicated_db_sizes_mixed.yaml new file mode 100644 index 0000000000000..4f838bbff0add --- /dev/null +++ b/qa/deepsea/storage-profiles/bs_dedicated_db_sizes_mixed.yaml @@ -0,0 +1,17 @@ +overrides: + deepsea: + storage_profile: + ceph: + storage: + osds: + /dev/vdb: + format: bluestore + db: /dev/vde + db_size: 1G + /dev/vdc: + format: bluestore + db: /dev/vde + db_size: 2G + /dev/vdd: + format: bluestore + db: /dev/vde diff --git a/qa/deepsea/storage-profiles/bs_dedicated_db_sizes_mixed_crypt.yaml b/qa/deepsea/storage-profiles/bs_dedicated_db_sizes_mixed_crypt.yaml new file mode 100644 index 0000000000000..4f2f60e36c819 --- /dev/null +++ b/qa/deepsea/storage-profiles/bs_dedicated_db_sizes_mixed_crypt.yaml @@ -0,0 +1,20 @@ +overrides: + deepsea: + storage_profile: + ceph: + storage: + osds: + /dev/vdb: + format: bluestore + encryption: dmcrypt + db: /dev/vde + db_size: 1G + /dev/vdc: + format: bluestore + encryption: dmcrypt + db: /dev/vde + db_size: 2G + /dev/vdd: + format: bluestore + encryption: dmcrypt + db: /dev/vde diff --git a/qa/deepsea/storage-profiles/bs_dedicated_wal.yaml b/qa/deepsea/storage-profiles/bs_dedicated_wal.yaml new file mode 100644 index 0000000000000..7f6093df7a1ed --- /dev/null +++ b/qa/deepsea/storage-profiles/bs_dedicated_wal.yaml @@ -0,0 +1,15 @@ +overrides: + deepsea: + storage_profile: + ceph: + storage: + osds: + /dev/vdb: + format: bluestore + wal: /dev/vde + /dev/vdc: + format: bluestore + wal: /dev/vde + /dev/vdd: + format: bluestore + wal: /dev/vde diff --git a/qa/deepsea/storage-profiles/bs_dedicated_wal_crypt.yaml b/qa/deepsea/storage-profiles/bs_dedicated_wal_crypt.yaml new file mode 100644 index 0000000000000..df5e5ebee189f --- /dev/null +++ b/qa/deepsea/storage-profiles/bs_dedicated_wal_crypt.yaml @@ -0,0 +1,18 @@ +overrides: + deepsea: + storage_profile: + ceph: + storage: + osds: + /dev/vdb: + format: bluestore + encryption: dmcrypt + wal: /dev/vde + /dev/vdc: + format: bluestore + encryption: dmcrypt + wal: /dev/vde + /dev/vdd: + format: bluestore + encryption: dmcrypt + wal: /dev/vde diff --git a/qa/deepsea/storage-profiles/bs_dedicated_wal_db.yaml b/qa/deepsea/storage-profiles/bs_dedicated_wal_db.yaml new file mode 100644 index 0000000000000..61daf1a1c0315 --- /dev/null +++ b/qa/deepsea/storage-profiles/bs_dedicated_wal_db.yaml @@ -0,0 +1,14 @@ +overrides: + deepsea: + storage_profile: + ceph: + storage: + osds: + /dev/vdb: + format: bluestore + wal: /dev/vde + db: /dev/vdd + /dev/vdc: + format: bluestore + wal: /dev/vde + db: /dev/vdd diff --git a/qa/deepsea/storage-profiles/bs_dedicated_wal_db_crypt.yaml b/qa/deepsea/storage-profiles/bs_dedicated_wal_db_crypt.yaml new file mode 100644 index 0000000000000..07ea6bfc6332a --- /dev/null +++ b/qa/deepsea/storage-profiles/bs_dedicated_wal_db_crypt.yaml @@ -0,0 +1,16 @@ +overrides: + deepsea: + storage_profile: + ceph: + storage: + osds: + /dev/vdb: + format: bluestore + encryption: dmcrypt + wal: /dev/vde + db: /dev/vdd + /dev/vdc: + format: bluestore + encryption: dmcrypt + wal: /dev/vde + db: /dev/vdd diff --git a/qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_all.yaml 
b/qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_all.yaml new file mode 100644 index 0000000000000..8693a351d13e4 --- /dev/null +++ b/qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_all.yaml @@ -0,0 +1,18 @@ +overrides: + deepsea: + storage_profile: + ceph: + storage: + osds: + /dev/vdb: + format: bluestore + wal_size: 1G + wal: /dev/vde + db: /dev/vdd + db_size: 2G + /dev/vdc: + format: bluestore + wal: /dev/vde + db: /dev/vdd + wal_size: 3G + db_size: 4G diff --git a/qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_all_crypt.yaml b/qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_all_crypt.yaml new file mode 100644 index 0000000000000..a9c4aecb165ab --- /dev/null +++ b/qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_all_crypt.yaml @@ -0,0 +1,20 @@ +overrides: + deepsea: + storage_profile: + ceph: + storage: + osds: + /dev/vdb: + format: bluestore + encryption: dmcrypt + wal_size: 1G + wal: /dev/vde + db: /dev/vdd + db_size: 2G + /dev/vdc: + format: bluestore + encryption: dmcrypt + wal: /dev/vde + db: /dev/vdd + wal_size: 3G + db_size: 4G diff --git a/qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_mixed.yaml b/qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_mixed.yaml new file mode 100644 index 0000000000000..c4f2e147e2530 --- /dev/null +++ b/qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_mixed.yaml @@ -0,0 +1,16 @@ +overrides: + deepsea: + storage_profile: + ceph: + storage: + osds: + /dev/vdb: + format: bluestore + wal: /dev/vde + db: /dev/vdd + /dev/vdc: + format: bluestore + wal: /dev/vde + db: /dev/vdd + wal_size: 3G + db_size: 4G diff --git a/qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_mixed_crypt.yaml b/qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_mixed_crypt.yaml new file mode 100644 index 0000000000000..9a1f408fb939c --- /dev/null +++ b/qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_mixed_crypt.yaml @@ -0,0 +1,18 @@ +overrides: + deepsea: + storage_profile: + ceph: + storage: + osds: + /dev/vdb: + format: bluestore + encryption: dmcrypt + wal: /dev/vde + db: /dev/vdd + /dev/vdc: + format: bluestore + encryption: dmcrypt + wal: /dev/vde + db: /dev/vdd + wal_size: 3G + db_size: 4G diff --git a/qa/deepsea/storage-profiles/bs_dedicated_wal_sizes.yaml b/qa/deepsea/storage-profiles/bs_dedicated_wal_sizes.yaml new file mode 100644 index 0000000000000..b22f89616e4d8 --- /dev/null +++ b/qa/deepsea/storage-profiles/bs_dedicated_wal_sizes.yaml @@ -0,0 +1,18 @@ +overrides: + deepsea: + storage_profile: + ceph: + storage: + osds: + /dev/vdb: + format: bluestore + wal_size: 1G + wal: /dev/vde + /dev/vdc: + format: bluestore + wal: /dev/vde + wal_size: 2G + /dev/vdd: + format: bluestore + wal: /dev/vde + wal_size: 3G diff --git a/qa/deepsea/storage-profiles/bs_dedicated_wal_sizes_crypt.yaml b/qa/deepsea/storage-profiles/bs_dedicated_wal_sizes_crypt.yaml new file mode 100644 index 0000000000000..b5c02df46ee62 --- /dev/null +++ b/qa/deepsea/storage-profiles/bs_dedicated_wal_sizes_crypt.yaml @@ -0,0 +1,21 @@ +overrides: + deepsea: + storage_profile: + ceph: + storage: + osds: + /dev/vdb: + format: bluestore + encryption: dmcrypt + wal_size: 1G + wal: /dev/vde + /dev/vdc: + format: bluestore + encryption: dmcrypt + wal: /dev/vde + wal_size: 2G + /dev/vdd: + format: bluestore + encryption: dmcrypt + wal: /dev/vde + wal_size: 3G diff --git a/qa/deepsea/storage-profiles/bs_dedicated_wal_sizes_mixed.yaml b/qa/deepsea/storage-profiles/bs_dedicated_wal_sizes_mixed.yaml new file mode 100644 index 0000000000000..0897b6e01a315 --- 
/dev/null +++ b/qa/deepsea/storage-profiles/bs_dedicated_wal_sizes_mixed.yaml @@ -0,0 +1,17 @@ +overrides: + deepsea: + storage_profile: + ceph: + storage: + osds: + /dev/vdb: + format: bluestore + wal_size: 1G + wal: /dev/vde + /dev/vdc: + format: bluestore + wal: /dev/vde + wal_size: 2G + /dev/vdd: + format: bluestore + wal: /dev/vde diff --git a/qa/deepsea/storage-profiles/bs_dedicated_wal_sizes_mixed_crypt.yaml b/qa/deepsea/storage-profiles/bs_dedicated_wal_sizes_mixed_crypt.yaml new file mode 100644 index 0000000000000..f4c803382a082 --- /dev/null +++ b/qa/deepsea/storage-profiles/bs_dedicated_wal_sizes_mixed_crypt.yaml @@ -0,0 +1,20 @@ +overrides: + deepsea: + storage_profile: + ceph: + storage: + osds: + /dev/vdb: + format: bluestore + encryption: dmcrypt + wal_size: 1G + wal: /dev/vde + /dev/vdc: + format: bluestore + encryption: dmcrypt + wal: /dev/vde + wal_size: 2G + /dev/vdd: + format: bluestore + encryption: dmcrypt + wal: /dev/vde diff --git a/qa/deepsea/storage-profiles/fs_dedicated_journal.yaml b/qa/deepsea/storage-profiles/fs_dedicated_journal.yaml new file mode 100644 index 0000000000000..0b5b2513e119c --- /dev/null +++ b/qa/deepsea/storage-profiles/fs_dedicated_journal.yaml @@ -0,0 +1,15 @@ +overrides: + deepsea: + storage_profile: + ceph: + storage: + osds: + /dev/vdb: + format: filestore + journal: /dev/vde + /dev/vdc: + format: filestore + journal: /dev/vde + /dev/vdd: + format: filestore + journal: /dev/vde diff --git a/qa/deepsea/storage-profiles/fs_dedicated_journal_crypt.yaml b/qa/deepsea/storage-profiles/fs_dedicated_journal_crypt.yaml new file mode 100644 index 0000000000000..6c7d500a0f30a --- /dev/null +++ b/qa/deepsea/storage-profiles/fs_dedicated_journal_crypt.yaml @@ -0,0 +1,18 @@ +overrides: + deepsea: + storage_profile: + ceph: + storage: + osds: + /dev/vdb: + format: filestore + encryption: dmcrypt + journal: /dev/vde + /dev/vdc: + format: filestore + encryption: dmcrypt + journal: /dev/vde + /dev/vdd: + format: filestore + encryption: dmcrypt + journal: /dev/vde diff --git a/qa/suites/deepsea/.qa b/qa/suites/deepsea/.qa new file mode 120000 index 0000000000000..fea2489fdf6d9 --- /dev/null +++ b/qa/suites/deepsea/.qa @@ -0,0 +1 @@ +../.qa \ No newline at end of file diff --git a/qa/suites/deepsea/tier0/.qa b/qa/suites/deepsea/tier0/.qa new file mode 120000 index 0000000000000..fea2489fdf6d9 --- /dev/null +++ b/qa/suites/deepsea/tier0/.qa @@ -0,0 +1 @@ +../.qa \ No newline at end of file diff --git a/qa/suites/deepsea/tier0/salt/% b/qa/suites/deepsea/tier0/salt/% new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/qa/suites/deepsea/tier0/salt/.qa b/qa/suites/deepsea/tier0/salt/.qa new file mode 120000 index 0000000000000..fea2489fdf6d9 --- /dev/null +++ b/qa/suites/deepsea/tier0/salt/.qa @@ -0,0 +1 @@ +../.qa \ No newline at end of file diff --git a/qa/suites/deepsea/tier0/salt/0-salt.yaml b/qa/suites/deepsea/tier0/salt/0-salt.yaml new file mode 120000 index 0000000000000..4ee5639d36694 --- /dev/null +++ b/qa/suites/deepsea/tier0/salt/0-salt.yaml @@ -0,0 +1 @@ +.qa/deepsea/salt.yaml \ No newline at end of file diff --git a/qa/suites/deepsea/tier0/salt/boilerplate b/qa/suites/deepsea/tier0/salt/boilerplate new file mode 120000 index 0000000000000..a1e87ef7da830 --- /dev/null +++ b/qa/suites/deepsea/tier0/salt/boilerplate @@ -0,0 +1 @@ +.qa/deepsea/boilerplate/ \ No newline at end of file diff --git a/qa/suites/deepsea/tier0/salt/cluster/+ b/qa/suites/deepsea/tier0/salt/cluster/+ new file mode 100644 index 0000000000000..e69de29bb2d1d diff 
--git a/qa/suites/deepsea/tier0/salt/cluster/.qa b/qa/suites/deepsea/tier0/salt/cluster/.qa new file mode 120000 index 0000000000000..fea2489fdf6d9 --- /dev/null +++ b/qa/suites/deepsea/tier0/salt/cluster/.qa @@ -0,0 +1 @@ +../.qa \ No newline at end of file diff --git a/qa/suites/deepsea/tier0/salt/cluster/1disk.yaml b/qa/suites/deepsea/tier0/salt/cluster/1disk.yaml new file mode 120000 index 0000000000000..d94d2f86fe2b2 --- /dev/null +++ b/qa/suites/deepsea/tier0/salt/cluster/1disk.yaml @@ -0,0 +1 @@ +.qa/deepsea/disks/1disk.yaml \ No newline at end of file diff --git a/qa/suites/deepsea/tier0/salt/cluster/1node.yaml b/qa/suites/deepsea/tier0/salt/cluster/1node.yaml new file mode 120000 index 0000000000000..ebfbfefc3a5a7 --- /dev/null +++ b/qa/suites/deepsea/tier0/salt/cluster/1node.yaml @@ -0,0 +1 @@ +.qa/deepsea/nodes/1node.yaml \ No newline at end of file diff --git a/qa/suites/deepsea/tier0/salt/distros b/qa/suites/deepsea/tier0/salt/distros new file mode 120000 index 0000000000000..337a606a50afd --- /dev/null +++ b/qa/suites/deepsea/tier0/salt/distros @@ -0,0 +1 @@ +.qa/deepsea/distros/ \ No newline at end of file diff --git a/qa/suites/deepsea/tier1/.qa b/qa/suites/deepsea/tier1/.qa new file mode 120000 index 0000000000000..fea2489fdf6d9 --- /dev/null +++ b/qa/suites/deepsea/tier1/.qa @@ -0,0 +1 @@ +../.qa \ No newline at end of file diff --git a/qa/suites/deepsea/tier1/health-ok/% b/qa/suites/deepsea/tier1/health-ok/% new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/qa/suites/deepsea/tier1/health-ok/.qa b/qa/suites/deepsea/tier1/health-ok/.qa new file mode 120000 index 0000000000000..fea2489fdf6d9 --- /dev/null +++ b/qa/suites/deepsea/tier1/health-ok/.qa @@ -0,0 +1 @@ +../.qa \ No newline at end of file diff --git a/qa/suites/deepsea/tier1/health-ok/0-salt.yaml b/qa/suites/deepsea/tier1/health-ok/0-salt.yaml new file mode 120000 index 0000000000000..4ee5639d36694 --- /dev/null +++ b/qa/suites/deepsea/tier1/health-ok/0-salt.yaml @@ -0,0 +1 @@ +.qa/deepsea/salt.yaml \ No newline at end of file diff --git a/qa/suites/deepsea/tier1/health-ok/1-deploy-phase.yaml b/qa/suites/deepsea/tier1/health-ok/1-deploy-phase.yaml new file mode 120000 index 0000000000000..d1c469a349bcf --- /dev/null +++ b/qa/suites/deepsea/tier1/health-ok/1-deploy-phase.yaml @@ -0,0 +1 @@ +.qa/deepsea/deepsea.yaml \ No newline at end of file diff --git a/qa/suites/deepsea/tier1/health-ok/2-test-phase.yaml b/qa/suites/deepsea/tier1/health-ok/2-test-phase.yaml new file mode 100644 index 0000000000000..1f0c720c601a6 --- /dev/null +++ b/qa/suites/deepsea/tier1/health-ok/2-test-phase.yaml @@ -0,0 +1,8 @@ +tasks: + - exec: + client.salt_master: + - 'ceph -s' + - deepsea.validation: + - deepsea.toolbox: + assert_bluestore: + osd.0: diff --git a/qa/suites/deepsea/tier1/health-ok/boilerplate b/qa/suites/deepsea/tier1/health-ok/boilerplate new file mode 120000 index 0000000000000..a1e87ef7da830 --- /dev/null +++ b/qa/suites/deepsea/tier1/health-ok/boilerplate @@ -0,0 +1 @@ +.qa/deepsea/boilerplate/ \ No newline at end of file diff --git a/qa/suites/deepsea/tier1/health-ok/cluster/+ b/qa/suites/deepsea/tier1/health-ok/cluster/+ new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/qa/suites/deepsea/tier1/health-ok/cluster/.qa b/qa/suites/deepsea/tier1/health-ok/cluster/.qa new file mode 120000 index 0000000000000..fea2489fdf6d9 --- /dev/null +++ b/qa/suites/deepsea/tier1/health-ok/cluster/.qa @@ -0,0 +1 @@ +../.qa \ No newline at end of file diff --git 
a/qa/suites/deepsea/tier1/health-ok/cluster/4disks.yaml b/qa/suites/deepsea/tier1/health-ok/cluster/4disks.yaml new file mode 120000 index 0000000000000..e21aaff328ffa --- /dev/null +++ b/qa/suites/deepsea/tier1/health-ok/cluster/4disks.yaml @@ -0,0 +1 @@ +.qa/deepsea/disks/4disks.yaml \ No newline at end of file diff --git a/qa/suites/deepsea/tier1/health-ok/cluster/roles.yaml b/qa/suites/deepsea/tier1/health-ok/cluster/roles.yaml new file mode 100644 index 0000000000000..f42d43c8f0049 --- /dev/null +++ b/qa/suites/deepsea/tier1/health-ok/cluster/roles.yaml @@ -0,0 +1,2 @@ +roles: + - [client.salt_master, mon.a, mgr.x, osd.0, prometheus.p, grafana.g] diff --git a/qa/suites/deepsea/tier1/health-ok/deepsea_cli_off.yaml b/qa/suites/deepsea/tier1/health-ok/deepsea_cli_off.yaml new file mode 100644 index 0000000000000..a2beb7f3e0796 --- /dev/null +++ b/qa/suites/deepsea/tier1/health-ok/deepsea_cli_off.yaml @@ -0,0 +1,3 @@ +overrides: + deepsea: + cli: false diff --git a/qa/suites/deepsea/tier1/health-ok/distros b/qa/suites/deepsea/tier1/health-ok/distros new file mode 120000 index 0000000000000..513ff61af3858 --- /dev/null +++ b/qa/suites/deepsea/tier1/health-ok/distros @@ -0,0 +1 @@ +.qa/deepsea/distros \ No newline at end of file diff --git a/qa/suites/suse/.qa b/qa/suites/suse/.qa new file mode 120000 index 0000000000000..fea2489fdf6d9 --- /dev/null +++ b/qa/suites/suse/.qa @@ -0,0 +1 @@ +../.qa \ No newline at end of file diff --git a/qa/suites/suse/tier0 b/qa/suites/suse/tier0 new file mode 120000 index 0000000000000..06233b177ec30 --- /dev/null +++ b/qa/suites/suse/tier0 @@ -0,0 +1 @@ +../deepsea/tier0 \ No newline at end of file diff --git a/qa/suites/suse/tier1 b/qa/suites/suse/tier1 new file mode 120000 index 0000000000000..5be3d9a32a6c1 --- /dev/null +++ b/qa/suites/suse/tier1 @@ -0,0 +1 @@ +../deepsea/tier1 \ No newline at end of file diff --git a/qa/tasks/deepsea.py b/qa/tasks/deepsea.py new file mode 100644 index 0000000000000..3c81eb5b1dc02 --- /dev/null +++ b/qa/tasks/deepsea.py @@ -0,0 +1,2019 @@ +""" +Task (and subtasks) for automating deployment of Ceph using DeepSea + +Linter: + flake8 --max-line-length=100 +""" +import logging +import time +import yaml + +from salt_manager import SaltManager +from scripts import Scripts +from teuthology import misc +from util import ( + copy_directory_recursively, + enumerate_osds, + get_remote_for_role, + get_rpm_pkg_version, + introspect_roles, + remote_exec, + remote_run_script_as_root, + sudo_append_to_file, + ) + +from teuthology.exceptions import ( + CommandFailedError, + ConfigError, + ) +from teuthology.orchestra import run +from teuthology.task import Task +from teuthology.contextutil import safe_while + +log = logging.getLogger(__name__) +deepsea_ctx = {} +proposals_dir = "/srv/pillar/ceph/proposals" +reboot_tries = 30 + + +def anchored(log_message): + global deepsea_ctx + assert 'log_anchor' in deepsea_ctx, "deepsea_ctx not populated" + return "{}{}".format(deepsea_ctx['log_anchor'], log_message) + + +def dump_file_that_might_not_exist(remote, fpath): + try: + remote.run(args="cat {}".format(fpath)) + except CommandFailedError: + pass + + +class DeepSea(Task): + """ + Install DeepSea on the Salt Master node. + + Assumes a Salt cluster is already running (use the Salt task to achieve + this). 
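+
+ In outline, the task installs DeepSea on the Salt Master (from source or
+ from packages), sets the deepsea_minions pillar item, and applies any
+ alternative defaults to the Salt Pillar before the orchestration subtasks run.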
+ + This task understands the following config keys which apply to + this task and all its subtasks: + + allow_python2: (default: True) + whether to continue if Python 2 is installed anywhere + in the test cluster + alternative_defaults: (default: empty) + a dictionary of DeepSea alternative defaults + to be activated via the Salt Pillar + cli: + true deepsea CLI will be used (the default) + false deepsea CLI will not be used + dashboard_ssl: + true deploy MGR dashboard module with SSL (the default) + false deploy MGR dashboard module *without* SSL + log_anchor a string (default: "WWWW: ") which will precede + log messages emitted at key points during the + deployment + quiet_salt: + true suppress stderr on salt commands (the default) + false let salt commands spam the log + rgw_ssl: + true use SSL if RGW is deployed + false if RGW is deployed, do not use SSL (the default) + drive_group: + default if a teuthology osd role is present on a node, + DeepSea will tell ceph-volume to make all available + disks into standalone OSDs + teuthology populate DeepSea storage profile for 1:1 mapping + between teuthology osd roles and actual osds + deployed (the default, but not yet implemented) + (dict) a dictionary is assumed to be a custom drive group + (yaml blob) to be passed verbatim to ceph-volume + + This task also understands the following config keys that affect + the behavior of just this one task (no effect on subtasks): + + repo: (git repo for initial DeepSea install, e.g. + "https://github.com/SUSE/DeepSea.git") + branch: (git branch for initial deepsea install, e.g. "master") + install: + package|pkg deepsea will be installed via package system + source|src deepsea will be installed via 'make install' (default) + upgrade_install: + package|pkg post-upgrade deepsea will be installed via package system + source|src post-upgrade deepsea will be installed via 'make install' (default) + upgrade_repo: (git repo for DeepSea re-install/upgrade - used by second + invocation of deepsea task only) + upgrade_branch: (git branch for DeepSea re-install/upgrade - used by + second invocation of deepsea task only) + + Example: + + tasks + - deepsea: + repo: https://github.com/SUSE/DeepSea.git + branch: wip-foo + install: source + + :param ctx: the argparse.Namespace object + :param config: the config dict + """ + + err_prefix = "(deepsea task) " + + log_anchor_str = "WWWW: " + + def __init__(self, ctx, config): + global deepsea_ctx + super(DeepSea, self).__init__(ctx, config) + if deepsea_ctx: + # context already populated (we are in a subtask, or a + # re-invocation of the deepsea task) + self.log = deepsea_ctx['logger_obj'] + if type(self).__name__ == 'DeepSea': + # The only valid reason for a second invocation of the deepsea + # task is to upgrade DeepSea (actually reinstall it) + deepsea_ctx['reinstall_deepsea'] = True + # deepsea_ctx['install_method'] is the _initial_ install method from the + # first invocation. If initial install was from package, the + # package must be removed for reinstall from source to work. + # If reinstall method is 'package', removing the package here + # will not hurt anything. 
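+ # (A job whose YAML lists the deepsea task twice, the second time with
+ # upgrade_install/upgrade_repo/upgrade_branch, exercises this reinstall path.)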
+ if deepsea_ctx['install_method'] == 'package': + deepsea_ctx['master_remote'].run(args=[ + 'sudo', + 'zypper', + '--non-interactive', + '--no-gpg-checks', + 'remove', + 'deepsea', + 'deepsea-qa', + run.Raw('||'), + 'true' + ]) + install_key = 'install' + upgrade_install = self.config.get('upgrade_install', '') + if upgrade_install: + install_key = 'upgrade_install' + self.__populate_install_method_basic(install_key) + if not deepsea_ctx: + # populating context (we are *not* in a subtask) + deepsea_ctx['logger_obj'] = log + self.ctx['roles'] = self.ctx.config['roles'] + self.log = log + self._populate_deepsea_context() + introspect_roles(self.ctx, self.log, quiet=False) + self.allow_python2 = deepsea_ctx['allow_python2'] + self.alternative_defaults = deepsea_ctx['alternative_defaults'] + self.dashboard_ssl = deepsea_ctx['dashboard_ssl'] + self.deepsea_cli = deepsea_ctx['cli'] + self.dev_env = self.ctx['dev_env'] + self.install_method = deepsea_ctx['install_method'] + self.log_anchor = deepsea_ctx['log_anchor'] + self.master_remote = deepsea_ctx['master_remote'] + self.nodes = self.ctx['nodes'] + self.nodes_storage = self.ctx['nodes_storage'] + self.nodes_storage_only = self.ctx['nodes_storage_only'] + self.quiet_salt = deepsea_ctx['quiet_salt'] + self.remotes = self.ctx['remotes'] + self.reinstall_deepsea = deepsea_ctx.get('reinstall_deepsea', False) + self.repositories = deepsea_ctx['repositories'] + self.rgw_ssl = deepsea_ctx['rgw_ssl'] + self.roles = self.ctx['roles'] + self.role_types = self.ctx['role_types'] + self.role_lookup_table = self.ctx['role_lookup_table'] + self.scripts = Scripts(self.ctx, self.log) + self.sm = deepsea_ctx['salt_manager_instance'] + self.drive_group = deepsea_ctx['drive_group'] + # self.log.debug("ctx.config {}".format(ctx.config)) + # self.log.debug("deepsea context: {}".format(deepsea_ctx)) + + def __install_deepsea_from_source(self): + info_msg_prefix = 'Reinstalling' if self.reinstall_deepsea else 'Installing' + info_msg = info_msg_prefix + ' deepsea from source' + self.log.info(anchored(info_msg)) + if self.sm.master_rpm_q('deepsea'): + self.log.info("DeepSea already installed from RPM") + return None + upgrade_repo = self.config.get('upgrade_repo', '') + upgrade_branch = self.config.get('upgrade_branch', '') + repo = self.config.get('repo', 'https://github.com/SUSE/DeepSea.git') + branch = self.config.get('branch', 'master') + if self.reinstall_deepsea: + if upgrade_repo: + repo = upgrade_repo + if upgrade_branch: + branch = upgrade_branch + self.log.info( + "{} - repo: {}, branch: {}" + .format(info_msg, repo, branch) + ) + self.master_remote.run(args=[ + 'sudo', + 'rm', + '-rf', + 'DeepSea', + run.Raw(';'), + 'git', + '--version', + run.Raw(';'), + 'git', + 'clone', + '--branch', + branch, + repo, + run.Raw(';'), + 'cd', + 'DeepSea', + run.Raw(';'), + 'git', + 'rev-parse', + '--abbrev-ref', + 'HEAD', + run.Raw(';'), + 'git', + 'rev-parse', + 'HEAD', + run.Raw(';'), + 'git', + 'describe', + run.Raw('||'), + 'true', + ]) + self.log.info("Running \"make install\" in DeepSea clone...") + self.master_remote.run(args=[ + 'cd', + 'DeepSea', + run.Raw(';'), + 'sudo', + 'make', + 'install', + ]) + self.log.info("installing deepsea dependencies...") + rpmspec_cmd = ( + '$(rpmspec --requires -q DeepSea/deepsea.spec.in 2>/dev/null)' + ) + self.master_remote.run(args=[ + 'sudo', + 'zypper', + '--non-interactive', + 'install', + '--no-recommends', + run.Raw(rpmspec_cmd) + ]) + + def __install_deepsea_using_zypper(self): + info_msg_prefix = 'Reinstalling' if 
self.reinstall_deepsea else 'Installing' + info_msg = info_msg_prefix + ' deepsea using zypper' + self.log.info(anchored(info_msg)) + self.master_remote.run(args=[ + 'sudo', + 'zypper', + '--non-interactive', + 'search', + '--details', + 'deepsea' + ]) + self.master_remote.run(args=[ + 'sudo', + 'zypper', + '--non-interactive', + '--no-gpg-checks', + 'install', + '--force', + '--no-recommends', + 'deepsea', + 'deepsea-cli', + 'deepsea-qa' + ]) + + def _deepsea_minions(self): + """ + Set deepsea_minions pillar value + """ + deepsea_minions_sls = '/srv/pillar/ceph/deepsea_minions.sls' + content = "deepsea_minions: \'*\'" + self.log.info("Clobbering {} with content ->{}<-".format( + deepsea_minions_sls, content)) + cmd = 'sudo tee {}'.format(deepsea_minions_sls) + self.master_remote.sh(cmd, stdin=content) + + def _deepsea_version(self): + if self.deepsea_cli: + try: + self.master_remote.run(args=[ + 'type', + 'deepsea', + run.Raw('>'), + '/dev/null', + run.Raw('2>&1'), + ]) + except CommandFailedError: + raise ConfigError(self.err_prefix + "Test case calls for " + "deepsea CLI, but it is not installed") + self.master_remote.run(args='deepsea --version') + else: + cmd_str = "sudo salt-run deepsea.version" + if self.quiet_salt: + cmd_str += " 2>/dev/null" + self.master_remote.run(args=cmd_str) + + def _disable_gpg_checks(self): + cmd = ( + 'sed -i -e \'/gpgcheck/ d\' /etc/zypp/repos.d/* ; ' + 'sed -i -e \'/gpgkey/ d\' /etc/zypp/repos.d/* ; ' + 'sed -i -e \'$a gpgcheck=0\' /etc/zypp/repos.d/*' + ) + self.ctx.cluster.run(args=[ + 'sudo', 'sh', '-c', cmd + ]) + + def _install_deepsea(self): + global deepsea_ctx + install_method = deepsea_ctx['install_method'] + if install_method == 'package': + self.__install_deepsea_using_zypper() + elif install_method == 'source': + self.__install_deepsea_from_source() + else: + raise ConfigError(self.err_prefix + "internal error") + deepsea_ctx['deepsea_installed'] = True + + def _master_python_version(self, py_version): + """ + Determine if a given python version is installed on the Salt Master + node. 
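+
+ Returns True if it is installed, False otherwise.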
+ """ + python_binary = 'python{}'.format(py_version) + installed = True + try: + self.master_remote.run(args=[ + 'type', + python_binary, + run.Raw('>'), + '/dev/null', + run.Raw('2>&1'), + ]) + except CommandFailedError: + installed = False + if installed: + self.master_remote.run(args=[ + python_binary, + '--version' + ]) + else: + self.log.info( + '{} not installed on master node'.format(python_binary) + ) + return installed + + def _maybe_apply_alternative_defaults(self): + global_yml = '/srv/pillar/ceph/stack/global.yml' + if self.alternative_defaults: + self.log.info(anchored("Applying alternative defaults")) + data = '' + for k, v in self.alternative_defaults.items(): + data += "{}: {}\n".format(k, v) + if data: + sudo_append_to_file( + self.master_remote, + global_yml, + data, + ) + dump_file_that_might_not_exist(self.master_remote, global_yml) + + def _populate_deepsea_context(self): + global deepsea_ctx + deepsea_ctx['allow_python2'] = self.config.get('allow_python2', True) + deepsea_ctx['alternative_defaults'] = self.config.get('alternative_defaults', {}) + if not isinstance(deepsea_ctx['alternative_defaults'], dict): + raise ConfigError(self.err_prefix + "alternative_defaults must be a dict") + deepsea_ctx['cli'] = self.config.get('cli', True) + deepsea_ctx['dashboard_ssl'] = self.config.get('dashboard_ssl', True) + deepsea_ctx['log_anchor'] = self.config.get('log_anchor', self.log_anchor_str) + if not isinstance(deepsea_ctx['log_anchor'], str): + self.log.warning( + "log_anchor was set to non-string value ->{}<-, " + "changing to empty string" + .format(deepsea_ctx['log_anchor']) + ) + deepsea_ctx['log_anchor'] = '' + deepsea_ctx['drive_group'] = self.config.get("drive_group", "teuthology") + deepsea_ctx['quiet_salt'] = self.config.get('quiet_salt', True) + deepsea_ctx['salt_manager_instance'] = SaltManager(self.ctx) + deepsea_ctx['master_remote'] = ( + deepsea_ctx['salt_manager_instance'].master_remote + ) + deepsea_ctx['repositories'] = self.config.get("repositories", None) + deepsea_ctx['rgw_ssl'] = self.config.get('rgw_ssl', False) + self.__populate_install_method('install') + + def __populate_install_method_basic(self, key): + if self.config[key] in ['package', 'pkg']: + deepsea_ctx['install_method'] = 'package' + elif self.config[key] in ['source', 'src']: + deepsea_ctx['install_method'] = 'source' + else: + raise ConfigError(self.err_prefix + "Unrecognized {} config " + "value ->{}<-".format(key, self.config[key])) + + def __populate_install_method(self, key): + if key in self.config: + self.__populate_install_method_basic(key) + else: + if 'repo' in self.config or 'branch' in self.config: + deepsea_ctx['install_method'] = 'source' + else: + deepsea_ctx['install_method'] = 'package' + + def _purge_osds(self): + # needed as long as teuthology install task purges /var/lib/ceph + # in its teardown phase + for _remote in self.ctx.cluster.remotes.keys(): + self.log.info("stopping OSD services on {}" + .format(_remote.hostname)) + _remote.run(args=[ + 'sudo', 'sh', '-c', + 'systemctl stop ceph-osd.target ; sleep 10' + ]) + self.log.info("unmounting OSD partitions on {}" + .format(_remote.hostname)) + # unmount up to five OSDs + # bluestore XFS partition is vd?1 + # filestore XFS partition is vd?2 + for_loop = ( + 'for f in vdb{pn} vdc{pn} vdd{pn} vde{pn} vdf{pn} ; ' + 'do test -b /dev/$f && umount /dev/$f || true ; ' + 'done' + ) + for pn in [1, 2]: + _remote.run(args=['sudo', 'sh', '-c', for_loop.format(pn=pn)]) + + def first_storage_only_node(self): + if 
self.nodes_storage_only: + return self.nodes_storage_only[0] + else: + return None + + def os_type_and_version(self): + os_type = self.ctx.config.get('os_type', 'unknown') + os_version = float(self.ctx.config.get('os_version', 0)) + return (os_type, os_version) + + def reboot_a_single_machine_now(self, remote, log_spec=None): + global reboot_tries + if not log_spec: + log_spec = "node {} reboot now".format(remote.hostname) + cmd_str = "sudo reboot" + remote_exec( + remote, + cmd_str, + self.log, + log_spec, + rerun=False, + quiet=True, + tries=reboot_tries, + ) + + def reboot_the_cluster_now(self, log_spec=None): + global reboot_tries + if not log_spec: + log_spec = "all nodes reboot now" + cmd_str = "salt \\* cmd.run reboot" + if self.quiet_salt: + cmd_str += " 2> /dev/null" + remote_exec( + self.master_remote, + cmd_str, + self.log, + log_spec, + rerun=False, + quiet=True, + tries=reboot_tries, + ) + self.sm.ping_minions() + + def role_type_present(self, role_type): + """ + Method for determining if _any_ test node has the given role type + (teuthology role, not DeepSea role). Examples: "osd", "mon" (not + "mon.a"). + + If the role type is present, returns the hostname of the first remote + with that role type. + + If the role type is absent, returns the empty string. + """ + role_dict = self.role_lookup_table.get(role_type, {}) + host = role_dict[role_dict.keys()[0]] if role_dict else '' + return host + + # Teuthology iterates through the tasks stanza twice: once to "execute" + # the tasks and a second time to "unwind" them. During the first pass + # it pushes each task onto a stack, and during the second pass it "unwinds" + # the stack, with the result being that the tasks are unwound in reverse + # order. During the execution phase it calls three methods: the + # constructor, setup(), and begin() - in that order -, and during the + # unwinding phase it calls end() and teardown() - in that order. + + # The task does not have to implement any of the methods. If not + # implemented, the method in question will be called via inheritance. + # If a method _is_ implemented, the implementation can optionally call + # the parent's implementation of that method as well. This is illustrated + # here: + def setup(self): + # self.log.debug("beginning of setup method") + super(DeepSea, self).setup() + pass + # self.log.debug("end of setup method") + + def begin(self): + global deepsea_ctx + super(DeepSea, self).begin() + if self.reinstall_deepsea: + self._install_deepsea() + return None + self.sm.master_rpm_q('ceph') + self.sm.master_rpm_q('ceph-test') + self.sm.master_rpm_q('salt-master') + self.sm.master_rpm_q('salt-minion') + self.sm.master_rpm_q('salt-api') + # the Salt Master node is assumed to be running an already + # configured chrony for time synchronization within the cluster + # and DeepSea Stage 3 will point the minions at the Salt Master's + # chrony instance (?) + self.sm.master_rpm_q('chrony') + self.master_remote.run( + args="sudo systemctl status --lines=0 chronyd.service" + ) + if self.allow_python2: + self._master_python_version(2) + else: + self.log.info( + 'allow_python2 is set to \'false\'. That means the ' + 'test will now fail if a python2 binary is found on ' + 'any of the test machines.' 
+ ) + self.ctx.cluster.run(args='if type python2 ; then false ; else true ; fi') + if not self._master_python_version(3): + raise ConfigError(self.err_prefix + "Python 3 not installed on master node") + if 'deepsea_installed' not in deepsea_ctx: + self._disable_gpg_checks() + self.master_remote.run(args="zypper lr -upEP") + self._install_deepsea() + assert deepsea_ctx['deepsea_installed'] + self._deepsea_version() + self._deepsea_minions() + self._maybe_apply_alternative_defaults() + # Stage 0 does this, but we have no guarantee Stage 0 will run + self.sm.sync_pillar_data(quiet=self.quiet_salt) + + def end(self): + self.log.debug("beginning of end method") + super(DeepSea, self).end() + success = self.ctx.summary.get('success', None) + if success is None: + self.log.warning("Problem with ctx summary key? ctx is {}".format(self.ctx)) + if not success: + self.ctx.cluster.run(args="rpm -qa | sort") + self.sm.gather_logs('/home/farm/.npm/_logs', 'dashboard-e2e-npm') + self.sm.gather_logs('/home/farm/.protractor-report', 'dashboard-e2e-protractor') + self.log.debug("end of end method") + + def teardown(self): + self.log.debug("beginning of teardown method") + super(DeepSea, self).teardown() + # # + # # the install task does "rm -r /var/lib/ceph" on every test node, + # # and that fails when there are OSDs running + # # FIXME - deprecated, remove after awhile + # self._purge_osds() + self.log.debug("end of teardown method") + + +class CephConf(DeepSea): + """ + Adds custom options to ceph.conf. + Edit yaml file between stage 2 and 3. + Example: + - deepsea.orch: + stage: 2 + - deepsea.ceph_conf: + global: + mon lease: 15 + mon lease ack timeout: 25 + mon: + debug mon: 20 + osd: + debug filestore: 20 + - deepsea.orch: + stage: 3 + """ + + customize = { + "client": "client.conf", + "global": "global.conf", + "mds": "mds.conf", + "mgr": "mgr.conf", + "mon": "mon.conf", + "osd": "osd.conf", + } + + deepsea_configuration_files = '/srv/salt/ceph/configuration/files' + + err_prefix = "(ceph_conf subtask) " + + targets = { + "mon_allow_pool_delete": True, + "osd_memory_target": True, + "small_cluster": True, + "rbd": False, + } + + def __init__(self, ctx, config): + global deepsea_ctx + deepsea_ctx['logger_obj'] = log.getChild('ceph_conf') + self.name = 'deepsea.ceph_conf' + super(CephConf, self).__init__(ctx, config) + self.log.debug("munged config is {}".format(self.config)) + + def __ceph_conf_d_full_path(self, section): + ceph_conf_d = self.deepsea_configuration_files + '/ceph.conf.d' + if section in self.customize.keys(): + return "{}/{}".format(ceph_conf_d, self.customize[section]) + + def __custom_ceph_conf(self, section, customizations): + for conf_item, conf_value in customizations.items(): + data = '{} = {}\n'.format(conf_item, conf_value) + sudo_append_to_file( + self.master_remote, + self.__ceph_conf_d_full_path(section), + data + ) + self.log.info( + "Adding to ceph.conf, {} section: {}" + .format(section, data) + ) + + def _customizations(self): + for section in self.customize.keys(): + if section in self.config and isinstance(self.config[section], dict): + self.__custom_ceph_conf(section, self.config[section]) + + def _dump_customizations(self): + for section in self.customize.keys(): + path = self.__ceph_conf_d_full_path(section) + dump_file_that_might_not_exist(self.master_remote, path) + + def _list_ceph_conf_d(self): + self.master_remote.run( + args="ls -l {}".format(self.deepsea_configuration_files) + ) + + def _targets(self): + for target, default in self.targets.items(): + method = 
getattr(self, target, None) + assert method, "target ->{}<- has no method".format(target) + if target in self.config: + method() + else: + if default: + method() + + def mon_allow_pool_delete(self): + info_msg = "adjusted ceph.conf to allow pool deletes" + data = "mon allow pool delete = true\n" + sudo_append_to_file( + self.master_remote, + self.__ceph_conf_d_full_path("mon"), + data, + ) + self.log.info(info_msg) + + def osd_memory_target(self): + info_msg = "lowered osd_memory_target to 1GiB to facilitate testing in OpenStack" + data = "osd memory target = 1105322466" # https://tracker.ceph.com/issues/37507#note-4 + sudo_append_to_file( + self.master_remote, + self.__ceph_conf_d_full_path("osd"), + data, + ) + self.log.info(info_msg) + + def rbd(self): + """ + Delete "rbd default features" from ceph.conf. By removing this line, we + ensure that there will be no explicit "rbd default features" setting, + so the default will be used. + """ + info_msg = "adjusted ceph.conf by removing 'rbd default features' line" + rbd_conf = '/srv/salt/ceph/configuration/files/rbd.conf' + cmd = 'sudo sed -i \'/^rbd default features =/d\' {}'.format(rbd_conf) + self.master_remote.run(args=cmd) + self.log.info(info_msg) + + def small_cluster(self): + """ + Apply necessary ceph.conf for small clusters + """ + storage_nodes = len(self.nodes_storage) + info_msg = ( + "adjusted ceph.conf for operation with {} storage node(s)" + .format(storage_nodes) + ) + data = None + if storage_nodes == 1: + data = ( + "mon pg warn min per osd = 16\n" + "osd pool default size = 2\n" + "osd crush chooseleaf type = 0 # failure domain == osd\n" + ) + elif storage_nodes == 2 or storage_nodes == 3: + data = ( + "mon pg warn min per osd = 8\n" + "osd pool default size = 2\n" + ) + if data: + sudo_append_to_file( + self.master_remote, + self.__ceph_conf_d_full_path("global"), + data, + ) + self.log.info(info_msg) + + def begin(self): + self.log.info(anchored("Adding custom options to ceph.conf")) + self._targets() + self._customizations() + self._list_ceph_conf_d() + self._dump_customizations() + + def end(self): + pass + + def teardown(self): + pass + + +class CreatePools(DeepSea): + + err_prefix = "(create_pools subtask) " + + def __init__(self, ctx, config): + global deepsea_ctx + deepsea_ctx['logger_obj'] = log.getChild('create_pools') + self.name = 'deepsea.create_pools' + super(CreatePools, self).__init__(ctx, config) + if not isinstance(self.config, dict): + raise ConfigError(self.err_prefix + "config must be a dictionary") + + def begin(self): + self.log.info(anchored("pre-creating pools")) + args = [] + for key in self.config: + if self.config[key] is None: + self.config[key] = True + if self.config[key]: + args.append(key) + args = list(set(args)) + self.scripts.run( + self.master_remote, + 'create_all_pools_at_once.sh', + args=args, + ) + + def end(self): + pass + + def teardown(self): + pass + + +class Dummy(DeepSea): + + def __init__(self, ctx, config): + global deepsea_ctx + deepsea_ctx['logger_obj'] = log.getChild('dummy') + self.name = 'deepsea.dummy' + super(Dummy, self).__init__(ctx, config) + self.log.debug("munged config is {}".format(self.config)) + + def begin(self): + self.log.debug("beginning of begin method") + global deepsea_ctx + self.log.info("deepsea_ctx == {}".format(deepsea_ctx)) + self.log.debug("end of begin method") + + def end(self): + pass + + def teardown(self): + pass + + +class HealthOK(DeepSea): + """ + Copy health_ok.sh to Salt Master node and run commands. 
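+
+ The scripts are taken from the suite's deepsea/health-ok directory, which is
+ copied to the Salt Master node the first time this subtask runs.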
+ + This task understands the following config key: + + commands: + [list of health-ok.sh commands] + + + The list of commands will be executed as root on the Salt Master node. + """ + + err_prefix = "(health_ok subtask) " + + prefix = 'health-ok/' + + def __init__(self, ctx, config): + global deepsea_ctx + deepsea_ctx['logger_obj'] = log.getChild('health_ok') + self.name = 'deepsea.health_ok' + super(HealthOK, self).__init__(ctx, config) + + def _copy_health_ok(self): + """ + Copy health-ok.sh from teuthology VM to master_remote + """ + global deepsea_ctx + suite_path = self.ctx.config.get('suite_path') + log.info("suite_path is ->{}<-".format(suite_path)) + misc.sh("ls -l {}".format(suite_path)) + health_ok_path = suite_path + "/deepsea/health-ok" + misc.sh("test -d " + health_ok_path) + copy_directory_recursively( + health_ok_path, self.master_remote, "health-ok") + self.master_remote.run(args="pwd ; ls -lR health-ok") + deepsea_ctx['health_ok_copied'] = True + + def _maybe_run_commands(self, commands): + if not commands: + self.log.warning( + "The health_ok task was run, but no commands were specified. " + "Doing nothing." + ) + return None + for cmd_str in commands: + if not isinstance(cmd_str, str): + raise ConfigError( + self.err_prefix + + "command ->{}<- is not a string".format(cmd_str) + ) + if cmd_str.startswith('health-ok.sh'): + cmd_str = self.prefix + cmd_str + if self.dev_env: + cmd_str = 'DEV_ENV=true ' + cmd_str + if self.deepsea_cli: + cmd_str += ' --cli' + if self.rgw_ssl: + cmd_str += ' --ssl' + self.master_remote.run(args=[ + 'sudo', 'bash', '-c', cmd_str, + ]) + + def setup(self): + global deepsea_ctx + if 'health_ok_copied' not in deepsea_ctx: + self._copy_health_ok() + assert deepsea_ctx['health_ok_copied'] + + def begin(self): + commands = self.config.get('commands', []) + if not isinstance(commands, list): + raise ConfigError(self.err_prefix + "commands must be a list") + self._maybe_run_commands(commands) + + def end(self): + pass + + def teardown(self): + pass + + +class Orch(DeepSea): + + all_stages = [ + "0", "prep", "1", "discovery", "2", "configure", "3", "deploy", + "4", "services", "5", "removal", "cephfs", "ganesha", "iscsi", + "openattic", "openstack", "radosgw", "validate" + ] + + err_prefix = "(orch subtask) " + + stage_synonyms = { + 0: 'prep', + 1: 'discovery', + 2: 'configure', + 3: 'deploy', + 4: 'services', + 5: 'removal', + } + + def __init__(self, ctx, config): + global deepsea_ctx + deepsea_ctx['logger_obj'] = log.getChild('orch') + self.name = 'deepsea.orch' + super(Orch, self).__init__(ctx, config) + self.stage = str(self.config.get("stage", '')) + self.state_orch = str(self.config.get("state_orch", '')) + self.reboots_explicitly_forbidden = not self.config.get("allow_reboots", True) + self.survive_reboots = self._detect_reboots() + if not self.stage and not self.state_orch: + raise ConfigError( + self.err_prefix + + "nothing to do. 
Specify a value for 'stage' or " + "'state_orch' key in config dict" + ) + if self.stage and self.stage not in self.all_stages: + raise ConfigError( + self.err_prefix + + "unrecognized Stage ->{}<-".format(self.stage) + ) + self.log.debug("munged config is {}".format(self.config)) + + def __ceph_health_test(self): + cmd = 'sudo salt-call wait.until status=HEALTH_OK timeout=900 check=1' + if self.quiet_salt: + cmd += ' 2> /dev/null' + self.master_remote.run(args=cmd) + + def __check_ceph_test_rpm_version(self): + """Checks rpm version for ceph and ceph-test; logs warning if differs""" + ceph_test_ver = get_rpm_pkg_version(self.master_remote, "ceph-test", self.log) + ceph_ver = get_rpm_pkg_version(self.master_remote, "ceph", self.log) + if ceph_test_ver != ceph_ver: + self.log.warning( + "ceph-test rpm version: {} differs from ceph version: {}" + .format(ceph_test_ver, ceph_ver)) + + def __check_salt_api_service(self): + base_cmd = 'sudo systemctl status --full --lines={} {}.service' + try: + self.master_remote.run(args=base_cmd.format('0', 'salt-api')) + except CommandFailedError: + self.master_remote.run(args=base_cmd.format('100', 'salt-api')) + raise + self.scripts.run( + self.master_remote, + 'salt_api_test.sh', + ) + + def __dump_drive_groups_yml(self): + self.scripts.run( + self.master_remote, + 'dump_drive_groups_yml.sh', + ) + + def __dump_lvm_status(self): + self.log.info("Dumping LVM status on storage nodes ->{}<-" + .format(self.nodes_storage)) + for hostname in self.nodes_storage: + remote = self.remotes[hostname] + self.scripts.run( + remote, + 'lvm_status.sh', + ) + + def __is_stage_between_0_and_5(self): + """ + This is implemented as a separate function because the stage specified + in the YAML might be a number or a string, and we really don't care + what Python sees it as. 
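+ For example, both "stage: 3" and "stage: '3'" in the test YAML are
+ treated the same by this check.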
+ """ + num = self.stage + try: + num = int(num) + except ValueError: + return False + if num < 0 or num > 5: + return False + return True + + def __log_stage_start(self, stage): + self.log.info(anchored( + "Running DeepSea Stage {} ({})" + .format(stage, self.stage_synonyms[stage]) + )) + + def __maybe_cat_ganesha_conf(self): + ganesha_host = self.role_type_present('ganesha') + if ganesha_host: + ganesha_remote = self.remotes[ganesha_host] + ganesha_remote.run(args="cat /etc/ganesha/ganesha.conf") + + def __mgr_dashboard_module_deploy(self): + script = ("# deploy MGR dashboard module\n" + "set -ex\n" + "ceph mgr module enable dashboard\n") + if self.dashboard_ssl: + script += "ceph dashboard create-self-signed-cert\n" + else: + script += "ceph config set mgr mgr/dashboard/ssl false\n" + remote_run_script_as_root( + self.master_remote, + 'mgr_dashboard_module_deploy.sh', + script, + ) + + def __zypper_ps_with_possible_reboot(self): + if self.sm.all_minions_zypper_ps_requires_reboot(): + log_spec = "Detected updates requiring reboot" + self.log.warning(anchored(log_spec)) + if self.reboots_explicitly_forbidden: + self.log.info("Reboots explicitly forbidden in test configuration: not rebooting") + self.log.warning("Processes using deleted files may cause instability") + else: + self.log.warning(anchored("Rebooting the whole cluster now!")) + self.reboot_the_cluster_now(log_spec=log_spec) + assert not self.sm.all_minions_zypper_ps_requires_reboot(), \ + "No more updates requiring reboot anywhere in the whole cluster" + + def _configure_rgw(self): + self.log.debug("self.rgw_ssl is ->{}<-".format(self.rgw_ssl)) + rgw_host = self.role_type_present('rgw') + if rgw_host: + self.log.debug( + "detected rgw host ->{}<-".format(rgw_host) + ) + self.log.info(anchored("configuring RGW")) + self.scripts.run( + self.master_remote, + 'rgw_init.sh', + ) + if self.rgw_ssl: + self.scripts.run( + self.master_remote, + 'rgw_init_ssl.sh', + ) + + # FIXME: run on each minion individually, and compare deepsea "roles" + # with teuthology roles! + def _pillar_items(self): + cmd = "sudo salt \\* pillar.items" + if self.quiet_salt: + cmd += " 2>/dev/null" + self.master_remote.run(args=cmd) + + def _run_orch(self, orch_tuple): + """Run an orchestration. Dump journalctl on error.""" + global reboot_tries + orch_type, orch_spec = orch_tuple + if orch_type == 'orch': + cli = False + pass + elif orch_type == 'stage': + cli = self.deepsea_cli + orch_spec = 'ceph.stage.{}'.format(orch_spec) + else: + raise ConfigError( + self.err_prefix + + "Unrecognized orchestration type ->{}<-".format(orch_type) + ) + cmd_str = None + if cli: + cmd_str = ( + 'timeout 60m deepsea ' + '--log-file=/var/log/salt/deepsea.log ' + '--log-level=debug ' + 'salt-run state.orch {} --simple-output' + ).format(orch_spec) + else: + cmd_str = ( + 'timeout 60m salt-run ' + '--no-color state.orch {}' + ).format(orch_spec) + if self.quiet_salt: + cmd_str += ' 2>/dev/null' + if self.dev_env: + cmd_str = 'DEV_ENV=true ' + cmd_str + tries = 0 + if self.survive_reboots: + tries = reboot_tries + remote_exec( + self.master_remote, + cmd_str, + self.log, + "orchestration {}".format(orch_spec), + rerun=True, + quiet=True, + tries=tries, + ) + + def _detect_reboots(self): + """ + Check for all known states/stages/alt-defaults that + may cause a reboot + If there is a 'allow_reboot' flag, it takes presedence. 
+ """ + allow_reboot = self.config.get("allow_reboot", None) + if allow_reboot is not None: + self.log.info("Setting allow_reboot explicitly to {}" + .format(self.allow_reboot)) + return allow_reboot + orchs_prone_to_reboot = ['ceph.maintenance.upgrade'] + if self.state_orch in orchs_prone_to_reboot: + self.log.warning("This orchestration may trigger a reboot") + return True + # + # The alternative_defaults stanza has been moved up to the deepsea task + # (for two reasons: because it's a global setting and also so we can do + # boilerplate overrides like qa/deepsea/boilerplate/disable_tuned.yaml). + # That change makes the following heuristic becomes problematic: since + # all the alternative defaults are concentrated in one place, if any of + # them contains the string "reboot" (without preceding "no-"), **all** + # orchestrations in the test will run with survive_reboots, not just + # one. + for k, v in self.alternative_defaults.items(): + if 'reboot' in v and 'no-reboot' not in v: + self.log.warning("Orchestrations may trigger a reboot") + return True + self.log.info("Not allowing reboots for this orchestration") + return False + + def _run_stage_0(self): + """ + Run Stage 0 + """ + stage = 0 + self.__log_stage_start(stage) + self._run_orch(("stage", stage)) + self._pillar_items() + self.sm.all_minions_zypper_ref() + self.sm.all_minions_zypper_lu() + self.__zypper_ps_with_possible_reboot() + self.__check_salt_api_service() + + def _run_stage_1(self): + """ + Run Stage 1 + """ + stage = 1 + self._configure_rgw() + self.__log_stage_start(stage) + self._run_orch(("stage", stage)) + + def _run_stage_2(self): + """ + Run Stage 2 + """ + stage = 2 + self.__log_stage_start(stage) + self._run_orch(("stage", stage)) + self.__check_ceph_test_rpm_version() + self._pillar_items() + self.__dump_drive_groups_yml() + + def _run_stage_3(self): + """ + Run Stage 3 + """ + stage = 3 + self.__log_stage_start(stage) + self._run_orch(("stage", stage)) + # self.__mgr_dashboard_module_deploy() + self.sm.all_minions_cmd_run( + 'cat /etc/ceph/ceph.conf', + abort_on_fail=False + ) + self.__dump_lvm_status() + self.scripts.run( + self.master_remote, + 'ceph_cluster_status.sh', + ) + self.__ceph_health_test() + + def _run_stage_4(self): + """ + Run Stage 4 + """ + stage = 4 + self.__log_stage_start(stage) + self._run_orch(("stage", stage)) + self.__maybe_cat_ganesha_conf() + self.__ceph_health_test() + + def _run_stage_5(self): + """ + Run Stage 5 + """ + stage = 5 + self.__log_stage_start(stage) + self._run_orch(("stage", 5)) + + def begin(self): + self.master_remote.sh('sudo salt-run jobs.active 2>/dev/null') + if self.state_orch: + self.log.info(anchored( + "running orchestration {}".format(self.state_orch) + )) + self._run_orch(("orch", self.state_orch)) + else: + # it's not an orch, so it must be a stage + assert self.stage, "Neither state_orch, nor stage" + if self.__is_stage_between_0_and_5(): + exec('self._run_stage_{}()'.format(self.stage)) + elif self.stage == 'prep': + self.log.info("Running Stage 0 instead of Stage \"prep\"") + self._run_stage_0() + elif self.stage == 'discovery': + self.log.info("Running Stage 1 instead of Stage \"discovery\"") + self._run_stage_1() + elif self.stage == 'configure': + self.log.info("Running Stage 2 instead of Stage \"configure\"") + self._run_stage_2() + elif self.stage == 'deploy': + self.log.info("Running Stage 3 instead of Stage \"deploy\"") + self._run_stage_3() + elif self.stage == 'services': + self.log.info("Running Stage 4 instead of Stage \"services\"") + 
self._run_stage_4() + elif self.stage == 'removal': + self.log.info("Running Stage 5 instead of Stage \"removal\"") + self._run_stage_5() + elif self.stage in self.all_stages: + self.log.info("Running non-numeric Stage \"{}\"".format(self.stage)) + self._run_orch(("stage", self.stage)) + else: + raise ConfigError( + self.err_prefix + + 'unsupported stage ->{}<-'.format(self.stage) + ) + self.master_remote.sh('sudo salt-run jobs.active 2>/dev/null') + + def end(self): + pass + + def teardown(self): + pass + + +class Policy(DeepSea): + + err_prefix = "(policy subtask) " + + def __init__(self, ctx, config): + global deepsea_ctx + deepsea_ctx['logger_obj'] = log.getChild('policy') + self.name = 'deepsea.policy' + super(Policy, self).__init__(ctx, config) + self.policy_cfg = '' + self.munge_policy = self.config.get('munge_policy', {}) + + def __build_drive_group_x(self, drive_group): + # generate our own drive_group.yml (as opposed to letting + # DeepSea generate one for us) + if not self.nodes_storage: + raise ConfigError(self.err_prefix + "no osd roles configured, " + "but at least one of these is required.") + self.log.debug("building drive group ->{}<- for {} storage nodes" + .format(drive_group, len(self.nodes_storage))) + if drive_group == 'teuthology': + raise ConfigError(self.err_prefix + "\"teuthology\" drive group " + "generation not implemented yet") + elif drive_group == 'custom': + self.__roll_out_drive_group() + else: + ConfigError(self.err_prefix + "unknown drive group ->{}<-" + .format(self.drive_group)) + + def __roll_out_drive_group(self, fpath="/srv/salt/ceph/configuration/files/drive_groups.yml"): + misc.sudo_write_file( + self.master_remote, + fpath, + yaml.dump(self.drive_group), + perms="0644", + ) + + def _build_base(self): + """ + policy.cfg boilerplate + """ + self.policy_cfg = ("# policy.cfg generated by deepsea.policy subtask\n" + "# Cluster assignment\n" + "cluster-ceph/cluster/*.sls\n" + "# Common configuration\n" + "config/stack/default/global.yml\n" + "config/stack/default/ceph/cluster.yml\n" + "# Role assignment - master\n" + "role-master/cluster/{}.sls\n" + "# Role assignment - admin\n" + "role-admin/cluster/*.sls\n" + .format(self.master_remote.hostname)) + + def _build_drive_groups_yml(self): + """ + Generate a special-purpose drive_groups.yml + (currently fails the test in all cases except when + "drive_group: default" is explicitly given) + """ + if isinstance(self.drive_group, str): + if self.drive_group == 'teuthology': + self.__build_drive_group_x('teuthology') + elif self.drive_group == 'default': + pass + else: + ConfigError(self.err_prefix + "unknown drive group ->{}<-" + .format(self.drive_group)) + elif isinstance(self.drive_group, dict): + self.__build_drive_group_x('custom') + else: + raise ConfigError(self.err_prefix + "drive_group config param " + "must be a string or a dict") + + def _build_x(self, role_type, required=False): + no_roles_of_type = "no {} roles configured".format(role_type) + but_required = ", but at least one of these is required." 
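+ # For a role type like 'mon', this method appends policy.cfg lines of the
+ # form "role-mon/cluster/<hostname>.sls" (one per remote that carries the
+ # role); note that 'osd' roles are mapped to the "storage" role path below.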
+ role_dict = {} + if role_type in self.role_lookup_table: + role_dict = self.role_lookup_table[role_type] + elif required: + raise ConfigError(self.err_prefix + no_roles_of_type + but_required) + else: + self.log.debug(no_roles_of_type) + return None + self.log.debug("generating policy.cfg lines for {} based on {}" + .format(role_type, role_dict)) + if required: + if len(role_dict.keys()) < 1: + raise ConfigError(self.err_prefix + no_roles_of_type + but_required) + for role_spec, remote_name in role_dict.items(): + if role_type == 'osd': + role_type = 'storage' + self.policy_cfg += ('# Role assignment - {}\n' + 'role-{}/cluster/{}.sls\n' + .format(role_spec, role_type, remote_name)) + + def _cat_policy_cfg(self): + """ + Dump the final policy.cfg file to teuthology log. + """ + cmd_str = "cat {}/policy.cfg".format(proposals_dir) + self.master_remote.run(args=cmd_str) + + def _write_policy_cfg(self): + """ + Write policy_cfg to master remote. + """ + misc.sudo_write_file( + self.master_remote, + proposals_dir + "/policy.cfg", + self.policy_cfg, + perms="0644", + owner="salt", + ) + cmd_str = "ls -l {}/policy.cfg".format(proposals_dir) + self.master_remote.run(args=cmd_str) + + def begin(self): + """ + Generate policy.cfg from the results of role introspection + """ + # FIXME: this should be run only once - check for that and + # return an error otherwise + if self.munge_policy: + for k, v in self.munge_policy.items(): + if k == 'remove_storage_only_node': + delete_me = self.first_storage_only_node() + if not delete_me: + raise ConfigError( + self.err_prefix + "remove_storage_only_node " + "requires a storage-only node, but there is no such" + ) + raise ConfigError(self.err_prefix + ( + "munge_policy is a kludge - get rid of it! " + "This test needs to be reworked - deepsea.py " + "does not currently have a proper way of " + "changing (\"munging\") the policy.cfg file." + )) + else: + raise ConfigError(self.err_prefix + "unrecognized " + "munge_policy directive {}".format(k)) + else: + self.log.info(anchored("generating policy.cfg")) + self._build_base() + self._build_x('mon', required=True) + self._build_x('mgr', required=True) + self._build_x('osd', required=True) + self._build_drive_groups_yml() + self._build_x('mds') + self._build_x('rgw') + self._build_x('igw') + self._build_x('ganesha') + self._build_x('prometheus') + self._build_x('grafana') + self._write_policy_cfg() + self._cat_policy_cfg() + + def end(self): + pass + + def teardown(self): + pass + + +class Reboot(DeepSea): + """ + A class that does nothing but unconditionally reboot - either a single node + or the whole cluster. + + Configuration (reboot a single node) + + tasks: + - deepsea.reboot: + client.salt_master: + + Configuration (reboot the entire cluster) + + tasks: + - deepsea.reboot: + all: + """ + + err_prefix = '(reboot subtask) ' + + def __init__(self, ctx, config): + global deepsea_ctx + deepsea_ctx['logger_obj'] = log.getChild('reboot') + self.name = 'deepsea.reboot' + super(Reboot, self).__init__(ctx, config) + + def begin(self): + if not self.config: + self.log.warning("empty config: nothing to do") + return None + config_keys = len(self.config) + if config_keys > 1: + raise ConfigError( + self.err_prefix + + "config dictionary may contain only one key. 
" + "You provided ->{}<- keys ({})".format(len(config_keys), config_keys) + ) + role_spec, repositories = self.config.items()[0] + if role_spec == "all": + remote = self.ctx.cluster + log_spec = "all nodes reboot now" + self.log.warning(anchored(log_spec)) + self.reboot_the_cluster_now(log_spec=log_spec) + else: + remote = get_remote_for_role(self.ctx, role_spec) + log_spec = "node {} reboot now".format(remote.hostname) + self.log.warning(anchored(log_spec)) + self.reboot_a_single_machine_now(remote, log_spec=log_spec) + + def end(self): + pass + + def teardown(self): + pass + + +class Repository(DeepSea): + """ + A class for manipulating zypper repos on the test nodes. + All it knows how to do is wipe out the existing repos (i.e. rename them to + foo.repo.bck) and replace them with a given set of new ones. + + Configuration (one node): + + tasks: + - deepsea.repository: + client.salt_master: + - name: repo_foo + url: http://example.com/foo/ + - name: repo_bar + url: http://example.com/bar/ + + Configuration (all nodes): + + tasks: + - deepsea.repository: + all: + - name: repo_foo + url: http://example.com/foo/ + - name: repo_bar + url: http://example.com/bar/ + + To eliminate the need to duplicate the repos array, it can be specified + in the configuration of the main deepsea task. Then the yaml will look + like so: + + tasks: + - deepsea: + repositories: + - name: repo_foo + url: http://example.com/foo/ + - name: repo_bar + url: http://example.com/bar/ + ... + - deepsea.repository: + client.salt_master: + ... + - deepsea.repository: + all: + + One last note: we try to be careful and not clobber the repos twice. + """ + + err_prefix = '(repository subtask) ' + + def __init__(self, ctx, config): + deepsea_ctx['logger_obj'] = log.getChild('repository') + self.name = 'deepsea.repository' + super(Repository, self).__init__(ctx, config) + + def _repositories_to_remote(self, remote): + args = [] + for repo in self.repositories: + args += [repo['name'] + ':' + repo['url']] + self.scripts.run( + remote, + 'clobber_repositories.sh', + args=args + ) + + def begin(self): + if not self.config: + self.log.warning("empty config: nothing to do") + return None + config_keys = len(self.config) + if config_keys > 1: + raise ConfigError( + self.err_prefix + + "config dictionary may contain only one key. " + "You provided ->{}<- keys ({})".format(len(config_keys), config_keys) + ) + role_spec, repositories = self.config.items()[0] + if role_spec == "all": + remote = self.ctx.cluster + else: + remote = get_remote_for_role(self.ctx, role_spec) + if repositories is None: + assert self.repositories, \ + "self.repositories must be populated if role_dict is None" + else: + assert isinstance(repositories, list), \ + "value of role key must be a list of repositories" + self.repositories = repositories + if not self.repositories: + raise ConfigError( + self.err_prefix + + "No repositories specified. Bailing out!" + ) + self._repositories_to_remote(remote) + + def end(self): + pass + + def teardown(self): + pass + + +class Script(DeepSea): + """ + A class that runs a bash script on the node with given role, or on all nodes. 
+ + Example 1 (run foo_bar.sh, with arguments, on Salt Master node): + + tasks: + - deepsea.script: + client.salt_master: + foo_bar.sh: + args: + - 'foo' + - 'bar' + + Example 2 (run foo_bar.sh, with no arguments, on all test nodes) + + tasks: + - deepsea.script: + all: + foo_bar.sh: + """ + + err_prefix = '(script subtask) ' + + def __init__(self, ctx, config): + global deepsea_ctx + deepsea_ctx['logger_obj'] = log.getChild('script') + self.name = 'deepsea.script' + super(Script, self).__init__(ctx, config) + + def begin(self): + if not self.config: + self.log.warning("empty config: nothing to do") + return None + config_keys = len(self.config) + if config_keys > 1: + raise ConfigError( + self.err_prefix + + "config dictionary may contain only one key. " + "You provided ->{}<- keys ({})".format(len(config_keys), config_keys) + ) + role_spec, role_dict = self.config.items()[0] + role_keys = len(role_dict) + if role_keys > 1: + raise ConfigError( + self.err_prefix + + "role dictionary may contain only one key. " + "You provided ->{}<- keys ({})".format(len(role_keys), role_keys) + ) + if role_spec == "all": + remote = self.ctx.cluster + else: + remote = get_remote_for_role(self.ctx, role_spec) + script_spec, script_dict = role_dict.items()[0] + if script_dict is None: + args = [] + if isinstance(script_dict, dict): + if len(script_dict) > 1 or script_dict.keys()[0] != 'args': + raise ConfigError( + self.err_prefix + + 'script dicts may only contain one key (args)' + ) + args = script_dict.values()[0] or [] + if not isinstance(args, list): + raise ConfigError(self.err_prefix + 'script args must be a list') + self.scripts.run( + remote, + script_spec, + args=args + ) + + def end(self): + pass + + def teardown(self): + pass + + +class Toolbox(DeepSea): + """ + A class that contains various miscellaneous routines. For example: + + tasks: + - deepsea.toolbox: + foo: + + Runs the "foo" tool without any options. + """ + + err_prefix = '(toolbox subtask) ' + + def __init__(self, ctx, config): + global deepsea_ctx + deepsea_ctx['logger_obj'] = log.getChild('toolbox') + self.name = 'deepsea.toolbox' + super(Toolbox, self).__init__(ctx, config) + + def _assert_store(self, file_or_blue, teuth_role): + """ + file_or_blue can be either 'bluestore' or 'filestore' + teuth_role is an 'osd' role uniquely specifying one of the storage nodes. + Enumerates the OSDs on the node and asserts that each of these OSDs is + either filestore or bluestore, as appropriate. + """ + remote = get_remote_for_role(self.ctx, teuth_role) + osds = enumerate_osds(remote, self.log) + assert osds, "No OSDs were captured, so please check if they are active" + self.log.info("Checking if OSDs ->{}<- are ->{}<-".format(osds, file_or_blue)) + all_green = True + for osd in osds: + store = remote.sh("sudo ceph osd metadata {} | jq -r .osd_objectstore" + .format(osd)).rstrip() + self.log.info("OSD {} is ->{}<-.".format(osd, store)) + if store != file_or_blue: + self.log.warning("OSD {} has objectstore ->{}<- which is not ->{}<-". + format(osd, store, file_or_blue)) + all_green = False + assert all_green, "One or more OSDs is not {}".format(file_or_blue) + + def rebuild_node(self, **kwargs): + """ + Expects a teuthology 'osd' role specifying one of the storage nodes. + Then runs 'rebuild.nodes' on the node, can be used for filestore to bluestore + migration if you run it after you change the drive_groups.yml file. 
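+ 
+ A sketch of the expected YAML (the "osd.0" role name is only an
+ illustration - use a role that actually appears in the job's roles list):
+ 
+ tasks:
+ - deepsea.toolbox:
+     rebuild_node:
+       osd.0: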
+ """ + role = kwargs.keys()[0] + remote = get_remote_for_role(self.ctx, role) + osds_before_rebuild = len(enumerate_osds(remote, self.log)) + self.log.info("Disengaging safety to prepare for rebuild") + self.master_remote.sh("sudo salt-run disengage.safety 2>/dev/null") + self.log.info("Rebuilding node {}".format(remote.hostname)) + self.master_remote.sh("sudo salt-run rebuild.node {} 2>/dev/null".format(remote.hostname)) + with safe_while(sleep=15, tries=10, + action="ceph osd tree") as proceed: + while proceed(): + self.master_remote.sh("sudo ceph osd tree || true") + if osds_before_rebuild == len(enumerate_osds(remote, self.log)): + break + + def _noout(self, add_or_rm, teuth_role): + """ + add_or_rm is either 'add' or 'rm' + teuth_role is an 'osd' role uniquely specifying one of the storage nodes. + Enumerates the OSDs on the node and does 'add-noout' on each of them. + """ + remote = get_remote_for_role(self.ctx, teuth_role) + osds = enumerate_osds(remote, self.log) + self.log.info("Running {}-noout for OSDs ->{}<-".format(add_or_rm, osds)) + for osd in osds: + remote.sh("sudo ceph osd {}-noout osd.{}".format(add_or_rm, osd)) + + def add_noout(self, **kwargs): + """ + Expects one key - a teuthology 'osd' role specifying one of the storage nodes. + Enumerates the OSDs on this node and does 'add-noout' on each of them. + """ + role = kwargs.keys()[0] + self._noout("add", role) + + def assert_bluestore(self, **kwargs): + """ + Expects one key - a teuthology 'osd' role specifying one of the storage nodes. + Enumerates the OSDs on this node and asserts that each one is a bluestore OSD. + """ + role = kwargs.keys()[0] + self._assert_store("bluestore", role) + + def assert_filestore(self, **kwargs): + """ + Expects one key - a teuthology 'osd' role specifying one of the storage nodes. + Enumerates the OSDs on this node and asserts that each one is a filestore OSD. + """ + role = kwargs.keys()[0] + self._assert_store("filestore", role) + + def rm_noout(self, **kwargs): + """ + Expects one key - a teuthology 'osd' role specifying one of the storage nodes. + Enumerates the OSDs on this node and does 'rm-noout' on each of them. + """ + role = kwargs.keys()[0] + self._noout("rm", role) + + def wait_for_health_ok(self, **kwargs): + """ + Wait for HEALTH_OK - stop after HEALTH_OK is reached or timeout expires. + Timeout defaults to 120 minutes, but can be specified by providing a + configuration option. For example: + + tasks: + - deepsea.toolbox + wait_for_health_ok: + timeout_minutes: 90 + """ + if kwargs: + self.log.info("wait_for_health_ok: Considering config dict ->{}<-".format(kwargs)) + config_keys = len(kwargs) + if config_keys > 1: + raise ConfigError( + self.err_prefix + + "wait_for_health_ok config dictionary may contain only one key. 
" + "You provided ->{}<- keys ({})".format(len(config_keys), config_keys) + ) + timeout_spec, timeout_minutes = kwargs.items()[0] + else: + timeout_minutes = 120 + self.log.info("Waiting up to ->{}<- minutes for HEALTH_OK".format(timeout_minutes)) + remote = get_remote_for_role(self.ctx, "client.salt_master") + cluster_status = "" + for minute in range(1, timeout_minutes+1): + remote.sh("sudo ceph status") + cluster_status = remote.sh( + "sudo ceph health detail --format json | jq -r '.status'" + ).rstrip() + if cluster_status == "HEALTH_OK": + break + self.log.info("Waiting for one minute for cluster to reach HEALTH_OK" + "({} minutes left to timeout)" + .format(timeout_minutes + 1 - minute)) + time.sleep(60) + if cluster_status == "HEALTH_OK": + self.log.info(anchored("Cluster is healthy")) + else: + raise RuntimeError("Cluster still not healthy (current status ->{}<-) " + "after reaching timeout" + .format(cluster_status)) + + def begin(self): + if not self.config: + self.log.warning("empty config: nothing to do") + return None + self.log.info("Considering config dict ->{}<-".format(self.config)) + config_keys = len(self.config) + if config_keys > 1: + raise ConfigError( + self.err_prefix + + "config dictionary may contain only one key. " + "You provided ->{}<- keys ({})".format(len(config_keys), config_keys) + ) + tool_spec, kwargs = self.config.items()[0] + kwargs = {} if not kwargs else kwargs + method = getattr(self, tool_spec, None) + if method: + self.log.info("About to run tool ->{}<- from toolbox with config ->{}<-" + .format(tool_spec, kwargs)) + method(**kwargs) + else: + raise ConfigError(self.err_prefix + "No such tool ->{}<- in toolbox" + .format(tool_spec)) + + def end(self): + pass + + def teardown(self): + pass + + +class Validation(DeepSea): + """ + A container for "validation tests", which are understood to mean tests that + validate the Ceph cluster (just) deployed by DeepSea. + + The tests implemented in this class should be small and not take long to + finish. Anything more involved should be implemented in a separate task + (see ses_qa.py for an example of such a task). + + The config YAML is a dictionary in which the keys are the names of tests + (methods to be run) and the values are the config dictionaries of each test + to be run. + + Validation tests with lines like this + + self._apply_config_default("foo_test", None) + + are triggered by default, while others have to be explicitly mentioned in + the YAML. + """ + + err_prefix = '(validation subtask) ' + + def __init__(self, ctx, config): + global deepsea_ctx + deepsea_ctx['logger_obj'] = log.getChild('validation') + self.name = 'deepsea.validation' + super(Validation, self).__init__(ctx, config) + self._apply_config_default("ceph_version_sanity", None) + self._apply_config_default("rados_striper", None) + self._apply_config_default("systemd_units_active", None) + + def _apply_config_default(self, validation_test, default_config): + """ + Use to activate tests that should always be run. 
+ """ + self.config[validation_test] = self.config.get(validation_test, default_config) + + def ceph_version_sanity(self, **kwargs): + self.scripts.run( + self.master_remote, + 'ceph_version_sanity.sh', + ) + + def ganesha_smoke_test(self, **kwargs): + client_host = self.role_type_present("ganeshaclient") + rgw = self.role_type_present("rgw") + mds = self.role_type_present("mds") + args = [] + if mds: + args += ['--mds'] + if rgw: + args += ['--rgw'] + if not args: + raise ConfigError(self.err_prefix + + "ganesha_smoke_test needs an rgw or mds role, but neither was given") + if client_host: + self.master_remote.sh("sudo salt-run ganesha.report 2>/dev/null || true") + remote = self.remotes[client_host] + self.scripts.run( + remote, + 'ganesha_smoke_test.sh', + args=args, + ) + self.master_remote.sh("sudo salt-run ganesha.report 2>/dev/null || true") + else: + raise ConfigError(self.err_prefix + + "ganesha_smoke_test needs a client role, but none was given") + + def grafana_service_check(self, **kwargs): + grafana = self.role_type_present("grafana") + if grafana: + remote = self.remotes[grafana] + remote.sh('sudo systemctl status grafana-server.service') + else: + raise ConfigError(self.err_prefix + + "grafana_service_check needs a grafana role, but none was given") + + def iscsi_smoke_test(self, **kwargs): + igw_host = self.role_type_present("igw") + if igw_host: + remote = self.remotes[igw_host] + self.scripts.run( + remote, + 'iscsi_smoke_test.sh', + ) + + def rados_striper(self, **kwargs): + """ + Verify that rados does not has the --striper option + """ + cmd_str = 'sudo rados --striper 2>&1 || true' + output = self.master_remote.sh(cmd_str) + os_type, os_version = self.os_type_and_version() + self.log.info( + "Checking for expected output on OS ->{}<-" + .format(os_type + " " + str(os_version)) + ) + if os_type == 'sle' and os_version >= 15: + assert 'unrecognized command --striper' in output, \ + "ceph is compiled without libradosstriper" + else: + assert '--striper' not in output, \ + "ceph is compiled with libradosstriper" + self.log.info("OK") + + def rados_write_test(self, **kwargs): + self.scripts.run( + self.master_remote, + 'rados_write_test.sh', + ) + + def systemd_units_active(self, **kwargs): + """ + For all cluster nodes, determine which systemd services + should be running and assert that the respective units + are in "active" state. 
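+ 
+ For example, a node carrying 'mon' and 'mgr' roles is expected to have
+ active ceph-mon@* and ceph-mgr@* units; role types with no associated
+ systemd unit are ignored.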
+ """ + # map role types to systemd units + unit_map = { + "mds": "ceph-mds@", + "mgr": "ceph-mgr@", + "mon": "ceph-mon@", + "osd": "ceph-osd@", + "rgw": "ceph-radosgw@", + "ganesha": "nfs-ganesha" + } + # for each machine in the cluster + idx = 0 + for rtl in self.role_types: + node = self.nodes[idx] + script = ("# validate systemd units on {}\n" + "set -ex\n").format(node) + self.log.info("Machine {} ({}) has role types {}" + .format(idx, node, ','.join(rtl))) + remote = self.remotes[node] + run_script = False + for role_type in rtl: + if role_type in unit_map: + script += ("systemctl --state=active --type=service list-units " + "| grep -e '^{}'\n".format(unit_map[role_type])) + run_script = True + else: + self.log.debug("Ignoring role_type {} which has no associated " + "systemd unit".format(role_type)) + if run_script: + remote_run_script_as_root( + remote, + "systemd_validation.sh", + script + ) + idx += 1 + + def begin(self): + self.log.debug("Processing tests: ->{}<-".format(self.config.keys())) + for method_spec, kwargs in self.config.items(): + kwargs = {} if not kwargs else kwargs + if not isinstance(kwargs, dict): + raise ConfigError(self.err_prefix + "Method config must be a dict") + self.log.info(anchored( + "Running validation test {} with config ->{}<-" + .format(method_spec, kwargs) + )) + method = getattr(self, method_spec, None) + if method: + method(**kwargs) + else: + raise ConfigError(self.err_prefix + "No such method ->{}<-" + .format(method_spec)) + + def end(self): + pass + + def teardown(self): + pass + + +task = DeepSea +ceph_conf = CephConf +create_pools = CreatePools +dummy = Dummy +health_ok = HealthOK +orch = Orch +policy = Policy +reboot = Reboot +repository = Repository +script = Script +toolbox = Toolbox +validation = Validation diff --git a/qa/tasks/salt.py b/qa/tasks/salt.py new file mode 100644 index 0000000000000..949c95358ce9f --- /dev/null +++ b/qa/tasks/salt.py @@ -0,0 +1,300 @@ +''' +Task that deploys a Salt cluster on all the nodes + +Linter: + flake8 --max-line-length=100 +''' +import logging + +from salt_manager import SaltManager +from util import remote_exec +from teuthology.exceptions import ConfigError +from teuthology.misc import ( + delete_file, + move_file, + sh, + sudo_write_file, + write_file, + ) +from teuthology.orchestra import run +from teuthology.task import Task + +log = logging.getLogger(__name__) + + +class Salt(Task): + """ + Deploy a Salt cluster on all remotes (test nodes). + + This task assumes all relevant Salt packages (salt, salt-master, + salt-minion, salt-api, python-salt, etc. - whatever they may be called for + the OS in question) are already installed. This should be done using the + install task. + + One, and only one, of the machines must have a role corresponding to the + value of the variable salt.sm.master_role (see salt_manager.py). This node + is referred to as the "Salt Master", or the "master node". + + The task starts the Salt Master daemon on the master node, and Salt Minion + daemons on all the nodes (including the master node), and ensures that the + minions are properly linked to the master. Finally, it tries to ping all + the minions from the Salt Master. 
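+ 
+ A minimal example of wiring this task into a teuthology job (assuming
+ the usual suite layout, with the install task running first):
+ 
+     tasks:
+     - install:
+     - salt: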
+ + :param ctx: the argparse.Namespace object + :param config: the config dict + """ + + def __init__(self, ctx, config): + super(Salt, self).__init__(ctx, config) + log.debug("beginning of constructor method") + log.debug("munged config is {}".format(self.config)) + self.remotes = self.cluster.remotes + self.sm = SaltManager(self.ctx) + self.master_remote = self.sm.master_remote + log.debug("end of constructor method") + + def _disable_autodiscovery(self): + """ + It's supposed to be off by default, but just in case. + """ + self.sm.master_remote.run(args=[ + 'sudo', 'sh', '-c', + 'echo discovery: false >> /etc/salt/master' + ]) + for rem in self.remotes.keys(): + rem.run(args=[ + 'sudo', 'sh', '-c', + 'echo discovery: false >> /etc/salt/minion' + ]) + + def _generate_minion_keys(self): + ''' + Generate minion key on salt master to be used to preseed this cluster's + minions. + ''' + for rem in self.remotes.keys(): + minion_id = rem.hostname + log.info('Ensuring that minion ID {} has a keypair on the master' + .format(minion_id)) + # mode 777 is necessary to be able to generate keys reliably + # we hit this before: + # https://github.com/saltstack/salt/issues/31565 + self.sm.master_remote.run(args=[ + 'sudo', + 'sh', + '-c', + 'test -d salt || mkdir -m 777 salt', + ]) + self.sm.master_remote.run(args=[ + 'sudo', + 'sh', + '-c', + 'test -d salt/minion-keys || mkdir -m 777 salt/minion-keys', + ]) + self.sm.master_remote.run(args=[ + 'sudo', + 'sh', + '-c', + ('if [ ! -f salt/minion-keys/{mid}.pem ]; then ' + 'salt-key --gen-keys={mid} ' + '--gen-keys-dir=salt/minion-keys/; ' + ' fi').format(mid=minion_id), + ]) + + def _preseed_minions(self): + ''' + Preseed minions with generated and accepted keys; set minion id + to the remote's hostname. + ''' + for rem in self.remotes.keys(): + minion_id = rem.hostname + src = 'salt/minion-keys/{}.pub'.format(minion_id) + dest = '/etc/salt/pki/master/minions/{}'.format(minion_id) + self.sm.master_remote.run(args=[ + 'sudo', + 'sh', + '-c', + ('if [ ! -f {d} ]; then ' + 'cp {s} {d} ; ' + 'chown root {d} ; ' + 'fi').format(s=src, d=dest) + ]) + self.sm.master_remote.run(args=[ + 'sudo', + 'chown', + 'ubuntu', + 'salt/minion-keys/{}.pem'.format(minion_id), + 'salt/minion-keys/{}.pub'.format(minion_id), + ]) + # + # copy the keys via the teuthology VM. The worker VMs can't ssh to + # each other. scp -3 does a 3-point copy through the teuthology VM. + sh('scp -3 {}:salt/minion-keys/{}.* {}:'.format( + self.sm.master_remote.name, + minion_id, rem.name)) + sudo_write_file(rem, '/etc/salt/minion_id', minion_id) + # + # set proper owner and permissions on keys + rem.run( + args=[ + 'sudo', + 'chown', + 'root', + '{}.pem'.format(minion_id), + '{}.pub'.format(minion_id), + run.Raw(';'), + 'sudo', + 'chmod', + '600', + '{}.pem'.format(minion_id), + run.Raw(';'), + 'sudo', + 'chmod', + '644', + '{}.pub'.format(minion_id), + ], + ) + # + # move keys to correct location + move_file(rem, '{}.pem'.format(minion_id), + '/etc/salt/pki/minion/minion.pem', sudo=True, + preserve_perms=False) + move_file(rem, '{}.pub'.format(minion_id), + '/etc/salt/pki/minion/minion.pub', sudo=True, + preserve_perms=False) + + def _set_minion_master(self): + """Points all minions to the master""" + master_id = self.sm.master_remote.hostname + for rem in self.remotes.keys(): + # remove old master public key if present. 
Minion will refuse to + # start if master name changed but old key is present + delete_file(rem, '/etc/salt/pki/minion/minion_master.pub', + sudo=True, check=False) + + # set master id + sed_cmd = ('echo master: {} > ' + '/etc/salt/minion.d/master.conf').format(master_id) + rem.run(args=[ + 'sudo', + 'sh', + '-c', + sed_cmd, + ]) + + def _set_debug_log_level(self): + """Sets log_level: debug for all salt daemons""" + for rem in self.remotes.keys(): + rem.run(args=[ + 'sudo', + 'sed', '--in-place', '--regexp-extended', + '-e', 's/^\s*#\s*log_level:.*$/log_level: debug/g', # noqa: W605 + '-e', '/^\s*#.*$/d', '-e', '/^\s*$/d', # noqa: W605 + '/etc/salt/master', + '/etc/salt/minion', + ]) + + def setup(self): + super(Salt, self).setup() + log.debug("beginning of setup method") + self._generate_minion_keys() + self._preseed_minions() + self._set_minion_master() + self._disable_autodiscovery() + self._set_debug_log_level() + self.sm.enable_master() + self.sm.start_master() + self.sm.enable_minions() + self.sm.start_minions() + log.debug("end of setup method") + + def begin(self): + super(Salt, self).begin() + log.debug("beginning of begin method") + self.sm.check_salt_daemons() + self.sm.cat_salt_master_conf() + self.sm.cat_salt_minion_confs() + self.sm.ping_minions() + log.debug("end of begin method") + + def end(self): + super(Salt, self).end() + log.debug("beginning of end method") + self.sm.gather_logs('salt') + self.sm.gather_logs('zypp') + self.sm.gather_logs('rbd-target-api') + self.sm.gather_logfile('zypper.log') + self.sm.gather_logfile('journalctl.log') + log.debug("end of end method") + + def teardown(self): + super(Salt, self).teardown() + # log.debug("beginning of teardown method") + pass + # log.debug("end of teardown method") + + +class Command(Salt): + """ + Subtask for running an arbitrary salt command. + + This subtask understands the following config keys: + + command the command to run (mandatory) + For example: + + command: 'state.apply ceph.updates.salt' + + target target selection specifier (default: *) + For details, see "man salt" + + Note: "command: saltutil.sync_all" gets special handling. + """ + + err_prefix = "(command subtask) " + + def __init__(self, ctx, config): + super(Command, self).__init__(ctx, config) + self.command = str(self.config.get("command", '')) + # targets all machines if omitted + self.target = str(self.config.get("target", '*')) + if not self.command: + raise ConfigError( + self.err_prefix + "nothing to do. 
Specify a non-empty value for 'command'") + + def _run_command(self): + if '*' in self.target: + quoted_target = "\'{}\'".format(self.target) + else: + quoted_target = self.target + cmd_str = ( + "set -ex\n" + "timeout 60m salt {} --no-color {} 2>/dev/null\n" + ).format(quoted_target, self.command) + write_file(self.master_remote, 'run_salt_command.sh', cmd_str) + remote_exec( + self.master_remote, + 'sudo bash run_salt_command.sh', + log, + "salt command ->{}<-".format(self.command), + ) + + def setup(self): + pass + + def begin(self): + self.log.info("running salt command ->{}<-".format(self.command)) + if self.command == 'saltutil.sync_all': + self.sm.sync_pillar_data() + else: + self._run_command() + + def end(self): + pass + + def teardown(self): + pass + + +task = Salt +command = Command diff --git a/qa/tasks/salt_manager.py b/qa/tasks/salt_manager.py new file mode 100644 index 0000000000000..f769f0fcdf748 --- /dev/null +++ b/qa/tasks/salt_manager.py @@ -0,0 +1,275 @@ +''' +Salt "manager" module + +Usage: First, ensure that there is a role whose name corresponds +to the value of the master_role variable, below. Second, in your +task, instantiate a SaltManager object: + + from salt_manager import SaltManager + + sm = SaltManager(ctx) + +Third, enjoy the SaltManager goodness - e.g.: + + sm.ping_minions() + +Linter: + flake8 --max-line-length=100 +''' +import logging +import re + +from teuthology.contextutil import safe_while +from teuthology.exceptions import CommandFailedError, MaxWhileTries +from teuthology.orchestra import run +from util import get_remote_for_role + +log = logging.getLogger(__name__) +master_role = 'client.salt_master' + + +class InternalError(Exception): + pass + + +def systemctl_remote(remote, subcommand, service_name): + """ + Caveat: only works for units ending in ".service" + """ + def systemctl_cmd(subcommand, lines=0): + return ('sudo systemctl {} --full --lines={} {}.service' + .format(subcommand, lines, service_name)) + try: + remote.run(args=systemctl_cmd(subcommand)) + except CommandFailedError: + remote.run(args=systemctl_cmd('status', 100)) + raise + + +class SaltManager(object): + + def __init__(self, ctx): + self.ctx = ctx + self.master_remote = get_remote_for_role(self.ctx, master_role) + + def __cat_file_cluster(self, filename=None): + """ + cat a file everywhere on the whole cluster + """ + self.ctx.cluster.run(args=[ + 'sudo', 'cat', filename]) + + def __cat_file_remote(self, remote, filename=None): + """ + cat a file on a particular remote + """ + try: + remote.run(args=[ + 'sudo', 'cat', filename]) + except CommandFailedError: + log.warning(( + "salt_manager: {} not found on {}" + ).format(filename, remote.name)) + + def __ping(self, ping_cmd, expected): + try: + def instances_of_str(search_str, output): + return len(re.findall(search_str, output)) + with safe_while(sleep=15, tries=50, + action=ping_cmd) as proceed: + while proceed(): + output = self.master_remote.sh(ping_cmd) + no_master = instances_of_str('The salt master could not be contacted', output) + responded = instances_of_str(' True', output) + log.info("{} of {} minions responded".format(responded, expected)) + if (expected == responded): + return None + except MaxWhileTries: + if no_master: + cmd = 'sudo systemctl status --full --lines=100 salt-master.service' + self.master_remote.run(args=cmd) + + def all_minions_cmd_run(self, cmd, abort_on_fail=True, show_stderr=False): + """ + Use cmd.run to run a command on all nodes. 
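+ 
+ For example, all_minions_cmd_run('uptime') ends up running
+ 
+     sudo salt \* cmd.run 'uptime' 2>/dev/null
+ 
+ on the Salt Master node (the redirect is dropped when show_stderr=True).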
+ """ + if not abort_on_fail: + cmd += ' || true' + redirect = "" if show_stderr else " 2>/dev/null" + self.master_remote.run(args=( + 'sudo salt \\* cmd.run \'{}\'{}'.format(cmd, redirect) + )) + + def all_minions_zypper_lu(self): + """Run "zypper lu" on all nodes""" + cmd = "zypper --non-interactive --no-gpg-checks list-updates" + self.all_minions_cmd_run(cmd, abort_on_fail=False) + + def all_minions_zypper_ps(self): + """Run "zypper ps -s" on all nodes""" + cmd = "zypper ps -s || true" + self.all_minions_cmd_run(cmd, abort_on_fail=False) + + def all_minions_zypper_ps_requires_reboot(self): + number_of_minions = len(self.ctx.cluster.remotes) + salt_cmd = "sudo salt \\* cmd.run \'zypper ps -s || true\' 2>/dev/null" + number_with_no_processes = len( + re.findall('No processes using deleted files found', + self.master_remote.sh(salt_cmd)) + ) + return number_with_no_processes != number_of_minions + + def all_minions_zypper_ref(self): + """Run "zypper ref" on all nodes""" + cmd = "zypper --non-interactive --gpg-auto-import-keys refresh" + self.all_minions_cmd_run(cmd, abort_on_fail=False) + + def all_minions_zypper_status(self): + """ + Implement someone's idea of a general 'zypper status' + """ + self.all_minions_zypper_ref() + self.all_minions_zypper_lu() + self.all_minions_zypper_ps() + + def cat_salt_master_conf(self): + self.__cat_file_remote(self.master_remote, filename="/etc/salt/master") + + def cat_salt_minion_confs(self): + self.__cat_file_cluster(filename="/etc/salt/minion") + + def check_salt_daemons(self): + self.master_remote.run(args=['sudo', 'salt-key', '-L']) + systemctl_remote(self.master_remote, 'status', 'salt-master') + for _remote in self.ctx.cluster.remotes.keys(): + systemctl_remote(_remote, 'status', 'salt-minion') + _remote.run(args='sudo cat /etc/salt/minion_id') + _remote.run(args='sudo cat /etc/salt/minion.d/master.conf') + + def enable_master(self): + """Enables salt-master.service on the Salt Master node""" + systemctl_remote(self.master_remote, "enable", "salt-master") + + def enable_minions(self): + """Enables salt-minion.service on all cluster nodes""" + systemctl_remote(self.ctx.cluster, "enable", "salt-minion") + + def gather_logfile(self, logfile): + for _remote in self.ctx.cluster.remotes.keys(): + try: + _remote.run(args=[ + 'sudo', 'test', '-f', '/var/log/{}'.format(logfile), + ]) + except CommandFailedError: + continue + log.info(( + "gathering logfile /var/log/{} from remote {}" + ).format(logfile, _remote.hostname)) + _remote.run(args=[ + 'sudo', 'cp', '-a', '/var/log/{}'.format(logfile), + '/home/ubuntu/cephtest/archive/', + run.Raw(';'), + 'sudo', 'chown', 'ubuntu', + '/home/ubuntu/cephtest/archive/{}'.format(logfile) + ]) + + def gather_logs(self, logdir, archive=None): + """ + Grabs contents of logdir and saves them in /home/ubuntu/cephtest/archive + teuthology will harvest them before destroying the remote (target machine). + + logdir can be specified as an absolute path or a relative path. Relative + paths are assumed to be under /var/log. 
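+ 
+ For example, gather_logs('salt') harvests /var/log/salt, while an
+ absolute path such as gather_logs('/home/farm/.npm/_logs',
+ 'dashboard-e2e-npm') also needs the archive argument to name the
+ destination subdirectory.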
+ """ + if logdir[:1] == '/': + if not archive: + raise InternalError(( + 'Unable to harvest logs from absolute directory ->{}<- ' + 'because no archive option was passed' + ).format(logdir) + ) + else: + if not archive: + archive = logdir + logdir = '/var/log/{}'.format(logdir) + for _remote in self.ctx.cluster.remotes.keys(): + try: + _remote.run(args=[ + 'sudo', 'test', '-d', '{}/'.format(logdir), + ]) + except CommandFailedError: + continue + log.info("gathering {} logs from remote {}" + .format(logdir, _remote.hostname)) + _remote.run(args=[ + 'sudo', 'cp', '-a', '{}/'.format(logdir), + '/home/ubuntu/cephtest/archive/', + run.Raw(';'), + 'sudo', 'chown', '-R', 'ubuntu', + '/home/ubuntu/cephtest/archive/{}/'.format(archive), + run.Raw(';'), + 'find', '/home/ubuntu/cephtest/archive/{}/'.format(archive), + '-type', 'f', '-print0', + run.Raw('|'), + 'xargs', '-0', '--no-run-if-empty', '--', 'gzip', '--' + ]) + + def master_role(self): + return master_role + + def master_rpm_q(self, pkg_name): + """Run rpm -q on the Salt Master node""" + # FIXME: should possibly take a list of pkg_names + installed = True + try: + self.master_remote.run(args=[ + 'rpm', '-q', pkg_name + ]) + except CommandFailedError: + installed = False + return installed + + def ping_minion(self, mid): + """Pings a minion; raises exception if it doesn't respond""" + self.__ping(['sudo', 'salt', mid, 'test.ping'], 1) + + def ping_minions(self): + """ + Pings minions; raises exception if they don't respond + """ + number_of_minions = len(self.ctx.cluster.remotes) + self.__ping( + "sudo sh -c \'salt \\* test.ping\' 2>/dev/null || true", + number_of_minions, + ) + return number_of_minions + + def restart_master(self): + """Starts salt-master.service on the Salt Master node""" + systemctl_remote(self.master_remote, "restart", "salt-master") + + def restart_minions(self): + """Restarts salt-minion.service on all cluster nodes""" + systemctl_remote(self.ctx.cluster, "restart", "salt-minion") + + def start_master(self): + """Starts salt-master.service on the Salt Master node""" + systemctl_remote(self.master_remote, "start", "salt-master") + + def start_minions(self): + """Starts salt-minion.service on all cluster nodes""" + systemctl_remote(self.ctx.cluster, "start", "salt-minion") + + def sync_pillar_data(self, quiet=True): + cmd = "sudo salt \\* saltutil.sync_all" + if quiet: + cmd += " 2>/dev/null" + cmd += " || true" + with safe_while(sleep=15, tries=10, + action=cmd) as proceed: + while proceed(): + no_response = len(re.findall('Minion did not return', self.master_remote.sh(cmd))) + if no_response: + log.info("Not all minions responded. 
Retrying.") + else: + return None diff --git a/qa/tasks/scripts.py b/qa/tasks/scripts.py new file mode 100644 index 0000000000000..7ebc032e8f0cb --- /dev/null +++ b/qa/tasks/scripts.py @@ -0,0 +1,40 @@ +import os + +from util import copy_directory_recursively + + +class Scripts: + + def __init__(self, ctx, logger): + self.log = logger + copied = ctx.get('scripts_copied', False) + remotes = ctx['remotes'] + if copied: + # self.log.info('(scripts ctor) scripts already copied to remotes') + pass + else: + local_path = os.path.dirname(os.path.realpath(__file__)) + '/scripts/' + for remote_name, remote_obj in remotes.items(): + copy_directory_recursively(local_path, remote_obj, "scripts") + ctx['scripts_copied'] = True + + def run(self, remote, script_name, args=[], as_root=True): + class_name = type(remote).__name__ + self.log.debug( + '(scripts) run method was passed a remote object of class {}' + .format(class_name) + ) + if class_name == 'Cluster': + remote_spec = 'the whole cluster' + else: + remote_spec = 'remote {}'.format(remote.hostname) + self.log.info('(scripts) running script {} with args {} on {}' + .format(script_name, args, remote_spec) + ) + path = 'scripts/' + script_name + cmd = 'bash {}'.format(path) + if as_root: + cmd = "sudo " + cmd + if args: + cmd += ' ' + ' '.join(map(str, args)) + return remote.sh(cmd, label=script_name) diff --git a/qa/tasks/scripts/ceph_cluster_status.sh b/qa/tasks/scripts/ceph_cluster_status.sh new file mode 100644 index 0000000000000..4491c60d1d7e9 --- /dev/null +++ b/qa/tasks/scripts/ceph_cluster_status.sh @@ -0,0 +1,13 @@ +# ceph_cluster_status.sh +# +# Display ceph cluster status +# +# args: None +# +set -ex +ceph pg stat -f json-pretty +ceph health detail -f json-pretty +ceph osd tree +ceph osd pool ls detail -f json-pretty +ceph -s +echo "OK" >/dev/null diff --git a/qa/tasks/scripts/ceph_version_sanity.sh b/qa/tasks/scripts/ceph_version_sanity.sh new file mode 100644 index 0000000000000..d565ad6579b84 --- /dev/null +++ b/qa/tasks/scripts/ceph_version_sanity.sh @@ -0,0 +1,21 @@ +# ceph_version_sanity.sh +# +# test that ceph RPM version matches "ceph --version" +# for a loose definition of "matches" +# +# args: None + +set -ex +rpm -q ceph +RPM_NAME=$(rpm -q ceph) +RPM_CEPH_VERSION=$(perl -e '"'"$RPM_NAME"'" =~ m/ceph-(\d+\.\d+\.\d+)/; print "$1\n";') +echo "According to RPM, the ceph upstream version is ->$RPM_CEPH_VERSION<-" >/dev/null +test -n "$RPM_CEPH_VERSION" +ceph --version +BUFFER=$(ceph --version) +CEPH_CEPH_VERSION=$(perl -e '"'"$BUFFER"'" =~ m/ceph version (\d+\.\d+\.\d+)/; print "$1\n";') +echo "According to \"ceph --version\", the ceph upstream version is ->$CEPH_CEPH_VERSION<-" \ + >/dev/null +test -n "$RPM_CEPH_VERSION" +test "$RPM_CEPH_VERSION" = "$CEPH_CEPH_VERSION" +echo "OK" >/dev/null diff --git a/qa/tasks/scripts/create_all_pools_at_once.sh b/qa/tasks/scripts/create_all_pools_at_once.sh new file mode 100644 index 0000000000000..09749f36b687d --- /dev/null +++ b/qa/tasks/scripts/create_all_pools_at_once.sh @@ -0,0 +1,89 @@ +# create_all_pools_at_once.sh +# +# Script for pre-creating pools prior to Stage 4 +# +# Pools are created with a number of PGs calculated to avoid health warnings +# that can arise during/after Stage 4 due to "too few" or "too many" PGs per +# OSD when DeepSea is allowed to create the pools with hard-coded number of +# PGs. 
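+#
+# Illustration of the calculation done below (numbers are made up, not
+# taken from a real run): with 6 OSDs and 2 pools, TOTALPGS = 6 * 100 = 600
+# and each pool gets 600 / 2 / 3 = 100 PGs, assuming triple replication.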
+# +# see also https://github.com/SUSE/DeepSea/issues/536 +# +# args: pools to be created +# +# example invocation: ./create_all_pools_at_once.sh foo bar baz + +echo "Creating pools: $@" + +set -ex + +function json_total_osds { + # total number of OSDs in the cluster + ceph osd ls --format json | jq '. | length' +} + +function pgs_per_pool { + local TOTALPOOLS=$1 + test -n "$TOTALPOOLS" + local TOTALOSDS=$(json_total_osds) + test -n "$TOTALOSDS" + # given the total number of pools and OSDs, + # assume triple replication and equal number of PGs per pool + # and aim for 100 PGs per OSD + let "TOTALPGS = $TOTALOSDS * 100" + let "PGSPEROSD = $TOTALPGS / $TOTALPOOLS / 3" + echo $PGSPEROSD +} + +function create_all_pools_at_once { + # sample usage: create_all_pools_at_once foo bar + local TOTALPOOLS="${#@}" + local PGSPERPOOL=$(pgs_per_pool $TOTALPOOLS) + for POOLNAME in "$@" + do + ceph osd pool create $POOLNAME $PGSPERPOOL $PGSPERPOOL replicated + done + ceph osd pool ls detail +} + +CEPHFS="" +OPENSTACK="" +RBD="" +OTHER="" +for arg in "$@" ; do + arg="${arg,,}" + case "$arg" in + cephfs) CEPHFS="$arg" ;; + openstack) OPENSTACK="$arg" ;; + rbd) RBD="$arg" ;; + *) OTHER+=" $arg" ;; + esac +done + +POOLS="" +if [ $CEPHFS ] ; then + POOLS+=" cephfs_data cephfs_metadata" +fi +if [ "$OPENSTACK" ] ; then + POOLS+=" smoketest-cloud-backups smoketest-cloud-volumes smoketest-cloud-images" + POOLS+=" smoketest-cloud-vms cloud-backups cloud-volumes cloud-images cloud-vms" +fi +if [ "$RBD" ] ; then + POOLS+=" rbd" +fi +if [ "$OTHER" ] ; then + POOLS+="$OTHER" + APPLICATION_ENABLE="$OTHER" +fi +if [ -z "$POOLS" ] ; then + echo "create_all_pools_at_once: bad arguments" + exit 1 +fi +echo "About to create pools ->$POOLS<-" +create_all_pools_at_once $POOLS +if [ "$APPLICATION_ENABLE" ] ; then + for pool in "$APPLICATION_ENABLE" ; do + ceph osd pool application enable $pool deepsea_qa + done +fi +echo "OK" >/dev/null diff --git a/qa/tasks/scripts/lvm_status.sh b/qa/tasks/scripts/lvm_status.sh new file mode 100644 index 0000000000000..bb42111f2a90c --- /dev/null +++ b/qa/tasks/scripts/lvm_status.sh @@ -0,0 +1,10 @@ +# lvm_status.sh +# +# args: None + +set -ex + +pvs --all +vgs --all +lvs --all +lsblk --ascii diff --git a/qa/tasks/scripts/rados_write_test.sh b/qa/tasks/scripts/rados_write_test.sh new file mode 100644 index 0000000000000..ba12c725c887e --- /dev/null +++ b/qa/tasks/scripts/rados_write_test.sh @@ -0,0 +1,19 @@ +# rados_write_test.sh +# +# Write a RADOS object and read it back +# +# NOTE: function assumes the pool "write_test" already exists. Pool can be +# created by calling e.g. "create_all_pools_at_once write_test" immediately +# before calling this function. 
+# +# args: None + +set -ex + +ceph osd pool application enable write_test deepsea_qa +echo "dummy_content" > verify.txt +rados -p write_test put test_object verify.txt +rados -p write_test get test_object verify_returned.txt +test "x$(cat verify.txt)" = "x$(cat verify_returned.txt)" + +echo "OK" >/dev/null diff --git a/qa/tasks/scripts/rgw_init.sh b/qa/tasks/scripts/rgw_init.sh new file mode 100644 index 0000000000000..d7408b33e862a --- /dev/null +++ b/qa/tasks/scripts/rgw_init.sh @@ -0,0 +1,9 @@ +# rgw_init.sh +# Set up RGW +set -ex +USERSYML=/srv/salt/ceph/rgw/users/users.d/rgw.yml +cat <<EOF > $USERSYML +- { uid: "demo", name: "Demo", email: "demo@demo.nil" } +- { uid: "demo1", name: "Demo1", email: "demo1@demo.nil" } +EOF +cat $USERSYML diff --git a/qa/tasks/scripts/rgw_init_ssl.sh b/qa/tasks/scripts/rgw_init_ssl.sh new file mode 100644 index 0000000000000..d6c1ec299c724 --- /dev/null +++ b/qa/tasks/scripts/rgw_init_ssl.sh @@ -0,0 +1,22 @@ +# rgw_init_ssl.sh +# Set up RGW-over-SSL +set -ex +CERTDIR=/srv/salt/ceph/rgw/cert +mkdir -p $CERTDIR +pushd $CERTDIR +openssl req -x509 \ + -nodes \ + -days 1095 \ + -newkey rsa:4096 \ + -keyout rgw.key \ + -out rgw.crt \ + -subj "/C=DE" +cat rgw.key > rgw.pem && cat rgw.crt >> rgw.pem +popd +GLOBALYML=/srv/pillar/ceph/stack/global.yml +cat <<EOF >> $GLOBALYML +rgw_init: default-ssl +EOF +cat $GLOBALYML +cp /srv/salt/ceph/configuration/files/rgw-ssl.conf \ + /srv/salt/ceph/configuration/files/ceph.conf.d/rgw.conf diff --git a/qa/tasks/scripts/salt_api_test.sh b/qa/tasks/scripts/salt_api_test.sh new file mode 100644 index 0000000000000..82014ec4f0985 --- /dev/null +++ b/qa/tasks/scripts/salt_api_test.sh @@ -0,0 +1,10 @@ +# salt_api_test.sh +# Salt API test script +set -ex +TMPFILE=$(mktemp) +curl --silent http://$(hostname):8000/ | tee $TMPFILE # show curl output in log +test -s $TMPFILE +jq .
$TMPFILE >/dev/null +echo -en "\\n" # this is just for log readability +rm $TMPFILE +echo "Salt API test passed" diff --git a/qa/tasks/ses_qa.py b/qa/tasks/ses_qa.py new file mode 100644 index 0000000000000..f0409264dbfac --- /dev/null +++ b/qa/tasks/ses_qa.py @@ -0,0 +1,183 @@ +""" +Task (and subtasks) for SES test automation + +Linter: + flake8 --max-line-length=100 +""" +import logging + +from salt_manager import SaltManager +from scripts import Scripts + +from teuthology.exceptions import ( + ConfigError, + ) +from teuthology.task import Task + +log = logging.getLogger(__name__) +ses_qa_ctx = {} +number_of_osds_in_cluster = """sudo ceph osd tree -f json-pretty | + jq '[.nodes[] | select(.type == \"osd\")] | length'""" + + +class SESQA(Task): + + def __init__(self, ctx, config): + global ses_qa_ctx + super(SESQA, self).__init__(ctx, config) + if ses_qa_ctx: + self.log = ses_qa_ctx['logger_obj'] + self.log.debug("ses_qa_ctx already populated (we are in a subtask)") + if not ses_qa_ctx: + ses_qa_ctx['logger_obj'] = log + self.log = log + self.log.debug("populating ses_qa_ctx (we are *not* in a subtask)") + self._populate_ses_qa_context() + self.master_remote = ses_qa_ctx['master_remote'] + self.nodes = self.ctx['nodes'] + self.nodes_client_only = self.ctx['nodes_client_only'] + self.nodes_cluster = self.ctx['nodes_cluster'] + self.nodes_gateway = self.ctx['nodes_gateway'] + self.nodes_storage = self.ctx['nodes_storage'] + self.nodes_storage_only = self.ctx['nodes_storage_only'] + self.remote_lookup_table = self.ctx['remote_lookup_table'] + self.remotes = self.ctx['remotes'] + self.roles = self.ctx['roles'] + self.role_lookup_table = self.ctx['role_lookup_table'] + self.role_types = self.ctx['role_types'] + self.scripts = Scripts(self.ctx, self.log) + self.sm = ses_qa_ctx['salt_manager_instance'] + + def _populate_ses_qa_context(self): + global ses_qa_ctx + ses_qa_ctx['salt_manager_instance'] = SaltManager(self.ctx) + ses_qa_ctx['master_remote'] = ses_qa_ctx['salt_manager_instance'].master_remote + + def os_type_and_version(self): + os_type = self.ctx.config.get('os_type', 'unknown') + os_version = float(self.ctx.config.get('os_version', 0)) + return (os_type, os_version) + + def setup(self): + super(SESQA, self).setup() + + def begin(self): + super(SESQA, self).begin() + + def end(self): + super(SESQA, self).end() + self.sm.gather_logs('/home/farm/.npm/_logs', 'dashboard-e2e-npm') + self.sm.gather_logs('/home/farm/.protractor-report', 'dashboard-e2e-protractor') + + def teardown(self): + super(SESQA, self).teardown() + + +class Validation(SESQA): + + err_prefix = "(validation subtask) " + + def __init__(self, ctx, config): + global ses_qa_ctx + ses_qa_ctx['logger_obj'] = log.getChild('validation') + self.name = 'ses_qa.validation' + super(Validation, self).__init__(ctx, config) + self.log.debug("munged config is {}".format(self.config)) + + def mgr_plugin_influx(self, **kwargs): + """ + Minimal/smoke test for the MGR influx plugin + + Tests the 'influx' MGR plugin, but only on openSUSE Leap 15.0. + + Testing on SLE-15 is not currently possible because the influxdb + package is not built in IBS for anything higher than SLE-12-SP4. + Getting it to build for SLE-15 requires a newer golang stack than what + is available in SLE-15 - see + https://build.suse.de/project/show/NON_Public:infrastructure:icinga2 + for how another team is building it (and no, we don't want to do that). 
+ + Testing on openSUSE Leap 15.0 is only possible because we are building + the influxdb package in filesystems:ceph:nautilus with modified project + metadata. + + (This problem will hopefully go away when we switch to SLE-15-SP1.) + """ + zypper_cmd = ("sudo zypper --non-interactive --no-gpg-check " + "install --force --no-recommends {}") + os_type, os_version = self.os_type_and_version() + if os_type == 'opensuse' and os_version >= 15: + self.ctx.cluster.run( + args=zypper_cmd.format(' '.join(["python3-influxdb", "influxdb"])) + ) + self.scripts.run( + self.master_remote, + 'mgr_plugin_influx.sh', + ) + else: + self.log.warning( + "mgr_plugin_influx test case not implemented for OS ->{}<-" + .format(os_type + " " + str(os_version)) + ) + + def begin(self): + self.log.debug("Processing tests: ->{}<-".format(self.config.keys())) + for method_spec, kwargs in self.config.items(): + kwargs = {} if not kwargs else kwargs + if not isinstance(kwargs, dict): + raise ConfigError(self.err_prefix + "Method config must be a dict") + self.log.info( + "Running test {} with config ->{}<-" + .format(method_spec, kwargs) + ) + method = getattr(self, method_spec, None) + if method: + method(**kwargs) + else: + raise ConfigError(self.err_prefix + "No such method ->{}<-" + .format(method_spec)) + + def drive_replace_initiate(self, **kwargs): + """ + Initiate DeepSea drive replacement + + Assumes there is one drive that is not deployed (1node5disks with DriveGroup `limit: 4`) + + In order to "hide" an existing disk from ceph.c_v in teuthology, + the disk is formatted and mounted. + """ + total_osds = self.master_remote.sh(number_of_osds_in_cluster) + osd_id = 0 + disks = self._get_drive_group_limit() + assert int(total_osds) == disks, "Unexpected number of osds {} (expected {})"\ + .format(total_osds, disks) + self.scripts.run( + self.master_remote, + 'drive_replace.sh', + args=[osd_id] + ) + + def drive_replace_check(self, **kwargs): + """ + DeepSea drive replacement: post-replacement check + + The replaced osd_id should be back in the osd tree once stage.3 has been run + """ + total_osds = self.master_remote.sh(number_of_osds_in_cluster) + disks = self._get_drive_group_limit() + assert int(total_osds) == disks, "Unexpected number of osds {} (expected {})"\ + .format(total_osds, disks) + self.master_remote.sh("sudo ceph osd tree --format json | tee after.json") + self.master_remote.sh("diff before.json after.json && echo 'Drive Replaced OK'") + + def _get_drive_group_limit(self, **kwargs): + """ + Helper to get the drive_groups `limit` field value + """ + drive_group = next(x for x in self.ctx['config']['tasks'] + if 'deepsea' in x and 'drive_group' in x['deepsea']) + return int(drive_group['deepsea']['drive_group']['custom']['data_devices']['limit']) + + +task = SESQA +validation = Validation diff --git a/qa/tasks/util/__init__.py b/qa/tasks/util/__init__.py index 5b8575ed94e72..5815553989174 100644 --- a/qa/tasks/util/__init__.py +++ b/qa/tasks/util/__init__.py @@ -1,4 +1,27 @@ +import json + from teuthology import misc +from teuthology.contextutil import safe_while +from teuthology.exceptions import ( + CommandFailedError, + ConfigError, + ConnectionLostError, + ) + + +def enumerate_osds(remote, logger): + """ + Given a remote, enumerates the OSDs (if any) running on the machine + associated with that remote.
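+ Returns the host bucket's list of OSD ids (e.g. [0, 1, 2]); if the host has + no bucket in the osd tree, the jq filter yields null and the function returns None.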
+ """ + hostname = remote.hostname + logger.info("Enumerating OSDs on {}".format(hostname)) + cmd = ("sudo ceph osd tree -f json | " + "jq -c '[.nodes[] | select(.name == \"{}\")][0].children'" + .format(hostname.split(".")[0])) + osds = json.loads(remote.sh(cmd)) + return osds + def get_remote(ctx, cluster, service_type, service_id): """ @@ -22,5 +45,243 @@ def get_remote(ctx, cluster, service_type, service_id): service_id)) return remote + def get_remote_for_role(ctx, role): return get_remote(ctx, *misc.split_role(role)) + + +def copy_directory_recursively(from_path, to_remote, to_path=None): + """ + Recursively copies a local directory to a remote. + """ + if to_path is None: + to_path = from_path + misc.sh("scp -r -v {from_path} {host}:{to_path}".format( + from_path=from_path, host=to_remote.name, to_path=to_path)) + + +def introspect_roles(ctx, logger, quiet=True): + """ + Creates the following keys in ctx: + + nodes, + nodes_client_only, + nodes_cluster, + nodes_gateway, + nodes_storage, and + nodes_storage_only. + + These are all simple lists of hostnames. + + Also creates + + ctx['remotes'], + + which is a dict of teuthology "remote" objects, which look like this: + + { remote1_name: remote1_obj, ..., remoten_name: remoten_obj } + + Also creates + + ctx['role_types'] + + which is just like the "roles" list, except it contains only unique + role types per node. + + Finally, creates: + + ctx['role_lookup_table'] + + which will look something like this: + + { + "osd": { "osd.0": osd0remname, ..., "osd.n": osdnremname }, + "mon": { "mon.a": monaremname, ..., "mon.n": monnremname }, + ... + } + + and + + ctx['remote_lookup_table'] + + which looks like this: + + { + remote0name: [ "osd.0", "client.0" ], + ... + remotenname: [ remotenrole0, ..., remotenrole99 ], + } + + (In other words, remote_lookup_table is just like the roles + stanza, except the role lists are keyed by remote name.) 
+ """ + # initialization phase + cluster_roles = ['mon', 'mgr', 'osd', 'mds'] + non_storage_cluster_roles = ['mon', 'mgr', 'mds'] + gateway_roles = ['rgw', 'igw', 'ganesha'] + roles = ctx.config['roles'] + nodes = [] + nodes_client_only = [] + nodes_cluster = [] + non_storage_cluster_nodes = [] + nodes_gateway = [] + nodes_storage = [] + nodes_storage_only = [] + remotes = {} + role_types = [] + role_lookup_table = {} + remote_lookup_table = {} + # introspection phase + idx = 0 + for node_roles_list in roles: + assert isinstance(node_roles_list, list), \ + "node_roles_list is a list" + assert node_roles_list, "node_roles_list is not empty" + remote = get_remote_for_role(ctx, node_roles_list[0]) + role_types.append([]) + if not quiet: + logger.debug("Considering remote name {}, hostname {}" + .format(remote.name, remote.hostname)) + nodes += [remote.hostname] + remotes[remote.hostname] = remote + remote_lookup_table[remote.hostname] = node_roles_list + # inner loop: roles (something like "osd.1" or "c2.mon.a") + for role in node_roles_list: + # FIXME: support multiple clusters as used in, e.g., + # rgw/multisite suite + role_arr = role.split('.') + if len(role_arr) != 2: + raise ConfigError("Unsupported role ->{}<-" + .format(role)) + (role_type, _) = role_arr + if role_type not in role_lookup_table: + role_lookup_table[role_type] = {} + role_lookup_table[role_type][role] = remote.hostname + if role_type in cluster_roles: + nodes_cluster += [remote.hostname] + if role_type in gateway_roles: + nodes_gateway += [remote.hostname] + if role_type in non_storage_cluster_roles: + non_storage_cluster_nodes += [remote.hostname] + if role_type == 'osd': + nodes_storage += [remote.hostname] + if role_type not in role_types[idx]: + role_types[idx] += [role_type] + idx += 1 + nodes_cluster = list(set(nodes_cluster)) + nodes_gateway = list(set(nodes_gateway)) + nodes_storage = list(set(nodes_storage)) + nodes_storage_only = [] + for node in nodes_storage: + if node not in non_storage_cluster_nodes: + if node not in nodes_gateway: + nodes_storage_only += [node] + nodes_client_only = list( + set(nodes).difference(set(nodes_cluster).union(set(nodes_gateway))) + ) + if not quiet: + logger.debug("nodes_client_only is ->{}<-".format(nodes_client_only)) + assign_vars = [ + 'nodes', + 'nodes_client_only', + 'nodes_cluster', + 'nodes_gateway', + 'nodes_storage', + 'nodes_storage_only', + 'remote_lookup_table', + 'remotes', + 'role_lookup_table', + 'role_types', + ] + for var in assign_vars: + exec("ctx['{var}'] = {var}".format(var=var)) + ctx['dev_env'] = True if len(nodes_cluster) < 4 else False + if not quiet: + # report phase + logger.info("ROLE INTROSPECTION REPORT") + report_vars = assign_vars + ['dev_env'] + for var in report_vars: + logger.info("{} == {}".format(var, ctx[var])) + + +def remote_exec(remote, cmd_str, logger, log_spec, quiet=True, rerun=False, tries=0): + """ + Execute cmd_str and catch CommandFailedError and ConnectionLostError (and + rerun cmd_str post-reboot if rerun flag is set) until one of the conditons + are fulfilled: + 1) Execution succeeded + 2) Attempts are exceeded + 3) CommandFailedError is raised + """ + cmd_str = "sudo bash -c '{}'".format(cmd_str) + # if quiet: + # cmd_args += [run.Raw('2>'), "/dev/null"] + already_rebooted_at_least_once = False + if tries: + remote.run(args="uptime") + logger.info("Running command ->{}<- on {}. " + "This might cause the machine to reboot!" 
+ .format(cmd_str, remote.hostname)) + with safe_while(sleep=60, tries=tries, action="wait for reconnect") as proceed: + while proceed(): + try: + if already_rebooted_at_least_once: + if not rerun: + remote.run(args="echo Back from reboot ; uptime") + break + remote.run(args=cmd_str) + break + except CommandFailedError: + logger.error(("{} failed. Creating /var/log/journalctl.log with " + "output of \"journalctl --all\"!").format(log_spec)) + remote.sh("sudo su -c 'journalctl --all > /var/log/journalctl.log'") + raise + except ConnectionLostError: + already_rebooted_at_least_once = True + if tries < 1: + raise + logger.warning("No connection established yet ...") + + +def remote_run_script_as_root(remote, path, data, args=None): + """ + Wrapper around misc.write_file to simplify the design pattern: + 1. use misc.write_file to create a bash script on the remote + 2. use Remote.run to run that bash script via "sudo bash $SCRIPT" + """ + misc.write_file(remote, path, data) + cmd = 'sudo bash {}'.format(path) + if args: + cmd += ' ' + ' '.join(args) + remote.run(label=path, args=cmd) + + +def sudo_append_to_file(remote, path, data): + """ + Append data to a remote file. Standard 'cat >>': creates the file + if it doesn't exist, but all directory components in the file + path must exist. + + :param remote: Remote site. + :param path: Path on the remote being written to. + :param data: Python string containing data to be written. + """ + remote.run( + args=[ + 'sudo', + 'sh', + '-c', + 'cat >> ' + path, + ], + stdin=data, + ) + + +def get_rpm_pkg_version(remote, pkg, logger): + """Gather the RPM package version""" + version = None + try: + version = remote.sh('rpm --queryformat="%{{VERSION}}" -q {}'.format(pkg)) + except CommandFailedError: + logger.warning("Package {} is not installed".format(pkg)) + return version -- 2.39.5