qa/deepsea: forward-port basic (tier{0,1}) tests from SES6
author Nathan Cutler <ncutler@suse.com>
Tue, 22 Oct 2019 12:35:11 +0000 (14:35 +0200)
committer Stefen Allen <sallen@suse.com>
Thu, 9 Dec 2021 19:29:44 +0000 (12:29 -0700)
Fixes: https://jira.suse.com/browse/SES-1049
Signed-off-by: Nathan Cutler <ncutler@suse.com>
99 files changed:
qa/deepsea/.qa [new symlink]
qa/deepsea/boilerplate/+ [new file with mode: 0644]
qa/deepsea/boilerplate/ceph_cm_salt.yaml [new file with mode: 0644]
qa/deepsea/boilerplate/disable-tuned.yaml [new file with mode: 0644]
qa/deepsea/boilerplate/zypper-dup.yaml [new file with mode: 0644]
qa/deepsea/cli/.qa [new symlink]
qa/deepsea/cli/off.yaml [new file with mode: 0644]
qa/deepsea/cli/on.yaml [new file with mode: 0644]
qa/deepsea/deepsea-services.yaml [new file with mode: 0644]
qa/deepsea/deepsea.yaml [new file with mode: 0644]
qa/deepsea/disks/0disks.yaml [new file with mode: 0644]
qa/deepsea/disks/1disk.yaml [new file with mode: 0644]
qa/deepsea/disks/2disks.yaml [new file with mode: 0644]
qa/deepsea/disks/3disks.yaml [new file with mode: 0644]
qa/deepsea/disks/4disks.yaml [new file with mode: 0644]
qa/deepsea/disks/5disks.yaml [new file with mode: 0644]
qa/deepsea/distros/.qa [new symlink]
qa/deepsea/distros/opensuse_15.1.yaml [new symlink]
qa/deepsea/distros/sle_15.1.yaml [new symlink]
qa/deepsea/health-ok/common/common.sh [new file with mode: 0644]
qa/deepsea/health-ok/common/deploy.sh [new file with mode: 0644]
qa/deepsea/health-ok/common/helper.sh [new file with mode: 0644]
qa/deepsea/health-ok/common/json.sh [new file with mode: 0644]
qa/deepsea/health-ok/common/nfs-ganesha.sh [new file with mode: 0644]
qa/deepsea/health-ok/common/policy.sh [new file with mode: 0644]
qa/deepsea/health-ok/common/pool.sh [new file with mode: 0644]
qa/deepsea/health-ok/common/rbd.sh [new file with mode: 0644]
qa/deepsea/health-ok/common/rgw.sh [new file with mode: 0644]
qa/deepsea/health-ok/common/zypper.sh [new file with mode: 0644]
qa/deepsea/health-ok/health-ok.sh [new file with mode: 0755]
qa/deepsea/health-ok/stage-5.sh [new file with mode: 0755]
qa/deepsea/nodes/1node.yaml [new file with mode: 0644]
qa/deepsea/nodes/20nodes.yaml [new file with mode: 0644]
qa/deepsea/nodes/2nodes.yaml [new file with mode: 0644]
qa/deepsea/nodes/3nodes.yaml [new file with mode: 0644]
qa/deepsea/nodes/4nodes.yaml [new file with mode: 0644]
qa/deepsea/nodes/5nodes.yaml [new file with mode: 0644]
qa/deepsea/salt.yaml [new file with mode: 0644]
qa/deepsea/storage-profiles/bs_dedicated_db.yaml [new file with mode: 0644]
qa/deepsea/storage-profiles/bs_dedicated_db_crypt.yaml [new file with mode: 0644]
qa/deepsea/storage-profiles/bs_dedicated_db_sizes.yaml [new file with mode: 0644]
qa/deepsea/storage-profiles/bs_dedicated_db_sizes_crypt.yaml [new file with mode: 0644]
qa/deepsea/storage-profiles/bs_dedicated_db_sizes_mixed.yaml [new file with mode: 0644]
qa/deepsea/storage-profiles/bs_dedicated_db_sizes_mixed_crypt.yaml [new file with mode: 0644]
qa/deepsea/storage-profiles/bs_dedicated_wal.yaml [new file with mode: 0644]
qa/deepsea/storage-profiles/bs_dedicated_wal_crypt.yaml [new file with mode: 0644]
qa/deepsea/storage-profiles/bs_dedicated_wal_db.yaml [new file with mode: 0644]
qa/deepsea/storage-profiles/bs_dedicated_wal_db_crypt.yaml [new file with mode: 0644]
qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_all.yaml [new file with mode: 0644]
qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_all_crypt.yaml [new file with mode: 0644]
qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_mixed.yaml [new file with mode: 0644]
qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_mixed_crypt.yaml [new file with mode: 0644]
qa/deepsea/storage-profiles/bs_dedicated_wal_sizes.yaml [new file with mode: 0644]
qa/deepsea/storage-profiles/bs_dedicated_wal_sizes_crypt.yaml [new file with mode: 0644]
qa/deepsea/storage-profiles/bs_dedicated_wal_sizes_mixed.yaml [new file with mode: 0644]
qa/deepsea/storage-profiles/bs_dedicated_wal_sizes_mixed_crypt.yaml [new file with mode: 0644]
qa/deepsea/storage-profiles/fs_dedicated_journal.yaml [new file with mode: 0644]
qa/deepsea/storage-profiles/fs_dedicated_journal_crypt.yaml [new file with mode: 0644]
qa/suites/deepsea/.qa [new symlink]
qa/suites/deepsea/tier0/.qa [new symlink]
qa/suites/deepsea/tier0/salt/% [new file with mode: 0644]
qa/suites/deepsea/tier0/salt/.qa [new symlink]
qa/suites/deepsea/tier0/salt/0-salt.yaml [new symlink]
qa/suites/deepsea/tier0/salt/boilerplate [new symlink]
qa/suites/deepsea/tier0/salt/cluster/+ [new file with mode: 0644]
qa/suites/deepsea/tier0/salt/cluster/.qa [new symlink]
qa/suites/deepsea/tier0/salt/cluster/1disk.yaml [new symlink]
qa/suites/deepsea/tier0/salt/cluster/1node.yaml [new symlink]
qa/suites/deepsea/tier0/salt/distros [new symlink]
qa/suites/deepsea/tier1/.qa [new symlink]
qa/suites/deepsea/tier1/health-ok/% [new file with mode: 0644]
qa/suites/deepsea/tier1/health-ok/.qa [new symlink]
qa/suites/deepsea/tier1/health-ok/0-salt.yaml [new symlink]
qa/suites/deepsea/tier1/health-ok/1-deploy-phase.yaml [new symlink]
qa/suites/deepsea/tier1/health-ok/2-test-phase.yaml [new file with mode: 0644]
qa/suites/deepsea/tier1/health-ok/boilerplate [new symlink]
qa/suites/deepsea/tier1/health-ok/cluster/+ [new file with mode: 0644]
qa/suites/deepsea/tier1/health-ok/cluster/.qa [new symlink]
qa/suites/deepsea/tier1/health-ok/cluster/4disks.yaml [new symlink]
qa/suites/deepsea/tier1/health-ok/cluster/roles.yaml [new file with mode: 0644]
qa/suites/deepsea/tier1/health-ok/deepsea_cli_off.yaml [new file with mode: 0644]
qa/suites/deepsea/tier1/health-ok/distros [new symlink]
qa/suites/suse/.qa [new symlink]
qa/suites/suse/tier0 [new symlink]
qa/suites/suse/tier1 [new symlink]
qa/tasks/deepsea.py [new file with mode: 0644]
qa/tasks/salt.py [new file with mode: 0644]
qa/tasks/salt_manager.py [new file with mode: 0644]
qa/tasks/scripts.py [new file with mode: 0644]
qa/tasks/scripts/ceph_cluster_status.sh [new file with mode: 0644]
qa/tasks/scripts/ceph_version_sanity.sh [new file with mode: 0644]
qa/tasks/scripts/create_all_pools_at_once.sh [new file with mode: 0644]
qa/tasks/scripts/lvm_status.sh [new file with mode: 0644]
qa/tasks/scripts/rados_write_test.sh [new file with mode: 0644]
qa/tasks/scripts/rgw_init.sh [new file with mode: 0644]
qa/tasks/scripts/rgw_init_ssl.sh [new file with mode: 0644]
qa/tasks/scripts/salt_api_test.sh [new file with mode: 0644]
qa/tasks/ses_qa.py [new file with mode: 0644]
qa/tasks/util/__init__.py

diff --git a/qa/deepsea/.qa b/qa/deepsea/.qa
new file mode 120000 (symlink)
index 0000000..a96aa0e
--- /dev/null
@@ -0,0 +1 @@
+..
\ No newline at end of file
diff --git a/qa/deepsea/boilerplate/+ b/qa/deepsea/boilerplate/+
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/qa/deepsea/boilerplate/ceph_cm_salt.yaml b/qa/deepsea/boilerplate/ceph_cm_salt.yaml
new file mode 100644 (file)
index 0000000..bcae3d9
--- /dev/null
@@ -0,0 +1,2 @@
+ceph_cm: salt
+ceph_cm_ansible: false
diff --git a/qa/deepsea/boilerplate/disable-tuned.yaml b/qa/deepsea/boilerplate/disable-tuned.yaml
new file mode 100644 (file)
index 0000000..21e2252
--- /dev/null
@@ -0,0 +1,6 @@
+overrides:
+        deepsea:
+                alternative_defaults:
+                      tuned_mgr_init: default-off
+                      tuned_mon_init: default-off
+                      tuned_osd_init: default-off
diff --git a/qa/deepsea/boilerplate/zypper-dup.yaml b/qa/deepsea/boilerplate/zypper-dup.yaml
new file mode 100644 (file)
index 0000000..049604d
--- /dev/null
@@ -0,0 +1,4 @@
+overrides:
+        deepsea:
+                alternative_defaults:
+                        upgrade_init: zypper-dup
diff --git a/qa/deepsea/cli/.qa b/qa/deepsea/cli/.qa
new file mode 120000 (symlink)
index 0000000..fea2489
--- /dev/null
@@ -0,0 +1 @@
+../.qa
\ No newline at end of file
diff --git a/qa/deepsea/cli/off.yaml b/qa/deepsea/cli/off.yaml
new file mode 100644 (file)
index 0000000..a2beb7f
--- /dev/null
@@ -0,0 +1,3 @@
+overrides:
+  deepsea:
+    cli: false
diff --git a/qa/deepsea/cli/on.yaml b/qa/deepsea/cli/on.yaml
new file mode 100644 (file)
index 0000000..739b017
--- /dev/null
@@ -0,0 +1,3 @@
+overrides:
+  deepsea:
+    cli: true
diff --git a/qa/deepsea/deepsea-services.yaml b/qa/deepsea/deepsea-services.yaml
new file mode 100644 (file)
index 0000000..ebad49e
--- /dev/null
@@ -0,0 +1,4 @@
+tasks:
+        - deepsea.create_pools:
+        - deepsea.orch:
+                stage: 4
diff --git a/qa/deepsea/deepsea.yaml b/qa/deepsea/deepsea.yaml
new file mode 100644 (file)
index 0000000..3d5fec3
--- /dev/null
@@ -0,0 +1,14 @@
+tasks:
+        - deepsea:
+                allow_python2: false
+                drive_group: default
+        - deepsea.orch:
+                stage: prep
+        - deepsea.orch:
+                stage: 1
+        - deepsea.policy:
+        - deepsea.orch:
+                stage: 2
+        - deepsea.ceph_conf:
+        - deepsea.orch:
+                stage: 3
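
Each deepsea.orch entry above runs one DeepSea orchestration stage, with deepsea.policy and deepsea.ceph_conf slotted in between. As a rough manual equivalent, here is a sketch only (not part of this commit), mirroring the salt-run state.orch ceph.stage.* calls used by the helper scripts further down; it assumes DeepSea is installed on the Salt Master:

# Sketch: hand-driven equivalent of the task list above (illustrative only).
set -ex
salt-run --no-color state.orch ceph.stage.prep   # Stage 0: prep / updates
salt-run --no-color state.orch ceph.stage.1      # Stage 1: discovery
# ... write /srv/pillar/ceph/proposals/policy.cfg here (deepsea.policy does this) ...
salt-run --no-color state.orch ceph.stage.2      # Stage 2: configuration
# ... drop any ceph.conf.d snippets here (deepsea.ceph_conf does this) ...
salt-run --no-color state.orch ceph.stage.3      # Stage 3: deploy mon/mgr/OSDs
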
diff --git a/qa/deepsea/disks/0disks.yaml b/qa/deepsea/disks/0disks.yaml
new file mode 100644 (file)
index 0000000..dc8605a
--- /dev/null
@@ -0,0 +1,4 @@
+openstack:
+- volumes: # attached to each instance
+    count: 0
+    size: 10 # GB
diff --git a/qa/deepsea/disks/1disk.yaml b/qa/deepsea/disks/1disk.yaml
new file mode 100644 (file)
index 0000000..1654bda
--- /dev/null
@@ -0,0 +1,4 @@
+openstack:
+- volumes: # attached to each instance
+    count: 1
+    size: 10 # GB
diff --git a/qa/deepsea/disks/2disks.yaml b/qa/deepsea/disks/2disks.yaml
new file mode 100644 (file)
index 0000000..f794a6f
--- /dev/null
@@ -0,0 +1,4 @@
+openstack:
+- volumes: # attached to each instance
+    count: 2
+    size: 10 # GB
diff --git a/qa/deepsea/disks/3disks.yaml b/qa/deepsea/disks/3disks.yaml
new file mode 100644 (file)
index 0000000..8da92ca
--- /dev/null
@@ -0,0 +1,4 @@
+openstack:
+- volumes: # attached to each instance
+    count: 3
+    size: 10 # GB
diff --git a/qa/deepsea/disks/4disks.yaml b/qa/deepsea/disks/4disks.yaml
new file mode 100644 (file)
index 0000000..2054da9
--- /dev/null
@@ -0,0 +1,4 @@
+openstack:
+- volumes: # attached to each instance
+    count: 4
+    size: 10 # GB
diff --git a/qa/deepsea/disks/5disks.yaml b/qa/deepsea/disks/5disks.yaml
new file mode 100644 (file)
index 0000000..a5bf871
--- /dev/null
@@ -0,0 +1,4 @@
+openstack:
+- volumes: # attached to each instance
+    count: 5
+    size: 10 # GB
diff --git a/qa/deepsea/distros/.qa b/qa/deepsea/distros/.qa
new file mode 120000 (symlink)
index 0000000..fea2489
--- /dev/null
@@ -0,0 +1 @@
+../.qa
\ No newline at end of file
diff --git a/qa/deepsea/distros/opensuse_15.1.yaml b/qa/deepsea/distros/opensuse_15.1.yaml
new file mode 120000 (symlink)
index 0000000..570c8cf
--- /dev/null
@@ -0,0 +1 @@
+.qa/distros/all/opensuse_15.1.yaml
\ No newline at end of file
diff --git a/qa/deepsea/distros/sle_15.1.yaml b/qa/deepsea/distros/sle_15.1.yaml
new file mode 120000 (symlink)
index 0000000..c6791f5
--- /dev/null
@@ -0,0 +1 @@
+.qa/distros/all/sle_15.1.yaml
\ No newline at end of file
diff --git a/qa/deepsea/health-ok/common/common.sh b/qa/deepsea/health-ok/common/common.sh
new file mode 100644 (file)
index 0000000..3c6d9bc
--- /dev/null
@@ -0,0 +1,457 @@
+#
+# This file is part of the DeepSea integration test suite
+#
+
+# BASEDIR is set by the calling script
+source $BASEDIR/common/deploy.sh
+source $BASEDIR/common/helper.sh
+source $BASEDIR/common/json.sh
+source $BASEDIR/common/nfs-ganesha.sh
+source $BASEDIR/common/policy.sh
+source $BASEDIR/common/pool.sh
+source $BASEDIR/common/rbd.sh
+source $BASEDIR/common/rgw.sh
+source $BASEDIR/common/zypper.sh
+
+
+#
+# functions that process command-line arguments
+#
+
+function assert_enhanced_getopt {
+    set +e
+    echo -n "Running 'getopt --test'... "
+    getopt --test > /dev/null
+    if [ $? -ne 4 ]; then
+        echo "FAIL"
+        echo "This script requires enhanced getopt. Bailing out."
+        exit 1
+    fi
+    echo "PASS"
+    set -e
+}
+
+
+#
+# functions that run the DeepSea stages
+#
+
+function _disable_update_in_stage_0 {
+    cp /srv/salt/ceph/stage/prep/master/default.sls /srv/salt/ceph/stage/prep/master/default-orig.sls
+    cp /srv/salt/ceph/stage/prep/master/default-no-update-no-reboot.sls /srv/salt/ceph/stage/prep/master/default.sls
+    cp /srv/salt/ceph/stage/prep/minion/default.sls /srv/salt/ceph/stage/prep/minion/default-orig.sls
+    cp /srv/salt/ceph/stage/prep/minion/default-no-update-no-reboot.sls /srv/salt/ceph/stage/prep/minion/default.sls
+}
+
+function run_stage_0 {
+    test "$NO_UPDATE" && _disable_update_in_stage_0
+    _run_stage 0 "$@"
+    if _root_fs_is_btrfs ; then
+        echo "Root filesystem is btrfs: creating subvolumes for /var/lib/ceph"
+        salt-run state.orch ceph.migrate.subvolume
+    else
+        echo "Root filesystem is *not* btrfs: skipping subvolume creation"
+    fi
+    test "$STAGE_SUCCEEDED"
+}
+
+function run_stage_1 {
+    _run_stage 1 "$@"
+    test "$STAGE_SUCCEEDED"
+}
+
+function run_stage_2 {
+    # This was needed with SCC repos
+    #salt '*' cmd.run "for delay in 60 60 60 60 ; do sudo zypper --non-interactive --gpg-auto-import-keys refresh && break ; sleep $delay ; done"
+    _run_stage 2 "$@"
+    salt_pillar_items 2>/dev/null
+    test "$STAGE_SUCCEEDED"
+}
+
+function _disable_tuned {
+    local prefix=/srv/salt/ceph/tuned
+    mv $prefix/mgr/default.sls $prefix/mgr/default.sls-MOVED
+    mv $prefix/mon/default.sls $prefix/mon/default.sls-MOVED
+    mv $prefix/osd/default.sls $prefix/osd/default.sls-MOVED
+    mv $prefix/mgr/default-off.sls $prefix/mgr/default.sls
+    mv $prefix/mon/default-off.sls $prefix/mon/default.sls
+    mv $prefix/osd/default-off.sls $prefix/osd/default.sls
+}
+
+function run_stage_3 {
+    cat_global_conf
+    lsblk_on_storage_node
+    if [ "$TUNED" ] ; then
+        echo "WWWW: tuned will be deployed as usual"
+    else
+        echo "WWWW: tuned will NOT be deployed"
+        _disable_tuned
+    fi
+    _run_stage 3 "$@"
+    lsblk_on_storage_node
+    ceph osd tree
+    cat_ceph_conf
+    admin_auth_status
+    test "$STAGE_SUCCEEDED"
+}
+
+function run_stage_4 {
+    _run_stage 4 "$@"
+    test "$STAGE_SUCCEEDED"
+}
+
+function run_stage_5 {
+    _run_stage 5 "$@"
+    test "$STAGE_SUCCEEDED"
+}
+
+
+#
+# functions that generate /etc/ceph/ceph.conf
+# see https://github.com/SUSE/DeepSea/tree/master/srv/salt/ceph/configuration/files/ceph.conf.d
+#
+
+function change_rgw_conf {
+    cat <<'EOF' >> /srv/salt/ceph/configuration/files/ceph.conf.d/rgw.conf
+foo = bar
+EOF
+}
+
+function change_osd_conf {
+    cat <<'EOF' >> /srv/salt/ceph/configuration/files/ceph.conf.d/osd.conf
+foo = bar
+EOF
+}
+
+function change_mon_conf {
+    cat <<'EOF' >> /srv/salt/ceph/configuration/files/ceph.conf.d/mon.conf
+foo = bar
+EOF
+}
+
+function ceph_conf_small_cluster {
+    local STORAGENODES=$(json_storage_nodes)
+    test -n "$STORAGENODES"
+    if [ "$STORAGENODES" -eq 1 ] ; then
+        echo "Adjusting ceph.conf for operation with 1 storage node"
+        cat <<'EOF' >> /srv/salt/ceph/configuration/files/ceph.conf.d/global.conf
+mon pg warn min per osd = 16
+osd pool default size = 2
+osd crush chooseleaf type = 0 # failure domain == osd
+EOF
+    elif [ "$STORAGENODES" -eq 2 -o "$STORAGENODES" -eq 3 ] ; then
+        echo "Adjusting ceph.conf for operation with 2 or 3 storage nodes"
+        cat <<'EOF' >> /srv/salt/ceph/configuration/files/ceph.conf.d/global.conf
+mon pg warn min per osd = 8
+osd pool default size = 2
+EOF
+    else
+        echo "Four or more storage nodes; not adjusting ceph.conf"
+    fi
+}
+
+function ceph_conf_mon_allow_pool_delete {
+    echo "Adjusting ceph.conf to allow pool deletes"
+    cat <<'EOF' >> /srv/salt/ceph/configuration/files/ceph.conf.d/global.conf
+mon allow pool delete = true
+EOF
+}
+
+function ceph_conf_dashboard {
+    echo "Adjusting ceph.conf for deployment of dashboard MGR module"
+    cat <<'EOF' >> /srv/salt/ceph/configuration/files/ceph.conf.d/mon.conf
+mgr initial modules = dashboard
+EOF
+}
+
+
+#
+# functions that print status information
+#
+
+function cat_deepsea_log {
+    cat /var/log/deepsea.log
+}
+
+function cat_salt_config {
+    cat /etc/salt/master
+    cat /etc/salt/minion
+}
+
+function cat_policy_cfg {
+    cat /srv/pillar/ceph/proposals/policy.cfg
+}
+
+function salt_pillar_items {
+    salt '*' pillar.items
+}
+
+function salt_pillar_get_roles {
+    salt '*' pillar.get roles
+}
+
+function salt_cmd_run_lsblk {
+    salt '*' cmd.run lsblk
+}
+
+function cat_global_conf {
+    cat /srv/salt/ceph/configuration/files/ceph.conf.d/global.conf || true
+}
+
+function cat_ceph_conf {
+    salt '*' cmd.run "cat /etc/ceph/ceph.conf" 2>/dev/null
+}
+
+function admin_auth_status {
+    ceph auth get client.admin
+    ls -l /etc/ceph/ceph.client.admin.keyring
+    cat /etc/ceph/ceph.client.admin.keyring
+}
+
+function number_of_hosts_in_ceph_osd_tree {
+    ceph osd tree -f json-pretty | jq '[.nodes[] | select(.type == "host")] | length'
+}
+
+function number_of_osds_in_ceph_osd_tree {
+    ceph osd tree -f json-pretty | jq '[.nodes[] | select(.type == "osd")] | length'
+}
+
+function ceph_cluster_status {
+    ceph pg stat -f json-pretty
+    _grace_period 1
+    ceph health detail -f json-pretty
+    _grace_period 1
+    ceph osd tree
+    _grace_period 1
+    ceph osd pool ls detail -f json-pretty
+    _grace_period 1
+    ceph -s
+}
+
+function ceph_log_grep_enoent_eaccess {
+    set +e
+    grep -rH "Permission denied" /var/log/ceph
+    grep -rH "No such file or directory" /var/log/ceph
+    set -e
+}
+
+
+#
+# core validation tests
+#
+
+function ceph_version_test {
+# test that ceph RPM version matches "ceph --version"
+# for a loose definition of "matches"
+    rpm -q ceph
+    local RPM_NAME=$(rpm -q ceph)
+    local RPM_CEPH_VERSION=$(perl -e '"'"$RPM_NAME"'" =~ m/ceph-(\d+\.\d+\.\d+)/; print "$1\n";')
+    echo "According to RPM, the ceph upstream version is ->$RPM_CEPH_VERSION<-"
+    test -n "$RPM_CEPH_VERSION"
+    ceph --version
+    local BUFFER=$(ceph --version)
+    local CEPH_CEPH_VERSION=$(perl -e '"'"$BUFFER"'" =~ m/ceph version (\d+\.\d+\.\d+)/; print "$1\n";')
+    echo "According to \"ceph --version\", the ceph upstream version is ->$CEPH_CEPH_VERSION<-"
+    test -n "$RPM_CEPH_VERSION"
+    test "$RPM_CEPH_VERSION" = "$CEPH_CEPH_VERSION"
+}
+
+function ceph_health_test {
+    local LOGFILE=/tmp/ceph_health_test.log
+    echo "Waiting up to 15 minutes for HEALTH_OK..."
+    salt -C 'I@roles:master' wait.until status=HEALTH_OK timeout=900 check=1 2>/dev/null | tee $LOGFILE
+    # last line: determines return value of function
+    ! grep -q 'Timeout expired' $LOGFILE
+}
+
+function rados_write_test {
+    #
+    # NOTE: function assumes the pool "write_test" already exists. Pool can be
+    # created by calling e.g. "create_all_pools_at_once write_test" immediately
+    # before calling this function.
+    #
+    ceph osd pool application enable write_test deepsea_qa
+    echo "dummy_content" > verify.txt
+    rados -p write_test put test_object verify.txt
+    rados -p write_test get test_object verify_returned.txt
+    test "x$(cat verify.txt)" = "x$(cat verify_returned.txt)"
+}
+
+function lsblk_on_storage_node {
+    local TESTSCRIPT=/tmp/lsblk_test.sh
+    local STORAGENODE=$(_first_x_node storage)
+    cat << 'EOF' > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+echo "running lsblk as $(whoami) on $(hostname --fqdn)"
+lsblk
+echo "Result: OK"
+EOF
+    _run_test_script_on_node $TESTSCRIPT $STORAGENODE
+}
+
+function cephfs_mount_and_sanity_test {
+    #
+    # run cephfs mount test script on the client node
+    # mounts cephfs in /mnt, touches a file, asserts that it exists
+    #
+    local TESTSCRIPT=/tmp/cephfs_test.sh
+    local CLIENTNODE=$(_client_node)
+    cat << 'EOF' > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+echo "cephfs mount test script running as $(whoami) on $(hostname --fqdn)"
+TESTMONS=$(ceph-conf --lookup 'mon_initial_members' | tr -d '[:space:]')
+TESTSECR=$(grep 'key =' /etc/ceph/ceph.client.admin.keyring | awk '{print $NF}')
+echo "MONs: $TESTMONS"
+echo "admin secret: $TESTSECR"
+test -d /mnt
+mount -t ceph ${TESTMONS}:/ /mnt -o name=admin,secret="$TESTSECR"
+touch /mnt/bubba
+test -f /mnt/bubba
+umount /mnt
+echo "Result: OK"
+EOF
+    # FIXME: assert no MDS running on $CLIENTNODE
+    _run_test_script_on_node $TESTSCRIPT $CLIENTNODE
+}
+
+function iscsi_kludge {
+    #
+    # apply kludge to work around bsc#1049669
+    #
+    local TESTSCRIPT=/tmp/iscsi_kludge.sh
+    local IGWNODE=$(_first_x_node igw)
+    cat << 'EOF' > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+echo "igw kludge script running as $(whoami) on $(hostname --fqdn)"
+sed -i -e 's/\("host": "target[[:digit:]]\+\)"/\1.teuthology"/' /tmp/lrbd.conf
+cat /tmp/lrbd.conf
+source /etc/sysconfig/lrbd; lrbd -v $LRBD_OPTIONS -f /tmp/lrbd.conf
+systemctl restart lrbd.service
+systemctl --no-pager --full status lrbd.service
+echo "Result: OK"
+EOF
+    _run_test_script_on_node $TESTSCRIPT $IGWNODE
+}
+
+function igw_info {
+    #
+    # peek at igw information on the igw node
+    #
+    local TESTSCRIPT=/tmp/igw_info.sh
+    local IGWNODE=$(_first_x_node igw)
+    cat << 'EOF' > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+echo "igw info script running as $(whoami) on $(hostname --fqdn)"
+rpm -q lrbd || true
+lrbd --output || true
+ls -lR /sys/kernel/config/target/ || true
+ss --tcp --numeric state listening
+echo "See 3260 there?"
+echo "Result: OK"
+EOF
+    _run_test_script_on_node $TESTSCRIPT $IGWNODE
+}
+
+function iscsi_mount_and_sanity_test {
+    #
+    # run iscsi mount test script on the client node
+    # mounts iscsi in /mnt, touches a file, asserts that it exists
+    #
+    local TESTSCRIPT=/tmp/iscsi_test.sh
+    local CLIENTNODE=$(_client_node)
+    local IGWNODE=$(_first_x_node igw)
+    cat << EOF > $TESTSCRIPT
+set -e
+trap 'echo "Result: NOT_OK"' ERR
+for delay in 60 60 60 60 ; do
+    sudo zypper --non-interactive --gpg-auto-import-keys refresh && break
+    sleep $delay
+done
+set -x
+zypper --non-interactive install --no-recommends open-iscsi multipath-tools
+systemctl start iscsid.service
+sleep 5
+systemctl --no-pager --full status iscsid.service
+iscsiadm -m discovery -t st -p $IGWNODE
+iscsiadm -m node -L all
+systemctl start multipathd.service
+sleep 5
+systemctl --no-pager --full status multipathd.service
+ls -lR /dev/mapper
+ls -l /dev/disk/by-path
+ls -l /dev/disk/by-*id
+multipath -ll
+mkfs -t xfs /dev/dm-0
+test -d /mnt
+mount /dev/dm-0 /mnt
+df -h /mnt
+touch /mnt/bubba
+test -f /mnt/bubba
+umount /mnt
+echo "Result: OK"
+EOF
+    # FIXME: assert script not running on the iSCSI gateway node
+    _run_test_script_on_node $TESTSCRIPT $CLIENTNODE
+}
+
+function test_systemd_ceph_osd_target_wants {
+    #
+    # see bsc#1051598 in which ceph-disk was omitting --runtime when it enabled
+    # ceph-osd@$ID.service units
+    #
+    local TESTSCRIPT=/tmp/test_systemd_ceph_osd_target_wants.sh
+    local STORAGENODE=$(_first_x_node storage)
+    cat << 'EOF' > $TESTSCRIPT
+set -x
+CEPH_OSD_WANTS="/systemd/system/ceph-osd.target.wants"
+ETC_CEPH_OSD_WANTS="/etc$CEPH_OSD_WANTS"
+RUN_CEPH_OSD_WANTS="/run$CEPH_OSD_WANTS"
+ls -l $ETC_CEPH_OSD_WANTS
+ls -l $RUN_CEPH_OSD_WANTS
+set -e
+trap 'echo "Result: NOT_OK"' ERR
+echo "Asserting that there is no directory $ETC_CEPH_OSD_WANTS"
+test -d "$ETC_CEPH_OSD_WANTS" && false
+echo "Asserting that $RUN_CEPH_OSD_WANTS exists, is a directory, and is not empty"
+test -d "$RUN_CEPH_OSD_WANTS"
+test -n "$(ls --almost-all $RUN_CEPH_OSD_WANTS)"
+echo "Result: OK"
+EOF
+    _run_test_script_on_node $TESTSCRIPT $STORAGENODE
+}
+
+function configure_all_OSDs_to_filestore {
+    salt-run proposal.populate format=filestore name=filestore 2>/dev/null
+    chown salt:salt /srv/pillar/ceph/proposals/policy.cfg
+    sed -i 's/profile-default/profile-filestore/g' /srv/pillar/ceph/proposals/policy.cfg
+}
+
+function verify_OSD_type {
+    # checking with 'ceph osd metadata' command
+    # 1st input argument: type 'filestore' or 'bluestore'
+    # 2nd input argument: OSD ID 
+    osd_type=$(ceph osd metadata $2 -f json-pretty | jq '.osd_objectstore')
+    if [[ $osd_type != \"$1\" ]] ; then
+        echo "Error: Object store type is not $1 for OSD.ID : $2"
+        exit 1
+    else
+        echo OSD.${2} $osd_type
+    fi
+}
+
+function check_OSD_type {  
+    # expecting as argument 'filestore' or 'bluestore' 
+    for i in $(ceph osd ls);do verify_OSD_type $1 $i;done
+}
+
+function migrate_to_bluestore {
+    salt-run state.orch ceph.migrate.policy 2>/dev/null
+    sed -i 's/profile-filestore/migrated-profile-filestore/g' /srv/pillar/ceph/proposals/policy.cfg
+    salt-run disengage.safety 2>/dev/null
+    salt-run state.orch ceph.migrate.osds 2>/dev/null
+}
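
common.sh is meant to be sourced rather than executed; the executable entry point is health-ok.sh (added below with mode 0755). What follows is a minimal sketch of a caller using only the helpers defined above, assuming policy.cfg and any ceph.conf.d overrides are handled between the stages as deploy.sh does:

#!/bin/bash
# Sketch of a caller for the helpers above (illustrative only).
set -ex
BASEDIR=$(dirname "$(readlink -f "$0")")   # directory containing common/
source $BASEDIR/common/common.sh
assert_enhanced_getopt
run_stage_0 "$CLI"
run_stage_1 "$CLI"
# policy.cfg must be in place before Stage 2 (see common/policy.sh)
run_stage_2 "$CLI"
ceph_conf_small_cluster
run_stage_3 "$CLI"
ceph_health_test
create_all_pools_at_once write_test
rados_write_test
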
diff --git a/qa/deepsea/health-ok/common/deploy.sh b/qa/deepsea/health-ok/common/deploy.sh
new file mode 100644 (file)
index 0000000..92729c2
--- /dev/null
@@ -0,0 +1,229 @@
+# This file is part of the DeepSea integration test suite
+
+#
+# separate file to house the deploy_ceph function
+#
+
+DEPLOY_PHASE_COMPLETE_MESSAGE="deploy phase complete!"
+
+
+function _os_specific_install_deps {
+    echo "Installing dependencies on the Salt Master node"
+    local DEPENDENCIES="jq
+    "
+    _zypper_ref_on_master
+    for d in $DEPENDENCIES ; do
+        _zypper_install_on_master $d
+    done
+}
+
+function _determine_master_minion {
+    type hostname
+    MASTER_MINION=$(hostname --fqdn)
+    salt $MASTER_MINION test.ping
+}
+
+function _os_specific_repos_and_packages_info {
+    _dump_salt_master_zypper_repos
+    type rpm
+    rpm -q salt-master
+    rpm -q salt-minion
+    rpm -q salt-api
+    rpm -q deepsea || true
+}
+
+function _set_deepsea_minions {
+    #
+    # set deepsea_minions to * - see https://github.com/SUSE/DeepSea/pull/526
+    # (otherwise we would have to set deepsea grain on all minions)
+    echo "deepsea_minions: '*'" > /srv/pillar/ceph/deepsea_minions.sls
+    cat /srv/pillar/ceph/deepsea_minions.sls
+}
+
+function _initialize_minion_array {
+    local m=
+    local i=0
+    if type salt-key > /dev/null 2>&1; then
+        MINION_LIST=$(salt-key -L -l acc | grep -v '^Accepted Keys')
+        for m in $MINION_LIST ; do
+            MINION_ARRAY[$i]=$m
+            i=$((i+1))
+        done
+    else
+        echo "Cannot find salt-key. Is Salt installed? Is this running on the Salt Master?"
+        exit 1
+    fi
+    echo $i
+}
+
+function _initialize_storage_profile {
+    test "$STORAGE_PROFILE"
+    case "$STORAGE_PROFILE" in
+        default)   echo "Storage profile: bluestore OSDs (default)" ;;
+        dmcrypt)   echo "Storage profile: encrypted bluestore OSDs" ;;
+        filestore) echo "Storage profile: filestore OSDs"           ;;
+        random)    echo "Storage profile will be chosen randomly"   ;;
+        *)
+            CUSTOM_STORAGE_PROFILE="$STORAGE_PROFILE"
+            STORAGE_PROFILE="custom"
+            echo "Storage profile: custom ($CUSTOM_STORAGE_PROFILE)"
+            ;;
+    esac
+}
+
+function _initialize_and_vet_nodes {
+    if [ -n "$MIN_NODES" ] ; then
+        echo "MIN_NODES is set to $MIN_NODES"
+        PROPOSED_MIN_NODES="$MIN_NODES"
+    else
+        echo "MIN_NODES was not set. Default is 1"
+        PROPOSED_MIN_NODES=1
+    fi
+    if [ -n "$CLIENT_NODES" ] ; then
+        echo "CLIENT_NODES is set to $CLIENT_NODES"
+    else
+        echo "CLIENT_NODES was not set. Default is 0"
+        CLIENT_NODES=0
+    fi
+    MIN_NODES=$(($CLIENT_NODES + 1))
+    if [ "$PROPOSED_MIN_NODES" -lt "$MIN_NODES" ] ; then
+        echo "Proposed MIN_NODES value is too low. Need at least 1 + CLIENT_NODES"
+        exit 1
+    fi
+    test "$PROPOSED_MIN_NODES" -gt "$MIN_NODES" && MIN_NODES="$PROPOSED_MIN_NODES"
+    echo "Final MIN_NODES is $MIN_NODES"
+    echo "TOTAL_NODES is $TOTAL_NODES"
+    test "$TOTAL_NODES"
+    test "$TOTAL_NODES" -ge "$MIN_NODES"
+    STORAGE_NODES=$((TOTAL_NODES - CLIENT_NODES))
+    echo "WWWW"
+    echo "This script will use DeepSea with a cluster of $TOTAL_NODES nodes total (including Salt Master)."
+    echo "Of these, $CLIENT_NODES are assumed to be clients (nodes without any DeepSea roles except \"admin\")."
+}
+
+function _zypper_ps {
+    salt '*' cmd.run 'zypper ps -s' 2>/dev/null || true
+}
+
+function _python_versions {
+    type python2 > /dev/null 2>&1 && python2 --version || echo "Python 2 not installed"
+    type python3 > /dev/null 2>&1 && python3 --version || echo "Python 3 not installed"
+}
+
+function initialization_sequence {
+    set +x
+    _determine_master_minion
+    _os_specific_install_deps
+    _os_specific_repos_and_packages_info
+    set +e
+    _python_versions
+    type deepsea > /dev/null 2>&1 && deepsea --version || echo "deepsea CLI not installed"
+    TOTAL_MINIONS=$(_initialize_minion_array)
+    echo "There are $TOTAL_MINIONS minions in this Salt cluster"
+    set -e
+    _set_deepsea_minions
+    salt '*' saltutil.sync_all 2>/dev/null
+    TOTAL_NODES=$(json_total_nodes)
+    test "$TOTAL_NODES" = "$TOTAL_MINIONS"
+    _ping_minions_until_all_respond
+    cat_salt_config
+    _initialize_storage_profile
+    _initialize_and_vet_nodes
+    set -x
+    test $STORAGE_NODES -lt 4 && export DEV_ENV="true"
+}
+
+function pared_down_init_sequence {
+    test "$ALREADY_INITIALIZED" && return 0
+    set +x
+    TOTAL_MINIONS=$(_initialize_minion_array)
+    TOTAL_NODES=$(json_total_nodes)
+    test "$TOTAL_NODES" = "$TOTAL_MINIONS"
+    _ping_minions_until_all_respond
+    _initialize_and_vet_nodes
+    set -x
+    test "$STORAGE_NODES" -lt "4" -a -z "$DEV_ENV" && export DEV_ENV="true" || true
+}
+
+function salt_api_test {
+    local tmpfile=$(mktemp)
+    echo "Salt API test: BEGIN"
+    systemctl --no-pager --full status salt-api.service
+    curl http://$(hostname):8000/ | tee $tmpfile # show curl output in log
+    test -s $tmpfile
+    jq . $tmpfile >/dev/null
+    echo -en "\n" # this is just for log readability
+    rm $tmpfile
+    echo "Salt API test: END"
+}
+
+function deploy_ceph {
+    if [ "$START_STAGE" -lt "0" -o "$START_STAGE" -gt "4" ] ; then
+        echo "Received bad --start-stage value ->$START_STAGE<- (must be 0-4 inclusive)"
+       exit 1
+    fi
+    if _ceph_cluster_running ; then
+        echo "Running ceph cluster detected: skipping deploy phase"
+        pared_down_init_sequence
+        return 0
+    fi
+    if [ "$START_STAGE" = "0" ] ; then
+        if [ -z "$TEUTHOLOGY" ] ; then
+            initialization_sequence
+        fi
+        run_stage_0 "$CLI"
+        _zypper_ps
+        salt_api_test
+    fi
+    if [ "$START_STAGE" -le "1" ] ; then
+        test -n "$RGW" -a -n "$SSL" && rgw_ssl_init
+        run_stage_1 "$CLI"
+        policy_cfg_base
+        policy_cfg_mon_flex
+        test -n "$MDS" && policy_cfg_mds
+        policy_cfg_openattic_rgw_igw_ganesha
+        test "$RGW" && rgw_demo_users
+        case "$STORAGE_PROFILE" in
+            dmcrypt) proposal_populate_dmcrypt ;;
+            filestore) proposal_populate_filestore ;;
+            random) random_or_custom_storage_profile ;;
+            custom) random_or_custom_storage_profile ;;
+            default) ;;
+            *) echo "Bad storage profile ->$STORAGE_PROFILE<-. Bailing out!" ; exit 1 ;;
+        esac
+        policy_cfg_storage
+        cat_policy_cfg
+    fi
+    if [ "$START_STAGE" -le "2" ] ; then
+        run_stage_2 "$CLI"
+        ceph_conf_small_cluster
+        ceph_conf_mon_allow_pool_delete
+        ceph_conf_dashboard
+        test "$RBD" && ceph_conf_upstream_rbd_default_features
+    fi
+    if [ "$START_STAGE" -le "3" ] ; then
+        run_stage_3 "$CLI"
+        pre_create_pools
+        ceph_cluster_status
+        test "$RBD" && ceph_test_librbd_can_be_run
+        if [ -z "$MDS" -a -z "$NFS_GANESHA" -a -z "$RGW" ] ; then
+            echo "WWWW"
+            echo "Stage 3 OK, no roles requiring Stage 4: $DEPLOY_PHASE_COMPLETE_MESSAGE"
+            return 0
+        fi
+        test -n "$NFS_GANESHA" && nfs_ganesha_no_root_squash
+    fi
+    if [ "$START_STAGE" -le "4" ] ; then
+        run_stage_4 "$CLI"
+        if [ -n "$NFS_GANESHA" ] ; then
+            nfs_ganesha_cat_config_file
+            nfs_ganesha_debug_log
+            echo "WWWW"
+            echo "NFS-Ganesha set to debug logging"
+        fi
+        ceph_cluster_status
+        _zypper_ps
+        echo "Stage 4 OK: $DEPLOY_PHASE_COMPLETE_MESSAGE"
+    fi
+    return 0
+}
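
deploy_ceph is driven entirely by environment variables set by its caller. The names below are taken from the function bodies above, but the exact option-to-variable mapping lives in health-ok.sh and is only sketched here:

# Sketch: driving deploy_ceph from a wrapper (illustrative only).
export STORAGE_PROFILE=default   # or dmcrypt, filestore, random, <custom profile>
export CLIENT_NODES=1            # nodes carrying no role other than "admin"
export START_STAGE=0             # begin with Stage 0 (must be 0-4)
export CLI=""                    # empty: salt-run; non-empty: the deepsea CLI
export MDS=yes RGW=yes           # roles that trigger the Stage 4 run
deploy_ceph
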
diff --git a/qa/deepsea/health-ok/common/helper.sh b/qa/deepsea/health-ok/common/helper.sh
new file mode 100644 (file)
index 0000000..fdc135b
--- /dev/null
@@ -0,0 +1,184 @@
+# This file is part of the DeepSea integration test suite
+
+#
+# helper functions (not to be called directly from test scripts)
+#
+
+STAGE_TIMEOUT_DURATION="60m"
+
+function _report_stage_failure {
+    STAGE_SUCCEEDED=""
+    local stage_num=$1
+    local stage_status=$2
+
+    echo "********** Stage $stage_num failed **********"
+    test "$stage_status" = "124" && echo "Stage $stage_num timed out after $STAGE_TIMEOUT_DURATION"
+    set -ex
+    journalctl -r | head -n 2000
+    echo "WWWW"
+    echo "Finished dumping up to 2000 lines of journalctl"
+}
+
+function _run_stage {
+    local stage_num=$1
+
+    set +x
+    echo ""
+    echo "*********************************************"
+    echo "********** Running DeepSea Stage $stage_num **********"
+    echo "*********************************************"
+
+    STAGE_SUCCEEDED="non-empty string"
+    test -n "$CLI" && _run_stage_cli $stage_num || _run_stage_non_cli $stage_num
+}
+
+function _run_stage_cli {
+    local stage_num=$1
+    local deepsea_cli_output_path="/tmp/deepsea.${stage_num}.log"
+
+    set +e
+    set -x
+    timeout $STAGE_TIMEOUT_DURATION \
+        deepsea \
+        --log-file=/var/log/salt/deepsea.log \
+        --log-level=debug \
+        stage \
+        run \
+        ceph.stage.${stage_num} \
+        --simple-output \
+        2>&1 | tee $deepsea_cli_output_path
+    local stage_status="${PIPESTATUS[0]}"
+    set +x
+    echo "deepsea exit status: $stage_status"
+    echo "WWWW"
+    if [ "$stage_status" != "0" ] ; then
+        _report_stage_failure $stage_num $stage_status
+        return 0
+    fi
+    if grep -q -F "failed=0" $deepsea_cli_output_path ; then
+        echo "********** Stage $stage_num completed successfully **********"
+    else
+        echo "ERROR: deepsea stage returned exit status 0, yet one or more steps failed. Bailing out!"
+        _report_stage_failure $stage_num $stage_status
+    fi
+    set -ex
+}
+
+function _run_stage_non_cli {
+    local stage_num=$1
+    local stage_log_path="/tmp/stage.${stage_num}.log"
+
+    set +e
+    set -x
+    timeout $STAGE_TIMEOUT_DURATION \
+        salt-run \
+        --no-color \
+        state.orch \
+        ceph.stage.${stage_num} \
+        2>/dev/null | tee $stage_log_path
+    local stage_status="${PIPESTATUS[0]}"
+    set +x
+    echo "WWWW"
+    if [ "$stage_status" != "0" ] ; then
+        _report_stage_failure $stage_num $stage_status
+        return 0
+    fi
+    STAGE_FINISHED=$(grep -F 'Total states run' $stage_log_path)
+    if [ "$STAGE_FINISHED" ]; then
+        FAILED=$(grep -F 'Failed: ' $stage_log_path | sed 's/.*Failed:\s*//g' | head -1)
+        if [ "$FAILED" -gt "0" ]; then
+            echo "ERROR: salt-run returned exit status 0, yet one or more steps failed. Bailing out!"
+            _report_stage_failure $stage_num $stage_status
+        else
+            echo "********** Stage $stage_num completed successfully **********"
+        fi
+    else
+        echo "ERROR: salt-run returned exit status 0, yet Stage did not complete. Bailing out!"
+        _report_stage_failure $stage_num $stage_status
+    fi
+    set -ex
+}
+
+function _client_node {
+    salt --static --out json -C 'not I@roles:storage' test.ping 2>/dev/null | jq -r 'keys[0]'
+}
+
+function _master_has_role {
+    local ROLE=$1
+    echo "Asserting that master minion has role ->$ROLE<-"
+    salt $MASTER_MINION pillar.get roles 2>/dev/null
+    salt $MASTER_MINION pillar.get roles 2>/dev/null | grep -q "$ROLE"
+    echo "Yes, it does."
+}
+
+function _first_x_node {
+    local ROLE=$1
+    salt --static --out json -C "I@roles:$ROLE" test.ping 2>/dev/null | jq -r 'keys[0]'
+}
+
+function _first_storage_only_node {
+    local COMPOUND_TARGET="I@roles:storage"
+    local NOT_ROLES="mon
+mgr
+mds
+rgw
+igw
+ganesha
+"
+    local ROLE=
+    for ROLE in $NOT_ROLES ; do
+        COMPOUND_TARGET="$COMPOUND_TARGET and not I@roles:$ROLE"
+    done
+    local MAYBEJSON=$(salt --static --out json -C "$COMPOUND_TARGET" test.ping 2>/dev/null)
+    echo $MAYBEJSON | jq --raw-output 'keys[0]'
+}
+
+function _run_test_script_on_node {
+    local TESTSCRIPT=$1 # on success, TESTSCRIPT must output the exact string
+                        # "Result: OK" on a line by itself, otherwise it will
+                        # be considered to have failed
+    local TESTNODE=$2
+    local ASUSER=$3
+    salt-cp $TESTNODE $TESTSCRIPT $TESTSCRIPT 2>/dev/null
+    local LOGFILE=/tmp/test_script.log
+    local STDERR_LOGFILE=/tmp/test_script_stderr.log
+    local stage_status=
+    if [ -z "$ASUSER" -o "x$ASUSER" = "xroot" ] ; then
+      salt $TESTNODE cmd.run "sh $TESTSCRIPT" 2>$STDERR_LOGFILE | tee $LOGFILE
+      stage_status="${PIPESTATUS[0]}"
+    else
+      salt $TESTNODE cmd.run "sudo su $ASUSER -c \"bash $TESTSCRIPT\"" 2>$STDERR_LOGFILE | tee $LOGFILE
+      stage_status="${PIPESTATUS[0]}"
+    fi
+    local RESULT=$(grep -o -P '(?<=Result: )(OK)$' $LOGFILE) # since the script
+                                  # is run by salt, the output appears indented
+    test "x$RESULT" = "xOK" && return
+    echo "The test script that ran on $TESTNODE failed. The stderr output was as follows:"
+    cat $STDERR_LOGFILE
+    exit 1
+}
+
+function _grace_period {
+    local SECONDS=$1
+    echo "${SECONDS}-second grace period"
+    sleep $SECONDS
+}
+
+function _root_fs_is_btrfs {
+    stat -f / | grep -q 'Type: btrfs'
+}
+
+function _ping_minions_until_all_respond {
+    local RESPONDING=""
+    for i in {1..20} ; do
+        sleep 10
+        RESPONDING=$(salt '*' test.ping 2>/dev/null | grep True 2>/dev/null | wc --lines)
+        echo "Of $TOTAL_NODES total minions, $RESPONDING are responding"
+        test "$TOTAL_NODES" -eq "$RESPONDING" && break
+    done
+}
+
+function _ceph_cluster_running {
+    ceph status >/dev/null 2>&1
+}
+
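
The contract enforced by _run_test_script_on_node is that a test script prints the exact line "Result: OK" on success; the scripts above all set an ERR trap that prints "Result: NOT_OK" instead. A minimal conforming script, as a sketch with hypothetical file names:

# Sketch: smallest possible test script honouring the "Result: OK" convention.
cat << 'EOF' > /tmp/noop_test.sh
set -ex
trap 'echo "Result: NOT_OK"' ERR
echo "noop test running as $(whoami) on $(hostname --fqdn)"
true
echo "Result: OK"
EOF
_run_test_script_on_node /tmp/noop_test.sh "$(_first_x_node storage)"
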
diff --git a/qa/deepsea/health-ok/common/json.sh b/qa/deepsea/health-ok/common/json.sh
new file mode 100644 (file)
index 0000000..99a2d22
--- /dev/null
@@ -0,0 +1,24 @@
+#
+# This file is part of the DeepSea integration test suite.
+# It contains various cluster introspection functions.
+#
+
+function json_total_nodes {
+    # total number of nodes in the cluster
+    salt --static --out json '*' test.ping 2>/dev/null | jq '. | length'
+}
+
+function _json_nodes_of_role_x {
+    local ROLE=$1
+    salt --static --out json -C "I@roles:$ROLE" test.ping 2>/dev/null | jq '. | length'
+}
+
+function json_storage_nodes {
+    # number of storage nodes in the cluster
+    _json_nodes_of_role_x storage
+}
+
+function json_total_osds {
+    # total number of OSDs in the cluster
+    ceph osd ls --format json | jq '. | length'
+}
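
These counts lean on the shape of Salt's static JSON output: salt --static --out json '*' test.ping returns one object keyed by minion ID, so jq '. | length' is simply the number of minions that answered. A worked example with made-up minion names:

# What json_total_nodes counts (hypothetical 3-minion cluster):
#   salt --static --out json '*' test.ping  ->  {"node1": true, "node2": true, "node3": true}
echo '{"node1": true, "node2": true, "node3": true}' | jq '. | length'   # prints 3
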
diff --git a/qa/deepsea/health-ok/common/nfs-ganesha.sh b/qa/deepsea/health-ok/common/nfs-ganesha.sh
new file mode 100644 (file)
index 0000000..61cf382
--- /dev/null
@@ -0,0 +1,177 @@
+#
+# This file is part of the DeepSea integration test suite
+#
+
+NFS_MOUNTPOINT=/root/mnt
+
+function _nfs_ganesha_node {
+  _first_x_node ganesha
+}
+
+function nfs_ganesha_no_root_squash {
+  local GANESHAJ2=/srv/salt/ceph/ganesha/files/ganesha.conf.j2
+  sed -i '/Access_Type = RW;/a \\tSquash = No_root_squash;' $GANESHAJ2
+}
+
+function nfs_ganesha_no_grace_period {
+  local GANESHAJ2=/srv/salt/ceph/ganesha/files/ganesha.conf.j2
+  cat <<EOF >>$GANESHAJ2
+NFSv4 {Graceless = True}
+EOF
+}
+
+function nfs_ganesha_debug_log {
+  local GANESHANODE=$(_nfs_ganesha_node)
+  local TESTSCRIPT=/tmp/test-nfs-ganesha.sh
+  cat <<EOF > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+echo "nfs-ganesha debug log script running as $(whoami) on $(hostname --fqdn)"
+sed -i 's/NIV_EVENT/NIV_DEBUG/g' /etc/sysconfig/nfs-ganesha
+cat /etc/sysconfig/nfs-ganesha
+rm -rf /var/log/ganesha/ganesha.log
+systemctl restart nfs-ganesha.service
+systemctl is-active nfs-ganesha.service
+rpm -q nfs-ganesha
+echo "Result: OK"
+EOF
+  _run_test_script_on_node $TESTSCRIPT $GANESHANODE
+}
+
+function nfs_ganesha_cat_config_file {
+  salt -C 'I@roles:ganesha' cmd.run 'cat /etc/ganesha/ganesha.conf'
+}
+
+#function nfs_ganesha_showmount_loop {
+#  local TESTSCRIPT=/tmp/test-nfs-ganesha.sh
+#  salt -C 'I@roles:ganesha' cmd.run "while true ; do showmount -e $GANESHANODE | tee /tmp/showmount.log || true ; grep -q 'Timed out' /tmp/showmount.log || break ; done"
+#}
+
+function nfs_ganesha_mount {
+  #
+  # creates a mount point and mounts NFS-Ganesha export in it
+  #
+  local NFSVERSION=$1   # can be "3", "4", or ""
+  local ASUSER=$2
+  local CLIENTNODE=$(_client_node)
+  local GANESHANODE=$(_nfs_ganesha_node)
+  local TESTSCRIPT=/tmp/test-nfs-ganesha.sh
+  salt "$CLIENTNODE" pillar.get roles
+  salt "$CLIENTNODE" pkg.install nfs-client # FIXME: only works on SUSE
+  cat <<EOF > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+echo "nfs-ganesha mount test script"
+test ! -e $NFS_MOUNTPOINT
+mkdir $NFS_MOUNTPOINT
+test -d $NFS_MOUNTPOINT
+#mount -t nfs -o nfsvers=4 ${GANESHANODE}:/ $NFS_MOUNTPOINT
+mount -t nfs -o ##OPTIONS## ${GANESHANODE}:/ $NFS_MOUNTPOINT
+ls -lR $NFS_MOUNTPOINT
+echo "Result: OK"
+EOF
+  if test -z $NFSVERSION ; then
+      sed -i 's/##OPTIONS##/sync/' $TESTSCRIPT
+  elif [ "$NFSVERSION" = "3" -o "$NFSVERSION" = "4" ] ; then
+      sed -i 's/##OPTIONS##/sync,nfsvers='$NFSVERSION'/' $TESTSCRIPT
+  else
+      echo "Bad NFS version ->$NFS_VERSION<- Bailing out!"
+      exit 1
+  fi
+  _run_test_script_on_node $TESTSCRIPT $CLIENTNODE $ASUSER
+}
+
+function nfs_ganesha_umount {
+  local ASUSER=$1
+  local CLIENTNODE=$(_client_node)
+  local TESTSCRIPT=/tmp/test-nfs-ganesha-umount.sh
+  cat <<EOF > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+echo "nfs-ganesha umount test script running as $(whoami) on $(hostname --fqdn)"
+umount $NFS_MOUNTPOINT
+rm -rf $NFS_MOUNTPOINT
+echo "Result: OK"
+EOF
+  _run_test_script_on_node $TESTSCRIPT $CLIENTNODE $ASUSER
+}
+
+function nfs_ganesha_write_test {
+  #
+  # NFS-Ganesha FSAL write test
+  #
+  local FSAL=$1
+  local NFSVERSION=$2
+  local CLIENTNODE=$(_client_node)
+  local TESTSCRIPT=/tmp/test-nfs-ganesha-write.sh
+  local APPENDAGE=""
+  if [ "$FSAL" = "cephfs" ] ; then
+      if [ "$NFSVERSION" = "3" ] ; then
+          APPENDAGE=""
+      else
+          APPENDAGE="/cephfs"
+      fi
+  else
+      APPENDAGE="/demo/demo-demo"
+  fi
+  local TOUCHFILE=$NFS_MOUNTPOINT$APPENDAGE/saturn
+  cat <<EOF > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+echo "nfs-ganesha write test script"
+! test -e $TOUCHFILE
+touch $TOUCHFILE
+test -f $TOUCHFILE
+rm -f $TOUCHFILE
+echo "Result: OK"
+EOF
+  _run_test_script_on_node $TESTSCRIPT $CLIENTNODE
+}
+
+function nfs_ganesha_pynfs_test {
+  #
+  # NFS-Ganesha PyNFS test
+  #
+  local CLIENTNODE=$(_client_node)
+  local GANESHANODE=$(_nfs_ganesha_node)
+  local TESTSCRIPT=/tmp/test-nfs-ganesha-pynfs.sh
+  cat <<'EOF' > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+
+function assert_success {
+    local PYNFS_OUTPUT=$1
+    test -s $PYNFS_OUTPUT
+    # last line: determines return value of function
+    ! grep -q FAILURE $PYNFS_OUTPUT
+}
+
+echo "nfs-ganesha PyNFS test script running as $(whoami) on $(hostname --fqdn)"
+set +x
+for delay in 60 60 60 60 ; do
+    sudo zypper --non-interactive --gpg-auto-import-keys refresh && break
+    sleep $delay
+done
+set -x
+zypper --non-interactive install --no-recommends krb5-devel python3-devel
+git clone --depth 1 https://github.com/supriti/Pynfs
+cd Pynfs
+./setup.py build
+cd nfs4.0
+sleep 90 # NFSv4 grace period
+LOGFILE="PyNFS.out"
+./testserver.py -v \
+    --outfile RESULTS.out \
+    --maketree GANESHANODE:/cephfs/ \
+    --showomit \
+    --secure \
+    --rundeps \
+    all \
+    ganesha 2>&1 | tee $LOGFILE
+#./showresults.py RESULTS.out
+assert_success $LOGFILE
+echo "Result: OK"
+EOF
+  sed -i 's/GANESHANODE/'$GANESHANODE'/' $TESTSCRIPT
+  _run_test_script_on_node $TESTSCRIPT $CLIENTNODE
+}
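
Strung together, these helpers give an NFS-Ganesha smoke test for the client node. The ordering below is a sketch; the actual sequence is chosen by the calling test, which is not part of this file:

# Sketch: NFS-Ganesha validation after Stage 4 (illustrative only).
set -ex
nfs_ganesha_debug_log            # switch to NIV_DEBUG and restart the service
nfs_ganesha_cat_config_file
nfs_ganesha_mount "4"            # NFSv4 mount of the export on the client node
nfs_ganesha_write_test cephfs 4  # touch/remove a file under the CephFS export
nfs_ganesha_umount
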
diff --git a/qa/deepsea/health-ok/common/policy.sh b/qa/deepsea/health-ok/common/policy.sh
new file mode 100644 (file)
index 0000000..6bda313
--- /dev/null
@@ -0,0 +1,271 @@
+# This file is part of the DeepSea integration test suite
+
+#
+# functions for generating storage proposals
+#
+
+PROPOSALSDIR="/srv/pillar/ceph/proposals"
+POLICY_CFG="$PROPOSALSDIR/policy.cfg"
+
+function proposal_populate_dmcrypt {
+    salt-run proposal.populate encryption='dmcrypt' name='dmcrypt'
+}
+
+function proposal_populate_filestore {
+    salt-run proposal.populate format='filestore' name='filestore'
+}
+
+
+#
+# functions for generating policy.cfg
+#
+
+function policy_cfg_base {
+  cat <<EOF > $POLICY_CFG
+# Cluster assignment
+cluster-ceph/cluster/*.sls
+# Common configuration
+config/stack/default/global.yml
+config/stack/default/ceph/cluster.yml
+# Role assignment - master
+role-master/cluster/${MASTER_MINION}.sls
+# Role assignment - admin
+role-admin/cluster/*.sls
+EOF
+}
+
+function policy_cfg_mon_flex {
+  test -n "$STORAGE_NODES" # set in initialization_sequence
+  test "$STORAGE_NODES" -gt 0
+  if [ "$STORAGE_NODES" -lt 4 ] ; then
+    echo "Undersized cluster ($STORAGE_NODES nodes)"
+    policy_cfg_one_mon
+  else
+    policy_cfg_three_mons
+  fi
+}
+
+function policy_cfg_one_mon {
+  cat <<EOF >> $POLICY_CFG
+# Role assignment - 1 mon, 1 mgr
+role-mon/cluster/*.sls slice=[:1]
+role-mgr/cluster/*.sls slice=[:1]
+EOF
+}
+
+function policy_cfg_three_mons {
+  cat <<EOF >> $POLICY_CFG
+# Role assignment - 3 mons, 3 mgrs
+role-mon/cluster/*.sls slice=[:3]
+role-mgr/cluster/*.sls slice=[:3]
+EOF
+}
+
+function _initialize_minion_configs_array {
+    local DIR=$1
+
+    shopt -s nullglob
+    pushd $DIR >/dev/null
+    MINION_CONFIGS_ARRAY=(*.yaml *.yml)
+    echo "Made global array containing the following files (from ->$DIR<-):"
+    printf '%s\n' "${MINION_CONFIGS_ARRAY[@]}"
+    popd >/dev/null
+    shopt -u nullglob
+}
+
+function _initialize_osd_configs_array {
+    local DIR=$1
+
+    shopt -s nullglob
+    pushd $DIR >/dev/null
+    OSD_CONFIGS_ARRAY=(*.yaml *.yml)
+    echo "Made global array containing the following OSD configs (from ->$DIR<-):"
+    printf '%s\n' "${OSD_CONFIGS_ARRAY[@]}"
+    popd >/dev/null
+    shopt -u nullglob
+}
+
+function _custom_osd_config {
+    local PROFILE=$1
+    local FILENAME=""
+    for i in "${OSD_CONFIGS_ARRAY[@]}" ; do
+        case "$i" in
+            $PROFILE) FILENAME=$i ; break ;;
+            ${PROFILE}.yaml) FILENAME=$i ; break ;;
+            ${PROFILE}.yml) FILENAME=$i ; break ;;
+        esac
+    done
+    if [ -z "$FILENAME" ] ; then
+        echo "Custom OSD profile $PROFILE not found. Bailing out!"
+        exit 1
+    fi
+    echo "$FILENAME"
+}
+
+function _random_osd_config {
+    # the bare config file names are assumed to already be in OSD_CONFIGS_ARRAY
+    # (accomplished by calling _initialize_osd_configs_array first)
+    OSD_CONFIGS_ARRAY_LENGTH="${#OSD_CONFIGS_ARRAY[@]}"
+    local INDEX=$((RANDOM % OSD_CONFIGS_ARRAY_LENGTH))
+    echo "${OSD_CONFIGS_ARRAY[$INDEX]}"
+
+}
+
+function random_or_custom_storage_profile {
+    test "$STORAGE_PROFILE"
+    test "$STORAGE_PROFILE" = "random" -o "$STORAGE_PROFILE" = "custom"
+    #
+    # choose OSD configuration from osd-config/ovh
+    #
+    local SOURCEDIR="$BASEDIR/osd-config/ovh"
+    _initialize_osd_configs_array $SOURCEDIR
+    local SOURCEFILE=""
+    case "$STORAGE_PROFILE" in
+        random) SOURCEFILE=$(_random_osd_config) ;;
+        custom) SOURCEFILE=$(_custom_osd_config $CUSTOM_STORAGE_PROFILE) ;;
+    esac
+    test "$SOURCEFILE"
+    file $SOURCEDIR/$SOURCEFILE
+    #
+    # prepare new profile, which will be exactly the same as the default
+    # profile except the files in stack/default/ceph/minions/ will be
+    # overwritten with our chosen OSD configuration
+    #
+    cp -a $PROPOSALSDIR/profile-default $PROPOSALSDIR/profile-$STORAGE_PROFILE
+    local DESTDIR="$PROPOSALSDIR/profile-$STORAGE_PROFILE/stack/default/ceph/minions"
+    _initialize_minion_configs_array $DESTDIR
+    for DESTFILE in "${MINION_CONFIGS_ARRAY[@]}" ; do
+        cp $SOURCEDIR/$SOURCEFILE $DESTDIR/$DESTFILE
+    done
+    echo "Your $STORAGE_PROFILE storage profile $SOURCEFILE has the following contents:"
+    cat $DESTDIR/$DESTFILE
+    ls -lR $PROPOSALSDIR
+}
+
+function policy_cfg_storage {
+    test -n "$CLIENT_NODES"
+    test -n "$STORAGE_PROFILE"
+
+    if [ "$CLIENT_NODES" -eq 0 ] ; then
+        cat <<EOF >> $POLICY_CFG
+# Hardware Profile
+profile-$STORAGE_PROFILE/cluster/*.sls
+profile-$STORAGE_PROFILE/stack/default/ceph/minions/*yml
+EOF
+    elif [ "$CLIENT_NODES" -ge 1 ] ; then
+        cat <<EOF >> $POLICY_CFG
+# Hardware Profile
+profile-$STORAGE_PROFILE/cluster/*.sls slice=[:-$CLIENT_NODES]
+profile-$STORAGE_PROFILE/stack/default/ceph/minions/*yml slice=[:-$CLIENT_NODES]
+EOF
+    else
+        echo "Unexpected number of client nodes ->$CLIENT_NODES<-; bailing out!"
+        exit 1
+    fi
+}
+
+function storage_profile_from_policy_cfg {
+    local BUFFER=$(grep --max-count 1 '^profile-' $POLICY_CFG)
+    perl -e '"'"$BUFFER"'" =~ m/profile-(\w+)/; print "$1\n";'
+}
+
+function policy_remove_storage_node {
+    local NODE_TO_DELETE=$1
+
+    echo "Before"
+    ls -1 $PROPOSALSDIR/profile-$STORAGE_PROFILE/cluster/
+    ls -1 $PROPOSALSDIR/profile-$STORAGE_PROFILE/stack/default/ceph/minions/
+
+    local basedirsls=$PROPOSALSDIR/profile-$STORAGE_PROFILE/cluster
+    local basediryml=$PROPOSALSDIR/profile-$STORAGE_PROFILE/stack/default/ceph/minions
+    mv $basedirsls/${NODE_TO_DELETE}.sls $basedirsls/${NODE_TO_DELETE}.sls-DISABLED
+    mv $basediryml/${NODE_TO_DELETE}.yml $basediryml/${NODE_TO_DELETE}.yml-DISABLED
+
+    echo "After"
+    ls -1 $PROPOSALSDIR/profile-$STORAGE_PROFILE/cluster/
+    ls -1 $PROPOSALSDIR/profile-$STORAGE_PROFILE/stack/default/ceph/minions/
+}
+
+function policy_cfg_mds {
+    test -n "$STORAGE_NODES"
+    # MDS on up to 3 storage nodes
+    if [ "$STORAGE_NODES" -le 3 ] ; then
+        cat <<EOF >> $POLICY_CFG
+# Role assignment - mds
+role-mds/cluster/*.sls slice=[:$STORAGE_NODES]
+EOF
+    else
+        cat <<EOF >> $POLICY_CFG
+# Role assignment - mds
+role-mds/cluster/*.sls slice=[:3]
+EOF
+    fi
+}
+
+function policy_cfg_openattic_rgw_igw_ganesha {
+    # first, determine the slices
+    local slice_openattic=""
+    local slice_rgw=""
+    local slice_igw=""
+    local slice_ganesha=""
+    # lest we become confused, "storage nodes" is a synonym for "cluster nodes"
+    test -n "$STORAGE_NODES"
+    if [ "$STORAGE_NODES" -eq 1 ] ; then
+        slice_openattic="[:1]"
+        slice_rgw="[:1]"
+        slice_igw="[:1]"
+        slice_ganesha="[:1]"
+    elif [ "$STORAGE_NODES" -eq 2 ] ; then
+        slice_openattic="[:1]"
+        slice_rgw="[1:2]"
+        slice_igw="[1:2]"
+        slice_ganesha="[1:2]"
+    elif [ "$STORAGE_NODES" -eq 3 ] ; then
+        slice_openattic="[:1]"
+        slice_rgw="[1:2]"
+        slice_igw="[2:3]"
+        slice_ganesha="[2:3]"
+    elif [ "$STORAGE_NODES" -ge 4 ] ; then
+        slice_openattic="[:1]"
+        slice_rgw="[1:2]"
+        slice_igw="[2:3]"
+        slice_ganesha="[3:4]"
+    else
+        echo "Unexpected number of cluster/storage nodes ->$STORAGE_NODES<-: bailing out!"
+        exit 1
+    fi
+    # then, populate policy.cfg
+    if [ "$OPENATTIC" ] ; then
+        cat <<EOF >> $POLICY_CFG
+# Role assignment - openattic
+role-openattic/cluster/*.sls slice=$slice_openattic
+EOF
+    fi
+    if [ "$RGW" ] ; then
+        if [ -z "$SSL" ] ; then
+            cat <<EOF >> $POLICY_CFG
+# Role assignment - rgw
+role-rgw/cluster/*.sls slice=$slice_rgw
+EOF
+        else
+            cat <<EOF >> $POLICY_CFG
+# Role assignment - rgw
+role-rgw/cluster/*.sls slice=$slice_rgw
+role-rgw-ssl/cluster/*.sls slice=$slice_rgw
+EOF
+        fi
+    fi
+    if [ "$IGW" ] ; then
+        cat <<EOF >> $POLICY_CFG
+# Role assignment - igw
+role-igw/cluster/*.sls slice=$slice_igw
+EOF
+    fi
+    if [ "$NFS_GANESHA" ] ; then
+        cat <<EOF >> $POLICY_CFG
+# Role assignment - ganesha
+role-ganesha/cluster/*.sls slice=$slice_ganesha
+EOF
+    fi
+}
+
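
To make the slicing concrete: on a three-node cluster with only RGW and NFS-Ganesha enabled, the role-assignment lines appended to policy.cfg work out as below. This is a worked example read off the case statement above, not output captured from a run:

# STORAGE_NODES=3, RGW and NFS_GANESHA set, openattic/igw/SSL left unset:
#   role-mon/cluster/*.sls slice=[:1]        # policy_cfg_mon_flex -> policy_cfg_one_mon
#   role-mgr/cluster/*.sls slice=[:1]
#   role-rgw/cluster/*.sls slice=[1:2]       # second cluster node
#   role-ganesha/cluster/*.sls slice=[2:3]   # third cluster node
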
diff --git a/qa/deepsea/health-ok/common/pool.sh b/qa/deepsea/health-ok/common/pool.sh
new file mode 100644 (file)
index 0000000..5baf319
--- /dev/null
@@ -0,0 +1,64 @@
+# This file is part of the DeepSea integration test suite
+
+#
+# separate file to house the pool creation functions
+#
+
+
+function pgs_per_pool {
+    local TOTALPOOLS=$1
+    test -n "$TOTALPOOLS"
+    local TOTALOSDS=$(json_total_osds)
+    test -n "$TOTALOSDS"
+    # given the total number of pools and OSDs,
+    # assume triple replication and equal number of PGs per pool
+    # and aim for 100 PGs per OSD
+    let "TOTALPGS = $TOTALOSDS * 100"
+    let "PGSPEROSD = $TOTALPGS / $TOTALPOOLS / 3"
+    echo $PGSPEROSD
+}
+
+function create_pool_incrementally {
+    # Special-purpose function for creating pools incrementally. For example,
+    # if your test case needs 2 pools "foo" and "bar", but you cannot create
+    # them all at once for some reason. Otherwise, use create_all_pools_at_once.
+    #
+    # sample usage:
+    #
+    # create_pool_incrementally foo 2
+    # ... do something ...
+    # create_pool_incrementally bar 2
+    # ... do something else ...
+    #
+    local POOLNAME=$1
+    test -n "$POOLNAME"
+    local TOTALPOOLS=$2
+    test -n "$TOTALPOOLS"
+    local PGSPERPOOL=$(pgs_per_pool $TOTALPOOLS)
+    ceph osd pool create $POOLNAME $PGSPERPOOL $PGSPERPOOL replicated
+}
+
+function create_all_pools_at_once {
+    # sample usage: create_all_pools_at_once foo bar
+    local TOTALPOOLS="${#@}"
+    local PGSPERPOOL=$(pgs_per_pool $TOTALPOOLS)
+    for POOLNAME in "$@"
+    do
+        ceph osd pool create $POOLNAME $PGSPERPOOL $PGSPERPOOL replicated
+    done
+    ceph osd pool ls detail
+}
+
+function pre_create_pools {
+    # pre-create pools with calculated number of PGs so we don't get health
+    # warnings after Stage 4 due to "too few" or "too many" PGs per OSD
+    # (the "write_test" pool is used in common/sanity-basic.sh)
+    sleep 10
+    POOLS="write_test"
+    test "$MDS" && POOLS+=" cephfs_data cephfs_metadata"
+    test "$OPENSTACK" && POOLS+=" smoketest-cloud-backups smoketest-cloud-volumes smoketest-cloud-images smoketest-cloud-vms cloud-backups cloud-volumes cloud-images cloud-vms"
+    test "$RBD" && POOLS+=" rbd"
+    create_all_pools_at_once $POOLS
+    ceph osd pool application enable write_test deepsea_qa
+    sleep 10
+}
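
pgs_per_pool budgets roughly 100 placement groups per OSD across all pools, assuming three-way replication; note that despite its name, PGSPEROSD is really the per-pool pg_num. A worked example with invented numbers:

# Worked example for pgs_per_pool (illustrative numbers):
#   6 OSDs, 3 pools  ->  TOTALPGS  = 6 * 100     = 600
#                        PGSPEROSD = 600 / 3 / 3 = 66   (integer division)
# so each pool below would be created with pg_num = pgp_num = 66:
create_all_pools_at_once write_test cephfs_data cephfs_metadata
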
diff --git a/qa/deepsea/health-ok/common/rbd.sh b/qa/deepsea/health-ok/common/rbd.sh
new file mode 100644 (file)
index 0000000..9204d3f
--- /dev/null
@@ -0,0 +1,29 @@
+#
+# This file is part of the DeepSea integration test suite
+#
+
+function ceph_conf_upstream_rbd_default_features {
+  #
+  # by removing this line, we ensure that there will be no "rbd default
+  # features" setting in ceph.conf, so the default value will be used
+  #
+  sed -i '/^rbd default features =/d' \
+      /srv/salt/ceph/configuration/files/rbd.conf
+}
+
+function ceph_test_librbd_can_be_run {
+  local TESTSCRIPT=/tmp/rbd_script.sh
+  local CLIENTNODE=$(_client_node)
+  cat << 'EOF' > $TESTSCRIPT
+set -e
+trap 'echo "Result: NOT_OK"' ERR
+set -x
+chmod a+r /etc/ceph/ceph.client.admin.keyring
+rpm -V ceph-test
+type ceph_test_librbd
+echo "Result: OK"
+EOF
+  _run_test_script_on_node $TESTSCRIPT $CLIENTNODE
+  echo "You can now run ceph_test_librbd on the client node"
+}
+
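
ceph_test_librbd_can_be_run only checks the prerequisites (readable admin keyring, intact ceph-test package); launching the binary itself is left to the caller. A sketch of that follow-up step, reusing the same salt targeting as the helpers above:

# Sketch: run the librbd test suite on the client node (illustrative only;
# ceph_test_librbd comes from the ceph-test package verified above).
CLIENTNODE=$(_client_node)
ceph_test_librbd_can_be_run
salt "$CLIENTNODE" cmd.run 'ceph_test_librbd' 2>/dev/null
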
diff --git a/qa/deepsea/health-ok/common/rgw.sh b/qa/deepsea/health-ok/common/rgw.sh
new file mode 100644 (file)
index 0000000..b21db4f
--- /dev/null
@@ -0,0 +1,129 @@
+#
+# This file is part of the DeepSea integration test suite
+#
+RGW_ROLE=rgw
+
+function rgw_demo_users {
+    local RGWSLS=/srv/salt/ceph/rgw/users/users.d/users.yml
+    cat << EOF >> $RGWSLS
+- { uid: "demo", name: "Demo", email: "demo@demo.nil" }
+- { uid: "demo1", name: "Demo1", email: "demo1@demo.nil" }
+EOF
+    cat $RGWSLS
+}
+
+function rgw_user_and_bucket_list {
+    #
+    # just list rgw users and buckets
+    #
+    local TESTSCRIPT=/tmp/rgw_user_and_bucket_list.sh
+    local RGWNODE=$(_first_x_node $RGW_ROLE)
+    cat << EOF > $TESTSCRIPT
+set -ex
+radosgw-admin user list
+radosgw-admin bucket list
+echo "Result: OK"
+EOF
+    _run_test_script_on_node $TESTSCRIPT $RGWNODE
+}
+
+function rgw_validate_system_user {
+    #
+    # prove the system user "admin" was really set up
+    #
+    local TESTSCRIPT=/tmp/rgw_validate_system_user.sh
+    local RGWNODE=$(_first_x_node $RGW_ROLE)
+    cat << EOF > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+radosgw-admin user info --uid=admin
+radosgw-admin user info --uid=admin | grep system | grep -q true
+echo "Result: OK"
+EOF
+    _run_test_script_on_node $TESTSCRIPT $RGWNODE
+}
+
+function rgw_validate_demo_users {
+    #
+    # prove the demo users from rgw_demo_users were really set up
+    #
+    local TESTSCRIPT=/tmp/rgw_validate_demo_users.sh
+    local RGWNODE=$(_first_x_node $RGW_ROLE)
+    cat << EOF > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+radosgw-admin user info --uid=demo
+radosgw-admin user info --uid=demo1
+echo "Result: OK"
+EOF
+    _run_test_script_on_node $TESTSCRIPT $RGWNODE
+}
+
+function rgw_curl_test {
+    local RGWNODE=$(_first_x_node $RGW_ROLE)
+    test -n "$SSL" && PROTOCOL="https" || PROTOCOL="http"
+    test -n "$SSL" && CURL_OPTS="--insecure"
+    set +x
+    for delay in 60 60 60 60 ; do
+        sudo zypper --non-interactive --gpg-auto-import-keys refresh && break
+        sleep $delay
+    done
+    set -x
+    zypper --non-interactive install --no-recommends curl libxml2-tools
+    # installing the curl RPM means the ceph-radosgw and rsyslog services need to be restarted
+    salt-run state.orch ceph.restart.rgw 2>/dev/null
+    systemctl restart rsyslog.service
+    _zypper_ps
+    salt --no-color -C "I@roles:$RGW_ROLE" cmd.run 'systemctl | grep radosgw'
+    #RGWNODE=$(salt --no-color -C "I@roles:$RGW_ROLE" test.ping | grep -o -P '^\S+(?=:)' | head -1)
+    RGWXMLOUT=/tmp/rgw_test.xml
+    curl $CURL_OPTS "${PROTOCOL}://$RGWNODE" > $RGWXMLOUT
+    test -f $RGWXMLOUT
+    xmllint $RGWXMLOUT
+    grep anonymous $RGWXMLOUT
+    rm -f $RGWXMLOUT
+}
+
+function rgw_add_ssl_global {
+    local GLOBALYML=/srv/pillar/ceph/stack/global.yml
+    cat <<EOF >> $GLOBALYML
+rgw_init: default-ssl
+rgw_configurations:
+  rgw:
+    users:
+      - { uid: "admin", name: "Admin", email: "admin@demo.nil", system: True }
+  # when using only RGW& not ganesha ssl will have all the users of rgw already,
+  # but to be consistent we define atleast one user
+  rgw-ssl:
+    users:
+      - { uid: "admin", name: "Admin", email: "admin@demo.nil", system: True }
+EOF
+    cat $GLOBALYML
+}
+
+function rgw_ssl_init {
+    local CERTDIR=/srv/salt/ceph/rgw/cert
+    mkdir -p $CERTDIR
+    pushd $CERTDIR
+    openssl req -x509 -nodes -days 1095 -newkey rsa:4096 -keyout rgw.key -out rgw.crt -subj "/C=DE"
+    cat rgw.key > rgw.pem && cat rgw.crt >> rgw.pem
+    popd
+    rgw_add_ssl_global
+}
+
+function validate_rgw_cert_perm {
+    local TESTSCRIPT=/tmp/test_validate_rgw_cert_perm.sh
+    local RGWNODE=$(_first_x_node $RGW_ROLE)
+    cat << 'EOF' > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+RGW_PEM=/etc/ceph/rgw.pem
+test -f "$RGW_PEM"
+test "$(stat -c'%U' $RGW_PEM)" == "ceph"
+test "$(stat -c'%G' $RGW_PEM)" == "ceph"
+test "$(stat -c'%a' $RGW_PEM)" -eq 600
+echo "Result: OK"
+EOF
+    _run_test_script_on_node $TESTSCRIPT $RGWNODE
+}
+
diff --git a/qa/deepsea/health-ok/common/zypper.sh b/qa/deepsea/health-ok/common/zypper.sh
new file mode 100644 (file)
index 0000000..0abadb2
--- /dev/null
@@ -0,0 +1,24 @@
+# This file is part of the DeepSea integration test suite
+
+#
+# zypper-specific helper functions
+#
+
+function _dump_salt_master_zypper_repos {
+    zypper lr -upEP
+}
+
+function _zypper_ref_on_master {
+    set +x
+    for delay in 60 60 60 60 ; do
+        zypper --non-interactive --gpg-auto-import-keys refresh && break
+        sleep $delay
+    done
+    set -x
+}
+
+function _zypper_install_on_master {
+    local PACKAGE=$1
+    zypper --non-interactive install --no-recommends $PACKAGE
+}
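+
+# Typical usage (the package name below is illustrative only):
+#
+#   _zypper_ref_on_master
+#   _zypper_install_on_master curl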
+
diff --git a/qa/deepsea/health-ok/health-ok.sh b/qa/deepsea/health-ok/health-ok.sh
new file mode 100755 (executable)
index 0000000..159303a
--- /dev/null
@@ -0,0 +1,202 @@
+#!/bin/bash
+#
+# DeepSea integration test "suites/basic/health-ok.sh"
+#
+# This script runs DeepSea stages 0-3 (or 0-4, depending on options) to deploy
+# a Ceph cluster (with various options to control the cluster configuration).
+# After the last stage completes, the script checks for HEALTH_OK.
+#
+# The script makes no assumptions beyond those listed in README.
+#
+# After HEALTH_OK is reached, the script also runs various sanity tests
+# depending on the options provided.
+#
+# On success (HEALTH_OK is reached, sanity tests pass), the script returns 0.
+# On failure, for whatever reason, the script returns non-zero.
+#
+# The script produces verbose output on stdout, which can be captured for later
+# forensic analysis.
+#
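+# Example invocation (this particular option combination is illustrative only):
+#
+#   ./health-ok.sh --cli --client-nodes=1 --mds --rgw --ssl
+#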
+
+set -e
+set +x
+
+SCRIPTNAME=$(basename ${0})
+BASEDIR=$(readlink -f "$(dirname ${0})")
+test -d $BASEDIR
+[[ $BASEDIR =~ \/health-ok$ ]]
+
+source $BASEDIR/common/common.sh
+
+function usage {
+    set +x
+    echo "$SCRIPTNAME - script for testing HEALTH_OK deployment"
+    echo "for use in SUSE Enterprise Storage testing"
+    echo
+    echo "Usage:"
+    echo "  $SCRIPTNAME [-h,--help] [--cli] [--client-nodes=X]"
+    echo "  [--mds] [--min-nodes=X] [--nfs-ganesha] [--no-update]"
+    echo "  [--openstack] [--profile=X] [--rbd] [--rgw] [--ssl]"
+    echo "  [--tuned=X]"
+    echo
+    echo "Options:"
+    echo "    --cli           Use DeepSea CLI"
+    echo "    --client-nodes  Number of client (non-cluster) nodes"
+    echo "    --help          Display this usage message"
+    echo "    --mds           Deploy MDS"
+    echo "    --min-nodes     Minimum number of nodes"
+    echo "    --nfs-ganesha   Deploy NFS-Ganesha"
+    echo "    --no-update     Use no-update-no-reboot Stage 0 alt default"
+    echo "    --openstack     Pre-create pools for OpenStack functests"
+    echo "    --profile       Storage/OSD profile (see below)"
+    echo "    --rbd           Modify ceph.conf for rbd integration testing"
+    echo "    --rgw           Deploy RGW"
+    echo "    --ssl           Deploy RGW with SSL"
+    echo "    --start-stage   Run stages from (defaults to 0)"
+    echo "    --teuthology    Provide this option when running via teuthology"
+    echo "    --tuned=on/off  Deploy tuned in Stage 3 (default: off)"
+    echo
+    echo "Supported storage/OSD profiles:"
+    echo "    default         Whatever is generated by Stage 1 (bluestore)"
+    echo "    dmcrypt         All encrypted OSDs"
+    echo "    filestore       All filestore OSDs"
+    echo "    random          A randomly chosen profile (teuthology/OVH only)"
+    echo "    <OTHER>         Any other value will be assumed to be the name"
+    echo "                    of an OSD profile in qa/osd-config/ovh"
+    exit 1
+}
+
+assert_enhanced_getopt
+
+TEMP=$(getopt -o h \
+--long "cli,client-nodes:,help,igw,mds,min-nodes:,nfs-ganesha,no-update,openstack,profile:,rbd,rgw,ssl,start-stage:,teuthology,tuned:" \
+-n 'health-ok.sh' -- "$@")
+
+if [ $? != 0 ] ; then echo "Terminating..." >&2 ; exit 1 ; fi
+
+# Note the quotes around "$TEMP": they are essential!
+eval set -- "$TEMP"
+
+# process command-line options
+CLI=""
+CLIENT_NODES=0
+STORAGE_PROFILE="default"
+CUSTOM_STORAGE_PROFILE=""
+MDS=""
+MIN_NODES=1
+OPENSTACK=""
+NFS_GANESHA=""
+NO_UPDATE=""
+RBD=""
+RGW=""
+SSL=""
+START_STAGE="0"
+TEUTHOLOGY=""
+TUNED="off"
+while true ; do
+    case "$1" in
+        --cli) CLI="$1" ; shift ;;
+        --client-nodes) shift ; CLIENT_NODES=$1 ; shift ;;
+        -h|--help) usage ;;    # does not return
+        --mds) MDS="$1" ; shift ;;
+        --min-nodes) shift ; MIN_NODES=$1 ; shift ;;
+        --nfs-ganesha) NFS_GANESHA="$1" ; shift ;;
+        --no-update) NO_UPDATE="$1" ; shift ;;
+        --openstack) OPENSTACK="$1" ; shift ;;
+        --profile) shift ; STORAGE_PROFILE=$1 ; shift ;;
+        --rbd) RBD="$1" ; shift ;;
+        --rgw) RGW="$1" ; shift ;;
+        --ssl) SSL="$1" ; shift ;;
+        --start-stage) shift ; START_STAGE=$1 ; shift ;;
+        --teuthology) TEUTHOLOGY="$1" ; shift ;;
+        --tuned) shift ; TUNED=$1 ; shift ;;
+        --) shift ; break ;;
+        *) echo "Internal error" ; exit 1 ;;
+    esac
+done
+if [ "$NFS_GANESHA" ] ; then
+    if [ -z "$MDS" -a -z "$RGW" ] ; then
+        echo "NFS-Ganesha requires either mds or rgw role, but neither was specified. Bailing out!"
+        exit 1
+    fi
+fi
+TUNED=${TUNED,,}
+case "$TUNED" in
+    on) ;;
+    off) TUNED='' ;;
+    *) echo "Bad value ->$TUNED<- passed with --tuned. Bailing out!" ; exit 1 ;;
+esac
+echo "WWWW"
+echo "health-ok.sh running with the following configuration:"
+test -n "$CLI" && echo "- CLI"
+echo "- CLIENT_NODES ->$CLIENT_NODES<-"
+echo "- MIN_NODES ->$MIN_NODES<-"
+test -n "$MDS" && echo "- MDS"
+test -n "$NFS_GANESHA" && echo "- NFS-Ganesha"
+test -n "$OPENSTACK" && echo "- OpenStack test pools will be pre-created"
+echo "- PROFILE ->$STORAGE_PROFILE<-"
+test -n "$RBD" && echo "- RBD"
+test -n "$RGW" && echo "- RGW"
+test -n "$SSL" && echo "- SSL"
+echo "- Start Stage ->$START_STAGE<-"
+test -n "$TEUTHOLOGY" && echo "- TEUTHOLOGY"
+echo -n "- TUNED: "
+test -n "$TUNED" && echo "ON"
+test -z "$TUNED" && echo "OFF"
+echo -n "Stage 0 update: "
+test -n "$NO_UPDATE" && echo "disabled" || echo "enabled"
+set -x
+
+# deploy phase
+deploy_ceph
+
+# verification phase
+ceph_health_test
+test "$STORAGE_NODES" = "$(number_of_hosts_in_ceph_osd_tree)"
+#salt -I roles:storage osd.report 2>/dev/null
+
+# test phase
+REPEAT_STAGE_0=""
+ceph_log_grep_enoent_eaccess
+test_systemd_ceph_osd_target_wants
+#rados_write_test
+#ceph_version_test
+if [ -n "$RGW" ] ; then
+    rgw_curl_test
+    test -n "$SSL" && validate_rgw_cert_perm
+    rgw_user_and_bucket_list
+    rgw_validate_system_user
+    rgw_validate_demo_users
+fi
+test -n "$MDS" -a "$CLIENT_NODES" -ge 1 && cephfs_mount_and_sanity_test
+if [ "$NFS_GANESHA" ] ; then
+    for v in "" "3" "4" ; do
+        echo "Testing NFS-Ganesha with NFS version ->$v<-"
+        if [ "$RGW" -a "$v" = "3" ] ; then
+            echo "Not testing RGW FSAL on NFSv3"
+            continue
+        else
+            nfs_ganesha_mount "$v"
+        fi
+        if [ "$MDS" ] ; then
+            nfs_ganesha_write_test cephfs "$v"
+        fi
+        if [ "$RGW" ] ; then
+            if [ "$v" = "3" ] ; then
+                echo "Not testing RGW FSAL on NFSv3"
+            else
+                rgw_curl_test
+                rgw_user_and_bucket_list
+                rgw_validate_demo_users
+                nfs_ganesha_write_test rgw "$v"
+            fi
+        fi
+        nfs_ganesha_umount
+        sleep 10
+    done
+    REPEAT_STAGE_0="yes, please"
+fi
+test "$REPEAT_STAGE_0" && run_stage_0 "$CLI" # exercise ceph.restart orchestration
+
+echo "YYYY"
+echo "health-ok test result: PASS"
diff --git a/qa/deepsea/health-ok/stage-5.sh b/qa/deepsea/health-ok/stage-5.sh
new file mode 100755 (executable)
index 0000000..fa63516
--- /dev/null
@@ -0,0 +1,112 @@
+#!/bin/bash
+#
+# DeepSea integration test "suites/basic/stage-5.sh"
+#
+# This script runs DeepSea stages 2 and 5 to remove a storage-only node from
+# an existing Ceph cluster.
+#
+# In addition to the assumptions contained in README, this script assumes
+# that:
+# 1. DeepSea has already been used to deploy a cluster,
+# 2. the cluster has at least one "storage-only" node (i.e. a node with role
+#    "storage" and no other roles (except possibly "admin")), and
+# 3. the cluster will be able to reach HEALTH_OK after one storage-only node
+#    is dropped (typically this means the cluster needs at least 3 storage
+#    nodes to start with).
+#
+# On success (HEALTH_OK is reached, number of storage nodes went down by 1,
+# number of OSDs decreased), the script returns 0. On failure, for whatever
+# reason, the script returns non-zero.
+#
+# The script produces verbose output on stdout, which can be captured for later
+# forensic analysis.
+#
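+# Example invocation (with the optional DeepSea CLI mode enabled):
+#
+#   ./stage-5.sh --cli
+#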
+
+set -e
+set +x
+
+SCRIPTNAME=$(basename ${0})
+BASEDIR=$(readlink -f "$(dirname ${0})")
+test -d $BASEDIR
+[[ $BASEDIR =~ \/health-ok$ ]]
+
+source $BASEDIR/common/common.sh
+
+function usage {
+    set +x
+    echo "$SCRIPTNAME - script for testing HEALTH_OK deployment"
+    echo "for use in SUSE Enterprise Storage testing"
+    echo
+    echo "Usage:"
+    echo "  $SCRIPTNAME [-h,--help] [--cli]"
+    echo
+    echo "Options:"
+    echo "    --cli           Use DeepSea CLI"
+    echo "    --help          Display this usage message"
+    exit 1
+}
+
+assert_enhanced_getopt
+
+TEMP=$(getopt -o h \
+--long "cli,help" \
+-n 'health-ok.sh' -- "$@")
+
+if [ $? != 0 ] ; then echo "Terminating..." >&2 ; exit 1 ; fi
+
+# Note the quotes around "$TEMP": they are essential!
+eval set -- "$TEMP"
+
+# process command-line options
+CLI=""
+while true ; do
+    case "$1" in
+        --cli) CLI="$1" ; shift ;;
+        -h|--help) usage ;;    # does not return
+        --) shift ; break ;;
+        *) echo "Internal error" ; exit 1 ;;
+    esac
+done
+echo "WWWW"
+echo "stage-5.sh running with the following configuration:"
+test -n "$CLI" && echo "- CLI"
+set -x
+
+# double-check there is a healthy cluster
+ceph_health_test
+STORAGE_NODES_BEFORE=$(number_of_hosts_in_ceph_osd_tree)
+OSDS_BEFORE=$(number_of_osds_in_ceph_osd_tree)
+test "$STORAGE_NODES_BEFORE"
+test "$OSDS_BEFORE"
+test "$STORAGE_NODES_BEFORE" -gt 1
+test "$OSDS_BEFORE" -gt 0
+
+# modify storage profile
+STORAGE_PROFILE=$(storage_profile_from_policy_cfg)
+FIRST_STORAGE_ONLY_NODE=$(_first_storage_only_node)
+ls -lR $PROPOSALSDIR
+PROPOSALS_BEFORE=$(find $PROPOSALSDIR -name \*$FIRST_STORAGE_ONLY_NODE\* | wc --lines)
+policy_remove_storage_node $FIRST_STORAGE_ONLY_NODE
+ls -lR $PROPOSALSDIR
+PROPOSALS_AFTER=$(find $PROPOSALSDIR -name \*$FIRST_STORAGE_ONLY_NODE\* | wc --lines)
+
+# run stages 2 and 5
+run_stage_2 "$CLI"
+ceph_cluster_status
+run_stage_5 "$CLI"
+ceph_cluster_status
+
+# verification phase
+ceph_health_test
+STORAGE_NODES_AFTER=$(number_of_hosts_in_ceph_osd_tree)
+OSDS_AFTER=$(number_of_osds_in_ceph_osd_tree)
+test "$STORAGE_NODES_BEFORE"
+test "$OSDS_BEFORE"
+test "$STORAGE_NODES_AFTER" -eq "$((STORAGE_NODES_BEFORE - 1))"
+test "$OSDS_AFTER" -lt "$OSDS_BEFORE"
+
+## osd.report for good measure
+#salt -I roles:storage osd.report 2>/dev/null
+
+echo "YYYY"
+echo "stage-5 test result: PASS"
diff --git a/qa/deepsea/nodes/1node.yaml b/qa/deepsea/nodes/1node.yaml
new file mode 100644 (file)
index 0000000..aaaf43d
--- /dev/null
@@ -0,0 +1,2 @@
+roles:
+- [client.salt_master]
diff --git a/qa/deepsea/nodes/20nodes.yaml b/qa/deepsea/nodes/20nodes.yaml
new file mode 100644 (file)
index 0000000..1704ce7
--- /dev/null
@@ -0,0 +1,21 @@
+roles:
+- [client.salt_master, node.0]
+- [node.1]
+- [node.2]
+- [node.3]
+- [node.4]
+- [node.5]
+- [node.6]
+- [node.7]
+- [node.8]
+- [node.9]
+- [node.10]
+- [node.11]
+- [node.12]
+- [node.13]
+- [node.14]
+- [node.15]
+- [node.16]
+- [node.17]
+- [node.18]
+- [node.19]
diff --git a/qa/deepsea/nodes/2nodes.yaml b/qa/deepsea/nodes/2nodes.yaml
new file mode 100644 (file)
index 0000000..c71b410
--- /dev/null
@@ -0,0 +1,3 @@
+roles:
+- [client.salt_master]
+- [node.1]
diff --git a/qa/deepsea/nodes/3nodes.yaml b/qa/deepsea/nodes/3nodes.yaml
new file mode 100644 (file)
index 0000000..32d7cb2
--- /dev/null
@@ -0,0 +1,4 @@
+roles:
+- [client.salt_master]
+- [node.1]
+- [node.2]
diff --git a/qa/deepsea/nodes/4nodes.yaml b/qa/deepsea/nodes/4nodes.yaml
new file mode 100644 (file)
index 0000000..fbda8ee
--- /dev/null
@@ -0,0 +1,5 @@
+roles:
+- [client.salt_master]
+- [node.1]
+- [node.2]
+- [node.3]
diff --git a/qa/deepsea/nodes/5nodes.yaml b/qa/deepsea/nodes/5nodes.yaml
new file mode 100644 (file)
index 0000000..33c023a
--- /dev/null
@@ -0,0 +1,6 @@
+roles:
+- [client.salt_master, node.0]
+- [node.1]
+- [node.2]
+- [node.3]
+- [node.4]
diff --git a/qa/deepsea/salt.yaml b/qa/deepsea/salt.yaml
new file mode 100644 (file)
index 0000000..addcc5a
--- /dev/null
@@ -0,0 +1,6 @@
+tasks:
+- clock:
+- install:
+    install_ceph_packages: false
+    extra_system_packages: [salt, salt-master, salt-minion, salt-api]
+- salt:
diff --git a/qa/deepsea/storage-profiles/bs_dedicated_db.yaml b/qa/deepsea/storage-profiles/bs_dedicated_db.yaml
new file mode 100644 (file)
index 0000000..78114bf
--- /dev/null
@@ -0,0 +1,15 @@
+overrides:
+  deepsea:
+    storage_profile:
+        ceph:
+          storage:
+            osds:
+              /dev/vdb:
+                format: bluestore
+                db: /dev/vde
+              /dev/vdc:
+                format: bluestore
+                db: /dev/vde
+              /dev/vdd:
+                format: bluestore
+                db: /dev/vde
diff --git a/qa/deepsea/storage-profiles/bs_dedicated_db_crypt.yaml b/qa/deepsea/storage-profiles/bs_dedicated_db_crypt.yaml
new file mode 100644 (file)
index 0000000..a9ea7b7
--- /dev/null
@@ -0,0 +1,18 @@
+overrides:
+  deepsea:
+    storage_profile:
+        ceph:
+          storage:
+            osds:
+              /dev/vdb:
+                format: bluestore
+                encryption: dmcrypt
+                db: /dev/vde
+              /dev/vdc:
+                format: bluestore
+                encryption: dmcrypt
+                db: /dev/vde
+              /dev/vdd:
+                format: bluestore
+                encryption: dmcrypt
+                db: /dev/vde
diff --git a/qa/deepsea/storage-profiles/bs_dedicated_db_sizes.yaml b/qa/deepsea/storage-profiles/bs_dedicated_db_sizes.yaml
new file mode 100644 (file)
index 0000000..34a8d5f
--- /dev/null
@@ -0,0 +1,18 @@
+overrides:
+  deepsea:
+    storage_profile:
+        ceph:
+          storage:
+            osds:
+              /dev/vdb:
+                format: bluestore
+                db: /dev/vde
+                db_size: 1G
+              /dev/vdc:
+                format: bluestore
+                db: /dev/vde
+                db_size: 2G
+              /dev/vdd:
+                format: bluestore
+                db: /dev/vde
+                db_size: 3G
diff --git a/qa/deepsea/storage-profiles/bs_dedicated_db_sizes_crypt.yaml b/qa/deepsea/storage-profiles/bs_dedicated_db_sizes_crypt.yaml
new file mode 100644 (file)
index 0000000..3e08f56
--- /dev/null
@@ -0,0 +1,21 @@
+overrides:
+  deepsea:
+    storage_profile:
+        ceph:
+          storage:
+            osds:
+              /dev/vdb:
+                format: bluestore
+                encryption: dmcrypt
+                db: /dev/vde
+                db_size: 1G
+              /dev/vdc:
+                format: bluestore
+                encryption: dmcrypt
+                db: /dev/vde
+                db_size: 2G
+              /dev/vdd:
+                format: bluestore
+                encryption: dmcrypt
+                db: /dev/vde
+                db_size: 3G
diff --git a/qa/deepsea/storage-profiles/bs_dedicated_db_sizes_mixed.yaml b/qa/deepsea/storage-profiles/bs_dedicated_db_sizes_mixed.yaml
new file mode 100644 (file)
index 0000000..4f838bb
--- /dev/null
@@ -0,0 +1,17 @@
+overrides:
+  deepsea:
+    storage_profile:
+        ceph:
+          storage:
+            osds:
+              /dev/vdb:
+                format: bluestore
+                db: /dev/vde
+                db_size: 1G
+              /dev/vdc:
+                format: bluestore
+                db: /dev/vde
+                db_size: 2G
+              /dev/vdd:
+                format: bluestore
+                db: /dev/vde
diff --git a/qa/deepsea/storage-profiles/bs_dedicated_db_sizes_mixed_crypt.yaml b/qa/deepsea/storage-profiles/bs_dedicated_db_sizes_mixed_crypt.yaml
new file mode 100644 (file)
index 0000000..4f2f60e
--- /dev/null
@@ -0,0 +1,20 @@
+overrides:
+  deepsea:
+    storage_profile:
+        ceph:
+          storage:
+            osds:
+              /dev/vdb:
+                format: bluestore
+                encryption: dmcrypt
+                db: /dev/vde
+                db_size: 1G
+              /dev/vdc:
+                format: bluestore
+                encryption: dmcrypt
+                db: /dev/vde
+                db_size: 2G
+              /dev/vdd:
+                format: bluestore
+                encryption: dmcrypt
+                db: /dev/vde
diff --git a/qa/deepsea/storage-profiles/bs_dedicated_wal.yaml b/qa/deepsea/storage-profiles/bs_dedicated_wal.yaml
new file mode 100644 (file)
index 0000000..7f6093d
--- /dev/null
@@ -0,0 +1,15 @@
+overrides:
+  deepsea:
+    storage_profile:
+        ceph:
+          storage:
+            osds:
+              /dev/vdb:
+                format: bluestore
+                wal: /dev/vde
+              /dev/vdc:
+                format: bluestore
+                wal: /dev/vde
+              /dev/vdd:
+                format: bluestore
+                wal: /dev/vde
diff --git a/qa/deepsea/storage-profiles/bs_dedicated_wal_crypt.yaml b/qa/deepsea/storage-profiles/bs_dedicated_wal_crypt.yaml
new file mode 100644 (file)
index 0000000..df5e5eb
--- /dev/null
@@ -0,0 +1,18 @@
+overrides:
+  deepsea:
+    storage_profile:
+        ceph:
+          storage:
+            osds:
+              /dev/vdb:
+                format: bluestore
+                encryption: dmcrypt
+                wal: /dev/vde
+              /dev/vdc:
+                format: bluestore
+                encryption: dmcrypt
+                wal: /dev/vde
+              /dev/vdd:
+                format: bluestore
+                encryption: dmcrypt
+                wal: /dev/vde
diff --git a/qa/deepsea/storage-profiles/bs_dedicated_wal_db.yaml b/qa/deepsea/storage-profiles/bs_dedicated_wal_db.yaml
new file mode 100644 (file)
index 0000000..61daf1a
--- /dev/null
@@ -0,0 +1,14 @@
+overrides:
+  deepsea:
+    storage_profile:
+        ceph:
+          storage:
+            osds:
+              /dev/vdb:
+                format: bluestore
+                wal: /dev/vde
+                db: /dev/vdd
+              /dev/vdc:
+                format: bluestore
+                wal: /dev/vde
+                db: /dev/vdd
diff --git a/qa/deepsea/storage-profiles/bs_dedicated_wal_db_crypt.yaml b/qa/deepsea/storage-profiles/bs_dedicated_wal_db_crypt.yaml
new file mode 100644 (file)
index 0000000..07ea6bf
--- /dev/null
@@ -0,0 +1,16 @@
+overrides:
+  deepsea:
+    storage_profile:
+        ceph:
+          storage:
+            osds:
+              /dev/vdb:
+                format: bluestore
+                encryption: dmcrypt
+                wal: /dev/vde
+                db: /dev/vdd
+              /dev/vdc:
+                format: bluestore
+                encryption: dmcrypt
+                wal: /dev/vde
+                db: /dev/vdd
diff --git a/qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_all.yaml b/qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_all.yaml
new file mode 100644 (file)
index 0000000..8693a35
--- /dev/null
@@ -0,0 +1,18 @@
+overrides:
+  deepsea:
+    storage_profile:
+        ceph:
+          storage:
+            osds:
+              /dev/vdb:
+                format: bluestore
+                wal_size: 1G
+                wal: /dev/vde
+                db: /dev/vdd
+                db_size: 2G
+              /dev/vdc:
+                format: bluestore
+                wal: /dev/vde
+                db: /dev/vdd
+                wal_size: 3G
+                db_size: 4G
diff --git a/qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_all_crypt.yaml b/qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_all_crypt.yaml
new file mode 100644 (file)
index 0000000..a9c4aec
--- /dev/null
@@ -0,0 +1,20 @@
+overrides:
+  deepsea:
+    storage_profile:
+        ceph:
+          storage:
+            osds:
+              /dev/vdb:
+                format: bluestore
+                encryption: dmcrypt
+                wal_size: 1G
+                wal: /dev/vde
+                db: /dev/vdd
+                db_size: 2G
+              /dev/vdc:
+                format: bluestore
+                encryption: dmcrypt
+                wal: /dev/vde
+                db: /dev/vdd
+                wal_size: 3G
+                db_size: 4G
diff --git a/qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_mixed.yaml b/qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_mixed.yaml
new file mode 100644 (file)
index 0000000..c4f2e14
--- /dev/null
@@ -0,0 +1,16 @@
+overrides:
+  deepsea:
+    storage_profile:
+        ceph:
+          storage:
+            osds:
+              /dev/vdb:
+                format: bluestore
+                wal: /dev/vde
+                db: /dev/vdd
+              /dev/vdc:
+                format: bluestore
+                wal: /dev/vde
+                db: /dev/vdd
+                wal_size: 3G
+                db_size: 4G
diff --git a/qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_mixed_crypt.yaml b/qa/deepsea/storage-profiles/bs_dedicated_wal_db_sizes_mixed_crypt.yaml
new file mode 100644 (file)
index 0000000..9a1f408
--- /dev/null
@@ -0,0 +1,18 @@
+overrides:
+  deepsea:
+    storage_profile:
+        ceph:
+          storage:
+            osds:
+              /dev/vdb:
+                format: bluestore
+                encryption: dmcrypt
+                wal: /dev/vde
+                db: /dev/vdd
+              /dev/vdc:
+                format: bluestore
+                encryption: dmcrypt
+                wal: /dev/vde
+                db: /dev/vdd
+                wal_size: 3G
+                db_size: 4G
diff --git a/qa/deepsea/storage-profiles/bs_dedicated_wal_sizes.yaml b/qa/deepsea/storage-profiles/bs_dedicated_wal_sizes.yaml
new file mode 100644 (file)
index 0000000..b22f896
--- /dev/null
@@ -0,0 +1,18 @@
+overrides:
+  deepsea:
+    storage_profile:
+        ceph:
+          storage:
+            osds:
+              /dev/vdb:
+                format: bluestore
+                wal_size: 1G
+                wal: /dev/vde
+              /dev/vdc:
+                format: bluestore
+                wal: /dev/vde
+                wal_size: 2G
+              /dev/vdd:
+                format: bluestore
+                wal: /dev/vde
+                wal_size: 3G
diff --git a/qa/deepsea/storage-profiles/bs_dedicated_wal_sizes_crypt.yaml b/qa/deepsea/storage-profiles/bs_dedicated_wal_sizes_crypt.yaml
new file mode 100644 (file)
index 0000000..b5c02df
--- /dev/null
@@ -0,0 +1,21 @@
+overrides:
+  deepsea:
+    storage_profile:
+        ceph:
+          storage:
+            osds:
+              /dev/vdb:
+                format: bluestore
+                encryption: dmcrypt
+                wal_size: 1G
+                wal: /dev/vde
+              /dev/vdc:
+                format: bluestore
+                encryption: dmcrypt
+                wal: /dev/vde
+                wal_size: 2G
+              /dev/vdd:
+                format: bluestore
+                encryption: dmcrypt
+                wal: /dev/vde
+                wal_size: 3G
diff --git a/qa/deepsea/storage-profiles/bs_dedicated_wal_sizes_mixed.yaml b/qa/deepsea/storage-profiles/bs_dedicated_wal_sizes_mixed.yaml
new file mode 100644 (file)
index 0000000..0897b6e
--- /dev/null
@@ -0,0 +1,17 @@
+overrides:
+  deepsea:
+    storage_profile:
+        ceph:
+          storage:
+            osds:
+              /dev/vdb:
+                format: bluestore
+                wal_size: 1G
+                wal: /dev/vde
+              /dev/vdc:
+                format: bluestore
+                wal: /dev/vde
+                wal_size: 2G
+              /dev/vdd:
+                format: bluestore
+                wal: /dev/vde
diff --git a/qa/deepsea/storage-profiles/bs_dedicated_wal_sizes_mixed_crypt.yaml b/qa/deepsea/storage-profiles/bs_dedicated_wal_sizes_mixed_crypt.yaml
new file mode 100644 (file)
index 0000000..f4c8033
--- /dev/null
@@ -0,0 +1,20 @@
+overrides:
+  deepsea:
+    storage_profile:
+        ceph:
+          storage:
+            osds:
+              /dev/vdb:
+                format: bluestore
+                encryption: dmcrypt
+                wal_size: 1G
+                wal: /dev/vde
+              /dev/vdc:
+                format: bluestore
+                encryption: dmcrypt
+                wal: /dev/vde
+                wal_size: 2G
+              /dev/vdd:
+                format: bluestore
+                encryption: dmcrypt
+                wal: /dev/vde
diff --git a/qa/deepsea/storage-profiles/fs_dedicated_journal.yaml b/qa/deepsea/storage-profiles/fs_dedicated_journal.yaml
new file mode 100644 (file)
index 0000000..0b5b251
--- /dev/null
@@ -0,0 +1,15 @@
+overrides:
+  deepsea:
+    storage_profile:
+        ceph:
+          storage:
+            osds:
+              /dev/vdb:
+                format: filestore
+                journal: /dev/vde
+              /dev/vdc:
+                format: filestore
+                journal: /dev/vde
+              /dev/vdd:
+                format: filestore
+                journal: /dev/vde
diff --git a/qa/deepsea/storage-profiles/fs_dedicated_journal_crypt.yaml b/qa/deepsea/storage-profiles/fs_dedicated_journal_crypt.yaml
new file mode 100644 (file)
index 0000000..6c7d500
--- /dev/null
@@ -0,0 +1,18 @@
+overrides:
+  deepsea:
+    storage_profile:
+        ceph:
+          storage:
+            osds:
+              /dev/vdb:
+                format: filestore
+                encryption: dmcrypt
+                journal: /dev/vde
+              /dev/vdc:
+                format: filestore
+                encryption: dmcrypt
+                journal: /dev/vde
+              /dev/vdd:
+                format: filestore
+                encryption: dmcrypt
+                journal: /dev/vde
diff --git a/qa/suites/deepsea/.qa b/qa/suites/deepsea/.qa
new file mode 120000 (symlink)
index 0000000..fea2489
--- /dev/null
@@ -0,0 +1 @@
+../.qa
\ No newline at end of file
diff --git a/qa/suites/deepsea/tier0/.qa b/qa/suites/deepsea/tier0/.qa
new file mode 120000 (symlink)
index 0000000..fea2489
--- /dev/null
@@ -0,0 +1 @@
+../.qa
\ No newline at end of file
diff --git a/qa/suites/deepsea/tier0/salt/% b/qa/suites/deepsea/tier0/salt/%
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/qa/suites/deepsea/tier0/salt/.qa b/qa/suites/deepsea/tier0/salt/.qa
new file mode 120000 (symlink)
index 0000000..fea2489
--- /dev/null
@@ -0,0 +1 @@
+../.qa
\ No newline at end of file
diff --git a/qa/suites/deepsea/tier0/salt/0-salt.yaml b/qa/suites/deepsea/tier0/salt/0-salt.yaml
new file mode 120000 (symlink)
index 0000000..4ee5639
--- /dev/null
@@ -0,0 +1 @@
+.qa/deepsea/salt.yaml
\ No newline at end of file
diff --git a/qa/suites/deepsea/tier0/salt/boilerplate b/qa/suites/deepsea/tier0/salt/boilerplate
new file mode 120000 (symlink)
index 0000000..a1e87ef
--- /dev/null
@@ -0,0 +1 @@
+.qa/deepsea/boilerplate/
\ No newline at end of file
diff --git a/qa/suites/deepsea/tier0/salt/cluster/+ b/qa/suites/deepsea/tier0/salt/cluster/+
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/qa/suites/deepsea/tier0/salt/cluster/.qa b/qa/suites/deepsea/tier0/salt/cluster/.qa
new file mode 120000 (symlink)
index 0000000..fea2489
--- /dev/null
@@ -0,0 +1 @@
+../.qa
\ No newline at end of file
diff --git a/qa/suites/deepsea/tier0/salt/cluster/1disk.yaml b/qa/suites/deepsea/tier0/salt/cluster/1disk.yaml
new file mode 120000 (symlink)
index 0000000..d94d2f8
--- /dev/null
@@ -0,0 +1 @@
+.qa/deepsea/disks/1disk.yaml
\ No newline at end of file
diff --git a/qa/suites/deepsea/tier0/salt/cluster/1node.yaml b/qa/suites/deepsea/tier0/salt/cluster/1node.yaml
new file mode 120000 (symlink)
index 0000000..ebfbfef
--- /dev/null
@@ -0,0 +1 @@
+.qa/deepsea/nodes/1node.yaml
\ No newline at end of file
diff --git a/qa/suites/deepsea/tier0/salt/distros b/qa/suites/deepsea/tier0/salt/distros
new file mode 120000 (symlink)
index 0000000..337a606
--- /dev/null
@@ -0,0 +1 @@
+.qa/deepsea/distros/
\ No newline at end of file
diff --git a/qa/suites/deepsea/tier1/.qa b/qa/suites/deepsea/tier1/.qa
new file mode 120000 (symlink)
index 0000000..fea2489
--- /dev/null
@@ -0,0 +1 @@
+../.qa
\ No newline at end of file
diff --git a/qa/suites/deepsea/tier1/health-ok/% b/qa/suites/deepsea/tier1/health-ok/%
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/qa/suites/deepsea/tier1/health-ok/.qa b/qa/suites/deepsea/tier1/health-ok/.qa
new file mode 120000 (symlink)
index 0000000..fea2489
--- /dev/null
@@ -0,0 +1 @@
+../.qa
\ No newline at end of file
diff --git a/qa/suites/deepsea/tier1/health-ok/0-salt.yaml b/qa/suites/deepsea/tier1/health-ok/0-salt.yaml
new file mode 120000 (symlink)
index 0000000..4ee5639
--- /dev/null
@@ -0,0 +1 @@
+.qa/deepsea/salt.yaml
\ No newline at end of file
diff --git a/qa/suites/deepsea/tier1/health-ok/1-deploy-phase.yaml b/qa/suites/deepsea/tier1/health-ok/1-deploy-phase.yaml
new file mode 120000 (symlink)
index 0000000..d1c469a
--- /dev/null
@@ -0,0 +1 @@
+.qa/deepsea/deepsea.yaml
\ No newline at end of file
diff --git a/qa/suites/deepsea/tier1/health-ok/2-test-phase.yaml b/qa/suites/deepsea/tier1/health-ok/2-test-phase.yaml
new file mode 100644 (file)
index 0000000..1f0c720
--- /dev/null
@@ -0,0 +1,8 @@
+tasks:
+        - exec:
+                client.salt_master:
+                        - 'ceph -s'
+        - deepsea.validation:
+        - deepsea.toolbox:
+                assert_bluestore:
+                        osd.0:
diff --git a/qa/suites/deepsea/tier1/health-ok/boilerplate b/qa/suites/deepsea/tier1/health-ok/boilerplate
new file mode 120000 (symlink)
index 0000000..a1e87ef
--- /dev/null
@@ -0,0 +1 @@
+.qa/deepsea/boilerplate/
\ No newline at end of file
diff --git a/qa/suites/deepsea/tier1/health-ok/cluster/+ b/qa/suites/deepsea/tier1/health-ok/cluster/+
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/qa/suites/deepsea/tier1/health-ok/cluster/.qa b/qa/suites/deepsea/tier1/health-ok/cluster/.qa
new file mode 120000 (symlink)
index 0000000..fea2489
--- /dev/null
@@ -0,0 +1 @@
+../.qa
\ No newline at end of file
diff --git a/qa/suites/deepsea/tier1/health-ok/cluster/4disks.yaml b/qa/suites/deepsea/tier1/health-ok/cluster/4disks.yaml
new file mode 120000 (symlink)
index 0000000..e21aaff
--- /dev/null
@@ -0,0 +1 @@
+.qa/deepsea/disks/4disks.yaml
\ No newline at end of file
diff --git a/qa/suites/deepsea/tier1/health-ok/cluster/roles.yaml b/qa/suites/deepsea/tier1/health-ok/cluster/roles.yaml
new file mode 100644 (file)
index 0000000..f42d43c
--- /dev/null
@@ -0,0 +1,2 @@
+roles:
+  - [client.salt_master, mon.a, mgr.x, osd.0, prometheus.p, grafana.g]
diff --git a/qa/suites/deepsea/tier1/health-ok/deepsea_cli_off.yaml b/qa/suites/deepsea/tier1/health-ok/deepsea_cli_off.yaml
new file mode 100644 (file)
index 0000000..a2beb7f
--- /dev/null
@@ -0,0 +1,3 @@
+overrides:
+  deepsea:
+    cli: false
diff --git a/qa/suites/deepsea/tier1/health-ok/distros b/qa/suites/deepsea/tier1/health-ok/distros
new file mode 120000 (symlink)
index 0000000..513ff61
--- /dev/null
@@ -0,0 +1 @@
+.qa/deepsea/distros
\ No newline at end of file
diff --git a/qa/suites/suse/.qa b/qa/suites/suse/.qa
new file mode 120000 (symlink)
index 0000000..fea2489
--- /dev/null
@@ -0,0 +1 @@
+../.qa
\ No newline at end of file
diff --git a/qa/suites/suse/tier0 b/qa/suites/suse/tier0
new file mode 120000 (symlink)
index 0000000..06233b1
--- /dev/null
@@ -0,0 +1 @@
+../deepsea/tier0
\ No newline at end of file
diff --git a/qa/suites/suse/tier1 b/qa/suites/suse/tier1
new file mode 120000 (symlink)
index 0000000..5be3d9a
--- /dev/null
@@ -0,0 +1 @@
+../deepsea/tier1
\ No newline at end of file
diff --git a/qa/tasks/deepsea.py b/qa/tasks/deepsea.py
new file mode 100644 (file)
index 0000000..3c81eb5
--- /dev/null
@@ -0,0 +1,2019 @@
+"""
+Task (and subtasks) for automating deployment of Ceph using DeepSea
+
+Linter:
+    flake8 --max-line-length=100
+"""
+import logging
+import time
+import yaml
+
+from salt_manager import SaltManager
+from scripts import Scripts
+from teuthology import misc
+from util import (
+    copy_directory_recursively,
+    enumerate_osds,
+    get_remote_for_role,
+    get_rpm_pkg_version,
+    introspect_roles,
+    remote_exec,
+    remote_run_script_as_root,
+    sudo_append_to_file,
+    )
+
+from teuthology.exceptions import (
+    CommandFailedError,
+    ConfigError,
+    )
+from teuthology.orchestra import run
+from teuthology.task import Task
+from teuthology.contextutil import safe_while
+
+log = logging.getLogger(__name__)
+deepsea_ctx = {}
+proposals_dir = "/srv/pillar/ceph/proposals"
+reboot_tries = 30
+
+
+def anchored(log_message):
+    global deepsea_ctx
+    assert 'log_anchor' in deepsea_ctx, "deepsea_ctx not populated"
+    return "{}{}".format(deepsea_ctx['log_anchor'], log_message)
+
+
+def dump_file_that_might_not_exist(remote, fpath):
+    try:
+        remote.run(args="cat {}".format(fpath))
+    except CommandFailedError:
+        pass
+
+
+class DeepSea(Task):
+    """
+    Install DeepSea on the Salt Master node.
+
+    Assumes a Salt cluster is already running (use the Salt task to achieve
+    this).
+
+    This task understands the following config keys which apply to
+    this task and all its subtasks:
+
+        allow_python2:  (default: True)
+                        whether to continue if Python 2 is installed anywhere
+                        in the test cluster
+        alternative_defaults: (default: empty)
+                        a dictionary of DeepSea alternative defaults
+                        to be activated via the Salt Pillar
+        cli:
+            true        deepsea CLI will be used (the default)
+            false       deepsea CLI will not be used
+        dashboard_ssl:
+            true        deploy MGR dashboard module with SSL (the default)
+            false       deploy MGR dashboard module *without* SSL
+        log_anchor      a string (default: "WWWW: ") which will precede
+                        log messages emitted at key points during the
+                        deployment
+        quiet_salt:
+            true        suppress stderr on salt commands (the default)
+            false       let salt commands spam the log
+        rgw_ssl:
+            true        use SSL if RGW is deployed
+            false       if RGW is deployed, do not use SSL (the default)
+        drive_group:
+            default     if a teuthology osd role is present on a node,
+                        DeepSea will tell ceph-volume to make all available
+                        disks into standalone OSDs
+            teuthology  populate DeepSea storage profile for 1:1 mapping
+                        between teuthology osd roles and actual osds
+                        deployed (the default, but not yet implemented)
+            (dict)      a dictionary is assumed to be a custom drive group
+                        (yaml blob) to be passed verbatim to ceph-volume
+
+    This task also understands the following config keys that affect
+    the behavior of just this one task (no effect on subtasks):
+
+        repo: (git repo for initial DeepSea install, e.g.
+              "https://github.com/SUSE/DeepSea.git")
+        branch: (git branch for initial deepsea install, e.g. "master")
+        install:
+            package|pkg deepsea will be installed via package system
+            source|src  deepsea will be installed via 'make install' (default)
+        upgrade_install:
+            package|pkg post-upgrade deepsea will be installed via package system
+            source|src  post-upgrade deepsea will be installed via 'make install' (default)
+        upgrade_repo: (git repo for DeepSea re-install/upgrade - used by second
+                      invocation of deepsea task only)
+        upgrade_branch: (git branch for DeepSea re-install/upgrade - used by
+                        second invocation of deepsea task only)
+
+    Example:
+
+        tasks:
+        - deepsea:
+            repo: https://github.com/SUSE/DeepSea.git
+            branch: wip-foo
+            install: source
+
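+    A second example, showing some of the config keys described above
+    (the values chosen here are illustrative only):
+
+        tasks:
+        - deepsea:
+            cli: false
+            rgw_ssl: true
+            drive_group: default
+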
+    :param ctx: the argparse.Namespace object
+    :param config: the config dict
+    """
+
+    err_prefix = "(deepsea task) "
+
+    log_anchor_str = "WWWW: "
+
+    def __init__(self, ctx, config):
+        global deepsea_ctx
+        super(DeepSea, self).__init__(ctx, config)
+        if deepsea_ctx:
+            # context already populated (we are in a subtask, or a
+            # re-invocation of the deepsea task)
+            self.log = deepsea_ctx['logger_obj']
+            if type(self).__name__ == 'DeepSea':
+                # The only valid reason for a second invocation of the deepsea
+                # task is to upgrade DeepSea (actually reinstall it)
+                deepsea_ctx['reinstall_deepsea'] = True
+                # deepsea_ctx['install_method'] is the _initial_ install method from the
+                # first invocation. If initial install was from package, the
+                # package must be removed for reinstall from source to work.
+                # If reinstall method is 'package', removing the package here
+                # will not hurt anything.
+                if deepsea_ctx['install_method'] == 'package':
+                    deepsea_ctx['master_remote'].run(args=[
+                        'sudo',
+                        'zypper',
+                        '--non-interactive',
+                        '--no-gpg-checks',
+                        'remove',
+                        'deepsea',
+                        'deepsea-qa',
+                        run.Raw('||'),
+                        'true'
+                        ])
+                install_key = 'install'
+                upgrade_install = self.config.get('upgrade_install', '')
+                if upgrade_install:
+                    install_key = 'upgrade_install'
+                self.__populate_install_method_basic(install_key)
+        if not deepsea_ctx:
+            # populating context (we are *not* in a subtask)
+            deepsea_ctx['logger_obj'] = log
+            self.ctx['roles'] = self.ctx.config['roles']
+            self.log = log
+            self._populate_deepsea_context()
+            introspect_roles(self.ctx, self.log, quiet=False)
+        self.allow_python2 = deepsea_ctx['allow_python2']
+        self.alternative_defaults = deepsea_ctx['alternative_defaults']
+        self.dashboard_ssl = deepsea_ctx['dashboard_ssl']
+        self.deepsea_cli = deepsea_ctx['cli']
+        self.dev_env = self.ctx['dev_env']
+        self.install_method = deepsea_ctx['install_method']
+        self.log_anchor = deepsea_ctx['log_anchor']
+        self.master_remote = deepsea_ctx['master_remote']
+        self.nodes = self.ctx['nodes']
+        self.nodes_storage = self.ctx['nodes_storage']
+        self.nodes_storage_only = self.ctx['nodes_storage_only']
+        self.quiet_salt = deepsea_ctx['quiet_salt']
+        self.remotes = self.ctx['remotes']
+        self.reinstall_deepsea = deepsea_ctx.get('reinstall_deepsea', False)
+        self.repositories = deepsea_ctx['repositories']
+        self.rgw_ssl = deepsea_ctx['rgw_ssl']
+        self.roles = self.ctx['roles']
+        self.role_types = self.ctx['role_types']
+        self.role_lookup_table = self.ctx['role_lookup_table']
+        self.scripts = Scripts(self.ctx, self.log)
+        self.sm = deepsea_ctx['salt_manager_instance']
+        self.drive_group = deepsea_ctx['drive_group']
+        # self.log.debug("ctx.config {}".format(ctx.config))
+        # self.log.debug("deepsea context: {}".format(deepsea_ctx))
+
+    def __install_deepsea_from_source(self):
+        info_msg_prefix = 'Reinstalling' if self.reinstall_deepsea else 'Installing'
+        info_msg = info_msg_prefix + ' deepsea from source'
+        self.log.info(anchored(info_msg))
+        if self.sm.master_rpm_q('deepsea'):
+            self.log.info("DeepSea already installed from RPM")
+            return None
+        upgrade_repo = self.config.get('upgrade_repo', '')
+        upgrade_branch = self.config.get('upgrade_branch', '')
+        repo = self.config.get('repo', 'https://github.com/SUSE/DeepSea.git')
+        branch = self.config.get('branch', 'master')
+        if self.reinstall_deepsea:
+            if upgrade_repo:
+                repo = upgrade_repo
+            if upgrade_branch:
+                branch = upgrade_branch
+        self.log.info(
+            "{} - repo: {}, branch: {}"
+            .format(info_msg, repo, branch)
+            )
+        self.master_remote.run(args=[
+            'sudo',
+            'rm',
+            '-rf',
+            'DeepSea',
+            run.Raw(';'),
+            'git',
+            '--version',
+            run.Raw(';'),
+            'git',
+            'clone',
+            '--branch',
+            branch,
+            repo,
+            run.Raw(';'),
+            'cd',
+            'DeepSea',
+            run.Raw(';'),
+            'git',
+            'rev-parse',
+            '--abbrev-ref',
+            'HEAD',
+            run.Raw(';'),
+            'git',
+            'rev-parse',
+            'HEAD',
+            run.Raw(';'),
+            'git',
+            'describe',
+            run.Raw('||'),
+            'true',
+            ])
+        self.log.info("Running \"make install\" in DeepSea clone...")
+        self.master_remote.run(args=[
+            'cd',
+            'DeepSea',
+            run.Raw(';'),
+            'sudo',
+            'make',
+            'install',
+            ])
+        self.log.info("installing deepsea dependencies...")
+        rpmspec_cmd = (
+                '$(rpmspec --requires -q DeepSea/deepsea.spec.in 2>/dev/null)'
+            )
+        self.master_remote.run(args=[
+            'sudo',
+            'zypper',
+            '--non-interactive',
+            'install',
+            '--no-recommends',
+            run.Raw(rpmspec_cmd)
+            ])
+
+    def __install_deepsea_using_zypper(self):
+        info_msg_prefix = 'Reinstalling' if self.reinstall_deepsea else 'Installing'
+        info_msg = info_msg_prefix + ' deepsea using zypper'
+        self.log.info(anchored(info_msg))
+        self.master_remote.run(args=[
+            'sudo',
+            'zypper',
+            '--non-interactive',
+            'search',
+            '--details',
+            'deepsea'
+            ])
+        self.master_remote.run(args=[
+            'sudo',
+            'zypper',
+            '--non-interactive',
+            '--no-gpg-checks',
+            'install',
+            '--force',
+            '--no-recommends',
+            'deepsea',
+            'deepsea-cli',
+            'deepsea-qa'
+            ])
+
+    def _deepsea_minions(self):
+        """
+        Set deepsea_minions pillar value
+        """
+        deepsea_minions_sls = '/srv/pillar/ceph/deepsea_minions.sls'
+        content = "deepsea_minions: \'*\'"
+        self.log.info("Clobbering {} with content ->{}<-".format(
+            deepsea_minions_sls, content))
+        cmd = 'sudo tee {}'.format(deepsea_minions_sls)
+        self.master_remote.sh(cmd, stdin=content)
+
+    def _deepsea_version(self):
+        if self.deepsea_cli:
+            try:
+                self.master_remote.run(args=[
+                    'type',
+                    'deepsea',
+                    run.Raw('>'),
+                    '/dev/null',
+                    run.Raw('2>&1'),
+                    ])
+            except CommandFailedError:
+                raise ConfigError(self.err_prefix + "Test case calls for "
+                                  "deepsea CLI, but it is not installed")
+            self.master_remote.run(args='deepsea --version')
+        else:
+            cmd_str = "sudo salt-run deepsea.version"
+            if self.quiet_salt:
+                cmd_str += " 2>/dev/null"
+            self.master_remote.run(args=cmd_str)
+
+    def _disable_gpg_checks(self):
+        cmd = (
+            'sed -i -e \'/gpgcheck/ d\' /etc/zypp/repos.d/* ; '
+            'sed -i -e \'/gpgkey/ d\' /etc/zypp/repos.d/* ; '
+            'sed -i -e \'$a gpgcheck=0\' /etc/zypp/repos.d/*'
+            )
+        self.ctx.cluster.run(args=[
+            'sudo', 'sh', '-c', cmd
+            ])
+
+    def _install_deepsea(self):
+        global deepsea_ctx
+        install_method = deepsea_ctx['install_method']
+        if install_method == 'package':
+            self.__install_deepsea_using_zypper()
+        elif install_method == 'source':
+            self.__install_deepsea_from_source()
+        else:
+            raise ConfigError(self.err_prefix + "internal error")
+        deepsea_ctx['deepsea_installed'] = True
+
+    def _master_python_version(self, py_version):
+        """
+        Determine if a given python version is installed on the Salt Master
+        node.
+        """
+        python_binary = 'python{}'.format(py_version)
+        installed = True
+        try:
+            self.master_remote.run(args=[
+                'type',
+                python_binary,
+                run.Raw('>'),
+                '/dev/null',
+                run.Raw('2>&1'),
+                ])
+        except CommandFailedError:
+            installed = False
+        if installed:
+            self.master_remote.run(args=[
+                python_binary,
+                '--version'
+                ])
+        else:
+            self.log.info(
+                '{} not installed on master node'.format(python_binary)
+                )
+        return installed
+
+    def _maybe_apply_alternative_defaults(self):
+        global_yml = '/srv/pillar/ceph/stack/global.yml'
+        if self.alternative_defaults:
+            self.log.info(anchored("Applying alternative defaults"))
+            data = ''
+            for k, v in self.alternative_defaults.items():
+                data += "{}: {}\n".format(k, v)
+            if data:
+                sudo_append_to_file(
+                    self.master_remote,
+                    global_yml,
+                    data,
+                    )
+        dump_file_that_might_not_exist(self.master_remote, global_yml)
+
+    def _populate_deepsea_context(self):
+        global deepsea_ctx
+        deepsea_ctx['allow_python2'] = self.config.get('allow_python2', True)
+        deepsea_ctx['alternative_defaults'] = self.config.get('alternative_defaults', {})
+        if not isinstance(deepsea_ctx['alternative_defaults'], dict):
+            raise ConfigError(self.err_prefix + "alternative_defaults must be a dict")
+        deepsea_ctx['cli'] = self.config.get('cli', True)
+        deepsea_ctx['dashboard_ssl'] = self.config.get('dashboard_ssl', True)
+        deepsea_ctx['log_anchor'] = self.config.get('log_anchor', self.log_anchor_str)
+        if not isinstance(deepsea_ctx['log_anchor'], str):
+            self.log.warning(
+                "log_anchor was set to non-string value ->{}<-, "
+                "changing to empty string"
+                .format(deepsea_ctx['log_anchor'])
+                )
+            deepsea_ctx['log_anchor'] = ''
+        deepsea_ctx['drive_group'] = self.config.get("drive_group", "teuthology")
+        deepsea_ctx['quiet_salt'] = self.config.get('quiet_salt', True)
+        deepsea_ctx['salt_manager_instance'] = SaltManager(self.ctx)
+        deepsea_ctx['master_remote'] = (
+                deepsea_ctx['salt_manager_instance'].master_remote
+                )
+        deepsea_ctx['repositories'] = self.config.get("repositories", None)
+        deepsea_ctx['rgw_ssl'] = self.config.get('rgw_ssl', False)
+        self.__populate_install_method('install')
+
+    def __populate_install_method_basic(self, key):
+        if self.config[key] in ['package', 'pkg']:
+            deepsea_ctx['install_method'] = 'package'
+        elif self.config[key] in ['source', 'src']:
+            deepsea_ctx['install_method'] = 'source'
+        else:
+            raise ConfigError(self.err_prefix + "Unrecognized {} config "
+                              "value ->{}<-".format(key, self.config[key]))
+
+    def __populate_install_method(self, key):
+        if key in self.config:
+            self.__populate_install_method_basic(key)
+        else:
+            if 'repo' in self.config or 'branch' in self.config:
+                deepsea_ctx['install_method'] = 'source'
+            else:
+                deepsea_ctx['install_method'] = 'package'
+
+    def _purge_osds(self):
+        # needed as long as teuthology install task purges /var/lib/ceph
+        # in its teardown phase
+        for _remote in self.ctx.cluster.remotes.keys():
+            self.log.info("stopping OSD services on {}"
+                          .format(_remote.hostname))
+            _remote.run(args=[
+                'sudo', 'sh', '-c',
+                'systemctl stop ceph-osd.target ; sleep 10'
+                ])
+            self.log.info("unmounting OSD partitions on {}"
+                          .format(_remote.hostname))
+            # unmount up to five OSDs
+            # bluestore XFS partition is vd?1
+            # filestore XFS partition is vd?2
+            for_loop = (
+                    'for f in vdb{pn} vdc{pn} vdd{pn} vde{pn} vdf{pn} ; '
+                    'do test -b /dev/$f && umount /dev/$f || true ; '
+                    'done'
+                )
+            for pn in [1, 2]:
+                _remote.run(args=['sudo', 'sh', '-c', for_loop.format(pn=pn)])
+
+    def first_storage_only_node(self):
+        if self.nodes_storage_only:
+            return self.nodes_storage_only[0]
+        else:
+            return None
+
+    def os_type_and_version(self):
+        os_type = self.ctx.config.get('os_type', 'unknown')
+        os_version = float(self.ctx.config.get('os_version', 0))
+        return (os_type, os_version)
+
+    def reboot_a_single_machine_now(self, remote, log_spec=None):
+        global reboot_tries
+        if not log_spec:
+            log_spec = "node {} reboot now".format(remote.hostname)
+        cmd_str = "sudo reboot"
+        remote_exec(
+            remote,
+            cmd_str,
+            self.log,
+            log_spec,
+            rerun=False,
+            quiet=True,
+            tries=reboot_tries,
+            )
+
+    def reboot_the_cluster_now(self, log_spec=None):
+        global reboot_tries
+        if not log_spec:
+            log_spec = "all nodes reboot now"
+        cmd_str = "salt \\* cmd.run reboot"
+        if self.quiet_salt:
+            cmd_str += " 2> /dev/null"
+        remote_exec(
+            self.master_remote,
+            cmd_str,
+            self.log,
+            log_spec,
+            rerun=False,
+            quiet=True,
+            tries=reboot_tries,
+            )
+        self.sm.ping_minions()
+
+    def role_type_present(self, role_type):
+        """
+        Method for determining if _any_ test node has the given role type
+        (teuthology role, not DeepSea role). Examples: "osd", "mon" (not
+        "mon.a").
+
+        If the role type is present, returns the hostname of the first remote
+        with that role type.
+
+        If the role type is absent, returns the empty string.
+        """
+        role_dict = self.role_lookup_table.get(role_type, {})
+        host = role_dict[role_dict.keys()[0]] if role_dict else ''
+        return host
+
+    # Teuthology iterates through the tasks stanza twice: once to "execute"
+    # the tasks and a second time to "unwind" them. During the first pass
+    # it pushes each task onto a stack, and during the second pass it "unwinds"
+    # the stack, with the result being that the tasks are unwound in reverse
+    # order. During the execution phase it calls three methods, in order:
+    # the constructor, setup(), and begin(). During the unwinding phase it
+    # calls end() and then teardown(), also in order.
+
+    # The task does not have to implement any of these methods: if one is not
+    # implemented, the parent class's implementation is used via inheritance.
+    # If a method _is_ implemented, the implementation can optionally call
+    # the parent's implementation of that method as well. This is illustrated
+    # here:
+    def setup(self):
+        # self.log.debug("beginning of setup method")
+        super(DeepSea, self).setup()
+        pass
+        # self.log.debug("end of setup method")
+
+    def begin(self):
+        global deepsea_ctx
+        super(DeepSea, self).begin()
+        if self.reinstall_deepsea:
+            self._install_deepsea()
+            return None
+        self.sm.master_rpm_q('ceph')
+        self.sm.master_rpm_q('ceph-test')
+        self.sm.master_rpm_q('salt-master')
+        self.sm.master_rpm_q('salt-minion')
+        self.sm.master_rpm_q('salt-api')
+        # the Salt Master node is assumed to be running an already
+        # configured chrony for time synchronization within the cluster
+        # and DeepSea Stage 3 will point the minions at the Salt Master's
+        # chrony instance (?)
+        self.sm.master_rpm_q('chrony')
+        self.master_remote.run(
+            args="sudo systemctl status --lines=0 chronyd.service"
+            )
+        if self.allow_python2:
+            self._master_python_version(2)
+        else:
+            self.log.info(
+                'allow_python2 is set to \'false\'. That means the '
+                'test will now fail if a python2 binary is found on '
+                'any of the test machines.'
+                )
+            self.ctx.cluster.run(args='if type python2 ; then false ; else true ; fi')
+        if not self._master_python_version(3):
+            raise ConfigError(self.err_prefix + "Python 3 not installed on master node")
+        if 'deepsea_installed' not in deepsea_ctx:
+            self._disable_gpg_checks()
+            self.master_remote.run(args="zypper lr -upEP")
+            self._install_deepsea()
+            assert deepsea_ctx['deepsea_installed']
+        self._deepsea_version()
+        self._deepsea_minions()
+        self._maybe_apply_alternative_defaults()
+        # Stage 0 does this, but we have no guarantee Stage 0 will run
+        self.sm.sync_pillar_data(quiet=self.quiet_salt)
+
+    def end(self):
+        self.log.debug("beginning of end method")
+        super(DeepSea, self).end()
+        success = self.ctx.summary.get('success', None)
+        if success is None:
+            self.log.warning("Problem with ctx summary key? ctx is {}".format(self.ctx))
+        if not success:
+            self.ctx.cluster.run(args="rpm -qa | sort")
+        self.sm.gather_logs('/home/farm/.npm/_logs', 'dashboard-e2e-npm')
+        self.sm.gather_logs('/home/farm/.protractor-report', 'dashboard-e2e-protractor')
+        self.log.debug("end of end method")
+
+    def teardown(self):
+        self.log.debug("beginning of teardown method")
+        super(DeepSea, self).teardown()
+        # #
+        # # the install task does "rm -r /var/lib/ceph" on every test node,
+        # # and that fails when there are OSDs running
+        # # FIXME - deprecated, remove after awhile
+        # self._purge_osds()
+        self.log.debug("end of teardown method")
+
+
+class CephConf(DeepSea):
+    """
+    Adds custom options to ceph.conf.
+    Edit yaml file between stage 2 and 3.
+    Example:
+        - deepsea.orch:
+                stage: 2
+        - deepsea.ceph_conf:
+                global:
+                  mon lease: 15
+                  mon lease ack timeout: 25
+                mon:
+                  debug mon: 20
+                osd:
+                  debug filestore: 20
+        - deepsea.orch:
+                stage: 3
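+
+    In addition to the per-section customizations shown above, the named
+    "targets" listed in the targets dict below can be toggled by mentioning
+    them in the config (illustrative sketch; mon_allow_pool_delete,
+    osd_memory_target and small_cluster are already applied by default):
+        - deepsea.ceph_conf:
+                rbd: true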
+    """
+
+    customize = {
+        "client": "client.conf",
+        "global": "global.conf",
+        "mds": "mds.conf",
+        "mgr": "mgr.conf",
+        "mon": "mon.conf",
+        "osd": "osd.conf",
+        }
+
+    deepsea_configuration_files = '/srv/salt/ceph/configuration/files'
+
+    err_prefix = "(ceph_conf subtask) "
+
+    targets = {
+        "mon_allow_pool_delete": True,
+        "osd_memory_target": True,
+        "small_cluster": True,
+        "rbd": False,
+        }
+
+    def __init__(self, ctx, config):
+        global deepsea_ctx
+        deepsea_ctx['logger_obj'] = log.getChild('ceph_conf')
+        self.name = 'deepsea.ceph_conf'
+        super(CephConf, self).__init__(ctx, config)
+        self.log.debug("munged config is {}".format(self.config))
+
+    def __ceph_conf_d_full_path(self, section):
+        ceph_conf_d = self.deepsea_configuration_files + '/ceph.conf.d'
+        if section in self.customize.keys():
+            return "{}/{}".format(ceph_conf_d, self.customize[section])
+
+    def __custom_ceph_conf(self, section, customizations):
+        for conf_item, conf_value in customizations.items():
+            data = '{} = {}\n'.format(conf_item, conf_value)
+            sudo_append_to_file(
+                self.master_remote,
+                self.__ceph_conf_d_full_path(section),
+                data
+                )
+            self.log.info(
+                "Adding to ceph.conf, {} section: {}"
+                .format(section, data)
+                )
+
+    def _customizations(self):
+        for section in self.customize.keys():
+            if section in self.config and isinstance(self.config[section], dict):
+                self.__custom_ceph_conf(section, self.config[section])
+
+    def _dump_customizations(self):
+        for section in self.customize.keys():
+            path = self.__ceph_conf_d_full_path(section)
+            dump_file_that_might_not_exist(self.master_remote, path)
+
+    def _list_ceph_conf_d(self):
+        self.master_remote.run(
+            args="ls -l {}".format(self.deepsea_configuration_files)
+            )
+
+    def _targets(self):
+        for target, default in self.targets.items():
+            method = getattr(self, target, None)
+            assert method, "target ->{}<- has no method".format(target)
+            if target in self.config:
+                method()
+            else:
+                if default:
+                    method()
+
+    def mon_allow_pool_delete(self):
+        info_msg = "adjusted ceph.conf to allow pool deletes"
+        data = "mon allow pool delete = true\n"
+        sudo_append_to_file(
+            self.master_remote,
+            self.__ceph_conf_d_full_path("mon"),
+            data,
+            )
+        self.log.info(info_msg)
+
+    def osd_memory_target(self):
+        info_msg = "lowered osd_memory_target to 1GiB to facilitate testing in OpenStack"
+        data = "osd memory target = 1105322466\n"  # https://tracker.ceph.com/issues/37507#note-4
+        sudo_append_to_file(
+            self.master_remote,
+            self.__ceph_conf_d_full_path("osd"),
+            data,
+            )
+        self.log.info(info_msg)
+
+    def rbd(self):
+        """
+        Delete "rbd default features" from ceph.conf. By removing this line, we
+        ensure that there will be no explicit "rbd default features" setting,
+        so the default will be used.
+        """
+        info_msg = "adjusted ceph.conf by removing 'rbd default features' line"
+        rbd_conf = '/srv/salt/ceph/configuration/files/rbd.conf'
+        cmd = 'sudo sed -i \'/^rbd default features =/d\' {}'.format(rbd_conf)
+        self.master_remote.run(args=cmd)
+        self.log.info(info_msg)
+
+    def small_cluster(self):
+        """
+        Apply necessary ceph.conf for small clusters
+        """
+        storage_nodes = len(self.nodes_storage)
+        info_msg = (
+            "adjusted ceph.conf for operation with {} storage node(s)"
+            .format(storage_nodes)
+            )
+        data = None
+        if storage_nodes == 1:
+            data = (
+                   "mon pg warn min per osd = 16\n"
+                   "osd pool default size = 2\n"
+                   "osd crush chooseleaf type = 0 # failure domain == osd\n"
+                   )
+        elif storage_nodes == 2 or storage_nodes == 3:
+            data = (
+                   "mon pg warn min per osd = 8\n"
+                   "osd pool default size = 2\n"
+                   )
+        if data:
+            sudo_append_to_file(
+                self.master_remote,
+                self.__ceph_conf_d_full_path("global"),
+                data,
+                )
+            self.log.info(info_msg)
+
+    def begin(self):
+        self.log.info(anchored("Adding custom options to ceph.conf"))
+        self._targets()
+        self._customizations()
+        self._list_ceph_conf_d()
+        self._dump_customizations()
+
+    def end(self):
+        pass
+
+    def teardown(self):
+        pass
+
+
+class CreatePools(DeepSea):
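+    """
+    Pre-creates pools by running create_all_pools_at_once.sh on the Salt
+    Master. The config must be a dict; every key whose value is not false
+    is passed through verbatim as an argument to the script.
+
+    Illustrative sketch only; the key names must be arguments that
+    create_all_pools_at_once.sh actually understands ("write_test" here is
+    purely hypothetical):
+
+    tasks:
+    - deepsea.create_pools:
+          write_test:
+    """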
+
+    err_prefix = "(create_pools subtask) "
+
+    def __init__(self, ctx, config):
+        global deepsea_ctx
+        deepsea_ctx['logger_obj'] = log.getChild('create_pools')
+        self.name = 'deepsea.create_pools'
+        super(CreatePools, self).__init__(ctx, config)
+        if not isinstance(self.config, dict):
+            raise ConfigError(self.err_prefix + "config must be a dictionary")
+
+    def begin(self):
+        self.log.info(anchored("pre-creating pools"))
+        args = []
+        for key in self.config:
+            if self.config[key] is None:
+                self.config[key] = True
+            if self.config[key]:
+                args.append(key)
+        args = list(set(args))
+        self.scripts.run(
+            self.master_remote,
+            'create_all_pools_at_once.sh',
+            args=args,
+            )
+
+    def end(self):
+        pass
+
+    def teardown(self):
+        pass
+
+
+class Dummy(DeepSea):
+
+    def __init__(self, ctx, config):
+        global deepsea_ctx
+        deepsea_ctx['logger_obj'] = log.getChild('dummy')
+        self.name = 'deepsea.dummy'
+        super(Dummy, self).__init__(ctx, config)
+        self.log.debug("munged config is {}".format(self.config))
+
+    def begin(self):
+        self.log.debug("beginning of begin method")
+        global deepsea_ctx
+        self.log.info("deepsea_ctx == {}".format(deepsea_ctx))
+        self.log.debug("end of begin method")
+
+    def end(self):
+        pass
+
+    def teardown(self):
+        pass
+
+
+class HealthOK(DeepSea):
+    """
+    Copy health-ok.sh to the Salt Master node and run commands.
+
+    This task understands the following config key:
+
+        commands:
+            [list of health-ok.sh commands]
+
+
+    The list of commands will be executed as root on the Salt Master node.
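+
+    Illustrative sketch (health-ok.sh is run with whatever arguments are
+    given; --cli, --ssl and a DEV_ENV prefix are added automatically based
+    on the global deepsea options):
+
+        commands:
+            - health-ok.sh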
+    """
+
+    err_prefix = "(health_ok subtask) "
+
+    prefix = 'health-ok/'
+
+    def __init__(self, ctx, config):
+        global deepsea_ctx
+        deepsea_ctx['logger_obj'] = log.getChild('health_ok')
+        self.name = 'deepsea.health_ok'
+        super(HealthOK, self).__init__(ctx, config)
+
+    def _copy_health_ok(self):
+        """
+        Copy health-ok.sh from teuthology VM to master_remote
+        """
+        global deepsea_ctx
+        suite_path = self.ctx.config.get('suite_path')
+        log.info("suite_path is ->{}<-".format(suite_path))
+        misc.sh("ls -l {}".format(suite_path))
+        health_ok_path = suite_path + "/deepsea/health-ok"
+        misc.sh("test -d " + health_ok_path)
+        copy_directory_recursively(
+                health_ok_path, self.master_remote, "health-ok")
+        self.master_remote.run(args="pwd ; ls -lR health-ok")
+        deepsea_ctx['health_ok_copied'] = True
+
+    def _maybe_run_commands(self, commands):
+        if not commands:
+            self.log.warning(
+                "The health_ok task was run, but no commands were specified. "
+                "Doing nothing."
+                )
+            return None
+        for cmd_str in commands:
+            if not isinstance(cmd_str, str):
+                raise ConfigError(
+                    self.err_prefix +
+                    "command ->{}<- is not a string".format(cmd_str)
+                    )
+            if cmd_str.startswith('health-ok.sh'):
+                cmd_str = self.prefix + cmd_str
+                if self.dev_env:
+                    cmd_str = 'DEV_ENV=true ' + cmd_str
+                if self.deepsea_cli:
+                    cmd_str += ' --cli'
+                if self.rgw_ssl:
+                    cmd_str += ' --ssl'
+            self.master_remote.run(args=[
+                'sudo', 'bash', '-c', cmd_str,
+                ])
+
+    def setup(self):
+        global deepsea_ctx
+        if 'health_ok_copied' not in deepsea_ctx:
+            self._copy_health_ok()
+            assert deepsea_ctx['health_ok_copied']
+
+    def begin(self):
+        commands = self.config.get('commands', [])
+        if not isinstance(commands, list):
+            raise ConfigError(self.err_prefix + "commands must be a list")
+        self._maybe_run_commands(commands)
+
+    def end(self):
+        pass
+
+    def teardown(self):
+        pass
+
+
+class Orch(DeepSea):
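+    """
+    Runs a single DeepSea orchestration: either a numbered Stage (0-5, or
+    one of its name synonyms such as "prep" or "deploy"), or an arbitrary
+    orchestration given via state_orch.
+
+    Examples (illustrative):
+
+    tasks:
+    - deepsea.orch:
+          stage: 3
+    - deepsea.orch:
+          state_orch: ceph.maintenance.upgrade
+    """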
+
+    all_stages = [
+        "0", "prep", "1", "discovery", "2", "configure", "3", "deploy",
+        "4", "services", "5", "removal", "cephfs", "ganesha", "iscsi",
+        "openattic", "openstack", "radosgw", "validate"
+        ]
+
+    err_prefix = "(orch subtask) "
+
+    stage_synonyms = {
+        0: 'prep',
+        1: 'discovery',
+        2: 'configure',
+        3: 'deploy',
+        4: 'services',
+        5: 'removal',
+        }
+
+    def __init__(self, ctx, config):
+        global deepsea_ctx
+        deepsea_ctx['logger_obj'] = log.getChild('orch')
+        self.name = 'deepsea.orch'
+        super(Orch, self).__init__(ctx, config)
+        self.stage = str(self.config.get("stage", ''))
+        self.state_orch = str(self.config.get("state_orch", ''))
+        self.reboots_explicitly_forbidden = not self.config.get("allow_reboots", True)
+        self.survive_reboots = self._detect_reboots()
+        if not self.stage and not self.state_orch:
+            raise ConfigError(
+                self.err_prefix +
+                "nothing to do. Specify a value for 'stage' or "
+                "'state_orch' key in config dict"
+                )
+        if self.stage and self.stage not in self.all_stages:
+            raise ConfigError(
+                self.err_prefix +
+                "unrecognized Stage ->{}<-".format(self.stage)
+                )
+        self.log.debug("munged config is {}".format(self.config))
+
+    def __ceph_health_test(self):
+        cmd = 'sudo salt-call wait.until status=HEALTH_OK timeout=900 check=1'
+        if self.quiet_salt:
+            cmd += ' 2> /dev/null'
+        self.master_remote.run(args=cmd)
+
+    def __check_ceph_test_rpm_version(self):
+        """Checks rpm version for ceph and ceph-test; logs warning if differs"""
+        ceph_test_ver = get_rpm_pkg_version(self.master_remote, "ceph-test", self.log)
+        ceph_ver = get_rpm_pkg_version(self.master_remote, "ceph", self.log)
+        if ceph_test_ver != ceph_ver:
+            self.log.warning(
+                "ceph-test rpm version: {} differs from ceph version: {}"
+                .format(ceph_test_ver, ceph_ver))
+
+    def __check_salt_api_service(self):
+        base_cmd = 'sudo systemctl status --full --lines={} {}.service'
+        try:
+            self.master_remote.run(args=base_cmd.format('0', 'salt-api'))
+        except CommandFailedError:
+            self.master_remote.run(args=base_cmd.format('100', 'salt-api'))
+            raise
+        self.scripts.run(
+            self.master_remote,
+            'salt_api_test.sh',
+            )
+
+    def __dump_drive_groups_yml(self):
+        self.scripts.run(
+            self.master_remote,
+            'dump_drive_groups_yml.sh',
+            )
+
+    def __dump_lvm_status(self):
+        self.log.info("Dumping LVM status on storage nodes ->{}<-"
+                      .format(self.nodes_storage))
+        for hostname in self.nodes_storage:
+            remote = self.remotes[hostname]
+            self.scripts.run(
+                remote,
+                'lvm_status.sh',
+                )
+
+    def __is_stage_between_0_and_5(self):
+        """
+        This is implemented as a separate function because the stage specified
+        in the YAML might be a number or a string, and we really don't care
+        what Python sees it as.
+        """
+        num = self.stage
+        try:
+            num = int(num)
+        except ValueError:
+            return False
+        if num < 0 or num > 5:
+            return False
+        return True
+
+    def __log_stage_start(self, stage):
+        self.log.info(anchored(
+            "Running DeepSea Stage {} ({})"
+            .format(stage, self.stage_synonyms[stage])
+            ))
+
+    def __maybe_cat_ganesha_conf(self):
+        ganesha_host = self.role_type_present('ganesha')
+        if ganesha_host:
+            ganesha_remote = self.remotes[ganesha_host]
+            ganesha_remote.run(args="cat /etc/ganesha/ganesha.conf")
+
+    def __mgr_dashboard_module_deploy(self):
+        script = ("# deploy MGR dashboard module\n"
+                  "set -ex\n"
+                  "ceph mgr module enable dashboard\n")
+        if self.dashboard_ssl:
+            script += "ceph dashboard create-self-signed-cert\n"
+        else:
+            script += "ceph config set mgr mgr/dashboard/ssl false\n"
+        remote_run_script_as_root(
+            self.master_remote,
+            'mgr_dashboard_module_deploy.sh',
+            script,
+            )
+
+    def __zypper_ps_with_possible_reboot(self):
+        if self.sm.all_minions_zypper_ps_requires_reboot():
+            log_spec = "Detected updates requiring reboot"
+            self.log.warning(anchored(log_spec))
+            if self.reboots_explicitly_forbidden:
+                self.log.info("Reboots explicitly forbidden in test configuration: not rebooting")
+                self.log.warning("Processes using deleted files may cause instability")
+            else:
+                self.log.warning(anchored("Rebooting the whole cluster now!"))
+                self.reboot_the_cluster_now(log_spec=log_spec)
+                assert not self.sm.all_minions_zypper_ps_requires_reboot(), \
+                    "No more updates requiring reboot anywhere in the whole cluster"
+
+    def _configure_rgw(self):
+        self.log.debug("self.rgw_ssl is ->{}<-".format(self.rgw_ssl))
+        rgw_host = self.role_type_present('rgw')
+        if rgw_host:
+            self.log.debug(
+                "detected rgw host ->{}<-".format(rgw_host)
+                )
+            self.log.info(anchored("configuring RGW"))
+            self.scripts.run(
+                self.master_remote,
+                'rgw_init.sh',
+                )
+            if self.rgw_ssl:
+                self.scripts.run(
+                    self.master_remote,
+                    'rgw_init_ssl.sh',
+                    )
+
+    # FIXME: run on each minion individually, and compare deepsea "roles"
+    # with teuthology roles!
+    def _pillar_items(self):
+        cmd = "sudo salt \\* pillar.items"
+        if self.quiet_salt:
+            cmd += " 2>/dev/null"
+        self.master_remote.run(args=cmd)
+
+    def _run_orch(self, orch_tuple):
+        """Run an orchestration. Dump journalctl on error."""
+        global reboot_tries
+        orch_type, orch_spec = orch_tuple
+        if orch_type == 'orch':
+            cli = False
+        elif orch_type == 'stage':
+            cli = self.deepsea_cli
+            orch_spec = 'ceph.stage.{}'.format(orch_spec)
+        else:
+            raise ConfigError(
+                self.err_prefix +
+                "Unrecognized orchestration type ->{}<-".format(orch_type)
+                )
+        cmd_str = None
+        if cli:
+            cmd_str = (
+                'timeout 60m deepsea '
+                '--log-file=/var/log/salt/deepsea.log '
+                '--log-level=debug '
+                'salt-run state.orch {} --simple-output'
+                ).format(orch_spec)
+        else:
+            cmd_str = (
+                'timeout 60m salt-run '
+                '--no-color state.orch {}'
+                ).format(orch_spec)
+            if self.quiet_salt:
+                cmd_str += ' 2>/dev/null'
+        if self.dev_env:
+            cmd_str = 'DEV_ENV=true ' + cmd_str
+        tries = 0
+        if self.survive_reboots:
+            tries = reboot_tries
+        remote_exec(
+            self.master_remote,
+            cmd_str,
+            self.log,
+            "orchestration {}".format(orch_spec),
+            rerun=True,
+            quiet=True,
+            tries=tries,
+            )
+
+    def _detect_reboots(self):
+        """
+        Check for all known states/stages/alternative defaults that
+        may cause a reboot.
+        If an 'allow_reboot' flag is given in the config, it takes precedence.
+        """
+        allow_reboot = self.config.get("allow_reboot", None)
+        if allow_reboot is not None:
+            self.log.info("Setting allow_reboot explicitly to {}"
+                          .format(allow_reboot))
+            return allow_reboot
+        orchs_prone_to_reboot = ['ceph.maintenance.upgrade']
+        if self.state_orch in orchs_prone_to_reboot:
+            self.log.warning("This orchestration may trigger a reboot")
+            return True
+        #
+        # The alternative_defaults stanza has been moved up to the deepsea task
+        # (for two reasons: because it's a global setting and also so we can do
+        # boilerplate overrides like qa/deepsea/boilerplate/disable_tuned.yaml).
+        # That change makes the following heuristic problematic: since
+        # all the alternative defaults are concentrated in one place, if any of
+        # them contains the string "reboot" (without preceding "no-"), **all**
+        # orchestrations in the test will run with survive_reboots, not just
+        # one.
+        for k, v in self.alternative_defaults.items():
+            if 'reboot' in v and 'no-reboot' not in v:
+                self.log.warning("Orchestrations may trigger a reboot")
+                return True
+        self.log.info("Not allowing reboots for this orchestration")
+        return False
+
+    def _run_stage_0(self):
+        """
+        Run Stage 0
+        """
+        stage = 0
+        self.__log_stage_start(stage)
+        self._run_orch(("stage", stage))
+        self._pillar_items()
+        self.sm.all_minions_zypper_ref()
+        self.sm.all_minions_zypper_lu()
+        self.__zypper_ps_with_possible_reboot()
+        self.__check_salt_api_service()
+
+    def _run_stage_1(self):
+        """
+        Run Stage 1
+        """
+        stage = 1
+        self._configure_rgw()
+        self.__log_stage_start(stage)
+        self._run_orch(("stage", stage))
+
+    def _run_stage_2(self):
+        """
+        Run Stage 2
+        """
+        stage = 2
+        self.__log_stage_start(stage)
+        self._run_orch(("stage", stage))
+        self.__check_ceph_test_rpm_version()
+        self._pillar_items()
+        self.__dump_drive_groups_yml()
+
+    def _run_stage_3(self):
+        """
+        Run Stage 3
+        """
+        stage = 3
+        self.__log_stage_start(stage)
+        self._run_orch(("stage", stage))
+        # self.__mgr_dashboard_module_deploy()
+        self.sm.all_minions_cmd_run(
+            'cat /etc/ceph/ceph.conf',
+            abort_on_fail=False
+            )
+        self.__dump_lvm_status()
+        self.scripts.run(
+            self.master_remote,
+            'ceph_cluster_status.sh',
+            )
+        self.__ceph_health_test()
+
+    def _run_stage_4(self):
+        """
+        Run Stage 4
+        """
+        stage = 4
+        self.__log_stage_start(stage)
+        self._run_orch(("stage", stage))
+        self.__maybe_cat_ganesha_conf()
+        self.__ceph_health_test()
+
+    def _run_stage_5(self):
+        """
+        Run Stage 5
+        """
+        stage = 5
+        self.__log_stage_start(stage)
+        self._run_orch(("stage", stage))
+
+    def begin(self):
+        self.master_remote.sh('sudo salt-run jobs.active 2>/dev/null')
+        if self.state_orch:
+            self.log.info(anchored(
+                "running orchestration {}".format(self.state_orch)
+                ))
+            self._run_orch(("orch", self.state_orch))
+        else:
+            # it's not an orch, so it must be a stage
+            assert self.stage, "Neither state_orch, nor stage"
+            if self.__is_stage_between_0_and_5():
+                getattr(self, '_run_stage_{}'.format(self.stage))()
+            elif self.stage == 'prep':
+                self.log.info("Running Stage 0 instead of Stage \"prep\"")
+                self._run_stage_0()
+            elif self.stage == 'discovery':
+                self.log.info("Running Stage 1 instead of Stage \"discovery\"")
+                self._run_stage_1()
+            elif self.stage == 'configure':
+                self.log.info("Running Stage 2 instead of Stage \"configure\"")
+                self._run_stage_2()
+            elif self.stage == 'deploy':
+                self.log.info("Running Stage 3 instead of Stage \"deploy\"")
+                self._run_stage_3()
+            elif self.stage == 'services':
+                self.log.info("Running Stage 4 instead of Stage \"services\"")
+                self._run_stage_4()
+            elif self.stage == 'removal':
+                self.log.info("Running Stage 5 instead of Stage \"removal\"")
+                self._run_stage_5()
+            elif self.stage in self.all_stages:
+                self.log.info("Running non-numeric Stage \"{}\"".format(self.stage))
+                self._run_orch(("stage", self.stage))
+            else:
+                raise ConfigError(
+                    self.err_prefix +
+                    'unsupported stage ->{}<-'.format(self.stage)
+                    )
+        self.master_remote.sh('sudo salt-run jobs.active 2>/dev/null')
+
+    def end(self):
+        pass
+
+    def teardown(self):
+        pass
+
+
+class Policy(DeepSea):
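+    """
+    Generates policy.cfg on the Salt Master from the teuthology roles (and,
+    when a "drive_group" is configured on the main deepsea task, a matching
+    drive_groups.yml), then dumps the result to the teuthology log.
+
+    Usually needs no configuration of its own (illustrative):
+
+    tasks:
+    - deepsea.policy:
+    """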
+
+    err_prefix = "(policy subtask) "
+
+    def __init__(self, ctx, config):
+        global deepsea_ctx
+        deepsea_ctx['logger_obj'] = log.getChild('policy')
+        self.name = 'deepsea.policy'
+        super(Policy, self).__init__(ctx, config)
+        self.policy_cfg = ''
+        self.munge_policy = self.config.get('munge_policy', {})
+
+    def __build_drive_group_x(self, drive_group):
+        # generate our own drive_group.yml (as opposed to letting
+        # DeepSea generate one for us)
+        if not self.nodes_storage:
+            raise ConfigError(self.err_prefix + "no osd roles configured, "
+                              "but at least one of these is required.")
+        self.log.debug("building drive group ->{}<- for {} storage nodes"
+                       .format(drive_group, len(self.nodes_storage)))
+        if drive_group == 'teuthology':
+            raise ConfigError(self.err_prefix + "\"teuthology\" drive group "
+                              "generation not implemented yet")
+        elif drive_group == 'custom':
+            self.__roll_out_drive_group()
+        else:
+            raise ConfigError(self.err_prefix + "unknown drive group ->{}<-"
+                              .format(self.drive_group))
+
+    def __roll_out_drive_group(self, fpath="/srv/salt/ceph/configuration/files/drive_groups.yml"):
+        misc.sudo_write_file(
+            self.master_remote,
+            fpath,
+            yaml.dump(self.drive_group),
+            perms="0644",
+            )
+
+    def _build_base(self):
+        """
+        policy.cfg boilerplate
+        """
+        self.policy_cfg = ("# policy.cfg generated by deepsea.policy subtask\n"
+                           "# Cluster assignment\n"
+                           "cluster-ceph/cluster/*.sls\n"
+                           "# Common configuration\n"
+                           "config/stack/default/global.yml\n"
+                           "config/stack/default/ceph/cluster.yml\n"
+                           "# Role assignment - master\n"
+                           "role-master/cluster/{}.sls\n"
+                           "# Role assignment - admin\n"
+                           "role-admin/cluster/*.sls\n"
+                           .format(self.master_remote.hostname))
+
+    def _build_drive_groups_yml(self):
+        """
+        Generate a special-purpose drive_groups.yml
+        (currently fails the test in all cases except when
+        "drive_group: default" is explicitly given)
+        """
+        if isinstance(self.drive_group, str):
+            if self.drive_group == 'teuthology':
+                self.__build_drive_group_x('teuthology')
+            elif self.drive_group == 'default':
+                pass
+            else:
+                raise ConfigError(self.err_prefix + "unknown drive group ->{}<-"
+                                  .format(self.drive_group))
+        elif isinstance(self.drive_group, dict):
+            self.__build_drive_group_x('custom')
+        else:
+            raise ConfigError(self.err_prefix + "drive_group config param "
+                              "must be a string or a dict")
+
+    def _build_x(self, role_type, required=False):
+        no_roles_of_type = "no {} roles configured".format(role_type)
+        but_required = ", but at least one of these is required."
+        role_dict = {}
+        if role_type in self.role_lookup_table:
+            role_dict = self.role_lookup_table[role_type]
+        elif required:
+            raise ConfigError(self.err_prefix + no_roles_of_type + but_required)
+        else:
+            self.log.debug(no_roles_of_type)
+            return None
+        self.log.debug("generating policy.cfg lines for {} based on {}"
+                       .format(role_type, role_dict))
+        if required:
+            if len(role_dict.keys()) < 1:
+                raise ConfigError(self.err_prefix + no_roles_of_type + but_required)
+        for role_spec, remote_name in role_dict.items():
+            if role_type == 'osd':
+                role_type = 'storage'
+            self.policy_cfg += ('# Role assignment - {}\n'
+                                'role-{}/cluster/{}.sls\n'
+                                .format(role_spec, role_type, remote_name))
+
+    def _cat_policy_cfg(self):
+        """
+        Dump the final policy.cfg file to teuthology log.
+        """
+        cmd_str = "cat {}/policy.cfg".format(proposals_dir)
+        self.master_remote.run(args=cmd_str)
+
+    def _write_policy_cfg(self):
+        """
+        Write policy_cfg to master remote.
+        """
+        misc.sudo_write_file(
+            self.master_remote,
+            proposals_dir + "/policy.cfg",
+            self.policy_cfg,
+            perms="0644",
+            owner="salt",
+            )
+        cmd_str = "ls -l {}/policy.cfg".format(proposals_dir)
+        self.master_remote.run(args=cmd_str)
+
+    def begin(self):
+        """
+        Generate policy.cfg from the results of role introspection
+        """
+        # FIXME: this should be run only once - check for that and
+        # return an error otherwise
+        if self.munge_policy:
+            for k, v in self.munge_policy.items():
+                if k == 'remove_storage_only_node':
+                    delete_me = self.first_storage_only_node()
+                    if not delete_me:
+                        raise ConfigError(
+                            self.err_prefix + "remove_storage_only_node "
+                            "requires a storage-only node, but there is none"
+                            )
+                    raise ConfigError(self.err_prefix + (
+                        "munge_policy is a kludge - get rid of it! "
+                        "This test needs to be reworked - deepsea.py "
+                        "does not currently have a proper way of "
+                        "changing (\"munging\") the policy.cfg file."
+                        ))
+                else:
+                    raise ConfigError(self.err_prefix + "unrecognized "
+                                      "munge_policy directive {}".format(k))
+        else:
+            self.log.info(anchored("generating policy.cfg"))
+            self._build_base()
+            self._build_x('mon', required=True)
+            self._build_x('mgr', required=True)
+            self._build_x('osd', required=True)
+            self._build_drive_groups_yml()
+            self._build_x('mds')
+            self._build_x('rgw')
+            self._build_x('igw')
+            self._build_x('ganesha')
+            self._build_x('prometheus')
+            self._build_x('grafana')
+            self._write_policy_cfg()
+            self._cat_policy_cfg()
+
+    def end(self):
+        pass
+
+    def teardown(self):
+        pass
+
+
+class Reboot(DeepSea):
+    """
+    A class that does nothing but unconditionally reboot - either a single node
+    or the whole cluster.
+
+    Configuration (reboot a single node)
+
+    tasks:
+    - deepsea.reboot:
+          client.salt_master:
+
+    Configuration (reboot the entire cluster)
+
+    tasks:
+    - deepsea.reboot:
+          all:
+    """
+
+    err_prefix = '(reboot subtask) '
+
+    def __init__(self, ctx, config):
+        global deepsea_ctx
+        deepsea_ctx['logger_obj'] = log.getChild('reboot')
+        self.name = 'deepsea.reboot'
+        super(Reboot, self).__init__(ctx, config)
+
+    def begin(self):
+        if not self.config:
+            self.log.warning("empty config: nothing to do")
+            return None
+        config_keys = len(self.config)
+        if config_keys > 1:
+            raise ConfigError(
+                self.err_prefix +
+                "config dictionary may contain only one key. "
+                "You provided ->{}<- keys ({})".format(config_keys, list(self.config.keys()))
+                )
+        role_spec = list(self.config.keys())[0]
+        if role_spec == "all":
+            remote = self.ctx.cluster
+            log_spec = "all nodes reboot now"
+            self.log.warning(anchored(log_spec))
+            self.reboot_the_cluster_now(log_spec=log_spec)
+        else:
+            remote = get_remote_for_role(self.ctx, role_spec)
+            log_spec = "node {} reboot now".format(remote.hostname)
+            self.log.warning(anchored(log_spec))
+            self.reboot_a_single_machine_now(remote, log_spec=log_spec)
+
+    def end(self):
+        pass
+
+    def teardown(self):
+        pass
+
+
+class Repository(DeepSea):
+    """
+    A class for manipulating zypper repos on the test nodes.
+    All it knows how to do is wipe out the existing repos (i.e. rename them to
+    foo.repo.bck) and replace them with a given set of new ones.
+
+    Configuration (one node):
+
+    tasks:
+    - deepsea.repository:
+          client.salt_master:
+              - name: repo_foo
+                url: http://example.com/foo/
+              - name: repo_bar
+                url: http://example.com/bar/
+
+    Configuration (all nodes):
+
+    tasks:
+    - deepsea.repository:
+          all:
+              - name: repo_foo
+                url: http://example.com/foo/
+              - name: repo_bar
+                url: http://example.com/bar/
+
+    To eliminate the need to duplicate the repos array, it can be specified
+    in the configuration of the main deepsea task. Then the yaml will look
+    like so:
+
+    tasks:
+    - deepsea:
+          repositories:
+              - name: repo_foo
+                url: http://example.com/foo/
+              - name: repo_bar
+                url: http://example.com/bar/
+    ...
+    - deepsea.repository:
+          client.salt_master:
+    ...
+    - deepsea.repository:
+          all:
+
+    One last note: we try to be careful and not clobber the repos twice.
+    """
+
+    err_prefix = '(repository subtask) '
+
+    def __init__(self, ctx, config):
+        deepsea_ctx['logger_obj'] = log.getChild('repository')
+        self.name = 'deepsea.repository'
+        super(Repository, self).__init__(ctx, config)
+
+    def _repositories_to_remote(self, remote):
+        args = []
+        for repo in self.repositories:
+            args += [repo['name'] + ':' + repo['url']]
+        self.scripts.run(
+            remote,
+            'clobber_repositories.sh',
+            args=args
+            )
+
+    def begin(self):
+        if not self.config:
+            self.log.warning("empty config: nothing to do")
+            return None
+        config_keys = len(self.config)
+        if config_keys > 1:
+            raise ConfigError(
+                self.err_prefix +
+                "config dictionary may contain only one key. "
+                "You provided ->{}<- keys ({})".format(config_keys, list(self.config.keys()))
+                )
+        role_spec, repositories = list(self.config.items())[0]
+        if role_spec == "all":
+            remote = self.ctx.cluster
+        else:
+            remote = get_remote_for_role(self.ctx, role_spec)
+        if repositories is None:
+            assert self.repositories, \
+                "self.repositories must be populated if repositories is None"
+        else:
+            assert isinstance(repositories, list), \
+                "value of role key must be a list of repositories"
+            self.repositories = repositories
+        if not self.repositories:
+            raise ConfigError(
+                self.err_prefix +
+                "No repositories specified. Bailing out!"
+                )
+        self._repositories_to_remote(remote)
+
+    def end(self):
+        pass
+
+    def teardown(self):
+        pass
+
+
+class Script(DeepSea):
+    """
+    A class that runs a bash script on the node with given role, or on all nodes.
+
+    Example 1 (run foo_bar.sh, with arguments, on Salt Master node):
+
+    tasks:
+        - deepsea.script:
+              client.salt_master:
+                  foo_bar.sh:
+                      args:
+                          - 'foo'
+                          - 'bar'
+
+    Example 2 (run foo_bar.sh, with no arguments, on all test nodes)
+
+    tasks:
+        - deepsea.script:
+              all:
+                  foo_bar.sh:
+    """
+
+    err_prefix = '(script subtask) '
+
+    def __init__(self, ctx, config):
+        global deepsea_ctx
+        deepsea_ctx['logger_obj'] = log.getChild('script')
+        self.name = 'deepsea.script'
+        super(Script, self).__init__(ctx, config)
+
+    def begin(self):
+        if not self.config:
+            self.log.warning("empty config: nothing to do")
+            return None
+        config_keys = len(self.config)
+        if config_keys > 1:
+            raise ConfigError(
+                self.err_prefix +
+                "config dictionary may contain only one key. "
+                "You provided ->{}<- keys ({})".format(config_keys, list(self.config.keys()))
+                )
+        role_spec, role_dict = list(self.config.items())[0]
+        role_keys = len(role_dict)
+        if role_keys > 1:
+            raise ConfigError(
+                self.err_prefix +
+                "role dictionary may contain only one key. "
+                "You provided ->{}<- keys ({})".format(role_keys, list(role_dict.keys()))
+                )
+        if role_spec == "all":
+            remote = self.ctx.cluster
+        else:
+            remote = get_remote_for_role(self.ctx, role_spec)
+        script_spec, script_dict = list(role_dict.items())[0]
+        if script_dict is None:
+            args = []
+        if isinstance(script_dict, dict):
+            if len(script_dict) > 1 or list(script_dict.keys())[0] != 'args':
+                raise ConfigError(
+                    self.err_prefix +
+                    'script dicts may only contain one key (args)'
+                    )
+            args = list(script_dict.values())[0] or []
+            if not isinstance(args, list):
+                raise ConfigError(self.err_prefix + 'script args must be a list')
+        self.scripts.run(
+            remote,
+            script_spec,
+            args=args
+            )
+
+    def end(self):
+        pass
+
+    def teardown(self):
+        pass
+
+
+class Toolbox(DeepSea):
+    """
+    A class that contains various miscellaneous routines. For example:
+
+    tasks:
+    - deepsea.toolbox:
+          foo:
+
+    Runs the "foo" tool without any options.
+    """
+
+    err_prefix = '(toolbox subtask) '
+
+    def __init__(self, ctx, config):
+        global deepsea_ctx
+        deepsea_ctx['logger_obj'] = log.getChild('toolbox')
+        self.name = 'deepsea.toolbox'
+        super(Toolbox, self).__init__(ctx, config)
+
+    def _assert_store(self, file_or_blue, teuth_role):
+        """
+        file_or_blue can be either 'bluestore' or 'filestore'
+        teuth_role is an 'osd' role uniquely specifying one of the storage nodes.
+        Enumerates the OSDs on the node and asserts that each of these OSDs is
+        either filestore or bluestore, as appropriate.
+        """
+        remote = get_remote_for_role(self.ctx, teuth_role)
+        osds = enumerate_osds(remote, self.log)
+        assert osds, "No OSDs detected; check whether the OSD services are active"
+        self.log.info("Checking if OSDs ->{}<- are ->{}<-".format(osds, file_or_blue))
+        all_green = True
+        for osd in osds:
+            store = remote.sh("sudo ceph osd metadata {} | jq -r .osd_objectstore"
+                              .format(osd)).rstrip()
+            self.log.info("OSD {} is ->{}<-.".format(osd, store))
+            if store != file_or_blue:
+                self.log.warning("OSD {} has objectstore ->{}<- which is not ->{}<-".
+                                 format(osd, store, file_or_blue))
+                all_green = False
+        assert all_green, "One or more OSDs is not {}".format(file_or_blue)
+
+    def rebuild_node(self, **kwargs):
+        """
+        Expects a teuthology 'osd' role specifying one of the storage nodes.
+        Then runs 'rebuild.nodes' on the node, can be used for filestore to bluestore
+        migration if you run it after you change the drive_groups.yml file.
+        """
+        role = list(kwargs.keys())[0]
+        remote = get_remote_for_role(self.ctx, role)
+        osds_before_rebuild = len(enumerate_osds(remote, self.log))
+        self.log.info("Disengaging safety to prepare for rebuild")
+        self.master_remote.sh("sudo salt-run disengage.safety 2>/dev/null")
+        self.log.info("Rebuilding node {}".format(remote.hostname))
+        self.master_remote.sh("sudo salt-run rebuild.node {} 2>/dev/null".format(remote.hostname))
+        with safe_while(sleep=15, tries=10,
+                        action="ceph osd tree") as proceed:
+            while proceed():
+                self.master_remote.sh("sudo ceph osd tree || true")
+                if osds_before_rebuild == len(enumerate_osds(remote, self.log)):
+                    break
+
+    def _noout(self, add_or_rm, teuth_role):
+        """
+        add_or_rm is either 'add' or 'rm'
+        teuth_role is an 'osd' role uniquely specifying one of the storage nodes.
+        Enumerates the OSDs on the node and does 'add-noout' on each of them.
+        """
+        remote = get_remote_for_role(self.ctx, teuth_role)
+        osds = enumerate_osds(remote, self.log)
+        self.log.info("Running {}-noout for OSDs ->{}<-".format(add_or_rm, osds))
+        for osd in osds:
+            remote.sh("sudo ceph osd {}-noout osd.{}".format(add_or_rm, osd))
+
+    def add_noout(self, **kwargs):
+        """
+        Expects one key - a teuthology 'osd' role specifying one of the storage nodes.
+        Enumerates the OSDs on this node and does 'add-noout' on each of them.
+        """
+        role = list(kwargs.keys())[0]
+        self._noout("add", role)
+
+    def assert_bluestore(self, **kwargs):
+        """
+        Expects one key - a teuthology 'osd' role specifying one of the storage nodes.
+        Enumerates the OSDs on this node and asserts that each one is a bluestore OSD.
+        """
+        role = list(kwargs.keys())[0]
+        self._assert_store("bluestore", role)
+
+    def assert_filestore(self, **kwargs):
+        """
+        Expects one key - a teuthology 'osd' role specifying one of the storage nodes.
+        Enumerates the OSDs on this node and asserts that each one is a filestore OSD.
+        """
+        role = list(kwargs.keys())[0]
+        self._assert_store("filestore", role)
+
+    def rm_noout(self, **kwargs):
+        """
+        Expects one key - a teuthology 'osd' role specifying one of the storage nodes.
+        Enumerates the OSDs on this node and does 'rm-noout' on each of them.
+        """
+        role = list(kwargs.keys())[0]
+        self._noout("rm", role)
+
+    def wait_for_health_ok(self, **kwargs):
+        """
+        Wait for HEALTH_OK - stop after HEALTH_OK is reached or timeout expires.
+        Timeout defaults to 120 minutes, but can be specified by providing a
+        configuration option. For example:
+
+        tasks:
+        - deepsea.toolbox
+            wait_for_health_ok:
+              timeout_minutes: 90
+        """
+        if kwargs:
+            self.log.info("wait_for_health_ok: Considering config dict ->{}<-".format(kwargs))
+            config_keys = len(kwargs)
+            if config_keys > 1:
+                raise ConfigError(
+                    self.err_prefix +
+                    "wait_for_health_ok config dictionary may contain only one key. "
+                    "You provided ->{}<- keys ({})".format(config_keys, list(kwargs.keys()))
+                    )
+            timeout_minutes = list(kwargs.values())[0]
+        else:
+            timeout_minutes = 120
+        self.log.info("Waiting up to ->{}<- minutes for HEALTH_OK".format(timeout_minutes))
+        remote = get_remote_for_role(self.ctx, "client.salt_master")
+        cluster_status = ""
+        for minute in range(1, timeout_minutes+1):
+            remote.sh("sudo ceph status")
+            cluster_status = remote.sh(
+                "sudo ceph health detail --format json | jq -r '.status'"
+                ).rstrip()
+            if cluster_status == "HEALTH_OK":
+                break
+            self.log.info("Waiting another minute for cluster to reach HEALTH_OK "
+                          "({} minutes left until timeout)"
+                          .format(timeout_minutes + 1 - minute))
+            time.sleep(60)
+        if cluster_status == "HEALTH_OK":
+            self.log.info(anchored("Cluster is healthy"))
+        else:
+            raise RuntimeError("Cluster still not healthy (current status ->{}<-) "
+                               "after reaching timeout"
+                               .format(cluster_status))
+
+    def begin(self):
+        if not self.config:
+            self.log.warning("empty config: nothing to do")
+            return None
+        self.log.info("Considering config dict ->{}<-".format(self.config))
+        config_keys = len(self.config)
+        if config_keys > 1:
+            raise ConfigError(
+                self.err_prefix +
+                "config dictionary may contain only one key. "
+                "You provided ->{}<- keys ({})".format(config_keys, list(self.config.keys()))
+                )
+        tool_spec, kwargs = list(self.config.items())[0]
+        kwargs = {} if not kwargs else kwargs
+        method = getattr(self, tool_spec, None)
+        if method:
+            self.log.info("About to run tool ->{}<- from toolbox with config ->{}<-"
+                          .format(tool_spec, kwargs))
+            method(**kwargs)
+        else:
+            raise ConfigError(self.err_prefix + "No such tool ->{}<- in toolbox"
+                              .format(tool_spec))
+
+    def end(self):
+        pass
+
+    def teardown(self):
+        pass
+
+
+class Validation(DeepSea):
+    """
+    A container for "validation tests", which are understood to mean tests that
+    validate the Ceph cluster (just) deployed by DeepSea.
+
+    The tests implemented in this class should be small and not take long to
+    finish. Anything more involved should be implemented in a separate task
+    (see ses_qa.py for an example of such a task).
+
+    The config YAML is a dictionary in which the keys are the names of tests
+    (methods to be run) and the values are the config dictionaries of each test
+    to be run.
+
+    Validation tests with lines like this
+
+        self._apply_config_default("foo_test", None)
+
+    are triggered by default, while others have to be explicitly mentioned in
+    the YAML.
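+
+    Example (illustrative):
+
+    tasks:
+    - deepsea.validation:
+          rados_write_test: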
+    """
+
+    err_prefix = '(validation subtask) '
+
+    def __init__(self, ctx, config):
+        global deepsea_ctx
+        deepsea_ctx['logger_obj'] = log.getChild('validation')
+        self.name = 'deepsea.validation'
+        super(Validation, self).__init__(ctx, config)
+        self._apply_config_default("ceph_version_sanity", None)
+        self._apply_config_default("rados_striper", None)
+        self._apply_config_default("systemd_units_active", None)
+
+    def _apply_config_default(self, validation_test, default_config):
+        """
+        Use to activate tests that should always be run.
+        """
+        self.config[validation_test] = self.config.get(validation_test, default_config)
+
+    def ceph_version_sanity(self, **kwargs):
+        self.scripts.run(
+            self.master_remote,
+            'ceph_version_sanity.sh',
+            )
+
+    def ganesha_smoke_test(self, **kwargs):
+        client_host = self.role_type_present("ganeshaclient")
+        rgw = self.role_type_present("rgw")
+        mds = self.role_type_present("mds")
+        args = []
+        if mds:
+            args += ['--mds']
+        if rgw:
+            args += ['--rgw']
+        if not args:
+            raise ConfigError(self.err_prefix +
+                              "ganesha_smoke_test needs an rgw or mds role, but neither was given")
+        if client_host:
+            self.master_remote.sh("sudo salt-run ganesha.report 2>/dev/null || true")
+            remote = self.remotes[client_host]
+            self.scripts.run(
+                remote,
+                'ganesha_smoke_test.sh',
+                args=args,
+                )
+            self.master_remote.sh("sudo salt-run ganesha.report 2>/dev/null || true")
+        else:
+            raise ConfigError(self.err_prefix +
+                              "ganesha_smoke_test needs a client role, but none was given")
+
+    def grafana_service_check(self, **kwargs):
+        grafana = self.role_type_present("grafana")
+        if grafana:
+            remote = self.remotes[grafana]
+            remote.sh('sudo systemctl status grafana-server.service')
+        else:
+            raise ConfigError(self.err_prefix +
+                              "grafana_service_check needs a grafana role, but none was given")
+
+    def iscsi_smoke_test(self, **kwargs):
+        igw_host = self.role_type_present("igw")
+        if igw_host:
+            remote = self.remotes[igw_host]
+            self.scripts.run(
+                remote,
+                'iscsi_smoke_test.sh',
+                )
+
+    def rados_striper(self, **kwargs):
+        """
+        Verify that rados does not have the --striper option
+        """
+        cmd_str = 'sudo rados --striper 2>&1 || true'
+        output = self.master_remote.sh(cmd_str)
+        os_type, os_version = self.os_type_and_version()
+        self.log.info(
+            "Checking for expected output on OS ->{}<-"
+            .format(os_type + " " + str(os_version))
+            )
+        if os_type == 'sle' and os_version >= 15:
+            assert 'unrecognized command --striper' in output, \
+                "ceph is compiled without libradosstriper"
+        else:
+            assert '--striper' not in output, \
+                "ceph is compiled with libradosstriper"
+        self.log.info("OK")
+
+    def rados_write_test(self, **kwargs):
+        self.scripts.run(
+            self.master_remote,
+            'rados_write_test.sh',
+            )
+
+    def systemd_units_active(self, **kwargs):
+        """
+        For all cluster nodes, determine which systemd services
+        should be running and assert that the respective units
+        are in "active" state.
+        """
+        # map role types to systemd units
+        unit_map = {
+            "mds": "ceph-mds@",
+            "mgr": "ceph-mgr@",
+            "mon": "ceph-mon@",
+            "osd": "ceph-osd@",
+            "rgw": "ceph-radosgw@",
+            "ganesha": "nfs-ganesha"
+            }
+        # for each machine in the cluster
+        idx = 0
+        for rtl in self.role_types:
+            node = self.nodes[idx]
+            script = ("# validate systemd units on {}\n"
+                      "set -ex\n").format(node)
+            self.log.info("Machine {} ({}) has role types {}"
+                          .format(idx, node, ','.join(rtl)))
+            remote = self.remotes[node]
+            run_script = False
+            for role_type in rtl:
+                if role_type in unit_map:
+                    script += ("systemctl --state=active --type=service list-units "
+                               "| grep -e '^{}'\n".format(unit_map[role_type]))
+                    run_script = True
+                else:
+                    self.log.debug("Ignoring role_type {} which has no associated "
+                                   "systemd unit".format(role_type))
+            if run_script:
+                remote_run_script_as_root(
+                    remote,
+                    "systemd_validation.sh",
+                    script
+                    )
+            idx += 1
+
+    def begin(self):
+        self.log.debug("Processing tests: ->{}<-".format(self.config.keys()))
+        for method_spec, kwargs in self.config.items():
+            kwargs = {} if not kwargs else kwargs
+            if not isinstance(kwargs, dict):
+                raise ConfigError(self.err_prefix + "Method config must be a dict")
+            self.log.info(anchored(
+                "Running validation test {} with config ->{}<-"
+                .format(method_spec, kwargs)
+                ))
+            method = getattr(self, method_spec, None)
+            if method:
+                method(**kwargs)
+            else:
+                raise ConfigError(self.err_prefix + "No such method ->{}<-"
+                                  .format(method_spec))
+
+    def end(self):
+        pass
+
+    def teardown(self):
+        pass
+
+
+task = DeepSea
+ceph_conf = CephConf
+create_pools = CreatePools
+dummy = Dummy
+health_ok = HealthOK
+orch = Orch
+policy = Policy
+reboot = Reboot
+repository = Repository
+script = Script
+toolbox = Toolbox
+validation = Validation
diff --git a/qa/tasks/salt.py b/qa/tasks/salt.py
new file mode 100644 (file)
index 0000000..949c953
--- /dev/null
@@ -0,0 +1,300 @@
+'''
+Task that deploys a Salt cluster on all the nodes
+
+Linter:
+    flake8 --max-line-length=100
+'''
+import logging
+
+from salt_manager import SaltManager
+from util import remote_exec
+from teuthology.exceptions import ConfigError
+from teuthology.misc import (
+    delete_file,
+    move_file,
+    sh,
+    sudo_write_file,
+    write_file,
+    )
+from teuthology.orchestra import run
+from teuthology.task import Task
+
+log = logging.getLogger(__name__)
+
+
+class Salt(Task):
+    """
+    Deploy a Salt cluster on all remotes (test nodes).
+
+    This task assumes all relevant Salt packages (salt, salt-master,
+    salt-minion, salt-api, python-salt, etc. - whatever they may be called for
+    the OS in question) are already installed. This should be done using the
+    install task.
+
+    One, and only one, of the machines must have a role corresponding to the
+    value of the variable salt.sm.master_role (see salt_manager.py). This node
+    is referred to as the "Salt Master", or the "master node".
+
+    The task starts the Salt Master daemon on the master node, and Salt Minion
+    daemons on all the nodes (including the master node), and ensures that the
+    minions are properly linked to the master. Finally, it tries to ping all
+    the minions from the Salt Master.
+
+    :param ctx: the argparse.Namespace object
+    :param config: the config dict
+    """
+
+    def __init__(self, ctx, config):
+        super(Salt, self).__init__(ctx, config)
+        log.debug("beginning of constructor method")
+        log.debug("munged config is {}".format(self.config))
+        self.remotes = self.cluster.remotes
+        self.sm = SaltManager(self.ctx)
+        self.master_remote = self.sm.master_remote
+        log.debug("end of constructor method")
+
+    def _disable_autodiscovery(self):
+        """
+        It's supposed to be off by default, but just in case.
+        """
+        self.sm.master_remote.run(args=[
+            'sudo', 'sh', '-c',
+            'echo discovery: false >> /etc/salt/master'
+        ])
+        for rem in self.remotes.keys():
+            rem.run(args=[
+                'sudo', 'sh', '-c',
+                'echo discovery: false >> /etc/salt/minion'
+            ])
+
+    def _generate_minion_keys(self):
+        '''
+        Generate minion keys on the Salt Master, to be used to preseed this
+        cluster's minions.
+        '''
+        for rem in self.remotes.keys():
+            minion_id = rem.hostname
+            log.info('Ensuring that minion ID {} has a keypair on the master'
+                     .format(minion_id))
+            # mode 777 is necessary to be able to generate keys reliably
+            # we hit this before:
+            # https://github.com/saltstack/salt/issues/31565
+            self.sm.master_remote.run(args=[
+                'sudo',
+                'sh',
+                '-c',
+                'test -d salt || mkdir -m 777 salt',
+            ])
+            self.sm.master_remote.run(args=[
+                'sudo',
+                'sh',
+                '-c',
+                'test -d salt/minion-keys || mkdir -m 777 salt/minion-keys',
+            ])
+            self.sm.master_remote.run(args=[
+                'sudo',
+                'sh',
+                '-c',
+                ('if [ ! -f salt/minion-keys/{mid}.pem ]; then '
+                 'salt-key --gen-keys={mid} '
+                 '--gen-keys-dir=salt/minion-keys/; '
+                 ' fi').format(mid=minion_id),
+            ])
+
+    def _preseed_minions(self):
+        '''
+        Preseed minions with generated and accepted keys; set minion id
+        to the remote's hostname.
+        '''
+        for rem in self.remotes.keys():
+            minion_id = rem.hostname
+            src = 'salt/minion-keys/{}.pub'.format(minion_id)
+            dest = '/etc/salt/pki/master/minions/{}'.format(minion_id)
+            self.sm.master_remote.run(args=[
+                'sudo',
+                'sh',
+                '-c',
+                ('if [ ! -f {d} ]; then '
+                 'cp {s} {d} ; '
+                 'chown root {d} ; '
+                 'fi').format(s=src, d=dest)
+            ])
+            self.sm.master_remote.run(args=[
+                'sudo',
+                'chown',
+                'ubuntu',
+                'salt/minion-keys/{}.pem'.format(minion_id),
+                'salt/minion-keys/{}.pub'.format(minion_id),
+            ])
+            #
+            # copy the keys via the teuthology machine: the test nodes cannot
+            # ssh to each other, so "scp -3" routes the remote-to-remote copy
+            # through the local (teuthology) host.
+            sh('scp -3 {}:salt/minion-keys/{}.* {}:'.format(
+                self.sm.master_remote.name,
+                minion_id, rem.name))
+            sudo_write_file(rem, '/etc/salt/minion_id', minion_id)
+            #
+            # set proper owner and permissions on keys
+            rem.run(
+                args=[
+                    'sudo',
+                    'chown',
+                    'root',
+                    '{}.pem'.format(minion_id),
+                    '{}.pub'.format(minion_id),
+                    run.Raw(';'),
+                    'sudo',
+                    'chmod',
+                    '600',
+                    '{}.pem'.format(minion_id),
+                    run.Raw(';'),
+                    'sudo',
+                    'chmod',
+                    '644',
+                    '{}.pub'.format(minion_id),
+                ],
+            )
+            #
+            # move keys to correct location
+            move_file(rem, '{}.pem'.format(minion_id),
+                      '/etc/salt/pki/minion/minion.pem', sudo=True,
+                      preserve_perms=False)
+            move_file(rem, '{}.pub'.format(minion_id),
+                      '/etc/salt/pki/minion/minion.pub', sudo=True,
+                      preserve_perms=False)
+
+    def _set_minion_master(self):
+        """Points all minions to the master"""
+        master_id = self.sm.master_remote.hostname
+        for rem in self.remotes.keys():
+            # remove old master public key if present. Minion will refuse to
+            # start if master name changed but old key is present
+            delete_file(rem, '/etc/salt/pki/minion/minion_master.pub',
+                        sudo=True, check=False)
+
+            # set master id
+            master_conf_cmd = ('echo master: {} > '
+                               '/etc/salt/minion.d/master.conf').format(master_id)
+            rem.run(args=[
+                'sudo',
+                'sh',
+                '-c',
+                master_conf_cmd,
+            ])
+
+    def _set_debug_log_level(self):
+        """Sets log_level: debug for all salt daemons"""
+        for rem in self.remotes.keys():
+            rem.run(args=[
+                'sudo',
+                'sed', '--in-place', '--regexp-extended',
+                '-e', r's/^\s*#\s*log_level:.*$/log_level: debug/g',
+                '-e', r'/^\s*#.*$/d', '-e', r'/^\s*$/d',
+                '/etc/salt/master',
+                '/etc/salt/minion',
+            ])
+
+    def setup(self):
+        super(Salt, self).setup()
+        log.debug("beginning of setup method")
+        self._generate_minion_keys()
+        self._preseed_minions()
+        self._set_minion_master()
+        self._disable_autodiscovery()
+        self._set_debug_log_level()
+        self.sm.enable_master()
+        self.sm.start_master()
+        self.sm.enable_minions()
+        self.sm.start_minions()
+        log.debug("end of setup method")
+
+    def begin(self):
+        super(Salt, self).begin()
+        log.debug("beginning of begin method")
+        self.sm.check_salt_daemons()
+        self.sm.cat_salt_master_conf()
+        self.sm.cat_salt_minion_confs()
+        self.sm.ping_minions()
+        log.debug("end of begin method")
+
+    def end(self):
+        super(Salt, self).end()
+        log.debug("beginning of end method")
+        self.sm.gather_logs('salt')
+        self.sm.gather_logs('zypp')
+        self.sm.gather_logs('rbd-target-api')
+        self.sm.gather_logfile('zypper.log')
+        self.sm.gather_logfile('journalctl.log')
+        log.debug("end of end method")
+
+    def teardown(self):
+        super(Salt, self).teardown()
+        # log.debug("beginning of teardown method")
+        pass
+        # log.debug("end of teardown method")
+
+
+class Command(Salt):
+    """
+    Subtask for running an arbitrary salt command.
+
+    This subtask understands the following config keys:
+
+        command  the command to run (mandatory)
+                 For example:
+
+                     command: 'state.apply ceph.updates.salt'
+
+        target   target selection specifier (default: *)
+                 For details, see "man salt"
+
+    Note: "command: saltutil.sync_all" gets special handling.
+    """
+
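+    # Hypothetical usage fragment (illustrative only; the "salt.command" task
+    # name follows from the module-level aliases at the end of this file, and
+    # the target value is made up):
+    #
+    #     tasks:
+    #     - salt.command:
+    #         command: 'state.apply ceph.updates.salt'
+    #         target: 'data*'
+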
+    err_prefix = "(command subtask) "
+
+    def __init__(self, ctx, config):
+        super(Command, self).__init__(ctx, config)
+        self.command = str(self.config.get("command", ''))
+        # targets all machines if omitted
+        self.target = str(self.config.get("target", '*'))
+        if not self.command:
+            raise ConfigError(
+                self.err_prefix + "nothing to do. Specify a non-empty value for 'command'")
+
+    def _run_command(self):
+        if '*' in self.target:
+            quoted_target = "\'{}\'".format(self.target)
+        else:
+            quoted_target = self.target
+        cmd_str = (
+            "set -ex\n"
+            "timeout 60m salt {} --no-color {} 2>/dev/null\n"
+            ).format(quoted_target, self.command)
+        write_file(self.master_remote, 'run_salt_command.sh', cmd_str)
+        remote_exec(
+            self.master_remote,
+            'sudo bash run_salt_command.sh',
+            log,
+            "salt command ->{}<-".format(self.command),
+            )
+
+    def setup(self):
+        pass
+
+    def begin(self):
+        self.log.info("running salt command ->{}<-".format(self.command))
+        if self.command == 'saltutil.sync_all':
+            self.sm.sync_pillar_data()
+        else:
+            self._run_command()
+
+    def end(self):
+        pass
+
+    def teardown(self):
+        pass
+
+
+task = Salt
+command = Command
diff --git a/qa/tasks/salt_manager.py b/qa/tasks/salt_manager.py
new file mode 100644 (file)
index 0000000..f769f0f
--- /dev/null
@@ -0,0 +1,275 @@
+'''
+Salt "manager" module
+
+Usage: First, ensure that there is a role whose name corresponds
+to the value of the master_role variable, below. Second, in your
+task, instantiate a SaltManager object:
+
+    from salt_manager import SaltManager
+
+    sm = SaltManager(ctx)
+
+Third, enjoy the SaltManager goodness - e.g.:
+
+    sm.ping_minions()
+
+Linter:
+    flake8 --max-line-length=100
+'''
+import logging
+import re
+
+from teuthology.contextutil import safe_while
+from teuthology.exceptions import CommandFailedError, MaxWhileTries
+from teuthology.orchestra import run
+from util import get_remote_for_role
+
+log = logging.getLogger(__name__)
+master_role = 'client.salt_master'
+
+
+class InternalError(Exception):
+    pass
+
+
+def systemctl_remote(remote, subcommand, service_name):
+    """
+    Caveat: only works for units ending in ".service"
+    """
+    def systemctl_cmd(subcommand, lines=0):
+        return ('sudo systemctl {} --full --lines={} {}.service'
+                .format(subcommand, lines, service_name))
+    try:
+        remote.run(args=systemctl_cmd(subcommand))
+    except CommandFailedError:
+        remote.run(args=systemctl_cmd('status', 100))
+        raise
+
+
+class SaltManager(object):
+
+    def __init__(self, ctx):
+        self.ctx = ctx
+        self.master_remote = get_remote_for_role(self.ctx, master_role)
+
+    def __cat_file_cluster(self, filename=None):
+        """
+        cat a file everywhere on the whole cluster
+        """
+        self.ctx.cluster.run(args=[
+            'sudo', 'cat', filename])
+
+    def __cat_file_remote(self, remote, filename=None):
+        """
+        cat a file on a particular remote
+        """
+        try:
+            remote.run(args=[
+                'sudo', 'cat', filename])
+        except CommandFailedError:
+            log.warning((
+                "salt_manager: {} not found on {}"
+                ).format(filename, remote.name))
+
+    def __ping(self, ping_cmd, expected):
+        try:
+            def instances_of_str(search_str, output):
+                return len(re.findall(search_str, output))
+            with safe_while(sleep=15, tries=50,
+                            action=ping_cmd) as proceed:
+                while proceed():
+                    output = self.master_remote.sh(ping_cmd)
+                    no_master = instances_of_str('The salt master could not be contacted', output)
+                    responded = instances_of_str('  True', output)
+                    log.info("{} of {} minions responded".format(responded, expected))
+                    if (expected == responded):
+                        return None
+        except MaxWhileTries:
+            if no_master:
+                cmd = 'sudo systemctl status --full --lines=100 salt-master.service'
+                self.master_remote.run(args=cmd)
+
+    def all_minions_cmd_run(self, cmd, abort_on_fail=True, show_stderr=False):
+        """
+        Use cmd.run to run a command on all nodes.
+        """
+        if not abort_on_fail:
+            cmd += ' || true'
+        redirect = "" if show_stderr else " 2>/dev/null"
+        self.master_remote.run(args=(
+            'sudo salt \\* cmd.run \'{}\'{}'.format(cmd, redirect)
+            ))
+
+    def all_minions_zypper_lu(self):
+        """Run "zypper lu" on all nodes"""
+        cmd = "zypper --non-interactive --no-gpg-checks list-updates"
+        self.all_minions_cmd_run(cmd, abort_on_fail=False)
+
+    def all_minions_zypper_ps(self):
+        """Run "zypper ps -s" on all nodes"""
+        cmd = "zypper ps -s || true"
+        self.all_minions_cmd_run(cmd, abort_on_fail=False)
+
+    def all_minions_zypper_ps_requires_reboot(self):
+        number_of_minions = len(self.ctx.cluster.remotes)
+        salt_cmd = "sudo salt \\* cmd.run \'zypper ps -s || true\' 2>/dev/null"
+        number_with_no_processes = len(
+            re.findall('No processes using deleted files found',
+                       self.master_remote.sh(salt_cmd))
+            )
+        return number_with_no_processes != number_of_minions
+
+    def all_minions_zypper_ref(self):
+        """Run "zypper ref" on all nodes"""
+        cmd = "zypper --non-interactive --gpg-auto-import-keys refresh"
+        self.all_minions_cmd_run(cmd, abort_on_fail=False)
+
+    def all_minions_zypper_status(self):
+        """
+        Implement someone's idea of a general 'zypper status'
+        """
+        self.all_minions_zypper_ref()
+        self.all_minions_zypper_lu()
+        self.all_minions_zypper_ps()
+
+    def cat_salt_master_conf(self):
+        self.__cat_file_remote(self.master_remote, filename="/etc/salt/master")
+
+    def cat_salt_minion_confs(self):
+        self.__cat_file_cluster(filename="/etc/salt/minion")
+
+    def check_salt_daemons(self):
+        self.master_remote.run(args=['sudo', 'salt-key', '-L'])
+        systemctl_remote(self.master_remote, 'status', 'salt-master')
+        for _remote in self.ctx.cluster.remotes.keys():
+            systemctl_remote(_remote, 'status', 'salt-minion')
+            _remote.run(args='sudo cat /etc/salt/minion_id')
+            _remote.run(args='sudo cat /etc/salt/minion.d/master.conf')
+
+    def enable_master(self):
+        """Enables salt-master.service on the Salt Master node"""
+        systemctl_remote(self.master_remote, "enable", "salt-master")
+
+    def enable_minions(self):
+        """Enables salt-minion.service on all cluster nodes"""
+        systemctl_remote(self.ctx.cluster, "enable", "salt-minion")
+
+    def gather_logfile(self, logfile):
+        for _remote in self.ctx.cluster.remotes.keys():
+            try:
+                _remote.run(args=[
+                    'sudo', 'test', '-f', '/var/log/{}'.format(logfile),
+                    ])
+            except CommandFailedError:
+                continue
+            log.info((
+                "gathering logfile /var/log/{} from remote {}"
+                ).format(logfile, _remote.hostname))
+            _remote.run(args=[
+                'sudo', 'cp', '-a', '/var/log/{}'.format(logfile),
+                '/home/ubuntu/cephtest/archive/',
+                run.Raw(';'),
+                'sudo', 'chown', 'ubuntu',
+                '/home/ubuntu/cephtest/archive/{}'.format(logfile)
+                ])
+
+    def gather_logs(self, logdir, archive=None):
+        """
+        Grabs the contents of logdir and saves them under /home/ubuntu/cephtest/archive,
+        where teuthology will harvest them before destroying the remote (target machine).
+
+        logdir can be specified as an absolute path or a relative path. Relative
+        paths are assumed to be under /var/log.
+        """
+        if logdir[:1] == '/':
+            if not archive:
+                raise InternalError((
+                    'Unable to harvest logs from absolute directory ->{}<- '
+                    'because no archive option was passed'
+                    ).format(logdir)
+                    )
+        else:
+            if not archive:
+                archive = logdir
+            logdir = '/var/log/{}'.format(logdir)
+        for _remote in self.ctx.cluster.remotes.keys():
+            try:
+                _remote.run(args=[
+                    'sudo', 'test', '-d', '{}/'.format(logdir),
+                    ])
+            except CommandFailedError:
+                continue
+            log.info("gathering {} logs from remote {}"
+                     .format(logdir, _remote.hostname))
+            _remote.run(args=[
+                'sudo', 'cp', '-a', '{}/'.format(logdir),
+                '/home/ubuntu/cephtest/archive/',
+                run.Raw(';'),
+                'sudo', 'chown', '-R', 'ubuntu',
+                '/home/ubuntu/cephtest/archive/{}/'.format(archive),
+                run.Raw(';'),
+                'find', '/home/ubuntu/cephtest/archive/{}/'.format(archive),
+                '-type', 'f', '-print0',
+                run.Raw('|'),
+                'xargs', '-0', '--no-run-if-empty', '--', 'gzip', '--'
+                ])
+
+    def master_role(self):
+        return master_role
+
+    def master_rpm_q(self, pkg_name):
+        """Run rpm -q on the Salt Master node"""
+        # FIXME: should possibly take a list of pkg_names
+        installed = True
+        try:
+            self.master_remote.run(args=[
+                'rpm', '-q', pkg_name
+            ])
+        except CommandFailedError:
+            installed = False
+        return installed
+
+    def ping_minion(self, mid):
+        """Pings a minion; raises exception if it doesn't respond"""
+        self.__ping(['sudo', 'salt', mid, 'test.ping'], 1)
+
+    def ping_minions(self):
+        """
+        Pings minions; raises exception if they don't respond
+        """
+        number_of_minions = len(self.ctx.cluster.remotes)
+        self.__ping(
+            "sudo sh -c \'salt \\* test.ping\' 2>/dev/null || true",
+            number_of_minions,
+            )
+        return number_of_minions
+
+    def restart_master(self):
+        """Starts salt-master.service on the Salt Master node"""
+        systemctl_remote(self.master_remote, "restart", "salt-master")
+
+    def restart_minions(self):
+        """Restarts salt-minion.service on all cluster nodes"""
+        systemctl_remote(self.ctx.cluster, "restart", "salt-minion")
+
+    def start_master(self):
+        """Starts salt-master.service on the Salt Master node"""
+        systemctl_remote(self.master_remote, "start", "salt-master")
+
+    def start_minions(self):
+        """Starts salt-minion.service on all cluster nodes"""
+        systemctl_remote(self.ctx.cluster, "start", "salt-minion")
+
+    def sync_pillar_data(self, quiet=True):
+        cmd = "sudo salt \\* saltutil.sync_all"
+        if quiet:
+            cmd += " 2>/dev/null"
+        cmd += " || true"
+        with safe_while(sleep=15, tries=10,
+                        action=cmd) as proceed:
+            while proceed():
+                no_response = len(re.findall('Minion did not return', self.master_remote.sh(cmd)))
+                if no_response:
+                    log.info("Not all minions responded. Retrying.")
+                else:
+                    return None
diff --git a/qa/tasks/scripts.py b/qa/tasks/scripts.py
new file mode 100644 (file)
index 0000000..7ebc032
--- /dev/null
@@ -0,0 +1,40 @@
+import os
+
+from util import copy_directory_recursively
+
+
+class Scripts:
+
+    def __init__(self, ctx, logger):
+        self.log = logger
+        # copy the scripts directory to all remotes exactly once per test run
+        if not ctx.get('scripts_copied', False):
+            local_path = os.path.dirname(os.path.realpath(__file__)) + '/scripts/'
+            for remote_name, remote_obj in ctx['remotes'].items():
+                copy_directory_recursively(local_path, remote_obj, "scripts")
+            ctx['scripts_copied'] = True
+
+    def run(self, remote, script_name, args=None, as_root=True):
+        class_name = type(remote).__name__
+        self.log.debug(
+            '(scripts) run method was passed a remote object of class {}'
+            .format(class_name)
+            )
+        if class_name == 'Cluster':
+            remote_spec = 'the whole cluster'
+        else:
+            remote_spec = 'remote {}'.format(remote.hostname)
+        self.log.info('(scripts) running script {} with args {} on {}'
+                      .format(script_name, args, remote_spec)
+                      )
+        path = 'scripts/' + script_name
+        cmd = 'bash {}'.format(path)
+        if as_root:
+            cmd = "sudo " + cmd
+        if args:
+            cmd += ' ' + ' '.join(map(str, args))
+        return remote.sh(cmd, label=script_name)
diff --git a/qa/tasks/scripts/ceph_cluster_status.sh b/qa/tasks/scripts/ceph_cluster_status.sh
new file mode 100644 (file)
index 0000000..4491c60
--- /dev/null
@@ -0,0 +1,13 @@
+# ceph_cluster_status.sh
+#
+# Display ceph cluster status
+#
+# args: None
+#
+set -ex
+ceph pg stat -f json-pretty
+ceph health detail -f json-pretty
+ceph osd tree
+ceph osd pool ls detail -f json-pretty
+ceph -s
+echo "OK" >/dev/null
diff --git a/qa/tasks/scripts/ceph_version_sanity.sh b/qa/tasks/scripts/ceph_version_sanity.sh
new file mode 100644 (file)
index 0000000..d565ad6
--- /dev/null
@@ -0,0 +1,21 @@
+# ceph_version_sanity.sh
+#
+# test that ceph RPM version matches "ceph --version"
+# for a loose definition of "matches"
+#
+# args: None
+
+set -ex
+rpm -q ceph
+RPM_NAME=$(rpm -q ceph)
+RPM_CEPH_VERSION=$(perl -e '"'"$RPM_NAME"'" =~ m/ceph-(\d+\.\d+\.\d+)/; print "$1\n";')
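+# e.g. an RPM_NAME like "ceph-14.2.5-123.1.x86_64" (illustrative) would yield
+# RPM_CEPH_VERSION="14.2.5"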
+echo "According to RPM, the ceph upstream version is ->$RPM_CEPH_VERSION<-" >/dev/null
+test -n "$RPM_CEPH_VERSION"
+ceph --version
+BUFFER=$(ceph --version)
+CEPH_CEPH_VERSION=$(perl -e '"'"$BUFFER"'" =~ m/ceph version (\d+\.\d+\.\d+)/; print "$1\n";')
+echo "According to \"ceph --version\", the ceph upstream version is ->$CEPH_CEPH_VERSION<-" \
+    >/dev/null
+test -n "$RPM_CEPH_VERSION"
+test "$RPM_CEPH_VERSION" = "$CEPH_CEPH_VERSION"
+echo "OK" >/dev/null
diff --git a/qa/tasks/scripts/create_all_pools_at_once.sh b/qa/tasks/scripts/create_all_pools_at_once.sh
new file mode 100644 (file)
index 0000000..09749f3
--- /dev/null
@@ -0,0 +1,89 @@
+# create_all_pools_at_once.sh
+#
+# Script for pre-creating pools prior to Stage 4
+#
+# Pools are created with a number of PGs calculated to avoid health warnings
+# that can arise during/after Stage 4 due to "too few" or "too many" PGs per
+# OSD when DeepSea is allowed to create the pools with hard-coded number of
+# PGs.
+#
+# see also https://github.com/SUSE/DeepSea/issues/536
+#
+# args: pools to be created
+#
+# example invocation: ./create_all_pools_at_once.sh foo bar baz
+
+echo "Creating pools: $@"
+
+set -ex
+
+function json_total_osds {
+    # total number of OSDs in the cluster
+    ceph osd ls --format json | jq '. | length'
+}
+
+function pgs_per_pool {
+    local TOTALPOOLS=$1
+    test -n "$TOTALPOOLS"
+    local TOTALOSDS=$(json_total_osds)
+    test -n "$TOTALOSDS"
+    # given the total number of pools and OSDs,
+    # assume triple replication and equal number of PGs per pool
+    # and aim for 100 PGs per OSD
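+    # e.g. 6 OSDs and 2 pools: 6 * 100 = 600 PGs total, 600 / 2 / 3 = 100 PGs per pool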
+    let "TOTALPGS = $TOTALOSDS * 100"
+    let "PGSPEROSD = $TOTALPGS / $TOTALPOOLS / 3"
+    echo $PGSPEROSD
+}
+
+function create_all_pools_at_once {
+    # sample usage: create_all_pools_at_once foo bar
+    local TOTALPOOLS="${#@}"
+    local PGSPERPOOL=$(pgs_per_pool $TOTALPOOLS)
+    for POOLNAME in "$@"
+    do
+        ceph osd pool create $POOLNAME $PGSPERPOOL $PGSPERPOOL replicated
+    done
+    ceph osd pool ls detail
+}
+
+CEPHFS=""
+OPENSTACK=""
+RBD=""
+OTHER=""
+for arg in "$@" ; do
+    arg="${arg,,}"
+    case "$arg" in
+        cephfs) CEPHFS="$arg" ;;
+        openstack) OPENSTACK="$arg" ;;
+        rbd) RBD="$arg" ;;
+        *) OTHER+=" $arg" ;;
+    esac
+done
+
+POOLS=""
+if [ "$CEPHFS" ] ; then
+    POOLS+=" cephfs_data cephfs_metadata"
+fi
+if [ "$OPENSTACK" ] ; then
+    POOLS+=" smoketest-cloud-backups smoketest-cloud-volumes smoketest-cloud-images"
+    POOLS+=" smoketest-cloud-vms cloud-backups cloud-volumes cloud-images cloud-vms"
+fi
+if [ "$RBD" ] ; then
+    POOLS+=" rbd"
+fi
+if [ "$OTHER" ] ; then
+    POOLS+="$OTHER"
+    APPLICATION_ENABLE="$OTHER"
+fi
+if [ -z "$POOLS" ] ; then
+    echo "create_all_pools_at_once: bad arguments"
+    exit 1
+fi
+echo "About to create pools ->$POOLS<-"
+create_all_pools_at_once $POOLS
+if [ "$APPLICATION_ENABLE" ] ; then
+    for pool in "$APPLICATION_ENABLE" ; do
+        ceph osd pool application enable $pool deepsea_qa
+    done
+fi
+echo "OK" >/dev/null
diff --git a/qa/tasks/scripts/lvm_status.sh b/qa/tasks/scripts/lvm_status.sh
new file mode 100644 (file)
index 0000000..bb42111
--- /dev/null
@@ -0,0 +1,10 @@
+# lvm_status.sh
+#
+# args: None
+
+set -ex
+
+pvs --all
+vgs --all
+lvs --all
+lsblk --ascii
diff --git a/qa/tasks/scripts/rados_write_test.sh b/qa/tasks/scripts/rados_write_test.sh
new file mode 100644 (file)
index 0000000..ba12c72
--- /dev/null
@@ -0,0 +1,19 @@
+# rados_write_test.sh
+#
+# Write a RADOS object and read it back
+#
+# NOTE: function assumes the pool "write_test" already exists. Pool can be
+# created by calling e.g. "create_all_pools_at_once write_test" immediately
+# before calling this function.
+#
+# args: None
+
+set -ex
+
+ceph osd pool application enable write_test deepsea_qa
+echo "dummy_content" > verify.txt
+rados -p write_test put test_object verify.txt
+rados -p write_test get test_object verify_returned.txt
+test "x$(cat verify.txt)" = "x$(cat verify_returned.txt)"
+
+echo "OK" >/dev/null
diff --git a/qa/tasks/scripts/rgw_init.sh b/qa/tasks/scripts/rgw_init.sh
new file mode 100644 (file)
index 0000000..d7408b3
--- /dev/null
@@ -0,0 +1,9 @@
+# rgw_init.sh
+# Set up RGW
+set -ex
+USERSYML=/srv/salt/ceph/rgw/users/users.d/rgw.yml
+cat <<EOF > $USERSYML
+- { uid: "demo", name: "Demo", email: "demo@demo.nil" }
+- { uid: "demo1", name: "Demo1", email: "demo1@demo.nil" }
+EOF
+cat $USERSYML
diff --git a/qa/tasks/scripts/rgw_init_ssl.sh b/qa/tasks/scripts/rgw_init_ssl.sh
new file mode 100644 (file)
index 0000000..d6c1ec2
--- /dev/null
@@ -0,0 +1,22 @@
+# rgw_init_ssl.sh
+# Set up RGW-over-SSL
+set -ex
+CERTDIR=/srv/salt/ceph/rgw/cert
+mkdir -p $CERTDIR
+pushd $CERTDIR
+openssl req -x509 \
+        -nodes \
+        -days 1095 \
+        -newkey rsa:4096 \
+        -keyout rgw.key \
+        -out rgw.crt \
+        -subj "/C=DE"
+cat rgw.key > rgw.pem && cat rgw.crt >> rgw.pem
+popd
+GLOBALYML=/srv/pillar/ceph/stack/global.yml
+cat <<EOF >> $GLOBALYML
+rgw_init: default-ssl
+EOF
+cat $GLOBALYML
+cp /srv/salt/ceph/configuration/files/rgw-ssl.conf \
+    /srv/salt/ceph/configuration/files/ceph.conf.d/rgw.conf
diff --git a/qa/tasks/scripts/salt_api_test.sh b/qa/tasks/scripts/salt_api_test.sh
new file mode 100644 (file)
index 0000000..82014ec
--- /dev/null
@@ -0,0 +1,10 @@
+# salt_api_test.sh
+# Salt API test script
+set -ex
+TMPFILE=$(mktemp)
+curl --silent http://$(hostname):8000/ | tee $TMPFILE # show curl output in log
+test -s $TMPFILE
+jq . $TMPFILE >/dev/null
+echo -en "\\n" # this is just for log readability
+rm $TMPFILE
+echo "Salt API test passed"
diff --git a/qa/tasks/ses_qa.py b/qa/tasks/ses_qa.py
new file mode 100644 (file)
index 0000000..f040926
--- /dev/null
@@ -0,0 +1,183 @@
+"""
+Task (and subtasks) for SES test automation
+
+Linter:
+    flake8 --max-line-length=100
+"""
+import logging
+
+from salt_manager import SaltManager
+from scripts import Scripts
+
+from teuthology.exceptions import (
+    ConfigError,
+    )
+from teuthology.task import Task
+
+log = logging.getLogger(__name__)
+ses_qa_ctx = {}
+number_of_osds_in_cluster = """sudo ceph osd tree -f json-pretty |
+                               jq '[.nodes[] | select(.type == \"osd\")] | length'"""
+
+
+class SESQA(Task):
+
+    def __init__(self, ctx, config):
+        global ses_qa_ctx
+        super(SESQA, self).__init__(ctx, config)
+        if ses_qa_ctx:
+            self.log = ses_qa_ctx['logger_obj']
+            self.log.debug("ses_qa_ctx already populated (we are in a subtask)")
+        if not ses_qa_ctx:
+            ses_qa_ctx['logger_obj'] = log
+            self.log = log
+            self.log.debug("populating ses_qa_ctx (we are *not* in a subtask)")
+            self._populate_ses_qa_context()
+        self.master_remote = ses_qa_ctx['master_remote']
+        self.nodes = self.ctx['nodes']
+        self.nodes_client_only = self.ctx['nodes_client_only']
+        self.nodes_cluster = self.ctx['nodes_cluster']
+        self.nodes_gateway = self.ctx['nodes_gateway']
+        self.nodes_storage = self.ctx['nodes_storage']
+        self.nodes_storage_only = self.ctx['nodes_storage_only']
+        self.remote_lookup_table = self.ctx['remote_lookup_table']
+        self.remotes = self.ctx['remotes']
+        self.roles = self.ctx['roles']
+        self.role_lookup_table = self.ctx['role_lookup_table']
+        self.role_types = self.ctx['role_types']
+        self.scripts = Scripts(self.ctx, self.log)
+        self.sm = ses_qa_ctx['salt_manager_instance']
+
+    def _populate_ses_qa_context(self):
+        global ses_qa_ctx
+        ses_qa_ctx['salt_manager_instance'] = SaltManager(self.ctx)
+        ses_qa_ctx['master_remote'] = ses_qa_ctx['salt_manager_instance'].master_remote
+
+    def os_type_and_version(self):
+        os_type = self.ctx.config.get('os_type', 'unknown')
+        os_version = float(self.ctx.config.get('os_version', 0))
+        return (os_type, os_version)
+
+    def setup(self):
+        super(SESQA, self).setup()
+
+    def begin(self):
+        super(SESQA, self).begin()
+
+    def end(self):
+        super(SESQA, self).end()
+        self.sm.gather_logs('/home/farm/.npm/_logs', 'dashboard-e2e-npm')
+        self.sm.gather_logs('/home/farm/.protractor-report', 'dashboard-e2e-protractor')
+
+    def teardown(self):
+        super(SESQA, self).teardown()
+
+
+class Validation(SESQA):
+
+    err_prefix = "(validation subtask) "
+
+    def __init__(self, ctx, config):
+        global ses_qa_ctx
+        ses_qa_ctx['logger_obj'] = log.getChild('validation')
+        self.name = 'ses_qa.validation'
+        super(Validation, self).__init__(ctx, config)
+        self.log.debug("munged config is {}".format(self.config))
+
+    def mgr_plugin_influx(self, **kwargs):
+        """
+        Minimal/smoke test for the MGR influx plugin
+
+        Tests the 'influx' MGR plugin, but only on openSUSE Leap 15.0.
+
+        Testing on SLE-15 is not currently possible because the influxdb
+        package is not built in IBS for anything higher than SLE-12-SP4.
+        Getting it to build for SLE-15 requires a newer golang stack than what
+        is available in SLE-15 - see
+        https://build.suse.de/project/show/NON_Public:infrastructure:icinga2
+        for how another team is building it (and no, we don't want to do that).
+
+        Testing on openSUSE Leap 15.0 is only possible because we are building
+        the influxdb package in filesystems:ceph:nautilus with modified project
+        metadata.
+
+        (This problem will hopefully go away when we switch to SLE-15-SP1.)
+        """
+        zypper_cmd = ("sudo zypper --non-interactive --no-gpg-check "
+                      "install --force --no-recommends {}")
+        os_type, os_version = self.os_type_and_version()
+        if os_type == 'opensuse' and os_version >= 15:
+            self.ctx.cluster.run(
+                args=zypper_cmd.format(' '.join(["python3-influxdb", "influxdb"]))
+                )
+            self.scripts.run(
+                self.master_remote,
+                'mgr_plugin_influx.sh',
+                )
+        else:
+            self.log.warning(
+                "mgr_plugin_influx test case not implemented for OS ->{}<-"
+                .format(os_type + " " + str(os_version))
+                )
+
+    def begin(self):
+        self.log.debug("Processing tests: ->{}<-".format(self.config.keys()))
+        for method_spec, kwargs in self.config.items():
+            kwargs = {} if not kwargs else kwargs
+            if not isinstance(kwargs, dict):
+                raise ConfigError(self.err_prefix + "Method config must be a dict")
+            self.log.info(
+                "Running test {} with config ->{}<-"
+                .format(method_spec, kwargs)
+                )
+            method = getattr(self, method_spec, None)
+            if method:
+                method(**kwargs)
+            else:
+                raise ConfigError(self.err_prefix + "No such method ->{}<-"
+                                  .format(method_spec))
+
+    def drive_replace_initiate(self, **kwargs):
+        """
+        Initiate DeepSea drive replacement
+
+        Assumes there is one drive that is not being deployed (1node5disks with
+        DriveGroup `limit: 4`).
+
+        In order to "hide" an existing disk from ceph.c_v in teuthology,
+        the disk is formatted and mounted.
+        """
+        total_osds = self.master_remote.sh(number_of_osds_in_cluster)
+        osd_id = 0
+        disks = self._get_drive_group_limit()
+        assert int(total_osds) == disks, "Unexpected number of osds {} (expected {})"\
+            .format(total_osds, disks)
+        self.scripts.run(
+                self.master_remote,
+                'drive_replace.sh',
+                args=[osd_id]
+                )
+
+    def drive_replace_check(self, **kwargs):
+        """
+        DeepSea drive replacement follow-up check
+
+        The replaced osd_id should be back in the osd tree once stage.3 has run.
+        """
+        total_osds = self.master_remote.sh(number_of_osds_in_cluster)
+        disks = self._get_drive_group_limit()
+        assert int(total_osds) == disks, "Unexpected number of osds {} (expected {})"\
+            .format(total_osds, disks)
+        self.master_remote.sh("sudo ceph osd tree --format json | tee after.json")
+        self.master_remote.sh("diff before.json after.json && echo 'Drive Replaced OK'")
+
+    def _get_drive_group_limit(self, **kwargs):
+        """
+        Helper to get drive_groups limit field value
+        """
+        drive_group = next(x for x in self.ctx['config']['tasks']
+                           if 'deepsea' in x and 'drive_group' in x['deepsea'])
+        return int(drive_group['deepsea']['drive_group']['custom']['data_devices']['limit'])
+
+
+task = SESQA
+validation = Validation
index 5b8575ed94e7235c4fe3fa939661190b244d0c01..5815553989174b6597bcc086298d28cd7dbc8caa 100644 (file)
@@ -1,4 +1,27 @@
+import json
+
 from teuthology import misc
+from teuthology.contextutil import safe_while
+from teuthology.exceptions import (
+    CommandFailedError,
+    ConfigError,
+    ConnectionLostError,
+    )
+
+
+def enumerate_osds(remote, logger):
+    """
+    Given a remote, enumerates the OSDs (if any) running on the machine
+    associated with that remote.
+    """
+    hostname = remote.hostname
+    logger.info("Enumerating OSDs on {}".format(hostname))
+    cmd = ("sudo ceph osd tree -f json | "
+           "jq -c '[.nodes[] | select(.name == \"{}\")][0].children'"
+           .format(hostname.split(".")[0]))
+    osds = json.loads(remote.sh(cmd))
+    return osds
+
 
 def get_remote(ctx, cluster, service_type, service_id):
     """
@@ -22,5 +45,243 @@ def get_remote(ctx, cluster, service_type, service_id):
                                                               service_id))
     return remote
 
+
 def get_remote_for_role(ctx, role):
     return get_remote(ctx, *misc.split_role(role))
+
+
+def copy_directory_recursively(from_path, to_remote, to_path=None):
+    """
+    Recursively copies a local directory to a remote.
+    """
+    if to_path is None:
+        to_path = from_path
+    misc.sh("scp -r -v {from_path} {host}:{to_path}".format(
+            from_path=from_path, host=to_remote.name, to_path=to_path))
+
+
+def introspect_roles(ctx, logger, quiet=True):
+    """
+    Creates the following keys in ctx:
+
+        nodes,
+        nodes_client_only,
+        nodes_cluster,
+        nodes_gateway,
+        nodes_storage, and
+        nodes_storage_only.
+
+    These are all simple lists of hostnames.
+
+    Also creates
+
+        ctx['remotes'],
+
+    which is a dict of teuthology "remote" objects, which look like this:
+
+        { remote1_name: remote1_obj, ..., remoten_name: remoten_obj }
+
+    Also creates
+
+        ctx['role_types']
+
+    which is just like the "roles" list, except it contains only unique
+    role types per node.
+
+    Finally, creates:
+
+        ctx['role_lookup_table']
+
+    which will look something like this:
+
+        {
+            "osd": { "osd.0": osd0remname, ..., "osd.n": osdnremname },
+            "mon": { "mon.a": monaremname, ..., "mon.n": monnremname },
+            ...
+        }
+
+    and
+
+        ctx['remote_lookup_table']
+
+    which looks like this:
+
+        {
+            remote0name: [ "osd.0", "client.0" ],
+            ...
+            remotenname: [ remotenrole0, ..., remotenrole99 ],
+        }
+
+    (In other words, remote_lookup_table is just like the roles
+    stanza, except the role lists are keyed by remote name.)
+    """
+    # initialization phase
+    cluster_roles = ['mon', 'mgr', 'osd', 'mds']
+    non_storage_cluster_roles = ['mon', 'mgr', 'mds']
+    gateway_roles = ['rgw', 'igw', 'ganesha']
+    roles = ctx.config['roles']
+    nodes = []
+    nodes_client_only = []
+    nodes_cluster = []
+    non_storage_cluster_nodes = []
+    nodes_gateway = []
+    nodes_storage = []
+    nodes_storage_only = []
+    remotes = {}
+    role_types = []
+    role_lookup_table = {}
+    remote_lookup_table = {}
+    # introspection phase
+    idx = 0
+    for node_roles_list in roles:
+        assert isinstance(node_roles_list, list), \
+            "node_roles_list is a list"
+        assert node_roles_list, "node_roles_list is not empty"
+        remote = get_remote_for_role(ctx, node_roles_list[0])
+        role_types.append([])
+        if not quiet:
+            logger.debug("Considering remote name {}, hostname {}"
+                         .format(remote.name, remote.hostname))
+        nodes += [remote.hostname]
+        remotes[remote.hostname] = remote
+        remote_lookup_table[remote.hostname] = node_roles_list
+        # inner loop: roles (something like "osd.1" or "c2.mon.a")
+        for role in node_roles_list:
+            # FIXME: support multiple clusters as used in, e.g.,
+            # rgw/multisite suite
+            role_arr = role.split('.')
+            if len(role_arr) != 2:
+                raise ConfigError("Unsupported role ->{}<-"
+                                  .format(role))
+            (role_type, _) = role_arr
+            if role_type not in role_lookup_table:
+                role_lookup_table[role_type] = {}
+            role_lookup_table[role_type][role] = remote.hostname
+            if role_type in cluster_roles:
+                nodes_cluster += [remote.hostname]
+            if role_type in gateway_roles:
+                nodes_gateway += [remote.hostname]
+            if role_type in non_storage_cluster_roles:
+                non_storage_cluster_nodes += [remote.hostname]
+            if role_type == 'osd':
+                nodes_storage += [remote.hostname]
+            if role_type not in role_types[idx]:
+                role_types[idx] += [role_type]
+        idx += 1
+    nodes_cluster = list(set(nodes_cluster))
+    nodes_gateway = list(set(nodes_gateway))
+    nodes_storage = list(set(nodes_storage))
+    nodes_storage_only = []
+    for node in nodes_storage:
+        if node not in non_storage_cluster_nodes:
+            if node not in nodes_gateway:
+                nodes_storage_only += [node]
+    nodes_client_only = list(
+        set(nodes).difference(set(nodes_cluster).union(set(nodes_gateway)))
+        )
+    if not quiet:
+        logger.debug("nodes_client_only is ->{}<-".format(nodes_client_only))
+    assign_vars = [
+        'nodes',
+        'nodes_client_only',
+        'nodes_cluster',
+        'nodes_gateway',
+        'nodes_storage',
+        'nodes_storage_only',
+        'remote_lookup_table',
+        'remotes',
+        'role_lookup_table',
+        'role_types',
+        ]
+    for var in assign_vars:
+        exec("ctx['{var}'] = {var}".format(var=var))
+    ctx['dev_env'] = True if len(nodes_cluster) < 4 else False
+    if not quiet:
+        # report phase
+        logger.info("ROLE INTROSPECTION REPORT")
+        report_vars = assign_vars + ['dev_env']
+        for var in report_vars:
+            logger.info("{} == {}".format(var, ctx[var]))
+
+
+def remote_exec(remote, cmd_str, logger, log_spec, quiet=True, rerun=False, tries=0):
+    """
+    Execute cmd_str, catching CommandFailedError (journalctl output is saved to
+    /var/log/journalctl.log before re-raising) and ConnectionLostError (cmd_str
+    is rerun post-reboot if the rerun flag is set), until one of the following
+    conditions is fulfilled:
+    1) execution succeeds
+    2) the number of attempts (tries) is exceeded
+    3) CommandFailedError is raised
+    """
+    cmd_str = "sudo bash -c '{}'".format(cmd_str)
+    # if quiet:
+    #     cmd_args += [run.Raw('2>'), "/dev/null"]
+    already_rebooted_at_least_once = False
+    if tries:
+        remote.run(args="uptime")
+        logger.info("Running command ->{}<- on {}. "
+                    "This might cause the machine to reboot!"
+                    .format(cmd_str, remote.hostname))
+    with safe_while(sleep=60, tries=tries, action="wait for reconnect") as proceed:
+        while proceed():
+            try:
+                if already_rebooted_at_least_once:
+                    if not rerun:
+                        remote.run(args="echo Back from reboot ; uptime")
+                        break
+                remote.run(args=cmd_str)
+                break
+            except CommandFailedError:
+                logger.error(("{} failed. Creating /var/log/journalctl.log with "
+                              "output of \"journalctl --all\"!").format(log_spec))
+                remote.sh("sudo su -c 'journalctl --all > /var/log/journalctl.log'")
+                raise
+            except ConnectionLostError:
+                already_rebooted_at_least_once = True
+                if tries < 1:
+                    raise
+                logger.warning("No connection established yet..")
+
+
+def remote_run_script_as_root(remote, path, data, args=None):
+    """
+    Wrapper around misc.write_file to simplify the design pattern:
+    1. use misc.write_file to create bash script on the remote
+    2. use Remote.run to run that bash script via "sudo bash $SCRIPT"
+    """
+    misc.write_file(remote, path, data)
+    cmd = 'sudo bash {}'.format(path)
+    if args:
+        cmd += ' ' + ' '.join(args)
+    remote.run(label=path, args=cmd)
+
+
+def sudo_append_to_file(remote, path, data):
+    """
+    Append data to a remote file. Standard 'cat >>' - creates file
+    if it doesn't exist, but all directory components in the file
+    path must exist.
+
+    :param remote: Remote site.
+    :param path: Path on the remote being written to.
+    :param data: Python string containing data to be written.
+    """
+    remote.run(
+        args=[
+            'sudo',
+            'sh',
+            '-c',
+            'cat >> ' + path,
+        ],
+        stdin=data,
+    )
+
+
+def get_rpm_pkg_version(remote, pkg, logger):
+    """Gather RPM package version"""
+    version = None
+    try:
+        version = remote.sh('rpm --queryformat="%{{VERSION}}" -q {}'.format(pkg))
+    except CommandFailedError:
+        logger.warning("Package {} is not installed".format(pkg))
+    return version