--- /dev/null
+..
\ No newline at end of file
--- /dev/null
+ceph_cm: salt
+ceph_cm_ansible: false
--- /dev/null
+overrides:
+ deepsea:
+ alternative_defaults:
+ tuned_mgr_init: default-off
+ tuned_mon_init: default-off
+ tuned_osd_init: default-off
--- /dev/null
+overrides:
+ deepsea:
+ alternative_defaults:
+ upgrade_init: zypper-dup
--- /dev/null
+../.qa
\ No newline at end of file
--- /dev/null
+overrides:
+ deepsea:
+ cli: false
--- /dev/null
+overrides:
+ deepsea:
+ cli: true
--- /dev/null
+tasks:
+ - deepsea.create_pools:
+ - deepsea.orch:
+ stage: 4
--- /dev/null
+tasks:
+ - deepsea:
+ allow_python2: false
+ drive_group: default
+ - deepsea.orch:
+ stage: prep
+ - deepsea.orch:
+ stage: 1
+ - deepsea.policy:
+ - deepsea.orch:
+ stage: 2
+ - deepsea.ceph_conf:
+ - deepsea.orch:
+ stage: 3
--- /dev/null
+openstack:
+- volumes: # attached to each instance
+ count: 0
+ size: 10 # GB
--- /dev/null
+openstack:
+- volumes: # attached to each instance
+ count: 1
+ size: 10 # GB
--- /dev/null
+openstack:
+- volumes: # attached to each instance
+ count: 2
+ size: 10 # GB
--- /dev/null
+openstack:
+- volumes: # attached to each instance
+ count: 3
+ size: 10 # GB
--- /dev/null
+openstack:
+- volumes: # attached to each instance
+ count: 4
+ size: 10 # GB
--- /dev/null
+openstack:
+- volumes: # attached to each instance
+ count: 5
+ size: 10 # GB
--- /dev/null
+../.qa
\ No newline at end of file
--- /dev/null
+.qa/distros/all/opensuse_15.1.yaml
\ No newline at end of file
--- /dev/null
+.qa/distros/all/sle_15.1.yaml
\ No newline at end of file
--- /dev/null
+#
+# This file is part of the DeepSea integration test suite
+#
+
+# BASEDIR is set by the calling script
+source $BASEDIR/common/deploy.sh
+source $BASEDIR/common/helper.sh
+source $BASEDIR/common/json.sh
+source $BASEDIR/common/nfs-ganesha.sh
+source $BASEDIR/common/policy.sh
+source $BASEDIR/common/pool.sh
+source $BASEDIR/common/rbd.sh
+source $BASEDIR/common/rgw.sh
+source $BASEDIR/common/zypper.sh
+
+
+#
+# functions that process command-line arguments
+#
+
+function assert_enhanced_getopt {
+ set +e
+ echo -n "Running 'getopt --test'... "
+ getopt --test > /dev/null
+ if [ $? -ne 4 ]; then
+ echo "FAIL"
+ echo "This script requires enhanced getopt. Bailing out."
+ exit 1
+ fi
+ echo "PASS"
+ set -e
+}
+
+
+#
+# functions that run the DeepSea stages
+#
+
+function _disable_update_in_stage_0 {
+ cp /srv/salt/ceph/stage/prep/master/default.sls /srv/salt/ceph/stage/prep/master/default-orig.sls
+ cp /srv/salt/ceph/stage/prep/master/default-no-update-no-reboot.sls /srv/salt/ceph/stage/prep/master/default.sls
+ cp /srv/salt/ceph/stage/prep/minion/default.sls /srv/salt/ceph/stage/prep/minion/default-orig.sls
+ cp /srv/salt/ceph/stage/prep/minion/default-no-update-no-reboot.sls /srv/salt/ceph/stage/prep/minion/default.sls
+}
+
+function run_stage_0 {
+ test "$NO_UPDATE" && _disable_update_in_stage_0
+ _run_stage 0 "$@"
+ if _root_fs_is_btrfs ; then
+ echo "Root filesystem is btrfs: creating subvolumes for /var/lib/ceph"
+ salt-run state.orch ceph.migrate.subvolume
+ else
+ echo "Root filesystem is *not* btrfs: skipping subvolume creation"
+ fi
+ test "$STAGE_SUCCEEDED"
+}
+
+function run_stage_1 {
+ _run_stage 1 "$@"
+ test "$STAGE_SUCCEEDED"
+}
+
+function run_stage_2 {
+ # This was needed with SCC repos
+ #salt '*' cmd.run "for delay in 60 60 60 60 ; do sudo zypper --non-interactive --gpg-auto-import-keys refresh && break ; sleep $delay ; done"
+ _run_stage 2 "$@"
+ salt_pillar_items 2>/dev/null
+ test "$STAGE_SUCCEEDED"
+}
+
+function _disable_tuned {
+ local prefix=/srv/salt/ceph/tuned
+ mv $prefix/mgr/default.sls $prefix/mgr/default.sls-MOVED
+ mv $prefix/mon/default.sls $prefix/mon/default.sls-MOVED
+ mv $prefix/osd/default.sls $prefix/osd/default.sls-MOVED
+ mv $prefix/mgr/default-off.sls $prefix/mgr/default.sls
+ mv $prefix/mon/default-off.sls $prefix/mon/default.sls
+ mv $prefix/osd/default-off.sls $prefix/osd/default.sls
+}
+
+function run_stage_3 {
+ cat_global_conf
+ lsblk_on_storage_node
+ if [ "$TUNED" ] ; then
+ echo "WWWW: tuned will be deployed as usual"
+ else
+ echo "WWWW: tuned will NOT be deployed"
+ _disable_tuned
+ fi
+ _run_stage 3 "$@"
+ lsblk_on_storage_node
+ ceph osd tree
+ cat_ceph_conf
+ admin_auth_status
+ test "$STAGE_SUCCEEDED"
+}
+
+function run_stage_4 {
+ _run_stage 4 "$@"
+ test "$STAGE_SUCCEEDED"
+}
+
+function run_stage_5 {
+ _run_stage 5 "$@"
+ test "$STAGE_SUCCEEDED"
+}
+
+
+#
+# functions that generate /etc/ceph/ceph.conf
+# see https://github.com/SUSE/DeepSea/tree/master/srv/salt/ceph/configuration/files/ceph.conf.d
+#
+
+function change_rgw_conf {
+ cat <<'EOF' >> /srv/salt/ceph/configuration/files/ceph.conf.d/rgw.conf
+foo = bar
+EOF
+}
+
+function change_osd_conf {
+ cat <<'EOF' >> /srv/salt/ceph/configuration/files/ceph.conf.d/osd.conf
+foo = bar
+EOF
+}
+
+function change_mon_conf {
+ cat <<'EOF' >> /srv/salt/ceph/configuration/files/ceph.conf.d/mon.conf
+foo = bar
+EOF
+}
+
+function ceph_conf_small_cluster {
+ local STORAGENODES=$(json_storage_nodes)
+ test -n "$STORAGENODES"
+ if [ "$STORAGENODES" -eq 1 ] ; then
+ echo "Adjusting ceph.conf for operation with 1 storage node"
+ cat <<'EOF' >> /srv/salt/ceph/configuration/files/ceph.conf.d/global.conf
+mon pg warn min per osd = 16
+osd pool default size = 2
+osd crush chooseleaf type = 0 # failure domain == osd
+EOF
+ elif [ "$STORAGENODES" -eq 2 -o "$STORAGENODES" -eq 3 ] ; then
+ echo "Adjusting ceph.conf for operation with 2 or 3 storage nodes"
+ cat <<'EOF' >> /srv/salt/ceph/configuration/files/ceph.conf.d/global.conf
+mon pg warn min per osd = 8
+osd pool default size = 2
+EOF
+ else
+ echo "Four or more storage nodes; not adjusting ceph.conf"
+ fi
+}
+
+function ceph_conf_mon_allow_pool_delete {
+ echo "Adjusting ceph.conf to allow pool deletes"
+ cat <<'EOF' >> /srv/salt/ceph/configuration/files/ceph.conf.d/global.conf
+mon allow pool delete = true
+EOF
+}
+
+function ceph_conf_dashboard {
+ echo "Adjusting ceph.conf for deployment of dashboard MGR module"
+ cat <<'EOF' >> /srv/salt/ceph/configuration/files/ceph.conf.d/mon.conf
+mgr initial modules = dashboard
+EOF
+}
+
+
+#
+# functions that print status information
+#
+
+function cat_deepsea_log {
+ cat /var/log/deepsea.log
+}
+
+function cat_salt_config {
+ cat /etc/salt/master
+ cat /etc/salt/minion
+}
+
+function cat_policy_cfg {
+ cat /srv/pillar/ceph/proposals/policy.cfg
+}
+
+function salt_pillar_items {
+ salt '*' pillar.items
+}
+
+function salt_pillar_get_roles {
+ salt '*' pillar.get roles
+}
+
+function salt_cmd_run_lsblk {
+ salt '*' cmd.run lsblk
+}
+
+function cat_global_conf {
+ cat /srv/salt/ceph/configuration/files/ceph.conf.d/global.conf || true
+}
+
+function cat_ceph_conf {
+ salt '*' cmd.run "cat /etc/ceph/ceph.conf" 2>/dev/null
+}
+
+function admin_auth_status {
+ ceph auth get client.admin
+ ls -l /etc/ceph/ceph.client.admin.keyring
+ cat /etc/ceph/ceph.client.admin.keyring
+}
+
+function number_of_hosts_in_ceph_osd_tree {
+ ceph osd tree -f json-pretty | jq '[.nodes[] | select(.type == "host")] | length'
+}
+
+function number_of_osds_in_ceph_osd_tree {
+ ceph osd tree -f json-pretty | jq '[.nodes[] | select(.type == "osd")] | length'
+}
+
+function ceph_cluster_status {
+ ceph pg stat -f json-pretty
+ _grace_period 1
+ ceph health detail -f json-pretty
+ _grace_period 1
+ ceph osd tree
+ _grace_period 1
+ ceph osd pool ls detail -f json-pretty
+ _grace_period 1
+ ceph -s
+}
+
+function ceph_log_grep_enoent_eaccess {
+ set +e
+ grep -rH "Permission denied" /var/log/ceph
+ grep -rH "No such file or directory" /var/log/ceph
+ set -e
+}
+
+
+#
+# core validation tests
+#
+
+function ceph_version_test {
+# test that ceph RPM version matches "ceph --version"
+# for a loose definition of "matches"
+ rpm -q ceph
+ local RPM_NAME=$(rpm -q ceph)
+ local RPM_CEPH_VERSION=$(perl -e '"'"$RPM_NAME"'" =~ m/ceph-(\d+\.\d+\.\d+)/; print "$1\n";')
+ echo "According to RPM, the ceph upstream version is ->$RPM_CEPH_VERSION<-"
+ test -n "$RPM_CEPH_VERSION"
+ ceph --version
+ local BUFFER=$(ceph --version)
+ local CEPH_CEPH_VERSION=$(perl -e '"'"$BUFFER"'" =~ m/ceph version (\d+\.\d+\.\d+)/; print "$1\n";')
+ echo "According to \"ceph --version\", the ceph upstream version is ->$CEPH_CEPH_VERSION<-"
+ test -n "$RPM_CEPH_VERSION"
+ test "$RPM_CEPH_VERSION" = "$CEPH_CEPH_VERSION"
+}
+
+function ceph_health_test {
+ local LOGFILE=/tmp/ceph_health_test.log
+ echo "Waiting up to 15 minutes for HEALTH_OK..."
+ salt -C 'I@roles:master' wait.until status=HEALTH_OK timeout=900 check=1 2>/dev/null | tee $LOGFILE
+ # last line: determines return value of function
+ ! grep -q 'Timeout expired' $LOGFILE
+}
+
+function rados_write_test {
+ #
+ # NOTE: function assumes the pool "write_test" already exists. Pool can be
+ # created by calling e.g. "create_all_pools_at_once write_test" immediately
+ # before calling this function.
+ #
+ ceph osd pool application enable write_test deepsea_qa
+ echo "dummy_content" > verify.txt
+ rados -p write_test put test_object verify.txt
+ rados -p write_test get test_object verify_returned.txt
+ test "x$(cat verify.txt)" = "x$(cat verify_returned.txt)"
+}
+
+function lsblk_on_storage_node {
+ local TESTSCRIPT=/tmp/lsblk_test.sh
+ local STORAGENODE=$(_first_x_node storage)
+ cat << 'EOF' > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+echo "running lsblk as $(whoami) on $(hostname --fqdn)"
+lsblk
+echo "Result: OK"
+EOF
+ _run_test_script_on_node $TESTSCRIPT $STORAGENODE
+}
+
+function cephfs_mount_and_sanity_test {
+ #
+ # run cephfs mount test script on the client node
+ # mounts cephfs in /mnt, touches a file, asserts that it exists
+ #
+ local TESTSCRIPT=/tmp/cephfs_test.sh
+ local CLIENTNODE=$(_client_node)
+ cat << 'EOF' > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+echo "cephfs mount test script running as $(whoami) on $(hostname --fqdn)"
+TESTMONS=$(ceph-conf --lookup 'mon_initial_members' | tr -d '[:space:]')
+TESTSECR=$(grep 'key =' /etc/ceph/ceph.client.admin.keyring | awk '{print $NF}')
+echo "MONs: $TESTMONS"
+echo "admin secret: $TESTSECR"
+test -d /mnt
+mount -t ceph ${TESTMONS}:/ /mnt -o name=admin,secret="$TESTSECR"
+touch /mnt/bubba
+test -f /mnt/bubba
+umount /mnt
+echo "Result: OK"
+EOF
+ # FIXME: assert no MDS running on $CLIENTNODE
+ _run_test_script_on_node $TESTSCRIPT $CLIENTNODE
+}
+
+function iscsi_kludge {
+ #
+ # apply kludge to work around bsc#1049669
+ #
+ local TESTSCRIPT=/tmp/iscsi_kludge.sh
+ local IGWNODE=$(_first_x_node igw)
+ cat << 'EOF' > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+echo "igw kludge script running as $(whoami) on $(hostname --fqdn)"
+sed -i -e 's/\("host": "target[[:digit:]]\+\)"/\1.teuthology"/' /tmp/lrbd.conf
+cat /tmp/lrbd.conf
+source /etc/sysconfig/lrbd; lrbd -v $LRBD_OPTIONS -f /tmp/lrbd.conf
+systemctl restart lrbd.service
+systemctl --no-pager --full status lrbd.service
+echo "Result: OK"
+EOF
+ _run_test_script_on_node $TESTSCRIPT $IGWNODE
+}
+
+function igw_info {
+ #
+ # peek at igw information on the igw node
+ #
+ local TESTSCRIPT=/tmp/igw_info.sh
+ local IGWNODE=$(_first_x_node igw)
+ cat << 'EOF' > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+echo "igw info script running as $(whoami) on $(hostname --fqdn)"
+rpm -q lrbd || true
+lrbd --output || true
+ls -lR /sys/kernel/config/target/ || true
+ss --tcp --numeric state listening
+echo "See 3260 there?"
+echo "Result: OK"
+EOF
+ _run_test_script_on_node $TESTSCRIPT $IGWNODE
+}
+
+function iscsi_mount_and_sanity_test {
+ #
+ # run iscsi mount test script on the client node
+ # mounts iscsi in /mnt, touches a file, asserts that it exists
+ #
+ local TESTSCRIPT=/tmp/iscsi_test.sh
+ local CLIENTNODE=$(_client_node)
+ local IGWNODE=$(_first_x_node igw)
+ cat << EOF > $TESTSCRIPT
+set -e
+trap 'echo "Result: NOT_OK"' ERR
+for delay in 60 60 60 60 ; do
+ sudo zypper --non-interactive --gpg-auto-import-keys refresh && break
+    sleep \$delay
+done
+set -x
+zypper --non-interactive install --no-recommends open-iscsi multipath-tools
+systemctl start iscsid.service
+sleep 5
+systemctl --no-pager --full status iscsid.service
+iscsiadm -m discovery -t st -p $IGWNODE
+iscsiadm -m node -L all
+systemctl start multipathd.service
+sleep 5
+systemctl --no-pager --full status multipathd.service
+ls -lR /dev/mapper
+ls -l /dev/disk/by-path
+ls -l /dev/disk/by-*id
+multipath -ll
+mkfs -t xfs /dev/dm-0
+test -d /mnt
+mount /dev/dm-0 /mnt
+df -h /mnt
+touch /mnt/bubba
+test -f /mnt/bubba
+umount /mnt
+echo "Result: OK"
+EOF
+ # FIXME: assert script not running on the iSCSI gateway node
+ _run_test_script_on_node $TESTSCRIPT $CLIENTNODE
+}
+
+function test_systemd_ceph_osd_target_wants {
+ #
+ # see bsc#1051598 in which ceph-disk was omitting --runtime when it enabled
+ # ceph-osd@$ID.service units
+ #
+ local TESTSCRIPT=/tmp/test_systemd_ceph_osd_target_wants.sh
+ local STORAGENODE=$(_first_x_node storage)
+ cat << 'EOF' > $TESTSCRIPT
+set -x
+CEPH_OSD_WANTS="/systemd/system/ceph-osd.target.wants"
+ETC_CEPH_OSD_WANTS="/etc$CEPH_OSD_WANTS"
+RUN_CEPH_OSD_WANTS="/run$CEPH_OSD_WANTS"
+ls -l $ETC_CEPH_OSD_WANTS
+ls -l $RUN_CEPH_OSD_WANTS
+set -e
+trap 'echo "Result: NOT_OK"' ERR
+echo "Asserting that there is no directory $ETC_CEPH_OSD_WANTS"
+test -d "$ETC_CEPH_OSD_WANTS" && false
+echo "Asserting that $RUN_CEPH_OSD_WANTS exists, is a directory, and is not empty"
+test -d "$RUN_CEPH_OSD_WANTS"
+test -n "$(ls --almost-all $RUN_CEPH_OSD_WANTS)"
+echo "Result: OK"
+EOF
+ _run_test_script_on_node $TESTSCRIPT $STORAGENODE
+}
+
+function configure_all_OSDs_to_filestore {
+ salt-run proposal.populate format=filestore name=filestore 2>/dev/null
+ chown salt:salt /srv/pillar/ceph/proposals/policy.cfg
+ sed -i 's/profile-default/profile-filestore/g' /srv/pillar/ceph/proposals/policy.cfg
+}
+
+function verify_OSD_type {
+ # checking with 'ceph osd metadata' command
+ # 1st input argument: type 'filestore' or 'bluestore'
+ # 2nd input argument: OSD ID
+    local osd_type=$(ceph osd metadata $2 -f json-pretty | jq '.osd_objectstore')
+ if [[ $osd_type != \"$1\" ]]
+ then
+ echo "Error: Object store type is not $1 for OSD.ID : $2"
+ exit 1
+ else
+ echo OSD.${2} $osd_type
+ fi
+}
+
+function check_OSD_type {
+ # expecting as argument 'filestore' or 'bluestore'
+    for i in $(ceph osd ls) ; do verify_OSD_type $1 $i ; done
+}
+
+function migrate_to_bluestore {
+ salt-run state.orch ceph.migrate.policy 2>/dev/null
+ sed -i 's/profile-filestore/migrated-profile-filestore/g' /srv/pillar/ceph/proposals/policy.cfg
+ salt-run disengage.safety 2>/dev/null
+ salt-run state.orch ceph.migrate.osds 2>/dev/null
+}
--- /dev/null
+# This file is part of the DeepSea integration test suite
+
+#
+# separate file to house the deploy_ceph function
+#
+
+DEPLOY_PHASE_COMPLETE_MESSAGE="deploy phase complete!"
+
+
+function _os_specific_install_deps {
+ echo "Installing dependencies on the Salt Master node"
+ local DEPENDENCIES="jq
+ "
+ _zypper_ref_on_master
+ for d in $DEPENDENCIES ; do
+ _zypper_install_on_master $d
+ done
+}
+
+function _determine_master_minion {
+ type hostname
+ MASTER_MINION=$(hostname --fqdn)
+ salt $MASTER_MINION test.ping
+}
+
+function _os_specific_repos_and_packages_info {
+ _dump_salt_master_zypper_repos
+ type rpm
+ rpm -q salt-master
+ rpm -q salt-minion
+ rpm -q salt-api
+ rpm -q deepsea || true
+}
+
+function _set_deepsea_minions {
+ #
+ # set deepsea_minions to * - see https://github.com/SUSE/DeepSea/pull/526
+ # (otherwise we would have to set deepsea grain on all minions)
+ echo "deepsea_minions: '*'" > /srv/pillar/ceph/deepsea_minions.sls
+ cat /srv/pillar/ceph/deepsea_minions.sls
+}
+
+function _initialize_minion_array {
+ local m=
+ local i=0
+ if type salt-key > /dev/null 2>&1; then
+ MINION_LIST=$(salt-key -L -l acc | grep -v '^Accepted Keys')
+ for m in $MINION_LIST ; do
+            MINION_ARRAY[$i]=$m
+ i=$((i+1))
+ done
+ else
+ echo "Cannot find salt-key. Is Salt installed? Is this running on the Salt Master?"
+ exit 1
+ fi
+ echo $i
+}
+
+function _initialize_storage_profile {
+ test "$STORAGE_PROFILE"
+ case "$STORAGE_PROFILE" in
+ default) echo "Storage profile: bluestore OSDs (default)" ;;
+ dmcrypt) echo "Storage profile: encrypted bluestore OSDs" ;;
+ filestore) echo "Storage profile: filestore OSDs" ;;
+ random) echo "Storage profile will be chosen randomly" ;;
+ *)
+ CUSTOM_STORAGE_PROFILE="$STORAGE_PROFILE"
+ STORAGE_PROFILE="custom"
+ echo "Storage profile: custom ($CUSTOM_STORAGE_PROFILE)"
+ ;;
+ esac
+}
+
+function _initialize_and_vet_nodes {
+ if [ -n "$MIN_NODES" ] ; then
+ echo "MIN_NODES is set to $MIN_NODES"
+ PROPOSED_MIN_NODES="$MIN_NODES"
+ else
+ echo "MIN_NODES was not set. Default is 1"
+ PROPOSED_MIN_NODES=1
+ fi
+ if [ -n "$CLIENT_NODES" ] ; then
+ echo "CLIENT_NODES is set to $CLIENT_NODES"
+ else
+ echo "CLIENT_NODES was not set. Default is 0"
+ CLIENT_NODES=0
+ fi
+ MIN_NODES=$(($CLIENT_NODES + 1))
+ if [ "$PROPOSED_MIN_NODES" -lt "$MIN_NODES" ] ; then
+ echo "Proposed MIN_NODES value is too low. Need at least 1 + CLIENT_NODES"
+ exit 1
+ fi
+ test "$PROPOSED_MIN_NODES" -gt "$MIN_NODES" && MIN_NODES="$PROPOSED_MIN_NODES"
+ echo "Final MIN_NODES is $MIN_NODES"
+ echo "TOTAL_NODES is $TOTAL_NODES"
+ test "$TOTAL_NODES"
+ test "$TOTAL_NODES" -ge "$MIN_NODES"
+ STORAGE_NODES=$((TOTAL_NODES - CLIENT_NODES))
+ echo "WWWW"
+ echo "This script will use DeepSea with a cluster of $TOTAL_NODES nodes total (including Salt Master)."
+ echo "Of these, $CLIENT_NODES are assumed to be clients (nodes without any DeepSea roles except \"admin\")."
+}
+
+function _zypper_ps {
+ salt '*' cmd.run 'zypper ps -s' 2>/dev/null || true
+}
+
+function _python_versions {
+ type python2 > /dev/null 2>&1 && python2 --version || echo "Python 2 not installed"
+ type python3 > /dev/null 2>&1 && python3 --version || echo "Python 3 not installed"
+}
+
+function initialization_sequence {
+ set +x
+ _determine_master_minion
+ _os_specific_install_deps
+ _os_specific_repos_and_packages_info
+ set +e
+ _python_versions
+ type deepsea > /dev/null 2>&1 && deepsea --version || echo "deepsea CLI not installed"
+ TOTAL_MINIONS=$(_initialize_minion_array)
+ echo "There are $TOTAL_MINIONS minions in this Salt cluster"
+ set -e
+ _set_deepsea_minions
+ salt '*' saltutil.sync_all 2>/dev/null
+ TOTAL_NODES=$(json_total_nodes)
+ test "$TOTAL_NODES" = "$TOTAL_MINIONS"
+ _ping_minions_until_all_respond
+ cat_salt_config
+ _initialize_storage_profile
+ _initialize_and_vet_nodes
+ set -x
+    test $STORAGE_NODES -lt 4 && export DEV_ENV="true" || true
+}
+
+function pared_down_init_sequence {
+ test "$ALREADY_INITIALIZED" && return 0
+ set +x
+ TOTAL_MINIONS=$(_initialize_minion_array)
+ TOTAL_NODES=$(json_total_nodes)
+ test "$TOTAL_NODES" = "$TOTAL_MINIONS"
+ _ping_minions_until_all_respond
+ _initialize_and_vet_nodes
+ set -x
+ test "$STORAGE_NODES" -lt "4" -a -z "$DEV_ENV" && export DEV_ENV="true" || true
+}
+
+function salt_api_test {
+ local tmpfile=$(mktemp)
+ echo "Salt API test: BEGIN"
+ systemctl --no-pager --full status salt-api.service
+ curl http://$(hostname):8000/ | tee $tmpfile # show curl output in log
+ test -s $tmpfile
+ jq . $tmpfile >/dev/null
+ echo -en "\n" # this is just for log readability
+ rm $tmpfile
+ echo "Salt API test: END"
+}
+
+function deploy_ceph {
+ if [ "$START_STAGE" -lt "0" -o "$START_STAGE" -gt "4" ] ; then
+ echo "Received bad --start-stage value ->$START_STAGE<- (must be 0-4 inclusive)"
+ exit 1
+ fi
+ if _ceph_cluster_running ; then
+ echo "Running ceph cluster detected: skipping deploy phase"
+ pared_down_init_sequence
+ return 0
+ fi
+ if [ "$START_STAGE" = "0" ] ; then
+ if [ -z "$TEUTHOLOGY" ] ; then
+ initialization_sequence
+ fi
+ run_stage_0 "$CLI"
+ _zypper_ps
+ salt_api_test
+ fi
+ if [ "$START_STAGE" -le "1" ] ; then
+ test -n "$RGW" -a -n "$SSL" && rgw_ssl_init
+ run_stage_1 "$CLI"
+ policy_cfg_base
+ policy_cfg_mon_flex
+ test -n "$MDS" && policy_cfg_mds
+ policy_cfg_openattic_rgw_igw_ganesha
+ test "$RGW" && rgw_demo_users
+ case "$STORAGE_PROFILE" in
+ dmcrypt) proposal_populate_dmcrypt ;;
+ filestore) proposal_populate_filestore ;;
+ random) random_or_custom_storage_profile ;;
+ custom) random_or_custom_storage_profile ;;
+ default) ;;
+ *) echo "Bad storage profile ->$STORAGE_PROFILE<-. Bailing out!" ; exit 1 ;;
+ esac
+ policy_cfg_storage
+ cat_policy_cfg
+ fi
+ if [ "$START_STAGE" -le "2" ] ; then
+ run_stage_2 "$CLI"
+ ceph_conf_small_cluster
+ ceph_conf_mon_allow_pool_delete
+ ceph_conf_dashboard
+ test "$RBD" && ceph_conf_upstream_rbd_default_features
+ fi
+ if [ "$START_STAGE" -le "3" ] ; then
+ run_stage_3 "$CLI"
+ pre_create_pools
+ ceph_cluster_status
+ test "$RBD" && ceph_test_librbd_can_be_run
+ if [ -z "$MDS" -a -z "$NFS_GANESHA" -a -z "$RGW" ] ; then
+ echo "WWWW"
+ echo "Stage 3 OK, no roles requiring Stage 4: $DEPLOY_PHASE_COMPLETE_MESSAGE"
+ return 0
+ fi
+ test -n "$NFS_GANESHA" && nfs_ganesha_no_root_squash
+ fi
+ if [ "$START_STAGE" -le "4" ] ; then
+ run_stage_4 "$CLI"
+ if [ -n "$NFS_GANESHA" ] ; then
+ nfs_ganesha_cat_config_file
+ nfs_ganesha_debug_log
+ echo "WWWW"
+ echo "NFS-Ganesha set to debug logging"
+ fi
+ ceph_cluster_status
+ _zypper_ps
+ echo "Stage 4 OK: $DEPLOY_PHASE_COMPLETE_MESSAGE"
+ fi
+ return 0
+}
--- /dev/null
+# This file is part of the DeepSea integration test suite
+
+#
+# helper functions (not to be called directly from test scripts)
+#
+
+STAGE_TIMEOUT_DURATION="60m"
+
+function _report_stage_failure {
+ STAGE_SUCCEEDED=""
+ local stage_num=$1
+ local stage_status=$2
+
+ echo "********** Stage $stage_num failed **********"
+ test "$stage_status" = "124" && echo "Stage $stage_num timed out after $STAGE_TIMEOUT_DURATION"
+ set -ex
+ journalctl -r | head -n 2000
+ echo "WWWW"
+ echo "Finished dumping up to 2000 lines of journalctl"
+}
+
+function _run_stage {
+ local stage_num=$1
+
+ set +x
+ echo ""
+ echo "*********************************************"
+ echo "********** Running DeepSea Stage $stage_num **********"
+ echo "*********************************************"
+
+ STAGE_SUCCEEDED="non-empty string"
+ test -n "$CLI" && _run_stage_cli $stage_num || _run_stage_non_cli $stage_num
+}
+
+function _run_stage_cli {
+ local stage_num=$1
+ local deepsea_cli_output_path="/tmp/deepsea.${stage_num}.log"
+
+ set +e
+ set -x
+ timeout $STAGE_TIMEOUT_DURATION \
+ deepsea \
+ --log-file=/var/log/salt/deepsea.log \
+ --log-level=debug \
+ stage \
+ run \
+ ceph.stage.${stage_num} \
+ --simple-output \
+ 2>&1 | tee $deepsea_cli_output_path
+ local stage_status="${PIPESTATUS[0]}"
+ set +x
+ echo "deepsea exit status: $stage_status"
+ echo "WWWW"
+ if [ "$stage_status" != "0" ] ; then
+ _report_stage_failure $stage_num $stage_status
+ return 0
+ fi
+ if grep -q -F "failed=0" $deepsea_cli_output_path ; then
+ echo "********** Stage $stage_num completed successfully **********"
+ else
+ echo "ERROR: deepsea stage returned exit status 0, yet one or more steps failed. Bailing out!"
+ _report_stage_failure $stage_num $stage_status
+ fi
+ set -ex
+}
+
+function _run_stage_non_cli {
+ local stage_num=$1
+ local stage_log_path="/tmp/stage.${stage_num}.log"
+
+ set +e
+ set -x
+ timeout $STAGE_TIMEOUT_DURATION \
+ salt-run \
+ --no-color \
+ state.orch \
+ ceph.stage.${stage_num} \
+ 2>/dev/null | tee $stage_log_path
+ local stage_status="${PIPESTATUS[0]}"
+ set +x
+ echo "WWWW"
+ if [ "$stage_status" != "0" ] ; then
+ _report_stage_failure $stage_num $stage_status
+ return 0
+ fi
+ STAGE_FINISHED=$(grep -F 'Total states run' $stage_log_path)
+ if [ "$STAGE_FINISHED" ]; then
+ FAILED=$(grep -F 'Failed: ' $stage_log_path | sed 's/.*Failed:\s*//g' | head -1)
+ if [ "$FAILED" -gt "0" ]; then
+ echo "ERROR: salt-run returned exit status 0, yet one or more steps failed. Bailing out!"
+ _report_stage_failure $stage_num $stage_status
+ else
+ echo "********** Stage $stage_num completed successfully **********"
+ fi
+ else
+ echo "ERROR: salt-run returned exit status 0, yet Stage did not complete. Bailing out!"
+ _report_stage_failure $stage_num $stage_status
+ fi
+ set -ex
+}
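+
+# For illustration (not executed): _run_stage_non_cli greps for the summary
+# block that salt-run prints at the end of an orchestration, e.g. (sample
+# output; the numbers vary from run to run):
+#
+#   Succeeded: 42 (changed=17)
+#   Failed:     0
+#   -------------
+#   Total states run:     42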
+
+function _client_node {
+ salt --static --out json -C 'not I@roles:storage' test.ping 2>/dev/null | jq -r 'keys[0]'
+}
+
+function _master_has_role {
+ local ROLE=$1
+ echo "Asserting that master minion has role ->$ROLE<-"
+ salt $MASTER_MINION pillar.get roles 2>/dev/null
+ salt $MASTER_MINION pillar.get roles 2>/dev/null | grep -q "$ROLE"
+ echo "Yes, it does."
+}
+
+function _first_x_node {
+ local ROLE=$1
+ salt --static --out json -C "I@roles:$ROLE" test.ping 2>/dev/null | jq -r 'keys[0]'
+}
+
+function _first_storage_only_node {
+ local COMPOUND_TARGET="I@roles:storage"
+ local NOT_ROLES="mon
+mgr
+mds
+rgw
+igw
+ganesha
+"
+ local ROLE=
+ for ROLE in $NOT_ROLES ; do
+ COMPOUND_TARGET="$COMPOUND_TARGET and not I@roles:$ROLE"
+ done
+ local MAYBEJSON=$(salt --static --out json -C "$COMPOUND_TARGET" test.ping 2>/dev/null)
+ echo $MAYBEJSON | jq --raw-output 'keys[0]'
+}
+
+function _run_test_script_on_node {
+ local TESTSCRIPT=$1 # on success, TESTSCRIPT must output the exact string
+ # "Result: OK" on a line by itself, otherwise it will
+ # be considered to have failed
+ local TESTNODE=$2
+ local ASUSER=$3
+ salt-cp $TESTNODE $TESTSCRIPT $TESTSCRIPT 2>/dev/null
+ local LOGFILE=/tmp/test_script.log
+ local STDERR_LOGFILE=/tmp/test_script_stderr.log
+ local stage_status=
+ if [ -z "$ASUSER" -o "x$ASUSER" = "xroot" ] ; then
+ salt $TESTNODE cmd.run "sh $TESTSCRIPT" 2>$STDERR_LOGFILE | tee $LOGFILE
+ stage_status="${PIPESTATUS[0]}"
+ else
+ salt $TESTNODE cmd.run "sudo su $ASUSER -c \"bash $TESTSCRIPT\"" 2>$STDERR_LOGFILE | tee $LOGFILE
+ stage_status="${PIPESTATUS[0]}"
+ fi
+ local RESULT=$(grep -o -P '(?<=Result: )(OK)$' $LOGFILE) # since the script
+ # is run by salt, the output appears indented
+ test "x$RESULT" = "xOK" && return
+ echo "The test script that ran on $TESTNODE failed. The stderr output was as follows:"
+ cat $STDERR_LOGFILE
+ exit 1
+}
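+
+# Minimal sketch of a conforming test script (illustration only; the target
+# minion "node1" is hypothetical). On success the script must print the exact
+# string "Result: OK" on a line by itself:
+#
+#   cat << 'EOF' > /tmp/noop_test.sh
+#   set -ex
+#   trap 'echo "Result: NOT_OK"' ERR
+#   true
+#   echo "Result: OK"
+#   EOF
+#   _run_test_script_on_node /tmp/noop_test.sh node1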
+
+function _grace_period {
+    local SECS=$1 # do not name this SECONDS: that is a special bash variable
+    echo "${SECS}-second grace period"
+    sleep $SECS
+}
+
+function _root_fs_is_btrfs {
+ stat -f / | grep -q 'Type: btrfs'
+}
+
+function _ping_minions_until_all_respond {
+ local RESPONDING=""
+ for i in {1..20} ; do
+ sleep 10
+ RESPONDING=$(salt '*' test.ping 2>/dev/null | grep True 2>/dev/null | wc --lines)
+ echo "Of $TOTAL_NODES total minions, $RESPONDING are responding"
+ test "$TOTAL_NODES" -eq "$RESPONDING" && break
+ done
+}
+
+function _ceph_cluster_running {
+ ceph status >/dev/null 2>&1
+}
+
--- /dev/null
+#
+# This file is part of the DeepSea integration test suite.
+# It contains various cluster introspection functions.
+#
+
+function json_total_nodes {
+ # total number of nodes in the cluster
+ salt --static --out json '*' test.ping 2>/dev/null | jq '. | length'
+}
+
+function _json_nodes_of_role_x {
+ local ROLE=$1
+ salt --static --out json -C "I@roles:$ROLE" test.ping 2>/dev/null | jq '. | length'
+}
+
+function json_storage_nodes {
+ # number of storage nodes in the cluster
+ _json_nodes_of_role_x storage
+}
+
+function json_total_osds {
+ # total number of OSDs in the cluster
+ ceph osd ls --format json | jq '. | length'
+}
--- /dev/null
+#
+# This file is part of the DeepSea integration test suite
+#
+
+NFS_MOUNTPOINT=/root/mnt
+
+function _nfs_ganesha_node {
+ _first_x_node ganesha
+}
+
+function nfs_ganesha_no_root_squash {
+ local GANESHAJ2=/srv/salt/ceph/ganesha/files/ganesha.conf.j2
+ sed -i '/Access_Type = RW;/a \\tSquash = No_root_squash;' $GANESHAJ2
+}
+
+function nfs_ganesha_no_grace_period {
+ local GANESHAJ2=/srv/salt/ceph/ganesha/files/ganesha.conf.j2
+ cat <<EOF >>$GANESHAJ2
+NFSv4 {Graceless = True}
+EOF
+}
+
+function nfs_ganesha_debug_log {
+ local GANESHANODE=$(_nfs_ganesha_node)
+ local TESTSCRIPT=/tmp/test-nfs-ganesha.sh
+ cat <<EOF > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+echo "nfs-ganesha debug log script running as $(whoami) on $(hostname --fqdn)"
+sed -i 's/NIV_EVENT/NIV_DEBUG/g' /etc/sysconfig/nfs-ganesha
+cat /etc/sysconfig/nfs-ganesha
+rm -rf /var/log/ganesha/ganesha.log
+systemctl restart nfs-ganesha.service
+systemctl is-active nfs-ganesha.service
+rpm -q nfs-ganesha
+echo "Result: OK"
+EOF
+ _run_test_script_on_node $TESTSCRIPT $GANESHANODE
+}
+
+function nfs_ganesha_cat_config_file {
+ salt -C 'I@roles:ganesha' cmd.run 'cat /etc/ganesha/ganesha.conf'
+}
+
+#function nfs_ganesha_showmount_loop {
+# local TESTSCRIPT=/tmp/test-nfs-ganesha.sh
+# salt -C 'I@roles:ganesha' cmd.run "while true ; do showmount -e $GANESHANODE | tee /tmp/showmount.log || true ; grep -q 'Timed out' /tmp/showmount.log || break ; done"
+#}
+
+function nfs_ganesha_mount {
+ #
+ # creates a mount point and mounts NFS-Ganesha export in it
+ #
+ local NFSVERSION=$1 # can be "3", "4", or ""
+ local ASUSER=$2
+ local CLIENTNODE=$(_client_node)
+ local GANESHANODE=$(_nfs_ganesha_node)
+ local TESTSCRIPT=/tmp/test-nfs-ganesha.sh
+ salt "$CLIENTNODE" pillar.get roles
+ salt "$CLIENTNODE" pkg.install nfs-client # FIXME: only works on SUSE
+ cat <<EOF > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+echo "nfs-ganesha mount test script"
+test ! -e $NFS_MOUNTPOINT
+mkdir $NFS_MOUNTPOINT
+test -d $NFS_MOUNTPOINT
+#mount -t nfs -o nfsvers=4 ${GANESHANODE}:/ $NFS_MOUNTPOINT
+mount -t nfs -o ##OPTIONS## ${GANESHANODE}:/ $NFS_MOUNTPOINT
+ls -lR $NFS_MOUNTPOINT
+echo "Result: OK"
+EOF
+    if test -z "$NFSVERSION" ; then
+        sed -i 's/##OPTIONS##/sync/' $TESTSCRIPT
+    elif [ "$NFSVERSION" = "3" -o "$NFSVERSION" = "4" ] ; then
+        sed -i 's/##OPTIONS##/sync,nfsvers='$NFSVERSION'/' $TESTSCRIPT
+    else
+        echo "Bad NFS version ->$NFSVERSION<-. Bailing out!"
+        exit 1
+ fi
+ _run_test_script_on_node $TESTSCRIPT $CLIENTNODE $ASUSER
+}
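+
+# For illustration: with NFSVERSION=4 the sed above replaces the ##OPTIONS##
+# placeholder with "sync,nfsvers=4", so the generated script runs, e.g.:
+#
+#   mount -t nfs -o sync,nfsvers=4 <ganesha-node>:/ /root/mnt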
+
+function nfs_ganesha_umount {
+ local ASUSER=$1
+ local CLIENTNODE=$(_client_node)
+ local TESTSCRIPT=/tmp/test-nfs-ganesha-umount.sh
+ cat <<EOF > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+echo "nfs-ganesha umount test script running as $(whoami) on $(hostname --fqdn)"
+umount $NFS_MOUNTPOINT
+rm -rf $NFS_MOUNTPOINT
+echo "Result: OK"
+EOF
+ _run_test_script_on_node $TESTSCRIPT $CLIENTNODE $ASUSER
+}
+
+function nfs_ganesha_write_test {
+ #
+ # NFS-Ganesha FSAL write test
+ #
+ local FSAL=$1
+ local NFSVERSION=$2
+ local CLIENTNODE=$(_client_node)
+ local TESTSCRIPT=/tmp/test-nfs-ganesha-write.sh
+ local APPENDAGE=""
+ if [ "$FSAL" = "cephfs" ] ; then
+ if [ "$NFSVERSION" = "3" ] ; then
+ APPENDAGE=""
+ else
+ APPENDAGE="/cephfs"
+ fi
+ else
+ APPENDAGE="/demo/demo-demo"
+ fi
+ local TOUCHFILE=$NFS_MOUNTPOINT$APPENDAGE/saturn
+ cat <<EOF > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+echo "nfs-ganesha write test script"
+! test -e $TOUCHFILE
+touch $TOUCHFILE
+test -f $TOUCHFILE
+rm -f $TOUCHFILE
+echo "Result: OK"
+EOF
+ _run_test_script_on_node $TESTSCRIPT $CLIENTNODE
+}
+
+function nfs_ganesha_pynfs_test {
+ #
+ # NFS-Ganesha PyNFS test
+ #
+ local CLIENTNODE=$(_client_node)
+ local GANESHANODE=$(_nfs_ganesha_node)
+ local TESTSCRIPT=/tmp/test-nfs-ganesha-pynfs.sh
+ cat <<'EOF' > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+
+function assert_success {
+ local PYNFS_OUTPUT=$1
+ test -s $PYNFS_OUTPUT
+    # last line: determines return value of function
+ ! grep -q FAILURE $PYNFS_OUTPUT
+}
+
+echo "nfs-ganesha PyNFS test script running as $(whoami) on $(hostname --fqdn)"
+set +x
+for delay in 60 60 60 60 ; do
+ sudo zypper --non-interactive --gpg-auto-import-keys refresh && break
+ sleep $delay
+done
+set -x
+zypper --non-interactive install --no-recommends krb5-devel python3-devel
+git clone --depth 1 https://github.com/supriti/Pynfs
+cd Pynfs
+./setup.py build
+cd nfs4.0
+sleep 90 # NFSv4 grace period
+LOGFILE="PyNFS.out"
+./testserver.py -v \
+ --outfile RESULTS.out \
+ --maketree GANESHANODE:/cephfs/ \
+ --showomit \
+ --secure \
+ --rundeps \
+ all \
+ ganesha 2>&1 | tee $LOGFILE
+#./showresults.py RESULTS.out
+assert_success $LOGFILE
+echo "Result: OK"
+EOF
+ sed -i 's/GANESHANODE/'$GANESHANODE'/' $TESTSCRIPT
+ _run_test_script_on_node $TESTSCRIPT $CLIENTNODE
+}
--- /dev/null
+# This file is part of the DeepSea integration test suite
+
+#
+# functions for generating storage proposals
+#
+
+PROPOSALSDIR="/srv/pillar/ceph/proposals"
+POLICY_CFG="$PROPOSALSDIR/policy.cfg"
+
+function proposal_populate_dmcrypt {
+ salt-run proposal.populate encryption='dmcrypt' name='dmcrypt'
+}
+
+function proposal_populate_filestore {
+ salt-run proposal.populate format='filestore' name='filestore'
+}
+
+
+#
+# functions for generating policy.cfg
+#
+
+function policy_cfg_base {
+ cat <<EOF > $POLICY_CFG
+# Cluster assignment
+cluster-ceph/cluster/*.sls
+# Common configuration
+config/stack/default/global.yml
+config/stack/default/ceph/cluster.yml
+# Role assignment - master
+role-master/cluster/${MASTER_MINION}.sls
+# Role assignment - admin
+role-admin/cluster/*.sls
+EOF
+}
+
+function policy_cfg_mon_flex {
+ test -n "$STORAGE_NODES" # set in initialization_sequence
+ test "$STORAGE_NODES" -gt 0
+ if [ "$STORAGE_NODES" -lt 4 ] ; then
+ echo "Undersized cluster ($STORAGE_NODES nodes)"
+ policy_cfg_one_mon
+ else
+ policy_cfg_three_mons
+ fi
+}
+
+function policy_cfg_one_mon {
+ cat <<EOF >> $POLICY_CFG
+# Role assignment - 1 mon, 1 mgr
+role-mon/cluster/*.sls slice=[:1]
+role-mgr/cluster/*.sls slice=[:1]
+EOF
+}
+
+function policy_cfg_three_mons {
+ cat <<EOF >> $POLICY_CFG
+# Role assignment - 3 mons, 3 mgrs
+role-mon/cluster/*.sls slice=[:3]
+role-mgr/cluster/*.sls slice=[:3]
+EOF
+}
+
+function _initialize_minion_configs_array {
+ local DIR=$1
+
+ shopt -s nullglob
+ pushd $DIR >/dev/null
+ MINION_CONFIGS_ARRAY=(*.yaml *.yml)
+ echo "Made global array containing the following files (from ->$DIR<-):"
+ printf '%s\n' "${MINION_CONFIGS_ARRAY[@]}"
+ popd >/dev/null
+ shopt -u nullglob
+}
+
+function _initialize_osd_configs_array {
+ local DIR=$1
+
+ shopt -s nullglob
+ pushd $DIR >/dev/null
+ OSD_CONFIGS_ARRAY=(*.yaml *.yml)
+ echo "Made global array containing the following OSD configs (from ->$DIR<-):"
+ printf '%s\n' "${OSD_CONFIGS_ARRAY[@]}"
+ popd >/dev/null
+ shopt -u nullglob
+}
+
+function _custom_osd_config {
+ local PROFILE=$1
+ local FILENAME=""
+ for i in "${OSD_CONFIGS_ARRAY[@]}" ; do
+ case "$i" in
+ $PROFILE) FILENAME=$i ; break ;;
+ ${PROFILE}.yaml) FILENAME=$i ; break ;;
+ ${PROFILE}.yml) FILENAME=$i ; break ;;
+ esac
+ done
+ if [ -z "$FILENAME" ] ; then
+ echo "Custom OSD profile $PROFILE not found. Bailing out!"
+ exit 1
+ fi
+ echo "$FILENAME"
+}
+
+function _random_osd_config {
+ # the bare config file names are assumed to already be in OSD_CONFIGS_ARRAY
+ # (accomplished by calling _initialize_osd_configs_array first)
+ OSD_CONFIGS_ARRAY_LENGTH="${#OSD_CONFIGS_ARRAY[@]}"
+ local INDEX=$((RANDOM % OSD_CONFIGS_ARRAY_LENGTH))
+ echo "${OSD_CONFIGS_ARRAY[$INDEX]}"
+}
+
+function random_or_custom_storage_profile {
+ test "$STORAGE_PROFILE"
+ test "$STORAGE_PROFILE" = "random" -o "$STORAGE_PROFILE" = "custom"
+ #
+ # choose OSD configuration from osd-config/ovh
+ #
+ local SOURCEDIR="$BASEDIR/osd-config/ovh"
+ _initialize_osd_configs_array $SOURCEDIR
+ local SOURCEFILE=""
+ case "$STORAGE_PROFILE" in
+ random) SOURCEFILE=$(_random_osd_config) ;;
+ custom) SOURCEFILE=$(_custom_osd_config $CUSTOM_STORAGE_PROFILE) ;;
+ esac
+ test "$SOURCEFILE"
+ file $SOURCEDIR/$SOURCEFILE
+ #
+ # prepare new profile, which will be exactly the same as the default
+ # profile except the files in stack/default/ceph/minions/ will be
+ # overwritten with our chosen OSD configuration
+ #
+ cp -a $PROPOSALSDIR/profile-default $PROPOSALSDIR/profile-$STORAGE_PROFILE
+ local DESTDIR="$PROPOSALSDIR/profile-$STORAGE_PROFILE/stack/default/ceph/minions"
+ _initialize_minion_configs_array $DESTDIR
+ for DESTFILE in "${MINION_CONFIGS_ARRAY[@]}" ; do
+ cp $SOURCEDIR/$SOURCEFILE $DESTDIR/$DESTFILE
+ done
+ echo "Your $STORAGE_PROFILE storage profile $SOURCEFILE has the following contents:"
+ cat $DESTDIR/$DESTFILE
+ ls -lR $PROPOSALSDIR
+}
+
+function policy_cfg_storage {
+ test -n "$CLIENT_NODES"
+ test -n "$STORAGE_PROFILE"
+
+ if [ "$CLIENT_NODES" -eq 0 ] ; then
+ cat <<EOF >> $POLICY_CFG
+# Hardware Profile
+profile-$STORAGE_PROFILE/cluster/*.sls
+profile-$STORAGE_PROFILE/stack/default/ceph/minions/*yml
+EOF
+ elif [ "$CLIENT_NODES" -ge 1 ] ; then
+ cat <<EOF >> $POLICY_CFG
+# Hardware Profile
+profile-$STORAGE_PROFILE/cluster/*.sls slice=[:-$CLIENT_NODES]
+profile-$STORAGE_PROFILE/stack/default/ceph/minions/*yml slice=[:-$CLIENT_NODES]
+EOF
+ else
+ echo "Unexpected number of client nodes ->$CLIENT_NODES<-; bailing out!"
+ exit 1
+ fi
+}
+
+function storage_profile_from_policy_cfg {
+ local BUFFER=$(grep --max-count 1 '^profile-' $POLICY_CFG)
+ perl -e '"'"$BUFFER"'" =~ m/profile-(\w+)/; print "$1\n";'
+}
+
+function policy_remove_storage_node {
+ local NODE_TO_DELETE=$1
+
+ echo "Before"
+ ls -1 $PROPOSALSDIR/profile-$STORAGE_PROFILE/cluster/
+ ls -1 $PROPOSALSDIR/profile-$STORAGE_PROFILE/stack/default/ceph/minions/
+
+ local basedirsls=$PROPOSALSDIR/profile-$STORAGE_PROFILE/cluster
+ local basediryml=$PROPOSALSDIR/profile-$STORAGE_PROFILE/stack/default/ceph/minions
+ mv $basedirsls/${NODE_TO_DELETE}.sls $basedirsls/${NODE_TO_DELETE}.sls-DISABLED
+    mv $basediryml/${NODE_TO_DELETE}.yml $basediryml/${NODE_TO_DELETE}.yml-DISABLED
+
+ echo "After"
+ ls -1 $PROPOSALSDIR/profile-$STORAGE_PROFILE/cluster/
+ ls -1 $PROPOSALSDIR/profile-$STORAGE_PROFILE/stack/default/ceph/minions/
+}
+
+function policy_cfg_mds {
+ test -n "$STORAGE_NODES"
+ # MDS on up to 3 storage nodes
+ if [ "$STORAGE_NODES" -le 3 ] ; then
+ cat <<EOF >> $POLICY_CFG
+# Role assignment - mds
+role-mds/cluster/*.sls slice=[:$STORAGE_NODES]
+EOF
+ else
+ cat <<EOF >> $POLICY_CFG
+# Role assignment - mds
+role-mds/cluster/*.sls slice=[:3]
+EOF
+ fi
+}
+
+function policy_cfg_openattic_rgw_igw_ganesha {
+ # first, determine the slices
+ local slice_openattic=""
+ local slice_rgw=""
+ local slice_igw=""
+ local slice_ganesha=""
+ # lest we become confused, "storage nodes" is a synonym for "cluster nodes"
+ test -n "$STORAGE_NODES"
+ if [ "$STORAGE_NODES" -eq 1 ] ; then
+ slice_openattic="[:1]"
+ slice_rgw="[:1]"
+ slice_igw="[:1]"
+ slice_ganesha="[:1]"
+ elif [ "$STORAGE_NODES" -eq 2 ] ; then
+ slice_openattic="[:1]"
+ slice_rgw="[1:2]"
+ slice_igw="[1:2]"
+ slice_ganesha="[1:2]"
+ elif [ "$STORAGE_NODES" -eq 3 ] ; then
+ slice_openattic="[:1]"
+ slice_rgw="[1:2]"
+ slice_igw="[2:3]"
+ slice_ganesha="[2:3]"
+ elif [ "$STORAGE_NODES" -ge 4 ] ; then
+ slice_openattic="[:1]"
+ slice_rgw="[1:2]"
+ slice_igw="[2:3]"
+ slice_ganesha="[3:4]"
+ else
+ echo "Unexpected number of cluster/storage nodes ->$STORAGE_NODES<-: bailing out!"
+ exit 1
+ fi
+ # then, populate policy.cfg
+ if [ "$OPENATTIC" ] ; then
+ cat <<EOF >> $POLICY_CFG
+# Role assignment - openattic
+role-openattic/cluster/*.sls slice=$slice_openattic
+EOF
+ fi
+ if [ "$RGW" ] ; then
+ if [ -z "$SSL" ] ; then
+ cat <<EOF >> $POLICY_CFG
+# Role assignment - rgw
+role-rgw/cluster/*.sls slice=$slice_rgw
+EOF
+ else
+ cat <<EOF >> $POLICY_CFG
+# Role assignment - rgw
+role-rgw/cluster/*.sls slice=$slice_rgw
+role-rgw-ssl/cluster/*.sls slice=$slice_rgw
+EOF
+ fi
+ fi
+ if [ "$IGW" ] ; then
+ cat <<EOF >> $POLICY_CFG
+# Role assignment - igw
+role-igw/cluster/*.sls slice=$slice_igw
+EOF
+ fi
+ if [ "$NFS_GANESHA" ] ; then
+ cat <<EOF >> $POLICY_CFG
+# Role assignment - ganesha
+role-ganesha/cluster/*.sls slice=$slice_ganesha
+EOF
+ fi
+}
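+
+# For illustration (not executed): on a 3-node cluster with RGW (no SSL) and
+# IGW enabled, the function above appends role assignments like:
+#
+#   # Role assignment - rgw
+#   role-rgw/cluster/*.sls slice=[1:2]
+#   # Role assignment - igw
+#   role-igw/cluster/*.sls slice=[2:3]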
+
--- /dev/null
+# This file is part of the DeepSea integration test suite
+
+#
+# separate file to house the pool creation functions
+#
+
+
+function pgs_per_pool {
+ local TOTALPOOLS=$1
+ test -n "$TOTALPOOLS"
+ local TOTALOSDS=$(json_total_osds)
+ test -n "$TOTALOSDS"
+ # given the total number of pools and OSDs,
+ # assume triple replication and equal number of PGs per pool
+ # and aim for 100 PGs per OSD
+ let "TOTALPGS = $TOTALOSDS * 100"
+ let "PGSPEROSD = $TOTALPGS / $TOTALPOOLS / 3"
+ echo $PGSPEROSD
+}
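+
+# For illustration: with 6 OSDs and 2 pools the arithmetic above gives
+# TOTALPGS = 6 * 100 = 600 and PGSPERPOOL = 600 / 2 / 3 = 100, so each pool
+# would be created with pg_num = pgp_num = 100.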
+
+function create_pool_incrementally {
+ # Special-purpose function for creating pools incrementally. For example,
+ # if your test case needs 2 pools "foo" and "bar", but you cannot create
+ # them all at once for some reason. Otherwise, use create_all_pools_at_once.
+ #
+ # sample usage:
+ #
+ # create_pool foo 2
+ # ... do something ...
+ # create_pool bar 2
+ # ... do something else ...
+ #
+ local POOLNAME=$1
+ test -n "$POOLNAME"
+ local TOTALPOOLS=$2
+ test -n "$TOTALPOOLS"
+ local PGSPERPOOL=$(pgs_per_pool $TOTALPOOLS)
+ ceph osd pool create $POOLNAME $PGSPERPOOL $PGSPERPOOL replicated
+}
+
+function create_all_pools_at_once {
+ # sample usage: create_all_pools_at_once foo bar
+ local TOTALPOOLS="${#@}"
+ local PGSPERPOOL=$(pgs_per_pool $TOTALPOOLS)
+ for POOLNAME in "$@"
+ do
+ ceph osd pool create $POOLNAME $PGSPERPOOL $PGSPERPOOL replicated
+ done
+ ceph osd pool ls detail
+}
+
+function pre_create_pools {
+ # pre-create pools with calculated number of PGs so we don't get health
+ # warnings after Stage 4 due to "too few" or "too many" PGs per OSD
+ # (the "write_test" pool is used in common/sanity-basic.sh)
+ sleep 10
+ POOLS="write_test"
+ test "$MDS" && POOLS+=" cephfs_data cephfs_metadata"
+ test "$OPENSTACK" && POOLS+=" smoketest-cloud-backups smoketest-cloud-volumes smoketest-cloud-images smoketest-cloud-vms cloud-backups cloud-volumes cloud-images cloud-vms"
+ test "$RBD" && POOLS+=" rbd"
+ create_all_pools_at_once $POOLS
+ ceph osd pool application enable write_test deepsea_qa
+ sleep 10
+}
--- /dev/null
+#
+# This file is part of the DeepSea integration test suite
+#
+
+function ceph_conf_upstream_rbd_default_features {
+ #
+ # by removing this line, we ensure that there will be no "rbd default
+ # features" setting in ceph.conf, so the default value will be used
+ #
+ sed -i '/^rbd default features =/d' \
+ /srv/salt/ceph/configuration/files/rbd.conf
+}
+
+function ceph_test_librbd_can_be_run {
+ local TESTSCRIPT=/tmp/rbd_script.sh
+ local CLIENTNODE=$(_client_node)
+ cat << 'EOF' > $TESTSCRIPT
+set -e
+trap 'echo "Result: NOT_OK"' ERR
+set -x
+chmod a+r /etc/ceph/ceph.client.admin.keyring
+rpm -V ceph-test
+type ceph_test_librbd
+echo "Result: OK"
+EOF
+ _run_test_script_on_node $TESTSCRIPT $CLIENTNODE
+ echo "You can now run ceph_test_librbd on the client node"
+}
+
--- /dev/null
+#
+# This file is part of the DeepSea integration test suite
+#
+RGW_ROLE=rgw
+
+function rgw_demo_users {
+ local RGWSLS=/srv/salt/ceph/rgw/users/users.d/users.yml
+ cat << EOF >> $RGWSLS
+- { uid: "demo", name: "Demo", email: "demo@demo.nil" }
+- { uid: "demo1", name: "Demo1", email: "demo1@demo.nil" }
+EOF
+ cat $RGWSLS
+}
+
+function rgw_user_and_bucket_list {
+ #
+ # just list rgw users and buckets
+ #
+ local TESTSCRIPT=/tmp/rgw_user_and_bucket_list.sh
+ local RGWNODE=$(_first_x_node $RGW_ROLE)
+ cat << EOF > $TESTSCRIPT
+set -ex
+radosgw-admin user list
+radosgw-admin bucket list
+echo "Result: OK"
+EOF
+ _run_test_script_on_node $TESTSCRIPT $RGWNODE
+}
+
+function rgw_validate_system_user {
+ #
+ # prove the system user "admin" was really set up
+ #
+ local TESTSCRIPT=/tmp/rgw_validate_system_user.sh
+ local RGWNODE=$(_first_x_node $RGW_ROLE)
+ cat << EOF > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+radosgw-admin user info --uid=admin
+radosgw-admin user info --uid=admin | grep system | grep -q true
+echo "Result: OK"
+EOF
+ _run_test_script_on_node $TESTSCRIPT $RGWNODE
+}
+
+function rgw_validate_demo_users {
+ #
+ # prove the demo users from rgw_demo_users were really set up
+ #
+ local TESTSCRIPT=/tmp/rgw_validate_demo_users.sh
+ local RGWNODE=$(_first_x_node $RGW_ROLE)
+ cat << EOF > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+radosgw-admin user info --uid=demo
+radosgw-admin user info --uid=demo1
+echo "Result: OK"
+EOF
+ _run_test_script_on_node $TESTSCRIPT $RGWNODE
+}
+
+function rgw_curl_test {
+ local RGWNODE=$(_first_x_node $RGW_ROLE)
+ test -n "$SSL" && PROTOCOL="https" || PROTOCOL="http"
+ test -n "$SSL" && CURL_OPTS="--insecure"
+ set +x
+ for delay in 60 60 60 60 ; do
+ sudo zypper --non-interactive --gpg-auto-import-keys refresh && break
+ sleep $delay
+ done
+ set -x
+ zypper --non-interactive install --no-recommends curl libxml2-tools
+ # installing curl RPM causes ceph-radosgw and rsyslog services to need restart
+ salt-run state.orch ceph.restart.rgw 2>/dev/null
+ systemctl restart rsyslog.service
+ _zypper_ps
+ salt --no-color -C "I@roles:$RGW_ROLE" cmd.run 'systemctl | grep radosgw'
+ #RGWNODE=$(salt --no-color -C "I@roles:$RGW_ROLE" test.ping | grep -o -P '^\S+(?=:)' | head -1)
+ RGWXMLOUT=/tmp/rgw_test.xml
+ curl $CURL_OPTS "${PROTOCOL}://$RGWNODE" > $RGWXMLOUT
+ test -f $RGWXMLOUT
+ xmllint $RGWXMLOUT
+ grep anonymous $RGWXMLOUT
+ rm -f $RGWXMLOUT
+}
+
+function rgw_add_ssl_global {
+ local GLOBALYML=/srv/pillar/ceph/stack/global.yml
+ cat <<EOF >> $GLOBALYML
+rgw_init: default-ssl
+rgw_configurations:
+ rgw:
+ users:
+ - { uid: "admin", name: "Admin", email: "admin@demo.nil", system: True }
+    # when using only RGW (and not Ganesha), the ssl configuration will
+    # already have all the RGW users, but to be consistent we define at least one user
+ rgw-ssl:
+ users:
+ - { uid: "admin", name: "Admin", email: "admin@demo.nil", system: True }
+EOF
+ cat $GLOBALYML
+}
+
+function rgw_ssl_init {
+ local CERTDIR=/srv/salt/ceph/rgw/cert
+ mkdir -p $CERTDIR
+ pushd $CERTDIR
+ openssl req -x509 -nodes -days 1095 -newkey rsa:4096 -keyout rgw.key -out rgw.crt -subj "/C=DE"
+ cat rgw.key > rgw.pem && cat rgw.crt >> rgw.pem
+ popd
+ rgw_add_ssl_global
+}
+
+function validate_rgw_cert_perm {
+ local TESTSCRIPT=/tmp/test_validate_rgw_cert_perm.sh
+ local RGWNODE=$(_first_x_node $RGW_ROLE)
+ cat << 'EOF' > $TESTSCRIPT
+set -ex
+trap 'echo "Result: NOT_OK"' ERR
+RGW_PEM=/etc/ceph/rgw.pem
+test -f "$RGW_PEM"
+test "$(stat -c'%U' $RGW_PEM)" == "ceph"
+test "$(stat -c'%G' $RGW_PEM)" == "ceph"
+test "$(stat -c'%a' $RGW_PEM)" -eq 600
+echo "Result: OK"
+EOF
+ _run_test_script_on_node $TESTSCRIPT $RGWNODE
+}
+
--- /dev/null
+# This file is part of the DeepSea integration test suite
+
+#
+# zypper-specific helper functions
+#
+
+function _dump_salt_master_zypper_repos {
+ zypper lr -upEP
+}
+
+function _zypper_ref_on_master {
+ set +x
+ for delay in 60 60 60 60 ; do
+ zypper --non-interactive --gpg-auto-import-keys refresh && break
+ sleep $delay
+ done
+ set -x
+}
+
+function _zypper_install_on_master {
+ local PACKAGE=$1
+ zypper --non-interactive install --no-recommends $PACKAGE
+}
+
--- /dev/null
+#!/bin/bash
+#
+# DeepSea integration test "suites/basic/health-ok.sh"
+#
+# This script runs DeepSea stages 0-3 (or 0-4, depending on options) to deploy
+# a Ceph cluster (with various options to control the cluster configuration).
+# After the last stage completes, the script checks for HEALTH_OK.
+#
+# The script makes no assumptions beyond those listed in README.
+#
+# After HEALTH_OK is reached, the script also runs various sanity tests
+# depending on the options provided.
+#
+# On success (HEALTH_OK is reached, sanity tests pass), the script returns 0.
+# On failure, for whatever reason, the script returns non-zero.
+#
+# The script produces verbose output on stdout, which can be captured for later
+# forensic analysis.
+#
+
+set -e
+set +x
+
+SCRIPTNAME=$(basename ${0})
+BASEDIR=$(readlink -f "$(dirname ${0})")
+test -d $BASEDIR
+[[ $BASEDIR =~ \/health-ok$ ]]
+
+source $BASEDIR/common/common.sh
+
+function usage {
+ set +x
+ echo "$SCRIPTNAME - script for testing HEALTH_OK deployment"
+ echo "for use in SUSE Enterprise Storage testing"
+ echo
+ echo "Usage:"
+ echo " $SCRIPTNAME [-h,--help] [--cli] [--client-nodes=X]"
+ echo " [--mds] [--min-nodes=X] [--nfs-ganesha] [--no-update]"
+ echo " [--openstack] [--profile=X] [--rbd] [--rgw] [--ssl]"
+ echo " [--tuned=X]"
+ echo
+ echo "Options:"
+ echo " --cli Use DeepSea CLI"
+ echo " --client-nodes Number of client (non-cluster) nodes"
+ echo " --help Display this usage message"
+ echo " --mds Deploy MDS"
+ echo " --min-nodes Minimum number of nodes"
+ echo " --nfs-ganesha Deploy NFS-Ganesha"
+ echo " --no-update Use no-update-no-reboot Stage 0 alt default"
+ echo " --openstack Pre-create pools for OpenStack functests"
+ echo " --profile Storage/OSD profile (see below)"
+ echo " --rbd Modify ceph.conf for rbd integration testing"
+ echo " --rgw Deploy RGW"
+ echo " --ssl Deploy RGW with SSL"
+ echo " --start-stage Run stages from (defaults to 0)"
+ echo " --teuthology Provide this option when running via teuthology"
+ echo " --tuned=on/off Deploy tuned in Stage 3 (default: off)"
+ echo
+ echo "Supported storage/OSD profiles:"
+ echo " default Whatever is generated by Stage 1 (bluestore)"
+ echo " dmcrypt All encrypted OSDs"
+ echo " filestore All filestore OSDs"
+ echo " random A randomly chosen profile (teuthology/OVH only)"
+ echo " <OTHER> Any other value will be assumed to be the name"
+ echo " of an OSD profile in qa/osd-config/ovh"
+ exit 1
+}
+
+assert_enhanced_getopt
+
+TEMP=$(getopt -o h \
+--long "cli,client-nodes:,help,igw,mds,min-nodes:,nfs-ganesha,no-update,openstack,profile:,rbd,rgw,ssl,start-stage:,teuthology,tuned:" \
+-n 'health-ok.sh' -- "$@")
+
+if [ $? != 0 ] ; then echo "Terminating..." >&2 ; exit 1 ; fi
+
+# Note the quotes around "$TEMP": they are essential!
+eval set -- "$TEMP"
+
+# process command-line options
+CLI=""
+CLIENT_NODES=0
+STORAGE_PROFILE="default"
+CUSTOM_STORAGE_PROFILE=""
+MDS=""
+MIN_NODES=1
+OPENSTACK=""
+NFS_GANESHA=""
+NO_UPDATE=""
+RBD=""
+RGW=""
+SSL=""
+START_STAGE="0"
+TEUTHOLOGY=""
+TUNED="off"
+while true ; do
+ case "$1" in
+ --cli) CLI="$1" ; shift ;;
+ --client-nodes) shift ; CLIENT_NODES=$1 ; shift ;;
+        -h|--help) usage ;;    # does not return
+        --igw) IGW="$1" ; shift ;;
+        --mds) MDS="$1" ; shift ;;
+ --min-nodes) shift ; MIN_NODES=$1 ; shift ;;
+ --nfs-ganesha) NFS_GANESHA="$1" ; shift ;;
+ --no-update) NO_UPDATE="$1" ; shift ;;
+ --openstack) OPENSTACK="$1" ; shift ;;
+ --profile) shift ; STORAGE_PROFILE=$1 ; shift ;;
+ --rbd) RBD="$1" ; shift ;;
+ --rgw) RGW="$1" ; shift ;;
+ --ssl) SSL="$1" ; shift ;;
+ --start-stage) shift ; START_STAGE=$1 ; shift ;;
+ --teuthology) TEUTHOLOGY="$1" ; shift ;;
+ --tuned) shift ; TUNED=$1 ; shift ;;
+ --) shift ; break ;;
+ *) echo "Internal error" ; exit 1 ;;
+ esac
+done
+if [ "$NFS_GANESHA" ] ; then
+ if [ -z "$MDS" -a -z "$RGW" ] ; then
+ echo "NFS-Ganesha requires either mds or rgw role, but neither was specified. Bailing out!"
+ exit 1
+ fi
+fi
+TUNED=${TUNED,,}
+case "$TUNED" in
+ on) ;;
+ off) TUNED='' ;;
+ *) echo "Bad value ->$TUNED<- passed with --tuned. Bailing out!" ; exit 1 ;;
+esac
+echo "WWWW"
+echo "health-ok.sh running with the following configuration:"
+test -n "$CLI" && echo "- CLI"
+echo "- CLIENT_NODES ->$CLIENT_NODES<-"
+echo "- MIN_NODES ->$MIN_NODES<-"
+test -n "$MDS" && echo "- MDS"
+test -n "$NFS_GANESHA" && echo "- NFS-Ganesha"
+test -n "$OPENSTACK" && echo "- OpenStack test pools will be pre-created"
+echo "- PROFILE ->$STORAGE_PROFILE<-"
+test -n "$RBD" && echo "- RBD"
+test -n "$RGW" && echo "- RGW"
+test -n "$SSL" && echo "- SSL"
+echo "- Start Stage ->$START_STAGE<-"
+test -n "$TEUTHOLOGY" && echo "- TEUTHOLOGY"
+echo -n "- TUNED: "
+test -n "$TUNED" && echo "ON"
+test -z "$TUNED" && echo "OFF"
+echo -n "Stage 0 update: "
+test -n "$NO_UPDATE" && echo "disabled" || echo "enabled"
+set -x
+
+# deploy phase
+deploy_ceph
+
+# verification phase
+ceph_health_test
+test "$STORAGE_NODES" = "$(number_of_hosts_in_ceph_osd_tree)"
+#salt -I roles:storage osd.report 2>/dev/null
+
+# test phase
+REPEAT_STAGE_0=""
+ceph_log_grep_enoent_eaccess
+test_systemd_ceph_osd_target_wants
+#rados_write_test
+#ceph_version_test
+if [ -n "$RGW" ] ; then
+ rgw_curl_test
+ test -n "$SSL" && validate_rgw_cert_perm
+ rgw_user_and_bucket_list
+ rgw_validate_system_user
+ rgw_validate_demo_users
+fi
+test -n "$MDS" -a "$CLIENT_NODES" -ge 1 && cephfs_mount_and_sanity_test
+if [ "$NFS_GANESHA" ] ; then
+ for v in "" "3" "4" ; do
+ echo "Testing NFS-Ganesha with NFS version ->$v<-"
+ if [ "$RGW" -a "$v" = "3" ] ; then
+ echo "Not testing RGW FSAL on NFSv3"
+ continue
+ else
+ nfs_ganesha_mount "$v"
+ fi
+ if [ "$MDS" ] ; then
+ nfs_ganesha_write_test cephfs "$v"
+ fi
+ if [ "$RGW" ] ; then
+ if [ "$v" = "3" ] ; then
+ echo "Not testing RGW FSAL on NFSv3"
+ else
+ rgw_curl_test
+ rgw_user_and_bucket_list
+ rgw_validate_demo_users
+ nfs_ganesha_write_test rgw "$v"
+ fi
+ fi
+ nfs_ganesha_umount
+ sleep 10
+ done
+ REPEAT_STAGE_0="yes, please"
+fi
+test "$REPEAT_STAGE_0" && run_stage_0 "$CLI" # exercise ceph.restart orchestration
+
+echo "YYYY"
+echo "health-ok test result: PASS"
--- /dev/null
+#!/bin/bash
+#
+# DeepSea integration test "suites/basic/stage-5.sh"
+#
+# This script runs DeepSea stages 2 and 5 to remove a storage-only node from
+# an existing Ceph cluster.
+#
+# In addition to the assumptions contained in README, this script assumes
+# that:
+# 1. DeepSea has already been used to deploy a cluster,
+# 2. the cluster has at least one "storage-only" node (i.e. a node with role
+# "storage" and no other roles (except possibly "admin")), and
+# 3. the cluster will be able to reach HEALTH_OK after one storage-only node
+# is dropped (typically this means the cluster needs at least 3 storage
+# nodes to start with).
+#
+# On success (HEALTH_OK is reached, number of storage nodes went down by 1,
+# number of OSDs decreased), the script returns 0. On failure, for whatever
+# reason, the script returns non-zero.
+#
+# The script produces verbose output on stdout, which can be captured for later
+# forensic analysis.
+#
+
+set -e
+set +x
+
+SCRIPTNAME=$(basename ${0})
+BASEDIR=$(readlink -f "$(dirname ${0})")
+test -d $BASEDIR
+[[ $BASEDIR =~ \/health-ok$ ]]
+
+source $BASEDIR/common/common.sh
+
+function usage {
+ set +x
+ echo "$SCRIPTNAME - script for testing HEALTH_OK deployment"
+ echo "for use in SUSE Enterprise Storage testing"
+ echo
+ echo "Usage:"
+ echo " $SCRIPTNAME [-h,--help] [--cli]"
+ echo
+ echo "Options:"
+ echo " --cli Use DeepSea CLI"
+ echo " --help Display this usage message"
+ exit 1
+}
+
+assert_enhanced_getopt
+
+TEMP=$(getopt -o h \
+--long "cli,help" \
+-n 'stage-5.sh' -- "$@")
+
+if [ $? != 0 ] ; then echo "Terminating..." >&2 ; exit 1 ; fi
+
+# Note the quotes around "$TEMP": they are essential!
+eval set -- "$TEMP"
+
+# process command-line options
+CLI=""
+while true ; do
+ case "$1" in
+ --cli) CLI="$1" ; shift ;;
+ -h|--help) usage ;; # does not return
+ --) shift ; break ;;
+ *) echo "Internal error" ; exit 1 ;;
+ esac
+done
+echo "WWWW"
+echo "stage-5.sh running with the following configuration:"
+test -n "$CLI" && echo "- CLI"
+set -x
+
+# double-check there is a healthy cluster
+ceph_health_test
+STORAGE_NODES_BEFORE=$(number_of_hosts_in_ceph_osd_tree)
+OSDS_BEFORE=$(number_of_osds_in_ceph_osd_tree)
+test "$STORAGE_NODES_BEFORE"
+test "$OSDS_BEFORE"
+test "$STORAGE_NODES_BEFORE" -gt 1
+test "$OSDS_BEFORE" -gt 0
+
+# modify storage profile
+STORAGE_PROFILE=$(storage_profile_from_policy_cfg)
+FIRST_STORAGE_ONLY_NODE=$(_first_storage_only_node)
+ls -lR $PROPOSALSDIR
+PROPOSALS_BEFORE=$(find $PROPOSALSDIR -name \*$FIRST_STORAGE_ONLY_NODE\* | wc --lines)
+policy_remove_storage_node $FIRST_STORAGE_ONLY_NODE
+ls -lR $PROPOSALSDIR
+PROPOSALS_AFTER=$(find $PROPOSALSDIR -name \*$FIRST_STORAGE_ONLY_NODE\* | wc --lines)
+
+# run stages 2 and 5
+run_stage_2 "$CLI"
+ceph_cluster_status
+run_stage_5 "$CLI"
+ceph_cluster_status
+
+# verification phase
+ceph_health_test
+STORAGE_NODES_AFTER=$(number_of_hosts_in_ceph_osd_tree)
+OSDS_AFTER=$(number_of_osds_in_ceph_osd_tree)
+test "$STORAGE_NODES_BEFORE"
+test "$OSDS_BEFORE"
+test "$STORAGE_NODES_AFTER" -eq "$((STORAGE_NODES_BEFORE - 1))"
+test "$OSDS_AFTER" -lt "$OSDS_BEFORE"
+
+## osd.report for good measure
+#salt -I roles:storage osd.report 2>/dev/null
+
+echo "YYYY"
+echo "stage-5 test result: PASS"
--- /dev/null
+roles:
+- [client.salt_master]
--- /dev/null
+roles:
+- [client.salt_master, node.0]
+- [node.1]
+- [node.2]
+- [node.3]
+- [node.4]
+- [node.5]
+- [node.6]
+- [node.7]
+- [node.8]
+- [node.9]
+- [node.10]
+- [node.11]
+- [node.12]
+- [node.13]
+- [node.14]
+- [node.15]
+- [node.16]
+- [node.17]
+- [node.18]
+- [node.19]
--- /dev/null
+roles:
+- [client.salt_master]
+- [node.1]
--- /dev/null
+roles:
+- [client.salt_master]
+- [node.1]
+- [node.2]
--- /dev/null
+roles:
+- [client.salt_master]
+- [node.1]
+- [node.2]
+- [node.3]
--- /dev/null
+roles:
+- [client.salt_master, node.0]
+- [node.1]
+- [node.2]
+- [node.3]
+- [node.4]
--- /dev/null
+tasks:
+- clock:
+- install:
+ install_ceph_packages: false
+ extra_system_packages: [salt, salt-master, salt-minion, salt-api]
+- salt:
--- /dev/null
+overrides:
+ deepsea:
+ storage_profile:
+ ceph:
+ storage:
+ osds:
+ /dev/vdb:
+ format: bluestore
+ db: /dev/vde
+ /dev/vdc:
+ format: bluestore
+ db: /dev/vde
+ /dev/vdd:
+ format: bluestore
+ db: /dev/vde
--- /dev/null
+overrides:
+ deepsea:
+ storage_profile:
+ ceph:
+ storage:
+ osds:
+ /dev/vdb:
+ format: bluestore
+ encryption: dmcrypt
+ db: /dev/vde
+ /dev/vdc:
+ format: bluestore
+ encryption: dmcrypt
+ db: /dev/vde
+ /dev/vdd:
+ format: bluestore
+ encryption: dmcrypt
+ db: /dev/vde
--- /dev/null
+overrides:
+ deepsea:
+ storage_profile:
+ ceph:
+ storage:
+ osds:
+ /dev/vdb:
+ format: bluestore
+ db: /dev/vde
+ db_size: 1G
+ /dev/vdc:
+ format: bluestore
+ db: /dev/vde
+ db_size: 2G
+ /dev/vdd:
+ format: bluestore
+ db: /dev/vde
+ db_size: 3G
--- /dev/null
+overrides:
+ deepsea:
+ storage_profile:
+ ceph:
+ storage:
+ osds:
+ /dev/vdb:
+ format: bluestore
+ encryption: dmcrypt
+ db: /dev/vde
+ db_size: 1G
+ /dev/vdc:
+ format: bluestore
+ encryption: dmcrypt
+ db: /dev/vde
+ db_size: 2G
+ /dev/vdd:
+ format: bluestore
+ encryption: dmcrypt
+ db: /dev/vde
+ db_size: 3G
--- /dev/null
+overrides:
+ deepsea:
+ storage_profile:
+ ceph:
+ storage:
+ osds:
+ /dev/vdb:
+ format: bluestore
+ db: /dev/vde
+ db_size: 1G
+ /dev/vdc:
+ format: bluestore
+ db: /dev/vde
+ db_size: 2G
+ /dev/vdd:
+ format: bluestore
+ db: /dev/vde
--- /dev/null
+overrides:
+ deepsea:
+ storage_profile:
+ ceph:
+ storage:
+ osds:
+ /dev/vdb:
+ format: bluestore
+ encryption: dmcrypt
+ db: /dev/vde
+ db_size: 1G
+ /dev/vdc:
+ format: bluestore
+ encryption: dmcrypt
+ db: /dev/vde
+ db_size: 2G
+ /dev/vdd:
+ format: bluestore
+ encryption: dmcrypt
+ db: /dev/vde
--- /dev/null
+overrides:
+ deepsea:
+ storage_profile:
+ ceph:
+ storage:
+ osds:
+ /dev/vdb:
+ format: bluestore
+ wal: /dev/vde
+ /dev/vdc:
+ format: bluestore
+ wal: /dev/vde
+ /dev/vdd:
+ format: bluestore
+ wal: /dev/vde
--- /dev/null
+overrides:
+ deepsea:
+ storage_profile:
+ ceph:
+ storage:
+ osds:
+ /dev/vdb:
+ format: bluestore
+ encryption: dmcrypt
+ wal: /dev/vde
+ /dev/vdc:
+ format: bluestore
+ encryption: dmcrypt
+ wal: /dev/vde
+ /dev/vdd:
+ format: bluestore
+ encryption: dmcrypt
+ wal: /dev/vde
--- /dev/null
+overrides:
+ deepsea:
+ storage_profile:
+ ceph:
+ storage:
+ osds:
+ /dev/vdb:
+ format: bluestore
+ wal: /dev/vde
+ db: /dev/vdd
+ /dev/vdc:
+ format: bluestore
+ wal: /dev/vde
+ db: /dev/vdd
--- /dev/null
+overrides:
+ deepsea:
+ storage_profile:
+ ceph:
+ storage:
+ osds:
+ /dev/vdb:
+ format: bluestore
+ encryption: dmcrypt
+ wal: /dev/vde
+ db: /dev/vdd
+ /dev/vdc:
+ format: bluestore
+ encryption: dmcrypt
+ wal: /dev/vde
+ db: /dev/vdd
--- /dev/null
+overrides:
+ deepsea:
+ storage_profile:
+ ceph:
+ storage:
+ osds:
+ /dev/vdb:
+ format: bluestore
+ wal_size: 1G
+ wal: /dev/vde
+ db: /dev/vdd
+ db_size: 2G
+ /dev/vdc:
+ format: bluestore
+ wal: /dev/vde
+ db: /dev/vdd
+ wal_size: 3G
+ db_size: 4G
--- /dev/null
+overrides:
+ deepsea:
+ storage_profile:
+ ceph:
+ storage:
+ osds:
+ /dev/vdb:
+ format: bluestore
+ encryption: dmcrypt
+ wal_size: 1G
+ wal: /dev/vde
+ db: /dev/vdd
+ db_size: 2G
+ /dev/vdc:
+ format: bluestore
+ encryption: dmcrypt
+ wal: /dev/vde
+ db: /dev/vdd
+ wal_size: 3G
+ db_size: 4G
--- /dev/null
+overrides:
+ deepsea:
+ storage_profile:
+ ceph:
+ storage:
+ osds:
+ /dev/vdb:
+ format: bluestore
+ wal: /dev/vde
+ db: /dev/vdd
+ /dev/vdc:
+ format: bluestore
+ wal: /dev/vde
+ db: /dev/vdd
+ wal_size: 3G
+ db_size: 4G
--- /dev/null
+overrides:
+ deepsea:
+ storage_profile:
+ ceph:
+ storage:
+ osds:
+ /dev/vdb:
+ format: bluestore
+ encryption: dmcrypt
+ wal: /dev/vde
+ db: /dev/vdd
+ /dev/vdc:
+ format: bluestore
+ encryption: dmcrypt
+ wal: /dev/vde
+ db: /dev/vdd
+ wal_size: 3G
+ db_size: 4G
--- /dev/null
+overrides:
+ deepsea:
+ storage_profile:
+ ceph:
+ storage:
+ osds:
+ /dev/vdb:
+ format: bluestore
+ wal_size: 1G
+ wal: /dev/vde
+ /dev/vdc:
+ format: bluestore
+ wal: /dev/vde
+ wal_size: 2G
+ /dev/vdd:
+ format: bluestore
+ wal: /dev/vde
+ wal_size: 3G
--- /dev/null
+overrides:
+ deepsea:
+ storage_profile:
+ ceph:
+ storage:
+ osds:
+ /dev/vdb:
+ format: bluestore
+ encryption: dmcrypt
+ wal_size: 1G
+ wal: /dev/vde
+ /dev/vdc:
+ format: bluestore
+ encryption: dmcrypt
+ wal: /dev/vde
+ wal_size: 2G
+ /dev/vdd:
+ format: bluestore
+ encryption: dmcrypt
+ wal: /dev/vde
+ wal_size: 3G
--- /dev/null
+overrides:
+ deepsea:
+ storage_profile:
+ ceph:
+ storage:
+ osds:
+ /dev/vdb:
+ format: bluestore
+ wal_size: 1G
+ wal: /dev/vde
+ /dev/vdc:
+ format: bluestore
+ wal: /dev/vde
+ wal_size: 2G
+ /dev/vdd:
+ format: bluestore
+ wal: /dev/vde
--- /dev/null
+overrides:
+ deepsea:
+ storage_profile:
+ ceph:
+ storage:
+ osds:
+ /dev/vdb:
+ format: bluestore
+ encryption: dmcrypt
+ wal_size: 1G
+ wal: /dev/vde
+ /dev/vdc:
+ format: bluestore
+ encryption: dmcrypt
+ wal: /dev/vde
+ wal_size: 2G
+ /dev/vdd:
+ format: bluestore
+ encryption: dmcrypt
+ wal: /dev/vde
--- /dev/null
+overrides:
+ deepsea:
+ storage_profile:
+ ceph:
+ storage:
+ osds:
+ /dev/vdb:
+ format: filestore
+ journal: /dev/vde
+ /dev/vdc:
+ format: filestore
+ journal: /dev/vde
+ /dev/vdd:
+ format: filestore
+ journal: /dev/vde
--- /dev/null
+overrides:
+ deepsea:
+ storage_profile:
+ ceph:
+ storage:
+ osds:
+ /dev/vdb:
+ format: filestore
+ encryption: dmcrypt
+ journal: /dev/vde
+ /dev/vdc:
+ format: filestore
+ encryption: dmcrypt
+ journal: /dev/vde
+ /dev/vdd:
+ format: filestore
+ encryption: dmcrypt
+ journal: /dev/vde
--- /dev/null
+../.qa
\ No newline at end of file
--- /dev/null
+../.qa
\ No newline at end of file
--- /dev/null
+../.qa
\ No newline at end of file
--- /dev/null
+.qa/deepsea/salt.yaml
\ No newline at end of file
--- /dev/null
+.qa/deepsea/boilerplate/
\ No newline at end of file
--- /dev/null
+../.qa
\ No newline at end of file
--- /dev/null
+.qa/deepsea/disks/1disk.yaml
\ No newline at end of file
--- /dev/null
+.qa/deepsea/nodes/1node.yaml
\ No newline at end of file
--- /dev/null
+.qa/deepsea/distros/
\ No newline at end of file
--- /dev/null
+../.qa
\ No newline at end of file
--- /dev/null
+../.qa
\ No newline at end of file
--- /dev/null
+.qa/deepsea/salt.yaml
\ No newline at end of file
--- /dev/null
+.qa/deepsea/deepsea.yaml
\ No newline at end of file
--- /dev/null
+tasks:
+ - exec:
+ client.salt_master:
+ - 'ceph -s'
+ - deepsea.validation:
+ - deepsea.toolbox:
+ assert_bluestore:
+ osd.0:
--- /dev/null
+.qa/deepsea/boilerplate/
\ No newline at end of file
--- /dev/null
+../.qa
\ No newline at end of file
--- /dev/null
+.qa/deepsea/disks/4disks.yaml
\ No newline at end of file
--- /dev/null
+roles:
+ - [client.salt_master, mon.a, mgr.x, osd.0, prometheus.p, grafana.g]
--- /dev/null
+overrides:
+ deepsea:
+ cli: false
--- /dev/null
+.qa/deepsea/distros
\ No newline at end of file
--- /dev/null
+../.qa
\ No newline at end of file
--- /dev/null
+../deepsea/tier0
\ No newline at end of file
--- /dev/null
+../deepsea/tier1
\ No newline at end of file
--- /dev/null
+"""
+Task (and subtasks) for automating deployment of Ceph using DeepSea
+
+Linter:
+ flake8 --max-line-length=100
+"""
+import logging
+import time
+import yaml
+
+from salt_manager import SaltManager
+from scripts import Scripts
+from teuthology import misc
+from util import (
+ copy_directory_recursively,
+ enumerate_osds,
+ get_remote_for_role,
+ get_rpm_pkg_version,
+ introspect_roles,
+ remote_exec,
+ remote_run_script_as_root,
+ sudo_append_to_file,
+ )
+
+from teuthology.exceptions import (
+ CommandFailedError,
+ ConfigError,
+ )
+from teuthology.orchestra import run
+from teuthology.task import Task
+from teuthology.contextutil import safe_while
+
+log = logging.getLogger(__name__)
+deepsea_ctx = {}
+proposals_dir = "/srv/pillar/ceph/proposals"
+reboot_tries = 30
+
+
+def anchored(log_message):
+ global deepsea_ctx
+ assert 'log_anchor' in deepsea_ctx, "deepsea_ctx not populated"
+ return "{}{}".format(deepsea_ctx['log_anchor'], log_message)
+
+
+def dump_file_that_might_not_exist(remote, fpath):
+ try:
+ remote.run(args="cat {}".format(fpath))
+ except CommandFailedError:
+ pass
+
+
+class DeepSea(Task):
+ """
+ Install DeepSea on the Salt Master node.
+
+ Assumes a Salt cluster is already running (use the Salt task to achieve
+ this).
+
+ This task understands the following config keys which apply to
+ this task and all its subtasks:
+
+ allow_python2: (default: True)
+ whether to continue if Python 2 is installed anywhere
+ in the test cluster
+ alternative_defaults: (default: empty)
+ a dictionary of DeepSea alternative defaults
+ to be activated via the Salt Pillar
+ cli:
+ true deepsea CLI will be used (the default)
+ false deepsea CLI will not be used
+ dashboard_ssl:
+ true deploy MGR dashboard module with SSL (the default)
+ false deploy MGR dashboard module *without* SSL
+ log_anchor a string (default: "WWWW: ") which will precede
+ log messages emitted at key points during the
+ deployment
+ quiet_salt:
+ true suppress stderr on salt commands (the default)
+ false let salt commands spam the log
+ rgw_ssl:
+ true use SSL if RGW is deployed
+ false if RGW is deployed, do not use SSL (the default)
+ drive_group:
+ default if a teuthology osd role is present on a node,
+ DeepSea will tell ceph-volume to make all available
+ disks into standalone OSDs
+ teuthology populate DeepSea storage profile for 1:1 mapping
+ between teuthology osd roles and actual osds
+ deployed (the default, but not yet implemented)
+ (dict) a dictionary is assumed to be a custom drive group
+ (yaml blob) to be passed verbatim to ceph-volume
+
+ This task also understands the following config keys that affect
+ the behavior of just this one task (no effect on subtasks):
+
+ repo: (git repo for initial DeepSea install, e.g.
+ "https://github.com/SUSE/DeepSea.git")
+ branch: (git branch for initial deepsea install, e.g. "master")
+ install:
+ package|pkg deepsea will be installed via package system
+ source|src deepsea will be installed via 'make install' (default)
+ upgrade_install:
+ package|pkg post-upgrade deepsea will be installed via package system
+ source|src post-upgrade deepsea will be installed via 'make install' (default)
+ upgrade_repo: (git repo for DeepSea re-install/upgrade - used by second
+ invocation of deepsea task only)
+ upgrade_branch: (git branch for DeepSea re-install/upgrade - used by
+ second invocation of deepsea task only)
+
+ Example:
+
+    tasks:
+ - deepsea:
+ repo: https://github.com/SUSE/DeepSea.git
+ branch: wip-foo
+ install: source
+
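+    Another example combining several of the global keys documented above
+    (the values shown are illustrative):
+
+    tasks:
+    - deepsea:
+        allow_python2: false
+        cli: false
+        drive_group: default
+        quiet_salt: true
+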
+ :param ctx: the argparse.Namespace object
+ :param config: the config dict
+ """
+
+ err_prefix = "(deepsea task) "
+
+ log_anchor_str = "WWWW: "
+
+ def __init__(self, ctx, config):
+ global deepsea_ctx
+ super(DeepSea, self).__init__(ctx, config)
+ if deepsea_ctx:
+ # context already populated (we are in a subtask, or a
+ # re-invocation of the deepsea task)
+ self.log = deepsea_ctx['logger_obj']
+ if type(self).__name__ == 'DeepSea':
+ # The only valid reason for a second invocation of the deepsea
+ # task is to upgrade DeepSea (actually reinstall it)
+ deepsea_ctx['reinstall_deepsea'] = True
+ # deepsea_ctx['install_method'] is the _initial_ install method from the
+ # first invocation. If initial install was from package, the
+ # package must be removed for reinstall from source to work.
+ # If reinstall method is 'package', removing the package here
+ # will not hurt anything.
+ if deepsea_ctx['install_method'] == 'package':
+ deepsea_ctx['master_remote'].run(args=[
+ 'sudo',
+ 'zypper',
+ '--non-interactive',
+ '--no-gpg-checks',
+ 'remove',
+ 'deepsea',
+ 'deepsea-qa',
+ run.Raw('||'),
+ 'true'
+ ])
+ install_key = 'install'
+ upgrade_install = self.config.get('upgrade_install', '')
+ if upgrade_install:
+ install_key = 'upgrade_install'
+ self.__populate_install_method_basic(install_key)
+ if not deepsea_ctx:
+ # populating context (we are *not* in a subtask)
+ deepsea_ctx['logger_obj'] = log
+ self.ctx['roles'] = self.ctx.config['roles']
+ self.log = log
+ self._populate_deepsea_context()
+ introspect_roles(self.ctx, self.log, quiet=False)
+ self.allow_python2 = deepsea_ctx['allow_python2']
+ self.alternative_defaults = deepsea_ctx['alternative_defaults']
+ self.dashboard_ssl = deepsea_ctx['dashboard_ssl']
+ self.deepsea_cli = deepsea_ctx['cli']
+ self.dev_env = self.ctx['dev_env']
+ self.install_method = deepsea_ctx['install_method']
+ self.log_anchor = deepsea_ctx['log_anchor']
+ self.master_remote = deepsea_ctx['master_remote']
+ self.nodes = self.ctx['nodes']
+ self.nodes_storage = self.ctx['nodes_storage']
+ self.nodes_storage_only = self.ctx['nodes_storage_only']
+ self.quiet_salt = deepsea_ctx['quiet_salt']
+ self.remotes = self.ctx['remotes']
+ self.reinstall_deepsea = deepsea_ctx.get('reinstall_deepsea', False)
+ self.repositories = deepsea_ctx['repositories']
+ self.rgw_ssl = deepsea_ctx['rgw_ssl']
+ self.roles = self.ctx['roles']
+ self.role_types = self.ctx['role_types']
+ self.role_lookup_table = self.ctx['role_lookup_table']
+ self.scripts = Scripts(self.ctx, self.log)
+ self.sm = deepsea_ctx['salt_manager_instance']
+ self.drive_group = deepsea_ctx['drive_group']
+ # self.log.debug("ctx.config {}".format(ctx.config))
+ # self.log.debug("deepsea context: {}".format(deepsea_ctx))
+
+ def __install_deepsea_from_source(self):
+ info_msg_prefix = 'Reinstalling' if self.reinstall_deepsea else 'Installing'
+ info_msg = info_msg_prefix + ' deepsea from source'
+ self.log.info(anchored(info_msg))
+ if self.sm.master_rpm_q('deepsea'):
+ self.log.info("DeepSea already installed from RPM")
+ return None
+ upgrade_repo = self.config.get('upgrade_repo', '')
+ upgrade_branch = self.config.get('upgrade_branch', '')
+ repo = self.config.get('repo', 'https://github.com/SUSE/DeepSea.git')
+ branch = self.config.get('branch', 'master')
+ if self.reinstall_deepsea:
+ if upgrade_repo:
+ repo = upgrade_repo
+ if upgrade_branch:
+ branch = upgrade_branch
+ self.log.info(
+ "{} - repo: {}, branch: {}"
+ .format(info_msg, repo, branch)
+ )
+ self.master_remote.run(args=[
+ 'sudo',
+ 'rm',
+ '-rf',
+ 'DeepSea',
+ run.Raw(';'),
+ 'git',
+ '--version',
+ run.Raw(';'),
+ 'git',
+ 'clone',
+ '--branch',
+ branch,
+ repo,
+ run.Raw(';'),
+ 'cd',
+ 'DeepSea',
+ run.Raw(';'),
+ 'git',
+ 'rev-parse',
+ '--abbrev-ref',
+ 'HEAD',
+ run.Raw(';'),
+ 'git',
+ 'rev-parse',
+ 'HEAD',
+ run.Raw(';'),
+ 'git',
+ 'describe',
+ run.Raw('||'),
+ 'true',
+ ])
+ self.log.info("Running \"make install\" in DeepSea clone...")
+ self.master_remote.run(args=[
+ 'cd',
+ 'DeepSea',
+ run.Raw(';'),
+ 'sudo',
+ 'make',
+ 'install',
+ ])
+ self.log.info("installing deepsea dependencies...")
+ rpmspec_cmd = (
+ '$(rpmspec --requires -q DeepSea/deepsea.spec.in 2>/dev/null)'
+ )
+ self.master_remote.run(args=[
+ 'sudo',
+ 'zypper',
+ '--non-interactive',
+ 'install',
+ '--no-recommends',
+ run.Raw(rpmspec_cmd)
+ ])
+
+ def __install_deepsea_using_zypper(self):
+ info_msg_prefix = 'Reinstalling' if self.reinstall_deepsea else 'Installing'
+ info_msg = info_msg_prefix + ' deepsea using zypper'
+ self.log.info(anchored(info_msg))
+ self.master_remote.run(args=[
+ 'sudo',
+ 'zypper',
+ '--non-interactive',
+ 'search',
+ '--details',
+ 'deepsea'
+ ])
+ self.master_remote.run(args=[
+ 'sudo',
+ 'zypper',
+ '--non-interactive',
+ '--no-gpg-checks',
+ 'install',
+ '--force',
+ '--no-recommends',
+ 'deepsea',
+ 'deepsea-cli',
+ 'deepsea-qa'
+ ])
+
+ def _deepsea_minions(self):
+ """
+ Set deepsea_minions pillar value
+ """
+ deepsea_minions_sls = '/srv/pillar/ceph/deepsea_minions.sls'
+ content = "deepsea_minions: \'*\'"
+ self.log.info("Clobbering {} with content ->{}<-".format(
+ deepsea_minions_sls, content))
+ cmd = 'sudo tee {}'.format(deepsea_minions_sls)
+ self.master_remote.sh(cmd, stdin=content)
+
+ def _deepsea_version(self):
+ if self.deepsea_cli:
+ try:
+ self.master_remote.run(args=[
+ 'type',
+ 'deepsea',
+ run.Raw('>'),
+ '/dev/null',
+ run.Raw('2>&1'),
+ ])
+ except CommandFailedError:
+ raise ConfigError(self.err_prefix + "Test case calls for "
+ "deepsea CLI, but it is not installed")
+ self.master_remote.run(args='deepsea --version')
+ else:
+ cmd_str = "sudo salt-run deepsea.version"
+ if self.quiet_salt:
+ cmd_str += " 2>/dev/null"
+ self.master_remote.run(args=cmd_str)
+
+ def _disable_gpg_checks(self):
+ cmd = (
+ 'sed -i -e \'/gpgcheck/ d\' /etc/zypp/repos.d/* ; '
+ 'sed -i -e \'/gpgkey/ d\' /etc/zypp/repos.d/* ; '
+ 'sed -i -e \'$a gpgcheck=0\' /etc/zypp/repos.d/*'
+ )
+ self.ctx.cluster.run(args=[
+ 'sudo', 'sh', '-c', cmd
+ ])
+
+ def _install_deepsea(self):
+ global deepsea_ctx
+ install_method = deepsea_ctx['install_method']
+ if install_method == 'package':
+ self.__install_deepsea_using_zypper()
+ elif install_method == 'source':
+ self.__install_deepsea_from_source()
+ else:
+ raise ConfigError(self.err_prefix + "internal error")
+ deepsea_ctx['deepsea_installed'] = True
+
+ def _master_python_version(self, py_version):
+ """
+ Determine if a given python version is installed on the Salt Master
+ node.
+ """
+ python_binary = 'python{}'.format(py_version)
+ installed = True
+ try:
+ self.master_remote.run(args=[
+ 'type',
+ python_binary,
+ run.Raw('>'),
+ '/dev/null',
+ run.Raw('2>&1'),
+ ])
+ except CommandFailedError:
+ installed = False
+ if installed:
+ self.master_remote.run(args=[
+ python_binary,
+ '--version'
+ ])
+ else:
+ self.log.info(
+ '{} not installed on master node'.format(python_binary)
+ )
+ return installed
+
+ def _maybe_apply_alternative_defaults(self):
+ global_yml = '/srv/pillar/ceph/stack/global.yml'
+ if self.alternative_defaults:
+ self.log.info(anchored("Applying alternative defaults"))
+ data = ''
+ for k, v in self.alternative_defaults.items():
+ data += "{}: {}\n".format(k, v)
+ if data:
+ sudo_append_to_file(
+ self.master_remote,
+ global_yml,
+ data,
+ )
+ dump_file_that_might_not_exist(self.master_remote, global_yml)
+
+ def _populate_deepsea_context(self):
+ global deepsea_ctx
+ deepsea_ctx['allow_python2'] = self.config.get('allow_python2', True)
+ deepsea_ctx['alternative_defaults'] = self.config.get('alternative_defaults', {})
+ if not isinstance(deepsea_ctx['alternative_defaults'], dict):
+ raise ConfigError(self.err_prefix + "alternative_defaults must be a dict")
+ deepsea_ctx['cli'] = self.config.get('cli', True)
+ deepsea_ctx['dashboard_ssl'] = self.config.get('dashboard_ssl', True)
+ deepsea_ctx['log_anchor'] = self.config.get('log_anchor', self.log_anchor_str)
+ if not isinstance(deepsea_ctx['log_anchor'], str):
+ self.log.warning(
+ "log_anchor was set to non-string value ->{}<-, "
+ "changing to empty string"
+ .format(deepsea_ctx['log_anchor'])
+ )
+ deepsea_ctx['log_anchor'] = ''
+ deepsea_ctx['drive_group'] = self.config.get("drive_group", "teuthology")
+ deepsea_ctx['quiet_salt'] = self.config.get('quiet_salt', True)
+ deepsea_ctx['salt_manager_instance'] = SaltManager(self.ctx)
+ deepsea_ctx['master_remote'] = (
+ deepsea_ctx['salt_manager_instance'].master_remote
+ )
+ deepsea_ctx['repositories'] = self.config.get("repositories", None)
+ deepsea_ctx['rgw_ssl'] = self.config.get('rgw_ssl', False)
+ self.__populate_install_method('install')
+
+ def __populate_install_method_basic(self, key):
+ if self.config[key] in ['package', 'pkg']:
+ deepsea_ctx['install_method'] = 'package'
+ elif self.config[key] in ['source', 'src']:
+ deepsea_ctx['install_method'] = 'source'
+ else:
+ raise ConfigError(self.err_prefix + "Unrecognized {} config "
+ "value ->{}<-".format(key, self.config[key]))
+
+ def __populate_install_method(self, key):
+ if key in self.config:
+ self.__populate_install_method_basic(key)
+ else:
+ if 'repo' in self.config or 'branch' in self.config:
+ deepsea_ctx['install_method'] = 'source'
+ else:
+ deepsea_ctx['install_method'] = 'package'
+
+ def _purge_osds(self):
+ # needed as long as teuthology install task purges /var/lib/ceph
+ # in its teardown phase
+ for _remote in self.ctx.cluster.remotes.keys():
+ self.log.info("stopping OSD services on {}"
+ .format(_remote.hostname))
+ _remote.run(args=[
+ 'sudo', 'sh', '-c',
+ 'systemctl stop ceph-osd.target ; sleep 10'
+ ])
+ self.log.info("unmounting OSD partitions on {}"
+ .format(_remote.hostname))
+ # unmount up to five OSDs
+ # bluestore XFS partition is vd?1
+ # filestore XFS partition is vd?2
+ for_loop = (
+ 'for f in vdb{pn} vdc{pn} vdd{pn} vde{pn} vdf{pn} ; '
+ 'do test -b /dev/$f && umount /dev/$f || true ; '
+ 'done'
+ )
+ for pn in [1, 2]:
+ _remote.run(args=['sudo', 'sh', '-c', for_loop.format(pn=pn)])
+
+ def first_storage_only_node(self):
+ if self.nodes_storage_only:
+ return self.nodes_storage_only[0]
+ else:
+ return None
+
+ def os_type_and_version(self):
+ os_type = self.ctx.config.get('os_type', 'unknown')
+ os_version = float(self.ctx.config.get('os_version', 0))
+ return (os_type, os_version)
+
+ def reboot_a_single_machine_now(self, remote, log_spec=None):
+ global reboot_tries
+ if not log_spec:
+ log_spec = "node {} reboot now".format(remote.hostname)
+ cmd_str = "sudo reboot"
+ remote_exec(
+ remote,
+ cmd_str,
+ self.log,
+ log_spec,
+ rerun=False,
+ quiet=True,
+ tries=reboot_tries,
+ )
+
+ def reboot_the_cluster_now(self, log_spec=None):
+ global reboot_tries
+ if not log_spec:
+ log_spec = "all nodes reboot now"
+ cmd_str = "salt \\* cmd.run reboot"
+ if self.quiet_salt:
+ cmd_str += " 2> /dev/null"
+ remote_exec(
+ self.master_remote,
+ cmd_str,
+ self.log,
+ log_spec,
+ rerun=False,
+ quiet=True,
+ tries=reboot_tries,
+ )
+ self.sm.ping_minions()
+
+ def role_type_present(self, role_type):
+ """
+ Method for determining if _any_ test node has the given role type
+ (teuthology role, not DeepSea role). Examples: "osd", "mon" (not
+ "mon.a").
+
+ If the role type is present, returns the hostname of the first remote
+ with that role type.
+
+ If the role type is absent, returns the empty string.
+ """
+ role_dict = self.role_lookup_table.get(role_type, {})
+        host = role_dict[next(iter(role_dict))] if role_dict else ''
+ return host
+
+ # Teuthology iterates through the tasks stanza twice: once to "execute"
+ # the tasks and a second time to "unwind" them. During the first pass
+ # it pushes each task onto a stack, and during the second pass it "unwinds"
+ # the stack, with the result being that the tasks are unwound in reverse
+    # order. During the execution phase it calls three methods, in order:
+    # the constructor, setup(), and begin(). During the unwinding phase it
+    # calls end() and then teardown(), also in order.
+
+ # The task does not have to implement any of the methods. If not
+ # implemented, the method in question will be called via inheritance.
+ # If a method _is_ implemented, the implementation can optionally call
+ # the parent's implementation of that method as well. This is illustrated
+ # here:
+ def setup(self):
+ # self.log.debug("beginning of setup method")
+ super(DeepSea, self).setup()
+ # self.log.debug("end of setup method")
+
+ def begin(self):
+ global deepsea_ctx
+ super(DeepSea, self).begin()
+ if self.reinstall_deepsea:
+ self._install_deepsea()
+ return None
+ self.sm.master_rpm_q('ceph')
+ self.sm.master_rpm_q('ceph-test')
+ self.sm.master_rpm_q('salt-master')
+ self.sm.master_rpm_q('salt-minion')
+ self.sm.master_rpm_q('salt-api')
+ # the Salt Master node is assumed to be running an already
+ # configured chrony for time synchronization within the cluster
+ # and DeepSea Stage 3 will point the minions at the Salt Master's
+ # chrony instance (?)
+ self.sm.master_rpm_q('chrony')
+ self.master_remote.run(
+ args="sudo systemctl status --lines=0 chronyd.service"
+ )
+ if self.allow_python2:
+ self._master_python_version(2)
+ else:
+ self.log.info(
+ 'allow_python2 is set to \'false\'. That means the '
+ 'test will now fail if a python2 binary is found on '
+ 'any of the test machines.'
+ )
+ self.ctx.cluster.run(args='if type python2 ; then false ; else true ; fi')
+ if not self._master_python_version(3):
+ raise ConfigError(self.err_prefix + "Python 3 not installed on master node")
+ if 'deepsea_installed' not in deepsea_ctx:
+ self._disable_gpg_checks()
+ self.master_remote.run(args="zypper lr -upEP")
+ self._install_deepsea()
+ assert deepsea_ctx['deepsea_installed']
+ self._deepsea_version()
+ self._deepsea_minions()
+ self._maybe_apply_alternative_defaults()
+ # Stage 0 does this, but we have no guarantee Stage 0 will run
+ self.sm.sync_pillar_data(quiet=self.quiet_salt)
+
+ def end(self):
+ self.log.debug("beginning of end method")
+ super(DeepSea, self).end()
+ success = self.ctx.summary.get('success', None)
+ if success is None:
+ self.log.warning("Problem with ctx summary key? ctx is {}".format(self.ctx))
+ if not success:
+ self.ctx.cluster.run(args="rpm -qa | sort")
+ self.sm.gather_logs('/home/farm/.npm/_logs', 'dashboard-e2e-npm')
+ self.sm.gather_logs('/home/farm/.protractor-report', 'dashboard-e2e-protractor')
+ self.log.debug("end of end method")
+
+ def teardown(self):
+ self.log.debug("beginning of teardown method")
+ super(DeepSea, self).teardown()
+ # #
+ # # the install task does "rm -r /var/lib/ceph" on every test node,
+ # # and that fails when there are OSDs running
+ # # FIXME - deprecated, remove after awhile
+ # self._purge_osds()
+ self.log.debug("end of teardown method")
+
+
+class CephConf(DeepSea):
+ """
+ Adds custom options to ceph.conf.
+ Edit yaml file between stage 2 and 3.
+ Example:
+ - deepsea.orch:
+ stage: 2
+ - deepsea.ceph_conf:
+ global:
+ mon lease: 15
+ mon lease ack timeout: 25
+ mon:
+ debug mon: 20
+ osd:
+ debug filestore: 20
+ - deepsea.orch:
+ stage: 3
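+
+    The config may also name any of the built-in "targets" from the targets
+    dict below; the mere presence of the key triggers the target, and targets
+    whose default is True run even when not named. Illustrative:
+
+    - deepsea.ceph_conf:
+        rbd: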
+ """
+
+ customize = {
+ "client": "client.conf",
+ "global": "global.conf",
+ "mds": "mds.conf",
+ "mgr": "mgr.conf",
+ "mon": "mon.conf",
+ "osd": "osd.conf",
+ }
+
+ deepsea_configuration_files = '/srv/salt/ceph/configuration/files'
+
+ err_prefix = "(ceph_conf subtask) "
+
+ targets = {
+ "mon_allow_pool_delete": True,
+ "osd_memory_target": True,
+ "small_cluster": True,
+ "rbd": False,
+ }
+
+ def __init__(self, ctx, config):
+ global deepsea_ctx
+ deepsea_ctx['logger_obj'] = log.getChild('ceph_conf')
+ self.name = 'deepsea.ceph_conf'
+ super(CephConf, self).__init__(ctx, config)
+ self.log.debug("munged config is {}".format(self.config))
+
+ def __ceph_conf_d_full_path(self, section):
+ ceph_conf_d = self.deepsea_configuration_files + '/ceph.conf.d'
+ if section in self.customize.keys():
+ return "{}/{}".format(ceph_conf_d, self.customize[section])
+
+ def __custom_ceph_conf(self, section, customizations):
+ for conf_item, conf_value in customizations.items():
+ data = '{} = {}\n'.format(conf_item, conf_value)
+ sudo_append_to_file(
+ self.master_remote,
+ self.__ceph_conf_d_full_path(section),
+ data
+ )
+ self.log.info(
+ "Adding to ceph.conf, {} section: {}"
+ .format(section, data)
+ )
+
+ def _customizations(self):
+ for section in self.customize.keys():
+ if section in self.config and isinstance(self.config[section], dict):
+ self.__custom_ceph_conf(section, self.config[section])
+
+ def _dump_customizations(self):
+ for section in self.customize.keys():
+ path = self.__ceph_conf_d_full_path(section)
+ dump_file_that_might_not_exist(self.master_remote, path)
+
+ def _list_ceph_conf_d(self):
+ self.master_remote.run(
+ args="ls -l {}".format(self.deepsea_configuration_files)
+ )
+
+ def _targets(self):
+ for target, default in self.targets.items():
+ method = getattr(self, target, None)
+ assert method, "target ->{}<- has no method".format(target)
+ if target in self.config:
+ method()
+ else:
+ if default:
+ method()
+
+ def mon_allow_pool_delete(self):
+ info_msg = "adjusted ceph.conf to allow pool deletes"
+ data = "mon allow pool delete = true\n"
+ sudo_append_to_file(
+ self.master_remote,
+ self.__ceph_conf_d_full_path("mon"),
+ data,
+ )
+ self.log.info(info_msg)
+
+ def osd_memory_target(self):
+ info_msg = "lowered osd_memory_target to 1GiB to facilitate testing in OpenStack"
+ data = "osd memory target = 1105322466" # https://tracker.ceph.com/issues/37507#note-4
+ sudo_append_to_file(
+ self.master_remote,
+ self.__ceph_conf_d_full_path("osd"),
+ data,
+ )
+ self.log.info(info_msg)
+
+ def rbd(self):
+ """
+ Delete "rbd default features" from ceph.conf. By removing this line, we
+ ensure that there will be no explicit "rbd default features" setting,
+ so the default will be used.
+ """
+ info_msg = "adjusted ceph.conf by removing 'rbd default features' line"
+ rbd_conf = '/srv/salt/ceph/configuration/files/rbd.conf'
+ cmd = 'sudo sed -i \'/^rbd default features =/d\' {}'.format(rbd_conf)
+ self.master_remote.run(args=cmd)
+ self.log.info(info_msg)
+
+ def small_cluster(self):
+ """
+ Apply necessary ceph.conf for small clusters
+ """
+ storage_nodes = len(self.nodes_storage)
+ info_msg = (
+ "adjusted ceph.conf for operation with {} storage node(s)"
+ .format(storage_nodes)
+ )
+ data = None
+ if storage_nodes == 1:
+ data = (
+ "mon pg warn min per osd = 16\n"
+ "osd pool default size = 2\n"
+ "osd crush chooseleaf type = 0 # failure domain == osd\n"
+ )
+ elif storage_nodes == 2 or storage_nodes == 3:
+ data = (
+ "mon pg warn min per osd = 8\n"
+ "osd pool default size = 2\n"
+ )
+ if data:
+ sudo_append_to_file(
+ self.master_remote,
+ self.__ceph_conf_d_full_path("global"),
+ data,
+ )
+ self.log.info(info_msg)
+
+ def begin(self):
+ self.log.info(anchored("Adding custom options to ceph.conf"))
+ self._targets()
+ self._customizations()
+ self._list_ceph_conf_d()
+ self._dump_customizations()
+
+ def end(self):
+ pass
+
+ def teardown(self):
+ pass
+
+
+class CreatePools(DeepSea):
+
+ err_prefix = "(create_pools subtask) "
+
+ def __init__(self, ctx, config):
+ global deepsea_ctx
+ deepsea_ctx['logger_obj'] = log.getChild('create_pools')
+ self.name = 'deepsea.create_pools'
+ super(CreatePools, self).__init__(ctx, config)
+ if not isinstance(self.config, dict):
+ raise ConfigError(self.err_prefix + "config must be a dictionary")
+
+ def begin(self):
+ self.log.info(anchored("pre-creating pools"))
+ args = []
+ for key in self.config:
+ if self.config[key] is None:
+ self.config[key] = True
+ if self.config[key]:
+ args.append(key)
+ args = list(set(args))
+ self.scripts.run(
+ self.master_remote,
+ 'create_all_pools_at_once.sh',
+ args=args,
+ )
+
+ def end(self):
+ pass
+
+ def teardown(self):
+ pass
+
+
+class Dummy(DeepSea):
+
+ def __init__(self, ctx, config):
+ global deepsea_ctx
+ deepsea_ctx['logger_obj'] = log.getChild('dummy')
+ self.name = 'deepsea.dummy'
+ super(Dummy, self).__init__(ctx, config)
+ self.log.debug("munged config is {}".format(self.config))
+
+ def begin(self):
+ self.log.debug("beginning of begin method")
+ global deepsea_ctx
+ self.log.info("deepsea_ctx == {}".format(deepsea_ctx))
+ self.log.debug("end of begin method")
+
+ def end(self):
+ pass
+
+ def teardown(self):
+ pass
+
+
+class HealthOK(DeepSea):
+ """
+ Copy health_ok.sh to Salt Master node and run commands.
+
+ This task understands the following config key:
+
+ commands:
+ [list of health-ok.sh commands]
+
+ The list of commands will be executed as root on the Salt Master node.
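+
+    Illustrative example (when the corresponding global deepsea options are
+    set, flags such as --cli and --ssl are appended to health-ok.sh commands
+    automatically):
+
+        tasks:
+        - deepsea.health_ok:
+            commands:
+              - health-ok.sh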
+ """
+
+ err_prefix = "(health_ok subtask) "
+
+ prefix = 'health-ok/'
+
+ def __init__(self, ctx, config):
+ global deepsea_ctx
+ deepsea_ctx['logger_obj'] = log.getChild('health_ok')
+ self.name = 'deepsea.health_ok'
+ super(HealthOK, self).__init__(ctx, config)
+
+ def _copy_health_ok(self):
+ """
+ Copy health-ok.sh from teuthology VM to master_remote
+ """
+ global deepsea_ctx
+ suite_path = self.ctx.config.get('suite_path')
+ log.info("suite_path is ->{}<-".format(suite_path))
+ misc.sh("ls -l {}".format(suite_path))
+ health_ok_path = suite_path + "/deepsea/health-ok"
+ misc.sh("test -d " + health_ok_path)
+ copy_directory_recursively(
+ health_ok_path, self.master_remote, "health-ok")
+ self.master_remote.run(args="pwd ; ls -lR health-ok")
+ deepsea_ctx['health_ok_copied'] = True
+
+ def _maybe_run_commands(self, commands):
+ if not commands:
+ self.log.warning(
+ "The health_ok task was run, but no commands were specified. "
+ "Doing nothing."
+ )
+ return None
+ for cmd_str in commands:
+ if not isinstance(cmd_str, str):
+ raise ConfigError(
+ self.err_prefix +
+ "command ->{}<- is not a string".format(cmd_str)
+ )
+ if cmd_str.startswith('health-ok.sh'):
+ cmd_str = self.prefix + cmd_str
+ if self.dev_env:
+ cmd_str = 'DEV_ENV=true ' + cmd_str
+ if self.deepsea_cli:
+ cmd_str += ' --cli'
+ if self.rgw_ssl:
+ cmd_str += ' --ssl'
+ self.master_remote.run(args=[
+ 'sudo', 'bash', '-c', cmd_str,
+ ])
+
+ def setup(self):
+ global deepsea_ctx
+ if 'health_ok_copied' not in deepsea_ctx:
+ self._copy_health_ok()
+ assert deepsea_ctx['health_ok_copied']
+
+ def begin(self):
+ commands = self.config.get('commands', [])
+ if not isinstance(commands, list):
+ raise ConfigError(self.err_prefix + "commands must be a list")
+ self._maybe_run_commands(commands)
+
+ def end(self):
+ pass
+
+ def teardown(self):
+ pass
+
+
+class Orch(DeepSea):
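+    """
+    Run a single DeepSea Stage or an arbitrary Salt orchestration on the
+    Salt Master node.
+
+    The config dict must contain either a 'stage' key (one of the values
+    listed in all_stages below) or a 'state_orch' key naming an
+    orchestration. Reboots can be forbidden via 'allow_reboots: false'.
+    Illustrative example:
+
+        tasks:
+        - deepsea.orch:
+            state_orch: ceph.maintenance.upgrade
+    """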
+
+ all_stages = [
+ "0", "prep", "1", "discovery", "2", "configure", "3", "deploy",
+ "4", "services", "5", "removal", "cephfs", "ganesha", "iscsi",
+ "openattic", "openstack", "radosgw", "validate"
+ ]
+
+ err_prefix = "(orch subtask) "
+
+ stage_synonyms = {
+ 0: 'prep',
+ 1: 'discovery',
+ 2: 'configure',
+ 3: 'deploy',
+ 4: 'services',
+ 5: 'removal',
+ }
+
+ def __init__(self, ctx, config):
+ global deepsea_ctx
+ deepsea_ctx['logger_obj'] = log.getChild('orch')
+ self.name = 'deepsea.orch'
+ super(Orch, self).__init__(ctx, config)
+ self.stage = str(self.config.get("stage", ''))
+ self.state_orch = str(self.config.get("state_orch", ''))
+ self.reboots_explicitly_forbidden = not self.config.get("allow_reboots", True)
+ self.survive_reboots = self._detect_reboots()
+ if not self.stage and not self.state_orch:
+ raise ConfigError(
+ self.err_prefix +
+ "nothing to do. Specify a value for 'stage' or "
+ "'state_orch' key in config dict"
+ )
+ if self.stage and self.stage not in self.all_stages:
+ raise ConfigError(
+ self.err_prefix +
+ "unrecognized Stage ->{}<-".format(self.stage)
+ )
+ self.log.debug("munged config is {}".format(self.config))
+
+ def __ceph_health_test(self):
+ cmd = 'sudo salt-call wait.until status=HEALTH_OK timeout=900 check=1'
+ if self.quiet_salt:
+ cmd += ' 2> /dev/null'
+ self.master_remote.run(args=cmd)
+
+ def __check_ceph_test_rpm_version(self):
+ """Checks rpm version for ceph and ceph-test; logs warning if differs"""
+ ceph_test_ver = get_rpm_pkg_version(self.master_remote, "ceph-test", self.log)
+ ceph_ver = get_rpm_pkg_version(self.master_remote, "ceph", self.log)
+ if ceph_test_ver != ceph_ver:
+ self.log.warning(
+ "ceph-test rpm version: {} differs from ceph version: {}"
+ .format(ceph_test_ver, ceph_ver))
+
+ def __check_salt_api_service(self):
+ base_cmd = 'sudo systemctl status --full --lines={} {}.service'
+ try:
+ self.master_remote.run(args=base_cmd.format('0', 'salt-api'))
+ except CommandFailedError:
+ self.master_remote.run(args=base_cmd.format('100', 'salt-api'))
+ raise
+ self.scripts.run(
+ self.master_remote,
+ 'salt_api_test.sh',
+ )
+
+ def __dump_drive_groups_yml(self):
+ self.scripts.run(
+ self.master_remote,
+ 'dump_drive_groups_yml.sh',
+ )
+
+ def __dump_lvm_status(self):
+ self.log.info("Dumping LVM status on storage nodes ->{}<-"
+ .format(self.nodes_storage))
+ for hostname in self.nodes_storage:
+ remote = self.remotes[hostname]
+ self.scripts.run(
+ remote,
+ 'lvm_status.sh',
+ )
+
+ def __is_stage_between_0_and_5(self):
+ """
+ This is implemented as a separate function because the stage specified
+ in the YAML might be a number or a string, and we really don't care
+ what Python sees it as.
+ """
+ num = self.stage
+ try:
+ num = int(num)
+ except ValueError:
+ return False
+ if num < 0 or num > 5:
+ return False
+ return True
+
+ def __log_stage_start(self, stage):
+ self.log.info(anchored(
+ "Running DeepSea Stage {} ({})"
+ .format(stage, self.stage_synonyms[stage])
+ ))
+
+ def __maybe_cat_ganesha_conf(self):
+ ganesha_host = self.role_type_present('ganesha')
+ if ganesha_host:
+ ganesha_remote = self.remotes[ganesha_host]
+ ganesha_remote.run(args="cat /etc/ganesha/ganesha.conf")
+
+ def __mgr_dashboard_module_deploy(self):
+ script = ("# deploy MGR dashboard module\n"
+ "set -ex\n"
+ "ceph mgr module enable dashboard\n")
+ if self.dashboard_ssl:
+ script += "ceph dashboard create-self-signed-cert\n"
+ else:
+ script += "ceph config set mgr mgr/dashboard/ssl false\n"
+ remote_run_script_as_root(
+ self.master_remote,
+ 'mgr_dashboard_module_deploy.sh',
+ script,
+ )
+
+ def __zypper_ps_with_possible_reboot(self):
+ if self.sm.all_minions_zypper_ps_requires_reboot():
+ log_spec = "Detected updates requiring reboot"
+ self.log.warning(anchored(log_spec))
+ if self.reboots_explicitly_forbidden:
+ self.log.info("Reboots explicitly forbidden in test configuration: not rebooting")
+ self.log.warning("Processes using deleted files may cause instability")
+ else:
+ self.log.warning(anchored("Rebooting the whole cluster now!"))
+ self.reboot_the_cluster_now(log_spec=log_spec)
+ assert not self.sm.all_minions_zypper_ps_requires_reboot(), \
+ "No more updates requiring reboot anywhere in the whole cluster"
+
+ def _configure_rgw(self):
+ self.log.debug("self.rgw_ssl is ->{}<-".format(self.rgw_ssl))
+ rgw_host = self.role_type_present('rgw')
+ if rgw_host:
+ self.log.debug(
+ "detected rgw host ->{}<-".format(rgw_host)
+ )
+ self.log.info(anchored("configuring RGW"))
+ self.scripts.run(
+ self.master_remote,
+ 'rgw_init.sh',
+ )
+ if self.rgw_ssl:
+ self.scripts.run(
+ self.master_remote,
+ 'rgw_init_ssl.sh',
+ )
+
+ # FIXME: run on each minion individually, and compare deepsea "roles"
+ # with teuthology roles!
+ def _pillar_items(self):
+ cmd = "sudo salt \\* pillar.items"
+ if self.quiet_salt:
+ cmd += " 2>/dev/null"
+ self.master_remote.run(args=cmd)
+
+ def _run_orch(self, orch_tuple):
+ """Run an orchestration. Dump journalctl on error."""
+ global reboot_tries
+ orch_type, orch_spec = orch_tuple
+ if orch_type == 'orch':
+ cli = False
+ elif orch_type == 'stage':
+ cli = self.deepsea_cli
+ orch_spec = 'ceph.stage.{}'.format(orch_spec)
+ else:
+ raise ConfigError(
+ self.err_prefix +
+ "Unrecognized orchestration type ->{}<-".format(orch_type)
+ )
+ cmd_str = None
+ if cli:
+ cmd_str = (
+ 'timeout 60m deepsea '
+ '--log-file=/var/log/salt/deepsea.log '
+ '--log-level=debug '
+ 'salt-run state.orch {} --simple-output'
+ ).format(orch_spec)
+ else:
+ cmd_str = (
+ 'timeout 60m salt-run '
+ '--no-color state.orch {}'
+ ).format(orch_spec)
+ if self.quiet_salt:
+ cmd_str += ' 2>/dev/null'
+ if self.dev_env:
+ cmd_str = 'DEV_ENV=true ' + cmd_str
+ tries = 0
+ if self.survive_reboots:
+ tries = reboot_tries
+ remote_exec(
+ self.master_remote,
+ cmd_str,
+ self.log,
+ "orchestration {}".format(orch_spec),
+ rerun=True,
+ quiet=True,
+ tries=tries,
+ )
+
+ def _detect_reboots(self):
+ """
+ Check for all known states/stages/alt-defaults that
+        may cause a reboot.
+        If there is an 'allow_reboot' flag, it takes precedence.
+ """
+ allow_reboot = self.config.get("allow_reboot", None)
+ if allow_reboot is not None:
+ self.log.info("Setting allow_reboot explicitly to {}"
+                          .format(allow_reboot))
+ return allow_reboot
+ orchs_prone_to_reboot = ['ceph.maintenance.upgrade']
+ if self.state_orch in orchs_prone_to_reboot:
+ self.log.warning("This orchestration may trigger a reboot")
+ return True
+ #
+ # The alternative_defaults stanza has been moved up to the deepsea task
+ # (for two reasons: because it's a global setting and also so we can do
+ # boilerplate overrides like qa/deepsea/boilerplate/disable_tuned.yaml).
+        # That change makes the following heuristic problematic: since
+ # all the alternative defaults are concentrated in one place, if any of
+ # them contains the string "reboot" (without preceding "no-"), **all**
+ # orchestrations in the test will run with survive_reboots, not just
+ # one.
+ for k, v in self.alternative_defaults.items():
+            if 'reboot' in str(v) and 'no-reboot' not in str(v):
+ self.log.warning("Orchestrations may trigger a reboot")
+ return True
+ self.log.info("Not allowing reboots for this orchestration")
+ return False
+
+ def _run_stage_0(self):
+ """
+ Run Stage 0
+ """
+ stage = 0
+ self.__log_stage_start(stage)
+ self._run_orch(("stage", stage))
+ self._pillar_items()
+ self.sm.all_minions_zypper_ref()
+ self.sm.all_minions_zypper_lu()
+ self.__zypper_ps_with_possible_reboot()
+ self.__check_salt_api_service()
+
+ def _run_stage_1(self):
+ """
+ Run Stage 1
+ """
+ stage = 1
+ self._configure_rgw()
+ self.__log_stage_start(stage)
+ self._run_orch(("stage", stage))
+
+ def _run_stage_2(self):
+ """
+ Run Stage 2
+ """
+ stage = 2
+ self.__log_stage_start(stage)
+ self._run_orch(("stage", stage))
+ self.__check_ceph_test_rpm_version()
+ self._pillar_items()
+ self.__dump_drive_groups_yml()
+
+ def _run_stage_3(self):
+ """
+ Run Stage 3
+ """
+ stage = 3
+ self.__log_stage_start(stage)
+ self._run_orch(("stage", stage))
+ # self.__mgr_dashboard_module_deploy()
+ self.sm.all_minions_cmd_run(
+ 'cat /etc/ceph/ceph.conf',
+ abort_on_fail=False
+ )
+ self.__dump_lvm_status()
+ self.scripts.run(
+ self.master_remote,
+ 'ceph_cluster_status.sh',
+ )
+ self.__ceph_health_test()
+
+ def _run_stage_4(self):
+ """
+ Run Stage 4
+ """
+ stage = 4
+ self.__log_stage_start(stage)
+ self._run_orch(("stage", stage))
+ self.__maybe_cat_ganesha_conf()
+ self.__ceph_health_test()
+
+ def _run_stage_5(self):
+ """
+ Run Stage 5
+ """
+ stage = 5
+ self.__log_stage_start(stage)
+ self._run_orch(("stage", 5))
+
+ def begin(self):
+ self.master_remote.sh('sudo salt-run jobs.active 2>/dev/null')
+ if self.state_orch:
+ self.log.info(anchored(
+ "running orchestration {}".format(self.state_orch)
+ ))
+ self._run_orch(("orch", self.state_orch))
+ else:
+ # it's not an orch, so it must be a stage
+ assert self.stage, "Neither state_orch, nor stage"
+ if self.__is_stage_between_0_and_5():
+                getattr(self, '_run_stage_{}'.format(self.stage))()
+ elif self.stage == 'prep':
+ self.log.info("Running Stage 0 instead of Stage \"prep\"")
+ self._run_stage_0()
+ elif self.stage == 'discovery':
+ self.log.info("Running Stage 1 instead of Stage \"discovery\"")
+ self._run_stage_1()
+ elif self.stage == 'configure':
+ self.log.info("Running Stage 2 instead of Stage \"configure\"")
+ self._run_stage_2()
+ elif self.stage == 'deploy':
+ self.log.info("Running Stage 3 instead of Stage \"deploy\"")
+ self._run_stage_3()
+ elif self.stage == 'services':
+ self.log.info("Running Stage 4 instead of Stage \"services\"")
+ self._run_stage_4()
+ elif self.stage == 'removal':
+ self.log.info("Running Stage 5 instead of Stage \"removal\"")
+ self._run_stage_5()
+ elif self.stage in self.all_stages:
+ self.log.info("Running non-numeric Stage \"{}\"".format(self.stage))
+ self._run_orch(("stage", self.stage))
+ else:
+ raise ConfigError(
+ self.err_prefix +
+ 'unsupported stage ->{}<-'.format(self.stage)
+ )
+ self.master_remote.sh('sudo salt-run jobs.active 2>/dev/null')
+
+ def end(self):
+ pass
+
+ def teardown(self):
+ pass
+
+
+class Policy(DeepSea):
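+    """
+    Generate policy.cfg on the Salt Master node from the results of
+    teuthology role introspection.
+
+    When the main deepsea task's 'drive_group' key is a dict, it is dumped
+    verbatim to drive_groups.yml. A minimal sketch of such a dict (the
+    authoritative schema is defined by DeepSea/ceph-volume, so treat this
+    as illustrative only):
+
+        drive_group_default:
+          target: '*'
+          data_devices:
+            all: true
+    """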
+
+ err_prefix = "(policy subtask) "
+
+ def __init__(self, ctx, config):
+ global deepsea_ctx
+ deepsea_ctx['logger_obj'] = log.getChild('policy')
+ self.name = 'deepsea.policy'
+ super(Policy, self).__init__(ctx, config)
+ self.policy_cfg = ''
+ self.munge_policy = self.config.get('munge_policy', {})
+
+ def __build_drive_group_x(self, drive_group):
+ # generate our own drive_group.yml (as opposed to letting
+ # DeepSea generate one for us)
+ if not self.nodes_storage:
+ raise ConfigError(self.err_prefix + "no osd roles configured, "
+ "but at least one of these is required.")
+ self.log.debug("building drive group ->{}<- for {} storage nodes"
+ .format(drive_group, len(self.nodes_storage)))
+ if drive_group == 'teuthology':
+ raise ConfigError(self.err_prefix + "\"teuthology\" drive group "
+ "generation not implemented yet")
+ elif drive_group == 'custom':
+ self.__roll_out_drive_group()
+ else:
+            raise ConfigError(self.err_prefix + "unknown drive group ->{}<-"
+                              .format(self.drive_group))
+
+ def __roll_out_drive_group(self, fpath="/srv/salt/ceph/configuration/files/drive_groups.yml"):
+ misc.sudo_write_file(
+ self.master_remote,
+ fpath,
+ yaml.dump(self.drive_group),
+ perms="0644",
+ )
+
+ def _build_base(self):
+ """
+ policy.cfg boilerplate
+ """
+ self.policy_cfg = ("# policy.cfg generated by deepsea.policy subtask\n"
+ "# Cluster assignment\n"
+ "cluster-ceph/cluster/*.sls\n"
+ "# Common configuration\n"
+ "config/stack/default/global.yml\n"
+ "config/stack/default/ceph/cluster.yml\n"
+ "# Role assignment - master\n"
+ "role-master/cluster/{}.sls\n"
+ "# Role assignment - admin\n"
+ "role-admin/cluster/*.sls\n"
+ .format(self.master_remote.hostname))
+
+ def _build_drive_groups_yml(self):
+ """
+        Generate a special-purpose drive_groups.yml. A string value must be
+        either "default" (a no-op) or "teuthology" (not implemented yet:
+        raises ConfigError); a dict is rolled out verbatim as a custom
+        drive group.
+ """
+ if isinstance(self.drive_group, str):
+ if self.drive_group == 'teuthology':
+ self.__build_drive_group_x('teuthology')
+ elif self.drive_group == 'default':
+ pass
+ else:
+                raise ConfigError(self.err_prefix + "unknown drive group ->{}<-"
+                                  .format(self.drive_group))
+ elif isinstance(self.drive_group, dict):
+ self.__build_drive_group_x('custom')
+ else:
+ raise ConfigError(self.err_prefix + "drive_group config param "
+ "must be a string or a dict")
+
+ def _build_x(self, role_type, required=False):
+ no_roles_of_type = "no {} roles configured".format(role_type)
+ but_required = ", but at least one of these is required."
+ role_dict = {}
+ if role_type in self.role_lookup_table:
+ role_dict = self.role_lookup_table[role_type]
+ elif required:
+ raise ConfigError(self.err_prefix + no_roles_of_type + but_required)
+ else:
+ self.log.debug(no_roles_of_type)
+ return None
+ self.log.debug("generating policy.cfg lines for {} based on {}"
+ .format(role_type, role_dict))
+ if required:
+ if len(role_dict.keys()) < 1:
+ raise ConfigError(self.err_prefix + no_roles_of_type + but_required)
+ for role_spec, remote_name in role_dict.items():
+ if role_type == 'osd':
+ role_type = 'storage'
+ self.policy_cfg += ('# Role assignment - {}\n'
+ 'role-{}/cluster/{}.sls\n'
+ .format(role_spec, role_type, remote_name))
+
+ def _cat_policy_cfg(self):
+ """
+ Dump the final policy.cfg file to teuthology log.
+ """
+ cmd_str = "cat {}/policy.cfg".format(proposals_dir)
+ self.master_remote.run(args=cmd_str)
+
+ def _write_policy_cfg(self):
+ """
+ Write policy_cfg to master remote.
+ """
+ misc.sudo_write_file(
+ self.master_remote,
+ proposals_dir + "/policy.cfg",
+ self.policy_cfg,
+ perms="0644",
+ owner="salt",
+ )
+ cmd_str = "ls -l {}/policy.cfg".format(proposals_dir)
+ self.master_remote.run(args=cmd_str)
+
+ def begin(self):
+ """
+ Generate policy.cfg from the results of role introspection
+ """
+ # FIXME: this should be run only once - check for that and
+ # return an error otherwise
+ if self.munge_policy:
+ for k, v in self.munge_policy.items():
+ if k == 'remove_storage_only_node':
+ delete_me = self.first_storage_only_node()
+ if not delete_me:
+ raise ConfigError(
+ self.err_prefix + "remove_storage_only_node "
+ "requires a storage-only node, but there is no such"
+ )
+ raise ConfigError(self.err_prefix + (
+ "munge_policy is a kludge - get rid of it! "
+ "This test needs to be reworked - deepsea.py "
+ "does not currently have a proper way of "
+ "changing (\"munging\") the policy.cfg file."
+ ))
+ else:
+ raise ConfigError(self.err_prefix + "unrecognized "
+ "munge_policy directive {}".format(k))
+ else:
+ self.log.info(anchored("generating policy.cfg"))
+ self._build_base()
+ self._build_x('mon', required=True)
+ self._build_x('mgr', required=True)
+ self._build_x('osd', required=True)
+ self._build_drive_groups_yml()
+ self._build_x('mds')
+ self._build_x('rgw')
+ self._build_x('igw')
+ self._build_x('ganesha')
+ self._build_x('prometheus')
+ self._build_x('grafana')
+ self._write_policy_cfg()
+ self._cat_policy_cfg()
+
+ def end(self):
+ pass
+
+ def teardown(self):
+ pass
+
+
+class Reboot(DeepSea):
+ """
+ A class that does nothing but unconditionally reboot - either a single node
+ or the whole cluster.
+
+ Configuration (reboot a single node)
+
+ tasks:
+ - deepsea.reboot:
+ client.salt_master:
+
+ Configuration (reboot the entire cluster)
+
+ tasks:
+ - deepsea.reboot:
+ all:
+ """
+
+ err_prefix = '(reboot subtask) '
+
+ def __init__(self, ctx, config):
+ global deepsea_ctx
+ deepsea_ctx['logger_obj'] = log.getChild('reboot')
+ self.name = 'deepsea.reboot'
+ super(Reboot, self).__init__(ctx, config)
+
+ def begin(self):
+ if not self.config:
+ self.log.warning("empty config: nothing to do")
+ return None
+ config_keys = len(self.config)
+ if config_keys > 1:
+ raise ConfigError(
+ self.err_prefix +
+ "config dictionary may contain only one key. "
+ "You provided ->{}<- keys ({})".format(len(config_keys), config_keys)
+ )
+        role_spec = list(self.config.keys())[0]
+ if role_spec == "all":
+ remote = self.ctx.cluster
+ log_spec = "all nodes reboot now"
+ self.log.warning(anchored(log_spec))
+ self.reboot_the_cluster_now(log_spec=log_spec)
+ else:
+ remote = get_remote_for_role(self.ctx, role_spec)
+ log_spec = "node {} reboot now".format(remote.hostname)
+ self.log.warning(anchored(log_spec))
+ self.reboot_a_single_machine_now(remote, log_spec=log_spec)
+
+ def end(self):
+ pass
+
+ def teardown(self):
+ pass
+
+
+class Repository(DeepSea):
+ """
+ A class for manipulating zypper repos on the test nodes.
+ All it knows how to do is wipe out the existing repos (i.e. rename them to
+ foo.repo.bck) and replace them with a given set of new ones.
+
+ Configuration (one node):
+
+ tasks:
+ - deepsea.repository:
+ client.salt_master:
+ - name: repo_foo
+ url: http://example.com/foo/
+ - name: repo_bar
+ url: http://example.com/bar/
+
+ Configuration (all nodes):
+
+ tasks:
+ - deepsea.repository:
+ all:
+ - name: repo_foo
+ url: http://example.com/foo/
+ - name: repo_bar
+ url: http://example.com/bar/
+
+ To eliminate the need to duplicate the repos array, it can be specified
+ in the configuration of the main deepsea task. Then the yaml will look
+ like so:
+
+ tasks:
+ - deepsea:
+ repositories:
+ - name: repo_foo
+ url: http://example.com/foo/
+ - name: repo_bar
+ url: http://example.com/bar/
+ ...
+ - deepsea.repository:
+ client.salt_master:
+ ...
+ - deepsea.repository:
+ all:
+
+ One last note: we try to be careful and not clobber the repos twice.
+ """
+
+ err_prefix = '(repository subtask) '
+
+    def __init__(self, ctx, config):
+        global deepsea_ctx
+        deepsea_ctx['logger_obj'] = log.getChild('repository')
+ self.name = 'deepsea.repository'
+ super(Repository, self).__init__(ctx, config)
+
+ def _repositories_to_remote(self, remote):
+ args = []
+ for repo in self.repositories:
+ args += [repo['name'] + ':' + repo['url']]
+ self.scripts.run(
+ remote,
+ 'clobber_repositories.sh',
+ args=args
+ )
+
+ def begin(self):
+ if not self.config:
+ self.log.warning("empty config: nothing to do")
+ return None
+ config_keys = len(self.config)
+ if config_keys > 1:
+ raise ConfigError(
+ self.err_prefix +
+ "config dictionary may contain only one key. "
+ "You provided ->{}<- keys ({})".format(len(config_keys), config_keys)
+ )
+        role_spec, repositories = list(self.config.items())[0]
+ if role_spec == "all":
+ remote = self.ctx.cluster
+ else:
+ remote = get_remote_for_role(self.ctx, role_spec)
+ if repositories is None:
+ assert self.repositories, \
+ "self.repositories must be populated if role_dict is None"
+ else:
+ assert isinstance(repositories, list), \
+ "value of role key must be a list of repositories"
+ self.repositories = repositories
+ if not self.repositories:
+ raise ConfigError(
+ self.err_prefix +
+ "No repositories specified. Bailing out!"
+ )
+ self._repositories_to_remote(remote)
+
+ def end(self):
+ pass
+
+ def teardown(self):
+ pass
+
+
+class Script(DeepSea):
+ """
+ A class that runs a bash script on the node with given role, or on all nodes.
+
+ Example 1 (run foo_bar.sh, with arguments, on Salt Master node):
+
+ tasks:
+ - deepsea.script:
+ client.salt_master:
+ foo_bar.sh:
+ args:
+ - 'foo'
+ - 'bar'
+
+ Example 2 (run foo_bar.sh, with no arguments, on all test nodes)
+
+ tasks:
+ - deepsea.script:
+ all:
+ foo_bar.sh:
+ """
+
+ err_prefix = '(script subtask) '
+
+ def __init__(self, ctx, config):
+ global deepsea_ctx
+ deepsea_ctx['logger_obj'] = log.getChild('script')
+ self.name = 'deepsea.script'
+ super(Script, self).__init__(ctx, config)
+
+ def begin(self):
+ if not self.config:
+ self.log.warning("empty config: nothing to do")
+ return None
+ config_keys = len(self.config)
+ if config_keys > 1:
+ raise ConfigError(
+ self.err_prefix +
+ "config dictionary may contain only one key. "
+ "You provided ->{}<- keys ({})".format(len(config_keys), config_keys)
+ )
+        role_spec, role_dict = list(self.config.items())[0]
+        role_keys = len(role_dict)
+        if role_keys > 1:
+            raise ConfigError(
+                self.err_prefix +
+                "role dictionary may contain only one key. "
+                "You provided ->{}<- keys ({})".format(role_keys, list(role_dict.keys()))
+            )
+ )
+ if role_spec == "all":
+ remote = self.ctx.cluster
+ else:
+ remote = get_remote_for_role(self.ctx, role_spec)
+        script_spec, script_dict = list(role_dict.items())[0]
+        args = []
+        if isinstance(script_dict, dict):
+            if len(script_dict) > 1 or list(script_dict.keys())[0] != 'args':
+                raise ConfigError(
+                    self.err_prefix +
+                    'script dicts may only contain one key (args)'
+                )
+            args = list(script_dict.values())[0] or []
+ if not isinstance(args, list):
+ raise ConfigError(self.err_prefix + 'script args must be a list')
+ self.scripts.run(
+ remote,
+ script_spec,
+ args=args
+ )
+
+ def end(self):
+ pass
+
+ def teardown(self):
+ pass
+
+
+class Toolbox(DeepSea):
+ """
+ A class that contains various miscellaneous routines. For example:
+
+ tasks:
+ - deepsea.toolbox:
+ foo:
+
+ Runs the "foo" tool without any options.
+ """
+
+ err_prefix = '(toolbox subtask) '
+
+ def __init__(self, ctx, config):
+ global deepsea_ctx
+ deepsea_ctx['logger_obj'] = log.getChild('toolbox')
+ self.name = 'deepsea.toolbox'
+ super(Toolbox, self).__init__(ctx, config)
+
+ def _assert_store(self, file_or_blue, teuth_role):
+ """
+ file_or_blue can be either 'bluestore' or 'filestore'
+ teuth_role is an 'osd' role uniquely specifying one of the storage nodes.
+ Enumerates the OSDs on the node and asserts that each of these OSDs is
+ either filestore or bluestore, as appropriate.
+ """
+ remote = get_remote_for_role(self.ctx, teuth_role)
+ osds = enumerate_osds(remote, self.log)
+ assert osds, "No OSDs were captured, so please check if they are active"
+ self.log.info("Checking if OSDs ->{}<- are ->{}<-".format(osds, file_or_blue))
+ all_green = True
+ for osd in osds:
+ store = remote.sh("sudo ceph osd metadata {} | jq -r .osd_objectstore"
+ .format(osd)).rstrip()
+ self.log.info("OSD {} is ->{}<-.".format(osd, store))
+ if store != file_or_blue:
+ self.log.warning("OSD {} has objectstore ->{}<- which is not ->{}<-".
+ format(osd, store, file_or_blue))
+ all_green = False
+ assert all_green, "One or more OSDs is not {}".format(file_or_blue)
+
+ def rebuild_node(self, **kwargs):
+ """
+ Expects a teuthology 'osd' role specifying one of the storage nodes.
+        Then runs 'rebuild.node' on the node; this can be used for filestore-to-bluestore
+ migration if you run it after you change the drive_groups.yml file.
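+
+        Illustrative example:
+
+            tasks:
+            - deepsea.toolbox:
+                rebuild_node:
+                  osd.0: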
+ """
+        role = next(iter(kwargs))
+ remote = get_remote_for_role(self.ctx, role)
+ osds_before_rebuild = len(enumerate_osds(remote, self.log))
+ self.log.info("Disengaging safety to prepare for rebuild")
+ self.master_remote.sh("sudo salt-run disengage.safety 2>/dev/null")
+ self.log.info("Rebuilding node {}".format(remote.hostname))
+ self.master_remote.sh("sudo salt-run rebuild.node {} 2>/dev/null".format(remote.hostname))
+ with safe_while(sleep=15, tries=10,
+ action="ceph osd tree") as proceed:
+ while proceed():
+ self.master_remote.sh("sudo ceph osd tree || true")
+ if osds_before_rebuild == len(enumerate_osds(remote, self.log)):
+ break
+
+ def _noout(self, add_or_rm, teuth_role):
+ """
+ add_or_rm is either 'add' or 'rm'
+ teuth_role is an 'osd' role uniquely specifying one of the storage nodes.
+ Enumerates the OSDs on the node and does 'add-noout' on each of them.
+ """
+ remote = get_remote_for_role(self.ctx, teuth_role)
+ osds = enumerate_osds(remote, self.log)
+ self.log.info("Running {}-noout for OSDs ->{}<-".format(add_or_rm, osds))
+ for osd in osds:
+ remote.sh("sudo ceph osd {}-noout osd.{}".format(add_or_rm, osd))
+
+ def add_noout(self, **kwargs):
+ """
+ Expects one key - a teuthology 'osd' role specifying one of the storage nodes.
+ Enumerates the OSDs on this node and does 'add-noout' on each of them.
+ """
+        role = next(iter(kwargs))
+ self._noout("add", role)
+
+ def assert_bluestore(self, **kwargs):
+ """
+ Expects one key - a teuthology 'osd' role specifying one of the storage nodes.
+ Enumerates the OSDs on this node and asserts that each one is a bluestore OSD.
+ """
+        role = next(iter(kwargs))
+ self._assert_store("bluestore", role)
+
+ def assert_filestore(self, **kwargs):
+ """
+ Expects one key - a teuthology 'osd' role specifying one of the storage nodes.
+ Enumerates the OSDs on this node and asserts that each one is a filestore OSD.
+ """
+ role = list(kwargs.keys())[0]
+ self._assert_store("filestore", role)
+
+ def rm_noout(self, **kwargs):
+ """
+ Expects one key - a teuthology 'osd' role specifying one of the storage nodes.
+ Enumerates the OSDs on this node and does 'rm-noout' on each of them.
+ """
+ role = list(kwargs.keys())[0]
+ self._noout("rm", role)
+
+ def wait_for_health_ok(self, **kwargs):
+ """
+ Wait for HEALTH_OK - stop after HEALTH_OK is reached or timeout expires.
+ Timeout defaults to 120 minutes, but can be specified by providing a
+ configuration option. For example:
+
+ tasks:
+ - deepsea.toolbox:
+ wait_for_health_ok:
+ timeout_minutes: 90
+ """
+ if kwargs:
+ self.log.info("wait_for_health_ok: Considering config dict ->{}<-".format(kwargs))
+ config_keys = len(kwargs)
+ if config_keys > 1:
+ raise ConfigError(
+ self.err_prefix +
+ "wait_for_health_ok config dictionary may contain only one key. "
+ "You provided ->{}<- keys ({})".format(len(config_keys), config_keys)
+ )
+ timeout_spec, timeout_minutes = list(kwargs.items())[0]
+ else:
+ timeout_minutes = 120
+ self.log.info("Waiting up to ->{}<- minutes for HEALTH_OK".format(timeout_minutes))
+ remote = get_remote_for_role(self.ctx, "client.salt_master")
+ cluster_status = ""
+ for minute in range(1, timeout_minutes+1):
+ remote.sh("sudo ceph status")
+ cluster_status = remote.sh(
+ "sudo ceph health detail --format json | jq -r '.status'"
+ ).rstrip()
+ if cluster_status == "HEALTH_OK":
+ break
+ self.log.info("Waiting for one minute for cluster to reach HEALTH_OK"
+ "({} minutes left to timeout)"
+ .format(timeout_minutes + 1 - minute))
+ time.sleep(60)
+ if cluster_status == "HEALTH_OK":
+ self.log.info(anchored("Cluster is healthy"))
+ else:
+ raise RuntimeError("Cluster still not healthy (current status ->{}<-) "
+ "after reaching timeout"
+ .format(cluster_status))
+
+ def begin(self):
+ if not self.config:
+ self.log.warning("empty config: nothing to do")
+ return None
+ self.log.info("Considering config dict ->{}<-".format(self.config))
+ config_keys = len(self.config)
+ if config_keys > 1:
+ raise ConfigError(
+ self.err_prefix +
+ "config dictionary may contain only one key. "
+ "You provided ->{}<- keys ({})".format(len(config_keys), config_keys)
+ )
+ tool_spec, kwargs = list(self.config.items())[0]
+ kwargs = {} if not kwargs else kwargs
+ method = getattr(self, tool_spec, None)
+ if method:
+ self.log.info("About to run tool ->{}<- from toolbox with config ->{}<-"
+ .format(tool_spec, kwargs))
+ method(**kwargs)
+ else:
+ raise ConfigError(self.err_prefix + "No such tool ->{}<- in toolbox"
+ .format(tool_spec))
+
+ def end(self):
+ pass
+
+ def teardown(self):
+ pass
+
+
+class Validation(DeepSea):
+ """
+ A container for "validation tests", which are understood to mean tests that
+ validate the Ceph cluster (just) deployed by DeepSea.
+
+ The tests implemented in this class should be small and not take long to
+ finish. Anything more involved should be implemented in a separate task
+ (see ses_qa.py for an example of such a task).
+
+ The config YAML is a dictionary in which the keys are the names of tests
+ (methods to be run) and the values are the config dictionaries of each test
+ to be run.
+
+ Validation tests with lines like this
+
+ self._apply_config_default("foo_test", None)
+
+ are triggered by default, while others have to be explicitly mentioned in
+ the YAML.
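+
+ For example, a sketch that runs the default validation tests plus the
+ Ganesha smoke test (which needs a "ganeshaclient" role and an rgw or
+ mds role):
+
+ tasks:
+ - deepsea.validation:
+ ganesha_smoke_test: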
+ """
+
+ err_prefix = '(validation subtask) '
+
+ def __init__(self, ctx, config):
+ global deepsea_ctx
+ deepsea_ctx['logger_obj'] = log.getChild('validation')
+ self.name = 'deepsea.validation'
+ super(Validation, self).__init__(ctx, config)
+ self._apply_config_default("ceph_version_sanity", None)
+ self._apply_config_default("rados_striper", None)
+ self._apply_config_default("systemd_units_active", None)
+
+ def _apply_config_default(self, validation_test, default_config):
+ """
+ Use to activate tests that should always be run.
+ """
+ self.config[validation_test] = self.config.get(validation_test, default_config)
+
+ def ceph_version_sanity(self, **kwargs):
+ self.scripts.run(
+ self.master_remote,
+ 'ceph_version_sanity.sh',
+ )
+
+ def ganesha_smoke_test(self, **kwargs):
+ client_host = self.role_type_present("ganeshaclient")
+ rgw = self.role_type_present("rgw")
+ mds = self.role_type_present("mds")
+ args = []
+ if mds:
+ args += ['--mds']
+ if rgw:
+ args += ['--rgw']
+ if not args:
+ raise ConfigError(self.err_prefix +
+ "ganesha_smoke_test needs an rgw or mds role, but neither was given")
+ if client_host:
+ self.master_remote.sh("sudo salt-run ganesha.report 2>/dev/null || true")
+ remote = self.remotes[client_host]
+ self.scripts.run(
+ remote,
+ 'ganesha_smoke_test.sh',
+ args=args,
+ )
+ self.master_remote.sh("sudo salt-run ganesha.report 2>/dev/null || true")
+ else:
+ raise ConfigError(self.err_prefix +
+ "ganesha_smoke_test needs a client role, but none was given")
+
+ def grafana_service_check(self, **kwargs):
+ grafana = self.role_type_present("grafana")
+ if grafana:
+ remote = self.remotes[grafana]
+ remote.sh('sudo systemctl status grafana-server.service')
+ else:
+ raise ConfigError(self.err_prefix +
+ "grafana_service_check needs a grafana role, but none was given")
+
+ def iscsi_smoke_test(self, **kwargs):
+ igw_host = self.role_type_present("igw")
+ if igw_host:
+ remote = self.remotes[igw_host]
+ self.scripts.run(
+ remote,
+ 'iscsi_smoke_test.sh',
+ )
+
+ def rados_striper(self, **kwargs):
+ """
+ Verify that the rados binary does (or does not) have the --striper
+ option, as appropriate for the OS under test
+ """
+ cmd_str = 'sudo rados --striper 2>&1 || true'
+ output = self.master_remote.sh(cmd_str)
+ os_type, os_version = self.os_type_and_version()
+ self.log.info(
+ "Checking for expected output on OS ->{}<-"
+ .format(os_type + " " + str(os_version))
+ )
+ if os_type == 'sle' and os_version >= 15:
+ assert 'unrecognized command --striper' in output, \
+ "ceph is compiled without libradosstriper"
+ else:
+ assert '--striper' not in output, \
+ "ceph is compiled with libradosstriper"
+ self.log.info("OK")
+
+ def rados_write_test(self, **kwargs):
+ self.scripts.run(
+ self.master_remote,
+ 'rados_write_test.sh',
+ )
+
+ def systemd_units_active(self, **kwargs):
+ """
+ For all cluster nodes, determine which systemd services
+ should be running and assert that the respective units
+ are in "active" state.
+ """
+ # map role types to systemd units
+ unit_map = {
+ "mds": "ceph-mds@",
+ "mgr": "ceph-mgr@",
+ "mon": "ceph-mon@",
+ "osd": "ceph-osd@",
+ "rgw": "ceph-radosgw@",
+ "ganesha": "nfs-ganesha"
+ }
+ # for each machine in the cluster
+ for idx, rtl in enumerate(self.role_types):
+ node = self.nodes[idx]
+ script = ("# validate systemd units on {}\n"
+ "set -ex\n").format(node)
+ self.log.info("Machine {} ({}) has role types {}"
+ .format(idx, node, ','.join(rtl)))
+ remote = self.remotes[node]
+ run_script = False
+ for role_type in rtl:
+ if role_type in unit_map:
+ script += ("systemctl --state=active --type=service list-units "
+ "| grep -e '^{}'\n".format(unit_map[role_type]))
+ run_script = True
+ else:
+ self.log.debug("Ignoring role_type {} which has no associated "
+ "systemd unit".format(role_type))
+ if run_script:
+ remote_run_script_as_root(
+ remote,
+ "systemd_validation.sh",
+ script
+ )
+
+ def begin(self):
+ self.log.debug("Processing tests: ->{}<-".format(self.config.keys()))
+ for method_spec, kwargs in self.config.items():
+ kwargs = {} if not kwargs else kwargs
+ if not isinstance(kwargs, dict):
+ raise ConfigError(self.err_prefix + "Method config must be a dict")
+ self.log.info(anchored(
+ "Running validation test {} with config ->{}<-"
+ .format(method_spec, kwargs)
+ ))
+ method = getattr(self, method_spec, None)
+ if method:
+ method(**kwargs)
+ else:
+ raise ConfigError(self.err_prefix + "No such method ->{}<-"
+ .format(method_spec))
+
+ def end(self):
+ pass
+
+ def teardown(self):
+ pass
+
+
+task = DeepSea
+ceph_conf = CephConf
+create_pools = CreatePools
+dummy = Dummy
+health_ok = HealthOK
+orch = Orch
+policy = Policy
+reboot = Reboot
+repository = Repository
+script = Script
+toolbox = Toolbox
+validation = Validation
--- /dev/null
+'''
+Task that deploys a Salt cluster on all the nodes
+
+Linter:
+ flake8 --max-line-length=100
+'''
+import logging
+
+from salt_manager import SaltManager
+from util import remote_exec
+from teuthology.exceptions import ConfigError
+from teuthology.misc import (
+ delete_file,
+ move_file,
+ sh,
+ sudo_write_file,
+ write_file,
+ )
+from teuthology.orchestra import run
+from teuthology.task import Task
+
+log = logging.getLogger(__name__)
+
+
+class Salt(Task):
+ """
+ Deploy a Salt cluster on all remotes (test nodes).
+
+ This task assumes all relevant Salt packages (salt, salt-master,
+ salt-minion, salt-api, python-salt, etc. - whatever they may be called for
+ the OS in question) are already installed. This should be done using the
+ install task.
+
+ One, and only one, of the machines must have a role corresponding to the
+ value of the variable salt.sm.master_role (see salt_manager.py). This node
+ is referred to as the "Salt Master", or the "master node".
+
+ The task starts the Salt Master daemon on the master node, and Salt Minion
+ daemons on all the nodes (including the master node), and ensures that the
+ minions are properly linked to the master. Finally, it tries to ping all
+ the minions from the Salt Master.
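+
+ A minimal usage sketch (the install task must run first to provide the
+ Salt packages):
+
+ tasks:
+ - install:
+ - salt: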
+
+ :param ctx: the argparse.Namespace object
+ :param config: the config dict
+ """
+
+ def __init__(self, ctx, config):
+ super(Salt, self).__init__(ctx, config)
+ log.debug("beginning of constructor method")
+ log.debug("munged config is {}".format(self.config))
+ self.remotes = self.cluster.remotes
+ self.sm = SaltManager(self.ctx)
+ self.master_remote = self.sm.master_remote
+ log.debug("end of constructor method")
+
+ def _disable_autodiscovery(self):
+ """
+ It's supposed to be off by default, but just in case.
+ """
+ self.sm.master_remote.run(args=[
+ 'sudo', 'sh', '-c',
+ 'echo discovery: false >> /etc/salt/master'
+ ])
+ for rem in self.remotes.keys():
+ rem.run(args=[
+ 'sudo', 'sh', '-c',
+ 'echo discovery: false >> /etc/salt/minion'
+ ])
+
+ def _generate_minion_keys(self):
+ '''
+ Generate minion key on salt master to be used to preseed this cluster's
+ minions.
+ '''
+ for rem in self.remotes.keys():
+ minion_id = rem.hostname
+ log.info('Ensuring that minion ID {} has a keypair on the master'
+ .format(minion_id))
+ # mode 777 is necessary to be able to generate keys reliably
+ # we hit this before:
+ # https://github.com/saltstack/salt/issues/31565
+ self.sm.master_remote.run(args=[
+ 'sudo',
+ 'sh',
+ '-c',
+ 'test -d salt || mkdir -m 777 salt',
+ ])
+ self.sm.master_remote.run(args=[
+ 'sudo',
+ 'sh',
+ '-c',
+ 'test -d salt/minion-keys || mkdir -m 777 salt/minion-keys',
+ ])
+ self.sm.master_remote.run(args=[
+ 'sudo',
+ 'sh',
+ '-c',
+ ('if [ ! -f salt/minion-keys/{mid}.pem ]; then '
+ 'salt-key --gen-keys={mid} '
+ '--gen-keys-dir=salt/minion-keys/; '
+ ' fi').format(mid=minion_id),
+ ])
+
+ def _preseed_minions(self):
+ '''
+ Preseed minions with generated and accepted keys; set minion id
+ to the remote's hostname.
+ '''
+ for rem in self.remotes.keys():
+ minion_id = rem.hostname
+ src = 'salt/minion-keys/{}.pub'.format(minion_id)
+ dest = '/etc/salt/pki/master/minions/{}'.format(minion_id)
+ self.sm.master_remote.run(args=[
+ 'sudo',
+ 'sh',
+ '-c',
+ ('if [ ! -f {d} ]; then '
+ 'cp {s} {d} ; '
+ 'chown root {d} ; '
+ 'fi').format(s=src, d=dest)
+ ])
+ self.sm.master_remote.run(args=[
+ 'sudo',
+ 'chown',
+ 'ubuntu',
+ 'salt/minion-keys/{}.pem'.format(minion_id),
+ 'salt/minion-keys/{}.pub'.format(minion_id),
+ ])
+ #
+ # copy the keys via the teuthology VM. The worker VMs can't ssh to
+ # each other; scp -3 routes the copy through the teuthology VM itself.
+ sh('scp -3 {}:salt/minion-keys/{}.* {}:'.format(
+ self.sm.master_remote.name,
+ minion_id, rem.name))
+ sudo_write_file(rem, '/etc/salt/minion_id', minion_id)
+ #
+ # set proper owner and permissions on keys
+ rem.run(
+ args=[
+ 'sudo',
+ 'chown',
+ 'root',
+ '{}.pem'.format(minion_id),
+ '{}.pub'.format(minion_id),
+ run.Raw(';'),
+ 'sudo',
+ 'chmod',
+ '600',
+ '{}.pem'.format(minion_id),
+ run.Raw(';'),
+ 'sudo',
+ 'chmod',
+ '644',
+ '{}.pub'.format(minion_id),
+ ],
+ )
+ #
+ # move keys to correct location
+ move_file(rem, '{}.pem'.format(minion_id),
+ '/etc/salt/pki/minion/minion.pem', sudo=True,
+ preserve_perms=False)
+ move_file(rem, '{}.pub'.format(minion_id),
+ '/etc/salt/pki/minion/minion.pub', sudo=True,
+ preserve_perms=False)
+
+ def _set_minion_master(self):
+ """Points all minions to the master"""
+ master_id = self.sm.master_remote.hostname
+ for rem in self.remotes.keys():
+ # remove old master public key if present. Minion will refuse to
+ # start if master name changed but old key is present
+ delete_file(rem, '/etc/salt/pki/minion/minion_master.pub',
+ sudo=True, check=False)
+
+ # set master id
+ echo_cmd = ('echo master: {} > '
+ '/etc/salt/minion.d/master.conf').format(master_id)
+ rem.run(args=[
+ 'sudo',
+ 'sh',
+ '-c',
+ echo_cmd,
+ ])
+
+ def _set_debug_log_level(self):
+ """Sets log_level: debug for all salt daemons"""
+ for rem in self.remotes.keys():
+ rem.run(args=[
+ 'sudo',
+ 'sed', '--in-place', '--regexp-extended',
+ '-e', r's/^\s*#\s*log_level:.*$/log_level: debug/g',
+ '-e', r'/^\s*#.*$/d', '-e', r'/^\s*$/d',
+ '/etc/salt/master',
+ '/etc/salt/minion',
+ ])
+
+ def setup(self):
+ super(Salt, self).setup()
+ log.debug("beginning of setup method")
+ self._generate_minion_keys()
+ self._preseed_minions()
+ self._set_minion_master()
+ self._disable_autodiscovery()
+ self._set_debug_log_level()
+ self.sm.enable_master()
+ self.sm.start_master()
+ self.sm.enable_minions()
+ self.sm.start_minions()
+ log.debug("end of setup method")
+
+ def begin(self):
+ super(Salt, self).begin()
+ log.debug("beginning of begin method")
+ self.sm.check_salt_daemons()
+ self.sm.cat_salt_master_conf()
+ self.sm.cat_salt_minion_confs()
+ self.sm.ping_minions()
+ log.debug("end of begin method")
+
+ def end(self):
+ super(Salt, self).end()
+ log.debug("beginning of end method")
+ self.sm.gather_logs('salt')
+ self.sm.gather_logs('zypp')
+ self.sm.gather_logs('rbd-target-api')
+ self.sm.gather_logfile('zypper.log')
+ self.sm.gather_logfile('journalctl.log')
+ log.debug("end of end method")
+
+ def teardown(self):
+ super(Salt, self).teardown()
+ # log.debug("beginning of teardown method")
+ pass
+ # log.debug("end of teardown method")
+
+
+class Command(Salt):
+ """
+ Subtask for running an arbitrary salt command.
+
+ This subtask understands the following config keys:
+
+ command the command to run (mandatory)
+ For example:
+
+ command: 'state.apply ceph.updates.salt'
+
+ target target selection specifier (default: *)
+ For details, see "man salt"
+
+ Note: "command: saltutil.sync_all" gets special handling.
+ """
+
+ err_prefix = "(command subtask) "
+
+ def __init__(self, ctx, config):
+ super(Command, self).__init__(ctx, config)
+ self.command = str(self.config.get("command", ''))
+ # targets all machines if omitted
+ self.target = str(self.config.get("target", '*'))
+ if not self.command:
+ raise ConfigError(
+ self.err_prefix + "nothing to do. Specify a non-empty value for 'command'")
+
+ def _run_command(self):
+ if '*' in self.target:
+ quoted_target = "\'{}\'".format(self.target)
+ else:
+ quoted_target = self.target
+ cmd_str = (
+ "set -ex\n"
+ "timeout 60m salt {} --no-color {} 2>/dev/null\n"
+ ).format(quoted_target, self.command)
+ write_file(self.master_remote, 'run_salt_command.sh', cmd_str)
+ remote_exec(
+ self.master_remote,
+ 'sudo bash run_salt_command.sh',
+ log,
+ "salt command ->{}<-".format(self.command),
+ )
+
+ def setup(self):
+ pass
+
+ def begin(self):
+ self.log.info("running salt command ->{}<-".format(self.command))
+ if self.command == 'saltutil.sync_all':
+ self.sm.sync_pillar_data()
+ else:
+ self._run_command()
+
+ def end(self):
+ pass
+
+ def teardown(self):
+ pass
+
+
+task = Salt
+command = Command
--- /dev/null
+'''
+Salt "manager" module
+
+Usage: First, ensure that there is a role whose name corresponds
+to the value of the master_role variable, below. Second, in your
+task, instantiate a SaltManager object:
+
+ from salt_manager import SaltManager
+
+ sm = SaltManager(ctx)
+
+Third, enjoy the SaltManager goodness - e.g.:
+
+ sm.ping_minions()
+
+Linter:
+ flake8 --max-line-length=100
+'''
+import logging
+import re
+
+from teuthology.contextutil import safe_while
+from teuthology.exceptions import CommandFailedError, MaxWhileTries
+from teuthology.orchestra import run
+from util import get_remote_for_role
+
+log = logging.getLogger(__name__)
+master_role = 'client.salt_master'
+
+
+class InternalError(Exception):
+ pass
+
+
+def systemctl_remote(remote, subcommand, service_name):
+ """
+ Caveat: only works for units ending in ".service"
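+
+ Example: systemctl_remote(ctx.cluster, 'restart', 'salt-minion')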
+ """
+ def systemctl_cmd(subcommand, lines=0):
+ return ('sudo systemctl {} --full --lines={} {}.service'
+ .format(subcommand, lines, service_name))
+ try:
+ remote.run(args=systemctl_cmd(subcommand))
+ except CommandFailedError:
+ remote.run(args=systemctl_cmd('status', 100))
+ raise
+
+
+class SaltManager(object):
+
+ def __init__(self, ctx):
+ self.ctx = ctx
+ self.master_remote = get_remote_for_role(self.ctx, master_role)
+
+ def __cat_file_cluster(self, filename=None):
+ """
+ cat a file everywhere on the whole cluster
+ """
+ self.ctx.cluster.run(args=[
+ 'sudo', 'cat', filename])
+
+ def __cat_file_remote(self, remote, filename=None):
+ """
+ cat a file on a particular remote
+ """
+ try:
+ remote.run(args=[
+ 'sudo', 'cat', filename])
+ except CommandFailedError:
+ log.warning((
+ "salt_manager: {} not found on {}"
+ ).format(filename, remote.name))
+
+ def __ping(self, ping_cmd, expected):
+ try:
+ def instances_of_str(search_str, output):
+ return len(re.findall(search_str, output))
+ with safe_while(sleep=15, tries=50,
+ action=ping_cmd) as proceed:
+ while proceed():
+ output = self.master_remote.sh(ping_cmd)
+ no_master = instances_of_str('The salt master could not be contacted', output)
+ responded = instances_of_str(' True', output)
+ log.info("{} of {} minions responded".format(responded, expected))
+ if responded == expected:
+ return None
+ except MaxWhileTries:
+ if no_master:
+ cmd = 'sudo systemctl status --full --lines=100 salt-master.service'
+ self.master_remote.run(args=cmd)
+
+ def all_minions_cmd_run(self, cmd, abort_on_fail=True, show_stderr=False):
+ """
+ Use cmd.run to run a command on all nodes.
+ """
+ if not abort_on_fail:
+ cmd += ' || true'
+ redirect = "" if show_stderr else " 2>/dev/null"
+ self.master_remote.run(args=(
+ 'sudo salt \\* cmd.run \'{}\'{}'.format(cmd, redirect)
+ ))
+
+ def all_minions_zypper_lu(self):
+ """Run "zypper lu" on all nodes"""
+ cmd = "zypper --non-interactive --no-gpg-checks list-updates"
+ self.all_minions_cmd_run(cmd, abort_on_fail=False)
+
+ def all_minions_zypper_ps(self):
+ """Run "zypper ps -s" on all nodes"""
+ cmd = "zypper ps -s || true"
+ self.all_minions_cmd_run(cmd, abort_on_fail=False)
+
+ def all_minions_zypper_ps_requires_reboot(self):
+ number_of_minions = len(self.ctx.cluster.remotes)
+ salt_cmd = "sudo salt \\* cmd.run \'zypper ps -s || true\' 2>/dev/null"
+ number_with_no_processes = len(
+ re.findall('No processes using deleted files found',
+ self.master_remote.sh(salt_cmd))
+ )
+ return number_with_no_processes != number_of_minions
+
+ def all_minions_zypper_ref(self):
+ """Run "zypper ref" on all nodes"""
+ cmd = "zypper --non-interactive --gpg-auto-import-keys refresh"
+ self.all_minions_cmd_run(cmd, abort_on_fail=False)
+
+ def all_minions_zypper_status(self):
+ """
+ Implement someone's idea of a general 'zypper status'
+ """
+ self.all_minions_zypper_ref()
+ self.all_minions_zypper_lu()
+ self.all_minions_zypper_ps()
+
+ def cat_salt_master_conf(self):
+ self.__cat_file_remote(self.master_remote, filename="/etc/salt/master")
+
+ def cat_salt_minion_confs(self):
+ self.__cat_file_cluster(filename="/etc/salt/minion")
+
+ def check_salt_daemons(self):
+ self.master_remote.run(args=['sudo', 'salt-key', '-L'])
+ systemctl_remote(self.master_remote, 'status', 'salt-master')
+ for _remote in self.ctx.cluster.remotes.keys():
+ systemctl_remote(_remote, 'status', 'salt-minion')
+ _remote.run(args='sudo cat /etc/salt/minion_id')
+ _remote.run(args='sudo cat /etc/salt/minion.d/master.conf')
+
+ def enable_master(self):
+ """Enables salt-master.service on the Salt Master node"""
+ systemctl_remote(self.master_remote, "enable", "salt-master")
+
+ def enable_minions(self):
+ """Enables salt-minion.service on all cluster nodes"""
+ systemctl_remote(self.ctx.cluster, "enable", "salt-minion")
+
+ def gather_logfile(self, logfile):
+ for _remote in self.ctx.cluster.remotes.keys():
+ try:
+ _remote.run(args=[
+ 'sudo', 'test', '-f', '/var/log/{}'.format(logfile),
+ ])
+ except CommandFailedError:
+ continue
+ log.info((
+ "gathering logfile /var/log/{} from remote {}"
+ ).format(logfile, _remote.hostname))
+ _remote.run(args=[
+ 'sudo', 'cp', '-a', '/var/log/{}'.format(logfile),
+ '/home/ubuntu/cephtest/archive/',
+ run.Raw(';'),
+ 'sudo', 'chown', 'ubuntu',
+ '/home/ubuntu/cephtest/archive/{}'.format(logfile)
+ ])
+
+ def gather_logs(self, logdir, archive=None):
+ """
+ Grabs contents of logdir and saves them in /home/ubuntu/cephtest/archive
+ teuthology will harvest them before destroying the remote (target machine).
+
+ logdir can be specified as an absolute path or a relative path. Relative
+ paths are assumed to be under /var/log.
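+
+ Examples: gather_logs('salt') harvests /var/log/salt into the archive;
+ gather_logs('/home/farm/.npm/_logs', 'dashboard-e2e-npm') must pass an
+ explicit archive name because the logdir is absolute.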
+ """
+ if logdir.startswith('/'):
+ if not archive:
+ raise InternalError((
+ 'Unable to harvest logs from absolute directory ->{}<- '
+ 'because no archive option was passed'
+ ).format(logdir)
+ )
+ else:
+ if not archive:
+ archive = logdir
+ logdir = '/var/log/{}'.format(logdir)
+ for _remote in self.ctx.cluster.remotes.keys():
+ try:
+ _remote.run(args=[
+ 'sudo', 'test', '-d', '{}/'.format(logdir),
+ ])
+ except CommandFailedError:
+ continue
+ log.info("gathering {} logs from remote {}"
+ .format(logdir, _remote.hostname))
+ _remote.run(args=[
+ 'sudo', 'cp', '-a', '{}/'.format(logdir),
+ '/home/ubuntu/cephtest/archive/',
+ run.Raw(';'),
+ 'sudo', 'chown', '-R', 'ubuntu',
+ '/home/ubuntu/cephtest/archive/{}/'.format(archive),
+ run.Raw(';'),
+ 'find', '/home/ubuntu/cephtest/archive/{}/'.format(archive),
+ '-type', 'f', '-print0',
+ run.Raw('|'),
+ 'xargs', '-0', '--no-run-if-empty', '--', 'gzip', '--'
+ ])
+
+ def master_role(self):
+ return master_role
+
+ def master_rpm_q(self, pkg_name):
+ """Run rpm -q on the Salt Master node"""
+ # FIXME: should possibly take a list of pkg_names
+ installed = True
+ try:
+ self.master_remote.run(args=[
+ 'rpm', '-q', pkg_name
+ ])
+ except CommandFailedError:
+ installed = False
+ return installed
+
+ def ping_minion(self, mid):
+ """Pings a minion; raises exception if it doesn't respond"""
+ self.__ping(['sudo', 'salt', mid, 'test.ping'], 1)
+
+ def ping_minions(self):
+ """
+ Pings minions; raises exception if they don't respond
+ """
+ number_of_minions = len(self.ctx.cluster.remotes)
+ self.__ping(
+ "sudo sh -c \'salt \\* test.ping\' 2>/dev/null || true",
+ number_of_minions,
+ )
+ return number_of_minions
+
+ def restart_master(self):
+ """Starts salt-master.service on the Salt Master node"""
+ systemctl_remote(self.master_remote, "restart", "salt-master")
+
+ def restart_minions(self):
+ """Restarts salt-minion.service on all cluster nodes"""
+ systemctl_remote(self.ctx.cluster, "restart", "salt-minion")
+
+ def start_master(self):
+ """Starts salt-master.service on the Salt Master node"""
+ systemctl_remote(self.master_remote, "start", "salt-master")
+
+ def start_minions(self):
+ """Starts salt-minion.service on all cluster nodes"""
+ systemctl_remote(self.ctx.cluster, "start", "salt-minion")
+
+ def sync_pillar_data(self, quiet=True):
+ cmd = "sudo salt \\* saltutil.sync_all"
+ if quiet:
+ cmd += " 2>/dev/null"
+ cmd += " || true"
+ with safe_while(sleep=15, tries=10,
+ action=cmd) as proceed:
+ while proceed():
+ no_response = len(re.findall('Minion did not return', self.master_remote.sh(cmd)))
+ if no_response:
+ log.info("Not all minions responded. Retrying.")
+ else:
+ return None
--- /dev/null
+import os
+
+from util import copy_directory_recursively
+
+
+class Scripts:
+
+ def __init__(self, ctx, logger):
+ self.log = logger
+ # copy the scripts directory to each remote exactly once per test run
+ if not ctx.get('scripts_copied', False):
+ local_path = os.path.dirname(os.path.realpath(__file__)) + '/scripts/'
+ for remote_name, remote_obj in ctx['remotes'].items():
+ copy_directory_recursively(local_path, remote_obj, "scripts")
+ ctx['scripts_copied'] = True
+
+ def run(self, remote, script_name, args=None, as_root=True):
+ # avoid a mutable default argument; normalize to an empty list
+ args = args or []
+ class_name = type(remote).__name__
+ self.log.debug(
+ '(scripts) run method was passed a remote object of class {}'
+ .format(class_name)
+ )
+ if class_name == 'Cluster':
+ remote_spec = 'the whole cluster'
+ else:
+ remote_spec = 'remote {}'.format(remote.hostname)
+ self.log.info('(scripts) running script {} with args {} on {}'
+ .format(script_name, args, remote_spec)
+ )
+ path = 'scripts/' + script_name
+ cmd = 'bash {}'.format(path)
+ if as_root:
+ cmd = "sudo " + cmd
+ if args:
+ cmd += ' ' + ' '.join(map(str, args))
+ return remote.sh(cmd, label=script_name)
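+
+# Typical usage (a sketch): the deepsea/ses_qa tasks instantiate this class
+# once per test run and then call, e.g.:
+#
+# Scripts(ctx, log).run(master_remote, 'ceph_cluster_status.sh')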
--- /dev/null
+# ceph_cluster_status.sh
+#
+# Display ceph cluster status
+#
+# args: None
+#
+set -ex
+ceph pg stat -f json-pretty
+ceph health detail -f json-pretty
+ceph osd tree
+ceph osd pool ls detail -f json-pretty
+ceph -s
+echo "OK" >/dev/null
--- /dev/null
+# ceph_version_sanity.sh
+#
+# test that ceph RPM version matches "ceph --version"
+# for a loose definition of "matches"
+#
+# args: None
+
+set -ex
+rpm -q ceph
+RPM_NAME=$(rpm -q ceph)
+RPM_CEPH_VERSION=$(perl -e '"'"$RPM_NAME"'" =~ m/ceph-(\d+\.\d+\.\d+)/; print "$1\n";')
+echo "According to RPM, the ceph upstream version is ->$RPM_CEPH_VERSION<-" >/dev/null
+test -n "$RPM_CEPH_VERSION"
+ceph --version
+BUFFER=$(ceph --version)
+CEPH_CEPH_VERSION=$(perl -e '"'"$BUFFER"'" =~ m/ceph version (\d+\.\d+\.\d+)/; print "$1\n";')
+echo "According to \"ceph --version\", the ceph upstream version is ->$CEPH_CEPH_VERSION<-" \
+ >/dev/null
+test -n "$RPM_CEPH_VERSION"
+test "$RPM_CEPH_VERSION" = "$CEPH_CEPH_VERSION"
+echo "OK" >/dev/null
--- /dev/null
+# create_all_pools_at_once.sh
+#
+# Script for pre-creating pools prior to Stage 4
+#
+# Pools are created with a number of PGs calculated to avoid health warnings
+# that can arise during/after Stage 4 due to "too few" or "too many" PGs per
+# OSD when DeepSea is allowed to create the pools with hard-coded number of
+# PGs.
+#
+# see also https://github.com/SUSE/DeepSea/issues/536
+#
+# args: pools to be created
+#
+# example invocation: ./create_all_pools_at_once.sh foo bar baz
+
+echo "Creating pools: $@"
+
+set -ex
+
+function json_total_osds {
+ # total number of OSDs in the cluster
+ ceph osd ls --format json | jq '. | length'
+}
+
+function pgs_per_pool {
+ local TOTALPOOLS=$1
+ test -n "$TOTALPOOLS"
+ local TOTALOSDS=$(json_total_osds)
+ test -n "$TOTALOSDS"
+ # given the total number of pools and OSDs,
+ # assume triple replication and equal number of PGs per pool
+ # and aim for 100 PGs per OSD
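+ # worked example: with 4 OSDs and 2 pools, TOTALPGS is 400 and each
+ # pool is created with 400 / 2 / 3 = 66 PGs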
+ let "TOTALPGS = $TOTALOSDS * 100"
+ let "PGSPEROSD = $TOTALPGS / $TOTALPOOLS / 3"
+ echo $PGSPEROSD
+}
+
+function create_all_pools_at_once {
+ # sample usage: create_all_pools_at_once foo bar
+ local TOTALPOOLS="${#@}"
+ local PGSPERPOOL=$(pgs_per_pool $TOTALPOOLS)
+ for POOLNAME in "$@"
+ do
+ ceph osd pool create $POOLNAME $PGSPERPOOL $PGSPERPOOL replicated
+ done
+ ceph osd pool ls detail
+}
+
+CEPHFS=""
+OPENSTACK=""
+RBD=""
+OTHER=""
+for arg in "$@" ; do
+ arg="${arg,,}"
+ case "$arg" in
+ cephfs) CEPHFS="$arg" ;;
+ openstack) OPENSTACK="$arg" ;;
+ rbd) RBD="$arg" ;;
+ *) OTHER+=" $arg" ;;
+ esac
+done
+
+POOLS=""
+ if [ "$CEPHFS" ] ; then
+ POOLS+=" cephfs_data cephfs_metadata"
+fi
+if [ "$OPENSTACK" ] ; then
+ POOLS+=" smoketest-cloud-backups smoketest-cloud-volumes smoketest-cloud-images"
+ POOLS+=" smoketest-cloud-vms cloud-backups cloud-volumes cloud-images cloud-vms"
+fi
+if [ "$RBD" ] ; then
+ POOLS+=" rbd"
+fi
+if [ "$OTHER" ] ; then
+ POOLS+="$OTHER"
+ APPLICATION_ENABLE="$OTHER"
+fi
+if [ -z "$POOLS" ] ; then
+ echo "create_all_pools_at_once: bad arguments"
+ exit 1
+fi
+echo "About to create pools ->$POOLS<-"
+create_all_pools_at_once $POOLS
+if [ "$APPLICATION_ENABLE" ] ; then
+ for pool in "$APPLICATION_ENABLE" ; do
+ ceph osd pool application enable $pool deepsea_qa
+ done
+fi
+echo "OK" >/dev/null
--- /dev/null
+# lvm_status.sh
+#
+# args: None
+
+set -ex
+
+pvs --all
+vgs --all
+lvs --all
+lsblk --ascii
--- /dev/null
+# rados_write_test.sh
+#
+# Write a RADOS object and read it back
+#
+# NOTE: function assumes the pool "write_test" already exists. Pool can be
+# created by calling e.g. "create_all_pools_at_once write_test" immediately
+# before calling this function.
+#
+# args: None
+
+set -ex
+
+ceph osd pool application enable write_test deepsea_qa
+echo "dummy_content" > verify.txt
+rados -p write_test put test_object verify.txt
+rados -p write_test get test_object verify_returned.txt
+test "x$(cat verify.txt)" = "x$(cat verify_returned.txt)"
+
+echo "OK" >/dev/null
--- /dev/null
+# rgw_init.sh
+# Set up RGW
+set -ex
+USERSYML=/srv/salt/ceph/rgw/users/users.d/rgw.yml
+cat <<EOF > $USERSYML
+- { uid: "demo", name: "Demo", email: "demo@demo.nil" }
+- { uid: "demo1", name: "Demo1", email: "demo1@demo.nil" }
+EOF
+cat $USERSYML
--- /dev/null
+# rgw_init_ssl.sh
+# Set up RGW-over-SSL
+set -ex
+CERTDIR=/srv/salt/ceph/rgw/cert
+mkdir -p $CERTDIR
+pushd $CERTDIR
+openssl req -x509 \
+ -nodes \
+ -days 1095 \
+ -newkey rsa:4096 \
+ -keyout rgw.key \
+ -out rgw.crt \
+ -subj "/C=DE"
+cat rgw.key > rgw.pem && cat rgw.crt >> rgw.pem
+popd
+GLOBALYML=/srv/pillar/ceph/stack/global.yml
+cat <<EOF >> $GLOBALYML
+rgw_init: default-ssl
+EOF
+cat $GLOBALYML
+cp /srv/salt/ceph/configuration/files/rgw-ssl.conf \
+ /srv/salt/ceph/configuration/files/ceph.conf.d/rgw.conf
--- /dev/null
+# salt_api_test.sh
+# Salt API test script
+set -ex
+TMPFILE=$(mktemp)
+curl --silent http://$(hostname):8000/ | tee $TMPFILE # show curl output in log
+test -s $TMPFILE
+jq . $TMPFILE >/dev/null
+echo -en "\\n" # this is just for log readability
+rm $TMPFILE
+echo "Salt API test passed"
--- /dev/null
+"""
+Task (and subtasks) for SES test automation
+
+Linter:
+ flake8 --max-line-length=100
+"""
+import logging
+
+from salt_manager import SaltManager
+from scripts import Scripts
+
+from teuthology.exceptions import (
+ ConfigError,
+ )
+from teuthology.task import Task
+
+log = logging.getLogger(__name__)
+ses_qa_ctx = {}
+number_of_osds_in_cluster = """sudo ceph osd tree -f json-pretty |
+ jq '[.nodes[] | select(.type == \"osd\")] | length'"""
+
+
+class SESQA(Task):
+
+ def __init__(self, ctx, config):
+ global ses_qa_ctx
+ super(SESQA, self).__init__(ctx, config)
+ if ses_qa_ctx:
+ self.log = ses_qa_ctx['logger_obj']
+ self.log.debug("ses_qa_ctx already populated (we are in a subtask)")
+ else:
+ ses_qa_ctx['logger_obj'] = log
+ self.log = log
+ self.log.debug("populating ses_qa_ctx (we are *not* in a subtask)")
+ self._populate_ses_qa_context()
+ self.master_remote = ses_qa_ctx['master_remote']
+ self.nodes = self.ctx['nodes']
+ self.nodes_client_only = self.ctx['nodes_client_only']
+ self.nodes_cluster = self.ctx['nodes_cluster']
+ self.nodes_gateway = self.ctx['nodes_gateway']
+ self.nodes_storage = self.ctx['nodes_storage']
+ self.nodes_storage_only = self.ctx['nodes_storage_only']
+ self.remote_lookup_table = self.ctx['remote_lookup_table']
+ self.remotes = self.ctx['remotes']
+ self.roles = self.ctx['roles']
+ self.role_lookup_table = self.ctx['role_lookup_table']
+ self.role_types = self.ctx['role_types']
+ self.scripts = Scripts(self.ctx, self.log)
+ self.sm = ses_qa_ctx['salt_manager_instance']
+
+ def _populate_ses_qa_context(self):
+ global ses_qa_ctx
+ ses_qa_ctx['salt_manager_instance'] = SaltManager(self.ctx)
+ ses_qa_ctx['master_remote'] = ses_qa_ctx['salt_manager_instance'].master_remote
+
+ def os_type_and_version(self):
+ os_type = self.ctx.config.get('os_type', 'unknown')
+ os_version = float(self.ctx.config.get('os_version', 0))
+ return (os_type, os_version)
+
+ def setup(self):
+ super(SESQA, self).setup()
+
+ def begin(self):
+ super(SESQA, self).begin()
+
+ def end(self):
+ super(SESQA, self).end()
+ self.sm.gather_logs('/home/farm/.npm/_logs', 'dashboard-e2e-npm')
+ self.sm.gather_logs('/home/farm/.protractor-report', 'dashboard-e2e-protractor')
+
+ def teardown(self):
+ super(SESQA, self).teardown()
+
+
+class Validation(SESQA):
+
+ err_prefix = "(validation subtask) "
+
+ def __init__(self, ctx, config):
+ global ses_qa_ctx
+ ses_qa_ctx['logger_obj'] = log.getChild('validation')
+ self.name = 'ses_qa.validation'
+ super(Validation, self).__init__(ctx, config)
+ self.log.debug("munged config is {}".format(self.config))
+
+ def mgr_plugin_influx(self, **kwargs):
+ """
+ Minimal/smoke test for the MGR influx plugin
+
+ Tests the 'influx' MGR plugin, but only on openSUSE Leap 15.0.
+
+ Testing on SLE-15 is not currently possible because the influxdb
+ package is not built in IBS for anything higher than SLE-12-SP4.
+ Getting it to build for SLE-15 requires a newer golang stack than what
+ is available in SLE-15 - see
+ https://build.suse.de/project/show/NON_Public:infrastructure:icinga2
+ for how another team is building it (and no, we don't want to do that).
+
+ Testing on openSUSE Leap 15.0 is only possible because we are building
+ the influxdb package in filesystems:ceph:nautilus with modified project
+ metadata.
+
+ (This problem will hopefully go away when we switch to SLE-15-SP1.)
+ """
+ zypper_cmd = ("sudo zypper --non-interactive --no-gpg-check "
+ "install --force --no-recommends {}")
+ os_type, os_version = self.os_type_and_version()
+ if os_type == 'opensuse' and os_version >= 15:
+ self.ctx.cluster.run(
+ args=zypper_cmd.format(' '.join(["python3-influxdb", "influxdb"]))
+ )
+ self.scripts.run(
+ self.master_remote,
+ 'mgr_plugin_influx.sh',
+ )
+ else:
+ self.log.warning(
+ "mgr_plugin_influx test case not implemented for OS ->{}<-"
+ .format(os_type + " " + str(os_version))
+ )
+
+ def begin(self):
+ self.log.debug("Processing tests: ->{}<-".format(self.config.keys()))
+ for method_spec, kwargs in self.config.items():
+ kwargs = {} if not kwargs else kwargs
+ if not isinstance(kwargs, dict):
+ raise ConfigError(self.err_prefix + "Method config must be a dict")
+ self.log.info(
+ "Running test {} with config ->{}<-"
+ .format(method_spec, kwargs)
+ )
+ method = getattr(self, method_spec, None)
+ if method:
+ method(**kwargs)
+ else:
+ raise ConfigError(self.err_prefix + "No such method ->{}<-"
+ .format(method_spec))
+
+ def drive_replace_initiate(self, **kwargs):
+ """
+ Initiate DeepSea drive replacement
+
+ Assumes there is one drive not being deployed (1node5disks with DriveGroup `limit: 4`).
+
+ In order to "hide" an existing disk from ceph.c_v in teuthology,
+ the disk is formatted and mounted.
+ """
+ total_osds = self.master_remote.sh(number_of_osds_in_cluster)
+ osd_id = 0
+ disks = self._get_drive_group_limit()
+ assert int(total_osds) == disks, "Unexpected number of osds {} (expected {})"\
+ .format(total_osds, disks)
+ self.scripts.run(
+ self.master_remote,
+ 'drive_replace.sh',
+ args=[osd_id]
+ )
+
+ def drive_replace_check(self, **kwargs):
+ """
+ Deepsea drive replacement after check
+
+ The replaced osd_id should be back in the osd tree once Stage 3 has been run.
+ """
+ total_osds = self.master_remote.sh(number_of_osds_in_cluster)
+ disks = self._get_drive_group_limit()
+ assert int(total_osds) == disks, "Unexpected number of osds {} (expected {})"\
+ .format(total_osds, disks)
+ self.master_remote.sh("sudo ceph osd tree --format json | tee after.json")
+ self.master_remote.sh("diff before.json after.json && echo 'Drive Replaced OK'")
+
+ def _get_drive_group_limit(self, **kwargs):
+ """
+ Helper to get drive_groups limit field value
+ """
+ drive_group = next(x for x in self.ctx['config']['tasks']
+ if 'deepsea' in x and 'drive_group' in x['deepsea'])
+ return int(drive_group['deepsea']['drive_group']['custom']['data_devices']['limit'])
+
+
+task = SESQA
+validation = Validation
+import json
+
from teuthology import misc
+from teuthology.contextutil import safe_while
+from teuthology.exceptions import (
+ CommandFailedError,
+ ConfigError,
+ ConnectionLostError,
+ )
+
+
+def enumerate_osds(remote, logger):
+ """
+ Given a remote, enumerates the OSDs (if any) running on that machine.
+ """
+ hostname = remote.hostname
+ logger.info("Enumerating OSDs on {}".format(hostname))
+ cmd = ("sudo ceph osd tree -f json | "
+ "jq -c '[.nodes[] | select(.name == \"{}\")][0].children'"
+ .format(hostname.split(".")[0]))
+ osds = json.loads(remote.sh(cmd))
+ return osds
+
def get_remote(ctx, cluster, service_type, service_id):
"""
service_id))
return remote
+
def get_remote_for_role(ctx, role):
return get_remote(ctx, *misc.split_role(role))
+
+
+def copy_directory_recursively(from_path, to_remote, to_path=None):
+ """
+ Recursively copies a local directory to a remote.
+ """
+ if to_path is None:
+ to_path = from_path
+ misc.sh("scp -r -v {from_path} {host}:{to_path}".format(
+ from_path=from_path, host=to_remote.name, to_path=to_path))
+
+
+def introspect_roles(ctx, logger, quiet=True):
+ """
+ Creates the following keys in ctx:
+
+ nodes,
+ nodes_client_only,
+ nodes_cluster,
+ nodes_gateway,
+ nodes_storage, and
+ nodes_storage_only.
+
+ These are all simple lists of hostnames.
+
+ Also creates
+
+ ctx['remotes'],
+
+ which is a dict of teuthology "remote" objects, which look like this:
+
+ { remote1_name: remote1_obj, ..., remoten_name: remoten_obj }
+
+ Also creates
+
+ ctx['role_types']
+
+ which is just like the "roles" list, except it contains only unique
+ role types per node.
+
+ Finally, creates:
+
+ ctx['role_lookup_table']
+
+ which will look something like this:
+
+ {
+ "osd": { "osd.0": osd0remname, ..., "osd.n": osdnremname },
+ "mon": { "mon.a": monaremname, ..., "mon.n": monnremname },
+ ...
+ }
+
+ and
+
+ ctx['remote_lookup_table']
+
+ which looks like this:
+
+ {
+ remote0name: [ "osd.0", "client.0" ],
+ ...
+ remotenname: [ remotenrole0, ..., remotenrole99 ],
+ }
+
+ (In other words, remote_lookup_table is just like the roles
+ stanza, except the role lists are keyed by remote name.)
+ """
+ # initialization phase
+ cluster_roles = ['mon', 'mgr', 'osd', 'mds']
+ non_storage_cluster_roles = ['mon', 'mgr', 'mds']
+ gateway_roles = ['rgw', 'igw', 'ganesha']
+ roles = ctx.config['roles']
+ nodes = []
+ nodes_client_only = []
+ nodes_cluster = []
+ non_storage_cluster_nodes = []
+ nodes_gateway = []
+ nodes_storage = []
+ nodes_storage_only = []
+ remotes = {}
+ role_types = []
+ role_lookup_table = {}
+ remote_lookup_table = {}
+ # introspection phase
+ idx = 0
+ for node_roles_list in roles:
+ assert isinstance(node_roles_list, list), \
+ "node_roles_list is a list"
+ assert node_roles_list, "node_roles_list is not empty"
+ remote = get_remote_for_role(ctx, node_roles_list[0])
+ role_types.append([])
+ if not quiet:
+ logger.debug("Considering remote name {}, hostname {}"
+ .format(remote.name, remote.hostname))
+ nodes += [remote.hostname]
+ remotes[remote.hostname] = remote
+ remote_lookup_table[remote.hostname] = node_roles_list
+ # inner loop: roles (something like "osd.1" or "c2.mon.a")
+ for role in node_roles_list:
+ # FIXME: support multiple clusters as used in, e.g.,
+ # rgw/multisite suite
+ role_arr = role.split('.')
+ if len(role_arr) != 2:
+ raise ConfigError("Unsupported role ->{}<-"
+ .format(role))
+ (role_type, _) = role_arr
+ if role_type not in role_lookup_table:
+ role_lookup_table[role_type] = {}
+ role_lookup_table[role_type][role] = remote.hostname
+ if role_type in cluster_roles:
+ nodes_cluster += [remote.hostname]
+ if role_type in gateway_roles:
+ nodes_gateway += [remote.hostname]
+ if role_type in non_storage_cluster_roles:
+ non_storage_cluster_nodes += [remote.hostname]
+ if role_type == 'osd':
+ nodes_storage += [remote.hostname]
+ if role_type not in role_types[idx]:
+ role_types[idx] += [role_type]
+ idx += 1
+ nodes_cluster = list(set(nodes_cluster))
+ nodes_gateway = list(set(nodes_gateway))
+ nodes_storage = list(set(nodes_storage))
+ nodes_storage_only = []
+ for node in nodes_storage:
+ if node not in non_storage_cluster_nodes:
+ if node not in nodes_gateway:
+ nodes_storage_only += [node]
+ nodes_client_only = list(
+ set(nodes).difference(set(nodes_cluster).union(set(nodes_gateway)))
+ )
+ if not quiet:
+ logger.debug("nodes_client_only is ->{}<-".format(nodes_client_only))
+ assign_vars = [
+ 'nodes',
+ 'nodes_client_only',
+ 'nodes_cluster',
+ 'nodes_gateway',
+ 'nodes_storage',
+ 'nodes_storage_only',
+ 'remote_lookup_table',
+ 'remotes',
+ 'role_lookup_table',
+ 'role_types',
+ ]
+ for var in assign_vars:
+ ctx[var] = locals()[var]
+ ctx['dev_env'] = len(nodes_cluster) < 4
+ if not quiet:
+ # report phase
+ logger.info("ROLE INTROSPECTION REPORT")
+ report_vars = assign_vars + ['dev_env']
+ for var in report_vars:
+ logger.info("{} == {}".format(var, ctx[var]))
+
+
+def remote_exec(remote, cmd_str, logger, log_spec, quiet=True, rerun=False, tries=0):
+ """
+ Execute cmd_str, catching CommandFailedError and ConnectionLostError (and
+ rerunning cmd_str post-reboot if the rerun flag is set), until one of the
+ following conditions is fulfilled:
+ 1) execution succeeds
+ 2) the number of attempts is exceeded
+ 3) CommandFailedError is raised
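+
+ A usage sketch: run a command that may reboot the remote and wait up to
+ roughly 10 minutes (10 tries, 60 seconds apart) for it to come back:
+
+ remote_exec(remote, "shutdown -r now", log, "reboot test", tries=10)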
+ """
+ cmd_str = "sudo bash -c '{}'".format(cmd_str)
+ # if quiet:
+ # cmd_args += [run.Raw('2>'), "/dev/null"]
+ already_rebooted_at_least_once = False
+ if tries:
+ remote.run(args="uptime")
+ logger.info("Running command ->{}<- on {}. "
+ "This might cause the machine to reboot!"
+ .format(cmd_str, remote.hostname))
+ with safe_while(sleep=60, tries=tries, action="wait for reconnect") as proceed:
+ while proceed():
+ try:
+ if already_rebooted_at_least_once:
+ if not rerun:
+ remote.run(args="echo Back from reboot ; uptime")
+ break
+ remote.run(args=cmd_str)
+ break
+ except CommandFailedError:
+ logger.error(("{} failed. Creating /var/log/journalctl.log with "
+ "output of \"journalctl --all\"!").format(log_spec))
+ remote.sh("sudo su -c 'journalctl --all > /var/log/journalctl.log'")
+ raise
+ except ConnectionLostError:
+ already_rebooted_at_least_once = True
+ if tries < 1:
+ raise
+ logger.warning("No connection established yet..")
+
+
+def remote_run_script_as_root(remote, path, data, args=None):
+ """
+ Wrapper around misc.write_file to simplify the design pattern:
+ 1. use misc.write_file to create bash script on the remote
+ 2. use Remote.run to run that bash script via "sudo bash $SCRIPT"
+ """
+ misc.write_file(remote, path, data)
+ cmd = 'sudo bash {}'.format(path)
+ if args:
+ cmd += ' ' + ' '.join(args)
+ remote.run(label=path, args=cmd)
+
+
+def sudo_append_to_file(remote, path, data):
+ """
+ Append data to a remote file. Standard 'cat >>' - creates file
+ if it doesn't exist, but all directory components in the file
+ path must exist.
+
+ :param remote: Remote site.
+ :param path: Path on the remote being written to.
+ :param data: Python string containing data to be written.
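+
+ Example (a sketch): sudo_append_to_file(remote, '/etc/salt/master', 'discovery: false')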
+ """
+ remote.run(
+ args=[
+ 'sudo',
+ 'sh',
+ '-c',
+ 'cat >> ' + path,
+ ],
+ stdin=data,
+ )
+
+
+def get_rpm_pkg_version(remote, pkg, logger):
+ """Gather RPM package version"""
+ version = None
+ try:
+ version = remote.sh('rpm --queryformat="%{{VERSION}}" -q {}'.format(pkg))
+ except CommandFailedError:
+ logger.warning("Package {} is not installed".format(pkg))
+ return version