From 3ead00caf6df78aebf6fde20cab30784d88aa9c6 Mon Sep 17 00:00:00 2001 From: Laura Flores Date: Thu, 20 Jun 2024 13:52:07 -0500 Subject: [PATCH] Revert "test: reproduce flap peering bug (wip)" This reverts commit c89f9c4e80a4ac45ae5ea86ea7e9c41999299f27. Note from Dan van der Ster: This is a test that should succeed; it definitely used to succeed back in the L/O days of Ceph. At some point peering code changed and this behaviour regressed. In short, an OSD goes down then comes up, and no objects were modified in the meantime. There should be no degraded PGs in this case. As this commit is currently breaking make check on all PRs, I think it should be re-evaluated and merged so that the test and whatever fix is needed to make it pass are merged together. Fixes: https://tracker.ceph.com/issues/66556 Signed-off-by: Laura Flores --- src/test/CMakeLists.txt | 1 - src/test/flap.sh | 129 ---------------------------------------- 2 files changed, 130 deletions(-) delete mode 100755 src/test/flap.sh diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index eb73fe3c191..2e756eeb583 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -644,7 +644,6 @@ add_ceph_test(run-cli-tests ${CMAKE_CURRENT_SOURCE_DIR}/run-cli-tests) #add_ceph_test(test_pidfile.sh ${CMAKE_CURRENT_SOURCE_DIR}/test_pidfile.sh) add_ceph_test(smoke.sh ${CMAKE_CURRENT_SOURCE_DIR}/smoke.sh) -add_ceph_test(flap.sh ${CMAKE_CURRENT_SOURCE_DIR}/flap.sh) set_property( TEST ${tox_tests} diff --git a/src/test/flap.sh b/src/test/flap.sh deleted file mode 100755 index d5bbdab89c9..00000000000 --- a/src/test/flap.sh +++ /dev/null @@ -1,129 +0,0 @@ -#!/usr/bin/env bash - -source $CEPH_ROOT/qa/standalone/ceph-helpers.sh - -mon_port=$(get_unused_port) - -function run() { - local dir=$1 - shift - - export CEPH_MON="127.0.0.1:$mon_port" - export CEPH_ARGS - CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " - CEPH_ARGS+="--mon-host=$CEPH_MON " - CEPH_ARGS+="--osd_min_pg_log_entries=1 
--osd_max_pg_log_entries=2 " - set -e - - local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} - for func in $funcs ; do - setup $dir || return 1 - $func $dir || return 1 - teardown $dir || return 1 - done -} - -function TEST_flap() { - local dir=$1 - - run_mon $dir a - run_mgr $dir x - run_osd $dir 0 - run_osd $dir 1 - run_osd $dir 2 - run_osd $dir 3 - - ceph osd pool create foo 64 - wait_for_clean - - # write lots of objects - rados -p foo bench 30 write -b 4096 --no-cleanup - wait_for_clean - - # set norebalance so that we don't backfill - ceph osd set norebalance - wait_for_clean - - # flap an osd, it should repeer and come back clean - ceph osd down 0 - wait_for_clean - - # drain osd.0, then wait for peering - ceph osd crush reweight osd.0 0 - wait_for_peered - - # flap osd.0 while draining, this has been known to incorrectly degrade pgs - ceph osd down 0 - wait_for_osd up 0 - wait_for_peered - - # now there should be zero undersized or degraded pgs - ceph pg debug degraded_pgs_exist | grep -q FALSE -} - -function TEST_flap_ec() { - local dir=$1 - - run_mon $dir a - run_mgr $dir x - run_osd $dir 0 - run_osd $dir 1 - run_osd $dir 2 - run_osd $dir 3 - run_osd $dir 4 - - ceph osd erasure-code-profile set myprofile k=2 m=2 crush-failure-domain=osd - ceph osd pool create foo 64 erasure myprofile - wait_for_clean - - # write lots of objects - rados -p foo bench 30 write -b 4096 --no-cleanup - wait_for_clean - - # set norebalance so that we don't backfill - ceph osd set norebalance - wait_for_clean - - # flap an osd, it should repeer and come back clean - ceph osd down 0 - wait_for_clean - - # drain osd.0, then wait for peering - ceph osd crush reweight osd.0 0 - wait_for_peered - - # flap osd.0 while draining, this has been known to incorrectly degrade pgs - ceph osd down 0 - wait_for_osd up 0 - wait_for_peered - - # now there should be zero undersized or degraded pgs - ceph pg debug degraded_pgs_exist | grep -q FALSE - - # PART TWO: frans42 testing 
- # reset osd.0 weight and rebalance - ceph osd unset norebalance - ceph osd crush reweight osd.0 1 - wait_for_clean - ceph osd set norebalance - wait_for_clean - - # add new OSDs - run_osd $dir 5 - run_osd $dir 6 - wait_for_clean - - # We now have old osds=0,...,4 and - # new OSDs 5,6. Flapping an old osd leads to degraded objects. - # flap osd.0 while rebalancing - ceph osd unset norebalance - sleep 10 # let rebalancing progress a bit - ceph osd down 0 - wait_for_osd up 0 - wait_for_peered - - # now there should be zero undersized or degraded pgs - # I don't recall if I saw degraded PGs or only degraded objects. - ceph pg debug degraded_pgs_exist | grep -q FALSE -} -main flap "$@" -- 2.39.5