From e9dd12ea76bf13475313d71e8e7dfa9409ceb45c Mon Sep 17 00:00:00 2001
From: Kefu Chai
Date: Wed, 17 Mar 2021 19:22:36 +0800
Subject: [PATCH] ceph-pull-requests: kill orphan processes after job is aborted

Fixes: https://tracker.ceph.com/issues/47830
Signed-off-by: Kefu Chai
---
Notes (not part of the commit message):
  - kill-tests was revised after this patch was first generated: only the
    oldest ctest is looked up, expansions are quoted, and lost races with
    exiting processes are tolerated. the blob id on its "index" line still
    refers to the original revision.
  - leading whitespace of the yaml hunk was restored by hand; check it
    against the target tree before applying.

 ceph-pull-requests/build/kill-tests           | 35 +++++++++++++++++++
 .../config/definitions/ceph-pull-requests.yml |  9 +++++
 2 files changed, 44 insertions(+)
 create mode 100644 ceph-pull-requests/build/kill-tests

diff --git a/ceph-pull-requests/build/kill-tests b/ceph-pull-requests/build/kill-tests
new file mode 100644
index 00000000..8ecc1255
--- /dev/null
+++ b/ceph-pull-requests/build/kill-tests
@@ -0,0 +1,35 @@
+#!/bin/bash -ex
+
+# kill all descendant processes of ctest
+
+# ceph-pull-requests/build/build is killed by jenkins when the ceph-pull-requests job is aborted or
+# canceled, see https://www.jenkins.io/doc/book/using/aborting-a-build/ . but build/build does not
+# wait until all its child processes quit. after ctest is killed by SIGTERM, there is a chance
+# that some tests are still running as ctest does not get a chance to kill them before it terminates.
+# if these tests had timed out, ctest would kill them using SIGKILL. so we need to kill them
+# manually after the job is aborted.
+
+# if ctest is still running, get its pid, otherwise we are done. pgrep prints one pid per line
+# when several processes match, so pick only the oldest one to end up with a single process group.
+ctest_pid=$(pgrep --oldest ctest) || exit 0
+# the parent process of ctest should have been terminated, but this might not be true when
+# it comes to some of its descendant processes, for instance, unittest-seastar-messenger
+# ctest could have quit since pgrep saw it, in which case ps fails and there is nothing to do.
+ctest_pgid=$(ps --no-headers --format 'pgid:1' --pid "$ctest_pid") || exit 0
+# the group can disappear at any moment, so a failing kill or pgrep below is not an error.
+kill -SIGTERM -- -"$ctest_pgid" || true
+# try harder
+for seconds in 0 1 1 2 3; do
+    sleep "$seconds"
+    if pgrep --pgroup "$ctest_pgid" > /dev/null; then
+        # kill only if we've waited for a while
+        if test "$seconds" != 0; then
+            pgrep --pgroup "$ctest_pgid" || true
+            echo 'try harder'
+            kill -SIGKILL -- -"$ctest_pgid" || true
+        fi
+    else
+        echo 'killed'
+        break
+    fi
+done
diff --git a/ceph-pull-requests/config/definitions/ceph-pull-requests.yml b/ceph-pull-requests/config/definitions/ceph-pull-requests.yml
index 14791cd0..460cca12 100644
--- a/ceph-pull-requests/config/definitions/ceph-pull-requests.yml
+++ b/ceph-pull-requests/config/definitions/ceph-pull-requests.yml
@@ -82,6 +82,15 @@
                 healthy: 10
                 unhealthy: 20
                 failing: 30
+      - postbuildscript:
+          builders:
+            - role: SLAVE
+              build-on:
+                  - ABORTED
+              build-steps:
+                  - shell:
+                      !include-raw:
+                        - ../../build/kill-tests
 
     wrappers:
       - ansicolor
-- 
2.39.5