]> git.apps.os.sepia.ceph.com Git - teuthology.git/commitdiff
Restart dead workers 1187/head
authorKyr Shatskyy <kyrylo.shatskyy@gmail.com>
Mon, 7 May 2018 15:01:57 +0000 (18:01 +0300)
committerKyr Shatskyy <kyrylo.shatskyy@suse.com>
Mon, 2 Jul 2018 11:24:34 +0000 (13:24 +0200)
This patch allows to restart dead workers separately
not stopping the rest of the teuthology components,
and what is more important the beanstalkd service.
That makes it possible to extend the number of workers too.
Also, either of pulpito and paddles can be restarted alone.

Signed-off-by: Kyr Shatskyy <kyrylo.shatskyy@suse.com>
teuthology/openstack/openstack-teuthology.init

index 3d02f06e1cf98b4fc8f9d029fb7111b4d217be5f..4fe668ae8205d2125a45e5d28622de1e9eb5ece6 100755 (executable)
@@ -39,11 +39,124 @@ user=${TEUTHOLOGY_USERNAME:-ubuntu}
 
 export HOME=/home/$user
 
+function worker_pidfile() {
+    echo /var/run/teuthology-worker.$1.pid
+}
+function worker_logfile() {
+    echo /var/log/teuthology.${1}.log
+}
+
+function stop_worker() {
+    wnum=$1
+    wpidfile=$(worker_pidfile $wnum)
+    if [[ -f $wpidfile ]] ; then
+        wpid=$(cat $wpidfile)
+        echo Killing worker $wnum with pid=$wpid...
+        pkill -P $wpid
+        pkill $wpid
+        rm -f $wpidfile
+    fi
+}
+
+function stop_workers() {
+    for i in $(seq 1 $NWORKERS) ; do
+        stop_worker $i
+    done
+}
+
+function start_worker() {
+    local wlogfile=$1
+    local wpidfile=$2
+    mkdir -p /tmp/log && chown $user /tmp/log
+    su - -c "
+cd /home/$user
+source openrc.sh
+cd teuthology
+export LC_ALL=C
+virtualenv/bin/teuthology-worker --tube openstack -l /tmp/log --archive-dir /usr/share/nginx/html
+" $user > $wlogfile 2>&1 & {
+        echo $! > $wpidfile
+        echo "Started worker with pid=$! see log $wlogfile"
+    }
+}
+
+function rkill() {
+    local pid=$1
+    for i in $(pgrep -P $pid) ; do
+        rkill $i
+    done
+    echo Killing process $pid
+    kill -9 $pid
+}
+
+function stop_process() {
+    local pidfile=$1
+    [[ -f $pidfile ]] && {
+        local pid=$(cat $pidfile)
+        rkill $pid
+        ps aux --no-headers -q $pid 2>&1 > /dev/null || rm $pidfile
+    }
+}
+
+function start_workers() {
+    for i in $(seq 1 $NWORKERS) ; do
+        local wpidfile=$(worker_pidfile $i)
+        local wlogfile=$(worker_logfile $i)
+        [[ -f $wpidfile ]] && {
+            local wpid=$(cat $wpidfile)
+            ps aux --no-headers -q $wpid 2>&1 > /dev/null && {
+                echo Worker $i is already running with process $wpid
+                continue
+            }
+        }
+        start_worker $wlogfile $wpidfile
+    done
+}
+
 case $1 in
+        start-workers)
+            start_workers
+            ;;
+        list-workers)
+            for i in $(ls /var/run | grep teuthology-worker | sort) ; do
+                WPID=$(cat /var/run/$i)
+                WORKER=${i##teuthology-worker.}
+                WORKER=${WORKER%%.pid}
+                STATUS=$(ps aux --no-headers -q $WPID 2>&1 > /dev/null && echo running || echo dead)
+                echo $WORKER PID:$WPID STATUS:$STATUS
+            done
+            ;;
+        stop-workers)
+            echo Stopping workers
+            stop_workers
+            ;;
+        stop-worker)
+            stop_worker $2
+            ;;
+        restart-workers)
+            # Re-invoke this script ($0) for both steps. $1 holds the action
+            # name "restart-workers", which is not a command to execute.
+            "$0" stop-workers
+            "$0" start-workers
+            ;;
+        start-pulpito)
+            su - -c "cd /home/$user/pulpito ; virtualenv/bin/python run.py" $user  > /var/log/pulpito.log 2>&1 & \
+            echo $! > /var/run/pulpito.pid
+            ;;
+        stop-pulpito)
+            echo Stopping pulpito
+            stop_process /var/run/pulpito.pid
+            ;;
+        start-paddles)
+            su - -c "cd /home/$user/paddles ; virtualenv/bin/pecan serve config.py" $user  > /var/log/paddles.log 2>&1 &
+            echo $! > /var/run/paddles.pid
+            ;;
+        stop-paddles)
+            echo Stopping paddles
+            stop_process /var/run/paddles.pid
+            ;;
         start)
                 /etc/init.d/beanstalkd start
-                su - -c "cd /home/$user/paddles ; virtualenv/bin/pecan serve config.py" $user  > /var/log/paddles.log 2>&1 &
-                su - -c "cd /home/$user/pulpito ; virtualenv/bin/python run.py" $user  > /var/log/pulpito.log 2>&1 &
+                $0 start-paddles
+                $0 start-pulpito
                 sleep 3
                 (
                    cd /home/$user
@@ -54,17 +167,16 @@ case $1 in
                    if test -s /tmp/t && ! grep -qq 'targets: {}' /tmp/t ; then
                       teuthology-lock --unlock -t /tmp/t --owner scheduled_$user@teuthology
                   fi
-                  mkdir -p /tmp/log
-                  chown $user  /tmp/log
-                   for i in $(seq 1 $NWORKERS) ; do
-                       su - -c "cd /home/$user ; source openrc.sh ; cd teuthology ; LC_ALL=C virtualenv/bin/teuthology-worker --tube openstack -l /tmp/log --archive-dir /usr/share/nginx/html" $user > /var/log/teuthology.$i 2>&1 &
-                   done
+                   start_workers
                 )
                 ;;
         stop)
-                pkill -f 'pecan serve'
-                pkill -f 'python run.py'
-                pkill -f 'teuthology-worker'
+                #pkill -f 'pecan serve'
+                #pkill -f 'python run.py'
+                #pkill -f 'teuthology-worker'
+                $0 stop-pulpito
+                $0 stop-paddles
+                $0 stop-workers
                 pkill -f 'ansible'
                 /etc/init.d/beanstalkd stop
                 source /home/$user/teuthology/virtualenv/bin/activate