From 8b6be11a3605f1a0e44a3302ca2ab43830d2e774 Mon Sep 17 00:00:00 2001
From: Erwan Velu
Date: Tue, 15 Mar 2016 16:00:17 +0100
Subject: [PATCH] tests: Adding parallelism to encoding/readable.sh

When running "make -j x check", we face a weird situation where the
makefile targets are spawned in parallel up to "x", but one of those
targets is very long and strictly sequential.

The "readable.sh" test tries to run ~7.9K tests, of which 5.3K are
actually executed. The current code takes 23 minutes on a recent laptop
(Intel(R) Core(TM) i7-4810MQ CPU @ 2.80GHz, 32GB of RAM & SSD).

This patch implements parallelism to speed up this process, which is
neither really CPU-bound nor IO-bound.

By default, readable.sh now uses the number of logical processors (as
reported by nproc) to determine the level of parallelism. If needed,
defining the MAX_PARALLEL_JOBS variable overrides this default value.

On the same system, where nproc=8, the resulting execution time is
5 minutes 55 seconds: 4x faster than the original code.

The global 'make check' therefore gets faster too, dropping from 30 to
16 minutes: 2x faster than the original code.

Signed-off-by: Erwan Velu
---
 src/test/encoding/readable.sh | 99 ++++++++++++++++++++++++++++++-----
 1 file changed, 87 insertions(+), 12 deletions(-)

diff --git a/src/test/encoding/readable.sh b/src/test/encoding/readable.sh
index 2116f45fb3da1..774cd94227ecf 100755
--- a/src/test/encoding/readable.sh
+++ b/src/test/encoding/readable.sh
@@ -4,23 +4,25 @@ dir=../ceph-object-corpus
 
 set -e
 
-tmp1=`mktemp /tmp/typ-XXXXXXXXX`
-tmp2=`mktemp /tmp/typ-XXXXXXXXX`
-
 failed=0
 numtests=0
+pids=""
 myversion=`./ceph-dencoder version`
+DEBUG=0
+WAITALL_DELAY=.1
+debug() { if [ "$DEBUG" -gt 0 ]; then echo "DEBUG: $*" >&2; fi }
 
-for arversion in `ls -v $dir/archive`; do
-  vdir="$dir/archive/$arversion"
-  #echo $vdir
+test_object() {
+  local type=$1
+  local output_file=$2
+  local failed=0
+  local numtests=0
 
-  if [ ! -d "$vdir/objects" ]; then
-    continue;
-  fi
+  tmp1=`mktemp /tmp/typ-XXXXXXXXX`
+  tmp2=`mktemp /tmp/typ-XXXXXXXXX`
 
-  for type in `ls $vdir/objects`; do
+  rm -f $output_file
     if ./ceph-dencoder type $type 2>/dev/null; then
       #echo "type $type";
       echo "  $vdir/objects/$type"
 
@@ -88,11 +90,13 @@ for arversion in `ls -v $dir/archive`; do
         if ! ./ceph-dencoder type $type import $vdir/objects/$type/$f decode dump_json > $tmp1; then
           echo "**** failed to decode $vdir/objects/$type/$f ****"
           failed=$(($failed + 1))
+          rm -f $tmp1 $tmp2
           continue
         fi
         if ! ./ceph-dencoder type $type import $vdir/objects/$type/$f decode encode decode dump_json > $tmp2; then
           echo "**** failed to decode+encode+decode $vdir/objects/$type/$f ****"
           failed=$(($failed + 1))
+          rm -f $tmp1 $tmp2
           continue
         fi
 
@@ -114,15 +118,86 @@ for arversion in `ls -v $dir/archive`; do
           failed=$(($failed + 1))
         fi
         numtests=$(($numtests + 1))
+        echo "failed=$failed" > $output_file
+        echo "numtests=$numtests" >> $output_file
       done
     else
       echo "skipping unrecognized type $type"
     fi
+
+  rm -f $tmp1 $tmp2
+}
+
+waitall() { # PID...
+  ## Wait for children to exit and indicate whether all exited with 0 status.
+  local errors=0
+  while :; do
+    debug "Processes remaining: $*"
+    for pid in "$@"; do
+      shift
+      if kill -0 "$pid" 2>/dev/null; then
+        debug "$pid is still alive."
+        set -- "$@" "$pid"
+      elif wait "$pid"; then
+        debug "$pid exited with zero exit status."
+      else
+        debug "$pid exited with non-zero exit status."
+        errors=$(($errors + 1))
+      fi
+    done
+    (("$#" > 0)) || break
+    sleep ${WAITALL_DELAY:-1}
+  done
+  [ $errors -eq 0 ]
+}
+
+######
+# MAIN
+######
+
+# Use $MAX_PARALLEL_JOBS jobs if defined, otherwise default to the number
+# of logical processors (nproc)
+max_parallel_jobs=${MAX_PARALLEL_JOBS:-$(nproc)}
+
+for arversion in `ls -v $dir/archive`; do
+  vdir="$dir/archive/$arversion"
+  #echo $vdir
+
+  if [ ! -d "$vdir/objects" ]; then
+    continue;
+  fi
+
+  output_file=`mktemp /tmp/typ-XXXXXXXXX`
+  running_jobs=0
+  for type in `ls $vdir/objects`; do
+    test_object $type $output_file.$running_jobs &
+    pids="$pids $!"
+    running_jobs=$(($running_jobs + 1))
+
+    # Once we have spawned enough jobs, wait for them to complete.
+    # Every spawned job has almost the same execution time, so it is
+    # not a big deal that they do not all end at the same time.
+    if [ "$running_jobs" -eq "$max_parallel_jobs" ]; then
+      waitall $pids
+      pids=""
+      # Read the output of the jobs to compute failed & numtests.
+      # Tests run in parallel, but the sums must be done sequentially
+      # to avoid races between jobs.
+      while [ "$running_jobs" -ge 0 ]; do
+        if [ -f $output_file.$running_jobs ]; then
+          read_failed=$(grep "^failed=" $output_file.$running_jobs | cut -d "=" -f 2)
+          read_numtests=$(grep "^numtests=" $output_file.$running_jobs | cut -d "=" -f 2)
+          rm -f $output_file.$running_jobs
+          failed=$(($failed + $read_failed))
+          numtests=$(($numtests + $read_numtests))
+        fi
+        running_jobs=$(($running_jobs - 1))
+      done
+      running_jobs=0
+    fi
   done
 done
 
-rm -f $tmp1 $tmp2
-
 if [ $failed -gt 0 ]; then
   echo "FAILED $failed / $numtests tests."
   exit 1
-- 
2.39.5
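
A quick usage sketch of the knobs introduced above (illustrative only; the
working directory is an assumption, since the script expects ./ceph-dencoder
and ../ceph-object-corpus to be reachable from wherever it is started):

  # default: one test_object job per logical processor, as reported by nproc
  ./readable.sh

  # cap the number of concurrent ceph-dencoder jobs, e.g. on a shared host
  MAX_PARALLEL_JOBS=4 ./readable.sh

  # enable the debug() traces added by this patch
  DEBUG=1 ./readable.sh

  # environment variables propagate through make to the test, so this
  # should also bound the parallelism during a full run
  MAX_PARALLEL_JOBS=4 make -j 4 check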