]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
qa: hadoop plays nice with new teuthology task 3711/head
authorNoah Watkins <noahwatkins@gmail.com>
Thu, 12 Feb 2015 00:02:49 +0000 (16:02 -0800)
committerNoah Watkins <noahwatkins@gmail.com>
Thu, 12 Feb 2015 00:03:32 +0000 (16:03 -0800)
This brings the hadoop wordcount up-to-date with the new teuthology
hadoop task.

Signed-off-by: Noah Watkins <noahwatkins@gmail.com>
qa/workunits/hadoop/wordcount.sh

index 1b1c20702431b3de92d83bd064138730c6629afa..da91929fcdc74a2d3564f037d76f34b320ae2353 100755 (executable)
@@ -1,4 +1,11 @@
-#!/bin/sh -ex
+#!/bin/bash
+
+set -e
+set -x
+
+WC_INPUT=/wc_input
+WC_OUTPUT=/wc_output
+DATA_INPUT=$(mktemp -d)
 
 echo "starting hadoop-wordcount test"
 
@@ -6,43 +13,25 @@ echo "starting hadoop-wordcount test"
 [ -z $TESTDIR ] && { echo "\$TESTDIR needs to be set, but is not. Exiting."; exit 1; }
 
 # if HADOOP_PREFIX is not set, use default
-[ -z $HADOOP_PREFIX ] && { HADOOP_PREFIX=$TESTDIR/apache_hadoop; }
-
-# if HADOOP_MR_HOME is not set, use default
-[ -z $HADOOP_MR_HOME ] && { HADOOP_MR_HOME=$TESTDIR/apache_hadoop/build; }
+[ -z $HADOOP_PREFIX ] && { HADOOP_PREFIX=$TESTDIR/hadoop; }
 
 export JAVA_HOME=/usr/lib/jvm/default-java
 
-set -e
-set -x
+# Nuke hadoop directories
+$HADOOP_PREFIX/bin/hadoop fs -rm -r $WC_INPUT $WC_OUTPUT || true
+
+# Fetch and import testing data set
+curl http://ceph.com/qa/hadoop_input_files.tar | tar xf - -C $DATA_INPUT
+$HADOOP_PREFIX/bin/hadoop fs -copyFromLocal $DATA_INPUT $WC_INPUT
+rm -rf $DATA_INPUT
+
+# Run the job
+$HADOOP_PREFIX/bin/hadoop jar \
+  $HADOOP_PREFIX/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar \
+  wordcount $WC_INPUT $WC_OUTPUT
 
-# Clear out in case there was a previous run (idempotency)
-if $HADOOP_PREFIX/bin/hadoop fs -ls /wordcount_output 2>/dev/null ; then
-    $HADOOP_PREFIX/bin/hadoop fs -rmr /wordcount_output
-fi
-if $HADOOP_PREFIX/bin/hadoop fs -ls /wordcount_input 2>/dev/null ; then
-    $HADOOP_PREFIX/bin/hadoop fs -rmr /wordcount_input
-fi
-rm -rf $TESTDIR/hadoop_input
-
-# Load input files into local filesystem
-mkdir -p $TESTDIR/hadoop_input
-wget http://ceph.com/qa/hadoop_input_files.tar -O $TESTDIR/hadoop_input/files.tar
-cd $TESTDIR/hadoop_input
-tar -xf $TESTDIR/hadoop_input/files.tar
-
-# Load input files into hadoop filesystem
-$HADOOP_PREFIX/bin/hadoop fs -mkdir /wordcount_input
-$HADOOP_PREFIX/bin/hadoop fs -put $TESTDIR/hadoop_input/*txt /wordcount_input/
-
-# Execute job
-$HADOOP_PREFIX/bin/hadoop jar $HADOOP_MR_HOME/hadoop*examples*jar wordcount /wordcount_input /wordcount_output
-
-# Clean up
-$HADOOP_PREFIX/bin/hadoop fs -rmr /wordcount_output
-$HADOOP_PREFIX/bin/hadoop fs -rmr /wordcount_input
-cd $TESTDIR
-rm -rf $TESTDIR/hadoop_input
+# Cleanup
+$HADOOP_PREFIX/bin/hadoop fs -rm -r $WC_INPUT $WC_OUTPUT || true
 
 echo "completed hadoop-wordcount test"
 exit 0