-from cStringIO import StringIO
+"""
+Hadoop task
+Install and configure hadoop -- requires that Ceph is already installed and
+already running.
+"""
+from cStringIO import StringIO
import contextlib
import logging
log = logging.getLogger(__name__)
-###################
-# This installeds and configures Hadoop, but requires that Ceph is already installed and running.
-##################
-## Check that there is exactly one master and at least one slave configured
@contextlib.contextmanager
-def validate_config(ctx, config):
+def validate_cluster(ctx):
+ """
+ Check that there is exactly one master and at least one slave configured
+ """
log.info('Vaidating Hadoop configuration')
slaves = ctx.cluster.only(teuthology.is_type('hadoop.slave'))
if (len(masters.remotes) == 1):
pass
else:
- raise Exception("Exactly one hadoop.master must be specified. Currently there are " + str(len(masters.remotes)))
+ raise Exception(
+ "Exactly one hadoop.master must be specified. Currently there are "
+ + str(len(masters.remotes)))
- try:
+ try:
yield
finally:
pass
-## Add required entries to conf/hadoop-env.sh
-def write_hadoop_env(ctx, config):
- hadoopEnvFile = "{tdir}/apache_hadoop/conf/hadoop-env.sh".format(tdir=teuthology.get_testdir(ctx))
- hadoopNodes = ctx.cluster.only(teuthology.is_type('hadoop'))
- for remote, roles_for_host in hadoopNodes.remotes.iteritems():
- teuthology.write_file(remote, hadoopEnvFile,
+def write_hadoop_env(ctx):
+ """
+ Add required entries to conf/hadoop-env.sh
+ """
+ hadoop_envfile = "{tdir}/apache_hadoop/conf/hadoop-env.sh".format(
+ tdir=teuthology.get_testdir(ctx))
+
+ hadoop_nodes = ctx.cluster.only(teuthology.is_type('hadoop'))
+ for remote in hadoop_nodes.remotes:
+ teuthology.write_file(remote, hadoop_envfile,
'''export JAVA_HOME=/usr/lib/jvm/default-java
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/usr/share/java/libcephfs.jar:{tdir}/apache_hadoop/build/hadoop-core*.jar:{tdir}/inktank_hadoop/build/hadoop-cephfs.jar
export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS"
export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS"
export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS"
-'''.format(tdir=teuthology.get_testdir(ctx)) )
- log.info("wrote file: " + hadoopEnvFile + " to host: " + str(remote))
+'''.format(tdir=teuthology.get_testdir(ctx)))
+ log.info("wrote file: " + hadoop_envfile + " to host: " + str(remote))
+
-## Add required entries to conf/core-site.xml
def write_core_site(ctx, config):
+ """
+ Add required entries to conf/core-site.xml
+ """
testdir = teuthology.get_testdir(ctx)
- coreSiteFile = "{tdir}/apache_hadoop/conf/core-site.xml".format(tdir=testdir)
+ core_site_file = "{tdir}/apache_hadoop/conf/core-site.xml".format(
+ tdir=testdir)
- hadoopNodes = ctx.cluster.only(teuthology.is_type('hadoop'))
- for remote, roles_for_host in hadoopNodes.remotes.iteritems():
+ hadoop_nodes = ctx.cluster.only(teuthology.is_type('hadoop'))
+ for remote in hadoop_nodes.remotes:
# check the config to see if we should use hdfs or ceph
default_fs_string = ""
if config.get('hdfs'):
- default_fs_string = 'hdfs://{master_ip}:54310'.format(master_ip=get_hadoop_master_ip(ctx))
+ default_fs_string = 'hdfs://{master_ip}:54310'.format(
+ master_ip=get_hadoop_master_ip(ctx))
else:
default_fs_string = 'ceph:///'
- teuthology.write_file(remote, coreSiteFile,
+ teuthology.write_file(remote, core_site_file,
'''<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
</configuration>
'''.format(tdir=teuthology.get_testdir(ctx), default_fs=default_fs_string))
- log.info("wrote file: " + coreSiteFile + " to host: " + str(remote))
+ log.info("wrote file: " + core_site_file + " to host: " + str(remote))
+
-## finds the hadoop.master in the ctx and then pulls out just the IP address
def get_hadoop_master_ip(ctx):
+ """
+ finds the hadoop.master in the ctx and then pulls out just the IP address
+ """
remote, _ = _get_master(ctx)
- master_name,master_port= remote.ssh.get_transport().getpeername()
- log.info('master name: {name} port {port}'.format(name=master_name,port=master_port))
+ master_name, master_port = remote.ssh.get_transport().getpeername()
+ log.info('master name: {name} port {port}'.format(name=master_name,
+ port=master_port))
return master_name
-## Add required entries to conf/mapred-site.xml
+
def write_mapred_site(ctx):
- mapredSiteFile = "{tdir}/apache_hadoop/conf/mapred-site.xml".format(tdir=teuthology.get_testdir(ctx))
+ """
+ Add required entries to conf/mapred-site.xml
+ """
+ mapred_site_file = "{tdir}/apache_hadoop/conf/mapred-site.xml".format(
+ tdir=teuthology.get_testdir(ctx))
master_ip = get_hadoop_master_ip(ctx)
log.info('adding host {remote} as jobtracker'.format(remote=master_ip))
- hadoopNodes = ctx.cluster.only(teuthology.is_type('hadoop'))
- for remote, roles_for_host in hadoopNodes.remotes.iteritems():
- teuthology.write_file(remote, mapredSiteFile,
+ hadoop_nodes = ctx.cluster.only(teuthology.is_type('hadoop'))
+ for remote in hadoop_nodes.remotes:
+ teuthology.write_file(remote, mapred_site_file,
'''<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<name>mapred.job.tracker</name>
<value>{remote}:54311</value>
</property>
-</configuration>
+</configuration>
'''.format(remote=master_ip))
- log.info("wrote file: " + mapredSiteFile + " to host: " + str(remote))
+ log.info("wrote file: " + mapred_site_file + " to host: " + str(remote))
+
-## Add required entries to conf/hdfs-site.xml
def write_hdfs_site(ctx):
- hdfsSiteFile = "{tdir}/apache_hadoop/conf/hdfs-site.xml".format(tdir=teuthology.get_testdir(ctx))
+ """
+ Add required entries to conf/hdfs-site.xml
+ """
+ hdfs_site_file = "{tdir}/apache_hadoop/conf/hdfs-site.xml".format(
+ tdir=teuthology.get_testdir(ctx))
- hadoopNodes = ctx.cluster.only(teuthology.is_type('hadoop'))
- for remote, roles_for_host in hadoopNodes.remotes.iteritems():
- teuthology.write_file(remote, hdfsSiteFile,
+ hadoop_nodes = ctx.cluster.only(teuthology.is_type('hadoop'))
+ for remote in hadoop_nodes.remotes:
+ teuthology.write_file(remote, hdfs_site_file,
'''<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<value>1</value>
</property>
</configuration>
-''' )
- log.info("wrote file: " + hdfsSiteFile + " to host: " + str(remote))
+''')
+ log.info("wrote file: " + hdfs_site_file + " to host: " + str(remote))
+
-## Add required entries to conf/slaves
-## These nodes host TaskTrackers and DataNodes
def write_slaves(ctx):
+ """
+ Add required entries to conf/slaves
+ These nodes host TaskTrackers and DataNodes
+ """
log.info('Setting up slave nodes...')
- slavesFile = "{tdir}/apache_hadoop/conf/slaves".format(tdir=teuthology.get_testdir(ctx))
- tmpFile = StringIO()
+ slaves_file = "{tdir}/apache_hadoop/conf/slaves".format(
+ tdir=teuthology.get_testdir(ctx))
+ tmp_file = StringIO()
slaves = ctx.cluster.only(teuthology.is_type('hadoop.slave'))
- for remote, roles_for_host in slaves.remotes.iteritems():
- tmpFile.write('{remote}\n'.format(remote=remote.ssh.get_transport().getpeername()[0]))
+ for remote in slaves.remotes:
+ tmp_file.write('{remote}\n'.format(
+ remote=remote.ssh.get_transport().getpeername()[0]))
+
+ tmp_file.seek(0)
- tmpFile.seek(0)
+ hadoop_nodes = ctx.cluster.only(teuthology.is_type('hadoop'))
+ for remote in hadoop_nodes.remotes:
+ teuthology.write_file(remote=remote, path=slaves_file, data=tmp_file)
+ tmp_file.seek(0)
+ log.info("wrote file: " + slaves_file + " to host: " + str(remote))
- hadoopNodes = ctx.cluster.only(teuthology.is_type('hadoop'))
- for remote, roles_for_host in hadoopNodes.remotes.iteritems():
- teuthology.write_file(remote=remote, path=slavesFile, data=tmpFile)
- tmpFile.seek(0)
- log.info("wrote file: " + slavesFile + " to host: " + str(remote))
-## Add required entries to conf/masters
-## These nodes host JobTrackers and Namenodes
def write_master(ctx):
- mastersFile = "{tdir}/apache_hadoop/conf/masters".format(tdir=teuthology.get_testdir(ctx))
+ """
+ Add required entries to conf/masters
+ These nodes host JobTrackers and Namenodes
+ """
+ masters_file = "{tdir}/apache_hadoop/conf/masters".format(
+ tdir=teuthology.get_testdir(ctx))
master = _get_master(ctx)
master_remote, _ = master
+ hadoop_nodes = ctx.cluster.only(teuthology.is_type('hadoop'))
+ for remote in hadoop_nodes.remotes:
+ teuthology.write_file(remote, masters_file, '{master_host}\n'.format(
+ master_host=master_remote.ssh.get_transport().getpeername()[0]))
+ log.info("wrote file: " + masters_file + " to host: " + str(remote))
- hadoopNodes = ctx.cluster.only(teuthology.is_type('hadoop'))
- for remote, roles_for_host in hadoopNodes.remotes.iteritems():
- teuthology.write_file(remote, mastersFile, '{master_host}\n'.format(master_host=master_remote.ssh.get_transport().getpeername()[0]))
- log.info("wrote file: " + mastersFile + " to host: " + str(remote))
-## Call the various functions that configure Hadoop
def _configure_hadoop(ctx, config):
+ """
+ Call the various functions that configure Hadoop
+ """
log.info('writing out config files')
- write_hadoop_env(ctx, config)
+ write_hadoop_env(ctx)
write_core_site(ctx, config)
write_mapred_site(ctx)
write_hdfs_site(ctx)
write_master(ctx)
-
@contextlib.contextmanager
def configure_hadoop(ctx, config):
- _configure_hadoop(ctx,config)
-
+ """
+ Call the various functions that configure Hadoop, and handle the
+ startup of hadoop and clean up of temporary files if this is an hdfs.
+ """
+ _configure_hadoop(ctx, config)
log.info('config.get(hdfs): {hdfs}'.format(hdfs=config.get('hdfs')))
if config.get('hdfs'):
master = _get_master(ctx)
remote, _ = master
remote.run(
- args=["{tdir}/apache_hadoop/bin/hadoop".format(tdir=teuthology.get_testdir(ctx)),
+ args=["{tdir}/apache_hadoop/bin/hadoop".format(
+ tdir=teuthology.get_testdir(ctx)),
"namenode",
"-format"],
wait=True,
log.info('done setting up hadoop')
- try:
+ try:
yield
finally:
),
)
+
def _start_hadoop(ctx, remote, config):
+ """
+ remotely start hdfs if specified, and start mapred.
+ """
testdir = teuthology.get_testdir(ctx)
if config.get('hdfs'):
remote.run(
- args=['{tdir}/apache_hadoop/bin/start-dfs.sh'.format(tdir=testdir), ],
+ args=['{tdir}/apache_hadoop/bin/start-dfs.sh'.format(
+ tdir=testdir), ],
wait=True,
)
log.info('done starting hdfs')
remote.run(
- args=['{tdir}/apache_hadoop/bin/start-mapred.sh'.format(tdir=testdir), ],
+ args=['{tdir}/apache_hadoop/bin/start-mapred.sh'.format(
+ tdir=testdir), ],
wait=True,
)
log.info('done starting mapred')
def _stop_hadoop(ctx, remote, config):
+ """
+    remotely stop mapred, and if hdfs is specified, stop the hdfs handler too.
+ """
testdir = teuthology.get_testdir(ctx)
remote.run(
args=['{tdir}/apache_hadoop/bin/stop-mapred.sh'.format(tdir=testdir), ],
if config.get('hdfs'):
remote.run(
- args=['{tdir}/apache_hadoop/bin/stop-dfs.sh'.format(tdir=testdir), ],
+ args=['{tdir}/apache_hadoop/bin/stop-dfs.sh'.format(
+ tdir=testdir), ],
wait=True,
)
log.info('done stopping hadoop')
+
def _get_master(ctx):
+ """
+ Return the hadoop master. If more than one is found, fail an assertion
+ """
master = ctx.cluster.only(teuthology.is_type('hadoop.master'))
- assert 1 == len(master.remotes.items()), 'There must be exactly 1 hadoop.master configured'
+ assert 1 == len(master.remotes.items()), \
+ 'There must be exactly 1 hadoop.master configured'
return master.remotes.items()[0]
+
@contextlib.contextmanager
def start_hadoop(ctx, config):
+ """
+ Handle the starting and stopping of hadoop
+ """
master = _get_master(ctx)
remote, _ = master
- log.info('Starting hadoop on {remote}\n'.format(remote=remote.ssh.get_transport().getpeername()[0]))
+ log.info('Starting hadoop on {remote}\n'.format(
+ remote=remote.ssh.get_transport().getpeername()[0]))
_start_hadoop(ctx, remote, config)
- try:
+ try:
yield
finally:
- log.info('Running stop-mapred.sh on {remote}'.format(remote=remote.ssh.get_transport().getpeername()[0]))
+ log.info('Running stop-mapred.sh on {remote}'.format(
+ remote=remote.ssh.get_transport().getpeername()[0]))
_stop_hadoop(ctx, remote, config)
-# download and untar the most recent apache hadoop binaries into {testdir}/apache_hadoop
-def _download_apache_hadoop_binaries(ctx, remote, hadoop_url):
- log.info('_download_apache_hadoop_binaries: path {path} on host {host}'.format(path=hadoop_url, host=str(remote)))
- fileName = 'apache-hadoop.tgz'
+
+def _download_apache_hadoop_bins(ctx, remote, hadoop_url):
+ """
+ download and untar the most recent apache hadoop binaries into
+ {testdir}/apache_hadoop
+ """
+ log.info(
+ '_download_apache_hadoop_bins: path {path} on host {host}'.format(
+ path=hadoop_url, host=str(remote)))
+ file_name = 'apache-hadoop.tgz'
testdir = teuthology.get_testdir(ctx)
remote.run(
args=[
- 'mkdir', '-p', '-m0755', '{tdir}/apache_hadoop'.format(tdir=testdir),
+ 'mkdir', '-p', '-m0755',
+ '{tdir}/apache_hadoop'.format(tdir=testdir),
run.Raw('&&'),
'echo',
- '{fileName}'.format(fileName=fileName),
+ '{file_name}'.format(file_name=file_name),
run.Raw('|'),
'wget',
'-nv',
# need to use --input-file to make wget respect --base
'--input-file=-',
run.Raw('|'),
- 'tar', '-xzf', '-', '-C', '{tdir}/apache_hadoop'.format(tdir=testdir),
+ 'tar', '-xzf', '-', '-C',
+ '{tdir}/apache_hadoop'.format(tdir=testdir),
],
)
-# download and untar the most recent Inktank hadoop binaries into {testdir}/hadoop
-def _download_inktank_hadoop_binaries(ctx, remote, hadoop_url):
- log.info('_download_inktank_hadoop_binaries: path {path} on host {host}'.format(path=hadoop_url, host=str(remote)))
- fileName = 'hadoop.tgz'
+
+def _download_inktank_hadoop_bins(ctx, remote, hadoop_url):
+ """
+ download and untar the most recent Inktank hadoop binaries into
+ {testdir}/hadoop
+ """
+ log.info(
+ '_download_inktank_hadoop_bins: path {path} on host {host}'.format(
+ path=hadoop_url, host=str(remote)))
+ file_name = 'hadoop.tgz'
testdir = teuthology.get_testdir(ctx)
remote.run(
args=[
- 'mkdir', '-p', '-m0755', '{tdir}/inktank_hadoop'.format(tdir=testdir),
+ 'mkdir', '-p', '-m0755',
+ '{tdir}/inktank_hadoop'.format(tdir=testdir),
run.Raw('&&'),
'echo',
- '{fileName}'.format(fileName=fileName),
+ '{file_name}'.format(file_name=file_name),
run.Raw('|'),
'wget',
'-nv',
# need to use --input-file to make wget respect --base
'--input-file=-',
run.Raw('|'),
- 'tar', '-xzf', '-', '-C', '{tdir}/inktank_hadoop'.format(tdir=testdir),
+ 'tar', '-xzf', '-', '-C',
+ '{tdir}/inktank_hadoop'.format(tdir=testdir),
],
)
-# copy hadoop-cephfs.jar and hadoop-cephfs-test.jar into apache_hadoop
+
def _copy_hadoop_cephfs_jars(ctx, remote, from_dir, to_dir):
+ """
+ copy hadoop-cephfs.jar and hadoop-cephfs-test.jar into apache_hadoop
+ """
testdir = teuthology.get_testdir(ctx)
- log.info('copy jars from {from_dir} to {to_dir} on host {host}'.format(from_dir=from_dir, to_dir=to_dir, host=str(remote)))
- file_names = [ 'hadoop-cephfs.jar', 'hadoop-cephfs-test.jar' ]
+ log.info('copy jars from {from_dir} to {to_dir} on host {host}'.format(
+ from_dir=from_dir, to_dir=to_dir, host=str(remote)))
+ file_names = ['hadoop-cephfs.jar', 'hadoop-cephfs-test.jar']
for file_name in file_names:
log.info('Copying file {file_name}'.format(file_name=file_name))
remote.run(
- args=[ 'cp', '{tdir}/{from_dir}/{file_name}'.format(tdir=testdir,from_dir=from_dir,file_name=file_name),
- '{tdir}/{to_dir}/'.format(tdir=testdir,to_dir=to_dir)
+ args=['cp', '{tdir}/{from_dir}/{file_name}'.format(
+ tdir=testdir, from_dir=from_dir, file_name=file_name),
+ '{tdir}/{to_dir}/'.format(tdir=testdir, to_dir=to_dir)
],
)
-def _node_binaries(ctx, config, remote, inktank_hadoop_bindir_url, apache_hadoop_bindir_url):
- _download_inktank_hadoop_binaries(ctx, remote, inktank_hadoop_bindir_url)
- _download_apache_hadoop_binaries(ctx, remote, apache_hadoop_bindir_url)
- _copy_hadoop_cephfs_jars(ctx, remote, 'inktank_hadoop/build', 'apache_hadoop/build')
+
+def _node_binaries(ctx, remote, inktank_hadoop_bindir_url,
+ apache_hadoop_bindir_url):
+ """
+ Download and copy over the appropriate binaries and jar files.
+ The calls from binaries() end up spawning this function on remote sites.
+ """
+ _download_inktank_hadoop_bins(ctx, remote, inktank_hadoop_bindir_url)
+ _download_apache_hadoop_bins(ctx, remote, apache_hadoop_bindir_url)
+ _copy_hadoop_cephfs_jars(ctx, remote, 'inktank_hadoop/build',
+ 'apache_hadoop/build')
+
@contextlib.contextmanager
def binaries(ctx, config):
+ """
+ Fetch the binaries from the gitbuilder, and spawn the download tasks on
+ the remote machines.
+ """
path = config.get('path')
if path is None:
# fetch Apache Hadoop from gitbuilder
- log.info('Fetching and unpacking Apache Hadoop binaries from gitbuilder...')
+ log.info(
+ 'Fetching and unpacking Apache Hadoop binaries from gitbuilder...')
apache_sha1, apache_hadoop_bindir_url = teuthology.get_ceph_binary_url(
package='apache-hadoop',
branch=config.get('apache_branch'),
ctx.summary['apache-hadoop-sha1'] = apache_sha1
# fetch Inktank Hadoop from gitbuilder
- log.info('Fetching and unpacking Inktank Hadoop binaries from gitbuilder...')
- inktank_sha1, inktank_hadoop_bindir_url = teuthology.get_ceph_binary_url(
- package='hadoop',
- branch=config.get('inktank_branch'),
- tag=config.get('tag'),
- sha1=config.get('sha1'),
- flavor=config.get('flavor'),
- format=config.get('format'),
- dist=config.get('dist'),
- arch=config.get('arch'),
- )
+ log.info(
+ 'Fetching and unpacking Inktank Hadoop binaries from gitbuilder...')
+ inktank_sha1, inktank_hadoop_bindir_url = \
+ teuthology.get_ceph_binary_url(
+ package='hadoop',
+ branch=config.get('inktank_branch'),
+ tag=config.get('tag'),
+ sha1=config.get('sha1'),
+ flavor=config.get('flavor'),
+ format=config.get('format'),
+ dist=config.get('dist'),
+ arch=config.get('arch'),
+ )
log.info('inktank_hadoop_bindir_url %s' % (inktank_hadoop_bindir_url))
ctx.summary['inktank-hadoop-sha1'] = inktank_sha1
else:
- raise Exception("The hadoop task does not support the path argument at present")
+ raise Exception(
+ "The hadoop task does not support the path argument at present")
- with parallel() as p:
- hadoopNodes = ctx.cluster.only(teuthology.is_type('hadoop'))
+ with parallel() as parallel_task:
+ hadoop_nodes = ctx.cluster.only(teuthology.is_type('hadoop'))
# these can happen independently
- for remote in hadoopNodes.remotes.iterkeys():
- p.spawn(_node_binaries, ctx, config, remote, inktank_hadoop_bindir_url, apache_hadoop_bindir_url)
+ for remote in hadoop_nodes.remotes.iterkeys():
+ parallel_task.spawn(_node_binaries, ctx, remote,
+ inktank_hadoop_bindir_url, apache_hadoop_bindir_url)
try:
yield
log.info('Removing hadoop binaries...')
run.wait(
ctx.cluster.run(
- args=[ 'rm', '-rf', '--', '{tdir}/apache_hadoop'.format(tdir=teuthology.get_testdir(ctx))],
+ args=['rm', '-rf', '--', '{tdir}/apache_hadoop'.format(
+ tdir=teuthology.get_testdir(ctx))],
wait=False,
),
)
run.wait(
ctx.cluster.run(
- args=[ 'rm', '-rf', '--', '{tdir}/inktank_hadoop'.format(tdir=teuthology.get_testdir(ctx))],
+ args=['rm', '-rf', '--', '{tdir}/inktank_hadoop'.format(
+ tdir=teuthology.get_testdir(ctx))],
wait=False,
),
)
-## A Hadoop NameNode will stay in safe mode for 30 seconds by default.
-## This method blocks until the NameNode is out of safe mode.
+
@contextlib.contextmanager
def out_of_safemode(ctx, config):
-
+ """
+ A Hadoop NameNode will stay in safe mode for 30 seconds by default.
+ This method blocks until the NameNode is out of safe mode.
+ """
if config.get('hdfs'):
log.info('Waiting for the Namenode to exit safe mode...')
master = _get_master(ctx)
remote, _ = master
remote.run(
- args=["{tdir}/apache_hadoop/bin/hadoop".format(tdir=teuthology.get_testdir(ctx)),
+ args=["{tdir}/apache_hadoop/bin/hadoop".format(
+ tdir=teuthology.get_testdir(ctx)),
"dfsadmin",
"-safemode",
"wait"],
finally:
pass
+
@contextlib.contextmanager
def task(ctx, config):
"""
- hadoop:
Or if you want to use HDFS under Hadoop, this will configure Hadoop
- for HDFS and start it along with MapReduce. Note that it does not
+ for HDFS and start it along with MapReduce. Note that it does not
require Ceph be installed.
roles:
- hadoop:
hdfs: True
- This task requires exactly one hadoop.master be specified
+ This task requires exactly one hadoop.master be specified
and at least one hadoop.slave.
- This does *not* do anything with the Hadoop setup. To run wordcount,
+ This does *not* do anything with the Hadoop setup. To run wordcount,
you could use pexec like so (after the hadoop task):
- - pexec:
+ - pexec:
hadoop.slave.0:
- mkdir -p /tmp/hadoop_input
- wget http://ceph.com/qa/hadoop_input_files.tar -O /tmp/hadoop_input/files.tar
- {tdir}/hadoop/bin/hadoop fs -put /tmp/hadoop_input/*txt wordcount_input/
- {tdir}/hadoop/bin/hadoop jar {tdir}/hadoop/build/hadoop-example*jar wordcount wordcount_input wordcount_output
- rm -rf /tmp/hadoop_input
- """.format(tdir=teuthology.get_testdir(ctx))
+
+ Note: {tdir} in the above example is the teuthology test directory.
+ """
if config is None:
config = {}
assert isinstance(config, dict), \
"task hadoop only supports a dictionary for configuration"
- dist = 'precise'
- format = 'jar'
+ dist = 'precise'
+ format_type = 'jar'
arch = 'x86_64'
flavor = config.get('flavor', 'basic')
if config.get('apache_hadoop_branch') is not None:
apache_branch = config.get('apache_hadoop_branch')
else:
- apache_branch = 'branch-1.0' # hadoop branch to acquire
+ apache_branch = 'branch-1.0' # hadoop branch to acquire
inktank_branch = None
if config.get('inktank_hadoop_branch') is not None:
inktank_branch = config.get('inktank_hadoop_branch')
else:
- inktank_branch = 'cephfs/branch-1.0' # default branch name
+ inktank_branch = 'cephfs/branch-1.0' # default branch name
# replace any '/' with a '_' to match the artifact paths
- inktank_branch = inktank_branch.replace('/','_')
- apache_branch = apache_branch.replace('/','_')
+ inktank_branch = inktank_branch.replace('/', '_')
+ apache_branch = apache_branch.replace('/', '_')
with contextutil.nested(
- lambda: validate_config(ctx=ctx, config=config),
+ lambda: validate_cluster(ctx=ctx),
lambda: binaries(ctx=ctx, config=dict(
tag=config.get('tag'),
sha1=config.get('sha1'),
path=config.get('path'),
flavor=flavor,
dist=config.get('dist', dist),
- format=format,
+ format=format_type,
arch=arch,
apache_branch=apache_branch,
inktank_branch=inktank_branch,