From b4a0ed71700fedfe0d9c8d0d0d89c150e8639994 Mon Sep 17 00:00:00 2001
From: Casey Bodley <cbodley@redhat.com>
Date: Thu, 17 Jul 2025 13:06:01 -0400
Subject: [PATCH] qa/rgw: remove hadoop-s3a subsuite

this suite hasn't provided much benefit since it was added, and is
becoming more of a maintenance burden recently:

* https://tracker.ceph.com/issues/71584
* https://tracker.ceph.com/issues/72179

remove the subsuite and its s3a_hadoop.py task

Signed-off-by: Casey Bodley <cbodley@redhat.com>
(cherry picked from commit d713e9a583cbe449c6744532d038d503a7a71388)
---
 qa/suites/rgw/hadoop-s3a/%                    |   0
 qa/suites/rgw/hadoop-s3a/.qa                  |   1 -
 qa/suites/rgw/hadoop-s3a/clusters/.qa         |   1 -
 .../rgw/hadoop-s3a/clusters/fixed-2.yaml      |   1 -
 qa/suites/rgw/hadoop-s3a/hadoop/.qa           |   1 -
 qa/suites/rgw/hadoop-s3a/hadoop/default.yaml  |   1 -
 qa/suites/rgw/hadoop-s3a/hadoop/v32.yaml      |   3 -
 .../hadoop-s3a/ignore-pg-availability.yaml    |   1 -
 qa/suites/rgw/hadoop-s3a/overrides.yaml       |   6 -
 qa/suites/rgw/hadoop-s3a/s3a-hadoop.yaml      |  11 -
 qa/tasks/s3a_hadoop.py                        | 295 ------------------
 11 files changed, 321 deletions(-)
 delete mode 100644 qa/suites/rgw/hadoop-s3a/%
 delete mode 120000 qa/suites/rgw/hadoop-s3a/.qa
 delete mode 120000 qa/suites/rgw/hadoop-s3a/clusters/.qa
 delete mode 120000 qa/suites/rgw/hadoop-s3a/clusters/fixed-2.yaml
 delete mode 120000 qa/suites/rgw/hadoop-s3a/hadoop/.qa
 delete mode 100644 qa/suites/rgw/hadoop-s3a/hadoop/default.yaml
 delete mode 100644 qa/suites/rgw/hadoop-s3a/hadoop/v32.yaml
 delete mode 120000 qa/suites/rgw/hadoop-s3a/ignore-pg-availability.yaml
 delete mode 100644 qa/suites/rgw/hadoop-s3a/overrides.yaml
 delete mode 100644 qa/suites/rgw/hadoop-s3a/s3a-hadoop.yaml
 delete mode 100644 qa/tasks/s3a_hadoop.py

diff --git a/qa/suites/rgw/hadoop-s3a/% b/qa/suites/rgw/hadoop-s3a/%
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/qa/suites/rgw/hadoop-s3a/.qa b/qa/suites/rgw/hadoop-s3a/.qa
deleted file mode 120000
index a602a0353e751..0000000000000
--- a/qa/suites/rgw/hadoop-s3a/.qa
+++ /dev/null
@@ -1 +0,0 @@
-../.qa/
\ No newline at end of file
diff --git a/qa/suites/rgw/hadoop-s3a/clusters/.qa b/qa/suites/rgw/hadoop-s3a/clusters/.qa
deleted file mode 120000
index a602a0353e751..0000000000000
--- a/qa/suites/rgw/hadoop-s3a/clusters/.qa
+++ /dev/null
@@ -1 +0,0 @@
-../.qa/
\ No newline at end of file
diff --git a/qa/suites/rgw/hadoop-s3a/clusters/fixed-2.yaml b/qa/suites/rgw/hadoop-s3a/clusters/fixed-2.yaml
deleted file mode 120000
index 230ff0fdab412..0000000000000
--- a/qa/suites/rgw/hadoop-s3a/clusters/fixed-2.yaml
+++ /dev/null
@@ -1 +0,0 @@
-.qa/clusters/fixed-2.yaml
\ No newline at end of file
diff --git a/qa/suites/rgw/hadoop-s3a/hadoop/.qa b/qa/suites/rgw/hadoop-s3a/hadoop/.qa
deleted file mode 120000
index a602a0353e751..0000000000000
--- a/qa/suites/rgw/hadoop-s3a/hadoop/.qa
+++ /dev/null
@@ -1 +0,0 @@
-../.qa/
\ No newline at end of file
diff --git a/qa/suites/rgw/hadoop-s3a/hadoop/default.yaml b/qa/suites/rgw/hadoop-s3a/hadoop/default.yaml
deleted file mode 100644
index 8b137891791fe..0000000000000
--- a/qa/suites/rgw/hadoop-s3a/hadoop/default.yaml
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/qa/suites/rgw/hadoop-s3a/hadoop/v32.yaml b/qa/suites/rgw/hadoop-s3a/hadoop/v32.yaml
deleted file mode 100644
index d017b756b78f0..0000000000000
--- a/qa/suites/rgw/hadoop-s3a/hadoop/v32.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-overrides:
-  s3a-hadoop:
-    hadoop-version: '3.2.0'
diff --git a/qa/suites/rgw/hadoop-s3a/ignore-pg-availability.yaml b/qa/suites/rgw/hadoop-s3a/ignore-pg-availability.yaml
deleted file mode 120000
index 32340b1fa8be8..0000000000000
--- a/qa/suites/rgw/hadoop-s3a/ignore-pg-availability.yaml
+++ /dev/null
@@ -1 +0,0 @@
-.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file
diff --git a/qa/suites/rgw/hadoop-s3a/overrides.yaml b/qa/suites/rgw/hadoop-s3a/overrides.yaml
deleted file mode 100644
index d52080bb5a1f3..0000000000000
--- a/qa/suites/rgw/hadoop-s3a/overrides.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-overrides:
-  ceph:
-    conf:
-      client:
-        setuser: ceph
-        setgroup: ceph
diff --git a/qa/suites/rgw/hadoop-s3a/s3a-hadoop.yaml b/qa/suites/rgw/hadoop-s3a/s3a-hadoop.yaml
deleted file mode 100644
index ed077a89f1969..0000000000000
--- a/qa/suites/rgw/hadoop-s3a/s3a-hadoop.yaml
+++ /dev/null
@@ -1,11 +0,0 @@
-tasks:
-- install:
-- ceph:
-- ssh-keys:
-- dnsmasq:
-    client.0: [s3.]
-- rgw:
-    client.0:
-      dns-name: s3.
-- s3a-hadoop:
-    role: client.0
diff --git a/qa/tasks/s3a_hadoop.py b/qa/tasks/s3a_hadoop.py
deleted file mode 100644
index 4591ddf1cefee..0000000000000
--- a/qa/tasks/s3a_hadoop.py
+++ /dev/null
@@ -1,295 +0,0 @@
-import contextlib
-import logging
-import os
-from teuthology import misc
-from teuthology.orchestra import run
-
-log = logging.getLogger(__name__)
-
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-     Run Hadoop S3A tests using Ceph
-     usage:
-      -tasks:
-         ceph-ansible:
-         s3a-hadoop:
-           maven-version: '3.9.10' (default)
-           hadoop-version: '2.9.2'
-           bucket-name: 's3atest' (default)
-           access-key: 'anykey' (uses a default value)
-           secret-key: 'secretkey' ( uses a default value)
-           role: client.0
-    """
-    if config is None:
-        config = {}
-
-    assert isinstance(config, dict), \
-        "task only supports a dictionary for configuration"
-
-    assert hasattr(ctx, 'rgw'), 's3a-hadoop must run after the rgw task'
-
-    overrides = ctx.config.get('overrides', {})
-    misc.deep_merge(config, overrides.get('s3a-hadoop', {}))
-    testdir = misc.get_testdir(ctx)
-
-    role = config.get('role')
-    (remote,) = ctx.cluster.only(role).remotes.keys()
-    endpoint = ctx.rgw.role_endpoints.get(role)
-    assert endpoint, 's3tests: no rgw endpoint for {}'.format(role)
-
-    # get versions
-    maven_major = config.get('maven-major', 'maven-3')
-    maven_version = config.get('maven-version', '3.9.10')
-    hadoop_ver = config.get('hadoop-version', '2.9.2')
-    bucket_name = config.get('bucket-name', 's3atest')
-    access_key = config.get('access-key', 'EGAQRD2ULOIFKFSKCT4F')
-    secret_key = config.get(
-        'secret-key',
-        'zi816w1vZKfaSM85Cl0BxXTwSLyN7zB4RbTswrGb')
-
-    # programmatically find a nearby mirror so as not to hammer archive.apache.org
-    apache_mirror_cmd="curl 'https://www.apache.org/dyn/closer.cgi' 2>/dev/null | " \
-        "grep -o '<strong>[^<]*</strong>' | sed 's/<[^>]*>//g' | head -n 1"
-    log.info("determining apache mirror by running: " + apache_mirror_cmd)
-    apache_mirror_url_front = os.popen(apache_mirror_cmd).read().rstrip() # note: includes trailing slash (/)
-    log.info("chosen apache mirror is " + apache_mirror_url_front)
-
-    # set versions for cloning the repo
-    apache_maven = 'apache-maven-{maven_version}-bin.tar.gz'.format(
-        maven_version=maven_version)
-    maven_link = '{apache_mirror_url_front}/maven/'.format(apache_mirror_url_front=apache_mirror_url_front) + \
-        '{maven_major}/{maven_version}/binaries/'.format(maven_major=maven_major, maven_version=maven_version) + \
-        apache_maven
-    hadoop_git = 'https://github.com/apache/hadoop'
-    hadoop_rel = 'hadoop-{ver} rel/release-{ver}'.format(ver=hadoop_ver)
-    if hadoop_ver == 'trunk':
-        # just checkout a new branch out of trunk
-        hadoop_rel = 'hadoop-ceph-trunk'
-    install_prereq(remote)
-    remote.run(
-        args=[
-            'cd',
-            testdir,
-            run.Raw('&&'),
-            'wget',
-            maven_link,
-            run.Raw('&&'),
-            'tar',
-            '-xvf',
-            apache_maven,
-            run.Raw('&&'),
-            'git',
-            'clone',
-            run.Raw(hadoop_git),
-            run.Raw('&&'),
-            'cd',
-            'hadoop',
-            run.Raw('&&'),
-            'git',
-            'checkout',
-            '-b',
-            run.Raw(hadoop_rel)
-        ]
-    )
-    configure_s3a(remote, endpoint.dns_name, access_key, secret_key, bucket_name, testdir)
-    setup_user_bucket(remote, endpoint.dns_name, access_key, secret_key, bucket_name, testdir)
-    if hadoop_ver.startswith('2.8'):
-        # test all ITtests but skip AWS test using public bucket landsat-pds
-        # which is not available from within this test
-        test_options = '-Dit.test=ITestS3A* -Dparallel-tests -Dscale \
-                        -Dfs.s3a.scale.test.timeout=1200 \
-                        -Dfs.s3a.scale.test.huge.filesize=256M verify'
-    else:
-        test_options = 'test -Dtest=S3a*,TestS3A*'
-    try:
-        run_s3atest(remote, maven_version, testdir, test_options)
-        yield
-    finally:
-        log.info("Done s3a testing, Cleaning up")
-        for fil in ['apache*', 'hadoop*', 'venv*', 'create*']:
-            remote.run(args=['rm', run.Raw('-rf'), run.Raw('{tdir}/{file}'.format(tdir=testdir, file=fil))])
-
-
-def install_prereq(client):
-    """
-    Install pre requisites for RHEL and CentOS
-    TBD: Ubuntu
-    """
-    if client.os.name == 'rhel' or client.os.name == 'centos':
-        client.run(
-            args=[
-                'sudo',
-                'yum',
-                'install',
-                '-y',
-                'protobuf-c.x86_64',
-                'java',
-                'java-1.8.0-openjdk-devel',
-                'dnsmasq'
-            ]
-        )
-
-
-def setup_user_bucket(client, dns_name, access_key, secret_key, bucket_name, testdir):
-    """
-    Create user with access_key and secret_key that will be
-    used for the s3a testdir
-    """
-    client.run(
-        args=[
-            'sudo',
-            'radosgw-admin',
-            'user',
-            'create',
-            run.Raw('--uid'),
-            's3a',
-            run.Raw('--display-name="s3a cephtests"'),
-            run.Raw('--access-key={access_key}'.format(access_key=access_key)),
-            run.Raw('--secret-key={secret_key}'.format(secret_key=secret_key)),
-            run.Raw('--email=s3a@ceph.com'),
-        ]
-    )
-    client.run(
-        args=[
-            'python3',
-            '-m',
-            'venv',
-            '{testdir}/venv'.format(testdir=testdir),
-            run.Raw('&&'),
-            run.Raw('{testdir}/venv/bin/pip'.format(testdir=testdir)),
-            'install',
-            'boto'
-        ]
-    )
-    create_bucket = """
-#!/usr/bin/env python
-import boto
-import boto.s3.connection
-access_key = '{access_key}'
-secret_key = '{secret_key}'
-
-conn = boto.connect_s3(
-        aws_access_key_id = access_key,
-        aws_secret_access_key = secret_key,
-        host = '{dns_name}',
-        is_secure=False,
-        calling_format = boto.s3.connection.OrdinaryCallingFormat(),
-        )
-bucket = conn.create_bucket('{bucket_name}')
-for bucket in conn.get_all_buckets():
-        print(bucket.name + "\t" + bucket.creation_date)
-""".format(access_key=access_key, secret_key=secret_key, dns_name=dns_name, bucket_name=bucket_name)
-    py_bucket_file = '{testdir}/create_bucket.py'.format(testdir=testdir)
-    client.sudo_write_file(py_bucket_file, create_bucket, mode='0744')
-    client.run(
-        args=[
-            'cat',
-            '{testdir}/create_bucket.py'.format(testdir=testdir),
-        ]
-    )
-    client.run(
-        args=[
-            '{testdir}/venv/bin/python'.format(testdir=testdir),
-            '{testdir}/create_bucket.py'.format(testdir=testdir),
-        ]
-    )
-
-
-def run_s3atest(client, maven_version, testdir, test_options):
-    """
-    Finally run the s3a test
-    """
-    aws_testdir = '{testdir}/hadoop/hadoop-tools/hadoop-aws/'.format(testdir=testdir)
-    run_test = '{testdir}/apache-maven-{maven_version}/bin/mvn'.format(testdir=testdir, maven_version=maven_version)
-    # Remove AWS CredentialsProvider tests as it hits public bucket from AWS
-    # better solution is to create the public bucket on local server and test
-    rm_test = 'rm src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java'
-    client.run(
-        args=[
-            'cd',
-            run.Raw(aws_testdir),
-            run.Raw('&&'),
-            run.Raw(rm_test),
-            run.Raw('&&'),
-            run.Raw('JAVA_HOME=$(alternatives --list | grep jre_1.8.0 | head -n 1 | awk \'{print $3}\')'),
-            run.Raw(run_test),
-            run.Raw(test_options)
-        ]
-    )
-
-
-def configure_s3a(client, dns_name, access_key, secret_key, bucket_name, testdir):
-    """
-    Use the template to configure s3a test, Fill in access_key, secret_key
-    and other details required for test.
-    """
-    config_template = """<configuration>
-<property>
-<name>fs.s3a.endpoint</name>
-<value>{name}</value>
-</property>
-
-<property>
-<name>fs.contract.test.fs.s3a</name>
-<value>s3a://{bucket_name}/</value>
-</property>
-
-<property>
-<name>fs.s3a.connection.ssl.enabled</name>
-<value>false</value>
-</property>
-
-<property>
-<name>test.fs.s3n.name</name>
-<value>s3n://{bucket_name}/</value>
-</property>
-
-<property>
-<name>test.fs.s3a.name</name>
-<value>s3a://{bucket_name}/</value>
-</property>
-
-<property>
-<name>test.fs.s3.name</name>
-<value>s3://{bucket_name}/</value>
-</property>
-
-<property>
-<name>fs.s3.awsAccessKeyId</name>
-<value>{access_key}</value>
-</property>
-
-<property>
-<name>fs.s3.awsSecretAccessKey</name>
-<value>{secret_key}</value>
-</property>
-
-<property>
-<name>fs.s3n.awsAccessKeyId</name>
-<value>{access_key}</value>
-</property>
-
-<property>
-<name>fs.s3n.awsSecretAccessKey</name>
-<value>{secret_key}</value>
-</property>
-
-<property>
-<name>fs.s3a.access.key</name>
-<description>AWS access key ID. Omit for Role-based authentication.</description>
-<value>{access_key}</value>
-</property>
-
-<property>
-<name>fs.s3a.secret.key</name>
-<description>AWS secret key. Omit for Role-based authentication.</description>
-<value>{secret_key}</value>
-</property>
-</configuration>
-""".format(name=dns_name, bucket_name=bucket_name, access_key=access_key, secret_key=secret_key)
-    config_path = testdir + '/hadoop/hadoop-tools/hadoop-aws/src/test/resources/auth-keys.xml'
-    client.write_file(config_path, config_template)
-    # output for debug
-    client.run(args=['cat', config_path])
-- 
2.39.5