From b4a0ed71700fedfe0d9c8d0d0d89c150e8639994 Mon Sep 17 00:00:00 2001
From: Casey Bodley <cbodley@redhat.com>
Date: Thu, 17 Jul 2025 13:06:01 -0400
Subject: [PATCH] qa/rgw: remove hadoop-s3a subsuite

this suite hasn't provided much benefit since it was added, and is
becoming more of a maintenance burden recently:

* https://tracker.ceph.com/issues/71584
* https://tracker.ceph.com/issues/72179

remove the subsuite and its s3a_hadoop.py task

Signed-off-by: Casey Bodley <cbodley@redhat.com>
(cherry picked from commit d713e9a583cbe449c6744532d038d503a7a71388)
---
 qa/suites/rgw/hadoop-s3a/%                    |   0
 qa/suites/rgw/hadoop-s3a/.qa                  |   1 -
 qa/suites/rgw/hadoop-s3a/clusters/.qa         |   1 -
 .../rgw/hadoop-s3a/clusters/fixed-2.yaml      |   1 -
 qa/suites/rgw/hadoop-s3a/hadoop/.qa           |   1 -
 qa/suites/rgw/hadoop-s3a/hadoop/default.yaml  |   1 -
 qa/suites/rgw/hadoop-s3a/hadoop/v32.yaml      |   3 -
 .../hadoop-s3a/ignore-pg-availability.yaml    |   1 -
 qa/suites/rgw/hadoop-s3a/overrides.yaml       |   6 -
 qa/suites/rgw/hadoop-s3a/s3a-hadoop.yaml      |  11 -
 qa/tasks/s3a_hadoop.py                        | 295 ------------------
 11 files changed, 321 deletions(-)
 delete mode 100644 qa/suites/rgw/hadoop-s3a/%
 delete mode 120000 qa/suites/rgw/hadoop-s3a/.qa
 delete mode 120000 qa/suites/rgw/hadoop-s3a/clusters/.qa
 delete mode 120000 qa/suites/rgw/hadoop-s3a/clusters/fixed-2.yaml
 delete mode 120000 qa/suites/rgw/hadoop-s3a/hadoop/.qa
 delete mode 100644 qa/suites/rgw/hadoop-s3a/hadoop/default.yaml
 delete mode 100644 qa/suites/rgw/hadoop-s3a/hadoop/v32.yaml
 delete mode 120000 qa/suites/rgw/hadoop-s3a/ignore-pg-availability.yaml
 delete mode 100644 qa/suites/rgw/hadoop-s3a/overrides.yaml
 delete mode 100644 qa/suites/rgw/hadoop-s3a/s3a-hadoop.yaml
 delete mode 100644 qa/tasks/s3a_hadoop.py

diff --git a/qa/suites/rgw/hadoop-s3a/% b/qa/suites/rgw/hadoop-s3a/%
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/qa/suites/rgw/hadoop-s3a/.qa b/qa/suites/rgw/hadoop-s3a/.qa
deleted file mode 120000
index a602a0353e751..0000000000000
--- a/qa/suites/rgw/hadoop-s3a/.qa
+++ /dev/null
@@ -1 +0,0 @@
-../.qa/
\ No newline at end of file
diff --git a/qa/suites/rgw/hadoop-s3a/clusters/.qa b/qa/suites/rgw/hadoop-s3a/clusters/.qa
deleted file mode 120000
index a602a0353e751..0000000000000
--- a/qa/suites/rgw/hadoop-s3a/clusters/.qa
+++ /dev/null
@@ -1 +0,0 @@
-../.qa/
\ No newline at end of file
diff --git a/qa/suites/rgw/hadoop-s3a/clusters/fixed-2.yaml b/qa/suites/rgw/hadoop-s3a/clusters/fixed-2.yaml
deleted file mode 120000
index 230ff0fdab412..0000000000000
--- a/qa/suites/rgw/hadoop-s3a/clusters/fixed-2.yaml
+++ /dev/null
@@ -1 +0,0 @@
-.qa/clusters/fixed-2.yaml
\ No newline at end of file
diff --git a/qa/suites/rgw/hadoop-s3a/hadoop/.qa b/qa/suites/rgw/hadoop-s3a/hadoop/.qa
deleted file mode 120000
index a602a0353e751..0000000000000
--- a/qa/suites/rgw/hadoop-s3a/hadoop/.qa
+++ /dev/null
@@ -1 +0,0 @@
-../.qa/
\ No newline at end of file
diff --git a/qa/suites/rgw/hadoop-s3a/hadoop/default.yaml b/qa/suites/rgw/hadoop-s3a/hadoop/default.yaml
deleted file mode 100644
index 8b137891791fe..0000000000000
--- a/qa/suites/rgw/hadoop-s3a/hadoop/default.yaml
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/qa/suites/rgw/hadoop-s3a/hadoop/v32.yaml b/qa/suites/rgw/hadoop-s3a/hadoop/v32.yaml
deleted file mode 100644
index d017b756b78f0..0000000000000
--- a/qa/suites/rgw/hadoop-s3a/hadoop/v32.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-overrides:
-  s3a-hadoop:
-    hadoop-version: '3.2.0'
diff --git a/qa/suites/rgw/hadoop-s3a/ignore-pg-availability.yaml b/qa/suites/rgw/hadoop-s3a/ignore-pg-availability.yaml
deleted file mode 120000
index 32340b1fa8be8..0000000000000
--- a/qa/suites/rgw/hadoop-s3a/ignore-pg-availability.yaml
+++ /dev/null
@@ -1 +0,0 @@
-.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file
diff --git a/qa/suites/rgw/hadoop-s3a/overrides.yaml b/qa/suites/rgw/hadoop-s3a/overrides.yaml
deleted file mode 100644
index d52080bb5a1f3..0000000000000
--- a/qa/suites/rgw/hadoop-s3a/overrides.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-overrides:
-  ceph:
-    conf:
-      client:
-        setuser: ceph
-        setgroup: ceph
diff --git a/qa/suites/rgw/hadoop-s3a/s3a-hadoop.yaml b/qa/suites/rgw/hadoop-s3a/s3a-hadoop.yaml
deleted file mode 100644
index ed077a89f1969..0000000000000
--- a/qa/suites/rgw/hadoop-s3a/s3a-hadoop.yaml
+++ /dev/null
@@ -1,11 +0,0 @@
-tasks:
-- install:
-- ceph:
-- ssh-keys:
-- dnsmasq:
-    client.0: [s3.]
-- rgw:
-    client.0:
-      dns-name: s3.
-- s3a-hadoop:
-    role: client.0
diff --git a/qa/tasks/s3a_hadoop.py b/qa/tasks/s3a_hadoop.py
deleted file mode 100644
index 4591ddf1cefee..0000000000000
--- a/qa/tasks/s3a_hadoop.py
+++ /dev/null
@@ -1,295 +0,0 @@
-import contextlib
-import logging
-import os
-from teuthology import misc
-from teuthology.orchestra import run
-
-log = logging.getLogger(__name__)
-
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-     Run Hadoop S3A tests using Ceph
-     usage:
-      -tasks:
-         ceph-ansible:
-         s3a-hadoop:
-           maven-version: '3.9.10' (default)
-           hadoop-version: '2.9.2'
-           bucket-name: 's3atest' (default)
-           access-key: 'anykey' (uses a default value)
-           secret-key: 'secretkey' ( uses a default value)
-           role: client.0
-    """
-    if config is None:
-        config = {}
-
-    assert isinstance(config, dict), \
-        "task only supports a dictionary for configuration"
-
-    assert hasattr(ctx, 'rgw'), 's3a-hadoop must run after the rgw task'
-
-    overrides = ctx.config.get('overrides', {})
-    misc.deep_merge(config, overrides.get('s3a-hadoop', {}))
-    testdir = misc.get_testdir(ctx)
-
-    role = config.get('role')
-    (remote,) = ctx.cluster.only(role).remotes.keys()
-    endpoint = ctx.rgw.role_endpoints.get(role)
-    assert endpoint, 's3tests: no rgw endpoint for {}'.format(role)
-
-    # get versions
-    maven_major = config.get('maven-major', 'maven-3')
-    maven_version = config.get('maven-version', '3.9.10')
-    hadoop_ver = config.get('hadoop-version', '2.9.2')
-    bucket_name = config.get('bucket-name', 's3atest')
-    access_key = config.get('access-key', 'EGAQRD2ULOIFKFSKCT4F')
-    secret_key = config.get(
-        'secret-key',
-        'zi816w1vZKfaSM85Cl0BxXTwSLyN7zB4RbTswrGb')
-
-    # programmatically find a nearby mirror so as not to hammer archive.apache.org
-    apache_mirror_cmd="curl 'https://www.apache.org/dyn/closer.cgi' 2>/dev/null | " \
-        "grep -o '<strong>[^<]*</strong>' | sed 's/<[^>]*>//g' | head -n 1"
-    log.info("determining apache mirror by running: " + apache_mirror_cmd)
-    apache_mirror_url_front = os.popen(apache_mirror_cmd).read().rstrip() # note: includes trailing slash (/)
-    log.info("chosen apache mirror is " + apache_mirror_url_front)
-
-    # set versions for cloning the repo
-    apache_maven = 'apache-maven-{maven_version}-bin.tar.gz'.format(
-        maven_version=maven_version)
-    maven_link = '{apache_mirror_url_front}/maven/'.format(apache_mirror_url_front=apache_mirror_url_front) + \
-        '{maven_major}/{maven_version}/binaries/'.format(maven_major=maven_major, maven_version=maven_version) + \
-        apache_maven
-    hadoop_git = 'https://github.com/apache/hadoop'
-    hadoop_rel = 'hadoop-{ver} rel/release-{ver}'.format(ver=hadoop_ver)
-    if hadoop_ver == 'trunk':
-        # just checkout a new branch out of trunk
-        hadoop_rel = 'hadoop-ceph-trunk'
-    install_prereq(remote)
-    remote.run(
-        args=[
-            'cd',
-            testdir,
-            run.Raw('&&'),
-            'wget',
-            maven_link,
-            run.Raw('&&'),
-            'tar',
-            '-xvf',
-            apache_maven,
-            run.Raw('&&'),
-            'git',
-            'clone',
-            run.Raw(hadoop_git),
-            run.Raw('&&'),
-            'cd',
-            'hadoop',
-            run.Raw('&&'),
-            'git',
-            'checkout',
-            '-b',
-            run.Raw(hadoop_rel)
-        ]
-    )
-    configure_s3a(remote, endpoint.dns_name, access_key, secret_key, bucket_name, testdir)
-    setup_user_bucket(remote, endpoint.dns_name, access_key, secret_key, bucket_name, testdir)
-    if hadoop_ver.startswith('2.8'):
-        # test all ITtests but skip AWS test using public bucket landsat-pds
-        # which is not available from within this test
-        test_options = '-Dit.test=ITestS3A* -Dparallel-tests -Dscale \
-                        -Dfs.s3a.scale.test.timeout=1200 \
-                        -Dfs.s3a.scale.test.huge.filesize=256M verify'
-    else:
-        test_options = 'test -Dtest=S3a*,TestS3A*'
-    try:
-        run_s3atest(remote, maven_version, testdir, test_options)
-        yield
-    finally:
-        log.info("Done s3a testing, Cleaning up")
-        for fil in ['apache*', 'hadoop*', 'venv*', 'create*']:
-            remote.run(args=['rm', run.Raw('-rf'), run.Raw('{tdir}/{file}'.format(tdir=testdir, file=fil))])
-
-
-def install_prereq(client):
-    """
-    Install pre requisites for RHEL and CentOS
-    TBD: Ubuntu
-    """
-    if client.os.name == 'rhel' or client.os.name == 'centos':
-        client.run(
-            args=[
-                'sudo',
-                'yum',
-                'install',
-                '-y',
-                'protobuf-c.x86_64',
-                'java',
-                'java-1.8.0-openjdk-devel',
-                'dnsmasq'
-            ]
-        )
-
-
-def setup_user_bucket(client, dns_name, access_key, secret_key, bucket_name, testdir):
-    """
-    Create user with access_key and secret_key that will be
-    used for the s3a testdir
-    """
-    client.run(
-        args=[
-            'sudo',
-            'radosgw-admin',
-            'user',
-            'create',
-            run.Raw('--uid'),
-            's3a',
-            run.Raw('--display-name="s3a cephtests"'),
-            run.Raw('--access-key={access_key}'.format(access_key=access_key)),
-            run.Raw('--secret-key={secret_key}'.format(secret_key=secret_key)),
-            run.Raw('--email=s3a@ceph.com'),
-        ]
-    )
-    client.run(
-        args=[
-            'python3',
-            '-m',
-            'venv',
-            '{testdir}/venv'.format(testdir=testdir),
-            run.Raw('&&'),
-            run.Raw('{testdir}/venv/bin/pip'.format(testdir=testdir)),
-            'install',
-            'boto'
-        ]
-    )
-    create_bucket = """
-#!/usr/bin/env python
-import boto
-import boto.s3.connection
-access_key = '{access_key}'
-secret_key = '{secret_key}'
-
-conn = boto.connect_s3(
-        aws_access_key_id = access_key,
-        aws_secret_access_key = secret_key,
-        host = '{dns_name}',
-        is_secure=False,
-        calling_format = boto.s3.connection.OrdinaryCallingFormat(),
-        )
-bucket = conn.create_bucket('{bucket_name}')
-for bucket in conn.get_all_buckets():
-        print(bucket.name + "\t" + bucket.creation_date)
-""".format(access_key=access_key, secret_key=secret_key, dns_name=dns_name, bucket_name=bucket_name)
-    py_bucket_file = '{testdir}/create_bucket.py'.format(testdir=testdir)
-    client.sudo_write_file(py_bucket_file, create_bucket, mode='0744')
-    client.run(
-        args=[
-            'cat',
-            '{testdir}/create_bucket.py'.format(testdir=testdir),
-        ]
-    )
-    client.run(
-        args=[
-            '{testdir}/venv/bin/python'.format(testdir=testdir),
-            '{testdir}/create_bucket.py'.format(testdir=testdir),
-        ]
-    )
-
-
-def run_s3atest(client, maven_version, testdir, test_options):
-    """
-    Finally run the s3a test
-    """
-    aws_testdir = '{testdir}/hadoop/hadoop-tools/hadoop-aws/'.format(testdir=testdir)
-    run_test = '{testdir}/apache-maven-{maven_version}/bin/mvn'.format(testdir=testdir, maven_version=maven_version)
-    # Remove AWS CredentialsProvider tests as it hits public bucket from AWS
-    # better solution is to create the public bucket on local server and test
-    rm_test = 'rm src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java'
-    client.run(
-        args=[
-            'cd',
-            run.Raw(aws_testdir),
-            run.Raw('&&'),
-            run.Raw(rm_test),
-            run.Raw('&&'),
-            run.Raw('JAVA_HOME=$(alternatives --list | grep jre_1.8.0 | head -n 1 | awk \'{print $3}\')'),
-            run.Raw(run_test),
-            run.Raw(test_options)
-        ]
-    )
-
-
-def configure_s3a(client, dns_name, access_key, secret_key, bucket_name, testdir):
-    """
-    Use the template to configure s3a test, Fill in access_key, secret_key
-    and other details required for test.
-    """
-    config_template = """<configuration>
-<property>
-<name>fs.s3a.endpoint</name>
-<value>{name}</value>
-</property>
-
-<property>
-<name>fs.contract.test.fs.s3a</name>
-<value>s3a://{bucket_name}/</value>
-</property>
-
-<property>
-<name>fs.s3a.connection.ssl.enabled</name>
-<value>false</value>
-</property>
-
-<property>
-<name>test.fs.s3n.name</name>
-<value>s3n://{bucket_name}/</value>
-</property>
-
-<property>
-<name>test.fs.s3a.name</name>
-<value>s3a://{bucket_name}/</value>
-</property>
-
-<property>
-<name>test.fs.s3.name</name>
-<value>s3://{bucket_name}/</value>
-</property>
-
-<property>
-<name>fs.s3.awsAccessKeyId</name>
-<value>{access_key}</value>
-</property>
-
-<property>
-<name>fs.s3.awsSecretAccessKey</name>
-<value>{secret_key}</value>
-</property>
-
-<property>
-<name>fs.s3n.awsAccessKeyId</name>
-<value>{access_key}</value>
-</property>
-
-<property>
-<name>fs.s3n.awsSecretAccessKey</name>
-<value>{secret_key}</value>
-</property>
-
-<property>
-<name>fs.s3a.access.key</name>
-<description>AWS access key ID. Omit for Role-based authentication.</description>
-<value>{access_key}</value>
-</property>
-
-<property>
-<name>fs.s3a.secret.key</name>
-<description>AWS secret key. Omit for Role-based authentication.</description>
-<value>{secret_key}</value>
-</property>
-</configuration>
-""".format(name=dns_name, bucket_name=bucket_name, access_key=access_key, secret_key=secret_key)
-    config_path = testdir + '/hadoop/hadoop-tools/hadoop-aws/src/test/resources/auth-keys.xml'
-    client.write_file(config_path, config_template)
-    # output for debug
-    client.run(args=['cat', config_path])
-- 
2.39.5