From 6a1147c54760130c961ef9f5dd939b60582870c7 Mon Sep 17 00:00:00 2001 From: "J. Eric Ivancich" Date: Thu, 23 Mar 2023 17:45:06 -0400 Subject: [PATCH] qa/rgw: test that multipart re-upload does not leave any orphans Runs a boto script that reuploads one part multiple times before completing and then we check for any orphans. Original boto script contributed by Matt Benjamin on top of which modifications were made. Signed-off-by: J. Eric Ivancich (cherry picked from commit aeffd1b5986ad13f7a1f962cbff2fc72a5b21abe) --- qa/suites/rgw/verify/tasks/mp_reupload.yaml | 5 + qa/workunits/rgw/test_rgw_s3_mp_reupload.py | 121 ++++++++++++++++++++ qa/workunits/rgw/test_rgw_s3_mp_reupload.sh | 110 ++++++++++++++++++ 3 files changed, 236 insertions(+) create mode 100644 qa/suites/rgw/verify/tasks/mp_reupload.yaml create mode 100755 qa/workunits/rgw/test_rgw_s3_mp_reupload.py create mode 100755 qa/workunits/rgw/test_rgw_s3_mp_reupload.sh diff --git a/qa/suites/rgw/verify/tasks/mp_reupload.yaml b/qa/suites/rgw/verify/tasks/mp_reupload.yaml new file mode 100644 index 00000000000..d817a1c358d --- /dev/null +++ b/qa/suites/rgw/verify/tasks/mp_reupload.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rgw/test_rgw_s3_mp_reupload.sh diff --git a/qa/workunits/rgw/test_rgw_s3_mp_reupload.py b/qa/workunits/rgw/test_rgw_s3_mp_reupload.py new file mode 100755 index 00000000000..b3cb2d5ab56 --- /dev/null +++ b/qa/workunits/rgw/test_rgw_s3_mp_reupload.py @@ -0,0 +1,121 @@ +import boto3 +import botocore.exceptions +import sys +import os +import subprocess + +#boto3.set_stream_logger(name='botocore') + +# handles two optional system arguments: +# : default is "bkt134" +# <0 or 1> : 0 -> upload aborted, 1 -> completed; default is completed + +if len(sys.argv) >= 2: + bucket_name = sys.argv[1] +else: + bucket_name = "bkt314738362229" +print("bucket nams is %s" % bucket_name) + +complete_mpu = True +if len(sys.argv) >= 3: + complete_mpu = int(sys.argv[2]) > 0 + +versioned_bucket = False +if len(sys.argv) >= 4: + versioned_bucket = int(sys.argv[3]) > 0 + +rgw_host = os.environ['RGW_HOST'] +access_key = os.environ['RGW_ACCESS_KEY'] +secret_key = os.environ['RGW_SECRET_KEY'] + +try: + endpoint='http://%s:%d' % (rgw_host, 80) + client = boto3.client('s3', + endpoint_url=endpoint, + aws_access_key_id=access_key, + aws_secret_access_key=secret_key) + res = client.create_bucket(Bucket=bucket_name) +except botocore.exceptions.EndpointConnectionError: + try: + endpoint='https://%s:%d' % (rgw_host, 443) + client = boto3.client('s3', + endpoint_url=endpoint, + verify=False, + aws_access_key_id=access_key, + aws_secret_access_key=secret_key) + res = client.create_bucket(Bucket=bucket_name) + except botocore.exceptions.EndpointConnectionError: + endpoint='http://%s:%d' % (rgw_host, 8000) + client = boto3.client('s3', + endpoint_url=endpoint, + aws_access_key_id=access_key, + aws_secret_access_key=secret_key) + res = client.create_bucket(Bucket=bucket_name) + +print("endpoint is %s" % endpoint) + +if versioned_bucket: + res = client.put_bucket_versioning( + Bucket=bucket_name, + VersioningConfiguration={ + 'MFADelete': 'Disabled', + 'Status': 'Enabled'} + ) + +key = "mpu_test4" +nparts = 2 +ndups = 11 +do_reupload = True + +part_path = "/tmp/mp_part_5m" +subprocess.run(["dd", "if=/dev/urandom", "of=" + part_path, "bs=1M", "count=5"], check=True) + +f = open(part_path, 'rb') + +res = client.create_multipart_upload(Bucket=bucket_name, Key=key) +mpu_id = res["UploadId"] + +print("start UploadId=%s" % (mpu_id)) + +parts = [] +parts2 = [] + +for ix in range(0,nparts): + part_num = ix + 1 + f.seek(0) + res = client.upload_part(Body=f, Bucket=bucket_name, Key=key, + UploadId=mpu_id, PartNumber=part_num) + # save + etag = res['ETag'] + part = {'ETag': etag, 'PartNumber': part_num} + print("phase 1 uploaded part %s" % part) + parts.append(part) + +if do_reupload: + # just re-upload part 1 + part_num = 1 + for ix in range(0,ndups): + f.seek(0) + res = client.upload_part(Body=f, Bucket=bucket_name, Key=key, + UploadId=mpu_id, PartNumber=part_num) + etag = res['ETag'] + part = {'ETag': etag, 'PartNumber': part_num} + print ("phase 2 uploaded part %s" % part) + + # save + etag = res['ETag'] + part = {'ETag': etag, 'PartNumber': part_num} + parts2.append(part) + +if complete_mpu: + print("completing multipart upload, parts=%s" % parts) + res = client.complete_multipart_upload( + Bucket=bucket_name, Key=key, UploadId=mpu_id, + MultipartUpload={'Parts': parts}) +else: + print("aborting multipart upload, parts=%s" % parts) + res = client.abort_multipart_upload( + Bucket=bucket_name, Key=key, UploadId=mpu_id) + +# clean up +subprocess.run(["rm", "-f", part_path], check=True) diff --git a/qa/workunits/rgw/test_rgw_s3_mp_reupload.sh b/qa/workunits/rgw/test_rgw_s3_mp_reupload.sh new file mode 100755 index 00000000000..5d73fd04813 --- /dev/null +++ b/qa/workunits/rgw/test_rgw_s3_mp_reupload.sh @@ -0,0 +1,110 @@ +#!/usr/bin/env bash + +# INITIALIZATION + +mydir=$(dirname $0) +data_pool=default.rgw.buckets.data +orphan_list_out=/tmp/orphan_list.$$ +radoslist_out=/tmp/radoslist.$$ +rados_ls_out=/tmp/rados_ls.$$ +diff_out=/tmp/diff.$$ + +rgw_host="$(hostname --fqdn)" +echo "INFO: fully qualified domain name: $rgw_host" + +export RGW_ACCESS_KEY="0555b35654ad1656d804" +export RGW_SECRET_KEY="h7GhxuBLTrlhVUyxSPUKUV8r/2EI4ngqJxD7iBdBYLhwluN30JaT3Q==" +export RGW_HOST="${RGW_HOST:-$rgw_host}" + +# random argument determines if multipart is aborted or completed 50/50 +outcome=$((RANDOM % 2)) +if [ $outcome -eq 0 ] ;then + echo "== TESTING *ABORTING* MULTIPART UPLOAD WITH RE-UPLOADS ==" +else + echo "== TESTING *COMPLETING* MULTIPART UPLOAD WITH RE-UPLOADS ==" +fi + +# random argument determines if multipart is aborted or completed 50/50 +versioning=$((RANDOM % 2)) +if [ $versioning -eq 0 ] ;then + echo "== TESTING NON-VERSIONED BUCKET ==" +else + echo "== TESTING VERSIONED BUCKET ==" +fi + +# create a randomized bucket name +bucket="reupload-bkt-$((RANDOM % 899999 + 100000))" + + +# SET UP PYTHON VIRTUAL ENVIRONMENT + +# install boto3 +python3 -m venv $mydir +source $mydir/bin/activate +pip install pip --upgrade +pip install boto3 + + +# CREATE RGW USER IF NECESSARY + +if radosgw-admin user info --access-key $RGW_ACCESS_KEY 2>/dev/null ;then + echo INFO: user already exists +else + echo INFO: creating user + radosgw-admin user create --uid testid \ + --access-key $RGW_ACCESS_KEY \ + --secret $RGW_SECRET_KEY \ + --display-name 'M. Tester' \ + --email tester@ceph.com 2>/dev/null +fi + + +# RUN REUPLOAD TEST + +$mydir/bin/python3 ${mydir}/test_rgw_s3_mp_reupload.py $bucket $outcome $versioning + + +# ANALYZE FOR ERRORS +# (NOTE: for now we're choosing not to use the rgw-orphan-list tool) + +# force garbage collection to remove extra parts +radosgw-admin gc process --include-all 2>/dev/null + +marker=$(radosgw-admin metadata get bucket:$bucket 2>/dev/null | grep bucket_id | sed 's/.*: "\(.*\)".*/\1/') + +# determine expected rados objects +radosgw-admin bucket radoslist --bucket=$bucket 2>/dev/null | sort >$radoslist_out +echo "radosgw-admin bucket radoslist:" +cat $radoslist_out + +# determine found rados objects +rados ls -p $data_pool 2>/dev/null | grep "^$marker" | sort >$rados_ls_out +echo "rados ls:" +cat $rados_ls_out + +# compare expected and found +diff $radoslist_out $rados_ls_out >$diff_out +if [ $(cat $diff_out | wc -l) -ne 0 ] ;then + error=1 + echo "ERROR: Found differences between expected and actual rados objects for test bucket." + echo " note: indicators: '>' found but not expected; '<' expected but not found." + cat $diff_out +fi + + +# CLEAN UP + +deactivate + +rm -f $orphan_list_out $radoslist_out $rados_ls_out $diff_out + + +# PRODUCE FINAL RESULTS + +if [ -n "$error" ] ;then + echo "== FAILED ==" + exit 1 +fi + +echo "== PASSED ==" +exit 0 -- 2.47.3