From: J. Eric Ivancich
Date: Thu, 23 Mar 2023 21:45:06 +0000 (-0400)
Subject: qa/rgw: test that multipart re-upload does not leave any orphans
X-Git-Tag: v19.0.0~955^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=aeffd1b5986ad13f7a1f962cbff2fc72a5b21abe;p=ceph.git

qa/rgw: test that multipart re-upload does not leave any orphans

Runs a boto script that reuploads one part multiple times before
completing and then we check for any orphans.

Original boto script contributed by Matt Benjamin
on top of which modifications were made.

Signed-off-by: J. Eric Ivancich
---

diff --git a/qa/suites/rgw/verify/tasks/mp_reupload.yaml b/qa/suites/rgw/verify/tasks/mp_reupload.yaml
new file mode 100644
index 0000000000000..d817a1c358daf
--- /dev/null
+++ b/qa/suites/rgw/verify/tasks/mp_reupload.yaml
@@ -0,0 +1,5 @@
+tasks:
+- workunit:
+    clients:
+      client.0:
+        - rgw/test_rgw_s3_mp_reupload.sh
diff --git a/qa/workunits/rgw/test_rgw_s3_mp_reupload.py b/qa/workunits/rgw/test_rgw_s3_mp_reupload.py
new file mode 100755
index 0000000000000..b3cb2d5ab564a
--- /dev/null
+++ b/qa/workunits/rgw/test_rgw_s3_mp_reupload.py
@@ -0,0 +1,131 @@
+import boto3
+import botocore.exceptions
+import sys
+import os
+import subprocess
+
+#boto3.set_stream_logger(name='botocore')
+
+# Uploads a multipart object, re-uploading part 1 several times before
+# the upload is completed or aborted; the accompanying shell script then
+# checks that no orphaned rados objects were left behind.
+#
+# handles three optional system arguments:
+#   <bucket name> : default is "bkt314738362229"
+#   <0 or 1>      : 0 -> upload aborted, 1 -> completed; default is completed
+#   <0 or 1>      : 0 -> non-versioned bucket, 1 -> versioned bucket;
+#                   default is non-versioned
+#
+# RGW_HOST, RGW_ACCESS_KEY and RGW_SECRET_KEY must be set in the environment
+
+if len(sys.argv) >= 2:
+    bucket_name = sys.argv[1]
+else:
+    bucket_name = "bkt314738362229"
+print("bucket nams is %s" % bucket_name)
+
+complete_mpu = True
+if len(sys.argv) >= 3:
+    complete_mpu = int(sys.argv[2]) > 0
+
+versioned_bucket = False
+if len(sys.argv) >= 4:
+    versioned_bucket = int(sys.argv[3]) > 0
+
+rgw_host = os.environ['RGW_HOST']
+access_key = os.environ['RGW_ACCESS_KEY']
+secret_key = os.environ['RGW_SECRET_KEY']
+
+# probe for a reachable endpoint: plain http on port 80 first, then https
+# on 443 (without certificate verification), then plain http on port 8000
+try:
+    endpoint='http://%s:%d' % (rgw_host, 80)
+    client = boto3.client('s3',
+                          endpoint_url=endpoint,
+                          aws_access_key_id=access_key,
+                          aws_secret_access_key=secret_key)
+    res = client.create_bucket(Bucket=bucket_name)
+except botocore.exceptions.EndpointConnectionError:
+    try:
+        endpoint='https://%s:%d' % (rgw_host, 443)
+        client = boto3.client('s3',
+                              endpoint_url=endpoint,
+                              verify=False,
+                              aws_access_key_id=access_key,
+                              aws_secret_access_key=secret_key)
+        res = client.create_bucket(Bucket=bucket_name)
+    except botocore.exceptions.EndpointConnectionError:
+        endpoint='http://%s:%d' % (rgw_host, 8000)
+        client = boto3.client('s3',
+                              endpoint_url=endpoint,
+                              aws_access_key_id=access_key,
+                              aws_secret_access_key=secret_key)
+        res = client.create_bucket(Bucket=bucket_name)
+
+print("endpoint is %s" % endpoint)
+
+if versioned_bucket:
+    res = client.put_bucket_versioning(
+        Bucket=bucket_name,
+        VersioningConfiguration={
+            'MFADelete': 'Disabled',
+            'Status': 'Enabled'}
+        )
+
+key = "mpu_test4"
+nparts = 2
+ndups = 11
+do_reupload = True
+
+part_path = "/tmp/mp_part_5m"
+subprocess.run(["dd", "if=/dev/urandom", "of=" + part_path, "bs=1M", "count=5"], check=True)
+
+try:
+    # the same 5 MiB of random data is used as the body of every part; the
+    # context manager closes the file handle even if an upload raises
+    with open(part_path, 'rb') as f:
+        res = client.create_multipart_upload(Bucket=bucket_name, Key=key)
+        mpu_id = res["UploadId"]
+
+        print("start UploadId=%s" % (mpu_id))
+
+        parts = []
+        parts2 = []
+
+        for ix in range(0,nparts):
+            part_num = ix + 1
+            f.seek(0)
+            res = client.upload_part(Body=f, Bucket=bucket_name, Key=key,
+                                     UploadId=mpu_id, PartNumber=part_num)
+            # save
+            etag = res['ETag']
+            part = {'ETag': etag, 'PartNumber': part_num}
+            print("phase 1 uploaded part %s" % part)
+            parts.append(part)
+
+        if do_reupload:
+            # just re-upload part 1
+            part_num = 1
+            for ix in range(0,ndups):
+                f.seek(0)
+                res = client.upload_part(Body=f, Bucket=bucket_name, Key=key,
+                                         UploadId=mpu_id, PartNumber=part_num)
+                etag = res['ETag']
+                part = {'ETag': etag, 'PartNumber': part_num}
+                print ("phase 2 uploaded part %s" % part)
+
+                # save (kept separately; only the phase 1 list is sent on completion)
+                parts2.append(part)
+
+    if complete_mpu:
+        print("completing multipart upload, parts=%s" % parts)
+        res = client.complete_multipart_upload(
+            Bucket=bucket_name, Key=key, UploadId=mpu_id,
+            MultipartUpload={'Parts': parts})
+    else:
+        print("aborting multipart upload, parts=%s" % parts)
+        res = client.abort_multipart_upload(
+            Bucket=bucket_name, Key=key, UploadId=mpu_id)
+finally:
+    # clean up the temporary part file even when the upload failed
+    subprocess.run(["rm", "-f", part_path], check=True)
diff --git a/qa/workunits/rgw/test_rgw_s3_mp_reupload.sh b/qa/workunits/rgw/test_rgw_s3_mp_reupload.sh
new file mode 100755
index 0000000000000..5d73fd04813ad
--- /dev/null
+++ b/qa/workunits/rgw/test_rgw_s3_mp_reupload.sh
@@ -0,0 +1,126 @@
+#!/usr/bin/env bash
+
+# Runs test_rgw_s3_mp_reupload.py against a randomly named bucket and then
+# checks that the rados objects found in the data pool for that bucket
+# match the ones listed by "radosgw-admin bucket radoslist".
+
+# INITIALIZATION
+
+mydir=$(dirname "$0")
+data_pool=default.rgw.buckets.data
+orphan_list_out=/tmp/orphan_list.$$
+radoslist_out=/tmp/radoslist.$$
+rados_ls_out=/tmp/rados_ls.$$
+diff_out=/tmp/diff.$$
+error=""
+
+rgw_host="$(hostname --fqdn)"
+echo "INFO: fully qualified domain name: $rgw_host"
+
+export RGW_ACCESS_KEY="0555b35654ad1656d804"
+export RGW_SECRET_KEY="h7GhxuBLTrlhVUyxSPUKUV8r/2EI4ngqJxD7iBdBYLhwluN30JaT3Q=="
+export RGW_HOST="${RGW_HOST:-$rgw_host}"
+
+# random argument determines if multipart is aborted or completed 50/50
+outcome=$((RANDOM % 2))
+if [ $outcome -eq 0 ] ;then
+    echo "== TESTING *ABORTING* MULTIPART UPLOAD WITH RE-UPLOADS =="
+else
+    echo "== TESTING *COMPLETING* MULTIPART UPLOAD WITH RE-UPLOADS =="
+fi
+
+# random argument determines if the bucket is versioned or not 50/50
+versioning=$((RANDOM % 2))
+if [ $versioning -eq 0 ] ;then
+    echo "== TESTING NON-VERSIONED BUCKET =="
+else
+    echo "== TESTING VERSIONED BUCKET =="
+fi
+
+# create a randomized bucket name ($RANDOM is at most 32767, so the
+# numeric suffix falls in the range 100000-132767)
+bucket="reupload-bkt-$((RANDOM % 899999 + 100000))"
+
+
+# SET UP PYTHON VIRTUAL ENVIRONMENT
+
+# install boto3
+python3 -m venv "$mydir"
+source "$mydir/bin/activate"
+pip install pip --upgrade
+pip install boto3
+
+
+# CREATE RGW USER IF NECESSARY
+
+if radosgw-admin user info --access-key $RGW_ACCESS_KEY 2>/dev/null ;then
+    echo INFO: user already exists
+else
+    echo INFO: creating user
+    radosgw-admin user create --uid testid \
+                  --access-key $RGW_ACCESS_KEY \
+                  --secret $RGW_SECRET_KEY \
+                  --display-name 'M. Tester' \
+                  --email tester@ceph.com 2>/dev/null
+fi
+
+
+# RUN REUPLOAD TEST
+
+# a failed upload must fail the test; otherwise the test can report
+# PASSED even though the upload never finished
+if ! "$mydir/bin/python3" "${mydir}/test_rgw_s3_mp_reupload.py" $bucket $outcome $versioning ;then
+    error=1
+    echo "ERROR: multipart re-upload script failed."
+fi
+
+
+# ANALYZE FOR ERRORS
+# (NOTE: for now we're choosing not to use the rgw-orphan-list tool)
+
+# force garbage collection to remove extra parts
+radosgw-admin gc process --include-all 2>/dev/null
+
+marker=$(radosgw-admin metadata get bucket:$bucket 2>/dev/null | grep bucket_id | sed 's/.*: "\(.*\)".*/\1/')
+if [ -z "$marker" ] ;then
+    # an empty marker would make the grep below match every object in the pool
+    error=1
+    echo "ERROR: Unable to determine the bucket id of bucket $bucket."
+fi
+
+# determine expected rados objects
+radosgw-admin bucket radoslist --bucket=$bucket 2>/dev/null | sort >$radoslist_out
+echo "radosgw-admin bucket radoslist:"
+cat $radoslist_out
+
+# determine found rados objects
+rados ls -p $data_pool 2>/dev/null | grep "^$marker" | sort >$rados_ls_out
+echo "rados ls:"
+cat $rados_ls_out
+
+# compare expected and found
+diff $radoslist_out $rados_ls_out >$diff_out
+if [ -s "$diff_out" ] ;then
+    error=1
+    echo "ERROR: Found differences between expected and actual rados objects for test bucket."
+    echo " note: indicators: '>' found but not expected; '<' expected but not found."
+    cat $diff_out
+fi
+
+
+# CLEAN UP
+
+deactivate
+
+rm -f $orphan_list_out $radoslist_out $rados_ls_out $diff_out
+
+
+# PRODUCE FINAL RESULTS
+
+if [ -n "$error" ] ;then
+    echo "== FAILED =="
+    exit 1
+fi
+
+echo "== PASSED =="
+exit 0