git-server-git.apps.pok.os.sepia.ceph.com Git - s3-tests.git/commitdiff
nuke_prefixed_buckets deletes objects in batches
author Casey Bodley <cbodley@redhat.com>
Wed, 4 Aug 2021 17:56:39 +0000 (13:56 -0400)
committer Casey Bodley <cbodley@redhat.com>
Thu, 5 Aug 2021 14:50:32 +0000 (10:50 -0400)
speed up the cleanup by using delete_objects() with batches of 128

Signed-off-by: Casey Bodley <cbodley@redhat.com>
s3tests_boto3/functional/__init__.py

index ba8f9a73039f433fa8c290e626eeea38f1571ad6..ee91a8545676a5dcebac596d6367a55a62cc7679 100644 (file)
@@ -74,38 +74,38 @@ def get_objects_list(bucket, client=None, prefix=None):
 
     return objects_list
 
-def get_versioned_objects_list(bucket, client=None):
-    if client == None:
-        client = get_client()
-    response = client.list_object_versions(Bucket=bucket)
-    versioned_objects_list = []
-
-    if 'Versions' in response:
-        contents = response['Versions']
-        for obj in contents:
-            key = obj['Key']
-            version_id = obj['VersionId']
-            versioned_obj = (key,version_id)
-            versioned_objects_list.append(versioned_obj)
-
-    return versioned_objects_list
-
-def get_delete_markers_list(bucket, client=None):
-    if client == None:
-        client = get_client()
-    response = client.list_object_versions(Bucket=bucket)
-    delete_markers = []
-
-    if 'DeleteMarkers' in response:
-        contents = response['DeleteMarkers']
-        for obj in contents:
-            key = obj['Key']
-            version_id = obj['VersionId']
-            versioned_obj = (key,version_id)
-            delete_markers.append(versioned_obj)
-
-    return delete_markers
-
+# Generator that pages through client.list_object_versions() for `bucket`, requesting up to
+# `batch_size` keys per call; each non-empty page is yielded as a list of dicts for delete_objects().
+def list_versions(client, bucket, batch_size):
+    key_marker = ''  # empty markers on the first request (presumably "list from the beginning")
+    version_marker = ''
+    truncated = True  # forces at least one listing request
+    while truncated:
+        listing = client.list_object_versions(
+                Bucket=bucket,
+                KeyMarker=key_marker,
+                VersionIdMarker=version_marker,
+                MaxKeys=batch_size)
+        # carry this page's resume markers and truncation flag into the next iteration
+        key_marker = listing.get('NextKeyMarker')  # None when the response omits the key
+        version_marker = listing.get('NextVersionIdMarker')
+        truncated = listing['IsTruncated']
+        # 'Versions' and 'DeleteMarkers' are merged so both kinds land in the same batch
+        objs = listing.get('Versions', []) + listing.get('DeleteMarkers', [])
+        if len(objs):  # pages with neither versions nor delete markers yield nothing
+            yield [{'Key': o['Key'], 'VersionId': o['VersionId']} for o in objs]
+
+def nuke_bucket(client, bucket):  # delete everything list_versions() reports, then the bucket
+    batch_size = 128  # keys requested per listing page, hence objects per delete_objects() call
+    max_retain_date = None  # NOTE(review): assigned but never read in this function
+    # Client errors are not caught here; they propagate to the caller.
+    # Each batch yielded by list_versions() is removed with a single delete_objects() call.
+    for objects in list_versions(client, bucket, batch_size):
+        client.delete_objects(Bucket=bucket,
+                Delete={'Objects': objects, 'Quiet': True},
+                BypassGovernanceRetention=True)  # presumably for governance-mode locks; confirm
+    # reached only after the listing generator is exhausted
+    client.delete_bucket(Bucket=bucket)
 
 def nuke_prefixed_buckets(prefix, client=None):
     if client == None:
@@ -114,28 +114,18 @@ def nuke_prefixed_buckets(prefix, client=None):
     buckets = get_buckets_list(client, prefix)
 
     err = None
-    if buckets != []:
-        for bucket_name in buckets:
-            objects_list = get_objects_list(bucket_name, client)
-            for obj in objects_list:
-                response = client.delete_object(Bucket=bucket_name,Key=obj)
-            versioned_objects_list = get_versioned_objects_list(bucket_name, client)
-            for obj in versioned_objects_list:
-                response = client.delete_object(Bucket=bucket_name,Key=obj[0],VersionId=obj[1])
-            delete_markers = get_delete_markers_list(bucket_name, client)
-            for obj in delete_markers:
-                response = client.delete_object(Bucket=bucket_name,Key=obj[0],VersionId=obj[1])
-            try:
-                response = client.delete_bucket(Bucket=bucket_name)
-            except ClientError as e:
-                # The exception shouldn't be raised when doing cleanup. Pass and continue
-                # the bucket cleanup process. Otherwise left buckets wouldn't be cleared
-                # resulting in some kind of resource leak. err is used to hint user some
-                # exception once occurred.
-                err = e
-                pass
-        if err:
-            raise err
+    for bucket_name in buckets:
+        try:
+            nuke_bucket(client, bucket_name)
+        except Exception as e:
+            # Don't let one failing bucket abort the cleanup: remember the exception and
+            # keep going, otherwise the remaining buckets would be left behind as a
+            # resource leak. The remembered exception (the last one seen) is re-raised
+            # after the loop so the caller still learns that something failed.
+            err = e
+            pass
+    if err:
+        raise err
 
     print('Done with cleanup of buckets in tests.')