From: Matthew N. Heler
Date: Fri, 12 Dec 2025 01:14:22 +0000 (-0600)
Subject: s3tests: add tests for per-bucket cloud transition targeting
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F716%2Fhead;p=s3-tests.git

s3tests: add tests for per-bucket cloud transition targeting

Add tests to validate the target_by_bucket feature which allows each
source bucket to transition objects to a dedicated destination bucket
rather than sharing a common target.

New tests:
- test_lifecycle_cloud_transition_target_by_bucket: validates objects
  land in bucket-specific targets without source bucket name prefix
- test_lifecycle_cloud_transition_target_by_bucket_multiple_buckets:
  validates isolation between different source buckets

Each test will perform a restore of the object transitioned.

Signed-off-by: Matthew N. Heler
---

diff --git a/pytest.ini b/pytest.ini
index 9b981c62..64bb49d9 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -14,6 +14,7 @@ markers =
     checksum
     cloud_transition
     cloud_restore
+    target_by_bucket
     copy
     encryption
     fails_on_aws
diff --git a/s3tests.conf.SAMPLE b/s3tests.conf.SAMPLE
index 53426f48..8baae3c9 100644
--- a/s3tests.conf.SAMPLE
+++ b/s3tests.conf.SAMPLE
@@ -103,6 +103,10 @@ secret_key = nopqrstuvwxyzabcdefghijklmnabcdefghijklm
 # read_through_restore_days = 2
 # target_storage_class = Target_SC
 # target_path = cloud-bucket
+## when true, each source bucket gets its own target bucket
+# target_by_bucket = false
+## template for per-bucket target bucket names
+# target_by_bucket_prefix = rgwx-${zonegroup}-${storage_class}-${bucket}
 
 ## another regular storage class to test multiple transition rules,
 # storage_class = S1
diff --git a/s3tests/functional/__init__.py b/s3tests/functional/__init__.py
index fbea948e..b4e1dfd7 100644
--- a/s3tests/functional/__init__.py
+++ b/s3tests/functional/__init__.py
@@ -410,6 +410,16 @@ def get_cloud_config(cfg):
     except (configparser.NoSectionError, configparser.NoOptionError):
         config.read_through_restore_days = 10
 
+    try:
+        config.cloud_target_by_bucket = cfg.getboolean('s3 cloud', "target_by_bucket")
+    except (configparser.NoSectionError, configparser.NoOptionError):
+        config.cloud_target_by_bucket = False
+
+    try:
+        config.cloud_target_by_bucket_prefix = cfg.get('s3 cloud', "target_by_bucket_prefix")
+    except (configparser.NoSectionError, configparser.NoOptionError):
+        config.cloud_target_by_bucket_prefix = None
+
 
 def get_client(client_config=None):
     if client_config == None:
@@ -825,6 +835,12 @@ def get_restore_processor_period():
 def get_read_through_days():
     return config.read_through_restore_days
 
+def get_cloud_target_by_bucket():
+    return config.cloud_target_by_bucket
+
+def get_cloud_target_by_bucket_prefix():
+    return config.cloud_target_by_bucket_prefix
+
 def create_iam_user_s3client(client):
     prefix = get_iam_path_prefix()
 
diff --git a/s3tests/functional/test_s3.py b/s3tests/functional/test_s3.py
index ebbfe3f0..7e02cd47 100644
--- a/s3tests/functional/test_s3.py
+++ b/s3tests/functional/test_s3.py
@@ -86,6 +86,8 @@ from . import (
     get_cloud_regular_storage_class,
     get_cloud_target_path,
     get_cloud_target_storage_class,
+    get_cloud_target_by_bucket,
+    get_cloud_target_by_bucket_prefix,
     get_cloud_client,
     nuke_prefixed_buckets,
     configured_storage_classes,
@@ -10050,6 +10052,222 @@ def test_lifecycle_cloud_transition_large_obj():
     expire1_key1_str = prefix + keys[1]
     verify_object(cloud_client, target_path, expire1_key1_str, data, target_sc)
 
+def _cloud_target_bucket_name(bucket_name, cloud_sc, prefix_template=None,
+                              zonegroup='default'):
+    """
+    Return the target bucket name expected for a source bucket when
+    target_by_bucket is enabled.
+
+    Expands the ${zonegroup}, ${storage_class} and ${bucket} placeholders in
+    prefix_template. An empty or missing template falls back to the default
+    rgwx-${zonegroup}-${storage_class}-${bucket}. The storage class is
+    lower-cased before substitution.
+
+    NOTE(review): callers rely on the zonegroup being named 'default';
+    confirm before running against a differently named zonegroup.
+    """
+    template = prefix_template or 'rgwx-${zonegroup}-${storage_class}-${bucket}'
+    return (template
+            .replace('${zonegroup}', zonegroup)
+            .replace('${storage_class}', cloud_sc.lower())
+            .replace('${bucket}', bucket_name))
+
+# Test for per-bucket cloud transition targeting (target_by_bucket=true)
+# When target_by_bucket is enabled:
+# 1. Each source bucket transitions to a dedicated target bucket
+# 2. Object keys are stored without the source bucket name prefix
+# 3. Target bucket names follow template: rgwx-${zonegroup}-${storage_class}-${bucket}
+@pytest.mark.lifecycle
+@pytest.mark.lifecycle_transition
+@pytest.mark.cloud_transition
+@pytest.mark.cloud_restore
+@pytest.mark.target_by_bucket
+@pytest.mark.fails_on_aws
+@pytest.mark.fails_on_dbstore
+def test_lifecycle_cloud_transition_target_by_bucket():
+    """
+    Test cloud transition with target_by_bucket=true.
+
+    Validates that when target_by_bucket is enabled:
+    1. Objects land in a bucket-specific target (not the shared target_path)
+    2. Object keys do NOT include the source bucket name as a prefix
+    3. Restore can locate and restore objects correctly
+    """
+    cloud_sc = get_cloud_storage_class()
+    if cloud_sc is None:
+        pytest.skip('[s3 cloud] section missing cloud_storage_class')
+
+    target_by_bucket = get_cloud_target_by_bucket()
+    if not target_by_bucket:
+        pytest.skip('[s3 cloud] target_by_bucket not enabled')
+
+    retain_head_object = get_cloud_retain_head_object()
+    target_sc = get_cloud_target_storage_class()
+    target_by_bucket_prefix = get_cloud_target_by_bucket_prefix()
+
+    client = get_client()
+    cloud_client = get_cloud_client()
+    lc_interval = get_lc_debug_interval()
+    restore_period = get_restore_processor_period()
+
+    # Create source bucket with test objects
+    bucket_name = get_new_bucket()
+    keys = ['file1.txt', 'subdir/file2.txt']
+
+    for key in keys:
+        client.put_object(Bucket=bucket_name, Key=key, Body=key)
+
+    # Configure lifecycle rule for cloud transition
+    rules = [{'ID': 'rule1',
+              'Transitions': [{'Days': 1, 'StorageClass': cloud_sc}],
+              'Prefix': '',
+              'Status': 'Enabled'}]
+    lifecycle = {'Rules': rules}
+    client.put_bucket_lifecycle_configuration(Bucket=bucket_name, LifecycleConfiguration=lifecycle)
+
+    # Verify initial state
+    response = client.list_objects(Bucket=bucket_name)
+    init_keys = _get_keys(response)
+    assert len(init_keys) == len(keys)
+
+    # Wait for transition to complete
+    time.sleep(15 * lc_interval)
+
+    # Verify objects have transitioned in source bucket
+    expire_keys = list_bucket_storage_class(client, bucket_name)
+    if retain_head_object and retain_head_object.lower() == "true":
+        assert len(expire_keys.get(cloud_sc, [])) == len(keys), \
+            f"Expected {len(keys)} objects in {cloud_sc}, got {len(expire_keys.get(cloud_sc, []))}"
+
+    # Derive expected target bucket name
+    # Default template: rgwx-${zonegroup}-${storage_class}-${bucket}
+    expected_target = _cloud_target_bucket_name(
+        bucket_name, cloud_sc, target_by_bucket_prefix)
+
+    # Allow time for cloud operations to complete
+    time.sleep(5 * lc_interval)
+
+    # Verify objects in target bucket
+    # With target_by_bucket=true, keys should NOT have bucket_name prefix
+    for key in keys:
+        verify_object(cloud_client, expected_target, key, key, target_sc)
+
+        # Verify the old format (with bucket prefix) is NOT used
+        old_format_key = bucket_name + "/" + key
+        try:
+            cloud_client.head_object(Bucket=expected_target, Key=old_format_key)
+            assert False, f"Found old format key '{old_format_key}' - target_by_bucket not working"
+        except ClientError as e:
+            assert e.response['Error']['Code'] in ('404', 'NoSuchKey'), \
+                f"Unexpected error: {e}"
+
+    # Test restore functionality
+    restore_key = keys[0]
+
+    # Verify object is transitioned before attempting restore
+    verify_transition(client, bucket_name, restore_key, cloud_sc)
+
+    # Delete lifecycle to prevent re-transition after restore
+    client.delete_bucket_lifecycle(Bucket=bucket_name)
+
+    # Restore object temporarily
+    client.restore_object(Bucket=bucket_name, Key=restore_key, RestoreRequest={'Days': 2})
+    time.sleep(3 * restore_period)
+
+    # Verify object is restored temporarily (storage class stays cloud_sc, but content is accessible)
+    verify_transition(client, bucket_name, restore_key, cloud_sc)
+    response = client.head_object(Bucket=bucket_name, Key=restore_key)
+    assert response['ContentLength'] == len(restore_key)
+
+@pytest.mark.lifecycle
+@pytest.mark.lifecycle_transition
+@pytest.mark.cloud_transition
+@pytest.mark.cloud_restore
+@pytest.mark.target_by_bucket
+@pytest.mark.fails_on_aws
+@pytest.mark.fails_on_dbstore
+def test_lifecycle_cloud_transition_target_by_bucket_multiple_buckets():
+    """
+    Test that target_by_bucket properly isolates objects between buckets.
+    Also tests restore functionality for one of the buckets.
+    """
+    cloud_sc = get_cloud_storage_class()
+    if cloud_sc is None:
+        pytest.skip('[s3 cloud] section missing cloud_storage_class')
+
+    target_by_bucket = get_cloud_target_by_bucket()
+    if not target_by_bucket:
+        pytest.skip('[s3 cloud] target_by_bucket not enabled')
+
+    target_sc = get_cloud_target_storage_class()
+    target_by_bucket_prefix = get_cloud_target_by_bucket_prefix()
+
+    client = get_client()
+    cloud_client = get_cloud_client()
+    lc_interval = get_lc_debug_interval()
+    restore_period = get_restore_processor_period()
+
+    # Create two source buckets
+    bucket_a = get_new_bucket()
+    bucket_b = get_new_bucket()
+
+    key_a = 'only-in-a.txt'
+    key_b = 'only-in-b.txt'
+    client.put_object(Bucket=bucket_a, Key=key_a, Body='content-a')
+    client.put_object(Bucket=bucket_b, Key=key_b, Body='content-b')
+
+    # Configure lifecycle for both buckets
+    rules = [{'ID': 'rule1',
+              'Transitions': [{'Days': 1, 'StorageClass': cloud_sc}],
+              'Prefix': '',
+              'Status': 'Enabled'}]
+    lifecycle = {'Rules': rules}
+    client.put_bucket_lifecycle_configuration(Bucket=bucket_a, LifecycleConfiguration=lifecycle)
+    client.put_bucket_lifecycle_configuration(Bucket=bucket_b, LifecycleConfiguration=lifecycle)
+
+    # Wait for transitions
+    time.sleep(20 * lc_interval)
+
+    # Derive expected target bucket names
+    expected_target_a = _cloud_target_bucket_name(
+        bucket_a, cloud_sc, target_by_bucket_prefix)
+    expected_target_b = _cloud_target_bucket_name(
+        bucket_b, cloud_sc, target_by_bucket_prefix)
+
+    # Verify isolation: target_a should have key_a, NOT key_b
+    verify_object(cloud_client, expected_target_a, key_a, 'content-a', target_sc)
+    try:
+        cloud_client.head_object(Bucket=expected_target_a, Key=key_b)
+        assert False, f"Isolation violation: '{key_b}' found in target_a"
+    except ClientError as e:
+        assert e.response['Error']['Code'] in ('404', 'NoSuchKey'), \
+            f"Unexpected error: {e}"
+
+    # Verify isolation: target_b should have key_b, NOT key_a
+    verify_object(cloud_client, expected_target_b, key_b, 'content-b', target_sc)
+    try:
+        cloud_client.head_object(Bucket=expected_target_b, Key=key_a)
+        assert False, f"Isolation violation: '{key_a}' found in target_b"
+    except ClientError as e:
+        assert e.response['Error']['Code'] in ('404', 'NoSuchKey'), \
+            f"Unexpected error: {e}"
+
+    # Test restore functionality on bucket_a
+    # Verify object is transitioned before attempting restore
+    verify_transition(client, bucket_a, key_a, cloud_sc)
+
+    # Delete lifecycle to prevent re-transition after restore
+    client.delete_bucket_lifecycle(Bucket=bucket_a)
+
+    # Restore object temporarily
+    client.restore_object(Bucket=bucket_a, Key=key_a, RestoreRequest={'Days': 2})
+    time.sleep(3 * restore_period)
+
+    # Verify object is restored temporarily (storage class stays cloud_sc, but content is accessible)
+    verify_transition(client, bucket_a, key_a, cloud_sc)
+    response = client.head_object(Bucket=bucket_a, Key=key_a)
+    assert response['ContentLength'] == len('content-a')
+
 @pytest.mark.cloud_restore
 @pytest.mark.fails_on_aws
 @pytest.mark.fails_on_dbstore