From: Oguzhan Ozmen Date: Mon, 24 Nov 2025 03:12:29 +0000 (+0000) Subject: RGW/test_multi/RGWBucketFullSyncCR: test bucket full sync while source bucket is... X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=ce9be9e2a9c1bf23e82ba7dfc7a7caaedfecbfd1;p=ceph-ci.git RGW/test_multi/RGWBucketFullSyncCR: test bucket full sync while source bucket is deleted in the middle Tests: https://tracker.ceph.com/issues/73799 Signed-off-by: Oguzhan Ozmen --- diff --git a/src/rgw/driver/rados/rgw_data_sync.cc b/src/rgw/driver/rados/rgw_data_sync.cc index 04978e3a4a5..af100f02150 100644 --- a/src/rgw/driver/rados/rgw_data_sync.cc +++ b/src/rgw/driver/rados/rgw_data_sync.cc @@ -4669,6 +4669,18 @@ int RGWBucketFullSyncCR::operate(const DoutPrefixProvider *dpp) } return set_cr_error(-ECANCELED); } + // for testing purposes to slow down the execution pace of this loop + if (cct->_conf->rgw_inject_delay_sec > 0) { + if (std::string_view(cct->_conf->rgw_inject_delay_pattern) == + "delay_bucket_full_sync_loop") { + yield { + utime_t dur; + dur.set_from_double(cct->_conf->rgw_inject_delay_sec); + tn->log(0, SSTR("injecting a delay of " << dur << "s")); + wait(dur); + } + } + } tn->log(20, SSTR("[full sync] syncing object: " << bucket_shard_str{bs} << "/" << entries_iter->key)); entry = &(*entries_iter); diff --git a/src/test/rgw/rgw_multi/tests.py b/src/test/rgw/rgw_multi/tests.py index d1ceddba4ea..ee00e171591 100644 --- a/src/test/rgw/rgw_multi/tests.py +++ b/src/test/rgw/rgw_multi/tests.py @@ -6156,4 +6156,116 @@ def test_object_lock_sync(): assert(response['ObjectLockConfiguration'] == lock_config) - +def test_bucket_full_sync_when_the_bucket_is_deleted_in_the_meantime(): + num_objects_to_upload = ( + 3000 # must be more than 1000 to have pagination at full sync + ) + bucket_full_sync_listing_inject_delay_sec = 100 + bucket_full_sync_listing_inject_delay_pattern = "delay_bucket_full_sync_loop" + + zonegroup = realm.master_zonegroup() + zonegroup_conns = 
ZonegroupConns(zonegroup) + primary_zone_client_conn = zonegroup_conns.rw_zones[0] + primary_zone_cluster_conn = zonegroup.zones[0] + secondary_zone_cluster_conn = zonegroup.zones[1] + + bucket = primary_zone_client_conn.create_bucket(gen_bucket_name()) + log.info(f"created bucket={bucket.name}") + + log.info(f"disable sync for bucket={bucket.name}") + disable_bucket_sync(realm.meta_master_zone(), bucket.name) + + try: + log.info(f"upload {num_objects_to_upload} objects to bucket={bucket.name}") + num_objects_to_uploaded = 0 + for i in range(num_objects_to_upload): + if i % 100 == 0: + log.debug(f"uploaded {i} objects to bucket={bucket.name}...") + try: + primary_zone_client_conn.s3_client.put_object( + Bucket=bucket.name, Key=f"obj-{i:04d}", Body="..." + ) + num_objects_to_uploaded += 1 + except Exception as e: + log.debug(f"failed to upload object to bucket={bucket.name}: {e}") + log.info( + f"successfully uploaded {num_objects_to_uploaded} objects to bucket={bucket.name}" + ) + + log.info("trim bucket bilog to avoid any incremental sync happening") + primary_zone_cluster_conn.cluster.admin(["bilog", "trim", "--bucket", bucket.name]) + log.info("set rgw_inject_delay_sec and rgw_inject_delay_pattern to slow down bucket full sync") + secondary_zone_cluster_conn.cluster.ceph_admin( + ["config", "set", "client.rgw", "rgw_inject_delay_sec", str(bucket_full_sync_listing_inject_delay_sec)] + ) + secondary_zone_cluster_conn.cluster.ceph_admin( + ["config", "set", "client.rgw", "rgw_inject_delay_pattern", bucket_full_sync_listing_inject_delay_pattern] + ) + log.info("enable bucket sync to initiate full sync") + enable_bucket_sync(realm.meta_master_zone(), bucket.name) + + # Since incremental sync is not possible and full sync is stalled, + # we should see that the bucket's sync is stalled. 
+ log.info("verify that bucket sync is stalled") + deadline = time.time() + bucket_full_sync_listing_inject_delay_sec + oldest_inc_change = None + while True: + if time.time() > deadline: + raise Exception("failed to verify the stall of bucket sync") + new_reading = get_oldest_incremental_change_not_applied_epoch( + secondary_zone_cluster_conn + ) + if new_reading is not None: + if oldest_inc_change is None or oldest_inc_change != new_reading: + oldest_inc_change = new_reading + elif ( + oldest_inc_change == new_reading + ): # 2 back-to-back readings are the same + break + time.sleep(10) + log.info( + f"verified that bucket sync is stalled, oldest incremental change not applied epoch: {oldest_inc_change}" + ) + + # while bucket sync is stalled, delete all objects and the bucket. + log.info(f"delete {num_objects_to_upload} objects from bucket={bucket.name}") + for i in range(num_objects_to_upload): + primary_zone_client_conn.s3_client.delete_object( + Bucket=bucket.name, + Key=f"obj-{i:04d}", + ) + log.info(f"delete bucket={bucket.name}") + primary_zone_client_conn.s3_client.delete_bucket(Bucket=bucket.name) + + log.info(f"verify that bucket={bucket.name} is deleted on secondary zone") + zonegroup_meta_checkpoint(zonegroup) + + log.info( + "removing rgw_inject_delay_sec and rgw_inject_delay_pattern to allow bucket full sync to run normally to the completion" + ) + secondary_zone_cluster_conn.cluster.ceph_admin( + ["config", "rm", "client.rgw", "rgw_inject_delay_sec"] + ) + secondary_zone_cluster_conn.cluster.ceph_admin( + ["config", "rm", "client.rgw", "rgw_inject_delay_pattern"] + ) + time.sleep( + bucket_full_sync_listing_inject_delay_sec + ) # wait to make sure bucket sync loop resumes to normal pace + + log.info("wait for data sync to complete") + zonegroup_data_checkpoint(zonegroup_conns) + deadline = config.checkpoint_retries * config.checkpoint_delay + time.time() + while time.time() < deadline: + if ( + get_oldest_incremental_change_not_applied_epoch( + 
secondary_zone_cluster_conn + ) + is None + ): + break + else: + raise Exception("bucket sync did not complete in time") + except Exception as e: + log.error(f"test_bucket_full_sync_when_the_bucket_is_deleted_in_the_meantime failed: {e}") + raise