]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
RGW/test_multi/RGWBucketFullSyncCR: test bucket full sync while source bucket is...
authorOguzhan Ozmen <oozmen@bloomberg.net>
Mon, 24 Nov 2025 03:12:29 +0000 (03:12 +0000)
committerOguzhan Ozmen <oozmen@bloomberg.net>
Mon, 24 Nov 2025 03:12:29 +0000 (03:12 +0000)
Tests: https://tracker.ceph.com/issues/73799
Signed-off-by: Oguzhan Ozmen <oozmen@bloomberg.net>
src/rgw/driver/rados/rgw_data_sync.cc
src/test/rgw/rgw_multi/tests.py

index 04978e3a4a5388000ded20f552e77670c9300eec..af100f0215029640acab6710caeafa37c8ca9ad2 100644 (file)
@@ -4669,6 +4669,18 @@ int RGWBucketFullSyncCR::operate(const DoutPrefixProvider *dpp)
           }
           return set_cr_error(-ECANCELED);
         }
+        // for testing purposes, to slow down the execution pace of this loop
+        if (cct->_conf->rgw_inject_delay_sec > 0) {
+          if (std::string_view(cct->_conf->rgw_inject_delay_pattern) ==
+              "delay_bucket_full_sync_loop") {
+            yield {
+              utime_t dur;
+              dur.set_from_double(cct->_conf->rgw_inject_delay_sec);
+              tn->log(0, SSTR("injecting a delay of " << dur << "s"));
+              wait(dur);
+            }
+          }
+        }
         tn->log(20, SSTR("[full sync] syncing object: "
             << bucket_shard_str{bs} << "/" << entries_iter->key));
         entry = &(*entries_iter);
index d1ceddba4eab61287d9e830dc70c8ed4c44c8281..ee00e171591d52d24fa713f74e9497ba42f7a9aa 100644 (file)
@@ -6156,4 +6156,116 @@ def test_object_lock_sync():
     assert(response['ObjectLockConfiguration'] == lock_config)
 
 
-    
+def test_bucket_full_sync_when_the_bucket_is_deleted_in_the_meantime():
+    num_objects_to_upload = (
+        3000  # must be more than 1000 to have pagination at full sync
+    )
+    bucket_full_sync_listing_inject_delay_sec = 100
+    bucket_full_sync_listing_inject_delay_pattern = "delay_bucket_full_sync_loop"
+
+    zonegroup = realm.master_zonegroup()
+    zonegroup_conns = ZonegroupConns(zonegroup)
+    primary_zone_client_conn = zonegroup_conns.rw_zones[0]
+    primary_zone_cluster_conn = zonegroup.zones[0]
+    secondary_zone_cluster_conn = zonegroup.zones[1]
+
+    bucket = primary_zone_client_conn.create_bucket(gen_bucket_name())
+    log.info(f"created bucket={bucket.name}")
+
+    log.info(f"disable sync for bucket={bucket.name}")
+    disable_bucket_sync(realm.meta_master_zone(), bucket.name)
+
+    try:
+        log.info(f"upload {num_objects_to_upload} objects to bucket={bucket.name}")
+        num_objects_to_uploaded = 0
+        for i in range(num_objects_to_upload):
+            if i % 100 == 0:
+                log.debug(f"uploaded {i} objects to bucket={bucket.name}...")
+            try:
+                primary_zone_client_conn.s3_client.put_object(
+                    Bucket=bucket.name, Key=f"obj-{i:04d}", Body="..."
+                )
+                num_objects_to_uploaded += 1
+            except Exception as e:
+                log.debug(f"failed to upload object to bucket={bucket.name}: {e}")
+        log.info(
+            f"successfully uploaded {num_objects_to_uploaded} objects to bucket={bucket.name}"
+        )
+
+        log.info("trim bucket bilog to avoid any incremental sync happening")
+        primary_zone_cluster_conn.cluster.admin(["bilog", "trim", "--bucket", bucket.name])
+        log.info("set rgw_inject_delay_sec and rgw_inject_delay_pattern to slow down bucket full sync")
+        secondary_zone_cluster_conn.cluster.ceph_admin(
+            ["config", "set", "client.rgw", "rgw_inject_delay_sec", str(bucket_full_sync_listing_inject_delay_sec)]
+        )
+        secondary_zone_cluster_conn.cluster.ceph_admin(
+            ["config", "set", "client.rgw", "rgw_inject_delay_pattern", bucket_full_sync_listing_inject_delay_pattern]
+        )
+        log.info("enable bucket sync to initiate full sync")
+        enable_bucket_sync(realm.meta_master_zone(), bucket.name)
+
+        # Since incremental sync is not possible and full sync is stalled,
+        # we should see that the bucket's sync is stalled.
+        log.info("verify that bucket sync is stalled")
+        deadline = time.time() + bucket_full_sync_listing_inject_delay_sec
+        oldest_inc_change = None
+        while True:
+            if time.time() > deadline:
+                raise Exception("failed to verify the stall of bucket sync")
+            new_reading = get_oldest_incremental_change_not_applied_epoch(
+                secondary_zone_cluster_conn
+            )
+            if new_reading is not None:
+                if oldest_inc_change is None or oldest_inc_change != new_reading:
+                    oldest_inc_change = new_reading
+                elif (
+                    oldest_inc_change == new_reading
+                ):  # 2 back-to-back readings are the same
+                    break
+            time.sleep(10)
+        log.info(
+            f"verified that bucket sync is stalled, oldest incremental change not applied epoch: {oldest_inc_change}"
+        )
+
+        # while bucket sync is stalled, delete all objects and the bucket.
+        log.info(f"delete {num_objects_to_upload} objects from bucket={bucket.name}")
+        for i in range(num_objects_to_upload):
+            primary_zone_client_conn.s3_client.delete_object(
+                Bucket=bucket.name,
+                Key=f"obj-{i:04d}",
+            )
+        log.info(f"delete bucket={bucket.name}")
+        primary_zone_client_conn.s3_client.delete_bucket(Bucket=bucket.name)
+
+        log.info(f"verify that bucket={bucket.name} is deleted on secondary zone")
+        zonegroup_meta_checkpoint(zonegroup)
+
+        log.info(
+            "removing rgw_inject_delay_sec and rgw_inject_delay_pattern to allow bucket full sync to run normally to the completion"
+        )
+        secondary_zone_cluster_conn.cluster.ceph_admin(
+            ["config", "rm", "client.rgw", "rgw_inject_delay_sec"]
+        )
+        secondary_zone_cluster_conn.cluster.ceph_admin(
+            ["config", "rm", "client.rgw", "rgw_inject_delay_pattern"]
+        )
+        time.sleep(
+            bucket_full_sync_listing_inject_delay_sec
+        )  # wait to make sure the bucket sync loop resumes its normal pace
+
+        log.info("wait for data sync to complete")
+        zonegroup_data_checkpoint(zonegroup_conns)
+        deadline = config.checkpoint_retries * config.checkpoint_delay + time.time()
+        while time.time() < deadline:
+            if (
+                get_oldest_incremental_change_not_applied_epoch(
+                    secondary_zone_cluster_conn
+                )
+                is None
+            ):
+                break
+        else:
+            raise Exception("bucket sync did not complete in time")
+    except Exception as e:
+        log.error(f"test_bucket_full_sync_when_the_bucket_is_deleted_in_the_meantime failed: {e}")
+        raise