From f59a3580ee2283be9bc6f00fcb9496f1f35667ea Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson"
Date: Wed, 11 May 2022 18:39:08 -0400
Subject: [PATCH] test/rgw: Add incremental test of bucket sync run

This tests that `bucket sync run` iterates properly over the
generations:

1. Create a bucket and write some objects to it. Wait for sync to
   complete, ensuring we are in incremental sync.

2. Turn off the secondary `radosgw`.

3. Manually reshard, then continue writing objects and resharding.

4. Choose objects so that each generation has objects in many, but
   not all, shards.

5. After building up several generations, run `bucket sync run` on
   the secondary.

6. Verify that all objects on the primary are on the secondary.

Signed-off-by: Adam C. Emerson
---
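For step 6, the two listings are compared in lockstep with zip_longest
(see bucket_keys_eq in the diff below), so any extra, missing, or
mismatched key trips an assertion. A minimal standalone sketch of that
pattern; the helper keys_eq and its sample inputs are illustrative
only, not part of the patch:

    from itertools import zip_longest

    def keys_eq(keys1, keys2):
        """Assert that two sorted lists of object names match."""
        for k1, k2 in zip_longest(keys1, keys2):
            # zip_longest pads the shorter list with None, so None on
            # either side means that listing ran out of keys early.
            if k1 is None:
                raise AssertionError('key=%s missing from first list' % k2)
            if k2 is None:
                raise AssertionError('key=%s missing from second list' % k1)
            assert k1 == k2, 'key mismatch: %s != %s' % (k1, k2)

    keys_eq(['a', 'b'], ['a', 'b'])   # passes silently
    # keys_eq(['a'], ['a', 'b'])      # AssertionError: key=b missing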
+ """ + keys1 = get_obj_names(zone1, bucket_name, 10000) + keys2 = get_obj_names(zone2, bucket_name, 10000) + for key1, key2 in zip_longest(keys1, keys2): + if key1 is None: + log.critical('key=%s is missing from zone=%s', key1.name, + zone1.name) + assert False + if key2 is None: + log.critical('key=%s is missing from zone=%s', key2.name, + zone2.name) + assert False + +@attr('bucket_reshard') +def test_bucket_sync_run_basic_incremental(): + """ + Create several generations of objects, then run bucket sync + run to ensure they're all processed. + """ + zonegroup = realm.master_zonegroup() + zonegroup_conns = ZonegroupConns(zonegroup) + primary = zonegroup_conns.rw_zones[0] + + # create a bucket write objects to it and wait for them to sync, ensuring + # we are in incremental. + bucket = primary.create_bucket(gen_bucket_name()) + log.debug('created bucket=%s', bucket.name) + zonegroup_meta_checkpoint(zonegroup) + write_most_shards(primary, bucket.name, 11) + zonegroup_bucket_checkpoint(zonegroup_conns, bucket.name) + + try: + # stop gateways in other zones so we can rely on bucket sync run + for secondary in zonegroup_conns.rw_zones[1:]: + secondary.zone.stop() + + # build up multiple generations each with some objects written to + # them. + generations = [17, 19, 23, 29, 31, 37] + for num_shards in generations: + reshard_bucket(primary.zone, bucket.name, num_shards) + write_most_shards(primary, bucket.name, num_shards) + + # bucket sync run on every secondary + for secondary in zonegroup_conns.rw_zones[1:]: + cmd = ['bucket', 'sync', 'run'] + secondary.zone.zone_args() + cmd += ['--bucket', bucket.name, '--source-zone', primary.name] + secondary.zone.cluster.admin(cmd) + + bucket_keys_eq(primary.zone, secondary.zone, bucket.name) + + finally: + # Restart so bucket_checkpoint can actually fetch things from the + # secondaries. Put this in a finally block so they restart even on + # error. + for secondary in zonegroup_conns.rw_zones[1:]: + secondary.zone.start() + + zonegroup_bucket_checkpoint(zonegroup_conns, bucket.name) -- 2.39.5