From ebd81b016b0c833732e18764a09258826ab8ea4d Mon Sep 17 00:00:00 2001 From: Josh Durgin Date: Wed, 17 Dec 2014 21:20:21 -0800 Subject: [PATCH] full data sync: treat 404 from bucket list as success Since results are paged, the exception may not occur immediately, so add a new type to distinguish it from other client exceptions, and catch it at any point during full sync of a bucket. Signed-off-by: Josh Durgin --- radosgw_agent/client.py | 16 ++++++++++++++-- radosgw_agent/tests/test_worker.py | 11 +++++++++++ radosgw_agent/worker.py | 20 ++++++++++---------- 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/radosgw_agent/client.py b/radosgw_agent/client.py index f7343eb..42a03ec 100644 --- a/radosgw_agent/client.py +++ b/radosgw_agent/client.py @@ -59,6 +59,8 @@ class InvalidZone(ClientException): pass class ZoneNotFound(ClientException): pass +class BucketEmpty(ClientException): + pass def parse_endpoint(endpoint): url = urlparse(endpoint) @@ -205,8 +207,18 @@ def get_bucket_list(connection): def list_objects_in_bucket(connection, bucket_name): # use the boto library to do this bucket = connection.get_bucket(bucket_name) - for key in bucket.list(): - yield key.name + try: + for key in bucket.list(): + yield key.name + except boto.exception.S3ResponseError as e: + # since this is a generator, the exception will be raised when + # it's read, rather than when this call returns, so raise a + # unique exception to distinguish this from client errors from + # other calls + if e.status == 404: + raise BucketEmpty() + else: + raise @boto_call diff --git a/radosgw_agent/tests/test_worker.py b/radosgw_agent/tests/test_worker.py index 195bedf..2d1e486 100644 --- a/radosgw_agent/tests/test_worker.py +++ b/radosgw_agent/tests/test_worker.py @@ -160,3 +160,14 @@ class TestSyncObject(object): exc_message = exc.exconly() assert 'state is error' in exc_message + + def test_sync_bucket_delayed_not_found(self): + class fake_iterable(object): + def __iter__(self): + raise client.BucketEmpty + with patch('radosgw_agent.worker.client', self.client): + w = worker.DataWorker(None, None, None, self.src, None, daemon_id=1) + w.sync_object = lambda *a: None + objects = fake_iterable() + with py.test.raises(client.BucketEmpty): + w.sync_bucket('foo', objects) diff --git a/radosgw_agent/worker.py b/radosgw_agent/worker.py index c259ffb..d46061e 100644 --- a/radosgw_agent/worker.py +++ b/radosgw_agent/worker.py @@ -364,17 +364,17 @@ class DataWorkerFull(DataWorker): log.debug('bucket instance is "%s" with marker %s', instance, marker) objects = client.list_objects_in_bucket(self.src_conn, bucket) - if not objects: - return True - except Exception as e: - log.error('error preparing for full sync of bucket "%s": %s', - bucket, e) - return False - - retries = self.sync_bucket(bucket, objects) + retries = self.sync_bucket(bucket, objects) - result = self.set_bound(instance, marker, retries, 'bucket-index') - return not retries and result == RESULT_SUCCESS + result = self.set_bound(instance, marker, retries, 'bucket-index') + return not retries and result == RESULT_SUCCESS + except client.BucketEmpty: + log.debug('no objects in bucket %s', bucket) + return True + except Exception: + log.exception('error preparing for full sync of bucket "%s"', + bucket) + return False def run(self): self.prepare_lock() -- 2.47.3