From 2e3b1f94fccb29582affddbe4823d74c544be614 Mon Sep 17 00:00:00 2001 From: Josh Durgin Date: Wed, 17 Dec 2014 21:49:35 -0800 Subject: [PATCH] worker: check op state for progress on any HTTP error 500 can be caused by an fcgi timeout when the operation still ends up succeeding. It doesn't hurt to check for op state being in progress in general in case other error codes happen but don't indicate actual failure of the copy, since wait_for_object fails immediately if the op state is not in progress. Signed-off-by: Josh Durgin --- radosgw_agent/tests/test_worker.py | 34 ++---------------------------- radosgw_agent/worker.py | 11 +++------- 2 files changed, 5 insertions(+), 40 deletions(-) diff --git a/radosgw_agent/tests/test_worker.py b/radosgw_agent/tests/test_worker.py index 2d1e486..093a411 100644 --- a/radosgw_agent/tests/test_worker.py +++ b/radosgw_agent/tests/test_worker.py @@ -50,18 +50,13 @@ class TestSyncObject(object): with py.test.raises(worker.SyncFailed): w.sync_object('mah-bucket', 'mah-object') - def test_syncs_encounters_a_transient_http_error(self): + def test_syncs_encounters_a_http_error(self): self.client.sync_object_intra_region = Mock(side_effect=client.HttpError(400, '')) with patch('radosgw_agent.worker.client', self.client): w = worker.DataWorker(None, None, None, self.src, None, daemon_id=1) w.wait_for_object = lambda *a: None - - with py.test.raises(worker.SyncFailed) as exc: - w.sync_object('mah-bucket', 'mah-object') - - exc_message = exc.value[0] - assert 'HTTP error with status: 400' in exc_message + w.sync_object('mah-bucket', 'mah-object') def test_sync_client_raises_sync_failed(self): self.client.sync_object_intra_region = Mock(side_effect=worker.SyncFailed('failed intra region')) @@ -75,19 +70,6 @@ class TestSyncObject(object): exc_message = exc.value[0] assert 'failed intra region' in exc_message - def test_syncs_encounters_a_critical_http_error(self): - self.client.sync_object_intra_region = Mock(side_effect=client.HttpError(500, 'Internal Server Error')) - - with patch('radosgw_agent.worker.client', self.client): - w = worker.DataWorker(None, None, None, self.src, None, daemon_id=1) - w.wait_for_object = lambda *a: None - - with py.test.raises(worker.SyncFailed) as exc: - w.sync_object('mah-bucket', 'mah-object') - - exc_message = exc.exconly() - assert 'HTTP error with status: 500' in exc_message - def test_fails_to_remove_op_state(self, capsys): # really tricky to test this one, we are forced to just use `capsys` from py.test # which will allow us to check into the stderr logging output and see if the agent @@ -115,18 +97,6 @@ class TestSyncObject(object): w.wait_for_object = lambda *a: None assert w.sync_object('mah-bucket', 'mah-object') is True - def test_fails_so_found_is_still_false(self): - self.client.sync_object_intra_region = Mock(side_effect=ValueError('severe error')) - - with patch('radosgw_agent.worker.client', self.client): - w = worker.DataWorker(None, None, None, self.src, None, daemon_id=1) - - # we intersect this dude so that we know it should not be called - # by making it raise an exception if it does - msg = 'should not have called wait_for_object' - w.wait_for_object = Mock(side_effect=AssertionError(msg)) - assert w.sync_object('mah-bucket', 'mah-object') is True - def test_wait_for_object_state_not_found_raises_sync_failed(self): self.client.get_op_state = Mock(side_effect=client.NotFound(404, '')) with patch('radosgw_agent.worker.client', self.client): diff --git a/radosgw_agent/worker.py b/radosgw_agent/worker.py index 4d7989c..2b714c0 100644 --- a/radosgw_agent/worker.py +++ b/radosgw_agent/worker.py @@ -195,17 +195,12 @@ class DataWorker(Worker): msg = 'could not delete "%s/%s" from secondary' % (bucket, obj) log.exception(msg) raise SyncFailed(msg) - except client.HttpError as e: - # if we have a non-critical Http error, raise a SyncFailed - # so that we can retry this. The Gateway may be returning 400's - msg = 'encountered an HTTP error with status: %s' % e.str_code - raise SyncFailed(msg) except SyncFailed: raise except Exception as e: - log.exception('encountered an exception during sync') - if found: - self.wait_for_object(bucket, obj, until, local_op_id) + log.warn('encountered an exception during sync', exc_info=True) + # wait for it if the op state is in-progress + self.wait_for_object(bucket, obj, until, local_op_id) # TODO: clean up old op states try: if found: -- 2.47.3