From 9742f75720a3d285049c6b818afbedca09eb1323 Mon Sep 17 00:00:00 2001 From: Paul Cuzner Date: Thu, 26 Nov 2020 17:28:10 +1300 Subject: [PATCH] cephadm: use 2xx error codes instead of 5xx When a thread fails we still need to see content from the other threads. When a 5xx response is received the payload is absent - so by using 200, 204 and 206 we can understand thread health from the status code, saving a 500 response to indicate all threads are dead! In addition when a thread dies the data payload in the cache is erased to prevent stale information being returned. Signed-off-by: Paul Cuzner --- src/cephadm/cephadm | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/cephadm/cephadm b/src/cephadm/cephadm index e6711f22fc5e0..46a62d5b9d961 100755 --- a/src/cephadm/cephadm +++ b/src/cephadm/cephadm @@ -5688,29 +5688,31 @@ td,th {{ assert tasks # We're using the http status code to help indicate thread health - # - all threads inactive returns a 500 (Internal Server Error) - # - some threads inactive returns a 503 (Service Unavailable) + # - 200 (OK): request successful + # - 204 (No Content): access to a cache relating to a dead thread + # - 206 (Partial Content): one or more threads are inactive + # - 500 (Server Error): all threads inactive if u == 'metadata': data = json.dumps(self.server.cephadm_cache.to_json()) if all([tasks[task_name] == 'inactive' for task_name in tasks if task_name != 'http_server']): - # the subtasks are dead + # All the subtasks are dead! 
status_code = 500 elif any([tasks[task_name] == 'inactive' for task_name in tasks if task_name != 'http_server']): - status_code = 503 + status_code = 206 # Individual GETs against the a tasks endpoint will also return a 503 if the corresponding thread is inactive elif u == 'daemons': data = json.dumps(self.server.cephadm_cache.daemons) if tasks['daemons'] == 'inactive': - status_code = 503 + status_code = 204 elif u == 'disks': data = json.dumps(self.server.cephadm_cache.disks) if tasks['disks'] == 'inactive': - status_code = 503 + status_code = 204 elif u == 'host': data = json.dumps(self.server.cephadm_cache.host) if tasks['host'] == 'inactive': - status_code = 503 + status_code = 204 # a GET against health will always return a 200, since the op is always successful elif u == 'health': @@ -5839,13 +5841,16 @@ class CephadmDaemon(): def _handle_thread_exception(self, exc, thread_type): e_msg = f"{exc.__class__.__name__} exception: {str(exc)}" - errors = [e_msg] + thread_info = getattr(self.cephadm_cache, thread_type) + errors = thread_info.get('scrape_errors', []) + errors.append(e_msg) logger.error(e_msg) - logger.exception(e) + logger.exception(exc) self.cephadm_cache.update_task( thread_type, { "scrape_errors": errors, + "data": None, } ) -- 2.39.5