From ff72f18b5b199408b3789a5da500d5e6d4fbc74a Mon Sep 17 00:00:00 2001 From: Guillaume Abrioux Date: Wed, 22 Nov 2023 14:27:09 +0000 Subject: [PATCH] node-proxy: run only when idrac details provided This agent shouldn't run when no idrac details are available. Signed-off-by: Guillaume Abrioux --- src/cephadm/cephadm.py | 59 +++++++++++++++++------ src/cephadm/cephadmlib/node_proxy/main.py | 8 +++ src/pybind/mgr/cephadm/agent.py | 8 +-- 3 files changed, 57 insertions(+), 18 deletions(-) diff --git a/src/cephadm/cephadm.py b/src/cephadm/cephadm.py index 8ee75cb65203e..95735512963ff 100755 --- a/src/cephadm/cephadm.py +++ b/src/cephadm/cephadm.py @@ -30,7 +30,7 @@ from io import StringIO from threading import Thread, Event from urllib.request import urlopen, Request from pathlib import Path -from cephadmlib.node_proxy.main import NodeProxy +from cephadmlib.node_proxy.main import NodeProxy, NodeProxyInitialization, NodeProxyFetchIdracError from cephadmlib.constants import ( # default images @@ -1358,6 +1358,7 @@ class CephadmAgent(DaemonForm): self.recent_iteration_run_times: List[float] = [0.0, 0.0, 0.0] self.recent_iteration_index: int = 0 self.cached_ls_values: Dict[str, Dict[str, str]] = {} + self.t_node_proxy: Optional["NodeProxy"] = None def validate(self, config: Dict[str, str] = {}) -> None: # check for the required files @@ -1456,22 +1457,51 @@ class CephadmAgent(DaemonForm): port: str = '', data: Optional[Union[Dict[str, str], str]] = None, endpoint: str = '', - ssl_ctx: Optional[Any] = None) -> str: + ssl_ctx: Optional[Any] = None, + timeout: Optional[int] = 10) -> Tuple[int, str]: _addr = addr if addr else self.target_ip _port = port if port else self.target_port url = f'https://{_addr}:{_port}{endpoint}' - + logger.info(f"sending query to {url}") try: req = Request(url, data, {'Content-Type': 'application/json'}) send_time = time.monotonic() - with urlopen(req, context=ssl_ctx) as response: + with urlopen(req, context=ssl_ctx, timeout=timeout) as response: response_str = response.read() response_json = json.loads(response_str) total_request_time = datetime.timedelta(seconds=(time.monotonic() - send_time)).total_seconds() logger.info(f'Received mgr response: "{response_json["result"]}" {total_request_time} seconds after sending request.') + response_status = response.status + except HTTPError as e: + logger.debug(f"{e.code} {e.reason}") + response_status = e.code + response_str = e.reason + except URLError as e: + logger.debug(f"{e.reason}") + response_status = -1 + response_str = e.reason except Exception: raise - return response_str + return (response_status, response_str) + + def node_proxy_loop_check(self, ssl_ctx: Any) -> None: + while True: + try: + if isinstance(self.t_node_proxy, NodeProxy): + status = self.t_node_proxy.check_status() + label = 'Ok' if status else 'Critical' + logger.debug(f'node-proxy status: {label}') + else: + raise NodeProxyInitialization("starting node-proxy...") + except Exception as e: + logger.error(f'node-proxy not running: {e.__class__.__name__}: {e}') + try: + self.init_node_proxy(ssl_ctx) + except NodeProxyFetchIdracError: + logger.info("No iDrac details could be loaded. " + "Aborting node-proxy initialization. " + "Will retry in 120s.") + time.sleep(120) def init_node_proxy(self, ssl_ctx: Any) -> None: node_proxy_meta = { @@ -1480,9 +1510,13 @@ class CephadmAgent(DaemonForm): 'secret': self.keyring } } - result = self.query_endpoint(data=json.dumps(node_proxy_meta).encode('ascii'), - endpoint='/node-proxy/idrac', - ssl_ctx=ssl_ctx) + status, result = self.query_endpoint(data=json.dumps(node_proxy_meta).encode('ascii'), + endpoint='/node-proxy/idrac', + ssl_ctx=ssl_ctx) + if status != 200: + msg = f"Couldn't load iDrac details: {status}, {result}" + logger.debug(msg) + raise NodeProxyFetchIdracError(msg) result_json = json.loads(result) kwargs = { 'host': result_json['result']['addr'], @@ -1529,15 +1563,10 @@ class CephadmAgent(DaemonForm): self.volume_gatherer.start() # initiate node-proxy thread - self.init_node_proxy(ssl_ctx) + node_proxy_loop_thread = Thread(target=self.node_proxy_loop_check, args=(ssl_ctx,)) + node_proxy_loop_thread.start() while not self.stop: - try: - _mapper = {True: 'Ok', False: 'Critical'} - logger.debug(f'node-proxy status: {_mapper[self.t_node_proxy.check_status()]}') - except Exception as e: - logger.error(f'node-proxy failure: {e.__class__.__name__}: {e}') - self.init_node_proxy(ssl_ctx) start_time = time.monotonic() ack = self.ack diff --git a/src/cephadm/cephadmlib/node_proxy/main.py b/src/cephadm/cephadmlib/node_proxy/main.py index e9c99f0983dcb..106d3b447bcc1 100644 --- a/src/cephadm/cephadmlib/node_proxy/main.py +++ b/src/cephadm/cephadmlib/node_proxy/main.py @@ -26,6 +26,14 @@ DEFAULT_CONFIG = { } +class NodeProxyInitialization(Exception): + pass + + +class NodeProxyFetchIdracError(Exception): + pass + + @cherrypy.tools.auth_basic(on=True) @cherrypy.tools.allow(methods=['PUT']) @cherrypy.tools.json_out() diff --git a/src/pybind/mgr/cephadm/agent.py b/src/pybind/mgr/cephadm/agent.py index 50652867e74bb..b8453f719af85 100644 --- a/src/pybind/mgr/cephadm/agent.py +++ b/src/pybind/mgr/cephadm/agent.py @@ -128,7 +128,8 @@ class NodeProxy: host = data["cephx"]["name"] results['result'] = self.mgr.node_proxy.idrac.get(host) - + if not results['result']: + raise cherrypy.HTTPError(400, 'The provided host has no iDrac details.') return results def validate_node_proxy_data(self, data: Dict[str, Any]) -> None: @@ -258,7 +259,8 @@ class NodeProxy: headers: Optional[Dict[str, str]] = {}, data: Optional[bytes] = None, endpoint: str = '', - ssl_ctx: Optional[Any] = None) -> Tuple[int, Dict[str, Any]]: + ssl_ctx: Optional[Any] = None, + timeout: Optional[int] = 10) -> Tuple[int, Dict[str, Any]]: url = f'https://{addr}:{port}{endpoint}' _headers = headers response_json = {} @@ -268,7 +270,7 @@ class NodeProxy: _data = bytes(data, 'ascii') if data else None try: req = Request(url, _data, _headers, method=method) - with urlopen(req, context=ssl_ctx) as response: + with urlopen(req, context=ssl_ctx, timeout=timeout) as response: response_str = response.read() response_json = json.loads(response_str) response_status = response.status -- 2.39.5