]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
node-proxy: send oob management requests to the MgrListener()
authorGuillaume Abrioux <gabrioux@ibm.com>
Thu, 7 Dec 2023 14:20:43 +0000 (14:20 +0000)
committerGuillaume Abrioux <gabrioux@ibm.com>
Thu, 25 Jan 2024 15:07:21 +0000 (15:07 +0000)
Note that this won't be a true out of band management.
In the case where the host hangs, this won't work. The oob
management should be reached directly but most of the time
the oob network is isolated. The idea is to send queries to the
the tcp server exposed by the cephadm agent (MgrListener) so it
can send itself queries to the redfish API using the IP address
exposed on the OS.

Signed-off-by: Guillaume Abrioux <gabrioux@ibm.com>
src/cephadm/cephadm.py
src/cephadm/cephadmlib/node_proxy/redfishdellsystem.py
src/pybind/mgr/cephadm/agent.py

index 879aec568b5b13b5f2da06206ee62b31ef3aeb64..1fc5768ca968efd709d632734e021a47dc70c9df 100755 (executable)
@@ -1287,13 +1287,28 @@ class MgrListener(Thread):
                             self.agent.ls_gatherer.wakeup()
                             self.agent.volume_gatherer.wakeup()
                             logger.debug(f'Got mgr message {data}')
+                        if 'node_proxy_oob_cmd' in data:
+                            if data['node_proxy_oob_cmd']['action'] in ['get_led', 'set_led']:
+                                conn.send(bytes(json.dumps(self.node_proxy_oob_cmd_result), 'utf-8'))
             except Exception as e:
                 logger.error(f'Mgr Listener encountered exception: {e}')
 
     def shutdown(self) -> None:
         self.stop = True
 
+    def validate_node_proxy_payload(self, data: Dict[str, Any]) -> None:
+        if 'action' not in data.keys():
+            raise RuntimeError('node-proxy oob command needs an action.')
+        if data['action'] in ['get_led', 'set_led']:
+            fields = ['type', 'id']
+            if data['type'] not in ['chassis', 'drive']:
+                raise RuntimeError('the LED type must be either "chassis" or "drive".')
+            for field in fields:
+                if field not in data.keys():
+                    raise RuntimeError('Received invalid node-proxy cmd.')
+
     def handle_json_payload(self, data: Dict[Any, Any]) -> None:
+        self.node_proxy_oob_cmd_result: Dict[str, Any] = {}
         if 'counter' in data:
             self.agent.ack = int(data['counter'])
             if 'config' in data:
@@ -1307,7 +1322,24 @@ class MgrListener(Thread):
                 self.agent.pull_conf_settings()
                 self.agent.wakeup()
         elif 'node_proxy_shutdown' in data:
+            logger.info('Received node_proxy_shutdown command.')
             self.agent.shutdown()
+        elif 'node_proxy_oob_cmd' in data:
+            node_proxy_cmd: Dict[str, Any] = data['node_proxy_oob_cmd']
+            try:
+                self.validate_node_proxy_payload(node_proxy_cmd)
+            except RuntimeError as e:
+                logger.error(f"Couldn't validate node-proxy payload:\n{node_proxy_cmd}\n{e}")
+                raise
+            logger.info(f'Received node_proxy_oob_cmd command: {node_proxy_cmd}')
+            if node_proxy_cmd['action'] == 'get_led':
+                if node_proxy_cmd['type'] == 'chassis':
+                    self.node_proxy_oob_cmd_result = self.agent.node_proxy_mgr.node_proxy.system.get_chassis_led()
+            if node_proxy_cmd['action'] == 'set_led':
+                if node_proxy_cmd['type'] == 'chassis':
+                    _data: Dict[str, Any] = json.loads(node_proxy_cmd['data'])
+                    _result: int = self.agent.node_proxy_mgr.node_proxy.system.set_chassis_led(_data)
+                    self.node_proxy_oob_cmd_result = {'http_code': _result}
         else:
             raise RuntimeError('No valid data received.')
 
@@ -1354,7 +1386,7 @@ class NodeProxyManager(Thread):
         if result_json['result'].get('port'):
             kwargs['port'] = result_json['result']['port']
 
-        self.node_proxy = NodeProxy(**kwargs)
+        self.node_proxy: NodeProxy = NodeProxy(**kwargs)
         self.node_proxy.start()
 
     def loop(self) -> None:
index 12d2466a88ff775cdd27002cbd2f64e6d31e9a3a..71006c48bf866ac4feac20b1cefa387b12cd18e4 100644 (file)
@@ -1,3 +1,4 @@
+import json
 from .baseredfishsystem import BaseRedfishSystem
 from .util import Logger, normalize_dict, to_snake_case
 from typing import Dict, Any, List
@@ -160,3 +161,26 @@ class RedfishDellSystem(BaseRedfishSystem):
         self._sys['firmwares'] = self.build_common_data(data=self._system['UpdateService'],
                                                         fields=fields,
                                                         path='FirmwareInventory')
+
+    def get_chassis_led(self) -> Dict[str, Any]:
+        endpoint = f'/redfish/v1/{self.chassis_endpoint}'
+        result = self.client.query(method='GET',
+                                   endpoint=endpoint,
+                                   timeout=10)
+        response_json = json.loads(result[1])
+        _result: Dict[str, Any] = {'http_code': result[2]}
+        if result[2] == 200:
+            _result['LocationIndicatorActive'] = response_json['LocationIndicatorActive']
+        else:
+            _result['LocationIndicatorActive'] = None
+        return _result
+
+    def set_chassis_led(self, data: Dict[str, str]) -> int:
+        # '{"IndicatorLED": "Lit"}'      -> LocationIndicatorActive = false
+        # '{"IndicatorLED": "Blinking"}' -> LocationIndicatorActive = true
+        _, response, status = self.client.query(
+            data=json.dumps(data),
+            method='PATCH',
+            endpoint=f'/redfish/v1{self.chassis_endpoint}'
+        )
+        return status
index 49f45bffef4235efd6a8dcb67f2300e9469ff462..d2a155d5de0cacebc30f1987fe45989814991328 100644 (file)
@@ -22,11 +22,8 @@ from ceph.deployment.service_spec import ServiceSpec, PlacementSpec
 from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
 from cephadm.ssl_cert_utils import SSLCerts
 from mgr_util import test_port_allocation, PortAlreadyInUse
-from urllib.request import urlopen, Request
-from urllib.error import HTTPError, URLError
-from contextlib import contextmanager
 
-from typing import Any, Dict, List, Set, Tuple, TYPE_CHECKING, Optional, Generator
+from typing import Any, Dict, List, Set, TYPE_CHECKING, Optional
 
 if TYPE_CHECKING:
     from cephadm.module import CephadmOrchestrator
@@ -267,103 +264,6 @@ class NodeProxy:
         self.mgr.node_proxy.save(host, data['patch'])
         self.raise_alert(data)
 
-    def login(self,
-              addr: str,
-              username: str,
-              password: str,
-              port: str = '443') -> None:
-        self.mgr.log.info("Logging in to "
-                          f"{addr}:{port} as '{username}'")
-        oob_credentials = json.dumps({"UserName": username,
-                                      "Password": password})
-        headers = {"Content-Type": "application/json"}
-
-        try:
-            _status_code, _data, _headers = self.query(addr=addr,
-                                                       port=port,
-                                                       data=bytes(oob_credentials, 'ascii'),
-                                                       headers=headers,
-                                                       endpoint="/redfish/v1/SessionService/Sessions/",
-                                                       method="POST")
-            if _status_code != 201:
-                self.mgr.log.error(f"Can't log in to {addr}:{port} as '{username}': {_status_code}")
-                raise RuntimeError
-            self.redfish_token = _headers['X-Auth-Token']
-            self.redfish_session_location = _headers['Location']
-        except URLError as e:
-            msg = f"Can't log in to {addr}:{port} as '{username}': {e}"
-            self.mgr.log.error(msg)
-            raise RuntimeError
-
-    def logout(self,
-               addr: str,
-               port: str = '443') -> None:
-        try:
-            _status_code, _data, _ = self.query(addr=addr,
-                                                port=port,
-                                                method='DELETE',
-                                                headers={"X-Auth-Token": self.redfish_token},
-                                                endpoint=self.redfish_session_location)
-        except URLError:
-            msg = f"Can't log out from {addr}:{port}"
-            self.mgr.log.error(msg)
-            raise cherrypy.HTTPError(502, msg)
-
-    @contextmanager
-    def redfish_session(self,
-                        addr: str,
-                        username: str,
-                        password: str,
-                        port: str = '443') -> Generator:
-        try:
-            self.login(addr=addr,
-                       port=port,
-                       username=username,
-                       password=password)
-            yield
-        except Exception:
-            raise
-        else:
-            self.logout(addr=addr,
-                        port=port)
-
-    def query(self,
-              addr: str = '',
-              port: str = '',
-              method: Optional[str] = None,
-              headers: Dict[str, str] = {},
-              data: Optional[bytes] = None,
-              endpoint: str = '',
-              ssl_ctx: Optional[Any] = None,
-              timeout: Optional[int] = 10) -> Tuple[int, Dict[str, Any], Dict[str, Any]]:
-        if not ssl_ctx:
-            ssl_ctx = self.ssl_ctx
-        url = f'https://{addr}:{port}{endpoint}'
-        _headers = headers
-        response_json = {}
-        response_headers = {}
-        if not _headers.get('Content-Type'):
-            # default to application/json if nothing provided
-            _headers['Content-Type'] = 'application/json'
-
-        try:
-            req = Request(url, data, _headers, method=method)
-            with urlopen(req, context=ssl_ctx, timeout=timeout) as response:
-                response_str = response.read()
-                response_headers = response.headers
-                response_json = json.loads(response_str)
-                response_status = response.status
-        except HTTPError as e:
-            self.mgr.log.error(f"{e.code} {e.reason}")
-            response_status = e.code
-        except URLError as e:
-            self.mgr.log.error(f"{e.reason}")
-            raise
-        except Exception as e:
-            self.mgr.log.error(f"{e}")
-            raise
-        return (response_status, response_json, response_headers)
-
     @cherrypy.expose
     @cherrypy.tools.allow(methods=['GET', 'PATCH'])
     @cherrypy.tools.json_in()
@@ -392,6 +292,9 @@ class NodeProxy:
         hostname: Optional[str] = kw.get('hostname')
         led_type: Optional[str] = kw.get('type')
         id_drive: Optional[str] = kw.get('id')
+        data: Optional[str] = None
+        # this method is restricted to 'GET' or 'PATCH'
+        action: str = 'get_led' if method == 'GET' else 'set_led'
 
         if not hostname:
             msg: str = "listing enclosure LED status for all nodes is not implemented."
@@ -414,19 +317,10 @@ class NodeProxy:
             self.mgr.log.debug(msg)
             raise cherrypy.HTTPError(400, msg)
 
-        # if led_type not in ['chassis', 'drive']:
-        #     # TODO(guits): update unit test for this
-        #     raise cherrypy.HTTPError(404, 'LED type must be either "chassis" or "drive"')
-
-        addr = self.mgr.node_proxy.oob[hostname]['addr']
-        port = self.mgr.node_proxy.oob[hostname]['port']
-        username = self.mgr.node_proxy.oob[hostname]['username']
-        password = self.mgr.node_proxy.oob[hostname]['password']
-
         if method == 'PATCH':
             # TODO(guits): need to check the request is authorized
             # allowing a specific keyring only ? (client.admin or client.agent.. ?)
-            data: str = json.dumps(cherrypy.request.json)
+            data = json.dumps(cherrypy.request.json)
 
             if led_type == 'drive':
                 if id_drive not in self.mgr.node_proxy.data[hostname]['status']['storage'].keys():
@@ -434,25 +328,30 @@ class NodeProxy:
                     msg = f"'{id_drive}' not found."
                     self.mgr.log.debug(msg)
                     raise cherrypy.HTTPError(400, msg)
-                endpoint = self.mgr.node_proxy.data[hostname]['status']['storage'][id_drive].get('redfish_endpoint')
-            else:
-                endpoint = self.mgr.node_proxy.data[hostname]['chassis']['redfish_endpoint']
 
-        with self.redfish_session(addr, username, password, port=port):
-            try:
-                status, result, _ = self.query(data=bytes(data, 'ascii'),
-                                               addr=addr,
-                                               method=method,
-                                               headers={"X-Auth-Token": self.redfish_token},
-                                               port=port,
-                                               endpoint=endpoint,
-                                               ssl_ctx=self.ssl_ctx)
-            except (URLError, HTTPError, RuntimeError) as e:
-                raise cherrypy.HTTPError(502, f"{e}")
-            if method == 'GET':
-                result = {"LocationIndicatorActive": result['LocationIndicatorActive']}
-            cherrypy.response.status = status
-            return result
+        payload: Dict[str, Any] = {"node_proxy_oob_cmd":
+                                   {"action": action,
+                                    "type": led_type,
+                                    "id": id_drive,
+                                    "host": hostname,
+                                    "data": data}}
+        try:
+            message_thread = AgentMessageThread(
+                hostname, self.mgr.agent_cache.agent_ports[hostname], payload, self.mgr)
+            message_thread.start()
+            message_thread.join()  # TODO(guits): Add a timeout?
+        except KeyError:
+            raise cherrypy.HTTPError(502, f"{hostname}'s agent not running, please check.")
+        agent_response = message_thread.get_agent_response()
+        try:
+            response_json: Dict[str, Any] = json.loads(agent_response)
+        except json.decoder.JSONDecodeError:
+            cherrypy.response.status = 503
+        else:
+            cherrypy.response.status = response_json.get('http_code', 503)
+        if cherrypy.response.status != 200:
+            raise cherrypy.HTTPError(cherrypy.response.status, "Couldn't change the LED status.")
+        return response_json
 
     @cherrypy.expose
     @cherrypy.tools.allow(methods=['GET'])
@@ -845,6 +744,7 @@ class AgentMessageThread(threading.Thread):
         self.port = port
         self.data: str = json.dumps(data)
         self.daemon_spec: Optional[CephadmDaemonDeploySpec] = daemon_spec
+        self.agent_response: str = ''
         super().__init__(target=self.run)
 
     def run(self) -> None:
@@ -897,8 +797,8 @@ class AgentMessageThread(threading.Thread):
                 secure_agent_socket.connect((self.addr, self.port))
                 msg = (bytes_len + self.data)
                 secure_agent_socket.sendall(msg.encode('utf-8'))
-                agent_response = secure_agent_socket.recv(1024).decode()
-                self.mgr.log.debug(f'Received "{agent_response}" from agent on host {self.host}')
+                self.agent_response = secure_agent_socket.recv(1024).decode()
+                self.mgr.log.debug(f'Received "{self.agent_response}" from agent on host {self.host}')
                 if self.daemon_spec:
                     self.mgr.agent_cache.agent_config_successfully_delivered(self.daemon_spec)
                 self.mgr.agent_cache.sending_agent_message[self.host] = False
@@ -918,6 +818,9 @@ class AgentMessageThread(threading.Thread):
         self.mgr.agent_cache.sending_agent_message[self.host] = False
         return
 
+    def get_agent_response(self) -> str:
+        return self.agent_response
+
 
 class CephadmAgentHelpers:
     def __init__(self, mgr: "CephadmOrchestrator"):