From b6e8da7e5597033fe25e2ae4695576e6de933fb5 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Juan=20Miguel=20Olmo=20Mart=C3=ADnez?= Date: Fri, 1 Feb 2019 12:06:04 +0100 Subject: [PATCH] mgr/ansible: Add/remove hosts MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit - Implementation of operations add/rm hosts - Improvement of error management in Ansible Runner Service Client - Addressed @sebastian-philipp suggestions and findings - Remove the parameter of hosts operations. Now all the hosts are placed by default in a specific Ansible Inventory group called . Discussion on-going about to keep Host management API endpoint or not. If hosts management is removed, further modifications will be needed to: - Keep updated the in the orchestrator - Manage hosts groups when operations are executed - Adapted new commands (host add/rm) to use to the new syntax - Improved error management - Added return information to Add/remove host methods docstring - Removed redundant exceptions following @sebastian-philipp suggestion - Removed duplicated code - Removed extra space - Fixed unit tests - Improved error management in launch playbook - removed unused param in docstring (orchestrator.py) Signed-off-by: Juan Miguel Olmo Martínez --- src/pybind/mgr/ansible/ansible_runner_svc.py | 151 ++++++++----- src/pybind/mgr/ansible/module.py | 206 ++++++++++++++++-- .../ansible/tests/test_client_playbooks.py | 33 ++- 3 files changed, 312 insertions(+), 78 deletions(-) diff --git a/src/pybind/mgr/ansible/ansible_runner_svc.py b/src/pybind/mgr/ansible/ansible_runner_svc.py index ac49d04067390..636e20b0fdcde 100644 --- a/src/pybind/mgr/ansible/ansible_runner_svc.py +++ b/src/pybind/mgr/ansible/ansible_runner_svc.py @@ -4,6 +4,7 @@ Client module to interact with the Ansible Runner Service import requests import json import re +from functools import wraps # Ansible Runner service API endpoints API_URL = "api" @@ -12,6 +13,20 @@ PLAYBOOK_EXEC_URL = "api/v1/playbooks" PLAYBOOK_EVENTS = "api/v1/jobs/%s/events" EVENT_DATA_URL = "api/v1/jobs/%s/events/%s" +class AnsibleRunnerServiceError(Exception): + pass + +def handle_requests_exceptions(func): + """Decorator to manage errors raised by requests library + """ + @wraps(func) + def inner(*args, **kwargs): + try: + return func(*args, **kwargs) + except requests.exceptions.RequestException as ex: + raise AnsibleRunnerServiceError(str(ex)) + return inner + class ExecutionStatusCode(object): """Execution status of playbooks ( 'msg' field in playbook status request) """ @@ -21,14 +36,13 @@ class ExecutionStatusCode(object): ON_GOING = 2 # Playbook is being executed msg = running NOT_LAUNCHED = 3 # Not initialized - class PlayBookExecution(object): """Object to provide all the results of a Playbook execution """ def __init__(self, rest_client, playbook, logger, result_pattern="", - the_params={}, - querystr_dict={}): + the_params=None, + querystr_dict=None): self.rest_client = rest_client @@ -54,20 +68,26 @@ class PlayBookExecution(object): """ Launch the playbook execution """ + response = None endpoint = "%s/%s" % (PLAYBOOK_EXEC_URL, self.playbook) - response = self.rest_client.http_post(endpoint, - self.params, - self.querystr_dict) - - if response: + try: + response = self.rest_client.http_post(endpoint, + self.params, + self.querystr_dict) + except AnsibleRunnerServiceError: + self.log.exception("Error launching playbook <%s>", self.playbook) + raise + + # Here we have a server response, but an error trying + # to launch the playbook is also posible (ex. 404, playbook not found) + # Error already logged by rest_client, but an error should be raised + # to the orchestrator (via completion object) + if response.ok: self.play_uuid = json.loads(response.text)["data"]["play_uuid"] self.log.info("Playbook execution launched succesfuly") else: - # An error launching the execution implies play_uuid empty - self.play_uuid = "" - self.log.error("Playbook launch error. \ - Check request result") + raise AnsibleRunnerServiceError(response.reason) def get_status(self): """ Return the status of the execution @@ -79,6 +99,7 @@ class PlayBookExecution(object): """ status_value = ExecutionStatusCode.NOT_LAUNCHED + response = None if self.play_uuid == '-': # Initialized status_value = ExecutionStatusCode.NOT_LAUNCHED @@ -86,7 +107,12 @@ class PlayBookExecution(object): status_value = ExecutionStatusCode.ERROR else: endpoint = "%s/%s" % (PLAYBOOK_EXEC_URL, self.play_uuid) - response = self.rest_client.http_get(endpoint) + + try: + response = self.rest_client.http_get(endpoint) + except AnsibleRunnerServiceError: + self.log.exception("Error getting playbook <%s> status", + self.playbook) if response: the_status = json.loads(response.text)["msg"] @@ -108,11 +134,14 @@ class PlayBookExecution(object): @returns: the events that matches with the patterns provided """ - + response = None if not self.play_uuid: return {} - response = self.rest_client.http_get(PLAYBOOK_EVENTS % self.play_uuid) + try: + response = self.rest_client.http_get(PLAYBOOK_EVENTS % self.play_uuid) + except AnsibleRunnerServiceError: + self.log.exception("Error getting playbook <%s> result", self.playbook) if not response: result_events = {} @@ -170,6 +199,7 @@ class Client(object): # Log in the server and get a token self.login() + @handle_requests_exceptions def login(self): """ Login with user credentials to obtain a valid token """ @@ -190,7 +220,7 @@ class Client(object): self.token = json.loads(response.text)["data"]["token"] self.log.info("Connection with Ansible Runner Service is operative") - + @handle_requests_exceptions def is_operative(self): """Indicates if the connection with the Ansible runner Server is ok """ @@ -207,6 +237,7 @@ class Client(object): else: return False + @handle_requests_exceptions def http_get(self, endpoint): """Execute an http get request @@ -215,29 +246,23 @@ class Client(object): :returns: A requests object """ - response = None - - try: - the_url = "%s/%s" % (self.server_url, endpoint) - r = requests.get(the_url, - verify = self.verify_server, - headers = {"Authorization": self.token}) - - if r.status_code != requests.codes.ok: - self.log.error("http GET %s <--> (%s - %s)\n%s", - the_url, r.status_code, r.reason, r.text) - else: - self.log.info("http GET %s <--> (%s - %s)", - the_url, r.status_code, r.text) - - response = r + the_url = "%s/%s" % (self.server_url, endpoint) + response = requests.get(the_url, + verify = self.verify_server, + headers = {"Authorization": self.token}) - except Exception: - self.log.exception("Ansible runner service(GET %s)", the_url) + if response.status_code != requests.codes.ok: + self.log.error("http GET %s <--> (%s - %s)\n%s", + the_url, response.status_code, response.reason, + response.text) + else: + self.log.info("http GET %s <--> (%s - %s)", + the_url, response.status_code, response.text) return response - def http_post(self, endpoint, payload, params_dict = {}): + @handle_requests_exceptions + def http_post(self, endpoint, payload, params_dict): """Execute an http post request :param endpoint: Ansible Runner service RESTful API endpoint @@ -247,27 +272,45 @@ class Client(object): :returns: A requests object """ - response = None + the_url = "%s/%s" % (self.server_url, endpoint) + response = requests.post(the_url, + verify = self.verify_server, + headers = {"Authorization": self.token, + "Content-type": "application/json"}, + json = payload, + params = params_dict) - try: - the_url = "%s/%s" % (self.server_url, endpoint) - r = requests.post(the_url, - verify = self.verify_server, - headers = {"Authorization": self.token, - "Content-type": "application/json"}, - json = payload, - params = params_dict) - - if r.status_code != requests.codes.ok: - self.log.error("http POST %s [%s] <--> (%s - %s:%s)\n", - the_url, payload, r.status_code, r.reason, r.text) - else: - self.log.info("http POST %s <--> (%s - %s)", - the_url, r.status_code, r.text) - response = r + if response.status_code != requests.codes.ok: + self.log.error("http POST %s [%s] <--> (%s - %s:%s)\n", + the_url, payload, response.status_code, + response.reason, response.text) + else: + self.log.info("http POST %s <--> (%s - %s)", + the_url, response.status_code, response.text) + + return response - except Exception: - self.log.exception("Ansible runner service(POST %s)", the_url) + @handle_requests_exceptions + def http_delete(self, endpoint): + """Execute an http delete request + + :param endpoint: Ansible Runner service RESTful API endpoint + + :returns: A requests object + """ + + the_url = "%s/%s" % (self.server_url, endpoint) + response = requests.delete(the_url, + verify = self.verify_server, + headers = {"Authorization": self.token}) + + if response.status_code != requests.codes.ok: + self.log.error("http DELETE %s <--> (%s - %s)\n%s", + the_url, response.status_code, response.reason, + response.text) + else: + self.log.info("http DELETE %s <--> (%s - %s)", + the_url, response.status_code, response.text) return response diff --git a/src/pybind/mgr/ansible/module.py b/src/pybind/mgr/ansible/module.py index b94f0057325bd..297f98ec54a35 100644 --- a/src/pybind/mgr/ansible/module.py +++ b/src/pybind/mgr/ansible/module.py @@ -4,13 +4,16 @@ ceph-mgr Ansible orchestrator module The external Orchestrator is the Ansible runner service (RESTful https service) """ +import types import json +import requests + from mgr_module import MgrModule import orchestrator from .ansible_runner_svc import Client, PlayBookExecution, ExecutionStatusCode,\ - EVENT_DATA_URL + EVENT_DATA_URL, AnsibleRunnerServiceError # Time to clean the completions list WAIT_PERIOD = 10 @@ -29,7 +32,18 @@ ADD_OSD_PLAYBOOK = "add-osd.yml" # Used in the remove_osds method REMOVE_OSD_PLAYBOOK = "shrink-osd.yml" +# Default name for the inventory group for hosts managed by the Orchestrator +ORCHESTRATOR_GROUP = "orchestrator" + +# URLs for Ansible Runner Operations +# Add or remove host in one group +URL_ADD_RM_HOSTS = "api/v1/hosts/{host_name}/groups/{inventory_group}" +# Retrieve the groups where the host is included in. +URL_GET_HOST_GROUPS = "api/v1/hosts/{host_name}" + +# Manage groups +URL_MANAGE_GROUP = "api/v1/groups/{group_name}" class AnsibleReadOperation(orchestrator.ReadCompletion): """ A read operation means to obtain information from the cluster. @@ -37,7 +51,7 @@ class AnsibleReadOperation(orchestrator.ReadCompletion): def __init__(self, client, playbook, logger, result_pattern, params, - querystr_dict={}): + querystr_dict=None): super(AnsibleReadOperation, self).__init__() # Private attributes @@ -70,6 +84,9 @@ class AnsibleReadOperation(orchestrator.ReadCompletion): params, querystr_dict) + def __str__(self): + return "Playbook {playbook_name}".format(playbook_name = self.playbook) + @property def is_complete(self): return self._is_complete @@ -105,8 +122,11 @@ class AnsibleReadOperation(orchestrator.ReadCompletion): def execute_playbook(self): """Launch the execution of the playbook with the parameters configured """ - - self.pb_execution.launch() + try: + self.pb_execution.launch() + except AnsibleRunnerServiceError: + self._status = ExecutionStatusCode.ERROR + raise def update_result(self): """Output of the read operation @@ -146,13 +166,14 @@ class AnsibleChangeOperation(orchestrator.WriteCompletion): def __init__(self): super(AnsibleChangeOperation, self).__init__() - self.error = False + self._status = ExecutionStatusCode.NOT_LAUNCHED + self._result = None + @property def status(self): """Return the status code of the operation """ - #TODO - return 0 + raise NotImplementedError() @property def is_persistent(self): @@ -162,12 +183,12 @@ class AnsibleChangeOperation(orchestrator.WriteCompletion): had been written to a manifest, but that the update had not necessarily been pushed out to the cluster. - In the case of Ansible is always False. - because a initiated playbook execution will need always to be - relaunched if it fails. + :return Boolean: True if the execution of the Ansible Playbook or the + operation over the Ansible Runner Service has finished """ - return False + return self._status in [ExecutionStatusCode.SUCCESS, + ExecutionStatusCode.ERROR] @property def is_effective(self): @@ -178,19 +199,96 @@ class AnsibleChangeOperation(orchestrator.WriteCompletion): In the case of Ansible, this will be True if the playbooks has been executed succesfully. - :return Boolean: if the playbook has been executed succesfully + :return Boolean: if the playbook/ARS operation has been executed + succesfully """ - return self.status == ExecutionStatusCode.SUCCESS + return self._status == ExecutionStatusCode.SUCCESS @property def is_errored(self): - return self.error + return self._status == ExecutionStatusCode.ERROR @property - def is_complete(self): - return self.is_errored or (self.is_persistent and self.is_effective) + def result(self): + return self._result +class HttpOperation(object): + + def __init__(self, url, http_operation, payload="", query_string="{}"): + """ A class to ease the management of http operations + """ + self.url = url + self.http_operation = http_operation + self.payload = payload + self.query_string = query_string + self.response = None + +class ARSChangeOperation(AnsibleChangeOperation): + """Execute one or more Ansible Runner Service Operations that implies + a change in the cluster + """ + def __init__(self, client, logger, operations): + """ + :param client : Ansible Runner Service Client + :param logger : The object used to log messages + :param operations : A list of http_operation objects + :param payload : dict with http request payload + """ + super(ARSChangeOperation, self).__init__() + + assert operations, "At least one operation is needed" + self.ar_client = client + self.log = logger + self.operations = operations + + self.process_output = None + + def __str__(self): + # Use the last operation as the main + return "Ansible Runner Service: {operation} {url}".format( + operation = self.operations[-1].http_operation, + url = self.operations[-1].url) + + @property + def status(self): + """Execute the Ansible Runner Service operations and update the status + and result of the underlying Completion object. + """ + + for op in self.operations: + # Execute the right kind of http request + try: + if op.http_operation == "post": + response = self.ar_client.http_post(op.url, op.payload, op.query_string) + elif op.http_operation == "delete": + response = self.ar_client.http_delete(op.url) + elif op.http_operation == "get": + response = self.ar_client.http_get(op.url) + + # Any problem executing the secuence of operations will + # produce an errored completion object. + if response.status_code != requests.codes.ok: + self._status = ExecutionStatusCode.ERROR + self._result = response.text + return self._status + + # Any kind of error communicating with ARS or preventing + # to have a right http response + except AnsibleRunnerServiceError as ex: + self._status = ExecutionStatusCode.ERROR + self._result = str(ex) + return self._status + + # If this point is reached, all the operations has been succesfuly + # executed, and the final result is updated + self._status = ExecutionStatusCode.SUCCESS + if self.process_output: + self._result = self.process_output(response.text) + else: + self._result = response.text + + return self._status class Module(MgrModule, orchestrator.Orchestrator): """An Orchestrator that uses to perform operations @@ -229,7 +327,7 @@ class Module(MgrModule, orchestrator.Orchestrator): # Check progress and update status in each operation # Access completion.status property do the trick for operation in completions: - self.log.info("playbook <%s> status:%s", operation.playbook, operation.status) + self.log.info("<%s> status:%s", operation, operation.status) completions = filter(lambda x: not x.is_complete, completions) @@ -253,7 +351,7 @@ class Module(MgrModule, orchestrator.Orchestrator): password = self.get_module_option('password', ''), verify_server = self.get_module_option('verify_server', True), logger = self.log) - except Exception: + except AnsibleRunnerServiceError: self.log.exception("Ansible Runner Service not available. " "Check external server status/TLS identity or " "connection options. If configuration options changed" @@ -347,6 +445,78 @@ class Module(MgrModule, orchestrator.Orchestrator): return ansible_operation + def add_host(self, host): + """ + Add a host to the Ansible Runner Service inventory in the "orchestrator" + group + + :param host: hostname + :returns : orchestrator.WriteCompletion + """ + + url_group = URL_MANAGE_GROUP.format(group_name = ORCHESTRATOR_GROUP) + + try: + # Create the orchestrator default group if not exist. + # If exists we ignore the error response + dummy_response = self.ar_client.http_post(url_group, "", {}) + + # Here, the default group exists so... + # Prepare the operation for adding the new host + add_url = URL_ADD_RM_HOSTS.format(host_name = host, + inventory_group = ORCHESTRATOR_GROUP) + + operations = [HttpOperation(add_url, "post")] + + except AnsibleRunnerServiceError as ex: + # Problems with the external orchestrator. + # Prepare the operation to return the error in a Completion object. + self.log.exception("Error checking group: %s", ex) + operations = [HttpOperation(url_group, "post")] + + return ARSChangeOperation(self.ar_client, self.log, operations) + + + def remove_host(self, host): + """ + Remove a host from all the groups in the Ansible Runner Service + inventory. + + :param host: hostname + :returns : orchestrator.WriteCompletion + """ + + operations = [] + host_groups = [] + + try: + # Get the list of groups where the host is included + groups_url = URL_GET_HOST_GROUPS.format(host_name = host) + response = self.ar_client.http_get(groups_url) + + if response.status_code == requests.codes.ok: + host_groups = json.loads(response.text)["data"]["groups"] + + except AnsibleRunnerServiceError: + self.log.exception("Error retrieving host groups") + + if not host_groups: + # Error retrieving the groups, prepare the completion object to + # execute the problematic operation just to provide the error + # to the caller + operations = [HttpOperation(groups_url, "get")] + else: + # Build the operations list + operations = list(map(lambda x: + HttpOperation(URL_ADD_RM_HOSTS.format( + host_name = host, + inventory_group = x), + "delete"), + host_groups)) + + return ARSChangeOperation(self.ar_client, self.log, operations) + + def _launch_operation(self, ansible_operation): """Launch the operation and add the operation to the completion objects ongoing diff --git a/src/pybind/mgr/ansible/tests/test_client_playbooks.py b/src/pybind/mgr/ansible/tests/test_client_playbooks.py index 4a75b5c7a0f69..98dfd3dd58fe5 100644 --- a/src/pybind/mgr/ansible/tests/test_client_playbooks.py +++ b/src/pybind/mgr/ansible/tests/test_client_playbooks.py @@ -9,7 +9,7 @@ from requests.exceptions import ConnectionError from ..ansible_runner_svc import Client, PlayBookExecution, ExecutionStatusCode, \ LOGIN_URL, API_URL, PLAYBOOK_EXEC_URL, \ - PLAYBOOK_EVENTS + PLAYBOOK_EVENTS, AnsibleRunnerServiceError SERVER_URL = "ars:5001" @@ -77,11 +77,11 @@ def mock_get_pb(mock_server, playbook_name, return_code): return PlayBookExecution(ars_client, playbook_name, logger, result_pattern = "RESULTS") -class ARSclientTest(unittest.TestCase): +class ARSclientTest(unittest.TestCase): def test_server_not_reachable(self): - with self.assertRaises(ConnectionError): + with self.assertRaises(AnsibleRunnerServiceError): ars_client = Client(SERVER_URL, USER, PASSWORD, CERTIFICATE, logger) @@ -114,6 +114,26 @@ class ARSclientTest(unittest.TestCase): self.assertTrue(ars_client.is_operative(), "Operative attribute expected to be True") + def test_server_http_delete(self): + + with requests_mock.Mocker() as mock_server: + + mock_login(mock_server) + + ars_client = Client(SERVER_URL, USER, PASSWORD, + CERTIFICATE, logger) + + url = "https://%s/test" % (SERVER_URL) + mock_server.register_uri("DELETE", + url, + json={ "status": "OK", + "msg": "", + "data": {}}, + status_code=201) + + response = ars_client.http_delete("test") + self.assertTrue(response.status_code == 201) + class PlayBookExecutionTests(unittest.TestCase): @@ -139,10 +159,11 @@ class PlayBookExecutionTests(unittest.TestCase): test_pb = mock_get_pb(mock_server, "unknown_playbook", 404) - test_pb.launch() + with self.assertRaises(AnsibleRunnerServiceError): + test_pb.launch() - self.assertEqual(test_pb.play_uuid, "", - "Playbook uuid not empty") + #self.assertEqual(test_pb.play_uuid, "", + # "Playbook uuid not empty") def test_playbook_not_launched(self): """Check right status code when Playbook execution has not been launched -- 2.39.5