]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/ansible: Add/remove hosts 26241/head
authorJuan Miguel Olmo Martínez <jolmomar@redhat.com>
Fri, 1 Feb 2019 11:06:04 +0000 (12:06 +0100)
committerJuan Miguel Olmo Martínez <jolmomar@redhat.com>
Thu, 21 Feb 2019 11:01:53 +0000 (12:01 +0100)
- Implementation of operations add/rm hosts
- Improvement of error management in Ansible Runner Service Client

- Addressed @sebastian-philipp suggestions and findings
- Remove the <role> parameter of hosts operations.
Now all the hosts are placed by default in a specific Ansible Inventory group called <orchestrator>.
Discussion on-going about to keep Host management API endpoint or not.
If hosts management is removed, further modifications will be needed to:
- Keep updated the <host inventory> in the orchestrator
- Manage hosts groups when operations are executed
- Adapted new commands (host add/rm) to use to the new <orchestrator_cli> syntax

- Improved error management
- Added return information to Add/remove host methods docstring

- Removed redundant exceptions following @sebastian-philipp suggestion

- Removed duplicated code
- Removed extra space

- Fixed unit tests
- Improved error management in launch playbook

- removed unused param in docstring (orchestrator.py)

Signed-off-by: Juan Miguel Olmo Martínez <jolmomar@redhat.com>
src/pybind/mgr/ansible/ansible_runner_svc.py
src/pybind/mgr/ansible/module.py
src/pybind/mgr/ansible/tests/test_client_playbooks.py

index ac49d04067390e6e23d97696c79d0a184cb50e70..636e20b0fdcde5b42c9e8e3a76d9ee13deab59a3 100644 (file)
@@ -4,6 +4,7 @@ Client module to interact with the Ansible Runner Service
 import requests
 import json
 import re
+from functools import wraps
 
 # Ansible Runner service API endpoints
 API_URL = "api"
@@ -12,6 +13,20 @@ PLAYBOOK_EXEC_URL = "api/v1/playbooks"
 PLAYBOOK_EVENTS = "api/v1/jobs/%s/events"
 EVENT_DATA_URL = "api/v1/jobs/%s/events/%s"
 
+class AnsibleRunnerServiceError(Exception):
+    pass
+
+def handle_requests_exceptions(func):
+    """Decorator to manage errors raised by requests library
+    """
+    @wraps(func)
+    def inner(*args, **kwargs):
+        try:
+            return func(*args, **kwargs)
+        except requests.exceptions.RequestException as ex:
+               raise AnsibleRunnerServiceError(str(ex))
+    return inner
+
 class ExecutionStatusCode(object):
     """Execution status of playbooks ( 'msg' field in playbook status request)
     """
@@ -21,14 +36,13 @@ class ExecutionStatusCode(object):
     ON_GOING = 2  # Playbook is being executed              msg = running
     NOT_LAUNCHED = 3  # Not initialized
 
-
 class PlayBookExecution(object):
     """Object to provide all the results of a Playbook execution
     """
 
     def __init__(self, rest_client, playbook, logger, result_pattern="",
-                 the_params={},
-                 querystr_dict={}):
+                 the_params=None,
+                 querystr_dict=None):
 
         self.rest_client = rest_client
 
@@ -54,20 +68,26 @@ class PlayBookExecution(object):
         """ Launch the playbook execution
         """
 
+        response = None
         endpoint = "%s/%s" % (PLAYBOOK_EXEC_URL, self.playbook)
 
-        response = self.rest_client.http_post(endpoint,
-                                              self.params,
-                                              self.querystr_dict)
-
-        if response:
+        try:
+            response = self.rest_client.http_post(endpoint,
+                                                    self.params,
+                                                    self.querystr_dict)
+        except AnsibleRunnerServiceError:
+            self.log.exception("Error launching playbook <%s>", self.playbook)
+            raise
+
+        # Here we have a server response, but an error trying
+        # to launch the playbook is also posible (ex. 404, playbook not found)
+        # Error already logged by rest_client, but an error should be raised
+        # to the orchestrator (via completion object)
+        if response.ok:
             self.play_uuid = json.loads(response.text)["data"]["play_uuid"]
             self.log.info("Playbook execution launched succesfuly")
         else:
-            # An error launching the execution implies play_uuid empty
-            self.play_uuid = ""
-            self.log.error("Playbook launch error. \
-                            Check <endpoint> request result")
+            raise AnsibleRunnerServiceError(response.reason)
 
     def get_status(self):
         """ Return the status of the execution
@@ -79,6 +99,7 @@ class PlayBookExecution(object):
         """
 
         status_value = ExecutionStatusCode.NOT_LAUNCHED
+        response = None
 
         if self.play_uuid == '-': # Initialized
             status_value = ExecutionStatusCode.NOT_LAUNCHED
@@ -86,7 +107,12 @@ class PlayBookExecution(object):
             status_value = ExecutionStatusCode.ERROR
         else:
             endpoint = "%s/%s" % (PLAYBOOK_EXEC_URL, self.play_uuid)
-            response = self.rest_client.http_get(endpoint)
+
+            try:
+                response = self.rest_client.http_get(endpoint)
+            except AnsibleRunnerServiceError:
+                self.log.exception("Error getting playbook <%s> status",
+                                   self.playbook)
 
             if response:
                 the_status = json.loads(response.text)["msg"]
@@ -108,11 +134,14 @@ class PlayBookExecution(object):
 
         @returns: the events that matches with the patterns provided
         """
-
+        response = None
         if not self.play_uuid:
             return {}
 
-        response = self.rest_client.http_get(PLAYBOOK_EVENTS % self.play_uuid)
+        try:
+            response = self.rest_client.http_get(PLAYBOOK_EVENTS % self.play_uuid)
+        except AnsibleRunnerServiceError:
+            self.log.exception("Error getting playbook <%s> result", self.playbook)
 
         if not response:
             result_events = {}
@@ -170,6 +199,7 @@ class Client(object):
         # Log in the server and get a token
         self.login()
 
+    @handle_requests_exceptions
     def login(self):
         """ Login with user credentials to obtain a valid token
         """
@@ -190,7 +220,7 @@ class Client(object):
             self.token = json.loads(response.text)["data"]["token"]
             self.log.info("Connection with Ansible Runner Service is operative")
 
-
+    @handle_requests_exceptions
     def is_operative(self):
         """Indicates if the connection with the Ansible runner Server is ok
         """
@@ -207,6 +237,7 @@ class Client(object):
         else:
             return False
 
+    @handle_requests_exceptions
     def http_get(self, endpoint):
         """Execute an http get request
 
@@ -215,29 +246,23 @@ class Client(object):
         :returns: A requests object
         """
 
-        response = None
-
-        try:
-            the_url = "%s/%s" % (self.server_url, endpoint)
-            r = requests.get(the_url,
-                             verify = self.verify_server,
-                             headers = {"Authorization": self.token})
-
-            if r.status_code != requests.codes.ok:
-                self.log.error("http GET %s <--> (%s - %s)\n%s",
-                               the_url, r.status_code, r.reason, r.text)
-            else:
-                self.log.info("http GET %s <--> (%s - %s)",
-                              the_url, r.status_code, r.text)
-
-            response = r
+        the_url = "%s/%s" % (self.server_url, endpoint)
+        response = requests.get(the_url,
+                            verify = self.verify_server,
+                            headers = {"Authorization": self.token})
 
-        except Exception:
-            self.log.exception("Ansible runner service(GET %s)", the_url)
+        if response.status_code != requests.codes.ok:
+            self.log.error("http GET %s <--> (%s - %s)\n%s",
+                            the_url, response.status_code, response.reason,
+                            response.text)
+        else:
+            self.log.info("http GET %s <--> (%s - %s)",
+                            the_url, response.status_code, response.text)
 
         return response
 
-    def http_post(self, endpoint, payload, params_dict = {}):
+    @handle_requests_exceptions
+    def http_post(self, endpoint, payload, params_dict):
         """Execute an http post request
 
         :param endpoint: Ansible Runner service RESTful API endpoint
@@ -247,27 +272,45 @@ class Client(object):
         :returns: A requests object
         """
 
-        response = None
+        the_url = "%s/%s" % (self.server_url, endpoint)
+        response = requests.post(the_url,
+                            verify = self.verify_server,
+                            headers = {"Authorization": self.token,
+                                        "Content-type": "application/json"},
+                            json = payload,
+                            params = params_dict)
 
-        try:
-            the_url = "%s/%s" % (self.server_url, endpoint)
-            r = requests.post(the_url,
-                              verify = self.verify_server,
-                              headers = {"Authorization": self.token,
-                                         "Content-type": "application/json"},
-                              json = payload,
-                              params = params_dict)
-
-            if r.status_code != requests.codes.ok:
-                self.log.error("http POST %s [%s] <--> (%s - %s:%s)\n",
-                              the_url, payload, r.status_code, r.reason, r.text)
-            else:
-                self.log.info("http POST %s <--> (%s - %s)",
-                              the_url, r.status_code, r.text)
-            response = r
+        if response.status_code != requests.codes.ok:
+            self.log.error("http POST %s [%s] <--> (%s - %s:%s)\n",
+                            the_url, payload, response.status_code,
+                            response.reason, response.text)
+        else:
+            self.log.info("http POST %s <--> (%s - %s)",
+                            the_url, response.status_code, response.text)
+
+        return response
 
-        except Exception:
-            self.log.exception("Ansible runner service(POST %s)", the_url)
+    @handle_requests_exceptions
+    def http_delete(self, endpoint):
+        """Execute an http delete request
+
+        :param endpoint: Ansible Runner service RESTful API endpoint
+
+        :returns: A requests object
+        """
+
+        the_url = "%s/%s" % (self.server_url, endpoint)
+        response = requests.delete(the_url,
+                            verify = self.verify_server,
+                            headers = {"Authorization": self.token})
+
+        if response.status_code != requests.codes.ok:
+            self.log.error("http DELETE %s <--> (%s - %s)\n%s",
+                            the_url, response.status_code, response.reason,
+                            response.text)
+        else:
+            self.log.info("http DELETE %s <--> (%s - %s)",
+                            the_url, response.status_code, response.text)
 
         return response
 
index b94f0057325bd2deded1e4a2dc2f158c46b560cf..297f98ec54a35e23861c04f8ad9aa5c1b4c322fd 100644 (file)
@@ -4,13 +4,16 @@ ceph-mgr Ansible orchestrator module
 The external Orchestrator is the Ansible runner service (RESTful https service)
 """
 
+import types
 import json
+import requests
+
 
 from mgr_module import MgrModule
 import orchestrator
 
 from .ansible_runner_svc import Client, PlayBookExecution, ExecutionStatusCode,\
-                               EVENT_DATA_URL
+                               EVENT_DATA_URL, AnsibleRunnerServiceError
 
 # Time to clean the completions list
 WAIT_PERIOD = 10
@@ -29,7 +32,18 @@ ADD_OSD_PLAYBOOK = "add-osd.yml"
 # Used in the remove_osds method
 REMOVE_OSD_PLAYBOOK = "shrink-osd.yml"
 
+# Default name for the inventory group for hosts managed by the Orchestrator
+ORCHESTRATOR_GROUP = "orchestrator"
+
+# URLs for Ansible Runner Operations
+# Add or remove host in one group
+URL_ADD_RM_HOSTS = "api/v1/hosts/{host_name}/groups/{inventory_group}"
 
+# Retrieve the groups where the host is included in.
+URL_GET_HOST_GROUPS = "api/v1/hosts/{host_name}"
+
+# Manage groups
+URL_MANAGE_GROUP = "api/v1/groups/{group_name}"
 
 class AnsibleReadOperation(orchestrator.ReadCompletion):
     """ A read operation means to obtain information from the cluster.
@@ -37,7 +51,7 @@ class AnsibleReadOperation(orchestrator.ReadCompletion):
 
     def __init__(self, client, playbook, logger, result_pattern,
                  params,
-                 querystr_dict={}):
+                 querystr_dict=None):
         super(AnsibleReadOperation, self).__init__()
 
         # Private attributes
@@ -70,6 +84,9 @@ class AnsibleReadOperation(orchestrator.ReadCompletion):
                                               params,
                                               querystr_dict)
 
+    def __str__(self):
+         return "Playbook {playbook_name}".format(playbook_name = self.playbook)
+
     @property
     def is_complete(self):
         return self._is_complete
@@ -105,8 +122,11 @@ class AnsibleReadOperation(orchestrator.ReadCompletion):
     def execute_playbook(self):
         """Launch the execution of the playbook with the parameters configured
         """
-
-        self.pb_execution.launch()
+        try:
+            self.pb_execution.launch()
+        except AnsibleRunnerServiceError:
+            self._status = ExecutionStatusCode.ERROR
+            raise
 
     def update_result(self):
         """Output of the read operation
@@ -146,13 +166,14 @@ class AnsibleChangeOperation(orchestrator.WriteCompletion):
     def __init__(self):
         super(AnsibleChangeOperation, self).__init__()
 
-        self.error = False
+        self._status = ExecutionStatusCode.NOT_LAUNCHED
+        self._result = None
+
     @property
     def status(self):
         """Return the status code of the operation
         """
-        #TODO
-        return 0
+        raise NotImplementedError()
 
     @property
     def is_persistent(self):
@@ -162,12 +183,12 @@ class AnsibleChangeOperation(orchestrator.WriteCompletion):
         had been written to a manifest, but that the update
         had not necessarily been pushed out to the cluster.
 
-        In the case of Ansible is always False.
-        because a initiated playbook execution will need always to be
-        relaunched if it fails.
+        :return Boolean: True if the execution of the Ansible Playbook or the
+                         operation over the Ansible Runner Service has finished
         """
 
-        return False
+        return self._status in [ExecutionStatusCode.SUCCESS,
+                                ExecutionStatusCode.ERROR]
 
     @property
     def is_effective(self):
@@ -178,19 +199,96 @@ class AnsibleChangeOperation(orchestrator.WriteCompletion):
         In the case of Ansible, this will be True if the playbooks has been
         executed succesfully.
 
-        :return Boolean: if the playbook has been executed succesfully
+        :return Boolean: if the playbook/ARS operation has been executed
+                         succesfully
         """
 
-        return self.status == ExecutionStatusCode.SUCCESS
+        return self._status == ExecutionStatusCode.SUCCESS
 
     @property
     def is_errored(self):
-        return self.error
+        return self._status == ExecutionStatusCode.ERROR
 
     @property
-    def is_complete(self):
-        return self.is_errored or (self.is_persistent and self.is_effective)
+    def result(self):
+        return self._result
 
+class HttpOperation(object):
+
+    def __init__(self, url, http_operation, payload="", query_string="{}"):
+        """ A class to ease the management of http operations
+        """
+        self.url = url
+        self.http_operation = http_operation
+        self.payload = payload
+        self.query_string = query_string
+        self.response = None
+
+class ARSChangeOperation(AnsibleChangeOperation):
+    """Execute one or more Ansible Runner Service Operations that implies
+    a change in the cluster
+    """
+    def __init__(self, client, logger, operations):
+        """
+        :param client         : Ansible Runner Service Client
+        :param logger         : The object used to log messages
+        :param operations     : A list of http_operation objects
+        :param payload        : dict with http request payload
+        """
+        super(ARSChangeOperation, self).__init__()
+
+        assert operations, "At least one operation is needed"
+        self.ar_client = client
+        self.log = logger
+        self.operations = operations
+
+        self.process_output = None
+
+    def __str__(self):
+             # Use the last operation as the main
+             return "Ansible Runner Service: {operation} {url}".format(
+                operation = self.operations[-1].http_operation,
+                url = self.operations[-1].url)
+
+    @property
+    def status(self):
+        """Execute the Ansible Runner Service operations and update the status
+        and result of the underlying Completion object.
+        """
+
+        for op in self.operations:
+            # Execute the right kind of http request
+            try:
+                if op.http_operation == "post":
+                    response = self.ar_client.http_post(op.url, op.payload, op.query_string)
+                elif op.http_operation == "delete":
+                    response = self.ar_client.http_delete(op.url)
+                elif op.http_operation == "get":
+                    response = self.ar_client.http_get(op.url)
+
+                # Any problem executing the secuence of operations will
+                # produce an errored completion object.
+                if response.status_code != requests.codes.ok:
+                    self._status = ExecutionStatusCode.ERROR
+                    self._result = response.text
+                    return self._status
+
+            # Any kind of error communicating with ARS or preventing
+            # to have a right http response
+            except AnsibleRunnerServiceError as ex:
+                self._status = ExecutionStatusCode.ERROR
+                self._result = str(ex)
+                return self._status
+
+        # If this point is reached, all the operations has been succesfuly
+        # executed, and the final result is updated
+        self._status = ExecutionStatusCode.SUCCESS
+        if self.process_output:
+            self._result = self.process_output(response.text)
+        else:
+            self._result = response.text
+
+        return self._status
 
 class Module(MgrModule, orchestrator.Orchestrator):
     """An Orchestrator that uses <Ansible Runner Service> to perform operations
@@ -229,7 +327,7 @@ class Module(MgrModule, orchestrator.Orchestrator):
         # Check progress and update status in each operation
         # Access completion.status property do the trick
         for operation in completions:
-            self.log.info("playbook <%s> status:%s", operation.playbook, operation.status)
+            self.log.info("<%s> status:%s", operation, operation.status)
 
         completions = filter(lambda x: not x.is_complete, completions)
 
@@ -253,7 +351,7 @@ class Module(MgrModule, orchestrator.Orchestrator):
                                     password = self.get_module_option('password', ''),
                                     verify_server = self.get_module_option('verify_server', True),
                                     logger = self.log)
-        except Exception:
+        except AnsibleRunnerServiceError:
             self.log.exception("Ansible Runner Service not available. "
                           "Check external server status/TLS identity or "
                           "connection options. If configuration options changed"
@@ -347,6 +445,78 @@ class Module(MgrModule, orchestrator.Orchestrator):
 
         return ansible_operation
 
+    def add_host(self, host):
+        """
+        Add a host to the Ansible Runner Service inventory in the "orchestrator"
+        group
+
+        :param host: hostname
+        :returns : orchestrator.WriteCompletion
+        """
+
+        url_group = URL_MANAGE_GROUP.format(group_name = ORCHESTRATOR_GROUP)
+
+        try:
+            # Create the orchestrator default group if not exist.
+            # If exists we ignore the error response
+            dummy_response = self.ar_client.http_post(url_group, "", {})
+
+            # Here, the default group exists so...
+            # Prepare the operation for adding the new host
+            add_url = URL_ADD_RM_HOSTS.format(host_name = host,
+                                              inventory_group = ORCHESTRATOR_GROUP)
+
+            operations =  [HttpOperation(add_url, "post")]
+
+        except AnsibleRunnerServiceError as ex:
+            # Problems with the external orchestrator.
+            # Prepare the operation to return the error in a Completion object.
+            self.log.exception("Error checking <orchestrator> group: %s", ex)
+            operations =  [HttpOperation(url_group, "post")]
+
+        return ARSChangeOperation(self.ar_client, self.log, operations)
+
+
+    def remove_host(self, host):
+        """
+        Remove a host from all the groups in the Ansible Runner Service
+        inventory.
+
+        :param host: hostname
+        :returns : orchestrator.WriteCompletion
+        """
+
+        operations = []
+        host_groups = []
+
+        try:
+            # Get the list of groups where the host is included
+            groups_url = URL_GET_HOST_GROUPS.format(host_name = host)
+            response = self.ar_client.http_get(groups_url)
+
+            if response.status_code == requests.codes.ok:
+                host_groups = json.loads(response.text)["data"]["groups"]
+
+        except AnsibleRunnerServiceError:
+            self.log.exception("Error retrieving host groups")
+
+        if not host_groups:
+            # Error retrieving the groups, prepare the completion object to
+            # execute the problematic operation just to provide the error
+            # to the caller
+            operations = [HttpOperation(groups_url, "get")]
+        else:
+            # Build the operations list
+            operations = list(map(lambda x:
+                                  HttpOperation(URL_ADD_RM_HOSTS.format(
+                                                    host_name = host,
+                                                    inventory_group = x),
+                                                "delete"),
+                                  host_groups))
+
+        return ARSChangeOperation(self.ar_client, self.log, operations)
+
+
     def _launch_operation(self, ansible_operation):
         """Launch the operation and add the operation to the completion objects
         ongoing
index 4a75b5c7a0f697a840182b3e4e3d0389ed21d88c..98dfd3dd58fe563eab21ddc341d72b17c7df696b 100644 (file)
@@ -9,7 +9,7 @@ from requests.exceptions import ConnectionError
 
 from ..ansible_runner_svc import Client, PlayBookExecution, ExecutionStatusCode, \
                                 LOGIN_URL, API_URL, PLAYBOOK_EXEC_URL, \
-                                PLAYBOOK_EVENTS
+                                PLAYBOOK_EVENTS, AnsibleRunnerServiceError
 
 
 SERVER_URL = "ars:5001"
@@ -77,11 +77,11 @@ def mock_get_pb(mock_server, playbook_name, return_code):
     return PlayBookExecution(ars_client, playbook_name, logger,
                              result_pattern = "RESULTS")
 
-class  ARSclientTest(unittest.TestCase):
+class ARSclientTest(unittest.TestCase):
 
     def test_server_not_reachable(self):
 
-        with self.assertRaises(ConnectionError):
+        with self.assertRaises(AnsibleRunnerServiceError):
             ars_client = Client(SERVER_URL, USER, PASSWORD,
                                 CERTIFICATE, logger)
 
@@ -114,6 +114,26 @@ class  ARSclientTest(unittest.TestCase):
             self.assertTrue(ars_client.is_operative(),
                             "Operative attribute expected to be True")
 
+    def test_server_http_delete(self):
+
+        with requests_mock.Mocker() as mock_server:
+
+            mock_login(mock_server)
+
+            ars_client = Client(SERVER_URL, USER, PASSWORD,
+                                CERTIFICATE, logger)
+
+            url = "https://%s/test" % (SERVER_URL)
+            mock_server.register_uri("DELETE",
+                        url,
+                        json={ "status": "OK",
+                               "msg": "",
+                               "data": {}},
+                        status_code=201)
+
+            response = ars_client.http_delete("test")
+            self.assertTrue(response.status_code == 201)
+
 class PlayBookExecutionTests(unittest.TestCase):
 
 
@@ -139,10 +159,11 @@ class PlayBookExecutionTests(unittest.TestCase):
 
             test_pb = mock_get_pb(mock_server, "unknown_playbook", 404)
 
-            test_pb.launch()
+            with self.assertRaises(AnsibleRunnerServiceError):
+                test_pb.launch()
 
-            self.assertEqual(test_pb.play_uuid, "",
-                            "Playbook uuid not empty")
+            #self.assertEqual(test_pb.play_uuid, "",
+            #                "Playbook uuid not empty")
 
     def test_playbook_not_launched(self):
         """Check right status code when Playbook execution has not been launched