From: Kyr Shatskyy Date: Thu, 18 Oct 2018 00:41:41 +0000 (+0200) Subject: openstack: retry if volume create+attach operation fails X-Git-Tag: 1.1.0~211^2~24 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=729585c9506e055a742c19d5034bba6e7a9cf95c;p=teuthology.git openstack: retry if volume create+attach operation fails Once in a while, a created volume fails to become available within a reasonable period of time. When this happens, the entire test job fails with the following error: 2018-10-02T06:52:44.106 ERROR:teuthology.provision.openstack:'volume target192168000058-2' reached maximum tries (100) after waiting for 200 seconds With this commit, we catch that exception and retry with a new volume. Signed-off-by: Nathan Cutler Signed-off-by: Kyrylo Shatskyy Conflicts: teuthology/provision/openstack.py --- diff --git a/teuthology/openstack/__init__.py b/teuthology/openstack/__init__.py index 4d865fb4..707d8220 100644 --- a/teuthology/openstack/__init__.py +++ b/teuthology/openstack/__init__.py @@ -190,8 +190,7 @@ class OpenStackInstance(object): OpenStack().run("server delete --wait " + self['id'] + " || true") for volume in volumes: - OpenStack().run("volume set --name REMOVE-ME " + volume + " || true") - OpenStack().run("volume delete " + volume + " || true") + OpenStack().volume_delete(volume) return True @@ -663,6 +662,10 @@ class OpenStack(object): def get_default_arch(self): return self.get_available_archs()[0] + def volume_delete(self, name_or_id): + self.run("volume set --name REMOVE-ME " + name_or_id + " || true") + self.run("volume delete " + name_or_id + " || true") + class TeuthologyOpenStack(OpenStack): diff --git a/teuthology/provision/openstack.py b/teuthology/provision/openstack.py index eeef9a39..1d1812cf 100644 --- a/teuthology/provision/openstack.py +++ b/teuthology/provision/openstack.py @@ -55,51 +55,96 @@ class ProvisionOpenStack(OpenStack): lab_domain=config.lab_domain) open(self.user_data, 'w').write(user_data) 
- def attach_volumes(self, name, volumes): + def _openstack(self, subcommand, get=None): + # do not use OpenStack().run because its + # bugous for volume create as of openstackclient 3.2.0 + # https://bugs.launchpad.net/python-openstackclient/+bug/1619726 + #r = OpenStack().run("%s -f json " % command) + json_result = misc.sh("openstack %s -f json" % subcommand) + r = json.loads(json_result) + if get: + return self.get_value(r, get) + return r + + def _create_volume(self, volume_name, size): + """ + Create a volume and return valume id + """ + volume_id = None + try: + volume_id = self._openstack("volume show %s" % volume_name, 'id') + except subprocess.CalledProcessError as e: + if 'No volume with a name or ID' not in e.output: + raise e + if volume_id: + log.warn("Volume {} already exists with ID {}; using it" + .format(volume_name, volume_id)) + volume_id = self._openstack( + "volume create %s" % config['openstack'].get('volume-create','') + + " --property ownedby=%s" % config['openstack']['ip'] + + " --size %s" % str(size) + ' ' + volume_name, 'id') + if volume_id: + log.info("Volume {} created with ID {}" + .format(volume_name, volume_id)) + return volume_id + else: + raise Exception("Failed to create volume %s" % volume_name) + + def _await_volume_status(self, volume_id, status='available'): + """ + Wait for volume to have status, like 'available' or 'in-use' + """ + with safe_while(sleep=4, tries=50, + action="volume " + volume_id) as proceed: + while proceed(): + try: + volume_status = \ + self._openstack("volume show %s" % volume_id, 'status') + if volume_status == status: + break + else: + log.debug("volume %s not in '%s' status yet" + % (volume_id, status)) + except subprocess.CalledProcessError: + log.warn("volume " + volume_id + + " not information available yet") + + def _attach_volume(self, volume_id, name): + """ + Attach volume to OpenStack instance. + + Try and attach volume to server, wait until volume gets in-use state. 
+ """ + with safe_while(sleep=20, increment=20, tries=3, + action="add volume " + volume_id) as proceed: + while proceed(): + try: + misc.sh("openstack server add volume " + name + " " + volume_id) + break + except subprocess.CalledProcessError: + log.warning("openstack add volume failed unexpectedly; retrying") + self._await_volume_status(volume_id, 'in-use') + + def attach_volumes(self, server_name, volumes): """ Create and attach volumes to the named OpenStack instance. + If attachment is failed, make another try. """ for i in range(volumes['count']): - volume_name = name + '-' + str(i) - try: - self.run("volume show -f json " + volume_name) - except subprocess.CalledProcessError as e: - if 'No volume with a name or ID' not in e.output: - raise e - # do not use OpenStack().run because its - # bugous for volume create as of openstackclient 3.2.0 - # https://bugs.launchpad.net/python-openstackclient/+bug/1619726 - misc.sh( - "openstack volume create -f json " + - config['openstack'].get('volume-create', '') + " " + - " --property ownedby=" + config.openstack['ip'] + - " --size " + str(volumes['size']) + " " + - volume_name) - with safe_while(sleep=2, tries=100, + volume_name = server_name + '-' + str(i) + volume_id = None + with safe_while(sleep=10, tries=3, action="volume " + volume_name) as proceed: while proceed(): try: - r = OpenStack().run("volume show -f json " + - volume_name) - status = self.get_value(json.loads(r), 'status') - if status == 'available': - break - else: - log.info("volume " + volume_name + - " not available yet") - except subprocess.CalledProcessError: - log.info("volume " + volume_name + - " not information available yet") - # do not use OpenStack().run because its - # bugous for volume - with safe_while(sleep=20, increment=20, tries=10, - action="add volume " + volume_name) as proceed: - while proceed(): - try: - misc.sh("openstack server add volume " + name + " " + volume_name) + volume_id = self._create_volume(volume_name, 
volumes['size']) + self._await_volume_status(volume_id, 'available') + self._attach_volume(volume_id, server_name) break - except subprocess.CalledProcessError: - log.warning("openstack add volume failed unexpectedly; retrying") + except Exception as e: + log.warning("%s" % e) + if volume_id: + OpenStack().volume_delete(volume_id) @staticmethod def ip2name(prefix, ip):