git-server-git.apps.pok.os.sepia.ceph.com Git - teuthology.git/commitdiff
openstack: retry if volume create+attach operation fails
author Kyr Shatskyy <kyrylo.shatskyy@gmail.com>
Thu, 18 Oct 2018 00:41:41 +0000 (02:41 +0200)
committer Kyr Shatskyy <kyrylo.shatskyy@suse.com>
Mon, 14 Oct 2019 14:47:01 +0000 (16:47 +0200)
Once in a while, a created volume fails to become available within
a reasonable period of time. When this happens, the entire test job
fails with the following error:

2018-10-02T06:52:44.106 ERROR:teuthology.provision.openstack:'volume
target192168000058-2' reached maximum tries (100) after waiting for 200
seconds

With this commit, we catch that exception and retry with a new volume.

Signed-off-by: Nathan Cutler <ncutler@suse.com>
Signed-off-by: Kyrylo Shatskyy <kyrylo.shatskyy@suse.com>
Conflicts:
teuthology/provision/openstack.py

teuthology/openstack/__init__.py
teuthology/provision/openstack.py

index 4d865fb4775d04f279983058e098d119f1cc6963..707d8220515d209551e829c5eda30b64e9198a5a 100644 (file)
@@ -190,8 +190,7 @@ class OpenStackInstance(object):
         OpenStack().run("server delete --wait " + self['id'] +
                         " || true")
         for volume in volumes:
-            OpenStack().run("volume set --name REMOVE-ME " + volume + " || true")
-            OpenStack().run("volume delete " + volume + " || true")
+            OpenStack().volume_delete(volume)
         return True
 
 
@@ -663,6 +662,10 @@ class OpenStack(object):
     def get_default_arch(self):
         return self.get_available_archs()[0]
 
+    def volume_delete(self, name_or_id):
+        self.run("volume set --name REMOVE-ME " + name_or_id + " || true")
+        self.run("volume delete " + name_or_id + " || true")
+
 
 class TeuthologyOpenStack(OpenStack):
 
index eeef9a3972eb024d405d0d73240a8fbbdce589f4..1d1812cf01b231426431d4c83e37253ec224aa2d 100644 (file)
@@ -55,51 +55,96 @@ class ProvisionOpenStack(OpenStack):
             lab_domain=config.lab_domain)
         open(self.user_data, 'w').write(user_data)
 
-    def attach_volumes(self, name, volumes):
+    def _openstack(self, subcommand, get=None):
+        # do not use OpenStack().run because its
+        # bugous for volume create as of openstackclient 3.2.0
+        # https://bugs.launchpad.net/python-openstackclient/+bug/1619726
+        #r = OpenStack().run("%s -f json " % command)
+        json_result = misc.sh("openstack %s -f json" % subcommand)
+        r = json.loads(json_result)
+        if get:
+            return self.get_value(r, get)
+        return r
+
+    def _create_volume(self, volume_name, size):
+        """
+        Create a volume and return valume id
+        """
+        volume_id = None
+        try:
+            volume_id = self._openstack("volume show %s" % volume_name, 'id')
+        except subprocess.CalledProcessError as e:
+            if 'No volume with a name or ID' not in e.output:
+                raise e
+        if volume_id:
+            log.warn("Volume {} already exists with ID {}; using it"
+                     .format(volume_name, volume_id))
+        volume_id = self._openstack(
+            "volume create %s" % config['openstack'].get('volume-create','')
+            + " --property ownedby=%s" % config['openstack']['ip']
+            + " --size %s" % str(size) + ' ' + volume_name, 'id')
+        if volume_id:
+            log.info("Volume {} created with ID {}"
+                     .format(volume_name, volume_id))
+            return volume_id
+        else:
+            raise Exception("Failed to create volume %s" % volume_name)
+
+    def _await_volume_status(self, volume_id, status='available'):
+        """
+        Wait for volume to have status, like 'available' or 'in-use'
+        """
+        with safe_while(sleep=4, tries=50,
+                        action="volume " + volume_id) as proceed:
+            while proceed():
+                try:
+                    volume_status = \
+                        self._openstack("volume show %s" % volume_id, 'status')
+                    if volume_status == status:
+                        break
+                    else:
+                        log.debug("volume %s not in '%s' status yet"
+                                  % (volume_id, status))
+                except subprocess.CalledProcessError:
+                        log.warn("volume " + volume_id +
+                                 " not information available yet")
+
+    def _attach_volume(self, volume_id, name):
+        """
+        Attach volume to OpenStack instance.
+
+        Try and attach volume to server, wait until volume gets in-use state.
+        """
+        with safe_while(sleep=20, increment=20, tries=3,
+                        action="add volume " + volume_id) as proceed:
+            while proceed():
+                try:
+                    misc.sh("openstack server add volume " + name + " " + volume_id)
+                    break
+                except subprocess.CalledProcessError:
+                    log.warning("openstack add volume failed unexpectedly; retrying")
+        self._await_volume_status(volume_id, 'in-use')
+
+    def attach_volumes(self, server_name, volumes):
         """
         Create and attach volumes to the named OpenStack instance.
+        If attachment is failed, make another try.
         """
         for i in range(volumes['count']):
-            volume_name = name + '-' + str(i)
-            try:
-                self.run("volume show -f json " + volume_name)
-            except subprocess.CalledProcessError as e:
-                if 'No volume with a name or ID' not in e.output:
-                    raise e
-                # do not use OpenStack().run because its
-                # bugous for volume create as of openstackclient 3.2.0
-                # https://bugs.launchpad.net/python-openstackclient/+bug/1619726
-                misc.sh(
-                    "openstack volume create -f json " +
-                    config['openstack'].get('volume-create', '') + " " +
-                    " --property ownedby=" + config.openstack['ip'] +
-                    " --size " + str(volumes['size']) + " " +
-                    volume_name)
-            with safe_while(sleep=2, tries=100,
+            volume_name = server_name + '-' + str(i)
+            volume_id = None
+            with safe_while(sleep=10, tries=3,
                             action="volume " + volume_name) as proceed:
                 while proceed():
                     try:
-                        r = OpenStack().run("volume show  -f json " +
-                                            volume_name)
-                        status = self.get_value(json.loads(r), 'status')
-                        if status == 'available':
-                            break
-                        else:
-                            log.info("volume " + volume_name +
-                                     " not available yet")
-                    except subprocess.CalledProcessError:
-                            log.info("volume " + volume_name +
-                                     " not information available yet")
-            # do not use OpenStack().run because its
-            # bugous for volume
-            with safe_while(sleep=20, increment=20, tries=10,
-                            action="add volume " + volume_name) as proceed:
-                while proceed():
-                    try:
-                        misc.sh("openstack server add volume " + name + " " + volume_name)
+                        volume_id = self._create_volume(volume_name, volumes['size'])
+                        self._await_volume_status(volume_id, 'available')
+                        self._attach_volume(volume_id, server_name)
                         break
-                    except subprocess.CalledProcessError:
-                        log.warning("openstack add volume failed unexpectedly; retrying")
+                    except Exception as e:
+                        log.warning("%s" % e)
+                        if volume_id:
+                            OpenStack().volume_delete(volume_id)
 
     @staticmethod
     def ip2name(prefix, ip):