git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
ceph-disk: workaround gperftool hang 9427/head
author Loic Dachary <ldachary@redhat.com>
Thu, 26 May 2016 10:55:51 +0000 (12:55 +0200)
committer Nathan Cutler <ncutler@suse.com>
Wed, 1 Jun 2016 18:18:50 +0000 (20:18 +0200)
Temporary workaround: if ceph-osd --mkfs does not
complete within 5 minutes, assume it is blocked
because of https://github.com/gperftools/gperftools/issues/786

References http://tracker.ceph.com/issues/13522

Signed-off-by: Loic Dachary <loic@dachary.org>
(cherry picked from commit c092321c24b9ca493d90a9ccc8f0b4b9a38677f1)

src/ceph-disk/ceph_disk/main.py
src/ceph-disk/tests/ceph-disk.sh

index 45e9bb2163a37117278ddf4902d6eee0dae6efdb..99558fb26a0aeca358cb82189193e90cad2acb1f 100755 (executable)
@@ -2642,6 +2642,36 @@ class PrepareBluestoreData(PrepareData):
         write_one_line(path, 'type', 'bluestore')
 
 
+#
+# Temporary workaround: run ceph-osd --mkfs under timeout(1);
+# if it does not complete within the delay (5 minutes unless
+# CEPH_OSD_MKFS_DELAYS says otherwise), assume it is blocked
+# because of http://tracker.ceph.com/issues/13522 and retry.
+#
+# Replace calls to this function with command_check_call
+# when http://tracker.ceph.com/issues/13522 is fixed. Raises
+# Error if no attempt succeeds.
+def ceph_osd_mkfs(arguments):
+    timeout = _get_command_executable(['timeout'])  # command prefix (a list)
+    mkfs_ok = False
+    error = 'unknown error'  # reported as-is if the delay list is empty
+    for delay in os.environ.get('CEPH_OSD_MKFS_DELAYS',  # one attempt per delay
+                                '300 300 300 300 300').split():
+        try:
+            _check_output(timeout + [delay] + arguments)
+            mkfs_ok = True
+            break
+        except subprocess.CalledProcessError as e:
+            error = e.output
+            if e.returncode == 124:  # timeout fired, retry
+                LOG.debug('%s timed out : %s (retry)'
+                          % (str(arguments), error))
+            else:
+                break  # any other failure is final, do not retry
+    if not mkfs_ok:
+        raise Error('%s failed : %s' % (str(arguments), error))
+
+
 def mkfs(
     path,
     cluster,
@@ -2663,7 +2693,7 @@ def mkfs(
     osd_type = read_one_line(path, 'type')
 
     if osd_type == 'bluestore':
-        command_check_call(
+        ceph_osd_mkfs(
             [
                 'ceph-osd',
                 '--cluster', cluster,
@@ -2679,7 +2709,7 @@ def mkfs(
             ],
         )
     else:
-        command_check_call(
+        ceph_osd_mkfs(
             [
                 'ceph-osd',
                 '--cluster', cluster,
index 74bc46b6ebf0636314ffeda8dfe07692e03810df..97e18701b596c17e5ca9a44b4c36a580ca0283ef 100644 (file)
@@ -1,7 +1,7 @@
 #!/bin/bash
 #
 # Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
-# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
+# Copyright (C) 2014, 2015, 2016 Red Hat <contact@redhat.com>
 #
 # Author: Loic Dachary <loic@dachary.org>
 #
@@ -153,7 +153,7 @@ function tweak_path() {
     command_fixture ceph-conf || return 1
     command_fixture ceph-osd || return 1
 
-    test_activate_dir
+    test_activate_dir || return 1
 
     [ ! -f $DIR/used-ceph-conf ] || return 1
     [ ! -f $DIR/used-ceph-osd ] || return 1
@@ -357,6 +357,31 @@ function test_keyring_path() {
     grep --quiet "keyring $DIR/bootstrap-osd/ceph.keyring" $DIR/test_keyring || return 1
 }
 
+# http://tracker.ceph.com/issues/13522 : write a $DIR/ceph-osd wrapper whose first --mkfs call stalls
+function ceph_osd_fail_once_fixture() {  # heredoc below expands $DIR/$command now, \$@ when the wrapper runs
+    local command=ceph-osd
+    local fpath=`readlink -f $(which $command)`  # resolved path of the ceph-osd found in PATH
+    [ "$fpath" = `readlink -f ../$command` ] || [ "$fpath" = `readlink -f $(pwd)/$command` ] || return 1  # must be ../ceph-osd or ./ceph-osd
+
+    cat > $DIR/$command <<EOF
+#!/bin/bash
+if echo "\$@" | grep -e --mkfs && ! test -f $DIR/used-$command ; then
+   touch $DIR/used-$command
+   # sleep longer than the first CEPH_OSD_MKFS_DELAYS value (5) below
+   sleep 600
+else
+   exec ../$command "\$@"
+fi
+EOF
+    chmod +x $DIR/$command  # wrapper: first --mkfs touches the marker and sleeps, other calls exec ../ceph-osd
+}
+
+function test_ceph_osd_mkfs() {  # exercises the retry path taken when ceph-osd --mkfs times out
+    ceph_osd_fail_once_fixture || return 1
+    CEPH_OSD_MKFS_DELAYS='5 300 300' use_path test_activate_dir || return 1  # first delay (5) is shorter than the fixture's sleep 600; presumably use_path puts $DIR first in PATH - confirm
+    [ -f $DIR/used-ceph-osd ] || return 1  # marker is touched only by the fixture's stalled --mkfs call
+}
+
 function run() {
     local default_actions
     default_actions+="test_path "
@@ -369,6 +394,7 @@ function run() {
     default_actions+="test_mark_init "
     default_actions+="test_zap "
     default_actions+="test_activate_dir_bluestore "
+    default_actions+="test_ceph_osd_mkfs "
     local actions=${@:-$default_actions}
     local status
     for action in $actions  ; do