git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
ceph-disk: workaround gperftool hang 9343/head
author Loic Dachary <ldachary@redhat.com>
Thu, 26 May 2016 10:55:51 +0000 (12:55 +0200)
committer Loic Dachary <ldachary@redhat.com>
Wed, 1 Jun 2016 12:24:27 +0000 (14:24 +0200)
Temporary workaround: if ceph-osd --mkfs does not
complete within 5 minutes, assume it is blocked
because of https://github.com/gperftools/gperftools/issues/786

References http://tracker.ceph.com/issues/13522

Signed-off-by: Loic Dachary <loic@dachary.org>
src/ceph-disk/ceph_disk/main.py
src/ceph-disk/tests/ceph-disk.sh

index 777014f844482989d96ebc56916ddcdd7a683486..baa064d15de8256d37e3b388bddaa7bc6d116379 100755 (executable)
@@ -2642,6 +2642,36 @@ class PrepareBluestoreData(PrepareData):
         write_one_line(path, 'type', 'bluestore')
 
 
+#
+# Temporary workaround: if ceph-osd --mkfs does not
+# complete within 5 minutes, assume it is blocked
+# because of http://tracker.ceph.com/issues/13522
+# and retry a few times.
+#
+# Replace calls to this function with command_check_call
+# once http://tracker.ceph.com/issues/13522 is fixed.
+#
+def ceph_osd_mkfs(arguments):
+    timeout = _get_command_executable(['timeout'])
+    mkfs_ok = False
+    error = 'unknown error'
+    for delay in os.environ.get('CEPH_OSD_MKFS_DELAYS',
+                                '300 300 300 300 300').split():
+        try:
+            _check_output(timeout + [delay] + arguments)
+            mkfs_ok = True
+            break
+        except subprocess.CalledProcessError as e:
+            error = e.output
+            if e.returncode == 124:  # timeout fired, retry
+                LOG.debug('%s timed out : %s (retry)'
+                          % (str(arguments), error))
+            else:
+                break
+    if not mkfs_ok:
+        raise Error('%s failed : %s' % (str(arguments), error))
+
+
 def mkfs(
     path,
     cluster,
@@ -2663,7 +2693,7 @@ def mkfs(
     osd_type = read_one_line(path, 'type')
 
     if osd_type == 'bluestore':
-        command_check_call(
+        ceph_osd_mkfs(
             [
                 'ceph-osd',
                 '--cluster', cluster,
@@ -2679,7 +2709,7 @@ def mkfs(
             ],
         )
     else:
-        command_check_call(
+        ceph_osd_mkfs(
             [
                 'ceph-osd',
                 '--cluster', cluster,
index ce355713ea9a5e96ff6b90b36eac62bab7d6a037..ba0b4fd20dcf8ffeee64de8d693a3635cf61cd4d 100644 (file)
@@ -1,7 +1,7 @@
 #!/bin/bash
 #
 # Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
-# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
+# Copyright (C) 2014, 2015, 2016 Red Hat <contact@redhat.com>
 #
 # Author: Loic Dachary <loic@dachary.org>
 #
@@ -153,7 +153,7 @@ function tweak_path() {
     command_fixture ceph-conf || return 1
     command_fixture ceph-osd || return 1
 
-    test_activate_dir
+    test_activate_dir || return 1
 
     [ ! -f $DIR/used-ceph-conf ] || return 1
     [ ! -f $DIR/used-ceph-osd ] || return 1
@@ -357,6 +357,31 @@ function test_keyring_path() {
     grep --quiet "keyring $DIR/bootstrap-osd/ceph.keyring" $DIR/test_keyring || return 1
 }
 
+# http://tracker.ceph.com/issues/13522
+function ceph_osd_fail_once_fixture() {
+    local command=ceph-osd
+    local fpath=`readlink -f $(which $command)`
+    [ "$fpath" = `readlink -f ../$command` ] || [ "$fpath" = `readlink -f $(pwd)/$command` ] || return 1
+
+    cat > $DIR/$command <<EOF
+#!/bin/bash
+if echo "\$@" | grep -e --mkfs && ! test -f $DIR/used-$command ; then
+   touch $DIR/used-$command
+   # sleep longer than the first CEPH_OSD_MKFS_DELAYS value (5) below
+   sleep 600
+else
+   exec ../$command "\$@"
+fi
+EOF
+    chmod +x $DIR/$command
+}
+
+function test_ceph_osd_mkfs() {
+    ceph_osd_fail_once_fixture || return 1
+    CEPH_OSD_MKFS_DELAYS='5 300 300' use_path test_activate_dir || return 1
+    [ -f $DIR/used-ceph-osd ] || return 1
+}
+
 function run() {
     local default_actions
     default_actions+="test_path "
@@ -369,6 +394,7 @@ function run() {
     default_actions+="test_mark_init "
     default_actions+="test_zap "
     default_actions+="test_activate_dir_bluestore "
+    default_actions+="test_ceph_osd_mkfs "
     local actions=${@:-$default_actions}
     local status
     for action in $actions  ; do