From 1e3f81486940ceddc6aa110dc878a2810538e4c6 Mon Sep 17 00:00:00 2001 From: Loic Dachary Date: Wed, 25 Feb 2015 15:32:50 +0100 Subject: [PATCH] mon: ignore crushtool validation if too long The crushtool is aborted if it takes more than mon lease seconds. Since the monitor blocks while running it, this is mandatory; otherwise the monitor will be considered down and new elections will be triggered. http://tracker.ceph.com/issues/10947 Fixes: #10947 Signed-off-by: Loic Dachary --- qa/workunits/cephtool/test.sh | 19 +++++++++++++++++++ src/crush/CrushTester.cc | 11 ++++++++++- src/crush/CrushTester.h | 3 ++- src/mon/OSDMonitor.cc | 15 ++++++++++----- 4 files changed, 41 insertions(+), 7 deletions(-) diff --git a/qa/workunits/cephtool/test.sh b/qa/workunits/cephtool/test.sh index f47f1d8efbf3..776e0ecb60e1 100755 --- a/qa/workunits/cephtool/test.sh +++ b/qa/workunits/cephtool/test.sh @@ -1394,6 +1394,24 @@ function test_mon_tell() ceph_watch_wait 'mon.1 \[DBG\] from.*cmd=\[{"prefix": "version"}\]: dispatch' } +function test_mon_crushmap_validation() +{ + local map=$TMPDIR/map + ceph osd getcrushmap -o $map + # crushtool validation times out and is ignored + cat > $TMPDIR/crushtool <&1 | grep 'took too long' + ceph tell mon.* injectargs --crushtool crushtool + # crushtool validation succeeds + ceph osd setcrushmap -i $map +} + # # New tests should be added to the TESTS array below # @@ -1428,6 +1446,7 @@ MON_TESTS+=" mon_osd_erasure_code" MON_TESTS+=" mon_osd_misc" MON_TESTS+=" mon_heap_profiler" MON_TESTS+=" mon_tell" +MON_TESTS+=" mon_crushmap_validation" OSD_TESTS+=" osd_bench" diff --git a/src/crush/CrushTester.cc b/src/crush/CrushTester.cc index 69dd5f846139..5ca4978c4c5c 100644 --- a/src/crush/CrushTester.cc +++ b/src/crush/CrushTester.cc @@ -1,4 +1,5 @@ +#include "include/stringify.h" #include "CrushTester.h" #include @@ -355,9 +356,13 @@ void CrushTester::write_integer_indexed_scalar_data_string(vector &dst, dst.push_back( data_buffer.str() ); } -int 
CrushTester::test_with_crushtool(const string& crushtool) +int CrushTester::test_with_crushtool(const string& crushtool, + int timeout) { + string timeout_string = stringify(timeout); vector cmd_args; + cmd_args.push_back("timeout"); + cmd_args.push_back(timeout_string.c_str()); cmd_args.push_back(crushtool.c_str()); cmd_args.push_back("-i"); cmd_args.push_back("-"); @@ -412,6 +417,10 @@ int CrushTester::test_with_crushtool(const string& crushtool) // major success! return 0; } + if (r == 124) { + // the test takes longer than timeout and was interrupted + return -EINTR; + } if (r == ENOENT) { err << "unable to find " << cmd_args << " to test the map"; diff --git a/src/crush/CrushTester.h b/src/crush/CrushTester.h index 740205946e57..09936ce459e3 100644 --- a/src/crush/CrushTester.h +++ b/src/crush/CrushTester.h @@ -334,7 +334,8 @@ public: } int test(); - int test_with_crushtool(const string& crushtool); + int test_with_crushtool(const string& crushtool, + int timeout); }; #endif diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 77dd265e9ff1..9112ecde6600 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -4526,12 +4526,17 @@ bool OSDMonitor::prepare_command_impl(MMonCommand *m, dout(10) << " testing map" << dendl; stringstream ess; CrushTester tester(crush, ess); - int r = tester.test_with_crushtool(g_conf->crushtool); + int r = tester.test_with_crushtool(g_conf->crushtool, + g_conf->mon_lease); if (r < 0) { - derr << "error on crush map: " << ess.str() << dendl; - ss << "Failed to parse crushmap: " << ess.str(); - err = r; - goto reply; + if (r == -EINTR) { + ss << "(note: crushtool tests not run because they took too long) "; + } else { + derr << "error on crush map: " << ess.str() << dendl; + ss << "Failed to parse crushmap: " << ess.str(); + err = r; + goto reply; + } } dout(10) << " result " << ess.str() << dendl; -- 2.47.3