From eaa1179965c840c0f935c570e146395583f198b6 Mon Sep 17 00:00:00 2001 From: Kefu Chai Date: Fri, 4 May 2018 16:11:53 +0800 Subject: [PATCH] librados: timeout on mgr_command() because the mgrclient will be waiting for the mgrmap if the mgrmap is not available, when the client is about to send a mgr command. and monitor will drop the subscription requests if the client does not have enough cap for reading mon, so unlike mon commands, the client won't get an EACCES return code in this case. in this change, a timeout machinery is introduced. and the client will wait for "rados-mon-op-timeout" before it gives up. if this setting is 0, it will wait forever. Fixes: https://tracker.ceph.com/issues/23627 Signed-off-by: Kefu Chai --- qa/workunits/mon/caps.sh | 16 ++++++++++++++-- src/ceph.in | 8 ++++++-- src/common/Cond.h | 9 +++++++++ src/librados/RadosClient.cc | 6 +++++- 4 files changed, 34 insertions(+), 5 deletions(-) diff --git a/qa/workunits/mon/caps.sh b/qa/workunits/mon/caps.sh index 488fcec96ede0..3951e9976c338 100755 --- a/qa/workunits/mon/caps.sh +++ b/qa/workunits/mon/caps.sh @@ -8,6 +8,18 @@ exit_on_error=1 [[ ! -z $TEST_EXIT_ON_ERROR ]] && exit_on_error=$TEST_EXIT_ON_ERROR +if [ `uname` = FreeBSD ]; then + ETIMEDOUT=60 +else + ETIMEDOUT=110 +fi + +# monitor drops the subscribe message from client if it does not have enough caps +# for read from mon. in that case, the client will be waiting for mgrmap in vain, +# if it is instructed to send a command to mgr. "pg dump" is served by mgr. so, +# we need to set a timeout for testing this scenario. 
+export CEPH_ARGS='--rados-mon-op-timeout=5' + expect() { cmd=$1 @@ -37,7 +49,7 @@ expect "ceph -k $tmp.foo.keyring --user foo auth ls" 0 expect "ceph -k $tmp.foo.keyring --user foo auth export" 13 expect "ceph -k $tmp.foo.keyring --user foo auth del client.bazar" 13 expect "ceph -k $tmp.foo.keyring --user foo osd dump" 13 -expect "ceph -k $tmp.foo.keyring --user foo pg dump" 13 +expect "ceph -k $tmp.foo.keyring --user foo pg dump" $ETIMEDOUT expect "ceph -k $tmp.foo.keyring --user foo quorum_status" 13 ceph auth del client.foo @@ -48,7 +60,7 @@ expect "ceph -k $tmp.bar.keyring --user bar auth ls" 13 expect "ceph -k $tmp.bar.keyring --user bar auth export" 13 expect "ceph -k $tmp.bar.keyring --user bar auth del client.foo" 13 expect "ceph -k $tmp.bar.keyring --user bar osd dump" 13 -expect "ceph -k $tmp.bar.keyring --user bar pg dump" 13 +expect "ceph -k $tmp.bar.keyring --user bar pg dump" $ETIMEDOUT expect "ceph -k $tmp.bar.keyring --user bar quorum_status" 13 ceph auth del client.bar diff --git a/src/ceph.in b/src/ceph.in index 82ee08e39381f..c37ce6d846b18 100755 --- a/src/ceph.in +++ b/src/ceph.in @@ -134,7 +134,7 @@ import subprocess from ceph_argparse import \ concise_sig, descsort_key, parse_json_funcsigs, \ matchnum, validate_command, find_cmd_target, \ - send_command, json_command, run_in_thread + json_command, run_in_thread from ceph_daemon import admin_socket, DaemonWatcher, Termsize @@ -566,7 +566,11 @@ def do_command(parsed_args, target, cmdargs, sigdict, inbuf, verbose): except KeyboardInterrupt: print('Interrupted') return ret, '', '' - + if ret == errno.ETIMEDOUT: + ret = -ret + if not outs: + outs = ("Connection timed out. 
Please check the client's " + + "permission and connection.") return ret, outbuf, outs diff --git a/src/common/Cond.h b/src/common/Cond.h index 520a1efeb5799..524f23cb603f0 100644 --- a/src/common/Cond.h +++ b/src/common/Cond.h @@ -195,6 +195,20 @@ public: cond.Wait(lock); return rval; } + + /// Wait until the \c secs expires or \c complete() is called + /// @return rval if completed, otherwise ETIMEDOUT (a positive errno value) + int wait_for(double secs) { + utime_t interval; + interval.set_from_double(secs); + Mutex::Locker l{lock}; + // if already done, return at once instead of sleeping out the whole interval + if (done) { + return rval; + } + cond.WaitInterval(lock, interval); + return done ? rval : ETIMEDOUT; + } }; #endif diff --git a/src/librados/RadosClient.cc b/src/librados/RadosClient.cc index 95d119cc04ba2..d77517a0f5ca8 100644 --- a/src/librados/RadosClient.cc +++ b/src/librados/RadosClient.cc @@ -866,7 +866,11 @@ int librados::RadosClient::mgr_command(const vector& cmd, return r; lock.Unlock(); - r = cond.wait(); + if (conf->rados_mon_op_timeout) { + r = cond.wait_for(conf->rados_mon_op_timeout); + } else { + r = cond.wait(); + } lock.Lock(); return r; -- 2.39.5