From 1f93c9f22403eb4ffa72e10dde1587a5641fa12a Mon Sep 17 00:00:00 2001 From: Laura Flores Date: Fri, 12 Sep 2025 20:14:30 +0000 Subject: [PATCH] mgr, qa: clarify module checks in DaemonServer The current check conflates a module not being enabled with a module failing to initialize. In this commit, we reorder the checks: 1. Screen for whether the module is enabled. If it's not, issue an EOPNOTSUPP with instructions on how to enable it. 2. Screen for whether the module is active. If a module is enabled, then the cluster expects it to be active to support commands. If the module took too long to initialize, though, we will catch this and issue an ETIMEDOUT error with a link for troubleshooting. Now, these two separate issues are not grouped together, and they are checked in the right order. Fixes: https://tracker.ceph.com/issues/71631 Signed-off-by: Laura Flores (cherry picked from commit fdc072f15da7ec4c918a1ebff439f6ce4922f33f) --- .../mgr/test_mgr_module_loading_time.sh | 2 +- src/mgr/DaemonServer.cc | 36 +++++++++---------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/qa/workunits/mgr/test_mgr_module_loading_time.sh b/qa/workunits/mgr/test_mgr_module_loading_time.sh index 0f7cccf25155..9901e73ec29c 100755 --- a/qa/workunits/mgr/test_mgr_module_loading_time.sh +++ b/qa/workunits/mgr/test_mgr_module_loading_time.sh @@ -136,7 +136,7 @@ echo "Test 3: Inject large delay (10000000000 ms) that exceeds max loading expir orch_status_output=$("$ceph" orch status 2>&1) echo "$orch_status_output" -if [[ "$orch_status_output" == *"Error ENOTSUP: Module 'orchestrator' is not enabled/loaded"* ]]; then +if [[ "$orch_status_output" == *"Error ETIMEDOUT: Module 'orchestrator' did not initialize in time"* ]]; then echo "PASS: orch command failed during large delay as expected." else echo "FAIL: Unexpected error in orch command during large delay." 
diff --git a/src/mgr/DaemonServer.cc b/src/mgr/DaemonServer.cc index 2b1fa32e0bfd..6ee6a92b177d 100644 --- a/src/mgr/DaemonServer.cc +++ b/src/mgr/DaemonServer.cc @@ -3047,14 +3047,27 @@ bool DaemonServer::_handle_command( return true; } + // Validate that the module is enabled + auto& py_handler_name = py_command.module_name; + PyModuleRef module = py_modules.get_module(py_handler_name); + ceph_assert(module); + if (!module->is_enabled()) { + ss << "Module '" << py_handler_name << "' is not enabled (required by " + "command '" << prefix << "'): use `ceph mgr module enable " + << py_handler_name << "` to enable it"; + dout(4) << ss.str() << dendl; + cmdctx->reply(-EOPNOTSUPP, ss); + return true; + } + // Validate that the module is active auto& mod_name = py_command.module_name; if (!py_modules.is_module_active(mod_name)) { - ss << "Module '" << mod_name << "' is not enabled/loaded (required by " - "command '" << prefix << "'): use `ceph mgr module enable " - << mod_name << "` to enable it"; + ss << "Module '" << mod_name << "' did not initialize in time (required by " + "command '" << prefix << "'): see https://docs.ceph.com/en/latest/rados/operations/health-checks/#mgr-module-error " + "for troubleshooting tips."; dout(4) << ss.str() << dendl; - cmdctx->reply(-EOPNOTSUPP, ss); + cmdctx->reply(-ETIMEDOUT, ss); return true; } @@ -3063,25 +3076,12 @@ bool DaemonServer::_handle_command( dout(10) << "passing through command '" << prefix << "' size " << cmdctx->cmdmap.size() << dendl; Finisher& mod_finisher = py_modules.get_active_module_finisher(mod_name); - mod_finisher.queue(new LambdaContext([this, cmdctx, session, py_command, prefix, op] + mod_finisher.queue(new LambdaContext([this, cmdctx, session, py_command, prefix, op, py_handler_name, module] (int r_) mutable { std::stringstream ss; dout(10) << "dispatching command '" << prefix << "' size " << cmdctx->cmdmap.size() << dendl; - // Validate that the module is enabled - auto& py_handler_name = 
py_command.module_name; - PyModuleRef module = py_modules.get_module(py_handler_name); - ceph_assert(module); - if (!module->is_enabled()) { - ss << "Module '" << py_handler_name << "' is not enabled (required by " - "command '" << prefix << "'): use `ceph mgr module enable " - << py_handler_name << "` to enable it"; - dout(4) << ss.str() << dendl; - cmdctx->reply(-EOPNOTSUPP, ss); - return; - } - // Hack: allow the self-test method to run on unhealthy modules. // Fix this in future by creating a special path for self test rather // than having the hook be a normal module command. -- 2.47.3