From: Laura Flores Date: Thu, 11 Sep 2025 22:13:51 +0000 (+0000) Subject: mgr, qa: add `pending_modules` to asock command X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=e57bd6c2df3a65e813a524714ea16e6cd593d2e4;p=ceph.git mgr, qa: add `pending_modules` to asock command Now, the command `ceph tell mgr mgr_status` will show a "pending_modules" field. This is another way for Ceph operators to check which modules haven't been initialized yet (in addition to the health error). This command was also added to testing scenarios in the workunit. Fixes: https://tracker.ceph.com/issues/71631 Signed-off-by: Laura Flores (cherry picked from commit 68221661b00f8a6bef0fbd7b5401aa49eb5118d0) --- diff --git a/qa/suites/rados/mgr/tasks/4-units/mgr_module_loading_time.yaml b/qa/suites/rados/mgr/tasks/4-units/mgr_module_loading_time.yaml index bf80f1f4cdc1..2a56ff9a2838 100644 --- a/qa/suites/rados/mgr/tasks/4-units/mgr_module_loading_time.yaml +++ b/qa/suites/rados/mgr/tasks/4-units/mgr_module_loading_time.yaml @@ -1,11 +1,11 @@ overrides: ceph: log-ignorelist: - - \(CEPHADM_STRAY_DAEMON\) - - \(CEPHADM_STRAY_HOST\) - - \(MGR_DOWN\) + - CEPHADM_STRAY_DAEMON + - CEPHADM_STRAY_HOST + - MGR_DOWN - evicting unresponsive client - + - MGR_MODULE_ERROR tasks: - workunit: clients: diff --git a/qa/workunits/mgr/test_mgr_module_loading_time.sh b/qa/workunits/mgr/test_mgr_module_loading_time.sh index dd81a5276078..0f7cccf25155 100755 --- a/qa/workunits/mgr/test_mgr_module_loading_time.sh +++ b/qa/workunits/mgr/test_mgr_module_loading_time.sh @@ -66,11 +66,23 @@ if [[ "$stat" != *"active, since"* ]]; then exit 1 fi +echo "Check mgr_status to ensure 'pending_modules' is empty..." +expected='[]' +mgr_status=$("$ceph" tell mgr mgr_status | jq -c '.pending_modules') +if [[ "$mgr_status" == "$expected" ]]; then + echo "PASS: No modules are pending." +else + echo "FAIL: Some modules are pending when they shouldn't be." 
+ echo "Expected: $expected" + echo "Actual: $mgr_status" + exit 1 +fi + # ------ Test 2 ------ echo "Select balancer module to receive loading delays..." "$ceph" config set mgr mgr_module_load_delay_name balancer -echo "Test 2: Inject small delay (10000 ms) that should not exceed max loading retries" +echo "Test 2: Inject small delay (10000 ms) that should not exceed max loading expiration" "$ceph" config set mgr mgr_module_load_delay 10000 "$ceph" mgr fail @@ -104,8 +116,20 @@ if [[ "$stat" != *"active, since"* ]]; then exit 1 fi +echo "Check mgr_status to ensure 'pending_modules' is empty..." +expected='[]' +mgr_status=$("$ceph" tell mgr mgr_status | jq -c '.pending_modules') +if [[ "$mgr_status" == "$expected" ]]; then + echo "PASS: No modules are pending." +else + echo "FAIL: Some modules are pending when there shouldn't be." + echo "Expected: $expected" + echo "Actual: $mgr_status" + exit 1 +fi + # ------ Test 3 ------ -echo "Test 3: Inject large delay (10000000000 ms) that exceeds max loading retries and emits cluster error" +echo "Test 3: Inject large delay (10000000000 ms) that exceeds max loading expiration and emits cluster error" "$ceph" config set mgr mgr_module_load_delay 10000000000 "$ceph" mgr fail @@ -139,6 +163,18 @@ if [[ "$stat" != *"active, since"* ]]; then exit 1 fi +echo "Check mgr_status to ensure 'pending_modules' is populated with modules we expect..." +expected='["balancer","cephadm","crash","devicehealth","iostat","nfs","orchestrator","pg_autoscaler","progress","rbd_support","status","telemetry","volumes"]' +mgr_status=$("$ceph" tell mgr mgr_status | jq -c '.pending_modules') +if [[ "$mgr_status" == "$expected" ]]; then + echo "PASS: Expected modules are pending." +else + echo "FAIL: Expected output does not match actual." 
+ echo "Expected: $expected" + echo "Actual: $mgr_status" + exit 1 +fi + # ----- Test 4 ----- echo "Test 4: Disable the problematic module and confirm that the health error goes away" @@ -164,4 +200,22 @@ if [[ "$stat" != *"active, since"* ]]; then exit 1 fi +echo "Check mgr_status to ensure 'pending_modules' is empty..." +expected='[]' +mgr_status=$("$ceph" tell mgr mgr_status | jq -c '.pending_modules') +if [[ "$mgr_status" == "$expected" ]]; then + echo "PASS: No modules are pending." +else + echo "FAIL: Some modules are pending when there shouldn't be." + echo "Expected: $expected" + echo "Actual: $mgr_status" + exit 1 +fi + +echo "Re-enabling the balancer module..." +"$ceph" mgr module enable balancer + +# Give the health error a bit of time to clear +sleep 10 + echo "All tests passed." diff --git a/src/mgr/ActivePyModules.h b/src/mgr/ActivePyModules.h index f17113390382..62d0b18cf19e 100644 --- a/src/mgr/ActivePyModules.h +++ b/src/mgr/ActivePyModules.h @@ -223,6 +223,11 @@ public: bool is_pending(std::string_view name) const { return pending_modules.count(name) > 0; } + + // Return set of active modules where class instances are not yet created + const std::set<std::string, std::less<>>& get_pending_modules() const { + return pending_modules; + } bool module_exists(const std::string &name) const { return modules.count(name) > 0; diff --git a/src/mgr/Mgr.cc b/src/mgr/Mgr.cc index f848fd4ba4dc..7f281bd4c70a 100644 --- a/src/mgr/Mgr.cc +++ b/src/mgr/Mgr.cc @@ -828,12 +828,19 @@ int Mgr::call( try { if (admin_command == "mgr_status") { f->open_object_section("mgr_status"); - cluster_state.with_mgrmap( - [f](const MgrMap& mm) { - f->dump_unsigned("mgrmap_epoch", mm.get_epoch()); - }); - f->dump_bool("initialized", initialized); + { + cluster_state.with_mgrmap( + [f](const MgrMap& mm) { + f->dump_unsigned("mgrmap_epoch", mm.get_epoch()); + }); + f->dump_bool("initialized", initialized); + f->open_array_section("pending_modules"); + for (auto& mod : 
py_module_registry->get_pending_modules()) { + f->dump_string("module", mod); + } + f->close_section(); f->close_section(); + } return 0; } else { return -ENOSYS; diff --git a/src/mgr/PyModuleRegistry.h b/src/mgr/PyModuleRegistry.h index ba5a61d01b2b..66cfb728c142 100644 --- a/src/mgr/PyModuleRegistry.h +++ b/src/mgr/PyModuleRegistry.h @@ -245,5 +245,11 @@ public: // See "Mgr::background_init()". void check_all_modules_started(Context *modules_start_complete); + // Return set of active modules where class instances are not yet created. + // Protected by const; we only want to view the contents- not modify anything. + const std::set<std::string, std::less<>>& get_pending_modules() const { + return active_modules->get_pending_modules(); + } + // <<< (end of ActivePyModules cheeky call-throughs) };