From: Rotem Shapira Date: Wed, 18 Feb 2026 13:51:45 +0000 (+0000) Subject: rgw/lua: create fresh VM for each background script execution X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=52d46b496cb7896a79bfde15e21af6f63a0e7026;p=ceph.git rgw/lua: create fresh VM for each background script execution Previously, the background thread reused the same Lua VM across iterations, causing stale state to persist. This made operations like 'pairs(RGW)' fail to iterate properly. Now we create a fresh VM on each iteration, which: - Fixes the iteration bug - Simplifies the code (no need to update limits on existing VM) - Ensures clean state for each script execution Verified with unit tests: - TableIterateBackground - TableIterateBackgroundBreak - TableIterateStepByStep Fixes: https://tracker.ceph.com/issues/74839 Signed-off-by: Rotem Shapira --- diff --git a/src/rgw/rgw_lua_background.cc b/src/rgw/rgw_lua_background.cc index 0d0f34c91b6..bd01fc35d96 100644 --- a/src/rgw/rgw_lua_background.cc +++ b/src/rgw/rgw_lua_background.cc @@ -156,10 +156,6 @@ const BackgroundMapValue& Background::get_table_value(const std::string& key) co void Background::run() { ceph_pthread_setname("lua_background"); const DoutPrefixProvider* const dpp = &dp; - std::unique_ptr lguard = initialize_lguard_state(); - if (!lguard) { - return; - } while (!stopped) { if (paused) { @@ -172,20 +168,10 @@ void Background::run() { } ldpp_dout(dpp, 10) << "Lua background thread resumed" << dendl; } - const std::size_t max_memory = cct->_conf->rgw_lua_max_memory_per_state; - const std::uint64_t max_runtime = cct->_conf->rgw_lua_max_runtime_per_state; - auto res = lguard->set_max_memory(max_memory); - if (!res) { - ldpp_dout(dpp, 10) - << "Lua state required reset due to memory limit change" << dendl; - lguard = initialize_lguard_state(); - if (!lguard) { - return; - } - ldpp_dout(dpp, 10) << "Lua state restarted successfully." << dendl; + std::unique_ptr lguard = initialize_lguard_state(); + if (!lguard) { + return; } - lguard->set_max_runtime(max_runtime); - lguard->reset_start_time(); const auto rc = read_script(); if (rc == -ENOENT || rc == -EAGAIN) { // either no script or paused, nothing to do diff --git a/src/rgw/rgw_lua_utils.cc b/src/rgw/rgw_lua_utils.cc index 4abed330a9f..0b73e6aa14a 100644 --- a/src/rgw/rgw_lua_utils.cc +++ b/src/rgw/rgw_lua_utils.cc @@ -182,54 +182,10 @@ lua_state_guard::~lua_state_guard() { } } -bool lua_state_guard::set_max_memory(std::size_t _max_memory) { - if (_max_memory == max_memory) { - return true; - } - ldpp_dout(dpp, 20) << "Lua is using: " << mem_in_use << " bytes (" - << std::to_string(100.0 - - (100.0 * mem_in_use / max_memory)) - << "%)" << dendl; - - if (mem_in_use > _max_memory && _max_memory > 0) { - max_memory = _max_memory; - ldpp_dout(dpp, 10) << "Lua memory limit is below current usage" << dendl; - ldpp_dout(dpp, 20) << "Lua memory limit set to: " << max_memory << " bytes" - << dendl; - return false; - } - - max_memory = _max_memory; - ldpp_dout(dpp, 20) << "Lua memory limit set to: " - << (max_memory > 0 ? std::to_string(max_memory) : "N/A") - << " bytes" << dendl; - return true; -} - void lua_state_guard::set_mem_in_use(std::size_t _mem_in_use) { mem_in_use = _mem_in_use; } -void lua_state_guard::set_max_runtime(std::uint64_t _max_runtime) { - if (static_cast(max_runtime.count()) != _max_runtime) { - if (_max_runtime > 0) { - auto omax_runtime = max_runtime.count(); - max_runtime = std::chrono::milliseconds(_max_runtime); - if (omax_runtime == 0) { - set_runtime_hook(); - } - } else { - max_runtime = std::chrono::milliseconds(0); - lua_sethook(state, nullptr, 0, 0); - } - ldpp_dout(dpp, 20) << "Lua runtime limit set to: " - << (max_runtime.count() > 0 - ? std::to_string(max_runtime.count()) - : "N/A") - << " milliseconds" << dendl; - } -} - void lua_state_guard::runtime_hook(lua_State* L, lua_Debug* ar) { auto now = ceph::real_clock::now(); lua_getfield(L, LUA_REGISTRYINDEX, max_runtime_key); diff --git a/src/rgw/rgw_lua_utils.h b/src/rgw/rgw_lua_utils.h index 2a8efe39e38..9262ea7e0a2 100644 --- a/src/rgw/rgw_lua_utils.h +++ b/src/rgw/rgw_lua_utils.h @@ -84,14 +84,10 @@ class lua_state_guard { const DoutPrefixProvider* _dpp); ~lua_state_guard(); lua_State* get() { return state; } - void reset_start_time() { start_time = ceph::real_clock::now(); } std::size_t get_max_memory() const { return max_memory; } std::size_t get_mem_in_use() const { return mem_in_use; } - std::chrono::milliseconds get_max_runtime() const { return max_runtime; } - bool set_max_memory(std::size_t _max_memory); void set_mem_in_use(std::size_t _mem_in_use); - void set_max_runtime(std::uint64_t _max_runtime); }; int dostring(lua_State* L, const char* str); diff --git a/src/test/rgw/test_rgw_lua.cc b/src/test/rgw/test_rgw_lua.cc index 5160acc786c..f5ce8be2bf8 100644 --- a/src/test/rgw/test_rgw_lua.cc +++ b/src/test/rgw/test_rgw_lua.cc @@ -1447,6 +1447,146 @@ TEST(TestRGWLuaBackground, TableIncrementError) ASSERT_NE(rc, 0); } +// Regular background iteration test +TEST(TestRGWLuaBackground, TableIterateBackground) +{ + DEFINE_REQ_STATE; + + // Script counts elements in RGW table, excluding its own "count" key + const std::string background_script = R"( + local count = 0 + for k, v in pairs(RGW) do + if tostring(k) ~= "count" then + count = count + 1 + end + end + RGW["count"] = count + )"; + + // Use the helper function to set the background script + set_script(pe.lua.manager.get(), background_script); + + TestBackground lua_background(pe.lua.manager.get()); + pe.lua.background = &lua_background; + lua_background.start(); + + // Wait 1s to ensure background thread is initialized before injecting data + std::this_thread::sleep_for(std::chrono::seconds(1)); + + // Inject multiple data types via request context to populate the shared RGW table + const std::string request_script = R"( + RGW["key1"] = "string_value" + RGW["key2"] = 100 + RGW["key3"] = false + )"; + + // Inject data into the shared table via a request script + const auto rc = lua::request::execute(nullptr, nullptr, &s, nullptr, request_script); + ASSERT_EQ(rc, 0); + + // Wait 6s to allow at least one full background iteration (interval is 5s) + // This verifies that each new VM instance can correctly iterate over the shared RGW table. + std::this_thread::sleep_for(std::chrono::seconds(6)); + + // Verify that all data types were preserved and correctly counted + EXPECT_EQ(get_table_value(lua_background, "key1"), "string_value"); + EXPECT_EQ(get_table_value(lua_background, "key2"), 100); + EXPECT_FALSE(get_table_value(lua_background, "key3")); + EXPECT_EQ(get_table_value(lua_background, "count"), 3); + + lua_background.shutdown(); +} + +// Edge case: Test early exit from iteration using 'break' +TEST(TestRGWLuaBackground, TableIterateBackgroundBreak) +{ + DEFINE_REQ_STATE; + + // Script stops counting after reaching 2 elements to test if partial iteration works + const std::string background_script = R"( + local count = 0 + for k, v in pairs(RGW) do + if tostring(k) ~= "count" then + count = count + 1 + end + if count == 2 then break end + end + RGW["count"] = count + )"; + + // Use the helper function to set the background script + set_script(pe.lua.manager.get(), background_script); + + TestBackground lua_background(pe.lua.manager.get()); + pe.lua.background = &lua_background; + lua_background.start(); + + // Wait 1s to ensure background thread is initialized before injecting data + std::this_thread::sleep_for(std::chrono::seconds(1)); + + // Inject 3 items, but the script above should only count 2 + const std::string request_script = R"( + RGW["key1"] = "string_value" + RGW["key2"] = 100 + RGW["key3"] = false + )"; + + ASSERT_EQ(lua::request::execute(nullptr, nullptr, &s, nullptr, request_script), 0); + + // Wait 6s to allow at least one full background iteration (interval is 5s) + std::this_thread::sleep_for(std::chrono::seconds(6)); + + EXPECT_EQ(get_table_value(lua_background, "key1"), "string_value"); + EXPECT_EQ(get_table_value(lua_background, "key2"), 100); + // Even though 3 items exist, count should be 2 due to the 'break' in Lua + EXPECT_EQ(get_table_value(lua_background, "count"), 2); + + lua_background.shutdown(); +} + +// Incremental test: Verifies that the background thread detects table changes over time +TEST(TestRGWLuaBackground, TableIterateStepByStep) +{ + DEFINE_REQ_STATE; + + // Background script: Iterates over RGW table and counts elements, excluding the "count" key + const std::string background_script = R"( + local count = 0 + for k, v in pairs(RGW) do + if tostring(k) ~= "count" then + count = count + 1 + end + end + RGW["count"] = count + )"; + + // Use the helper function to set the background script + set_script(pe.lua.manager.get(), background_script); + + TestBackground lua_background(pe.lua.manager.get()); + pe.lua.background = &lua_background; + lua_background.start(); + + // --- Step 1: Initial state (Empty table) --- + // Wait 1s to ensure background thread is initialized before injecting data + std::this_thread::sleep_for(std::chrono::seconds(1)); + EXPECT_EQ(get_table_value(lua_background, "count"), 0); + + // --- Step 2: Add first item --- + ASSERT_EQ(lua::request::execute(nullptr, nullptr, &s, nullptr, "RGW['key1'] = 'val1'"), 0); + std::this_thread::sleep_for(std::chrono::seconds(6)); + EXPECT_EQ(get_table_value(lua_background, "key1"), "val1"); + EXPECT_EQ(get_table_value(lua_background, "count"), 1); + + // --- Step 3: Add second item --- + ASSERT_EQ(lua::request::execute(nullptr, nullptr, &s, nullptr, "RGW['key2'] = 42"), 0); + std::this_thread::sleep_for(std::chrono::seconds(6)); + EXPECT_EQ(get_table_value(lua_background, "key2"), 42); + EXPECT_EQ(get_table_value(lua_background, "count"), 2); + + lua_background.shutdown(); +} + TEST(TestRGWLua, TracingSetAttribute) { const std::string script = R"(