From 81a6d6072b79e50c5c62023cdb2d907adffdb79d Mon Sep 17 00:00:00 2001 From: Kefu Chai Date: Mon, 10 Nov 2025 21:44:07 +0800 Subject: [PATCH] rgw/posix: Fix race condition in Inotify causing segfault Fixed a race condition in the Inotify class where the ev_loop() thread and caller threads (add_watch/remove_watch) were accessing the wd_callback_map and wd_remove_map hash maps without synchronization. This caused a segfault during hash table operations when one thread was reading from the map while another was modifying it, leading to iterator invalidation and memory corruption. Backtrace from the crash: Frame 5: file::listing::Inotify::ev_loop()+0x190 Frame 4: ankerl::unordered_dense::v3_1_0::detail::table::find() Crash: Memory access violation during WatchRecord lookup The fix adds: - A mutex (map_mutex) to protect both hash maps - Lock guards in add_watch() and remove_watch() during map modifications - Lock guard in ev_loop() with proper copying of watch record data to avoid holding the lock during callbacks and prevent use-after-free See https://jenkins.ceph.com/job/ceph-pull-requests/169774/testReport/junit/projectroot.src.test/rgw/unittest_rgw_posix_driver/ Signed-off-by: Kefu Chai --- src/rgw/driver/posix/notify.h | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/src/rgw/driver/posix/notify.h b/src/rgw/driver/posix/notify.h index e10909b075b19..517615193d4f0 100644 --- a/src/rgw/driver/posix/notify.h +++ b/src/rgw/driver/posix/notify.h @@ -112,6 +112,7 @@ namespace file::listing { int wfd, efd; std::thread thrd; + std::mutex map_mutex; // protects wd_callback_map and wd_remove_map wd_callback_map_t wd_callback_map; wd_remove_map_t wd_remove_map; bool shutdown{false}; @@ -166,18 +167,28 @@ namespace file::listing { for (char* ptr = buf; ptr < buf + len; ptr += sizeof(struct inotify_event) + event->len) { event = reinterpret_cast(ptr); - const auto& it = wd_callback_map.find(event->wd); - //std::cout << fmt::format("event! {}", event->name) << std::endl; - if (it == wd_callback_map.end()) [[unlikely]] { - /* non-destructive race, it happens */ - continue; + + // Copy watch record data while holding the lock to avoid use-after-free + std::string watch_name; + void* watch_opaque; + { + std::lock_guard lock(map_mutex); + const auto& it = wd_callback_map.find(event->wd); + //std::cout << fmt::format("event! {}", event->name) << std::endl; + if (it == wd_callback_map.end()) [[unlikely]] { + /* non-destructive race, it happens */ + continue; + } + const auto& wr = it->second; + watch_name = wr.name; + watch_opaque = wr.opaque; } - const auto& wr = it->second; + if (event->mask & IN_Q_OVERFLOW) [[unlikely]] { /* cache blown, invalidate */ evec.clear(); evec.emplace_back(Notifiable::Event(Notifiable::EventType::INVALIDATE, std::nullopt)); - n->notify(wr.name, wr.opaque, evec); + n->notify(watch_name, watch_opaque, evec); goto restart; } else { if ((event->mask & IN_CREATE) || @@ -191,7 +202,7 @@ namespace file::listing { } } /* !overflow */ if (evec.size() > 0) { - n->notify(wr.name, wr.opaque, evec); + n->notify(watch_name, watch_opaque, evec); } } /* events */ } /* n > 0 */ @@ -223,6 +234,7 @@ namespace file::listing { if (wd == -1) { std::cerr << fmt::format("{} inotify_add_watch {} failed with {}", __func__, dname, wd) << std::endl; } else { + std::lock_guard lock(map_mutex); wd_callback_map.insert(wd_callback_map_t::value_type(wd, WatchRecord(wd, dname, opaque))); wd_remove_map.insert(wd_remove_map_t::value_type(dname, wd)); } @@ -231,6 +243,7 @@ namespace file::listing { virtual int remove_watch(const std::string& dname) override { int r{0}; + std::lock_guard lock(map_mutex); const auto& elt = wd_remove_map.find(dname); if (elt != wd_remove_map.end()) { auto& wd = elt->second; -- 2.39.5