perf: Fix __perf_event_overflow() vs perf_remove_from_context() race
author Peter Zijlstra <peterz@infradead.org>
Tue, 24 Feb 2026 12:29:09 +0000 (13:29 +0100)
committer Peter Zijlstra <peterz@infradead.org>
Wed, 25 Feb 2026 14:02:34 +0000 (15:02 +0100)
Make sure that __perf_event_overflow() runs with IRQs disabled for all
possible callchains. Specifically, the software events can end up running
it with only preemption disabled.

This opens up a race against perf_event_exit_event() and friends, which
free various things the overflow path expects to still be present, such
as the event's BPF program.
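An illustrative interleaving (abbreviated, not verbatim callchains): the
remove side IPIs the CPU where the event is active, and with IRQs still
enabled that IPI can be handled in the middle of the overflow path:

  CPU 0 (swevent, preemption              CPU 1 (closing the event)
         disabled, IRQs on)
  ---------------------------             -------------------------
  __perf_event_overflow()
                                          perf_event_release_kernel()
                                            perf_remove_from_context()
                                              <IPI handled on CPU 0>
                                            perf_event_exit_event()
                                              frees e.g. the BPF program
    bpf_overflow_handler()
      use-after-free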

Fixes: 592903cdcbf6 ("perf_counter: add an event_list")
Reported-by: Simond Hu <cmdhh1767@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Simond Hu <cmdhh1767@gmail.com>
Link: https://patch.msgid.link/20260224122909.GV1395416@noisy.programming.kicks-ass.net
kernel/events/core.c

index 22a0f405585b50413fa809dca6741a1688a2ed0f..1f5699b339ec8ab84c05030cabdf2169f9486fae 100644 (file)
@@ -10777,6 +10777,13 @@ int perf_event_overflow(struct perf_event *event,
                        struct perf_sample_data *data,
                        struct pt_regs *regs)
 {
+       /*
+        * Entry point from hardware PMI, interrupts should be disabled here.
+        * This serializes us against perf_remove_from_context() in
+        * things like perf_event_release_kernel().
+        */
+       lockdep_assert_irqs_disabled();
+
        return __perf_event_overflow(event, 1, data, regs);
 }
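The pattern this hunk relies on is worth spelling out: the public entry
point is the one place the context requirement is asserted, while the
double-underscore worker merely assumes it. A minimal userspace sketch of
that shape (all names here, overflow_entry, __overflow_worker and
fake_irqs_disabled, are invented for illustration and are not kernel APIs):

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the per-CPU hardirq state that lockdep really tracks. */
static bool fake_irqs_disabled;

/* Worker: documents, but cannot itself check, that IRQs are already off. */
static int __overflow_worker(int nr)
{
        return nr + 1;  /* placeholder for the real overflow handling */
}

/*
 * Public entry point: the single choke point where the context is
 * enforced, modelling lockdep_assert_irqs_disabled() with plain assert().
 */
static int overflow_entry(int nr)
{
        assert(fake_irqs_disabled);
        return __overflow_worker(nr);
}

int main(void)
{
        fake_irqs_disabled = true;      /* "disable IRQs" before entering */
        printf("%d\n", overflow_entry(0));
        return 0;
}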
 
@@ -10853,6 +10860,19 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
 {
        struct hw_perf_event *hwc = &event->hw;
 
+       /*
+        * This is:
+        *   - software         preempt
+        *   - tracepoint       preempt
+        *   -   tp_target_task irq (ctx->lock)
+        *   - uprobes          preempt/irq
+        *   - kprobes          preempt/irq
+        *   - hw_breakpoint    irq
+        *
+        * Any of these are sufficient to hold off RCU and thus ensure @event
+        * exists.
+        */
+       lockdep_assert_preemption_disabled();
        local64_add(nr, &event->count);
 
        if (!regs)
@@ -10861,6 +10881,16 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
        if (!is_sampling_event(event))
                return;
 
+       /*
+        * Serialize against event_function_call() IPIs like normal overflow
+        * event handling. Specifically, must not allow
+        * perf_event_release_kernel() -> perf_remove_from_context() to make
+        * progress and 'release' the event from under us.
+        */
+       guard(irqsave)();
+       if (event->state != PERF_EVENT_STATE_ACTIVE)
+               return;
+
        if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) {
                data->period = nr;
                return perf_swevent_overflow(event, 1, data, regs);
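The guard(irqsave)() above is one of the kernel's scope-based cleanup
guards (include/linux/cleanup.h; the irqsave guard itself is defined
alongside the irqflags primitives): it does local_irq_save() when
declared and local_irq_restore() automatically when the scope exits,
which is what makes the early return at the state check safe. A minimal
userspace sketch of the underlying compiler mechanism, with invented
fake_* stubs standing in for the real IRQ primitives:

#include <stdio.h>

static unsigned long fake_irq_state = 1;        /* 1 == IRQs enabled */

static void fake_irq_save(unsigned long *flags)
{
        *flags = fake_irq_state;
        fake_irq_state = 0;
}

static void fake_irq_restore(unsigned long *flags)
{
        fake_irq_state = *flags;
        printf("IRQ state restored to %lu\n", *flags);
}

/*
 * The cleanup attribute (a GCC/clang extension, and what the kernel's
 * guard() infrastructure is built on) runs the restore function when
 * the variable goes out of scope, on every return path.
 */
#define FAKE_IRQSAVE_GUARD                                              \
        unsigned long _flags __attribute__((cleanup(fake_irq_restore)));\
        fake_irq_save(&_flags)

static void demo(int bail_early)
{
        FAKE_IRQSAVE_GUARD;
        if (bail_early)
                return;         /* the restore still runs here */
        printf("body ran with fake_irq_state %lu\n", fake_irq_state);
}

int main(void)
{
        demo(1);        /* restore runs despite the early return */
        demo(0);
        return 0;
}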
@@ -11359,6 +11389,11 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
        struct perf_sample_data data;
        struct perf_event *event;
 
+       /*
+        * Being a tracepoint, this runs with preemption disabled.
+        */
+       lockdep_assert_preemption_disabled();
+
        struct perf_raw_record raw = {
                .frag = {
                        .size = entry_size,
@@ -11691,6 +11726,11 @@ void perf_bp_event(struct perf_event *bp, void *data)
        struct perf_sample_data sample;
        struct pt_regs *regs = data;
 
+       /*
+        * Exception context, will have interrupts disabled.
+        */
+       lockdep_assert_irqs_disabled();
+
        perf_sample_data_init(&sample, bp->attr.bp_addr, 0);
 
        if (!bp->hw.state && !perf_exclude_event(bp, regs))
@@ -12155,7 +12195,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
 
        if (regs && !perf_exclude_event(event, regs)) {
                if (!(event->attr.exclude_idle && is_idle_task(current)))
-                       if (__perf_event_overflow(event, 1, &data, regs))
+                       if (perf_event_overflow(event, &data, regs))
                                ret = HRTIMER_NORESTART;
        }