git.apps.os.sepia.ceph.com Git - ceph-client.git/commitdiff
mm/oom_kill: allow process_mrelease to run under mmap_lock protection
author: Suren Baghdasaryan <surenb@google.com>
Fri, 14 Jan 2022 22:06:22 +0000 (14:06 -0800)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Sat, 15 Jan 2022 14:30:28 +0000 (16:30 +0200)
With exit_mmap holding mmap_write_lock during the free_pgtables call,
process_mrelease does not need to elevate mm->mm_users in order to
prevent exit_mmap from destroying pagetables while __oom_reap_task_mm is
walking the VMA tree.  The change prevents process_mrelease from calling
the last mmput, which can lead to waiting for IO completion in exit_aio.

Link: https://lkml.kernel.org/r/20211209191325.3069345-3-surenb@google.com
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Christian Brauner <christian@brauner.io>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Florian Weimer <fweimer@redhat.com>
Cc: Jan Engelhardt <jengelh@inai.de>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tim Murray <timmurray@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/oom_kill.c

index e52ce0b1465d6a426889dffbf6c93999e7812b37..3390316c8a32ac6460c98a412ded94d243480d71 100644 (file)
@@ -1170,15 +1170,15 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
                goto put_task;
        }
 
-       if (mmget_not_zero(p->mm)) {
-               mm = p->mm;
-               if (task_will_free_mem(p))
-                       reap = true;
-               else {
-                       /* Error only if the work has not been done already */
-                       if (!test_bit(MMF_OOM_SKIP, &mm->flags))
-                               ret = -EINVAL;
-               }
+       mm = p->mm;
+       mmgrab(mm);
+
+       if (task_will_free_mem(p))
+               reap = true;
+       else {
+               /* Error only if the work has not been done already */
+               if (!test_bit(MMF_OOM_SKIP, &mm->flags))
+                       ret = -EINVAL;
        }
        task_unlock(p);
 
@@ -1189,13 +1189,16 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
                ret = -EINTR;
                goto drop_mm;
        }
-       if (!__oom_reap_task_mm(mm))
+       /*
+        * Check MMF_OOM_SKIP again under mmap_read_lock protection to ensure
+        * possible change in exit_mmap is seen
+        */
+       if (!test_bit(MMF_OOM_SKIP, &mm->flags) && !__oom_reap_task_mm(mm))
                ret = -EAGAIN;
        mmap_read_unlock(mm);
 
 drop_mm:
-       if (mm)
-               mmput(mm);
+       mmdrop(mm);
 put_task:
        put_task_struct(task);
        return ret;