git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-client.git/commitdiff
drm/amdgpu: lock both VM and BO in amdgpu_gem_object_open
authorChristian König <christian.koenig@amd.com>
Tue, 20 Jan 2026 11:57:21 +0000 (12:57 +0100)
committerAlex Deucher <alexander.deucher@amd.com>
Thu, 12 Feb 2026 20:24:59 +0000 (15:24 -0500)
The VM was not locked in the past since we initially only cleared the
linked list element and did not add it to any VM state.

But this changed quite some time ago; we just never realized the
problem because the VM state lock was masking it.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 00ea69baa126d0571467ca65bfa5ba25afaa9078..6893d175f7c338792d8e46c7c2ea226349231303 100644 (file)
@@ -878,6 +878,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
        struct amdgpu_bo *bo[2] = {NULL, NULL};
        struct amdgpu_bo_va *bo_va;
        bool same_hive = false;
+       struct drm_exec exec;
        int i, ret;
 
        if (!va) {
@@ -958,19 +959,25 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
                        goto unwind;
                }
 
-               /* Add BO to VM internal data structures */
-               ret = amdgpu_bo_reserve(bo[i], false);
-               if (ret) {
-                       pr_debug("Unable to reserve BO during memory attach");
-                       goto unwind;
+               drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+               drm_exec_until_all_locked(&exec) {
+                       ret = amdgpu_vm_lock_pd(vm, &exec, 0);
+                       drm_exec_retry_on_contention(&exec);
+                       if (unlikely(ret))
+                               goto unwind;
+                       ret = drm_exec_lock_obj(&exec, &bo[i]->tbo.base);
+                       drm_exec_retry_on_contention(&exec);
+                       if (unlikely(ret))
+                               goto unwind;
                }
+
                bo_va = amdgpu_vm_bo_find(vm, bo[i]);
                if (!bo_va)
                        bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
                else
                        ++bo_va->ref_count;
                attachment[i]->bo_va = bo_va;
-               amdgpu_bo_unreserve(bo[i]);
+               drm_exec_fini(&exec);
                if (unlikely(!attachment[i]->bo_va)) {
                        ret = -ENOMEM;
                        pr_err("Failed to add BO object to VM. ret == %d\n",
index 5f9fa2140f0949f88a9c055c00f72711515c8a0a..5c90de58cc28544a1cb2c19963edf8d1193de4c0 100644 (file)
@@ -232,6 +232,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
        struct amdgpu_vm *vm = &fpriv->vm;
        struct amdgpu_bo_va *bo_va;
        struct mm_struct *mm;
+       struct drm_exec exec;
        int r;
 
        mm = amdgpu_ttm_tt_get_usermm(abo->tbo.ttm);
@@ -242,9 +243,18 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
            !amdgpu_vm_is_bo_always_valid(vm, abo))
                return -EPERM;
 
-       r = amdgpu_bo_reserve(abo, false);
-       if (r)
-               return r;
+       drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
+       drm_exec_until_all_locked(&exec) {
+               r = drm_exec_prepare_obj(&exec, &abo->tbo.base, 1);
+               drm_exec_retry_on_contention(&exec);
+               if (unlikely(r))
+                       goto out_unlock;
+
+               r = amdgpu_vm_lock_pd(vm, &exec, 0);
+               drm_exec_retry_on_contention(&exec);
+               if (unlikely(r))
+                       goto out_unlock;
+       }
 
        amdgpu_vm_bo_update_shared(abo);
        bo_va = amdgpu_vm_bo_find(vm, abo);
@@ -260,8 +270,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
                amdgpu_bo_unreserve(abo);
                return r;
        }
-
-       amdgpu_bo_unreserve(abo);
+       drm_exec_fini(&exec);
 
        /* Validate and add eviction fence to DMABuf imports with dynamic
         * attachment in compute VMs. Re-validation will be done by
@@ -294,7 +303,10 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
                }
        }
        mutex_unlock(&vm->process_info->lock);
+       return r;
 
+out_unlock:
+       drm_exec_fini(&exec);
        return r;
 }
 
index 1878e0faa722aee292aed2132eb5c78de6d4abc8..f69332eed0512667a6f7703456096897da00135e 100644 (file)
@@ -1445,6 +1445,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 {
        struct amdgpu_device *adev = drm_to_adev(dev);
        struct amdgpu_fpriv *fpriv;
+       struct drm_exec exec;
        int r, pasid;
 
        /* Ensure IB tests are run on ring */
@@ -1484,7 +1485,16 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
        if (r)
                goto error_pasid;
 
+       drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
+       drm_exec_until_all_locked(&exec) {
+               r = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 0);
+               drm_exec_retry_on_contention(&exec);
+               if (unlikely(r))
+                       goto error_vm;
+       }
+
        fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL);
+       drm_exec_fini(&exec);
        if (!fpriv->prt_va) {
                r = -ENOMEM;
                goto error_vm;
index 31383583fc6821f9c2cbc20787fb5d9839aea561..11597224d4379678d114d8b59e0ca0c2ac11976f 100644 (file)
@@ -1735,6 +1735,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
 {
        struct amdgpu_bo_va *bo_va;
 
+       amdgpu_vm_assert_locked(vm);
+
        bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
        if (bo_va == NULL) {
                return NULL;