]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-client.git/commitdiff
drm/amdgpu: correct single device PCIe reset flow for DPC
author: Ce Sun <cesun102@amd.com>
Fri, 10 Apr 2026 07:26:59 +0000 (15:26 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Fri, 17 Apr 2026 18:49:11 +0000 (14:49 -0400)
When a DPC event is triggered on a single device, we still need
to set the in_link_reset flag and the DPC status.

Signed-off-by: Ce Sun <cesun102@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c

index 584c9ec28bf110280d2ee19267f176db23e2dc91..413145a958fc1e675d5998a1d2dd36e617d71388 100644 (file)
@@ -5518,8 +5518,6 @@ static void amdgpu_device_recovery_prepare(struct amdgpu_device *adev,
                        list_add_tail(&tmp_adev->reset_list, device_list);
                        if (adev->shutdown)
                                tmp_adev->shutdown = true;
-                       if (amdgpu_reset_in_dpc(adev))
-                               tmp_adev->pcie_reset_ctx.in_link_reset = true;
                }
                if (!list_is_first(&adev->reset_list, device_list))
                        list_rotate_to_front(&adev->reset_list, device_list);
@@ -6291,6 +6289,9 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
                        amdgpu_reset_set_dpc_status(adev, true);
 
                        mutex_lock(&hive->hive_lock);
+               } else {
+                       if (amdgpu_device_bus_status_check(adev))
+                               amdgpu_reset_set_dpc_status(adev, true);
                }
                memset(&reset_context, 0, sizeof(reset_context));
                INIT_LIST_HEAD(&device_list);
@@ -6411,6 +6412,7 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
                list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
                        tmp_adev->pcie_reset_ctx.in_link_reset = true;
        } else {
+               adev->pcie_reset_ctx.in_link_reset = true;
                set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
        }
 
@@ -6467,9 +6469,10 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
                        tmp_adev->pcie_reset_ctx.in_link_reset = false;
                        list_add_tail(&tmp_adev->reset_list, &device_list);
                }
-       } else
+       } else {
+               adev->pcie_reset_ctx.in_link_reset = false;
                list_add_tail(&adev->reset_list, &device_list);
-
+       }
        amdgpu_device_sched_resume(&device_list, NULL, NULL);
        amdgpu_device_gpu_resume(adev, &device_list, false);
        amdgpu_device_recovery_put_reset_lock(adev, &device_list);
index 03d95dca93d755ab4cdb45acab8dcd5a96ae1c89..debb82a2e031d0283e970cf9c565c54503cd9b0f 100644 (file)
@@ -34,6 +34,7 @@
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_vcn.h"
+#include "amdgpu_reset.h"
 #include "soc15d.h"
 
 /* Firmware Names */
@@ -361,7 +362,7 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i)
 
        /* err_event_athub and dpc recovery will corrupt VCPU buffer, so we need to
         * restore fw data and clear buffer in amdgpu_vcn_resume() */
-       if (in_ras_intr || adev->pcie_reset_ctx.in_link_reset)
+       if (in_ras_intr || amdgpu_reset_in_dpc(adev))
                return 0;
 
        return amdgpu_vcn_save_vcpu_bo_inst(adev, i);