[PATCH 5/5] drm/amdgpu: add graceful VM fault handling
Christian König
ckoenig.leichtzumerken at gmail.com
Fri Jun 28 12:18:12 UTC 2019
Next step towards HMM support. For now just silence the retry fault and
optionally redirect the request to the dummy page.
Signed-off-by: Christian König <christian.koenig at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 59 ++++++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 ++
3 files changed, 65 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a2d18fe46691..6f8c7cc07e8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -3122,3 +3122,62 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
}
}
}
+
+/**
+ * amdgpu_vm_handle_fault - graceful handling of VM faults.
+ * @adev: amdgpu device pointer
+ * @pasid: PASID of the VM
+ * @addr: Address of the fault
+ *
+ * Try to gracefully handle a VM fault. Return true if the fault was handled and
+ * shouldn't be reported any more.
+ */
+bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
+ uint64_t addr)
+{
+ struct amdgpu_ring *ring = &adev->sdma.instance[0].page;
+ uint64_t value, flags;
+ struct amdgpu_vm *vm;
+ long r;
+
+ if (!ring->sched.ready)
+ return false;
+
+ spin_lock(&adev->vm_manager.pasid_lock);
+ vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+ spin_unlock(&adev->vm_manager.pasid_lock);
+
+ if (!vm)
+ return false;
+
+ r = amdgpu_bo_reserve(vm->root.base.bo, true);
+ if (r)
+ return false;
+
+ addr /= AMDGPU_GPU_PAGE_SIZE;
+ flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
+ AMDGPU_PTE_SYSTEM;
+
+ if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
+ /* Redirect the access to the dummy page */
+ value = adev->dummy_page_addr;
+ flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
+ AMDGPU_PTE_WRITEABLE;
+ } else {
+ value = 0;
+ }
+
+ r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1,
+ flags, value, NULL, NULL);
+ if (r)
+ goto error;
+
+ r = amdgpu_vm_update_pdes(adev, vm, true);
+
+error:
+ amdgpu_bo_unreserve(vm->root.base.bo);
+ if (r < 0)
+ DRM_ERROR("Can't handle page fault (%ld)\n", r);
+
+ return false;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 7227c8da1a2a..d0a30614f70a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -408,6 +408,8 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
struct amdgpu_task_info *task_info);
+bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
+ uint64_t addr);
void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index bd5d36944481..faf6d64cd4d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -324,6 +324,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
return 1; /* This also prevents sending it to KFD */
/* If it's the first fault for this address, process it normally */
+ if (retry_fault && !in_interrupt() &&
+ amdgpu_vm_handle_fault(adev, entry->pasid, addr))
+ return 1; /* This also prevents sending it to KFD */
+
if (!amdgpu_sriov_vf(adev)) {
status = RREG32(hub->vm_l2_pro_fault_status);
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
--
2.17.1
More information about the amd-gfx
mailing list