[PATCH 1/4] drm/amdgpu: add cached GPU fault structure to vm struct
Alex Deucher
alexander.deucher at amd.com
Thu Sep 28 20:06:05 UTC 2023
When we get a GPU page fault, cache the fault for later
analysis.
Cc: samuel.pitoiset at gmail.com
Acked-by: Guchun Chen <guchun.chen at amd.com>
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 31 ++++++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 18 +++++++++++++++
2 files changed, 49 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 8ce91f69bbeb..91e36b0ad062 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2730,3 +2730,34 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
total_done_objs);
}
#endif
+
+/**
+ * amdgpu_vm_update_fault_cache - update cached fault info.
+ * @adev: amdgpu device pointer
+ * @pasid: PASID of the VM
+ * @addr: Address of the fault
+ * @status: GPUVM fault status register
+ * @vmhub: which vmhub got the fault
+ *
+ * Cache the fault info for later use by userspace in debugging.
+ */
+void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
+ unsigned int pasid,
+ uint64_t addr,
+ uint32_t status,
+ unsigned int vmhub)
+{
+ struct amdgpu_vm *vm;
+ unsigned long flags;
+
+ xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+
+ vm = xa_load(&adev->vm_manager.pasids, pasid);
+ if (vm) {
+ vm->fault_info.addr = addr;
+ vm->fault_info.status = status;
+ vm->fault_info.vmhub = vmhub;
+ }
+ xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 204ab13184ed..2bf328d9e04b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -252,6 +252,15 @@ struct amdgpu_vm_update_funcs {
struct dma_fence **fence);
};
+struct amdgpu_vm_fault_info {
+ /* address of the fault */
+ uint64_t addr;
+ /* GPUVM fault status register */
+ uint32_t status;
+ /* which vmhub got the fault: gfxhub, mmhub, etc. */
+ unsigned int vmhub;
+};
+
struct amdgpu_vm {
/* tree of virtual addresses mapped */
struct rb_root_cached va;
@@ -343,6 +352,9 @@ struct amdgpu_vm {
/* Memory partition number, -1 means any partition */
int8_t mem_id;
+
+ /* cached fault info */
+ struct amdgpu_vm_fault_info fault_info;
};
struct amdgpu_vm_manager {
@@ -554,4 +566,10 @@ static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
mutex_unlock(&vm->eviction_lock);
}
+void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
+ unsigned int pasid,
+ uint64_t addr,
+ uint32_t status,
+ unsigned int vmhub);
+
#endif
--
2.41.0
More information about the amd-gfx
mailing list