[PATCH 1/3] drm/amdgpu: add cached GPU fault structure to vm struct
Alex Deucher
alexander.deucher at amd.com
Thu May 25 16:52:17 UTC 2023
When we get a GPU pge fault, cache the fault for later
analysis.
Cc: samuel.pitoiset at gmail.com
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 31 ++++++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 18 +++++++++++++++
2 files changed, 49 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 22f9a65ca0fc..73e022f3daa4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2631,3 +2631,34 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
total_done_objs);
}
#endif
+
+/**
+ * amdgpu_vm_update_fault_cache - update cached fault into.
+ * @adev: amdgpu device pointer
+ * @pasid: PASID of the VM
+ * @addr: Address of the fault
+ * @status: fault status register
+ * @vmhub: which vmhub got the fault
+ *
+ * Cache the fault info for later use by userspace in debuggging.
+ */
+void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
+ unsigned int pasid,
+ uint64_t addr,
+ uint32_t status,
+ unsigned int vmhub)
+{
+ struct amdgpu_vm *vm;
+ unsigned long flags;
+
+ xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+
+ vm = xa_load(&adev->vm_manager.pasids, pasid);
+ if (vm) {
+ vm->fault_info.addr = addr;
+ vm->fault_info.status = status;
+ vm->fault_info.vmhub = vmhub;
+ }
+ xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 14f9a2bf3acb..fb66a413110c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -244,6 +244,15 @@ struct amdgpu_vm_update_funcs {
struct dma_fence **fence);
};
+struct amdgpu_vm_fault_info {
+ /* fault address */
+ uint64_t addr;
+ /* fault status register */
+ uint32_t status;
+ /* which vmhub? gfxhub, mmhub, etc. */
+ unsigned int vmhub;
+};
+
struct amdgpu_vm {
/* tree of virtual addresses mapped */
struct rb_root_cached va;
@@ -332,6 +341,9 @@ struct amdgpu_vm {
/* Memory partition number, -1 means any partition */
int8_t mem_id;
+
+ /* cached fault info */
+ struct amdgpu_vm_fault_info fault_info;
};
struct amdgpu_vm_manager {
@@ -540,4 +552,10 @@ static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
mutex_unlock(&vm->eviction_lock);
}
+void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
+ unsigned int pasid,
+ uint64_t addr,
+ uint32_t status,
+ unsigned int vmhub);
+
#endif
--
2.40.1
More information about the amd-gfx
mailing list