[PATCH 1/3] drm/amdgpu: add cached GPU fault structure to vm struct

Chen, Guchun Guchun.Chen at amd.com
Wed Jun 7 09:26:24 UTC 2023


[Public]

3 nitpick comments.

> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Alex
> Deucher
> Sent: Friday, May 26, 2023 12:52 AM
> To: amd-gfx at lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher at amd.com>;
> samuel.pitoiset at gmail.com
> Subject: [PATCH 1/3] drm/amdgpu: add cached GPU fault structure to vm
> struct
>
> When we get a GPU pge fault, cache the fault for later analysis.

Spelling typo: s/pge/page/

> Cc: samuel.pitoiset at gmail.com
> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 31 ++++++++++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 18 +++++++++++++++
>  2 files changed, 49 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 22f9a65ca0fc..73e022f3daa4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2631,3 +2631,34 @@ void amdgpu_debugfs_vm_bo_info(struct
> amdgpu_vm *vm, struct seq_file *m)
>                  total_done_objs);
>  }
>  #endif
> +
> +/**
> + * amdgpu_vm_update_fault_cache - update cached fault into.
> + * @adev: amdgpu device pointer
> + * @pasid: PASID of the VM
> + * @addr: Address of the fault
> + * @status: fault status register

I guess 'status' here means the value read from the fault status register; it would be clearer to say so.

> + * @vmhub: which vmhub got the fault
> + *
> + * Cache the fault info for later use by userspace in debuggging.
Spelling typo: s/debuggging/debugging/

Regards,
Guchun
> + */
> +void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
> +                               unsigned int pasid,
> +                               uint64_t addr,
> +                               uint32_t status,
> +                               unsigned int vmhub)
> +{
> +     struct amdgpu_vm *vm;
> +     unsigned long flags;
> +
> +     xa_lock_irqsave(&adev->vm_manager.pasids, flags);
> +
> +     vm = xa_load(&adev->vm_manager.pasids, pasid);
> +     if (vm) {
> +             vm->fault_info.addr = addr;
> +             vm->fault_info.status = status;
> +             vm->fault_info.vmhub = vmhub;
> +     }
> +     xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
> +}
> +
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 14f9a2bf3acb..fb66a413110c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -244,6 +244,15 @@ struct amdgpu_vm_update_funcs {
>                     struct dma_fence **fence);
>  };
>
> +struct amdgpu_vm_fault_info {
> +     /* fault address */
> +     uint64_t        addr;
> +     /* fault status register */
> +     uint32_t        status;
> +     /* which vmhub? gfxhub, mmhub, etc. */
> +     unsigned int    vmhub;
> +};
> +
>  struct amdgpu_vm {
>       /* tree of virtual addresses mapped */
>       struct rb_root_cached   va;
> @@ -332,6 +341,9 @@ struct amdgpu_vm {
>
>       /* Memory partition number, -1 means any partition */
>       int8_t                  mem_id;
> +
> +     /* cached fault info */
> +     struct amdgpu_vm_fault_info fault_info;
>  };
>
>  struct amdgpu_vm_manager {
> @@ -540,4 +552,10 @@ static inline void
> amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
>       mutex_unlock(&vm->eviction_lock);
>  }
>
> +void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
> +                               unsigned int pasid,
> +                               uint64_t addr,
> +                               uint32_t status,
> +                               unsigned int vmhub);
> +
>  #endif
> --
> 2.40.1



More information about the amd-gfx mailing list