[PATCH v5 4/5] drm/amdgpu: address remove from fault filter
Christian König
ckoenig.leichtzumerken at gmail.com
Wed Apr 28 09:00:45 UTC 2021
Am 28.04.21 um 09:53 schrieb Felix Kuehling:
> Am 2021-04-28 um 2:54 a.m. schrieb Christian König:
>> Am 27.04.21 um 20:21 schrieb Felix Kuehling:
>>> On 2021-04-27 10:51 a.m., Philip Yang wrote:
>>>> Add interface to remove address from fault filter ring by resetting
>>>> fault ring entry key, then future vm fault on the address will be
>>>> processed to recover.
>>>>
>>>> Define fault key as atomic64_t type to use atomic read/set/cmpxchg key
>>>> to protect fault ring access by interrupt handler and interrupt
>>>> deferred
>>>> work for vg20. Change fault->timestamp to 48-bit to share same uint64_t
>>>> with 8-bit fault->next, it is enough for 48bit IH timestamp.
>>>>
>>>> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
>>>> ---
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 48
>>>> ++++++++++++++++++++++---
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 ++--
>>>> 2 files changed, 48 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>>>> index c39ed9eb0987..a2e81e913abb 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>>>> @@ -332,6 +332,17 @@ void amdgpu_gmc_agp_location(struct
>>>> amdgpu_device *adev, struct amdgpu_gmc *mc)
>>>> mc->agp_size >> 20, mc->agp_start, mc->agp_end);
>>>> }
>>>> +/**
>>>> + * amdgpu_gmc_fault_key - get hask key from vm fault address and pasid
>>>> + *
>>>> + * @addr: 48bit physical address
>>>> + * @pasid: 4 bit
>>> This comment is misleading. The PASID is not 4-bit. It's 16-bit. But
>>> shifting the address by 4 bit is sufficient because the address is
>>> page-aligned, which means the low 12 bits are 0. So this would be
>>> more accurate:
>>>
>>> @addr: 48 bit physical address, page aligned (36 significant bits)
>>> @pasid: 16 bit process address space identifier
>>>
>>> With that fixed, the patch is
>>>
>>> Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
>>>
>>>
>>>> + */
>>>> +static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t
>>>> pasid)
>>>> +{
>>>> + return addr << 4 | pasid;
>>>> +}
>> Maybe changing this so that we have "addr * ((1 << 16) /
>> AMDGPU_PAGE_SIZE) | pasid" would help to better document that?
> I find this a mix of multiplication, division and bit-shift more
> confusing. How about "addr << (16 - AMDGPU_GPU_PAGE_SHIFT) | pasid"?
Yeah, that is even better.
Regards,
Christian.
>
> Regards,
> Felix
>
>
>> Christian.
>>
>>>> +
>>>> /**
>>>> * amdgpu_gmc_filter_faults - filter VM faults
>>>> *
>>>> @@ -348,8 +359,7 @@ bool amdgpu_gmc_filter_faults(struct
>>>> amdgpu_device *adev, uint64_t addr,
>>>> uint16_t pasid, uint64_t timestamp)
>>>> {
>>>> struct amdgpu_gmc *gmc = &adev->gmc;
>>>> -
>>>> - uint64_t stamp, key = addr << 4 | pasid;
>>>> + uint64_t stamp, key = amdgpu_gmc_fault_key(addr, pasid);
>>>> struct amdgpu_gmc_fault *fault;
>>>> uint32_t hash;
>>>> @@ -365,7 +375,7 @@ bool amdgpu_gmc_filter_faults(struct
>>>> amdgpu_device *adev, uint64_t addr,
>>>> while (fault->timestamp >= stamp) {
>>>> uint64_t tmp;
>>>> - if (fault->key == key)
>>>> + if (atomic64_read(&fault->key) == key)
>>>> return true;
>>>> tmp = fault->timestamp;
>>>> @@ -378,7 +388,7 @@ bool amdgpu_gmc_filter_faults(struct
>>>> amdgpu_device *adev, uint64_t addr,
>>>> /* Add the fault to the ring */
>>>> fault = &gmc->fault_ring[gmc->last_fault];
>>>> - fault->key = key;
>>>> + atomic64_set(&fault->key, key);
>>>> fault->timestamp = timestamp;
>>>> /* And update the hash */
>>>> @@ -387,6 +397,36 @@ bool amdgpu_gmc_filter_faults(struct
>>>> amdgpu_device *adev, uint64_t addr,
>>>> return false;
>>>> }
>>>> +/**
>>>> + * amdgpu_gmc_filter_faults_remove - remove address from VM faults
>>>> filter
>>>> + *
>>>> + * @adev: amdgpu device structure
>>>> + * @addr: address of the VM fault
>>>> + * @pasid: PASID of the process causing the fault
>>>> + *
>>>> + * Remove the address from fault filter, then future vm fault on
>>>> this address
>>>> + * will pass to retry fault handler to recover.
>>>> + */
>>>> +void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev,
>>>> uint64_t addr,
>>>> + uint16_t pasid)
>>>> +{
>>>> + struct amdgpu_gmc *gmc = &adev->gmc;
>>>> + uint64_t key = amdgpu_gmc_fault_key(addr, pasid);
>>>> + struct amdgpu_gmc_fault *fault;
>>>> + uint32_t hash;
>>>> + uint64_t tmp;
>>>> +
>>>> + hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
>>>> + fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
>>>> + do {
>>>> + if (atomic64_cmpxchg(&fault->key, key, 0) == key)
>>>> + break;
>>>> +
>>>> + tmp = fault->timestamp;
>>>> + fault = &gmc->fault_ring[fault->next];
>>>> + } while (fault->timestamp < tmp);
>>>> +}
>>>> +
>>>> int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
>>>> {
>>>> int r;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>>> index 9d11c02a3938..6aa1d52d3aee 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>>> @@ -66,9 +66,9 @@ struct firmware;
>>>> * GMC page fault information
>>>> */
>>>> struct amdgpu_gmc_fault {
>>>> - uint64_t timestamp;
>>>> + uint64_t timestamp:48;
>>>> uint64_t next:AMDGPU_GMC_FAULT_RING_ORDER;
>>>> - uint64_t key:52;
>>>> + atomic64_t key;
>>>> };
>>>> /*
>>>> @@ -318,6 +318,8 @@ void amdgpu_gmc_agp_location(struct
>>>> amdgpu_device *adev,
>>>> struct amdgpu_gmc *mc);
>>>> bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t
>>>> addr,
>>>> uint16_t pasid, uint64_t timestamp);
>>>> +void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev,
>>>> uint64_t addr,
>>>> + uint16_t pasid);
>>>> int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
>>>> void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
>>>> int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the amd-gfx
mailing list