[PATCH v2 2/2] drm/amdkfd: Update mapping if range attributes changed

Felix Kuehling felix.kuehling at amd.com
Mon Apr 25 13:47:06 UTC 2022


Am 2022-04-22 um 18:05 schrieb Felix Kuehling:
> On 2022-04-22 10:06, Philip Yang wrote:
>> Changing SVM range mapping flags or access attributes doesn't trigger
>> migration; if the range is already mapped on GPUs we should update the
>> GPU mapping and pass the flush_tlb flag as true to amdgpu vm.
>>
>> Changing SVM range preferred_loc or migration granularity doesn't
>> require updating the GPU mapping, so skip the validate_and_map.
>>
>> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 46 +++++++++++++++++++---------
>>   1 file changed, 32 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> index 8a077cd066a1..e740384df9c7 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> @@ -686,7 +686,8 @@ svm_range_check_attr(struct kfd_process *p,
>>     static void
>>   svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
>> -              uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
>> +              uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
>> +              bool *update_mapping)
>>   {
>>       uint32_t i;
>>       int gpuidx;
>> @@ -702,6 +703,7 @@ svm_range_apply_attrs(struct kfd_process *p, 
>> struct svm_range *prange,
>>           case KFD_IOCTL_SVM_ATTR_ACCESS:
>>           case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
>>           case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
>> +            *update_mapping = true;
>>               gpuidx = kfd_process_gpuidx_from_gpuid(p,
>>                                      attrs[i].value);
>>               if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
>> @@ -716,9 +718,11 @@ svm_range_apply_attrs(struct kfd_process *p, 
>> struct svm_range *prange,
>>               }
>>               break;
>>           case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
>> +            *update_mapping = true;
>>               prange->flags |= attrs[i].value;
>>               break;
>>           case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
>> +            *update_mapping = true;
>>               prange->flags &= ~attrs[i].value;
>>               break;
>>           case KFD_IOCTL_SVM_ATTR_GRANULARITY:
>> @@ -1254,7 +1258,7 @@ static int
>>   svm_range_map_to_gpu(struct kfd_process_device *pdd, struct 
>> svm_range *prange,
>>                unsigned long offset, unsigned long npages, bool 
>> readonly,
>>                dma_addr_t *dma_addr, struct amdgpu_device *bo_adev,
>> -             struct dma_fence **fence)
>> +             struct dma_fence **fence, bool flush_tlb)
>>   {
>>       struct amdgpu_device *adev = pdd->dev->adev;
>>       struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
>> @@ -1292,7 +1296,7 @@ svm_range_map_to_gpu(struct kfd_process_device 
>> *pdd, struct svm_range *prange,
>>                (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
>>                pte_flags);
>>   -        r = amdgpu_vm_update_range(adev, vm, false, false, false, 
>> NULL,
>> +        r = amdgpu_vm_update_range(adev, vm, false, false, 
>> flush_tlb, NULL,
>>                          last_start, prange->start + i,
>>                          pte_flags,
>>                          last_start - prange->start,
>> @@ -1326,7 +1330,7 @@ svm_range_map_to_gpu(struct kfd_process_device 
>> *pdd, struct svm_range *prange,
>>   static int
>>   svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
>>                 unsigned long npages, bool readonly,
>> -              unsigned long *bitmap, bool wait)
>> +              unsigned long *bitmap, bool wait, bool flush_tlb)
>>   {
>>       struct kfd_process_device *pdd;
>>       struct amdgpu_device *bo_adev;
>> @@ -1361,7 +1365,8 @@ svm_range_map_to_gpus(struct svm_range *prange, 
>> unsigned long offset,
>>             r = svm_range_map_to_gpu(pdd, prange, offset, npages, 
>> readonly,
>>                        prange->dma_addr[gpuidx],
>> -                     bo_adev, wait ? &fence : NULL);
>> +                     bo_adev, wait ? &fence : NULL,
>> +                     flush_tlb);
>>           if (r)
>>               break;
>>   @@ -1482,8 +1487,8 @@ static void *kfd_svm_page_owner(struct 
>> kfd_process *p, int32_t gpuidx)
>>    * 5. Release page table (and SVM BO) reservation
>>    */
>>   static int svm_range_validate_and_map(struct mm_struct *mm,
>> -                      struct svm_range *prange,
>> -                      int32_t gpuidx, bool intr, bool wait)
>> +                      struct svm_range *prange, int32_t gpuidx,
>> +                      bool intr, bool wait, bool flush_tlb)
>>   {
>>       struct svm_validate_context ctx;
>>       unsigned long start, end, addr;
>> @@ -1522,8 +1527,12 @@ static int svm_range_validate_and_map(struct 
>> mm_struct *mm,
>>                 prange->bitmap_aip, MAX_GPU_INSTANCE);
>>       }
>>   -    if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE))
>> -        return 0;
>> +    if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE)) {
>> +        if (!prange->mapped_to_gpu)
>> +            return 0;
>> +
>> +        bitmap_copy(ctx.bitmap, prange->bitmap_access, 
>> MAX_GPU_INSTANCE);
>> +    }
>>         if (prange->actual_loc && !prange->ttm_res) {
>>           /* This should never happen. actual_loc gets set by
>> @@ -1595,7 +1604,7 @@ static int svm_range_validate_and_map(struct 
>> mm_struct *mm,
>>           }
>>             r = svm_range_map_to_gpus(prange, offset, npages, readonly,
>> -                      ctx.bitmap, wait);
>> +                      ctx.bitmap, wait, flush_tlb);
>>     unlock_out:
>>           svm_range_unlock(prange);
>> @@ -1691,7 +1700,7 @@ static void svm_range_restore_work(struct 
>> work_struct *work)
>>           mutex_lock(&prange->migrate_mutex);
>>             r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
>> -                           false, true);
>> +                           false, true, false);
>>           if (r)
>>               pr_debug("failed %d to map 0x%lx to gpus\n", r,
>>                    prange->start);
>> @@ -2847,7 +2856,7 @@ svm_range_restore_pages(struct amdgpu_device 
>> *adev, unsigned int pasid,
>>           }
>>       }
>>   -    r = svm_range_validate_and_map(mm, prange, gpuidx, false, false);
>> +    r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, 
>> false);
>>       if (r)
>>           pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
>>                r, svms, prange->start, prange->last);
>> @@ -3264,6 +3273,8 @@ svm_range_set_attr(struct kfd_process *p, 
>> struct mm_struct *mm,
>>       struct svm_range_list *svms;
>>       struct svm_range *prange;
>>       struct svm_range *next;
>> +    bool update_mapping = false;
>> +    bool flush_tlb;
>>       int r = 0;
>>         pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
>> @@ -3302,7 +3313,7 @@ svm_range_set_attr(struct kfd_process *p, 
>> struct mm_struct *mm,
>>           svm_range_add_notifier_locked(mm, prange);
>>       }
>>       list_for_each_entry(prange, &update_list, update_list) {
>> -        svm_range_apply_attrs(p, prange, nattr, attrs);
>> +        svm_range_apply_attrs(p, prange, nattr, attrs, 
>> &update_mapping);
>>           /* TODO: unmap ranges from GPU that lost access */
>>       }
>>       list_for_each_entry_safe(prange, next, &remove_list, 
>> update_list) {
>> @@ -3335,8 +3346,15 @@ svm_range_set_attr(struct kfd_process *p, 
>> struct mm_struct *mm,
>>               continue;
>>           }
>>   +        if (!migrated && !update_mapping) {
>> +            mutex_unlock(&prange->migrate_mutex);
>> +            continue;
>> +        }
>> +
>> +        flush_tlb = !migrated && update_mapping && 
>> prange->mapped_to_gpu;
>> +
>
> Please check that this doesn't break the XNACK off case. If a new 
> range is created, and that does not trigger a migration or any of the 
> conditions that set update_mapping, we still need to make sure the GPU 
> mapping is created so that a GPU access to the new range won't fault.

Thanks for following up offline. For the record, with XNACK off, the 
default is "not accessible". So to create a GPU mapping, the application 
has to set one of the accessibility attributes, which will trigger a 
page table update with your patch.

The patch is

Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>


>
> Regards,
>   Felix
>
>
>>           r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
>> -                           true, true);
>> +                           true, true, flush_tlb);
>>           if (r)
>>               pr_debug("failed %d to map svm range\n", r);


More information about the amd-gfx mailing list