[PATCH 1/4] drm/amdkfd: Add flush-type parameter to kfd_flush_tlb

Felix Kuehling felix.kuehling at amd.com
Wed Jun 2 14:53:24 UTC 2021


Am 2021-06-02 um 2:53 a.m. schrieb Christian König:
> Mostly a question for Felix and Philip:
>
> I've been wondering for a bit how that case can happen in the first
> place.
>
> I mean if we have a PDE which points to PTEs and then switch that into
> a 2MiB PTE then why wasn't that range invalidated before?
>
> In other words when the PDE points to the PTEs we should have had an
> unmap operation on that range before which should have invalidated the
> TLB.

The unmap operation doesn't change the PDE, it only changes the PTEs one
level down in the page table. For example, imagine you unmap a 4KB BO.
It's the last 4KB mapping within this 2MB block. But the unmapping only
updates the PTE for that 4KB page. It does not consolidate the entire
2MB block into an invalid PDE because it doesn't look outside the small
address range that it's unmapping.

Now you map a new 2MB BO at that virtual address. That's when the PTB
gets freed and the PDE gets turned into a PTE with the P bit set.

Regards,
  Felix


>
> Regards,
> Christian.
>
> Am 02.06.21 um 00:59 schrieb Eric Huang:
>> Add a flush-type parameter to kfd_flush_tlb to provide more TLB
>> flush type options for different use cases.
>>
>> Signed-off-by: Eric Huang <jinhuieric.huang at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c              | 2 +-
>>   drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 6 +++---
>>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h                 | 2 +-
>>   drivers/gpu/drm/amd/amdkfd/kfd_process.c              | 4 ++--
>>   4 files changed, 7 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> index 960913a35ee4..4da8aff3df27 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> @@ -1666,7 +1666,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct
>> file *filep,
>>           if (WARN_ON_ONCE(!peer_pdd))
>>               continue;
>>           if (!amdgpu_read_lock(peer->ddev, true)) {
>> -            kfd_flush_tlb(peer_pdd);
>> +            kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
>>               amdgpu_read_unlock(peer->ddev);
>>           }
>>       }
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> index 2bd621eee4e0..904b8178c1d7 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> @@ -278,7 +278,7 @@ static int allocate_vmid(struct
>> device_queue_manager *dqm,
>>               qpd->vmid,
>>               qpd->page_table_base);
>>       /* invalidate the VM context after pasid and vmid mapping is
>> set up */
>> -    kfd_flush_tlb(qpd_to_pdd(qpd));
>> +    kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
>>         if (dqm->dev->kfd2kgd->set_scratch_backing_va)
>>           dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
>> @@ -314,7 +314,7 @@ static void deallocate_vmid(struct
>> device_queue_manager *dqm,
>>           if (flush_texture_cache_nocpsch(q->device, qpd))
>>               pr_err("Failed to flush TC\n");
>>   -    kfd_flush_tlb(qpd_to_pdd(qpd));
>> +    kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
>>         /* Release the vmid mapping */
>>       set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
>> @@ -885,7 +885,7 @@ static int restore_process_queues_nocpsch(struct
>> device_queue_manager *dqm,
>>                   dqm->dev->kgd,
>>                   qpd->vmid,
>>                   qpd->page_table_base);
>> -        kfd_flush_tlb(pdd);
>> +        kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
>>       }
>>         /* Take a safe reference to the mm_struct, which may otherwise
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index ecdd5e782b81..edce3ecf207d 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -1338,7 +1338,7 @@ void kfd_signal_reset_event(struct kfd_dev *dev);
>>     void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32
>> pasid);
>>   -void kfd_flush_tlb(struct kfd_process_device *pdd);
>> +void kfd_flush_tlb(struct kfd_process_device *pdd, enum
>> TLB_FLUSH_TYPE type);
>>     int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct
>> kfd_process *p);
>>   diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> index 3995002c582b..72741f6579d3 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> @@ -2159,7 +2159,7 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev,
>> struct kfd_process *process,
>>                      KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
>>   }
>>   -void kfd_flush_tlb(struct kfd_process_device *pdd)
>> +void kfd_flush_tlb(struct kfd_process_device *pdd, enum
>> TLB_FLUSH_TYPE type)
>>   {
>>       struct kfd_dev *dev = pdd->dev;
>>   @@ -2172,7 +2172,7 @@ void kfd_flush_tlb(struct kfd_process_device
>> *pdd)
>>                               pdd->qpd.vmid);
>>       } else {
>>           amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
>> -                    pdd->process->pasid, TLB_FLUSH_LEGACY);
>> +                    pdd->process->pasid, type);
>>       }
>>   }
>>   
>


More information about the amd-gfx mailing list