[PATCH v5 4/11] drm/amdkfd: Add migration SMI event

Felix Kuehling felix.kuehling at amd.com
Thu Jun 30 14:29:52 UTC 2022


Am 2022-06-28 um 10:50 schrieb Philip Yang:
> For migration start and end events, output the timestamp when the migration
> starts and ends, the svm range address and size, the GPU id of the migration
> source and destination, and the svm range attributes.
>
> The migration trigger can be prefetch, a CPU or GPU page fault, or TTM
> eviction.
>
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>

Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>


> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_migrate.c    | 53 ++++++++++++++++-----
>   drivers/gpu/drm/amd/amdkfd/kfd_migrate.h    |  5 +-
>   drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 22 +++++++++
>   drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h |  8 ++++
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c        | 16 ++++---
>   5 files changed, 83 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index fb8a94e52656..9667015a6cbc 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -32,6 +32,7 @@
>   #include "kfd_priv.h"
>   #include "kfd_svm.h"
>   #include "kfd_migrate.h"
> +#include "kfd_smi_events.h"
>   
>   #ifdef dev_fmt
>   #undef dev_fmt
> @@ -402,8 +403,9 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
>   static long
>   svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
>   			struct vm_area_struct *vma, uint64_t start,
> -			uint64_t end)
> +			uint64_t end, uint32_t trigger)
>   {
> +	struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
>   	uint64_t npages = (end - start) >> PAGE_SHIFT;
>   	struct kfd_process_device *pdd;
>   	struct dma_fence *mfence = NULL;
> @@ -430,6 +432,11 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
>   	migrate.dst = migrate.src + npages;
>   	scratch = (dma_addr_t *)(migrate.dst + npages);
>   
> +	kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid,
> +				      start >> PAGE_SHIFT, end >> PAGE_SHIFT,
> +				      0, adev->kfd.dev->id, prange->prefetch_loc,
> +				      prange->preferred_loc, trigger);
> +
>   	r = migrate_vma_setup(&migrate);
>   	if (r) {
>   		dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",
> @@ -458,6 +465,10 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
>   	svm_migrate_copy_done(adev, mfence);
>   	migrate_vma_finalize(&migrate);
>   
> +	kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid,
> +				    start >> PAGE_SHIFT, end >> PAGE_SHIFT,
> +				    0, adev->kfd.dev->id, trigger);
> +
>   	svm_range_dma_unmap(adev->dev, scratch, 0, npages);
>   	svm_range_free_dma_mappings(prange);
>   
> @@ -479,6 +490,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
>    * @prange: range structure
>    * @best_loc: the device to migrate to
>    * @mm: the process mm structure
> + * @trigger: reason of migration
>    *
>    * Context: Process context, caller hold mmap read lock, svms lock, prange lock
>    *
> @@ -487,7 +499,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
>    */
>   static int
>   svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
> -			struct mm_struct *mm)
> +			struct mm_struct *mm, uint32_t trigger)
>   {
>   	unsigned long addr, start, end;
>   	struct vm_area_struct *vma;
> @@ -524,7 +536,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
>   			break;
>   
>   		next = min(vma->vm_end, end);
> -		r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next);
> +		r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger);
>   		if (r < 0) {
>   			pr_debug("failed %ld to migrate\n", r);
>   			break;
> @@ -655,8 +667,10 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
>    */
>   static long
>   svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
> -		       struct vm_area_struct *vma, uint64_t start, uint64_t end)
> +		       struct vm_area_struct *vma, uint64_t start, uint64_t end,
> +		       uint32_t trigger)
>   {
> +	struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
>   	uint64_t npages = (end - start) >> PAGE_SHIFT;
>   	unsigned long upages = npages;
>   	unsigned long cpages = 0;
> @@ -685,6 +699,11 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
>   	migrate.dst = migrate.src + npages;
>   	scratch = (dma_addr_t *)(migrate.dst + npages);
>   
> +	kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid,
> +				      start >> PAGE_SHIFT, end >> PAGE_SHIFT,
> +				      adev->kfd.dev->id, 0, prange->prefetch_loc,
> +				      prange->preferred_loc, trigger);
> +
>   	r = migrate_vma_setup(&migrate);
>   	if (r) {
>   		dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",
> @@ -715,6 +734,11 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
>   
>   	svm_migrate_copy_done(adev, mfence);
>   	migrate_vma_finalize(&migrate);
> +
> +	kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid,
> +				    start >> PAGE_SHIFT, end >> PAGE_SHIFT,
> +				    adev->kfd.dev->id, 0, trigger);
> +
>   	svm_range_dma_unmap(adev->dev, scratch, 0, npages);
>   
>   out_free:
> @@ -732,13 +756,15 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
>    * svm_migrate_vram_to_ram - migrate svm range from device to system
>    * @prange: range structure
>    * @mm: process mm, use current->mm if NULL
> + * @trigger: reason of migration
>    *
>    * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
>    *
>    * Return:
>    * 0 - OK, otherwise error code
>    */
> -int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
> +int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
> +			    uint32_t trigger)
>   {
>   	struct amdgpu_device *adev;
>   	struct vm_area_struct *vma;
> @@ -779,7 +805,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
>   		}
>   
>   		next = min(vma->vm_end, end);
> -		r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next);
> +		r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next, trigger);
>   		if (r < 0) {
>   			pr_debug("failed %ld to migrate prange %p\n", r, prange);
>   			break;
> @@ -802,6 +828,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
>    * @prange: range structure
>    * @best_loc: the device to migrate to
>    * @mm: process mm, use current->mm if NULL
> + * @trigger: reason of migration
>    *
>    * Context: Process context, caller hold mmap read lock, svms lock, prange lock
>    *
> @@ -810,7 +837,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
>    */
>   static int
>   svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
> -			 struct mm_struct *mm)
> +			 struct mm_struct *mm, uint32_t trigger)
>   {
>   	int r, retries = 3;
>   
> @@ -822,7 +849,7 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
>   	pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);
>   
>   	do {
> -		r = svm_migrate_vram_to_ram(prange, mm);
> +		r = svm_migrate_vram_to_ram(prange, mm, trigger);
>   		if (r)
>   			return r;
>   	} while (prange->actual_loc && --retries);
> @@ -830,17 +857,17 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
>   	if (prange->actual_loc)
>   		return -EDEADLK;
>   
> -	return svm_migrate_ram_to_vram(prange, best_loc, mm);
> +	return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
>   }
>   
>   int
>   svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
> -		    struct mm_struct *mm)
> +		    struct mm_struct *mm, uint32_t trigger)
>   {
>   	if  (!prange->actual_loc)
> -		return svm_migrate_ram_to_vram(prange, best_loc, mm);
> +		return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
>   	else
> -		return svm_migrate_vram_to_vram(prange, best_loc, mm);
> +		return svm_migrate_vram_to_vram(prange, best_loc, mm, trigger);
>   
>   }
>   
> @@ -909,7 +936,7 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
>   		goto out_unlock_prange;
>   	}
>   
> -	r = svm_migrate_vram_to_ram(prange, mm);
> +	r = svm_migrate_vram_to_ram(prange, mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU);
>   	if (r)
>   		pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r,
>   			 prange, prange->start, prange->last);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
> index 2f5b3394c9ed..b3f0754b32fa 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
> @@ -41,8 +41,9 @@ enum MIGRATION_COPY_DIR {
>   };
>   
>   int svm_migrate_to_vram(struct svm_range *prange,  uint32_t best_loc,
> -			struct mm_struct *mm);
> -int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm);
> +			struct mm_struct *mm, uint32_t trigger);
> +int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
> +			    uint32_t trigger);
>   unsigned long
>   svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> index b7e68283925f..ec4d278c2a47 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> @@ -261,6 +261,28 @@ void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t pid,
>   			  pid, address, dev->id, migration ? 'M' : 'U');
>   }
>   
> +void kfd_smi_event_migration_start(struct kfd_dev *dev, pid_t pid,
> +				   unsigned long start, unsigned long end,
> +				   uint32_t from, uint32_t to,
> +				   uint32_t prefetch_loc, uint32_t preferred_loc,
> +				   uint32_t trigger)
> +{
> +	kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_MIGRATE_START,
> +			  "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n",
> +			  ktime_get_boottime_ns(), pid, start, end - start,
> +			  from, to, prefetch_loc, preferred_loc, trigger);
> +}
> +
> +void kfd_smi_event_migration_end(struct kfd_dev *dev, pid_t pid,
> +				 unsigned long start, unsigned long end,
> +				 uint32_t from, uint32_t to, uint32_t trigger)
> +{
> +	kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_MIGRATE_END,
> +			  "%lld -%d @%lx(%lx) %x->%x %d\n",
> +			  ktime_get_boottime_ns(), pid, start, end - start,
> +			  from, to, trigger);
> +}
> +
>   int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
>   {
>   	struct kfd_smi_client *client;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
> index 7903718cd9eb..ec5d74a2fef4 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
> @@ -34,4 +34,12 @@ void kfd_smi_event_page_fault_start(struct kfd_dev *dev, pid_t pid,
>   				    ktime_t ts);
>   void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t pid,
>   				  unsigned long address, bool migration);
> +void kfd_smi_event_migration_start(struct kfd_dev *dev, pid_t pid,
> +			     unsigned long start, unsigned long end,
> +			     uint32_t from, uint32_t to,
> +			     uint32_t prefetch_loc, uint32_t preferred_loc,
> +			     uint32_t trigger);
> +void kfd_smi_event_migration_end(struct kfd_dev *dev, pid_t pid,
> +			     unsigned long start, unsigned long end,
> +			     uint32_t from, uint32_t to, uint32_t trigger);
>   #endif
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 2ad08a1f38dd..5cead2a0e819 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -2821,7 +2821,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>   	if (prange->actual_loc != best_loc) {
>   		migration = true;
>   		if (best_loc) {
> -			r = svm_migrate_to_vram(prange, best_loc, mm);
> +			r = svm_migrate_to_vram(prange, best_loc, mm,
> +					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
>   			if (r) {
>   				pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",
>   					 r, addr);
> @@ -2829,12 +2830,14 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>   				 * VRAM failed
>   				 */
>   				if (prange->actual_loc)
> -					r = svm_migrate_vram_to_ram(prange, mm);
> +					r = svm_migrate_vram_to_ram(prange, mm,
> +					   KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
>   				else
>   					r = 0;
>   			}
>   		} else {
> -			r = svm_migrate_vram_to_ram(prange, mm);
> +			r = svm_migrate_vram_to_ram(prange, mm,
> +					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
>   		}
>   		if (r) {
>   			pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
> @@ -3157,12 +3160,12 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
>   		return 0;
>   
>   	if (!best_loc) {
> -		r = svm_migrate_vram_to_ram(prange, mm);
> +		r = svm_migrate_vram_to_ram(prange, mm, KFD_MIGRATE_TRIGGER_PREFETCH);
>   		*migrated = !r;
>   		return r;
>   	}
>   
> -	r = svm_migrate_to_vram(prange, best_loc, mm);
> +	r = svm_migrate_to_vram(prange, best_loc, mm, KFD_MIGRATE_TRIGGER_PREFETCH);
>   	*migrated = !r;
>   
>   	return r;
> @@ -3220,7 +3223,8 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
>   		mutex_lock(&prange->migrate_mutex);
>   		do {
>   			r = svm_migrate_vram_to_ram(prange,
> -						svm_bo->eviction_fence->mm);
> +						svm_bo->eviction_fence->mm,
> +						KFD_MIGRATE_TRIGGER_TTM_EVICTION);
>   		} while (!r && prange->actual_loc && --retries);
>   
>   		if (!r && prange->actual_loc)


More information about the amd-gfx mailing list