[PATCH 3/3] drm/amdgpu: add the amdgpu buffer object move speed metrics

Christian König ckoenig.leichtzumerken at gmail.com
Mon Apr 22 15:01:08 UTC 2024


Am 16.04.24 um 10:51 schrieb Prike Liang:
> Add the amdgpu buffer object move speed metrics.

What is that supposed to be good for? It adds quite a bit of complexity for a
feature we actually want to deprecate.

Regards,
Christian.

>
> Signed-off-by: Prike Liang <Prike.Liang at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 78 ++++++++++++++-----
>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c   |  2 +-
>   3 files changed, 61 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 163d221b3bbd..2840f1536b51 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -502,7 +502,7 @@ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb);
>   /*
>    * Benchmarking
>    */
> -int amdgpu_benchmark(struct amdgpu_device *adev, int test_number);
> +int amdgpu_benchmark(struct amdgpu_device *adev, int test_number, struct seq_file *m);
>   
>   int amdgpu_benchmark_dump(struct amdgpu_device *adev, struct seq_file *m);
>   /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
> index f6848b574dea..fcd186ca088a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
> @@ -65,20 +65,27 @@ static void amdgpu_benchmark_log_results(struct amdgpu_device *adev,
>   					 int n, unsigned size,
>   					 s64 time_ms,
>   					 unsigned sdomain, unsigned ddomain,
> -					 char *kind)
> +					 char *kind, struct seq_file *m)
>   {
>   	s64 throughput = (n * (size >> 10));
>   
>   	throughput = div64_s64(throughput, time_ms);
>   
> -	dev_info(adev->dev, "amdgpu: %s %u bo moves of %u kB from"
> -		 " %d to %d in %lld ms, throughput: %lld Mb/s or %lld MB/s\n",
> -		 kind, n, size >> 10, sdomain, ddomain, time_ms,
> -		 throughput * 8, throughput);
> +	if (m) {
> +		seq_printf(m, "\tamdgpu: %s %u bo moves of %u kB from"
> +			 " %d to %d in %lld ms, throughput: %lld Mb/s or %lld MB/s\n",
> +			kind, n, size >> 10, sdomain, ddomain, time_ms,
> +			throughput * 8, throughput);
> +	} else {
> +		dev_info(adev->dev, "amdgpu: %s %u bo moves of %u kB from"
> +			 " %d to %d in %lld ms, throughput: %lld Mb/s or %lld MB/s\n",
> +			kind, n, size >> 10, sdomain, ddomain, time_ms,
> +			throughput * 8, throughput);
> +	}
>   }
>   
>   static int amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
> -				 unsigned sdomain, unsigned ddomain)
> +				 unsigned sdomain, unsigned ddomain, struct seq_file *m)
>   {
>   	struct amdgpu_bo *dobj = NULL;
>   	struct amdgpu_bo *sobj = NULL;
> @@ -109,7 +116,7 @@ static int amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
>   			goto out_cleanup;
>   		else
>   			amdgpu_benchmark_log_results(adev, n, size, time_ms,
> -						     sdomain, ddomain, "dma");
> +						     sdomain, ddomain, "dma", m);
>   	}
>   
>   out_cleanup:
> @@ -124,7 +131,7 @@ static int amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
>   	return r;
>   }
>   
> -int amdgpu_benchmark(struct amdgpu_device *adev, int test_number)
> +int amdgpu_benchmark(struct amdgpu_device *adev, int test_number, struct seq_file *m)
>   {
>   	int i, r;
>   	static const int common_modes[AMDGPU_BENCHMARK_COMMON_MODES_N] = {
> @@ -153,13 +160,16 @@ int amdgpu_benchmark(struct amdgpu_device *adev, int test_number)
>   		dev_info(adev->dev,
>   			 "benchmark test: %d (simple test, VRAM to GTT and GTT to VRAM)\n",
>   			 test_number);
> +		if (m)
> +			seq_printf(m, "\tbenchmark test: %d (simple test, VRAM to GTT and GTT to VRAM)\n",
> +			 test_number);
>   		/* simple test, VRAM to GTT and GTT to VRAM */
>   		r = amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_GTT,
> -					  AMDGPU_GEM_DOMAIN_VRAM);
> +					  AMDGPU_GEM_DOMAIN_VRAM, m);
>   		if (r)
>   			goto done;
>   		r = amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_VRAM,
> -					  AMDGPU_GEM_DOMAIN_GTT);
> +					  AMDGPU_GEM_DOMAIN_GTT, m);
>   		if (r)
>   			goto done;
>   		break;
> @@ -167,9 +177,13 @@ int amdgpu_benchmark(struct amdgpu_device *adev, int test_number)
>   		dev_info(adev->dev,
>   			 "benchmark test: %d (simple test, VRAM to VRAM)\n",
>   			 test_number);
> +		if (m)
> +			seq_printf(m, "\tbenchmark test: %d (simple test, VRAM to VRAM)\n",
> +			 test_number);
> +
>   		/* simple test, VRAM to VRAM */
>   		r = amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_VRAM,
> -					  AMDGPU_GEM_DOMAIN_VRAM);
> +					  AMDGPU_GEM_DOMAIN_VRAM, m);
>   		if (r)
>   			goto done;
>   		break;
> @@ -177,11 +191,15 @@ int amdgpu_benchmark(struct amdgpu_device *adev, int test_number)
>   		dev_info(adev->dev,
>   			 "benchmark test: %d (GTT to VRAM, buffer size sweep, powers of 2)\n",
>   			 test_number);
> +		if (m)
> +			seq_printf(m, "\tbenchmark test: %d (GTT to VRAM, buffer size sweep, powers of 2)\n",
> +				test_number);
> +
>   		/* GTT to VRAM, buffer size sweep, powers of 2 */
>   		for (i = 1; i <= 16384; i <<= 1) {
>   			r = amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
>   						  AMDGPU_GEM_DOMAIN_GTT,
> -						  AMDGPU_GEM_DOMAIN_VRAM);
> +						  AMDGPU_GEM_DOMAIN_VRAM, m);
>   			if (r)
>   				goto done;
>   		}
> @@ -190,11 +208,14 @@ int amdgpu_benchmark(struct amdgpu_device *adev, int test_number)
>   		dev_info(adev->dev,
>   			 "benchmark test: %d (VRAM to GTT, buffer size sweep, powers of 2)\n",
>   			 test_number);
> +		if (m)
> +			seq_printf(m,"\tbenchmark test: %d (VRAM to GTT, buffer size sweep, powers of 2)\n",
> +				test_number);
>   		/* VRAM to GTT, buffer size sweep, powers of 2 */
>   		for (i = 1; i <= 16384; i <<= 1) {
>   			r = amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
>   						  AMDGPU_GEM_DOMAIN_VRAM,
> -						  AMDGPU_GEM_DOMAIN_GTT);
> +						  AMDGPU_GEM_DOMAIN_GTT, m);
>   			if (r)
>   				goto done;
>   		}
> @@ -203,11 +224,14 @@ int amdgpu_benchmark(struct amdgpu_device *adev, int test_number)
>   		dev_info(adev->dev,
>   			 "benchmark test: %d (VRAM to VRAM, buffer size sweep, powers of 2)\n",
>   			 test_number);
> +		if (m)
> +			seq_printf(m, "\tbenchmark test: %d (VRAM to VRAM, buffer size sweep, powers of 2)\n",
> +				 test_number);
>   		/* VRAM to VRAM, buffer size sweep, powers of 2 */
>   		for (i = 1; i <= 16384; i <<= 1) {
>   			r = amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
>   						  AMDGPU_GEM_DOMAIN_VRAM,
> -						  AMDGPU_GEM_DOMAIN_VRAM);
> +						  AMDGPU_GEM_DOMAIN_VRAM, m);
>   			if (r)
>   				goto done;
>   		}
> @@ -216,11 +240,14 @@ int amdgpu_benchmark(struct amdgpu_device *adev, int test_number)
>   		dev_info(adev->dev,
>   			 "benchmark test: %d (GTT to VRAM, buffer size sweep, common modes)\n",
>   			 test_number);
> +		if (m)
> +			seq_printf(m,"\tbenchmark test: %d (GTT to VRAM, buffer size sweep, common modes)\n",
> +				test_number);
>   		/* GTT to VRAM, buffer size sweep, common modes */
>   		for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++) {
>   			r = amdgpu_benchmark_move(adev, common_modes[i],
>   						  AMDGPU_GEM_DOMAIN_GTT,
> -						  AMDGPU_GEM_DOMAIN_VRAM);
> +						  AMDGPU_GEM_DOMAIN_VRAM, m);
>   			if (r)
>   				goto done;
>   		}
> @@ -229,11 +256,14 @@ int amdgpu_benchmark(struct amdgpu_device *adev, int test_number)
>   		dev_info(adev->dev,
>   			 "benchmark test: %d (VRAM to GTT, buffer size sweep, common modes)\n",
>   			 test_number);
> +		if (m)
> +			seq_printf(m,"\tbenchmark test: %d (VRAM to GTT, buffer size sweep, common modes)\n",
> +				test_number);
>   		/* VRAM to GTT, buffer size sweep, common modes */
>   		for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++) {
>   			r = amdgpu_benchmark_move(adev, common_modes[i],
>   						  AMDGPU_GEM_DOMAIN_VRAM,
> -						  AMDGPU_GEM_DOMAIN_GTT);
> +						  AMDGPU_GEM_DOMAIN_GTT, m);
>   			if (r)
>   				goto done;
>   		}
> @@ -241,12 +271,15 @@ int amdgpu_benchmark(struct amdgpu_device *adev, int test_number)
>   	case 8:
>   		dev_info(adev->dev,
>   			 "benchmark test: %d (VRAM to VRAM, buffer size sweep, common modes)\n",
> -			 test_number);
> +			test_number);
> +		if (m)
> +			seq_printf(m,"\tbenchmark test: %d (VRAM to VRAM, buffer size sweep, common modes)\n",
> +				test_number);
>   		/* VRAM to VRAM, buffer size sweep, common modes */
>   		for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++) {
>   			r = amdgpu_benchmark_move(adev, common_modes[i],
>   					      AMDGPU_GEM_DOMAIN_VRAM,
> -					      AMDGPU_GEM_DOMAIN_VRAM);
> +					      AMDGPU_GEM_DOMAIN_VRAM, m);
>   			if (r)
>   				goto done;
>   		}
> @@ -449,7 +482,7 @@ static void amdgpu_benchmark_mem_metrics_dump(struct amdgpu_device *adev, struct
>   			&adev->mman.gtt_mgr.manager;
>   	struct ttm_resource_manager *vram_man =
>   			&adev->mman.vram_mgr.manager;
> -	uint32_t l1_cache_size;
> +	uint32_t l1_cache_size, j;
>   
>   	seq_puts(m, "amdgpu benchmark mem metrics dump:\n");
>   
> @@ -483,6 +516,13 @@ static void amdgpu_benchmark_mem_metrics_dump(struct amdgpu_device *adev, struct
>   			atomic64_read(&adev->gart_pin_size));
>   	seq_printf(m, "\tGTT heap usage size:%lld KB\n", ttm_resource_manager_usage(gtt_man));
>   
> +	seq_printf(m, "\t--------amdgpu buffer object move speed test begin--------\n");
> +	for (j = 1; j < 8; j++) {
> +		/*TODO: Add the cases of gfx and CPU move cases.*/
> +		amdgpu_benchmark(adev, j, m);
> +	}
> +	seq_printf(m, "\t--------amdgpu buffer object move speed test end--------\n");
> +
>   	seq_printf(m, "\n");
>   
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index 7e935b9736c1..4d710c5f8465 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -1760,7 +1760,7 @@ static int amdgpu_debugfs_benchmark(void *data, u64 val)
>   		return r;
>   	}
>   
> -	r = amdgpu_benchmark(adev, val);
> +	r = amdgpu_benchmark(adev, val, NULL);
>   
>   	pm_runtime_mark_last_busy(dev->dev);
>   	pm_runtime_put_autosuspend(dev->dev);



More information about the amd-gfx mailing list