[PATCH 3/3] drm/amdgpu: add the amdgpu buffer object move speed metrics

Liang, Prike Prike.Liang at amd.com
Tue Apr 23 08:37:32 UTC 2024


[Public]

Hi, Christian

The basic idea is to collect the following performance data and export it, in raw form, through a centralized debugfs entry. This raw data may help with performance tuning on the AMDGPU kernel driver side. Additionally, it should be easy for tool libraries to consume this data in order to enhance the tools' functionality.

- AMDGPU engine configuration dump
- GPU bus transaction speed metrics
- AMDGPU buffer move speed metrics
- AMDGPU performance counters
- AMDGPU driver software information dump

Thanks,
Prike

> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken at gmail.com>
> Sent: Monday, April 22, 2024 11:01 PM
> To: Liang, Prike <Prike.Liang at amd.com>; amd-gfx at lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher at amd.com>
> Subject: Re: [PATCH 3/3] drm/amdgpu: add the amdgpu buffer object move
> speed metrics
>
> Am 16.04.24 um 10:51 schrieb Prike Liang:
> > Add the amdgpu buffer object move speed metrics.
>
> What should that be good for? It adds quite a bunch of complexity for a
> feature we actually want to deprecate.
>
> Regards,
> Christian.
>
> >
> > Signed-off-by: Prike Liang <Prike.Liang at amd.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  2 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 78
> ++++++++++++++-----
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c   |  2 +-
> >   3 files changed, 61 insertions(+), 21 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index 163d221b3bbd..2840f1536b51 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -502,7 +502,7 @@ void amdgpu_device_wb_free(struct
> amdgpu_device *adev, u32 wb);
> >   /*
> >    * Benchmarking
> >    */
> > -int amdgpu_benchmark(struct amdgpu_device *adev, int test_number);
> > +int amdgpu_benchmark(struct amdgpu_device *adev, int test_number,
> > +struct seq_file *m);
> >
> >   int amdgpu_benchmark_dump(struct amdgpu_device *adev, struct
> seq_file *m);
> >   /*
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
> > index f6848b574dea..fcd186ca088a 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
> > @@ -65,20 +65,27 @@ static void amdgpu_benchmark_log_results(struct
> amdgpu_device *adev,
> >                                      int n, unsigned size,
> >                                      s64 time_ms,
> >                                      unsigned sdomain, unsigned
> ddomain,
> > -                                    char *kind)
> > +                                    char *kind, struct seq_file *m)
> >   {
> >     s64 throughput = (n * (size >> 10));
> >
> >     throughput = div64_s64(throughput, time_ms);
> >
> > -   dev_info(adev->dev, "amdgpu: %s %u bo moves of %u kB from"
> > -            " %d to %d in %lld ms, throughput: %lld Mb/s or %lld
> MB/s\n",
> > -            kind, n, size >> 10, sdomain, ddomain, time_ms,
> > -            throughput * 8, throughput);
> > +   if (m) {
> > +           seq_printf(m, "\tamdgpu: %s %u bo moves of %u kB from"
> > +                    " %d to %d in %lld ms, throughput: %lld Mb/s or %lld
> MB/s\n",
> > +                   kind, n, size >> 10, sdomain, ddomain, time_ms,
> > +                   throughput * 8, throughput);
> > +   } else {
> > +           dev_info(adev->dev, "amdgpu: %s %u bo moves of %u kB
> from"
> > +                    " %d to %d in %lld ms, throughput: %lld Mb/s or %lld
> MB/s\n",
> > +                   kind, n, size >> 10, sdomain, ddomain, time_ms,
> > +                   throughput * 8, throughput);
> > +   }
> >   }
> >
> >   static int amdgpu_benchmark_move(struct amdgpu_device *adev,
> unsigned size,
> > -                            unsigned sdomain, unsigned ddomain)
> > +                            unsigned sdomain, unsigned ddomain, struct
> seq_file *m)
> >   {
> >     struct amdgpu_bo *dobj = NULL;
> >     struct amdgpu_bo *sobj = NULL;
> > @@ -109,7 +116,7 @@ static int amdgpu_benchmark_move(struct
> amdgpu_device *adev, unsigned size,
> >                     goto out_cleanup;
> >             else
> >                     amdgpu_benchmark_log_results(adev, n, size,
> time_ms,
> > -                                                sdomain, ddomain,
> "dma");
> > +                                                sdomain, ddomain, "dma",
> m);
> >     }
> >
> >   out_cleanup:
> > @@ -124,7 +131,7 @@ static int amdgpu_benchmark_move(struct
> amdgpu_device *adev, unsigned size,
> >     return r;
> >   }
> >
> > -int amdgpu_benchmark(struct amdgpu_device *adev, int test_number)
> > +int amdgpu_benchmark(struct amdgpu_device *adev, int test_number,
> > +struct seq_file *m)
> >   {
> >     int i, r;
> >     static const int
> common_modes[AMDGPU_BENCHMARK_COMMON_MODES_N] = {
> > @@ -153,13 +160,16 @@ int amdgpu_benchmark(struct amdgpu_device
> *adev, int test_number)
> >             dev_info(adev->dev,
> >                      "benchmark test: %d (simple test, VRAM to GTT and
> GTT to VRAM)\n",
> >                      test_number);
> > +           if (m)
> > +                   seq_printf(m, "\tbenchmark test: %d (simple test,
> VRAM to GTT and GTT to VRAM)\n",
> > +                    test_number);
> >             /* simple test, VRAM to GTT and GTT to VRAM */
> >             r = amdgpu_benchmark_move(adev, 1024*1024,
> AMDGPU_GEM_DOMAIN_GTT,
> > -                                     AMDGPU_GEM_DOMAIN_VRAM);
> > +                                     AMDGPU_GEM_DOMAIN_VRAM,
> m);
> >             if (r)
> >                     goto done;
> >             r = amdgpu_benchmark_move(adev, 1024*1024,
> AMDGPU_GEM_DOMAIN_VRAM,
> > -                                     AMDGPU_GEM_DOMAIN_GTT);
> > +                                     AMDGPU_GEM_DOMAIN_GTT, m);
> >             if (r)
> >                     goto done;
> >             break;
> > @@ -167,9 +177,13 @@ int amdgpu_benchmark(struct amdgpu_device
> *adev, int test_number)
> >             dev_info(adev->dev,
> >                      "benchmark test: %d (simple test, VRAM to
> VRAM)\n",
> >                      test_number);
> > +           if (m)
> > +                   seq_printf(m, "\tbenchmark test: %d (simple test,
> VRAM to VRAM)\n",
> > +                    test_number);
> > +
> >             /* simple test, VRAM to VRAM */
> >             r = amdgpu_benchmark_move(adev, 1024*1024,
> AMDGPU_GEM_DOMAIN_VRAM,
> > -                                     AMDGPU_GEM_DOMAIN_VRAM);
> > +                                     AMDGPU_GEM_DOMAIN_VRAM,
> m);
> >             if (r)
> >                     goto done;
> >             break;
> > @@ -177,11 +191,15 @@ int amdgpu_benchmark(struct amdgpu_device
> *adev, int test_number)
> >             dev_info(adev->dev,
> >                      "benchmark test: %d (GTT to VRAM, buffer size
> sweep, powers of 2)\n",
> >                      test_number);
> > +           if (m)
> > +                   seq_printf(m, "\tbenchmark test: %d (GTT to VRAM,
> buffer size sweep, powers of 2)\n",
> > +                           test_number);
> > +
> >             /* GTT to VRAM, buffer size sweep, powers of 2 */
> >             for (i = 1; i <= 16384; i <<= 1) {
> >                     r = amdgpu_benchmark_move(adev, i *
> AMDGPU_GPU_PAGE_SIZE,
> >
> AMDGPU_GEM_DOMAIN_GTT,
> > -
> AMDGPU_GEM_DOMAIN_VRAM);
> > +
> AMDGPU_GEM_DOMAIN_VRAM, m);
> >                     if (r)
> >                             goto done;
> >             }
> > @@ -190,11 +208,14 @@ int amdgpu_benchmark(struct amdgpu_device
> *adev, int test_number)
> >             dev_info(adev->dev,
> >                      "benchmark test: %d (VRAM to GTT, buffer size
> sweep, powers of 2)\n",
> >                      test_number);
> > +           if (m)
> > +                   seq_printf(m,"\tbenchmark test: %d (VRAM to GTT,
> buffer size sweep, powers of 2)\n",
> > +                           test_number);
> >             /* VRAM to GTT, buffer size sweep, powers of 2 */
> >             for (i = 1; i <= 16384; i <<= 1) {
> >                     r = amdgpu_benchmark_move(adev, i *
> AMDGPU_GPU_PAGE_SIZE,
> >
> AMDGPU_GEM_DOMAIN_VRAM,
> > -
> AMDGPU_GEM_DOMAIN_GTT);
> > +
> AMDGPU_GEM_DOMAIN_GTT, m);
> >                     if (r)
> >                             goto done;
> >             }
> > @@ -203,11 +224,14 @@ int amdgpu_benchmark(struct amdgpu_device
> *adev, int test_number)
> >             dev_info(adev->dev,
> >                      "benchmark test: %d (VRAM to VRAM, buffer size
> sweep, powers of 2)\n",
> >                      test_number);
> > +           if (m)
> > +                   seq_printf(m, "\tbenchmark test: %d (VRAM to VRAM,
> buffer size sweep, powers of 2)\n",
> > +                            test_number);
> >             /* VRAM to VRAM, buffer size sweep, powers of 2 */
> >             for (i = 1; i <= 16384; i <<= 1) {
> >                     r = amdgpu_benchmark_move(adev, i *
> AMDGPU_GPU_PAGE_SIZE,
> >
> AMDGPU_GEM_DOMAIN_VRAM,
> > -
> AMDGPU_GEM_DOMAIN_VRAM);
> > +
> AMDGPU_GEM_DOMAIN_VRAM, m);
> >                     if (r)
> >                             goto done;
> >             }
> > @@ -216,11 +240,14 @@ int amdgpu_benchmark(struct amdgpu_device
> *adev, int test_number)
> >             dev_info(adev->dev,
> >                      "benchmark test: %d (GTT to VRAM, buffer size
> sweep, common modes)\n",
> >                      test_number);
> > +           if (m)
> > +                   seq_printf(m,"\tbenchmark test: %d (GTT to VRAM,
> buffer size sweep, common modes)\n",
> > +                           test_number);
> >             /* GTT to VRAM, buffer size sweep, common modes */
> >             for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N;
> i++) {
> >                     r = amdgpu_benchmark_move(adev,
> common_modes[i],
> >
> AMDGPU_GEM_DOMAIN_GTT,
> > -
> AMDGPU_GEM_DOMAIN_VRAM);
> > +
> AMDGPU_GEM_DOMAIN_VRAM, m);
> >                     if (r)
> >                             goto done;
> >             }
> > @@ -229,11 +256,14 @@ int amdgpu_benchmark(struct amdgpu_device
> *adev, int test_number)
> >             dev_info(adev->dev,
> >                      "benchmark test: %d (VRAM to GTT, buffer size
> sweep, common modes)\n",
> >                      test_number);
> > +           if (m)
> > +                   seq_printf(m,"\tbenchmark test: %d (VRAM to GTT,
> buffer size sweep, common modes)\n",
> > +                           test_number);
> >             /* VRAM to GTT, buffer size sweep, common modes */
> >             for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N;
> i++) {
> >                     r = amdgpu_benchmark_move(adev,
> common_modes[i],
> >
> AMDGPU_GEM_DOMAIN_VRAM,
> > -
> AMDGPU_GEM_DOMAIN_GTT);
> > +
> AMDGPU_GEM_DOMAIN_GTT, m);
> >                     if (r)
> >                             goto done;
> >             }
> > @@ -241,12 +271,15 @@ int amdgpu_benchmark(struct amdgpu_device
> *adev, int test_number)
> >     case 8:
> >             dev_info(adev->dev,
> >                      "benchmark test: %d (VRAM to VRAM, buffer size
> sweep, common modes)\n",
> > -                    test_number);
> > +                   test_number);
> > +           if (m)
> > +                   seq_printf(m,"\tbenchmark test: %d (VRAM to VRAM,
> buffer size sweep, common modes)\n",
> > +                           test_number);
> >             /* VRAM to VRAM, buffer size sweep, common modes */
> >             for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N;
> i++) {
> >                     r = amdgpu_benchmark_move(adev,
> common_modes[i],
> >                                           AMDGPU_GEM_DOMAIN_VRAM,
> > -                                         AMDGPU_GEM_DOMAIN_VRAM);
> > +                                         AMDGPU_GEM_DOMAIN_VRAM,
> m);
> >                     if (r)
> >                             goto done;
> >             }
> > @@ -449,7 +482,7 @@ static void
> amdgpu_benchmark_mem_metrics_dump(struct amdgpu_device *adev,
> struct
> >                     &adev->mman.gtt_mgr.manager;
> >     struct ttm_resource_manager *vram_man =
> >                     &adev->mman.vram_mgr.manager;
> > -   uint32_t l1_cache_size;
> > +   uint32_t l1_cache_size, j;
> >
> >     seq_puts(m, "amdgpu benchmark mem metrics dump:\n");
> >
> > @@ -483,6 +516,13 @@ static void
> amdgpu_benchmark_mem_metrics_dump(struct amdgpu_device *adev,
> struct
> >                     atomic64_read(&adev->gart_pin_size));
> >     seq_printf(m, "\tGTT heap usage size:%lld KB\n",
> > ttm_resource_manager_usage(gtt_man));
> >
> > +   seq_printf(m, "\t--------amdgpu buffer object move speed test begin---
> -----\n");
> > +   for (j = 1; j < 8; j++) {
> > +           /*TODO: Add the cases of gfx and CPU move cases.*/
> > +           amdgpu_benchmark(adev, j, m);
> > +   }
> > +   seq_printf(m, "\t--------amdgpu buffer object move speed test
> > +end--------\n");
> > +
> >     seq_printf(m, "\n");
> >
> >   }
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > index 7e935b9736c1..4d710c5f8465 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > @@ -1760,7 +1760,7 @@ static int amdgpu_debugfs_benchmark(void
> *data, u64 val)
> >             return r;
> >     }
> >
> > -   r = amdgpu_benchmark(adev, val);
> > +   r = amdgpu_benchmark(adev, val, NULL);
> >
> >     pm_runtime_mark_last_busy(dev->dev);
> >     pm_runtime_put_autosuspend(dev->dev);



More information about the amd-gfx mailing list