[Mesa-dev] [PATCH 1/2] winsys/amdgpu: adjust IB size based on buffer wait time
Nicolai Hähnle
nhaehnle at gmail.com
Wed Apr 20 00:20:21 UTC 2016
This is just a slight massaging of the patch you sent previously. What
happened to the discussion we had about how to do this properly?
Nicolai
On 19.04.2016 18:13, Grigori Goronzy wrote:
> Small IBs help to reduce stalls for workloads that require a lot of
> synchronization. On the other hand, if there is no notable
> synchronization, we can use a large IB size to slightly improve
> performance in some cases.
>
> This introduces tuning of the IB size based on feedback on the average
> buffer wait time. The average wait time is tracked with exponential
> smoothing.
> ---
> src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 6 +++++-
> src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 17 ++++++++++++-----
> src/gallium/winsys/amdgpu/drm/amdgpu_cs.h | 2 ++
> src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h | 9 +++++++++
> 4 files changed, 28 insertions(+), 6 deletions(-)
>
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
> index 036301e..4b8554d 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
> @@ -195,8 +195,10 @@ static void *amdgpu_bo_map(struct pb_buffer *buf,
> return NULL;
> }
> }
> + amdgpu_winsys_update_buffer_wait_avg(bo->ws, 0);
> } else {
> uint64_t time = os_time_get_nano();
> + uint64_t duration;
>
> if (!(usage & PIPE_TRANSFER_WRITE)) {
> /* Mapping for read.
> @@ -221,7 +223,9 @@ static void *amdgpu_bo_map(struct pb_buffer *buf,
> RADEON_USAGE_READWRITE);
> }
>
> - bo->ws->buffer_wait_time += os_time_get_nano() - time;
> + duration = os_time_get_nano() - time;
> + bo->ws->buffer_wait_time += duration;
> + amdgpu_winsys_update_buffer_wait_avg(bo->ws, duration);
> }
> }
>
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> index 69902c4..b9a7c5b 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> @@ -201,10 +201,7 @@ amdgpu_ctx_query_reset_status(struct radeon_winsys_ctx *rwctx)
> static bool amdgpu_get_new_ib(struct radeon_winsys *ws, struct amdgpu_ib *ib,
> struct amdgpu_cs_ib_info *info, unsigned ib_type)
> {
> - /* Small IBs are better than big IBs, because the GPU goes idle quicker
> - * and there is less waiting for buffers and fences. Proof:
> - * http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1
> - */
> + struct amdgpu_winsys *aws = (struct amdgpu_winsys *)ws;
> unsigned buffer_size, ib_size;
>
> switch (ib_type) {
> @@ -216,8 +213,18 @@ static bool amdgpu_get_new_ib(struct radeon_winsys *ws, struct amdgpu_ib *ib,
> ib_size = 128 * 1024 * 4;
> break;
> case IB_MAIN:
> + /* Small IBs are often better than big IBs, because the GPU goes idle
> + * quicker and there is less waiting for buffers and fences. Proof:
> + * http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1
> + * Tune IB size depending on average buffer waiting time, which is an
> + * indicator for the amount of synchronization going on. Some
> + * applications don't cause notable synchronization, so we can use
> + * large IB size for slightly improved throughput.
> + */
> buffer_size = 128 * 1024 * 4;
> - ib_size = 20 * 1024 * 4;
> + ib_size = 32 * 1024 * 4;
> + if (aws->buffer_wait_time_avg > IB_SIZE_WAIT_THRESHOLD_NS)
> + ib_size = 10 * 1024 * 4;
> }
>
> ib->base.cdw = 0;
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
> index 4ed830b..98e58a2 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
> @@ -35,6 +35,8 @@
> #include "amdgpu_bo.h"
> #include "util/u_memory.h"
>
> +#define IB_SIZE_WAIT_THRESHOLD_NS 10000
> +
> struct amdgpu_ctx {
> struct amdgpu_winsys *ws;
> amdgpu_context_handle ctx;
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
> index 91b9be4..3bd63b6 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
> @@ -54,6 +54,7 @@ struct amdgpu_winsys {
> uint64_t allocated_vram;
> uint64_t allocated_gtt;
> uint64_t buffer_wait_time; /* time spent in buffer_wait in ns */
> + uint64_t buffer_wait_time_avg;
> uint64_t num_cs_flushes;
> unsigned gart_page_size;
>
> @@ -76,6 +77,14 @@ amdgpu_winsys(struct radeon_winsys *base)
> return (struct amdgpu_winsys*)base;
> }
>
> +static inline
> +void amdgpu_winsys_update_buffer_wait_avg(struct amdgpu_winsys *ws,
> + uint64_t wait)
> +{
> + /* Exponential smoothing with alpha = 0.25 */
> + ws->buffer_wait_time_avg = (3 * ws->buffer_wait_time_avg + wait) / 4;
> +}
> +
> void amdgpu_surface_init_functions(struct amdgpu_winsys *ws);
> ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws);
>
>
More information about the mesa-dev
mailing list