[Mesa-dev] [PATCH 4/5] radeonsi/compute: Pass kernel arguments in a buffer

Patrick Baggett baggett.patrick at gmail.com
Fri May 24 11:32:20 PDT 2013


The only difference I could see is that in the old code you passed
&cb->buffer (which maybe points to a value?) directly into u_upload_data()
where as in the new code, you do pass &cb->buffer as the parameter rbuffer
to r600_upload_const_buffer(), but then inside that function, you do
*rbuffer = NULL before you start, which effectively erases any previous
pointer, so if *rbuffer was examined by u_upload_data(), it may be
different. I don't know if that matters, though.

Patrick


On Fri, May 24, 2013 at 1:07 PM, Tom Stellard <tom at stellard.net> wrote:

> From: Tom Stellard <thomas.stellard at amd.com>
>
> ---
>  src/gallium/drivers/radeonsi/r600_buffer.c      | 31
> +++++++++++++++++++++++++
>  src/gallium/drivers/radeonsi/radeonsi_compute.c | 26 ++++++++++-----------
>  src/gallium/drivers/radeonsi/si_state.c         | 29
> +++++++----------------
>  3 files changed, 51 insertions(+), 35 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/r600_buffer.c
> b/src/gallium/drivers/radeonsi/r600_buffer.c
> index cdf9988..87763c3 100644
> --- a/src/gallium/drivers/radeonsi/r600_buffer.c
> +++ b/src/gallium/drivers/radeonsi/r600_buffer.c
> @@ -25,6 +25,8 @@
>   *      Corbin Simpson <MostAwesomeDude at gmail.com>
>   */
>
> +#include <byteswap.h>
> +
>  #include "pipe/p_screen.h"
>  #include "util/u_format.h"
>  #include "util/u_math.h"
> @@ -168,3 +170,32 @@ void r600_upload_index_buffer(struct r600_context
> *rctx,
>         u_upload_data(rctx->uploader, 0, count * ib->index_size,
>                       ib->user_buffer, &ib->offset, &ib->buffer);
>  }
> +
> +void r600_upload_const_buffer(struct r600_context *rctx, struct
> si_resource **rbuffer,
> +                       const uint8_t *ptr, unsigned size,
> +                       uint32_t *const_offset)
> +{
> +       *rbuffer = NULL;
> +
> +       if (R600_BIG_ENDIAN) {
> +               uint32_t *tmpPtr;
> +               unsigned i;
> +
> +               if (!(tmpPtr = malloc(size))) {
> +                       R600_ERR("Failed to allocate BE swap buffer.\n");
> +                       return;
> +               }
> +
> +               for (i = 0; i < size / 4; ++i) {
> +                       tmpPtr[i] = bswap_32(((uint32_t *)ptr)[i]);
> +               }
> +
> +               u_upload_data(rctx->uploader, 0, size, tmpPtr,
> const_offset,
> +                               (struct pipe_resource**)rbuffer);
> +
> +               free(tmpPtr);
> +       } else {
> +               u_upload_data(rctx->uploader, 0, size, ptr, const_offset,
> +                                       (struct pipe_resource**)rbuffer);
> +       }
> +}
> diff --git a/src/gallium/drivers/radeonsi/radeonsi_compute.c
> b/src/gallium/drivers/radeonsi/radeonsi_compute.c
> index 3fb6eb1..035076d 100644
> --- a/src/gallium/drivers/radeonsi/radeonsi_compute.c
> +++ b/src/gallium/drivers/radeonsi/radeonsi_compute.c
> @@ -91,8 +91,11 @@ static void radeonsi_launch_grid(
>         struct r600_context *rctx = (struct r600_context*)ctx;
>         struct si_pipe_compute *program = rctx->cs_shader_state.program;
>         struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
> +       struct si_resource *input_buffer;
> +       uint32_t input_offset = 0;
> +       uint64_t input_va;
>         uint64_t shader_va;
> -       unsigned arg_user_sgpr_count;
> +       unsigned arg_user_sgpr_count = 2;
>         unsigned i;
>         struct si_pipe_shader *shader = &program->kernels[pc];
>
> @@ -109,21 +112,16 @@ static void radeonsi_launch_grid(
>         si_pm4_inval_shader_cache(pm4);
>         si_cmd_surface_sync(pm4, pm4->cp_coher_cntl);
>
> -       arg_user_sgpr_count = program->input_size / 4;
> -       if (program->input_size % 4 != 0) {
> -               arg_user_sgpr_count++;
> -       }
> +       /* Upload the input data */
> +       r600_upload_const_buffer(rctx, &input_buffer, input,
> +                                       program->input_size,
> &input_offset);
> +       input_va = r600_resource_va(ctx->screen, (struct
> pipe_resource*)input_buffer);
> +       input_va += input_offset;
>
> -       /* XXX: We should store arguments in memory if we run out of user
> sgprs.
> -        */
> -       assert(arg_user_sgpr_count < 16);
> +       si_pm4_add_bo(pm4, input_buffer, RADEON_USAGE_READ);
>
> -       for (i = 0; i < arg_user_sgpr_count; i++) {
> -               uint32_t *args = (uint32_t*)input;
> -               si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 +
> -                                       (i * 4),
> -                                       args[i]);
> -       }
> +       si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, input_va);
> +       si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4,
> S_008F04_BASE_ADDRESS_HI (input_va >> 32) | S_008F04_STRIDE(0));
>
>         si_pm4_set_reg(pm4, R_00B810_COMPUTE_START_X, 0);
>         si_pm4_set_reg(pm4, R_00B814_COMPUTE_START_Y, 0);
> diff --git a/src/gallium/drivers/radeonsi/si_state.c
> b/src/gallium/drivers/radeonsi/si_state.c
> index dec535c..1e94f7e 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -24,8 +24,6 @@
>   *      Christian König <christian.koenig at amd.com>
>   */
>
> -#include <byteswap.h>
> -
>  #include "util/u_memory.h"
>  #include "util/u_framebuffer.h"
>  #include "util/u_blitter.h"
> @@ -2526,25 +2524,14 @@ static void si_set_constant_buffer(struct
> pipe_context *ctx, uint shader, uint i
>         ptr = input->user_buffer;
>
>         if (ptr) {
> -               /* Upload the user buffer. */
> -               if (R600_BIG_ENDIAN) {
> -                       uint32_t *tmpPtr;
> -                       unsigned i, size = input->buffer_size;
> -
> -                       if (!(tmpPtr = malloc(size))) {
> -                               R600_ERR("Failed to allocate BE swap
> buffer.\n");
> -                               return;
> -                       }
> -
> -                       for (i = 0; i < size / 4; ++i) {
> -                               tmpPtr[i] = bswap_32(((uint32_t *)ptr)[i]);
> -                       }
> -
> -                       u_upload_data(rctx->uploader, 0, size, tmpPtr,
> &cb->buffer_offset, &cb->buffer);
> -                       free(tmpPtr);
> -               } else {
> -                       u_upload_data(rctx->uploader, 0,
> input->buffer_size, ptr, &cb->buffer_offset, &cb->buffer);
> -               }
> +               /* XXX:
> +                * Using this function here causes a memory leak in X and
> makes
> +                * it impossible to do a full piglit run.  I'm not sure
> why this
> +                * is happening since we were using this function prior to
> +                * eb19163a4dd3d7bfeed63229820c926f99ed00d9
> +                */
> +               r600_upload_const_buffer(rctx, &cb->buffer, ptr,
> cb->buffer_size,
> +
> &cb->buffer_offset);
>         } else {
>                 /* Setup the hw buffer. */
>                 cb->buffer_offset = input->buffer_offset;
> --
> 1.8.1.5
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20130524/05bf0471/attachment.html>


More information about the mesa-dev mailing list