[Mesa-dev] [PATCH 4/5] radeonsi/compute: Pass kernel arguments in a buffer

Tom Stellard tom at stellard.net
Fri May 24 13:36:46 PDT 2013


On Fri, May 24, 2013 at 01:32:20PM -0500, Patrick Baggett wrote:
> The only difference I could see is that in the old code you passed
> &cb->buffer (which maybe points to a value?) directly into u_upload_data()
> where as in the new code, you do pass &cb->buffer as the parameter rbuffer
> to r600_upload_const_buffer(), but then inside that function, you do
> *rbuffer = NULL before you start, which effectively erases any previous
> pointer, so if *rbuffer was examined by u_upload_data(), it may be
> different. I don't know if that matters, though.
> 

This was the problem, thanks for spotting it!  u_upload_data() was
deleting the old buffer, so by initializing rbuffer to NULL, the old
buffer was never being deleted.  An updated patch is on the way.

-Tom

> Patrick
> 
> 
> On Fri, May 24, 2013 at 1:07 PM, Tom Stellard <tom at stellard.net> wrote:
> 
> > From: Tom Stellard <thomas.stellard at amd.com>
> >
> > ---
> >  src/gallium/drivers/radeonsi/r600_buffer.c      | 31
> > +++++++++++++++++++++++++
> >  src/gallium/drivers/radeonsi/radeonsi_compute.c | 26 ++++++++++-----------
> >  src/gallium/drivers/radeonsi/si_state.c         | 29
> > +++++++----------------
> >  3 files changed, 51 insertions(+), 35 deletions(-)
> >
> > diff --git a/src/gallium/drivers/radeonsi/r600_buffer.c
> > b/src/gallium/drivers/radeonsi/r600_buffer.c
> > index cdf9988..87763c3 100644
> > --- a/src/gallium/drivers/radeonsi/r600_buffer.c
> > +++ b/src/gallium/drivers/radeonsi/r600_buffer.c
> > @@ -25,6 +25,8 @@
> >   *      Corbin Simpson <MostAwesomeDude at gmail.com>
> >   */
> >
> > +#include <byteswap.h>
> > +
> >  #include "pipe/p_screen.h"
> >  #include "util/u_format.h"
> >  #include "util/u_math.h"
> > @@ -168,3 +170,32 @@ void r600_upload_index_buffer(struct r600_context
> > *rctx,
> >         u_upload_data(rctx->uploader, 0, count * ib->index_size,
> >                       ib->user_buffer, &ib->offset, &ib->buffer);
> >  }
> > +
> > +void r600_upload_const_buffer(struct r600_context *rctx, struct
> > si_resource **rbuffer,
> > +                       const uint8_t *ptr, unsigned size,
> > +                       uint32_t *const_offset)
> > +{
> > +       *rbuffer = NULL;
> > +
> > +       if (R600_BIG_ENDIAN) {
> > +               uint32_t *tmpPtr;
> > +               unsigned i;
> > +
> > +               if (!(tmpPtr = malloc(size))) {
> > +                       R600_ERR("Failed to allocate BE swap buffer.\n");
> > +                       return;
> > +               }
> > +
> > +               for (i = 0; i < size / 4; ++i) {
> > +                       tmpPtr[i] = bswap_32(((uint32_t *)ptr)[i]);
> > +               }
> > +
> > +               u_upload_data(rctx->uploader, 0, size, tmpPtr,
> > const_offset,
> > +                               (struct pipe_resource**)rbuffer);
> > +
> > +               free(tmpPtr);
> > +       } else {
> > +               u_upload_data(rctx->uploader, 0, size, ptr, const_offset,
> > +                                       (struct pipe_resource**)rbuffer);
> > +       }
> > +}
> > diff --git a/src/gallium/drivers/radeonsi/radeonsi_compute.c
> > b/src/gallium/drivers/radeonsi/radeonsi_compute.c
> > index 3fb6eb1..035076d 100644
> > --- a/src/gallium/drivers/radeonsi/radeonsi_compute.c
> > +++ b/src/gallium/drivers/radeonsi/radeonsi_compute.c
> > @@ -91,8 +91,11 @@ static void radeonsi_launch_grid(
> >         struct r600_context *rctx = (struct r600_context*)ctx;
> >         struct si_pipe_compute *program = rctx->cs_shader_state.program;
> >         struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
> > +       struct si_resource *input_buffer;
> > +       uint32_t input_offset = 0;
> > +       uint64_t input_va;
> >         uint64_t shader_va;
> > -       unsigned arg_user_sgpr_count;
> > +       unsigned arg_user_sgpr_count = 2;
> >         unsigned i;
> >         struct si_pipe_shader *shader = &program->kernels[pc];
> >
> > @@ -109,21 +112,16 @@ static void radeonsi_launch_grid(
> >         si_pm4_inval_shader_cache(pm4);
> >         si_cmd_surface_sync(pm4, pm4->cp_coher_cntl);
> >
> > -       arg_user_sgpr_count = program->input_size / 4;
> > -       if (program->input_size % 4 != 0) {
> > -               arg_user_sgpr_count++;
> > -       }
> > +       /* Upload the input data */
> > +       r600_upload_const_buffer(rctx, &input_buffer, input,
> > +                                       program->input_size,
> > &input_offset);
> > +       input_va = r600_resource_va(ctx->screen, (struct
> > pipe_resource*)input_buffer);
> > +       input_va += input_offset;
> >
> > -       /* XXX: We should store arguments in memory if we run out of user
> > sgprs.
> > -        */
> > -       assert(arg_user_sgpr_count < 16);
> > +       si_pm4_add_bo(pm4, input_buffer, RADEON_USAGE_READ);
> >
> > -       for (i = 0; i < arg_user_sgpr_count; i++) {
> > -               uint32_t *args = (uint32_t*)input;
> > -               si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 +
> > -                                       (i * 4),
> > -                                       args[i]);
> > -       }
> > +       si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, input_va);
> > +       si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4,
> > S_008F04_BASE_ADDRESS_HI (input_va >> 32) | S_008F04_STRIDE(0));
> >
> >         si_pm4_set_reg(pm4, R_00B810_COMPUTE_START_X, 0);
> >         si_pm4_set_reg(pm4, R_00B814_COMPUTE_START_Y, 0);
> > diff --git a/src/gallium/drivers/radeonsi/si_state.c
> > b/src/gallium/drivers/radeonsi/si_state.c
> > index dec535c..1e94f7e 100644
> > --- a/src/gallium/drivers/radeonsi/si_state.c
> > +++ b/src/gallium/drivers/radeonsi/si_state.c
> > @@ -24,8 +24,6 @@
> >   *      Christian König <christian.koenig at amd.com>
> >   */
> >
> > -#include <byteswap.h>
> > -
> >  #include "util/u_memory.h"
> >  #include "util/u_framebuffer.h"
> >  #include "util/u_blitter.h"
> > @@ -2526,25 +2524,14 @@ static void si_set_constant_buffer(struct
> > pipe_context *ctx, uint shader, uint i
> >         ptr = input->user_buffer;
> >
> >         if (ptr) {
> > -               /* Upload the user buffer. */
> > -               if (R600_BIG_ENDIAN) {
> > -                       uint32_t *tmpPtr;
> > -                       unsigned i, size = input->buffer_size;
> > -
> > -                       if (!(tmpPtr = malloc(size))) {
> > -                               R600_ERR("Failed to allocate BE swap
> > buffer.\n");
> > -                               return;
> > -                       }
> > -
> > -                       for (i = 0; i < size / 4; ++i) {
> > -                               tmpPtr[i] = bswap_32(((uint32_t *)ptr)[i]);
> > -                       }
> > -
> > -                       u_upload_data(rctx->uploader, 0, size, tmpPtr,
> > &cb->buffer_offset, &cb->buffer);
> > -                       free(tmpPtr);
> > -               } else {
> > -                       u_upload_data(rctx->uploader, 0,
> > input->buffer_size, ptr, &cb->buffer_offset, &cb->buffer);
> > -               }
> > +               /* XXX:
> > +                * Using this function here causes a memory leak in X and
> > makes
> > +                * it impossible to do a full piglit run.  I'm not sure
> > why this
> > +                * is happening since we were using this function prior to
> > +                * eb19163a4dd3d7bfeed63229820c926f99ed00d9
> > +                */
> > +               r600_upload_const_buffer(rctx, &cb->buffer, ptr,
> > cb->buffer_size,
> > +
> > &cb->buffer_offset);
> >         } else {
> >                 /* Setup the hw buffer. */
> >                 cb->buffer_offset = input->buffer_offset;
> > --
> > 1.8.1.5
> >
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >


More information about the mesa-dev mailing list