<div dir="ltr">The only difference I can see is that the old code passed &cb->buffer (which may already point to an existing buffer) directly into u_upload_data(), whereas the new code passes &cb->buffer as the rbuffer parameter of r600_upload_const_buffer(), and that function sets *rbuffer = NULL before doing anything else. That effectively erases any previous pointer, so if u_upload_data() examines *rbuffer, it will now see a different value than it did before. I don't know if that matters, though.<div>
<br></div><div>Patrick</div></div><div class="gmail_extra"><br><br><div class="gmail_quote">On Fri, May 24, 2013 at 1:07 PM, Tom Stellard <span dir="ltr"><<a href="mailto:tom@stellard.net" target="_blank">tom@stellard.net</a>></span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">From: Tom Stellard <<a href="mailto:thomas.stellard@amd.com">thomas.stellard@amd.com</a>><br>
<br>
---<br>
 src/gallium/drivers/radeonsi/r600_buffer.c      | 31 +++++++++++++++++++++++++<br>
 src/gallium/drivers/radeonsi/radeonsi_compute.c | 26 ++++++++++-----------<br>
 src/gallium/drivers/radeonsi/si_state.c         | 29 +++++++----------------<br>
 3 files changed, 51 insertions(+), 35 deletions(-)<br>
<br>
diff --git a/src/gallium/drivers/radeonsi/r600_buffer.c b/src/gallium/drivers/radeonsi/r600_buffer.c<br>
index cdf9988..87763c3 100644<br>
--- a/src/gallium/drivers/radeonsi/r600_buffer.c<br>
+++ b/src/gallium/drivers/radeonsi/r600_buffer.c<br>
@@ -25,6 +25,8 @@<br>
  *      Corbin Simpson <<a href="mailto:MostAwesomeDude@gmail.com">MostAwesomeDude@gmail.com</a>><br>
  */<br>
<br>
+#include <byteswap.h><br>
+<br>
 #include "pipe/p_screen.h"<br>
 #include "util/u_format.h"<br>
 #include "util/u_math.h"<br>
@@ -168,3 +170,32 @@ void r600_upload_index_buffer(struct r600_context *rctx,<br>
        u_upload_data(rctx->uploader, 0, count * ib->index_size,<br>
                      ib->user_buffer, &ib->offset, &ib->buffer);<br>
 }<br>
+<br>
+void r600_upload_const_buffer(struct r600_context *rctx, struct si_resource **rbuffer,<br>
+                       const uint8_t *ptr, unsigned size,<br>
+                       uint32_t *const_offset)<br>
+{<br>
+       *rbuffer = NULL;<br>
+<br>
+       if (R600_BIG_ENDIAN) {<br>
+               uint32_t *tmpPtr;<br>
+               unsigned i;<br>
+<br>
+               if (!(tmpPtr = malloc(size))) {<br>
+                       R600_ERR("Failed to allocate BE swap buffer.\n");<br>
+                       return;<br>
+               }<br>
+<br>
+               for (i = 0; i < size / 4; ++i) {<br>
+                       tmpPtr[i] = bswap_32(((uint32_t *)ptr)[i]);<br>
+               }<br>
+<br>
+               u_upload_data(rctx->uploader, 0, size, tmpPtr, const_offset,<br>
+                               (struct pipe_resource**)rbuffer);<br>
+<br>
+               free(tmpPtr);<br>
+       } else {<br>
+               u_upload_data(rctx->uploader, 0, size, ptr, const_offset,<br>
+                                       (struct pipe_resource**)rbuffer);<br>
+       }<br>
+}<br>
diff --git a/src/gallium/drivers/radeonsi/radeonsi_compute.c b/src/gallium/drivers/radeonsi/radeonsi_compute.c<br>
index 3fb6eb1..035076d 100644<br>
--- a/src/gallium/drivers/radeonsi/radeonsi_compute.c<br>
+++ b/src/gallium/drivers/radeonsi/radeonsi_compute.c<br>
@@ -91,8 +91,11 @@ static void radeonsi_launch_grid(<br>
        struct r600_context *rctx = (struct r600_context*)ctx;<br>
        struct si_pipe_compute *program = rctx->cs_shader_state.program;<br>
        struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);<br>
+       struct si_resource *input_buffer;<br>
+       uint32_t input_offset = 0;<br>
+       uint64_t input_va;<br>
        uint64_t shader_va;<br>
-       unsigned arg_user_sgpr_count;<br>
+       unsigned arg_user_sgpr_count = 2;<br>
        unsigned i;<br>
        struct si_pipe_shader *shader = &program->kernels[pc];<br>
<br>
@@ -109,21 +112,16 @@ static void radeonsi_launch_grid(<br>
        si_pm4_inval_shader_cache(pm4);<br>
        si_cmd_surface_sync(pm4, pm4->cp_coher_cntl);<br>
<br>
-       arg_user_sgpr_count = program->input_size / 4;<br>
-       if (program->input_size % 4 != 0) {<br>
-               arg_user_sgpr_count++;<br>
-       }<br>
+       /* Upload the input data */<br>
+       r600_upload_const_buffer(rctx, &input_buffer, input,<br>
+                                       program->input_size, &input_offset);<br>
+       input_va = r600_resource_va(ctx->screen, (struct pipe_resource*)input_buffer);<br>
+       input_va += input_offset;<br>
<br>
-       /* XXX: We should store arguments in memory if we run out of user sgprs.<br>
-        */<br>
-       assert(arg_user_sgpr_count < 16);<br>
+       si_pm4_add_bo(pm4, input_buffer, RADEON_USAGE_READ);<br>
<br>
-       for (i = 0; i < arg_user_sgpr_count; i++) {<br>
-               uint32_t *args = (uint32_t*)input;<br>
-               si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 +<br>
-                                       (i * 4),<br>
-                                       args[i]);<br>
-       }<br>
+       si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, input_va);<br>
+       si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, S_008F04_BASE_ADDRESS_HI (input_va >> 32) | S_008F04_STRIDE(0));<br>
<br>
        si_pm4_set_reg(pm4, R_00B810_COMPUTE_START_X, 0);<br>
        si_pm4_set_reg(pm4, R_00B814_COMPUTE_START_Y, 0);<br>
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c<br>
index dec535c..1e94f7e 100644<br>
--- a/src/gallium/drivers/radeonsi/si_state.c<br>
+++ b/src/gallium/drivers/radeonsi/si_state.c<br>
@@ -24,8 +24,6 @@<br>
  *      Christian König <<a href="mailto:christian.koenig@amd.com">christian.koenig@amd.com</a>><br>
  */<br>
<br>
-#include <byteswap.h><br>
-<br>
 #include "util/u_memory.h"<br>
 #include "util/u_framebuffer.h"<br>
 #include "util/u_blitter.h"<br>
@@ -2526,25 +2524,14 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint i<br>
        ptr = input->user_buffer;<br>
<br>
        if (ptr) {<br>
-               /* Upload the user buffer. */<br>
-               if (R600_BIG_ENDIAN) {<br>
-                       uint32_t *tmpPtr;<br>
-                       unsigned i, size = input->buffer_size;<br>
-<br>
-                       if (!(tmpPtr = malloc(size))) {<br>
-                               R600_ERR("Failed to allocate BE swap buffer.\n");<br>
-                               return;<br>
-                       }<br>
-<br>
-                       for (i = 0; i < size / 4; ++i) {<br>
-                               tmpPtr[i] = bswap_32(((uint32_t *)ptr)[i]);<br>
-                       }<br>
-<br>
-                       u_upload_data(rctx->uploader, 0, size, tmpPtr, &cb->buffer_offset, &cb->buffer);<br>
-                       free(tmpPtr);<br>
-               } else {<br>
-                       u_upload_data(rctx->uploader, 0, input->buffer_size, ptr, &cb->buffer_offset, &cb->buffer);<br>
-               }<br>
+               /* XXX:<br>
+                * Using this function here causes a memory leak in X and makes<br>
+                * it impossible to do a full piglit run.  I'm not sure why this<br>
+                * is happening since we were using this function prior to<br>
+                * eb19163a4dd3d7bfeed63229820c926f99ed00d9<br>
+                */<br>
+               r600_upload_const_buffer(rctx, &cb->buffer, ptr, cb->buffer_size,<br>
+                                                       &cb->buffer_offset);<br>
        } else {<br>
                /* Setup the hw buffer. */<br>
                cb->buffer_offset = input->buffer_offset;<br>
<span class="HOEnZb"><font color="#888888">--<br>
1.8.1.5<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div>