<div dir="ltr"><div><div><div><div><div><div><div><div>Hi,<br></div>About this patch: <br>1. It is not tested. I'll test it after the 12th.<br></div>2. It implements atomic buffers as a surface which can be reused for ARB_shader_image_load_store<br></div><div>3. You can ignore the first patch.<br><br><br></div>My questions:<br></div>1. What does R_028AC0_ALU_ATOM_CACHE_GS_0 represent?<br></div>2. What determines the values 160, 336 and 0 in r600_emit_{vs,gs,ps}_{atomic,constant}_buffers()?<br>3. What do these macros represent? What are they used for?<br>#define R600_UCP_CONST_BUFFER            R600_MAX_USER_CONST_BUFFERS<br>#define R600_TXQ_CONST_BUFFER            R600_MAX_USER_CONST_BUFFERS + 1<br>#define R600_BUFFER_INFO_CONST_BUFFER        R600_MAX_USER_CONST_BUFFERS + 2<br>#define R600_GS_RING_CONST_BUFFER        R600_MAX_USER_CONST_BUFFERS + 3<br><br>#define R600_MAX_CONST_BUFFER_SIZE        (4096 * sizeof(float[4])) // It is self-explanatory here.<br><br></div>4. What is the function of R600_CONTEXT_INV_ATOM_CACHE?<br></div>5. Does my implementation make sense?<br><br></div>Thank you!!<br><div><div><br></div></div></div><div class="gmail_extra"><br><div class="gmail_quote">On Sun, Jan 4, 2015 at 3:44 PM, adityaatluri <span dir="ltr"><<a href="mailto:adityaavinash1@gmail.com" target="_blank">adityaavinash1@gmail.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">---<br>
 src/gallium/include/pipe/p_context.h |  5 +++++<br>
 src/gallium/include/pipe/p_defines.h |  7 ++++++-<br>
 src/gallium/include/pipe/p_state.h   | 10 ++++++++++<br>
 3 files changed, 21 insertions(+), 1 deletion(-)<br>
<br>
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h<br>
index af5674f..bf3be31 100644<br>
--- a/src/gallium/include/pipe/p_context.h<br>
+++ b/src/gallium/include/pipe/p_context.h<br>
@@ -44,6 +44,7 @@ struct pipe_blit_info;<br>
 struct pipe_box;<br>
 struct pipe_clip_state;<br>
 struct pipe_constant_buffer;<br>
+struct pipe_counter_buffer;<br>
 struct pipe_depth_stencil_alpha_state;<br>
 struct pipe_draw_info;<br>
 struct pipe_fence_handle;<br>
@@ -201,6 +202,10 @@ struct pipe_context {<br>
                                 uint shader, uint index,<br>
                                 struct pipe_constant_buffer *buf );<br>
<br>
+   void (*set_counter_buffer)( struct pipe_context *,<br>
+                               uint shader, uint index,<br>
+                               struct pipe_counter_buffer *buf );<br>
+<br>
    void (*set_framebuffer_state)( struct pipe_context *,<br>
                                   const struct pipe_framebuffer_state * );<br>
<br>
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h<br>
index 8c4e415..717ab6a 100644<br>
--- a/src/gallium/include/pipe/p_defines.h<br>
+++ b/src/gallium/include/pipe/p_defines.h<br>
@@ -341,6 +341,7 @@ enum pipe_flush_flags {<br>
 #define PIPE_BIND_VERTEX_BUFFER        (1 << 4) /* set_vertex_buffers */<br>
 #define PIPE_BIND_INDEX_BUFFER         (1 << 5) /* draw_elements */<br>
 #define PIPE_BIND_CONSTANT_BUFFER      (1 << 6) /* set_constant_buffer */<br>
+#define PIPE_BIND_COUNTER_BUFFER       (1 << 7) /* set_counter_buffer */<br>
 #define PIPE_BIND_DISPLAY_TARGET       (1 << 8) /* flush_front_buffer */<br>
 #define PIPE_BIND_TRANSFER_WRITE       (1 << 9) /* transfer_map */<br>
 #define PIPE_BIND_TRANSFER_READ        (1 << 10) /* transfer_map */<br>
@@ -572,6 +573,8 @@ enum pipe_cap {<br>
    PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE = 109,<br>
    PIPE_CAP_SAMPLER_VIEW_TARGET = 110,<br>
    PIPE_CAP_CLIP_HALFZ = 111,<br>
+   PIPE_CAP_USER_COUNTER_BUFFERS = 112,<br>
+   PIPE_CAP_COUNTER_BUFFER_OFFSET_ALIGNMENT = 113,<br>
 };<br>
<br>
 #define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0)<br>
@@ -631,7 +634,9 @@ enum pipe_shader_cap<br>
    PIPE_SHADER_CAP_PREFERRED_IR,<br>
    PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED,<br>
    PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS,<br>
-   PIPE_SHADER_CAP_DOUBLES<br>
+   PIPE_SHADER_CAP_DOUBLES,<br>
+   PIPE_SHADER_CAP_MAX_COUNTER_BUFFER_SIZE,<br>
+   PIPE_SHADER_CAP_MAX_COUNTER_BUFFERS<br>
 };<br>
<br>
 /**<br>
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h<br>
index 43bc48b..49fae5d 100644<br>
--- a/src/gallium/include/pipe/p_state.h<br>
+++ b/src/gallium/include/pipe/p_state.h<br>
@@ -57,6 +57,7 @@ extern "C" {<br>
 #define PIPE_MAX_CLIP_PLANES       8<br>
 #define PIPE_MAX_COLOR_BUFS        8<br>
 #define PIPE_MAX_CONSTANT_BUFFERS 32<br>
+#define PIPE_MAX_COUNTER_BUFFERS  32<br>
 #define PIPE_MAX_SAMPLERS         16<br>
 #define PIPE_MAX_SHADER_INPUTS    32<br>
 #define PIPE_MAX_SHADER_OUTPUTS   48 /* 32 GENERICs + POS, PSIZE, FOG, etc. */<br>
@@ -462,6 +463,15 @@ struct pipe_constant_buffer {<br>
    const void *user_buffer;  /**< pointer to a user buffer if buffer == NULL */<br>
 };<br>
<br>
+/**<br>
+ * A Counter buffer. A new buffer is set every time a variable with<br>
+ * atomic_uint is defined.<br>
+ */<br>
+struct pipe_counter_buffer{<br>
+   struct pipe_resource *buffer; /**< The actual buffer */<br>
+   unsigned buffer_offset; /**< The offset to start of data in buffer in bytes */<br>
+   const void *user_buffer; /**< The buffer which is created by the compiler */<br>
+};<br>
<br>
 /**<br>
  * A stream output target. The structure specifies the range vertices can<br>
--<br>
1.9.1<br>
<br>
<br>
>From c80ca0e4704b8fc325e109d1770f6c4900d14cec Mon Sep 17 00:00:00 2001<br>
From: adityaatluri <<a href="mailto:adityaavinash1@gmail.com">adityaavinash1@gmail.com</a>><br>
Date: Sun, 4 Jan 2015 16:37:43 -0500<br>
Subject: [PATCH 2/2] drivers/r600: added atomic buffer bindings from mesa to<br>
 R600 backend as a surface<br>
<br>
---<br>
 src/gallium/drivers/r600/r600_hw_context.c    |  3 ++<br>
 src/gallium/drivers/r600/r600_pipe.h          |  9 ++++<br>
 src/gallium/drivers/r600/r600_state.c         | 66 +++++++++++++++++++++++++<br>
 src/gallium/drivers/r600/r600_state_common.c  | 69 +++++++++++++++++++++++++++<br>
 src/gallium/drivers/r600/r600d.h              | 11 +++++<br>
 src/gallium/drivers/radeon/r600_pipe_common.c |  2 +-<br>
 6 files changed, 159 insertions(+), 1 deletion(-)<br>
<br>
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c<br>
index b6fa3b0..53bc1ab 100644<br>
--- a/src/gallium/drivers/r600/r600_hw_context.c<br>
+++ b/src/gallium/drivers/r600/r600_hw_context.c<br>
@@ -338,14 +338,17 @@ void r600_begin_new_cs(struct r600_context *ctx)<br>
        for (shader = 0; shader < PIPE_SHADER_TYPES; shader++) {<br>
                struct r600_constbuf_state *constbuf = &ctx->constbuf_state[shader];<br>
                struct r600_textures_info *samplers = &ctx->samplers[shader];<br>
+               struct r600_atombuf_state *atombuf = &ctx->atomic_buffer[shader];<br>
<br>
                constbuf->dirty_mask = constbuf->enabled_mask;<br>
                samplers->views.dirty_mask = samplers->views.enabled_mask;<br>
                samplers->states.dirty_mask = samplers->states.enabled_mask;<br>
+               atombuf->dirty_mask = atombuf->enabled_mask;<br>
<br>
                r600_constant_buffers_dirty(ctx, constbuf);<br>
                r600_sampler_views_dirty(ctx, &samplers->views);<br>
                r600_sampler_states_dirty(ctx, &samplers->states);<br>
+               r600_atomic_buffers_dirty(ctx, atombuf);<br>
        }<br>
<br>
        r600_postflush_resume_features(&ctx->b);<br>
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h<br>
index 40b0328..bcdcfac 100644<br>
--- a/src/gallium/drivers/r600/r600_pipe.h<br>
+++ b/src/gallium/drivers/r600/r600_pipe.h<br>
@@ -347,6 +347,14 @@ struct r600_constbuf_state<br>
        uint32_t                        dirty_mask;<br>
 };<br>
<br>
+struct r600_atombuf_state<br>
+{<br>
+       struct r600_atom                atom;<br>
+       struct pipe_surface     ab[PIPE_MAX_ATOMIC_BUFFERS];<br>
+       uint32_t                                enabled_mask;<br>
+       uint32_t                                dirty_mask;<br>
+};<br>
+<br>
 struct r600_vertexbuf_state<br>
 {<br>
        struct r600_atom                atom;<br>
@@ -445,6 +453,7 @@ struct r600_context {<br>
        struct r600_shader_stages_state shader_stages;<br>
        struct r600_gs_rings_state      gs_rings;<br>
        struct r600_constbuf_state      constbuf_state[PIPE_SHADER_TYPES];<br>
+       struct r600_atombuf_state       atomic_buffer[PIPE_SHADER_TYPES];<br>
        struct r600_textures_info       samplers[PIPE_SHADER_TYPES];<br>
        /** Vertex buffers for fetch shaders */<br>
        struct r600_vertexbuf_state     vertex_buffer_state;<br>
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c<br>
index 61f5c5a..bcea22f 100644<br>
--- a/src/gallium/drivers/r600/r600_state.c<br>
+++ b/src/gallium/drivers/r600/r600_state.c<br>
@@ -1782,6 +1782,72 @@ static void r600_emit_ps_constant_buffers(struct r600_context *rctx, struct r600<br>
                                   R_028940_ALU_CONST_CACHE_PS_0);<br>
 }<br>
<br>
+static void r600_emit_atomic_buffers()<br>
+{<br>
+       struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;<br>
+       uint32_t dirty_mask = state->dirty_mask;<br>
+<br>
+       while(dirty_mask){<br>
+               struct pipe_surface *surf;<br>
+               struct r600_resource *rbuffer;<br>
+               unsigned offset;<br>
+               unsigned buffer_index = ffs(dirty_mask) - 1;<br>
+               unsigned gs_ring_buffer = (buffer_index == R600_GS_RING_ATOM_BUFFER);<br>
+               surf = &state->ab[buffer_index];<br>
+               rbuffer = (struct r600_resource*)surf->texture;<br>
+               assert(rbuffer);<br>
+<br>
+               offset = surf->u.buf.first_element;<br>
+<br>
+               if (!gs_ring_buffer) {<br>
+                       r600_write_context_reg(cs, reg_alu_atombuf_size + buffer_index,// * 4,<br>
+                               ALIGN_BIVUP(surf->height*surf->width*sizeof(unsigned) >> 4, 16));<br>
+                       r600_write_context_reg(cs, reg_alu_atom_cache + buffer_index, offset >> 8);<br>
+               }<br>
+<br>
+               radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));<br>
+               radeon_emit(cs, r600_context_bo_reg(&rctx->b, &rctx->b.rings.gfx, rbuffer,<br>
+                               RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));<br>
+               radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));<br>
+               radeon_emit(cs, (buffer_id_base + buffer_index) * 7);<br>
+               radeon_emit(cs, offset);<br>
+               radeon_emit(cs, rbuffer->buf->size - offset - 1);<br>
+               radeon_emit(cs,<br>
+                               S_038008_ENDIAN_SWAP(gs_ring_buffer ? ENDIAN_NONE : r600_endian_swap(32)) | S_038008_STRIDE(gs_ring_buffer ? 4 : 16));<br>
+               radeon_emit(cs, 0);<br>
+               radeon_emit(cs, 0);<br>
+               radeon_emit(cs, 0);<br>
+               radeon_emit(cs, 0x0000000);<br>
+<br>
+               radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));<br>
+               radeon_emit(cs, r600_context_bo_reloc(&rctx->b,<br>
+                                                                               &rctx->b.rings.gfx,<br>
+                                                                               rbuffer,<br>
+                                                                               RADEON_USAGE_READ,<br>
+                                                                               RADEON_PRIO_SHADER_BUFFER_RO));<br>
+               dirty_mask &= ~(1 << buffer_index);<br>
+       }<br>
+       state->dirty_mask = 0;<br>
+}<br>
+<br>
+static void r600_emit_vs_atomic_buffers(struct r600_context *rctx, struct r600_atom *atom)<br>
+{<br>
+       r600_emit_atomic_buffers(rctx, &rctx->atomic_state[PIPE_SHADER_VERTEX], 160,<br>
+                       R_028240_ALU_ATOM_BUFFER_SIZE_VS_0, R_028A80_ALU_ATOM_CACHE_VS_0);<br>
+}<br>
+<br>
+static void r600_emit_gs_atomic_buffers(struct r600_context *rctx, struct r600_atom *atom)<br>
+{<br>
+       r600_emit_atomic_buffers(rctx, &rctx->atomic_state[PIPE_SHADER_GEOMETRY], 336,<br>
+                       R_028280_ALU_ATOM_BUFFER_SIZE_GS_0, R_028AC0_ALU_ATOM_CACHE_GS_0);<br>
+}<br>
+<br>
+static void r600_emit_ps_atomic_buffers(struct r600_context *rctx, struct r600_atom *atom)<br>
+{<br>
+       r600_emit_atomic_buffers(rctx, &rctx->atomic_state[PIPE_SHADER_FRAGMENT], 0,<br>
+                       R_028200_ALU_ATOM_BUFFER_SIZE_PS_0, R_028A40_ALU_ATOM_CACHE_PS_0);<br>
+}<br>
+<br>
 static void r600_emit_sampler_views(struct r600_context *rctx,<br>
                                    struct r600_samplerview_state *state,<br>
                                    unsigned resource_id_base)<br>
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c<br>
index c3f21cb..6d8d728 100644<br>
--- a/src/gallium/drivers/r600/r600_state_common.c<br>
+++ b/src/gallium/drivers/r600/r600_state_common.c<br>
@@ -966,6 +966,74 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint<br>
        r600_constant_buffers_dirty(rctx, state);<br>
 }<br>
<br>
+void r600_atomic_buffers_dirty(struct r600_context *rctx, struct r600_atombuf_state *state)<br>
+{<br>
+       if (state->dirty_mask) {<br>
+               rctx->b.flags |= R600_CONTEXT_INV_ATOM_CACHE;<br>
+               state->atom.num_dw = rctx->b.chip_class >= EVERGREEN ? util_bitcount(state->dirty_mask)*20 : util_bitcount(state->dirty_mask) * 19;<br>
+               state->atom.dirty = true;<br>
+       }<br>
+}<br>
+<br>
+static void r600_set_atomic_buffer(struct pipe_context *ctx, uint shader, uint index,<br>
+                                                                       struct pipe_surface *input)<br>
+{<br>
+       struct r600_context *rctx = (struct r600_context *)ctx;<br>
+       struct r600_atombuf_state *state = &rctx->atombuf_state[shader];<br>
+       struct pipe_surface *surf;<br>
+<br>
+       state->surf = r600_create_surface(ctx, input->texture , input);<br>
+<br>
+/*     The code below represents atomic buffers as a buffer.<br>
+       In our implementation we bind atomic buffers to texture so that<br>
+       it can be reused for image_load_store<br>
+<br>
+       if(unlikely(!input || (!input->texture))){<br>
+               state->enabled_mask &= ~(1 << index);<br>
+               state->dirty_mask &= ~(1 << index);<br>
+               pipe_resource_reference(&state->ab[index].texture, NULL);<br>
+               return;<br>
+       }<br>
+<br>
+       ab = &state->ab[index];<br>
+       ab->width = input->width;<br>
+       ab->height = input->height;<br>
+<br>
+       ptr = input->texture;<br>
+       unsigned size = sizeof(unsigned)*input->height*input->width;<br>
+<br>
+       if (ptr){<br>
+               if(R600_BIG_ENDIAN) {<br>
+                       uint32_t *tmpPtr;<br>
+                       unsigned i;<br>
+<br>
+                       if (!(tmpPtr = malloc(size))) {<br>
+                               R600_ERR("Failed to allocate Atomic buffer\n");<br>
+                               return;<br>
+                       }<br>
+<br>
+                       for(i = 0; i < size / 4; ++i){<br>
+                               tmpPtr[i] = util_cpu_to_le32(((uint32_t *)ptr)[i]);<br>
+                       }<br>
+<br>
+                       // This can be changed as no need for data transfer. Offset can be changed<br>
+                       u_upload_data(rctx->b.uploader, 0, size, tmpPtr, &input->u.buf.first_element, &ab->texture);<br>
+                       free(tmpPtr);<br>
+               } else {<br>
+                       u_upload_data(rctx->b.uploader, 0, size, ptr, &input->u.buf.first_element, &ab->texture);<br>
+               }<br>
+               rctx->b.gtt += size;<br>
+       } else {<br>
+               // cb->buffer_offset = input->buffer_offset;<br>
+               pipe_resource_reference(&ab->texture, input->texture);<br>
+               r600_context_add_resource_size(ctx, input->texture);<br>
+       }<br>
+<br>
+       state->enabled_mask |= 1 << index;<br>
+       state->dirty_mask |= 1 << index;*/<br>
+       r600_atomic_buffers_dirty(rctx, state);<br>
+}<br>
+<br>
 static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)<br>
 {<br>
        struct r600_context *rctx = (struct r600_context*)pipe;<br>
@@ -2452,6 +2520,7 @@ void r600_init_common_state_functions(struct r600_context *rctx)<br>
        rctx->b.b.set_blend_color = r600_set_blend_color;<br>
        rctx->b.b.set_clip_state = r600_set_clip_state;<br>
        rctx->b.b.set_constant_buffer = r600_set_constant_buffer;<br>
+       rctx->b.b.set_atomic_buffer = r600_set_atomic_buffer;<br>
        rctx->b.b.set_sample_mask = r600_set_sample_mask;<br>
        rctx->b.b.set_stencil_ref = r600_set_pipe_stencil_ref;<br>
        rctx->b.b.set_viewport_states = r600_set_viewport_states;<br>
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h<br>
index 6a5b964..1a2af8b 100644<br>
--- a/src/gallium/drivers/r600/r600d.h<br>
+++ b/src/gallium/drivers/r600/r600d.h<br>
@@ -3716,6 +3716,17 @@<br>
 #define R_028984_ALU_CONST_CACHE_VS_1                0x00028984<br>
 #define R_0289C0_ALU_CONST_CACHE_GS_0                0x000289C0<br>
<br>
+#define R_028200_ALU_ATOM_BUFFER_SIZE_PS_0                      0x00028200<br>
+#define R_028204_ALU_ATOM_BUFFER_SIZE_PS_1                      0x00028204<br>
+#define R_028240_ALU_ATOM_BUFFER_SIZE_VS_0                      0x00028240<br>
+#define R_028244_ALU_ATOM_BUFFER_SIZE_VS_1                      0x00028244<br>
+#define R_028280_ALU_ATOM_BUFFER_SIZE_GS_0                      0x00028280<br>
+#define R_028A40_ALU_ATOM_CACHE_PS_0                            0x00028A40<br>
+#define R_028A44_ALU_ATOM_CACHE_PS_1                            0x00028A44<br>
+#define R_028A80_ALU_ATOM_CACHE_VS_0                            0x00028A80<br>
+#define R_028A84_ALU_ATOM_CACHE_VS_1                            0x00028A84<br>
+#define R_028AC0_ALU_ATOM_CACHE_GS_0                            0x00028AC0<br>
+<br>
 #define R_03CFF0_SQ_VTX_BASE_VTX_LOC                 0x03CFF0<br>
 #define R_03CFF4_SQ_VTX_START_INST_LOC               0x03CFF4<br>
<br>
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c<br>
index 8aad178..6c5e1a4 100644<br>
--- a/src/gallium/drivers/radeon/r600_pipe_common.c<br>
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c<br>
@@ -233,7 +233,7 @@ bool r600_common_context_init(struct r600_common_context *rctx,<br>
<br>
        rctx->uploader = u_upload_create(&rctx->b, 1024 * 1024, 256,<br>
                                        PIPE_BIND_INDEX_BUFFER |<br>
-                                       PIPE_BIND_CONSTANT_BUFFER);<br>
+                                       PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_ATOMIC_BUFFER);<br>
        if (!rctx->uploader)<br>
                return false;<br>
<span class="HOEnZb"><font color="#888888"><br>
--<br>
1.9.1<br>
<br>
</font></span></blockquote></div><br><br clear="all"><br>-- <br><div class="gmail_signature"><div dir="ltr"><div><div><font style="font-family:trebuchet ms,sans-serif">Regards,<br></font></div><font style="font-family:trebuchet ms,sans-serif"><b style="background-color:rgb(255,255,255);color:rgb(0,0,153)">Aditya Atluri,<br></b></font></div><div><font style="font-family:trebuchet ms,sans-serif"><b style="background-color:rgb(255,255,255);color:rgb(0,0,153)">USA.<br></b></font></div><font style="font-family:trebuchet ms,sans-serif"><b style="background-color:rgb(255,255,255);color:rgb(0,0,153)"></b><span style="background-color:rgb(255,255,255);color:rgb(0,0,153)"></span></font><br></div></div>
</div>