[Mesa-dev] [PATCH 06/11] st/mesa: add atomic buffer support
Ilia Mirkin
imirkin at alum.mit.edu
Sun Sep 27 14:10:18 PDT 2015
On Sun, Sep 27, 2015 at 2:48 PM, Marek Olšák <maraeo at gmail.com> wrote:
> Patches 2-5 are:
>
> Reviewed-by: Marek Olšák <marek.olsak at amd.com>
>
> Same for patch 1 if you rename "RES" to "IMAGE".
>
> See below for patch 6 comments.
>
> On Sun, Sep 27, 2015 at 8:33 AM, Ilia Mirkin <imirkin at alum.mit.edu> wrote:
>> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
>> ---
>> src/mesa/Makefile.sources | 1 +
>> src/mesa/program/ir_to_mesa.cpp | 4 +-
>> src/mesa/state_tracker/st_atom.c | 5 +
>> src/mesa/state_tracker/st_atom.h | 5 +
>> src/mesa/state_tracker/st_atom_atomicbuf.c | 151 +++++++++++++++++++++++++++
>> src/mesa/state_tracker/st_cb_bufferobjects.c | 3 +
>> src/mesa/state_tracker/st_context.c | 1 +
>> src/mesa/state_tracker/st_context.h | 1 +
>> src/mesa/state_tracker/st_extensions.c | 15 +++
>> src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 133 +++++++++++++++++++++--
>> 10 files changed, 310 insertions(+), 9 deletions(-)
>> create mode 100644 src/mesa/state_tracker/st_atom_atomicbuf.c
>>
>> diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
>> index 0915594..5dd98c3 100644
>> --- a/src/mesa/Makefile.sources
>> +++ b/src/mesa/Makefile.sources
>> @@ -393,6 +393,7 @@ VBO_FILES = \
>>
>> STATETRACKER_FILES = \
>> state_tracker/st_atom_array.c \
>> + state_tracker/st_atom_atomicbuf.c \
>> state_tracker/st_atom_blend.c \
>> state_tracker/st_atom.c \
>> state_tracker/st_atom_clip.c \
>> diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
>> index 4201a80..580f907 100644
>> --- a/src/mesa/program/ir_to_mesa.cpp
>> +++ b/src/mesa/program/ir_to_mesa.cpp
>> @@ -535,11 +535,11 @@ type_size(const struct glsl_type *type)
>> case GLSL_TYPE_SAMPLER:
>> case GLSL_TYPE_IMAGE:
>> case GLSL_TYPE_SUBROUTINE:
>> + case GLSL_TYPE_ATOMIC_UINT:
>> /* Samplers take up one slot in UNIFORMS[], but they're baked in
>> * at link time.
>> */
>> return 1;
>> - case GLSL_TYPE_ATOMIC_UINT:
>> case GLSL_TYPE_VOID:
>> case GLSL_TYPE_ERROR:
>> case GLSL_TYPE_INTERFACE:
>> @@ -2458,10 +2458,10 @@ _mesa_associate_uniform_storage(struct gl_context *ctx,
>> case GLSL_TYPE_SAMPLER:
>> case GLSL_TYPE_IMAGE:
>> case GLSL_TYPE_SUBROUTINE:
>> + case GLSL_TYPE_ATOMIC_UINT:
>> format = uniform_native;
>> columns = 1;
>> break;
>> - case GLSL_TYPE_ATOMIC_UINT:
>> case GLSL_TYPE_ARRAY:
>> case GLSL_TYPE_VOID:
>> case GLSL_TYPE_STRUCT:
>> diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c
>> index 43dbadd..920ee11 100644
>> --- a/src/mesa/state_tracker/st_atom.c
>> +++ b/src/mesa/state_tracker/st_atom.c
>> @@ -76,6 +76,11 @@ static const struct st_tracked_state *atoms[] =
>> &st_bind_tes_ubos,
>> &st_bind_fs_ubos,
>> &st_bind_gs_ubos,
>> + &st_bind_vs_atomics,
>> + &st_bind_tcs_atomics,
>> + &st_bind_tes_atomics,
>> + &st_bind_fs_atomics,
>> + &st_bind_gs_atomics,
>> &st_update_pixel_transfer,
>> &st_update_tess,
>>
>> diff --git a/src/mesa/state_tracker/st_atom.h b/src/mesa/state_tracker/st_atom.h
>> index a24842b..7cbd52e 100644
>> --- a/src/mesa/state_tracker/st_atom.h
>> +++ b/src/mesa/state_tracker/st_atom.h
>> @@ -78,6 +78,11 @@ extern const struct st_tracked_state st_bind_vs_ubos;
>> extern const struct st_tracked_state st_bind_gs_ubos;
>> extern const struct st_tracked_state st_bind_tcs_ubos;
>> extern const struct st_tracked_state st_bind_tes_ubos;
>> +extern const struct st_tracked_state st_bind_fs_atomics;
>> +extern const struct st_tracked_state st_bind_vs_atomics;
>> +extern const struct st_tracked_state st_bind_gs_atomics;
>> +extern const struct st_tracked_state st_bind_tcs_atomics;
>> +extern const struct st_tracked_state st_bind_tes_atomics;
>> extern const struct st_tracked_state st_update_pixel_transfer;
>> extern const struct st_tracked_state st_update_tess;
>>
>> diff --git a/src/mesa/state_tracker/st_atom_atomicbuf.c b/src/mesa/state_tracker/st_atom_atomicbuf.c
>> new file mode 100644
>> index 0000000..9bc7862
>> --- /dev/null
>> +++ b/src/mesa/state_tracker/st_atom_atomicbuf.c
>> @@ -0,0 +1,151 @@
>> +/**************************************************************************
>> + *
>> + * Copyright 2014 Ilia Mirkin. All Rights Reserved.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the
>> + * "Software"), to deal in the Software without restriction, including
>> + * without limitation the rights to use, copy, modify, merge, publish,
>> + * distribute, sub license, and/or sell copies of the Software, and to
>> + * permit persons to whom the Software is furnished to do so, subject to
>> + * the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the
>> + * next paragraph) shall be included in all copies or substantial portions
>> + * of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
>> + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
>> + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
>> + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
>> + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
>> + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + **************************************************************************/
>> +
>> +#include "main/imports.h"
>> +#include "program/prog_parameter.h"
>> +#include "program/prog_print.h"
>> +
>> +#include "pipe/p_context.h"
>> +#include "pipe/p_defines.h"
>> +#include "util/u_inlines.h"
>> +#include "util/u_surface.h"
>> +
>> +#include "st_debug.h"
>> +#include "st_cb_bufferobjects.h"
>> +#include "st_context.h"
>> +#include "st_atom.h"
>> +#include "st_program.h"
>> +
>> +static void st_bind_atomics(struct st_context *st,
>> + struct gl_shader_program *prog,
>> + unsigned shader_type)
>> +{
>> + unsigned i;
>> +
>> + if (!prog)
>> + return;
>> +
>> + for (i = 0; i < prog->NumAtomicBuffers; i++) {
>
> This loops over all atomic buffers in a shader program, which can
> contain 5 linked shader stages, so NumAtomicBuffers can be <=
> MaxCombinedAtomicBuffers. I don't think drivers can handle so many.
>
>> + struct gl_atomic_buffer_binding *binding =
>> + &st->ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
>> + struct st_buffer_object *st_obj =
>> + st_buffer_object(binding->BufferObject);
>> + struct pipe_shader_buffer sb = {};
>> +
>> + pipe_resource_reference(&sb.buffer, st_obj->buffer);
>> + sb.buffer_offset = binding->Offset;
>> + sb.buffer_size = st_obj->buffer->width0 - binding->Offset;
>> +
>> + /* TODO: cso */
>
> You can remove the TODO. I don't see why this would need cso_context support.
>
>> + st->pipe->set_shader_buffers(st->pipe, shader_type,
>> + i /* XXX */, 1, &sb);
>
> What does this XXX mean? This needs a better comment at least.
This is what I was alluding to in the cover letter... it needs to look
up the buffer index from somewhere, but it's not easily available.
Timothy has a patch to fix up Intel; if that's deemed to be the right
solution, then I'll copy it in here as well. Perhaps I should just do
that now and update it later as necessary.
>
>> + }
>> +}
>> +
>> +static void bind_vs_atomics(struct st_context *st)
>> +{
>> + struct gl_shader_program *prog =
>> + st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
>> +
>> + st_bind_atomics(st, prog, PIPE_SHADER_VERTEX);
>> +}
>> +
>> +const struct st_tracked_state st_bind_vs_atomics = {
>> + "st_bind_vs_atomics",
>> + {
>> + 0,
>> + ST_NEW_VERTEX_PROGRAM | ST_NEW_ATOMIC_BUFFER,
>> + },
>> + bind_vs_atomics
>> +};
>> +
>> +static void bind_fs_atomics(struct st_context *st)
>> +{
>> + struct gl_shader_program *prog =
>> + st->ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT];
>> +
>> + st_bind_atomics(st, prog, PIPE_SHADER_FRAGMENT);
>> +}
>> +
>> +const struct st_tracked_state st_bind_fs_atomics = {
>> + "st_bind_fs_atomics",
>> + {
>> + 0,
>> + ST_NEW_FRAGMENT_PROGRAM | ST_NEW_ATOMIC_BUFFER,
>> + },
>> + bind_fs_atomics
>> +};
>> +
>> +static void bind_gs_atomics(struct st_context *st)
>> +{
>> + struct gl_shader_program *prog =
>> + st->ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
>> +
>> + st_bind_atomics(st, prog, PIPE_SHADER_GEOMETRY);
>> +}
>> +
>> +const struct st_tracked_state st_bind_gs_atomics = {
>> + "st_bind_gs_atomics",
>> + {
>> + 0,
>> + ST_NEW_GEOMETRY_PROGRAM | ST_NEW_ATOMIC_BUFFER,
>> + },
>> + bind_gs_atomics
>> +};
>> +
>> +static void bind_tcs_atomics(struct st_context *st)
>> +{
>> + struct gl_shader_program *prog =
>> + st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL];
>> +
>> + st_bind_atomics(st, prog, PIPE_SHADER_TESS_CTRL);
>> +}
>> +
>> +const struct st_tracked_state st_bind_tcs_atomics = {
>> + "st_bind_tcs_atomics",
>> + {
>> + 0,
>> + ST_NEW_TESSCTRL_PROGRAM | ST_NEW_ATOMIC_BUFFER,
>> + },
>> + bind_tcs_atomics
>> +};
>> +
>> +static void bind_tes_atomics(struct st_context *st)
>> +{
>> + struct gl_shader_program *prog =
>> + st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];
>> +
>> + st_bind_atomics(st, prog, PIPE_SHADER_TESS_EVAL);
>> +}
>> +
>> +const struct st_tracked_state st_bind_tes_atomics = {
>> + "st_bind_tes_atomics",
>> + {
>> + 0,
>> + ST_NEW_TESSEVAL_PROGRAM | ST_NEW_ATOMIC_BUFFER,
>> + },
>> + bind_tes_atomics
>> +};
>> diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c
>> index db254c2..296c072 100644
>> --- a/src/mesa/state_tracker/st_cb_bufferobjects.c
>> +++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
>> @@ -231,6 +231,9 @@ st_bufferobj_data(struct gl_context *ctx,
>> case GL_DRAW_INDIRECT_BUFFER:
>> bind = PIPE_BIND_COMMAND_ARGS_BUFFER;
>> break;
>> + case GL_ATOMIC_COUNTER_BUFFER:
>> + bind = PIPE_BIND_SHADER_BUFFER;
>> + break;
>> default:
>> bind = 0;
>> }
>> diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
>> index 72c23ca..d415f75 100644
>> --- a/src/mesa/state_tracker/st_context.c
>> +++ b/src/mesa/state_tracker/st_context.c
>> @@ -315,6 +315,7 @@ static void st_init_driver_flags(struct gl_driver_flags *f)
>> f->NewUniformBuffer = ST_NEW_UNIFORM_BUFFER;
>> f->NewDefaultTessLevels = ST_NEW_TESS_STATE;
>> f->NewTextureBuffer = ST_NEW_SAMPLER_VIEWS;
>> + f->NewAtomicBuffer = ST_NEW_ATOMIC_BUFFER;
>> }
>>
>> struct st_context *st_create_context(gl_api api, struct pipe_context *pipe,
>> diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
>> index 81d5480..02c4a84 100644
>> --- a/src/mesa/state_tracker/st_context.h
>> +++ b/src/mesa/state_tracker/st_context.h
>> @@ -61,6 +61,7 @@ struct u_upload_mgr;
>> #define ST_NEW_TESSCTRL_PROGRAM (1 << 9)
>> #define ST_NEW_TESSEVAL_PROGRAM (1 << 10)
>> #define ST_NEW_SAMPLER_VIEWS (1 << 11)
>> +#define ST_NEW_ATOMIC_BUFFER (1 << 12)
>>
>>
>> struct st_state_flags {
>> diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
>> index e290292..2cdcbe7 100644
>> --- a/src/mesa/state_tracker/st_extensions.c
>> +++ b/src/mesa/state_tracker/st_extensions.c
>> @@ -217,6 +217,10 @@ void st_init_limits(struct pipe_screen *screen,
>> c->MaxUniformBlockSize / 4 *
>> pc->MaxUniformBlocks);
>>
>> + pc->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
>> + pc->MaxAtomicBuffers =
>> + screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_BUFFERS);
>> +
>> /* Gallium doesn't really care about local vs. env parameters so use the
>> * same limits.
>> */
>> @@ -322,6 +326,17 @@ void st_init_limits(struct pipe_screen *screen,
>> c->Program[MESA_SHADER_FRAGMENT].MaxUniformBlocks;
>> assert(c->MaxCombinedUniformBlocks <= MAX_COMBINED_UNIFORM_BUFFERS);
>> }
>> +
>> + c->MaxCombinedAtomicBuffers = c->MaxAtomicBufferBindings =
>> + c->Program[MESA_SHADER_VERTEX].MaxAtomicBuffers +
>> + c->Program[MESA_SHADER_TESS_CTRL].MaxAtomicBuffers +
>> + c->Program[MESA_SHADER_TESS_EVAL].MaxAtomicBuffers +
>> + c->Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers +
>> + c->Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers;
>> + assert(c->MaxCombinedAtomicBuffers <= MAX_COMBINED_ATOMIC_BUFFERS);
>> +
>> + if (c->MaxCombinedAtomicBuffers > 0)
>> + extensions->ARB_shader_atomic_counters = GL_TRUE;
>> }
>>
>>
>> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> index 633e90f..28c9637 100644
>> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> @@ -261,6 +261,8 @@ public:
>> unsigned tex_offset_num_offset;
>> int dead_mask; /**< Used in dead code elimination */
>>
>> + st_src_reg buffer; /**< buffer register */
>> +
>> class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
>> const struct tgsi_opcode_info *info;
>> };
>> @@ -369,6 +371,7 @@ public:
>> int samplers_used;
>> glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
>> int sampler_targets[PIPE_MAX_SAMPLERS]; /**< One of TGSI_TEXTURE_* */
>> + int buffers_used;
>> bool indirect_addr_consts;
>> int wpos_transform_const;
>>
>> @@ -422,6 +425,8 @@ public:
>> virtual void visit(ir_barrier *);
>> /*@}*/
>>
>> + void visit_atomic_counter_intrinsic(ir_call *);
>> +
>> st_src_reg result;
>>
>> /** List of variable_storage */
>> @@ -535,6 +540,28 @@ swizzle_for_size(int size)
>> return size_swizzles[size - 1];
>> }
>>
>> +static bool
>> +is_resource_instruction(unsigned opcode)
>> +{
>> + switch (opcode) {
>> + case TGSI_OPCODE_LOAD:
>> + case TGSI_OPCODE_STORE:
>> + case TGSI_OPCODE_ATOMUADD:
>> + case TGSI_OPCODE_ATOMXCHG:
>> + case TGSI_OPCODE_ATOMCAS:
>> + case TGSI_OPCODE_ATOMAND:
>> + case TGSI_OPCODE_ATOMOR:
>> + case TGSI_OPCODE_ATOMXOR:
>> + case TGSI_OPCODE_ATOMUMIN:
>> + case TGSI_OPCODE_ATOMUMAX:
>> + case TGSI_OPCODE_ATOMIMIN:
>> + case TGSI_OPCODE_ATOMIMAX:
>> + return true;
>> + default:
>> + return false;
>> + }
>> +}
>> +
>> static unsigned
>> num_inst_dst_regs(const glsl_to_tgsi_instruction *op)
>> {
>> @@ -544,7 +571,8 @@ num_inst_dst_regs(const glsl_to_tgsi_instruction *op)
>> static unsigned
>> num_inst_src_regs(const glsl_to_tgsi_instruction *op)
>> {
>> - return op->info->is_tex ? op->info->num_src - 1 : op->info->num_src;
>> + return op->info->is_tex || is_resource_instruction(op->op) ?
>> + op->info->num_src - 1 : op->info->num_src;
>> }
>>
>> glsl_to_tgsi_instruction *
>> @@ -1106,11 +1134,11 @@ type_size(const struct glsl_type *type)
>> case GLSL_TYPE_SAMPLER:
>> case GLSL_TYPE_IMAGE:
>> case GLSL_TYPE_SUBROUTINE:
>> + case GLSL_TYPE_ATOMIC_UINT:
>> /* Samplers take up one slot in UNIFORMS[], but they're baked in
>> * at link time.
>> */
>> return 1;
>> - case GLSL_TYPE_ATOMIC_UINT:
>> case GLSL_TYPE_INTERFACE:
>> case GLSL_TYPE_VOID:
>> case GLSL_TYPE_ERROR:
>> @@ -3025,13 +3053,72 @@ glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
>> }
>>
>> void
>> +glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
>> +{
>> + const char *callee = ir->callee->function_name();
>> + ir_dereference *deref = static_cast<ir_dereference *>(
>> + ir->actual_parameters.get_head());
>> + ir_variable *location = deref->variable_referenced();
>> +
>> + /* XXX use accept */
>> + st_src_reg buffer(
>> + PROGRAM_SAMPLER, location->data.binding /* XXX */, GLSL_TYPE_ATOMIC_UINT);
>
> Why don't you use accept? What's the second XXX about?
location->data.binding is wrong -- I need the equivalent of the sampler
uniform lookup -- but it's what i965 uses.
Using accept didn't work for some idiotic reason back when I was first
implementing this about a year ago. I can re-investigate.
>
>> +
>> + /* Calculate the surface offset */
>> + st_src_reg offset;
>> + ir_dereference_array *deref_array = deref->as_dereference_array();
>> +
>> + if (deref_array) {
>> + offset = get_temp(glsl_type::uint_type);
>> +
>> + deref_array->array_index->accept(this);
>> +
>> + emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset),
>> + this->result, st_src_reg_for_int(ATOMIC_COUNTER_SIZE));
>> + emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset),
>> + offset, st_src_reg_for_int(location->data.atomic.offset));
>> + } else {
>> + offset = st_src_reg_for_int(location->data.atomic.offset);
>> + }
>> +
>> + ir->return_deref->accept(this);
>> + st_dst_reg dst(this->result);
>> + dst.writemask = WRITEMASK_X;
>> +
>> + glsl_to_tgsi_instruction *inst;
>> +
>> + if (!strcmp("__intrinsic_atomic_read", callee)) {
>> + inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, offset);
>> + inst->buffer = buffer;
>> + } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
>> + inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset,
>> + st_src_reg_for_int(1));
>> + inst->buffer = buffer;
>> + } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
>> + inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset,
>> + st_src_reg_for_int(-1));
>> + inst->buffer = buffer;
>> + emit_asm(ir, TGSI_OPCODE_ADD, dst, this->result, st_src_reg_for_int(-1));
>> + }
>> +}
>> +
>> +void
>> glsl_to_tgsi_visitor::visit(ir_call *ir)
>> {
>> glsl_to_tgsi_instruction *call_inst;
>> ir_function_signature *sig = ir->callee;
>> + const char *callee = sig->function_name();
>> function_entry *entry = get_function_signature(sig);
>> int i;
>>
>> + /* Filter out intrinsics */
>> + if (!strcmp("__intrinsic_atomic_read", callee) ||
>> + !strcmp("__intrinsic_atomic_increment", callee) ||
>> + !strcmp("__intrinsic_atomic_predecrement", callee)) {
>> + visit_atomic_counter_intrinsic(ir);
>> + return;
>> + }
>> +
>> /* Process in parameters. */
>> foreach_two_lists(formal_node, &sig->parameters,
>> actual_node, &ir->actual_parameters) {
>> @@ -3535,6 +3622,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
>> current_function = NULL;
>> num_address_regs = 0;
>> samplers_used = 0;
>> + buffers_used = 0;
>> indirect_addr_consts = false;
>> wpos_transform_const = -1;
>> glsl_version = 0;
>> @@ -3569,6 +3657,7 @@ static void
>> count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
>> {
>> v->samplers_used = 0;
>> + v->buffers_used = 0;
>>
>> foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) {
>> if (inst->info->is_tex) {
>> @@ -3586,6 +3675,10 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
>> }
>> }
>> }
>> + if (is_resource_instruction(inst->op)) {
>> + /* TODO: figure out if it's a buffer or image */
>> + v->buffers_used |= 1 << inst->buffer.index;
>> + }
>> }
>> prog->SamplersUsed = v->samplers_used;
>>
>> @@ -4181,7 +4274,11 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
>> foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) {
>> if (!inst->dead_mask || !inst->dst[0].writemask)
>> continue;
>> - else if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) {
>> + /* No amount of dead masks should remove memory stores */
>> + if (inst->info->is_store)
>> + continue;
>> +
>> + if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) {
>> inst->remove();
>> delete inst;
>> removed++;
>> @@ -4362,6 +4459,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
>> v->next_temp = original->next_temp;
>> v->num_address_regs = original->num_address_regs;
>> v->samplers_used = prog->SamplersUsed = original->samplers_used;
>> + v->buffers_used = original->buffers_used;
>> v->indirect_addr_consts = original->indirect_addr_consts;
>> memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
>> v->num_immediates = original->num_immediates;
>> @@ -4495,6 +4593,7 @@ get_bitmap_visitor(struct st_fragment_program *fp,
>> v->next_temp = original->next_temp;
>> v->num_address_regs = original->num_address_regs;
>> v->samplers_used = prog->SamplersUsed = original->samplers_used;
>> + v->buffers_used = original->buffers_used;
>> v->indirect_addr_consts = original->indirect_addr_consts;
>> memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
>> v->num_immediates = original->num_immediates;
>> @@ -4569,6 +4668,7 @@ struct st_translate {
>> struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
>> struct ureg_dst address[3];
>> struct ureg_src samplers[PIPE_MAX_SAMPLERS];
>> + struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS];
>> struct ureg_src systemValues[SYSTEM_VALUE_MAX];
>> struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
>> unsigned *array_sizes;
>> @@ -4995,13 +5095,13 @@ compile_tgsi_instruction(struct st_translate *t,
>> bool clamp_dst_color_output)
>> {
>> struct ureg_program *ureg = t->ureg;
>> - GLuint i;
>> + int i;
>> struct ureg_dst dst[2];
>> struct ureg_src src[4];
>> struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];
>>
>> - unsigned num_dst;
>> - unsigned num_src;
>> + int num_dst;
>> + int num_src;
>> unsigned tex_target;
>>
>> num_dst = num_inst_dst_regs(inst);
>> @@ -5050,7 +5150,7 @@ compile_tgsi_instruction(struct st_translate *t,
>> src[num_src] =
>> ureg_src_indirect(src[num_src], ureg_src(t->address[2]));
>> num_src++;
>> - for (i = 0; i < inst->tex_offset_num_offset; i++) {
>> + for (i = 0; i < (int)inst->tex_offset_num_offset; i++) {
>> texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i], i);
>> }
>> tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
>> @@ -5063,6 +5163,19 @@ compile_tgsi_instruction(struct st_translate *t,
>> src, num_src);
>> return;
>>
>> + case TGSI_OPCODE_LOAD:
>> + case TGSI_OPCODE_STORE:
>> + case TGSI_OPCODE_ATOMUADD:
>> + /* XXX the other atomic ops */
>
> Do we care about other atomic ops for ARB_shader_atomic_counters? I
> think the extension only needs LOAD and ATOMUADD, so this XXX can be
> removed.
For atomic counters that's it -- actually store isn't required
either. But I think I'm going to go the other way and add them all in
now instead. This will be necessary for SSBO.
>
> Marek
More information about the mesa-dev
mailing list