[Mesa-dev] [PATCH 2/2] st/mesa: add GL_ARB_shader_atomic_counter_ops support
Nicolai Hähnle
nhaehnle at gmail.com
Thu Mar 10 15:57:45 UTC 2016
On 20.02.2016 00:13, Ilia Mirkin wrote:
> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
> ---
> src/mesa/state_tracker/st_extensions.c | 4 +-
> src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 60 +++++++++++++++++++++++++++---
> 2 files changed, 57 insertions(+), 7 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
> index 94696ce..21e108d 100644
> --- a/src/mesa/state_tracker/st_extensions.c
> +++ b/src/mesa/state_tracker/st_extensions.c
> @@ -368,8 +368,10 @@ void st_init_limits(struct pipe_screen *screen,
> c->Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers;
> assert(c->MaxCombinedAtomicBuffers <= MAX_COMBINED_ATOMIC_BUFFERS);
>
> - if (c->MaxCombinedAtomicBuffers > 0)
> + if (c->MaxCombinedAtomicBuffers > 0) {
> extensions->ARB_shader_atomic_counters = GL_TRUE;
> + extensions->ARB_shader_atomic_counter_ops = GL_TRUE;
> + }
I believe there is pre-GCN AMD hardware that can support atomic counters
but not atomic_counter_ops (at least according to what the closed driver
exposes; I haven't actually checked the docs), so there should probably
be a separate capability flag for atomic_counter_ops here.
>
> c->MaxCombinedShaderOutputResources = c->MaxDrawBuffers;
> c->ShaderStorageBufferOffsetAlignment =
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 943582d..fe6d58b 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -3163,8 +3163,8 @@ void
> glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
> {
> const char *callee = ir->callee->function_name();
> - ir_dereference *deref = static_cast<ir_dereference *>(
> - ir->actual_parameters.get_head());
> + exec_node *param = ir->actual_parameters.get_head();
> + ir_dereference *deref = static_cast<ir_dereference *>(param);
> ir_variable *location = deref->variable_referenced();
>
> st_src_reg buffer(
> @@ -3193,17 +3193,56 @@ glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
>
> if (!strcmp("__intrinsic_atomic_read", callee)) {
> inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, offset);
> - inst->buffer = buffer;
> } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
> inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset,
> st_src_reg_for_int(1));
> - inst->buffer = buffer;
> } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
> inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset,
> st_src_reg_for_int(-1));
> - inst->buffer = buffer;
> emit_asm(ir, TGSI_OPCODE_ADD, dst, this->result, st_src_reg_for_int(-1));
> + } else {
> + param = param->get_next();
> + ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
> + val->accept(this);
> +
> + st_src_reg data = this->result, data2 = undef_src;
> + unsigned opcode;
> + if (!strcmp("__intrinsic_atomic_add", callee))
> + opcode = TGSI_OPCODE_ATOMUADD;
> + else if (!strcmp("__intrinsic_atomic_min", callee))
> + opcode = TGSI_OPCODE_ATOMIMIN;
> + else if (!strcmp("__intrinsic_atomic_max", callee))
> + opcode = TGSI_OPCODE_ATOMIMAX;
> + else if (!strcmp("__intrinsic_atomic_and", callee))
> + opcode = TGSI_OPCODE_ATOMAND;
> + else if (!strcmp("__intrinsic_atomic_or", callee))
> + opcode = TGSI_OPCODE_ATOMOR;
> + else if (!strcmp("__intrinsic_atomic_xor", callee))
> + opcode = TGSI_OPCODE_ATOMXOR;
> + else if (!strcmp("__intrinsic_atomic_exchange", callee))
> + opcode = TGSI_OPCODE_ATOMXCHG;
> + else if (!strcmp("__intrinsic_atomic_comp_swap", callee)) {
> + opcode = TGSI_OPCODE_ATOMCAS;
> + param = param->get_next();
> + val = ((ir_instruction *)param)->as_rvalue();
> + val->accept(this);
> + data2 = this->result;
> + } else if (!strcmp("__intrinsic_atomic_sub", callee)) {
> + opcode = TGSI_OPCODE_ATOMUADD;
> + st_src_reg res = get_temp(glsl_type::uvec4_type);
> + st_dst_reg dstres = st_dst_reg(res);
> + dstres.writemask = dst.writemask;
> + emit_asm(ir, TGSI_OPCODE_INEG, dstres, data);
> + data = res;
> + } else {
> + assert(!"Unexpected intrinsic");
> + return;
> + }
> +
> + inst = emit_asm(ir, opcode, dst, offset, data, data2);
> }
> +
> + inst->buffer = buffer;
You could refactor this a bit further so that all intrinsics go through
the same emit_asm call, but that's a minor point.
Cheers,
Nicolai
> }
>
> void
> @@ -3596,7 +3635,16 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
> /* Filter out intrinsics */
> if (!strcmp("__intrinsic_atomic_read", callee) ||
> !strcmp("__intrinsic_atomic_increment", callee) ||
> - !strcmp("__intrinsic_atomic_predecrement", callee)) {
> + !strcmp("__intrinsic_atomic_predecrement", callee) ||
> + !strcmp("__intrinsic_atomic_add", callee) ||
> + !strcmp("__intrinsic_atomic_sub", callee) ||
> + !strcmp("__intrinsic_atomic_min", callee) ||
> + !strcmp("__intrinsic_atomic_max", callee) ||
> + !strcmp("__intrinsic_atomic_and", callee) ||
> + !strcmp("__intrinsic_atomic_or", callee) ||
> + !strcmp("__intrinsic_atomic_xor", callee) ||
> + !strcmp("__intrinsic_atomic_exchange", callee) ||
> + !strcmp("__intrinsic_atomic_comp_swap", callee)) {
> visit_atomic_counter_intrinsic(ir);
> return;
> }
>
More information about the mesa-dev mailing list