[Mesa-dev] [PATCH 2/2] st/mesa: add GL_ARB_shader_atomic_counter_ops support

Nicolai Hähnle nhaehnle at gmail.com
Thu Mar 10 15:57:45 UTC 2016


On 20.02.2016 00:13, Ilia Mirkin wrote:
> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
> ---
>   src/mesa/state_tracker/st_extensions.c     |  4 +-
>   src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 60 +++++++++++++++++++++++++++---
>   2 files changed, 57 insertions(+), 7 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
> index 94696ce..21e108d 100644
> --- a/src/mesa/state_tracker/st_extensions.c
> +++ b/src/mesa/state_tracker/st_extensions.c
> @@ -368,8 +368,10 @@ void st_init_limits(struct pipe_screen *screen,
>            c->Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers;
>      assert(c->MaxCombinedAtomicBuffers <= MAX_COMBINED_ATOMIC_BUFFERS);
>
> -   if (c->MaxCombinedAtomicBuffers > 0)
> +   if (c->MaxCombinedAtomicBuffers > 0) {
>         extensions->ARB_shader_atomic_counters = GL_TRUE;
> +      extensions->ARB_shader_atomic_counter_ops = GL_TRUE;
> +   }

I believe there is pre-GCN AMD hardware that can support atomic counters 
but not atomic_counter_ops (at least according to what the closed driver 
exposes; I haven't actually checked the docs), so there should probably 
be a separate capability flag here rather than enabling the extension 
whenever MaxCombinedAtomicBuffers > 0.

>
>      c->MaxCombinedShaderOutputResources = c->MaxDrawBuffers;
>      c->ShaderStorageBufferOffsetAlignment =
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 943582d..fe6d58b 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -3163,8 +3163,8 @@ void
>   glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
>   {
>      const char *callee = ir->callee->function_name();
> -   ir_dereference *deref = static_cast<ir_dereference *>(
> -      ir->actual_parameters.get_head());
> +   exec_node *param = ir->actual_parameters.get_head();
> +   ir_dereference *deref = static_cast<ir_dereference *>(param);
>      ir_variable *location = deref->variable_referenced();
>
>      st_src_reg buffer(
> @@ -3193,17 +3193,56 @@ glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
>
>      if (!strcmp("__intrinsic_atomic_read", callee)) {
>         inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, offset);
> -      inst->buffer = buffer;
>      } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
>         inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset,
>                         st_src_reg_for_int(1));
> -      inst->buffer = buffer;
>      } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
>         inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset,
>                         st_src_reg_for_int(-1));
> -      inst->buffer = buffer;
>         emit_asm(ir, TGSI_OPCODE_ADD, dst, this->result, st_src_reg_for_int(-1));
> +   } else {
> +      param = param->get_next();
> +      ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
> +      val->accept(this);
> +
> +      st_src_reg data = this->result, data2 = undef_src;
> +      unsigned opcode;
> +      if (!strcmp("__intrinsic_atomic_add", callee))
> +         opcode = TGSI_OPCODE_ATOMUADD;
> +      else if (!strcmp("__intrinsic_atomic_min", callee))
> +         opcode = TGSI_OPCODE_ATOMIMIN;
> +      else if (!strcmp("__intrinsic_atomic_max", callee))
> +         opcode = TGSI_OPCODE_ATOMIMAX;
> +      else if (!strcmp("__intrinsic_atomic_and", callee))
> +         opcode = TGSI_OPCODE_ATOMAND;
> +      else if (!strcmp("__intrinsic_atomic_or", callee))
> +         opcode = TGSI_OPCODE_ATOMOR;
> +      else if (!strcmp("__intrinsic_atomic_xor", callee))
> +         opcode = TGSI_OPCODE_ATOMXOR;
> +      else if (!strcmp("__intrinsic_atomic_exchange", callee))
> +         opcode = TGSI_OPCODE_ATOMXCHG;
> +      else if (!strcmp("__intrinsic_atomic_comp_swap", callee)) {
> +         opcode = TGSI_OPCODE_ATOMCAS;
> +         param = param->get_next();
> +         val = ((ir_instruction *)param)->as_rvalue();
> +         val->accept(this);
> +         data2 = this->result;
> +      } else if (!strcmp("__intrinsic_atomic_sub", callee)) {
> +         opcode = TGSI_OPCODE_ATOMUADD;
> +         st_src_reg res = get_temp(glsl_type::uvec4_type);
> +         st_dst_reg dstres = st_dst_reg(res);
> +         dstres.writemask = dst.writemask;
> +         emit_asm(ir, TGSI_OPCODE_INEG, dstres, data);
> +         data = res;
> +      } else {
> +         assert(!"Unexpected intrinsic");
> +         return;
> +      }
> +
> +      inst = emit_asm(ir, opcode, dst, offset, data, data2);
>      }
> +
> +   inst->buffer = buffer;

You could refactor this a bit further so that all intrinsics go through 
the same emit_asm call, but that's a minor point.

Cheers,
Nicolai

>   }
>
>   void
> @@ -3596,7 +3635,16 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
>      /* Filter out intrinsics */
>      if (!strcmp("__intrinsic_atomic_read", callee) ||
>          !strcmp("__intrinsic_atomic_increment", callee) ||
> -       !strcmp("__intrinsic_atomic_predecrement", callee)) {
> +       !strcmp("__intrinsic_atomic_predecrement", callee) ||
> +       !strcmp("__intrinsic_atomic_add", callee) ||
> +       !strcmp("__intrinsic_atomic_sub", callee) ||
> +       !strcmp("__intrinsic_atomic_min", callee) ||
> +       !strcmp("__intrinsic_atomic_max", callee) ||
> +       !strcmp("__intrinsic_atomic_and", callee) ||
> +       !strcmp("__intrinsic_atomic_or", callee) ||
> +       !strcmp("__intrinsic_atomic_xor", callee) ||
> +       !strcmp("__intrinsic_atomic_exchange", callee) ||
> +       !strcmp("__intrinsic_atomic_comp_swap", callee)) {
>         visit_atomic_counter_intrinsic(ir);
>         return;
>      }
>


More information about the mesa-dev mailing list