[Mesa-dev] [PATCH 4/6] i965/gen7: Handle atomic instructions from the FS back-end.
Paul Berry
stereotype441 at gmail.com
Fri Nov 1 10:23:00 PDT 2013
On 29 October 2013 16:37, Francisco Jerez <currojerez at riseup.net> wrote:
> This can deal with all the 15 32-bit untyped atomic operations the
> hardware supports, but only INC and PREDEC are going to be exposed
> through the API for now.
>
> v2: Represent atomics as GLSL intrinsics. Add support for variably
> indexed atomic counter arrays. Fix interaction with fragment
> discard.
> ---
> src/mesa/drivers/dri/i965/brw_fs.h | 9 ++
> src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 131
> ++++++++++++++++++++++++++-
> 2 files changed, 138 insertions(+), 2 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h
> b/src/mesa/drivers/dri/i965/brw_fs.h
> index 5b78313..081f8a3 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -391,6 +391,13 @@ public:
> void emit_shader_time_write(enum shader_time_shader_type type,
> fs_reg value);
>
> + void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
> + fs_reg dst, fs_reg offset, fs_reg src0,
> + fs_reg src1);
> +
> + void emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
> + fs_reg offset);
> +
> bool try_rewrite_rhs_to_dst(ir_assignment *ir,
> fs_reg dst,
> fs_reg src,
> @@ -410,6 +417,8 @@ public:
>
> void dump_instruction(backend_instruction *inst);
>
> + void visit_atomic_counter_intrinsic(ir_call *ir);
> +
> struct gl_fragment_program *fp;
> struct brw_wm_compile *c;
> unsigned int sanity_param_count;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index 71b4bf9..b6361d5 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -106,7 +106,7 @@ fs_visitor::visit(ir_variable *ir)
> * ir_binop_ubo_load expressions and not ir_dereference_variable
> for UBO
> * variables, so no need for them to be in variable_ht.
> */
> - if (ir->is_in_uniform_block())
> + if (ir->is_in_uniform_block() || ir->type->contains_atomic())
>
The comment above this "if" statement should be updated to explain why it's
OK to return early when we encounter a variable whose type contains an
atomic counter.
With that change, the patch is:
Reviewed-by: Paul Berry <stereotype441 at gmail.com>
> return;
>
> if (dispatch_width == 16) {
> @@ -2187,9 +2187,58 @@ fs_visitor::visit(ir_loop_jump *ir)
> }
>
> void
> +fs_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
> +{
> + ir_dereference *deref = static_cast<ir_dereference *>(
> + ir->actual_parameters.get_head());
> + ir_variable *location = deref->variable_referenced();
> + unsigned surf_index = (c->prog_data.base.binding_table.abo_start +
> + location->atomic.buffer_index);
> +
> + /* Calculate the surface offset */
> + fs_reg offset(this, glsl_type::uint_type);
> + ir_dereference_array *deref_array = deref->as_dereference_array();
> +
> + if (deref_array) {
> + deref_array->array_index->accept(this);
> +
> + fs_reg tmp(this, glsl_type::uint_type);
> + emit(MUL(tmp, this->result, ATOMIC_COUNTER_SIZE));
> + emit(ADD(offset, tmp, location->atomic.offset));
> + } else {
> + offset = location->atomic.offset;
> + }
> +
> + /* Emit the appropriate machine instruction */
> + const char *callee = ir->callee->function_name();
> + ir->return_deref->accept(this);
> + fs_reg dst = this->result;
> +
> + if (!strcmp("__intrinsic_atomic_read", callee)) {
> + emit_untyped_surface_read(surf_index, dst, offset);
> +
> + } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
> + emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset,
> + fs_reg(), fs_reg());
> +
> + } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
> + emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset,
> + fs_reg(), fs_reg());
> + }
> +}
> +
> +void
> fs_visitor::visit(ir_call *ir)
> {
> - assert(!"FINISHME");
> + const char *callee = ir->callee->function_name();
> +
> + if (!strcmp("__intrinsic_atomic_read", callee) ||
> + !strcmp("__intrinsic_atomic_increment", callee) ||
> + !strcmp("__intrinsic_atomic_predecrement", callee)) {
> + visit_atomic_counter_intrinsic(ir);
> + } else {
> + assert(!"Unsupported intrinsic.");
> + }
> }
>
> void
> @@ -2240,6 +2289,84 @@ fs_visitor::visit(ir_end_primitive *)
> assert(!"not reached");
> }
>
> +void
> +fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
> + fs_reg dst, fs_reg offset, fs_reg src0,
> + fs_reg src1)
> +{
> + const unsigned operand_len = dispatch_width / 8;
> + unsigned mlen = 0;
> +
> + /* Initialize the sample mask in the message header. */
> + emit(MOV(brw_uvec_mrf(8, mlen, 0), brw_imm_ud(0)))
> + ->force_writemask_all = true;
> +
> + if (fp->UsesKill) {
> + emit(MOV(brw_uvec_mrf(1, mlen, 7), brw_flag_reg(0, 1)))
> + ->force_writemask_all = true;
> + } else {
> + emit(MOV(brw_uvec_mrf(1, mlen, 7),
> + retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD)))
> + ->force_writemask_all = true;
> + }
> +
> + mlen++;
> +
> + /* Set the atomic operation offset. */
> + emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), offset));
> + mlen += operand_len;
> +
> + /* Set the atomic operation arguments. */
> + if (src0.file != BAD_FILE) {
> + emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), src0));
> + mlen += operand_len;
> + }
> +
> + if (src1.file != BAD_FILE) {
> + emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), src1));
> + mlen += operand_len;
> + }
> +
> + /* Emit the instruction. */
> + fs_inst inst(SHADER_OPCODE_UNTYPED_ATOMIC, dst, atomic_op, surf_index);
> + inst.base_mrf = 0;
> + inst.mlen = mlen;
> + emit(inst);
> +}
> +
> +void
> +fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
> + fs_reg offset)
> +{
> + const unsigned operand_len = dispatch_width / 8;
> + unsigned mlen = 0;
> +
> + /* Initialize the sample mask in the message header. */
> + emit(MOV(brw_uvec_mrf(8, mlen, 0), brw_imm_ud(0)))
> + ->force_writemask_all = true;
> +
> + if (fp->UsesKill) {
> + emit(MOV(brw_uvec_mrf(1, mlen, 7), brw_flag_reg(0, 1)))
> + ->force_writemask_all = true;
> + } else {
> + emit(MOV(brw_uvec_mrf(1, mlen, 7),
> + retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD)))
> + ->force_writemask_all = true;
> + }
> +
> + mlen++;
> +
> + /* Set the surface read offset. */
> + emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), offset));
> + mlen += operand_len;
> +
> + /* Emit the instruction. */
> + fs_inst inst(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, surf_index);
> + inst.base_mrf = 0;
> + inst.mlen = mlen;
> + emit(inst);
> +}
> +
> fs_inst *
> fs_visitor::emit(fs_inst inst)
> {
> --
> 1.8.3.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20131101/c548bb73/attachment.html>
More information about the mesa-dev mailing list