[Mesa-dev] [PATCH 16/24] i965/gen7: Implement code generation for untyped atomic instructions.

Paul Berry stereotype441 at gmail.com
Thu Sep 26 10:24:19 PDT 2013


On 15 September 2013 00:10, Francisco Jerez <currojerez at riseup.net> wrote:

> ---
>  src/mesa/drivers/dri/i965/brw_defines.h     |  2 +
>  src/mesa/drivers/dri/i965/brw_eu.h          |  9 +++++
>  src/mesa/drivers/dri/i965/brw_eu_emit.c     | 62
> +++++++++++++++++++++++++++++
>  src/mesa/drivers/dri/i965/brw_fs.cpp        |  2 +
>  src/mesa/drivers/dri/i965/brw_fs.h          |  5 +++
>  src/mesa/drivers/dri/i965/brw_fs_emit.cpp   | 21 ++++++++++
>  src/mesa/drivers/dri/i965/brw_vec4.cpp      |  2 +
>  src/mesa/drivers/dri/i965/brw_vec4.h        |  5 +++
>  src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 22 ++++++++++
>  9 files changed, 130 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h
> b/src/mesa/drivers/dri/i965/brw_defines.h
> index e9e0c4a..ccb4ce4 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -770,6 +770,8 @@ enum opcode {
>
>     SHADER_OPCODE_SHADER_TIME_ADD,
>
> +   SHADER_OPCODE_UNTYPED_ATOMIC,
> +
>     FS_OPCODE_DDX,
>     FS_OPCODE_DDY,
>     FS_OPCODE_PIXEL_X,
> diff --git a/src/mesa/drivers/dri/i965/brw_eu.h
> b/src/mesa/drivers/dri/i965/brw_eu.h
> index 720bc74..212d916 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu.h
> +++ b/src/mesa/drivers/dri/i965/brw_eu.h
> @@ -422,6 +422,15 @@ void brw_CMP(struct brw_compile *p,
>              struct brw_reg src0,
>              struct brw_reg src1);
>
> +void
> +brw_untyped_atomic(struct brw_compile *p,
> +                   struct brw_reg dest,
> +                   struct brw_reg mrf,
> +                   GLuint atomic_op,
> +                   GLuint bind_table_index,
> +                   GLuint msg_length,
> +                   GLuint response_length);
> +
>  /***********************************************************************
>   * brw_eu_util.c:
>   */
> diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c
> b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> index cce8752..f39bf99 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
> +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> @@ -2465,6 +2465,68 @@ brw_svb_write(struct brw_compile *p,
>                              send_commit_msg); /* send_commit_msg */
>  }
>
> +static void
> +brw_set_dp_untyped_atomic_message(struct brw_compile *p,
> +                                  struct brw_instruction *insn,
> +                                  GLuint atomic_op,
> +                                  GLuint bind_table_index,
> +                                  GLuint msg_length,
> +                                  GLuint response_length,
> +                                  bool header_present)
> +{
> +   if (p->brw->is_haswell) {
> +      brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1,
> +                                 msg_length, response_length,
> +                                 header_present, false);
> +
> +
> +      if (insn->header.access_mode == BRW_ALIGN_1) {
> +         if (insn->header.execution_size != BRW_EXECUTE_16)
> +            insn->bits3.ud |= 1 << 12; /* SIMD8 mode */
> +
> +         insn->bits3.gen7_dp.msg_type =
> +            HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
> +      } else {
> +         insn->bits3.gen7_dp.msg_type =
> +            HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
> +      }
> +
> +   } else {
>

It would be nice to have a comment somewhere in the else block explaining
that on Ivy Bridge (IVB), SIMD4x2 atomic operations aren't available, so we
implement them using SIMD8 messages and rely on the shader compiler to set
the execution mask accordingly.

With that comment added, this patch is:

Reviewed-by: Paul Berry <stereotype441 at gmail.com>


> +      brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
> +                                 msg_length, response_length,
> +                                 header_present, false);
> +
> +      insn->bits3.gen7_dp.msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
> +
> +      if (insn->header.execution_size != BRW_EXECUTE_16)
> +         insn->bits3.ud |= 1 << 12; /* SIMD8 mode */
> +   }
> +
> +   if (response_length)
> +      insn->bits3.ud |= 1 << 13; /* Return data expected */
> +
> +   insn->bits3.gen7_dp.binding_table_index = bind_table_index;
> +   insn->bits3.ud |= atomic_op << 8;
> +}
> +
> +void
> +brw_untyped_atomic(struct brw_compile *p,
> +                   struct brw_reg dest,
> +                   struct brw_reg mrf,
> +                   GLuint atomic_op,
> +                   GLuint bind_table_index,
> +                   GLuint msg_length,
> +                   GLuint response_length) {
> +   struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
> +
> +   brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD));
> +   brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD));
> +   brw_set_src1(p, insn, brw_imm_d(0));
> +   brw_set_dp_untyped_atomic_message(
> +      p, insn, atomic_op, bind_table_index, msg_length, response_length,
> +      insn->header.access_mode == BRW_ALIGN_1);
> +}
> +
>  /**
>   * This instruction is generated as a single-channel align1 instruction by
>   * both the VS and FS stages when using INTEL_DEBUG=shader_time.
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index a98e7c7..4f1a665 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -738,6 +738,8 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
>        return inst->mlen;
>     case FS_OPCODE_SPILL:
>        return 2;
> +   case SHADER_OPCODE_UNTYPED_ATOMIC:
> +      return 0;
>     default:
>        assert(!"not reached");
>        return inst->mlen;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h
> b/src/mesa/drivers/dri/i965/brw_fs.h
> index e78267e..27a47fa 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -556,6 +556,11 @@ private:
>                                   struct brw_reg offset,
>                                   struct brw_reg value);
>
> +   void generate_untyped_atomic(fs_inst *inst,
> +                                struct brw_reg dst,
> +                                struct brw_reg atomic_op,
> +                                struct brw_reg surf_index);
> +
>     void mark_surface_used(unsigned surf_index);
>
>     void patch_discard_jumps_to_fb_writes();
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> index bfb3d33..cf30fcb 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> @@ -1066,6 +1066,23 @@ fs_generator::generate_shader_time_add(fs_inst
> *inst,
>  }
>
>  void
> +fs_generator::generate_untyped_atomic(fs_inst *inst, struct brw_reg dst,
> +                                      struct brw_reg atomic_op,
> +                                      struct brw_reg surf_index)
> +{
> +   assert(atomic_op.file == BRW_IMMEDIATE_VALUE &&
> +          atomic_op.type == BRW_REGISTER_TYPE_UD &&
> +          surf_index.file == BRW_IMMEDIATE_VALUE &&
> +         surf_index.type == BRW_REGISTER_TYPE_UD);
> +
> +   brw_untyped_atomic(p, dst, brw_message_reg(inst->base_mrf),
> +                      atomic_op.dw1.ud, surf_index.dw1.ud,
> +                      inst->mlen, dispatch_width / 8);
> +
> +   mark_surface_used(surf_index.dw1.ud);
> +}
> +
> +void
>  fs_generator::generate_code(exec_list *instructions)
>  {
>     int last_native_insn_offset = p->next_insn_offset;
> @@ -1439,6 +1456,10 @@ fs_generator::generate_code(exec_list *instructions)
>           generate_shader_time_add(inst, src[0], src[1], src[2]);
>           break;
>
> +      case SHADER_OPCODE_UNTYPED_ATOMIC:
> +         generate_untyped_atomic(inst, dst, src[0], src[1]);
> +         break;
> +
>        case FS_OPCODE_SET_SIMD4X2_OFFSET:
>           generate_set_simd4x2_offset(inst, dst, src[0]);
>           break;
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index 2c1f541..590c0a5 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -271,6 +271,8 @@ vec4_visitor::implied_mrf_writes(vec4_instruction
> *inst)
>     case SHADER_OPCODE_TXF_MS:
>     case SHADER_OPCODE_TXS:
>        return inst->header_present ? 1 : 0;
> +   case SHADER_OPCODE_UNTYPED_ATOMIC:
> +      return 0;
>     default:
>        assert(!"not reached");
>        return inst->mlen;
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h
> b/src/mesa/drivers/dri/i965/brw_vec4.h
> index 13c9166..233f233 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
> @@ -620,6 +620,11 @@ private:
>     void generate_unpack_flags(vec4_instruction *inst,
>                                struct brw_reg dst);
>
> +   void generate_untyped_atomic(vec4_instruction *inst,
> +                                struct brw_reg dst,
> +                                struct brw_reg atomic_op,
> +                                struct brw_reg surf_index);
> +
>     void mark_surface_used(unsigned surf_index);
>
>     struct brw_context *brw;
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> index 6916134..05c5806 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> @@ -835,6 +835,24 @@
> vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
>     mark_surface_used(surf_index.dw1.ud);
>  }
>
> +void
> +vec4_generator::generate_untyped_atomic(vec4_instruction *inst,
> +                                        struct brw_reg dst,
> +                                        struct brw_reg atomic_op,
> +                                        struct brw_reg surf_index)
> +{
> +   assert(atomic_op.file == BRW_IMMEDIATE_VALUE &&
> +          atomic_op.type == BRW_REGISTER_TYPE_UD &&
> +          surf_index.file == BRW_IMMEDIATE_VALUE &&
> +         surf_index.type == BRW_REGISTER_TYPE_UD);
> +
> +   brw_untyped_atomic(p, dst, brw_message_reg(inst->base_mrf),
> +                      atomic_op.dw1.ud, surf_index.dw1.ud,
> +                      inst->mlen, 1);
> +
> +   mark_surface_used(surf_index.dw1.ud);
> +}
> +
>  /**
>   * Generate assembly for a Vec4 IR instruction.
>   *
> @@ -1096,6 +1114,10 @@
> vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
>        mark_surface_used(SURF_INDEX_VEC4_SHADER_TIME);
>        break;
>
> +   case SHADER_OPCODE_UNTYPED_ATOMIC:
> +      generate_untyped_atomic(inst, dst, src[0], src[1]);
> +      break;
> +
>     case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
>        generate_unpack_flags(inst, dst);
>        break;
> --
> 1.8.3.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20130926/048b909d/attachment-0001.html>


More information about the mesa-dev mailing list