[Mesa-dev] [PATCH 11/13] intel/compiler: Implement untyped atomic float min, max, and compare-swap dataport messages

Ian Romanick idr at freedesktop.org
Sat Jun 23 05:03:56 UTC 2018


From: Ian Romanick <ian.d.romanick at intel.com>

Also expand the message type field by a single bit.  This is necessary
for this Gen9 message type, but there are also Gen8 message types that
need the extra bit (mostly for bindless).

Signed-off-by: Ian Romanick <ian.d.romanick at intel.com>
---
 src/intel/compiler/brw_disasm.c                   |  15 +++-
 src/intel/compiler/brw_eu.h                       |  11 +++
 src/intel/compiler/brw_eu_defines.h               |  12 ++-
 src/intel/compiler/brw_eu_emit.c                  |  48 ++++++++++
 src/intel/compiler/brw_fs.cpp                     |  23 +++++
 src/intel/compiler/brw_fs.h                       |   4 +
 src/intel/compiler/brw_fs_copy_propagation.cpp    |   2 +
 src/intel/compiler/brw_fs_dead_code_eliminate.cpp |   2 +
 src/intel/compiler/brw_fs_generator.cpp           |   7 ++
 src/intel/compiler/brw_fs_nir.cpp                 | 103 ++++++++++++++++++++++
 src/intel/compiler/brw_fs_surface_builder.cpp     |  24 +++++
 src/intel/compiler/brw_fs_surface_builder.h       |   7 ++
 src/intel/compiler/brw_inst.h                     |   2 +-
 src/intel/compiler/brw_schedule_instructions.cpp  |   1 +
 src/intel/compiler/brw_shader.cpp                 |   6 ++
 15 files changed, 264 insertions(+), 3 deletions(-)

diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c
index e45762afb07..322f4544dfd 100644
--- a/src/intel/compiler/brw_disasm.c
+++ b/src/intel/compiler/brw_disasm.c
@@ -406,7 +406,7 @@ static const char *const dp_dc0_msg_type_gen7[16] = {
    [GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE] = "DC untyped surface write",
 };
 
-static const char *const dp_dc1_msg_type_hsw[16] = {
+static const char *const dp_dc1_msg_type_hsw[32] = {
    [HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ] = "untyped surface read",
    [HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP] = "DC untyped atomic op",
    [HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2] =
@@ -421,6 +421,8 @@ static const char *const dp_dc1_msg_type_hsw[16] = {
    [HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2] =
       "DC 4x2 atomic counter op",
    [HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE] = "DC typed surface write",
+   [GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP] =
+      "DC untyped atomic float op",
 };
 
 static const char *const aop[16] = {
@@ -441,6 +443,12 @@ static const char *const aop[16] = {
    [BRW_AOP_PREDEC] = "predec",
 };
 
+static const char *const aop_float[16] = { /* indexed by msg_ctrl & 0xf */
+   [BRW_AOP_FMAX]   = "fmax",
+   [BRW_AOP_FMIN]   = "fmin",
+   [BRW_AOP_FCMPWR] = "fcmpwr",
+};
+
 static const char * const pixel_interpolator_msg_types[4] = {
     [GEN7_PIXEL_INTERPOLATOR_LOC_SHARED_OFFSET] = "per_message_offset",
     [GEN7_PIXEL_INTERPOLATOR_LOC_SAMPLE] = "sample_position",
@@ -1797,6 +1805,11 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo,
                          simd_modes[msg_ctrl >> 4], msg_ctrl & 0xf);
                   break;
                }
+               case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP:
+                  format(file, "SIMD%d,", (msg_ctrl & (1 << 4)) ? 8 : 16);
+                  control(file, "atomic float op", aop_float, msg_ctrl & 0xf,
+                          &space);
+                  break;
                default:
                   format(file, "0x%x", msg_ctrl);
                }
diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
index 0f07eeb3d6d..b0497806721 100644
--- a/src/intel/compiler/brw_eu.h
+++ b/src/intel/compiler/brw_eu.h
@@ -481,6 +481,17 @@ brw_untyped_atomic(struct brw_codegen *p,
                    bool response_expected,
                    bool header_present);
 
+void
+brw_untyped_atomic_float(struct brw_codegen *p,
+                         struct brw_reg dst,
+                         struct brw_reg payload,
+                         struct brw_reg surface,
+                         unsigned atomic_op,
+                         unsigned msg_length,
+                         bool response_expected,
+                         bool header_present);
+
+
 void
 brw_untyped_surface_read(struct brw_codegen *p,
                          struct brw_reg dst,
diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h
index ee306a6c2ce..ac6dd02330a 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -388,6 +388,8 @@ enum opcode {
     */
    SHADER_OPCODE_UNTYPED_ATOMIC,
    SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
+   SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT,
+   SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL,
    SHADER_OPCODE_UNTYPED_SURFACE_READ,
    SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL,
    SHADER_OPCODE_UNTYPED_SURFACE_WRITE,
@@ -1153,6 +1155,7 @@ enum brw_message_target {
 #define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP                     11
 #define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2             12
 #define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE                   13
+#define GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP              0x1b
 
 /* GEN9 */
 #define GEN9_DATAPORT_RC_RENDER_TARGET_WRITE                        12
@@ -1171,7 +1174,9 @@ enum brw_message_target {
 #define GEN8_BTI_STATELESS_IA_COHERENT   255
 #define GEN8_BTI_STATELESS_NON_COHERENT  253
 
-/* dataport atomic operations. */
+/* Dataport atomic operations for Untyped Atomic Integer Operation message
+ * (and others).
+ */
 #define BRW_AOP_AND                   1
 #define BRW_AOP_OR                    2
 #define BRW_AOP_XOR                   3
@@ -1188,6 +1193,11 @@ enum brw_message_target {
 #define BRW_AOP_CMPWR                 14
 #define BRW_AOP_PREDEC                15
 
+/* Dataport atomic operations for Untyped Atomic Float Operation message. */
+#define BRW_AOP_FMAX                  1
+#define BRW_AOP_FMIN                  2
+#define BRW_AOP_FCMPWR                3
+
 #define BRW_MATH_FUNCTION_INV                              1
 #define BRW_MATH_FUNCTION_LOG                              2
 #define BRW_MATH_FUNCTION_EXP                              3
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index d3246edde44..815482c9b49 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -2937,6 +2937,54 @@ brw_untyped_atomic(struct brw_codegen *p,
       p, insn, atomic_op, response_expected);
 }
 
+static void
+brw_set_dp_untyped_atomic_float_message(struct brw_codegen *p,
+                                        brw_inst *insn,
+                                        unsigned atomic_op,
+                                        bool response_expected)
+{
+   const struct gen_device_info *devinfo = p->devinfo;
+   unsigned msg_control =
+      atomic_op | /* Atomic Operation Type: BRW_AOP_F* */
+      (response_expected ? 1 << 5 : 0); /* Return data expected */
+
+   assert(devinfo->gen >= 9);
+   assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
+
+   if (brw_get_default_exec_size(p) != BRW_EXECUTE_16)
+      msg_control |= 1 << 4; /* SIMD8 mode */
+
+   brw_inst_set_dp_msg_type(devinfo, insn,
+                            GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP);
+
+   brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
+}
+
+void
+brw_untyped_atomic_float(struct brw_codegen *p,
+                         struct brw_reg dst,
+                         struct brw_reg payload,
+                         struct brw_reg surface,
+                         unsigned atomic_op,
+                         unsigned msg_length,
+                         bool response_expected,
+                         bool header_present)
+{
+   const struct gen_device_info *devinfo = p->devinfo;
+
+   assert(devinfo->gen >= 9);
+   assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
+
+   const unsigned sfid = HSW_SFID_DATAPORT_DATA_CACHE_1;
+   struct brw_inst *insn = brw_send_indirect_surface_message(
+      p, sfid, brw_writemask(dst, WRITEMASK_XYZW), payload, surface, msg_length,
+      brw_surface_payload_size(p, response_expected, true, true),
+      header_present);
+
+   brw_set_dp_untyped_atomic_float_message(
+      p, insn, atomic_op, response_expected);
+}
+
 static void
 brw_set_dp_untyped_surface_read_message(struct brw_codegen *p,
                                         struct brw_inst *insn,
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 5c95e260aad..ba3991f7efa 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -242,6 +242,7 @@ fs_inst::is_send_from_grf() const
    case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
    case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
    case SHADER_OPCODE_UNTYPED_ATOMIC:
+   case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
    case SHADER_OPCODE_UNTYPED_SURFACE_READ:
    case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
    case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
@@ -806,6 +807,20 @@ fs_inst::components_read(unsigned i) const
          return 1;
    }
 
+   case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: {
+      assert(src[3].file == IMM &&
+             src[4].file == IMM);
+      const unsigned op = src[4].ud;
+      /* Surface coordinates. */
+      if (i == 0)
+         return src[3].ud;
+      /* Surface operation source. */
+      else if (i == 1 && op == BRW_AOP_FCMPWR)
+         return 2;
+      else
+         return 1;
+   }
+
    default:
       return 1;
    }
@@ -824,6 +839,7 @@ fs_inst::size_read(int arg) const
    case SHADER_OPCODE_URB_READ_SIMD8:
    case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
    case SHADER_OPCODE_UNTYPED_ATOMIC:
+   case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
    case SHADER_OPCODE_UNTYPED_SURFACE_READ:
    case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
    case SHADER_OPCODE_TYPED_ATOMIC:
@@ -4855,6 +4871,12 @@ fs_visitor::lower_logical_sends()
                                     ibld.sample_mask_reg());
          break;
 
+      case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
+         lower_surface_logical_send(ibld, inst,
+                                    SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT,
+                                    ibld.sample_mask_reg());
+         break;
+
       case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
          lower_surface_logical_send(ibld, inst,
                                     SHADER_OPCODE_TYPED_SURFACE_READ,
@@ -5333,6 +5355,7 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
       return 8;
 
    case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+   case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
    case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
    case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
    case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 17b1368d522..c0923eec8ec 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -226,8 +226,12 @@ public:
                                nir_intrinsic_instr *instr);
    void nir_emit_ssbo_atomic(const brw::fs_builder &bld,
                              int op, nir_intrinsic_instr *instr);
+   void nir_emit_ssbo_atomic_float(const brw::fs_builder &bld,
+                                   int op, nir_intrinsic_instr *instr);
    void nir_emit_shared_atomic(const brw::fs_builder &bld,
                                int op, nir_intrinsic_instr *instr);
+   void nir_emit_shared_atomic_float(const brw::fs_builder &bld,
+                                     int op, nir_intrinsic_instr *instr);
    void nir_emit_texture(const brw::fs_builder &bld,
                          nir_tex_instr *instr);
    void nir_emit_jump(const brw::fs_builder &bld,
diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp
index 92cc0a8de58..ab34b63748e 100644
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
@@ -679,6 +679,7 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
          break;
 
       case SHADER_OPCODE_UNTYPED_ATOMIC:
+      case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
       case SHADER_OPCODE_UNTYPED_SURFACE_READ:
       case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
       case SHADER_OPCODE_TYPED_ATOMIC:
@@ -720,6 +721,7 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
       case SHADER_OPCODE_TG4_LOGICAL:
       case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
       case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+      case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
       case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
       case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
       case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
diff --git a/src/intel/compiler/brw_fs_dead_code_eliminate.cpp b/src/intel/compiler/brw_fs_dead_code_eliminate.cpp
index 7adb4278919..eeb71dd2b92 100644
--- a/src/intel/compiler/brw_fs_dead_code_eliminate.cpp
+++ b/src/intel/compiler/brw_fs_dead_code_eliminate.cpp
@@ -55,6 +55,8 @@ can_omit_write(const fs_inst *inst)
    switch (inst->opcode) {
    case SHADER_OPCODE_UNTYPED_ATOMIC:
    case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+   case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
+   case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
    case SHADER_OPCODE_TYPED_ATOMIC:
    case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
       return true;
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index 09839d0b4da..65eb860386a 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -2228,6 +2228,13 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
                             inst->header_size);
          break;
 
+      case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
+         assert(src[2].file == BRW_IMMEDIATE_VALUE);
+         brw_untyped_atomic_float(p, dst, src[0], src[1], src[2].ud,
+                                  inst->mlen, !inst->dst.is_null(),
+                                  inst->header_size);
+         break;
+
       case SHADER_OPCODE_UNTYPED_SURFACE_READ:
          assert(!inst->header_size);
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 243b1d2ae75..adcc9f35fa5 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -3651,6 +3651,15 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
    case nir_intrinsic_shared_atomic_comp_swap:
       nir_emit_shared_atomic(bld, BRW_AOP_CMPWR, instr);
       break;
+   case nir_intrinsic_shared_atomic_fmin:
+      nir_emit_shared_atomic_float(bld, BRW_AOP_FMIN, instr);
+      break;
+   case nir_intrinsic_shared_atomic_fmax:
+      nir_emit_shared_atomic_float(bld, BRW_AOP_FMAX, instr);
+      break;
+   case nir_intrinsic_shared_atomic_fcomp_swap:
+      nir_emit_shared_atomic_float(bld, BRW_AOP_FCMPWR, instr);
+      break;
 
    case nir_intrinsic_load_shared: {
       assert(devinfo->gen >= 7);
@@ -4378,6 +4387,15 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
    case nir_intrinsic_ssbo_atomic_comp_swap:
       nir_emit_ssbo_atomic(bld, BRW_AOP_CMPWR, instr);
       break;
+   case nir_intrinsic_ssbo_atomic_fmin:
+      nir_emit_ssbo_atomic_float(bld, BRW_AOP_FMIN, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_fmax:
+      nir_emit_ssbo_atomic_float(bld, BRW_AOP_FMAX, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_fcomp_swap:
+      nir_emit_ssbo_atomic_float(bld, BRW_AOP_FCMPWR, instr);
+      break;
 
    case nir_intrinsic_get_buffer_size: {
       nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
@@ -4866,6 +4884,54 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
    bld.MOV(dest, atomic_result);
 }
 
+void
+fs_visitor::nir_emit_ssbo_atomic_float(const fs_builder &bld,
+                                       int op, nir_intrinsic_instr *instr)
+{
+   if (stage == MESA_SHADER_FRAGMENT)
+      brw_wm_prog_data(prog_data)->has_side_effects = true;
+
+   fs_reg dest;
+   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+      dest = get_nir_dest(instr->dest);
+
+   fs_reg surface;
+   nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
+   if (const_surface) {
+      unsigned surf_index = stage_prog_data->binding_table.ssbo_start +
+                            const_surface->u32[0];
+      surface = brw_imm_ud(surf_index);
+      brw_mark_surface_used(prog_data, surf_index);
+   } else {
+      surface = vgrf(glsl_type::uint_type);
+      bld.ADD(surface, get_nir_src(instr->src[0]),
+              brw_imm_ud(stage_prog_data->binding_table.ssbo_start));
+
+      /* Assume this may touch any SSBO. This is the same as we do for other
+       * UBO/SSBO accesses with non-constant surface.
+       */
+      brw_mark_surface_used(prog_data,
+                            stage_prog_data->binding_table.ssbo_start +
+                            nir->info.num_ssbos - 1);
+   }
+
+   fs_reg offset = get_nir_src(instr->src[1]);
+   fs_reg data1 = get_nir_src(instr->src[2]);
+   fs_reg data2;
+   if (op == BRW_AOP_FCMPWR)
+      data2 = get_nir_src(instr->src[3]);
+
+   /* Emit the actual atomic operation */
+
+   fs_reg atomic_result = emit_untyped_atomic_float(bld, surface, offset,
+                                                    data1, data2,
+                                                    1 /* dims */, 1 /* rsize */,
+                                                    op,
+                                                    BRW_PREDICATE_NONE);
+   dest.type = atomic_result.type;
+   bld.MOV(dest, atomic_result);
+}
+
 void
 fs_visitor::nir_emit_shared_atomic(const fs_builder &bld,
                                    int op, nir_intrinsic_instr *instr)
@@ -4903,6 +4969,43 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder &bld,
    bld.MOV(dest, atomic_result);
 }
 
+void
+fs_visitor::nir_emit_shared_atomic_float(const fs_builder &bld,
+                                         int op, nir_intrinsic_instr *instr)
+{
+   fs_reg dest;
+   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+      dest = get_nir_dest(instr->dest);
+
+   fs_reg surface = brw_imm_ud(GEN7_BTI_SLM);
+   fs_reg offset;
+   fs_reg data1 = get_nir_src(instr->src[1]);
+   fs_reg data2;
+   if (op == BRW_AOP_FCMPWR)
+      data2 = get_nir_src(instr->src[2]);
+
+   /* Get the offset */
+   nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+   if (const_offset) {
+      offset = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]);
+   } else {
+      offset = vgrf(glsl_type::uint_type);
+      bld.ADD(offset,
+              retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD),
+              brw_imm_ud(instr->const_index[0]));
+   }
+
+   /* Emit the actual atomic operation */
+
+   fs_reg atomic_result = emit_untyped_atomic_float(bld, surface, offset,
+                                                    data1, data2,
+                                                    1 /* dims */, 1 /* rsize */,
+                                                    op,
+                                                    BRW_PREDICATE_NONE);
+   dest.type = atomic_result.type;
+   bld.MOV(dest, atomic_result);
+}
+
 void
 fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
 {
diff --git a/src/intel/compiler/brw_fs_surface_builder.cpp b/src/intel/compiler/brw_fs_surface_builder.cpp
index 1d147747456..0b8418ca736 100644
--- a/src/intel/compiler/brw_fs_surface_builder.cpp
+++ b/src/intel/compiler/brw_fs_surface_builder.cpp
@@ -110,6 +110,30 @@ namespace brw {
                           addr, tmp, surface, dims, op, rsize, pred);
       }
 
+      /**
+       * Emit an untyped surface atomic float opcode.  \p dims determines the
+       * number of components of the address and \p rsize the number of
+       * components of the returned value (either zero or one).
+       */
+      fs_reg
+      emit_untyped_atomic_float(const fs_builder &bld,
+                                const fs_reg &surface, const fs_reg &addr,
+                                const fs_reg &src0, const fs_reg &src1,
+                                unsigned dims, unsigned rsize, unsigned op,
+                                brw_predicate pred)
+      {
+         /* FINISHME: Factor out this frequently recurring pattern into a
+          * helper function.
+          */
+         const unsigned n = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
+         const fs_reg srcs[] = { src0, src1 };
+         const fs_reg tmp = bld.vgrf(src0.type, n);
+         bld.LOAD_PAYLOAD(tmp, srcs, n, 0);
+
+         return emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL,
+                          addr, tmp, surface, dims, op, rsize, pred);
+      }
+
       /**
        * Emit a typed surface read opcode.  \p dims determines the number of
        * components of the address and \p size the number of components of the
diff --git a/src/intel/compiler/brw_fs_surface_builder.h b/src/intel/compiler/brw_fs_surface_builder.h
index f0048220d5d..6952df64286 100644
--- a/src/intel/compiler/brw_fs_surface_builder.h
+++ b/src/intel/compiler/brw_fs_surface_builder.h
@@ -48,6 +48,13 @@ namespace brw {
                           unsigned dims, unsigned rsize, unsigned op,
                           brw_predicate pred = BRW_PREDICATE_NONE);
 
+      fs_reg
+      emit_untyped_atomic_float(const fs_builder &bld,
+                                const fs_reg &surface, const fs_reg &addr,
+                                const fs_reg &src0, const fs_reg &src1,
+                                unsigned dims, unsigned rsize, unsigned op,
+                                brw_predicate pred = BRW_PREDICATE_NONE);
+
       fs_reg
       emit_typed_read(const fs_builder &bld, const fs_reg &surface,
                       const fs_reg &addr, unsigned dims, unsigned size);
diff --git a/src/intel/compiler/brw_inst.h b/src/intel/compiler/brw_inst.h
index 8663c1b7f5b..8c19e330b2b 100644
--- a/src/intel/compiler/brw_inst.h
+++ b/src/intel/compiler/brw_inst.h
@@ -629,7 +629,7 @@ FF(dp_msg_type,
    -1, -1, -1, -1, -1, -1,
    /* 6:   */ MD(16), MD(13),
    /* 7:   */ MD(17), MD(14),
-   /* 8:   */ MD(17), MD(14))
+   /* 8:   */ MD(18), MD(14))
 FF(dp_msg_control,
    /* 4:   */ MD(11), MD( 8),
    /* 4.5-5: use dp_read_msg_control or dp_write_msg_control */ -1, -1, -1, -1,
diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp
index f817142a8b5..f29671859cb 100644
--- a/src/intel/compiler/brw_schedule_instructions.cpp
+++ b/src/intel/compiler/brw_schedule_instructions.cpp
@@ -369,6 +369,7 @@ schedule_node::set_latency_gen7(bool is_haswell)
       break;
 
    case SHADER_OPCODE_UNTYPED_ATOMIC:
+   case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
    case SHADER_OPCODE_TYPED_ATOMIC:
       /* Test code:
        *   mov(8)    g112<1>ud       0x00000000ud       { align1 WE_all 1Q };
diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp
index b7fb06ddbd9..8ac8a20aab5 100644
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@@ -274,6 +274,10 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
       return "untyped_atomic";
    case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
       return "untyped_atomic_logical";
+   case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
+      return "untyped_atomic_float";
+   case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
+      return "untyped_atomic_float_logical";
    case SHADER_OPCODE_UNTYPED_SURFACE_READ:
       return "untyped_surface_read";
    case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
@@ -996,6 +1000,8 @@ backend_instruction::has_side_effects() const
    switch (opcode) {
    case SHADER_OPCODE_UNTYPED_ATOMIC:
    case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+   case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
+   case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
    case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
    case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
    case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
-- 
2.14.4



More information about the mesa-dev mailing list