Mesa (master): i965: Add SIMD8 URB write low-level IR instruction

Kristian Høgsberg krh at kemper.freedesktop.org
Wed Dec 10 20:45:05 UTC 2014


Module: Mesa
Branch: master
Commit: d9e29f5d88d2ddd8ee9d10b7d88377a60fd0094f
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=d9e29f5d88d2ddd8ee9d10b7d88377a60fd0094f

Author: Kristian Høgsberg <krh at bitplanet.net>
Date:   Mon Oct 20 23:00:50 2014 -0700

i965: Add SIMD8 URB write low-level IR instruction

This is all we need from the generator for SIMD8 vertex shaders.  This
opcode is just the send instruction, all the hard work will happen
in the visitor using LOAD_PAYLOAD.

Signed-off-by: Kristian Høgsberg <krh at bitplanet.net>
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

---

 src/mesa/drivers/dri/i965/brw_defines.h           |    3 +++
 src/mesa/drivers/dri/i965/brw_fs.cpp              |    4 ++++
 src/mesa/drivers/dri/i965/brw_fs.h                |    1 +
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp    |   25 +++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp |   16 ++++++++++++-
 src/mesa/drivers/dri/i965/brw_shader.cpp          |    3 +++
 6 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index d4211496..843ef32 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -908,6 +908,8 @@ enum opcode {
    SHADER_OPCODE_GEN4_SCRATCH_WRITE,
    SHADER_OPCODE_GEN7_SCRATCH_READ,
 
+   SHADER_OPCODE_URB_WRITE_SIMD8,
+
    VEC4_OPCODE_PACK_BYTES,
    VEC4_OPCODE_UNPACK_UNIFORM,
 
@@ -1529,6 +1531,7 @@ enum brw_message_target {
 
 #define BRW_URB_OPCODE_WRITE_HWORD  0
 #define BRW_URB_OPCODE_WRITE_OWORD  1
+#define GEN8_URB_OPCODE_SIMD8_WRITE  7
 
 #define BRW_URB_SWIZZLE_NONE          0
 #define BRW_URB_SWIZZLE_INTERLEAVE    1
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index a0fb565..9365b7a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -509,6 +509,7 @@ fs_inst::is_send_from_grf() const
    case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
    case SHADER_OPCODE_UNTYPED_ATOMIC:
    case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+   case SHADER_OPCODE_URB_WRITE_SIMD8:
       return true;
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
       return src[1].file == GRF;
@@ -898,6 +899,8 @@ fs_inst::regs_read(fs_visitor *v, int arg) const
       return mlen;
    } else if (opcode == FS_OPCODE_FB_WRITE && arg == 0) {
       return mlen;
+   } else if (opcode == SHADER_OPCODE_URB_WRITE_SIMD8 && arg == 0) {
+      return mlen;
    } else if (opcode == SHADER_OPCODE_UNTYPED_ATOMIC && arg == 0) {
       return mlen;
    } else if (opcode == SHADER_OPCODE_UNTYPED_SURFACE_READ && arg == 0) {
@@ -992,6 +995,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
       return 2;
    case SHADER_OPCODE_UNTYPED_ATOMIC:
    case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+   case SHADER_OPCODE_URB_WRITE_SIMD8:
    case FS_OPCODE_INTERPOLATE_AT_CENTROID:
    case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
    case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index f0e6f25..d252caf 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -705,6 +705,7 @@ private:
                       struct brw_reg implied_header,
                       GLuint nr);
    void generate_fb_write(fs_inst *inst, struct brw_reg payload);
+   void generate_urb_write(fs_inst *inst, struct brw_reg payload);
    void generate_blorp_fb_write(fs_inst *inst);
    void generate_pixel_xy(struct brw_reg dst, bool is_x);
    void generate_linterp(fs_inst *inst, struct brw_reg dst,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 95bdfc1..c131db2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -336,6 +336,27 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
 }
 
 void
+fs_generator::generate_urb_write(fs_inst *inst, struct brw_reg payload)
+{
+   brw_inst *insn;
+
+   insn = brw_next_insn(p, BRW_OPCODE_SEND);
+
+   brw_set_dest(p, insn, brw_null_reg());
+   brw_set_src0(p, insn, payload);
+   brw_set_src1(p, insn, brw_imm_d(0));
+
+   brw_inst_set_sfid(brw, insn, BRW_SFID_URB);
+   brw_inst_set_urb_opcode(brw, insn, GEN8_URB_OPCODE_SIMD8_WRITE);
+
+   brw_inst_set_mlen(brw, insn, inst->mlen);
+   brw_inst_set_rlen(brw, insn, 0);
+   brw_inst_set_eot(brw, insn, inst->eot);
+   brw_inst_set_header_present(brw, insn, true);
+   brw_inst_set_urb_global_offset(brw, insn, inst->offset);
+}
+
+void
 fs_generator::generate_blorp_fb_write(fs_inst *inst)
 {
    brw_fb_WRITE(p,
@@ -1887,6 +1908,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
 	 generate_scratch_read_gen7(inst, dst);
 	 break;
 
+      case SHADER_OPCODE_URB_WRITE_SIMD8:
+	 generate_urb_write(inst, src[0]);
+	 break;
+
       case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
 	 generate_uniform_pull_constant_load(inst, dst, src[0], src[1]);
 	 break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 0813f64..eba2fdd 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -385,6 +385,7 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
 
       /* Special case instructions which have extra implied registers used. */
       switch (inst->opcode) {
+      case SHADER_OPCODE_URB_WRITE_SIMD8:
       case FS_OPCODE_FB_WRITE:
          /* We could omit this for the !inst->header_present case, except that
           * the simulator apparently incorrectly reads from g0/g1 instead of
@@ -522,6 +523,19 @@ fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node)
    }
 }
 
+static bool
+is_last_send(fs_inst *inst)
+{
+   switch (inst->opcode) {
+   case SHADER_OPCODE_URB_WRITE_SIMD8:
+   case FS_OPCODE_FB_WRITE:
+      return inst->eot;
+   default:
+      assert(!inst->eot);
+      return false;
+   }
+}
+
 bool
 fs_visitor::assign_regs(bool allow_spilling)
 {
@@ -594,7 +608,7 @@ fs_visitor::assign_regs(bool allow_spilling)
           * We could just do "something high".  Instead, we just pick the
           * highest register that works.
           */
-         if (inst->opcode == FS_OPCODE_FB_WRITE && inst->eot) {
+         if (is_last_send(inst)) {
             int size = virtual_grf_sizes[inst->src[0].reg];
             int reg = screen->wm_reg_sets[rsi].class_to_ra_reg_range[size] - 1;
             ra_set_node_reg(g, inst->src[0].reg, reg);
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 56aa757..b3b1ad7 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -453,6 +453,8 @@ brw_instruction_name(enum opcode op)
       return "gen4_scratch_write";
    case SHADER_OPCODE_GEN7_SCRATCH_READ:
       return "gen7_scratch_read";
+   case SHADER_OPCODE_URB_WRITE_SIMD8:
+      return "gen8_urb_write_simd8";
 
    case VEC4_OPCODE_PACK_BYTES:
       return "pack_bytes";
@@ -754,6 +756,7 @@ backend_instruction::has_side_effects() const
 {
    switch (opcode) {
    case SHADER_OPCODE_UNTYPED_ATOMIC:
+   case SHADER_OPCODE_URB_WRITE_SIMD8:
    case FS_OPCODE_FB_WRITE:
       return true;
    default:




More information about the mesa-commit mailing list