[Mesa-dev] [PATCH 4/6] i965/vec4: add support for doing DF register spilling on IVB

Thu Jun 15 11:15:08 UTC 2017

Both the spill and unspill processes assume that SIMD width lowering
and DF scalarization have already been done.

* The spilling process does the following:

  1) Read the existing content from the scratch memory that
     corresponds to the vertex (use inst->group to know whether we
     are going to write data to the first or the second vertex).
     As the instruction is already scalarized, we don't want to modify
     the existing data of the other components. We only read the
     content of one GRF, as we are not going to modify the other one
     (exec_size = 4).
  2) Overwrite the component the spilled instruction writes to.
  3) Do a scratch write to save the updated content of the respective
     vertex to scratch memory.

* Unspilling is implemented as several scratch reads emitted when we
  find the first instruction whose sources were spilled.
  These scratch reads get the content of the DF data for both vertices
  because we want to have the DF data in two consecutive GRFs, even
  when this first instruction only reads one (exec_size = 4). After
  that, no more unspills are needed until we write new content to the
  scratch memory, so we just need to update the register number in
  the affected sources of the following instructions.

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
---
 src/intel/compiler/brw_vec4.cpp         |  3 ++
 src/intel/compiler/brw_vec4_visitor.cpp | 69 +++++++++++++++++++++++++++++----
 2 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index b6d409eea2..e25316d0b4 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -343,6 +343,9 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
    case SHADER_OPCODE_GEN4_SCRATCH_READ:
       return 2;
    case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
+      if (devinfo->gen == 7 && !devinfo->is_haswell &&
+          type_sz(inst->dst.type) == 8)
+         return 2;
       return 3;
    case GS_OPCODE_URB_WRITE:
    case GS_OPCODE_URB_WRITE_ALLOCATE:
diff --git a/src/intel/compiler/brw_vec4_visitor.cpp b/src/intel/compiler/brw_vec4_visitor.cpp
index 37ae31c0d5..0d5ad4d8f8 100644
--- a/src/intel/compiler/brw_vec4_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_visitor.cpp
@@ -254,11 +254,13 @@ vec4_instruction *
 vec4_visitor::SCRATCH_READ(const dst_reg &dst, const src_reg &index)
 {
    vec4_instruction *inst;
+   bool is_df_ivb = devinfo->gen == 7 && !devinfo->is_haswell &&
+      type_sz(dst.type) == 8;
 
    inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_READ,
 					dst, index);
    inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
-   inst->mlen = 2;
+   inst->mlen = is_df_ivb ? 1 : 2;
 
    return inst;
 }
@@ -286,11 +288,13 @@ vec4_visitor::SCRATCH_WRITE(const dst_reg &dst, const src_reg &src,
                             const src_reg &index)
 {
    vec4_instruction *inst;
+   bool is_df_ivb = devinfo->gen == 7 && !devinfo->is_haswell &&
+      type_sz(src.type) == 8;
 
    inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_WRITE,
 					dst, src, index);
    inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen);
-   inst->mlen = 3;
+   inst->mlen = is_df_ivb ? 2 : 3;
 
    return inst;
 }
@@ -1527,8 +1531,8 @@ vec4_visitor::emit_1grf_df_ivb_scratch_read(bblock_t *block,
  */
 void
 vec4_visitor::emit_scratch_read(bblock_t *block, vec4_instruction *inst,
-				dst_reg temp, src_reg orig_src,
-				int base_offset)
+                                dst_reg temp, src_reg orig_src,
+                                int base_offset)
 {
    assert(orig_src.offset % REG_SIZE == 0);
    int reg_offset = base_offset + orig_src.offset / REG_SIZE;
@@ -1537,6 +1541,19 @@ vec4_visitor::emit_scratch_read(bblock_t *block, vec4_instruction *inst,
 
    if (type_sz(orig_src.type) < 8) {
       emit_before(block, inst, SCRATCH_READ(temp, index));
+   } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
+              type_sz(temp.type) == 8) {
+      /* Set the offset to the base offset because we address the base GRF of
+       * the DF. We will take into account the second GRF in the scratch write emission.
+       */
+      if (inst->group == 4)
+         reg_offset = base_offset;
+      temp.offset = 0;
+      vec4_instruction *read = SCRATCH_READ(temp, index);
+      read->exec_size = 4;
+      read->offset = reg_offset;
+      read->size_written = 2;
+      emit_before(block, inst, read);
    } else {
       dst_reg shuffled = dst_reg(this, glsl_type::dvec4_type);
       dst_reg shuffled_float = retype(shuffled, BRW_REGISTER_TYPE_F);
@@ -1574,9 +1591,11 @@ vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst,
    bool is_64bit = type_sz(inst->dst.type) == 8;
    const glsl_type *alloc_type =
       is_64bit ? glsl_type::dvec4_type : glsl_type::vec4_type;
-   const src_reg temp = swizzle(retype(src_reg(this, alloc_type),
-                                       inst->dst.type),
-                                brw_swizzle_for_mask(inst->dst.writemask));
+   src_reg temp;
+
+   temp = swizzle(retype(src_reg(this, alloc_type),
+                         inst->dst.type),
+                  brw_swizzle_for_mask(inst->dst.writemask));
 
    if (!is_64bit) {
       dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
@@ -1587,6 +1606,42 @@ vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst,
       write->ir = inst->ir;
       write->annotation = inst->annotation;
       inst->insert_after(block, write);
+   } else if (is_64bit && devinfo->gen == 7 && !devinfo->is_haswell) {
+      /* Set the offset to the base offset because we address the base GRF of
+       * the DF. We will take into account the second GRF in the scratch write emission.
+       */
+      if (inst->group == 4)
+         reg_offset = base_offset;
+
+      dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
+                                          inst->dst.writemask));
+
+      /* As scratch write/read for this case is implemented with align1
+       * instructions, we are going to unspill existing content, overwrite it
+       * taking into account the writemask of the original instruction and
+       * spill it again.
+       */
+
+      src_reg saved_value = src_reg(this, glsl_type::dvec4_type);
+      saved_value.swizzle = brw_swizzle_for_mask(inst->dst.writemask);
+      vec4_instruction *mov = MOV(dst_reg(saved_value), temp);
+      mov->group = inst->group;
+      mov->exec_size = inst->exec_size;
+      mov->size_written = 1;
+      inst->insert_after(block, mov);
+      emit_1grf_df_ivb_scratch_read(block, mov, dst_reg(saved_value),
+                                    temp, base_offset, inst->group == 0);
+
+      vec4_instruction *write = SCRATCH_WRITE(dst, saved_value, index);
+      if (inst->opcode != BRW_OPCODE_SEL)
+         write->predicate = inst->predicate;
+      write->exec_size = inst->exec_size;
+      write->group = inst->group;
+      write->offset = reg_offset;
+
+      write->ir = inst->ir;
+      write->annotation = inst->annotation;
+      mov->insert_after(block, write);
    } else {
       dst_reg shuffled = dst_reg(this, alloc_type);
       vec4_instruction *last =
-- 
2.11.0