[Mesa-dev] [PATCH v3] i965: Handle scratch accesses where reladdr also points to scratch space

Iago Toral Quiroga itoral at igalia.com
Thu Mar 26 10:46:33 PDT 2015


This is a problem when we have IR like this:

(array_ref (var_ref temps) (swiz x (expression ivec4 bitcast_f2i
   (swiz xxxx (array_ref (var_ref temps) (constant int (2)) ) )) )) ) )

where we are indexing an array with the result of an expression that
accesses the same array.

In this scenario, temps will be moved to scratch space and we will need
to add scratch reads/writes for all accesses to temps, however, the
current implementation does not consider the case where a reladdr pointer
(obtained by indexing into temps trough a expression) points to a register
that is also stored in scratch space (as in this case, where the expression
used to index temps access temps[2]), and thus, requires a scratch read
before it is accessed.

v2 (Francisco Jerez):
 - Handle also recursive reladdr addressing.
 - Do not memcpy dst_reg into src_reg when rewriting reladdr.

v3 (Francisco Jerez):
 - Reduce complexity by moving recursive reladdr scratch access handling
   to a separate recursive function.
 - Do not skip demoting reladdr index registers to scratch space if the
   top level GRF has already been visited.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89508
---
 src/mesa/drivers/dri/i965/brw_vec4.h           |   2 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 116 ++++++++++++++++++++-----
 2 files changed, 94 insertions(+), 24 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 33297ae..6ec00d5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -367,6 +367,8 @@ public:
 				dst_reg dst,
 				src_reg orig_src,
 				int base_offset);
+   src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
+                                vec4_instruction *inst, src_reg src);
 
    bool try_emit_mad(ir_expression *ir);
    bool try_emit_b2f_of_compare(ir_expression *ir);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 26a3b9f..8e256f9 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -3352,6 +3352,48 @@ vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst,
 }
 
 /**
+ * Checks if @src and/or src.reladdr require a scratch read, and if so,
+ * adds the scratch read(s) before @inst. The function also checks for
+ * recursive reladdr scratch accesses, issuing the corresponding scratch
+ * loads and rewriting reladdr references accordingly.
+ *
+ * @return @src if it did not require a scratch load, otherwise, the
+ * register holding the result of the scratch load that the caller should
+ * use to rewrite src.
+ */
+src_reg
+vec4_visitor::emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
+                                   vec4_instruction *inst, src_reg src)
+{
+   /* First, deal with reladdr scratch accesses if needed */
+   if (src.reladdr && src.reladdr->file == GRF &&
+       scratch_loc[src.reladdr->reg] != -1) {
+      /* Resolve recursive reladdr scratch access by calling ourselves
+       * with src.reladdr
+       */
+      src_reg new_reladdr = emit_resolve_reladdr(scratch_loc, block,
+                                                 inst, *src.reladdr);
+      /* Rewrite our reladdr with the register holding the result of
+       * our sec.reladdr scratch load
+       */
+      src.reladdr->file = new_reladdr.file;
+      src.reladdr->reg = new_reladdr.reg;
+      src.reladdr->reg_offset = new_reladdr.reg_offset;
+      src.reladdr->reladdr = NULL;
+   }
+
+   /* Now handle scratch access on src */
+   if (src.file == GRF && scratch_loc[src.reg] != -1) {
+      dst_reg temp = dst_reg(this, glsl_type::vec4_type);
+      emit_scratch_read(block, inst, temp, src,
+                        scratch_loc[src.reg]);
+      return src_reg(temp);
+   } else {
+      return src;
+   }
+}
+
+/**
  * We can't generally support array access in GRF space, because a
  * single instruction's destination can only span 2 contiguous
  * registers.  So, we send all GRF arrays that get variable index
@@ -3368,20 +3410,29 @@ vec4_visitor::move_grf_array_access_to_scratch()
     * scratch.
     */
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      if (inst->dst.file == GRF && inst->dst.reladdr &&
-	  scratch_loc[inst->dst.reg] == -1) {
-	 scratch_loc[inst->dst.reg] = c->last_scratch;
-	 c->last_scratch += this->alloc.sizes[inst->dst.reg];
+      if (inst->dst.file == GRF && inst->dst.reladdr) {
+         if (scratch_loc[inst->dst.reg] == -1) {
+            scratch_loc[inst->dst.reg] = c->last_scratch;
+            c->last_scratch += this->alloc.sizes[inst->dst.reg];
+         }
+
+         src_reg *iter = inst->dst.reladdr;
+         for (; iter->reladdr; iter = iter->reladdr) {
+            if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
+               scratch_loc[iter->reg] = c->last_scratch;
+               c->last_scratch += this->alloc.sizes[iter->reg];
+            }
+         }
       }
 
       for (int i = 0 ; i < 3; i++) {
-	 src_reg *src = &inst->src[i];
-
-	 if (src->file == GRF && src->reladdr &&
-	     scratch_loc[src->reg] == -1) {
-	    scratch_loc[src->reg] = c->last_scratch;
-	    c->last_scratch += this->alloc.sizes[src->reg];
-	 }
+         src_reg *iter = &inst->src[i];
+         for (; iter->reladdr; iter = iter->reladdr) {
+            if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
+               scratch_loc[iter->reg] = c->last_scratch;
+               c->last_scratch += this->alloc.sizes[iter->reg];
+            }
+         }
       }
    }
 
@@ -3395,23 +3446,40 @@ vec4_visitor::move_grf_array_access_to_scratch()
       base_ir = inst->ir;
       current_annotation = inst->annotation;
 
+      /* First handle scratch access on the dst. Notice we have to handle
+       * the case where the dst's reladdr also points to scratch space.
+       */
+      if (inst->dst.reladdr) {
+         src_reg temp = emit_resolve_reladdr(scratch_loc, block, inst,
+                                             *inst->dst.reladdr);
+         inst->dst.reladdr->file = temp.file;
+         inst->dst.reladdr->reg = temp.reg;
+         inst->dst.reladdr->reg_offset = temp.reg_offset;
+         inst->dst.reladdr->reladdr = NULL;
+      }
+
       if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
-	 emit_scratch_write(block, inst, scratch_loc[inst->dst.reg]);
+         /* Now that we have handled any (possibly recursive) reladdr scratch
+          * accesses for dst we can safely do the scratch write for dst itself
+          */
+         emit_scratch_write(block, inst, scratch_loc[inst->dst.reg]);
       }
 
+      /* Now handle scratch access on any src. In this case, since inst->src[i]
+       * already is a src_reg, we can just call emit_resolve_reladdr with
+       * inst->src[i] and it will take care of handling scratch loads for
+       * both src and src.reladdr (recursively).
+       */
       for (int i = 0 ; i < 3; i++) {
-	 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
-	    continue;
-
-	 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
-
-	 emit_scratch_read(block, inst, temp, inst->src[i],
-			   scratch_loc[inst->src[i].reg]);
-
-	 inst->src[i].file = temp.file;
-	 inst->src[i].reg = temp.reg;
-	 inst->src[i].reg_offset = temp.reg_offset;
-	 inst->src[i].reladdr = NULL;
+         if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
+            continue;
+
+         src_reg temp = emit_resolve_reladdr(scratch_loc, block, inst,
+                                             inst->src[i]);
+         inst->src[i].file = temp.file;
+         inst->src[i].reg = temp.reg;
+         inst->src[i].reg_offset = temp.reg_offset;
+         inst->src[i].reladdr = NULL;
       }
    }
 }
-- 
1.9.1



More information about the mesa-dev mailing list