[Mesa-dev] [PATCH v2 5/5] i965/vec4: allow partial DF register spilling
Samuel Iglesias Gonsálvez
siglesias at igalia.com
Wed Jul 19 13:51:13 UTC 2017
v2:
- Enable spilling for partial DF reads/writes on HSW+
Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
---
src/intel/compiler/brw_vec4_reg_allocate.cpp | 54 ++++++++++++++++++++--------
1 file changed, 40 insertions(+), 14 deletions(-)
diff --git a/src/intel/compiler/brw_vec4_reg_allocate.cpp b/src/intel/compiler/brw_vec4_reg_allocate.cpp
index a6f1070ebd..3ad18b12bb 100644
--- a/src/intel/compiler/brw_vec4_reg_allocate.cpp
+++ b/src/intel/compiler/brw_vec4_reg_allocate.cpp
@@ -411,17 +411,21 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
spill_costs[inst->src[i].nr] +=
loop_scale * spill_cost_for_type(inst->src[i].type);
if (inst->src[i].reladdr ||
- inst->src[i].offset >= REG_SIZE)
+ (inst->src[i].offset >= REG_SIZE &&
+ (type_sz(inst->src[i].type) != 8 ||
+ !(inst->src[i].offset == 32 && inst->group == 4))))
no_spill[inst->src[i].nr] = true;
- /* We don't support unspills of partial DF reads.
+ /* For execsize == 8, our 64-bit unspills are implemented with
+ * two 32-bit scratch messages, each one reading data for both
+ * SIMD4x2 threads, which we then need to shuffle into correct
+ * 64-bit data. Ensure that we are reading data for both threads.
*
- * Our 64-bit unspills are implemented with two 32-bit scratch
- * messages, each one reading that for both SIMD4x2 threads that
- * we need to shuffle into correct 64-bit data. Ensure that we
- * are reading data for both threads.
+ * For execsize == 4, it is similar but using 1-Oword block
+ * read messages and we don't need to shuffle data.
*/
- if (type_sz(inst->src[i].type) == 8 && inst->exec_size != 8)
+ if (type_sz(inst->src[i].type) == 8 &&
+ inst->exec_size != 8 && inst->exec_size != 4)
no_spill[inst->src[i].nr] = true;
}
@@ -439,16 +443,21 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
if (inst->dst.file == VGRF && !no_spill[inst->dst.nr]) {
spill_costs[inst->dst.nr] +=
loop_scale * spill_cost_for_type(inst->dst.type);
- if (inst->dst.reladdr || inst->dst.offset >= REG_SIZE)
+ if (inst->dst.reladdr ||
+ (inst->dst.offset >= REG_SIZE &&
+ (type_sz(inst->dst.type) != 8 ||
+ !(inst->dst.offset == 32 && inst->group == 4))))
no_spill[inst->dst.nr] = true;
- /* We don't support spills of partial DF writes.
+ /* For execsize == 8, our 64-bit spills are implemented with two
+ * 32-bit scratch messages, each one writing data for both SIMD4x2
+ * threads. Ensure that we are writing data for both threads.
*
- * Our 64-bit spills are implemented with two 32-bit scratch messages,
- * each one writing that for both SIMD4x2 threads. Ensure that we
- * are writing data for both threads.
+ * For execsize == 4, it is similar but using 1-Oword block
+ * write messages.
*/
- if (type_sz(inst->dst.type) == 8 && inst->exec_size != 8)
+ if (type_sz(inst->dst.type) == 8 &&
+ inst->exec_size != 8 && inst->exec_size != 4)
no_spill[inst->dst.nr] = true;
/* We can't spill registers that mix 32-bit and 64-bit access (that
@@ -514,11 +523,25 @@ vec4_visitor::spill_reg(int spill_reg_nr)
/* Generate spill/unspill instructions for the objects being spilled. */
int scratch_reg = -1;
+ bool do_partial_df_scratch_read = false;
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == VGRF && inst->src[i].nr == spill_reg_nr) {
+ /* DF scratch reads are not actual partial reads because we are
+ * going to read both GRFs in the first read instruction.
+ * Because of that, we skip the scratch read for the other half of a
+ * split instruction (if any), as it can reuse the value already
+ * read. We check the value of do_df_scratch_read to know whether
+ * we need to emit a scratch read or not.
+ */
+ bool do_df_scratch_read = devinfo->gen >= 7 &&
+ type_sz(inst->src[i].type) == 8 &&
+ (inst->exec_size != 4 || do_partial_df_scratch_read);
+
if (scratch_reg == -1 ||
- !can_use_scratch_for_source(inst, i, scratch_reg, false)) {
+ (!can_use_scratch_for_source(inst, i, scratch_reg,
+ do_partial_df_scratch_read) &&
+ (do_df_scratch_read || type_sz(inst->src[i].type) != 8))) {
/* We need to unspill anyway so make sure we read the full vec4
* in any case. This way, the cached register can be reused
* for consecutive instructions that read different channels of
@@ -532,6 +555,7 @@ vec4_visitor::spill_reg(int spill_reg_nr)
emit_scratch_read(block, inst,
dst_reg(temp), inst->src[i], spill_offset, false);
temp.offset = inst->src[i].offset;
+ do_partial_df_scratch_read = false;
}
assert(scratch_reg != -1);
inst->src[i].nr = scratch_reg;
@@ -541,6 +565,8 @@ vec4_visitor::spill_reg(int spill_reg_nr)
if (inst->dst.file == VGRF && inst->dst.nr == spill_reg_nr) {
emit_scratch_write(block, inst, spill_offset, false);
scratch_reg = inst->dst.nr;
+ if (type_sz(inst->dst.type) == 8 && inst->exec_size == 4)
+ do_partial_df_scratch_read = true;
}
}
--
2.11.0
More information about the mesa-dev
mailing list