<div dir="ltr">ACK<br></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Wed, Apr 24, 2019 at 8:15 AM Juan A. Suarez Romero <<a href="mailto:jasuarez@igalia.com">jasuarez@igalia.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">This reverts commit 40b3abb4d16af4cef0307e1b4904c2ec0924299e.<br>
<br>
It is not clear that this commit was entirely correct, and unfortunately<br>
it was pushed by error.<br>
<br>
CC: Jason Ekstrand <<a href="mailto:jason@jlekstrand.net" target="_blank">jason@jlekstrand.net</a>><br>
---<br>
<br>
Jason, we agreed on leaving this out of the VK_KHR_shader_float16_int8<br>
patchset, but due a mistake I've pushed it with the other patches.<br>
<br>
So here is the revert.<br>
<br>
<br>
src/intel/compiler/brw_fs.cpp | 31 ++++---------------<br>
.../compiler/brw_fs_cmod_propagation.cpp | 20 ++++++------<br>
.../compiler/brw_fs_copy_propagation.cpp | 8 ++---<br>
src/intel/compiler/brw_fs_cse.cpp | 3 +-<br>
.../compiler/brw_fs_dead_code_eliminate.cpp | 2 +-<br>
src/intel/compiler/brw_fs_live_variables.cpp | 2 +-<br>
src/intel/compiler/brw_fs_reg_allocate.cpp | 2 +-<br>
.../compiler/brw_fs_register_coalesce.cpp | 2 +-<br>
.../compiler/brw_fs_saturate_propagation.cpp | 7 ++---<br>
src/intel/compiler/brw_fs_sel_peephole.cpp | 4 +--<br>
src/intel/compiler/brw_ir_fs.h | 3 +-<br>
11 files changed, 30 insertions(+), 54 deletions(-)<br>
<br>
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp<br>
index 22eefd4ef49..335eaa0e934 100644<br>
--- a/src/intel/compiler/brw_fs.cpp<br>
+++ b/src/intel/compiler/brw_fs.cpp<br>
@@ -734,33 +734,14 @@ fs_visitor::limit_dispatch_width(unsigned n, const char *msg)<br>
* it.<br>
*/<br>
bool<br>
-fs_inst::is_partial_reg_write() const<br>
+fs_inst::is_partial_write() const<br>
{<br>
return ((this->predicate && this->opcode != BRW_OPCODE_SEL) ||<br>
+ (this->exec_size * type_sz(this->dst.type)) < 32 ||<br>
!this->dst.is_contiguous() ||<br>
- (this->exec_size * type_sz(this->dst.type)) < REG_SIZE ||<br>
this->dst.offset % REG_SIZE != 0);<br>
}<br>
<br>
-/**<br>
- * Returns true if the instruction has a flag that means it won't<br>
- * update an entire variable for the given dispatch width.<br>
- *<br>
- * This is only different from is_partial_reg_write() for SIMD8<br>
- * dispatches of 16-bit (or smaller) instructions.<br>
- */<br>
-bool<br>
-fs_inst::is_partial_var_write(uint32_t dispatch_width) const<br>
-{<br>
- const uint32_t type_size = type_sz(this->dst.type);<br>
- uint32_t var_size = MIN2(REG_SIZE, dispatch_width * type_size);<br>
-<br>
- return ((this->predicate && this->opcode != BRW_OPCODE_SEL) ||<br>
- !this->dst.is_contiguous() ||<br>
- (this->exec_size * type_sz(this->dst.type)) < var_size ||<br>
- this->dst.offset % var_size != 0);<br>
-}<br>
-<br>
unsigned<br>
fs_inst::components_read(unsigned i) const<br>
{<br>
@@ -2943,7 +2924,7 @@ fs_visitor::opt_register_renaming()<br>
if (depth == 0 &&<br>
inst->dst.file == VGRF &&<br>
alloc.sizes[inst-><a href="http://dst.nr" rel="noreferrer" target="_blank">dst.nr</a>] * REG_SIZE == inst->size_written &&<br>
- !inst->is_partial_reg_write()) {<br>
+ !inst->is_partial_write()) {<br>
if (remap[dst] == ~0u) {<br>
remap[dst] = dst;<br>
} else {<br>
@@ -3147,7 +3128,7 @@ fs_visitor::compute_to_mrf()<br>
next_ip++;<br>
<br>
if (inst->opcode != BRW_OPCODE_MOV ||<br>
- inst->is_partial_reg_write() ||<br>
+ inst->is_partial_write() ||<br>
inst->dst.file != MRF || inst->src[0].file != VGRF ||<br>
inst->dst.type != inst->src[0].type ||<br>
inst->src[0].abs || inst->src[0].negate ||<br>
@@ -3180,7 +3161,7 @@ fs_visitor::compute_to_mrf()<br>
* that writes that reg, but it would require smarter<br>
* tracking.<br>
*/<br>
- if (scan_inst->is_partial_reg_write())<br>
+ if (scan_inst->is_partial_write())<br>
break;<br>
<br>
/* Handling things not fully contained in the source of the copy<br>
@@ -3498,7 +3479,7 @@ fs_visitor::remove_duplicate_mrf_writes()<br>
if (inst->opcode == BRW_OPCODE_MOV &&<br>
inst->dst.file == MRF &&<br>
inst->src[0].file != ARF &&<br>
- !inst->is_partial_reg_write()) {<br>
+ !inst->is_partial_write()) {<br>
last_mrf_move[inst-><a href="http://dst.nr" rel="noreferrer" target="_blank">dst.nr</a>] = inst;<br>
}<br>
}<br>
diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp b/src/intel/compiler/brw_fs_cmod_propagation.cpp<br>
index a2c11e0959c..c9725263929 100644<br>
--- a/src/intel/compiler/brw_fs_cmod_propagation.cpp<br>
+++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp<br>
@@ -50,13 +50,13 @@<br>
<br>
static bool<br>
cmod_propagate_cmp_to_add(const gen_device_info *devinfo, bblock_t *block,<br>
- fs_inst *inst, unsigned dispatch_width)<br>
+ fs_inst *inst)<br>
{<br>
bool read_flag = false;<br>
<br>
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {<br>
if (scan_inst->opcode == BRW_OPCODE_ADD &&<br>
- !scan_inst->is_partial_var_write(dispatch_width) &&<br>
+ !scan_inst->is_partial_write() &&<br>
scan_inst->exec_size == inst->exec_size) {<br>
bool negate;<br>
<br>
@@ -126,7 +126,7 @@ cmod_propagate_cmp_to_add(const gen_device_info *devinfo, bblock_t *block,<br>
*/<br>
static bool<br>
cmod_propagate_not(const gen_device_info *devinfo, bblock_t *block,<br>
- fs_inst *inst, unsigned dispatch_width)<br>
+ fs_inst *inst)<br>
{<br>
const enum brw_conditional_mod cond = brw_negate_cmod(inst->conditional_mod);<br>
bool read_flag = false;<br>
@@ -141,7 +141,7 @@ cmod_propagate_not(const gen_device_info *devinfo, bblock_t *block,<br>
scan_inst->opcode != BRW_OPCODE_AND)<br>
break;<br>
<br>
- if (scan_inst->is_partial_var_write(dispatch_width) ||<br>
+ if (scan_inst->is_partial_write() ||<br>
scan_inst->dst.offset != inst->src[0].offset ||<br>
scan_inst->exec_size != inst->exec_size)<br>
break;<br>
@@ -166,9 +166,7 @@ cmod_propagate_not(const gen_device_info *devinfo, bblock_t *block,<br>
}<br>
<br>
static bool<br>
-opt_cmod_propagation_local(const gen_device_info *devinfo,<br>
- bblock_t *block,<br>
- unsigned dispatch_width)<br>
+opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block)<br>
{<br>
bool progress = false;<br>
int ip = block->end_ip + 1;<br>
@@ -221,14 +219,14 @@ opt_cmod_propagation_local(const gen_device_info *devinfo,<br>
*/<br>
if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero()) {<br>
if (brw_reg_type_is_floating_point(inst->src[0].type) &&<br>
- cmod_propagate_cmp_to_add(devinfo, block, inst, dispatch_width))<br>
+ cmod_propagate_cmp_to_add(devinfo, block, inst))<br>
progress = true;<br>
<br>
continue;<br>
}<br>
<br>
if (inst->opcode == BRW_OPCODE_NOT) {<br>
- progress = cmod_propagate_not(devinfo, block, inst, dispatch_width) || progress;<br>
+ progress = cmod_propagate_not(devinfo, block, inst) || progress;<br>
continue;<br>
}<br>
<br>
@@ -236,7 +234,7 @@ opt_cmod_propagation_local(const gen_device_info *devinfo,<br>
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {<br>
if (regions_overlap(scan_inst->dst, scan_inst->size_written,<br>
inst->src[0], inst->size_read(0))) {<br>
- if (scan_inst->is_partial_var_write(dispatch_width) ||<br>
+ if (scan_inst->is_partial_write() ||<br>
scan_inst->dst.offset != inst->src[0].offset ||<br>
scan_inst->exec_size != inst->exec_size)<br>
break;<br>
@@ -371,7 +369,7 @@ fs_visitor::opt_cmod_propagation()<br>
bool progress = false;<br>
<br>
foreach_block_reverse(block, cfg) {<br>
- progress = opt_cmod_propagation_local(devinfo, block, dispatch_width) || progress;<br>
+ progress = opt_cmod_propagation_local(devinfo, block) || progress;<br>
}<br>
<br>
if (progress)<br>
diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp<br>
index 1670eedce77..1f4e122e6c9 100644<br>
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp<br>
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp<br>
@@ -515,7 +515,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)<br>
/* Compute the first component of the copy that the instruction is<br>
* reading, and the base byte offset within that component.<br>
*/<br>
- assert(entry->dst.stride == 1);<br>
+ assert(entry->dst.offset % REG_SIZE == 0 && entry->dst.stride == 1);<br>
const unsigned component = rel_offset / type_sz(entry->dst.type);<br>
const unsigned suboffset = rel_offset % type_sz(entry->dst.type);<br>
<br>
@@ -767,7 +767,7 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)<br>
}<br>
<br>
static bool<br>
-can_propagate_from(fs_inst *inst, unsigned dispatch_width)<br>
+can_propagate_from(fs_inst *inst)<br>
{<br>
return (inst->opcode == BRW_OPCODE_MOV &&<br>
inst->dst.file == VGRF &&<br>
@@ -778,7 +778,7 @@ can_propagate_from(fs_inst *inst, unsigned dispatch_width)<br>
inst->src[0].file == UNIFORM ||<br>
inst->src[0].file == IMM) &&<br>
inst->src[0].type == inst->dst.type &&<br>
- !inst->is_partial_var_write(dispatch_width));<br>
+ !inst->is_partial_write());<br>
}<br>
<br>
/* Walks a basic block and does copy propagation on it using the acp<br>
@@ -830,7 +830,7 @@ fs_visitor::opt_copy_propagation_local(void *copy_prop_ctx, bblock_t *block,<br>
/* If this instruction's source could potentially be folded into the<br>
* operand of another instruction, add it to the ACP.<br>
*/<br>
- if (can_propagate_from(inst, dispatch_width)) {<br>
+ if (can_propagate_from(inst)) {<br>
acp_entry *entry = ralloc(copy_prop_ctx, acp_entry);<br>
entry->dst = inst->dst;<br>
entry->src = inst->src[0];<br>
diff --git a/src/intel/compiler/brw_fs_cse.cpp b/src/intel/compiler/brw_fs_cse.cpp<br>
index e955c35e145..6efa111b1a4 100644<br>
--- a/src/intel/compiler/brw_fs_cse.cpp<br>
+++ b/src/intel/compiler/brw_fs_cse.cpp<br>
@@ -252,8 +252,7 @@ fs_visitor::opt_cse_local(bblock_t *block)<br>
int ip = block->start_ip;<br>
foreach_inst_in_block(fs_inst, inst, block) {<br>
/* Skip some cases. */<br>
- if (is_expression(this, inst) &&<br>
- !inst->is_partial_var_write(dispatch_width) &&<br>
+ if (is_expression(this, inst) && !inst->is_partial_write() &&<br>
((inst->dst.file != ARF && inst->dst.file != FIXED_GRF) ||<br>
inst->dst.is_null()))<br>
{<br>
diff --git a/src/intel/compiler/brw_fs_dead_code_eliminate.cpp b/src/intel/compiler/brw_fs_dead_code_eliminate.cpp<br>
index 7c60a33eb58..38ae1d41a6a 100644<br>
--- a/src/intel/compiler/brw_fs_dead_code_eliminate.cpp<br>
+++ b/src/intel/compiler/brw_fs_dead_code_eliminate.cpp<br>
@@ -107,7 +107,7 @@ fs_visitor::dead_code_eliminate()<br>
}<br>
<br>
if (inst->dst.file == VGRF) {<br>
- if (!inst->is_partial_reg_write()) {<br>
+ if (!inst->is_partial_write()) {<br>
int var = live_intervals->var_from_reg(inst->dst);<br>
for (unsigned i = 0; i < regs_written(inst); i++) {<br>
BITSET_CLEAR(live, var + i);<br>
diff --git a/src/intel/compiler/brw_fs_live_variables.cpp b/src/intel/compiler/brw_fs_live_variables.cpp<br>
index 30625aa586a..059f076fa51 100644<br>
--- a/src/intel/compiler/brw_fs_live_variables.cpp<br>
+++ b/src/intel/compiler/brw_fs_live_variables.cpp<br>
@@ -84,7 +84,7 @@ fs_live_variables::setup_one_write(struct block_data *bd, fs_inst *inst,<br>
* screens off previous updates of that variable (VGRF channel).<br>
*/<br>
if (inst->dst.file == VGRF) {<br>
- if (!inst->is_partial_reg_write() && !BITSET_TEST(bd->use, var))<br>
+ if (!inst->is_partial_write() && !BITSET_TEST(bd->use, var))<br>
BITSET_SET(bd->def, var);<br>
<br>
BITSET_SET(bd->defout, var);<br>
diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp<br>
index d5c4f032182..17a9dc8e9c4 100644<br>
--- a/src/intel/compiler/brw_fs_reg_allocate.cpp<br>
+++ b/src/intel/compiler/brw_fs_reg_allocate.cpp<br>
@@ -1066,7 +1066,7 @@ fs_visitor::spill_reg(unsigned spill_reg)<br>
* write, there should be no need for the unspill since the<br>
* instruction will be overwriting the whole destination in any case.<br>
*/<br>
- if (inst->is_partial_reg_write() ||<br>
+ if (inst->is_partial_write() ||<br>
(!inst->force_writemask_all && !per_channel))<br>
emit_unspill(ubld, spill_src, subset_spill_offset,<br>
regs_written(inst));<br>
diff --git a/src/intel/compiler/brw_fs_register_coalesce.cpp b/src/intel/compiler/brw_fs_register_coalesce.cpp<br>
index 27234292c30..4fe6773da54 100644<br>
--- a/src/intel/compiler/brw_fs_register_coalesce.cpp<br>
+++ b/src/intel/compiler/brw_fs_register_coalesce.cpp<br>
@@ -70,7 +70,7 @@ is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst)<br>
{<br>
if ((inst->opcode != BRW_OPCODE_MOV &&<br>
inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD) ||<br>
- inst->is_partial_reg_write() ||<br>
+ inst->is_partial_write() ||<br>
inst->saturate ||<br>
inst->src[0].file != VGRF ||<br>
inst->src[0].negate ||<br>
diff --git a/src/intel/compiler/brw_fs_saturate_propagation.cpp b/src/intel/compiler/brw_fs_saturate_propagation.cpp<br>
index fe3fa7ecfea..8140de69749 100644<br>
--- a/src/intel/compiler/brw_fs_saturate_propagation.cpp<br>
+++ b/src/intel/compiler/brw_fs_saturate_propagation.cpp<br>
@@ -43,8 +43,7 @@<br>
*/<br>
<br>
static bool<br>
-opt_saturate_propagation_local(fs_visitor *v, bblock_t *block,<br>
- unsigned dispatch_width)<br>
+opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)<br>
{<br>
bool progress = false;<br>
int ip = block->end_ip + 1;<br>
@@ -68,7 +67,7 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block,<br>
if (scan_inst->exec_size == inst->exec_size &&<br>
regions_overlap(scan_inst->dst, scan_inst->size_written,<br>
inst->src[0], inst->size_read(0))) {<br>
- if (scan_inst->is_partial_var_write(dispatch_width) ||<br>
+ if (scan_inst->is_partial_write() ||<br>
(scan_inst->dst.type != inst->dst.type &&<br>
!scan_inst->can_change_types()))<br>
break;<br>
@@ -155,7 +154,7 @@ fs_visitor::opt_saturate_propagation()<br>
calculate_live_intervals();<br>
<br>
foreach_block (block, cfg) {<br>
- progress = opt_saturate_propagation_local(this, block, dispatch_width) || progress;<br>
+ progress = opt_saturate_propagation_local(this, block) || progress;<br>
}<br>
<br>
/* Live intervals are still valid. */<br>
diff --git a/src/intel/compiler/brw_fs_sel_peephole.cpp b/src/intel/compiler/brw_fs_sel_peephole.cpp<br>
index 98d640a3bfe..6395b409b7c 100644<br>
--- a/src/intel/compiler/brw_fs_sel_peephole.cpp<br>
+++ b/src/intel/compiler/brw_fs_sel_peephole.cpp<br>
@@ -167,8 +167,8 @@ fs_visitor::opt_peephole_sel()<br>
then_mov[i]->exec_size != else_mov[i]->exec_size ||<br>
then_mov[i]->group != else_mov[i]->group ||<br>
then_mov[i]->force_writemask_all != else_mov[i]->force_writemask_all ||<br>
- then_mov[i]->is_partial_var_write(dispatch_width) ||<br>
- else_mov[i]->is_partial_var_write(dispatch_width) ||<br>
+ then_mov[i]->is_partial_write() ||<br>
+ else_mov[i]->is_partial_write() ||<br>
then_mov[i]->conditional_mod != BRW_CONDITIONAL_NONE ||<br>
else_mov[i]->conditional_mod != BRW_CONDITIONAL_NONE) {<br>
movs = i;<br>
diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h<br>
index 87e03258d93..56a4bdc6e52 100644<br>
--- a/src/intel/compiler/brw_ir_fs.h<br>
+++ b/src/intel/compiler/brw_ir_fs.h<br>
@@ -348,8 +348,7 @@ public:<br>
void resize_sources(uint8_t num_sources);<br>
<br>
bool is_send_from_grf() const;<br>
- bool is_partial_reg_write() const;<br>
- bool is_partial_var_write(unsigned dispatch_width) const;<br>
+ bool is_partial_write() const;<br>
bool is_copy_payload(const brw::simple_allocator &grf_alloc) const;<br>
unsigned components_read(unsigned i) const;<br>
unsigned size_read(int arg) const;<br>
-- <br>
2.20.1<br>
<br>
</blockquote></div>