[Mesa-dev] [PATCH 19/22] i965/vec4: fix SIMD-with lowering for CMP/MOV instructions with conditional modifiers
Samuel Iglesias Gonsálvez
siglesias at igalia.com
Thu Jan 5 13:07:39 UTC 2017
From: "Juan A. Suarez Romero" <jasuarez at igalia.com>
When splitting a CMP/MOV instruction with NULL dest, DF sources, and
conditional modifier; we can't use directly the flag registers, as they will
have the wrong results in IVB/VLV after the scalarization.
Rather, we need to store the result in a temporary register, and then use
that register to set proper the flags values.
If a MOV has a null destination register and a conditional modifier, it
can be replaced with a CMP against zero with the same conditional
modifier. By doing this replacement, we can do the SIMD lowering
without any problem.
Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
Signed-off-by: Juan A. Suarez Romero <jasuarez at igalia.com
---
src/mesa/drivers/dri/i965/brw_vec4.cpp | 80 +++++++++++++++++++++++++++++++---
1 file changed, 74 insertions(+), 6 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index c654c8b..74ab866 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -2190,6 +2190,46 @@ vec4_visitor::lower_simd_width()
* value of the instruction's dst.
*/
bool needs_temp = dst_src_regions_overlap(inst);
+
+ /* When splitting instructions with conditional modifiers and NULL
+ * dest we can't rely directly on the flags to store the result. Rather,
+ * we need first to enqueue the result in a temporary register, and then
+ * move those values into flags.
+ */
+ bool inst_df_dst_null =
+ inst->dst.is_null() && inst->exec_data_size() == 8 &&
+ inst->conditional_mod != BRW_CONDITIONAL_NONE;
+
+ if (inst_df_dst_null) {
+ /* If there are other DF instructions with NULL destination,
+ * we need to verify if we can use the temporary register or
+ * if we need an extra lowering step.
+ */
+ assert(inst->opcode == BRW_OPCODE_MOV ||
+ inst->opcode == BRW_OPCODE_CMP);
+
+ /* Replace MOV.XX with null destination with the equivalente CMP.XX
+ * with null destination, so we can lower it as explained before.
+ */
+ if (inst->opcode == BRW_OPCODE_MOV) {
+ vec4_instruction *cmp =
+ new(mem_ctx) vec4_instruction(BRW_OPCODE_CMP, dst_null_df(),
+ inst->src[0],
+ setup_imm_df(0.0, block, inst));
+ cmp->conditional_mod = inst->conditional_mod;
+ cmp->exec_size = inst->exec_size;
+ cmp->group = inst->group;
+ cmp->size_written = inst->size_written;
+ inst->insert_before(block, cmp);
+ inst->remove(block);
+ inst = cmp;
+ }
+ }
+ dst_reg inst_dst;
+ if (inst_df_dst_null)
+ inst_dst =
+ retype(dst_reg(VGRF, alloc.allocate(1)), BRW_REGISTER_TYPE_F);
+
for (unsigned n = 0; n < inst->exec_size / lowered_width; n++) {
unsigned channel_offset = lowered_width * n;
@@ -2212,7 +2252,7 @@ vec4_visitor::lower_simd_width()
bool d2f_pass = (inst->opcode == VEC4_OPCODE_FROM_DOUBLE && n > 0);
/* Compute split dst region */
dst_reg dst;
- if (needs_temp || d2f_pass) {
+ if (needs_temp || d2f_pass || inst_df_dst_null) {
unsigned num_regs = DIV_ROUND_UP(size_written, REG_SIZE);
dst = retype(dst_reg(VGRF, alloc.allocate(num_regs)),
inst->dst.type);
@@ -2242,24 +2282,52 @@ vec4_visitor::lower_simd_width()
inst->insert_before(block, linst);
+ dst_reg d2f_dst;
+ if (inst_df_dst_null) {
+ unsigned num_regs = DIV_ROUND_UP(lowered_width, type_sz(BRW_REGISTER_TYPE_F));
+ d2f_dst = retype(dst_reg(VGRF, alloc.allocate(num_regs)), BRW_REGISTER_TYPE_F);
+ vec4_instruction *d2f = new(mem_ctx) vec4_instruction(VEC4_OPCODE_FROM_DOUBLE, d2f_dst, src_reg(dst));
+ d2f->group = channel_offset;
+ d2f->exec_size = lowered_width;
+ d2f->size_written = lowered_width * type_sz(d2f_dst.type);
+ d2f->predicate = inst->predicate;
+ inst->insert_before(block, d2f);
+ }
+
/* If we used a temporary to store the result of the split
* instruction, copy the result to the original destination
*/
- if (needs_temp || d2f_pass) {
+ if (needs_temp || d2f_pass || inst_df_dst_null) {
vec4_instruction *mov;
- if (d2f_pass)
+ if (d2f_pass) {
mov = MOV(horiz_offset(inst->dst, n * type_sz(inst->dst.type)), src_reg(dst));
- else
+ mov->size_written = size_written;
+ } else if (inst_df_dst_null) {
+ mov = MOV(horiz_offset(inst_dst, n * 4), src_reg(d2f_dst));
+ mov->size_written = lowered_width * type_sz(inst_dst.type);
+ } else {
mov = MOV(offset(inst->dst, lowered_width, n), src_reg(dst));
+ mov->size_written = size_written;
+ }
mov->exec_size = lowered_width;
mov->group = channel_offset;
- mov->size_written = size_written;
mov->predicate = inst->predicate;
inst->insert_before(block, mov);
}
}
- inst->remove(block);
+ /* For CMP.XX instruction, we need to set the flags correctly. We do
+ * this by comparing the register that has the results against 0.0,
+ * getting the values in the flags.
+ */
+ if (inst_df_dst_null) {
+ inst->dst.type = BRW_REGISTER_TYPE_F;
+ inst->src[0] = src_reg(inst_dst);
+ inst->src[1] = brw_imm_f(0.0f);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ } else {
+ inst->remove(block);
+ }
progress = true;
}
--
2.9.3
More information about the mesa-dev
mailing list