[Mesa-dev] [PATCH 29/32] i965/gen8: Fix F32TO16 in vec4 mode if the source and destination registers alias.

Francisco Jerez currojerez at riseup.net
Fri Feb 6 06:43:09 PST 2015


We cannot zero out the destination register if it overlaps with the
source.  Use an Align1 instruction instead to zero out the high 16
bits after the conversion to half float.
---
 src/mesa/drivers/dri/i965/brw_eu_emit.c | 36 ++++++++++++++++++++++++---------
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index fa06fd6..9d2e65d 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1127,7 +1127,15 @@ brw_inst *
 brw_F32TO16(struct brw_compile *p, struct brw_reg dst, struct brw_reg src)
 {
    const struct brw_context *brw = p->brw;
-   bool align16 = brw_inst_access_mode(brw, p->current) == BRW_ALIGN_16;
+   const bool align16 = brw_inst_access_mode(brw, p->current) == BRW_ALIGN_16;
+   /* The F32TO16 instruction doesn't support 32-bit destination types in
+    * Align1 mode, and neither does the Gen8 implementation in terms of a
+    * converting MOV.  Gen7 does zero out the high 16 bits in Align16 mode as
+    * an undocumented feature.
+    */
+   const bool needs_zero_fill = (dst.type == BRW_REGISTER_TYPE_UD &&
+                                 brw->gen >= 8);
+   brw_inst *inst;
 
    if (align16) {
       assert(dst.type == BRW_REGISTER_TYPE_UD);
@@ -1137,18 +1145,28 @@ brw_F32TO16(struct brw_compile *p, struct brw_reg dst, struct brw_reg src)
              dst.type == BRW_REGISTER_TYPE_HF);
    }
 
+   brw_push_insn_state(p);
+
+   if (needs_zero_fill) {
+      brw_set_default_access_mode(p, BRW_ALIGN_1);
+      dst = spread(retype(dst, BRW_REGISTER_TYPE_W), 2);
+   }
+
    if (brw->gen >= 8) {
-      if (align16) {
-         /* Emulate the Gen7 zeroing bug (see comments in vec4_visitor's
-          * emit_pack_half_2x16 method.)
-          */
-         brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
-      }
-      return brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src);
+      inst = brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src);
    } else {
       assert(brw->gen == 7);
-      return brw_alu1(p, BRW_OPCODE_F32TO16, dst, src);
+      inst = brw_alu1(p, BRW_OPCODE_F32TO16, dst, src);
+   }
+
+   if (needs_zero_fill) {
+      brw_inst_set_no_dd_clear(brw, inst, true);
+      inst = brw_MOV(p, suboffset(dst, 1), brw_imm_ud(0u));
+      brw_inst_set_no_dd_check(brw, inst, true);
    }
+
+   brw_pop_insn_state(p);
+   return inst;
 }
 
 brw_inst *
-- 
2.1.3



More information about the mesa-dev mailing list