[Mesa-dev] [PATCH 2/3] svga: fix CMP translation for vertex shaders

Mon Jul 2 16:05:09 PDT 2012

Converting CMP to SLT+LRP didn't work when src2 or src3 was Inf/NaN.
That's the case for GLSL sqrt(0).  sqrt(0) actually happens in many
piglit auto-generated tests that use the distance() function.

v2: remove debug/devel code, per Jose
---
 src/gallium/drivers/svga/svga_tgsi_insn.c |   73 +++++++++++++++--------------
 1 files changed, 37 insertions(+), 36 deletions(-)

diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index 595b06d..a2d6b51 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -896,42 +896,6 @@ static boolean emit_ceil(struct svga_shader_emitter *emit,
 }
 
 
-/* Translate the following TGSI CMP instruction.
- *    CMP  DST, SRC0, SRC1, SRC2
- * To the following SVGA3D instruction sequence.
- *    CMP  DST, SRC0, SRC2, SRC1
- */
-static boolean emit_cmp(struct svga_shader_emitter *emit,
-                          const struct tgsi_full_instruction *insn )
-{
-   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
-   const struct src_register src0 = translate_src_register(
-      emit, &insn->Src[0] );
-   const struct src_register src1 = translate_src_register(
-      emit, &insn->Src[1] );
-   const struct src_register src2 = translate_src_register(
-      emit, &insn->Src[2] );
-
-   if (emit->unit == PIPE_SHADER_VERTEX) {
-      SVGA3dShaderDestToken temp = get_temp(emit);
-      struct src_register zero = scalar(get_zero_immediate(emit), TGSI_SWIZZLE_X);
-
-      /* Since vertex shaders don't support the CMP instruction,
-       * simulate it with SLT and LRP instructions.
-       *    SLT  TMP, SRC0, 0.0
-       *    LRP  DST, TMP, SRC1, SRC2
-       */
-      if (!submit_op2(emit, inst_token(SVGA3DOP_SLT), temp, src0, zero))
-         return FALSE;
-      return submit_lrp(emit, dst, src(temp), src1, src2);
-   }
-
-   /* CMP  DST, SRC0, SRC2, SRC1 */
-   return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst, src0, src2, src1);
-}
-
-
-
 /* Translate the following TGSI DIV instruction.
  *    DIV  DST.xy, SRC0, SRC1
  * To the following SVGA3D instruction sequence.
@@ -1421,6 +1385,43 @@ static boolean emit_select_op(struct svga_shader_emitter *emit,
 }
 
 
+/**
+ * Translate TGSI CMP instruction.
+ */
+static boolean
+emit_cmp(struct svga_shader_emitter *emit,
+         const struct tgsi_full_instruction *insn)
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   const struct src_register src0 =
+      translate_src_register(emit, &insn->Src[0] );
+   const struct src_register src1 =
+      translate_src_register(emit, &insn->Src[1] );
+   const struct src_register src2 =
+      translate_src_register(emit, &insn->Src[2] );
+
+   if (emit->unit == PIPE_SHADER_VERTEX) {
+      struct src_register zero =
+         scalar(get_zero_immediate(emit), TGSI_SWIZZLE_X);
+      /* We used to simulate CMP with SLT+LRP.  But that didn't work when
+       * src1 or src2 was Inf/NaN.  In particular, GLSL sqrt(0) failed
+       * because it involves a CMP to handle the 0 case.
+       * Use a conditional expression instead.
+       */
+      return emit_conditional(emit, PIPE_FUNC_LESS, dst,
+                              src0, zero, src1, src2);
+   }
+   else {
+      assert(emit->unit == PIPE_SHADER_FRAGMENT);
+
+      /* CMP  DST, SRC0, SRC2, SRC1 */
+      return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst,
+                         src0, src2, src1);
+   }
+
+}
+
+
 /* Translate texture instructions to SVGA3D representation.
  */
 static boolean emit_tex2(struct svga_shader_emitter *emit,
-- 
1.7.3.4