Mesa (gallium-mesa-7.4): gallium: fix glean's vertProg1

Alan Hourihane alanh at kemper.freedesktop.org
Mon Feb 16 20:54:10 UTC 2009


Module: Mesa
Branch: gallium-mesa-7.4
Commit: f5c6a690eb9fed0d32c981ebe8feec4762ea2ebf
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f5c6a690eb9fed0d32c981ebe8feec4762ea2ebf

Author: Alan Hourihane <alanh at vmware.com>
Date:   Mon Feb 16 20:20:55 2009 +0000

gallium: fix glean's vertProg1
         RSQ test 2 (reciprocal square root of negative value)

---

 src/gallium/auxiliary/draw/draw_vs_aos.c |   28 ++++++++++++++++++----------
 src/gallium/auxiliary/tgsi/tgsi_exec.c   |    1 +
 src/gallium/auxiliary/tgsi/tgsi_sse2.c   |    1 +
 3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 6817f29..ae110bd 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -1559,7 +1559,6 @@ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_inst
  */
 static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
 {
-
    if (0) {
       struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
       struct x86_reg r = aos_get_xmm_reg(cp);
@@ -1568,21 +1567,30 @@ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_inst
       return TRUE;
    }
    else {
-      struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
-      struct x86_reg r = aos_get_xmm_reg(cp);
+      struct x86_reg arg0           = fetch_src(cp, &op->FullSrcRegisters[0]);
+      struct x86_reg r              = aos_get_xmm_reg(cp);
 
       struct x86_reg neg_half       = get_reg_ptr( cp, AOS_FILE_INTERNAL, IMM_RSQ );
       struct x86_reg one_point_five = x86_make_disp( neg_half, 4 );
       struct x86_reg src            = get_xmm_writable( cp, arg0 );
-      
-      sse_rsqrtss( cp->func, r, src  );             /* rsqrtss(a) */
-      sse_mulss(   cp->func, src, neg_half  );      /* -.5 * a */
-      sse_mulss(   cp->func, src,  r );             /* -.5 * a * r */
-      sse_mulss(   cp->func, src,  r );             /* -.5 * a * r * r */
-      sse_addss(   cp->func, src, one_point_five ); /* 1.5 - .5 * a * r * r */
-      sse_mulss(   cp->func, r,  src );             /* r * (1.5 - .5 * a * r * r) */
+      struct x86_reg neg            = aos_get_internal(cp, IMM_NEGS);
+      struct x86_reg tmp            = aos_get_xmm_reg(cp);
+
+      sse_movaps(cp->func, tmp, src);
+      sse_mulps(cp->func, tmp, neg);
+      sse_maxps(cp->func, tmp, src);
+   
+      sse_rsqrtss( cp->func, r, tmp  );             /* rsqrtss(a) */
+      sse_mulss(   cp->func, tmp, neg_half  );      /* -.5 * a */
+      sse_mulss(   cp->func, tmp,  r );             /* -.5 * a * r */
+      sse_mulss(   cp->func, tmp,  r );             /* -.5 * a * r * r */
+      sse_addss(   cp->func, tmp, one_point_five ); /* 1.5 - .5 * a * r * r */
+      sse_mulss(   cp->func, r,  tmp );             /* r * (1.5 - .5 * a * r * r) */
 
       store_scalar_dest(cp, &op->FullDstRegisters[0], r);
+
+      aos_release_xmm_reg(cp, tmp.idx);
+
       return TRUE;
    }
 }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index a182e67..17e9175 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1889,6 +1889,7 @@ exec_instruction(
    case TGSI_OPCODE_RSQ:
    /* TGSI_OPCODE_RECIPSQRT */
       FETCH( &r[0], 0, CHAN_X );
+      micro_abs( &r[0], &r[0] );
       micro_sqrt( &r[0], &r[0] );
       micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index cac44af..9424cb4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -1575,6 +1575,7 @@ emit_instruction(
    case TGSI_OPCODE_RSQ:
    /* TGSI_OPCODE_RECIPSQRT */
       FETCH( func, *inst, 0, 0, CHAN_X );
+      emit_abs( func, 0 );
       emit_rsqrt( func, 1, 0 );
       FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
          STORE( func, *inst, 1, 0, chan_index );




More information about the mesa-commit mailing list