Mesa (gallium-0.2): gallium: ppc: don't replicate/smear immediate values, use vspltw instruction as with constants

Brian Paul brianp at kemper.freedesktop.org
Wed Oct 29 01:01:55 UTC 2008


Module: Mesa
Branch: gallium-0.2
Commit: f4e9526addc617dc78af9b1af781ffe09ce62504
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f4e9526addc617dc78af9b1af781ffe09ce62504

Author: Brian Paul <brian.paul at tungstengraphics.com>
Date:   Tue Oct 28 18:21:03 2008 -0600

gallium: ppc: don't replicate/smear immediate values, use vspltw instruction as with constants

---

 src/gallium/auxiliary/draw/draw_vs_ppc.c |    8 ++++----
 src/gallium/auxiliary/tgsi/tgsi_ppc.c    |   22 +++++++++++++---------
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c
index 8eff6d4..ff40263 100644
--- a/src/gallium/auxiliary/draw/draw_vs_ppc.c
+++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c
@@ -54,7 +54,7 @@
 typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4],
                                              float (*outputs)[4][4],
                                              float (*temps)[4][4],
-                                             float (*immeds)[4][4],
+                                             float (*immeds)[4],
                                              float (*consts)[4],
                                              const float *builtins);
 
@@ -151,7 +151,7 @@ vs_ppc_run_linear( struct draw_vertex_shader *base,
                    output_stride );
 #else
       shader->func(inputs_soa, outputs_soa, temps_soa,
-		   (float (*)[4][4]) shader->base.immediates,
+		   (float (*)[4]) shader->base.immediates,
 		   (float (*)[4]) constants,
                    ppc_builtin_constants);
 
@@ -227,7 +227,7 @@ draw_create_vs_ppc(struct draw_context *draw,
    vs->base.run_linear = vs_ppc_run_linear;
    vs->base.delete = vs_ppc_delete;
    
-   vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * 4 *
+   vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
                                       sizeof(float), 16);
 
    vs->machine = &draw->vs.machine;
@@ -236,7 +236,7 @@ draw_create_vs_ppc(struct draw_context *draw,
 
    if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens,
 			&vs->ppc_program, 
-                        (float (*)[4])vs->base.immediates, 
+                       (float (*)[4]) vs->base.immediates, 
                         TRUE )) 
       goto fail;
       
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 0de9b97..dd574ac 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -299,10 +299,18 @@ emit_fetch(struct gen_context *gen,
          break;
       case TGSI_FILE_IMMEDIATE:
          {
-            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
+            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4;
             int offset_reg = emit_li_offset(gen, offset);
             dst_vec = ppc_allocate_vec_register(gen->f);
-            ppc_lvx(gen->f, dst_vec, gen->immed_reg, offset_reg);
+            /* Load 4-byte word into vector register.
+             * The vector slot depends on the effective address we load from.
+             * We know that our immediates start at a 16-byte boundary so we
+             * know that 'swizzle' tells us which vector slot will have the
+             * loaded word.  The other vector slots will be undefined.
+             */
+            ppc_lvewx(gen->f, dst_vec, gen->immed_reg, offset_reg);
+            /* splat word[swizzle] across the vector reg */
+            ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle);
          }
          break;
       case TGSI_FILE_CONSTANT:
@@ -1095,14 +1103,10 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
             assert(size <= 4);
             assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
             for (i = 0; i < size; i++) {
-               const float value =
-                  parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
-               imm[num_immediates * 4 + 0] = 
-               imm[num_immediates * 4 + 1] = 
-               imm[num_immediates * 4 + 2] = 
-               imm[num_immediates * 4 + 3] = value;
-               num_immediates++;
+               immediates[num_immediates][i] =
+		  parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
             }
+            num_immediates++;
          }
          break;
 




More information about the mesa-commit mailing list