Mesa (gallium-0.2): cell: implement basic TXP instruction in fragment shaders

Brian Paul brianp at kemper.freedesktop.org
Fri Oct 10 01:55:07 UTC 2008


Module: Mesa
Branch: gallium-0.2
Commit: 583098e3cb602fd9810a7c65718155fd9b0b3fda
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=583098e3cb602fd9810a7c65718155fd9b0b3fda

Author: Brian Paul <brian.paul at tungstengraphics.com>
Date:   Thu Oct  9 19:48:53 2008 -0600

cell: implement basic TXP instruction in fragment shaders

Lots of restrictions for now (one 2D texture, no mipmaps, etc.) for now
but basic texture demos work.
TEX, TXD, TXP do the same thing for the time being.

---

 src/gallium/drivers/cell/ppu/cell_gen_fp.c |  109 ++++++++++++++++++++++++----
 src/gallium/drivers/cell/spu/spu_funcs.c   |   51 +++++++++++--
 src/gallium/drivers/cell/spu/spu_tri.c     |    2 +-
 3 files changed, 138 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
index 5647bb2..c8125a8 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -226,6 +226,11 @@ get_src_reg(struct codegen *gen,
             spe_lqd(gen->f, reg, gen->constants_reg, offset * 16);
          }
          break;
+      case TGSI_FILE_SAMPLER:
+         {
+            reg = 3; /* XXX total hack */
+         }
+         break;
       default:
          assert(0);
       }
@@ -1162,6 +1167,21 @@ print_functions(struct cell_context *cell)
 #endif
 
 
+static uint
+lookup_function(struct cell_context *cell, const char *funcname)
+{
+   const struct cell_spu_function_info *funcs = &cell->spu_functions;
+   uint i, addr = 0;
+   for (i = 0; i < funcs->num; i++) {
+      if (strcmp(funcs->names[i], funcname) == 0) {
+         addr = funcs->addrs[i];
+      }
+   }
+   assert(addr && "spu function not found");
+   return addr / 4;  /* discard 2 least significant bits */
+}
+
+
 /**
  * Emit code to call a SPU function.
  * Used to implement instructions like SIN/COS/POW/TEX/etc.
@@ -1171,27 +1191,12 @@ emit_function_call(struct codegen *gen,
                    const struct tgsi_full_instruction *inst,
                    char *funcname, uint num_args)
 {
-   const struct cell_spu_function_info *funcs = &gen->cell->spu_functions;
+   const uint addr = lookup_function(gen->cell, funcname);
    char comment[100];
-   uint addr;
    int ch;
 
    assert(num_args <= 3);
 
-   /* lookup function address */
-   {
-      uint i;
-      addr = 0;
-      for (i = 0; i < funcs->num; i++) {
-         if (strcmp(funcs->names[i], funcname) == 0) {
-            addr = funcs->addrs[i];
-         }
-      }
-      assert(addr && "spu function not found");
-   }
-
-   addr /= 4; /* discard 2 least significant bits */
-
    snprintf(comment, sizeof(comment), "CALL %s:", funcname);
    spe_comment(gen->f, -4, comment);
 
@@ -1245,6 +1250,72 @@ emit_function_call(struct codegen *gen,
 }
 
 
+static boolean
+emit_TXP(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+   const uint addr = lookup_function(gen->cell, "spu_txp");
+   int ch;
+   int coord_regs[4], d_regs[4];
+
+   spe_comment(gen->f, -4, "CALL txp:");
+
+   /* get src/dst reg info */
+   for (ch = 0; ch < 4; ch++) {
+      coord_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+      d_regs[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+   }
+
+   {
+      ubyte usedRegs[SPE_NUM_REGS];
+      uint i, numUsed;
+
+      numUsed = spe_get_registers_used(gen->f, usedRegs);
+      assert(numUsed < gen->frame_size / 16 - 32);
+
+      /* save registers to stack */
+      for (i = 0; i < numUsed; i++) {
+         uint reg = usedRegs[i];
+         int offset = 2 + i;
+         spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
+      }
+
+      /* setup function arguments */
+      for (i = 0; i < 4; i++) {
+         spe_move(gen->f, 3 + i, coord_regs[i]);
+      }
+
+      /* branch to function, save return addr */
+      spe_brasl(gen->f, SPE_REG_RA, addr);
+
+      /* save function's return values (four pixel's colors) */
+      for (i = 0; i < 4; i++) {
+         spe_move(gen->f, d_regs[i], 3 + i);
+      }
+
+      /* restore registers from stack */
+      for (i = 0; i < numUsed; i++) {
+         uint reg = usedRegs[i];
+         if (reg != d_regs[0] &&
+             reg != d_regs[1] &&
+             reg != d_regs[2] &&
+             reg != d_regs[3]) {
+            int offset = 2 + i;
+            spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
+         }
+      }
+   }
+
+   for (ch = 0; ch < 4; ch++) {
+      if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+         store_dest_reg(gen, d_regs[ch], ch, &inst->FullDstRegisters[0]);
+         free_itemps(gen);
+      }
+   }
+
+   return TRUE;
+}
+
+
 /**
  * Emit max.  See emit_SGT for comments.
  */
@@ -1483,6 +1554,12 @@ emit_instruction(struct codegen *gen,
       return emit_function_call(gen, inst, "spu_exp2", 1);
    case TGSI_OPCODE_LOGBASE2:
       return emit_function_call(gen, inst, "spu_log2", 1);
+   case TGSI_OPCODE_TEX:
+      /* fall-through for now */
+   case TGSI_OPCODE_TXD:
+      /* fall-through for now */
+   case TGSI_OPCODE_TXP:
+      return emit_TXP(gen, inst);
 
    case TGSI_OPCODE_IF:
       return emit_IF(gen, inst);
diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c
index 1adf9de..c7bcb3d 100644
--- a/src/gallium/drivers/cell/spu/spu_funcs.c
+++ b/src/gallium/drivers/cell/spu/spu_funcs.c
@@ -38,12 +38,20 @@
 #include <math.h>
 #include <cos14_v.h>
 #include <sin14_v.h>
+#include <transpose_matrix4x4.h>
 
 #include "cell/common.h"
 #include "spu_main.h"
 #include "spu_funcs.h"
 
 
+/** For "return"-ing four vectors */
+struct vec_4x4
+{
+   vector float v[4];
+};
+
+
 static vector float
 spu_cos(vector float x)
 {
@@ -92,16 +100,44 @@ spu_log2(vector float x)
    return spu_mul(v, k);
 }
 
+static struct vec_4x4
+spu_txp(vector float s, vector float t, vector float r, vector float q)
+{
+   const uint unit = 0;
+   struct vec_4x4 colors;
+   vector float coords[4];
+
+   coords[0] = s;
+   coords[1] = t;
+   coords[2] = r;
+   coords[3] = q;
+   _transpose_matrix4x4(coords, coords);
+
+   /* get four texture samples */
+   colors.v[0] = spu.sample_texture[unit](unit, coords[0]);
+   colors.v[1] = spu.sample_texture[unit](unit, coords[1]);
+   colors.v[2] = spu.sample_texture[unit](unit, coords[2]);
+   colors.v[3] = spu.sample_texture[unit](unit, coords[3]);
+
+   _transpose_matrix4x4(colors.v, colors.v);
+   return colors;
+}
+
 
+/**
+ * Add named function to list of "exported" functions that will be
+ * made available to the PPU-hosted code generator.
+ */
 static void
-add_func(struct cell_spu_function_info *spu_functions,
-             const char *name, void *addr)
+export_func(struct cell_spu_function_info *spu_functions,
+            const char *name, void *addr)
 {
    uint n = spu_functions->num;
    ASSERT(strlen(name) < 16);
    strcpy(spu_functions->names[n], name);
    spu_functions->addrs[n] = (uint) addr;
    spu_functions->num++;
+   ASSERT(spu_functions->num <= 16);
 }
 
 
@@ -119,11 +155,12 @@ return_function_info(void)
    ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */
 
    funcs.num = 0;
-   add_func(&funcs, "spu_cos", &spu_cos);
-   add_func(&funcs, "spu_sin", &spu_sin);
-   add_func(&funcs, "spu_pow", &spu_pow);
-   add_func(&funcs, "spu_exp2", &spu_exp2);
-   add_func(&funcs, "spu_log2", &spu_log2);
+   export_func(&funcs, "spu_cos", &spu_cos);
+   export_func(&funcs, "spu_sin", &spu_sin);
+   export_func(&funcs, "spu_pow", &spu_pow);
+   export_func(&funcs, "spu_exp2", &spu_exp2);
+   export_func(&funcs, "spu_log2", &spu_log2);
+   export_func(&funcs, "spu_txp", &spu_txp);
 
    /* Send the function info back to the PPU / main memory */
    mfc_put((void *) &funcs,  /* src in local store */
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
index 6039cd8..87991c3 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -286,7 +286,7 @@ emit_quad( int x, int y, mask_t mask)
       spu.cur_ctile_status = TILE_STATUS_DIRTY;
       spu.cur_ztile_status = TILE_STATUS_DIRTY;
 
-      if (spu.texture[0].start) {
+      if (0/*spu.texture[0].start*/) {
          /*
           * Temporary texture mapping path
           * This will go away when fragment programs support TEX inst.




More information about the mesa-commit mailing list