Mesa (gallium-0.2): cell: implement KIL instruction

Brian Paul brianp at kemper.freedesktop.org
Thu Oct 16 20:34:16 UTC 2008


Module: Mesa
Branch: gallium-0.2
Commit: 033c90f4c16c1da517d676282508208319bd5ec5
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=033c90f4c16c1da517d676282508208319bd5ec5

Author: Brian Paul <brian.paul at tungstengraphics.com>
Date:   Thu Oct 16 13:49:42 2008 -0600

cell: implement KIL instruction

---

 src/gallium/drivers/cell/ppu/cell_gen_fp.c |   80 ++++++++++++++++++++++++++++
 src/gallium/drivers/cell/spu/spu_main.h    |    6 +-
 src/gallium/drivers/cell/spu/spu_tri.c     |    5 ++-
 3 files changed, 87 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
index 2b34cf1..493ee1a 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -84,6 +84,9 @@ struct codegen
    /** Index of execution mask register */
    int exec_mask_reg;
 
+   /** KIL mask: indicates which fragments have been killed */
+   int kill_mask_reg;
+
    int frame_size;  /**< Stack frame size, in words */
 
    struct spe_function *f;
@@ -431,8 +434,21 @@ emit_prologue(struct codegen *gen)
 static void
 emit_epilogue(struct codegen *gen)
 {
+   const int return_reg = 3;
+
    spe_comment(gen->f, -4, "Function epilogue:");
 
+   spe_comment(gen->f, 0, "return the killed mask");
+   if (gen->kill_mask_reg > 0) {
+      /* shader called KIL, return the accumulated kill mask */
+      spe_move(gen->f, return_reg, gen->kill_mask_reg);
+   }
+   else {
+      /* return {0,0,0,0} */
+      spe_load_uint(gen->f, return_reg, 0);
+   }
+
+   spe_comment(gen->f, 0, "restore stack and return");
    if (gen->frame_size >= 512) {
       /* offset is too large for ai instruction */
       int offset_reg = spe_allocate_available_register(gen->f);
@@ -1424,6 +1440,68 @@ emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst)
 
 
 /**
+ * KIL: OR into gen->kill_mask_reg a mask that is set where any write-masked src channel is < 0.
+ */
+static boolean
+emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+   int ch;
+   int s_regs[4], kil_reg = -1, cmp_reg, zero_reg; /* NOTE(review): kil_reg stays -1 if no write-mask bit is set */
+
+   spe_comment(gen->f, -4, "CALL kil:");
+
+   /* zero = {0,0,0,0}: left-hand operand of the 0 > src compares below */
+   zero_reg = get_itemp(gen);
+   spe_load_uint(gen->f, zero_reg, 0);
+
+   cmp_reg = get_itemp(gen);
+
+   /* fetch source 0 for each channel selected by the dst write mask */
+   for (ch = 0; ch < 4; ch++) {
+      if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+         s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+      }
+   }
+
+   /* kil = OR over the selected channels of (0 > src) */
+   for (ch = 0; ch < 4; ch++) {
+      if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+         if (kil_reg >= 0) {
+            /* cmp = (0 > src) ? ~0 : 0 */
+            spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]);
+            /* kil = kil | cmp */
+            spe_or(gen->f, kil_reg, kil_reg, cmp_reg);
+         }
+         else {
+            kil_reg = get_itemp(gen);
+            /* first selected channel: kil = (0 > src) ? ~0 : 0 */
+            spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]);
+         }
+      }
+   }
+
+   if (gen->if_nesting) {
+      /* inside an IF (conditional kil): AND the result with the execution mask */
+      spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg);
+   }
+
+   /* no kill mask reg yet: allocate one and copy kil into it; otherwise OR kil in */
+   if (gen->kill_mask_reg <= 0) {
+      gen->kill_mask_reg = spe_allocate_available_register(gen->f);
+      spe_move(gen->f, gen->kill_mask_reg, kil_reg);
+   }
+   else {
+      spe_or(gen->f, gen->kill_mask_reg, gen->kill_mask_reg, kil_reg);
+   }
+
+   free_itemps(gen);
+
+   return TRUE;
+}
+
+
+
+/**
  * Emit max.  See emit_SGT for comments.
  */
 static boolean
@@ -1695,6 +1773,8 @@ emit_instruction(struct codegen *gen,
       /* fall-through for now */
    case TGSI_OPCODE_TXP:
       return emit_TEX(gen, inst);
+   case TGSI_OPCODE_KIL:
+      return emit_KIL(gen, inst);
 
    case TGSI_OPCODE_IF:
       return emit_IF(gen, inst);
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
index 80e9c69..95ef4c9 100644
--- a/src/gallium/drivers/cell/spu/spu_main.h
+++ b/src/gallium/drivers/cell/spu/spu_main.h
@@ -89,9 +89,9 @@ typedef void (*spu_fragment_ops_func)(uint x, uint y,
                                       uint facing);
 
 /** Function for running fragment program */
-typedef void (*spu_fragment_program_func)(vector float *inputs,
-                                          vector float *outputs,
-                                          vector float *constants);
+typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs,
+                                                         vector float *outputs,
+                                                         vector float *constants);
 
 
 struct spu_framebuffer
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
index d83085d..4caf7d6 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -254,6 +254,7 @@ emit_quad( int x, int y, mask_t mask)
          vector float inputs[4*4], outputs[2*4];
          vector float fragZ = eval_z((float) x, (float) y);
          vector float fragW = eval_w((float) x, (float) y);
+         vector unsigned int kill_mask;
 
          /* setup inputs */
 #if 0
@@ -268,7 +269,9 @@ emit_quad( int x, int y, mask_t mask)
          ASSERT(spu.fragment_ops);
 
          /* Execute the current fragment program */
-         spu.fragment_program(inputs, outputs, spu.constants);
+         kill_mask = spu.fragment_program(inputs, outputs, spu.constants);
+
+         mask = spu_andc(mask, kill_mask);
 
          /* Execute per-fragment/quad operations, including:
           * alpha test, z test, stencil test, blend and framebuffer writing.




More information about the mesa-commit mailing list