Mesa (master): i965: Move OPCODE_DDX/DDY to brw_wm_emit. c and make it actually work.

Eric Anholt anholt at kemper.freedesktop.org
Fri Sep 11 21:15:03 UTC 2009


Module: Mesa
Branch: master
Commit: 57d16c4cc37689710f951cb13981e2efc160cd23
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=57d16c4cc37689710f951cb13981e2efc160cd23

Author: Eric Anholt <eric at anholt.net>
Date:   Fri Sep 11 14:09:03 2009 -0700

i965: Move OPCODE_DDX/DDY to brw_wm_emit.c and make it actually work.

Previously, it was trying to mess around with the varying's
WM setup data to produce a result.  Along with not actually working when
passed a varying, this wouldn't work if you did dFd[xy]() on a temporary.
Instead, just calculate the derivative using the neighbors in the subspan.

---

 src/mesa/drivers/dri/i965/brw_wm.h       |    8 ++-
 src/mesa/drivers/dri/i965/brw_wm_emit.c  |   83 +++++++++++++++++++++++++-
 src/mesa/drivers/dri/i965/brw_wm_fp.c    |   38 ------------
 src/mesa/drivers/dri/i965/brw_wm_glsl.c  |   94 ++++++++++-------------------
 src/mesa/drivers/dri/i965/brw_wm_pass1.c |    5 ++
 5 files changed, 126 insertions(+), 102 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index ae98b54..872b1f3 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -38,6 +38,8 @@
 #include "brw_context.h"
 #include "brw_eu.h"
 
+#define SATURATE (1<<5)
+
 /* A big lookup table is used to figure out which and how many
  * additional regs will inserted before the main payload in the WM
  * program execution.  These mainly relate to depth and stencil
@@ -203,7 +205,6 @@ struct brw_wm_compile {
    GLuint fp_temp;
    GLuint fp_interp_emitted;
    GLuint fp_fragcolor_emitted;
-   GLuint fp_deriv_emitted;
 
    struct prog_src_register pixel_xy;
    struct prog_src_register delta_xy;
@@ -299,5 +300,10 @@ void brw_wm_lookup_iz( GLuint line_aa,
 GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
 void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c);
 
+void emit_ddxy(struct brw_compile *p,
+	       const struct brw_reg *dst,
+	       GLuint mask,
+	       GLboolean is_ddx,
+	       const struct brw_reg *arg0);
 
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index 9818643..bf80a29 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -34,8 +34,6 @@
 #include "brw_context.h"
 #include "brw_wm.h"
 
-#define SATURATE (1<<5)
-
 /* Not quite sure how correct this is - need to understand horiz
  * vs. vertical strides a little better.
  */
@@ -281,6 +279,79 @@ static void emit_frontfacing( struct brw_compile *p,
    brw_set_predicate_control_flag_value(p, 0xff);
 }
 
+/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
+ * looking like:
+ *
+ * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
+ *
+ * and we're trying to produce:
+ *
+ *           DDX                     DDY
+ * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
+ *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
+ *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
+ *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
+ *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
+ *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
+ *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
+ *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
+ *
+ * and add another set of two more subspans if in 16-pixel dispatch mode.
+ *
+ * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
+ * for each pair, and vertstride = 2 jumps us 2 elements after processing a
+ * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
+ * between each other.  We could probably do it like ddx and swizzle the right
+ * order later, but bail for now and just produce
+ * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
+ */
+void emit_ddxy(struct brw_compile *p,
+	       const struct brw_reg *dst,
+	       GLuint mask,
+	       GLboolean is_ddx,
+	       const struct brw_reg *arg0)
+{
+   int i;
+   struct brw_reg src0, src1;
+
+   if (mask & SATURATE)
+      brw_set_saturate(p, 1);
+   for (i = 0; i < 4; i++ ) {
+      if (mask & (1<<i)) {
+	 if (is_ddx) {
+	    src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
+			   BRW_REGISTER_TYPE_F,
+			   BRW_VERTICAL_STRIDE_2,
+			   BRW_WIDTH_2,
+			   BRW_HORIZONTAL_STRIDE_0,
+			   BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+	    src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
+			   BRW_REGISTER_TYPE_F,
+			   BRW_VERTICAL_STRIDE_2,
+			   BRW_WIDTH_2,
+			   BRW_HORIZONTAL_STRIDE_0,
+			   BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+	 } else {
+	    src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
+			   BRW_REGISTER_TYPE_F,
+			   BRW_VERTICAL_STRIDE_4,
+			   BRW_WIDTH_4,
+			   BRW_HORIZONTAL_STRIDE_0,
+			   BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+	    src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
+			   BRW_REGISTER_TYPE_F,
+			   BRW_VERTICAL_STRIDE_4,
+			   BRW_WIDTH_4,
+			   BRW_HORIZONTAL_STRIDE_0,
+			   BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+	 }
+	 brw_ADD(p, dst[i], src0, negate(src1));
+      }
+   }
+   if (mask & SATURATE)
+      brw_set_saturate(p, 0);
+}
+
 static void emit_alu1( struct brw_compile *p, 
 		       struct brw_instruction *(*func)(struct brw_compile *, 
 						       struct brw_reg, 
@@ -1272,6 +1343,14 @@ void brw_wm_emit( struct brw_wm_compile *c )
 	 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
 	 break;
 
+      case OPCODE_DDX:
+	 emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
+	 break;
+
+      case OPCODE_DDY:
+	 emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
+	 break;
+
       case OPCODE_DP3:
 	 emit_dp3(p, dst, dst_flags, args[0], args[1]);
 	 break;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index 123fe84..4e3edfb 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -494,38 +494,6 @@ static void emit_interp( struct brw_wm_compile *c,
    c->fp_interp_emitted |= 1<<idx;
 }
 
-static void emit_ddx( struct brw_wm_compile *c,
-        const struct prog_instruction *inst )
-{
-    GLuint idx = inst->SrcReg[0].Index;
-    struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
-
-    c->fp_deriv_emitted |= 1<<idx;
-    emit_op(c,
-            OPCODE_DDX,
-            inst->DstReg,
-            0,
-            interp,
-            get_pixel_w(c),
-            src_undef());
-}
-
-static void emit_ddy( struct brw_wm_compile *c,
-        const struct prog_instruction *inst )
-{
-    GLuint idx = inst->SrcReg[0].Index;
-    struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
-
-    c->fp_deriv_emitted |= 1<<idx;
-    emit_op(c,
-            OPCODE_DDY,
-            inst->DstReg,
-            0,
-            interp,
-            get_pixel_w(c),
-            src_undef());
-}
-
 /***********************************************************************
  * Hacks to extend the program parameter and constant lists.
  */
@@ -1186,12 +1154,6 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
 	  */
 	 out->DstReg.WriteMask = 0;
 	 break;
-      case OPCODE_DDX:
-	 emit_ddx(c, inst);
-	 break;
-      case OPCODE_DDY:
-         emit_ddy(c, inst);
-	break;
       case OPCODE_END:
 	 emit_fb_write(c);
 	 break;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index 7c210ab..c9fe1dd 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -22,6 +22,7 @@ static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
 GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
 {
     int i;
+
     for (i = 0; i < fp->Base.NumInstructions; i++) {
 	const struct prog_instruction *inst = &fp->Base.Instructions[i];
 	switch (inst->Opcode) {
@@ -31,8 +32,6 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
 	    case OPCODE_CAL:
 	    case OPCODE_BRK:
 	    case OPCODE_RET:
-	    case OPCODE_DDX:
-	    case OPCODE_DDY:
 	    case OPCODE_NOISE1:
 	    case OPCODE_NOISE2:
 	    case OPCODE_NOISE3:
@@ -293,7 +292,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
     int i, j;
     struct brw_reg reg;
     int urb_read_length = 0;
-    GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted;
+    GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted;
     GLuint reg_index = 0;
 
     memset(c->used_grf, GL_FALSE, sizeof(c->used_grf));
@@ -1474,61 +1473,6 @@ static void emit_sne(struct brw_wm_compile *c,
     emit_sop(c, inst, BRW_CONDITIONAL_NEQ);
 }
 
-static void emit_ddx(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
-{
-    struct brw_compile *p = &c->func;
-    GLuint mask = inst->DstReg.WriteMask;
-    struct brw_reg interp[4];
-    struct brw_reg dst;
-    struct brw_reg src0, w;
-    GLuint nr, i;
-    src0 = get_src_reg(c, inst, 0, 0);
-    w = get_src_reg(c, inst, 1, 3);
-    nr = src0.nr;
-    interp[0] = brw_vec1_grf(nr, 0);
-    interp[1] = brw_vec1_grf(nr, 4);
-    interp[2] = brw_vec1_grf(nr+1, 0);
-    interp[3] = brw_vec1_grf(nr+1, 4);
-    brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
-    for(i = 0; i < 4; i++ ) {
-        if (mask & (1<<i)) {
-            dst = get_dst_reg(c, inst, i);
-            brw_MOV(p, dst, interp[i]);
-            brw_MUL(p, dst, dst, w);
-        }
-    }
-    brw_set_saturate(p, 0);
-}
-
-static void emit_ddy(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
-{
-    struct brw_compile *p = &c->func;
-    GLuint mask = inst->DstReg.WriteMask;
-    struct brw_reg interp[4];
-    struct brw_reg dst;
-    struct brw_reg src0, w;
-    GLuint nr, i;
-
-    src0 = get_src_reg(c, inst, 0, 0);
-    nr = src0.nr;
-    w = get_src_reg(c, inst, 1, 3);
-    interp[0] = brw_vec1_grf(nr, 0);
-    interp[1] = brw_vec1_grf(nr, 4);
-    interp[2] = brw_vec1_grf(nr+1, 0);
-    interp[3] = brw_vec1_grf(nr+1, 4);
-    brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
-    for(i = 0; i < 4; i++ ) {
-        if (mask & (1<<i)) {
-            dst = get_dst_reg(c, inst, i);
-            brw_MOV(p, dst, suboffset(interp[i], 1));
-            brw_MUL(p, dst, dst, w);
-        }
-    }
-    brw_set_saturate(p, 0);
-}
-
 static INLINE struct brw_reg high_words( struct brw_reg reg )
 {
     return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ),
@@ -2780,6 +2724,21 @@ static void post_wm_emit( struct brw_wm_compile *c )
     brw_resolve_cals(&c->func);
 }
 
+static void
+get_argument_regs(struct brw_wm_compile *c,
+		  const struct prog_instruction *inst,
+		  int index,
+		  struct brw_reg *regs,
+		  int mask)
+{
+    int i;
+
+    for (i = 0; i < 4; i++) {
+	if (mask & (1 << i))
+	    regs[i] = get_src_reg(c, inst, index, i);
+    }
+}
+
 static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 {
 #define MAX_IF_DEPTH 32
@@ -2797,6 +2756,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 
     for (i = 0; i < c->nr_fp_insns; i++) {
         const struct prog_instruction *inst = &c->prog_instructions[i];
+	int dst_flags;
+	struct brw_reg args[3][4], dst[4];
+	int j;
 
         c->cur_inst = i;
 
@@ -2814,6 +2776,10 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 	else
 	    brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
 
+	dst_flags = inst->DstReg.WriteMask;
+	if (inst->SaturateMode == SATURATE_ZERO_ONE)
+	    dst_flags |= SATURATE;
+
 	switch (inst->Opcode) {
 	    case WM_PIXELXY:
 		emit_pixel_xy(c, inst);
@@ -2899,10 +2865,16 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 		emit_min_max(c, inst);
 		break;
 	    case OPCODE_DDX:
-		emit_ddx(c, inst);
-		break;
 	    case OPCODE_DDY:
-                emit_ddy(c, inst);
+		for (j = 0; j < 4; j++) {
+		    if (inst->DstReg.WriteMask & (1 << j))
+			dst[j] = get_dst_reg(c, inst, j);
+		    else
+			dst[j] = brw_null_reg();
+		}
+		get_argument_regs(c, inst, 0, args[0], WRITEMASK_XYZW);
+		emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX),
+			  args[0]);
                 break;
 	    case OPCODE_SLT:
 		emit_slt(c, inst);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c
index 9c68bfd..b449394 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c
@@ -178,6 +178,11 @@ void brw_wm_pass1( struct brw_wm_compile *c )
 	 read1 = writemask;
 	 break;
 
+      case OPCODE_DDX:
+      case OPCODE_DDY:
+	 read0 = writemask;
+	 break;
+
       case OPCODE_MAD:	
       case OPCODE_CMP:
       case OPCODE_LRP:




More information about the mesa-commit mailing list