Mesa (8.0): i965: Compute dFdy() correctly for FBOs.

Ian Romanick idr at kemper.freedesktop.org
Tue Jul 31 19:29:22 UTC 2012


Module: Mesa
Branch: 8.0
Commit: c3ad361f47b5e6a9066935b08adc403896f6e95d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=c3ad361f47b5e6a9066935b08adc403896f6e95d

Author: Paul Berry <stereotype441 at gmail.com>
Date:   Wed Jun 20 13:40:45 2012 -0700

i965: Compute dFdy() correctly for FBOs.

On i965, dFdx() and dFdy() are computed by taking advantage of the
fact that each consecutive set of 4 pixels dispatched to the fragment
shader always constitutes a contiguous 2x2 block of pixels in a fixed
arrangement known as a "sub-span".  So we calculate dFdx() by taking
the difference between the values computed for the left and right
halves of the sub-span, and we calculate dFdy() by taking the
difference between the values computed for the top and bottom halves
of the sub-span.

However, there's a subtlety when FBOs are in use: since FBOs use a
coordinate system where the origin is at the upper left, and window
system framebuffers use a coordinate system where the origin is at the
lower left, the computation of dFdy() needs to be negated for FBOs.

This patch modifies the fragment shader back-ends to negate the value
of dFdy() when an FBO is in use.  It also modifies the code that
populates the program key (brw_wm_populate_key() and
brw_fs_precompile()) so that they always record in the program key
whether we are rendering to an FBO or to a window system framebuffer;
this ensures that the fragment shader will get recompiled when
switching between FBO and non-FBO use.

This will result in unnecessary recompiles of fragment shaders that
don't use dFdy().  To fix that, we will need to adapt the GLSL and
NV_fragment_program front-ends to record whether or not a given shader
uses dFdy().  I plan to implement this in a future patch series; I've
left FIXME comments in the code as a reminder.

Fixes Piglit test "fbo-deriv".

NOTE: This is a candidate for stable release branches.

Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
(cherry picked from commit 82d25963a838cfebdeb9b080169979329ee850ea)

---

 src/mesa/drivers/dri/i965/brw_fs.cpp      |   10 ++++++++++
 src/mesa/drivers/dri/i965/brw_fs.h        |    3 ++-
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp |   14 +++++++++++---
 src/mesa/drivers/dri/i965/brw_wm.c        |   10 ++++++++++
 src/mesa/drivers/dri/i965/brw_wm.h        |    3 ++-
 src/mesa/drivers/dri/i965/brw_wm_emit.c   |   15 +++++++++++----
 6 files changed, 46 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 8584bb2..8c20190 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1878,6 +1878,13 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
    struct brw_context *brw = brw_context(ctx);
    struct brw_wm_prog_key key;
 
+   /* As a temporary measure we assume that all programs use dFdy() (and hence
+    * need to be compiled differently depending on whether we're rendering to
+    * an FBO).  FIXME: set this bool correctly based on the contents of the
+    * program.
+    */
+   bool program_uses_dfdy = true;
+
    if (!prog->_LinkedShaders[MESA_SHADER_FRAGMENT])
       return true;
 
@@ -1922,6 +1929,9 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
 
    if (fp->Base.InputsRead & FRAG_BIT_WPOS) {
       key.drawable_height = ctx->DrawBuffer->Height;
+   }
+
+   if ((fp->Base.InputsRead & FRAG_BIT_WPOS) || program_uses_dfdy) {
       key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index d43a782..bb42de1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -542,7 +542,8 @@ public:
 			   struct brw_reg src);
    void generate_discard(fs_inst *inst);
    void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
-   void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
+   void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
+                     bool negate_value);
    void generate_spill(fs_inst *inst, struct brw_reg src);
    void generate_unspill(fs_inst *inst, struct brw_reg dst);
    void generate_pull_constant_load(fs_inst *inst, struct brw_reg dst);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index cc70904..ebead4f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -475,8 +475,13 @@ fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
    brw_ADD(p, dst, src0, negate(src1));
 }
 
+/* The negate_value boolean is used to negate the derivative computation for
+ * FBOs, since they place the origin at the upper left instead of the lower
+ * left.
+ */
 void
-fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
+fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
+                         bool negate_value)
 {
    struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
 				 BRW_REGISTER_TYPE_F,
@@ -490,7 +495,10 @@ fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
 				 BRW_WIDTH_4,
 				 BRW_HORIZONTAL_STRIDE_0,
 				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
-   brw_ADD(p, dst, src0, negate(src1));
+   if (negate_value)
+      brw_ADD(p, dst, src1, negate(src0));
+   else
+      brw_ADD(p, dst, src0, negate(src1));
 }
 
 void
@@ -913,7 +921,7 @@ fs_visitor::generate_code()
 	 generate_ddx(inst, dst, src[0]);
 	 break;
       case FS_OPCODE_DDY:
-	 generate_ddy(inst, dst, src[0]);
+	 generate_ddy(inst, dst, src[0], c->key.render_to_fbo);
 	 break;
 
       case FS_OPCODE_SPILL:
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 6227baa..11fb03d 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -418,6 +418,13 @@ static void brw_wm_populate_key( struct brw_context *brw,
    GLuint line_aa;
    GLuint i;
 
+   /* As a temporary measure we assume that all programs use dFdy() (and hence
+    * need to be compiled differently depending on whether we're rendering to
+    * an FBO).  FIXME: set this bool correctly based on the contents of the
+    * program.
+    */
+   bool program_uses_dfdy = true;
+
    memset(key, 0, sizeof(*key));
 
    /* Build the index for table lookup
@@ -516,6 +523,9 @@ static void brw_wm_populate_key( struct brw_context *brw,
     */
    if (fp->program.Base.InputsRead & FRAG_BIT_WPOS) {
       key->drawable_height = ctx->DrawBuffer->Height;
+   }
+
+   if ((fp->program.Base.InputsRead & FRAG_BIT_WPOS) || program_uses_dfdy) {
       key->render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 8f1cb8c..2cde2a0 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -346,7 +346,8 @@ void emit_ddxy(struct brw_compile *p,
 	       const struct brw_reg *dst,
 	       GLuint mask,
 	       bool is_ddx,
-	       const struct brw_reg *arg0);
+	       const struct brw_reg *arg0,
+	       bool negate_value);
 void emit_delta_xy(struct brw_compile *p,
 		   const struct brw_reg *dst,
 		   GLuint mask,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index 270e321..e565b71 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -457,12 +457,16 @@ void emit_frontfacing(struct brw_compile *p,
  * between each other.  We could probably do it like ddx and swizzle the right
  * order later, but bail for now and just produce
  * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
+ *
+ * The negate_value boolean is used to negate the d/dy computation for FBOs,
+ * since they place the origin at the upper left instead of the lower left.
  */
 void emit_ddxy(struct brw_compile *p,
 	       const struct brw_reg *dst,
 	       GLuint mask,
 	       bool is_ddx,
-	       const struct brw_reg *arg0)
+	       const struct brw_reg *arg0,
+               bool negate_value)
 {
    int i;
    struct brw_reg src0, src1;
@@ -498,7 +502,10 @@ void emit_ddxy(struct brw_compile *p,
 			   BRW_HORIZONTAL_STRIDE_0,
 			   BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
 	 }
-	 brw_ADD(p, dst[i], src0, negate(src1));
+         if (negate_value)
+            brw_ADD(p, dst[i], src1, negate(src0));
+         else
+            brw_ADD(p, dst[i], src0, negate(src1));
       }
    }
    if (mask & SATURATE)
@@ -1739,11 +1746,11 @@ void brw_wm_emit( struct brw_wm_compile *c )
 	 break;
 
       case OPCODE_DDX:
-	 emit_ddxy(p, dst, dst_flags, true, args[0]);
+	 emit_ddxy(p, dst, dst_flags, true, args[0], false);
 	 break;
 
       case OPCODE_DDY:
-	 emit_ddxy(p, dst, dst_flags, false, args[0]);
+	 emit_ddxy(p, dst, dst_flags, false, args[0], c->key.render_to_fbo);
 	 break;
 
       case OPCODE_DP2:




More information about the mesa-commit mailing list