[Mesa-dev] [PATCH v2] i965: fix textureGrad for cubemaps

Fri Sep 18 05:34:36 PDT 2015

On Fri, 2015-09-18 at 15:02 +0300, Tapani Pälli wrote:
> Fixes bugs exposed by commit
> 2b1cdb0eddb73f62e4848d4b64840067f1f70865 in:
>    ES3-CTS.gtf.GL3Tests.shadow.shadow_execution_frag
> 
> No regressions observed in deqp, CTS or Piglit.
> 
> v2: address review feedback from Iago Toral:
>    - move rho calculation to else branch
>    - optimize dx and dy calculation
>    - fix documentation inconsistensies
> 
> Signed-off-by: Tapani Pälli <tapani.palli at intel.com>
> Signed-off-by: Kevin Rogovin <kevin.rogovin at intel.com>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91114
> Cc: "10.6 11.0" <mesa-stable at lists.freedesktop.org>
> ---
>  .../dri/i965/brw_lower_texture_gradients.cpp       | 200 +++++++++++++++++++--
>  1 file changed, 181 insertions(+), 19 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
> index 7a5f983..03dc021 100644
> --- a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
> @@ -48,6 +48,7 @@ public:
>  
>  private:
>     void emit(ir_variable *, ir_rvalue *);
> +   ir_variable *temp(void *ctx, const glsl_type *type, const char *name);
>  };
>  
>  /**
> @@ -60,6 +61,17 @@ lower_texture_grad_visitor::emit(ir_variable *var, ir_rvalue *value)
>     base_ir->insert_before(assign(var, value));
>  }
>  
> +/**
> + * Emit a temporary variable declaration
> + */
> +ir_variable *
> +lower_texture_grad_visitor::temp(void *ctx, const glsl_type *type, const char *name)
> +{
> +   ir_variable *var = new(ctx) ir_variable(type, name, ir_var_temporary);
> +   base_ir->insert_before(var);
> +   return var;
> +}
> +
>  static const glsl_type *
>  txs_type(const glsl_type *type)
>  {
> @@ -144,28 +156,178 @@ lower_texture_grad_visitor::visit_leave(ir_texture *ir)
>        new(mem_ctx) ir_variable(grad_type, "dPdy", ir_var_temporary);
>     emit(dPdy, mul(size, ir->lod_info.grad.dPdy));
>  
> -   /* Calculate rho from equation 3.20 of the GL 3.0 specification. */
> -   ir_rvalue *rho;
> -   if (dPdx->type->is_scalar()) {
> -      rho = expr(ir_binop_max, expr(ir_unop_abs, dPdx),
> -			       expr(ir_unop_abs, dPdy));
> -   } else {
> -      rho = expr(ir_binop_max, expr(ir_unop_sqrt, dot(dPdx, dPdx)),
> -			       expr(ir_unop_sqrt, dot(dPdy, dPdy)));
> -   }
> -
> -   /* lambda_base = log2(rho).  We're ignoring GL state biases for now.
> -    *
> -    * For cube maps the result of these formulas is giving us a value of rho
> -    * that is twice the value we should use, so divide it by 2 or,
> -    * alternatively, remove one unit from the result of the log2 computation.
> -    */
>     ir->op = ir_txl;
>     if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
> -      ir->lod_info.lod = expr(ir_binop_add,
> -                              expr(ir_unop_log2, rho),
> -                              new(mem_ctx) ir_constant(-1.0f));
> +      /* Cubemap texture lookups first generate a texture coordinate normalized
> +       * to [-1, 1] on the appropiate face. The appropiate face is determined
> +       * by which component has largest magnitude and its sign. The texture
> +       * coordinate is the quotient of the remaining texture coordinates against
> +       * that absolute value of the component of largest magnitude. This
> +       * division requires that the computing of the derivative of the texel
> +       * coordinate must use the quotient rule. The high level GLSL code is as
> +       * follows:
> +       *
> +       * Step 1: selection
> +       *
> +       * vec3 abs_p, Q, dQdx, dQdy;
> +       * abs_p = abs(ir->coordinate);
> +       * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
> +       *    Q = ir->coordinate.yzx;
> +       *    dQdx = ir->lod_info.grad.dPdx.yzx;
> +       *    dQdy = ir->lod_info.grad.dPdy.yzx;
> +       * }
> +       * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
> +       *    Q = ir->coordinate.xzy;
> +       *    dQdx = ir->lod_info.grad.dPdx.xzy;
> +       *    dQdy = ir->lod_info.grad.dPdy.xzy;
> +       * }
> +       * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
> +       *    Q = ir->coordinate;
> +       *    dQdx = ir->lod_info.grad.dPdx;
> +       *    dQdy = ir->lod_info.grad.dPdy;
> +       * }
> +       *
> +       * Step 2: use quotient rule to compute derivative. The normalized to
> +       * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
> +       * only concerned with the magnitudes of the derivatives whose values are
> +       * not affected by the sign. We drop the sign from the computation.
> +       *
> +       * vec2 dx, dy;
> +       * float recip;
> +       *
> +       * recip = 1.0 / Q.z;
> +       * dx = recip * ( dqdx.xy - Q.xy * (dQdx.z * recip) );
> +       * dy = recip * ( dqdy.xy - Q.xy * (dQdy.z * recip) );
                          ^^^^^^^^
                          dQdx and dQdy in the 2 lines above

> +       * Step 3: compute LOD. At this point we have the derivatives of the
> +       * texture coordinates normalized to [-1,1]. We take the LOD to be
> +       *  result = log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * 0.5 * L)
> +       *         = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * L)
> +       *         = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
> +       *         = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
> +       *         = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
> +       * where L is the dimension of the cubemap. The code is:
> +       *
> +       * float M, result;
> +       * M = max(dot(dx, dx), dot(dy, dy));
> +       * L = textureSize(sampler, 0).x;
> +       * result = -1.0 + 0.5 * log2(L * L * M);
> +       */
> +
> +/* Helpers to make code more human readable. */
> +#define EMIT(instr) base_ir->insert_before(instr)
> +#define THEN(irif, instr) irif->then_instructions.push_tail(instr)
> +#define CLONE(x) x->clone(mem_ctx, NULL)
> +
> +      ir_variable *abs_p = temp(mem_ctx, glsl_type::vec3_type, "abs_p");
> +
> +      EMIT(assign(abs_p, swizzle_for_size(abs(CLONE(ir->coordinate)), 3)));
> +
> +      ir_variable *Q = temp(mem_ctx, glsl_type::vec3_type, "Q");
> +      ir_variable *dQdx = temp(mem_ctx, glsl_type::vec3_type, "dQdx");
> +      ir_variable *dQdy = temp(mem_ctx, glsl_type::vec3_type, "dQdy");
> +
> +      /* unmodified dPdx, dPdy values */
> +      ir_rvalue *dPdx = ir->lod_info.grad.dPdx;
> +      ir_rvalue *dPdy = ir->lod_info.grad.dPdy;
> +
> +      /* 1. compute selector */
> +
> +      /* if (abs_p.x >= max(abs_p.y, abs_p.z))  ... */
> +      ir_if *branch_x =
> +         new(mem_ctx) ir_if(gequal(swizzle_x(abs_p),
> +                                   max2(swizzle_y(abs_p), swizzle_z(abs_p))));
> +
> +      /* Q = p.yzx;
> +       * dQdx = dPdx.yzx;
> +       * dQdy = dPdy.yzx;
> +       */
> +      int yzx = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, 0);
> +      THEN(branch_x, assign(Q, swizzle(CLONE(ir->coordinate), yzx, 3)));
> +      THEN(branch_x, assign(dQdx, swizzle(CLONE(dPdx), yzx, 3)));
> +      THEN(branch_x, assign(dQdy, swizzle(CLONE(dPdy), yzx, 3)));
> +      EMIT(branch_x);
> +
> +      /* if (abs_p.y >= max(abs_p.x, abs_p.z)) */
> +      ir_if *branch_y =
> +         new(mem_ctx) ir_if(gequal(swizzle_y(abs_p),
> +                                   max2(swizzle_x(abs_p), swizzle_z(abs_p))));
> +
> +      /* Q = p.xzy;
> +       * dQdx = dPdx.xzy;
> +       * dQdy = dPdy.xzy;
> +       */
> +      int xzy = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Z, SWIZZLE_Y, 0);
> +      THEN(branch_y, assign(Q, swizzle(CLONE(ir->coordinate), xzy, 3)));
> +      THEN(branch_y, assign(dQdx, swizzle(CLONE(dPdx), xzy, 3)));
> +      THEN(branch_y, assign(dQdy, swizzle(CLONE(dPdy), xzy, 3)));
> +      EMIT(branch_y);
> +
> +      /* if (abs_p.z >= max(abs_p.x, abs_p.y)) */
> +      ir_if *branch_z =
> +         new(mem_ctx) ir_if(gequal(swizzle_z(abs_p),
> +                            max2(swizzle_x(abs_p), swizzle_y(abs_p))));
> +
> +      /* Q = p;
> +       * dQdx = dPdx;
> +       * dQdy = dPdy;
> +       */
> +      THEN(branch_z, assign(Q, swizzle_for_size(CLONE(ir->coordinate), 3)));
> +      THEN(branch_z, assign(dQdx, CLONE(dPdx)));
> +      THEN(branch_z, assign(dQdy, CLONE(dPdy)));
> +      EMIT(branch_z);
> +
> +      /* 2. quotient rule */
> +      ir_variable *recip = temp(mem_ctx, glsl_type::float_type, "recip");
> +      EMIT(assign(recip, div(new(mem_ctx) ir_constant(1.0f), swizzle_z(Q))));
> +
> +      ir_variable *dx = temp(mem_ctx, glsl_type::vec2_type, "dx");
> +      ir_variable *dy = temp(mem_ctx, glsl_type::vec2_type, "dy");
> +
> +      /* dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
> +       * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
> +       */

Maybe update the comment above so it reflects the change you just added
in this version:

/* dx = recip * ( dQdx.xy - (Q.xy * recip) * dQdx.z) );
 * dy = recip * ( dQdy.xy - (Q.xy * recip) * dQdy.z) );
 */

> +      ir_variable *tmp = temp(mem_ctx, glsl_type::vec2_type, "tmp");
> +      EMIT(assign(tmp, mul(swizzle_xy(Q), recip)));
> +      EMIT(assign(dx, mul(recip, sub(swizzle_xy(dQdx),
> +                                     mul(tmp, swizzle_z(dQdx))))));
> +      EMIT(assign(dy, mul(recip, sub(swizzle_xy(dQdy),
> +                                     mul(tmp, swizzle_z(dQdy))))));
> +
> +      /* M = max(dot(dx, dx), dot(dy, dy)); */
> +      ir_variable *M = temp(mem_ctx, glsl_type::float_type, "M");
> +      EMIT(assign(M, max2(dot(dx, dx), dot(dy, dy))));
> +
> +      /* size has textureSize() of LOD 0 */
> +      ir_variable *L = temp(mem_ctx, glsl_type::float_type, "L");
> +      EMIT(assign(L, swizzle_x(size)));
> +
> +      ir_variable *result = temp(mem_ctx, glsl_type::float_type, "result");
> +
> +      /* result = -1.0 + 0.5 * log2(L * L * M); */
> +      EMIT(assign(result,
> +                  add(new(mem_ctx)ir_constant(-1.0f),
> +                      mul(new(mem_ctx)ir_constant(0.5f),
> +                          expr(ir_unop_log2, mul(mul(L, L), M))))));
> +
> +      /* 3. final assignment of parameters to textureLod call */
> +      ir->lod_info.lod = new (mem_ctx) ir_dereference_variable(result);
> +
> +#undef THEN
> +#undef EMIT
> +
>     } else {
> +      /* Calculate rho from equation 3.20 of the GL 3.0 specification. */
> +      ir_rvalue *rho;
> +      if (dPdx->type->is_scalar()) {
> +         rho = expr(ir_binop_max, expr(ir_unop_abs, dPdx),
> +                    expr(ir_unop_abs, dPdy));
> +      } else {
> +         rho = expr(ir_binop_max, expr(ir_unop_sqrt, dot(dPdx, dPdx)),
> +                    expr(ir_unop_sqrt, dot(dPdy, dPdy)));
> +      }
> +
> +      /* lambda_base = log2(rho).  We're ignoring GL state biases for now. */
>        ir->lod_info.lod = expr(ir_unop_log2, rho);
>     }

Reviewed-by: Iago Toral Quiroga <itoral at igalia.com>

It would be nice if you could also add a reference to any spec/document
where the underlying Math is explained (if there is a specific source
material that Kevin used for it). If there is such thing, I would add
that in the comment where you enumerate the steps, right before Step 1.

Iago