[Mesa-dev] [PATCH v2 2/7] nir/lower_tex: add lowering for texture gradient on cube maps

Mon Dec 12 13:11:43 UTC 2016

This is ported from the Intel lowering pass that we use with GLSL IR.
The NIR pass only handles cube maps, not shadow samplers, which are
also lowered for gen < 8 on Intel hardware. We will add support for
that in a later patch, at which point we should be able to remove
the GLSL IR lowering pass.

v2:
- added a helper to retrieve ddx/ddy parameters (Ken)
- No need to make size.z=1.0, we are only using component x anyway (Iago)
---
 src/compiler/nir/nir.h           |   5 +
 src/compiler/nir/nir_lower_tex.c | 265 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 270 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 544d4ba..600e3d6 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2394,6 +2394,11 @@ typedef struct nir_lower_tex_options {
     * of the texture are lowered to linear.
     */
    unsigned lower_srgb;
+
+   /**
+    * If true, lower nir_texop_txd on cube maps with nir_texop_txl.
+    */
+   bool lower_txd_cube_map;
 } nir_lower_tex_options;
 
 bool nir_lower_tex(nir_shader *shader,
diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c
index ccca59b..da024e2 100644
--- a/src/compiler/nir/nir_lower_tex.c
+++ b/src/compiler/nir/nir_lower_tex.c
@@ -305,6 +305,265 @@ lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex)
 }
 
 static void
+get_ddx_ddy(nir_tex_instr *tex, nir_ssa_def **ddx, nir_ssa_def **ddy)
+{
+   /* Fetch the explicit derivative sources of tex into *ddx and *ddy.
+    * Both start as NULL so that a missing source is caught by the assert
+    * below instead of leaving the caller with an indeterminate pointer.
+    */
+   *ddx = *ddy = NULL;
+   for (unsigned i = 0; i < tex->num_srcs; i++) {
+      if (tex->src[i].src_type == nir_tex_src_ddx)
+         *ddx = tex->src[i].src.ssa;
+      else if (tex->src[i].src_type == nir_tex_src_ddy)
+         *ddy = tex->src[i].src.ssa;
+   }
+   assert(*ddx != NULL && *ddy != NULL);
+}
+
+/*
+ * Emits a txl that reuses tex's coordinate, projector, comparitor and offset
+ * sources with the given lod, rewrites uses of tex to it and removes tex.
+ */
+static void
+replace_gradient_with_lod(nir_builder *b, nir_ssa_def *lod, nir_tex_instr *tex)
+{
+   /* Check whether we need projector, shadow comparitor or offset */
+   int comparitor_index = -1;
+   int projector_index = -1;
+   int offset_index = -1;
+   int extra_srcs = 0;
+   for (int i = 0; i < tex->num_srcs; i++) {
+      switch (tex->src[i].src_type) {
+      case nir_tex_src_projector:
+         projector_index = i;
+         extra_srcs++;
+         break;
+      case nir_tex_src_offset:
+         offset_index = i;
+         extra_srcs++;
+         break;
+      case nir_tex_src_comparitor:
+         comparitor_index = i;
+         extra_srcs++;
+         break;
+      default:
+         break;
+      }
+   }
+
+   /* Emit textureLod(). Coordinate and the lod parameters are always
+    * required; the ddx/ddy sources of tex are deliberately not copied.
+    */
+   int num_srcs = 2 + extra_srcs;
+   nir_tex_instr *txl = nir_tex_instr_create(b->shader, num_srcs);
+
+   txl->op = nir_texop_txl;
+   txl->sampler_dim = tex->sampler_dim;
+   txl->texture_index = tex->texture_index;
+   txl->dest_type = tex->dest_type;
+   txl->is_array = tex->is_array;
+   txl->is_shadow = tex->is_shadow;
+   txl->is_new_style_shadow = tex->is_new_style_shadow;
+   txl->sampler_index = tex->sampler_index;
+   txl->texture = (nir_deref_var *)
+      nir_copy_deref(txl, &tex->texture->deref);
+   txl->sampler = (nir_deref_var *)
+      nir_copy_deref(txl, &tex->sampler->deref);
+   txl->coord_components = tex->coord_components;
+
+   nir_ssa_dest_init(&txl->instr, &txl->dest, 4, 32, NULL);
+
+   int src_num = 0;
+   assert(tex->src[0].src_type == nir_tex_src_coord);
+   nir_src_copy(&txl->src[src_num].src, &tex->src[0].src, txl);
+   txl->src[src_num].src_type = nir_tex_src_coord;
+   src_num++;
+
+   if (projector_index >= 0) {
+      assert(tex->src[projector_index].src_type == nir_tex_src_projector);
+      nir_src_copy(&txl->src[src_num].src,
+                   &tex->src[projector_index].src, txl);
+      txl->src[src_num].src_type = nir_tex_src_projector;
+      src_num++;
+   }
+
+   if (comparitor_index >= 0) {
+      assert(tex->src[comparitor_index].src_type == nir_tex_src_comparitor);
+      nir_src_copy(&txl->src[src_num].src,
+                   &tex->src[comparitor_index].src, txl);
+      txl->src[src_num].src_type = nir_tex_src_comparitor;
+      src_num++;
+   }
+
+   if (offset_index >= 0) {
+      assert(tex->src[offset_index].src_type == nir_tex_src_offset);
+      nir_src_copy(&txl->src[src_num].src,
+                   &tex->src[offset_index].src, txl);
+      txl->src[src_num].src_type = nir_tex_src_offset;
+      src_num++;
+   }
+
+   txl->src[src_num].src = nir_src_for_ssa(lod);
+   txl->src[src_num].src_type = nir_tex_src_lod;
+   src_num++;
+
+   assert(src_num == num_srcs);
+
+   /* txl->dest was already initialized above; it only needs inserting. */
+   nir_builder_instr_insert(b, &txl->instr);
+
+   nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(&txl->dest.ssa));
+
+   nir_instr_remove(&tex->instr);
+}
+
+static void
+lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex)
+{
+   assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
+   assert(tex->op == nir_texop_txd);
+   assert(tex->dest.is_ssa);
+
+   /* Use textureSize() to get the width and height of LOD 0 */
+   nir_ssa_def *size = get_texture_size(b, tex);
+
+   /* Cubemap texture lookups first generate a texture coordinate normalized
+    * to [-1, 1] on the appropriate face. The appropriate face is determined
+    * by which component has largest magnitude and its sign. The texture
+    * coordinate is the quotient of the remaining texture coordinates against
+    * that absolute value of the component of largest magnitude. Because of
+    * this division, computing the derivative of the texel coordinate
+    * requires the quotient rule. The high level GLSL code is as
+    * follows:
+    *
+    * Step 1: selection
+    *
+    * vec3 abs_p, Q, dQdx, dQdy;
+    * abs_p = abs(ir->coordinate);
+    * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
+    *    Q = ir->coordinate.yzx;
+    *    dQdx = ir->lod_info.grad.dPdx.yzx;
+    *    dQdy = ir->lod_info.grad.dPdy.yzx;
+    * }
+    * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
+    *    Q = ir->coordinate.xzy;
+    *    dQdx = ir->lod_info.grad.dPdx.xzy;
+    *    dQdy = ir->lod_info.grad.dPdy.xzy;
+    * }
+    * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
+    *    Q = ir->coordinate;
+    *    dQdx = ir->lod_info.grad.dPdx;
+    *    dQdy = ir->lod_info.grad.dPdy;
+    * }
+    *
+    * Step 2: use quotient rule to compute derivative. The normalized to
+    * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
+    * only concerned with the magnitudes of the derivatives whose values are
+    * not affected by the sign. We drop the sign from the computation.
+    *
+    * vec2 dx, dy;
+    * float recip;
+    *
+    * recip = 1.0 / Q.z;
+    * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
+    * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
+    *
+    * Step 3: compute LOD. At this point we have the derivatives of the
+    * texture coordinates normalized to [-1,1]. We take the LOD to be
+    *  result = log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * 0.5 * L)
+    *         = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * L)
+    *         = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
+    *         = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
+    *         = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
+    * where L is the dimension of the cubemap. The code is:
+    *
+    * float M, result;
+    * M = max(dot(dx, dx), dot(dy, dy));
+    * L = textureSize(sampler, 0).x;
+    * result = -1.0 + 0.5 * log2(L * L * M);
+    */
+
+   /* coordinate */
+   nir_ssa_def *p = tex->src[0].src.ssa;
+
+   /* unmodified dPdx, dPdy values */
+   nir_ssa_def *dPdx, *dPdy;
+   get_ddx_ddy(tex, &dPdx, &dPdy);
+
+   nir_ssa_def *abs_p = nir_fabs(b, p);
+   nir_ssa_def *abs_p_x = nir_channel(b, abs_p, 0);
+   nir_ssa_def *abs_p_y = nir_channel(b, abs_p, 1);
+   nir_ssa_def *abs_p_z = nir_channel(b, abs_p, 2);
+
+   /* 1. compute selector: z checked first, then y, with x as the fallback */
+   nir_ssa_def *Q, *dQdx, *dQdy;
+
+   nir_ssa_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y));
+   nir_ssa_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z));
+
+   unsigned yzx[4] = { 1, 2, 0, 0 };
+   unsigned xzy[4] = { 0, 2, 1, 0 };
+
+   Q = nir_bcsel(b, cond_z,
+                 p,
+                 nir_bcsel(b, cond_y,
+                           nir_swizzle(b, p, xzy, 3, false),
+                           nir_swizzle(b, p, yzx, 3, false)));
+
+   dQdx = nir_bcsel(b, cond_z,
+                    dPdx,
+                    nir_bcsel(b, cond_y,
+                              nir_swizzle(b, dPdx, xzy, 3, false),
+                              nir_swizzle(b, dPdx, yzx, 3, false)));
+
+   dQdy = nir_bcsel(b, cond_z,
+                    dPdy,
+                    nir_bcsel(b, cond_y,
+                              nir_swizzle(b, dPdy, xzy, 3, false),
+                              nir_swizzle(b, dPdy, yzx, 3, false)));
+
+   /* 2. quotient rule */
+
+   /* tmp = Q.xy * recip;
+    * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
+    * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
+    */
+   nir_ssa_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2));
+
+   unsigned xy[4] = { 0, 1, 0, 0 };
+   nir_ssa_def *Q_xy = nir_swizzle(b, Q, xy, 2, false);
+   nir_ssa_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z);
+
+   nir_ssa_def *dQdx_xy = nir_swizzle(b, dQdx, xy, 2, false);
+   nir_ssa_def *dQdx_z = nir_channel(b, dQdx, 2);
+   nir_ssa_def *dx =
+      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z)));
+
+   nir_ssa_def *dQdy_xy = nir_swizzle(b, dQdy, xy, 2, false);
+   nir_ssa_def *dQdy_z = nir_channel(b, dQdy, 2);
+   nir_ssa_def *dy =
+      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z)));
+
+   /* 3. compute LOD: M = max(dot(dx, dx), dot(dy, dy)); */
+   nir_ssa_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy));
+
+   /* size has textureSize() of LOD 0 */
+   nir_ssa_def *L = nir_channel(b, size, 0);
+
+   /* lod = -1.0 + 0.5 * log2(L * L * M); */
+   nir_ssa_def *lod =
+      nir_fadd(b,
+               nir_imm_float(b, -1.0f),
+               nir_fmul(b,
+                        nir_imm_float(b, 0.5f),
+                        nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M)))));
+
+   /* 4. Replace the gradient instruction with an equivalent lod instruction */
+   replace_gradient_with_lod(b, lod, tex);
+}
+
+static void
 saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
 {
    b->cursor = nir_before_instr(&tex->instr);
@@ -529,6 +788,12 @@ nir_lower_tex_block(nir_block *block, nir_builder *b,
          linearize_srgb_result(b, tex);
          progress = true;
       }
+
+      if (tex->op == nir_texop_txd && options->lower_txd_cube_map &&
+          tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
+         lower_gradient_cube_map(b, tex);
+         progress = true;
+      }
    }
 
    return progress;
-- 
2.7.4