[Mesa-dev] [PATCH 5/6] nir/lower_tex: add support to clamp texture coords

Fri Sep 18 09:24:37 PDT 2015

On Fri, Sep 18, 2015 at 11:55 AM, Kenneth Graunke <kenneth at whitecape.org> wrote:
> On Friday, September 18, 2015 10:55:08 AM Rob Clark wrote:
>> From: Rob Clark <robclark at freedesktop.org>
>>
>> Some hardware needs to clamp texture coordinates to [0.0, 1.0] in the
>> shader to emulate GL_CLAMP.  This is added to lower_tex_proj since, in
>> the case of projected coords, the clamping needs to happen *after*
>> projection.
>>
>> v2: comments/suggestions from Ilia and Eric, use txs to get texture size
>> and clamp RECT textures to their dimensions rather than [0.0, 1.0] to
>> avoid having to lower RECT textures to 2D.
>>
>> Signed-off-by: Rob Clark <robclark at freedesktop.org>
>> ---
>>  src/glsl/nir/nir.h           | 18 ++++++++++
>>  src/glsl/nir/nir_lower_tex.c | 86 +++++++++++++++++++++++++++++++++++++++++++-
>>  2 files changed, 103 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
>> index 3c908b9..255d455 100644
>> --- a/src/glsl/nir/nir.h
>> +++ b/src/glsl/nir/nir.h
>> @@ -1850,6 +1850,24 @@ typedef struct nir_lower_tex_options {
>>      * texture dims to normalize.
>>      */
>>     bool lower_rect;
>> +
>> +   /**
>> +    * To emulate certain texture wrap modes, this can be used
>> +    * to saturate the specified tex coord to [0.0, 1.0].  The
>> +    * bits are according to sampler #, ie. if, for example:
>> +    *
>> +    *   (conf->saturate_s & (1 << n))
>> +    *
>> +    * is true, then the s coord for sampler n is saturated.
>> +    *
>> +    * Note that clamping must happen *after* projector lowering
>> +    * so any projected texture sample instruction with a clamped
>> +    * coordinate gets automatically lowered, regardless of the
>> +    * 'lower_txp' setting.
>> +    */
>> +   unsigned saturate_s;
>> +   unsigned saturate_t;
>> +   unsigned saturate_r;
>>  } nir_lower_tex_options;
>>
>>  void nir_lower_tex(nir_shader *shader,
>> diff --git a/src/glsl/nir/nir_lower_tex.c b/src/glsl/nir/nir_lower_tex.c
>> index a71a5c5..7c7a077 100644
>> --- a/src/glsl/nir/nir_lower_tex.c
>> +++ b/src/glsl/nir/nir_lower_tex.c
>> @@ -29,6 +29,10 @@
>>   *     asking the texture operation to do so.
>>   *   + lowering RECT: converts the un-normalized RECT texture coordinates
>>   *     to normalized coordinates with txs plus ALU instructions
>> + *   + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
>> + *     inserts instructions to clamp specified coordinates to [0.0, 1.0].
>> + *     Note that this automatically triggers texture projector lowering if
>> + *     needed, since clamping must happen after projector lowering.
>>   */
>>
>>  #include "nir.h"
>> @@ -164,6 +168,70 @@ lower_rect(nir_builder *b, nir_tex_instr *tex)
>>     tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
>>  }
>>
>> +static void
>> +saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
>> +{
>> +   b->cursor = nir_before_instr(&tex->instr);
>> +
>> +   /* Walk through the sources saturating the requested arguments. */
>> +   for (unsigned i = 0; i < tex->num_srcs; i++) {
>> +      if (tex->src[i].src_type != nir_tex_src_coord)
>> +         continue;
>> +
>> +      nir_ssa_def *src =
>> +         nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
>> +
>> +      /* split src into components: */
>> +      nir_ssa_def *comp[4];
>> +
>> +      for (unsigned j = 0; j < tex->coord_components; j++)
>> +         comp[j] = nir_channel(b, src, j);
>> +
>> +      /* clamp requested components, array index does not get clamped: */
>> +      unsigned ncomp = tex->coord_components;
>> +      if (tex->is_array)
>> +         ncomp--;
>> +
>> +      for (unsigned j = 0; j < ncomp; j++) {
>> +         if ((1 << j) & sat_mask) {
>> +            if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
>> +               /* non-normalized texture coords, so clamp to texture
>> +                * size rather than [0.0, 1.0]
>> +                */
>> +               nir_ssa_def *txs = get_texture_size(b, tex);
>> +               comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
>> +               comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
>> +            } else {
>> +               comp[j] = nir_fsat(b, comp[j]);
>> +            }
>> +         }
>> +      }
>
> You might be able to do this with vector operations instead of
> scalarizing...but I'm not sure if it would actually be better.  *shrug*

The problem is that sat_mask might not have all bits set.  I suppose
with a bit more logic I could handle the case where all components are
clamped, but I think everyone is converting to scalar (and then
possibly converting back), so I didn't think it was worth the extra
logic.

>> +
>> +      /* and move the result back into a single vecN: */
>> +      switch (tex->coord_components) {
>> +      case 4:
>> +         src = nir_vec4(b, comp[0], comp[1], comp[2], comp[3]);
>> +         break;
>> +      case 3:
>> +         src = nir_vec3(b, comp[0], comp[1], comp[2]);
>> +         break;
>> +      case 2:
>> +         src = nir_vec2(b, comp[0], comp[1]);
>> +         break;
>> +      case 1:
>> +         src = comp[0];
>> +         break;
>> +      default:
>> +         unreachable("bad texture coord count");
>> +         break;
>> +      }
>
> A nir_vec() helper that takes the array and num_components would make a
> nice addition to nir_builder.  You could then do src = nir_vec(b, comp,
> tex->coord_components).

Hmm, looks like we could use that in nir_lower_load_const_to_scalar.c
too.  It would be nice if we could get a helper that would deal with the
other switch(num_components) (where we re-assemble projected and
unprojected), and maybe the vaguely similar thing in
nir_normalize_cubemap_coords.c (re-assembling normalized and
orig_coord), although I'm not quite sure what that would look like.

I guess I'll put together a patch on top of this which handles the
first two cases at least.

BR,
-R

> Without any changes, this is:
> Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
>
>> +
>> +      nir_instr_rewrite_src(&tex->instr,
>> +                            &tex->src[i].src,
>> +                            nir_src_for_ssa(src));
>> +   }
>> +}
>> +
>>  static bool
>>  nir_lower_tex_block(nir_block *block, void *void_state)
>>  {
>> @@ -177,12 +245,28 @@ nir_lower_tex_block(nir_block *block, void *void_state)
>>        nir_tex_instr *tex = nir_instr_as_tex(instr);
>>        bool lower_txp = !!(state->options->lower_txp & (1 << tex->sampler_dim));
>>
>> -      if (lower_txp)
>> +      /* mask of src coords to saturate (clamp): */
>> +      unsigned sat_mask = 0;
>> +
>> +      if ((1 << tex->sampler_index) & state->options->saturate_r)
>> +         sat_mask |= (1 << 2);    /* .z */
>> +      if ((1 << tex->sampler_index) & state->options->saturate_t)
>> +         sat_mask |= (1 << 1);    /* .y */
>> +      if ((1 << tex->sampler_index) & state->options->saturate_s)
>> +         sat_mask |= (1 << 0);    /* .x */
>> +
>> +      /* If we are clamping any coords, we must lower projector first
>> +       * as clamping happens *after* projection:
>> +       */
>> +      if (lower_txp || sat_mask)
>>           project_src(b, tex);
>>
>>        if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) &&
>>            state->options->lower_rect)
>>           lower_rect(b, tex);
>> +
>> +      if (sat_mask)
>> +         saturate_src(b, tex, sat_mask);
>>     }
>>
>>     return true;
>>