[Mesa-dev] [PATCH 04/17] nir: add lowering pass for y-transform

Thu May 12 23:46:20 UTC 2016

On Thu, May 12, 2016 at 7:11 PM, Kenneth Graunke <kenneth at whitecape.org> wrote:
> On Monday, May 9, 2016 3:33:52 PM PDT Rob Clark wrote:
>> From: Rob Clark <robclark at freedesktop.org>
>>
>> Signed-off-by: Rob Clark <robclark at freedesktop.org>
>> Reviewed-by: Connor Abbott <cwabbott0 at gmail.com>
>> ---
>>  src/compiler/Makefile.sources                |   1 +
>>  src/compiler/nir/nir.h                       |  11 +
>>  src/compiler/nir/nir_lower_wpos_ytransform.c | 310 ++++++++++++++++++++++++
> +++
>>  3 files changed, 322 insertions(+)
>>  create mode 100644 src/compiler/nir/nir_lower_wpos_ytransform.c
>>
>> diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
>> index 2a52319..b542a1a 100644
>> --- a/src/compiler/Makefile.sources
>> +++ b/src/compiler/Makefile.sources
>> @@ -208,6 +208,7 @@ NIR_FILES = \
>>       nir/nir_lower_vars_to_ssa.c \
>>       nir/nir_lower_var_copies.c \
>>       nir/nir_lower_vec_to_movs.c \
>> +     nir/nir_lower_wpos_ytransform.c \
>>       nir/nir_metadata.c \
>>       nir/nir_move_vec_src_uses_to_dest.c \
>>       nir/nir_normalize_cubemap_coords.c \
>> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>> index 8a616d4..474ba63 100644
>> --- a/src/compiler/nir/nir.h
>> +++ b/src/compiler/nir/nir.h
>> @@ -2374,6 +2374,17 @@ void nir_lower_two_sided_color(nir_shader *shader);
>>
>>  void nir_lower_clamp_color_outputs(nir_shader *shader);
>>
>> +typedef struct nir_lower_wpos_ytransform_options {
>> +   int state_tokens[5];
>> +   bool fs_coord_origin_upper_left :1;
>> +   bool fs_coord_origin_lower_left :1;
>> +   bool fs_coord_pixel_center_integer :1;
>> +   bool fs_coord_pixel_center_half_integer :1;
>> +} nir_lower_wpos_ytransform_options;
>> +
>> +bool nir_lower_wpos_ytransform(nir_shader *shader,
>> +                               const nir_lower_wpos_ytransform_options
> *options);
>> +
>>  void nir_lower_atomics(nir_shader *shader,
>>                         const struct gl_shader_program *shader_program);
>>  void nir_lower_to_source_mods(nir_shader *shader);
>> diff --git a/src/compiler/nir/nir_lower_wpos_ytransform.c b/src/compiler/
> nir/nir_lower_wpos_ytransform.c
>> new file mode 100644
>> index 0000000..1d53530
>> --- /dev/null
>> +++ b/src/compiler/nir/nir_lower_wpos_ytransform.c
>> @@ -0,0 +1,310 @@
>> +/*
>> + * Copyright © 2015 Red Hat
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the
> "Software"),
>> + * to deal in the Software without restriction, including without
> limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the
> next
>> + * paragraph) shall be included in all copies or substantial portions of
> the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
> OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> FROM,
>> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> IN THE
>> + * SOFTWARE.
>> + */
>> +
>> +#include "nir.h"
>> +#include "nir_builder.h"
>> +
>> +/* Lower gl_FragCoord (and fddy) to account for driver's requested
> coordinate-
>> + * origin and pixel-center vs. shader.  If transformation is required, a
>> + * gl_FbWposYTransform uniform is inserted (with the specified state-slots)
>> + * and additional instructions are inserted to transform gl_FragCoord (and
>> + * fddy src arg).
>> + *
>> + * This is based on the logic in emit_wpos()/emit_wpos_adjustment() in TGSI
>> + * compiler.
>> + *
>> + * Run before nir_lower_io.
>> + */
>> +
>> +typedef struct {
>> +   const nir_lower_wpos_ytransform_options *options;
>> +   nir_shader   *shader;
>> +   nir_builder   b;
>> +   nir_variable *transform;
>> +} lower_wpos_ytransform_state;
>> +
>> +static nir_ssa_def *
>> +get_transform(lower_wpos_ytransform_state *state)
>> +{
>> +   if (state->transform == NULL) {
>> +      /* NOTE: name must be prefixed w/ "gl_" to trigger slot based
>> +       * special handling in uniform setup:
>> +       */
>> +      nir_variable *var = nir_variable_create(state->shader,
>> +                                              nir_var_uniform,
>> +                                              glsl_vec4_type(),
>> +                                              "gl_FbWposYTransform");
>> +
>> +      var->num_state_slots = 1;
>> +      var->state_slots = ralloc_array(var, nir_state_slot, 1);
>> +      memcpy(var->state_slots[0].tokens, state->options->state_tokens,
>> +             sizeof(var->state_slots[0].tokens));
>> +
>> +      state->transform = var;
>> +   }
>> +   return nir_load_var(&state->b, state->transform);
>> +}
>> +
>> +/* NIR equiv of TGSI CMP instruction: */
>> +static nir_ssa_def *
>> +nir_cmp(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1, nir_ssa_def
> *src2)
>> +{
>> +   return nir_bcsel(b, nir_flt(b, src0, nir_imm_float(b, 0.0)), src1,
> src2);
>> +}
>> +
>> +/* see emit_wpos_adjustment() in st_mesa_to_tgsi.c */
>> +static void
>> +emit_wpos_adjustment(lower_wpos_ytransform_state *state,
>> +                     nir_intrinsic_instr *intr,
>> +                     bool invert, float adjX, float adjY[2])
>> +{
>> +   nir_builder *b = &state->b;
>> +   nir_variable *fragcoord = intr->variables[0]->var;
>> +   nir_ssa_def *wpostrans, *wpos_temp, *wpos_temp_y, *wpos_input;
>> +
>> +   assert(intr->dest.is_ssa);
>> +
>> +   b->cursor = nir_before_instr(&intr->instr);
>> +
>> +   wpostrans = get_transform(state);
>> +   wpos_input = nir_load_var(b, fragcoord);
>> +
>> +   /* First, apply the coordinate shift: */
>> +   if (adjX || adjY[0] || adjY[1]) {
>> +      if (adjY[0] != adjY[1]) {
>> +         /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
>> +          * depending on whether inversion is actually going to be applied
>> +          * or not, which is determined by testing against the inversion
>> +          * state variable used below, which will be either +1 or -1.
>> +          */
>> +         nir_ssa_def *adj_temp;
>> +
>> +         adj_temp = nir_cmp(b,
>> +                            nir_channel(b, wpostrans, invert ? 2 : 0),
>> +                            nir_imm_vec4(b, adjX, adjY[0], 0.0f, 0.0f),
>> +                            nir_imm_vec4(b, adjX, adjY[1], 0.0f, 0.0f));
>> +
>> +         wpos_temp = nir_fadd(b, wpos_input, adj_temp);
>> +      } else {
>> +         wpos_temp = nir_fadd(b,
>> +                              wpos_input,
>> +                              nir_imm_vec4(b, adjX, adjY[0], 0.0f, 0.0f));
>> +      }
>> +      wpos_input = wpos_temp;
>> +   } else {
>> +      /* MOV wpos_temp, input[wpos]
>> +       */
>> +      wpos_temp = wpos_input;
>> +   }
>> +
>> +   /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
>> +    * inversion/identity, or the other way around if we're drawing to an
> FBO.
>> +    */
>> +   if (invert) {
>> +      /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
>> +       */
>> +      wpos_temp_y = nir_ffma(b,
>> +                             nir_channel(b, wpos_temp, 1),
>> +                             nir_channel(b, wpostrans, 0),
>> +                             nir_channel(b, wpostrans, 1));
>> +   } else {
>> +      /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
>> +       */
>> +      wpos_temp_y = nir_ffma(b,
>> +                             nir_channel(b, wpos_temp, 1),
>> +                             nir_channel(b, wpostrans, 2),
>> +                             nir_channel(b, wpostrans, 3));
>> +   }
>> +
>> +   wpos_temp = nir_vec4(b,
>> +                        nir_channel(b, wpos_temp, 0),
>> +                        wpos_temp_y,
>> +                        nir_channel(b, wpos_temp, 2),
>> +                        nir_channel(b, wpos_temp, 3));
>> +
>> +   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(wpos_temp));
>> +}
>> +
>> +static void
>> +lower_fragcoord(lower_wpos_ytransform_state *state, nir_intrinsic_instr
> *intr)
>> +{
>> +   const nir_lower_wpos_ytransform_options *options = state->options;
>> +   nir_variable *fragcoord = intr->variables[0]->var;
>> +   float adjX = 0.0f;
>> +   float adjY[2] = { 0.0f, 0.0f };
>> +   bool invert = false;
>> +
>> +   /* Based on logic in emit_wpos():
>> +    *
>> +    * Query the pixel center conventions supported by the pipe driver and
> set
>> +    * adjX, adjY to help out if it cannot handle the requested one
> internally.
>> +    *
>> +    * The bias of the y-coordinate depends on whether y-inversion takes
> place
>> +    * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we
> are
>> +    * drawing to an FBO (causes additional inversion), and whether the the
> pipe
>> +    * driver origin and the requested origin differ (the latter condition
> is
>> +    * stored in the 'invert' variable).
>> +    *
>> +    * For height = 100 (i = integer, h = half-integer, l = lower, u =
> upper):
>> +    *
>> +    * center shift only:
>> +    * i -> h: +0.5
>> +    * h -> i: -0.5
>> +    *
>> +    * inversion only:
>> +    * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99
>> +    * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5
>> +    * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0
>> +    * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5
>> +    *
>> +    * inversion and center shift:
>> +    * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5
>> +    * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99
>> +    * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
>> +    * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
>> +    */
>> +
>> +   if (fragcoord->data.origin_upper_left) {
>> +      /* Fragment shader wants origin in upper-left */
>> +      if (options->fs_coord_origin_upper_left) {
>> +         /* the driver supports upper-left origin */
>> +      } else if (options->fs_coord_origin_lower_left) {
>> +         /* the driver supports lower-left origin, need to invert Y */
>> +         invert = true;
>> +      } else {
>> +         unreachable("invalid options");
>> +      }
>> +   } else {
>> +      /* Fragment shader wants origin in lower-left */
>> +      if (options->fs_coord_origin_lower_left) {
>> +         /* the driver supports lower-left origin */
>> +      } else if (options->fs_coord_origin_upper_left) {
>> +         /* the driver supports upper-left origin, need to invert Y */
>> +         invert = true;
>> +      } else {
>> +         unreachable("invalid options");
>> +      }
>> +   }
>> +
>> +   if (fragcoord->data.pixel_center_integer) {
>> +      /* Fragment shader wants pixel center integer */
>> +      if (options->fs_coord_pixel_center_integer) {
>> +         /* the driver supports pixel center integer */
>> +         adjY[1] = 1.0f;
>> +      } else if (options->fs_coord_pixel_center_half_integer) {
>> +         /* the driver supports pixel center half integer, need to bias X,Y
> */
>> +         adjX = -0.5f;
>> +         adjY[0] = -0.5f;
>> +         adjY[1] = 0.5f;
>> +      } else {
>> +         unreachable("invalid options");
>> +      }
>> +   } else {
>> +      /* Fragment shader wants pixel center half integer */
>> +      if (options->fs_coord_pixel_center_half_integer) {
>> +         /* the driver supports pixel center half integer */
>> +      } else if (options->fs_coord_pixel_center_integer) {
>> +         /* the driver supports pixel center integer, need to bias X,Y */
>> +         adjX = adjY[0] = adjY[1] = 0.5f;
>> +      } else {
>> +         unreachable("invalid options");
>> +      }
>> +   }
>> +
>> +   emit_wpos_adjustment(state, intr, invert, adjX, adjY);
>> +}
>> +
>> +/* turns 'fddy(p)' into 'fddy(fmul(p, transform.x))' */
>> +static void
>> +lower_fddy(lower_wpos_ytransform_state *state, nir_alu_instr *fddy)
>> +{
>> +   nir_builder *b = &state->b;
>> +   nir_ssa_def *p, *pt, *trans;
>> +
>> +   b->cursor = nir_before_instr(&fddy->instr);
>> +
>> +   p = nir_ssa_for_alu_src(b, fddy, 0);
>> +   trans = get_transform(state);
>> +   pt = nir_fmul(b, p, nir_channel(b, trans, 0));
>> +
>> +   nir_instr_rewrite_src(&fddy->instr,
>> +                         &fddy->src[0].src,
>> +                         nir_src_for_ssa(pt));
>> +}
>> +
>> +static bool
>> +lower_wpos_ytransform_block(lower_wpos_ytransform_state *state, nir_block
> *block)
>> +{
>> +   nir_foreach_instr_safe(instr, block) {
>> +      if (instr->type == nir_instr_type_intrinsic) {
>> +         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
>> +         if (intr->intrinsic == nir_intrinsic_load_var) {
>> +            nir_deref_var *dvar = intr->variables[0];
>> +            nir_variable *var = dvar->var;
>> +
>> +            if (strcmp(var->name, "gl_FragCoord") == 0) {
>
> How about
>
>    if (var->data.location == VARYING_SLOT_POS)
>
> as it's cheaper than strcmp()?

I have this nagging feeling that I used strcmp for a reason, but don't
remember why..  (although that might have also been a different pass
I'm thinking of..  that's the problem when patches live on lists for
so long after I wrote them)..  I'll give a try w/ piglit first with
some asserts..

>> +               /* gl_FragCoord should not have array/struct deref's: */
>> +               assert(dvar->deref.child == NULL);
>> +               lower_fragcoord(state, intr);
>> +            }
>> +         }
>> +      } else if (instr->type == nir_instr_type_alu) {
>> +         nir_alu_instr *alu = nir_instr_as_alu(instr);
>> +         if (alu->op == nir_op_fddy)
>> +            lower_fddy(state, alu);
>> +      }
>> +   }
>> +
>> +   return true;
>
> How about making this a void function now that we've switched to the new
> style nir_foreach_block?

good point.. unfortunately already pushed this but I'll make a cleanup patch..

> I'll try and review in more detail later when I get the chance - I'd
> really like to use this in i965 as well...

cool, would be nice if some of these passes are useful outside of
gallium.  Possibly some of the others (bitmap and drawpix lowering,
iirc?) could replace some things i956 currently uses meta for.. not
sure

BR,
-R

>> +}
>> +
>> +static void
>> +lower_wpos_ytransform_impl(lower_wpos_ytransform_state *state,
> nir_function_impl *impl)
>> +{
>> +   nir_builder_init(&state->b, impl);
>> +
>> +   nir_foreach_block(block, impl) {
>> +      lower_wpos_ytransform_block(state, block);
>> +   }
>> +   nir_metadata_preserve(impl, nir_metadata_block_index |
>> +                               nir_metadata_dominance);
>> +}
>> +
>> +bool
>> +nir_lower_wpos_ytransform(nir_shader *shader,
>> +                          const nir_lower_wpos_ytransform_options *options)
>> +{
>> +   lower_wpos_ytransform_state state = {
>> +      .options = options,
>> +      .shader = shader,
>> +   };
>> +
>> +   assert(shader->stage == MESA_SHADER_FRAGMENT);
>> +
>> +   nir_foreach_function(function, shader) {
>> +      if (function->impl)
>> +         lower_wpos_ytransform_impl(&state, function->impl);
>> +   }
>> +
>> +   return state.transform != NULL;
>> +}
>>
>