[Mesa-dev] [RFCv3 05/11] nir: add lowering pass for y-transform

Connor Abbott cwabbott0 at gmail.com
Tue Feb 2 18:25:07 UTC 2016


On Mon, Feb 1, 2016 at 11:48 AM, Rob Clark <robdclark at gmail.com> wrote:
> On Mon, Feb 1, 2016 at 11:00 AM, Connor Abbott <cwabbott0 at gmail.com> wrote:
>> On Sun, Jan 31, 2016 at 3:16 PM, Rob Clark <robdclark at gmail.com> wrote:
>>> From: Rob Clark <robclark at freedesktop.org>
>>>
>>> Signed-off-by: Rob Clark <robclark at freedesktop.org>
>>> ---
>>>  src/compiler/Makefile.sources                |   1 +
>>>  src/compiler/nir/nir.h                       |  12 +
>>>  src/compiler/nir/nir_lower_wpos_ytransform.c | 317 +++++++++++++++++++++++++++
>>>  3 files changed, 330 insertions(+)
>>>  create mode 100644 src/compiler/nir/nir_lower_wpos_ytransform.c
>>>
>>> diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
>>> index c9780d6..26c4c65 100644
>>> --- a/src/compiler/Makefile.sources
>>> +++ b/src/compiler/Makefile.sources
>>> @@ -200,6 +200,7 @@ NIR_FILES = \
>>>         nir/nir_lower_vars_to_ssa.c \
>>>         nir/nir_lower_var_copies.c \
>>>         nir/nir_lower_vec_to_movs.c \
>>> +       nir/nir_lower_wpos_ytransform.c \
>>>         nir/nir_metadata.c \
>>>         nir/nir_move_vec_src_uses_to_dest.c \
>>>         nir/nir_normalize_cubemap_coords.c \
>>> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>>> index 67fe079..a973a44 100644
>>> --- a/src/compiler/nir/nir.h
>>> +++ b/src/compiler/nir/nir.h
>>> @@ -2119,6 +2119,18 @@ void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables);
>>>
>>>  void nir_lower_two_sided_color(nir_shader *shader);
>>>
>>> +
>>> +typedef struct nir_lower_wpos_ytransform_options {
>>> +   int state_tokens[5];
>>
>> AFAIK the state slot stuff is only used for old assembly shaders. Why
>> are we using it here?
>
> nope, state-slots are alive and well w/ glsl for builtins, etc.  Quite
> possibly it dates back to old ARB assembly shaders.. but it is still
> the way for compiler / lowering to insert builtin params, afaict.  We
> do the same thing in other places too (like drawpix lowering).

Right, I forgot about that... sorry.

>
>>> +   bool fs_coord_origin_upper_left :1;
>>> +   bool fs_coord_origin_lower_left :1;
>>> +   bool fs_coord_pixel_center_integer :1;
>>> +   bool fs_coord_pixel_center_half_integer :1;
>>> +} nir_lower_wpos_ytransform_options;
>>> +
>>> +bool nir_lower_wpos_ytransform(nir_shader *shader,
>>> +                               const nir_lower_wpos_ytransform_options *options);
>>> +
>>>  void nir_lower_atomics(nir_shader *shader,
>>>                         const struct gl_shader_program *shader_program);
>>>  void nir_lower_to_source_mods(nir_shader *shader);
>>> diff --git a/src/compiler/nir/nir_lower_wpos_ytransform.c b/src/compiler/nir/nir_lower_wpos_ytransform.c
>>> new file mode 100644
>>> index 0000000..89d160b
>>> --- /dev/null
>>> +++ b/src/compiler/nir/nir_lower_wpos_ytransform.c
>>> @@ -0,0 +1,317 @@
>>> +/*
>>> + * Copyright © 2015 Red Hat
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person obtaining a
>>> + * copy of this software and associated documentation files (the "Software"),
>>> + * to deal in the Software without restriction, including without limitation
>>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>>> + * and/or sell copies of the Software, and to permit persons to whom the
>>> + * Software is furnished to do so, subject to the following conditions:
>>> + *
>>> + * The above copyright notice and this permission notice (including the next
>>> + * paragraph) shall be included in all copies or substantial portions of the
>>> + * Software.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
>>> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
>>> + * SOFTWARE.
>>> + */
>>> +
>>> +#include "nir.h"
>>> +#include "nir_builder.h"
>>> +
>>> +/* Lower gl_FragCoord (and fddy) to account for driver's requested coordinate-
>>> + * origin and pixel-center vs. shader.  If transformation is required, a
>>> + * gl_FbWposYTransform uniform is inserted (with the specified state-slots)
>>> + * and additional instructions are inserted to transform gl_FragCoord (and
>>> + * fddy src arg).
>>> + *
>>> + * This is based on the logic in emit_wpos()/emit_wpos_adjustment() in TGSI
>>> + * compiler.
>>> + *
>>> + * Run before nir_lower_io.
>>> + */
>>> +
>>> +typedef struct {
>>> +   const nir_lower_wpos_ytransform_options *options;
>>> +   nir_shader   *shader;
>>> +   nir_builder   b;
>>> +   nir_variable *transform;
>>> +} lower_wpos_ytransform_state;
>>> +
>>> +static nir_ssa_def *
>>> +get_transform(lower_wpos_ytransform_state *state)
>>> +{
>>> +   if (state->transform == NULL) {
>>> +      /* NOTE: name must be prefixed w/ "gl_" to trigger slot based
>>> +       * special handling in uniform setup:
>>> +       */
>>> +      nir_variable *var = nir_variable_create(state->shader,
>>> +                                              nir_var_uniform,
>>> +                                              glsl_vec4_type(),
>>> +                                              "gl_FbWposYTransform");
>>> +
>>> +      var->num_state_slots = 1;
>>> +      var->state_slots = ralloc_array(var, nir_state_slot, 1);
>>> +      memcpy(var->state_slots[0].tokens, state->options->state_tokens,
>>> +             sizeof(var->state_slots[0].tokens));
>>> +
>>> +      state->transform = var;
>>> +   }
>>> +   return nir_load_var(&state->b, state->transform);
>>> +}
>>> +
>>> +/* NIR equiv of TGSI CMP instruction: */
>>> +static nir_ssa_def *
>>> +nir_cmp(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1, nir_ssa_def *src2)
>>> +{
>>> +   return nir_bcsel(b, nir_flt(b, src0, nir_imm_float(b, 0.0)), src1, src2);
>>
>> I'm not that familiar with TGSI, but you're selecting src1 if src0 is
>> less than zero, which seems... not quite right. Is that really how
>> TGSI CMP works?
>
> I'm never sure if TGSI is backwards or my hw is backwards (since they
> disagree about src argument order)..  but yes, this is how TGSI works.

Ok. It seems backwards to me compared to how I'd expect it to be, but
if that's that way... oh well.

>
>>> +}
>>> +
>>> +static nir_ssa_def *
>>> +nir_scalar(nir_builder *b, nir_ssa_def *def, int c)
>>> +{
>>> +   unsigned swizzle[4] = {c, c, c, c};
>>> +   return nir_swizzle(b, def, swizzle, 4, false);
>>> +}
>>> +
>>> +/* see emit_wpos_adjustment() in st_mesa_to_tgsi.c */
>>> +static void
>>> +emit_wpos_adjustment(lower_wpos_ytransform_state *state,
>>> +                     nir_intrinsic_instr *intr,
>>> +                     bool invert, float adjX, float adjY[2])
>>> +{
>>> +   nir_builder *b = &state->b;
>>> +   nir_variable *fragcoord = intr->variables[0]->var;
>>> +   nir_ssa_def *wpostrans, *wpos_temp, *wpos_temp_y, *wpos_input;
>>> +
>>> +   assert(intr->dest.is_ssa);
>>> +
>>> +   b->cursor = nir_before_instr(&intr->instr);
>>> +
>>> +   wpostrans = get_transform(state);
>>> +   wpos_input = nir_load_var(b, fragcoord);
>>> +
>>> +   /* First, apply the coordinate shift: */
>>> +   if (adjX || adjY[0] || adjY[1]) {
>>> +      if (adjY[0] != adjY[1]) {
>>> +         /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
>>> +          * depending on whether inversion is actually going to be applied
>>> +          * or not, which is determined by testing against the inversion
>>> +          * state variable used below, which will be either +1 or -1.
>>> +          */
>>> +         nir_ssa_def *adj_temp;
>>> +
>>> +         adj_temp = nir_cmp(b,
>>> +                            nir_scalar(b, wpostrans, invert ? 2 : 0),
>>> +                            nir_imm_vec4(b, adjX, adjY[0], 0.0f, 0.0f),
>>> +                            nir_imm_vec4(b, adjX, adjY[1], 0.0f, 0.0f));
>>
>> First, the nir builder will automatically splat things for you when
>> the sizes are mismatched so you can just do nir_channel() here and
>> avoid having to define nir_scalar().
>
> ok, I can drop nir_scalar()..
>
>> Secondly, why the weird TGSI-style comparison? Why can't you just pass
>> in a normal boolean? At least, I'd like to see nir_cmp() inlined here
>> -- for someone who doesn't know about TGSI, it would be easy to skip
>> over the nir_cmp without realizing that there's some weird
>> gallium-specific stuff going on inside here.
>
> Hmm, otoh the way it is currently makes it easier for folks familiar
> w/ TGSI, and makes it easier to compare side by side the NIR lowering
> pass with the original TGSI version.  This was intentional.

Ok, fair enough. After looking back at this again, I realize what's
going on, and why it's -1/+1 instead of a boolean. The lowering passes
themselves seem sane, although I'm not familiar enough with mesa/st to
know why all of them are necessary/the details of what they're doing.
I guess you could call that a r-b then for patches 5-7.

>
> BR,
> -R
>
>>> +
>>> +         wpos_temp = nir_fadd(b, wpos_input, adj_temp);
>>> +      } else {
>>> +         wpos_temp = nir_fadd(b,
>>> +                              wpos_input,
>>> +                              nir_imm_vec4(b, adjX, adjY[0], 0.0f, 0.0f));
>>> +      }
>>> +      wpos_input = wpos_temp;
>>> +   } else {
>>> +      /* MOV wpos_temp, input[wpos]
>>> +       */
>>> +      wpos_temp = wpos_input;
>>> +   }
>>> +
>>> +   /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
>>> +    * inversion/identity, or the other way around if we're drawing to an FBO.
>>> +    */
>>> +   if (invert) {
>>> +      /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
>>> +       */
>>> +      wpos_temp_y = nir_ffma(b,
>>> +                             nir_channel(b, wpos_temp, 1),
>>> +                             nir_channel(b, wpostrans, 0),
>>> +                             nir_channel(b, wpostrans, 1));
>>> +   } else {
>>> +      /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
>>> +       */
>>> +      wpos_temp_y = nir_ffma(b,
>>> +                             nir_channel(b, wpos_temp, 1),
>>> +                             nir_channel(b, wpostrans, 2),
>>> +                             nir_channel(b, wpostrans, 3));
>>> +   }
>>> +
>>> +   wpos_temp = nir_vec4(b,
>>> +                        nir_channel(b, wpos_temp, 0),
>>> +                        wpos_temp_y,
>>> +                        nir_channel(b, wpos_temp, 2),
>>> +                        nir_channel(b, wpos_temp, 3));
>>> +
>>> +   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(wpos_temp));
>>> +}
>>> +
>>> +static void
>>> +lower_fragcoord(lower_wpos_ytransform_state *state, nir_intrinsic_instr *intr)
>>> +{
>>> +   const nir_lower_wpos_ytransform_options *options = state->options;
>>> +   nir_variable *fragcoord = intr->variables[0]->var;
>>> +   float adjX = 0.0f;
>>> +   float adjY[2] = { 0.0f, 0.0f };
>>> +   bool invert = false;
>>> +
>>> +   /* Based on logic in emit_wpos():
>>> +    *
>>> +    * Query the pixel center conventions supported by the pipe driver and set
>>> +    * adjX, adjY to help out if it cannot handle the requested one internally.
>>> +    *
>>> +    * The bias of the y-coordinate depends on whether y-inversion takes place
>>> +    * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are
>>> +    * drawing to an FBO (causes additional inversion), and whether the the pipe
>>> +    * driver origin and the requested origin differ (the latter condition is
>>> +    * stored in the 'invert' variable).
>>> +    *
>>> +    * For height = 100 (i = integer, h = half-integer, l = lower, u = upper):
>>> +    *
>>> +    * center shift only:
>>> +    * i -> h: +0.5
>>> +    * h -> i: -0.5
>>> +    *
>>> +    * inversion only:
>>> +    * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99
>>> +    * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5
>>> +    * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0
>>> +    * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5
>>> +    *
>>> +    * inversion and center shift:
>>> +    * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5
>>> +    * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99
>>> +    * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
>>> +    * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
>>> +    */
>>> +
>>> +   if (fragcoord->data.origin_upper_left) {
>>> +      /* Fragment shader wants origin in upper-left */
>>> +      if (options->fs_coord_origin_upper_left) {
>>> +         /* the driver supports upper-left origin */
>>> +      } else if (options->fs_coord_origin_lower_left) {
>>> +         /* the driver supports lower-left origin, need to invert Y */
>>> +         invert = true;
>>> +      } else {
>>> +         unreachable("invalid options");
>>> +      }
>>> +   } else {
>>> +      /* Fragment shader wants origin in lower-left */
>>> +      if (options->fs_coord_origin_lower_left) {
>>> +         /* the driver supports lower-left origin */
>>> +      } else if (options->fs_coord_origin_upper_left) {
>>> +         /* the driver supports upper-left origin, need to invert Y */
>>> +         invert = true;
>>> +      } else {
>>> +         unreachable("invalid options");
>>> +      }
>>> +   }
>>> +
>>> +   if (fragcoord->data.pixel_center_integer) {
>>> +      /* Fragment shader wants pixel center integer */
>>> +      if (options->fs_coord_pixel_center_integer) {
>>> +         /* the driver supports pixel center integer */
>>> +         adjY[1] = 1.0f;
>>> +      } else if (options->fs_coord_pixel_center_half_integer) {
>>> +         /* the driver supports pixel center half integer, need to bias X,Y */
>>> +         adjX = -0.5f;
>>> +         adjY[0] = -0.5f;
>>> +         adjY[1] = 0.5f;
>>> +      } else {
>>> +         unreachable("invalid options");
>>> +      }
>>> +   } else {
>>> +      /* Fragment shader wants pixel center half integer */
>>> +      if (options->fs_coord_pixel_center_half_integer) {
>>> +         /* the driver supports pixel center half integer */
>>> +      } else if (options->fs_coord_pixel_center_integer) {
>>> +         /* the driver supports pixel center integer, need to bias X,Y */
>>> +         adjX = adjY[0] = adjY[1] = 0.5f;
>>> +      } else {
>>> +         unreachable("invalid options");
>>> +      }
>>> +   }
>>> +
>>> +   emit_wpos_adjustment(state, intr, invert, adjX, adjY);
>>> +}
>>> +
>>> +/* turns 'fddy(p)' into 'fddy(fmul(p, transform.x))' */
>>> +static void
>>> +lower_fddy(lower_wpos_ytransform_state *state, nir_alu_instr *fddy)
>>> +{
>>> +   nir_builder *b = &state->b;
>>> +   nir_ssa_def *p, *pt, *trans;
>>> +
>>> +   b->cursor = nir_before_instr(&fddy->instr);
>>> +
>>> +   p = nir_ssa_for_alu_src(b, fddy, 0);
>>> +   trans = get_transform(state);
>>> +   pt = nir_fmul(b, p, nir_channel(b, trans, 0));
>>> +
>>> +   nir_instr_rewrite_src(&fddy->instr,
>>> +                         &fddy->src[0].src,
>>> +                         nir_src_for_ssa(pt));
>>> +}
>>> +
>>> +static bool
>>> +lower_wpos_ytransform_block(nir_block *block, void *_state)
>>> +{
>>> +   lower_wpos_ytransform_state *state = _state;
>>> +
>>> +   nir_foreach_instr_safe(block, instr) {
>>> +      if (instr->type == nir_instr_type_intrinsic) {
>>> +         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
>>> +         if (intr->intrinsic == nir_intrinsic_load_var) {
>>> +            nir_deref_var *dvar = intr->variables[0];
>>> +            nir_variable *var = dvar->var;
>>> +
>>> +            if (strcmp(var->name, "gl_FragCoord") == 0) {
>>> +               /* gl_FragCoord should not have array/struct deref's: */
>>> +               assert(dvar->deref.child == NULL);
>>> +               lower_fragcoord(state, intr);
>>> +            }
>>> +         }
>>> +      } else if (instr->type == nir_instr_type_alu) {
>>> +         nir_alu_instr *alu = nir_instr_as_alu(instr);
>>> +         if (alu->op == nir_op_fddy)
>>> +            lower_fddy(state, alu);
>>> +      }
>>> +   }
>>> +
>>> +   return true;
>>> +}
>>> +
>>> +static void
>>> +lower_wpos_ytransform_impl(lower_wpos_ytransform_state *state, nir_function_impl *impl)
>>> +{
>>> +   nir_builder_init(&state->b, impl);
>>> +
>>> +   nir_foreach_block(impl, lower_wpos_ytransform_block, state);
>>> +   nir_metadata_preserve(impl, nir_metadata_block_index |
>>> +                               nir_metadata_dominance);
>>> +}
>>> +
>>> +bool
>>> +nir_lower_wpos_ytransform(nir_shader *shader,
>>> +                          const nir_lower_wpos_ytransform_options *options)
>>> +{
>>> +   lower_wpos_ytransform_state state = {
>>> +      .options = options,
>>> +      .shader = shader,
>>> +   };
>>> +
>>> +   assert(shader->stage == MESA_SHADER_FRAGMENT);
>>> +
>>> +   nir_foreach_function(shader, function) {
>>> +      if (function->impl)
>>> +         lower_wpos_ytransform_impl(&state, function->impl);
>>> +   }
>>> +
>>> +   return state.transform != NULL;
>>> +}
>>> --
>>> 2.5.0
>>>


More information about the mesa-dev mailing list