[Mesa-dev] [PATCH 09/21] i965/fs: Rework representation of fragment output locations in NIR.

Kenneth Graunke kenneth at whitecape.org
Sun Jul 31 07:32:27 UTC 2016


On Friday, July 22, 2016 8:59:03 PM PDT Francisco Jerez wrote:
> The problem with the current approach is that driver output locations
> are represented as a linear offset within the nir_outputs array, which
> makes it rather difficult for the back-end to figure out what color
> output and index some nir_intrinsic_load/store_output was meant for,
> because the offset of a given output within the nir_output array is
> dependent on the type and size of all previously allocated outputs.
> Instead this defines the driver location of an output to be the pair
> formed by its GLSL-assigned location and index (I've borrowed the
> bitfield macros from brw_defines.h in order to represent the pair of
> integers as a single scalar value that can be assigned to
> nir_variable_data::driver_location).  nir_assign_var_locations is no
> longer useful for fragment outputs.
> 
> Because fragment outputs are now allocated independently rather than
> within the nir_outputs array, the get_frag_output() helper becomes
> necessary in order to obtain the right temporary register for a given
> location-index pair.
> 
> The type_size helper passed to nir_lower_io is now type_size_dvec4
> rather than type_size_vec4_times_4 so that output array offsets are
> provided in terms of whole array elements rather than in terms of
> scalar components (dvec4 is the largest vector type supported by the
> GLSL so this will cause all individual fragment outputs to have a size
> of one regardless of the type).

I don't think you should need type_size_dvec4 - double-precision
fragment shader outputs don't exist.  GL_ARB_gpu_shader_fp64 says:

    (3) Should double-precision fragment shader outputs be supported?

      RESOLVED:  Not in this extension.  Note that we don't have
      double-precision framebuffer formats to accept such values.

Why not just use type_size_vec4?  With that changed, this patch would
get my Reviewed-by.

> ---
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 50 +++++++++++++++++++++++++++-----
>  src/mesa/drivers/dri/i965/brw_nir.c      | 10 +++++--
>  src/mesa/drivers/dri/i965/brw_nir.h      |  5 ++++
>  3 files changed, 55 insertions(+), 10 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 28de29a..8e069e0 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -89,17 +89,19 @@ fs_visitor::nir_setup_outputs()
>     nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_outputs);
>  
>     nir_foreach_variable(var, &nir->outputs) {
> -      fs_reg reg = offset(nir_outputs, bld, var->data.driver_location);
> -
>        switch (stage) {
>        case MESA_SHADER_VERTEX:
>        case MESA_SHADER_TESS_EVAL:
>        case MESA_SHADER_GEOMETRY: {
> +         fs_reg reg = offset(nir_outputs, bld, var->data.driver_location);
>           unsigned location = var->data.location;
>           nir_setup_single_output_varying(&reg, var->type, &location);
>           break;
>        }
> -      case MESA_SHADER_FRAGMENT:
> +      case MESA_SHADER_FRAGMENT: {
> +         const fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_F,
> +                                     type_size_vec4_times_4(var->type));
> +
>           if (key->force_dual_color_blend &&
>               var->data.location == FRAG_RESULT_DATA1) {
>              this->dual_src_output = reg;
> @@ -130,6 +132,7 @@ fs_visitor::nir_setup_outputs()
>              }
>           }
>           break;
> +      }
>        default:
>           unreachable("unhandled shader stage");
>        }
> @@ -3244,6 +3247,38 @@ emit_non_coherent_fb_read(fs_visitor *v, const fs_reg &dst, unsigned target)
>     return inst;
>  }
>  
> +static fs_reg
> +get_frag_output(const fs_visitor *v, unsigned location)
> +{
> +   assert(v->stage == MESA_SHADER_FRAGMENT);
> +   const brw_wm_prog_key *const key =
> +      reinterpret_cast<const brw_wm_prog_key *>(v->key);
> +   const unsigned l = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_LOCATION);
> +   const unsigned i = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_INDEX);
> +
> +   if (i > 0 || (key->force_dual_color_blend && l == FRAG_RESULT_DATA1))
> +      return v->dual_src_output;
> +
> +   else if (l == FRAG_RESULT_COLOR)
> +      return v->outputs[0];
> +
> +   else if (l == FRAG_RESULT_DEPTH)
> +      return v->frag_depth;
> +
> +   else if (l == FRAG_RESULT_STENCIL)
> +      return v->frag_stencil;
> +
> +   else if (l == FRAG_RESULT_SAMPLE_MASK)
> +      return v->sample_mask;
> +
> +   else if (l >= FRAG_RESULT_DATA0 &&
> +            l < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS)
> +      return v->outputs[l - FRAG_RESULT_DATA0];
> +
> +   else
> +      unreachable("Invalid location");
> +}
> +
>  void
>  fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
>                                    nir_intrinsic_instr *instr)
> @@ -3282,11 +3317,12 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
>  
>     case nir_intrinsic_store_output: {
>        const fs_reg src = get_nir_src(instr->src[0]);
> -      nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
> +      const nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
>        assert(const_offset && "Indirect output stores not allowed");
> -      const fs_reg new_dest = offset(retype(nir_outputs, src.type), bld,
> -                                     nir_intrinsic_base(instr) +
> -                                     const_offset->u32[0]);
> +      const unsigned location = nir_intrinsic_base(instr) +
> +         SET_FIELD(const_offset->u32[0], BRW_NIR_FRAG_OUTPUT_LOCATION);
> +      const fs_reg new_dest = retype(get_frag_output(this, location),
> +                                     src.type);
>  
>        for (unsigned j = 0; j < instr->num_components; j++)
>           bld.MOV(offset(new_dest, bld, nir_intrinsic_component(instr) + j),
> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
> index 388ae9e..ee58115 100644
> --- a/src/mesa/drivers/dri/i965/brw_nir.c
> +++ b/src/mesa/drivers/dri/i965/brw_nir.c
> @@ -339,9 +339,13 @@ brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map *vue_map)
>  void
>  brw_nir_lower_fs_outputs(nir_shader *nir)
>  {
> -   nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
> -                            FRAG_RESULT_DATA0, type_size_vec4_times_4);
> -   nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4);
> +   nir_foreach_variable(var, &nir->outputs) {
> +      var->data.driver_location =
> +         SET_FIELD(var->data.index, BRW_NIR_FRAG_OUTPUT_INDEX) |
> +         SET_FIELD(var->data.location, BRW_NIR_FRAG_OUTPUT_LOCATION);
> +   }
> +
> +   nir_lower_io(nir, nir_var_shader_out, type_size_dvec4);
>  }
>  
>  void
> diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h
> index 74c354f..be97d8f 100644
> --- a/src/mesa/drivers/dri/i965/brw_nir.h
> +++ b/src/mesa/drivers/dri/i965/brw_nir.h
> @@ -137,6 +137,11 @@ void brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog,
>  
>  bool brw_nir_opt_peephole_ffma(nir_shader *shader);
>  
> +#define BRW_NIR_FRAG_OUTPUT_INDEX_SHIFT 0
> +#define BRW_NIR_FRAG_OUTPUT_INDEX_MASK INTEL_MASK(0, 0)
> +#define BRW_NIR_FRAG_OUTPUT_LOCATION_SHIFT 1
> +#define BRW_NIR_FRAG_OUTPUT_LOCATION_MASK INTEL_MASK(31, 1)
> +
>  #ifdef __cplusplus
>  }
>  #endif
> 

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160731/3473610d/attachment-0001.sig>


More information about the mesa-dev mailing list