[Mesa-dev] [PATCH 09/21] i965/fs: Rework representation of fragment output locations in NIR.
Kenneth Graunke
kenneth at whitecape.org
Sun Jul 31 07:32:27 UTC 2016
On Friday, July 22, 2016 8:59:03 PM PDT Francisco Jerez wrote:
> The problem with the current approach is that driver output locations
> are represented as a linear offset within the nir_outputs array, which
> makes it rather difficult for the back-end to figure out what color
> output and index some nir_intrinsic_load/store_output was meant for,
> because the offset of a given output within the nir_output array is
> dependent on the type and size of all previously allocated outputs.
> Instead this defines the driver location of an output to be the pair
> formed by its GLSL-assigned location and index (I've borrowed the
> bitfield macros from brw_defines.h in order to represent the pair of
> integers as a single scalar value that can be assigned to
> nir_variable_data::driver_location). nir_assign_var_locations is no
> longer useful for fragment outputs.
>
> Because fragment outputs are now allocated independently rather than
> within the nir_outputs array, the get_frag_output() helper becomes
> necessary in order to obtain the right temporary register for a given
> location-index pair.
>
> The type_size helper passed to nir_lower_io is now type_size_dvec4
> rather than type_size_vec4_times_4 so that output array offsets are
> provided in terms of whole array elements rather than in terms of
> scalar components (dvec4 is the largest vector type supported by the
> GLSL so this will cause all individual fragment outputs to have a size
> of one regardless of the type).
I don't think you should need type_size_dvec4 - double-precision
fragment shader outputs don't exist. GL_ARB_gpu_shader_fp64 says:
(3) Should double-precision fragment shader outputs be supported?
RESOLVED: Not in this extension. Note that we don't have
double-precision framebuffer formats to accept such values.
Why not just use type_size_vec4? With that changed, this patch would
get my Reviewed-by.
> ---
> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 50 +++++++++++++++++++++++++++-----
> src/mesa/drivers/dri/i965/brw_nir.c | 10 +++++--
> src/mesa/drivers/dri/i965/brw_nir.h | 5 ++++
> 3 files changed, 55 insertions(+), 10 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 28de29a..8e069e0 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -89,17 +89,19 @@ fs_visitor::nir_setup_outputs()
> nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_outputs);
>
> nir_foreach_variable(var, &nir->outputs) {
> - fs_reg reg = offset(nir_outputs, bld, var->data.driver_location);
> -
> switch (stage) {
> case MESA_SHADER_VERTEX:
> case MESA_SHADER_TESS_EVAL:
> case MESA_SHADER_GEOMETRY: {
> + fs_reg reg = offset(nir_outputs, bld, var->data.driver_location);
> unsigned location = var->data.location;
> nir_setup_single_output_varying(®, var->type, &location);
> break;
> }
> - case MESA_SHADER_FRAGMENT:
> + case MESA_SHADER_FRAGMENT: {
> + const fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_F,
> + type_size_vec4_times_4(var->type));
> +
> if (key->force_dual_color_blend &&
> var->data.location == FRAG_RESULT_DATA1) {
> this->dual_src_output = reg;
> @@ -130,6 +132,7 @@ fs_visitor::nir_setup_outputs()
> }
> }
> break;
> + }
> default:
> unreachable("unhandled shader stage");
> }
> @@ -3244,6 +3247,38 @@ emit_non_coherent_fb_read(fs_visitor *v, const fs_reg &dst, unsigned target)
> return inst;
> }
>
> +static fs_reg
> +get_frag_output(const fs_visitor *v, unsigned location)
> +{
> + assert(v->stage == MESA_SHADER_FRAGMENT);
> + const brw_wm_prog_key *const key =
> + reinterpret_cast<const brw_wm_prog_key *>(v->key);
> + const unsigned l = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_LOCATION);
> + const unsigned i = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_INDEX);
> +
> + if (i > 0 || (key->force_dual_color_blend && l == FRAG_RESULT_DATA1))
> + return v->dual_src_output;
> +
> + else if (l == FRAG_RESULT_COLOR)
> + return v->outputs[0];
> +
> + else if (l == FRAG_RESULT_DEPTH)
> + return v->frag_depth;
> +
> + else if (l == FRAG_RESULT_STENCIL)
> + return v->frag_stencil;
> +
> + else if (l == FRAG_RESULT_SAMPLE_MASK)
> + return v->sample_mask;
> +
> + else if (l >= FRAG_RESULT_DATA0 &&
> + l < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS)
> + return v->outputs[l - FRAG_RESULT_DATA0];
> +
> + else
> + unreachable("Invalid location");
> +}
> +
> void
> fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
> nir_intrinsic_instr *instr)
> @@ -3282,11 +3317,12 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
>
> case nir_intrinsic_store_output: {
> const fs_reg src = get_nir_src(instr->src[0]);
> - nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
> + const nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
> assert(const_offset && "Indirect output stores not allowed");
> - const fs_reg new_dest = offset(retype(nir_outputs, src.type), bld,
> - nir_intrinsic_base(instr) +
> - const_offset->u32[0]);
> + const unsigned location = nir_intrinsic_base(instr) +
> + SET_FIELD(const_offset->u32[0], BRW_NIR_FRAG_OUTPUT_LOCATION);
> + const fs_reg new_dest = retype(get_frag_output(this, location),
> + src.type);
>
> for (unsigned j = 0; j < instr->num_components; j++)
> bld.MOV(offset(new_dest, bld, nir_intrinsic_component(instr) + j),
> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
> index 388ae9e..ee58115 100644
> --- a/src/mesa/drivers/dri/i965/brw_nir.c
> +++ b/src/mesa/drivers/dri/i965/brw_nir.c
> @@ -339,9 +339,13 @@ brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map *vue_map)
> void
> brw_nir_lower_fs_outputs(nir_shader *nir)
> {
> - nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
> - FRAG_RESULT_DATA0, type_size_vec4_times_4);
> - nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4);
> + nir_foreach_variable(var, &nir->outputs) {
> + var->data.driver_location =
> + SET_FIELD(var->data.index, BRW_NIR_FRAG_OUTPUT_INDEX) |
> + SET_FIELD(var->data.location, BRW_NIR_FRAG_OUTPUT_LOCATION);
> + }
> +
> + nir_lower_io(nir, nir_var_shader_out, type_size_dvec4);
> }
>
> void
> diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h
> index 74c354f..be97d8f 100644
> --- a/src/mesa/drivers/dri/i965/brw_nir.h
> +++ b/src/mesa/drivers/dri/i965/brw_nir.h
> @@ -137,6 +137,11 @@ void brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog,
>
> bool brw_nir_opt_peephole_ffma(nir_shader *shader);
>
> +#define BRW_NIR_FRAG_OUTPUT_INDEX_SHIFT 0
> +#define BRW_NIR_FRAG_OUTPUT_INDEX_MASK INTEL_MASK(0, 0)
> +#define BRW_NIR_FRAG_OUTPUT_LOCATION_SHIFT 1
> +#define BRW_NIR_FRAG_OUTPUT_LOCATION_MASK INTEL_MASK(31, 1)
> +
> #ifdef __cplusplus
> }
> #endif
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160731/3473610d/attachment-0001.sig>
More information about the mesa-dev
mailing list