[Mesa-dev] [PATCH 09/21] i965/fs: Rework representation of fragment output locations in NIR.
Francisco Jerez
currojerez at riseup.net
Sun Jul 31 19:22:40 UTC 2016
Kenneth Graunke <kenneth at whitecape.org> writes:
> On Friday, July 22, 2016 8:59:03 PM PDT Francisco Jerez wrote:
>> The problem with the current approach is that driver output locations
>> are represented as a linear offset within the nir_outputs array, which
>> makes it rather difficult for the back-end to figure out what color
>> output and index some nir_intrinsic_load/store_output was meant for,
>> because the offset of a given output within the nir_output array is
>> dependent on the type and size of all previously allocated outputs.
>> Instead this defines the driver location of an output to be the pair
>> formed by its GLSL-assigned location and index (I've borrowed the
>> bitfield macros from brw_defines.h in order to represent the pair of
>> integers as a single scalar value that can be assigned to
>> nir_variable_data::driver_location). nir_assign_var_locations is no
>> longer useful for fragment outputs.
>>
>> Because fragment outputs are now allocated independently rather than
>> within the nir_outputs array, the get_frag_output() helper becomes
>> necessary in order to obtain the right temporary register for a given
>> location-index pair.
>>
>> The type_size helper passed to nir_lower_io is now type_size_dvec4
>> rather than type_size_vec4_times_4 so that output array offsets are
>> provided in terms of whole array elements rather than in terms of
>> scalar components (dvec4 is the largest vector type supported by the
>> GLSL so this will cause all individual fragment outputs to have a size
>> of one regardless of the type).
>
> I don't think you should need type_size_dvec4 - double-precision
> fragment shader outputs don't exist. GL_ARB_gpu_shader_fp64 says:
>
> (3) Should double-precision fragment shader outputs be supported?
>
> RESOLVED: Not in this extension. Note that we don't have
> double-precision framebuffer formats to accept such values.
>
> Why not just use type_size_vec4? With that changed, this patch would
> get my Reviewed-by.
>
Yeah, I'm aware of that restriction (c.f. PATCH 7), but the point here
was to get output offsets counted in whole array elements regardless of
the type, type_size_dvec4 is strictly better for that purpose than
type_size_vec4, because dvec4 is the largest GLSL vector type, and they
are otherwise equivalent when used on types smaller than a dvec4. A
type_size_vectors helper may make sense instead though.
>> ---
>> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 50 +++++++++++++++++++++++++++-----
>> src/mesa/drivers/dri/i965/brw_nir.c | 10 +++++--
>> src/mesa/drivers/dri/i965/brw_nir.h | 5 ++++
>> 3 files changed, 55 insertions(+), 10 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> index 28de29a..8e069e0 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> @@ -89,17 +89,19 @@ fs_visitor::nir_setup_outputs()
>> nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_outputs);
>>
>> nir_foreach_variable(var, &nir->outputs) {
>> - fs_reg reg = offset(nir_outputs, bld, var->data.driver_location);
>> -
>> switch (stage) {
>> case MESA_SHADER_VERTEX:
>> case MESA_SHADER_TESS_EVAL:
>> case MESA_SHADER_GEOMETRY: {
>> + fs_reg reg = offset(nir_outputs, bld, var->data.driver_location);
>> unsigned location = var->data.location;
>> nir_setup_single_output_varying(®, var->type, &location);
>> break;
>> }
>> - case MESA_SHADER_FRAGMENT:
>> + case MESA_SHADER_FRAGMENT: {
>> + const fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_F,
>> + type_size_vec4_times_4(var->type));
>> +
>> if (key->force_dual_color_blend &&
>> var->data.location == FRAG_RESULT_DATA1) {
>> this->dual_src_output = reg;
>> @@ -130,6 +132,7 @@ fs_visitor::nir_setup_outputs()
>> }
>> }
>> break;
>> + }
>> default:
>> unreachable("unhandled shader stage");
>> }
>> @@ -3244,6 +3247,38 @@ emit_non_coherent_fb_read(fs_visitor *v, const fs_reg &dst, unsigned target)
>> return inst;
>> }
>>
>> +static fs_reg
>> +get_frag_output(const fs_visitor *v, unsigned location)
>> +{
>> + assert(v->stage == MESA_SHADER_FRAGMENT);
>> + const brw_wm_prog_key *const key =
>> + reinterpret_cast<const brw_wm_prog_key *>(v->key);
>> + const unsigned l = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_LOCATION);
>> + const unsigned i = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_INDEX);
>> +
>> + if (i > 0 || (key->force_dual_color_blend && l == FRAG_RESULT_DATA1))
>> + return v->dual_src_output;
>> +
>> + else if (l == FRAG_RESULT_COLOR)
>> + return v->outputs[0];
>> +
>> + else if (l == FRAG_RESULT_DEPTH)
>> + return v->frag_depth;
>> +
>> + else if (l == FRAG_RESULT_STENCIL)
>> + return v->frag_stencil;
>> +
>> + else if (l == FRAG_RESULT_SAMPLE_MASK)
>> + return v->sample_mask;
>> +
>> + else if (l >= FRAG_RESULT_DATA0 &&
>> + l < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS)
>> + return v->outputs[l - FRAG_RESULT_DATA0];
>> +
>> + else
>> + unreachable("Invalid location");
>> +}
>> +
>> void
>> fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
>> nir_intrinsic_instr *instr)
>> @@ -3282,11 +3317,12 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
>>
>> case nir_intrinsic_store_output: {
>> const fs_reg src = get_nir_src(instr->src[0]);
>> - nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
>> + const nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
>> assert(const_offset && "Indirect output stores not allowed");
>> - const fs_reg new_dest = offset(retype(nir_outputs, src.type), bld,
>> - nir_intrinsic_base(instr) +
>> - const_offset->u32[0]);
>> + const unsigned location = nir_intrinsic_base(instr) +
>> + SET_FIELD(const_offset->u32[0], BRW_NIR_FRAG_OUTPUT_LOCATION);
>> + const fs_reg new_dest = retype(get_frag_output(this, location),
>> + src.type);
>>
>> for (unsigned j = 0; j < instr->num_components; j++)
>> bld.MOV(offset(new_dest, bld, nir_intrinsic_component(instr) + j),
>> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
>> index 388ae9e..ee58115 100644
>> --- a/src/mesa/drivers/dri/i965/brw_nir.c
>> +++ b/src/mesa/drivers/dri/i965/brw_nir.c
>> @@ -339,9 +339,13 @@ brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map *vue_map)
>> void
>> brw_nir_lower_fs_outputs(nir_shader *nir)
>> {
>> - nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
>> - FRAG_RESULT_DATA0, type_size_vec4_times_4);
>> - nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4);
>> + nir_foreach_variable(var, &nir->outputs) {
>> + var->data.driver_location =
>> + SET_FIELD(var->data.index, BRW_NIR_FRAG_OUTPUT_INDEX) |
>> + SET_FIELD(var->data.location, BRW_NIR_FRAG_OUTPUT_LOCATION);
>> + }
>> +
>> + nir_lower_io(nir, nir_var_shader_out, type_size_dvec4);
>> }
>>
>> void
>> diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h
>> index 74c354f..be97d8f 100644
>> --- a/src/mesa/drivers/dri/i965/brw_nir.h
>> +++ b/src/mesa/drivers/dri/i965/brw_nir.h
>> @@ -137,6 +137,11 @@ void brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog,
>>
>> bool brw_nir_opt_peephole_ffma(nir_shader *shader);
>>
>> +#define BRW_NIR_FRAG_OUTPUT_INDEX_SHIFT 0
>> +#define BRW_NIR_FRAG_OUTPUT_INDEX_MASK INTEL_MASK(0, 0)
>> +#define BRW_NIR_FRAG_OUTPUT_LOCATION_SHIFT 1
>> +#define BRW_NIR_FRAG_OUTPUT_LOCATION_MASK INTEL_MASK(31, 1)
>> +
>> #ifdef __cplusplus
>> }
>> #endif
>>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 212 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160731/42f7d6ee/attachment.sig>
More information about the mesa-dev
mailing list