[Mesa-dev] [PATCH 09/21] i965/fs: Rework representation of fragment output locations in NIR.

Francisco Jerez currojerez at riseup.net
Sun Jul 31 19:22:40 UTC 2016


Kenneth Graunke <kenneth at whitecape.org> writes:

> On Friday, July 22, 2016 8:59:03 PM PDT Francisco Jerez wrote:
>> The problem with the current approach is that driver output locations
>> are represented as a linear offset within the nir_outputs array, which
>> makes it rather difficult for the back-end to figure out what color
>> output and index some nir_intrinsic_load/store_output was meant for,
>> because the offset of a given output within the nir_output array is
>> dependent on the type and size of all previously allocated outputs.
>> Instead this defines the driver location of an output to be the pair
>> formed by its GLSL-assigned location and index (I've borrowed the
>> bitfield macros from brw_defines.h in order to represent the pair of
>> integers as a single scalar value that can be assigned to
>> nir_variable_data::driver_location).  nir_assign_var_locations is no
>> longer useful for fragment outputs.
>> 
>> Because fragment outputs are now allocated independently rather than
>> within the nir_outputs array, the get_frag_output() helper becomes
>> necessary in order to obtain the right temporary register for a given
>> location-index pair.
>> 
>> The type_size helper passed to nir_lower_io is now type_size_dvec4
>> rather than type_size_vec4_times_4 so that output array offsets are
>> provided in terms of whole array elements rather than in terms of
>> scalar components (dvec4 is the largest vector type supported by the
>> GLSL so this will cause all individual fragment outputs to have a size
>> of one regardless of the type).
>
> I don't think you should need type_size_dvec4 - double-precision
> fragment shader outputs don't exist.  GL_ARB_gpu_shader_fp64 says:
>
>     (3) Should double-precision fragment shader outputs be supported?
>
>       RESOLVED:  Not in this extension.  Note that we don't have
>       double-precision framebuffer formats to accept such values.
>
> Why not just use type_size_vec4?  With that changed, this patch would
> get my Reviewed-by.
>

Yeah, I'm aware of that restriction (c.f. PATCH 7), but the point here
was to get output offsets counted in whole array elements regardless of
the type, type_size_dvec4 is strictly better for that purpose than
type_size_vec4, because dvec4 is the largest GLSL vector type, and they
are otherwise equivalent when used on types smaller than a dvec4.  A
type_size_vectors helper may make sense instead though.

>> ---
>>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 50 +++++++++++++++++++++++++++-----
>>  src/mesa/drivers/dri/i965/brw_nir.c      | 10 +++++--
>>  src/mesa/drivers/dri/i965/brw_nir.h      |  5 ++++
>>  3 files changed, 55 insertions(+), 10 deletions(-)
>> 
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> index 28de29a..8e069e0 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> @@ -89,17 +89,19 @@ fs_visitor::nir_setup_outputs()
>>     nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_outputs);
>>  
>>     nir_foreach_variable(var, &nir->outputs) {
>> -      fs_reg reg = offset(nir_outputs, bld, var->data.driver_location);
>> -
>>        switch (stage) {
>>        case MESA_SHADER_VERTEX:
>>        case MESA_SHADER_TESS_EVAL:
>>        case MESA_SHADER_GEOMETRY: {
>> +         fs_reg reg = offset(nir_outputs, bld, var->data.driver_location);
>>           unsigned location = var->data.location;
>>           nir_setup_single_output_varying(&reg, var->type, &location);
>>           break;
>>        }
>> -      case MESA_SHADER_FRAGMENT:
>> +      case MESA_SHADER_FRAGMENT: {
>> +         const fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_F,
>> +                                     type_size_vec4_times_4(var->type));
>> +
>>           if (key->force_dual_color_blend &&
>>               var->data.location == FRAG_RESULT_DATA1) {
>>              this->dual_src_output = reg;
>> @@ -130,6 +132,7 @@ fs_visitor::nir_setup_outputs()
>>              }
>>           }
>>           break;
>> +      }
>>        default:
>>           unreachable("unhandled shader stage");
>>        }
>> @@ -3244,6 +3247,38 @@ emit_non_coherent_fb_read(fs_visitor *v, const fs_reg &dst, unsigned target)
>>     return inst;
>>  }
>>  
>> +static fs_reg
>> +get_frag_output(const fs_visitor *v, unsigned location)
>> +{
>> +   assert(v->stage == MESA_SHADER_FRAGMENT);
>> +   const brw_wm_prog_key *const key =
>> +      reinterpret_cast<const brw_wm_prog_key *>(v->key);
>> +   const unsigned l = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_LOCATION);
>> +   const unsigned i = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_INDEX);
>> +
>> +   if (i > 0 || (key->force_dual_color_blend && l == FRAG_RESULT_DATA1))
>> +      return v->dual_src_output;
>> +
>> +   else if (l == FRAG_RESULT_COLOR)
>> +      return v->outputs[0];
>> +
>> +   else if (l == FRAG_RESULT_DEPTH)
>> +      return v->frag_depth;
>> +
>> +   else if (l == FRAG_RESULT_STENCIL)
>> +      return v->frag_stencil;
>> +
>> +   else if (l == FRAG_RESULT_SAMPLE_MASK)
>> +      return v->sample_mask;
>> +
>> +   else if (l >= FRAG_RESULT_DATA0 &&
>> +            l < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS)
>> +      return v->outputs[l - FRAG_RESULT_DATA0];
>> +
>> +   else
>> +      unreachable("Invalid location");
>> +}
>> +
>>  void
>>  fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
>>                                    nir_intrinsic_instr *instr)
>> @@ -3282,11 +3317,12 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
>>  
>>     case nir_intrinsic_store_output: {
>>        const fs_reg src = get_nir_src(instr->src[0]);
>> -      nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
>> +      const nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
>>        assert(const_offset && "Indirect output stores not allowed");
>> -      const fs_reg new_dest = offset(retype(nir_outputs, src.type), bld,
>> -                                     nir_intrinsic_base(instr) +
>> -                                     const_offset->u32[0]);
>> +      const unsigned location = nir_intrinsic_base(instr) +
>> +         SET_FIELD(const_offset->u32[0], BRW_NIR_FRAG_OUTPUT_LOCATION);
>> +      const fs_reg new_dest = retype(get_frag_output(this, location),
>> +                                     src.type);
>>  
>>        for (unsigned j = 0; j < instr->num_components; j++)
>>           bld.MOV(offset(new_dest, bld, nir_intrinsic_component(instr) + j),
>> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
>> index 388ae9e..ee58115 100644
>> --- a/src/mesa/drivers/dri/i965/brw_nir.c
>> +++ b/src/mesa/drivers/dri/i965/brw_nir.c
>> @@ -339,9 +339,13 @@ brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map *vue_map)
>>  void
>>  brw_nir_lower_fs_outputs(nir_shader *nir)
>>  {
>> -   nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
>> -                            FRAG_RESULT_DATA0, type_size_vec4_times_4);
>> -   nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4);
>> +   nir_foreach_variable(var, &nir->outputs) {
>> +      var->data.driver_location =
>> +         SET_FIELD(var->data.index, BRW_NIR_FRAG_OUTPUT_INDEX) |
>> +         SET_FIELD(var->data.location, BRW_NIR_FRAG_OUTPUT_LOCATION);
>> +   }
>> +
>> +   nir_lower_io(nir, nir_var_shader_out, type_size_dvec4);
>>  }
>>  
>>  void
>> diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h
>> index 74c354f..be97d8f 100644
>> --- a/src/mesa/drivers/dri/i965/brw_nir.h
>> +++ b/src/mesa/drivers/dri/i965/brw_nir.h
>> @@ -137,6 +137,11 @@ void brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog,
>>  
>>  bool brw_nir_opt_peephole_ffma(nir_shader *shader);
>>  
>> +#define BRW_NIR_FRAG_OUTPUT_INDEX_SHIFT 0
>> +#define BRW_NIR_FRAG_OUTPUT_INDEX_MASK INTEL_MASK(0, 0)
>> +#define BRW_NIR_FRAG_OUTPUT_LOCATION_SHIFT 1
>> +#define BRW_NIR_FRAG_OUTPUT_LOCATION_MASK INTEL_MASK(31, 1)
>> +
>>  #ifdef __cplusplus
>>  }
>>  #endif
>> 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 212 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160731/42f7d6ee/attachment.sig>


More information about the mesa-dev mailing list