[Mesa-dev] [PATCH] i965: Push most TES inputs in vec4 mode.

Matt Turner mattst88 at gmail.com
Tue Mar 1 00:00:44 UTC 2016


On Wed, Feb 17, 2016 at 3:05 PM, Kenneth Graunke <kenneth at whitecape.org> wrote:
> (This is the vec4-mode counterpart of commit 4a1c8a3037cd29938b2a6e2c680c341e9903cfbe.)
>
> Using the push model for inputs is much more efficient than pulling
> inputs - the hardware can simply copy a large chunk into URB registers
> at thread creation time, rather than having the thread send messages to
> request data from the L3 cache.  Unfortunately, it's possible to have
> more TES inputs than fit in registers, so we have to fall back to the
> pull model in some cases.
>
> However, it turns out that most tessellation evaluation shaders are
> fairly simple, and don't use many inputs.  An arbitrary cut-off of
> 24 vec4 slots (12 registers) should suffice.  (I chose this instead of
> the 32 vec4 slots used in the scalar backend to avoid regressing a few
> Piglit tests due to the vec4 register allocator being too stupid to
> figure out what to do.  We probably ought to fix that, but it's a
> separate issue.)
>
> Improves performance in GPUTest's tessmark_x64 microbenchmark by
> 41.5394% +/- 0.288519% (n = 115) at 1024x768 on my Clevo W740SU
> (with Iris Pro 5200).
>
> Improves performance in Synmark's Gl40TerrainFlyTess microbenchmark by
> 38.3576% +/- 0.759748% (n = 42).
>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
>  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp |  4 +-
>  src/mesa/drivers/dri/i965/brw_vec4_tes.cpp | 86 +++++++++++++++++++-----------
>  2 files changed, 56 insertions(+), 34 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> index 74ec4f0..9b721e5 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> @@ -685,9 +685,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
>     case nir_intrinsic_load_instance_id:
>     case nir_intrinsic_load_base_instance:
>     case nir_intrinsic_load_draw_id:
> -   case nir_intrinsic_load_invocation_id:
> -   case nir_intrinsic_load_tess_level_inner:
> -   case nir_intrinsic_load_tess_level_outer: {
> +   case nir_intrinsic_load_invocation_id: {
>        gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
>        src_reg val = src_reg(nir_system_values[sv]);
>        assert(val.file != BAD_FILE);
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp
> index ce5fefc..90cbd2b8 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp
> @@ -28,6 +28,7 @@
>   */
>
>  #include "brw_vec4_tes.h"
> +#include "brw_cfg.h"
>
>  namespace brw {
>
> @@ -53,39 +54,10 @@ vec4_tes_visitor::make_reg_for_system_value(int location, const glsl_type *type)
>  void
>  vec4_tes_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
>  {
> -   const struct brw_tes_prog_data *tes_prog_data =
> -      (const struct brw_tes_prog_data *) prog_data;
> -
>     switch (instr->intrinsic) {
> -   case nir_intrinsic_load_tess_level_outer: {
> -      dst_reg dst(this, glsl_type::vec4_type);
> -      nir_system_values[SYSTEM_VALUE_TESS_LEVEL_OUTER] = dst;
> -
> -      dst_reg temp(this, glsl_type::vec4_type);
> -      vec4_instruction *read =
> -         emit(VEC4_OPCODE_URB_READ, temp, input_read_header);
> -      read->offset = 1;
> -      read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
> -      emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WZYX)));
> +   case nir_intrinsic_load_tess_level_outer:
> +   case nir_intrinsic_load_tess_level_inner:
>        break;
> -   }
> -   case nir_intrinsic_load_tess_level_inner: {
> -      dst_reg dst(this, glsl_type::vec2_type);
> -      nir_system_values[SYSTEM_VALUE_TESS_LEVEL_INNER] = dst;
> -
> -      /* Set up the message header to reference the proper parts of the URB */
> -      dst_reg temp(this, glsl_type::vec4_type);
> -      vec4_instruction *read =
> -         emit(VEC4_OPCODE_URB_READ, temp, input_read_header);
> -      read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
> -      if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
> -         emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WZYX)));
> -      } else {
> -         read->offset = 1;
> -         emit(MOV(dst, src_reg(temp)));
> -      }
> -      break;
> -   }
>     default:
>        vec4_visitor::nir_setup_system_value_intrinsic(instr);
>     }
> @@ -105,6 +77,27 @@ vec4_tes_visitor::setup_payload()
>
>     reg = setup_uniforms(reg);
>
> +   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
> +      for (int i = 0; i < 3; i++) {
> +         if (inst->src[i].file != ATTR)
> +            continue;
> +
> +         struct brw_reg grf =
> +            brw_vec4_grf(reg + inst->src[i].nr / 2, 4 * (inst->src[i].nr % 2));
> +         grf = stride(grf, 0, 4, 1);
> +         grf.swizzle = inst->src[i].swizzle;
> +         grf.type = inst->src[i].type;
> +         if (inst->src[i].abs)
> +            grf = brw_abs(grf);
> +         if (inst->src[i].negate)
> +            grf = negate(grf);

Just assign the source modifiers directly instead of going through the helpers:

   grf.abs = inst->src[i].abs;
   grf.negate = inst->src[i].negate;


Otherwise,

Reviewed-by: Matt Turner <mattst88 at gmail.com>


More information about the mesa-dev mailing list