[Mesa-dev] [PATCH] i965: Push most TES inputs in vec4 mode.
Matt Turner
mattst88 at gmail.com
Tue Mar 1 00:00:44 UTC 2016
On Wed, Feb 17, 2016 at 3:05 PM, Kenneth Graunke <kenneth at whitecape.org> wrote:
> (This is commit 4a1c8a3037cd29938b2a6e2c680c341e9903cfbe for vec4 mode.)
>
> Using the push model for inputs is much more efficient than pulling
> inputs - the hardware can simply copy a large chunk into URB registers
> at thread creation time, rather than having the thread send messages to
> request data from the L3 cache. Unfortunately, it's possible to have
> more TES inputs than fit in registers, so we have to fall back to the
> pull model in some cases.
>
> However, it turns out that most tessellation evaluation shaders are
> fairly simple, and don't use many inputs. An arbitrary cut-off of
> 24 vec4 slots (12 registers) should suffice. (I chose this instead of
> the 32 vec4 slots used in the scalar backend to avoid regressing a few
> Piglit tests due to the vec4 register allocator being too stupid to
> figure out what to do. We probably ought to fix that, but it's a
> separate issue.)
>
> Improves performance in GPUTest's tessmark_x64 microbenchmark by
> 41.5394% +/- 0.288519% (n = 115) at 1024x768 on my Clevo W740SU
> (with Iris Pro 5200).
>
> Improves performance in Synmark's Gl40TerrainFlyTess microbenchmark by
> 38.3576% +/- 0.759748% (n = 42).
>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
> src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 4 +-
> src/mesa/drivers/dri/i965/brw_vec4_tes.cpp | 86 +++++++++++++++++++-----------
> 2 files changed, 56 insertions(+), 34 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> index 74ec4f0..9b721e5 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> @@ -685,9 +685,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
> case nir_intrinsic_load_instance_id:
> case nir_intrinsic_load_base_instance:
> case nir_intrinsic_load_draw_id:
> - case nir_intrinsic_load_invocation_id:
> - case nir_intrinsic_load_tess_level_inner:
> - case nir_intrinsic_load_tess_level_outer: {
> + case nir_intrinsic_load_invocation_id: {
> gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
> src_reg val = src_reg(nir_system_values[sv]);
> assert(val.file != BAD_FILE);
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp
> index ce5fefc..90cbd2b8 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp
> @@ -28,6 +28,7 @@
> */
>
> #include "brw_vec4_tes.h"
> +#include "brw_cfg.h"
>
> namespace brw {
>
> @@ -53,39 +54,10 @@ vec4_tes_visitor::make_reg_for_system_value(int location, const glsl_type *type)
> void
> vec4_tes_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
> {
> - const struct brw_tes_prog_data *tes_prog_data =
> - (const struct brw_tes_prog_data *) prog_data;
> -
> switch (instr->intrinsic) {
> - case nir_intrinsic_load_tess_level_outer: {
> - dst_reg dst(this, glsl_type::vec4_type);
> - nir_system_values[SYSTEM_VALUE_TESS_LEVEL_OUTER] = dst;
> -
> - dst_reg temp(this, glsl_type::vec4_type);
> - vec4_instruction *read =
> - emit(VEC4_OPCODE_URB_READ, temp, input_read_header);
> - read->offset = 1;
> - read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
> - emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WZYX)));
> + case nir_intrinsic_load_tess_level_outer:
> + case nir_intrinsic_load_tess_level_inner:
> break;
> - }
> - case nir_intrinsic_load_tess_level_inner: {
> - dst_reg dst(this, glsl_type::vec2_type);
> - nir_system_values[SYSTEM_VALUE_TESS_LEVEL_INNER] = dst;
> -
> - /* Set up the message header to reference the proper parts of the URB */
> - dst_reg temp(this, glsl_type::vec4_type);
> - vec4_instruction *read =
> - emit(VEC4_OPCODE_URB_READ, temp, input_read_header);
> - read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
> - if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
> - emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WZYX)));
> - } else {
> - read->offset = 1;
> - emit(MOV(dst, src_reg(temp)));
> - }
> - break;
> - }
> default:
> vec4_visitor::nir_setup_system_value_intrinsic(instr);
> }
> @@ -105,6 +77,27 @@ vec4_tes_visitor::setup_payload()
>
> reg = setup_uniforms(reg);
>
> + foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
> + for (int i = 0; i < 3; i++) {
> + if (inst->src[i].file != ATTR)
> + continue;
> +
> + struct brw_reg grf =
> + brw_vec4_grf(reg + inst->src[i].nr / 2, 4 * (inst->src[i].nr % 2));
> + grf = stride(grf, 0, 4, 1);
> + grf.swizzle = inst->src[i].swizzle;
> + grf.type = inst->src[i].type;
> + if (inst->src[i].abs)
> + grf = brw_abs(grf);
> + if (inst->src[i].negate)
> + grf = negate(grf);
Just assign the source modifiers directly instead of using the conditional brw_abs()/negate() calls:
grf.abs = inst->src[i].abs;
grf.negate = inst->src[i].negate;
Otherwise,
Reviewed-by: Matt Turner <mattst88 at gmail.com>
More information about the mesa-dev mailing list