[Mesa-dev] [PATCH] i965/skl: Send a message header when doing constant loads SIMD4x2

Kristian Høgsberg krh at bitplanet.net
Fri Mar 13 11:10:24 PDT 2015


On Fri, Mar 13, 2015 at 9:29 AM, Neil Roberts <neil at linux.intel.com> wrote:
> Commit 0ac4c272755c7 made it add a header for the send message when
> using SIMD4x2 on Skylake because without this it will end up using
> SIMD8D. However the patch missed the case when a sampler is being used
> to implement constant loads from a buffer surface in a SIMD4x2 vertex
> shader.
>
> This fixes 29 Piglit tests, mostly related to the ARL instruction in
> vertex programs.

The patch looks fine, and

Reviewed-by: Kristian Høgsberg <krh at bitplanet.net>

but why are we running vec4 VS on SKL?  Is it really vec4 GS?  It may
be better to enable SIMD8 GS and stop using vec4 on SKL. On one hand
we can avoid adding these workarounds to vec4, on the other it may be
nice to be able to run vec4 on SKL (using INTEL_DEBUG=vec4vs).  I
don't really have a preference.

Kristian

> Cc: Kristian Høgsberg <krh at bitplanet.net>
> ---
>  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 32 +++++++++++++++++++-----
>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp   | 18 +++++++++++++
>  src/mesa/drivers/dri/i965/brw_vec4_vp.cpp        |  9 +++++++
>  3 files changed, 53 insertions(+), 6 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> index 010a5c4..e3a94ff 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> @@ -1049,18 +1049,38 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
>  {
>     assert(surf_index.type == BRW_REGISTER_TYPE_UD);
>
> +   struct brw_reg src = offset;
> +   bool header_present = false;
> +   int mlen = 1;
> +
> +   if (brw->gen >= 9) {
> +      /* Skylake requires a message header in order to use SIMD4x2 mode. */
> +      src = retype(brw_vec4_grf(offset.nr - 1, 0), BRW_REGISTER_TYPE_UD);
> +      mlen = 2;
> +      header_present = true;
> +
> +      brw_push_insn_state(p);
> +      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
> +      brw_MOV(p, src, retype(brw_vec4_grf(0, 0), BRW_REGISTER_TYPE_UD));
> +      brw_set_default_access_mode(p, BRW_ALIGN_1);
> +
> +      brw_MOV(p, get_element_ud(src, 2),
> +              brw_imm_ud(GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2));
> +      brw_pop_insn_state(p);
> +   }
> +
>     if (surf_index.file == BRW_IMMEDIATE_VALUE) {
>
>        brw_inst *insn = brw_next_insn(p, BRW_OPCODE_SEND);
>        brw_set_dest(p, insn, dst);
> -      brw_set_src0(p, insn, offset);
> +      brw_set_src0(p, insn, src);
>        brw_set_sampler_message(p, insn,
>                                surf_index.dw1.ud,
>                                0, /* LD message ignores sampler unit */
>                                GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
>                                1, /* rlen */
> -                              1, /* mlen */
> -                              false, /* no header */
> +                              mlen,
> +                              header_present,
>                                BRW_SAMPLER_SIMD_MODE_SIMD4X2,
>                                0);
>
> @@ -1089,8 +1109,8 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
>                                0 /* sampler */,
>                                GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
>                                1 /* rlen */,
> -                              1 /* mlen */,
> -                              false /* header */,
> +                              mlen /* mlen */,
> +                              header_present /* header */,
>                                BRW_SAMPLER_SIMD_MODE_SIMD4X2,
>                                0);
>        brw_inst_set_exec_size(p->brw, insn_or, BRW_EXECUTE_1);
> @@ -1102,7 +1122,7 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
>        /* dst = send(offset, a0.0) */
>        brw_inst *insn_send = brw_next_insn(p, BRW_OPCODE_SEND);
>        brw_set_dest(p, insn_send, dst);
> -      brw_set_src0(p, insn_send, offset);
> +      brw_set_src0(p, insn_send, src);
>        brw_set_indirect_send_descriptor(p, insn_send, BRW_SFID_SAMPLER, addr);
>
>        brw_pop_insn_state(p);
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> index 5bf9e1b..38eb4ce 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> @@ -1770,6 +1770,15 @@ vec4_visitor::visit(ir_expression *ir)
>
>        if (brw->gen >= 7) {
>           dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
> +
> +         /* We have to use a message header on Skylake to get SIMD4x2 mode.
> +          * Reserve space for the register.
> +          */
> +         if (brw->gen >= 9) {
> +            grf_offset.reg_offset++;
> +            alloc.sizes[grf_offset.reg] = 2;
> +         }
> +
>           grf_offset.type = offset.type;
>
>           emit(MOV(grf_offset, offset));
> @@ -3464,6 +3473,15 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
>
>     if (brw->gen >= 7) {
>        dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
> +
> +      /* We have to use a message header on Skylake to get SIMD4x2 mode.
> +       * Reserve space for the register.
> +       */
> +      if (brw->gen >= 9) {
> +         grf_offset.reg_offset++;
> +         alloc.sizes[grf_offset.reg] = 2;
> +      }
> +
>        grf_offset.type = offset.type;
>        emit_before(block, inst, MOV(grf_offset, offset));
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
> index ba3264d..c3b0233 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
> @@ -527,6 +527,15 @@ vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src)
>
>           /* Add the small constant index to the address register */
>           src_reg reladdr = src_reg(this, glsl_type::int_type);
> +
> +         /* We have to use a message header on Skylake to get SIMD4x2 mode.
> +          * Reserve space for the register.
> +          */
> +         if (brw->gen >= 9) {
> +            reladdr.reg_offset++;
> +            alloc.sizes[reladdr.reg] = 2;
> +         }
> +
>           dst_reg dst_reladdr = dst_reg(reladdr);
>           dst_reladdr.writemask = WRITEMASK_X;
>           emit(ADD(dst_reladdr, this->vp_addr_reg, src_reg(src.Index)));
> --
> 1.9.3
>


More information about the mesa-dev mailing list