[Mesa-dev] [PATCH v2] i965/skl: Always use a header for SIMD4x2 sampler messages

Kenneth Graunke kenneth at whitecape.org
Thu Jan 8 01:52:16 PST 2015


On Wednesday, January 07, 2015 10:43:25 PM Kristian Høgsberg wrote:
> SKL+ overloads the SIMD4x2 SIMD mode to mean either SIMD8D or SIMD4x2
> depending on bit 22 in the message header.  If the bit is 0 or there is
> no header we get SIMD8D.  We always want SIMD4x2 in vec4 and for fs pull
> constants, so use a message header in those cases and set bit 22 there.
> 
> Signed-off-by: Kristian Høgsberg <krh at bitplanet.net>
> ---
>  src/mesa/drivers/dri/i965/brw_defines.h          |  5 ++++
>  src/mesa/drivers/dri/i965/brw_fs.cpp             |  8 ++++++
>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp   | 32 +++++++++++++++++++-----
>  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 15 ++++++++---
>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp   |  4 ++-
>  5 files changed, 53 insertions(+), 11 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index 28e398d..f02a0b8 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -1373,6 +1373,11 @@ enum brw_message_target {
>  #define BRW_SAMPLER_SIMD_MODE_SIMD16                    2
>  #define BRW_SAMPLER_SIMD_MODE_SIMD32_64                 3
>  
> +/* GEN9 changes SIMD mode 0 to mean SIMD8D, but lets us get the SIMD4x2
> + * behavior by setting bit 22 of dword 2 in the message header. */
> +#define GEN9_SAMPLER_SIMD_MODE_SIMD8D                   0
> +#define GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2        (1 << 22)
> +
>  #define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW   0
>  #define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH  1
>  #define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS     2
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 8c7d780..9dfb7b7 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -2994,6 +2994,14 @@ fs_visitor::lower_uniform_pull_constant_loads()
>           const_offset_reg.fixed_hw_reg.dw1.ud /= 4;
>           fs_reg payload = fs_reg(this, glsl_type::uint_type);
>  
> +         /* We have to use a message header on Skylake to get SIMD4x2 mode.
> +          * Reserve space for the register.
> +          */
> +         if (brw->gen >= 9) {
> +            payload.reg_offset++;
> +            virtual_grf_sizes[payload.reg] = 2;
> +         }
> +
>           /* This is actually going to be a MOV, but since only the first dword
>            * is accessed, we have a special opcode to do just that one.  Note
>            * that this needs to be an operation that will be considered a def
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> index c652d65..7b4ac8d 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> @@ -1017,6 +1017,26 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
>      */
>     dst.width = BRW_WIDTH_4;
>  
> +   struct brw_reg src = offset;
> +   bool header_present = false;
> +   int mlen = 1;
> +
> +   if (brw->gen >= 9) {
> +      /* Skylake requires a message header in order to use SIMD4x2 mode. */
> +      src = retype(brw_vec8_grf(offset.nr - 1, 0), BRW_REGISTER_TYPE_UD);
> +      mlen = 2;
> +      header_present = true;
> +
> +      brw_push_insn_state(p);
> +      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
> +      brw_MOV(p, src, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
> +      brw_set_default_access_mode(p, BRW_ALIGN_1);
> +
> +      brw_MOV(p, get_element_ud(src, 2),
> +              brw_imm_ud(GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2));
> +      brw_pop_insn_state(p);
> +   }
> +
>     if (index.file == BRW_IMMEDIATE_VALUE) {
>  
>        uint32_t surf_index = index.dw1.ud;
> @@ -1028,14 +1048,14 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
>        brw_pop_insn_state(p);
>  
>        brw_set_dest(p, send, dst);
> -      brw_set_src0(p, send, offset);
> +      brw_set_src0(p, send, src);
>        brw_set_sampler_message(p, send,
>                                surf_index,
>                                0, /* LD message ignores sampler unit */
>                                GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
>                                1, /* rlen */
> -                              1, /* mlen */
> -                              false, /* no header */
> +                              mlen,
> +                              header_present,
>                                BRW_SAMPLER_SIMD_MODE_SIMD4X2,
>                                0);
>  
> @@ -1064,8 +1084,8 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
>                                0 /* sampler */,
>                                GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
>                                1 /* rlen */,
> -                              1 /* mlen */,
> -                              false /* header */,
> +                              mlen,
> +                              header_present,
>                                BRW_SAMPLER_SIMD_MODE_SIMD4X2,
>                                0);
>        brw_inst_set_exec_size(p->brw, insn_or, BRW_EXECUTE_1);
> @@ -1077,7 +1097,7 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
>        /* dst = send(offset, a0.0) */
>        brw_inst *insn_send = brw_next_insn(p, BRW_OPCODE_SEND);
>        brw_set_dest(p, insn_send, dst);
> -      brw_set_src0(p, insn_send, offset);
> +      brw_set_src0(p, insn_send, src);
>        brw_set_indirect_send_descriptor(p, insn_send, BRW_SFID_SAMPLER, addr);
>  
>        brw_pop_insn_state(p);
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> index b88a579..19e82ef 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> @@ -328,6 +328,7 @@ vec4_generator::generate_tex(vec4_instruction *inst,
>        } else {
>           struct brw_reg header =
>              retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD);
> +         uint32_t dw2 = 0;
>  
>           /* Explicitly set up the message header by copying g0 to the MRF. */
>           brw_push_insn_state(p);
> @@ -336,11 +337,17 @@ vec4_generator::generate_tex(vec4_instruction *inst,
>  
>           brw_set_default_access_mode(p, BRW_ALIGN_1);
>  
> -         if (inst->offset) {
> +         if (inst->offset)
>              /* Set the texel offset bits in DWord 2. */
> -            brw_MOV(p, get_element_ud(header, 2),
> -                    brw_imm_ud(inst->offset));
> -         }
> +            dw2 = inst->offset;
> +
> +         if (brw->gen >= 9)
> +            /* SKL+ overloads BRW_SAMPLER_SIMD_MODE_SIMD4X2 to also do SIMD8D,
> +             * based on bit 22 in the header. */

*/ goes on its own line.

Thanks for porting this to the new (or old, if you prefer) generator framework.

Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20150108/60353cb8/attachment-0001.sig>


More information about the mesa-dev mailing list