[Mesa-dev] [PATCH 15/19] i965/fs: Support register coalescing on LOAD_PAYLOAD operands.

Kenneth Graunke kenneth at whitecape.org
Sun Jun 1 00:01:50 PDT 2014


On Friday, April 18, 2014 11:56:51 AM Matt Turner wrote:
> ---
>  .../drivers/dri/i965/brw_fs_register_coalesce.cpp  | 59 
++++++++++++++++++----
>  1 file changed, 49 insertions(+), 10 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
> index cb4ab57..a363c4c 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
> @@ -46,7 +46,14 @@
>  static bool
>  is_nop_mov(const fs_inst *inst)
>  {
> -   if (inst->opcode == BRW_OPCODE_MOV) {
> +   if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
> +      for (int i = 0; i < inst->sources; i++) {
> +         if (!inst->dst.equals(inst->src[i])) {

I don't think this is what you want.  You're not altering dst.reg_offset,
here, so dst.reg_offset will presumably always be 0.

For dst.equals(inst->src[i]) to be true for every source, then

   dst.reg == src[i].reg for all sources.
   dst.reg_offset == src[i].reg_offset for all sources.

which means src[i].reg_offset == 0 for all sources.

In other words, I think this detects LOAD_PAYLOAD operations that splats the 
first component of a size > 1 VGRF dst to all components.

That's definitely not a nop mov.

I think if you add dst.reg_offset++ on each iteration of the loop, you'll be 
okay here.

> +            return false;
> +         }
> +      }
> +      return true;
> +   } else if (inst->opcode == BRW_OPCODE_MOV) {
>        return inst->dst.equals(inst->src[0]);
>     }
>  
> @@ -54,9 +61,26 @@ is_nop_mov(const fs_inst *inst)
>  }
>  
>  static bool
> +is_copy_payload(const fs_inst *inst)
> +{
> +   const int reg = inst->src[0].reg;
> +   if (inst->src[0].reg_offset != 0)
> +      return false;

This looks okay, for both src[0] == reg_undef and where it's an actual value.

> +
> +   for (int i = 1; i < inst->sources; i++) {
> +      if (inst->src[i].reg != reg ||
> +          inst->src[i].reg_offset != i) {
> +         return false;
> +      }
> +   }
> +   return true;
> +}
> +
> +static bool
>  is_coalesce_candidate(const fs_inst *inst, const int *virtual_grf_sizes)
>  {
> -   if (inst->opcode != BRW_OPCODE_MOV ||
> +   if ((inst->opcode != BRW_OPCODE_MOV &&
> +        inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD) ||
>         inst->is_partial_write() ||
>         inst->saturate ||
>         inst->src[0].file != GRF ||
> @@ -72,6 +96,12 @@ is_coalesce_candidate(const fs_inst *inst, const int 
*virtual_grf_sizes)
>         virtual_grf_sizes[inst->dst.reg])
>        return false;
>  
> +   if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
> +      if (!is_copy_payload(inst)) {
> +         return false;
> +      }
> +   }
> +
>     return true;
>  }
>  
> @@ -144,10 +174,18 @@ fs_visitor::register_coalesce()
>        if (reg_to != inst->dst.reg)
>           continue;
>  
> -      const int offset = inst->src[0].reg_offset;
> -      reg_to_offset[offset] = inst->dst.reg_offset;
> -      mov[offset] = inst;
> -      channels_remaining--;
> +      if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
> +         for (int i = 0; i < src_size; i++) {

Isn't src_size always 1 for LOAD_PAYLOAD?  Earlier:

         src_size = virtual_grf_sizes[inst->src[0].reg];

Presumably you want inst->sources here...

> +            reg_to_offset[i] = i;
> +         }
> +         mov[0] = inst;
> +         channels_remaining -= inst->sources;
> +      } else {
> +         const int offset = inst->src[0].reg_offset;
> +         reg_to_offset[offset] = inst->dst.reg_offset;
> +         mov[offset] = inst;
> +         channels_remaining--;
> +      }
>  
>        if (channels_remaining)
>           continue;
> @@ -169,15 +207,16 @@ fs_visitor::register_coalesce()
>           continue;
>  
>        progress = true;
> +      bool was_load_payload = inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD;
>  
>        for (int i = 0; i < src_size; i++) {
>           if (mov[i]) {
>              mov[i]->opcode = BRW_OPCODE_NOP;
>              mov[i]->conditional_mod = BRW_CONDITIONAL_NONE;
>              mov[i]->dst = reg_undef;
> -            mov[i]->src[0] = reg_undef;
> -            mov[i]->src[1] = reg_undef;
> -            mov[i]->src[2] = reg_undef;
> +            for (int j = 0; j < mov[i]->sources; j++) {
> +               mov[i]->src[j] = reg_undef;
> +            }
>           }
>        }
>  
> @@ -185,7 +224,7 @@ fs_visitor::register_coalesce()
>           fs_inst *scan_inst = (fs_inst *)node;
>  
>           for (int i = 0; i < src_size; i++) {
> -            if (mov[i]) {
> +            if (mov[i] || was_load_payload) {
>                 if (scan_inst->dst.file == GRF &&
>                     scan_inst->dst.reg == reg_from &&
>                     scan_inst->dst.reg_offset == i) {
> 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 836 bytes
Desc: This is a digitally signed message part.
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20140601/8a3fd73b/attachment-0001.sig>


More information about the mesa-dev mailing list