[Mesa-dev] [PATCH 5/8] i965/vs: Prepare split_virtual_grfs() for the presence of SENDs from GRFs.

Kenneth Graunke kenneth at whitecape.org
Thu Mar 28 00:24:21 PDT 2013


On 03/19/2013 05:06 PM, Eric Anholt wrote:
> ---
>   src/mesa/drivers/dri/i965/brw_vec4.cpp |   49 +++++++++++++++++++++++++-------
>   src/mesa/drivers/dri/i965/brw_vec4.h   |    1 +
>   2 files changed, 39 insertions(+), 11 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index 3e628c8..36b5408 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -218,6 +218,13 @@ vec4_instruction::is_math()
>   	   opcode == SHADER_OPCODE_INT_REMAINDER ||
>   	   opcode == SHADER_OPCODE_POW);
>   }
> +
> +bool
> +vec4_instruction::is_send_from_grf()
> +{
> +   return false;
> +}
> +
>   /**
>    * Returns how many MRFs an opcode will write over.
>    *
> @@ -878,27 +885,49 @@ vec4_visitor::opt_register_coalesce()
>    *
>    * We initially create large virtual GRFs for temporary structures, arrays,
>    * and matrices, so that the dereference visitor functions can add reg_offsets
> - * to work their way down to the actual member being accessed.
> + * to work their way down to the actual member being accessed.  But when it
> + * comes to optimization, we'd like to treat each register as individual
> + * storage if possible.
>    *
> - * Unlike in the FS visitor, though, we have no SEND messages that return more
> - * than 1 register.  We also don't do any array access in register space,
> - * which would have required contiguous physical registers.  Thus, all those
> - * large virtual GRFs can be split up into independent single-register virtual
> - * GRFs, making allocation and optimization easier.
> + * So far, the only thing that might prevent splitting is a send message from
> + * a GRF on IVB.
>    */
>   void
>   vec4_visitor::split_virtual_grfs()
>   {
>      int num_vars = this->virtual_grf_count;
>      int new_virtual_grf[num_vars];
> +   bool split_grf[num_vars];
>
>      memset(new_virtual_grf, 0, sizeof(new_virtual_grf));
>
> +   /* Try to split anything larger than one register. */
> +   for (int i = 0; i < num_vars; i++) {
> +      if (this->virtual_grf_sizes[i] != 1)
> +	 split_grf[i] = true;
> +      else
> +	 split_grf[i] = false;

This "if x then true else false" pattern is redundant.  Please just assign the result of the comparison directly:

    split_grf[i] = this->virtual_grf_sizes[i] != 1;

> +   }
> +
> +   /* Check that the instructions are compatible with the registers we're trying
> +    * to split.
> +    */
> +   foreach_list(node, &this->instructions) {
> +      vec4_instruction *inst = (vec4_instruction *)node;
> +
> +      /* If there's a SEND message loading from a GRF on gen7+, it needs to be
> +       * contiguous.  Assume that the GRF for the SEND is always in src[0].
> +       */
> +      if (inst->is_send_from_grf()) {
> +	 split_grf[inst->src[0].reg] = false;

Bad whitespace here: the line above is indented with tabs.

> +      }
> +   }
> +
>      /* Allocate new space for split regs.  Note that the virtual
>       * numbers will be contiguous.
>       */
>      for (int i = 0; i < num_vars; i++) {
> -      if (this->virtual_grf_sizes[i] == 1)
> +      if (!split_grf[i])
>            continue;
>
>         new_virtual_grf[i] = virtual_grf_alloc(1);
> @@ -913,16 +942,14 @@ vec4_visitor::split_virtual_grfs()
>      foreach_list(node, &this->instructions) {
>         vec4_instruction *inst = (vec4_instruction *)node;
>
> -      if (inst->dst.file == GRF &&
> -	  new_virtual_grf[inst->dst.reg] &&
> +      if (inst->dst.file == GRF && split_grf[inst->dst.reg] &&
>   	  inst->dst.reg_offset != 0) {
>   	 inst->dst.reg = (new_virtual_grf[inst->dst.reg] +
>   			  inst->dst.reg_offset - 1);
>   	 inst->dst.reg_offset = 0;
>         }
>         for (int i = 0; i < 3; i++) {
> -	 if (inst->src[i].file == GRF &&
> -	     new_virtual_grf[inst->src[i].reg] &&
> +	 if (inst->src[i].file == GRF && split_grf[inst->src[i].reg] &&
>   	     inst->src[i].reg_offset != 0) {
>   	    inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] +
>   				inst->src[i].reg_offset - 1);
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
> index 9e700a9..e792ec0 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
> @@ -194,6 +194,7 @@ public:
>
>      bool is_tex();
>      bool is_math();
> +   bool is_send_from_grf();
>      bool can_reswizzle_dst(int dst_writemask, int swizzle, int swizzle_mask);
>      void reswizzle_dst(int dst_writemask, int swizzle);
>   };
>



More information about the mesa-dev mailing list