[Mesa-dev] [PATCH 05/18] i965/fs: Move the computation of register block count from unit to compile.

Kenneth Graunke kenneth at whitecape.org
Wed May 25 11:07:55 PDT 2011


On 05/24/2011 04:00 PM, Eric Anholt wrote:
> No net code size change, but unit update is down 0.8% code size.
> ---
>   src/mesa/drivers/dri/i965/brw_context.h  |   15 +++++++++++++--
>   src/mesa/drivers/dri/i965/brw_fs.cpp     |    4 ++--
>   src/mesa/drivers/dri/i965/brw_wm.c       |    2 +-
>   src/mesa/drivers/dri/i965/brw_wm_state.c |    4 ++--
>   4 files changed, 18 insertions(+), 7 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 1d2ef06..621b6f8 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -231,8 +231,8 @@ struct brw_wm_prog_data {
>
>      GLuint first_curbe_grf;
>      GLuint first_curbe_grf_16;
> -   GLuint total_grf;
> -   GLuint total_grf_16;
> +   GLuint reg_blocks;
> +   GLuint reg_blocks_16;
>      GLuint total_scratch;
>
>      GLuint nr_params;       /**<  number of float params/constants */
> @@ -863,6 +863,17 @@ float convert_param(enum param_conversion conversion, float param)
>      }
>   }
>
> +/**
> + * Pre-gen6, the register file of the EUs was shared between threads,
> + * and each thread used some subset allocated on a 16-register block
> + * granularity.  The unit states wanted these block counts.
> + */
> +static inline int
> +brw_register_blocks(int reg_count)
> +{
> +   return ALIGN(reg_count, 16) / 16 - 1;
> +}
> +

I was a little concerned that the comment states that this is only
necessary pre-Gen6, yet most of this code runs on Gen6+ as well.
Presumably the helper computes the number of register blocks used
regardless of generation, but Gen6 simply doesn't /use/ that information.

So, basically this moves a few instructions from the Gen4/5 3DSTATE_WM
update code (run frequently, since it is used in drawing) to the compile
path (run rarely).  That might help Gen4/5 performance ever so slightly,
but won't help Gen6 at all.

It was nice having the Gen4/5-specific code at the point where it was 
needed, but I suppose I'm okay with this.

Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

>   GLboolean brw_do_cubemap_normalize(struct exec_list *instructions);
>
>   #endif
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 1586048..6ff692a 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -4170,9 +4170,9 @@ fs_visitor::run()
>      generate_code();
>
>      if (c->dispatch_width == 8) {
> -      c->prog_data.total_grf = grf_used;
> +      c->prog_data.reg_blocks = brw_register_blocks(grf_used);
>      } else {
> -      c->prog_data.total_grf_16 = grf_used;
> +      c->prog_data.reg_blocks_16 = brw_register_blocks(grf_used);
>         c->prog_data.prog_offset_16 = prog_offset_16;
>
>         /* Make sure we didn't try to sneak in an extra uniform */
> diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
> index 9079762..3478ff1 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm.c
> @@ -115,7 +115,7 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
>      brw_wm_pass2(c);
>
>      /* how many general-purpose registers are used */
> -   c->prog_data.total_grf = c->max_wm_grf;
> +   c->prog_data.reg_blocks = brw_register_blocks(c->max_wm_grf);
>
>      /* Emit GEN4 code.
>       */
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
> index a356711..ef98f81 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
> @@ -91,8 +91,8 @@ brw_prepare_wm_unit(struct brw_context *brw)
>      }
>
>      /* CACHE_NEW_WM_PROG */
> -   wm->thread0.grf_reg_count = ALIGN(brw->wm.prog_data->total_grf, 16) / 16 - 1;
> -   wm->wm9.grf_reg_count_2 = ALIGN(brw->wm.prog_data->total_grf_16, 16) / 16 - 1;
> +   wm->thread0.grf_reg_count = brw->wm.prog_data->reg_blocks;
> +   wm->wm9.grf_reg_count_2 = brw->wm.prog_data->reg_blocks_16;
>      wm->thread0.kernel_start_pointer = brw->wm.prog_bo->offset>>  6; /* reloc */
>      /* reloc */
>      wm->wm9.kernel_start_pointer_2 = (brw->wm.prog_bo->offset +



More information about the mesa-dev mailing list