[Mesa-dev] [PATCH 2/3] i965: Make the param pointer arrays for the WM dynamically sized.

Kenneth Graunke kenneth at whitecape.org
Mon Aug 27 14:48:35 PDT 2012


On 08/27/2012 10:49 AM, Eric Anholt wrote:
> Saves 26.5MB of wasted memory allocation in the l4d2 demo.
> ---
>  src/mesa/drivers/dri/i965/brw_context.h     |   15 ++++++++---
>  src/mesa/drivers/dri/i965/brw_fs.cpp        |    2 --
>  src/mesa/drivers/dri/i965/brw_state_cache.c |    7 +++++
>  src/mesa/drivers/dri/i965/brw_wm.c          |   38 +++++++++++++++++++++++++++
>  src/mesa/drivers/dri/i965/brw_wm.h          |    1 +
>  5 files changed, 58 insertions(+), 5 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 15ef0a3..c1cd500 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -313,11 +313,11 @@ struct brw_wm_prog_data {
>      */
>     uint32_t barycentric_interp_modes;
>  
> -   /* Pointer to tracked values (only valid once
> +   /* Pointers to tracked values (only valid once
>      * _mesa_load_state_parameters has been called at runtime).
>      */
> -   const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */
> -   const float *pull_param[MAX_UNIFORMS * 4];
> +   const float **param;
> +   const float **pull_param;
>  };
>  
>  /**
> @@ -619,6 +619,7 @@ struct brw_cache_item {
>  };   
>  
>  
> +typedef void (*cache_aux_free_func)(const void *aux);
>  
>  struct brw_cache {
>     struct brw_context *brw;
> @@ -629,6 +630,14 @@ struct brw_cache {
>  
>     uint32_t next_offset;
>     bool bo_used_by_gpu;
> +
> +   /**
> +    * Functions used in determining whether the prog_data for a new cache item
> +    * matches an existing cache item.  This is sometimes done by memcmp, but
> +    * for some cache ids that would mean statically sizing arrays instead of
> +    * using nice pointers.
> +    */

This comment makes no sense to me.  Your newly introduced function is a
helper to free arrays.  I don't see any changes to how key comparisons
are done (memcmp or something else).  I'm kind of scared that this
comment implies that memcmp doesn't work but I don't see a change to not
use memcmp.

> +   cache_aux_free_func aux_free[BRW_MAX_CACHE];
>  };
>  
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index f5b2649..35ed3be 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -528,8 +528,6 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
>        for (unsigned int i = 0; i < type->vector_elements; i++) {
>  	 unsigned int param = c->prog_data.nr_params++;
>  
> -	 assert(param < ARRAY_SIZE(c->prog_data.param));
> -
>  	 this->param_index[param] = loc;
>  	 this->param_offset[param] = i;
>        }
> diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
> index 57a5ee9..092baf3 100644
> --- a/src/mesa/drivers/dri/i965/brw_state_cache.c
> +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
> @@ -47,6 +47,7 @@
>  #include "main/imports.h"
>  #include "intel_batchbuffer.h"
>  #include "brw_state.h"
> +#include "brw_wm.h"
>  
>  #define FILE_DEBUG_FLAG DEBUG_STATE
>  
> @@ -333,6 +334,8 @@ brw_init_caches(struct brw_context *brw)
>     cache->bo = drm_intel_bo_alloc(intel->bufmgr,
>  				  "program cache",
>  				  4096, 64);
> +
> +   cache->aux_free[BRW_WM_PROG] = brw_wm_prog_data_free;
>  }
>  
>  static void
> @@ -347,6 +350,10 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
>     for (i = 0; i < cache->size; i++) {
>        for (c = cache->items[i]; c; c = next) {
>  	 next = c->next;
> +         if (cache->aux_free[c->cache_id]) {
> +            const void *item_aux = c->key + c->key_size;
> +            cache->aux_free[c->cache_id](item_aux);
> +         }
>  	 free((void *)c->key);
>  	 free(c);
>        }
> diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
> index e7ef9f2..1ec8264 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm.c
> @@ -253,6 +253,15 @@ brw_wm_payload_setup(struct brw_context *brw,
>     }
>  }
>  
> +void
> +brw_wm_prog_data_free(const void *in_prog_data)
> +{
> +   const struct brw_wm_prog_data *prog_data = in_prog_data;
> +
> +   ralloc_free((void *)prog_data->param);
> +   ralloc_free((void *)prog_data->pull_param);
> +}
> +
>  /**
>   * All Mesa program -> GPU code generation goes through this function.
>   * Depending on the instructions used (i.e. flow control instructions)
> @@ -266,8 +275,12 @@ bool do_wm_prog(struct brw_context *brw,
>     struct intel_context *intel = &brw->intel;
>     struct brw_wm_compile *c;
>     const GLuint *program;
> +   struct gl_shader *fs = NULL;
>     GLuint program_size;
>  
> +   if (prog)
> +      fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
> +
>     c = brw->wm.compile_data;
>     if (c == NULL) {
>        brw->wm.compile_data = rzalloc(NULL, struct brw_wm_compile);
> @@ -290,6 +303,31 @@ bool do_wm_prog(struct brw_context *brw,
>        c->vreg = vreg;
>        c->refs = refs;
>     }
> +
> +   /* Allocate the references to the uniforms that will end up in the
> +    * prog_data associated with the compiled program, and which will be freed
> +    * by the state cache.
> +    */
> +   if (fs) {
> +      int param_count = fs->num_uniform_components;
> +      /* The backend also sometimes adds params for texture size. */
> +      param_count += 2 * BRW_MAX_TEX_UNIT;
> +
> +      c->prog_data.param = rzalloc_array(c, const float *, param_count);
> +      c->prog_data.pull_param = rzalloc_array(c, const float *, param_count);
> +   } else {
> +      /* brw_wm_pass0.c will also add references to 0.0 and 1.0 which are
> +       * uploaded as push parameters.
> +       */
> +      int param_count = (fp->program.Base.Parameters->NumParameters + 2) * 4;
> +      /* The old backend also uploads constants using push parameters, so we
> +       * can't just rely on the NumParameters set up by .
> +       */
> +      c->prog_data.param = rzalloc_array(c, const float *, param_count);

Your sentence here is unfinished, but it sounds like you're saying we
need to include constants, and I sure don't see that in the code (other
than 0.0 and 1.0).  Don't you need to solve that?

A totally lame solution would be to use MAX_UNIFORMS for the count when
using the old backend.  That gives the benefit for anything not using
ARB programs...

> +      /* The old backend never does pull constants. */
> +      c->prog_data.pull_param = NULL;
> +   }
> +
>     memcpy(&c->key, key, sizeof(*key));
>  
>     c->fp = fp;
> diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
> index 5e4af27..1de1690 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm.h
> +++ b/src/mesa/drivers/dri/i965/brw_wm.h
> @@ -484,5 +484,6 @@ bool do_wm_prog(struct brw_context *brw,
>  void brw_wm_debug_recompile(struct brw_context *brw,
>                              struct gl_shader_program *prog,
>                              const struct brw_wm_prog_key *key);
> +void brw_wm_prog_data_free(const void *in_prog_data);
>  
>  #endif



More information about the mesa-dev mailing list