[Mesa-dev] [PATCH 2/3] i965: Make the param pointer arrays for the WM dynamically sized.
Kenneth Graunke
kenneth at whitecape.org
Mon Aug 27 14:48:35 PDT 2012
On 08/27/2012 10:49 AM, Eric Anholt wrote:
> Saves 26.5MB of wasted memory allocation in the l4d2 demo.
> ---
> src/mesa/drivers/dri/i965/brw_context.h | 15 ++++++++---
> src/mesa/drivers/dri/i965/brw_fs.cpp | 2 --
> src/mesa/drivers/dri/i965/brw_state_cache.c | 7 +++++
> src/mesa/drivers/dri/i965/brw_wm.c | 38 +++++++++++++++++++++++++++
> src/mesa/drivers/dri/i965/brw_wm.h | 1 +
> 5 files changed, 58 insertions(+), 5 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 15ef0a3..c1cd500 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -313,11 +313,11 @@ struct brw_wm_prog_data {
> */
> uint32_t barycentric_interp_modes;
>
> - /* Pointer to tracked values (only valid once
> + /* Pointers to tracked values (only valid once
> * _mesa_load_state_parameters has been called at runtime).
> */
> - const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */
> - const float *pull_param[MAX_UNIFORMS * 4];
> + const float **param;
> + const float **pull_param;
> };
>
> /**
> @@ -619,6 +619,7 @@ struct brw_cache_item {
> };
>
>
> +typedef void (*cache_aux_free_func)(const void *aux);
>
> struct brw_cache {
> struct brw_context *brw;
> @@ -629,6 +630,14 @@ struct brw_cache {
>
> uint32_t next_offset;
> bool bo_used_by_gpu;
> +
> + /**
> + * Functions used in determining whether the prog_data for a new cache item
> + * matches an existing cache item. This is sometimes done by memcmp, but
> + * for some cache ids that would mean statically sizing arrays instead of
> + * using nice pointers.
> + */
This comment makes no sense to me. Your newly introduced function is a
helper to free arrays. I don't see any changes to how key comparisons
are done (memcmp or something else). I'm kind of scared that this
comment implies that memcmp doesn't work but I don't see a change to not
use memcmp.
> + cache_aux_free_func aux_free[BRW_MAX_CACHE];
> };
>
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index f5b2649..35ed3be 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -528,8 +528,6 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
> for (unsigned int i = 0; i < type->vector_elements; i++) {
> unsigned int param = c->prog_data.nr_params++;
>
> - assert(param < ARRAY_SIZE(c->prog_data.param));
> -
> this->param_index[param] = loc;
> this->param_offset[param] = i;
> }
> diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
> index 57a5ee9..092baf3 100644
> --- a/src/mesa/drivers/dri/i965/brw_state_cache.c
> +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
> @@ -47,6 +47,7 @@
> #include "main/imports.h"
> #include "intel_batchbuffer.h"
> #include "brw_state.h"
> +#include "brw_wm.h"
>
> #define FILE_DEBUG_FLAG DEBUG_STATE
>
> @@ -333,6 +334,8 @@ brw_init_caches(struct brw_context *brw)
> cache->bo = drm_intel_bo_alloc(intel->bufmgr,
> "program cache",
> 4096, 64);
> +
> + cache->aux_free[BRW_WM_PROG] = brw_wm_prog_data_free;
> }
>
> static void
> @@ -347,6 +350,10 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
> for (i = 0; i < cache->size; i++) {
> for (c = cache->items[i]; c; c = next) {
> next = c->next;
> + if (cache->aux_free[c->cache_id]) {
> + const void *item_aux = c->key + c->key_size;
> + cache->aux_free[c->cache_id](item_aux);
> + }
> free((void *)c->key);
> free(c);
> }
> diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
> index e7ef9f2..1ec8264 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm.c
> @@ -253,6 +253,15 @@ brw_wm_payload_setup(struct brw_context *brw,
> }
> }
>
> +void
> +brw_wm_prog_data_free(const void *in_prog_data)
> +{
> + const struct brw_wm_prog_data *prog_data = in_prog_data;
> +
> + ralloc_free((void *)prog_data->param);
> + ralloc_free((void *)prog_data->pull_param);
> +}
> +
> /**
> * All Mesa program -> GPU code generation goes through this function.
> * Depending on the instructions used (i.e. flow control instructions)
> @@ -266,8 +275,12 @@ bool do_wm_prog(struct brw_context *brw,
> struct intel_context *intel = &brw->intel;
> struct brw_wm_compile *c;
> const GLuint *program;
> + struct gl_shader *fs = NULL;
> GLuint program_size;
>
> + if (prog)
> + fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
> +
> c = brw->wm.compile_data;
> if (c == NULL) {
> brw->wm.compile_data = rzalloc(NULL, struct brw_wm_compile);
> @@ -290,6 +303,31 @@ bool do_wm_prog(struct brw_context *brw,
> c->vreg = vreg;
> c->refs = refs;
> }
> +
> + /* Allocate the references to the uniforms that will end up in the
> + * prog_data associated with the compiled program, and which will be freed
> + * by the state cache.
> + */
> + if (fs) {
> + int param_count = fs->num_uniform_components;
> + /* The backend also sometimes adds params for texture size. */
> + param_count += 2 * BRW_MAX_TEX_UNIT;
> +
> + c->prog_data.param = rzalloc_array(c, const float *, param_count);
> + c->prog_data.pull_param = rzalloc_array(c, const float *, param_count);
> + } else {
> + /* brw_wm_pass0.c will also add references to 0.0 and 1.0 which are
> + * uploaded as push parameters.
> + */
> + int param_count = (fp->program.Base.Parameters->NumParameters + 2) * 4;
> + /* The old backend also uploads constants using push parameters, so we
> + * can't just rely on the NumParameters set up by .
> + */
> + c->prog_data.param = rzalloc_array(c, const float *, param_count);
Your sentence here is unfinished, but it sounds like you're saying we
need to include constants, and I sure don't see that in the code (other
than 0.0 and 1.0). Don't you need to solve that?
A totally lame solution would be to use MAX_UNIFORMS for the count when
using the old backend. That gives the benefit for anything not using
ARB programs...
> + /* The old backend never does pull constants. */
> + c->prog_data.pull_param = NULL;
> + }
> +
> memcpy(&c->key, key, sizeof(*key));
>
> c->fp = fp;
> diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
> index 5e4af27..1de1690 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm.h
> +++ b/src/mesa/drivers/dri/i965/brw_wm.h
> @@ -484,5 +484,6 @@ bool do_wm_prog(struct brw_context *brw,
> void brw_wm_debug_recompile(struct brw_context *brw,
> struct gl_shader_program *prog,
> const struct brw_wm_prog_key *key);
> +void brw_wm_prog_data_free(const void *in_prog_data);
>
> #endif
More information about the mesa-dev
mailing list