[Mesa-dev] [PATCH 19/21] intel: Allocate prog_data::[pull_]param deeper inside the compiler

Jason Ekstrand jason at jlekstrand.net
Fri Sep 29 21:25:19 UTC 2017


Now that we're always growing the param array as needed, we can
allocate the param array in common code and stop repeating the
allocation everywhere.  In order to keep things sane, we ralloc the
[pull_]param array off of the compile context and then steal it back
to a NULL context later.  This doesn't get us all the way to where
prog_data::[pull_]param is purely an out parameter of the back-end
compiler, but it gets us a lot closer.
---
 src/intel/compiler/brw_nir.h                   |  5 +++--
 src/intel/compiler/brw_vec4.cpp                |  3 +--
 src/mesa/drivers/dri/i965/brw_cs.c             | 17 +++++------------
 src/mesa/drivers/dri/i965/brw_gs.c             | 22 ++++++----------------
 src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp | 16 ++++++++++++++--
 src/mesa/drivers/dri/i965/brw_tcs.c            | 24 +++++++++---------------
 src/mesa/drivers/dri/i965/brw_tes.c            | 23 +++++++----------------
 src/mesa/drivers/dri/i965/brw_vs.c             | 17 +++++------------
 src/mesa/drivers/dri/i965/brw_wm.c             | 16 +++++-----------
 9 files changed, 55 insertions(+), 88 deletions(-)

diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h
index f4b13b1..97f3efb 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -134,12 +134,13 @@ enum brw_reg_type brw_type_for_nir_type(const struct gen_device_info *devinfo,
 
 enum glsl_base_type brw_glsl_base_type_for_nir_type(nir_alu_type type);
 
-void brw_nir_setup_glsl_uniforms(nir_shader *shader,
+void brw_nir_setup_glsl_uniforms(void *mem_ctx, nir_shader *shader,
                                  const struct gl_program *prog,
                                  struct brw_stage_prog_data *stage_prog_data,
                                  bool is_scalar);
 
-void brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog,
+void brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader,
+                                struct gl_program *prog,
                                 struct brw_stage_prog_data *stage_prog_data);
 
 void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler,
diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index 8614886..04304fc 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -1762,8 +1762,7 @@ vec4_visitor::setup_uniforms(int reg)
     * matter what, or the GPU would hang.
     */
    if (devinfo->gen < 6 && this->uniforms == 0) {
-      stage_prog_data->param =
-         reralloc(NULL, stage_prog_data->param, uint32_t, 4);
+      brw_stage_prog_data_add_params(stage_prog_data, 4);
       for (unsigned int i = 0; i < 4; i++) {
 	 unsigned int slot = this->uniforms * 4 + i;
 	 stage_prog_data->param[slot] = BRW_PARAM_BUILTIN_ZERO;
diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c
index dacb25e..be7680d 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.c
+++ b/src/mesa/drivers/dri/i965/brw_cs.c
@@ -77,18 +77,8 @@ brw_codegen_cs_prog(struct brw_context *brw,
 
    assign_cs_binding_table_offsets(devinfo, &cp->program, &prog_data);
 
-   /* Allocate the references to the uniforms that will end up in the
-    * prog_data associated with the compiled program, and which will be freed
-    * by the state cache.
-    */
-   int param_count = cp->program.nir->num_uniforms / 4;
-
-   prog_data.base.param = rzalloc_array(NULL, uint32_t, param_count);
-   prog_data.base.pull_param = rzalloc_array(NULL, uint32_t, param_count);
-   prog_data.base.nr_params = param_count;
-
-   brw_nir_setup_glsl_uniforms(cp->program.nir, &cp->program,&prog_data.base,
-                               true);
+   brw_nir_setup_glsl_uniforms(mem_ctx, cp->program.nir,
+                               &cp->program, &prog_data.base, true);
 
    if (unlikely(brw->perf_debug)) {
       start_busy = (brw->batch.last_bo &&
@@ -149,6 +139,9 @@ brw_codegen_cs_prog(struct brw_context *brw,
                            prog_data.base.total_scratch,
                            scratch_ids_per_subslice * subslices);
 
+   /* The param and pull_param arrays will be freed by the shader cache. */
+   ralloc_steal(NULL, prog_data.base.param);
+   ralloc_steal(NULL, prog_data.base.pull_param);
    brw_upload_cache(&brw->cache, BRW_CACHE_CS_PROG,
                     key, sizeof(*key),
                     program, program_size,
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index 917742a..007629c 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -87,23 +87,11 @@ brw_codegen_gs_prog(struct brw_context *brw,
 
    memset(&prog_data, 0, sizeof(prog_data));
 
-   assign_gs_binding_table_offsets(devinfo, &gp->program, &prog_data);
-
-   /* Allocate the references to the uniforms that will end up in the
-    * prog_data associated with the compiled program, and which will be freed
-    * by the state cache.
-    *
-    * Note: param_count needs to be num_uniform_components * 4, since we add
-    * padding around uniform values below vec4 size, so the worst case is that
-    * every uniform is a float which gets padded to the size of a vec4.
-    */
-   int param_count = gp->program.nir->num_uniforms / 4;
+   void *mem_ctx = ralloc_context(NULL);
 
-   prog_data.base.base.param = rzalloc_array(NULL, uint32_t, param_count);
-   prog_data.base.base.pull_param = rzalloc_array(NULL, uint32_t, param_count);
-   prog_data.base.base.nr_params = param_count;
+   assign_gs_binding_table_offsets(devinfo, &gp->program, &prog_data);
 
-   brw_nir_setup_glsl_uniforms(gp->program.nir, &gp->program,
+   brw_nir_setup_glsl_uniforms(mem_ctx, gp->program.nir, &gp->program,
                                &prog_data.base.base,
                                compiler->scalar_stage[MESA_SHADER_GEOMETRY]);
    brw_nir_analyze_ubo_ranges(compiler, gp->program.nir,
@@ -124,7 +112,6 @@ brw_codegen_gs_prog(struct brw_context *brw,
       start_time = get_time();
    }
 
-   void *mem_ctx = ralloc_context(NULL);
    unsigned program_size;
    char *error_str;
    const unsigned *program =
@@ -155,6 +142,9 @@ brw_codegen_gs_prog(struct brw_context *brw,
                            prog_data.base.base.total_scratch,
                            devinfo->max_gs_threads);
 
+   /* The param and pull_param arrays will be freed by the shader cache. */
+   ralloc_steal(NULL, prog_data.base.base.param);
+   ralloc_steal(NULL, prog_data.base.base.pull_param);
    brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG,
                     key, sizeof(*key),
                     program, program_size,
diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
index de6df48..a3e7b12 100644
--- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
+++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
@@ -187,10 +187,16 @@ brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var,
 }
 
 void
-brw_nir_setup_glsl_uniforms(nir_shader *shader, const struct gl_program *prog,
+brw_nir_setup_glsl_uniforms(void *mem_ctx, nir_shader *shader,
+                            const struct gl_program *prog,
                             struct brw_stage_prog_data *stage_prog_data,
                             bool is_scalar)
 {
+   unsigned nr_params = shader->num_uniforms / 4;
+   stage_prog_data->nr_params = nr_params;
+   stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params);
+   stage_prog_data->pull_param = rzalloc_array(mem_ctx, uint32_t, nr_params);
+
    nir_foreach_variable(var, &shader->uniforms) {
       /* UBO's, atomics and samplers don't take up space in the
          uniform file */
@@ -208,11 +214,17 @@ brw_nir_setup_glsl_uniforms(nir_shader *shader, const struct gl_program *prog,
 }
 
 void
-brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog,
+brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader,
+                           struct gl_program *prog,
                            struct brw_stage_prog_data *stage_prog_data)
 {
    struct gl_program_parameter_list *plist = prog->Parameters;
 
+   unsigned nr_params = plist->NumParameters * 4;
+   stage_prog_data->nr_params = nr_params;
+   stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params);
+   stage_prog_data->pull_param = rzalloc_array(mem_ctx, uint32_t, nr_params);
+
    /* For ARB programs, prog_to_nir generates a single "parameters" variable
     * for all uniform data.  nir_lower_wpos_ytransform may also create an
     * additional variable.
diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c b/src/mesa/drivers/dri/i965/brw_tcs.c
index 8fd7364..6c9cb15 100644
--- a/src/mesa/drivers/dri/i965/brw_tcs.c
+++ b/src/mesa/drivers/dri/i965/brw_tcs.c
@@ -178,25 +178,12 @@ brw_codegen_tcs_prog(struct brw_context *brw, struct brw_program *tcp,
 
    memset(&prog_data, 0, sizeof(prog_data));
 
-   /* Allocate the references to the uniforms that will end up in the
-    * prog_data associated with the compiled program, and which will be freed
-    * by the state cache.
-    *
-    * Note: param_count needs to be num_uniform_components * 4, since we add
-    * padding around uniform values below vec4 size, so the worst case is that
-    * every uniform is a float which gets padded to the size of a vec4.
-    */
-   int param_count = nir->num_uniforms / 4;
-
-   prog_data.base.base.param = rzalloc_array(NULL, uint32_t, param_count);
-   prog_data.base.base.pull_param = rzalloc_array(NULL, uint32_t, param_count);
-   prog_data.base.base.nr_params = param_count;
-
    if (tcp) {
       brw_assign_common_binding_table_offsets(devinfo, &tcp->program,
                                               &prog_data.base.base, 0);
 
-      brw_nir_setup_glsl_uniforms(nir, &tcp->program, &prog_data.base.base,
+      brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tcp->program,
+                                  &prog_data.base.base,
                                   compiler->scalar_stage[MESA_SHADER_TESS_CTRL]);
       brw_nir_analyze_ubo_ranges(compiler, tcp->program.nir,
                                  prog_data.base.base.ubo_ranges);
@@ -204,6 +191,10 @@ brw_codegen_tcs_prog(struct brw_context *brw, struct brw_program *tcp,
       /* Upload the Patch URB Header as the first two uniforms.
        * Do the annoying scrambling so the shader doesn't have to.
        */
+      assert(nir->num_uniforms == 32);
+      prog_data.base.base.param = rzalloc_array(mem_ctx, uint32_t, 8);
+      prog_data.base.base.nr_params = 8;
+
       uint32_t *param = prog_data.base.base.param;
       for (int i = 0; i < 8; i++)
          param[i] = BRW_PARAM_BUILTIN_ZERO;
@@ -272,6 +263,9 @@ brw_codegen_tcs_prog(struct brw_context *brw, struct brw_program *tcp,
                            prog_data.base.base.total_scratch,
                            devinfo->max_tcs_threads);
 
+   /* The param and pull_param arrays will be freed by the shader cache. */
+   ralloc_steal(NULL, prog_data.base.base.param);
+   ralloc_steal(NULL, prog_data.base.base.pull_param);
    brw_upload_cache(&brw->cache, BRW_CACHE_TCS_PROG,
                     key, sizeof(*key),
                     program, program_size,
diff --git a/src/mesa/drivers/dri/i965/brw_tes.c b/src/mesa/drivers/dri/i965/brw_tes.c
index 763207f..47f3b51 100644
--- a/src/mesa/drivers/dri/i965/brw_tes.c
+++ b/src/mesa/drivers/dri/i965/brw_tes.c
@@ -77,24 +77,13 @@ brw_codegen_tes_prog(struct brw_context *brw,
 
    memset(&prog_data, 0, sizeof(prog_data));
 
+   void *mem_ctx = ralloc_context(NULL);
+
    brw_assign_common_binding_table_offsets(devinfo, &tep->program,
                                            &prog_data.base.base, 0);
 
-   /* Allocate the references to the uniforms that will end up in the
-    * prog_data associated with the compiled program, and which will be freed
-    * by the state cache.
-    *
-    * Note: param_count needs to be num_uniform_components * 4, since we add
-    * padding around uniform values below vec4 size, so the worst case is that
-    * every uniform is a float which gets padded to the size of a vec4.
-    */
-   int param_count = nir->num_uniforms / 4;
-
-   prog_data.base.base.param = rzalloc_array(NULL, uint32_t, param_count);
-   prog_data.base.base.pull_param = rzalloc_array(NULL, uint32_t, param_count);
-   prog_data.base.base.nr_params = param_count;
-
-   brw_nir_setup_glsl_uniforms(nir, &tep->program, &prog_data.base.base,
+   brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tep->program,
+                               &prog_data.base.base,
                                compiler->scalar_stage[MESA_SHADER_TESS_EVAL]);
    brw_nir_analyze_ubo_ranges(compiler, tep->program.nir,
                               prog_data.base.base.ubo_ranges);
@@ -112,7 +101,6 @@ brw_codegen_tes_prog(struct brw_context *brw,
    brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
                             key->patch_inputs_read);
 
-   void *mem_ctx = ralloc_context(NULL);
    unsigned program_size;
    char *error_str;
    const unsigned *program =
@@ -145,6 +133,9 @@ brw_codegen_tes_prog(struct brw_context *brw,
                            prog_data.base.base.total_scratch,
                            devinfo->max_tes_threads);
 
+   /* The param and pull_param arrays will be freed by the shader cache. */
+   ralloc_steal(NULL, prog_data.base.base.param);
+   ralloc_steal(NULL, prog_data.base.base.pull_param);
    brw_upload_cache(&brw->cache, BRW_CACHE_TES_PROG,
                     key, sizeof(*key),
                     program, program_size,
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index c3440fd..fb5ea4e 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -178,24 +178,14 @@ brw_codegen_vs_prog(struct brw_context *brw,
    brw_assign_common_binding_table_offsets(devinfo, &vp->program,
                                            &prog_data.base.base, 0);
 
-   /* Allocate the references to the uniforms that will end up in the
-    * prog_data associated with the compiled program, and which will be freed
-    * by the state cache.
-    */
-   int param_count = vp->program.nir->num_uniforms / 4;
-
-   stage_prog_data->param = rzalloc_array(NULL, uint32_t, param_count);
-   stage_prog_data->pull_param = rzalloc_array(NULL, uint32_t, param_count);
-   stage_prog_data->nr_params = param_count;
-
    if (!vp->program.is_arb_asm) {
-      brw_nir_setup_glsl_uniforms(vp->program.nir, &vp->program,
+      brw_nir_setup_glsl_uniforms(mem_ctx, vp->program.nir, &vp->program,
                                   &prog_data.base.base,
                                   compiler->scalar_stage[MESA_SHADER_VERTEX]);
       brw_nir_analyze_ubo_ranges(compiler, vp->program.nir,
                                  prog_data.base.base.ubo_ranges);
    } else {
-      brw_nir_setup_arb_uniforms(vp->program.nir, &vp->program,
+      brw_nir_setup_arb_uniforms(mem_ctx, vp->program.nir, &vp->program,
                                  &prog_data.base.base);
    }
 
@@ -262,6 +252,9 @@ brw_codegen_vs_prog(struct brw_context *brw,
                            prog_data.base.base.total_scratch,
                            devinfo->max_vs_threads);
 
+   /* The param and pull_param arrays will be freed by the shader cache. */
+   ralloc_steal(NULL, prog_data.base.base.param);
+   ralloc_steal(NULL, prog_data.base.base.pull_param);
    brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG,
 		    key, sizeof(struct brw_vs_prog_key),
 		    program, program_size,
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 1b5774e..69d8e61 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -144,22 +144,13 @@ brw_codegen_wm_prog(struct brw_context *brw,
 
    assign_fs_binding_table_offsets(devinfo, &fp->program, key, &prog_data);
 
-   /* Allocate the references to the uniforms that will end up in the
-    * prog_data associated with the compiled program, and which will be freed
-    * by the state cache.
-    */
-   int param_count = fp->program.nir->num_uniforms / 4;
-   prog_data.base.param = rzalloc_array(NULL, uint32_t, param_count);
-   prog_data.base.pull_param = rzalloc_array(NULL, uint32_t, param_count);
-   prog_data.base.nr_params = param_count;
-
    if (!fp->program.is_arb_asm) {
-      brw_nir_setup_glsl_uniforms(fp->program.nir, &fp->program,
+      brw_nir_setup_glsl_uniforms(mem_ctx, fp->program.nir, &fp->program,
                                   &prog_data.base, true);
       brw_nir_analyze_ubo_ranges(brw->screen->compiler, fp->program.nir,
                                  prog_data.base.ubo_ranges);
    } else {
-      brw_nir_setup_arb_uniforms(fp->program.nir, &fp->program,
+      brw_nir_setup_arb_uniforms(mem_ctx, fp->program.nir, &fp->program,
                                  &prog_data.base);
 
       if (unlikely(INTEL_DEBUG & DEBUG_WM))
@@ -217,6 +208,9 @@ brw_codegen_wm_prog(struct brw_context *brw,
    if (unlikely((INTEL_DEBUG & DEBUG_WM) && fp->program.is_arb_asm))
       fprintf(stderr, "\n");
 
+   /* The param and pull_param arrays will be freed by the shader cache. */
+   ralloc_steal(NULL, prog_data.base.param);
+   ralloc_steal(NULL, prog_data.base.pull_param);
    brw_upload_cache(&brw->cache, BRW_CACHE_FS_PROG,
                     key, sizeof(struct brw_wm_prog_key),
                     program, program_size,
-- 
2.5.0.400.gff86faf



More information about the mesa-dev mailing list