[Mesa-dev] [PATCH 12/13] i965: Handle mix-and-match TCS/TES with separate shader objects.

Jordan Justen jordan.l.justen at intel.com
Tue Dec 22 14:21:32 PST 2015


On 2015-12-22 02:20:57, Kenneth Graunke wrote:
> GL_ARB_separate_shader_objects allows the application to mix-and-match
> TCS and TES programs separately.  This means that the interface between
> the two stages isn't known until the final SSO pipeline is in place.
> 
> This isn't a great match for our hardware: the TCS and TES have to agree
> on the Patch URB entry layout.  Since we store data as per-patch slots
> followed by per-vertex slots, changing the number of per-patch slots can
> significantly alter the layout.  This can easily happen with SSO.
> 
> To handle this, we store the [Patch]OutputsWritten and [Patch]InputsRead
> bitfields in the TCS/TES program keys, introducing program recompiles.
> brw_upload_programs() decides the layout for both TCS and TES, and
> passes it to brw_upload_tcs/tes(), which store it in the key.
> 
> When creating the NIR for a shader specialization, we override
> nir->info.inputs_read (and friends) to the program key's values.
> Since everything uses those, no further compiler changes are needed.
> This also replaces the hack in brw_create_nir().
> 
> To avoid recompiles, brw_precompile_tes() looks to see if there's a
> TCS in the linked shader.  If so, it accounts for the TCS outputs,
> just as brw_upload_programs() would.  This eliminates all recompiles
> in the non-SSO case.  In the SSO case, there should only be recompiles
> when using a TCS and TES that have different input/output interfaces.
> 
> Fixes Piglit's mix-and-match-tcs-tes test.
> 
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
>  src/mesa/drivers/dri/i965/brw_compiler.h     |  9 +++++++++
>  src/mesa/drivers/dri/i965/brw_nir.c          | 11 ----------
>  src/mesa/drivers/dri/i965/brw_program.h      |  6 ++++--
>  src/mesa/drivers/dri/i965/brw_shader.cpp     |  2 ++
>  src/mesa/drivers/dri/i965/brw_state_upload.c | 18 +++++++++++++++--
>  src/mesa/drivers/dri/i965/brw_tcs.c          | 13 +++++++++++-
>  src/mesa/drivers/dri/i965/brw_tes.c          | 30 +++++++++++++++++++++++++++-
>  src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp   |  2 ++
>  8 files changed, 74 insertions(+), 17 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
> index 0ffaac7..e66deb1 100644
> --- a/src/mesa/drivers/dri/i965/brw_compiler.h
> +++ b/src/mesa/drivers/dri/i965/brw_compiler.h
> @@ -200,6 +200,9 @@ struct brw_tcs_prog_key
>  
>     unsigned input_vertices;
>  
> +   /** A bitfield of per-patch outputs written. */
> +   uint32_t patch_outputs_written;
> +
>     /** A bitfield of per-vertex outputs written. */
>     uint64_t outputs_written;
>  
> @@ -211,6 +214,12 @@ struct brw_tes_prog_key
>  {
>     unsigned program_string_id;
>  
> +   /** A bitfield of per-patch inputs read. */
> +   uint32_t patch_inputs_read;
> +
> +   /** A bitfield of per-vertex inputs read. */
> +   uint64_t inputs_read;
> +
>     struct brw_sampler_prog_key_data tex;
>  };
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
> index 8b06dbe..eebd2a3 100644
> --- a/src/mesa/drivers/dri/i965/brw_nir.c
> +++ b/src/mesa/drivers/dri/i965/brw_nir.c
> @@ -618,17 +618,6 @@ brw_create_nir(struct brw_context *brw,
>     /* First, lower the GLSL IR or Mesa IR to NIR */
>     if (shader_prog) {
>        nir = glsl_to_nir(shader_prog, stage, options);
> -
> -      if (nir->stage == MESA_SHADER_TESS_EVAL &&
> -          shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]) {
> -         const struct gl_program *tcs =
> -            shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]->Program;
> -         /* Work around the TCS having bonus outputs used as shared memory
> -          * segments, which makes OutputsWritten not match InputsRead
> -          */
> -         nir->info.inputs_read = tcs->OutputsWritten;
> -         nir->info.patch_inputs_read = tcs->PatchOutputsWritten;
> -      }
>     } else {
>        nir = prog_to_nir(prog, options);
>        OPT_V(nir_convert_to_ssa); /* turn registers into SSA */
> diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h
> index 3d9e1b9..059ccf8 100644
> --- a/src/mesa/drivers/dri/i965/brw_program.h
> +++ b/src/mesa/drivers/dri/i965/brw_program.h
> @@ -56,8 +56,10 @@ void
>  brw_dump_ir(const char *stage, struct gl_shader_program *shader_prog,
>              struct gl_shader *shader, struct gl_program *prog);
>  
> -void brw_upload_tcs_prog(struct brw_context *brw);
> -void brw_upload_tes_prog(struct brw_context *brw);
> +void brw_upload_tcs_prog(struct brw_context *brw,
> +                         uint64_t per_vertex_slots, uint32_t per_patch_slots);
> +void brw_upload_tes_prog(struct brw_context *brw,
> +                         uint64_t per_vertex_slots, uint32_t per_patch_slots);
>  
>  #ifdef __cplusplus
>  } /* extern "C" */
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index 57f9eb2..5140cfb 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -1329,6 +1329,8 @@ brw_compile_tes(const struct brw_compiler *compiler,
>  
>     nir_shader *nir = nir_shader_clone(mem_ctx, src_shader);
>     nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
> +   nir->info.inputs_read = key->inputs_read;
> +   nir->info.patch_inputs_read = key->patch_inputs_read;
>     nir = brw_nir_lower_io(nir, compiler->devinfo, is_scalar);
>     nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
> index eb0357b..9ad4ae8 100644
> --- a/src/mesa/drivers/dri/i965/brw_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
> @@ -680,8 +680,22 @@ brw_upload_programs(struct brw_context *brw,
>     if (pipeline == BRW_RENDER_PIPELINE) {
>        brw_upload_vs_prog(brw);
>        if (brw->tess_eval_program) {
> -         brw_upload_tcs_prog(brw);
> -         brw_upload_tes_prog(brw);
> +         uint64_t per_vertex_slots = brw->tess_eval_program->Base.InputsRead;
> +         uint32_t per_patch_slots =
> +            brw->tess_eval_program->Base.PatchInputsRead;
> +
> +         /* The TCS may have additional outputs which aren't read by the
> +          * TES (possibly for cross-thread communication).  These need to
> +          * be stored in the Patch URB Entry as well.
> +          */
> +         if (brw->tess_ctrl_program) {
> +            per_vertex_slots |= brw->tess_ctrl_program->Base.OutputsWritten;
> +            per_patch_slots |=
> +               brw->tess_ctrl_program->Base.PatchOutputsWritten;
> +         }
> +
> +         brw_upload_tcs_prog(brw, per_vertex_slots, per_patch_slots);
> +         brw_upload_tes_prog(brw, per_vertex_slots, per_patch_slots);

How about a new brw_upload_tess_progs function, and put this hunk in
there?

-Jordan

>        } else {
>           brw->tcs.prog_data = NULL;
>           brw->tcs.base.prog_data = NULL;
> diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c b/src/mesa/drivers/dri/i965/brw_tcs.c
> index ecb6fd0..2c925e7 100644
> --- a/src/mesa/drivers/dri/i965/brw_tcs.c
> +++ b/src/mesa/drivers/dri/i965/brw_tcs.c
> @@ -67,6 +67,10 @@ brw_tcs_debug_recompile(struct brw_context *brw,
>  
>     found |= key_debug(brw, "input vertices", old_key->input_vertices,
>                        key->input_vertices);
> +   found |= key_debug(brw, "outputs written", old_key->outputs_written,
> +                      key->outputs_written);
> +   found |= key_debug(brw, "patch outputs written", old_key->patch_outputs_written,
> +                      key->patch_outputs_written);
>     found |= key_debug(brw, "TES primitive mode", old_key->tes_primitive_mode,
>                        key->tes_primitive_mode);
>     found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);
> @@ -224,7 +228,9 @@ brw_codegen_tcs_prog(struct brw_context *brw,
>  
>  
>  void
> -brw_upload_tcs_prog(struct brw_context *brw)
> +brw_upload_tcs_prog(struct brw_context *brw,
> +                    uint64_t per_vertex_slots,
> +                    uint32_t per_patch_slots)
>  {
>     struct gl_context *ctx = &brw->ctx;
>     struct gl_shader_program **current = ctx->_Shader->CurrentProgram;
> @@ -248,6 +254,8 @@ brw_upload_tcs_prog(struct brw_context *brw)
>     memset(&key, 0, sizeof(key));
>  
>     key.input_vertices = ctx->TessCtrlProgram.patch_vertices;
> +   key.outputs_written = per_vertex_slots;
> +   key.patch_outputs_written = per_patch_slots;
>  
>     /* We need to specialize our code generation for tessellation levels
>      * based on the domain the DS is expecting to tessellate.
> @@ -301,6 +309,9 @@ brw_tcs_precompile(struct gl_context *ctx,
>  
>     key.tes_primitive_mode = GL_TRIANGLES;
>  
> +   key.outputs_written = prog->OutputsWritten;
> +   key.patch_outputs_written = prog->PatchOutputsWritten;
> +
>     success = brw_codegen_tcs_prog(brw, shader_prog, btcp, &key);
>  
>     brw->tcs.base.prog_offset = old_prog_offset;
> diff --git a/src/mesa/drivers/dri/i965/brw_tes.c b/src/mesa/drivers/dri/i965/brw_tes.c
> index 844c5b2..27dc7e5 100644
> --- a/src/mesa/drivers/dri/i965/brw_tes.c
> +++ b/src/mesa/drivers/dri/i965/brw_tes.c
> @@ -66,6 +66,10 @@ brw_tes_debug_recompile(struct brw_context *brw,
>     }
>  
>     found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);
> +   found |= key_debug(brw, "inputs read", old_key->inputs_read,
> +                      key->inputs_read);
> +   found |= key_debug(brw, "patch inputs read", old_key->patch_inputs_read,
> +                      key->patch_inputs_read);
>  
>     if (!found) {
>        perf_debug("  Something else\n");
> @@ -226,7 +230,9 @@ brw_codegen_tes_prog(struct brw_context *brw,
>  
>  
>  void
> -brw_upload_tes_prog(struct brw_context *brw)
> +brw_upload_tes_prog(struct brw_context *brw,
> +                    uint64_t per_vertex_slots,
> +                    uint32_t per_patch_slots)
>  {
>     struct gl_context *ctx = &brw->ctx;
>     struct gl_shader_program **current = ctx->_Shader->CurrentProgram;
> @@ -247,6 +253,14 @@ brw_upload_tes_prog(struct brw_context *brw)
>  
>     key.program_string_id = tep->id;
>  
> +   /* Ignore gl_TessLevelInner/Outer - we treat them as system values,
> +    * not inputs, and they're always present in the URB entry regardless
> +    * of whether or not we read them.
> +    */
> +   key.inputs_read = per_vertex_slots &
> +      ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
> +   key.patch_inputs_read = per_patch_slots;
> +
>     /* _NEW_TEXTURE */
>     brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
>                                        &key.tex);
> @@ -280,6 +294,20 @@ brw_tes_precompile(struct gl_context *ctx,
>     memset(&key, 0, sizeof(key));
>  
>     key.program_string_id = btep->id;
> +   key.inputs_read = prog->InputsRead;
> +   key.patch_inputs_read = prog->PatchInputsRead;
> +
> +   if (shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]) {
> +      struct gl_program *tcp =
> +         shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]->Program;
> +      key.inputs_read |= tcp->OutputsWritten;
> +      key.patch_inputs_read |= tcp->PatchOutputsWritten;
> +   }
> +
> +   /* Ignore gl_TessLevelInner/Outer - they're system values. */
> +   key.inputs_read &= ~(VARYING_BIT_TESS_LEVEL_INNER |
> +                        VARYING_BIT_TESS_LEVEL_OUTER);
> +
>     brw_setup_tex_for_precompile(brw, &key.tex, prog);
>  
>     success = brw_codegen_tes_prog(brw, shader_prog, btep, &key);
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
> index fba55b5..507db749 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
> @@ -476,6 +476,8 @@ brw_compile_tcs(const struct brw_compiler *compiler,
>  
>     nir_shader *nir = nir_shader_clone(mem_ctx, src_shader);
>     nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
> +   nir->info.outputs_written = key->outputs_written;
> +   nir->info.patch_outputs_written = key->patch_outputs_written;
>     nir = brw_nir_lower_io(nir, compiler->devinfo, is_scalar);
>     nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);
>  
> -- 
> 2.6.4
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list