[Mesa-dev] [PATCH] i965/fs: Add an allow_spilling flag to brw_compile_fs
Francisco Jerez
currojerez at riseup.net
Tue May 17 04:04:57 UTC 2016
Jason Ekstrand <jason at jlekstrand.net> writes:
> On May 16, 2016 6:44 PM, "Francisco Jerez" <currojerez at riseup.net> wrote:
>>
>> Jason Ekstrand <jason at jlekstrand.net> writes:
>>
>> > This allows us to disable spilling for blorp shaders since blorp state
>> > setup doesn't handle spilling. Without this, blorp faisl hard if you
> run
>> > with INTEL_DEBUG=spill.
>> >
>> > Cc: Curro <currojerez at riseup.net>
>>
>> Reviewed-by: Francisco Jerez <currojerez at riseup.net>
>
> Did you test it? One of is probably should. :-p
>
I've just done a full piglit run on SKL with INTEL_DEBUG=spill_fs and
this patch applied on top of my spilling fixes. Didn't notice any
regressions so feel free to add my:
Tested-by: Francisco Jerez <currojerez at riseup.net>
>> > ---
>> > src/intel/vulkan/anv_pipeline.c | 3 ++-
>> > src/mesa/drivers/dri/i965/brw_blorp.c | 2 +-
>> > src/mesa/drivers/dri/i965/brw_compiler.h | 1 +
>> > src/mesa/drivers/dri/i965/brw_fs.cpp | 29
> +++++++++++++----------
>> > src/mesa/drivers/dri/i965/brw_fs.h | 6 ++---
>> > src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 4 ++--
>> > src/mesa/drivers/dri/i965/brw_wm.c | 3 ++-
>> > 7 files changed, 28 insertions(+), 20 deletions(-)
>> >
>> > diff --git a/src/intel/vulkan/anv_pipeline.c
> b/src/intel/vulkan/anv_pipeline.c
>> > index a8e31b1..7d265d8 100644
>> > --- a/src/intel/vulkan/anv_pipeline.c
>> > +++ b/src/intel/vulkan/anv_pipeline.c
>> > @@ -675,7 +675,8 @@ anv_pipeline_compile_fs(struct anv_pipeline
> *pipeline,
>> > unsigned code_size;
>> > const unsigned *shader_code =
>> > brw_compile_fs(compiler, NULL, mem_ctx, &key, &prog_data, nir,
>> > - NULL, -1, -1, pipeline->use_repclear,
> &code_size, NULL);
>> > + NULL, -1, -1, true, pipeline->use_repclear,
>> > + &code_size, NULL);
>> > if (shader_code == NULL) {
>> > ralloc_free(mem_ctx);
>> > return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
>> > diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c
> b/src/mesa/drivers/dri/i965/brw_blorp.c
>> > index 09a0fd1..9590968 100644
>> > --- a/src/mesa/drivers/dri/i965/brw_blorp.c
>> > +++ b/src/mesa/drivers/dri/i965/brw_blorp.c
>> > @@ -223,7 +223,7 @@ brw_blorp_compile_nir_shader(struct brw_context
> *brw, struct nir_shader *nir,
>> >
>> > const unsigned *program =
>> > brw_compile_fs(compiler, brw, mem_ctx, wm_key, &wm_prog_data,
> nir,
>> > - NULL, -1, -1, use_repclear, program_size, NULL);
>> > + NULL, -1, -1, false, use_repclear, program_size,
> NULL);
>> >
>> > /* Copy the relavent bits of wm_prog_data over into the blorp prog
> data */
>> > prog_data->dispatch_8 = wm_prog_data.dispatch_8;
>> > diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h
> b/src/mesa/drivers/dri/i965/brw_compiler.h
>> > index a2148ae..15f32be 100644
>> > --- a/src/mesa/drivers/dri/i965/brw_compiler.h
>> > +++ b/src/mesa/drivers/dri/i965/brw_compiler.h
>> > @@ -789,6 +789,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
> void *log_data,
>> > struct gl_program *prog,
>> > int shader_time_index8,
>> > int shader_time_index16,
>> > + bool allow_spilling,
>> > bool use_rep_send,
>> > unsigned *final_assembly_size,
>> > char **error_str);
>> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> > index 2de5533..55d10fc 100644
>> > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
>> > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> > @@ -5462,7 +5462,7 @@ fs_visitor::fixup_3src_null_dest()
>> > }
>> >
>> > void
>> > -fs_visitor::allocate_registers()
>> > +fs_visitor::allocate_registers(bool allow_spilling)
>> > {
>> > bool allocated_without_spills;
>> >
>> > @@ -5472,6 +5472,8 @@ fs_visitor::allocate_registers()
>> > SCHEDULE_PRE_LIFO,
>> > };
>> >
>> > + bool spill_all = allow_spilling && (INTEL_DEBUG & DEBUG_SPILL_FS);
>> > +
>> > /* Try each scheduling heuristic to see if it can successfully
> register
>> > * allocate without spilling. They should be ordered by decreasing
>> > * performance but increasing likelihood of allocating.
>> > @@ -5483,7 +5485,7 @@ fs_visitor::allocate_registers()
>> > assign_regs_trivial();
>> > allocated_without_spills = true;
>> > } else {
>> > - allocated_without_spills = assign_regs(false);
>> > + allocated_without_spills = assign_regs(false, spill_all);
>> > }
>> > if (allocated_without_spills)
>> > break;
>> > @@ -5508,12 +5510,14 @@ fs_visitor::allocate_registers()
>> > /* Since we're out of heuristics, just go spill registers until
> we
>> > * get an allocation.
>> > */
>> > - while (!assign_regs(true)) {
>> > + while (!assign_regs(true, spill_all)) {
>> > if (failed)
>> > break;
>> > }
>> > }
>> >
>> > + assert(last_scratch == 0 || allow_spilling);
>> > +
>> > /* This must come after all optimization and register allocation,
> since
>> > * it inserts dead code that happens to have side effects, and it
> does
>> > * so based on the actual physical registers in use.
>> > @@ -5559,7 +5563,7 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes)
>> > assign_vs_urb_setup();
>> >
>> > fixup_3src_null_dest();
>> > - allocate_registers();
>> > + allocate_registers(true);
>> >
>> > return !failed;
>> > }
>> > @@ -5641,7 +5645,7 @@ fs_visitor::run_tcs_single_patch()
>> > assign_tcs_single_patch_urb_setup();
>> >
>> > fixup_3src_null_dest();
>> > - allocate_registers();
>> > + allocate_registers(true);
>> >
>> > return !failed;
>> > }
>> > @@ -5675,7 +5679,7 @@ fs_visitor::run_tes()
>> > assign_tes_urb_setup();
>> >
>> > fixup_3src_null_dest();
>> > - allocate_registers();
>> > + allocate_registers(true);
>> >
>> > return !failed;
>> > }
>> > @@ -5724,13 +5728,13 @@ fs_visitor::run_gs()
>> > assign_gs_urb_setup();
>> >
>> > fixup_3src_null_dest();
>> > - allocate_registers();
>> > + allocate_registers(true);
>> >
>> > return !failed;
>> > }
>> >
>> > bool
>> > -fs_visitor::run_fs(bool do_rep_send)
>> > +fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
>> > {
>> > brw_wm_prog_data *wm_prog_data = (brw_wm_prog_data *)
> this->prog_data;
>> > brw_wm_prog_key *wm_key = (brw_wm_prog_key *) this->key;
>> > @@ -5794,7 +5798,7 @@ fs_visitor::run_fs(bool do_rep_send)
>> > assign_urb_setup();
>> >
>> > fixup_3src_null_dest();
>> > - allocate_registers();
>> > + allocate_registers(allow_spilling);
>> >
>> > if (failed)
>> > return false;
>> > @@ -5837,7 +5841,7 @@ fs_visitor::run_cs()
>> > assign_curb_setup();
>> >
>> > fixup_3src_null_dest();
>> > - allocate_registers();
>> > + allocate_registers(true);
>> >
>> > if (failed)
>> > return false;
>> > @@ -5962,6 +5966,7 @@ brw_compile_fs(const struct brw_compiler
> *compiler, void *log_data,
>> > const nir_shader *src_shader,
>> > struct gl_program *prog,
>> > int shader_time_index8, int shader_time_index16,
>> > + bool allow_spilling,
>> > bool use_rep_send,
>> > unsigned *final_assembly_size,
>> > char **error_str)
>> > @@ -6005,7 +6010,7 @@ brw_compile_fs(const struct brw_compiler
> *compiler, void *log_data,
>> > fs_visitor v8(compiler, log_data, mem_ctx, key,
>> > &prog_data->base, prog, shader, 8,
>> > shader_time_index8);
>> > - if (!v8.run_fs(false /* do_rep_send */)) {
>> > + if (!v8.run_fs(allow_spilling, false /* do_rep_send */)) {
>> > if (error_str)
>> > *error_str = ralloc_strdup(mem_ctx, v8.fail_msg);
>> >
>> > @@ -6023,7 +6028,7 @@ brw_compile_fs(const struct brw_compiler
> *compiler, void *log_data,
>> > &prog_data->base, prog, shader, 16,
>> > shader_time_index16);
>> > v16.import_uniforms(&v8);
>> > - if (!v16.run_fs(use_rep_send)) {
>> > + if (!v16.run_fs(allow_spilling, use_rep_send)) {
>> > compiler->shader_perf_log(log_data,
>> > "SIMD16 shader failed to compile:
> %s",
>> > v16.fail_msg);
>> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h
> b/src/mesa/drivers/dri/i965/brw_fs.h
>> > index d4eb8fb..b6fe564 100644
>> > --- a/src/mesa/drivers/dri/i965/brw_fs.h
>> > +++ b/src/mesa/drivers/dri/i965/brw_fs.h
>> > @@ -105,14 +105,14 @@ public:
>> > uint32_t const_offset);
>> > void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf);
>> >
>> > - bool run_fs(bool do_rep_send);
>> > + bool run_fs(bool allow_spilling, bool do_rep_send);
>> > bool run_vs(gl_clip_plane *clip_planes);
>> > bool run_tcs_single_patch();
>> > bool run_tes();
>> > bool run_gs();
>> > bool run_cs();
>> > void optimize();
>> > - void allocate_registers();
>> > + void allocate_registers(bool allow_spilling);
>> > void setup_fs_payload_gen4();
>> > void setup_fs_payload_gen6();
>> > void setup_vs_payload();
>> > @@ -127,7 +127,7 @@ public:
>> > void assign_tcs_single_patch_urb_setup();
>> > void assign_tes_urb_setup();
>> > void assign_gs_urb_setup();
>> > - bool assign_regs(bool allow_spilling);
>> > + bool assign_regs(bool allow_spilling, bool spill_all);
>> > void assign_regs_trivial();
>> > void calculate_payload_ranges(int payload_node_count,
>> > int *payload_last_use_ip);
>> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
>> > index 2347cd5..e65f73b 100644
>> > --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
>> > +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
>> > @@ -542,7 +542,7 @@ setup_mrf_hack_interference(fs_visitor *v, struct
> ra_graph *g,
>> > }
>> >
>> > bool
>> > -fs_visitor::assign_regs(bool allow_spilling)
>> > +fs_visitor::assign_regs(bool allow_spilling, bool spill_all)
>> > {
>> > /* Most of this allocation was written for a reg_width of 1
>> > * (dispatch_width == 8). In extending to SIMD16, the code was
>> > @@ -668,7 +668,7 @@ fs_visitor::assign_regs(bool allow_spilling)
>> > }
>> >
>> > /* Debug of register spilling: Go spill everything. */
>> > - if (unlikely(INTEL_DEBUG & DEBUG_SPILL_FS)) {
>> > + if (unlikely(spill_all)) {
>> > int reg = choose_spill_reg(g);
>> >
>> > if (reg != -1) {
>> > diff --git a/src/mesa/drivers/dri/i965/brw_wm.c
> b/src/mesa/drivers/dri/i965/brw_wm.c
>> > index 395b0b8..8af1ff7 100644
>> > --- a/src/mesa/drivers/dri/i965/brw_wm.c
>> > +++ b/src/mesa/drivers/dri/i965/brw_wm.c
>> > @@ -137,7 +137,8 @@ brw_codegen_wm_prog(struct brw_context *brw,
>> > program = brw_compile_fs(brw->intelScreen->compiler, brw, mem_ctx,
>> > key, &prog_data, fp->program.Base.nir,
>> > &fp->program.Base, st_index8, st_index16,
>> > - brw->use_rep_send, &program_size,
> &error_str);
>> > + true, brw->use_rep_send,
>> > + &program_size, &error_str);
>> > if (program == NULL) {
>> > if (prog) {
>> > prog->LinkStatus = false;
>> > --
>> > 2.5.0.400.gff86faf
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 212 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160516/5cff5254/attachment-0001.sig>
More information about the mesa-dev
mailing list