[Mesa-dev] [PATCH] i965/fs: Add an allow_spilling flag to brw_compile_fs

Mon May 16 21:33:07 UTC 2016

This allows us to disable spilling for blorp shaders since blorp state
setup doesn't handle spilling.  Without this, blorp faisl hard if you run
with INTEL_DEBUG=spill.

Cc: Curro <currojerez at riseup.net>
---
 src/intel/vulkan/anv_pipeline.c                   |  3 ++-
 src/mesa/drivers/dri/i965/brw_blorp.c             |  2 +-
 src/mesa/drivers/dri/i965/brw_compiler.h          |  1 +
 src/mesa/drivers/dri/i965/brw_fs.cpp              | 29 +++++++++++++----------
 src/mesa/drivers/dri/i965/brw_fs.h                |  6 ++---
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp |  4 ++--
 src/mesa/drivers/dri/i965/brw_wm.c                |  3 ++-
 7 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index a8e31b1..7d265d8 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -675,7 +675,8 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
       unsigned code_size;
       const unsigned *shader_code =
          brw_compile_fs(compiler, NULL, mem_ctx, &key, &prog_data, nir,
-                        NULL, -1, -1, pipeline->use_repclear, &code_size, NULL);
+                        NULL, -1, -1, true, pipeline->use_repclear,
+                        &code_size, NULL);
       if (shader_code == NULL) {
          ralloc_free(mem_ctx);
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c
index 09a0fd1..9590968 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -223,7 +223,7 @@ brw_blorp_compile_nir_shader(struct brw_context *brw, struct nir_shader *nir,
 
    const unsigned *program =
       brw_compile_fs(compiler, brw, mem_ctx, wm_key, &wm_prog_data, nir,
-                     NULL, -1, -1, use_repclear, program_size, NULL);
+                     NULL, -1, -1, false, use_repclear, program_size, NULL);
 
    /* Copy the relavent bits of wm_prog_data over into the blorp prog data */
    prog_data->dispatch_8 = wm_prog_data.dispatch_8;
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index a2148ae..15f32be 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -789,6 +789,7 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
                struct gl_program *prog,
                int shader_time_index8,
                int shader_time_index16,
+               bool allow_spilling,
                bool use_rep_send,
                unsigned *final_assembly_size,
                char **error_str);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 2de5533..55d10fc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -5462,7 +5462,7 @@ fs_visitor::fixup_3src_null_dest()
 }
 
 void
-fs_visitor::allocate_registers()
+fs_visitor::allocate_registers(bool allow_spilling)
 {
    bool allocated_without_spills;
 
@@ -5472,6 +5472,8 @@ fs_visitor::allocate_registers()
       SCHEDULE_PRE_LIFO,
    };
 
+   bool spill_all = allow_spilling && (INTEL_DEBUG & DEBUG_SPILL_FS);
+
    /* Try each scheduling heuristic to see if it can successfully register
     * allocate without spilling.  They should be ordered by decreasing
     * performance but increasing likelihood of allocating.
@@ -5483,7 +5485,7 @@ fs_visitor::allocate_registers()
          assign_regs_trivial();
          allocated_without_spills = true;
       } else {
-         allocated_without_spills = assign_regs(false);
+         allocated_without_spills = assign_regs(false, spill_all);
       }
       if (allocated_without_spills)
          break;
@@ -5508,12 +5510,14 @@ fs_visitor::allocate_registers()
       /* Since we're out of heuristics, just go spill registers until we
        * get an allocation.
        */
-      while (!assign_regs(true)) {
+      while (!assign_regs(true, spill_all)) {
          if (failed)
             break;
       }
    }
 
+   assert(last_scratch == 0 || allow_spilling);
+
    /* This must come after all optimization and register allocation, since
     * it inserts dead code that happens to have side effects, and it does
     * so based on the actual physical registers in use.
@@ -5559,7 +5563,7 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes)
    assign_vs_urb_setup();
 
    fixup_3src_null_dest();
-   allocate_registers();
+   allocate_registers(true);
 
    return !failed;
 }
@@ -5641,7 +5645,7 @@ fs_visitor::run_tcs_single_patch()
    assign_tcs_single_patch_urb_setup();
 
    fixup_3src_null_dest();
-   allocate_registers();
+   allocate_registers(true);
 
    return !failed;
 }
@@ -5675,7 +5679,7 @@ fs_visitor::run_tes()
    assign_tes_urb_setup();
 
    fixup_3src_null_dest();
-   allocate_registers();
+   allocate_registers(true);
 
    return !failed;
 }
@@ -5724,13 +5728,13 @@ fs_visitor::run_gs()
    assign_gs_urb_setup();
 
    fixup_3src_null_dest();
-   allocate_registers();
+   allocate_registers(true);
 
    return !failed;
 }
 
 bool
-fs_visitor::run_fs(bool do_rep_send)
+fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
 {
    brw_wm_prog_data *wm_prog_data = (brw_wm_prog_data *) this->prog_data;
    brw_wm_prog_key *wm_key = (brw_wm_prog_key *) this->key;
@@ -5794,7 +5798,7 @@ fs_visitor::run_fs(bool do_rep_send)
       assign_urb_setup();
 
       fixup_3src_null_dest();
-      allocate_registers();
+      allocate_registers(allow_spilling);
 
       if (failed)
          return false;
@@ -5837,7 +5841,7 @@ fs_visitor::run_cs()
    assign_curb_setup();
 
    fixup_3src_null_dest();
-   allocate_registers();
+   allocate_registers(true);
 
    if (failed)
       return false;
@@ -5962,6 +5966,7 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
                const nir_shader *src_shader,
                struct gl_program *prog,
                int shader_time_index8, int shader_time_index16,
+               bool allow_spilling,
                bool use_rep_send,
                unsigned *final_assembly_size,
                char **error_str)
@@ -6005,7 +6010,7 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
    fs_visitor v8(compiler, log_data, mem_ctx, key,
                  &prog_data->base, prog, shader, 8,
                  shader_time_index8);
-   if (!v8.run_fs(false /* do_rep_send */)) {
+   if (!v8.run_fs(allow_spilling, false /* do_rep_send */)) {
       if (error_str)
          *error_str = ralloc_strdup(mem_ctx, v8.fail_msg);
 
@@ -6023,7 +6028,7 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
                      &prog_data->base, prog, shader, 16,
                      shader_time_index16);
       v16.import_uniforms(&v8);
-      if (!v16.run_fs(use_rep_send)) {
+      if (!v16.run_fs(allow_spilling, use_rep_send)) {
          compiler->shader_perf_log(log_data,
                                    "SIMD16 shader failed to compile: %s",
                                    v16.fail_msg);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index d4eb8fb..b6fe564 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -105,14 +105,14 @@ public:
                                    uint32_t const_offset);
    void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf);
 
-   bool run_fs(bool do_rep_send);
+   bool run_fs(bool allow_spilling, bool do_rep_send);
    bool run_vs(gl_clip_plane *clip_planes);
    bool run_tcs_single_patch();
    bool run_tes();
    bool run_gs();
    bool run_cs();
    void optimize();
-   void allocate_registers();
+   void allocate_registers(bool allow_spilling);
    void setup_fs_payload_gen4();
    void setup_fs_payload_gen6();
    void setup_vs_payload();
@@ -127,7 +127,7 @@ public:
    void assign_tcs_single_patch_urb_setup();
    void assign_tes_urb_setup();
    void assign_gs_urb_setup();
-   bool assign_regs(bool allow_spilling);
+   bool assign_regs(bool allow_spilling, bool spill_all);
    void assign_regs_trivial();
    void calculate_payload_ranges(int payload_node_count,
                                  int *payload_last_use_ip);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 2347cd5..e65f73b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -542,7 +542,7 @@ setup_mrf_hack_interference(fs_visitor *v, struct ra_graph *g,
 }
 
 bool
-fs_visitor::assign_regs(bool allow_spilling)
+fs_visitor::assign_regs(bool allow_spilling, bool spill_all)
 {
    /* Most of this allocation was written for a reg_width of 1
     * (dispatch_width == 8).  In extending to SIMD16, the code was
@@ -668,7 +668,7 @@ fs_visitor::assign_regs(bool allow_spilling)
    }
 
    /* Debug of register spilling: Go spill everything. */
-   if (unlikely(INTEL_DEBUG & DEBUG_SPILL_FS)) {
+   if (unlikely(spill_all)) {
       int reg = choose_spill_reg(g);
 
       if (reg != -1) {
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 395b0b8..8af1ff7 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -137,7 +137,8 @@ brw_codegen_wm_prog(struct brw_context *brw,
    program = brw_compile_fs(brw->intelScreen->compiler, brw, mem_ctx,
                             key, &prog_data, fp->program.Base.nir,
                             &fp->program.Base, st_index8, st_index16,
-                            brw->use_rep_send, &program_size, &error_str);
+                            true, brw->use_rep_send,
+                            &program_size, &error_str);
    if (program == NULL) {
       if (prog) {
          prog->LinkStatus = false;
-- 
2.5.0.400.gff86faf