[Mesa-dev] [PATCH 3/3] i965/gen9+: Switch thread scratch space to non-coherent stateless access.

Francisco Jerez currojerez at riseup.net
Wed Nov 25 11:37:54 PST 2015


The thread scratch space is thread-local so using the full IA-coherent
stateless surface index (255 since Gen8) is unnecessary and
potentially expensive.  On Gen8 and early steppings of Gen9 this is
not a functional change because the kernel already sets bit 4 of
HDC_CHICKEN0 which overrides all HDC memory access to be non-coherent
in order to workaround a hardware bug.

This happens to fix a full system hang when running any spilling code
on a pre-production SKL GT4e machine I have on my desk (forcing all
HDC access to non-coherent from the kernel up to stepping F0 might be
a good idea though regardless of this patch), and improves performance
of the OglPSBump2 SynMark benchmark run with INTEL_DEBUG=spill_fs by
33% (11 runs, 5% significance) on a production SKL GT2 (on which HDC
IA-coherency is apparently functional so it wouldn't make sense to
disable globally).
---
 src/mesa/drivers/dri/i965/brw_eu.h               |  2 ++
 src/mesa/drivers/dri/i965/brw_eu_emit.c          | 17 +++++++++++++++--
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp |  4 ++--
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 829e393..62891c7 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -317,6 +317,8 @@ void brw_oword_block_read(struct brw_codegen *p,
 			  uint32_t offset,
 			  uint32_t bind_table_index);
 
+unsigned brw_scratch_surface_idx(const struct brw_codegen *p);
+
 void brw_oword_block_read_scratch(struct brw_codegen *p,
 				  struct brw_reg dest,
 				  struct brw_reg mrf,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index da1ddfd..bb6f5dc 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1997,6 +1997,19 @@ void gen6_math(struct brw_codegen *p,
    brw_set_src1(p, insn, src1);
 }
 
+/**
+ * Return the right surface index to access the thread scratch space using
+ * stateless dataport messages.
+ */
+unsigned
+brw_scratch_surface_idx(const struct brw_codegen *p)
+{
+   /* The scratch space is thread-local so IA coherency is unnecessary. */
+   if (p->devinfo->gen >= 8)
+      return GEN8_BTI_STATELESS_NON_COHERENT;
+   else
+      return BRW_BTI_STATELESS;
+}
 
 /**
  * Write a block of OWORDs (half a GRF each) from the scratch buffer,
@@ -2097,7 +2110,7 @@ void brw_oword_block_write_scratch(struct brw_codegen *p,
 
       brw_set_dp_write_message(p,
 			       insn,
-			       255, /* binding table index (255=stateless) */
+                               brw_scratch_surface_idx(p),
 			       msg_control,
 			       msg_type,
 			       mlen,
@@ -2183,7 +2196,7 @@ brw_oword_block_read_scratch(struct brw_codegen *p,
 
       brw_set_dp_read_message(p,
 			      insn,
-			      255, /* binding table index (255=stateless) */
+                              brw_scratch_surface_idx(p),
 			      msg_control,
 			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
 			      BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 20107ac..65b4358 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -800,7 +800,7 @@ generate_scratch_read(struct brw_codegen *p,
    if (devinfo->gen < 6)
       brw_inst_set_cond_modifier(devinfo, send, inst->base_mrf);
    brw_set_dp_read_message(p, send,
-			   255, /* binding table index: stateless access */
+                           brw_scratch_surface_idx(p),
 			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
 			   msg_type,
 			   BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
@@ -873,7 +873,7 @@ generate_scratch_write(struct brw_codegen *p,
    if (devinfo->gen < 6)
       brw_inst_set_cond_modifier(p->devinfo, send, inst->base_mrf);
    brw_set_dp_write_message(p, send,
-			    255, /* binding table index: stateless access */
+                            brw_scratch_surface_idx(p),
 			    BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
 			    msg_type,
 			    3, /* mlen */
-- 
2.5.1



More information about the mesa-dev mailing list