[Mesa-dev] [PATCH v4] i965: Update workaround names and platforms
Sameer Kibey
sameer.kibey at intel.com
Wed Feb 10 20:21:49 UTC 2016
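Update the format in which workarounds are documented in the
source code: each workaround is now tagged on its own comment line
as WaName:platform[,platform...]. This allows the Mesa sources to
be parsed by the list-workarounds utility in intel-gpu-tools.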
Signed-off-by: Sameer Kibey <sameer.kibey at intel.com>
---
src/mesa/drivers/dri/i965/brw_binding_tables.c | 3 ++-
src/mesa/drivers/dri/i965/brw_blorp.cpp | 2 ++
src/mesa/drivers/dri/i965/brw_eu_emit.c | 2 ++
src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 6 ++++++
src/mesa/drivers/dri/i965/brw_pipe_control.c | 2 ++
src/mesa/drivers/dri/i965/gen6_queryobj.c | 5 +++--
src/mesa/drivers/dri/i965/gen8_depth_state.c | 7 ++++---
src/mesa/drivers/dri/i965/intel_batchbuffer.c | 2 +-
8 files changed, 22 insertions(+), 7 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c b/src/mesa/drivers/dri/i965/brw_binding_tables.c
index f3a0310..6dd35dd 100644
--- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
+++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
@@ -54,13 +54,14 @@ static uint32_t
reserve_hw_bt_space(struct brw_context *brw, unsigned bytes)
{
/* From the Broadwell PRM, Volume 16, "Workarounds",
- * WaStateBindingTableOverfetch:
* "HW over-fetches two cache lines of binding table indices. When
* using the resource streamer, SW needs to pad binding table pointer
* updates with an additional two cache lines."
*
* Cache lines are 64 bytes, so we subtract 128 bytes from the size of
* the binding table pool buffer.
+ *
+ * WaStateBindingTableOverfetch:hsw,bdw,chv,bxt
*/
if (brw->hw_bt_pool.next_offset + bytes >= brw->hw_bt_pool.bo->size - 128) {
gen7_reset_hw_bt_pool_offsets(brw);
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp
index 1bc6d15..f798e29 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp
@@ -318,6 +318,8 @@ brw_hiz_op_params::brw_hiz_op_params(struct intel_mipmap_tree *mt,
* SURFACE_STATE.Surface_Horizontal_Alignment should be 4 for Z24 surfaces,
* not 8. But commit 1f112cc increased the alignment from 4 to 8, which
* prevents the clobbering.
+ *
+ * WaHizAmbiguate8x4Aligned:hsw
*/
depth.width = ALIGN(depth.width, 8);
depth.height = ALIGN(depth.height, 4);
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 35d8039..7a6179a 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1891,6 +1891,8 @@ void brw_CMP(struct brw_codegen *p,
*
* It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
* mentioned on their work-arounds pages.
+ *
+ * WaCMPInstNullDstForcesThreadSwitch:ivb,hsw,vlv
*/
if (devinfo->gen == 7) {
if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 1916a99..b91a405 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1846,6 +1846,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
* We choose to split into CMP(8) instructions since disabling
* coissuing would affect CMP instructions not otherwise affected by
* the errata.
+ *
+ * WaCMPInstFlagDepClearedEarly:ivb,vlv
*/
if (dispatch_width == 16 && devinfo->gen == 7 && !devinfo->is_haswell) {
if (dst.file == BRW_GENERAL_REGISTER_FILE) {
@@ -1932,6 +1934,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
* should
*
* "Force BFI instructions to be executed always in SIMD8."
+ *
+ * WaForceSIMD8ForBFIInstruction:hsw
*/
if (dispatch_width == 16 && devinfo->is_haswell) {
brw_set_default_exec_size(p, BRW_EXECUTE_8);
@@ -1954,6 +1958,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
*
* Otherwise we would be able to emit compressed instructions like we
* do for the other three-source instructions.
+ *
+ * WaForceSIMD8ForBFIInstruction:hsw
*/
if (dispatch_width == 16 &&
(devinfo->is_haswell || !devinfo->supports_simd16_3src)) {
diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c
index b41e28e..51b20b2 100644
--- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
@@ -67,6 +67,8 @@ gen8_add_cs_stall_workaround_bits(uint32_t *flags)
*
* Note that the kernel does CS stalls between batches, so we only need
* to count them within a batch.
+ *
+ * WaCsStallAtEveryFourthPipecontrol:ivb,vlv
*/
static uint32_t
gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c
index d508c4c..4e8bab1 100644
--- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
+++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
@@ -238,8 +238,7 @@ gen6_queryobj_get_results(struct gl_context *ctx,
case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
query->Base.Result = (results[1] - results[0]);
- /* Implement the "WaDividePSInvocationCountBy4:HSW,BDW" workaround:
- * "Invocation counter is 4 times actual. WA: SW to divide HW reported
+ /* "Invocation counter is 4 times actual. WA: SW to divide HW reported
* PS Invocations value by 4."
*
* Prior to Haswell, invocation count was counted by the WM, and it
@@ -247,6 +246,8 @@ gen6_queryobj_get_results(struct gl_context *ctx,
* correct value, the CS multiplied this by 4. With HSW the logic moved,
* and correctly emitted the number of pixel shader invocations, but,
* whomever forgot to undo the multiply by 4.
+ *
+ * WaDividePSInvocationCountBy4:hsw,bdw,chv
*/
if (brw->gen == 8 || brw->is_haswell)
query->Base.Result /= 4;
diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c
index 93100a0..fd3d5a7 100644
--- a/src/mesa/drivers/dri/i965/gen8_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c
@@ -193,9 +193,10 @@ gen8_emit_depth_stencil_hiz(struct brw_context *brw,
case GL_TEXTURE_1D_ARRAY:
case GL_TEXTURE_1D:
if (brw->gen >= 9) {
- /* WaDisable1DDepthStencil. Skylake+ doesn't support 1D depth
- * textures but it does allow pretending it's a 2D texture
- * instead.
+ /* Skylake+ doesn't support 1D depth textures but it does allow
+ * pretending it's a 2D texture instead.
+ *
+ * WaDisable1DDepthStencil:skl,bxt,kbl
*/
surftype = BRW_SURFACE_2D;
break;
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index f778074..c6ec379 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -228,7 +228,7 @@ brw_finish_batch(struct brw_context *brw)
* From the example in the docs, it seems to expect a regular pipe control
* flush here as well. We may have done it already, but meh.
*
- * See also WaAvoidRCZCounterRollover.
+ * WaAvoidRCZCounterRollover:hsw
*/
brw_emit_mi_flush(brw);
BEGIN_BATCH(2);
--
1.9.1
More information about the mesa-dev mailing list