[Mesa-dev] [PATCH] RFC: Workaround for gen9 hw astc5x5 sampler bug
Rogovin, Kevin
kevin.rogovin at intel.com
Tue Dec 5 05:41:56 UTC 2017
Hi,
The patch series I have submitted handles the case of needing to resolve texture surfaces when a draw (or compute) accesses a texture which is astc5x5. As it is quite clear you understand the issue and know the code of i965 the patch centers on, you are an excellent person to review the code.
Here are my comments of the patch posted:
1. it is essentially replication and moving around of the code of the patch series posted earlier but missing various
important bits: preventing the sampler from using the auxiliary buffer (this requires to modify surface state
sent in brw_wm_surfaces.c). It also does not cover blorp sufficiently (blorp might read from an ASTC5x5
and there are more paths in blorp than blorp_surf_for_miptree() that sample from surfaces.
2. using the check that the GPU is gen9 (and for that matter, using the name gen9_ astc5x5_sampler_wa())
is not ideal; I would not be surprised that the bug might not be present on various re-spins of Gen9 and/or
it might linger on to future Gens. I do NOT know; using a Boolean assigned earlier makes the code more
future-ish proof.
3. the nature of GPU fragment dispatch is going to require a texture invalidate even if the sampler only
have the bug in one direction; this is because a subslice is not guaranteed to process fragments in any
order. The crux is that a single sampler serves an entire sub-slice which has more than 1 EU. It is quite
possible that one EU has threads of a draw call ahead of the other and depending on the timing, portions
of those fragments' coming after might be processed by the sampler of those before of those fragments
coming before in batchbuffer order. Indeed a single EU might have threads from separate draws as well.
A texture invalidate places a barrier in the pipeline preventing the mixing (and means that efficiency of
GPU drops quite a bit with every texture invalidate sadly).
4. With 3 in mind, using the bit-masks is not a good idea as we want to then enforce at the code level
that only one of the two is possible without texture invalidates.
-Kevin
-----Original Message-----
From: Topi Pohjolainen [mailto:topi.pohjolainen at gmail.com]
Sent: Monday, December 4, 2017 12:49 PM
To: mesa-dev at lists.freedesktop.org
Cc: Rogovin, Kevin <kevin.rogovin at intel.com>
Subject: [PATCH] RFC: Workaround for gen9 hw astc5x5 sampler bug
This is just drafting some thoughts and only compile tested.
CC: "Rogovin, Kevin" <kevin.rogovin at intel.com>
---
src/mesa/drivers/dri/i965/brw_blorp.c | 8 +++++
src/mesa/drivers/dri/i965/brw_context.h | 10 ++++++
src/mesa/drivers/dri/i965/brw_draw.c | 54 ++++++++++++++++++++++++++++++++-
3 files changed, 71 insertions(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c
index 680121b6ab..b3f84ab8ca 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -186,11 +186,19 @@ blorp_surf_for_miptree(struct brw_context *brw,
surf->aux_addr.buffer = mt->hiz_buf->bo;
surf->aux_addr.offset = mt->hiz_buf->offset;
}
+
+ if (!is_render_target && brw->screen->devinfo.gen == 9)
+ gen9_astc5x5_sampler_wa(brw, GEN9_ASTC5X5_WA_TEX_TYPE_AUX);
} else {
surf->aux_addr = (struct blorp_address) {
.buffer = NULL,
};
memset(&surf->clear_color, 0, sizeof(surf->clear_color));
+
+ if (!is_render_target && brw->screen->devinfo.gen == 9 &&
+ (mt->format == MESA_FORMAT_RGBA_ASTC_5x5 ||
+ mt->format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5))
+ gen9_astc5x5_sampler_wa(brw,
+ GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5);
}
assert((surf->aux_usage == ISL_AUX_USAGE_NONE) ==
(surf->aux_addr.buffer == NULL)); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 0670483806..44602c23c0 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -165,6 +165,11 @@ enum brw_cache_id {
BRW_MAX_CACHE
};
+enum gen9_astc5x5_wa_tex_type {
+ GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5 = 1 << 0,
+ GEN9_ASTC5X5_WA_TEX_TYPE_AUX = 1 << 1,
+};
+
enum brw_state_id {
/* brw_cache_ids must come first - see brw_program_cache.c */
BRW_STATE_URB_FENCE = BRW_MAX_CACHE, @@ -1262,6 +1267,8 @@ struct brw_context
*/
bool draw_aux_buffer_disabled[MAX_DRAW_BUFFERS];
+ enum gen9_astc5x5_wa_tex_type gen9_sampler_wa_tex_mask;
+
__DRIcontext *driContext;
struct intel_screen *screen;
};
@@ -1286,6 +1293,9 @@ void intel_update_renderbuffers(__DRIcontext *context,
__DRIdrawable *drawable); void intel_prepare_render(struct brw_context *brw);
+void gen9_astc5x5_sampler_wa(struct brw_context *brw,
+ enum gen9_astc5x5_wa_tex_type curr_mask);
+
void brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering);
void intel_resolve_for_dri2_flush(struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 7e29dcfd4e..929f806eb3 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -371,6 +371,50 @@ intel_disable_rb_aux_buffer(struct brw_context *brw,
return found;
}
+static enum gen9_astc5x5_wa_tex_type
+gen9_astc5x5_wa_get_tex_mask(const struct brw_context *brw) {
+ enum gen9_astc5x5_wa_tex_type mask = 0;
+ const struct gl_context *ctx = &brw->ctx;
+ const struct intel_texture_object *tex_obj;
+
+ const int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
+ for (int i = 0; i <= maxEnabledUnit; i++) {
+ if (!ctx->Texture.Unit[i]._Current)
+ continue;
+ tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
+ if (!tex_obj || !tex_obj->mt)
+ continue;
+
+ if (tex_obj->mt->aux_usage != ISL_AUX_USAGE_NONE)
+ mask |= GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5;
+
+ if (tex_obj->_Format == MESA_FORMAT_RGBA_ASTC_5x5 ||
+ tex_obj->_Format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5)
+ mask |= GEN9_ASTC5X5_WA_TEX_TYPE_AUX;
+ }
+
+ return mask;
+}
+
+/* TODO: Do we actually need this both ways: astc5x5 followed by aux
+ * and vice-versa? Or is only one direction problematic?
+ */
+void
+gen9_astc5x5_sampler_wa(struct brw_context *brw,
+ enum gen9_astc5x5_wa_tex_type curr_mask) {
+ if ((brw->gen9_sampler_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) &&
+ (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX))
+ brw_emit_pipe_control_flush(brw,
+PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
+
+ if ((brw->gen9_sampler_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX) &&
+ (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5))
+ brw_emit_pipe_control_flush(brw,
+ PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
+
+ brw->gen9_sampler_wa_tex_mask = curr_mask; }
+
/**
* \brief Resolve buffers before drawing.
*
@@ -383,6 +427,12 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering)
struct gl_context *ctx = &brw->ctx;
struct intel_texture_object *tex_obj;
+ const enum gen9_astc5x5_wa_tex_type curr_wa_mask =
+ (brw->screen->devinfo.gen == 9) ?
+ gen9_astc5x5_wa_get_tex_mask(brw) : 0;
+
+ if (brw->screen->devinfo.gen == 9)
+ gen9_astc5x5_sampler_wa(brw, curr_wa_mask);
+
memset(brw->draw_aux_buffer_disabled, 0,
sizeof(brw->draw_aux_buffer_disabled));
@@ -413,6 +463,8 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering)
num_layers = INTEL_REMAINING_LAYERS;
}
+ const bool sampler_wa_disable_aux =
+ curr_wa_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5;
const bool disable_aux = rendering &&
intel_disable_rb_aux_buffer(brw, tex_obj->mt, min_level, num_levels,
"for sampling"); @@ -420,7 +472,7 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering)
intel_miptree_prepare_texture(brw, tex_obj->mt, view_format,
min_level, num_levels,
min_layer, num_layers,
- disable_aux);
+ disable_aux ||
+ sampler_wa_disable_aux);
brw_cache_flush_for_read(brw, tex_obj->mt->bo);
--
2.14.1
More information about the mesa-dev
mailing list