[Mesa-dev] [PATCH 5/5] i965: Fix rendering to small mipmaps of depth/stencil buffers using a temp mt.
Eric Anholt
eric at anholt.net
Fri Oct 12 15:30:38 PDT 2012
Fixes 51 piglit tests (fbo-clear-formats, and most of the remaining failures
in depthstencil).
---
src/mesa/drivers/dri/i965/brw_context.h | 1 +
src/mesa/drivers/dri/i965/brw_draw.c | 5 +
src/mesa/drivers/dri/i965/brw_misc_state.c | 187 ++++++++++++++++++---------
src/mesa/drivers/dri/i965/gen7_misc_state.c | 100 ++++++--------
4 files changed, 172 insertions(+), 121 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 16bcb9c..eae41d1 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1134,6 +1134,7 @@ void brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
struct intel_mipmap_tree *stencil_mt,
uint32_t *out_tile_mask_x,
uint32_t *out_tile_mask_y);
+void brw_workaround_depthstencil_alignment(struct brw_context *brw);
/*======================================================================
* brw_queryobj.c
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 21abfbe..323310a 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -440,6 +440,11 @@ static bool brw_try_draw_prims( struct gl_context *ctx,
*/
brw_predraw_resolve_buffers(brw);
+ /* This workaround has to happen outside of brw_state_upload() because it
+ * may flush the batchbuffer for a blit, affecting the state flags.
+ */
+ brw_workaround_depthstencil_alignment(brw);
+
/* Bind all inputs, derive varying and size information:
*/
brw_merge_inputs( brw, arrays );
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index be8dcc4..ce23fa0 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -317,6 +317,91 @@ brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
*out_tile_mask_y = tile_mask_y;
}
+void
+brw_workaround_depthstencil_alignment(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ struct gl_context *ctx = &intel->ctx;
+ struct gl_framebuffer *fb = ctx->DrawBuffer;
+ bool rebase_depth = false;
+ bool rebase_stencil = false;
+ struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
+ struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
+ struct intel_mipmap_tree *depth_mt = NULL;
+ struct intel_mipmap_tree *stencil_mt = NULL;
+
+ if (depth_irb)
+ depth_mt = depth_irb->mt;
+ if (stencil_irb)
+ stencil_mt = stencil_irb->mt;
+
+ uint32_t tile_mask_x, tile_mask_y;
+ brw_get_depthstencil_tile_masks(depth_mt, stencil_mt,
+ &tile_mask_x, &tile_mask_y);
+
+ if (depth_irb) {
+ uint32_t depth_tile_x = depth_irb->draw_x & tile_mask_x;
+ uint32_t depth_tile_y = depth_irb->draw_y & tile_mask_y;
+
+ /* The low 3 bits of x and y tile offset are ignored by the hardware.
+ * Rebase if they're set, so that we can actually render to the buffer.
+ */
+ if (depth_tile_x & 7 || depth_tile_y & 7)
+ rebase_depth = true;
+
+ /* We didn't even have intra-tile offsets before g45. */
+ if (intel->gen == 4 && !intel->is_g4x) {
+ if (depth_tile_x || depth_tile_y)
+ rebase_depth = true;
+ }
+
+ if (stencil_irb) {
+ int stencil_tile_x = stencil_irb->draw_x & tile_mask_x;
+ int stencil_tile_y = stencil_irb->draw_y & tile_mask_y;
+
+ /* If the two don't match up, then we need to move them to a
+ * temporary so that the x/y draw offsets will end up being 0.
+ */
+ if (depth_tile_x != stencil_tile_x ||
+ depth_tile_y != stencil_tile_y) {
+ rebase_depth = true;
+ rebase_stencil = true;
+ }
+ }
+ }
+
+ /* If we have (just) stencil, check it for ignored low bits as well */
+ if (stencil_irb) {
+ uint32_t stencil_tile_x = stencil_irb->draw_x & tile_mask_x;
+ uint32_t stencil_tile_y = stencil_irb->draw_y & tile_mask_y;
+
+ if (stencil_tile_x & 7 || stencil_tile_y & 7)
+ rebase_stencil = true;
+
+ if (intel->gen == 4 && !intel->is_g4x) {
+ if (stencil_tile_x || stencil_tile_y)
+ rebase_stencil = true;
+ }
+ }
+
+ if (rebase_depth) {
+ intel_renderbuffer_move_to_temp(intel, depth_irb);
+
+ if (stencil_irb && stencil_irb->mt == depth_mt) {
+ intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
+ intel_renderbuffer_set_draw_offset(stencil_irb);
+ }
+ }
+ if (rebase_stencil) {
+ intel_renderbuffer_move_to_temp(intel, stencil_irb);
+
+ if (depth_irb && depth_irb->mt == stencil_mt) {
+ intel_miptree_reference(&depth_irb->mt, stencil_irb->mt);
+ intel_renderbuffer_set_draw_offset(depth_irb);
+ }
+ }
+}
+
static void emit_depthbuffer(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
@@ -331,12 +416,6 @@ static void emit_depthbuffer(struct brw_context *brw)
unsigned int len;
bool separate_stencil = false;
- /* Amount by which drawing should be offset in order to draw to the
- * appropriate miplevel/zoffset/cubeface. We will extract these values
- * from depth_irb or stencil_irb once we determine which is present.
- */
- uint32_t draw_x = 0, draw_y = 0;
-
if (depth_irb &&
depth_irb->mt &&
depth_irb->mt->hiz_mt) {
@@ -367,6 +446,39 @@ static void emit_depthbuffer(struct brw_context *brw)
brw_get_depthstencil_tile_masks(depth_mt, stencil_mt,
&tile_mask_x, &tile_mask_y);
+ /* The intra-tile offsets should already have been forced into agreement by
+ * brw_workaround_depthstencil_alignment().
+ */
+ uint32_t tile_x = 0, tile_y = 0;
+ if (depth_mt) {
+ tile_x = depth_irb->draw_x & tile_mask_x;
+ tile_y = depth_irb->draw_y & tile_mask_y;
+
+ if (stencil_mt) {
+ assert((stencil_irb->draw_x & tile_mask_x) == tile_x);
+ assert((stencil_irb->draw_y & tile_mask_y) == tile_y);
+ }
+ } else if (stencil_mt) {
+ tile_x = stencil_irb->draw_x & tile_mask_x;
+ tile_y = stencil_irb->draw_y & tile_mask_y;
+ }
+
+ /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
+ * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
+ * Coordinate Offset X/Y":
+ *
+ * "The 3 LSBs of both offsets must be zero to ensure correct
+ * alignment"
+ *
+ * This should already have been corrected by
+ * brw_workaround_depthstencil_alignment().
+ */
+ WARN_ONCE((tile_x & 7) || (tile_y & 7),
+ "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
+ "Truncating offset, bad rendering may occur.\n");
+ tile_x &= ~7;
+ tile_y &= ~7;
+
/* If there's a packed depth/stencil bound to stencil only, we need to
* emit the packed depth/stencil buffer packet.
*/
@@ -398,8 +510,6 @@ static void emit_depthbuffer(struct brw_context *brw)
ADVANCE_BATCH();
} else if (!depth_irb && separate_stencil) {
- uint32_t tile_x, tile_y;
-
/*
* There exists a separate stencil buffer but no depth buffer.
*
@@ -422,29 +532,6 @@ static void emit_depthbuffer(struct brw_context *brw)
*/
assert(intel->has_separate_stencil);
- draw_x = stencil_irb->draw_x;
- draw_y = stencil_irb->draw_y;
- tile_x = draw_x & tile_mask_x;
- tile_y = draw_y & tile_mask_y;
-
- /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
- * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
- * Coordinate Offset X/Y":
- *
- * "The 3 LSBs of both offsets must be zero to ensure correct
- * alignment"
- *
- * We have no guarantee that tile_x and tile_y are correctly aligned,
- * since they are determined by the mipmap layout, which is only aligned
- * to multiples of 4.
- *
- * So, to avoid hanging the GPU, just smash the low order 3 bits of
- * tile_x and tile_y to 0. This is a temporary workaround until we come
- * up with a better solution.
- */
- tile_x &= ~7;
- tile_y &= ~7;
-
BEGIN_BATCH(len);
OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
@@ -470,7 +557,7 @@ static void emit_depthbuffer(struct brw_context *brw)
} else {
struct intel_region *region = depth_irb->mt->region;
- uint32_t tile_x, tile_y, offset;
+ uint32_t offset;
/* If using separate stencil, hiz must be enabled. */
assert(!separate_stencil || hiz_region);
@@ -478,32 +565,10 @@ static void emit_depthbuffer(struct brw_context *brw)
assert(intel->gen < 6 || region->tiling == I915_TILING_Y);
assert(!hiz_region || region->tiling == I915_TILING_Y);
- draw_x = depth_irb->draw_x;
- draw_y = depth_irb->draw_y;
- tile_x = draw_x & tile_mask_x;
- tile_y = draw_y & tile_mask_y;
-
- /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
- * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
- * Coordinate Offset X/Y":
- *
- * "The 3 LSBs of both offsets must be zero to ensure correct
- * alignment"
- *
- * We have no guarantee that tile_x and tile_y are correctly aligned,
- * since they are determined by the mipmap layout, which is only aligned
- * to multiples of 4.
- *
- * So, to avoid hanging the GPU, just smash the low order 3 bits of
- * tile_x and tile_y to 0. This is a temporary workaround until we come
- * up with a better solution.
- */
- tile_x &= ~7;
- tile_y &= ~7;
-
offset = intel_region_get_aligned_offset(region,
- draw_x & ~tile_mask_x,
- draw_y & ~tile_mask_y, false);
+ depth_irb->draw_x & ~tile_mask_x,
+ depth_irb->draw_y & ~tile_mask_y,
+ false);
BEGIN_BATCH(len);
OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
@@ -546,8 +611,8 @@ static void emit_depthbuffer(struct brw_context *brw)
if (hiz_region) {
uint32_t hiz_offset =
intel_region_get_aligned_offset(hiz_region,
- draw_x & ~tile_mask_x,
- (draw_y & ~tile_mask_y) / 2,
+ depth_irb->draw_x & ~tile_mask_x,
+ (depth_irb->draw_y & ~tile_mask_y) / 2,
false);
BEGIN_BATCH(3);
@@ -574,8 +639,8 @@ static void emit_depthbuffer(struct brw_context *brw)
* that the region is untiled; in fact it's W tiled.
*/
uint32_t stencil_offset =
- (draw_y & ~tile_mask_y) * region->pitch +
- (draw_x & ~tile_mask_x) * 64;
+ (stencil_irb->draw_y & ~tile_mask_y) * region->pitch +
+ (stencil_irb->draw_x & ~tile_mask_x) * 64;
BEGIN_BATCH(3);
OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c b/src/mesa/drivers/dri/i965/gen7_misc_state.c
index 337143c..50fa0ec 100644
--- a/src/mesa/drivers/dri/i965/gen7_misc_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c
@@ -42,12 +42,6 @@ static void emit_depthbuffer(struct brw_context *brw)
*stencil_mt = NULL,
*hiz_mt = NULL;
- /* Amount by which drawing should be offset in order to draw to the
- * appropriate miplevel/zoffset/cubeface. We will extract these values
- * from depth_irb or stencil_irb once we determine which is present.
- */
- uint32_t draw_x = 0, draw_y = 0;
-
if (drb)
depth_mt = drb->mt;
@@ -66,6 +60,39 @@ static void emit_depthbuffer(struct brw_context *brw)
brw_get_depthstencil_tile_masks(depth_mt, stencil_mt,
&tile_mask_x, &tile_mask_y);
+ /* The intra-tile offsets should already have been forced into agreement by
+ * brw_workaround_depthstencil_alignment().
+ */
+ uint32_t tile_x = 0, tile_y = 0;
+ if (depth_mt) {
+ tile_x = drb->draw_x & tile_mask_x;
+ tile_y = drb->draw_y & tile_mask_y;
+
+ if (stencil_mt) {
+ assert((srb->draw_x & tile_mask_x) == tile_x);
+ assert((srb->draw_y & tile_mask_y) == tile_y);
+ }
+ } else if (stencil_mt) {
+ tile_x = srb->draw_x & tile_mask_x;
+ tile_y = srb->draw_y & tile_mask_y;
+ }
+
+ /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
+ * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
+ * Coordinate Offset X/Y":
+ *
+ * "The 3 LSBs of both offsets must be zero to ensure correct
+ * alignment"
+ *
+ * This should already have been corrected by
+ * brw_workaround_depthstencil_alignment().
+ */
+ WARN_ONCE((tile_x & 7) || (tile_y & 7),
+ "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
+ "Truncating offset, bad rendering may occur.\n");
+ tile_x &= ~7;
+ tile_y &= ~7;
+
/* Gen7 doesn't support packed depth/stencil */
assert(stencil_mt == NULL || depth_mt != stencil_mt);
assert(!depth_mt || !_mesa_is_format_packed_depth_stencil(depth_mt->format));
@@ -75,7 +102,6 @@ static void emit_depthbuffer(struct brw_context *brw)
if (depth_mt == NULL) {
uint32_t dw1 = BRW_DEPTHFORMAT_D32_FLOAT << 18;
uint32_t dw3 = 0;
- uint32_t tile_x = 0, tile_y = 0;
if (stencil_mt == NULL) {
dw1 |= (BRW_SURFACE_NULL << 29);
@@ -83,29 +109,6 @@ static void emit_depthbuffer(struct brw_context *brw)
/* _NEW_STENCIL: enable stencil buffer writes */
dw1 |= ((ctx->Stencil.WriteMask != 0) << 27);
- draw_x = srb->draw_x;
- draw_y = srb->draw_y;
- tile_x = draw_x & tile_mask_x;
- tile_y = draw_y & tile_mask_y;
-
- /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
- * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
- * Coordinate Offset X/Y":
- *
- * "The 3 LSBs of both offsets must be zero to ensure correct
- * alignment"
- *
- * We have no guarantee that tile_x and tile_y are correctly aligned,
- * since they are determined by the mipmap layout, which is only
- * aligned to multiples of 4.
- *
- * So, to avoid hanging the GPU, just smash the low order 3 bits of
- * tile_x and tile_y to 0. This is a temporary workaround until we
- * come up with a better solution.
- */
- tile_x &= ~7;
- tile_y &= ~7;
-
/* 3DSTATE_STENCIL_BUFFER inherits surface type and dimensions. */
dw1 |= (BRW_SURFACE_2D << 29);
dw3 = ((srb->Base.Base.Width + tile_x - 1) << 4) |
@@ -123,34 +126,11 @@ static void emit_depthbuffer(struct brw_context *brw)
ADVANCE_BATCH();
} else {
struct intel_region *region = depth_mt->region;
- uint32_t tile_x, tile_y, offset;
-
- draw_x = drb->draw_x;
- draw_y = drb->draw_y;
- tile_x = draw_x & tile_mask_x;
- tile_y = draw_y & tile_mask_y;
-
- /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
- * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
- * Coordinate Offset X/Y":
- *
- * "The 3 LSBs of both offsets must be zero to ensure correct
- * alignment"
- *
- * We have no guarantee that tile_x and tile_y are correctly aligned,
- * since they are determined by the mipmap layout, which is only aligned
- * to multiples of 4.
- *
- * So, to avoid hanging the GPU, just smash the low order 3 bits of
- * tile_x and tile_y to 0. This is a temporary workaround until we come
- * up with a better solution.
- */
- tile_x &= ~7;
- tile_y &= ~7;
+ uint32_t offset;
offset = intel_region_get_aligned_offset(region,
- draw_x & ~tile_mask_x,
- draw_y & ~tile_mask_y,
+ drb->draw_x & ~tile_mask_x,
+ drb->draw_y & ~tile_mask_y,
false);
assert(region->tiling == I915_TILING_Y);
@@ -184,8 +164,8 @@ static void emit_depthbuffer(struct brw_context *brw)
} else {
uint32_t hiz_offset =
intel_region_get_aligned_offset(hiz_mt->region,
- draw_x & ~tile_mask_x,
- (draw_y & ~tile_mask_y) / 2,
+ drb->draw_x & ~tile_mask_x,
+ (drb->draw_y & ~tile_mask_y) / 2,
false);
BEGIN_BATCH(3);
OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
@@ -211,8 +191,8 @@ static void emit_depthbuffer(struct brw_context *brw)
* that the region is untiled; in fact it's W tiled.
*/
uint32_t stencil_offset =
- (draw_y & ~tile_mask_y) * stencil_mt->region->pitch +
- (draw_x & ~tile_mask_x) * 64;
+ (srb->draw_y & ~tile_mask_y) * stencil_mt->region->pitch +
+ (srb->draw_x & ~tile_mask_x) * 64;
BEGIN_BATCH(3);
OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
--
1.7.10.4
More information about the mesa-dev
mailing list