[Mesa-dev] [PATCH 1/2] i965: Fix mipmap offsets for HiZ and separate stencil buffers.

Paul Berry stereotype441 at gmail.com
Tue Apr 24 12:23:30 PDT 2012


When rendering to a miplevel other than 0 within a color, depth,
stencil, or HiZ buffer, we need to tell the GPU to render to an offset
within the buffer, so that the data is written into the correct
miplevel.  We do this using a coarse offset (in pages), and a fine
adjustment (the so-called "tile_x" and "tile_y" values, which are
measured in pixels).

We have always computed the coarse offset and fine adjustment using the
intel_renderbuffer_tile_offsets() function.  This worked fine for
color and combined depth/stencil buffers, but failed to work properly
when HiZ and separate stencil were in use.  It failed to work because
there is only one set of fine adjustment controls shared by the HiZ,
depth, and stencil buffers, so we need to choose tile_x and tile_y
values that are compatible with the tiling of all three buffers, and
then compute separate coarse offsets for each buffer.

This patch fixes the HiZ and separate stencil case by replacing the
call to intel_renderbuffer_tile_offsets() with calls to two functions:
intel_region_get_tile_masks(), which determines how much of the
adjustment can be performed using offsets and how much can be
performed using tile_x and tile_y, and
intel_region_get_aligned_offset(), which computes the coarse offset.

intel_renderbuffer_tile_offsets() is still used for color
renderbuffers, so to avoid code duplication, I've re-worked it to use
intel_region_get_tile_masks() and intel_region_get_aligned_offset().

On i965 Gen6, fixes piglit tests
"texturing/depthstencil-render-miplevels 1024 X" where X is one of
(depth, depth_and_stencil, depth_stencil_single_binding, depth_x,
depth_x_and_stencil, stencil, stencil_and_depth, stencil_and_depth_x).

On i965 Gen7, the variants of
"texturing/depthstencil-render-miplevels" that contain a stencil
buffer still fail, due to another problem: Gen7 seems to ignore the 3
LSBs of the tile_y adjustment (and possibly also tile_x).
---
 src/mesa/drivers/dri/i965/brw_misc_state.c  |   96 ++++++++++++++++++++++++--
 src/mesa/drivers/dri/i965/gen6_hiz.c        |   54 +++++++++++----
 src/mesa/drivers/dri/i965/gen7_hiz.c        |   54 +++++++++++----
 src/mesa/drivers/dri/i965/gen7_misc_state.c |   79 ++++++++++++++++++++--
 src/mesa/drivers/dri/intel/intel_fbo.c      |   27 ++-----
 src/mesa/drivers/dri/intel/intel_fbo.h      |   28 ++++++++
 src/mesa/drivers/dri/intel/intel_regions.c  |   52 ++++++++++++++
 src/mesa/drivers/dri/intel/intel_regions.h  |    8 ++
 8 files changed, 338 insertions(+), 60 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 62bcc93..6d39ca2 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -264,10 +264,45 @@ static void emit_depthbuffer(struct brw_context *brw)
    unsigned int len;
    bool separate_stencil = false;
 
+   /* Amount by which drawing should be offset in order to draw to the
+    * appropriate miplevel/zoffset/cubeface.  We will extract these values
+    * from depth_irb or stencil_irb once we determine which is present.
+    */
+   uint32_t draw_x = 0, draw_y = 0;
+
+   /* Masks used to determine how much of the draw_x and draw_y offsets should
+    * be performed using the fine adjustment of "depth coordinate offset X/Y"
+    * (dw5 of 3DSTATE_DEPTH_BUFFER).  Any remaining coarse adjustment will be
+    * performed by changing the base addresses of the buffers.
+    *
+    * Since the HiZ, depth, and stencil buffers all use the same "depth
+    * coordinate offset X/Y" values, we need to make sure that the coarse
+    * adjustment will be possible to apply to all three buffers.  Since coarse
+    * adjustment can only be applied in multiples of the tile size, we will OR
+    * together the tile masks of all the buffers to determine which offsets to
+    * perform as fine adjustments.
+    */
+   uint32_t tile_mask_x = 0, tile_mask_y = 0;
+
+   if (depth_irb) {
+      intel_region_get_tile_masks(depth_irb->mt->region,
+                                  &tile_mask_x, &tile_mask_y);
+   }
+
    if (depth_irb &&
        depth_irb->mt &&
        depth_irb->mt->hiz_mt) {
       hiz_region = depth_irb->mt->hiz_mt->region;
+
+      uint32_t hiz_tile_mask_x, hiz_tile_mask_y;
+      intel_region_get_tile_masks(hiz_region,
+                                  &hiz_tile_mask_x, &hiz_tile_mask_y);
+
+      /* Each HiZ row represents 2 rows of pixels */
+      hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1;
+
+      tile_mask_x |= hiz_tile_mask_x;
+      tile_mask_y |= hiz_tile_mask_y;
    }
 
    /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
@@ -284,8 +319,21 @@ static void emit_depthbuffer(struct brw_context *brw)
       if (stencil_mt->stencil_mt)
 	 stencil_mt = stencil_mt->stencil_mt;
 
-      if (stencil_mt->format == MESA_FORMAT_S8)
+      if (stencil_mt->format == MESA_FORMAT_S8) {
 	 separate_stencil = true;
+
+         /* Separate stencil buffer uses 64x64 tiles. */
+         tile_mask_x |= 63;
+         tile_mask_y |= 63;
+      } else {
+         uint32_t stencil_tile_mask_x, stencil_tile_mask_y;
+         intel_region_get_tile_masks(stencil_mt->region,
+                                     &stencil_tile_mask_x,
+                                     &stencil_tile_mask_y);
+
+         tile_mask_x |= stencil_tile_mask_x;
+         tile_mask_y |= stencil_tile_mask_y;
+      }
    }
 
    /* If there's a packed depth/stencil bound to stencil only, we need to
@@ -319,6 +367,8 @@ static void emit_depthbuffer(struct brw_context *brw)
       ADVANCE_BATCH();
 
    } else if (!depth_irb && separate_stencil) {
+      uint32_t tile_x, tile_y;
+
       /*
        * There exists a separate stencil buffer but no depth buffer.
        *
@@ -341,6 +391,11 @@ static void emit_depthbuffer(struct brw_context *brw)
        */
       assert(intel->has_separate_stencil);
 
+      draw_x = stencil_irb->draw_x;
+      draw_y = stencil_irb->draw_y;
+      tile_x = draw_x & tile_mask_x;
+      tile_y = draw_y & tile_mask_y;
+
       BEGIN_BATCH(len);
       OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
       OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
@@ -350,11 +405,15 @@ static void emit_depthbuffer(struct brw_context *brw)
 	        (1 << 27) | /* tiled surface */
 	        (BRW_SURFACE_2D << 29));
       OUT_BATCH(0);
-      OUT_BATCH(((stencil_irb->Base.Base.Width - 1) << 6) |
-	         (stencil_irb->Base.Base.Height - 1) << 19);
-      OUT_BATCH(0);
+      OUT_BATCH(((stencil_irb->Base.Base.Width + tile_x - 1) << 6) |
+	         (stencil_irb->Base.Base.Height + tile_y - 1) << 19);
       OUT_BATCH(0);
 
+      if (intel->is_g4x || intel->gen >= 5)
+         OUT_BATCH(tile_x | (tile_y << 16));
+      else
+	 assert(tile_x == 0 && tile_y == 0);
+
       if (intel->gen >= 6)
 	 OUT_BATCH(0);
 
@@ -367,11 +426,18 @@ static void emit_depthbuffer(struct brw_context *brw)
       /* If using separate stencil, hiz must be enabled. */
       assert(!separate_stencil || hiz_region);
 
-      offset = intel_renderbuffer_tile_offsets(depth_irb, &tile_x, &tile_y);
-
       assert(intel->gen < 6 || region->tiling == I915_TILING_Y);
       assert(!hiz_region || region->tiling == I915_TILING_Y);
 
+      draw_x = depth_irb->draw_x;
+      draw_y = depth_irb->draw_y;
+      tile_x = draw_x & tile_mask_x;
+      tile_y = draw_y & tile_mask_y;
+
+      offset = intel_region_get_aligned_offset(region,
+                                               draw_x & ~tile_mask_x,
+                                               draw_y & ~tile_mask_y);
+
       BEGIN_BATCH(len);
       OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
       OUT_BATCH(((region->pitch * region->cpp) - 1) |
@@ -411,12 +477,17 @@ static void emit_depthbuffer(struct brw_context *brw)
 
       /* Emit hiz buffer. */
       if (hiz_region) {
+         uint32_t hiz_offset =
+            intel_region_get_aligned_offset(hiz_region,
+                                            draw_x & ~tile_mask_x,
+                                            (draw_y & ~tile_mask_y) / 2);
+
 	 BEGIN_BATCH(3);
 	 OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
 	 OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
 	 OUT_RELOC(hiz_region->bo,
 		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-		   0);
+		   hiz_offset);
 	 ADVANCE_BATCH();
       } else {
 	 BEGIN_BATCH(3);
@@ -429,6 +500,15 @@ static void emit_depthbuffer(struct brw_context *brw)
       /* Emit stencil buffer. */
       if (separate_stencil) {
 	 struct intel_region *region = stencil_mt->region;
+
+         /* Note: we can't compute the stencil offset using
+          * intel_region_get_aligned_offset(), because stencil_region claims
+          * that the region is untiled; in fact it's W tiled.
+          */
+         uint32_t stencil_offset =
+            (draw_y & ~tile_mask_y) * region->pitch +
+            (draw_x & ~tile_mask_x) * 64;
+
 	 BEGIN_BATCH(3);
 	 OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
          /* The stencil buffer has quirky pitch requirements.  From Vol 2a,
@@ -439,7 +519,7 @@ static void emit_depthbuffer(struct brw_context *brw)
 	 OUT_BATCH(2 * region->pitch * region->cpp - 1);
 	 OUT_RELOC(region->bo,
 		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-		   0);
+		   stencil_offset);
 	 ADVANCE_BATCH();
       } else {
 	 BEGIN_BATCH(3);
diff --git a/src/mesa/drivers/dri/i965/gen6_hiz.c b/src/mesa/drivers/dri/i965/gen6_hiz.c
index 9837b1f..d9b547c 100644
--- a/src/mesa/drivers/dri/i965/gen6_hiz.c
+++ b/src/mesa/drivers/dri/i965/gen6_hiz.c
@@ -261,11 +261,42 @@ gen6_hiz_exec(struct intel_context *intel,
 {
    struct gl_context *ctx = &intel->ctx;
    struct brw_context *brw = brw_context(ctx);
+   uint32_t draw_x, draw_y;
+   uint32_t tile_mask_x, tile_mask_y;
 
    assert(op != GEN6_HIZ_OP_DEPTH_CLEAR); /* Not implemented yet. */
    assert(mt->hiz_mt != NULL);
    intel_miptree_check_level_layer(mt, level, layer);
 
+   {
+      /* Construct a dummy renderbuffer just to extract tile offsets. */
+      struct intel_renderbuffer rb;
+      rb.mt = mt;
+      rb.mt_level = level;
+      rb.mt_layer = layer;
+      intel_renderbuffer_set_draw_offset(&rb);
+      draw_x = rb.draw_x;
+      draw_y = rb.draw_y;
+   }
+
+   /* Compute masks to determine how much of draw_x and draw_y should be
+    * performed using the fine adjustment of "depth coordinate offset X/Y"
+    * (dw5 of 3DSTATE_DEPTH_BUFFER).  See the emit_depthbuffer() function for
+    * details.
+    */
+   {
+      uint32_t depth_mask_x, depth_mask_y, hiz_mask_x, hiz_mask_y;
+      intel_region_get_tile_masks(mt->region, &depth_mask_x, &depth_mask_y);
+      intel_region_get_tile_masks(mt->hiz_mt->region,
+                                  &hiz_mask_x, &hiz_mask_y);
+
+      /* Each HiZ row represents 2 rows of pixels */
+      hiz_mask_y = hiz_mask_y << 1 | 1;
+
+      tile_mask_x = depth_mask_x | hiz_mask_x;
+      tile_mask_y = depth_mask_y | hiz_mask_y;
+   }
+
    gen6_hiz_emit_batch_head(brw);
    gen6_hiz_emit_vertices(brw, mt, level, layer);
 
@@ -450,18 +481,11 @@ gen6_hiz_exec(struct intel_context *intel,
       uint32_t width = mt->level[level].width;
       uint32_t height = mt->level[level].height;
 
-      uint32_t tile_x;
-      uint32_t tile_y;
-      uint32_t offset;
-      {
-         /* Construct a dummy renderbuffer just to extract tile offsets. */
-         struct intel_renderbuffer rb;
-         rb.mt = mt;
-         rb.mt_level = level;
-         rb.mt_layer = layer;
-         intel_renderbuffer_set_draw_offset(&rb);
-         offset = intel_renderbuffer_tile_offsets(&rb, &tile_x, &tile_y);
-      }
+      uint32_t tile_x = draw_x & tile_mask_x;
+      uint32_t tile_y = draw_y & tile_mask_y;
+      uint32_t offset = intel_region_get_aligned_offset(mt->region,
+                                                        draw_x & ~tile_mask_x,
+                                                        draw_y & ~tile_mask_y);
 
       uint32_t format;
       switch (mt->format) {
@@ -499,13 +523,17 @@ gen6_hiz_exec(struct intel_context *intel,
    /* 3DSTATE_HIER_DEPTH_BUFFER */
    {
       struct intel_region *hiz_region = mt->hiz_mt->region;
+      uint32_t hiz_offset =
+         intel_region_get_aligned_offset(hiz_region,
+                                         draw_x & ~tile_mask_x,
+                                         (draw_y & ~tile_mask_y) / 2);
 
       BEGIN_BATCH(3);
       OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
       OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
       OUT_RELOC(hiz_region->bo,
                 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                0);
+                hiz_offset);
       ADVANCE_BATCH();
    }
 
diff --git a/src/mesa/drivers/dri/i965/gen7_hiz.c b/src/mesa/drivers/dri/i965/gen7_hiz.c
index 18c178e..4538ec9 100644
--- a/src/mesa/drivers/dri/i965/gen7_hiz.c
+++ b/src/mesa/drivers/dri/i965/gen7_hiz.c
@@ -46,6 +46,8 @@ gen7_hiz_exec(struct intel_context *intel,
 {
    struct gl_context *ctx = &intel->ctx;
    struct brw_context *brw = brw_context(ctx);
+   uint32_t draw_x, draw_y;
+   uint32_t tile_mask_x, tile_mask_y;
 
    assert(op != GEN6_HIZ_OP_DEPTH_CLEAR); /* Not implemented yet. */
    assert(mt->hiz_mt != NULL);
@@ -59,6 +61,35 @@ gen7_hiz_exec(struct intel_context *intel,
    default:                    assert(0); break;
    }
 
+   {
+      /* Construct a dummy renderbuffer just to extract tile offsets. */
+      struct intel_renderbuffer rb;
+      rb.mt = mt;
+      rb.mt_level = level;
+      rb.mt_layer = layer;
+      intel_renderbuffer_set_draw_offset(&rb);
+      draw_x = rb.draw_x;
+      draw_y = rb.draw_y;
+   }
+
+   /* Compute masks to determine how much of draw_x and draw_y should be
+    * performed using the fine adjustment of "depth coordinate offset X/Y"
+    * (dw5 of 3DSTATE_DEPTH_BUFFER).  See the emit_depthbuffer() function for
+    * details.
+    */
+   {
+      uint32_t depth_mask_x, depth_mask_y, hiz_mask_x, hiz_mask_y;
+      intel_region_get_tile_masks(mt->region, &depth_mask_x, &depth_mask_y);
+      intel_region_get_tile_masks(mt->hiz_mt->region,
+                                  &hiz_mask_x, &hiz_mask_y);
+
+      /* Each HiZ row represents 2 rows of pixels */
+      hiz_mask_y = hiz_mask_y << 1 | 1;
+
+      tile_mask_x = depth_mask_x | hiz_mask_x;
+      tile_mask_y = depth_mask_y | hiz_mask_y;
+   }
+
    gen6_hiz_emit_batch_head(brw);
    gen6_hiz_emit_vertices(brw, mt, level, layer);
 
@@ -327,18 +358,11 @@ gen7_hiz_exec(struct intel_context *intel,
       uint32_t width = mt->level[level].width;
       uint32_t height = mt->level[level].height;
 
-      uint32_t tile_x;
-      uint32_t tile_y;
-      uint32_t offset;
-      {
-         /* Construct a dummy renderbuffer just to extract tile offsets. */
-         struct intel_renderbuffer rb;
-         rb.mt = mt;
-         rb.mt_level = level;
-         rb.mt_layer = layer;
-         intel_renderbuffer_set_draw_offset(&rb);
-         offset = intel_renderbuffer_tile_offsets(&rb, &tile_x, &tile_y);
-      }
+      uint32_t tile_x = draw_x & tile_mask_x;
+      uint32_t tile_y = draw_y & tile_mask_y;
+      uint32_t offset = intel_region_get_aligned_offset(mt->region,
+                                                        draw_x & ~tile_mask_x,
+                                                        draw_y & ~tile_mask_y);
 
       intel_emit_depth_stall_flushes(intel);
 
@@ -364,13 +388,17 @@ gen7_hiz_exec(struct intel_context *intel,
    /* 3DSTATE_HIER_DEPTH_BUFFER */
    {
       struct intel_region *hiz_region = mt->hiz_mt->region;
+      uint32_t hiz_offset =
+         intel_region_get_aligned_offset(hiz_region,
+                                         draw_x & ~tile_mask_x,
+                                         (draw_y & ~tile_mask_y) / 2);
 
       BEGIN_BATCH(3);
       OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
       OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
       OUT_RELOC(hiz_region->bo,
                 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                0);
+                hiz_offset);
       ADVANCE_BATCH();
    }
 
diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c b/src/mesa/drivers/dri/i965/gen7_misc_state.c
index 3a6144f..d9beda8 100644
--- a/src/mesa/drivers/dri/i965/gen7_misc_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c
@@ -42,18 +42,58 @@ static void emit_depthbuffer(struct brw_context *brw)
 			    *stencil_mt = NULL,
 			    *hiz_mt = NULL;
 
+   /* Amount by which drawing should be offset in order to draw to the
+    * appropriate miplevel/zoffset/cubeface.  We will extract these values
+    * from depth_irb or stencil_irb once we determine which is present.
+    */
+   uint32_t draw_x = 0, draw_y = 0;
+
+   /* Masks used to determine how much of the draw_x and draw_y offsets should
+    * be performed using the fine adjustment of "depth coordinate offset X/Y"
+    * (dw5 of 3DSTATE_DEPTH_BUFFER).  Any remaining coarse adjustment will be
+    * performed by changing the base addresses of the buffers.
+    *
+    * Since the HiZ, depth, and stencil buffers all use the same "depth
+    * coordinate offset X/Y" values, we need to make sure that the coarse
+    * adjustment will be possible to apply to all three buffers.  Since coarse
+    * adjustment can only be applied in multiples of the tile size, we will OR
+    * together the tile masks of all the buffers to determine which offsets to
+    * perform as fine adjustments.
+    */
+   uint32_t tile_mask_x = 0, tile_mask_y = 0;
+
    if (drb)
       depth_mt = drb->mt;
 
-   if (depth_mt)
+   if (depth_mt) {
       hiz_mt = depth_mt->hiz_mt;
 
+      intel_region_get_tile_masks(depth_mt->region,
+                                  &tile_mask_x, &tile_mask_y);
+
+      if (hiz_mt) {
+         uint32_t hiz_tile_mask_x, hiz_tile_mask_y;
+         intel_region_get_tile_masks(hiz_mt->region,
+                                     &hiz_tile_mask_x, &hiz_tile_mask_y);
+
+         /* Each HiZ row represents 2 rows of pixels */
+         hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1;
+
+         tile_mask_x |= hiz_tile_mask_x;
+         tile_mask_y |= hiz_tile_mask_y;
+      }
+   }
+
    if (srb) {
       stencil_mt = srb->mt;
       if (stencil_mt->stencil_mt)
 	 stencil_mt = stencil_mt->stencil_mt;
 
       assert(stencil_mt->format == MESA_FORMAT_S8);
+
+      /* Stencil buffer uses 64x64 tiles. */
+      tile_mask_x |= 63;
+      tile_mask_y |= 63;
    }
 
    /* Gen7 doesn't support packed depth/stencil */
@@ -65,6 +105,7 @@ static void emit_depthbuffer(struct brw_context *brw)
    if (depth_mt == NULL) {
       uint32_t dw1 = BRW_DEPTHFORMAT_D32_FLOAT << 18;
       uint32_t dw3 = 0;
+      uint32_t tile_x, tile_y;
 
       if (stencil_mt == NULL) {
 	 dw1 |= (BRW_SURFACE_NULL << 29);
@@ -72,10 +113,15 @@ static void emit_depthbuffer(struct brw_context *brw)
 	 /* _NEW_STENCIL: enable stencil buffer writes */
 	 dw1 |= ((ctx->Stencil.WriteMask != 0) << 27);
 
+         draw_x = srb->draw_x;
+         draw_y = srb->draw_y;
+         tile_x = draw_x & tile_mask_x;
+         tile_y = draw_y & tile_mask_y;
+
 	 /* 3DSTATE_STENCIL_BUFFER inherits surface type and dimensions. */
 	 dw1 |= (BRW_SURFACE_2D << 29);
-	 dw3 = ((srb->Base.Base.Width - 1) << 4) |
-	       ((srb->Base.Base.Height - 1) << 18);
+	 dw3 = ((srb->Base.Base.Width + tile_x - 1) << 4) |
+	       ((srb->Base.Base.Height + tile_y - 1) << 18);
       }
 
       BEGIN_BATCH(7);
@@ -84,14 +130,21 @@ static void emit_depthbuffer(struct brw_context *brw)
       OUT_BATCH(0);
       OUT_BATCH(dw3);
       OUT_BATCH(0);
-      OUT_BATCH(0);
+      OUT_BATCH(tile_x | (tile_y << 16));
       OUT_BATCH(0);
       ADVANCE_BATCH();
    } else {
       struct intel_region *region = depth_mt->region;
       uint32_t tile_x, tile_y, offset;
 
-      offset = intel_renderbuffer_tile_offsets(drb, &tile_x, &tile_y);
+      draw_x = drb->draw_x;
+      draw_y = drb->draw_y;
+      tile_x = draw_x & tile_mask_x;
+      tile_y = draw_y & tile_mask_y;
+
+      offset = intel_region_get_aligned_offset(region,
+                                               draw_x & ~tile_mask_x,
+                                               draw_y & ~tile_mask_y);
 
       assert(region->tiling == I915_TILING_Y);
 
@@ -122,13 +175,17 @@ static void emit_depthbuffer(struct brw_context *brw)
       OUT_BATCH(0);
       ADVANCE_BATCH();
    } else {
+      uint32_t hiz_offset =
+         intel_region_get_aligned_offset(hiz_mt->region,
+                                         draw_x & ~tile_mask_x,
+                                         (draw_y & ~tile_mask_y) / 2);
       BEGIN_BATCH(3);
       OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
       OUT_BATCH(hiz_mt->region->pitch * hiz_mt->region->cpp - 1);
       OUT_RELOC(hiz_mt->region->bo,
                 I915_GEM_DOMAIN_RENDER,
                 I915_GEM_DOMAIN_RENDER,
-                0);
+                hiz_offset);
       ADVANCE_BATCH();
    }
 
@@ -141,6 +198,14 @@ static void emit_depthbuffer(struct brw_context *brw)
    } else {
       const int enabled = intel->is_haswell ? HSW_STENCIL_ENABLED : 0;
 
+      /* Note: We can't compute the stencil offset using
+       * intel_region_get_aligned_offset(), because the stencil region claims
+       * that the region is untiled; in fact it's W tiled.
+       */
+      uint32_t stencil_offset =
+         (draw_y & ~tile_mask_y) * stencil_mt->region->pitch +
+         (draw_x & ~tile_mask_x) * 64;
+
       BEGIN_BATCH(3);
       OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
       /* The stencil buffer has quirky pitch requirements.  From the Graphics
@@ -161,7 +226,7 @@ static void emit_depthbuffer(struct brw_context *brw)
 	        (2 * stencil_mt->region->pitch * stencil_mt->region->cpp - 1));
       OUT_RELOC(stencil_mt->region->bo,
 	        I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-		0);
+		stencil_offset);
       ADVANCE_BATCH();
    }
 
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 2d1a798..44bd62e 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -532,25 +532,14 @@ intel_renderbuffer_tile_offsets(struct intel_renderbuffer *irb,
 				uint32_t *tile_y)
 {
    struct intel_region *region = irb->mt->region;
-   int cpp = region->cpp;
-   uint32_t pitch = region->pitch * cpp;
-
-   if (region->tiling == I915_TILING_NONE) {
-      *tile_x = 0;
-      *tile_y = 0;
-      return irb->draw_x * cpp + irb->draw_y * pitch;
-   } else if (region->tiling == I915_TILING_X) {
-      *tile_x = irb->draw_x % (512 / cpp);
-      *tile_y = irb->draw_y % 8;
-      return ((irb->draw_y / 8) * (8 * pitch) +
-	      (irb->draw_x - *tile_x) / (512 / cpp) * 4096);
-   } else {
-      assert(region->tiling == I915_TILING_Y);
-      *tile_x = irb->draw_x % (128 / cpp);
-      *tile_y = irb->draw_y % 32;
-      return ((irb->draw_y / 32) * (32 * pitch) +
-	      (irb->draw_x - *tile_x) / (128 / cpp) * 4096);
-   }
+   uint32_t mask_x, mask_y;
+
+   intel_region_get_tile_masks(region, &mask_x, &mask_y);
+
+   *tile_x = irb->draw_x & mask_x;
+   *tile_y = irb->draw_y & mask_y;
+   return intel_region_get_aligned_offset(region, irb->draw_x & ~mask_x,
+                                          irb->draw_y & ~mask_y);
 }
 
 /**
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h
index 724f141..503b006 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.h
+++ b/src/mesa/drivers/dri/intel/intel_fbo.h
@@ -153,6 +153,34 @@ intel_flip_renderbuffers(struct gl_framebuffer *fb);
 void
 intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb);
 
+void
+intel_renderbuffer_fine_offset_masks(struct intel_renderbuffer *irb,
+                                     uint32_t *fine_offset_mask_x,
+                                     uint32_t *fine_offset_mask_y);
+
+/**
+ * When rendering to a texture with multiple miplevels, depth planes, or cube
+ * faces, we need to instruct the GPU to render to an offset within the
+ * texture image corresponding to the appropriate miplevel/plane/cubeface.  We
+ * do this by a combination of two techniques: by offsetting the base address
+ * of the texture image, and by supplying additional X and Y coordinate
+ * offsets to the GPU in the SURFACE_STATE structure.
+ *
+ * This function computes the additional Y coordinate offset.
+ */
+
+/**
+ * When rendering to a texture with multiple miplevels, depth planes, or cube
+ * faces, we need to instruct the GPU to render to an offset within the
+ * texture image corresponding to the appropriate miplevel/plane/cubeface.  We
+ * do this by a combination of two techniques: by offsetting the base address
+ * of the texture image, and by supplying additional X and Y coordinate
+ * offsets to the GPU in the SURFACE_STATE structure.
+ *
+ * This function computes the additional X coordinate offset.
+ */
+
+
 uint32_t
 intel_renderbuffer_tile_offsets(struct intel_renderbuffer *irb,
 				uint32_t *tile_x,
diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c
index abea2bd..af9ebff 100644
--- a/src/mesa/drivers/dri/intel/intel_regions.c
+++ b/src/mesa/drivers/dri/intel/intel_regions.c
@@ -390,3 +390,55 @@ intel_region_copy(struct intel_context *intel,
 			    srcx, srcy, dstx, dsty, width, height,
 			    logicop);
 }
+
+/**
+ * This function computes masks that may be used to select the bits of the X
+ * and Y coordinates that indicate the offset within a tile.  If the region is
+ * untiled, the masks are set to 0.
+ */
+void
+intel_region_get_tile_masks(struct intel_region *region,
+                            uint32_t *mask_x, uint32_t *mask_y)
+{
+   int cpp = region->cpp;
+
+   switch (region->tiling) {
+   default:
+      assert(false);
+   case I915_TILING_NONE:
+      *mask_x = *mask_y = 0;
+      break;
+   case I915_TILING_X:
+      *mask_x = 512 / cpp - 1;
+      *mask_y = 7;
+      break;
+   case I915_TILING_Y:
+      *mask_x = 128 / cpp - 1;
+      *mask_y = 31;
+      break;
+   }
+}
+
+/**
+ * Compute the offset (in bytes) from the start of the region to the given x
+ * and y coordinate.  For tiled regions, caller must ensure that x and y are
+ * multiples of the tile size.
+ */
+uint32_t
+intel_region_get_aligned_offset(struct intel_region *region, uint32_t x,
+                                uint32_t y)
+{
+   int cpp = region->cpp;
+   uint32_t pitch = region->pitch * cpp;
+
+   switch (region->tiling) {
+   default:
+      assert(false);
+   case I915_TILING_NONE:
+      return y * pitch + x * cpp;
+   case I915_TILING_X:
+      return y * pitch + x / (512 / cpp) * 4096;
+   case I915_TILING_Y:
+      return y * pitch + x / (128 / cpp) * 4096;
+   }
+}
diff --git a/src/mesa/drivers/dri/intel/intel_regions.h b/src/mesa/drivers/dri/intel/intel_regions.h
index 4ea970a..9a5b150 100644
--- a/src/mesa/drivers/dri/intel/intel_regions.h
+++ b/src/mesa/drivers/dri/intel/intel_regions.h
@@ -129,6 +129,14 @@ void _mesa_copy_rect(GLubyte * dst,
                 const GLubyte * src,
                 GLuint src_pitch, GLuint src_x, GLuint src_y);
 
+void
+intel_region_get_tile_masks(struct intel_region *region,
+                            uint32_t *mask_x, uint32_t *mask_y);
+
+uint32_t
+intel_region_get_aligned_offset(struct intel_region *region, uint32_t x,
+                                uint32_t y);
+
 struct __DRIimageRec {
    struct intel_region *region;
    GLenum internal_format;
-- 
1.7.7.6



More information about the mesa-dev mailing list