[Mesa-dev] [PATCH] i965: Stop lying about cpp and height of a stencil buffer.

Paul Berry stereotype441 at gmail.com
Mon Apr 9 09:43:50 PDT 2012


When using a separate stencil buffer, i965 requires that the pitch of
the buffer (in the 3DSTATE_STENCIL_BUFFER command) be specified as 2x
the actual pitch.

Previously this was accomplished by doubling the "cpp" and "pitch"
values stored in the intel_region data structure, and halving the
height.  However, this was confusing, and it led to a subtle (but
benign) bug: since a stencil buffer is W-tiled, its true height must
be aligned to a multiple of 64; we were accidentally aligning its faux
height to a multiple of 64, causing memory to be wasted.

Note that for window system stencil buffers, the X server also doubles
the cpp and pitch values.  To facilitate fixing this X server bug in
the future, we fix the cpp and pitch values we receive from the X
server only if cpp has the "incorrect" value of 2.
---
 src/mesa/drivers/dri/i965/brw_misc_state.c     |   11 ++++---
 src/mesa/drivers/dri/i965/gen7_misc_state.c    |   16 ++++++++++-
 src/mesa/drivers/dri/intel/intel_context.c     |   35 +++++++++++++++++------
 src/mesa/drivers/dri/intel/intel_mipmap_tree.c |   24 ++--------------
 src/mesa/drivers/dri/intel/intel_screen.c      |   24 ++++++++++------
 5 files changed, 65 insertions(+), 45 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 3479333..62bcc93 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -326,10 +326,6 @@ static void emit_depthbuffer(struct brw_context *brw)
        * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
        * height.
        *
-       * Since the stencil buffer has quirky pitch requirements, its region
-       * was allocated with half height and double cpp. So we need
-       * a multiplier of 2 to obtain the surface's real height.
-       *
        * Enable the hiz bit because it and the separate stencil bit must have
        * the same value. From Section 2.11.5.6.1.1 3DSTATE_DEPTH_BUFFER, Bit
        * 1.21 "Separate Stencil Enable":
@@ -435,7 +431,12 @@ static void emit_depthbuffer(struct brw_context *brw)
 	 struct intel_region *region = stencil_mt->region;
 	 BEGIN_BATCH(3);
 	 OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
-	 OUT_BATCH(region->pitch * region->cpp - 1);
+         /* The stencil buffer has quirky pitch requirements.  From Vol 2a,
+          * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
+          *    The pitch must be set to 2x the value computed based on width, as
+          *    the stencil buffer is stored with two rows interleaved.
+          */
+	 OUT_BATCH(2 * region->pitch * region->cpp - 1);
 	 OUT_RELOC(region->bo,
 		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
 		   0);
diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c b/src/mesa/drivers/dri/i965/gen7_misc_state.c
index 5da6434..3a6144f 100644
--- a/src/mesa/drivers/dri/i965/gen7_misc_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c
@@ -143,8 +143,22 @@ static void emit_depthbuffer(struct brw_context *brw)
 
       BEGIN_BATCH(3);
       OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
+      /* The stencil buffer has quirky pitch requirements.  From the Graphics
+       * BSpec: vol2a.11 3D Pipeline Windower > Early Depth/Stencil Processing
+       * > Depth/Stencil Buffer State > 3DSTATE_STENCIL_BUFFER [DevIVB+],
+       * field "Surface Pitch":
+       *
+       *    The pitch must be set to 2x the value computed based on width, as
+       *    the stencil buffer is stored with two rows interleaved.
+       *
+       * (Note that it is not 100% clear whether this intended to apply to
+       * Gen7; the BSpec flags this comment as "DevILK,DevSNB" (which would
+       * imply that it doesn't), however the comment appears on a "DevIVB+"
+       * page (which would imply that it does).  Experiments with the hardware
+       * indicate that it does.
+       */
       OUT_BATCH(enabled |
-	        (stencil_mt->region->pitch * stencil_mt->region->cpp - 1));
+	        (2 * stencil_mt->region->pitch * stencil_mt->region->cpp - 1));
       OUT_RELOC(stencil_mt->region->bo,
 	        I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
 		0);
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 16a9887..fc3258b 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -1251,17 +1251,34 @@ intel_process_dri2_buffer_with_separate_stencil(struct intel_context *intel,
 
    int buffer_width;
    int buffer_height;
+   int buffer_cpp = buffer->cpp;
+   int buffer_pitch = buffer->pitch;
    if (buffer->attachment == __DRI_BUFFER_STENCIL) {
-      /* The stencil buffer has quirky pitch requirements.  From Section
-       * 2.11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
-       *    The pitch must be set to 2x the value computed based on width, as
-       *    the stencil buffer is stored with two rows interleaved.
+      /* Stencil buffers use W tiling, a tiling format that the DRM functions
+       * don't properly account for.  Therefore, when we allocate a stencil
+       * buffer that is private to Mesa (see intel_miptree_create), we round
+       * the height and width up to the next multiple of the tile size (64x64)
+       * and then ask DRM to allocate an untiled buffer.  Consequently, the
+       * height and the width stored in the stencil buffer's region structure
+       * are always multiples of 64, even if the stencil buffer itself is
+       * smaller.
        *
-       * To satisfy the pitch requirement, the X driver allocated the region
-       * with the following dimensions.
+       * To avoid inconsistencies between how we represent private buffers and
+       * buffers shared with the window system, round up the height and width
+       * for window system buffers too.
        */
        buffer_width = ALIGN(drawable->w, 64);
-       buffer_height = ALIGN(ALIGN(drawable->h, 2) / 2, 64);
+       buffer_height = ALIGN(drawable->h, 64);
+
+       /* As of 4/6/2012, the X server lies and sends cpp and pitch values
+        * that are two times too large for stencil buffers.  Hopefully this
+        * will be fixed someday, but for now detect the bug by checking if cpp
+        * is 2, and fixing cpp and pitch if it is.
+        */
+       if (buffer_cpp == 2) {
+          buffer_cpp = 1;
+          buffer_pitch /= 2;
+       }
    } else {
        buffer_width = drawable->w;
        buffer_height = drawable->h;
@@ -1279,10 +1296,10 @@ intel_process_dri2_buffer_with_separate_stencil(struct intel_context *intel,
 
    struct intel_region *region =
       intel_region_alloc_for_handle(intel->intelScreen,
-				    buffer->cpp,
+				    buffer_cpp,
 				    buffer_width,
 				    buffer_height,
-				    buffer->pitch / buffer->cpp,
+				    buffer_pitch / buffer_cpp,
 				    buffer->name,
 				    buffer_name);
    if (!region)
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index 636ee17..91ebc8d 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -102,16 +102,6 @@ intel_miptree_create_internal(struct intel_context *intel,
       mt->depth0 = depth0;
    }
 
-   if (format == MESA_FORMAT_S8) {
-      /* The stencil buffer has quirky pitch requirements.  From Vol 2a,
-       * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
-       *    The pitch must be set to 2x the value computed based on width, as
-       *    the stencil buffer is stored with two rows interleaved.
-       */
-      assert(intel->has_separate_stencil);
-      mt->cpp = 2;
-   }
-
    if (!for_region &&
        _mesa_is_depthstencil_format(_mesa_get_format_base_format(format)) &&
        (intel->must_use_separate_stencil ||
@@ -188,20 +178,12 @@ intel_miptree_create(struct intel_context *intel,
 
    if (format == MESA_FORMAT_S8) {
       /* The stencil buffer is W tiled. However, we request from the kernel a
-       * non-tiled buffer because the GTT is incapable of W fencing.
-       *
-       * The stencil buffer has quirky pitch requirements.  From Vol 2a,
-       * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
-       *    The pitch must be set to 2x the value computed based on width, as
-       *    the stencil buffer is stored with two rows interleaved.
-       * To accomplish this, we resort to the nasty hack of doubling the drm
-       * region's cpp and halving its height.
-       *
-       * If we neglect to double the pitch, then render corruption occurs.
+       * non-tiled buffer because the GTT is incapable of W fencing.  So round
+       * up the width and height to match the size of W tiles (64x64).
        */
       tiling = I915_TILING_NONE;
       width0 = ALIGN(width0, 64);
-      height0 = ALIGN((height0 + 1) / 2, 64);
+      height0 = ALIGN(height0, 64);
    }
 
    mt = intel_miptree_create_internal(intel, target, format,
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index 49e208c..3fc1d3c 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -956,22 +956,28 @@ intelAllocateBuffer(__DRIscreen *screen,
       return NULL;
 
    if (attachment == __DRI_BUFFER_STENCIL) {
-      /* The stencil buffer has quirky pitch requirements.  From Vol 2a,
-       * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
-       *    The pitch must be set to 2x the value computed based on width, as
-       *    the stencil buffer is stored with two rows interleaved.
-       * To accomplish this, we resort to the nasty hack of doubling the
-       * region's cpp and halving its height.
+      /* Stencil buffers use W tiling, a tiling format that the DRM functions
+       * don't properly account for.  Therefore, when we allocate a stencil
+       * buffer that is private to Mesa (see intel_miptree_create), we round
+       * the height and width up to the next multiple of the tile size (64x64)
+       * and then ask DRM to allocate an untiled buffer.  Consequently, the
+       * height and the width stored in the stencil buffer's region structure
+       * are always multiples of 64, even if the stencil buffer itself is
+       * smaller.
+       *
+       * To avoid inconsistencies between how we represent private buffers and
+       * buffers shared with the window system, round up the height and width
+       * for window system buffers too.
        */
       region_width = ALIGN(width, 64);
-      region_height = ALIGN(ALIGN(height, 2) / 2, 64);
-      region_cpp = format / 4;
+      region_height = ALIGN(height, 64);
    } else {
       region_width = width;
       region_height = height;
-      region_cpp = format / 8;
    }
 
+   region_cpp = format / 8;
+
    intelBuffer->region = intel_region_alloc(intelScreen,
                                             tiling,
                                             region_cpp,
-- 
1.7.7.6



More information about the mesa-dev mailing list