[Mesa-dev] [PATCH 2/2] i965: Use IVB specific formula for depthbuffer

Ben Widawsky benjamin.widawsky at intel.com
Wed Oct 9 01:27:47 CEST 2013


After the last patch, we can replace the region allocated in the miptree
creation with a more straightforward (and hopefully smaller resulting)
buffer based on the bspec's allocation formula.

Since I am relatively new to this part of the bspec, I would very much
appreciate scrutiny during review of this. There were some ambiguities
to me which are likely obvious to others.

To prove the reduced [GPU] memory usage I created a simple script which
polls the memory usage of the process through debugfs ever .1 seconds.
The following results show the memory usage difference over 5 runs of
xonotic-glx with ultra settings.

The data suggests a 10MB savings on average. I've not measured the
savings on the CPU side, but I imagine some amount of savings would be
present there as well.

x head/xonotic
+ mine/xonotic
    N           Min           Max        Median           Avg Stddev
x   5     25.683336     25.898164     25.872499     25.842426 0.089829019
+   5     25.841368     25.934931     25.869051     25.877494 0.039885576

x head/memusage
+ mine/memusage
    N           Min           Max        Median           Avg Stddev
x 18036      89432064 8.6380954e+08 7.9515648e+08  7.930405e+08 42774265
+ 18030      86548480 8.6262989e+08 7.8178714e+08 7.7978462e+08 42099587

NOTE: There were a couple of places in the arithmetic where I could have
taken some shortcuts. In order to make the code match with the spec as
much as possible, I've decided not to do this. One shortcut I did make
was the tiling type. Digging through the code it looks like you always
want Y-tiled, except when it won't fit, in which case you want X-tiled.
I wasn't a fan of the existing helper function that's there since it has
a few irrelevant parameters for this operation. I suspect people
reviewing this might ask me to change this, which is fine; I just wanted
to explain the motivation.

v2: copy-paste fix where I used I915_TILING_Y where I meant _X. (Topi)

v3:
Updated to directly use the bo/stride instead of intel_region. (Ken, Chad)
Fix the reference count leak on the hiz buffer (Chad)
Don't allow fallback to old mt allocation. It should never happen. (Ben)
Break out hz_depth/width calculation to separete functions. (Ben)
Use cpp = 1, since the calculation takes cpp into account (Ben)

v4: Don't make the physical size calculation. It is unnecessary and just
confusion on my part (Chad)

BO size before: 10485760
BO size after:   1310720

This savings of 8.75MB provides an 8x savings over the original size.

v5:
Commit message cleanups (Chad)
Align Z_Height to 8. (Chad)
Formatting cleanups (Ian, Chad)
Change comments to quote PRM more explicitly (Chad)
Use mt->logical_depth0 instead of mt->level[i].depth (Chad)
Kill TODO about compressed depth textures (Chad)
Use target_to_target (Chad)
Add missing GL_TEXTURE_1D, and GL_TEXTURE_CUBE_MAP cases
Remove X tiled fallback (Chad)

CC: Chad Versace <chad.versace at linux.intel.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=67564
Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 147 ++++++++++++++++++++++----
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h |   2 +-
 2 files changed, 129 insertions(+), 20 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index e1da9de..3c94d89 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -793,8 +793,12 @@ intel_miptree_release(struct intel_mipmap_tree **mt)
 
       intel_region_release(&((*mt)->region));
       intel_miptree_release(&(*mt)->stencil_mt);
-      intel_miptree_release(&(*mt)->hiz_buffer.mt);
-      (*mt)->hiz_buffer.bo = NULL;
+      if (&(*mt)->hiz_buffer.mt)
+         intel_miptree_release(&(*mt)->hiz_buffer.mt);
+      else {
+         drm_intel_bo_unreference((*mt)->hiz_buffer.bo);
+	 (*mt)->hiz_buffer.bo = NULL;
+      }
       intel_miptree_release(&(*mt)->mcs_mt);
       intel_miptree_release(&(*mt)->singlesample_mt);
       intel_resolve_map_clear(&(*mt)->hiz_map);
@@ -1271,30 +1275,135 @@ intel_miptree_slice_enable_hiz(struct brw_context *brw,
    return true;
 }
 
+static unsigned int
+calculate_z_height(const struct intel_mipmap_tree *mt, const int level)
+{
+   unsigned int height = ALIGN(minify(mt->logical_height0, level), 8);
+
+   /* From the Ivybridge PRM, Section 11.5.3: "Hierarchical Depth Buffer":
+    *   The Surface Type, Height, Width, Depth, Minimum Array Element, Render
+    *   Target View Extent, and Depth Coordinate Offset X/Y of the hierarchical
+    *   depth buffer are inherited from the depth buffer The height and width of
+    *   the hierarchical depth buffer that must be allocated are computed by the
+    *   following formulas, where HZ is the hierarchical depth buffer and Z is
+    *   the depth buffer The Z_Height, Z_Width, and Z_Depth values given in these
+    *   formulas are those present in 3DSTATE_DEPTH_BUFFER incremented by one. :
+    *   The value of Z_Height and Z_Width must each be multiplied by 2 before
+    *   being applied to the table below if Number of Multisamples is set to
+    *   NUMSAMPLES_4.  The value of Z_Height must be multiplied by 2 and Z_Width
+    *   must be multiplied by 4 before being applied to the table below if
+    *   Number of Multisamples is set to NUMSAMPLES_8..
+    */
+   if (mt->num_samples == 4 || mt->num_samples == 8)
+      height *= 2;
+
+   return height;
+}
+
+static unsigned int
+calculate_z_width(const struct intel_mipmap_tree *mt, const int level)
+{
+   unsigned int width = minify(mt->logical_width0, level);
+
+   /* See PRM notice in calculate_z_height() */
+   if (mt->num_samples == 4)
+      width *= 2;
+   else if (mt->num_samples == 8)
+      width *= 4;
+
+   return width;
+}
+
+static void
+gen7_create_hiz_buffer(struct brw_context *brw,
+                       struct intel_mipmap_tree *mt)
+{
+   uint32_t q_pitch, w0, h0, h1, h_level, z_depth; /* Inputs to formula */
+   size_t hz_width; /* Number of bytes */
+   unsigned int hz_height; /* Number of rows */
+   unsigned int tiling = I915_TILING_Y;
+
+   z_depth = mt->logical_depth0;
+   w0 = calculate_z_width(mt, 0);
+   h0 = calculate_z_height(mt, 0);
+   h1 = calculate_z_height(mt, 1);
+
+   hz_width = ALIGN(w0, 16);
+
+   /* ... Where, Qpitch is computed using vertical alignment j=8. Please refer
+    * to the GPU overview volume for Qpitch definition.  NB: The docs have
+    * multiple formulas for q_pitch on IVB, but the HSW docs only have the
+    * below definition.
+    */
+   q_pitch = h0 + h1 + 11 * 8;
+
+   /* The following is directly derived from the "Hierarchical Depth Buffer"
+    * section of the bspec.
+    */
+   switch (target_to_target(mt->target)) {
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_1D_ARRAY:
+   case GL_TEXTURE_2D_ARRAY:
+   case GL_TEXTURE_2D:
+      hz_height = ALIGN((q_pitch * z_depth / 2), 8);
+      break;
+   case GL_TEXTURE_CUBE_MAP:
+   case GL_TEXTURE_CUBE_MAP_ARRAY:
+      hz_height = ALIGN((q_pitch * z_depth * 6 / 2), 8);
+      break;
+   case GL_TEXTURE_3D:
+      hz_height = 0;
+      for (int i = 0; i < mt->last_level; i++) {
+         int tmp;
+         h_level = calculate_z_height(mt, i);
+         tmp = floorf(z_depth / pow(2, i));
+         if (!tmp)
+            tmp++;
+         hz_height += h_level * tmp;
+      }
+      hz_height /= 2;
+      break;
+   default:
+      perf_debug("Unknown depthbuffer texture type (%d).", mt->target);
+      return;
+   }
+
+   mt->hiz_buffer.bo = drm_intel_bo_alloc_tiled(brw->intelScreen->bufmgr,
+                                                "hiz_buffer",
+                                                hz_width, hz_height, 1,
+                                                &tiling, &mt->hiz_buffer.stride,
+                                                BO_ALLOC_FOR_RENDER);
+}
 
 
 bool
 intel_miptree_alloc_hiz(struct brw_context *brw,
 			struct intel_mipmap_tree *mt)
 {
-   assert(mt->hiz_buffer.mt == NULL);
-   mt->hiz_buffer.mt = intel_miptree_create(brw,
-						  mt->target,
-						  mt->format,
-						  mt->first_level,
-						  mt->last_level,
-						  mt->logical_width0,
-						  mt->logical_height0,
-						  mt->logical_depth0,
-						  true,
-						  mt->num_samples,
-						  INTEL_MIPTREE_TILING_ANY);
-
-   if (!mt->hiz_buffer.mt)
-      return false;
+   if (brw->gen >= 7) {
+      assert(mt->hiz_buffer.bo == NULL);
+      gen7_create_hiz_buffer(brw, mt);
+   } else {
+      assert(mt->hiz_buffer.mt == NULL);
+      mt->hiz_buffer.mt = intel_miptree_create(brw,
+					       mt->target,
+					       mt->format,
+					       mt->first_level,
+					       mt->last_level,
+					       mt->logical_width0,
+					       mt->logical_height0,
+					       mt->logical_depth0,
+					       true,
+					       mt->num_samples,
+					       INTEL_MIPTREE_TILING_ANY);
+      if (mt->hiz_buffer.mt) {
+	 mt->hiz_buffer.bo = mt->hiz_buffer.mt->region->bo;
+         mt->hiz_buffer.stride = mt->hiz_buffer.mt->region->pitch;
+      }
+   }
 
-   mt->hiz_buffer.bo = mt->hiz_buffer.mt->region->bo;
-   mt->hiz_buffer.stride = mt->hiz_buffer.mt->region->pitch;
+   if (!mt->hiz_buffer.bo)
+      return false;
 
    /* Mark that all slices need a HiZ resolve. */
    struct intel_resolve_map *head = &mt->hiz_map;
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index 92d26fa..694e5e7 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -417,7 +417,7 @@ struct intel_mipmap_tree
    struct {
       struct intel_mipmap_tree *mt;
       drm_intel_bo *bo;
-      uint32_t stride;
+      unsigned long stride;
    } hiz_buffer;
 
    /**
-- 
1.8.4



More information about the mesa-dev mailing list