[Mesa-dev] [PATCH 2/2] i965: Use IVB specific formula for depthbuffer

Pohjolainen, Topi topi.pohjolainen at intel.com
Mon Sep 23 01:10:08 PDT 2013


On Sun, Sep 22, 2013 at 10:37:19AM -0700, Ben Widawsky wrote:
> After the last patch, we can replace the region allocated in the miptree
> creation with a more straightforward (and hopefully smaller resulting)
> buffer based on the bspec's allocation formula.
> 
> Since I am relatively new to this part of the bspec, I would very much
> appreciate scrutiny during review of this. There were some ambiguities
> to me which are likely obvious to others.
> 
> To prove the reduced [GPU] memory usage I created a simple script which
> polls the memory usage of the process through debugfs every 0.1 seconds.
> The following results show the memory usage difference over 5 runs of
> xonotic-glx with ultra settings.
> 
> The data suggests a 10MB savings on average. I've not measured the
> savings on the CPU side, but I imagine some amount of savings would be
> present there as well.
> 
> x master/mem_usage.txt
> + mine/mem_usage.txt
>     N           Min           Max        Median           Avg Stddev
> x 17121      98959360 7.3394995e+08 7.2782234e+08 7.2209615e+08 43633222
> + 17166 1.2538266e+08 7.2241562e+08   7.16288e+08 7.1071472e+08 42964578
> 
> Below is the FPS data over those same 5 tests. I'm not sure if the
> decrease is statistically significant to y'all. I don't have any
> theories about it.
> 
> x master/xonotic.fps
> + mine/xonotic.fps
>     N           Min           Max        Median           Avg Stddev
> x   5     27.430746     27.524985      27.50568     27.487017 0.039439874
> +   5     27.409173     27.461715     27.441207     27.440883 0.021086805
> 
> NOTE: There were a couple of places in the arithmetic where I could have
> taken some shortcuts. In order to make the code match with the spec as
> much as possible, I've decided not to do this. One shortcut I did make
> was the tiling type. Digging through the code it looks like you always
> want Y-tiled, except when it won't fit, in which case you want X-tiled.
> I wasn't a fan of the existing helper function that's there since it has
> a few irrelevant parameters for this operation. I suspect people
> reviewing this might ask me to change this, which is fine; I just wanted
> to explain the motivation.
> 
> CC: Chad Versace <chad.versace at linux.intel.com>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=67564
> Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 149 +++++++++++++++++++++++---
>  1 file changed, 133 insertions(+), 16 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index cb6ead3..595228f 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -1271,29 +1271,146 @@ intel_miptree_slice_enable_hiz(struct brw_context *brw,
>     return true;
>  }
>  
> +static struct intel_region *
> +gen7_create_hiz_depth_buffer(struct brw_context *brw,
> +			     struct intel_mipmap_tree *mt)
> +{
> +#define level(x, l) ((x) >> (l) > 0 ? (x) >> (l) : 1)
> +   struct intel_region *region;
> +   uint32_t q_pitch, w0, h0, h1, h_level, z_depth; /* Inputs to formula */
> +   size_t hz_width; /* Number of bytes */
> +   unsigned int hz_height; /* Number of rows */
> +
> +   z_depth = mt->level[0].depth;
> +   w0 = level(mt->logical_width0, 0);
> +   h0 = level(mt->logical_height0, 0);
> +   h1 = level(mt->logical_height0, 1);
> +
> +   /* The value of Z_Height and Z_Width must each be multiplied by 2 before
> +    * being applied to the table below if Number of Multisamples is set to
> +    * NUMSAMPLES_4. The value of Z_Height must be multiplied by 2 and Z_Width
> +    * must be multiplied by 4 before being applied to the table below if Number
> +    * of Multisamples is set to NUMSAMPLES_8.
> +    */
> +   switch (mt->num_samples) {
> +      case 16:
> +	 h0 = CEILING(h0, 2) * 8;
> +	 h1 = CEILING(h1, 2) * 8;
> +	 break;
> +      case 8:
> +	 h0 = CEILING(h0, 2) * 4 * z_depth;
> +	 h1 = CEILING(h1, 2) * 4 * z_depth;
> +	 w0 = CEILING(w0, 2) * 8;
> +	 break;
> +      case 4:
> +	 h0 = CEILING(h0, 2) * 4;
> +	 h1 = CEILING(h1, 2) * 4;
> +	 w0 = CEILING(w0, 2) * 4;
> +	 break;
> +      case 2:
> +	 w0 = CEILING(w0, 2) * 4;
> +	 break;
> +      case 1:
> +      case 0:
> +	 break;
> +      default:
> +	 fprintf(stderr, "Unknown number of samples: %d\n", mt->num_samples);
> +	 abort();
> +   }
> +
> +   hz_width = CEILING(w0, 16) * 16;
> +
> +   /* ... Where, Qpitch is computed using vertical alignment j=8. Please refer
> +    * to the GPU overview volume for Qpitch definition.  NB: The docs have
> +    * multiple formulas for q_pitch on IVB, but the HSW docs only have the
> +    * below definition.
> +    */
> +   q_pitch = h0 + h1 + 11 * 8;
> +
> +   /* The following is directly derived from the "Hierarchical Depth Buffer"
> +    * section of the bspec.
> +    */
> +   switch (mt->target) {
> +   case GL_TEXTURE_1D_ARRAY:
> +   case GL_TEXTURE_2D_ARRAY:
> +   case GL_TEXTURE_2D:
> +      hz_height = CEILING((q_pitch * z_depth / 2), 8) * 8;
> +      break;
> +   case GL_TEXTURE_CUBE_MAP_ARRAY:
> +      hz_height = CEILING((q_pitch * z_depth * 6 / 2), 8) * 8;
> +      break;
> +   case GL_TEXTURE_3D:
> +      hz_height = 0;
> +      for (int i = 0; i < mt->last_level; i++) {
> +	 int tmp;
> +	 h_level = level(mt->logical_height0, i);
> +	 tmp = floorf(z_depth / pow(2, i));
> +	 if (!tmp)
> +	    tmp++;
> +	 hz_height += h_level * tmp;
> +      }
> +      hz_height /= 2;
> +      break;
> +   default:
> +      perf_debug("Unknown depthbuffer texture type (%d).", mt->target);
> +      return NULL;
> +   }
> +#undef level
> +
> +   region = intel_region_alloc(brw->intelScreen,
> +			       I915_TILING_Y,
> +			       mt->cpp,
> +			       hz_width,
> +			       hz_height,
> +			       true);
> +   /* We need to do the same check in intel_miptree_create() to make
> +    * sure we have a region that can be blitted.
> +    */
> +   if (region->bo->size >= brw->max_gtt_map_object_size) {
> +      perf_debug("%zx%d depthbuffer larger than aperture; falling back to X-tiled\n",
> +	    hz_width, hz_height);
> +
> +      intel_region_release(&region);
> +
> +      region = intel_region_alloc(brw->intelScreen,
> +				  I915_TILING_Y,

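Shouldn't this be I915_TILING_X instead? The perf_debug message above says
we are falling back to X-tiled, but this second allocation requests
I915_TILING_Y again, exactly like the first one.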

> +				  mt->cpp,
> +				  hz_width,
> +				  hz_height,
> +				  true);
> +   }
> +
> +   return region;
> +}
>  
>  
>  bool
>  intel_miptree_alloc_hiz(struct brw_context *brw,
>  			struct intel_mipmap_tree *mt)
>  {
> -   assert(mt->hiz_depth_buffer.mt == NULL);
> -   mt->hiz_depth_buffer.mt = intel_miptree_create(brw,
> -						  mt->target,
> -						  mt->format,
> -						  mt->first_level,
> -						  mt->last_level,
> -						  mt->logical_width0,
> -						  mt->logical_height0,
> -						  mt->logical_depth0,
> -						  true,
> -						  mt->num_samples,
> -						  INTEL_MIPTREE_TILING_ANY);
> -
> -   if (!mt->hiz_depth_buffer.mt)
> -      return false;
> +   if (brw->gen > 6) {
> +      assert(mt->hiz_depth_buffer.region == NULL);
> +      mt->hiz_depth_buffer.region = gen7_create_hiz_depth_buffer(brw, mt);
> +   }
> +   if (brw->gen < 7 || !mt->hiz_depth_buffer.region) {
> +      assert(mt->hiz_depth_buffer.mt == NULL);
> +      mt->hiz_depth_buffer.mt = intel_miptree_create(brw,
> +						     mt->target,
> +						     mt->format,
> +						     mt->first_level,
> +						     mt->last_level,
> +						     mt->logical_width0,
> +						     mt->logical_height0,
> +						     mt->logical_depth0,
> +						     true,
> +						     mt->num_samples,
> +						     INTEL_MIPTREE_TILING_ANY);
> +      if (mt->hiz_depth_buffer.mt)
> +	 mt->hiz_depth_buffer.region = mt->hiz_depth_buffer.mt->region;
> +   }
>  
> -   mt->hiz_depth_buffer.region = mt->hiz_depth_buffer.mt->region;
> +   if (!mt->hiz_depth_buffer.region)
> +      return false;
>  
>     /* Mark that all slices need a HiZ resolve. */
>     struct intel_resolve_map *head = &mt->hiz_map;
> -- 
> 1.8.4
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list