[Mesa-dev] [PATCH 5/5] i965/gen8: Don't allocate hiz miptree structure

Ben Widawsky ben at bwidawsk.net
Fri Nov 21 16:15:39 PST 2014


On Fri, Nov 21, 2014 at 03:09:03PM -0800, Jordan Justen wrote:
> We now skip allocating a hiz miptree for gen8. Instead, we calculate
> the required hiz buffer parameters and allocate a bo directly.
> 
> v2:
>  * Update hz_height calculation as suggested by Topi
> v3:
>  * Bail if we failed to create the bo (Ben)
> 
> Signed-off-by: Jordan Justen <jordan.l.justen at intel.com>
> Reviewed-by: Topi Pohjolainen <topi.pohjolainen at intel.com>

Add the bugzilla

> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 100 ++++++++++++++++++++++++++
>  1 file changed, 100 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index e6ee8d7..c62b0b8 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -1497,6 +1497,104 @@ intel_gen7_hiz_buf_create(struct brw_context *brw,
>  }
>  
>  
> +/**
> + * Helper for intel_miptree_alloc_hiz() that determines the required hiz
> + * buffer dimensions and allocates a bo for the hiz buffer.
> + */
> +static struct intel_miptree_aux_buffer *
> +intel_gen8_hiz_buf_create(struct brw_context *brw,
> +                          struct intel_mipmap_tree *mt)
> +{
> +   unsigned z_width = mt->logical_width0;
> +   unsigned z_height = mt->logical_height0;
> +   const unsigned z_depth = mt->logical_depth0;
> +   unsigned hz_width, hz_height;
> +   struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
> +
> +   if (!buf)
> +      return NULL;
> +
> +   /* Gen7 PRM Volume 2, Part 1, 11.5.3 "Hierarchical Depth Buffer" documents
> +    * adjustments required for Z_Height and Z_Width based on multisampling.
> +    */
> +   switch (mt->num_samples) {
> +   case 0:
> +   case 1:
> +      break;
> +   case 2:
> +   case 4:
> +      z_width *= 2;
> +      z_height *= 2;
> +      break;
> +   case 8:
> +      z_width *= 4;
> +      z_height *= 2;
> +      break;

I think that you need:
case 16:
	z_height *= 4;
	z_width *= 4; /* Not 100% certain about this one */

I'd really like if you put a 0 in the variable names, but I can live without it.

> +   default:
> +      assert(!"Unsupported sample count!");
> +   }
> +
> +   const unsigned vertical_align = 8; /* 'j' in the docs */
> +   const unsigned H0 = z_height;
> +   const unsigned h0 = ALIGN(H0, vertical_align);
> +   const unsigned h1 = ALIGN(minify(H0, 1), vertical_align);
> +   const unsigned Z0 = z_depth;
> +
> +   /* HZ_Width (bytes) = ceiling(Z_Width / 16) * 16 */
> +   hz_width = ALIGN(z_width, 16);
> +
> +   unsigned H_i = H0;
> +   unsigned Z_i = Z0;
> +   unsigned sum_h_i = 0;
> +   unsigned hz_height_3d_sum = 0;
> +   for (int level = mt->first_level; level <= mt->last_level; ++level) {
> +      unsigned i = level - mt->first_level;
> +      unsigned h_i = ALIGN(H_i, vertical_align);
> +      /* sum(i=2 to m; h_i) */
> +      if (i >= 2) {
> +         sum_h_i += h_i;
> +      }
> +      /* sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i))) */
> +      hz_height_3d_sum += h_i * Z_i;

Is it possible for Z_i to be zero on the first iteration? If so, you need to do
something.

> +      H_i = minify(H_i, 1);
> +      Z_i = minify(Z_i, 1);
> +   }
> +   /* HZ_QPitch = h0 + max(h1, sum(i=2 to m; h_i)) */
> +   buf->qpitch = h0 + MAX2(h1, sum_h_i);

Don't ask me why, but I think you can optimize:
if (level < 2)
	buf->qpitch = h0/2.

> +
> +   if (mt->target == GL_TEXTURE_3D) {
> +      /* (1/2) * sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i))) */
> +      hz_height = CEILING(hz_height_3d_sum, 2);
> +   } else {
> +      /* HZ_Height (rows) = ceiling( (HZ_QPitch/2)/8) *8 * Z_Depth */
> +      hz_height = CEILING(buf->qpitch, 2 * 8) * 8 * Z0;
> +      if (mt->target == GL_TEXTURE_CUBE_MAP_ARRAY ||
> +          mt->target == GL_TEXTURE_CUBE_MAP) {
> +         /* HZ_Height (rows) = ceiling( (HZ_QPitch/2)/8) *8 * 6
> +          *
> +          * Assumption: the doc meant to multiply by Z_Depth as well.
> +          */
> +         hz_height *= 6;

I don't think this assumption is correct. Isn't the depth just always 6 in a
cube map?

> +      }
> +   }
> +
> +   unsigned long pitch;
> +   uint32_t tiling = I915_TILING_Y;
> +   buf->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "hiz",
> +                                      hz_width, hz_height, 1,
> +                                      &tiling, &pitch,
> +                                      BO_ALLOC_FOR_RENDER);
> +   if (!buf->bo) {
> +      free(buf);
> +      return NULL;
> +   }
> +
> +   buf->pitch = pitch;
> +
> +   return buf;
> +}

I'm fine with you not handling the potential tiling coming back different, but
put a warn or something at least.

> +
> +
>  static struct intel_miptree_aux_buffer *
>  intel_hiz_miptree_buf_create(struct brw_context *brw,
>                               struct intel_mipmap_tree *mt)
> @@ -1540,6 +1638,8 @@ intel_miptree_alloc_hiz(struct brw_context *brw,
>  
>     if (brw->gen == 7) {
>        mt->hiz_buf = intel_gen7_hiz_buf_create(brw, mt);
> +   } else if (brw->gen >= 8) {
> +      mt->hiz_buf = intel_gen8_hiz_buf_create(brw, mt);
>     } else {
>        mt->hiz_buf = intel_hiz_miptree_buf_create(brw, mt);
>     }

Everything else looks good. If you don't have arguments with my, add my 
Reviewed-by: Ben Widawsky <ben at bwidawsk.net> after you make the changes.
Otherwise, we can discuss it further.

-- 
Ben Widawsky, Intel Open Source Technology Center


More information about the mesa-dev mailing list