[Mesa-dev] [PATCH v2 04/14] i965/gen7: Don't allocate hiz miptree structure
Ben Widawsky
ben at bwidawsk.net
Mon Nov 10 14:50:53 PST 2014
On Mon, Jul 21, 2014 at 11:00:53PM -0700, Jordan Justen wrote:
> We now skip allocating a hiz miptree for gen7. Instead, we calculate
> the required hiz buffer parameters and allocate a bo directly.
>
> v2:
> * Update hz_height calculation as suggested by Topi
>
> Signed-off-by: Jordan Justen <jordan.l.justen at intel.com>
> ---
> src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 97 ++++++++++++++++++++++++++-
> 1 file changed, 95 insertions(+), 2 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index d5a2227..e3bcd8a 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -823,7 +823,10 @@ intel_miptree_release(struct intel_mipmap_tree **mt)
> drm_intel_bo_unreference((*mt)->bo);
> intel_miptree_release(&(*mt)->stencil_mt);
> if ((*mt)->hiz_buf) {
> - intel_miptree_release(&(*mt)->hiz_buf->mt);
> + if ((*mt)->hiz_buf->mt)
> + intel_miptree_release(&(*mt)->hiz_buf->mt);
> + else
> + drm_intel_bo_unreference((*mt)->hiz_buf->bo);
> free((*mt)->hiz_buf);
> }
> intel_miptree_release(&(*mt)->mcs_mt);
> @@ -1374,6 +1377,91 @@ intel_miptree_level_enable_hiz(struct brw_context *brw,
> }
>
>
> +/**
> + * Helper for intel_miptree_alloc_hiz() that determines the required hiz
> + * buffer dimensions and allocates a bo for the hiz buffer.
> + */
> +static struct intel_miptree_aux_buffer *
> +intel_gen7_hiz_buf_create(struct brw_context *brw,
> + struct intel_mipmap_tree *mt)
> +{
> + unsigned z_width = mt->logical_width0;
> + unsigned z_height = mt->logical_height0;
> + const unsigned z_depth = mt->logical_depth0;
> + unsigned hz_width, hz_height;
> + struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
> +
> + if (!buf)
> + return NULL;
> +
> + /* Gen7 PRM Volume 2, Part 1, 11.5.3 "Hierarchical Depth Buffer" documents
> + * adjustments required for Z_Height and Z_Width based on multisampling.
> + */
> + switch(mt->num_samples) {
I'm new here, but you need a space, I think.
> + case 0:
> + case 1:
> + break;
> + case 2:
> + case 4:
> + z_width *= 2;
> + z_height *= 2;
> + break;
> + case 8:
> + z_width *= 4;
> + z_height *= 2;
> + break;
> + default:
> + assert(!"Unsupported sample count!");
> + }
> +
> + const unsigned vertical_align = 8; /* 'j' in the docs */
j seems to vary based on surface type (if I am reading this correctly).
8 seems safe in all cases.
> + const unsigned H0 = z_height;
> + const unsigned h0 = ALIGN(H0, vertical_align);
> + const unsigned h1 = ALIGN(minify(H0, 1), vertical_align);
I'm having trouble verifying this. On gen8+ it seems this shouldn't be
aligned, ie. h1 = mt->logical_height0. Can you help me find why you are
doing this operation.
> + const unsigned Z0 = z_depth;
> +
> + /* HZ_Width (bytes) = ceiling(Z_Width / 16) * 16 */
> + hz_width = ALIGN(z_width, 16);
> +
> + if (mt->target == GL_TEXTURE_3D) {
> + unsigned H_i = H0;
> + unsigned Z_i = Z0;
> + hz_height = 0;
> + for (int level = mt->first_level; level <= mt->last_level; ++level) {
> + unsigned h_i = ALIGN(H_i, vertical_align);
> + /* sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i))) */
You could optimize this I think, if z_depth is 0 we can ignore this
whole operation. Not sure if that ever happens or not.
> + hz_height += h_i * Z_i;
> + H_i = minify(H_i, 1);
> + Z_i = minify(Z_i, 1);
I want to discuss this with you in person.
> + }
> + /* HZ_Height =
> + * (1/2) * sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i)))
> + */
> + hz_height = CEILING(hz_height, 2);
> + } else {
> + const unsigned hz_qpitch = h0 + h1 + (12 * vertical_align);
> + if (mt->target == GL_TEXTURE_CUBE_MAP_ARRAY ||
> + mt->target == GL_TEXTURE_CUBE_MAP) {
> + /* HZ_Height (rows) = Ceiling ( ( Q_pitch * Z_depth * 6/2) /8 ) * 8 */
> + hz_height = CEILING(hz_qpitch * Z0 * 6, 2 * 8) * 8;
> + } else {
> + /* HZ_Height (rows) = Ceiling ( ( Q_pitch * Z_depth/2) /8 ) * 8 */
> + hz_height = CEILING(hz_qpitch * Z0, 2 * 8) * 8;
> + }
> + }
> +
> + unsigned long pitch;
> + uint32_t tiling = I915_TILING_Y;
> + buf->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "hiz",
> + hz_width, hz_height, 1,
> + &tiling, &pitch,
> + BO_ALLOC_FOR_RENDER);
I think you need to check tiling here (didn't check pitch).
> + buf->pitch = pitch;
> +
> + return buf;
> +}
> +
> +
> static struct intel_miptree_aux_buffer *
> intel_hiz_miptree_buf_create(struct brw_context *brw,
> struct intel_mipmap_tree *mt)
> @@ -1412,7 +1500,12 @@ intel_miptree_alloc_hiz(struct brw_context *brw,
> struct intel_mipmap_tree *mt)
> {
> assert(mt->hiz_buf == NULL);
> - mt->hiz_buf = intel_hiz_miptree_buf_create(brw, mt);
> +
> + if (brw->gen == 7) {
> + mt->hiz_buf = intel_gen7_hiz_buf_create(brw, mt);
> + } else {
> + mt->hiz_buf = intel_hiz_miptree_buf_create(brw, mt);
> + }
>
> if (!mt->hiz_buf)
> return false;
I think you need to check mt->hiz_buf->bo as the code currently works.
IMO however, it'd be preferable to just do the check in
intel_gen7_hiz_buf_create()
> --
--
Ben Widawsky, Intel Open Source Technology Center
More information about the mesa-dev
mailing list