[Mesa-dev] [v2 14/17] i965: Use ISL for CCS layouts
Pohjolainen, Topi
topi.pohjolainen at gmail.com
Thu Nov 24 07:10:06 UTC 2016
On Wed, Nov 23, 2016 at 01:10:59PM -0800, Jason Ekstrand wrote:
> On Wed, Nov 23, 2016 at 1:16 AM, Topi Pohjolainen
> <[1]topi.pohjolainen at gmail.com> wrote:
>
> One can now also delete intel_get_non_msrt_mcs_alignment().
> Signed-off-by: Topi Pohjolainen <[2]topi.pohjolainen at intel.com>
> ---
> src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 138
> +++++++-------------------
> src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 4 -
> 2 files changed, 38 insertions(+), 104 deletions(-)
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index a4a7ee0..9428e7b 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -101,66 +101,6 @@ compute_msaa_layout(struct brw_context *brw,
> mesa_format format,
> }
> }
> -
> -/**
> - * For single-sampled render targets ("non-MSRT"), the MCS buffer
> is a
> - * scaled-down bitfield representation of the color buffer which is
> capable of
> - * recording when blocks of the color buffer are equal to the clear
> value.
> - * This function returns the block size that will be used by the
> MCS buffer
> - * corresponding to a certain color miptree.
> - *
> - * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
> Target(s)",
> - * beneath the "Fast Color Clear" bullet (p327):
> - *
> - * The following table describes the RT alignment
> - *
> - * Pixels Lines
> - * TiledY RT CL
> - * bpp
> - * 32 8 4
> - * 64 4 4
> - * 128 2 4
> - * TiledX RT CL
> - * bpp
> - * 32 16 2
> - * 64 8 2
> - * 128 4 2
> - *
> - * This alignment has the following uses:
> - *
> - * - For figuring out the size of the MCS buffer. Each 4k tile in
> the MCS
> - * buffer contains 128 blocks horizontally and 256 blocks
> vertically.
> - *
> - * - For figuring out alignment restrictions for a fast clear
> operation. Fast
> - * clear operations must always clear aligned multiples of 16
> blocks
> - * horizontally and 32 blocks vertically.
> - *
> - * - For scaling down the coordinates sent through the render
> pipeline during
> - * a fast clear. X coordinates must be scaled down by 8 times
> the block
> - * width, and Y coordinates by 16 times the block height.
> - *
> - * - For scaling down the coordinates sent through the render
> pipeline during
> - * a "Render Target Resolve" operation. X coordinates must be
> scaled down
> - * by half the block width, and Y coordinates by half the block
> height.
> - */
> -void
> -intel_get_non_msrt_mcs_alignment(const struct intel_mipmap_tree
> *mt,
> - unsigned *width_px, unsigned
> *height)
> -{
> - switch (mt->tiling) {
> - default:
> - unreachable("Non-MSRT MCS requires X or Y tiling");
> - /* In release builds, fall through */
> - case I915_TILING_Y:
> - *width_px = 32 / mt->cpp;
> - *height = 4;
> - break;
> - case I915_TILING_X:
> - *width_px = 64 / mt->cpp;
> - *height = 2;
> - }
> -}
> -
> bool
> intel_tiling_supports_non_msrt_mcs(const struct brw_context *brw,
> unsigned tiling)
> @@ -1654,55 +1594,53 @@ intel_miptree_alloc_non_msrt_mcs(struct
> brw_context *brw,
> assert(!mt->disable_aux_buffers);
> assert(!mt->no_ccs);
> - /* The format of the MCS buffer is opaque to the driver; all
> that matters
> - * is that we get its size and pitch right. We'll pretend that
> the format
> - * is R32. Since an MCS tile covers 128 blocks horizontally,
> and a Y-tiled
> - * R32 buffer is 32 pixels across, we'll need to scale the width
> down by
> - * the block width and then a further factor of 4. Since an MCS
> tile
> - * covers 256 blocks vertically, and a Y-tiled R32 buffer is 32
> rows high,
> - * we'll need to scale the height down by the block height and
> then a
> - * further factor of 8.
> - */
> - const mesa_format format = MESA_FORMAT_R_UINT32;
> - unsigned block_width_px;
> - unsigned block_height;
> - intel_get_non_msrt_mcs_alignment(mt, &block_width_px,
> &block_height);
> - unsigned width_divisor = block_width_px * 4;
> - unsigned height_divisor = block_height * 8;
> -
> - /* The Skylake MCS is twice as tall as the Broadwell MCS.
> - *
> - * In pre-Skylake, each bit in the MCS contained the state of 2
> cachelines
> - * in the main surface. In Skylake, it's two bits. The extra
> bit
> - * doubles the MCS height, not width, because in Skylake the MCS
> is always
> - * Y-tiled.
> + struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
> + if (!buf)
> + return false;
> +
> + struct isl_surf temp_main_surf;
> + struct isl_surf temp_ccs_surf;
> +
> + /* First create an ISL representation of the main color surface
> and let ISL
> + * calculate the equivalent CCS surface from it.
> */
> - if (brw->gen >= 9)
> - height_divisor /= 2;
> + intel_miptree_get_isl_surf(brw, mt, &temp_main_surf);
> + if (!isl_surf_get_ccs_surf(&brw->isl_dev, &temp_main_surf,
> &temp_ccs_surf))
> + return false;
>
> You're leaking the aux_buffer here. Maybe move this to before
> allocating the aux_buffer?
Ouch. Good catch. The allocation can actually be moved further down, to just
after the assert.
>
> - unsigned mcs_width =
> - ALIGN(mt->logical_width0, width_divisor) / width_divisor;
> - unsigned mcs_height =
> - ALIGN(mt->logical_height0, height_divisor) / height_divisor;
> - assert(mt->logical_depth0 == 1);
> + assert(temp_ccs_surf.size &&
> + (temp_ccs_surf.size % temp_ccs_surf.row_pitch == 0));
> +
> + buf->size = temp_ccs_surf.size;
> + buf->pitch = temp_ccs_surf.row_pitch;
> + buf->qpitch = isl_surf_get_array_pitch_sa_rows(&temp_ccs_surf);
> - uint32_t layout_flags =
> - (brw->gen >= 8) ? MIPTREE_LAYOUT_FORCE_HALIGN16 : 0;
> /* In case of compression mcs buffer needs to be initialised
> requiring the
> * buffer to be immediately mapped to cpu space for writing.
> Therefore do
> * not use the gpu access flag which can cause an unnecessary
> delay if the
> * backing pages happened to be just used by the GPU.
> */
> - if (!is_lossless_compressed)
> - layout_flags |= MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
> -
> - mt->mcs_buf = intel_mcs_miptree_buf_create(brw, mt,
> - format,
> - mcs_width,
> - mcs_height,
> - layout_flags);
> - if (!mt->mcs_buf)
> + const uint32_t alloc_flags =
> + is_lossless_compressed ? 0 : BO_ALLOC_FOR_RENDER;
> + uint32_t tiling = I915_TILING_Y;
> + unsigned long pitch;
> +
> + /* ISL has a stricter set of alignment rules than the drm
> allocator.
> + * Therefore one can pass the ISL dimensions in terms of bytes
> instead of
> + * trying to recalculate based on different format block sizes.
> + */
> + buf->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "ccs-miptree",
> + buf->pitch, buf->size /
> buf->pitch,
> + 1, &tiling, &pitch,
> alloc_flags);
> + if (buf->bo) {
> + assert(pitch == buf->pitch);
> + assert(tiling == I915_TILING_Y);
> + } else {
> + free(buf);
> return false;
> + }
> +
> + mt->mcs_buf = buf;
> /* From Gen9 onwards single-sampled (non-msrt) auxiliary buffers
> are
> * used for lossless compression which requires similar
> initialisation
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> index 51ab664..38c00f8 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> @@ -656,10 +656,6 @@ struct intel_mipmap_tree
> GLuint refcount;
> };
> -void
> -intel_get_non_msrt_mcs_alignment(const struct intel_mipmap_tree
> *mt,
> - unsigned *width_px, unsigned
> *height);
> -
> bool
> intel_miptree_is_lossless_compressed(const struct brw_context *brw,
> const struct intel_mipmap_tree
> *mt);
> --
> 2.5.5
> _______________________________________________
> mesa-dev mailing list
> [3]mesa-dev at lists.freedesktop.org
> [4]https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
> References
>
> 1. mailto:topi.pohjolainen at gmail.com
> 2. mailto:topi.pohjolainen at intel.com
> 3. mailto:mesa-dev at lists.freedesktop.org
> 4. https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list