[Mesa-dev] [v2 14/17] i965: Use ISL for CCS layouts

Wed Nov 23 21:10:59 UTC 2016

On Wed, Nov 23, 2016 at 1:16 AM, Topi Pohjolainen <
topi.pohjolainen at gmail.com> wrote:

> One can now also delete intel_get_non_msrt_mcs_alignment().
>
> Signed-off-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 138
> +++++++-------------------
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.h |   4 -
>  2 files changed, 38 insertions(+), 104 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index a4a7ee0..9428e7b 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -101,66 +101,6 @@ compute_msaa_layout(struct brw_context *brw,
> mesa_format format,
>     }
>  }
>
> -
> -/**
> - * For single-sampled render targets ("non-MSRT"), the MCS buffer is a
> - * scaled-down bitfield representation of the color buffer which is
> capable of
> - * recording when blocks of the color buffer are equal to the clear value.
> - * This function returns the block size that will be used by the MCS
> buffer
> - * corresponding to a certain color miptree.
> - *
> - * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
> Target(s)",
> - * beneath the "Fast Color Clear" bullet (p327):
> - *
> - *     The following table describes the RT alignment
> - *
> - *                       Pixels  Lines
> - *         TiledY RT CL
> - *             bpp
> - *              32          8      4
> - *              64          4      4
> - *             128          2      4
> - *         TiledX RT CL
> - *             bpp
> - *              32         16      2
> - *              64          8      2
> - *             128          4      2
> - *
> - * This alignment has the following uses:
> - *
> - * - For figuring out the size of the MCS buffer.  Each 4k tile in the MCS
> - *   buffer contains 128 blocks horizontally and 256 blocks vertically.
> - *
> - * - For figuring out alignment restrictions for a fast clear operation.
> Fast
> - *   clear operations must always clear aligned multiples of 16 blocks
> - *   horizontally and 32 blocks vertically.
> - *
> - * - For scaling down the coordinates sent through the render pipeline
> during
> - *   a fast clear.  X coordinates must be scaled down by 8 times the block
> - *   width, and Y coordinates by 16 times the block height.
> - *
> - * - For scaling down the coordinates sent through the render pipeline
> during
> - *   a "Render Target Resolve" operation.  X coordinates must be scaled
> down
> - *   by half the block width, and Y coordinates by half the block height.
> - */
> -void
> -intel_get_non_msrt_mcs_alignment(const struct intel_mipmap_tree *mt,
> -                                 unsigned *width_px, unsigned *height)
> -{
> -   switch (mt->tiling) {
> -   default:
> -      unreachable("Non-MSRT MCS requires X or Y tiling");
> -      /* In release builds, fall through */
> -   case I915_TILING_Y:
> -      *width_px = 32 / mt->cpp;
> -      *height = 4;
> -      break;
> -   case I915_TILING_X:
> -      *width_px = 64 / mt->cpp;
> -      *height = 2;
> -   }
> -}
> -
>  bool
>  intel_tiling_supports_non_msrt_mcs(const struct brw_context *brw,
>                                     unsigned tiling)
> @@ -1654,55 +1594,53 @@ intel_miptree_alloc_non_msrt_mcs(struct
> brw_context *brw,
>     assert(!mt->disable_aux_buffers);
>     assert(!mt->no_ccs);
>
> -   /* The format of the MCS buffer is opaque to the driver; all that
> matters
> -    * is that we get its size and pitch right.  We'll pretend that the
> format
> -    * is R32.  Since an MCS tile covers 128 blocks horizontally, and a
> Y-tiled
> -    * R32 buffer is 32 pixels across, we'll need to scale the width down
> by
> -    * the block width and then a further factor of 4.  Since an MCS tile
> -    * covers 256 blocks vertically, and a Y-tiled R32 buffer is 32 rows
> high,
> -    * we'll need to scale the height down by the block height and then a
> -    * further factor of 8.
> -    */
> -   const mesa_format format = MESA_FORMAT_R_UINT32;
> -   unsigned block_width_px;
> -   unsigned block_height;
> -   intel_get_non_msrt_mcs_alignment(mt, &block_width_px, &block_height);
> -   unsigned width_divisor = block_width_px * 4;
> -   unsigned height_divisor = block_height * 8;
> -
> -   /* The Skylake MCS is twice as tall as the Broadwell MCS.
> -    *
> -    * In pre-Skylake, each bit in the MCS contained the state of 2
> cachelines
> -    * in the main surface. In Skylake, it's two bits.  The extra bit
> -    * doubles the MCS height, not width, because in Skylake the MCS is
> always
> -    * Y-tiled.
> +   struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
> +   if (!buf)
> +      return false;
> +
> +   struct isl_surf temp_main_surf;
> +   struct isl_surf temp_ccs_surf;
> +
> +   /* First create an ISL representation of the main color surface and
> +    * let ISL calculate the equivalent CCS surface from it.
>      */
> -   if (brw->gen >= 9)
> -      height_divisor /= 2;
> +   intel_miptree_get_isl_surf(brw, mt, &temp_main_surf);
> +   if (!isl_surf_get_ccs_surf(&brw->isl_dev, &temp_main_surf,
> &temp_ccs_surf))
> +      return false;
>

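You're leaking the aux_buffer here: buf has already been calloc'ed when this
early "return false" is taken.  Maybe move the isl_surf_get_ccs_surf() check
to before allocating the aux_buffer?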

>
> -   unsigned mcs_width =
> -      ALIGN(mt->logical_width0, width_divisor) / width_divisor;
> -   unsigned mcs_height =
> -      ALIGN(mt->logical_height0, height_divisor) / height_divisor;
> -   assert(mt->logical_depth0 == 1);
> +   assert(temp_ccs_surf.size &&
> +          (temp_ccs_surf.size % temp_ccs_surf.row_pitch == 0));
> +
> +   buf->size = temp_ccs_surf.size;
> +   buf->pitch = temp_ccs_surf.row_pitch;
> +   buf->qpitch = isl_surf_get_array_pitch_sa_rows(&temp_ccs_surf);
>
> -   uint32_t layout_flags =
> -      (brw->gen >= 8) ? MIPTREE_LAYOUT_FORCE_HALIGN16 : 0;
>     /* In case of compression mcs buffer needs to be initialised requiring
> the
>      * buffer to be immediately mapped to cpu space for writing. Therefore
> do
>      * not use the gpu access flag which can cause an unnecessary delay if
> the
>      * backing pages happened to be just used by the GPU.
>      */
> -   if (!is_lossless_compressed)
> -      layout_flags |= MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
> -
> -   mt->mcs_buf = intel_mcs_miptree_buf_create(brw, mt,
> -                                              format,
> -                                              mcs_width,
> -                                              mcs_height,
> -                                              layout_flags);
> -   if (!mt->mcs_buf)
> +   const uint32_t alloc_flags =
> +      is_lossless_compressed ? 0 : BO_ALLOC_FOR_RENDER;
> +   uint32_t tiling = I915_TILING_Y;
> +   unsigned long pitch;
> +
> +   /* ISL has a stricter set of alignment rules than the drm allocator.
> +    * Therefore one can pass the ISL dimensions in terms of bytes instead
> of
> +    * trying to recalculate based on different format block sizes.
> +    */
> +   buf->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "ccs-miptree",
> +                                      buf->pitch, buf->size / buf->pitch,
> +                                      1, &tiling, &pitch, alloc_flags);
> +   if (buf->bo) {
> +      assert(pitch == buf->pitch);
> +      assert(tiling == I915_TILING_Y);
> +   } else {
> +      free(buf);
>        return false;
> +   }
> +
> +   mt->mcs_buf = buf;
>
>     /* From Gen9 onwards single-sampled (non-msrt) auxiliary buffers are
>      * used for lossless compression which requires similar initialisation
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> index 51ab664..38c00f8 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> @@ -656,10 +656,6 @@ struct intel_mipmap_tree
>     GLuint refcount;
>  };
>
> -void
> -intel_get_non_msrt_mcs_alignment(const struct intel_mipmap_tree *mt,
> -                                 unsigned *width_px, unsigned *height);
> -
>  bool
>  intel_miptree_is_lossless_compressed(const struct brw_context *brw,
>                                       const struct intel_mipmap_tree *mt);
> --
> 2.5.5
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20161123/903ed36d/attachment-0001.html>