[Mesa-dev] [v2 14/17] i965: Use ISL for CCS layouts

Thu Nov 24 07:10:06 UTC 2016

On Wed, Nov 23, 2016 at 01:10:59PM -0800, Jason Ekstrand wrote:
>    On Wed, Nov 23, 2016 at 1:16 AM, Topi Pohjolainen
>    <[1]topi.pohjolainen at gmail.com> wrote:
> 
>      One can now also delete intel_get_non_msrt_mcs_alignment().
>      Signed-off-by: Topi Pohjolainen <[2]topi.pohjolainen at intel.com>
>      ---
>       src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 138
>      +++++++-------------------
>       src/mesa/drivers/dri/i965/intel_mipmap_tree.h |   4 -
>       2 files changed, 38 insertions(+), 104 deletions(-)
>      diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>      b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>      index a4a7ee0..9428e7b 100644
>      --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>      +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>      @@ -101,66 +101,6 @@ compute_msaa_layout(struct brw_context *brw,
>      mesa_format format,
>          }
>       }
>      -
>      -/**
>      - * For single-sampled render targets ("non-MSRT"), the MCS buffer
>      is a
>      - * scaled-down bitfield representation of the color buffer which is
>      capable of
>      - * recording when blocks of the color buffer are equal to the clear
>      value.
>      - * This function returns the block size that will be used by the
>      MCS buffer
>      - * corresponding to a certain color miptree.
>      - *
>      - * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
>      Target(s)",
>      - * beneath the "Fast Color Clear" bullet (p327):
>      - *
>      - *     The following table describes the RT alignment
>      - *
>      - *                       Pixels  Lines
>      - *         TiledY RT CL
>      - *             bpp
>      - *              32          8      4
>      - *              64          4      4
>      - *             128          2      4
>      - *         TiledX RT CL
>      - *             bpp
>      - *              32         16      2
>      - *              64          8      2
>      - *             128          4      2
>      - *
>      - * This alignment has the following uses:
>      - *
>      - * - For figuring out the size of the MCS buffer.  Each 4k tile in
>      the MCS
>      - *   buffer contains 128 blocks horizontally and 256 blocks
>      vertically.
>      - *
>      - * - For figuring out alignment restrictions for a fast clear
>      operation.  Fast
>      - *   clear operations must always clear aligned multiples of 16
>      blocks
>      - *   horizontally and 32 blocks vertically.
>      - *
>      - * - For scaling down the coordinates sent through the render
>      pipeline during
>      - *   a fast clear.  X coordinates must be scaled down by 8 times
>      the block
>      - *   width, and Y coordinates by 16 times the block height.
>      - *
>      - * - For scaling down the coordinates sent through the render
>      pipeline during
>      - *   a "Render Target Resolve" operation.  X coordinates must be
>      scaled down
>      - *   by half the block width, and Y coordinates by half the block
>      height.
>      - */
>      -void
>      -intel_get_non_msrt_mcs_alignment(const struct intel_mipmap_tree
>      *mt,
>      -                                 unsigned *width_px, unsigned
>      *height)
>      -{
>      -   switch (mt->tiling) {
>      -   default:
>      -      unreachable("Non-MSRT MCS requires X or Y tiling");
>      -      /* In release builds, fall through */
>      -   case I915_TILING_Y:
>      -      *width_px = 32 / mt->cpp;
>      -      *height = 4;
>      -      break;
>      -   case I915_TILING_X:
>      -      *width_px = 64 / mt->cpp;
>      -      *height = 2;
>      -   }
>      -}
>      -
>       bool
>       intel_tiling_supports_non_msrt_mcs(const struct brw_context *brw,
>                                          unsigned tiling)
>      @@ -1654,55 +1594,53 @@ intel_miptree_alloc_non_msrt_mcs(struct
>      brw_context *brw,
>          assert(!mt->disable_aux_buffers);
>          assert(!mt->no_ccs);
>      -   /* The format of the MCS buffer is opaque to the driver; all
>      that matters
>      -    * is that we get its size and pitch right.  We'll pretend that
>      the format
>      -    * is R32.  Since an MCS tile covers 128 blocks horizontally,
>      and a Y-tiled
>      -    * R32 buffer is 32 pixels across, we'll need to scale the width
>      down by
>      -    * the block width and then a further factor of 4.  Since an MCS
>      tile
>      -    * covers 256 blocks vertically, and a Y-tiled R32 buffer is 32
>      rows high,
>      -    * we'll need to scale the height down by the block height and
>      then a
>      -    * further factor of 8.
>      -    */
>      -   const mesa_format format = MESA_FORMAT_R_UINT32;
>      -   unsigned block_width_px;
>      -   unsigned block_height;
>      -   intel_get_non_msrt_mcs_alignment(mt, &block_width_px,
>      &block_height);
>      -   unsigned width_divisor = block_width_px * 4;
>      -   unsigned height_divisor = block_height * 8;
>      -
>      -   /* The Skylake MCS is twice as tall as the Broadwell MCS.
>      -    *
>      -    * In pre-Skylake, each bit in the MCS contained the state of 2
>      cachelines
>      -    * in the main surface. In Skylake, it's two bits.  The extra
>      bit
>      -    * doubles the MCS height, not width, because in Skylake the MCS
>      is always
>      -    * Y-tiled.
>      +   struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
>      +   if (!buf)
>      +      return false;
>      +
>      +   struct isl_surf temp_main_surf;
>      +   struct isl_surf temp_ccs_surf;
>      +
>      +   /* First create an ISL representation of the main color surface
>      and let ISL
>      +    * calculate equivalent CCS surface against it.
>           */
>      -   if (brw->gen >= 9)
>      -      height_divisor /= 2;
>      +   intel_miptree_get_isl_surf(brw, mt, &temp_main_surf);
>      +   if (!isl_surf_get_ccs_surf(&brw->isl_dev, &temp_main_surf,
>      &temp_ccs_surf))
>      +      return false;
> 
>    You're leaking the aux_buffer here.  Maybe move this to before
>    allocating the aux_buffer?

Ouch. Good catch. The allocation can actually be moved further down, to just
after the assert.

> 
>      -   unsigned mcs_width =
>      -      ALIGN(mt->logical_width0, width_divisor) / width_divisor;
>      -   unsigned mcs_height =
>      -      ALIGN(mt->logical_height0, height_divisor) / height_divisor;
>      -   assert(mt->logical_depth0 == 1);
>      +   assert(temp_ccs_surf.size &&
>      +          (temp_ccs_surf.size % temp_ccs_surf.row_pitch == 0));
>      +
>      +   buf->size = temp_ccs_surf.size;
>      +   buf->pitch = temp_ccs_surf.row_pitch;
>      +   buf->qpitch = isl_surf_get_array_pitch_sa_rows(&temp_ccs_surf);
>      -   uint32_t layout_flags =
>      -      (brw->gen >= 8) ? MIPTREE_LAYOUT_FORCE_HALIGN16 : 0;
>          /* In case of compression mcs buffer needs to be initialised
>      requiring the
>           * buffer to be immediately mapped to cpu space for writing.
>      Therefore do
>           * not use the gpu access flag which can cause an unnecessary
>      delay if the
>           * backing pages happened to be just used by the GPU.
>           */
>      -   if (!is_lossless_compressed)
>      -      layout_flags |= MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
>      -
>      -   mt->mcs_buf = intel_mcs_miptree_buf_create(brw, mt,
>      -                                              format,
>      -                                              mcs_width,
>      -                                              mcs_height,
>      -                                              layout_flags);
>      -   if (!mt->mcs_buf)
>      +   const uint32_t alloc_flags =
>      +      is_lossless_compressed ? 0 : BO_ALLOC_FOR_RENDER;
>      +   uint32_t tiling = I915_TILING_Y;
>      +   unsigned long pitch;
>      +
>      +   /* ISL has a stricter set of alignment rules than the drm
>      allocator.
>      +    * Therefore one can pass the ISL dimensions in terms of bytes
>      instead of
>      +    * trying to recalculate based on different format block sizes.
>      +    */
>      +   buf->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "ccs-miptree",
>      +                                      buf->pitch, buf->size /
>      buf->pitch,
>      +                                      1, &tiling, &pitch,
>      alloc_flags);
>      +   if (buf->bo) {
>      +      assert(pitch == buf->pitch);
>      +      assert(tiling == I915_TILING_Y);
>      +   } else {
>      +      free(buf);
>             return false;
>      +   }
>      +
>      +   mt->mcs_buf = buf;
>          /* From Gen9 onwards single-sampled (non-msrt) auxiliary buffers
>      are
>           * used for lossless compression which requires similar
>      initialisation
>      diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
>      b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
>      index 51ab664..38c00f8 100644
>      --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
>      +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
>      @@ -656,10 +656,6 @@ struct intel_mipmap_tree
>          GLuint refcount;
>       };
>      -void
>      -intel_get_non_msrt_mcs_alignment(const struct intel_mipmap_tree
>      *mt,
>      -                                 unsigned *width_px, unsigned
>      *height);
>      -
>       bool
>       intel_miptree_is_lossless_compressed(const struct brw_context *brw,
>                                            const struct intel_mipmap_tree
>      *mt);
>      --
>      2.5.5
>      _______________________________________________
>      mesa-dev mailing list
>      [3]mesa-dev at lists.freedesktop.org
>      [4]https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
> References
> 
>    1. mailto:topi.pohjolainen at gmail.com
>    2. mailto:topi.pohjolainen at intel.com
>    3. mailto:mesa-dev at lists.freedesktop.org
>    4. https://lists.freedesktop.org/mailman/listinfo/mesa-dev