[Mesa-dev] [PATCH v2 14/24] intel/blorp: Add a CCS ambiguation pass

Sat Jan 20 01:25:41 UTC 2018

On Fri, Jan 19, 2018 at 3:47 PM, Jason Ekstrand <jason at jlekstrand.net>
wrote:

> This pass performs an "ambiguate" operation on a CCS-compressed surface
> by manually writing zeros into the CCS.  On gen8+, ISL gives us a fairly
> detailed notion of how the CCS is laid out so this is fairly simple to
> do.  On gen7, the CCS tiling is quite crazy but that isn't an issue
> because we can only do CCS on single-slice images so we can just blast
> over the entire CCS buffer if we want to.
> ---
>  src/intel/blorp/blorp.h       |   5 ++
>  src/intel/blorp/blorp_clear.c | 149 ++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 154 insertions(+)
>
> diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
> index a1dd571..478a9af 100644
> --- a/src/intel/blorp/blorp.h
> +++ b/src/intel/blorp/blorp.h
> @@ -204,6 +204,11 @@ blorp_ccs_resolve(struct blorp_batch *batch,
>                    enum blorp_fast_clear_op resolve_op);
>
>  void
> +blorp_ccs_ambiguate(struct blorp_batch *batch,
> +                    struct blorp_surf *surf,
> +                    uint32_t level, uint32_t layer);
> +
> +void
>  blorp_mcs_partial_resolve(struct blorp_batch *batch,
>                            struct blorp_surf *surf,
>                            enum isl_format format,
> diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
> index 8e7bc9f..fa2abd9 100644
> --- a/src/intel/blorp/blorp_clear.c
> +++ b/src/intel/blorp/blorp_clear.c
> @@ -881,3 +881,152 @@ blorp_mcs_partial_resolve(struct blorp_batch *batch,
>
>     batch->blorp->exec(batch, &params);
>  }
> +
> +/** Clear a CCS to the "uncompressed" state
> + *
> + * This pass is the CCS equivalent of a "HiZ resolve".  It sets the CCS values
> + * for a given layer/level of a surface to 0x0 which is the "uncompressed"
> + * state which tells the sampler to go look at the main surface.
> + */
> +void
> +blorp_ccs_ambiguate(struct blorp_batch *batch,
> +                    struct blorp_surf *surf,
> +                    uint32_t level, uint32_t layer)
> +{
> +   struct blorp_params params;
> +   blorp_params_init(&params);
> +
> +   assert(ISL_DEV_GEN(batch->blorp->isl_dev) >= 7);
> +
> +   const struct isl_format_layout *aux_fmtl =
> +      isl_format_get_layout(surf->aux_surf->format);
> +   assert(aux_fmtl->txc == ISL_TXC_CCS);
> +
> +   params.dst = (struct brw_blorp_surface_info) {
> +      .enabled = true,
> +      .addr = surf->aux_addr,
> +      .view = {
> +         .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
> +         .format = ISL_FORMAT_R32G32B32A32_UINT,
> +         .base_level = 0,
> +         .base_array_layer = 0,
> +         .levels = 1,
> +         .array_len = 1,
> +         .swizzle = ISL_SWIZZLE_IDENTITY,
> +      },
> +   };
> +
> +   uint32_t z = 0;
> +   if (surf->surf->dim == ISL_SURF_DIM_3D) {
> +      z = layer;
> +      layer = 0;
> +   }
> +
> +   uint32_t offset_B, x_offset_el, y_offset_el;
> +   isl_surf_get_image_offset_el(surf->aux_surf, level, layer, z,
> +                                &x_offset_el, &y_offset_el);
> +   isl_tiling_get_intratile_offset_el(surf->aux_surf->tiling, aux_fmtl->bpb,
> +                                      surf->aux_surf->row_pitch,
> +                                      x_offset_el, y_offset_el,
> +                                      &offset_B, &x_offset_el, &y_offset_el);
> +   params.dst.addr.offset += offset_B;
> +
> +   const uint32_t width_px = minify(surf->surf->logical_level0_px.width, level);
> +   const uint32_t height_px = minify(surf->surf->logical_level0_px.height, level);
> +   const uint32_t width_el = DIV_ROUND_UP(width_px, aux_fmtl->bw);
> +   const uint32_t height_el = DIV_ROUND_UP(height_px, aux_fmtl->bh);
> +
> +   struct isl_tile_info ccs_tile_info;
> +   isl_surf_get_tile_info(surf->aux_surf, &ccs_tile_info);
> +
> +   /* We're going to map it as a regular RGBA32_UINT surface.  We need to
> +    * downscale a good deal.  We start by computing the area on the CCS to
> +    * clear in units of Y-tiled cache lines.
> +    */
> +   uint32_t x_offset_y_cl, y_offset_y_cl, width_y_cl, height_y_cl;
> +   if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 8) {
> +      /* From the Sky Lake PRM Vol. 12 in the section on planes:
> +       *
> +       *    "The Color Control Surface (CCS) contains the compression status
> +       *    of the cache-line pairs. The compression state of the cache-line
> +       *    pair is specified by 2 bits in the CCS.  Each CCS cache-line
> +       *    represents an area on the main surface of 16x16 sets of 128 byte
> +       *    Y-tiled cache-line-pairs. CCS is always Y tiled."
> +       *
> +       * Each 2-bit surface element in the CCS corresponds to a single
> +       * cache-line pair in the main surface.  This means that 16x16 el block
> +       * in the CCS maps to a Y-tiled cache line.  Fortunately, CCS layouts
> +       * are calculated with a very large alignment so we can round up to a
> +       * whole cache line without worrying about overdraw.
> +       */
> +
> +      /* On Broadwell and above, a CCS tile is the same as a Y tile when
> +       * viewed at the cache-line granularity.  Fortunately, the horizontal
> +       * and vertical alignment requirements of the CCS are such that we can
> +       * align to an entire cache line without worrying about crossing over
> +       * from one LOD to another.
> +       */
> +      const uint32_t scale_x = ccs_tile_info.logical_extent_el.w / 8;
> +      const uint32_t scale_y = ccs_tile_info.logical_extent_el.h / 8;
> +      assert(surf->aux_surf->image_alignment_el.w % scale_x == 0);
> +      assert(surf->aux_surf->image_alignment_el.h % scale_y == 0);
> +
> +      assert(x_offset_el % scale_x == 0 && y_offset_el % scale_y == 0);
> +      x_offset_y_cl = x_offset_el / scale_x;
> +      y_offset_y_cl = y_offset_el / scale_y;
> +      width_y_cl = DIV_ROUND_UP(width_el, scale_x);
> +      height_y_cl = DIV_ROUND_UP(height_el, scale_y);
> +   } else {
> +      /* On gen7, the CCS tiling is not so nice.  However, there we are
> +       * guaranteed that we only have a single level and slice so we don't
> +       * have to worry about it and can just align to a whole tile.
> +       */
> +      assert(x_offset_el == 0 && y_offset_el == 0);
> +      const uint32_t width_tl =
> +         DIV_ROUND_UP(width_el, ccs_tile_info.logical_extent_el.w);
> +      const uint32_t height_tl =
> +         DIV_ROUND_UP(height_el, ccs_tile_info.logical_extent_el.h);
> +      x_offset_y_cl = 0;
> +      y_offset_y_cl = 0;
> +      width_y_cl = width_tl * 8;
> +      height_y_cl = height_tl * 8;
> +   }
> +
> +   /* We're going to use a RGBA32 format so as to write data as quickly as
> +    * possible.  A y-tiled cache line will then be 1x4 px.
> +    */
> +   const uint32_t x_offset_rgba_px = x_offset_y_cl;
> +   const uint32_t y_offset_rgba_px = y_offset_y_cl * 4;
> +   const uint32_t width_rgba_px = width_y_cl;
> +   const uint32_t height_rgba_px = height_y_cl * 4;
> +
> +   MAYBE_UNUSED bool ok =
> +      isl_surf_init(batch->blorp->isl_dev, &params.dst.surf,
> +                    .dim = ISL_SURF_DIM_2D,
> +                    .format = ISL_FORMAT_R32G32B32A32_UINT,
> +                    .width = width_rgba_px + x_offset_rgba_px,
> +                    .height = height_rgba_px + y_offset_rgba_px,
> +                    .depth = 1,
> +                    .levels = 1,
> +                    .array_len = 1,
> +                    .samples = 1,
> +                    .row_pitch = surf->aux_surf->row_pitch,
> +                    .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
> +                    .tiling_flags = ISL_TILING_Y0_BIT);
> +   assert(ok);
> +   assert(offset_B + params.dst.surf.size <= surf->aux_surf->size);
>

This assertion is bogus.  I added it last-minute and didn't run Jenkins
between adding it and sending it out.

It's bogus because offset_B may offset you horizontally into the image.
When you start at offset_B and go to the right by a full stride's worth of
pages, you end up outside the CCS.  We will never actually render outside
the CCS because our width is much smaller than that.  If I tried hard
enough, I could come up with a better assertion, but I think it's best to
just delete it.

> +
> +   params.x0 = x_offset_rgba_px;
> +   params.y0 = y_offset_rgba_px;
> +   params.x1 = x_offset_rgba_px + width_rgba_px;
> +   params.y1 = y_offset_rgba_px + height_rgba_px;
> +
> +   /* A CCS value of 0 means "uncompressed." */
> +   memset(&params.wm_inputs.clear_color, 0,
> +          sizeof(params.wm_inputs.clear_color));
> +
> +   if (!blorp_params_get_clear_kernel(batch->blorp, &params, true))
> +      return;
> +
> +   batch->blorp->exec(batch, &params);
> +}
> --
> 2.5.0.400.gff86faf
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180119/a1b9d68e/attachment-0001.html>