[Mesa-dev] [RFC 16/16] intel/blorp: Add gen7-8 support to ccs_ambiguate

Jason Ekstrand jason at jlekstrand.net
Thu May 18 21:01:03 UTC 2017


This patch is completely untested, but the math and logic should be
right.  It's mostly intended as an example of how to extend the pass.
---
 src/intel/blorp/blorp_clear.c | 80 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 63 insertions(+), 17 deletions(-)

diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
index 0df2dde..c995fdb 100644
--- a/src/intel/blorp/blorp_clear.c
+++ b/src/intel/blorp/blorp_clear.c
@@ -302,11 +302,12 @@ get_fast_clear_rect(const struct isl_device *dev,
    *y1 = ALIGN(*y1, y_align) / y_scaledown;
 }
 
-void
-blorp_fast_clear(struct blorp_batch *batch,
-                 const struct blorp_surf *surf, enum isl_format format,
-                 uint32_t level, uint32_t start_layer, uint32_t num_layers,
-                 uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1)
+static void
+fast_clear(struct blorp_batch *batch,
+           const struct blorp_surf *surf, enum isl_format format,
+           uint32_t level, uint32_t start_layer, uint32_t num_layers,
+           uint8_t clear_value,
+           uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1)
 {
    struct blorp_params params;
    blorp_params_init(&params);
@@ -317,7 +318,7 @@ blorp_fast_clear(struct blorp_batch *batch,
    params.x1 = x1;
    params.y1 = y1;
 
-   memset(&params.wm_inputs.clear_color, 0xff, 4*sizeof(float));
+   memset(&params.wm_inputs.clear_color, clear_value, 4*sizeof(float));
    params.fast_clear_op = BLORP_FAST_CLEAR_OP_CLEAR;
 
    get_fast_clear_rect(batch->blorp->isl_dev, surf->aux_surf,
@@ -333,6 +334,16 @@ blorp_fast_clear(struct blorp_batch *batch,
    batch->blorp->exec(batch, &params);
 }
 
+void
+blorp_fast_clear(struct blorp_batch *batch,
+                 const struct blorp_surf *surf, enum isl_format format,
+                 uint32_t level, uint32_t start_layer, uint32_t num_layers,
+                 uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1)
+{
+   fast_clear(batch, surf, format, level, start_layer, num_layers,
+              0xff, x0, y0, x1, y1);
+}
+
 static union isl_color_value
 swizzle_color_value(union isl_color_value src, struct isl_swizzle swizzle)
 {
@@ -743,15 +754,31 @@ blorp_ccs_ambiguate(struct blorp_batch *batch,
                     struct blorp_surf *surf, uint32_t level,
                     uint32_t layer, uint32_t z)
 {
-   struct blorp_params params;
-   blorp_params_init(&params);
-
-   assert(ISL_DEV_GEN(batch->blorp->isl_dev) >= 9);
+   assert(ISL_DEV_GEN(batch->blorp->isl_dev) >= 7);
 
    const struct isl_format_layout *aux_fmtl =
       isl_format_get_layout(surf->aux_surf->format);
+
    assert(aux_fmtl->txc == ISL_TXC_CCS);
 
+   const uint32_t width_px = minify(surf->surf->logical_level0_px.width, level);
+   const uint32_t height_px = minify(surf->surf->logical_level0_px.height, level);
+   if (ISL_DEV_GEN(batch->blorp->isl_dev) == 7) {
+      /* On gen7, we can do an ambiguate by simply doing a fast clear with a
+       * clear value of 0x0.
+       */
+      fast_clear(batch, surf, ISL_FORMAT_UNSUPPORTED, level, layer + z, 1,
+                 0x0, 0, 0, width_px, height_px);
+      return;
+   }
+
+   /* On Broadwell and above, the hardware ignores the value written out by
+    * the shader so we have to do things manually.
+    */
+
+   struct blorp_params params;
+   blorp_params_init(&params);
+
    params.dst = (struct brw_blorp_surface_info) {
       .enabled = true,
       .addr = surf->aux_addr,
@@ -776,8 +803,6 @@ blorp_ccs_ambiguate(struct blorp_batch *batch,
                                       &offset_B, &x_offset_el, &y_offset_el);
    params.dst.addr.offset += offset_B;
 
-   const uint32_t width_px = minify(surf->surf->logical_level0_px.width, level);
-   const uint32_t height_px = minify(surf->surf->logical_level0_px.height, level);
    const uint32_t width_el = DIV_ROUND_UP(width_px, aux_fmtl->bw);
    const uint32_t height_el = DIV_ROUND_UP(height_px, aux_fmtl->bh);
 
@@ -796,12 +821,33 @@ blorp_ccs_ambiguate(struct blorp_batch *batch,
     * maps to a Y-tiled cache line.  Fortunately, CCS layouts are calculated
     * with a very large alignment so we can round up without worrying about
     * overdraw.
+    *
+    * On Broadwell, it's a 16x32 block for a Y-tiled main surface and an 8x64
+    * block for an X-tiled main surface.
+    *
+    * We're going to be rendering as RGBA32 so each Y-tiled cache line is 1x4
+    * elements in the resulting color surface.
     */
-   assert(x_offset_el % 16 == 0 && y_offset_el % 4 == 0);
-   const uint32_t x_offset_rgba_px = x_offset_el / 16;
-   const uint32_t y_offset_rgba_px = y_offset_el / 4;
-   const uint32_t width_rgba_px = DIV_ROUND_UP(width_el, 16);
-   const uint32_t height_rgba_px = DIV_ROUND_UP(height_el, 4);
+   uint32_t scale_x, scale_y;
+   if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 9) {
+      scale_x = 16;
+      scale_y = 4;
+   } else {
+      assert(ISL_DEV_GEN(batch->blorp->isl_dev) == 8);
+      if (surf->surf->tiling == ISL_TILING_X) {
+         scale_x = 8;
+         scale_y = 16;
+      } else {
+         assert(surf->surf->tiling == ISL_TILING_Y0);
+         scale_x = 16;
+         scale_y = 8;
+      }
+   }
+   assert(x_offset_el % scale_x == 0 && y_offset_el % scale_y == 0);
+   const uint32_t x_offset_rgba_px = x_offset_el / scale_x;
+   const uint32_t y_offset_rgba_px = y_offset_el / scale_y;
+   const uint32_t width_rgba_px = DIV_ROUND_UP(width_el, scale_x);
+   const uint32_t height_rgba_px = DIV_ROUND_UP(height_el, scale_y);
 
    MAYBE_UNUSED bool ok =
       isl_surf_init(batch->blorp->isl_dev, &params.dst.surf,
-- 
2.5.0.400.gff86faf



More information about the mesa-dev mailing list